[dpdk-dev] [PATCH v3] mem: calculate space left in a hugetlbfs

Jianfeng Tan jianfeng.tan at intel.com
Wed Nov 18 03:42:17 CET 2015


Currently DPDK does not respect the quota of a hugetlbfs mount.
It will fail to init the EAL because it tries to map the number of
free hugepages in the system rather than using the number specified
in the quota for that mount.

To solve this issue, we take the quota into consideration when
calculating the number of hugepages to map.  We use either the number
specified in the quota, or number of available hugepages, whichever
is lower.

Race conditions are still possible when multiple applications
allocate hugepages from different hugetlbfs mounts of the same page
size, so the recommended setup is a hugepage pool large enough to
satisfy the quotas of all hugetlbfs mounts.

There is, however, still an open issue with
CONFIG_RTE_EAL_SINGLE_FILE_SEGMENTS. When this option is enabled
(IVSHMEM target does this by default), having hugetlbfs mounts with
quota will fail to remap hugepages because it relies on having
mapped all free hugepages in the system.

Signed-off-by: Jianfeng Tan <jianfeng.tan at intel.com>
---
v3 changes:
 - commit msg rework
 - add hpi->quota to record quota of each hugetlbfs
 - get_hugepage_dir -> get_hugetlbfs_mnt_info to fill hugedir and quota
 - add info in release note

v2 changes:
 - reword title
 - fix compiler error of v1

 doc/guides/rel_notes/release_2_2.rst            |   5 +
 lib/librte_eal/common/eal_internal_cfg.h        |   1 +
 lib/librte_eal/linuxapp/eal/eal_hugepage_info.c | 145 +++++++++++++++---------
 3 files changed, 98 insertions(+), 53 deletions(-)

diff --git a/doc/guides/rel_notes/release_2_2.rst b/doc/guides/rel_notes/release_2_2.rst
index 0781ae6..5b8777a 100644
--- a/doc/guides/rel_notes/release_2_2.rst
+++ b/doc/guides/rel_notes/release_2_2.rst
@@ -102,6 +102,11 @@ New Features
 
 * **Added port hotplug support to xenvirt.**
 
+* **Added support for taking the hugetlbfs mount quota into account.**
+
+  Take the quota into consideration when calculating the number of hugepages
+  to map. We use either the number specified in the quota, or the number of
+  available hugepages, whichever is lower.
 
 Resolved Issues
 ---------------
diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
index 5f1367e..38ca410 100644
--- a/lib/librte_eal/common/eal_internal_cfg.h
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -50,6 +50,7 @@
  */
 struct hugepage_info {
 	uint64_t hugepage_sz;   /**< size of a huge page */
+	uint64_t quota;   /**< quota of a hugetlbfs */
 	const char *hugedir;    /**< dir where hugetlbfs is mounted */
 	uint32_t num_pages[RTE_MAX_NUMA_NODES];
 				/**< number of hugepages of that size on each socket */
diff --git a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c
index 18858e2..612d87d 100644
--- a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c
+++ b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c
@@ -44,6 +44,8 @@
 #include <unistd.h>
 #include <errno.h>
 #include <sys/queue.h>
+#include <sys/vfs.h>
+#include <mntent.h>
 
 #include <rte_memory.h>
 #include <rte_memzone.h>
@@ -124,71 +126,90 @@ get_default_hp_size(void)
 	return size;
 }
 
-static const char *
-get_hugepage_dir(uint64_t hugepage_sz)
+static void
+get_hugetlbfs_mnt_info(struct hugepage_info *hpi)
 {
-	enum proc_mount_fieldnames {
-		DEVICE = 0,
-		MOUNTPT,
-		FSTYPE,
-		OPTIONS,
-		_FIELDNAME_MAX
-	};
+	FILE *f;
+	struct mntent *ent;
+	char *str_size;
+	char *str_pagesz;
+	uint64_t pagesz;
+
+	static const char *proc_mounts = "/proc/mounts";
+	static const char *hugetlbfs_str = "hugetlbfs";
+	static const char *opt_pagesize = "pagesize";
+	static const size_t opt_pagesize_len = sizeof("pagesize") - 1;
+	static const char *opt_size = "size";
+	static const size_t opt_size_len = sizeof("size") - 1;
 	static uint64_t default_size = 0;
-	const char proc_mounts[] = "/proc/mounts";
-	const char hugetlbfs_str[] = "hugetlbfs";
-	const size_t htlbfs_str_len = sizeof(hugetlbfs_str) - 1;
-	const char pagesize_opt[] = "pagesize=";
-	const size_t pagesize_opt_len = sizeof(pagesize_opt) - 1;
-	const char split_tok = ' ';
-	char *splitstr[_FIELDNAME_MAX];
-	char buf[BUFSIZ];
-	char *retval = NULL;
-
-	FILE *fd = fopen(proc_mounts, "r");
-	if (fd == NULL)
-		rte_panic("Cannot open %s\n", proc_mounts);
 
 	if (default_size == 0)
 		default_size = get_default_hp_size();
 
-	while (fgets(buf, sizeof(buf), fd)){
-		if (rte_strsplit(buf, sizeof(buf), splitstr, _FIELDNAME_MAX,
-				split_tok) != _FIELDNAME_MAX) {
-			RTE_LOG(ERR, EAL, "Error parsing %s\n", proc_mounts);
-			break; /* return NULL */
-		}
+	f = setmntent(proc_mounts, "r");
+	if (f == NULL)
+		rte_panic("Cannot open %s\n", proc_mounts);
+
+	while (NULL != (ent = getmntent(f))) {
+
+		if (strcmp(ent->mnt_type, hugetlbfs_str) != 0)
+			continue;
 
 		/* we have a specified --huge-dir option, only examine that dir */
 		if (internal_config.hugepage_dir != NULL &&
-				strcmp(splitstr[MOUNTPT], internal_config.hugepage_dir) != 0)
+				strcmp(ent->mnt_dir, internal_config.hugepage_dir) != 0)
 			continue;
 
-		if (strncmp(splitstr[FSTYPE], hugetlbfs_str, htlbfs_str_len) == 0){
-			const char *pagesz_str = strstr(splitstr[OPTIONS], pagesize_opt);
-
-			/* if no explicit page size, the default page size is compared */
-			if (pagesz_str == NULL){
-				if (hugepage_sz == default_size){
-					retval = strdup(splitstr[MOUNTPT]);
-					break;
-				}
-			}
-			/* there is an explicit page size, so check it */
-			else {
-				uint64_t pagesz = rte_str_to_size(&pagesz_str[pagesize_opt_len]);
-				if (pagesz == hugepage_sz) {
-					retval = strdup(splitstr[MOUNTPT]);
-					break;
-				}
-			}
-		} /* end if strncmp hugetlbfs */
-	} /* end while fgets */
+		str_pagesz = hasmntopt(ent, opt_pagesize);
+		/* if no explicit page size, the default page size is compared */
+		if (!str_pagesz)
+			pagesz = default_size;
+		/* there is an explicit page size, so check it */
+		else
+			pagesz = rte_str_to_size(&str_pagesz[opt_pagesize_len + 1]);
 
-	fclose(fd);
-	return retval;
+		if (pagesz == hpi->hugepage_sz)
+			break;
+	}
+
+	if (ent == NULL) {
+		hpi->hugedir = NULL;
+		goto end;
+	}
+
+	hpi->hugedir = strdup(ent->mnt_dir);
+
+	str_size = hasmntopt(ent, opt_size);
+	if (str_size == NULL) {
+		RTE_LOG(DEBUG, EAL, "size not specified for %s\n",
+			hpi->hugedir);
+		hpi->quota = 0;
+		goto end;
+	}
+	hpi->quota = rte_str_to_size(&str_size[opt_size_len + 1]);
+
+end:
+	endmntent(f);
 }
 
+/* Return statfs() f_bfree for mnt_dir as uint32_t; panics if statfs() fails.
+ * Caller must ensure the mount has a size option so the statfs data is valid.
+ */
+static uint32_t
+get_hugetlbfs_free_pages(const char *mnt_dir)
+{
+	int r;
+	struct statfs stats;
+
+	r = statfs(mnt_dir, &stats);
+	if (r != 0)
+		rte_panic("statfs() %s error: %s\n",
+				mnt_dir, strerror(errno));
+
+	return (uint32_t)stats.f_bfree;
+}
+
+
 /*
  * Clear the hugepage directory of whatever hugepage files
  * there are. Checks if the file is locked (i.e.
@@ -300,7 +321,8 @@ eal_hugepage_info_init(void)
 		hpi = &internal_config.hugepage_info[num_sizes];
 		hpi->hugepage_sz =
 			rte_str_to_size(&dirent->d_name[dirent_start_len]);
-		hpi->hugedir = get_hugepage_dir(hpi->hugepage_sz);
+
+		get_hugetlbfs_mnt_info(hpi);
 
 		/* first, check if we have a mountpoint */
 		if (hpi->hugedir == NULL) {
@@ -329,9 +351,26 @@ eal_hugepage_info_init(void)
 		if (clear_hugedir(hpi->hugedir) == -1)
 			break;
 
+		uint32_t num_left, num_statfs;
+		num_left = get_num_hugepages(dirent->d_name);
+		if (hpi->quota) {
+			/* when option size is specified, calculate free
+			 * pages left in this hugetlbfs using statfs.
+			 */
+			num_statfs = get_hugetlbfs_free_pages(hpi->hugedir);
+			RTE_LOG(DEBUG, EAL,
+					"%u free hugepages from a quota of 0x%" PRIx64
+					", of size 0x%" PRIx64 " mounted at %s\n",
+					num_statfs,
+					hpi->quota,
+					hpi->hugepage_sz,
+					hpi->hugedir);
+			num_left = RTE_MIN(num_left, num_statfs);
+		}
+
 		/* for now, put all pages into socket 0,
 		 * later they will be sorted */
-		hpi->num_pages[0] = get_num_hugepages(dirent->d_name);
+		hpi->num_pages[0] = num_left;
 
 #ifndef RTE_ARCH_64
 		/* for 32-bit systems, limit number of hugepages to
-- 
2.1.4



More information about the dev mailing list