[dpdk-dev] [PATCH 27/41] eal: add multiprocess init with memory hotplug

Anatoly Burakov anatoly.burakov at intel.com
Sat Mar 3 14:46:15 CET 2018


For legacy memory mode, attach to the primary's memseg lists and
map hugepages as before.

For non-legacy mode, preallocate all VA space and then sync the
local memory map with the primary process.

Signed-off-by: Anatoly Burakov <anatoly.burakov at intel.com>
---
 lib/librte_eal/bsdapp/eal/eal_hugepage_info.c   |  7 ++
 lib/librte_eal/common/eal_common_memory.c       | 99 +++++++++++++++++++++----
 lib/librte_eal/common/eal_hugepages.h           |  5 ++
 lib/librte_eal/linuxapp/eal/eal.c               | 18 +++--
 lib/librte_eal/linuxapp/eal/eal_hugepage_info.c | 53 ++++++++-----
 lib/librte_eal/linuxapp/eal/eal_memory.c        | 24 ++++--
 6 files changed, 159 insertions(+), 47 deletions(-)

diff --git a/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c b/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c
index be2dbf0..18e6e5e 100644
--- a/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c
+++ b/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c
@@ -103,3 +103,10 @@ eal_hugepage_info_init(void)
 
 	return 0;
 }
+
+/* FreeBSD has no memory hotplug, so this is a no-op stub that returns 0 */
+int
+eal_hugepage_info_read(void)
+{
+	return 0;
+}
diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c
index 457e239..a571e24 100644
--- a/lib/librte_eal/common/eal_common_memory.c
+++ b/lib/librte_eal/common/eal_common_memory.c
@@ -20,6 +20,7 @@
 #include <rte_errno.h>
 #include <rte_log.h>
 
+#include "eal_memalloc.h"
 #include "eal_private.h"
 #include "eal_internal_cfg.h"
 
@@ -147,19 +148,11 @@ alloc_memseg_list(struct rte_memseg_list *msl, uint64_t page_sz,
 	char name[RTE_FBARRAY_NAME_LEN];
 	int max_pages;
 	uint64_t mem_amount;
-	void *addr;
 
 	if (!internal_config.legacy_mem) {
 		mem_amount = get_mem_amount(page_sz);
 		max_pages = mem_amount / page_sz;
-
-		addr = eal_get_virtual_area(NULL, &mem_amount, page_sz, 0, 0);
-		if (addr == NULL) {
-			RTE_LOG(ERR, EAL, "Cannot reserve memory\n");
-			return -1;
-		}
 	} else {
-		addr = NULL;
 		/* numer of memsegs in each list, these will not be single-page
 		 * segments, so RTE_MAX_LEGACY_MEMSEG is like old default.
 		 */
@@ -177,7 +170,7 @@ alloc_memseg_list(struct rte_memseg_list *msl, uint64_t page_sz,
 
 	msl->hugepage_sz = page_sz;
 	msl->socket_id = socket_id;
-	msl->base_va = addr;
+	msl->base_va = NULL;
 
 	RTE_LOG(DEBUG, EAL, "Memseg list allocated: 0x%zxkB at socket %i\n",
 			page_sz >> 10, socket_id);
@@ -186,16 +179,46 @@ alloc_memseg_list(struct rte_memseg_list *msl, uint64_t page_sz,
 }
 
 static int
-memseg_init(void)
+alloc_va_space(struct rte_memseg_list *msl)
+{
+	const uint64_t pg_sz = msl->hugepage_sz;
+	/* VA size: one page per slot in the list's memseg array */
+	uint64_t va_len = pg_sz * msl->memseg_arr.len;
+	int mmap_flags = 0;
+	void *va;
+
+#ifdef RTE_ARCH_PPC_64
+	mmap_flags |= MAP_HUGETLB;
+#endif
+
+	va = eal_get_virtual_area(msl->base_va, &va_len, pg_sz, 0,
+			mmap_flags);
+	if (va != NULL) {
+		msl->base_va = va;
+		return 0;
+	}
+
+	if (rte_errno != EADDRNOTAVAIL)
+		RTE_LOG(ERR, EAL, "Cannot reserve memory\n");
+	else
+		RTE_LOG(ERR, EAL, "Could not mmap %llu bytes at [%p] - please use '--base-virtaddr' option\n",
+			(unsigned long long)va_len, msl->base_va);
+	return -1;
+}
+
+
+static int
+memseg_primary_init(void)
 {
 	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
 	int socket_id, hpi_idx, msl_idx = 0;
 	struct rte_memseg_list *msl;
 
-	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
-		RTE_LOG(ERR, EAL, "Secondary process not supported\n");
-		return -1;
-	}
+	/* if we start allocating memory segments for pages straight away, VA
+	 * space will become fragmented, reducing chances of success when
+	 * secondary process maps the same addresses. to fix this, allocate
+	 * fbarrays first, and then allocate VA space for them.
+	 */
 
 	/* create memseg lists */
 	for (hpi_idx = 0; hpi_idx < (int) internal_config.num_hugepage_sizes;
@@ -235,12 +258,55 @@ memseg_init(void)
 				total_segs += msl->memseg_arr.len;
 				total_mem = total_segs * msl->hugepage_sz;
 				type_msl_idx++;
+
+				/* no need to preallocate VA in legacy mode */
+				if (internal_config.legacy_mem)
+					continue;
+
+				if (alloc_va_space(msl)) {
+					RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list\n");
+					return -1;
+				}
 			}
 		}
 	}
 	return 0;
 }
 
+/*
+ * secondary process init: walk mem_config's memseg lists, attach to each
+ * non-empty one and, in non-legacy mode, reserve VA space for it.
+ */
+static int
+memseg_secondary_init(void)
+{
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	int i;
+
+	for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
+		struct rte_memseg_list *cur = &mcfg->memsegs[i];
+
+		/* lists whose memseg array has zero length are skipped */
+		if (cur->memseg_arr.len == 0)
+			continue;
+
+		if (rte_fbarray_attach(&cur->memseg_arr) != 0) {
+			RTE_LOG(ERR, EAL, "Cannot attach to primary process memseg lists\n");
+			return -1;
+		}
+
+		/* VA space is only preallocated in non-legacy mode */
+		if (!internal_config.legacy_mem &&
+				alloc_va_space(cur) != 0) {
+			RTE_LOG(ERR, EAL, "Cannot preallocate VA space for hugepage memory\n");
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
 static struct rte_memseg *
 virt2memseg(const void *addr, const struct rte_memseg_list *msl)
 {
@@ -480,7 +546,10 @@ rte_eal_memory_init(void)
 	int retval;
 	RTE_LOG(DEBUG, EAL, "Setting up physically contiguous memory...\n");
 
-	retval = memseg_init();
+	retval = rte_eal_process_type() == RTE_PROC_PRIMARY ?
+			memseg_primary_init() :
+			memseg_secondary_init();
+
 	if (retval < 0)
 		return -1;
 
diff --git a/lib/librte_eal/common/eal_hugepages.h b/lib/librte_eal/common/eal_hugepages.h
index f963ae5..38d0b04 100644
--- a/lib/librte_eal/common/eal_hugepages.h
+++ b/lib/librte_eal/common/eal_hugepages.h
@@ -34,4 +34,9 @@ struct hugepage_file {
  */
 int eal_hugepage_info_init(void);
 
+/**
+ * Read information about hugepages on Linux, but don't clear them out.
+ */
+int eal_hugepage_info_read(void);
+
 #endif /* EAL_HUGEPAGES_H */
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index d336c96..7a0d742 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -805,13 +805,17 @@ rte_eal_init(int argc, char **argv)
 			"KNI module inserted\n");
 	}
 
-	if (internal_config.no_hugetlbfs == 0 &&
-			internal_config.process_type != RTE_PROC_SECONDARY &&
-			eal_hugepage_info_init() < 0) {
-		rte_eal_init_alert("Cannot get hugepage information.");
-		rte_errno = EACCES;
-		rte_atomic32_clear(&run_once);
-		return -1;
+	if (internal_config.no_hugetlbfs == 0) {
+		/* rte_config isn't initialized yet */
+		ret = internal_config.process_type == RTE_PROC_PRIMARY ?
+				eal_hugepage_info_init() :
+				eal_hugepage_info_read();
+		if (ret < 0) {
+			rte_eal_init_alert("Cannot get hugepage information.");
+			rte_errno = EACCES;
+			rte_atomic32_clear(&run_once);
+			return -1;
+		}
 	}
 
 	if (internal_config.memory == 0 && internal_config.force_sockets == 0) {
diff --git a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c
index 7e2475f..7a4adce 100644
--- a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c
+++ b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c
@@ -6,6 +6,7 @@
 #include <sys/types.h>
 #include <sys/file.h>
 #include <dirent.h>
+#include <stdbool.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <stdio.h>
@@ -299,15 +300,9 @@ compare_hpi(const void *a, const void *b)
 	return hpi_b->hugepage_sz - hpi_a->hugepage_sz;
 }
 
-/*
- * when we initialize the hugepage info, everything goes
- * to socket 0 by default. it will later get sorted by memory
- * initialization procedure.
- */
-int
-eal_hugepage_info_init(void)
-{
-	const char dirent_start_text[] = "hugepages-";
+static int
+hugepage_info_init(bool clear_hugepages)
+{	const char dirent_start_text[] = "hugepages-";
 	const size_t dirent_start_len = sizeof(dirent_start_text) - 1;
 	unsigned int i, total_pages, num_sizes = 0;
 	DIR *dir;
@@ -350,18 +345,20 @@ eal_hugepage_info_init(void)
 			continue;
 		}
 
-		/* try to obtain a writelock */
-		hpi->lock_descriptor = open(hpi->hugedir, O_RDONLY);
+		if (clear_hugepages) {
+			/* try to obtain a writelock */
+			hpi->lock_descriptor = open(hpi->hugedir, O_RDONLY);
 
-		/* if blocking lock failed */
-		if (flock(hpi->lock_descriptor, LOCK_EX) == -1) {
-			RTE_LOG(CRIT, EAL,
-				"Failed to lock hugepage directory!\n");
-			break;
+			/* if blocking lock failed */
+			if (flock(hpi->lock_descriptor, LOCK_EX) == -1) {
+				RTE_LOG(CRIT, EAL,
+					"Failed to lock hugepage directory!\n");
+				break;
+			}
+			/* clear out the hugepages dir from unused pages */
+			if (clear_hugedir(hpi->hugedir) == -1)
+				break;
 		}
-		/* clear out the hugepages dir from unused pages */
-		if (clear_hugedir(hpi->hugedir) == -1)
-			break;
 
 		/*
 		 * first, try to put all hugepages into relevant sockets, but
@@ -417,10 +414,26 @@ eal_hugepage_info_init(void)
 			num_pages += hpi->num_pages[j];
 		}
 		if (internal_config.hugepage_info[i].hugedir != NULL &&
-				num_pages > 0)
+				(num_pages > 0 || !clear_hugepages))
 			return 0;
 	}
 
 	/* no valid hugepage mounts available, return error */
 	return -1;
 }
+
+/* read hugepage info without locking or clearing the hugepage directories */
+int
+eal_hugepage_info_read(void)
+{
+	return hugepage_info_init(false);
+}
+
+/* when we initialize the hugepage info, everything goes to socket 0 by
+ * default. it will later get sorted by memory initialization procedure.
+ */
+int
+eal_hugepage_info_init(void)
+{
+	return hugepage_info_init(true);
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index e0b4988..f74291f 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -1569,6 +1569,22 @@ eal_legacy_hugepage_attach(void)
 	return -1;
 }
 
+static int
+eal_hugepage_attach(void)
+{
+	/* sync local memory map with primary; zero means success */
+	if (eal_memalloc_sync_with_primary() == 0)
+		return 0;
+	RTE_LOG(ERR, EAL, "Could not map memory from primary process\n");
+	if (aslr_enabled() > 0) {
+		RTE_LOG(ERR, EAL, "It is recommended to "
+			"disable ASLR in the kernel "
+			"and retry running both primary "
+			"and secondary processes\n");
+	}
+	return -1;
+}
+
 int
 rte_eal_hugepage_init(void)
 {
@@ -1580,11 +1596,9 @@ rte_eal_hugepage_init(void)
 int
 rte_eal_hugepage_attach(void)
 {
-	if (internal_config.legacy_mem)
-		return eal_legacy_hugepage_attach();
-	else
-		RTE_LOG(ERR, EAL, "Secondary processes aren't supported yet\n");
-	return -1;
+	return internal_config.legacy_mem ?
+			eal_legacy_hugepage_attach() :
+			eal_hugepage_attach();
 }
 
 int
-- 
2.7.4


More information about the dev mailing list