[dpdk-dev] [PATCH v8 03/21] malloc: index heaps using heap ID rather than NUMA node

Anatoly Burakov anatoly.burakov at intel.com
Mon Oct 1 14:56:11 CEST 2018


Switch all parts of EAL over to using heap ID instead of NUMA
node ID to identify heaps. The heap ID for DPDK-internal heaps
is the NUMA node's index within the detected NUMA node list,
while the heap ID for external heaps will be assigned in the
order of their creation.
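For reference, the socket-to-heap-ID lookup introduced further down
in malloc_heap.c is a plain linear scan of the shared heap array.
A condensed sketch (the comments on ID assignment are editorial,
based on the description above):

	int
	malloc_socket_to_heap_id(unsigned int socket_id)
	{
		struct rte_mem_config *mcfg =
				rte_eal_get_configuration()->mem_config;
		int i;

		for (i = 0; i < RTE_MAX_HEAPS; i++) {
			/* internal heaps sit at indices matching their
			 * NUMA node's position in the detected node list;
			 * external heaps follow in creation order
			 */
			if (mcfg->malloc_heaps[i].socket_id == socket_id)
				return i;
		}
		return -1; /* no heap registered for this socket ID */
	}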

This breaks the ABI, so document the changes.

Signed-off-by: Anatoly Burakov <anatoly.burakov at intel.com>
---
 config/common_base                            |   1 +
 config/rte_config.h                           |   1 +
 doc/guides/rel_notes/release_18_11.rst        |   5 +-
 .../common/include/rte_eal_memconfig.h        |   4 +-
 .../common/include/rte_malloc_heap.h          |   1 +
 lib/librte_eal/common/malloc_heap.c           | 102 +++++++++++++-----
 lib/librte_eal/common/malloc_heap.h           |   3 +
 lib/librte_eal/common/rte_malloc.c            |  41 ++++---
 8 files changed, 114 insertions(+), 44 deletions(-)

diff --git a/config/common_base b/config/common_base
index 155c7d40e..b52770b27 100644
--- a/config/common_base
+++ b/config/common_base
@@ -61,6 +61,7 @@ CONFIG_RTE_CACHE_LINE_SIZE=64
 CONFIG_RTE_LIBRTE_EAL=y
 CONFIG_RTE_MAX_LCORE=128
 CONFIG_RTE_MAX_NUMA_NODES=8
+CONFIG_RTE_MAX_HEAPS=32
 CONFIG_RTE_MAX_MEMSEG_LISTS=64
 # each memseg list will be limited to either RTE_MAX_MEMSEG_PER_LIST pages
 # or RTE_MAX_MEM_MB_PER_LIST megabytes worth of memory, whichever is smaller
diff --git a/config/rte_config.h b/config/rte_config.h
index 567051b9c..5dd2ac1ad 100644
--- a/config/rte_config.h
+++ b/config/rte_config.h
@@ -24,6 +24,7 @@
 #define RTE_BUILD_SHARED_LIB
 
 /* EAL defines */
+#define RTE_MAX_HEAPS 32
 #define RTE_MAX_MEMSEG_LISTS 128
 #define RTE_MAX_MEMSEG_PER_LIST 8192
 #define RTE_MAX_MEM_MB_PER_LIST 32768
diff --git a/doc/guides/rel_notes/release_18_11.rst b/doc/guides/rel_notes/release_18_11.rst
index d55e12a27..c627c1e88 100644
--- a/doc/guides/rel_notes/release_18_11.rst
+++ b/doc/guides/rel_notes/release_18_11.rst
@@ -145,7 +145,10 @@ ABI Changes
            of memory addressed by the segment list
          - structure ``rte_memseg_list`` now has a new flag indicating whether
            the memseg list refers to external memory
-
+         - structure ``rte_malloc_heap`` now has a new field indicating the
+           socket ID the malloc heap belongs to
+         - structure ``rte_mem_config`` has had its ``malloc_heaps`` array
+           resized from ``RTE_MAX_NUMA_NODES`` to ``RTE_MAX_HEAPS``
 
 Removed Items
 -------------
diff --git a/lib/librte_eal/common/include/rte_eal_memconfig.h b/lib/librte_eal/common/include/rte_eal_memconfig.h
index 6baa6854f..d7920a4e0 100644
--- a/lib/librte_eal/common/include/rte_eal_memconfig.h
+++ b/lib/librte_eal/common/include/rte_eal_memconfig.h
@@ -72,8 +72,8 @@ struct rte_mem_config {
 
 	struct rte_tailq_head tailq_head[RTE_MAX_TAILQ]; /**< Tailqs for objects */
 
-	/* Heaps of Malloc per socket */
-	struct malloc_heap malloc_heaps[RTE_MAX_NUMA_NODES];
+	/* Heaps of Malloc */
+	struct malloc_heap malloc_heaps[RTE_MAX_HEAPS];
 
 	/* address of mem_config in primary process. used to map shared config into
 	 * exact same address the primary process maps it.
diff --git a/lib/librte_eal/common/include/rte_malloc_heap.h b/lib/librte_eal/common/include/rte_malloc_heap.h
index d43fa9097..e7ac32d42 100644
--- a/lib/librte_eal/common/include/rte_malloc_heap.h
+++ b/lib/librte_eal/common/include/rte_malloc_heap.h
@@ -27,6 +27,7 @@ struct malloc_heap {
 
 	unsigned alloc_count;
 	size_t total_size;
+	unsigned int socket_id;
 } __rte_cache_aligned;
 
 #endif /* _RTE_MALLOC_HEAP_H_ */
diff --git a/lib/librte_eal/common/malloc_heap.c b/lib/librte_eal/common/malloc_heap.c
index 3c8e2063b..a9cfa423f 100644
--- a/lib/librte_eal/common/malloc_heap.c
+++ b/lib/librte_eal/common/malloc_heap.c
@@ -66,6 +66,21 @@ check_hugepage_sz(unsigned flags, uint64_t hugepage_sz)
 	return check_flag & flags;
 }
 
+int
+malloc_socket_to_heap_id(unsigned int socket_id)
+{
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	int i;
+
+	for (i = 0; i < RTE_MAX_HEAPS; i++) {
+		struct malloc_heap *heap = &mcfg->malloc_heaps[i];
+
+		if (heap->socket_id == socket_id)
+			return i;
+	}
+	return -1;
+}
+
 /*
  * Expand the heap with a memory area.
  */
@@ -93,12 +108,17 @@ malloc_add_seg(const struct rte_memseg_list *msl,
 	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
 	struct rte_memseg_list *found_msl;
 	struct malloc_heap *heap;
-	int msl_idx;
+	int msl_idx, heap_idx;
 
 	if (msl->external)
 		return 0;
 
-	heap = &mcfg->malloc_heaps[msl->socket_id];
+	heap_idx = malloc_socket_to_heap_id(msl->socket_id);
+	if (heap_idx < 0) {
+		RTE_LOG(ERR, EAL, "Memseg list has invalid socket id\n");
+		return -1;
+	}
+	heap = &mcfg->malloc_heaps[heap_idx];
 
 	/* msl is const, so find it */
 	msl_idx = msl - mcfg->memsegs;
@@ -111,6 +131,7 @@ malloc_add_seg(const struct rte_memseg_list *msl,
 	malloc_heap_add_memory(heap, found_msl, ms->addr, len);
 
 	heap->total_size += len;
+	heap->socket_id = msl->socket_id;
 
 	RTE_LOG(DEBUG, EAL, "Added %zuM to heap on socket %i\n", len >> 20,
 			msl->socket_id);
@@ -561,12 +582,14 @@ alloc_more_mem_on_socket(struct malloc_heap *heap, size_t size, int socket,
 
 /* this will try lower page sizes first */
 static void *
-heap_alloc_on_socket(const char *type, size_t size, int socket,
-		unsigned int flags, size_t align, size_t bound, bool contig)
+malloc_heap_alloc_on_heap_id(const char *type, size_t size,
+		unsigned int heap_id, unsigned int flags, size_t align,
+		size_t bound, bool contig)
 {
 	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
-	struct malloc_heap *heap = &mcfg->malloc_heaps[socket];
+	struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id];
 	unsigned int size_flags = flags & ~RTE_MEMZONE_SIZE_HINT_ONLY;
+	int socket_id;
 	void *ret;
 
 	rte_spinlock_lock(&(heap->lock));
@@ -584,12 +607,28 @@ heap_alloc_on_socket(const char *type, size_t size, int socket,
 	 * we may still be able to allocate memory from appropriate page sizes,
 	 * we just need to request more memory first.
 	 */
+
+	socket_id = rte_socket_id_by_idx(heap_id);
+	/*
+	 * if socket ID is negative, we cannot find a socket ID for this heap -
+	 * which means it's an external heap. those can have unexpected page
+	 * sizes, so if the user asked to allocate from there - assume user
+	 * knows what they're doing, and allow allocating from there with any
+	 * page size flags.
+	 */
+	if (socket_id < 0)
+		size_flags |= RTE_MEMZONE_SIZE_HINT_ONLY;
+
 	ret = heap_alloc(heap, type, size, size_flags, align, bound, contig);
 	if (ret != NULL)
 		goto alloc_unlock;
 
-	if (!alloc_more_mem_on_socket(heap, size, socket, flags, align, bound,
-			contig)) {
+	/* if socket ID is invalid, this is an external heap */
+	if (socket_id < 0)
+		goto alloc_unlock;
+
+	if (!alloc_more_mem_on_socket(heap, size, socket_id, flags, align,
+			bound, contig)) {
 		ret = heap_alloc(heap, type, size, flags, align, bound, contig);
 
 		/* this should have succeeded */
@@ -605,7 +644,7 @@ void *
 malloc_heap_alloc(const char *type, size_t size, int socket_arg,
 		unsigned int flags, size_t align, size_t bound, bool contig)
 {
-	int socket, i, cur_socket;
+	int socket, heap_id, i;
 	void *ret;
 
 	/* return NULL if size is 0 or alignment is not power-of-2 */
@@ -620,22 +659,25 @@ malloc_heap_alloc(const char *type, size_t size, int socket_arg,
 	else
 		socket = socket_arg;
 
-	/* Check socket parameter */
-	if (socket >= RTE_MAX_NUMA_NODES)
+	/* turn socket ID into heap ID */
+	heap_id = malloc_socket_to_heap_id(socket);
+	/* if heap id is negative, socket ID was invalid */
+	if (heap_id < 0)
 		return NULL;
 
-	ret = heap_alloc_on_socket(type, size, socket, flags, align, bound,
-			contig);
+	ret = malloc_heap_alloc_on_heap_id(type, size, heap_id, flags, align,
+			bound, contig);
 	if (ret != NULL || socket_arg != SOCKET_ID_ANY)
 		return ret;
 
-	/* try other heaps */
+	/* try other heaps. we are only iterating through native DPDK sockets,
+	 * so external heaps won't be included.
+	 */
 	for (i = 0; i < (int) rte_socket_count(); i++) {
-		cur_socket = rte_socket_id_by_idx(i);
-		if (cur_socket == socket)
+		if (i == heap_id)
 			continue;
-		ret = heap_alloc_on_socket(type, size, cur_socket, flags,
-				align, bound, contig);
+		ret = malloc_heap_alloc_on_heap_id(type, size, i, flags, align,
+				bound, contig);
 		if (ret != NULL)
 			return ret;
 	}
@@ -643,11 +685,11 @@ malloc_heap_alloc(const char *type, size_t size, int socket_arg,
 }
 
 static void *
-heap_alloc_biggest_on_socket(const char *type, int socket, unsigned int flags,
-		size_t align, bool contig)
+heap_alloc_biggest_on_heap_id(const char *type, unsigned int heap_id,
+		unsigned int flags, size_t align, bool contig)
 {
 	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
-	struct malloc_heap *heap = &mcfg->malloc_heaps[socket];
+	struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id];
 	void *ret;
 
 	rte_spinlock_lock(&(heap->lock));
@@ -665,7 +707,7 @@ void *
 malloc_heap_alloc_biggest(const char *type, int socket_arg, unsigned int flags,
 		size_t align, bool contig)
 {
-	int socket, i, cur_socket;
+	int socket, i, cur_socket, heap_id;
 	void *ret;
 
 	/* return NULL if align is not power-of-2 */
@@ -680,11 +722,13 @@ malloc_heap_alloc_biggest(const char *type, int socket_arg, unsigned int flags,
 	else
 		socket = socket_arg;
 
-	/* Check socket parameter */
-	if (socket >= RTE_MAX_NUMA_NODES)
+	/* turn socket ID into heap ID */
+	heap_id = malloc_socket_to_heap_id(socket);
+	/* if heap id is negative, socket ID was invalid */
+	if (heap_id < 0)
 		return NULL;
 
-	ret = heap_alloc_biggest_on_socket(type, socket, flags, align,
+	ret = heap_alloc_biggest_on_heap_id(type, heap_id, flags, align,
 			contig);
 	if (ret != NULL || socket_arg != SOCKET_ID_ANY)
 		return ret;
@@ -694,8 +738,8 @@ malloc_heap_alloc_biggest(const char *type, int socket_arg, unsigned int flags,
 		cur_socket = rte_socket_id_by_idx(i);
 		if (cur_socket == socket)
 			continue;
-		ret = heap_alloc_biggest_on_socket(type, cur_socket, flags,
-				align, contig);
+		ret = heap_alloc_biggest_on_heap_id(type, i, flags, align,
+				contig);
 		if (ret != NULL)
 			return ret;
 	}
@@ -760,7 +804,7 @@ malloc_heap_free(struct malloc_elem *elem)
 	/* ...of which we can't avail if we are in legacy mode, or if this is an
 	 * externally allocated segment.
 	 */
-	if (internal_config.legacy_mem || msl->external)
+	if (internal_config.legacy_mem || (msl->external > 0))
 		goto free_unlock;
 
 	/* check if we can free any memory back to the system */
@@ -917,7 +961,7 @@ malloc_heap_resize(struct malloc_elem *elem, size_t size)
 }
 
 /*
- * Function to retrieve data for heap on given socket
+ * Function to retrieve data for a given heap
  */
 int
 malloc_heap_get_stats(struct malloc_heap *heap,
@@ -955,7 +999,7 @@ malloc_heap_get_stats(struct malloc_heap *heap,
 }
 
 /*
- * Function to retrieve data for heap on given socket
+ * Function to retrieve data for a given heap
  */
 void
 malloc_heap_dump(struct malloc_heap *heap, FILE *f)
diff --git a/lib/librte_eal/common/malloc_heap.h b/lib/librte_eal/common/malloc_heap.h
index f52cb5559..61b844b6f 100644
--- a/lib/librte_eal/common/malloc_heap.h
+++ b/lib/librte_eal/common/malloc_heap.h
@@ -46,6 +46,9 @@ malloc_heap_get_stats(struct malloc_heap *heap,
 void
 malloc_heap_dump(struct malloc_heap *heap, FILE *f);
 
+int
+malloc_socket_to_heap_id(unsigned int socket_id);
+
 int
 rte_eal_malloc_heap_init(void);
 
diff --git a/lib/librte_eal/common/rte_malloc.c b/lib/librte_eal/common/rte_malloc.c
index 47ca5a742..73d6df31d 100644
--- a/lib/librte_eal/common/rte_malloc.c
+++ b/lib/librte_eal/common/rte_malloc.c
@@ -152,11 +152,20 @@ rte_malloc_get_socket_stats(int socket,
 		struct rte_malloc_socket_stats *socket_stats)
 {
 	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	int heap_idx, ret = -1;
 
-	if (socket >= RTE_MAX_NUMA_NODES || socket < 0)
-		return -1;
+	rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
 
-	return malloc_heap_get_stats(&mcfg->malloc_heaps[socket], socket_stats);
+	heap_idx = malloc_socket_to_heap_id(socket);
+	if (heap_idx < 0)
+		goto unlock;
+
+	ret = malloc_heap_get_stats(&mcfg->malloc_heaps[heap_idx],
+			socket_stats);
+unlock:
+	rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+
+	return ret;
 }
 
 /*
@@ -168,12 +177,14 @@ rte_malloc_dump_heaps(FILE *f)
 	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
 	unsigned int idx;
 
-	for (idx = 0; idx < rte_socket_count(); idx++) {
-		unsigned int socket = rte_socket_id_by_idx(idx);
-		fprintf(f, "Heap on socket %i:\n", socket);
-		malloc_heap_dump(&mcfg->malloc_heaps[socket], f);
+	rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+
+	for (idx = 0; idx < RTE_MAX_HEAPS; idx++) {
+		fprintf(f, "Heap id: %u\n", idx);
+		malloc_heap_dump(&mcfg->malloc_heaps[idx], f);
 	}
 
+	rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
 }
 
 /*
@@ -182,14 +193,19 @@ rte_malloc_dump_heaps(FILE *f)
 void
 rte_malloc_dump_stats(FILE *f, __rte_unused const char *type)
 {
-	unsigned int socket;
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	unsigned int heap_id;
 	struct rte_malloc_socket_stats sock_stats;
+
+	rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+
 	/* Iterate through all initialised heaps */
-	for (socket=0; socket< RTE_MAX_NUMA_NODES; socket++) {
-		if ((rte_malloc_get_socket_stats(socket, &sock_stats) < 0))
-			continue;
+	for (heap_id = 0; heap_id < RTE_MAX_HEAPS; heap_id++) {
+		struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id];
 
-		fprintf(f, "Socket:%u\n", socket);
+		malloc_heap_get_stats(heap, &sock_stats);
+
+		fprintf(f, "Heap id:%u\n", heap_id);
 		fprintf(f, "\tHeap_size:%zu,\n", sock_stats.heap_totalsz_bytes);
 		fprintf(f, "\tFree_size:%zu,\n", sock_stats.heap_freesz_bytes);
 		fprintf(f, "\tAlloc_size:%zu,\n", sock_stats.heap_allocsz_bytes);
@@ -198,6 +214,7 @@ rte_malloc_dump_stats(FILE *f, __rte_unused const char *type)
 		fprintf(f, "\tAlloc_count:%u,\n",sock_stats.alloc_count);
 		fprintf(f, "\tFree_count:%u,\n", sock_stats.free_count);
 	}
+	rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
 	return;
 }
 
-- 
2.17.1

