[dpdk-dev] [PATCH v9 03/21] malloc: index heaps using heap ID rather than NUMA node

Anatoly Burakov anatoly.burakov at intel.com
Tue Oct 2 15:34:41 CEST 2018


Switch all parts of EAL over to using heap ID instead of NUMA node
ID to identify heaps. The heap ID for DPDK-internal heaps is the
NUMA node's index within the detected NUMA node list, while the
heap ID for external heaps will be assigned in the order of their
creation.

This breaks the ABI, so document the changes.

Signed-off-by: Anatoly Burakov <anatoly.burakov at intel.com>
---
 config/common_base                            |   1 +
 config/rte_config.h                           |   1 +
 doc/guides/rel_notes/release_18_11.rst        |   5 +-
 .../common/include/rte_eal_memconfig.h        |   4 +-
 .../common/include/rte_malloc_heap.h          |   1 +
 lib/librte_eal/common/malloc_heap.c           | 102 +++++++++++++-----
 lib/librte_eal/common/malloc_heap.h           |   3 +
 lib/librte_eal/common/rte_malloc.c            |  41 ++++---
 8 files changed, 114 insertions(+), 44 deletions(-)
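
For context, a minimal sketch of the socket-to-heap-ID relationship described
in the commit message, using only public API. This is illustrative and not
part of the patch; the in-tree conversion is the new malloc_socket_to_heap_id()
helper added below.

#include <rte_lcore.h>

/* The heap ID of a DPDK-internal heap equals the socket's index in the
 * detected NUMA node list, so it can be recovered via rte_socket_id_by_idx().
 * External heaps (added later in this series) take subsequent slots in
 * creation order.
 */
static int
internal_heap_id_for_socket(int socket_id)
{
	unsigned int i;

	for (i = 0; i < rte_socket_count(); i++) {
		if (rte_socket_id_by_idx(i) == socket_id)
			return (int)i; /* heap ID == index in detected node list */
	}
	return -1; /* socket is not a detected NUMA node */
}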

diff --git a/config/common_base b/config/common_base
index acc5211bc..83350e0b1 100644
--- a/config/common_base
+++ b/config/common_base
@@ -61,6 +61,7 @@ CONFIG_RTE_CACHE_LINE_SIZE=64
 CONFIG_RTE_LIBRTE_EAL=y
 CONFIG_RTE_MAX_LCORE=128
 CONFIG_RTE_MAX_NUMA_NODES=8
+CONFIG_RTE_MAX_HEAPS=32
 CONFIG_RTE_MAX_MEMSEG_LISTS=64
 # each memseg list will be limited to either RTE_MAX_MEMSEG_PER_LIST pages
 # or RTE_MAX_MEM_MB_PER_LIST megabytes worth of memory, whichever is smaller
diff --git a/config/rte_config.h b/config/rte_config.h
index 20c58dff1..816e6f879 100644
--- a/config/rte_config.h
+++ b/config/rte_config.h
@@ -24,6 +24,7 @@
 #define RTE_BUILD_SHARED_LIB
 
 /* EAL defines */
+#define RTE_MAX_HEAPS 32
 #define RTE_MAX_MEMSEG_LISTS 128
 #define RTE_MAX_MEMSEG_PER_LIST 8192
 #define RTE_MAX_MEM_MB_PER_LIST 32768
diff --git a/doc/guides/rel_notes/release_18_11.rst b/doc/guides/rel_notes/release_18_11.rst
index bc1d56130..0607a3980 100644
--- a/doc/guides/rel_notes/release_18_11.rst
+++ b/doc/guides/rel_notes/release_18_11.rst
@@ -165,7 +165,10 @@ ABI Changes
            of memory addressed by the segment list
          - structure ``rte_memseg_list`` now has a new flag indicating whether
            the memseg list refers to external memory
-
+         - structure ``rte_malloc_heap`` now has a new field indicating socket
+           ID the malloc heap belongs to
+         - structure ``rte_mem_config`` has had its ``malloc_heaps`` array
+           resized from ``RTE_MAX_NUMA_NODES`` to ``RTE_MAX_HEAPS`` value
 
 Removed Items
 -------------
diff --git a/lib/librte_eal/common/include/rte_eal_memconfig.h b/lib/librte_eal/common/include/rte_eal_memconfig.h
index 645288b02..7634bff5d 100644
--- a/lib/librte_eal/common/include/rte_eal_memconfig.h
+++ b/lib/librte_eal/common/include/rte_eal_memconfig.h
@@ -72,8 +72,8 @@ struct rte_mem_config {
 
 	struct rte_tailq_head tailq_head[RTE_MAX_TAILQ]; /**< Tailqs for objects */
 
-	/* Heaps of Malloc per socket */
-	struct malloc_heap malloc_heaps[RTE_MAX_NUMA_NODES];
+	/* Heaps of Malloc */
+	struct malloc_heap malloc_heaps[RTE_MAX_HEAPS];
 
 	/* address of mem_config in primary process. used to map shared config into
 	 * exact same address the primary process maps it.
diff --git a/lib/librte_eal/common/include/rte_malloc_heap.h b/lib/librte_eal/common/include/rte_malloc_heap.h
index d43fa9097..d432cef88 100644
--- a/lib/librte_eal/common/include/rte_malloc_heap.h
+++ b/lib/librte_eal/common/include/rte_malloc_heap.h
@@ -26,6 +26,7 @@ struct malloc_heap {
 	struct malloc_elem *volatile last;
 
 	unsigned alloc_count;
+	unsigned int socket_id;
 	size_t total_size;
 } __rte_cache_aligned;
 
diff --git a/lib/librte_eal/common/malloc_heap.c b/lib/librte_eal/common/malloc_heap.c
index 3c8e2063b..a9cfa423f 100644
--- a/lib/librte_eal/common/malloc_heap.c
+++ b/lib/librte_eal/common/malloc_heap.c
@@ -66,6 +66,21 @@ check_hugepage_sz(unsigned flags, uint64_t hugepage_sz)
 	return check_flag & flags;
 }
 
+int
+malloc_socket_to_heap_id(unsigned int socket_id)
+{
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	int i;
+
+	for (i = 0; i < RTE_MAX_HEAPS; i++) {
+		struct malloc_heap *heap = &mcfg->malloc_heaps[i];
+
+		if (heap->socket_id == socket_id)
+			return i;
+	}
+	return -1;
+}
+
 /*
  * Expand the heap with a memory area.
  */
@@ -93,12 +108,17 @@ malloc_add_seg(const struct rte_memseg_list *msl,
 	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
 	struct rte_memseg_list *found_msl;
 	struct malloc_heap *heap;
-	int msl_idx;
+	int msl_idx, heap_idx;
 
 	if (msl->external)
 		return 0;
 
-	heap = &mcfg->malloc_heaps[msl->socket_id];
+	heap_idx = malloc_socket_to_heap_id(msl->socket_id);
+	if (heap_idx < 0) {
+		RTE_LOG(ERR, EAL, "Memseg list has invalid socket id\n");
+		return -1;
+	}
+	heap = &mcfg->malloc_heaps[heap_idx];
 
 	/* msl is const, so find it */
 	msl_idx = msl - mcfg->memsegs;
@@ -111,6 +131,7 @@ malloc_add_seg(const struct rte_memseg_list *msl,
 	malloc_heap_add_memory(heap, found_msl, ms->addr, len);
 
 	heap->total_size += len;
+	heap->socket_id = msl->socket_id;
 
 	RTE_LOG(DEBUG, EAL, "Added %zuM to heap on socket %i\n", len >> 20,
 			msl->socket_id);
@@ -561,12 +582,14 @@ alloc_more_mem_on_socket(struct malloc_heap *heap, size_t size, int socket,
 
 /* this will try lower page sizes first */
 static void *
-heap_alloc_on_socket(const char *type, size_t size, int socket,
-		unsigned int flags, size_t align, size_t bound, bool contig)
+malloc_heap_alloc_on_heap_id(const char *type, size_t size,
+		unsigned int heap_id, unsigned int flags, size_t align,
+		size_t bound, bool contig)
 {
 	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
-	struct malloc_heap *heap = &mcfg->malloc_heaps[socket];
+	struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id];
 	unsigned int size_flags = flags & ~RTE_MEMZONE_SIZE_HINT_ONLY;
+	int socket_id;
 	void *ret;
 
 	rte_spinlock_lock(&(heap->lock));
@@ -584,12 +607,28 @@ heap_alloc_on_socket(const char *type, size_t size, int socket,
 	 * we may still be able to allocate memory from appropriate page sizes,
 	 * we just need to request more memory first.
 	 */
+
+	socket_id = rte_socket_id_by_idx(heap_id);
+	/*
+	 * if socket ID is negative, we cannot find a socket ID for this heap -
+	 * which means it's an external heap. those can have unexpected page
+	 * sizes, so if the user asked to allocate from there - assume user
+	 * knows what they're doing, and allow allocating from there with any
+	 * page size flags.
+	 */
+	if (socket_id < 0)
+		size_flags |= RTE_MEMZONE_SIZE_HINT_ONLY;
+
 	ret = heap_alloc(heap, type, size, size_flags, align, bound, contig);
 	if (ret != NULL)
 		goto alloc_unlock;
 
-	if (!alloc_more_mem_on_socket(heap, size, socket, flags, align, bound,
-			contig)) {
+	/* if socket ID is invalid, this is an external heap */
+	if (socket_id < 0)
+		goto alloc_unlock;
+
+	if (!alloc_more_mem_on_socket(heap, size, socket_id, flags, align,
+			bound, contig)) {
 		ret = heap_alloc(heap, type, size, flags, align, bound, contig);
 
 		/* this should have succeeded */
@@ -605,7 +644,7 @@ void *
 malloc_heap_alloc(const char *type, size_t size, int socket_arg,
 		unsigned int flags, size_t align, size_t bound, bool contig)
 {
-	int socket, i, cur_socket;
+	int socket, heap_id, i;
 	void *ret;
 
 	/* return NULL if size is 0 or alignment is not power-of-2 */
@@ -620,22 +659,25 @@ malloc_heap_alloc(const char *type, size_t size, int socket_arg,
 	else
 		socket = socket_arg;
 
-	/* Check socket parameter */
-	if (socket >= RTE_MAX_NUMA_NODES)
+	/* turn socket ID into heap ID */
+	heap_id = malloc_socket_to_heap_id(socket);
+	/* if heap id is negative, socket ID was invalid */
+	if (heap_id < 0)
 		return NULL;
 
-	ret = heap_alloc_on_socket(type, size, socket, flags, align, bound,
-			contig);
+	ret = malloc_heap_alloc_on_heap_id(type, size, heap_id, flags, align,
+			bound, contig);
 	if (ret != NULL || socket_arg != SOCKET_ID_ANY)
 		return ret;
 
-	/* try other heaps */
+	/* try other heaps. we are only iterating through native DPDK sockets,
+	 * so external heaps won't be included.
+	 */
 	for (i = 0; i < (int) rte_socket_count(); i++) {
-		cur_socket = rte_socket_id_by_idx(i);
-		if (cur_socket == socket)
+		if (i == heap_id)
 			continue;
-		ret = heap_alloc_on_socket(type, size, cur_socket, flags,
-				align, bound, contig);
+		ret = malloc_heap_alloc_on_heap_id(type, size, i, flags, align,
+				bound, contig);
 		if (ret != NULL)
 			return ret;
 	}
@@ -643,11 +685,11 @@ malloc_heap_alloc(const char *type, size_t size, int socket_arg,
 }
 
 static void *
-heap_alloc_biggest_on_socket(const char *type, int socket, unsigned int flags,
-		size_t align, bool contig)
+heap_alloc_biggest_on_heap_id(const char *type, unsigned int heap_id,
+		unsigned int flags, size_t align, bool contig)
 {
 	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
-	struct malloc_heap *heap = &mcfg->malloc_heaps[socket];
+	struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id];
 	void *ret;
 
 	rte_spinlock_lock(&(heap->lock));
@@ -665,7 +707,7 @@ void *
 malloc_heap_alloc_biggest(const char *type, int socket_arg, unsigned int flags,
 		size_t align, bool contig)
 {
-	int socket, i, cur_socket;
+	int socket, i, cur_socket, heap_id;
 	void *ret;
 
 	/* return NULL if align is not power-of-2 */
@@ -680,11 +722,13 @@ malloc_heap_alloc_biggest(const char *type, int socket_arg, unsigned int flags,
 	else
 		socket = socket_arg;
 
-	/* Check socket parameter */
-	if (socket >= RTE_MAX_NUMA_NODES)
+	/* turn socket ID into heap ID */
+	heap_id = malloc_socket_to_heap_id(socket);
+	/* if heap id is negative, socket ID was invalid */
+	if (heap_id < 0)
 		return NULL;
 
-	ret = heap_alloc_biggest_on_socket(type, socket, flags, align,
+	ret = heap_alloc_biggest_on_heap_id(type, heap_id, flags, align,
 			contig);
 	if (ret != NULL || socket_arg != SOCKET_ID_ANY)
 		return ret;
@@ -694,8 +738,8 @@ malloc_heap_alloc_biggest(const char *type, int socket_arg, unsigned int flags,
 		cur_socket = rte_socket_id_by_idx(i);
 		if (cur_socket == socket)
 			continue;
-		ret = heap_alloc_biggest_on_socket(type, cur_socket, flags,
-				align, contig);
+		ret = heap_alloc_biggest_on_heap_id(type, i, flags, align,
+				contig);
 		if (ret != NULL)
 			return ret;
 	}
@@ -760,7 +804,7 @@ malloc_heap_free(struct malloc_elem *elem)
 	/* ...of which we can't avail if we are in legacy mode, or if this is an
 	 * externally allocated segment.
 	 */
-	if (internal_config.legacy_mem || msl->external)
+	if (internal_config.legacy_mem || (msl->external > 0))
 		goto free_unlock;
 
 	/* check if we can free any memory back to the system */
@@ -917,7 +961,7 @@ malloc_heap_resize(struct malloc_elem *elem, size_t size)
 }
 
 /*
- * Function to retrieve data for heap on given socket
+ * Function to retrieve data for a given heap
  */
 int
 malloc_heap_get_stats(struct malloc_heap *heap,
@@ -955,7 +999,7 @@ malloc_heap_get_stats(struct malloc_heap *heap,
 }
 
 /*
- * Function to retrieve data for heap on given socket
+ * Function to retrieve data for a given heap
  */
 void
 malloc_heap_dump(struct malloc_heap *heap, FILE *f)
diff --git a/lib/librte_eal/common/malloc_heap.h b/lib/librte_eal/common/malloc_heap.h
index f52cb5559..61b844b6f 100644
--- a/lib/librte_eal/common/malloc_heap.h
+++ b/lib/librte_eal/common/malloc_heap.h
@@ -46,6 +46,9 @@ malloc_heap_get_stats(struct malloc_heap *heap,
 void
 malloc_heap_dump(struct malloc_heap *heap, FILE *f);
 
+int
+malloc_socket_to_heap_id(unsigned int socket_id);
+
 int
 rte_eal_malloc_heap_init(void);
 
diff --git a/lib/librte_eal/common/rte_malloc.c b/lib/librte_eal/common/rte_malloc.c
index 47ca5a742..73d6df31d 100644
--- a/lib/librte_eal/common/rte_malloc.c
+++ b/lib/librte_eal/common/rte_malloc.c
@@ -152,11 +152,20 @@ rte_malloc_get_socket_stats(int socket,
 		struct rte_malloc_socket_stats *socket_stats)
 {
 	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	int heap_idx, ret = -1;
 
-	if (socket >= RTE_MAX_NUMA_NODES || socket < 0)
-		return -1;
+	rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
 
-	return malloc_heap_get_stats(&mcfg->malloc_heaps[socket], socket_stats);
+	heap_idx = malloc_socket_to_heap_id(socket);
+	if (heap_idx < 0)
+		goto unlock;
+
+	ret = malloc_heap_get_stats(&mcfg->malloc_heaps[heap_idx],
+			socket_stats);
+unlock:
+	rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+
+	return ret;
 }
 
 /*
@@ -168,12 +177,14 @@ rte_malloc_dump_heaps(FILE *f)
 	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
 	unsigned int idx;
 
-	for (idx = 0; idx < rte_socket_count(); idx++) {
-		unsigned int socket = rte_socket_id_by_idx(idx);
-		fprintf(f, "Heap on socket %i:\n", socket);
-		malloc_heap_dump(&mcfg->malloc_heaps[socket], f);
+	rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+
+	for (idx = 0; idx < RTE_MAX_HEAPS; idx++) {
+		fprintf(f, "Heap id: %u\n", idx);
+		malloc_heap_dump(&mcfg->malloc_heaps[idx], f);
 	}
 
+	rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
 }
 
 /*
@@ -182,14 +193,19 @@ rte_malloc_dump_heaps(FILE *f)
 void
 rte_malloc_dump_stats(FILE *f, __rte_unused const char *type)
 {
-	unsigned int socket;
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	unsigned int heap_id;
 	struct rte_malloc_socket_stats sock_stats;
+
+	rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+
 	/* Iterate through all initialised heaps */
-	for (socket=0; socket< RTE_MAX_NUMA_NODES; socket++) {
-		if ((rte_malloc_get_socket_stats(socket, &sock_stats) < 0))
-			continue;
+	for (heap_id = 0; heap_id < RTE_MAX_HEAPS; heap_id++) {
+		struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id];
 
-		fprintf(f, "Socket:%u\n", socket);
+		malloc_heap_get_stats(heap, &sock_stats);
+
+		fprintf(f, "Heap id:%u\n", heap_id);
 		fprintf(f, "\tHeap_size:%zu,\n", sock_stats.heap_totalsz_bytes);
 		fprintf(f, "\tFree_size:%zu,\n", sock_stats.heap_freesz_bytes);
 		fprintf(f, "\tAlloc_size:%zu,\n", sock_stats.heap_allocsz_bytes);
@@ -198,6 +214,7 @@ rte_malloc_dump_stats(FILE *f, __rte_unused const char *type)
 		fprintf(f, "\tAlloc_count:%u,\n",sock_stats.alloc_count);
 		fprintf(f, "\tFree_count:%u,\n", sock_stats.free_count);
 	}
+	rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
 	return;
 }
 
-- 
2.17.1
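
A caller-facing usage sketch (illustrative, not part of the patch): the public
stats API continues to take socket IDs, and the conversion to a heap index now
happens internally via malloc_socket_to_heap_id().

#include <stdio.h>
#include <rte_lcore.h>
#include <rte_malloc.h>

/* Print basic stats for every DPDK-internal (per-socket) heap. */
static void
dump_internal_heap_stats(void)
{
	unsigned int i;

	for (i = 0; i < rte_socket_count(); i++) {
		int socket_id = rte_socket_id_by_idx(i);
		struct rte_malloc_socket_stats stats;

		/* returns -1 if there is no heap for this socket ID */
		if (rte_malloc_get_socket_stats(socket_id, &stats) < 0)
			continue;

		printf("socket %d: total %zu, free %zu, allocated %zu bytes\n",
				socket_id, stats.heap_totalsz_bytes,
				stats.heap_freesz_bytes,
				stats.heap_allocsz_bytes);
	}
}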

