|SUCCESS| [RFC,v4,3/3] app/test: add fastmem test suite

dpdklab at iol.unh.edu dpdklab at iol.unh.edu
Sat May 30 12:47:29 CEST 2026


Test-Label: ai-code-review
Test-Status: SUCCESS
https://dpdk.org/patch/164649

_AI Code Review_

=== Patch Review: series.patch (via Claude) ===
Review date: 2026-05-30

I'll review this fastmem library patch against DPDK coding standards and contribution guidelines.

---

**Commit:** (git sha not provided in patch context)

**Author:** Mattias Ronnblom <hofors at lysator.liu.se>

**Subject:** [RFC v4 2/3] lib: add fastmem library

**Summary:** This patch introduces a fast general-purpose small-object allocator for DPDK applications with per-lcore caches, NUMA awareness, and power-of-two size classes from 8B to 1MiB.

---

```diff
> diff --git a/lib/fastmem/rte_fastmem.c b/lib/fastmem/rte_fastmem.c
> new file mode 100644
> index 0000000000..4add00ce80
> --- /dev/null
> +++ b/lib/fastmem/rte_fastmem.c
> @@ -0,0 +1,2123 @@

[ ... ]

> +static struct fastmem_slab *
> +slab_acquire(struct fastmem_socket_state *socket, int socket_id)
> +{
> +	struct fastmem_slab *slab;
> +
> +	rte_spinlock_lock(&socket->lock);
> +
> +	if (socket->free_head == NULL) {
> +		int rc = grow_socket(socket, socket_id);
> +
> +		if (rc < 0) {
> +			rte_spinlock_unlock(&socket->lock);
> +			return NULL;
> +		}
> +	}
> +
> +	slab = socket->free_head;
> +	socket->free_head = slab->next_free;
> +	slab->next_free = NULL;
> +
> +	rte_spinlock_unlock(&socket->lock);
> +
> +	return slab;
> +}

In slab_acquire(), does this code leak the lock when grow_socket() succeeds 
but socket->free_head is still somehow NULL after the grow? The code assumes 
grow_socket() always populates free_head on success, but is there an explicit 
guarantee preventing a scenario where socket->free_head remains NULL despite 
rc == 0?

> +static void *
> +cache_pop(struct fastmem_cache *cache, struct fastmem_bin *bin)
> +{
> +	if (cache->count > 0) {
> +		cache->alloc_cache_hits++;
> +		return cache->objs[--cache->count];
> +	}
> +
> +	cache->count = bin_alloc_bulk(bin, cache->objs, cache->target, false);
> +	if (cache->count == 0)
> +		return NULL;
> +
> +	cache->alloc_cache_misses++;
> +	return cache->objs[--cache->count];
> +}

Does cache_pop()->bin_alloc_bulk() handle cache->count correctly when 
bin_alloc_bulk() returns a partial fill (0 < count < target)? The code 
always treats count == 0 as failure but never validates that count <= 
cache->capacity, risking a buffer overrun in cache->objs[] if bin_alloc_bulk 
somehow returns a value exceeding the cache capacity.

> +static struct fastmem_cache *
> +cache_alloc(struct fastmem_socket_state *socket, unsigned int class_idx)
> +{
> +	struct fastmem_cache *cache;
> +	unsigned int capacity = cache_capacity(class_idx);
> +	size_t cache_size = sizeof(*cache) + capacity * sizeof(void *);
> +	unsigned int cache_class = size_to_class(cache_size, RTE_CACHE_LINE_SIZE);
> +	unsigned int own_socket = rte_socket_id();
> +	struct fastmem_socket_state *alloc_socket;
> +
> +	if (cache_class >= FASTMEM_N_CLASSES) {
> +		FASTMEM_LOG(ERR,
> +			"cache size %zu exceeds max size class",
> +			cache_size);
> +		return NULL;
> +	}
> +
> +	if (own_socket >= RTE_MAX_NUMA_NODES)
> +		own_socket = (unsigned int)socket->bins[0].socket_id;
> +
> +	alloc_socket = &fastmem->sockets[own_socket];
> +
> +	cache = bin_alloc_one(&alloc_socket->bins[cache_class], false);

Does cache_alloc()->bin_alloc_one() validate that own_socket (after the 
fallback to socket->bins[0].socket_id) is still within valid bounds before 
indexing fastmem->sockets[own_socket]? If socket->bins[0].socket_id is 
malformed or uninitialized, could this overflow fastmem->sockets[]?

> +static void
> +release_socket_caches(struct fastmem_socket_state *socket)
> +{
> +	unsigned int lcore;
> +	unsigned int c;
> +
> +	for (lcore = 0; lcore < RTE_MAX_LCORE; lcore++) {
> +		for (c = 0; c < FASTMEM_N_CLASSES; c++) {
> +			struct fastmem_cache *cache = socket->caches[lcore][c];
> +			struct fastmem_slab *cache_slab;
> +
> +			if (cache == NULL)
> +				continue;
> +
> +			if (cache->count > 0) {
> +				bin_free_bulk(&socket->bins[c],
> +					cache->objs, cache->count);
> +				cache->count = 0;
> +			}
> +
> +			cache_slab = slab_of(cache);
> +			bin_free_one(cache_slab->bin, cache, false);
> +
> +			socket->caches[lcore][c] = NULL;
> +		}
> +	}

In release_socket_caches(), does the slab_of(cache) call correctly handle 
the case where the cache struct itself was allocated from a different socket 
than the one being released? The code assumes cache_slab->bin is valid, but 
if cache was allocated cross-socket in cache_alloc(), is cache_slab->bin 
always properly initialized for the free path here?

> +static void *
> +do_alloc(size_t size, size_t align, unsigned int flags,
> +		unsigned int lcore_id, int socket_id, bool fallback)
> +{
> +	unsigned int class_idx;
> +	void *obj;
> +
> +	if (unlikely(!fastmem_assure()))
> +		return NULL;
> +
> +	if (unlikely(!normalize_align(&align))) {
> +		rte_errno = EINVAL;
> +		return NULL;
> +	}
> +
> +	class_idx = size_to_class(size, align);
> +	if (unlikely(class_idx >= FASTMEM_N_CLASSES)) {
> +		rte_errno = E2BIG;
> +		return NULL;
> +	}
> +
> +	obj = alloc_from_socket(&fastmem->sockets[socket_id],
> +			class_idx, lcore_id);

Does do_alloc() validate socket_id before indexing fastmem->sockets[socket_id]? 
The caller (rte_fastmem_alloc_socket) validates socket_id, but is there a 
code path through local_socket_id() or rte_fastmem_alloc() where socket_id 
could be out of bounds?

> +RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_fastmem_realloc, 24.11)
> +void *
> +rte_fastmem_realloc(void *ptr, size_t size, size_t align)
> +{
> +	struct fastmem_slab *slab;
> +	unsigned int old_class, new_class;
> +	size_t old_size;
> +	void *new_ptr;
> +
> +	if (ptr == NULL)
> +		return rte_fastmem_alloc(size, align, 0);
> +
> +	if (size == 0) {
> +		rte_fastmem_free(ptr);
> +		return NULL;
> +	}
> +
> +	if (unlikely(!normalize_align(&align))) {
> +		rte_errno = EINVAL;
> +		return NULL;
> +	}
> +
> +	new_class = size_to_class(size, align);
> +	if (unlikely(new_class >= FASTMEM_N_CLASSES)) {
> +		rte_errno = E2BIG;
> +		return NULL;
> +	}
> +
> +	slab = slab_of(ptr);
> +	old_class = slab->bin->class_idx;

Does rte_fastmem_realloc() safely handle the case where ptr was never 
allocated by fastmem (e.g., a stray pointer from rte_malloc or the stack)? 
The slab_of() mask could produce a bogus slab pointer whose bin->class_idx 
is uninitialized memory, leading to undefined behavior in the memcpy below.

> +	new_ptr = rte_fastmem_alloc(size, align, 0);
> +	if (unlikely(new_ptr == NULL))
> +		return NULL;
> +
> +	old_size = class_size(old_class);
> +	memcpy(new_ptr, ptr, RTE_MIN(old_size, size));
> +	rte_fastmem_free(ptr);
> +
> +	return new_ptr;
> +}

In the rte_fastmem_realloc() memcpy, does the code correctly handle the case 
where old_class was garbage (from a bad ptr), causing old_size = 
class_size(old_class) to be a massive value that overflows the new 
allocation? The RTE_MIN clamp protects the destination, but could reading up 
to old_size from ptr cause a segfault if ptr was invalid?

> +RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_fastmem_free_bulk, 24.11)
> +void
> +rte_fastmem_free_bulk(void **ptrs, unsigned int n)
> +{
> +	unsigned int lcore_id;
> +	struct fastmem_slab *slab;
> +	struct fastmem_bin *bin;
> +	struct fastmem_socket_state *socket;
> +	struct fastmem_cache *cache;
> +	unsigned int space;
> +	unsigned int i;
> +
> +	if (unlikely(n == 0))
> +		return;
> +
> +	if (unlikely(!fastmem_assure()))
> +		return;
> +
> +	lcore_id = rte_lcore_id();
> +
> +	/* Fast path: check if first object gives us the bin. */
> +	slab = slab_of(ptrs[0]);
> +	bin = slab->bin;
> +	socket = &fastmem->sockets[bin->socket_id];
> +	cache = cache_get(socket, bin->class_idx, lcore_id);
> +
> +	if (unlikely(cache == NULL)) {
> +		for (i = 0; i < n; i++)
> +			do_free(ptrs[i]);
> +		return;
> +	}
> +
> +	/*
> +	 * Try to push all objects into the cache in one memcpy.
> +	 * If any object belongs to a different bin, fall back to
> +	 * per-object free for the remainder.
> +	 */
> +	space = cache->capacity - cache->count;
> +	if (likely(n <= space)) {
> +		/* Verify all same bin (common case). */
> +		for (i = 1; i < n; i++)
> +			if (slab_of(ptrs[i])->bin != bin)
> +				goto slow;

Does rte_fastmem_free_bulk() correctly handle the case where ptrs[0] is NULL 
or otherwise invalid, causing slab_of(ptrs[0])->bin to dereference a bogus 
address before the cache == NULL guard can trigger? The code relies on 
fastmem_assure() having succeeded, but does not explicitly verify ptrs[0] 
itself is non-NULL before dereferencing slab->bin.

> +		cache->free_cache_hits += n;
> +		memcpy(&cache->objs[cache->count], ptrs,
> +			n * sizeof(void *));
> +		cache->count += n;
> +		return;
> +	}

In the memcpy fast path, does the code guarantee cache->count + n <= 
cache->capacity after the if (likely(n <= space)) check, or could a race 
condition (if cache->count is modified concurrently) overflow cache->objs[]? 
The documentation states this is MT-safe, but is cache->count read-modify-write 
here atomic or serialized?

> +RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_fastmem_virt2iova, 24.11)
> +rte_iova_t
> +rte_fastmem_virt2iova(const void *ptr)
> +{
> +	struct fastmem_slab *slab;
> +
> +	if (unlikely(!fastmem_assure()))
> +		return RTE_BAD_IOVA;
> +
> +	slab = slab_of((void *)(uintptr_t)ptr);
> +
> +	return slab->iova_base + ((uintptr_t)ptr - (uintptr_t)slab);
> +}

Does rte_fastmem_virt2iova() safely handle the case where ptr was not 
allocated by fastmem? slab_of() masks the pointer, which could land on an 
arbitrary 2MiB-aligned address whose slab->iova_base is uninitialized or 
garbage, returning a bogus IOVA that could be used in a DMA descriptor and 
cause a bus fault or security issue.

> +RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_fastmem_stats_reset, 24.11)
> +void
> +rte_fastmem_stats_reset(void)
> +{
> +	if (fastmem == NULL)
> +		return;
> +
> +	for (unsigned int s = 0; s < RTE_MAX_NUMA_NODES; s++) {
> +		struct fastmem_socket_state *socket = &fastmem->sockets[s];
> +
> +		for (unsigned int c = 0; c < FASTMEM_N_CLASSES; c++) {
> +			struct fastmem_bin *bin = &socket->bins[c];
> +
> +			rte_spinlock_lock(&bin->lock);
> +			bin->slab_acquires = 0;
> +			bin->slab_releases = 0;
> +			bin->nocache_allocs = 0;
> +			bin->nocache_frees = 0;
> +			bin->nocache_nomem = 0;
> +			rte_spinlock_unlock(&bin->lock);
> +
> +			for (unsigned int l = 0; l < RTE_MAX_LCORE; l++) {
> +				struct fastmem_cache *cache =
> +					socket->caches[l][c];
> +				if (cache == NULL)
> +					continue;
> +				cache->alloc_cache_hits = 0;
> +				cache->alloc_cache_misses = 0;
> +				cache->alloc_nomem = 0;
> +				cache->free_cache


More information about the test-report mailing list