[PATCH v5] eal: RTE_PTR_ADD/SUB char* for compiler optimizations

Bruce Richardson bruce.richardson at intel.com
Mon Jan 12 17:09:04 CET 2026


On Mon, Jan 12, 2026 at 10:40:59AM -0500, scott.k.mitch1 at gmail.com wrote:
> From: Scott Mitchell <scott.k.mitch1 at gmail.com>
> 
> Modify RTE_PTR_ADD and RTE_PTR_SUB to use char* pointer arithmetic
> instead of uintptr_t casts. Benefits of this approach:
> - API compatibility: works for both integer and pointer inputs
> - Retains simple macros: no pragmas, no _Generic
> - Enables compiler optimizations: Clang now can unroll and vectorize
> pointer loops, and GCC still can apply these optimizations.
> 
> Example use case which benefits is __rte_raw_cksum. Performance
> results from cksum_perf_autotest on Intel Xeon (Cascade Lake,
> AVX-512) built with Clang 18.1 (TSC cycles/byte):
>   Block size    Before    After    Improvement
>          100      0.40     0.24        ~40%
>         1500      0.50     0.06        ~8x
>         9000      0.49     0.06        ~8x
> 
> Signed-off-by: Scott Mitchell <scott.k.mitch1 at gmail.com>
> ---


The macro implementation LGTM

Acked-by: Bruce Richardson <bruce.richardson at intel.com>

Couple of comments on the unit tests inline below. Most are only
suggestions, so feel free to ignore if you think they aren't worth
considering.

>  app/test/meson.build         |   1 +
>  app/test/test_ptr_add_sub.c  | 190 +++++++++++++++++++++++++++++++++++
>  lib/eal/include/rte_common.h |   4 +-
>  3 files changed, 193 insertions(+), 2 deletions(-)
>  create mode 100644 app/test/test_ptr_add_sub.c
> 
> diff --git a/app/test/meson.build b/app/test/meson.build
> index efec42a6bf..80a19d65ba 100644
> --- a/app/test/meson.build
> +++ b/app/test/meson.build
> @@ -152,6 +152,7 @@ source_file_deps = {
>      'test_power_intel_uncore.c': ['power', 'power_intel_uncore'],
>      'test_power_kvm_vm.c': ['power', 'power_kvm_vm'],
>      'test_prefetch.c': [],
> +    'test_ptr_add_sub.c': [],
>      'test_ptr_compress.c': ['ptr_compress'],
>      'test_rand_perf.c': [],
>      'test_rawdev.c': ['rawdev', 'bus_vdev', 'raw_skeleton'],
> diff --git a/app/test/test_ptr_add_sub.c b/app/test/test_ptr_add_sub.c
> new file mode 100644
> index 0000000000..ef449ca9af
> --- /dev/null
> +++ b/app/test/test_ptr_add_sub.c
> @@ -0,0 +1,190 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2026 Apple Inc.
> + */
> +
> +#include "test.h"
> +#include <stdint.h>
> +#include <stdbool.h>
> +
> +#include <rte_common.h>
> +
> +/* Test all C11 standard integer types */
> +static int
> +test_ptr_add_sub_integer_types(void)
> +{
> +	unsigned long long ull = 0x1000;
> +	TEST_ASSERT_EQUAL((uintptr_t)RTE_PTR_ADD(ull, 100), 0x1064,
> +		"RTE_PTR_ADD failed for unsigned long long");

Given all the typecasting in the macros, I don't like the casting to
uintptr_t when checking the return values. Maybe use (void *) on the value
being compared-with instead.

A suggestion: can you maybe put at the top:

#define test_initval 0x1000
#define test_increment 100
#define test_retval ((void *)0x1064))

and use those everywhere rather than the hard-coded constants?

> +	TEST_ASSERT_EQUAL((uintptr_t)RTE_PTR_SUB(ull, 100), 0x1000 - 100,
> +		"RTE_PTR_SUB failed for unsigned long long");
> +
> +	long long ll = 0x1000;
> +	TEST_ASSERT_EQUAL((uintptr_t)RTE_PTR_ADD(ll, 100), 0x1064,
> +		"RTE_PTR_ADD failed for long long");
> +
> +	unsigned long ul = 0x1000;
> +	TEST_ASSERT_EQUAL((uintptr_t)RTE_PTR_ADD(ul, 100), 0x1064,
> +		"RTE_PTR_ADD failed for unsigned long");
> +
> +	long l = 0x1000;
> +	TEST_ASSERT_EQUAL((uintptr_t)RTE_PTR_ADD(l, 100), 0x1064,
> +		"RTE_PTR_ADD failed for long");
> +
> +	unsigned int ui = 0x1000;
> +	TEST_ASSERT_EQUAL((uintptr_t)RTE_PTR_ADD(ui, 100), 0x1064,
> +		"RTE_PTR_ADD failed for unsigned int");
> +
> +	int i = 0x1000;
> +	TEST_ASSERT_EQUAL((uintptr_t)RTE_PTR_ADD(i, 100), 0x1064,
> +		"RTE_PTR_ADD failed for int");
> +
> +	unsigned short us = 0x1000;
> +	TEST_ASSERT_EQUAL((uintptr_t)RTE_PTR_ADD(us, 100), 0x1064,
> +		"RTE_PTR_ADD failed for unsigned short");
> +
> +	short s = 0x1000;
> +	TEST_ASSERT_EQUAL((uintptr_t)RTE_PTR_ADD(s, 100), 0x1064,
> +		"RTE_PTR_ADD failed for short");
> +
> +	unsigned char uc = 100;
> +	TEST_ASSERT_EQUAL((uintptr_t)RTE_PTR_ADD(uc, 50), 150,
> +		"RTE_PTR_ADD failed for unsigned char");
> +
> +	signed char sc = 100;
> +	TEST_ASSERT_EQUAL((uintptr_t)RTE_PTR_ADD(sc, 50), 150,
> +		"RTE_PTR_ADD failed for signed char");
> +
> +	char c = 100;
> +	TEST_ASSERT_EQUAL((uintptr_t)RTE_PTR_ADD(c, 50), 150,
> +		"RTE_PTR_ADD failed for char");
> +

Maybe define a new set of macros for small sized ints? Or else just use 100
and 50 generally, rather than 0x1064? The test should be just as good,
right?

> +	_Bool b = 1;
> +	TEST_ASSERT_EQUAL((uintptr_t)RTE_PTR_ADD(b, 99), 100,
> +		"RTE_PTR_ADD failed for _Bool");
> +
> +	bool b2 = true;
> +	TEST_ASSERT_EQUAL((uintptr_t)RTE_PTR_ADD(b2, 99), 100,
> +		"RTE_PTR_ADD failed for bool");
> +
> +	return 0;
> +}
> +
> +/* Test pointer types including const correctness */
> +static int
> +test_ptr_add_sub_pointer_types(void)
> +{
> +	char buffer[256];
> +	void *result;
> +
> +	/* Test void* and const void* */
> +	void *vp = buffer;
> +	result = RTE_PTR_ADD(vp, 100);
> +	TEST_ASSERT_EQUAL(result, (void *)(buffer + 100),
> +		"RTE_PTR_ADD failed for void*");
> +	result = RTE_PTR_SUB(vp, 50);
> +	TEST_ASSERT_EQUAL(result, (void *)(buffer - 50),
> +		"RTE_PTR_SUB failed for void*");
> +
> +	const void *cvp = buffer;
> +	result = RTE_PTR_ADD(cvp, 100);
> +	TEST_ASSERT_EQUAL(result, (void *)(buffer + 100),
> +		"RTE_PTR_ADD failed for const void*");
> +	result = RTE_PTR_SUB(cvp, 50);
> +	TEST_ASSERT_EQUAL(result, (void *)(buffer - 50),
> +		"RTE_PTR_SUB failed for const void*");
> +
> +	/* Test char* and const char* */
> +	char *cp = buffer;
> +	result = RTE_PTR_ADD(cp, 100);
> +	TEST_ASSERT_EQUAL(result, (void *)(buffer + 100),
> +		"RTE_PTR_ADD failed for char*");
> +	result = RTE_PTR_SUB(cp, 50);
> +	TEST_ASSERT_EQUAL(result, (void *)(buffer - 50),
> +		"RTE_PTR_SUB failed for char*");
> +
> +	const char *ccp = buffer;
> +	result = RTE_PTR_ADD(ccp, 100);
> +	TEST_ASSERT_EQUAL(result, (void *)(buffer + 100),
> +		"RTE_PTR_ADD failed for const char*");
> +	result = RTE_PTR_SUB(ccp, 50);
> +	TEST_ASSERT_EQUAL(result, (void *)(buffer - 50),
> +		"RTE_PTR_SUB failed for const char*");
> +
> +	/* Test uint32_t* and const uint32_t* */
> +	uint32_t *u32p = (uint32_t *)buffer;
> +	result = RTE_PTR_ADD(u32p, 100);
> +	TEST_ASSERT_EQUAL(result, (void *)(buffer + 100),
> +		"RTE_PTR_ADD failed for uint32_t*");
> +	result = RTE_PTR_SUB(u32p, 50);
> +	TEST_ASSERT_EQUAL(result, (void *)(buffer - 50),
> +		"RTE_PTR_SUB failed for uint32_t*");
> +
> +	const uint32_t *cu32p = (const uint32_t *)buffer;
> +	result = RTE_PTR_ADD(cu32p, 100);
> +	TEST_ASSERT_EQUAL(result, (void *)(buffer + 100),
> +		"RTE_PTR_ADD failed for const uint32_t*");
> +	result = RTE_PTR_SUB(cu32p, 50);
> +	TEST_ASSERT_EQUAL(result, (void *)(buffer - 50),
> +		"RTE_PTR_SUB failed for const uint32_t*");
> +
> +	/* Verify assigning to const pointer works (adding const is safe) */
> +	const void *result_const = RTE_PTR_ADD(cvp, 100);
> +	TEST_ASSERT_EQUAL(result_const, (const void *)(buffer + 100),
> +		"RTE_PTR_ADD failed when assigning to const void*");
> +
> +	return 0;
> +}
> +
> +/* Test that typedefs resolve to native types correctly */
> +static int
> +test_ptr_add_sub_typedefs(void)
> +{
> +	uint64_t u64 = 0x1000;
> +	TEST_ASSERT_EQUAL((uintptr_t)RTE_PTR_ADD(u64, 100), 0x1064,
> +		"RTE_PTR_ADD failed for uint64_t");
> +
> +	uint32_t u32 = 0x1000;
> +	TEST_ASSERT_EQUAL((uintptr_t)RTE_PTR_ADD(u32, 100), 0x1064,
> +		"RTE_PTR_ADD failed for uint32_t");
> +
> +	uint16_t u16 = 0x1000;
> +	TEST_ASSERT_EQUAL((uintptr_t)RTE_PTR_ADD(u16, 100), 0x1064,
> +		"RTE_PTR_ADD failed for uint16_t");
> +
> +	uint8_t u8 = 100;
> +	TEST_ASSERT_EQUAL((uintptr_t)RTE_PTR_ADD(u8, 50), 150,
> +		"RTE_PTR_ADD failed for uint8_t");
> +
> +	uintptr_t uptr = 0x1000;
> +	TEST_ASSERT_EQUAL((uintptr_t)RTE_PTR_ADD(uptr, 100), 0x1064,
> +		"RTE_PTR_ADD failed for uintptr_t");
> +
> +	size_t sz = 0x1000;
> +	TEST_ASSERT_EQUAL((uintptr_t)RTE_PTR_ADD(sz, 100), 0x1064,
> +		"RTE_PTR_ADD failed for size_t");
> +
> +	return 0;
> +}
> +
> +/* Main test function that runs all subtests */
> +static int
> +test_ptr_add_sub(void)
> +{
> +	int ret;
> +
> +	ret = test_ptr_add_sub_integer_types();
> +	if (ret != 0)
> +		return ret;
> +
> +	ret = test_ptr_add_sub_pointer_types();
> +	if (ret != 0)
> +		return ret;
> +
> +	ret = test_ptr_add_sub_typedefs();
> +	if (ret != 0)
> +		return ret;
> +
> +	return 0;
> +}

Very comprehensive set of tests, thanks.

> +
> +REGISTER_FAST_TEST(ptr_add_sub_autotest, true, true, test_ptr_add_sub);
> diff --git a/lib/eal/include/rte_common.h b/lib/eal/include/rte_common.h
> index 9e7d84f929..54289871d2 100644
> --- a/lib/eal/include/rte_common.h
> +++ b/lib/eal/include/rte_common.h
> @@ -551,12 +551,12 @@ static void __attribute__((destructor(RTE_PRIO(prio)), used)) func(void)
>  /**
>   * add a byte-value offset to a pointer
>   */
> -#define RTE_PTR_ADD(ptr, x) ((void*)((uintptr_t)(ptr) + (x)))
> +#define RTE_PTR_ADD(ptr, x) ((void *)((char *)(uintptr_t)(ptr) + (x)))
>  
>  /**
>   * subtract a byte-value offset from a pointer
>   */
> -#define RTE_PTR_SUB(ptr, x) ((void *)((uintptr_t)(ptr) - (x)))
> +#define RTE_PTR_SUB(ptr, x) ((void *)((char *)(uintptr_t)(ptr) - (x)))
>
  


More information about the dev mailing list