patch 'test/atomic: scale test based on core count' has been queued to stable release 23.11.7
Shani Peretz
shperetz at nvidia.com
Wed Apr 15 11:59:47 CEST 2026
Hi,
FYI, your patch has been queued to stable release 23.11.7
Note it hasn't been pushed to http://dpdk.org/browse/dpdk-stable yet.
It will be pushed if I get no objections before 04/19/26. So please
shout if anyone has objections.
Also note that after the patch there's a diff of the upstream commit vs the
patch applied to the branch. This will indicate if there was any rebasing
needed to apply to the stable branch. If there were code changes for rebasing
(ie: not only metadata diffs), please double check that the rebase was
correctly done.
Queued patches are on a temporary branch at:
https://github.com/shanipr/dpdk-stable
This queued commit can be viewed at:
https://github.com/shanipr/dpdk-stable/commit/6aba812e79fa56c3fa0e703286c60c984d055f46
Thanks.
Shani
---
>From 6aba812e79fa56c3fa0e703286c60c984d055f46 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen at networkplumber.org>
Date: Thu, 5 Mar 2026 09:50:56 -0800
Subject: [PATCH] test/atomic: scale test based on core count
[ upstream commit c9eb695f162a0dce737337c500dd350012a44732 ]
The atomic test uses tight spinloops to synchronize worker threads
and performs a fixed 1,000,000 iterations per worker. This causes
two problems on high core count systems:
With many cores (e.g., 32), the massive contention on shared
atomic variables causes the test to exceed the 10 second timeout.
Scale iterations inversely with core count to maintain roughly
constant test duration regardless of system size.
With 32 cores, iterations drop from 1,000,000 to 31,250 per worker,
which keeps the test well within the timeout while still providing
meaningful coverage.
Add helper function to test.h so that other similar problems
can be addressed in follow-on patches.
Bugzilla ID: 952
Fixes: af75078fece3 ("first public release")
Signed-off-by: Stephen Hemminger <stephen at networkplumber.org>
---
app/test/test.h | 19 ++++++++++++++++
app/test/test_atomic.c | 51 +++++++++++++++++++++++++-----------------
2 files changed, 50 insertions(+), 20 deletions(-)
diff --git a/app/test/test.h b/app/test/test.h
index fd8cc10b53..7599ba758d 100644
--- a/app/test/test.h
+++ b/app/test/test.h
@@ -12,6 +12,7 @@
#include <rte_hexdump.h>
#include <rte_common.h>
+#include <rte_lcore.h>
#include <rte_os_shim.h>
#define TEST_SUCCESS EXIT_SUCCESS
@@ -211,4 +212,22 @@ void add_test_command(struct test_command *t);
#define REGISTER_PERF_TEST REGISTER_TEST_COMMAND
#define REGISTER_DRIVER_TEST REGISTER_TEST_COMMAND
+/**
+ * Scale test iterations inversely with core count.
+ *
+ * On high core count systems, tests with per-core work can exceed
+ * timeout limits due to increased lock contention and scheduling
+ * overhead. This helper scales iterations to keep total test time
+ * roughly constant regardless of core count.
+ *
+ * @param base Base iteration count (used on single-core systems)
+ * @param min Minimum iterations (floor to ensure meaningful testing)
+ * @return Scaled iteration count
+ */
+static inline unsigned int
+test_scale_iterations(unsigned int base, unsigned int min)
+{
+ return RTE_MAX(base / rte_lcore_count(), min);
+}
+
#endif
diff --git a/app/test/test_atomic.c b/app/test/test_atomic.c
index 40d9fae76b..72b13dcccd 100644
--- a/app/test/test_atomic.c
+++ b/app/test/test_atomic.c
@@ -10,6 +10,7 @@
#include <sys/queue.h>
#include <rte_memory.h>
+#include <rte_common.h>
#include <rte_per_lcore.h>
#include <rte_launch.h>
#include <rte_atomic.h>
@@ -100,7 +101,15 @@
#define NUM_ATOMIC_TYPES 3
-#define N 1000000
+#define N_BASE 1000000u
+#define N_MIN 10000u
+
+/*
+ * Number of iterations for each test, scaled inversely with core count.
+ * More cores means more contention which increases time per operation.
+ * Calculated once at test start to avoid repeated computation in workers.
+ */
+static unsigned int num_iterations;
static rte_atomic16_t a16;
static rte_atomic32_t a32;
@@ -111,36 +120,36 @@ static rte_atomic32_t synchro;
static int
test_atomic_usual(__rte_unused void *arg)
{
- unsigned i;
+ unsigned int i;
while (rte_atomic32_read(&synchro) == 0)
rte_pause();
- for (i = 0; i < N; i++)
+ for (i = 0; i < num_iterations; i++)
rte_atomic16_inc(&a16);
- for (i = 0; i < N; i++)
+ for (i = 0; i < num_iterations; i++)
rte_atomic16_dec(&a16);
- for (i = 0; i < (N / 5); i++)
+ for (i = 0; i < (num_iterations / 5); i++)
rte_atomic16_add(&a16, 5);
- for (i = 0; i < (N / 5); i++)
+ for (i = 0; i < (num_iterations / 5); i++)
rte_atomic16_sub(&a16, 5);
- for (i = 0; i < N; i++)
+ for (i = 0; i < num_iterations; i++)
rte_atomic32_inc(&a32);
- for (i = 0; i < N; i++)
+ for (i = 0; i < num_iterations; i++)
rte_atomic32_dec(&a32);
- for (i = 0; i < (N / 5); i++)
+ for (i = 0; i < (num_iterations / 5); i++)
rte_atomic32_add(&a32, 5);
- for (i = 0; i < (N / 5); i++)
+ for (i = 0; i < (num_iterations / 5); i++)
rte_atomic32_sub(&a32, 5);
- for (i = 0; i < N; i++)
+ for (i = 0; i < num_iterations; i++)
rte_atomic64_inc(&a64);
- for (i = 0; i < N; i++)
+ for (i = 0; i < num_iterations; i++)
rte_atomic64_dec(&a64);
- for (i = 0; i < (N / 5); i++)
+ for (i = 0; i < (num_iterations / 5); i++)
rte_atomic64_add(&a64, 5);
- for (i = 0; i < (N / 5); i++)
+ for (i = 0; i < (num_iterations / 5); i++)
rte_atomic64_sub(&a64, 5);
return 0;
@@ -168,12 +177,12 @@ test_atomic_addsub_and_return(__rte_unused void *arg)
uint32_t tmp16;
uint32_t tmp32;
uint64_t tmp64;
- unsigned i;
+ unsigned int i;
while (rte_atomic32_read(&synchro) == 0)
rte_pause();
- for (i = 0; i < N; i++) {
+ for (i = 0; i < num_iterations; i++) {
tmp16 = rte_atomic16_add_return(&a16, 1);
rte_atomic64_add(&count, tmp16);
@@ -273,7 +282,7 @@ test_atomic128_cmp_exchange(__rte_unused void *arg)
expected = count128;
- for (i = 0; i < N; i++) {
+ for (i = 0; i < num_iterations; i++) {
do {
rte_int128_t desired;
@@ -400,7 +409,7 @@ get_crc8(uint8_t *message, int length)
static int
test_atomic_exchange(__rte_unused void *arg)
{
- int i;
+ unsigned int i;
test16_t nt16, ot16; /* new token, old token */
test32_t nt32, ot32;
test64_t nt64, ot64;
@@ -416,7 +425,7 @@ test_atomic_exchange(__rte_unused void *arg)
* appropriate crc32 hash for the data) then the test iteration has
* passed. If the token is invalid, increment the counter.
*/
- for (i = 0; i < N; i++) {
+ for (i = 0; i < num_iterations; i++) {
/* Test 64bit Atomic Exchange */
nt64.u64 = rte_rand();
@@ -445,6 +454,8 @@ test_atomic_exchange(__rte_unused void *arg)
static int
test_atomic(void)
{
+ num_iterations = test_scale_iterations(N_BASE, N_MIN);
+
rte_atomic16_init(&a16);
rte_atomic32_init(&a32);
rte_atomic64_init(&a64);
@@ -592,7 +603,7 @@ test_atomic(void)
rte_atomic32_clear(&synchro);
iterations = count128.val[0] - count128.val[1];
- if (iterations != (uint64_t)4*N*(rte_lcore_count()-1)) {
+ if (iterations != (uint64_t)4*num_iterations*(rte_lcore_count()-1)) {
printf("128-bit compare and swap failed\n");
return -1;
}
--
2.43.0
---
Diff of the applied patch vs upstream commit (please double-check if non-empty):
---
--- - 2026-04-14 14:44:33.256987605 +0300
+++ 0046-test-atomic-scale-test-based-on-core-count.patch 2026-04-14 14:44:28.631492000 +0300
@@ -1 +1 @@
-From c9eb695f162a0dce737337c500dd350012a44732 Mon Sep 17 00:00:00 2001
+From 6aba812e79fa56c3fa0e703286c60c984d055f46 Mon Sep 17 00:00:00 2001
@@ -5,0 +6,2 @@
+[ upstream commit c9eb695f162a0dce737337c500dd350012a44732 ]
+
@@ -25 +26,0 @@
-Cc: stable at dpdk.org
@@ -34 +35 @@
-index 10dc45f19d..1f12fc5397 100644
+index fd8cc10b53..7599ba758d 100644
@@ -45,3 +46,3 @@
-@@ -223,4 +224,22 @@ void add_test_command(struct test_command *t);
- */
- #define REGISTER_ATTIC_TEST REGISTER_TEST_COMMAND
+@@ -211,4 +212,22 @@ void add_test_command(struct test_command *t);
+ #define REGISTER_PERF_TEST REGISTER_TEST_COMMAND
+ #define REGISTER_DRIVER_TEST REGISTER_TEST_COMMAND
@@ -69 +70 @@
-index b1a0d40ece..2a4531b833 100644
+index 40d9fae76b..72b13dcccd 100644
@@ -80 +81 @@
-@@ -101,7 +102,15 @@
+@@ -100,7 +101,15 @@
@@ -97 +98 @@
-@@ -112,36 +121,36 @@ static rte_atomic32_t synchro;
+@@ -111,36 +120,36 @@ static rte_atomic32_t synchro;
@@ -147 +148 @@
-@@ -169,12 +178,12 @@ test_atomic_addsub_and_return(__rte_unused void *arg)
+@@ -168,12 +177,12 @@ test_atomic_addsub_and_return(__rte_unused void *arg)
@@ -162 +163 @@
-@@ -274,7 +283,7 @@ test_atomic128_cmp_exchange(__rte_unused void *arg)
+@@ -273,7 +282,7 @@ test_atomic128_cmp_exchange(__rte_unused void *arg)
@@ -171 +172 @@
-@@ -401,7 +410,7 @@ get_crc8(uint8_t *message, int length)
+@@ -400,7 +409,7 @@ get_crc8(uint8_t *message, int length)
@@ -180 +181 @@
-@@ -417,7 +426,7 @@ test_atomic_exchange(__rte_unused void *arg)
+@@ -416,7 +425,7 @@ test_atomic_exchange(__rte_unused void *arg)
@@ -189 +190 @@
-@@ -446,6 +455,8 @@ test_atomic_exchange(__rte_unused void *arg)
+@@ -445,6 +454,8 @@ test_atomic_exchange(__rte_unused void *arg)
@@ -198 +199 @@
-@@ -593,7 +604,7 @@ test_atomic(void)
+@@ -592,7 +603,7 @@ test_atomic(void)
More information about the stable
mailing list