[RFC 2/7] eal: reimplement rte_smp_*mb with rte_atomic_thread_fence
Stephen Hemminger
stephen at networkplumber.org
Thu May 21 06:17:02 CEST 2026
The rte_smp_mb(), rte_smp_wmb() and rte_smp_rmb() functions were
flagged as deprecated by commit 3ec965b6de12 ("doc: update atomic
operation deprecation") in 2021 but nothing came of it.
Reimplement them as inline wrappers over rte_atomic_thread_fence()
and drop the deprecation notice.
The API is preserved; only the implementation changes.
Generated code is unchanged on x86 (seq_cst keeps the lock-addl
trick, release/acquire collapse to a compiler barrier under TSO).
On arm64, the release fence emits dmb ish instead of dmb ishst, while
the acquire fence still emits dmb ishld; the difference is below
measurement noise.
Signed-off-by: Stephen Hemminger <stephen at networkplumber.org>
---
doc/guides/rel_notes/deprecation.rst | 8 --
lib/eal/arm/include/rte_atomic_32.h | 6 --
lib/eal/arm/include/rte_atomic_64.h | 6 --
lib/eal/include/generic/rte_atomic.h | 106 +++++++++++--------------
lib/eal/loongarch/include/rte_atomic.h | 6 --
lib/eal/ppc/include/rte_atomic.h | 6 --
lib/eal/riscv/include/rte_atomic.h | 6 --
lib/eal/x86/include/rte_atomic.h | 33 +++-----
8 files changed, 57 insertions(+), 120 deletions(-)
diff --git a/doc/guides/rel_notes/deprecation.rst b/doc/guides/rel_notes/deprecation.rst
index 346c517623..03b763b472 100644
--- a/doc/guides/rel_notes/deprecation.rst
+++ b/doc/guides/rel_notes/deprecation.rst
@@ -47,14 +47,6 @@ Deprecation Notices
operations must be used for patches that need to be merged in 20.08 onwards.
This change will not introduce any performance degradation.
-* rte_smp_*mb: These APIs provide full barrier functionality. However, many
- use cases do not require full barriers. To support such use cases, DPDK has
- adopted atomic operations from
- https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html. These
- operations and a new wrapper ``rte_atomic_thread_fence`` instead of
- ``__atomic_thread_fence`` must be used for patches that need to be merged in
- 20.08 onwards. This change will not introduce any performance degradation.
-
* lib: will fix extending some enum/define breaking the ABI. There are multiple
samples in DPDK that enum/define terminated with a ``.*MAX.*`` value which is
used by iterators, and arrays holding these values are sized with this
diff --git a/lib/eal/arm/include/rte_atomic_32.h b/lib/eal/arm/include/rte_atomic_32.h
index 0b9a0dfa30..3809ddefb7 100644
--- a/lib/eal/arm/include/rte_atomic_32.h
+++ b/lib/eal/arm/include/rte_atomic_32.h
@@ -21,12 +21,6 @@ extern "C" {
#define rte_rmb() __sync_synchronize()
-#define rte_smp_mb() rte_mb()
-
-#define rte_smp_wmb() rte_wmb()
-
-#define rte_smp_rmb() rte_rmb()
-
#define rte_io_mb() rte_mb()
#define rte_io_wmb() rte_wmb()
diff --git a/lib/eal/arm/include/rte_atomic_64.h b/lib/eal/arm/include/rte_atomic_64.h
index 181bb60929..c9b41f6212 100644
--- a/lib/eal/arm/include/rte_atomic_64.h
+++ b/lib/eal/arm/include/rte_atomic_64.h
@@ -24,12 +24,6 @@ extern "C" {
#define rte_rmb() asm volatile("dmb oshld" : : : "memory")
-#define rte_smp_mb() asm volatile("dmb ish" : : : "memory")
-
-#define rte_smp_wmb() asm volatile("dmb ishst" : : : "memory")
-
-#define rte_smp_rmb() asm volatile("dmb ishld" : : : "memory")
-
#define rte_io_mb() rte_mb()
#define rte_io_wmb() rte_wmb()
diff --git a/lib/eal/include/generic/rte_atomic.h b/lib/eal/include/generic/rte_atomic.h
index 0a4f3f8528..4e9d230f85 100644
--- a/lib/eal/include/generic/rte_atomic.h
+++ b/lib/eal/include/generic/rte_atomic.h
@@ -49,69 +49,8 @@ static inline void rte_wmb(void);
* occur before the LOAD operations generated after.
*/
static inline void rte_rmb(void);
-///@}
-
-/** @name SMP Memory Barrier
- */
-///@{
-/**
- * General memory barrier between lcores
- *
- * Guarantees that the LOAD and STORE operations that precede the
- * rte_smp_mb() call are globally visible across the lcores
- * before the LOAD and STORE operations that follows it.
- *
- * @note
- * This function is deprecated.
- * It provides similar synchronization primitive as atomic fence,
- * but has different syntax and memory ordering semantic. Hence
- * deprecated for the simplicity of memory ordering semantics in use.
- *
- * rte_atomic_thread_fence(rte_memory_order_acq_rel) should be used instead.
- */
-static inline void rte_smp_mb(void);
-/**
- * Write memory barrier between lcores
- *
- * Guarantees that the STORE operations that precede the
- * rte_smp_wmb() call are globally visible across the lcores
- * before the STORE operations that follows it.
- *
- * @note
- * This function is deprecated.
- * It provides similar synchronization primitive as atomic fence,
- * but has different syntax and memory ordering semantic. Hence
- * deprecated for the simplicity of memory ordering semantics in use.
- *
- * rte_atomic_thread_fence(rte_memory_order_release) should be used instead.
- * The fence also guarantees LOAD operations that precede the call
- * are globally visible across the lcores before the STORE operations
- * that follows it.
- */
-static inline void rte_smp_wmb(void);
-
-/**
- * Read memory barrier between lcores
- *
- * Guarantees that the LOAD operations that precede the
- * rte_smp_rmb() call are globally visible across the lcores
- * before the LOAD operations that follows it.
- *
- * @note
- * This function is deprecated.
- * It provides similar synchronization primitive as atomic fence,
- * but has different syntax and memory ordering semantic. Hence
- * deprecated for the simplicity of memory ordering semantics in use.
- *
- * rte_atomic_thread_fence(rte_memory_order_acquire) should be used instead.
- * The fence also guarantees LOAD operations that precede the call
- * are globally visible across the lcores before the STORE operations
- * that follows it.
- */
-static inline void rte_smp_rmb(void);
///@}
-
/** @name I/O Memory Barrier
*/
///@{
@@ -164,6 +103,51 @@ static inline void rte_io_rmb(void);
*/
static inline void rte_atomic_thread_fence(rte_memory_order memorder);
+
+/** @name SMP Memory Barrier
+ */
+///@{
+/**
+ * General memory barrier between lcores
+ *
+ * Guarantees that the LOAD and STORE operations that precede the
+ * rte_smp_mb() call are globally visible across the lcores
+ * before the LOAD and STORE operations that follow it.
+ */
+static __rte_always_inline void
+rte_smp_mb(void)
+{
+ rte_atomic_thread_fence(rte_memory_order_seq_cst);
+}
+
+/**
+ * Write memory barrier between lcores
+ *
+ * Guarantees that the STORE operations that precede the
+ * rte_smp_wmb() call are globally visible across the lcores
+ * before the STORE operations that follow it.
+ */
+static __rte_always_inline void
+rte_smp_wmb(void)
+{
+ rte_atomic_thread_fence(rte_memory_order_release);
+}
+
+/**
+ * Read memory barrier between lcores
+ *
+ * Guarantees that the LOAD operations that precede the
+ * rte_smp_rmb() call are globally visible across the lcores
+ * before the LOAD operations that follow it.
+ */
+static __rte_always_inline void
+rte_smp_rmb(void)
+{
+ rte_atomic_thread_fence(rte_memory_order_acquire);
+}
+
+///@}
+
/*------------------------- 16 bit atomic operations -------------------------*/
#ifndef RTE_TOOLCHAIN_MSVC
diff --git a/lib/eal/loongarch/include/rte_atomic.h b/lib/eal/loongarch/include/rte_atomic.h
index c8066a4612..49e0c67020 100644
--- a/lib/eal/loongarch/include/rte_atomic.h
+++ b/lib/eal/loongarch/include/rte_atomic.h
@@ -22,12 +22,6 @@ extern "C" {
#define rte_rmb() rte_mb()
-#define rte_smp_mb() rte_mb()
-
-#define rte_smp_wmb() rte_mb()
-
-#define rte_smp_rmb() rte_mb()
-
#define rte_io_mb() rte_mb()
#define rte_io_wmb() rte_mb()
diff --git a/lib/eal/ppc/include/rte_atomic.h b/lib/eal/ppc/include/rte_atomic.h
index 10acc238f9..1da5afccbf 100644
--- a/lib/eal/ppc/include/rte_atomic.h
+++ b/lib/eal/ppc/include/rte_atomic.h
@@ -24,12 +24,6 @@ extern "C" {
#define rte_rmb() asm volatile("sync" : : : "memory")
-#define rte_smp_mb() rte_mb()
-
-#define rte_smp_wmb() rte_wmb()
-
-#define rte_smp_rmb() rte_rmb()
-
#define rte_io_mb() rte_mb()
#define rte_io_wmb() rte_wmb()
diff --git a/lib/eal/riscv/include/rte_atomic.h b/lib/eal/riscv/include/rte_atomic.h
index 66346ad474..dd10ad5127 100644
--- a/lib/eal/riscv/include/rte_atomic.h
+++ b/lib/eal/riscv/include/rte_atomic.h
@@ -27,12 +27,6 @@ extern "C" {
#define rte_rmb() asm volatile("fence r, r" : : : "memory")
-#define rte_smp_mb() rte_mb()
-
-#define rte_smp_wmb() rte_wmb()
-
-#define rte_smp_rmb() rte_rmb()
-
#define rte_io_mb() asm volatile("fence iorw, iorw" : : : "memory")
#define rte_io_wmb() asm volatile("fence orw, ow" : : : "memory")
diff --git a/lib/eal/x86/include/rte_atomic.h b/lib/eal/x86/include/rte_atomic.h
index e071e4234e..a850b0257c 100644
--- a/lib/eal/x86/include/rte_atomic.h
+++ b/lib/eal/x86/include/rte_atomic.h
@@ -23,10 +23,6 @@
#define rte_rmb() _mm_lfence()
-#define rte_smp_wmb() rte_compiler_barrier()
-
-#define rte_smp_rmb() rte_compiler_barrier()
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -63,20 +59,6 @@ extern "C" {
* So below we use that technique for rte_smp_mb() implementation.
*/
-static __rte_always_inline void
-rte_smp_mb(void)
-{
-#ifdef RTE_TOOLCHAIN_MSVC
- _mm_mfence();
-#else
-#ifdef RTE_ARCH_I686
- asm volatile("lock addl $0, -128(%%esp); " ::: "memory");
-#else
- asm volatile("lock addl $0, -128(%%rsp); " ::: "memory");
-#endif
-#endif
-}
-
#define rte_io_mb() rte_mb()
#define rte_io_wmb() rte_compiler_barrier()
@@ -93,10 +75,19 @@ rte_smp_mb(void)
static __rte_always_inline void
rte_atomic_thread_fence(rte_memory_order memorder)
{
- if (memorder == rte_memory_order_seq_cst)
- rte_smp_mb();
- else
+ if (memorder == rte_memory_order_seq_cst) {
+#ifdef RTE_TOOLCHAIN_MSVC
+ _mm_mfence();
+#else
+#ifdef RTE_ARCH_I686
+ asm volatile("lock addl $0, -128(%%esp); " ::: "memory");
+#else
+ asm volatile("lock addl $0, -128(%%rsp); " ::: "memory");
+#endif
+#endif
+ } else {
__rte_atomic_thread_fence(memorder);
+ }
}
#ifdef __cplusplus
--
2.53.0
More information about the dev
mailing list