[dpdk-dev] [PATCH v3 1/3] eal/arm64: add 128-bit atomic compare exchange

Phil Yang phil.yang at arm.com
Fri Jun 28 10:11:41 CEST 2019


Add 128-bit atomic compare exchange on aarch64.

Signed-off-by: Phil Yang <phil.yang at arm.com>
Tested-by: Honnappa Nagarahalli <honnappa.nagarahalli at arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli at arm.com>
---
v3:
1. Avoid duplication code with macro. (Jerin Jocob)
2. Make invalid memory order to strongest barrier. (Jerin Jocob)
3. Update doc/guides/prog_guide/env_abstraction_layer.rst. (Eads Gage)
4. Fix 32-bit x86 builds issue. (Eads Gage)
5. Correct documentation issues in UT. (Eads Gage)

 .../common/include/arch/arm/rte_atomic_64.h        | 165 +++++++++++++++++++++
 .../common/include/arch/x86/rte_atomic_64.h        |  12 --
 lib/librte_eal/common/include/generic/rte_atomic.h |  17 ++-
 3 files changed, 181 insertions(+), 13 deletions(-)

diff --git a/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h b/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h
index 97060e4..2080c4d 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h
@@ -1,5 +1,6 @@
 /* SPDX-License-Identifier: BSD-3-Clause
  * Copyright(c) 2015 Cavium, Inc
+ * Copyright(c) 2019 Arm Limited
  */
 
 #ifndef _RTE_ATOMIC_ARM64_H_
@@ -14,6 +15,9 @@ extern "C" {
 #endif
 
 #include "generic/rte_atomic.h"
+#include <rte_branch_prediction.h>
+#include <rte_compat.h>
+#include <rte_debug.h>
 
 #define dsb(opt) asm volatile("dsb " #opt : : : "memory")
 #define dmb(opt) asm volatile("dmb " #opt : : : "memory")
@@ -40,6 +44,167 @@ extern "C" {
 
 #define rte_cio_rmb() dmb(oshld)
 
+/*------------------------ 128 bit atomic operations -------------------------*/
+
+#define RTE_HAS_ACQ(mo) ((mo) != __ATOMIC_RELAXED && (mo) != __ATOMIC_RELEASE)
+#define RTE_HAS_RLS(mo) ((mo) == __ATOMIC_RELEASE || \
+			 (mo) == __ATOMIC_ACQ_REL || \
+			 (mo) == __ATOMIC_SEQ_CST)
+
+#define RTE_MO_LOAD(mo)  (RTE_HAS_ACQ((mo)) \
+		? __ATOMIC_ACQUIRE : __ATOMIC_RELAXED)
+#define RTE_MO_STORE(mo) (RTE_HAS_RLS((mo)) \
+		? __ATOMIC_RELEASE : __ATOMIC_RELAXED)
+
+#ifdef __ARM_FEATURE_ATOMICS
+#define __ATOMIC128_CAS_OP(cas_op_name, op_string)                          \
+static inline rte_int128_t                                                  \
+cas_op_name(rte_int128_t *dst, rte_int128_t old,                            \
+		rte_int128_t updated)                                               \
+{                                                                           \
+	/* caspX instructions register pair must start from even-numbered
+	 * register at operand 1.
+	 * So, specify registers for local variables here.
+	 */                                                                     \
+	register uint64_t x0 __asm("x0") = (uint64_t)old.val[0];                \
+	register uint64_t x1 __asm("x1") = (uint64_t)old.val[1];                \
+	register uint64_t x2 __asm("x2") = (uint64_t)updated.val[0];            \
+	register uint64_t x3 __asm("x3") = (uint64_t)updated.val[1];            \
+	asm volatile(                                                           \
+			op_string " %[old0], %[old1], %[upd0], %[upd1], [%[dst]]"       \
+			: [old0] "+r" (x0),                                             \
+			  [old1] "+r" (x1)                                              \
+			: [upd0] "r" (x2),                                              \
+			  [upd1] "r" (x3),                                              \
+			  [dst] "r" (dst)                                               \
+			: "memory");                                                    \
+	old.val[0] = x0;                                                        \
+	old.val[1] = x1;                                                        \
+	return old;                                                             \
+}
+
+__ATOMIC128_CAS_OP(__rte_cas_relaxed, "casp")
+__ATOMIC128_CAS_OP(__rte_cas_acquire, "caspa")
+__ATOMIC128_CAS_OP(__rte_cas_release, "caspl")
+__ATOMIC128_CAS_OP(__rte_cas_acq_rel, "caspal")
+#else
+#define __ATOMIC128_LDX_OP(ldx_op_name, op_string)                          \
+static inline rte_int128_t                                                  \
+ldx_op_name(const rte_int128_t *src)                                        \
+{                                                                           \
+	rte_int128_t ret;                                                       \
+	asm volatile(                                                           \
+			op_string " %0, %1, %2"                                         \
+			: "=&r" (ret.val[0]),                                           \
+			  "=&r" (ret.val[1])                                            \
+			: "Q" (src->val[0])                                             \
+			: "memory");                                                    \
+	return ret;                                                             \
+}
+
+__ATOMIC128_LDX_OP(__rte_ldx_relaxed, "ldxp")
+__ATOMIC128_LDX_OP(__rte_ldx_acquire, "ldaxp")
+
+#define __ATOMIC128_STX_OP(stx_op_name, op_string)                          \
+static inline uint32_t                                                      \
+stx_op_name(rte_int128_t *dst, const rte_int128_t src)                      \
+{                                                                           \
+	uint32_t ret;                                                           \
+	asm volatile(                                                           \
+			op_string " %w0, %1, %2, %3"                                    \
+			: "=&r" (ret)                                                   \
+			: "r" (src.val[0]),                                             \
+			  "r" (src.val[1]),                                             \
+			  "Q" (dst->val[0])                                             \
+			: "memory");                                                    \
+	/* Return 0 on success, 1 on failure */                                 \
+	return ret;                                                             \
+}
+
+__ATOMIC128_STX_OP(__rte_stx_relaxed, "stxp")
+__ATOMIC128_STX_OP(__rte_stx_release, "stlxp")
+#endif
+
+static inline int __rte_experimental
+rte_atomic128_cmp_exchange(rte_int128_t *dst,
+				rte_int128_t *exp,
+				const rte_int128_t *src,
+				unsigned int weak,
+				int success,
+				int failure)
+{
+	/* Always do strong CAS */
+	RTE_SET_USED(weak);
+	/* Ignore memory ordering for failure, memory order for
+	 * success must be stronger or equal
+	 */
+	RTE_SET_USED(failure);
+	/* Find invalid memory order */
+	RTE_ASSERT(success == __ATOMIC_RELAXED
+			|| success == __ATOMIC_ACQUIRE
+			|| success == __ATOMIC_RELEASE
+			|| success == __ATOMIC_ACQ_REL
+			|| success == __ATOMIC_SEQ_CST);
+
+#ifdef __ARM_FEATURE_ATOMICS
+	rte_int128_t expected = *exp;
+	rte_int128_t desired = *src;
+	rte_int128_t old;
+
+	if (success == __ATOMIC_RELAXED)
+		old = __rte_cas_relaxed(dst, expected, desired);
+	else if (success == __ATOMIC_ACQUIRE)
+		old = __rte_cas_acquire(dst, expected, desired);
+	else if (success == __ATOMIC_RELEASE)
+		old = __rte_cas_release(dst, expected, desired);
+	else
+		old = __rte_cas_acq_rel(dst, expected, desired);
+#else
+	int ldx_mo = RTE_MO_LOAD(success);
+	int stx_mo = RTE_MO_STORE(success);
+	uint32_t ret = 1;
+	register rte_int128_t expected = *exp;
+	register rte_int128_t desired = *src;
+	register rte_int128_t old;
+
+	/* ldx128 can not guarantee atomic,
+	 * Must write back src or old to verify atomicity of ldx128;
+	 */
+	do {
+		if (ldx_mo == __ATOMIC_RELAXED)
+			old = __rte_ldx_relaxed(dst);
+		else
+			old = __rte_ldx_acquire(dst);
+
+		if (likely(old.int128 == expected.int128)) {
+			if (stx_mo == __ATOMIC_RELAXED)
+				ret = __rte_stx_relaxed(dst, desired);
+			else
+				ret = __rte_stx_release(dst, desired);
+		} else {
+			/* In the failure case (since 'weak' is ignored and only
+			 * weak == 0 is implemented), expected should contain the
+			 * atomically read value of dst. This means, 'old' needs
+			 * to be stored back to ensure it was read atomically.
+			 */
+			if (stx_mo == __ATOMIC_RELAXED)
+				ret = __rte_stx_relaxed(dst, old);
+			else
+				ret = __rte_stx_release(dst, old);
+		}
+	} while (unlikely(ret));
+#endif
+
+	/* Unconditionally updating expected removes
+	 * an 'if' statement.
+	 * expected should already be in register if
+	 * not in the cache.
+	 */
+	*exp = old;
+
+	return (old.int128 == expected.int128);
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h b/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h
index 6232c57..23cf48f 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h
@@ -212,18 +212,6 @@ static inline void rte_atomic64_clear(rte_atomic64_t *v)
 
 /*------------------------ 128 bit atomic operations -------------------------*/
 
-/**
- * 128-bit integer structure.
- */
-RTE_STD_C11
-typedef struct {
-	RTE_STD_C11
-	union {
-		uint64_t val[2];
-		__extension__ __int128 int128;
-	};
-} __rte_aligned(16) rte_int128_t;
-
 static inline int __rte_experimental
 rte_atomic128_cmp_exchange(rte_int128_t *dst,
 			   rte_int128_t *exp,
diff --git a/lib/librte_eal/common/include/generic/rte_atomic.h b/lib/librte_eal/common/include/generic/rte_atomic.h
index 9958543..2355e50 100644
--- a/lib/librte_eal/common/include/generic/rte_atomic.h
+++ b/lib/librte_eal/common/include/generic/rte_atomic.h
@@ -1081,6 +1081,20 @@ static inline void rte_atomic64_clear(rte_atomic64_t *v)
 
 /*------------------------ 128 bit atomic operations -------------------------*/
 
+#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_ARM64)
+/**
+ * 128-bit integer structure.
+ */
+RTE_STD_C11
+typedef struct {
+	RTE_STD_C11
+	union {
+		uint64_t val[2];
+		__extension__ __int128 int128;
+	};
+} __rte_aligned(16) rte_int128_t;
+#endif
+
 #ifdef __DOXYGEN__
 
 /**
@@ -1093,7 +1107,8 @@ static inline void rte_atomic64_clear(rte_atomic64_t *v)
  *     *exp = *dst
  * @endcode
  *
- * @note This function is currently only available for the x86-64 platform.
+ * @note This function is currently available for the x86-64 and aarch64
+ * platforms.
  *
  * @note The success and failure arguments must be one of the __ATOMIC_* values
  * defined in the C++11 standard. For details on their behavior, refer to the
-- 
2.7.4



More information about the dev mailing list