[dpdk-dev] [PATCH v4 3/3] eal/stack: enable lock-free stack for aarch64

Phil Yang phil.yang at arm.com
Mon Jul 22 10:44:25 CEST 2019


Enable both the C11 atomic and non-C11 atomic lock-free stack implementations for aarch64.

Suggested-by: Gage Eads <gage.eads at intel.com>
Suggested-by: Jerin Jacob <jerinj at marvell.com>
Signed-off-by: Phil Yang <phil.yang at arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli at arm.com>
Tested-by: Honnappa Nagarahalli <honnappa.nagarahalli at arm.com>

---
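
For context, a minimal usage sketch of the lock-free stack from the application
side (a sketch only: it assumes rte_eal_init() has already run, and the pushed
object pointer is purely illustrative):

	#include <stdio.h>
	#include <rte_stack.h>

	/* Create a stack with the lock-free flag and move one object through
	 * it. RTE_STACK_F_LF selects the lock-free implementation, which this
	 * patch makes available on aarch64 in addition to x86_64.
	 */
	static int
	lf_stack_demo(void *my_obj)
	{
		struct rte_stack *s;
		void *out;

		s = rte_stack_create("demo_lf", 1024, SOCKET_ID_ANY,
				     RTE_STACK_F_LF);
		if (s == NULL)
			return -1;

		/* Push/pop take arrays of object pointers and return the
		 * number of objects actually pushed or popped.
		 */
		if (rte_stack_push(s, &my_obj, 1) != 1)
			printf("stack full\n");
		if (rte_stack_pop(s, &out, 1) == 1)
			printf("popped %p\n", out);

		rte_stack_free(s);
		return 0;
	}
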
 doc/guides/prog_guide/env_abstraction_layer.rst |  4 +-
 doc/guides/rel_notes/release_19_08.rst          |  3 ++
 lib/librte_stack/rte_stack_lf.h                 |  4 ++
 lib/librte_stack/rte_stack_lf_c11.h             | 16 -------
 lib/librte_stack/rte_stack_lf_generic.h         | 16 -------
 lib/librte_stack/rte_stack_lf_stubs.h           | 59 +++++++++++++++++++++++++
 6 files changed, 68 insertions(+), 34 deletions(-)
 create mode 100644 lib/librte_stack/rte_stack_lf_stubs.h
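
The enabling primitive is the 16-byte compare-and-swap referred to in the
documentation update below; both rte_stack_lf_c11.h and rte_stack_lf_generic.h
drive their push/pop retry loops through rte_atomic128_cmp_exchange(). A rough
sketch of that {pointer, counter} update pattern (simplified, with made-up
names; not the exact code from those headers):

	#include <stdint.h>
	#include <rte_atomic.h>

	/* The head is a 16-byte {top pointer, counter} pair swapped as one
	 * unit; bumping the counter on every update makes pointer reuse
	 * (ABA) harmless.
	 */
	static inline void
	lf_head_set(rte_int128_t *head, void *new_top)
	{
		rte_int128_t old_h, new_h;

		do {
			/* Snapshot the head; a torn read is caught by the
			 * compare-and-swap below.
			 */
			old_h = *head;
			new_h.val[0] = (uintptr_t)new_top; /* new top of stack */
			new_h.val[1] = old_h.val[1] + 1;   /* bump counter */
		} while (rte_atomic128_cmp_exchange(head, &old_h, &new_h,
				1 /* weak */, __ATOMIC_RELEASE,
				__ATOMIC_RELAXED) == 0);
	}
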

diff --git a/doc/guides/prog_guide/env_abstraction_layer.rst b/doc/guides/prog_guide/env_abstraction_layer.rst
index f15bcd9..d569f95 100644
--- a/doc/guides/prog_guide/env_abstraction_layer.rst
+++ b/doc/guides/prog_guide/env_abstraction_layer.rst
@@ -592,8 +592,8 @@ Known Issues
   Alternatively, applications can use the lock-free stack mempool handler. When
   considering this handler, note that:
 
-  - It is currently limited to the x86_64 platform, because it uses an
-    instruction (16-byte compare-and-swap) that is not yet available on other
+  - It is currently limited to the aarch64 and x86_64 platforms, because it uses
+    an instruction (16-byte compare-and-swap) that is not yet available on other
     platforms.
   - It has worse average-case performance than the non-preemptive rte_ring, but
     software caching (e.g. the mempool cache) can mitigate this by reducing the
diff --git a/doc/guides/rel_notes/release_19_08.rst b/doc/guides/rel_notes/release_19_08.rst
index 0a3f840..25d45c1 100644
--- a/doc/guides/rel_notes/release_19_08.rst
+++ b/doc/guides/rel_notes/release_19_08.rst
@@ -212,6 +212,9 @@ New Features
 
   Added multiple cores feature to compression perf tool application.
 
+* **Added Lock-free Stack for aarch64.**
+
+  The lock-free stack implementation is enabled for aarch64 platforms.
 
 Removed Items
 -------------
diff --git a/lib/librte_stack/rte_stack_lf.h b/lib/librte_stack/rte_stack_lf.h
index f5581f0..e67630c 100644
--- a/lib/librte_stack/rte_stack_lf.h
+++ b/lib/librte_stack/rte_stack_lf.h
@@ -5,11 +5,15 @@
 #ifndef _RTE_STACK_LF_H_
 #define _RTE_STACK_LF_H_
 
+#if !(defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_ARM64))
+#include "rte_stack_lf_stubs.h"
+#else
 #ifdef RTE_USE_C11_MEM_MODEL
 #include "rte_stack_lf_c11.h"
 #else
 #include "rte_stack_lf_generic.h"
 #endif
+#endif
 
 /**
  * @internal Push several objects on the lock-free stack (MT-safe).
diff --git a/lib/librte_stack/rte_stack_lf_c11.h b/lib/librte_stack/rte_stack_lf_c11.h
index 3d677ae..999359f 100644
--- a/lib/librte_stack/rte_stack_lf_c11.h
+++ b/lib/librte_stack/rte_stack_lf_c11.h
@@ -36,12 +36,6 @@ __rte_stack_lf_push_elems(struct rte_stack_lf_list *list,
 			  struct rte_stack_lf_elem *last,
 			  unsigned int num)
 {
-#ifndef RTE_ARCH_X86_64
-	RTE_SET_USED(first);
-	RTE_SET_USED(last);
-	RTE_SET_USED(list);
-	RTE_SET_USED(num);
-#else
 	struct rte_stack_lf_head old_head;
 	int success;
 
@@ -79,7 +73,6 @@ __rte_stack_lf_push_elems(struct rte_stack_lf_list *list,
 	 * to the LIFO len update.
 	 */
 	__atomic_add_fetch(&list->len, num, __ATOMIC_RELEASE);
-#endif
 }
 
 static __rte_always_inline struct rte_stack_lf_elem *
@@ -88,14 +81,6 @@ __rte_stack_lf_pop_elems(struct rte_stack_lf_list *list,
 			 void **obj_table,
 			 struct rte_stack_lf_elem **last)
 {
-#ifndef RTE_ARCH_X86_64
-	RTE_SET_USED(obj_table);
-	RTE_SET_USED(last);
-	RTE_SET_USED(list);
-	RTE_SET_USED(num);
-
-	return NULL;
-#else
 	struct rte_stack_lf_head old_head;
 	uint64_t len;
 	int success;
@@ -169,7 +154,6 @@ __rte_stack_lf_pop_elems(struct rte_stack_lf_list *list,
 	} while (success == 0);
 
 	return old_head.top;
-#endif
 }
 
 #endif /* _RTE_STACK_LF_C11_H_ */
diff --git a/lib/librte_stack/rte_stack_lf_generic.h b/lib/librte_stack/rte_stack_lf_generic.h
index 3182151..3abbb53 100644
--- a/lib/librte_stack/rte_stack_lf_generic.h
+++ b/lib/librte_stack/rte_stack_lf_generic.h
@@ -36,12 +36,6 @@ __rte_stack_lf_push_elems(struct rte_stack_lf_list *list,
 			  struct rte_stack_lf_elem *last,
 			  unsigned int num)
 {
-#ifndef RTE_ARCH_X86_64
-	RTE_SET_USED(first);
-	RTE_SET_USED(last);
-	RTE_SET_USED(list);
-	RTE_SET_USED(num);
-#else
 	struct rte_stack_lf_head old_head;
 	int success;
 
@@ -75,7 +69,6 @@ __rte_stack_lf_push_elems(struct rte_stack_lf_list *list,
 	} while (success == 0);
 
 	rte_atomic64_add((rte_atomic64_t *)&list->len, num);
-#endif
 }
 
 static __rte_always_inline struct rte_stack_lf_elem *
@@ -84,14 +77,6 @@ __rte_stack_lf_pop_elems(struct rte_stack_lf_list *list,
 			 void **obj_table,
 			 struct rte_stack_lf_elem **last)
 {
-#ifndef RTE_ARCH_X86_64
-	RTE_SET_USED(obj_table);
-	RTE_SET_USED(last);
-	RTE_SET_USED(list);
-	RTE_SET_USED(num);
-
-	return NULL;
-#else
 	struct rte_stack_lf_head old_head;
 	int success;
 
@@ -159,7 +144,6 @@ __rte_stack_lf_pop_elems(struct rte_stack_lf_list *list,
 	} while (success == 0);
 
 	return old_head.top;
-#endif
 }
 
 #endif /* _RTE_STACK_LF_GENERIC_H_ */
diff --git a/lib/librte_stack/rte_stack_lf_stubs.h b/lib/librte_stack/rte_stack_lf_stubs.h
new file mode 100644
index 0000000..d924bc6
--- /dev/null
+++ b/lib/librte_stack/rte_stack_lf_stubs.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Arm Limited
+ */
+
+#ifndef _RTE_STACK_LF_STUBS_H_
+#define _RTE_STACK_LF_STUBS_H_
+
+#include <rte_common.h>
+#include <rte_atomic.h>
+
+static __rte_always_inline unsigned int
+__rte_stack_lf_count(struct rte_stack *s)
+{
+	/* stack_lf_push() and stack_lf_pop() do not update the list's contents
+	 * and stack_lf->len atomically, which can cause the list to appear
+	 * shorter than it actually is if this function is called while other
+	 * threads are modifying the list.
+	 *
+	 * However, given the inherently approximate nature of the get_count
+	 * callback -- even if the list and its size were updated atomically,
+	 * the size could change between when get_count executes and when the
+	 * value is returned to the caller -- this is acceptable.
+	 *
+	 * The stack_lf->len updates are placed such that the list may appear to
+	 * have fewer elements than it does, but will never appear to have more
+	 * elements. If the mempool is near-empty to the point that this is a
+	 * concern, the user should consider increasing the mempool size.
+	 */
+	return (unsigned int)rte_atomic64_read((rte_atomic64_t *)
+			&s->stack_lf.used.len);
+}
+
+static __rte_always_inline void
+__rte_stack_lf_push_elems(struct rte_stack_lf_list *list,
+			  struct rte_stack_lf_elem *first,
+			  struct rte_stack_lf_elem *last,
+			  unsigned int num)
+{
+	RTE_SET_USED(first);
+	RTE_SET_USED(last);
+	RTE_SET_USED(list);
+	RTE_SET_USED(num);
+}
+
+static __rte_always_inline struct rte_stack_lf_elem *
+__rte_stack_lf_pop_elems(struct rte_stack_lf_list *list,
+			 unsigned int num,
+			 void **obj_table,
+			 struct rte_stack_lf_elem **last)
+{
+	RTE_SET_USED(obj_table);
+	RTE_SET_USED(last);
+	RTE_SET_USED(list);
+	RTE_SET_USED(num);
+
+	return NULL;
+}
+
+#endif /* _RTE_STACK_LF_STUBS_H_ */
-- 
2.7.4


