[dpdk-dev] [RFC v2 1/2] rcu: add RCU library supporting QSBR mechanism

Honnappa Nagarahalli honnappa.nagarahalli at arm.com
Sat Dec 22 03:14:19 CET 2018


Add RCU library supporting quiescent state based memory reclamation method.
This library helps identify the quiescent state of the reader threads so
that the writers can free the memory associated with the lock less data
structures.

Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli at arm.com>
Reviewed-by: Steve Capper <steve.capper at arm.com>
Reviewed-by: Gavin Hu <gavin.hu at arm.com>
---
 config/common_base                 |   6 +
 lib/Makefile                       |   2 +
 lib/librte_rcu/Makefile            |  23 +++
 lib/librte_rcu/meson.build         |   5 +
 lib/librte_rcu/rte_rcu_qsbr.c      |  63 ++++++
 lib/librte_rcu/rte_rcu_qsbr.h      | 321 +++++++++++++++++++++++++++++
 lib/librte_rcu/rte_rcu_version.map |   8 +
 lib/meson.build                    |   2 +-
 mk/rte.app.mk                      |   1 +
 9 files changed, 430 insertions(+), 1 deletion(-)
 create mode 100644 lib/librte_rcu/Makefile
 create mode 100644 lib/librte_rcu/meson.build
 create mode 100644 lib/librte_rcu/rte_rcu_qsbr.c
 create mode 100644 lib/librte_rcu/rte_rcu_qsbr.h
 create mode 100644 lib/librte_rcu/rte_rcu_version.map

diff --git a/config/common_base b/config/common_base
index d12ae98bc..e148549d8 100644
--- a/config/common_base
+++ b/config/common_base
@@ -792,6 +792,12 @@ CONFIG_RTE_LIBRTE_LATENCY_STATS=y
 #
 CONFIG_RTE_LIBRTE_TELEMETRY=n
 
+#
+# Compile librte_rcu
+#
+CONFIG_RTE_LIBRTE_RCU=y
+CONFIG_RTE_LIBRTE_RCU_DEBUG=n
+
 #
 # Compile librte_lpm
 #
diff --git a/lib/Makefile b/lib/Makefile
index b7370ef97..b674662c8 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -108,6 +108,8 @@ DIRS-$(CONFIG_RTE_LIBRTE_BPF) += librte_bpf
 DEPDIRS-librte_bpf := librte_eal librte_mempool librte_mbuf librte_ethdev
 DIRS-$(CONFIG_RTE_LIBRTE_TELEMETRY) += librte_telemetry
 DEPDIRS-librte_telemetry := librte_eal librte_metrics librte_ethdev
+DIRS-$(CONFIG_RTE_LIBRTE_RCU) += librte_rcu
+DEPDIRS-librte_rcu := librte_eal
 
 ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
 DIRS-$(CONFIG_RTE_LIBRTE_KNI) += librte_kni
diff --git a/lib/librte_rcu/Makefile b/lib/librte_rcu/Makefile
new file mode 100644
index 000000000..6aa677bd1
--- /dev/null
+++ b/lib/librte_rcu/Makefile
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Arm Limited
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_rcu.a
+
+CFLAGS += -DALLOW_EXPERIMENTAL_API
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
+LDLIBS += -lrte_eal
+
+EXPORT_MAP := rte_rcu_version.map
+
+LIBABIVER := 1
+
+# all source are stored in SRCS-y
+SRCS-$(CONFIG_RTE_LIBRTE_RCU) := rte_rcu_qsbr.c
+
+# install includes
+SYMLINK-$(CONFIG_RTE_LIBRTE_RCU)-include := rte_rcu_qsbr.h
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_rcu/meson.build b/lib/librte_rcu/meson.build
new file mode 100644
index 000000000..c009ae4b7
--- /dev/null
+++ b/lib/librte_rcu/meson.build
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Arm Limited
+
+sources = files('rte_rcu_qsbr.c')
+headers = files('rte_rcu_qsbr.h')
diff --git a/lib/librte_rcu/rte_rcu_qsbr.c b/lib/librte_rcu/rte_rcu_qsbr.c
new file mode 100644
index 000000000..3c2577ee2
--- /dev/null
+++ b/lib/librte_rcu/rte_rcu_qsbr.c
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2018 Arm Limited
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <errno.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_malloc.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_atomic.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_errno.h>
+
+#include "rte_rcu_qsbr.h"
+
+/* Initialize a quiescent state variable */
+void __rte_experimental
+rte_rcu_qsbr_init(struct rte_rcu_qsbr *v)
+{
+	memset(v, 0, sizeof(struct rte_rcu_qsbr));
+}
+
+/* Dump the details of a single quiescent state variable to a file. */
+void __rte_experimental
+rte_rcu_qsbr_dump(FILE *f, struct rte_rcu_qsbr *v)
+{
+	uint64_t bmap;
+	uint32_t i, t;
+
+	RTE_ASSERT(v == NULL || f == NULL);
+
+	fprintf(f, "\nQuiescent State Variable @%p\n", v);
+
+	fprintf(f, "  Registered thread ID mask = 0x");
+	for (i = 0; i < RTE_QSBR_BIT_MAP_ELEMS; i++)
+		fprintf(f, "%lx", __atomic_load_n(&v->reg_thread_id[i],
+					__ATOMIC_ACQUIRE));
+	fprintf(f, "\n");
+
+	fprintf(f, "  Token = %lu\n",
+			__atomic_load_n(&v->token, __ATOMIC_ACQUIRE));
+
+	fprintf(f, "Quiescent State Counts for readers:\n");
+	for (i = 0; i < RTE_QSBR_BIT_MAP_ELEMS; i++) {
+		bmap = __atomic_load_n(&v->reg_thread_id[i],
+					__ATOMIC_ACQUIRE);
+		while (bmap) {
+			t = __builtin_ctzl(bmap);
+			fprintf(f, "thread ID = %d, count = %lu\n", t,
+				__atomic_load_n(&v->w[i].cnt,
+						__ATOMIC_RELAXED));
+			bmap &= ~(1UL << t);
+		}
+	}
+}
diff --git a/lib/librte_rcu/rte_rcu_qsbr.h b/lib/librte_rcu/rte_rcu_qsbr.h
new file mode 100644
index 000000000..c818e77fd
--- /dev/null
+++ b/lib/librte_rcu/rte_rcu_qsbr.h
@@ -0,0 +1,321 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2018 Arm Limited
+ */
+
+#ifndef _RTE_RCU_QSBR_H_
+#define _RTE_RCU_QSBR_H_
+
+/**
+ * @file
+ * RTE Quiescent State Based Reclamation (QSBR)
+ *
+ * Quiescent State (QS) is any point in the thread execution
+ * where the thread does not hold a reference to shared memory.
+ * A critical section for a data structure can be a quiescent state for
+ * another data structure.
+ *
+ * This library provides the ability to identify quiescent state.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdio.h>
+#include <stdint.h>
+#include <errno.h>
+#include <rte_common.h>
+#include <rte_memory.h>
+#include <rte_lcore.h>
+#include <rte_debug.h>
+
+/**< Maximum number of reader threads supported. */
+#define RTE_RCU_MAX_THREADS 128
+
+#if !RTE_IS_POWER_OF_2(RTE_RCU_MAX_THREADS)
+#error RTE_RCU_MAX_THREADS must be a power of 2
+#endif
+
+/**< Number of array elements required for the bit-map */
+#define RTE_QSBR_BIT_MAP_ELEMS (RTE_RCU_MAX_THREADS/(sizeof(uint64_t) * 8))
+
+/* Thread IDs are stored as a bitmap of 64b element array. Given thread id
+ * needs to be converted to index into the array and the id within
+ * the array element.
+ */
+#define RTE_QSBR_THR_INDEX_SHIFT 6
+#define RTE_QSBR_THR_ID_MASK 0x3f
+
+/* Worker thread counter */
+struct rte_rcu_qsbr_cnt {
+	uint64_t cnt; /**< Quiescent state counter. */
+} __rte_cache_aligned;
+
+/**
+ * RTE thread Quiescent State structure.
+ */
+struct rte_rcu_qsbr {
+	uint64_t reg_thread_id[RTE_QSBR_BIT_MAP_ELEMS] __rte_cache_aligned;
+	/**< Registered reader thread IDs - reader threads reporting
+	 * on this QS variable represented in a bit map.
+	 */
+
+	uint64_t token __rte_cache_aligned;
+	/**< Counter to allow for multiple simultaneous QS queries */
+
+	struct rte_rcu_qsbr_cnt w[RTE_RCU_MAX_THREADS] __rte_cache_aligned;
+	/**< QS counter for each reader thread, counts upto
+	 * current value of token.
+	 */
+} __rte_cache_aligned;
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Initialize a Quiescent State (QS) variable.
+ *
+ * @param v
+ *   QS variable
+ *
+ */
+void __rte_experimental
+rte_rcu_qsbr_init(struct rte_rcu_qsbr *v);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Add a reader thread, to the list of threads reporting their quiescent
+ * state on a QS variable.
+ *
+ * This is implemented as a lock-free function. It is multi-thread
+ * safe.
+ * Any reader thread that wants to report its quiescent state must
+ * call this API before calling rte_rcu_qsbr_update. This can be called
+ * during initialization or as part of the packet processing loop.
+ * Any ongoing QS queries may wait for the status from this registered
+ * thread.
+ *
+ * @param v
+ *   QS variable
+ * @param thread_id
+ *   Reader thread with this thread ID will report its quiescent state on
+ *   the QS variable.
+ */
+static __rte_always_inline void __rte_experimental
+rte_rcu_qsbr_register_thread(struct rte_rcu_qsbr *v, unsigned int thread_id)
+{
+	unsigned int i, id;
+
+	RTE_ASSERT(v == NULL || thread_id >= RTE_RCU_MAX_THREADS);
+
+	id = thread_id & RTE_QSBR_THR_ID_MASK;
+	i = thread_id >> RTE_QSBR_THR_INDEX_SHIFT;
+
+	/* Worker thread has to count the quiescent states
+	 * only from the current value of token.
+	 * __atomic_store_n(cnt, __ATOMIC_RELAXED) is used to ensure
+	 * 'cnt' (64b) is accessed atomically.
+	 */
+	__atomic_store_n(&v->w[thread_id].cnt,
+		__atomic_load_n(&v->token, __ATOMIC_ACQUIRE),
+		__ATOMIC_RELAXED);
+
+	/* Release the store to initial TQS count so that readers
+	 * can use it immediately after this function returns.
+	 */
+	__atomic_fetch_or(&v->reg_thread_id[i], 1UL << id, __ATOMIC_RELEASE);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Remove a reader thread, from the list of threads reporting their
+ * quiescent state on a QS variable.
+ *
+ * This is implemented as a lock-free function. It is multi-thread safe.
+ * This API can be called from the reader threads during shutdown.
+ * Ongoing QS queries will stop waiting for the status from this
+ * unregistered reader thread.
+ *
+ * @param v
+ *   QS variable
+ * @param thread_id
+ *   Reader thread with this thread ID will stop reporting its quiescent
+ *   state on the QS variable.
+ */
+static __rte_always_inline void __rte_experimental
+rte_rcu_qsbr_unregister_thread(struct rte_rcu_qsbr *v, unsigned int thread_id)
+{
+	unsigned int i, id;
+
+	RTE_ASSERT(v == NULL || thread_id >= RTE_RCU_MAX_THREADS);
+
+	id = thread_id & RTE_QSBR_THR_ID_MASK;
+	i = thread_id >> RTE_QSBR_THR_INDEX_SHIFT;
+
+	/* Make sure the removal of the thread from the list of
+	 * reporting threads is visible before the thread
+	 * does anything else.
+	 */
+	__atomic_fetch_and(&v->reg_thread_id[i],
+				~(1UL << id), __ATOMIC_RELEASE);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Trigger the reader threads to report the quiescent state
+ * status.
+ *
+ * This is implemented as a lock-free function. It is multi-thread
+ * safe and can be called from worker threads.
+ *
+ * @param v
+ *   TQS variable
+ * @param n
+ *   Expected number of times the quiescent state is entered
+ * @param t
+ *   - If successful, this is the token for this call of the API.
+ *     This should be passed to rte_rcu_qsbr_check API.
+ */
+static __rte_always_inline void __rte_experimental
+rte_rcu_qsbr_start(struct rte_rcu_qsbr *v, unsigned int n, uint64_t *t)
+{
+	RTE_ASSERT(v == NULL || t == NULL);
+
+	/* This store release will ensure that changes to any data
+	 * structure are visible to the workers before the token
+	 * update is visible.
+	 */
+	*t = __atomic_add_fetch(&v->token, n, __ATOMIC_RELEASE);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Update quiescent state for a reader thread.
+ *
+ * This is implemented as a lock-free function. It is multi-thread safe.
+ * All the reader threads registered to report their quiescent state
+ * on the QS variable must call this API.
+ *
+ * @param v
+ *   QS variable
+ */
+static __rte_always_inline void __rte_experimental
+rte_rcu_qsbr_update(struct rte_rcu_qsbr *v, unsigned int thread_id)
+{
+	uint64_t t;
+
+	RTE_ASSERT(v == NULL || thread_id >= RTE_RCU_MAX_THREADS);
+
+	/* Load the token before the reader thread loads any other
+	 * (lock-free) data structure. This ensures that updates
+	 * to the data structures are visible if the update
+	 * to token is visible.
+	 */
+	t = __atomic_load_n(&v->token, __ATOMIC_ACQUIRE);
+
+	/* Relaxed load/store on the counter is enough as we are
+	 * reporting an already completed quiescent state.
+	 * __atomic_load_n(cnt, __ATOMIC_RELAXED) is used as 'cnt' (64b)
+	 * is accessed atomically.
+	 * Copy the current token value. This will end grace period
+	 * of multiple concurrent writers.
+	 */
+	if (__atomic_load_n(&v->w[thread_id].cnt, __ATOMIC_RELAXED) != t)
+		__atomic_store_n(&v->w[thread_id].cnt, t, __ATOMIC_RELAXED);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Checks if all the reader threads have entered the quiescent state
+ * 'n' number of times. 'n' is provided in rte_rcu_qsbr_start API.
+ *
+ * This is implemented as a lock-free function. It is multi-thread
+ * safe and can be called from the worker threads as well.
+ *
+ * @param v
+ *   QS variable
+ * @param t
+ *   Token returned by rte_rcu_qsbr_start API
+ * @param wait
+ *   If true, block till all the reader threads have completed entering
+ *   the quiescent state 'n' number of times
+ * @return
+ *   - 0 if all reader threads have NOT passed through specified number
+ *     of quiescent states.
+ *   - 1 if all reader threads have passed through specified number
+ *     of quiescent states.
+ */
+static __rte_always_inline int __rte_experimental
+rte_rcu_qsbr_check(struct rte_rcu_qsbr *v, uint64_t t, bool wait)
+{
+	uint32_t i, j, id;
+	uint64_t bmap;
+
+	RTE_ASSERT(v == NULL);
+
+	i = 0;
+	do {
+		/* Load the current registered thread bit map before
+		 * loading the reader thread quiescent state counters.
+		 */
+		bmap = __atomic_load_n(&v->reg_thread_id[i], __ATOMIC_ACQUIRE);
+		id = i << RTE_QSBR_THR_INDEX_SHIFT;
+
+		while (bmap) {
+			j = __builtin_ctzl(bmap);
+
+			/* __atomic_store_n(cnt, __ATOMIC_RELAXED)
+			 * is used to ensure 'cnt' (64b) is accessed
+			 * atomically.
+			 */
+			if (unlikely(__atomic_load_n(&v->w[id + j].cnt,
+					__ATOMIC_RELAXED) < t)) {
+				/* This thread is not in QS */
+				if (!wait)
+					return 0;
+
+				/* Loop till this thread enters QS */
+				rte_pause();
+				continue;
+			}
+
+			bmap &= ~(1UL << j);
+		}
+
+		i++;
+	} while (i < RTE_QSBR_BIT_MAP_ELEMS);
+
+	return 1;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Dump the details of a single QS variables to a file.
+ *
+ * It is NOT multi-thread safe.
+ *
+ * @param f
+ *   A pointer to a file for output
+ * @param v
+ *   QS variable
+ */
+void __rte_experimental
+rte_rcu_qsbr_dump(FILE *f, struct rte_rcu_qsbr *v);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_RCU_QSBR_H_ */
diff --git a/lib/librte_rcu/rte_rcu_version.map b/lib/librte_rcu/rte_rcu_version.map
new file mode 100644
index 000000000..0df2071be
--- /dev/null
+++ b/lib/librte_rcu/rte_rcu_version.map
@@ -0,0 +1,8 @@
+EXPERIMENTAL {
+	global:
+
+	rte_rcu_qsbr_init;
+	rte_rcu_qsbr_dump;
+
+	local: *;
+};
diff --git a/lib/meson.build b/lib/meson.build
index bb7f443f9..d0e49d4e1 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -21,7 +21,7 @@ libraries = [ 'compat', # just a header, used for versioning
 	'gro', 'gso', 'ip_frag', 'jobstats',
 	'kni', 'latencystats', 'lpm', 'member',
 	'meter', 'power', 'pdump', 'rawdev',
-	'reorder', 'sched', 'security', 'vhost',
+	'reorder', 'sched', 'security', 'rcu', 'vhost',
 	# add pkt framework libs which use other libs from above
 	'port', 'table', 'pipeline',
 	# flow_classify lib depends on pkt framework table lib
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index 3ebc4e64c..d4a1d436a 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -92,6 +92,7 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_EAL)            += -lrte_eal
 _LDLIBS-$(CONFIG_RTE_LIBRTE_CMDLINE)        += -lrte_cmdline
 _LDLIBS-$(CONFIG_RTE_LIBRTE_REORDER)        += -lrte_reorder
 _LDLIBS-$(CONFIG_RTE_LIBRTE_SCHED)          += -lrte_sched
+_LDLIBS-$(CONFIG_RTE_LIBRTE_RCU)            += -lrte_rcu
 
 ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
 _LDLIBS-$(CONFIG_RTE_LIBRTE_KNI)            += -lrte_kni
-- 
2.17.1



More information about the dev mailing list