[dpdk-dev] [RFC v3 5/5] lib/rcu: fix the size of the registered thread ID array

Honnappa Nagarahalli honnappa.nagarahalli at arm.com
Fri Feb 22 08:04:27 CET 2019


Keeping the size of the registered thread ID array dependent on the
maximum number of threads results in performance drops due to address
calculations at run time. Making the size of the thread ID registration
array a compile-time constant reduces the complexity of the address
calculation. This change fixes the maximum number of threads supported
at 512 (a 512-bit bitmap, i.e. one cache line of 64B). However, the
memory required for the QS counters is still dependent on the max
threads parameter. This change therefore retains flexibility while
addressing the performance problem.

Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli at arm.com>
---
 lib/librte_rcu/rte_rcu_qsbr.c | 13 ++-----------
 lib/librte_rcu/rte_rcu_qsbr.h | 29 ++++++++++-------------------
 2 files changed, 12 insertions(+), 30 deletions(-)

diff --git a/lib/librte_rcu/rte_rcu_qsbr.c b/lib/librte_rcu/rte_rcu_qsbr.c
index 02464fdba..3cff82121 100644
--- a/lib/librte_rcu/rte_rcu_qsbr.c
+++ b/lib/librte_rcu/rte_rcu_qsbr.c
@@ -25,17 +25,12 @@
 unsigned int __rte_experimental
 rte_rcu_qsbr_get_memsize(uint32_t max_threads)
 {
-	int n;
 	ssize_t sz;
 
 	RTE_ASSERT(max_threads == 0);
 
 	sz = sizeof(struct rte_rcu_qsbr);
 
-	/* Add the size of the registered thread ID bitmap array */
-	n = RTE_ALIGN(max_threads, RTE_QSBR_THRID_ARRAY_ELM_SIZE);
-	sz += RTE_QSBR_THRID_ARRAY_SIZE(n);
-
 	/* Add the size of quiescent state counter array */
 	sz += sizeof(struct rte_rcu_qsbr_cnt) * max_threads;
 
@@ -51,9 +46,7 @@ rte_rcu_qsbr_init(struct rte_rcu_qsbr *v, uint32_t max_threads)
 	memset(v, 0, rte_rcu_qsbr_get_memsize(max_threads));
 	v->m_threads = max_threads;
 	v->ma_threads = RTE_ALIGN(max_threads, RTE_QSBR_THRID_ARRAY_ELM_SIZE);
-
 	v->num_elems = v->ma_threads/RTE_QSBR_THRID_ARRAY_ELM_SIZE;
-	v->thrid_array_size = RTE_QSBR_THRID_ARRAY_SIZE(v->ma_threads);
 }
 
 /* Dump the details of a single quiescent state variable to a file. */
@@ -74,8 +67,7 @@ rte_rcu_qsbr_dump(FILE *f, struct rte_rcu_qsbr *v)
 
 	fprintf(f, "  Registered thread ID mask = 0x");
 	for (i = 0; i < v->num_elems; i++)
-		fprintf(f, "%lx", __atomic_load_n(
-					RTE_QSBR_THRID_ARRAY_ELM(v, i),
+		fprintf(f, "%lx", __atomic_load_n(&v->reg_thread_id[i],
 					__ATOMIC_ACQUIRE));
 	fprintf(f, "\n");
 
@@ -84,8 +76,7 @@ rte_rcu_qsbr_dump(FILE *f, struct rte_rcu_qsbr *v)
 
 	fprintf(f, "Quiescent State Counts for readers:\n");
 	for (i = 0; i < v->num_elems; i++) {
-		bmap = __atomic_load_n(RTE_QSBR_THRID_ARRAY_ELM(v, i),
-					__ATOMIC_ACQUIRE);
+		bmap = __atomic_load_n(&v->reg_thread_id[i], __ATOMIC_ACQUIRE);
 		while (bmap) {
 			t = __builtin_ctzl(bmap);
 			fprintf(f, "thread ID = %d, count = %lu\n", t,
diff --git a/lib/librte_rcu/rte_rcu_qsbr.h b/lib/librte_rcu/rte_rcu_qsbr.h
index 21fa2c198..1147f11f2 100644
--- a/lib/librte_rcu/rte_rcu_qsbr.h
+++ b/lib/librte_rcu/rte_rcu_qsbr.h
@@ -33,14 +33,9 @@ extern "C" {
  * Given thread id needs to be converted to index into the array and
  * the id within the array element.
  */
-/* Thread ID array size
- * @param ma_threads
- *   num of threads aligned to 64
- */
-#define RTE_QSBR_THRID_ARRAY_SIZE(ma_threads) \
-	RTE_ALIGN((ma_threads) >> 3, RTE_CACHE_LINE_SIZE)
+#define RTE_RCU_MAX_THREADS 512
+#define RTE_QSBR_THRID_ARRAY_ELEMS (RTE_RCU_MAX_THREADS/(sizeof(uint64_t) * 8))
 #define RTE_QSBR_THRID_ARRAY_ELM_SIZE (sizeof(uint64_t) * 8)
-#define RTE_QSBR_THRID_ARRAY_ELM(v, i) ((uint64_t *)(v + 1) + i)
 #define RTE_QSBR_THRID_INDEX_SHIFT 6
 #define RTE_QSBR_THRID_MASK 0x3f
 
@@ -49,8 +44,7 @@ struct rte_rcu_qsbr_cnt {
 	uint64_t cnt; /**< Quiescent state counter. */
 } __rte_cache_aligned;
 
-#define RTE_QSBR_CNT_ARRAY_ELM(v, i) ((struct rte_rcu_qsbr_cnt *) \
-	((uint8_t *)(v + 1) + v->thrid_array_size) + i)
+#define RTE_QSBR_CNT_ARRAY_ELM(v, i) (((struct rte_rcu_qsbr_cnt *)(v + 1)) + i)
 
 /**
  * RTE thread Quiescent State structure.
@@ -69,15 +63,14 @@ struct rte_rcu_qsbr {
 	uint64_t token __rte_cache_aligned;
 	/**< Counter to allow for multiple simultaneous QS queries */
 
-	uint32_t thrid_array_size __rte_cache_aligned;
-	/**< Registered thread ID bitmap array size in bytes */
-	uint32_t num_elems;
+	uint32_t num_elems __rte_cache_aligned;
 	/**< Number of elements in the thread ID array */
-
 	uint32_t m_threads;
 	/**< Maximum number of threads this RCU variable will use */
 	uint32_t ma_threads;
 	/**< Maximum number of threads aligned to 32 */
+
+	uint64_t reg_thread_id[RTE_QSBR_THRID_ARRAY_ELEMS] __rte_cache_aligned;
 } __rte_cache_aligned;
 
 /**
@@ -152,8 +145,7 @@ rte_rcu_qsbr_register_thread(struct rte_rcu_qsbr *v, unsigned int thread_id)
 	/* Release the store to initial TQS count so that readers
 	 * can use it immediately after this function returns.
 	 */
-	__atomic_fetch_or(RTE_QSBR_THRID_ARRAY_ELM(v, i),
-		1UL << id, __ATOMIC_RELEASE);
+	__atomic_fetch_or(&v->reg_thread_id[i], 1UL << id, __ATOMIC_RELEASE);
 }
 
 /**
@@ -188,7 +180,7 @@ rte_rcu_qsbr_unregister_thread(struct rte_rcu_qsbr *v, unsigned int thread_id)
 	 * reporting threads is visible before the thread
 	 * does anything else.
 	 */
-	__atomic_fetch_and(RTE_QSBR_THRID_ARRAY_ELM(v, i),
+	__atomic_fetch_and(&v->reg_thread_id[i],
 				~(1UL << id), __ATOMIC_RELEASE);
 }
 
@@ -298,8 +290,7 @@ rte_rcu_qsbr_check(struct rte_rcu_qsbr *v, uint64_t t, bool wait)
 		/* Load the current registered thread bit map before
 		 * loading the reader thread quiescent state counters.
 		 */
-		bmap = __atomic_load_n(RTE_QSBR_THRID_ARRAY_ELM(v, i),
-				__ATOMIC_ACQUIRE);
+		bmap = __atomic_load_n(&v->reg_thread_id[i], __ATOMIC_ACQUIRE);
 		id = i << RTE_QSBR_THRID_INDEX_SHIFT;
 
 		while (bmap) {
@@ -324,7 +315,7 @@ rte_rcu_qsbr_check(struct rte_rcu_qsbr *v, uint64_t t, bool wait)
 				 * Re-read the bitmap.
 				 */
 				bmap = __atomic_load_n(
-						RTE_QSBR_THRID_ARRAY_ELM(v, i),
+						&v->reg_thread_id[i],
 						__ATOMIC_ACQUIRE);
 
 				continue;
-- 
2.17.1



More information about the dev mailing list