[dpdk-dev] [PATCH v2 1/8] app/test: fix deadlock in distributor test

Lukasz Wojciechowski l.wojciechow at partner.samsung.com
Wed Sep 23 03:47:06 CEST 2020


The sanity test with worker shutdown delegates all mbufs to be
processed by a single lcore worker, then freezes one of the lcore
workers and continues to send more mbufs.

The problem occurred when the frozen lcore was the same as the one
processing the mbufs. The lcore that processes the mbufs may differ
every time the test is launched. This is caused by the wkr static
variable in the rte_distributor_process function keeping its value
between test cases.
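
A minimal sketch of that effect (simplified; the real selection logic
in rte_distributor_process() is more involved, and pick_worker() is a
hypothetical name used only for illustration):

	/* wkr keeps its value between calls, and thus between test
	 * cases, so consecutive test cases start the round-robin at
	 * different positions.
	 */
	static unsigned int wkr;

	static unsigned int
	pick_worker(unsigned int num_workers)
	{
		unsigned int w = wkr;

		wkr = (wkr + 1) % num_workers;
		return w;
	}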

The test always froze the lcore with id 0. This patch avoids a
possible collision by freezing the lcore indicated by zero_idx
instead. The lcore that receives the data advances zero_idx, so it
never freezes itself.
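
Condensed, the handoff added by the hunks below looks like this
(executed by each worker after every packet reception):

	zero_id = __atomic_load_n(&zero_idx, __ATOMIC_ACQUIRE);
	if (id == zero_id && num > 0) {
		/* pass the "freezable" role on to the next worker,
		 * wrapping around the number of registered workers
		 */
		zero_id = (zero_id + 1) % __atomic_load_n(&worker_idx,
				__ATOMIC_ACQUIRE);
		__atomic_store_n(&zero_idx, zero_id, __ATOMIC_RELEASE);
	}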

To reproduce the issue fixed by this patch, please run the
distributor_autotest command in the test app several times in a row.
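
For example (binary path shown for a meson build and given here as an
assumption; adjust to your build setup):

	$ ./build/app/test/dpdk-test
	RTE>>distributor_autotest
	RTE>>distributor_autotest
	...

Without this fix, one of the repeated runs eventually hangs in the
worker shutdown sanity test.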

Fixes: c3eabff124e6 ("distributor: add unit tests")
Cc: bruce.richardson at intel.com
Cc: stable at dpdk.org

Signed-off-by: Lukasz Wojciechowski <l.wojciechow at partner.samsung.com>
---
 app/test/test_distributor.c | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/app/test/test_distributor.c b/app/test/test_distributor.c
index ba1f81cf8..35b25463a 100644
--- a/app/test/test_distributor.c
+++ b/app/test/test_distributor.c
@@ -28,6 +28,7 @@ struct worker_params worker_params;
 static volatile int quit;      /**< general quit variable for all threads */
 static volatile int zero_quit; /**< var for when we just want thr0 to quit*/
 static volatile unsigned worker_idx;
+static volatile unsigned zero_idx;
 
 struct worker_stats {
 	volatile unsigned handled_packets;
@@ -346,27 +347,43 @@ handle_work_for_shutdown_test(void *arg)
 	unsigned int total = 0;
 	unsigned int i;
 	unsigned int returned = 0;
+	unsigned int zero_id = 0;
 	const unsigned int id = __atomic_fetch_add(&worker_idx, 1,
 			__ATOMIC_RELAXED);
 
 	num = rte_distributor_get_pkt(d, id, buf, buf, num);
 
+	zero_id = __atomic_load_n(&zero_idx, __ATOMIC_ACQUIRE);
+	if (id == zero_id && num > 0) {
+		zero_id = (zero_id + 1) % __atomic_load_n(&worker_idx,
+			__ATOMIC_ACQUIRE);
+		__atomic_store_n(&zero_idx, zero_id, __ATOMIC_RELEASE);
+	}
+
 	/* wait for quit signal globally, or for worker zero, wait
 	 * for zero_quit */
-	while (!quit && !(id == 0 && zero_quit)) {
+	while (!quit && !(id == zero_id && zero_quit)) {
 		worker_stats[id].handled_packets += num;
 		count += num;
 		for (i = 0; i < num; i++)
 			rte_pktmbuf_free(buf[i]);
 		num = rte_distributor_get_pkt(d,
 				id, buf, buf, num);
+
+		zero_id = __atomic_load_n(&zero_idx, __ATOMIC_ACQUIRE);
+		if (id == zero_id && num > 0) {
+			zero_id = (zero_id + 1) % __atomic_load_n(&worker_idx,
+				__ATOMIC_ACQUIRE);
+			__atomic_store_n(&zero_idx, zero_id, __ATOMIC_RELEASE);
+		}
+
 		total += num;
 	}
 	worker_stats[id].handled_packets += num;
 	count += num;
 	returned = rte_distributor_return_pkt(d, id, buf, num);
 
-	if (id == 0) {
+	if (id == zero_id) {
 		/* for worker zero, allow it to restart to pick up last packet
 		 * when all workers are shutting down.
 		 */
@@ -586,6 +603,7 @@ quit_workers(struct worker_params *wp, struct rte_mempool *p)
 	rte_eal_mp_wait_lcore();
 	quit = 0;
 	worker_idx = 0;
+	zero_idx = 0;
 }
 
 static int
-- 
2.17.1


