[dpdk-dev] [PATCH v4 1/8] test/distributor: fix deadlock with freezed worker

Honnappa Nagarahalli Honnappa.Nagarahalli at arm.com
Mon Sep 28 01:34:02 CEST 2020


Hi Lukasz,
	Few comments inline

<snip>

> 
> The sanity test with worker shutdown delegates all bufs to be processed by a
> single lcore worker, then it freezes one of the lcore workers and continues to
> send more bufs.
The designated core to freeze (core with id == 0 in the existing code) gets out of the first while loop and gets into the 2nd while loop in the function ' handle_work_for_shutdown_test'.
In between these 2 while loops, it informs the distributor that it will  not accept any more packets by calling ' rte_distributor_return_pkt' (at least this API is supposed to do that). But, the distributor hangs waiting for the frozen core to start accepting packets. I think this is a problem with the distributor and not the test case.

> 
> Problem occurred if freezed lcore is the same as the one that is processing
> the mbufs. The lcore processing mbufs might be different every time test is
> launched.
> This is caused by keeping the value of wkr static variable in
> rte_distributor_process function between running test cases.
> 
> Test freezed always lcore with 0 id. The patch changes avoids possible
> collision by freezing lcore with zero_idx. The lcore that receives the data
> updates the zero_idx, so it is not freezed itself.
> 
> To reproduce the issue fixed by this patch, please run distributor_autotest
> command in test app several times in a row.
> 
> Fixes: c3eabff124e6 ("distributor: add unit tests")
> Cc: bruce.richardson at intel.com
> Cc: stable at dpdk.org
> 
> Signed-off-by: Lukasz Wojciechowski <l.wojciechow at partner.samsung.com>
> Tested-by: David Hunt <david.hunt at intel.com>
> ---
>  app/test/test_distributor.c | 22 ++++++++++++++++++++--
>  1 file changed, 20 insertions(+), 2 deletions(-)
> 
> diff --git a/app/test/test_distributor.c b/app/test/test_distributor.c index
> ba1f81cf8..35b25463a 100644
> --- a/app/test/test_distributor.c
> +++ b/app/test/test_distributor.c
> @@ -28,6 +28,7 @@ struct worker_params worker_params;
>  static volatile int quit;      /**< general quit variable for all threads */
>  static volatile int zero_quit; /**< var for when we just want thr0 to quit*/
> static volatile unsigned worker_idx;
> +static volatile unsigned zero_idx;
> 
>  struct worker_stats {
>  	volatile unsigned handled_packets;
> @@ -346,27 +347,43 @@ handle_work_for_shutdown_test(void *arg)
>  	unsigned int total = 0;
>  	unsigned int i;
>  	unsigned int returned = 0;
> +	unsigned int zero_id = 0;
>  	const unsigned int id = __atomic_fetch_add(&worker_idx, 1,
>  			__ATOMIC_RELAXED);
> 
>  	num = rte_distributor_get_pkt(d, id, buf, buf, num);
> 
> +	zero_id = __atomic_load_n(&zero_idx, __ATOMIC_ACQUIRE);
> +	if (id == zero_id && num > 0) {
> +		zero_id = (zero_id + 1) %  __atomic_load_n(&worker_idx,
> +			__ATOMIC_ACQUIRE);
> +		__atomic_store_n(&zero_idx, zero_id, __ATOMIC_RELEASE);
> +	}
> +
>  	/* wait for quit single globally, or for worker zero, wait
>  	 * for zero_quit */
> -	while (!quit && !(id == 0 && zero_quit)) {
> +	while (!quit && !(id == zero_id && zero_quit)) {
>  		worker_stats[id].handled_packets += num;
>  		count += num;
>  		for (i = 0; i < num; i++)
>  			rte_pktmbuf_free(buf[i]);
>  		num = rte_distributor_get_pkt(d,
>  				id, buf, buf, num);
> +
> +		zero_id = __atomic_load_n(&zero_idx,
> __ATOMIC_ACQUIRE);
> +		if (id == zero_id && num > 0) {
> +			zero_id = (zero_id + 1) %
> __atomic_load_n(&worker_idx,
> +				__ATOMIC_ACQUIRE);
> +			__atomic_store_n(&zero_idx, zero_id,
> __ATOMIC_RELEASE);
> +		}
> +
>  		total += num;
>  	}
>  	worker_stats[id].handled_packets += num;
>  	count += num;
>  	returned = rte_distributor_return_pkt(d, id, buf, num);
> 
> -	if (id == 0) {
> +	if (id == zero_id) {
>  		/* for worker zero, allow it to restart to pick up last packet
>  		 * when all workers are shutting down.
>  		 */
> @@ -586,6 +603,7 @@ quit_workers(struct worker_params *wp, struct
> rte_mempool *p)
>  	rte_eal_mp_wait_lcore();
>  	quit = 0;
>  	worker_idx = 0;
> +	zero_idx = 0;
>  }
> 
>  static int
> --
> 2.17.1



More information about the dev mailing list