[dpdk-dev] [PATCH 1/1] timer: fix race condition

Thomas Monjalon thomas at monjalon.net
Wed Dec 19 04:36:53 CET 2018


Who could review this fix please?

29/11/2018 19:53, Erik Gabriel Carrillo:
> rte_timer_manage() adds expired timers to a "run list", and walks the
> list, transitioning each timer from the PENDING to the RUNNING state.
> If another lcore resets or stops the timer at precisely this
> moment, the timer state would instead be set to CONFIG by that other
> lcore, which would cause rte_timer_manage() to skip over it. This is
> expected behavior.
> 
> However, if a timer expires quickly enough, the following race
> condition can occur, causing the rte_timer_manage() routine to
> misinterpret a timer in CONFIG state, resulting in lost timers:
> 
> - Thread A:
>   - starts a timer with rte_timer_reset()
>   - the timer is moved to CONFIG state
>   - the spinlock associated with the appropriate skiplist is acquired
>   - timer is inserted into the skiplist
>   - the spinlock is released
> - Thread B:
>   - executes rte_timer_manage()
>   - finds the above timer as expired, adds it to the run list
>   - walks the run list, sees the above timer still in CONFIG state, unlinks
>     it from the run list and continues on
> - Thread A:
>   - moves the timer to PENDING state
>   - returns from rte_timer_reset()
>   - timer is now in PENDING state, but not actually linked into skiplist
>     and will never get processed further by rte_timer_manage()
> 
> This commit fixes this race condition by only releasing the spinlock
> after the timer state has been transitioned from CONFIG to PENDING,
> which prevents rte_timer_manage() from seeing an incorrect state.
> 
> Fixes: 9b15ba895b9f ("timer: use a skip list")
> Signed-off-by: Erik Gabriel Carrillo <erik.g.carrillo at intel.com>
> ---
>  lib/librte_timer/rte_timer.c | 28 ++++++++++++++--------------
>  1 file changed, 14 insertions(+), 14 deletions(-)
> 
> diff --git a/lib/librte_timer/rte_timer.c b/lib/librte_timer/rte_timer.c
> index 590488c..30c7b0a 100644
> --- a/lib/librte_timer/rte_timer.c
> +++ b/lib/librte_timer/rte_timer.c
> @@ -241,24 +241,17 @@ timer_get_prev_entries_for_node(struct rte_timer *tim, unsigned tim_lcore,
>  	}
>  }
>  
> -/*
> - * add in list, lock if needed
> +/* call with lock held as necessary
> + * add in list
>   * timer must be in config state
>   * timer must not be in a list
>   */
>  static void
> -timer_add(struct rte_timer *tim, unsigned tim_lcore, int local_is_locked)
> +timer_add(struct rte_timer *tim, unsigned int tim_lcore)
>  {
> -	unsigned lcore_id = rte_lcore_id();
>  	unsigned lvl;
>  	struct rte_timer *prev[MAX_SKIPLIST_DEPTH+1];
>  
> -	/* if timer needs to be scheduled on another core, we need to
> -	 * lock the list; if it is on local core, we need to lock if
> -	 * we are not called from rte_timer_manage() */
> -	if (tim_lcore != lcore_id || !local_is_locked)
> -		rte_spinlock_lock(&priv_timer[tim_lcore].list_lock);
> -
>  	/* find where exactly this element goes in the list of elements
>  	 * for each depth. */
>  	timer_get_prev_entries(tim->expire, tim_lcore, prev);
> @@ -282,9 +275,6 @@ timer_add(struct rte_timer *tim, unsigned tim_lcore, int local_is_locked)
>  	 * NOTE: this is not atomic on 32-bit*/
>  	priv_timer[tim_lcore].pending_head.expire = priv_timer[tim_lcore].\
>  			pending_head.sl_next[0]->expire;
> -
> -	if (tim_lcore != lcore_id || !local_is_locked)
> -		rte_spinlock_unlock(&priv_timer[tim_lcore].list_lock);
>  }
>  
>  /*
> @@ -379,8 +369,15 @@ __rte_timer_reset(struct rte_timer *tim, uint64_t expire,
>  	tim->f = fct;
>  	tim->arg = arg;
>  
> +	/* if timer needs to be scheduled on another core, we need to
> +	 * lock the destination list; if it is on local core, we need to lock if
> +	 * we are not called from rte_timer_manage()
> +	 */
> +	if (tim_lcore != lcore_id || !local_is_locked)
> +		rte_spinlock_lock(&priv_timer[tim_lcore].list_lock);
> +
>  	__TIMER_STAT_ADD(pending, 1);
> -	timer_add(tim, tim_lcore, local_is_locked);
> +	timer_add(tim, tim_lcore);
>  
>  	/* update state: as we are in CONFIG state, only us can modify
>  	 * the state so we don't need to use cmpset() here */
> @@ -389,6 +386,9 @@ __rte_timer_reset(struct rte_timer *tim, uint64_t expire,
>  	status.owner = (int16_t)tim_lcore;
>  	tim->status.u32 = status.u32;
>  
> +	if (tim_lcore != lcore_id || !local_is_locked)
> +		rte_spinlock_unlock(&priv_timer[tim_lcore].list_lock);
> +
>  	return 0;
>  }
>  
> 







More information about the dev mailing list