[dpdk-dev] [PATCH 1/4] net/mlx5: fix Rx queue release

Raslan Darawsheh rasland at nvidia.com
Sun Oct 18 13:58:18 CEST 2020


Hi,

> -----Original Message-----
> From: dev <dev-bounces at dpdk.org> On Behalf Of Matan Azrad
> Sent: Thursday, October 15, 2020 9:38 AM
> To: dev at dpdk.org
> Cc: Slava Ovsiienko <viacheslavo at nvidia.com>
> Subject: [dpdk-dev] [PATCH 1/4] net/mlx5: fix Rx queue release
> 
> The HW objects of the Rx queue is created/destroyed in the device
> start\stop stage while the ethdev configurations for the Rx queue
> starts from the rx_queue_setup stage.
> The PMD should save all the last configurations it got from the ethdev
> and to apply them to the device in the dev_start operation.
> 
> Wrongly, last code added to mitigate the reference counters didn't take
> into account the above rule and combined the configurations and HW
> objects to be created\destroyed together.
> 
> This causes to memory leak and other memory issues.
> 
> Make sure the HW object is released in stop operation when there is no
> any reference to it while the configurations stay saved.
> 
> Fixes: 24e4b650badc ("net/mlx5: mitigate Rx queue reference counters")
> 
> Signed-off-by: Matan Azrad <matan at nvidia.com>
> Acked-by: Viacheslav Ovsiienko <viacheslavo at nvidia.com>
> ---
>  drivers/net/mlx5/mlx5_rxq.c  | 23 +++++++++++++----------
>  drivers/net/mlx5/mlx5_rxtx.h |  2 +-
>  2 files changed, 14 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
> index f1d8373..e1783ba 100644
> --- a/drivers/net/mlx5/mlx5_rxq.c
> +++ b/drivers/net/mlx5/mlx5_rxq.c
> @@ -447,7 +447,8 @@
>  		return -rte_errno;
>  	}
>  	rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
> -	return (rte_atomic32_read(&rxq_ctrl->refcnt) == 1);
> +	return (__atomic_load_n(&rxq_ctrl->refcnt, __ATOMIC_RELAXED)
> == 1);
> +
>  }
> 
>  /* Fetches and drops all SW-owned and error CQEs to synchronize CQ. */
> @@ -1541,7 +1542,7 @@ struct mlx5_rxq_ctrl *
>  	tmpl->rxq.uar_lock_cq = &priv->sh->uar_lock_cq;
>  #endif
>  	tmpl->rxq.idx = idx;
> -	rte_atomic32_inc(&tmpl->refcnt);
> +	__atomic_add_fetch(&tmpl->refcnt, 1, __ATOMIC_RELAXED);
>  	LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
>  	return tmpl;
>  error:
> @@ -1588,7 +1589,7 @@ struct mlx5_rxq_ctrl *
>  	tmpl->rxq.mr_ctrl.cache_bh = (struct mlx5_mr_btree) { 0 };
>  	tmpl->hairpin_conf = *hairpin_conf;
>  	tmpl->rxq.idx = idx;
> -	rte_atomic32_inc(&tmpl->refcnt);
> +	__atomic_add_fetch(&tmpl->refcnt, 1, __ATOMIC_RELAXED);
>  	LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
>  	return tmpl;
>  }
> @@ -1613,7 +1614,7 @@ struct mlx5_rxq_ctrl *
> 
>  	if (rxq_data) {
>  		rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
> -		rte_atomic32_inc(&rxq_ctrl->refcnt);
> +		__atomic_add_fetch(&rxq_ctrl->refcnt, 1,
> __ATOMIC_RELAXED);
>  	}
>  	return rxq_ctrl;
>  }
> @@ -1638,7 +1639,7 @@ struct mlx5_rxq_ctrl *
>  	if (!(*priv->rxqs)[idx])
>  		return 0;
>  	rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
> -	if (!rte_atomic32_dec_and_test(&rxq_ctrl->refcnt))
> +	if (__atomic_sub_fetch(&rxq_ctrl->refcnt, 1, __ATOMIC_RELAXED) >
> 1)
>  		return 1;
>  	if (rxq_ctrl->obj) {
>  		priv->obj_ops.rxq_obj_release(rxq_ctrl->obj);
> @@ -1646,13 +1647,15 @@ struct mlx5_rxq_ctrl *
>  		mlx5_free(rxq_ctrl->obj);
>  		rxq_ctrl->obj = NULL;
>  	}
> -	if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD) {
> -		mlx5_mr_btree_free(&rxq_ctrl->rxq.mr_ctrl.cache_bh);
> +	if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD)
>  		rxq_free_elts(rxq_ctrl);
> +	if (!__atomic_load_n(&rxq_ctrl->refcnt, __ATOMIC_RELAXED)) {
> +		if (rxq_ctrl->type == MLX5_RXQ_TYPE_STANDARD)
> +			mlx5_mr_btree_free(&rxq_ctrl-
> >rxq.mr_ctrl.cache_bh);
> +		LIST_REMOVE(rxq_ctrl, next);
> +		mlx5_free(rxq_ctrl);
> +		(*priv->rxqs)[idx] = NULL;
>  	}
> -	LIST_REMOVE(rxq_ctrl, next);
> -	mlx5_free(rxq_ctrl);
> -	(*priv->rxqs)[idx] = NULL;
>  	return 0;
>  }
> 
> diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
> index 674296e..c3734e3 100644
> --- a/drivers/net/mlx5/mlx5_rxtx.h
> +++ b/drivers/net/mlx5/mlx5_rxtx.h
> @@ -165,7 +165,7 @@ enum mlx5_rxq_type {
>  struct mlx5_rxq_ctrl {
>  	struct mlx5_rxq_data rxq; /* Data path structure. */
>  	LIST_ENTRY(mlx5_rxq_ctrl) next; /* Pointer to the next element. */
> -	rte_atomic32_t refcnt; /* Reference counter. */
> +	uint32_t refcnt; /* Reference counter. */
>  	struct mlx5_rxq_obj *obj; /* Verbs/DevX elements. */
>  	struct mlx5_priv *priv; /* Back pointer to private data. */
>  	enum mlx5_rxq_type type; /* Rxq type. */
> --
> 1.8.3.1

Series applied to next-net-mlx,

Kindest regards,
Raslan Darawsheh


More information about the dev mailing list