[dpdk-dev] [PATCH] net/mlx5: fix vectorized Rx burst termination

Matan Azrad matan at mellanox.com
Tue Jun 2 07:59:59 CEST 2020



From: Alexander Kozyrev
> Maximum burst size of Vectorized Rx burst routine is set to
> MLX5_VPMD_RX_MAX_BURST(64). This limits the performance of any
> application that would like to gather more than 64 packets from the single Rx
> burst for batch processing (i.e. VPP).
> 
> The situation gets worse with a mix of zipped and unzipped CQEs.
> They are processed separately and the Rx burst function returns small
> number of packets every call.
> 
> Repeat the cycle of gathering packets from the vectorized Rx routine until a
> requested number of packets are collected or there are no more CQEs left to
> process.
> 
> Fixes: 6cb559d67b83 ("net/mlx5: add vectorized Rx/Tx burst for x86")
> Cc: stable at dpdk.org
> 
> Signed-off-by: Alexander Kozyrev <akozyrev at mellanox.com>
> Acked-by: Slava Ovsiienko <viacheslavo at mellanox.com>
Acked-by: Matan Azrad <matan at mellanox.com>

> ---
>  drivers/net/mlx5/mlx5_rxtx_vec.c         | 19 +++++++++++++------
>  drivers/net/mlx5/mlx5_rxtx_vec_altivec.h | 13 ++++++++++---
>  drivers/net/mlx5/mlx5_rxtx_vec_neon.h    | 13 ++++++++++---
>  drivers/net/mlx5/mlx5_rxtx_vec_sse.h     | 13 ++++++++++---
>  4 files changed, 43 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5_rxtx_vec.c b/drivers/net/mlx5/mlx5_rxtx_vec.c
> index 1518bdd..b38bd20 100644
> --- a/drivers/net/mlx5/mlx5_rxtx_vec.c
> +++ b/drivers/net/mlx5/mlx5_rxtx_vec.c
> @@ -103,13 +103,20 @@
>  mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
>  {
>  	struct mlx5_rxq_data *rxq = dpdk_rxq;
> -	uint16_t nb_rx;
> +	uint16_t nb_rx = 0;
> +	uint16_t tn = 0;
>  	uint64_t err = 0;
> -
> -	nb_rx = rxq_burst_v(rxq, pkts, pkts_n, &err);
> -	if (unlikely(err | rxq->err_state))
> -		nb_rx = rxq_handle_pending_error(rxq, pkts, nb_rx);
> -	return nb_rx;
> +	bool no_cq = false;
> +
> +	do {
> +		nb_rx = rxq_burst_v(rxq, pkts + tn, pkts_n - tn, &err, &no_cq);
> +		if (unlikely(err | rxq->err_state))
> +			nb_rx = rxq_handle_pending_error(rxq, pkts + tn, nb_rx);
> +		tn += nb_rx;
> +		if (unlikely(no_cq))
> +			break;
> +	} while (tn != pkts_n);
> +	return tn;
>  }
> 
>  /**
> diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h b/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
> index 26715ef..b55138a 100644
> --- a/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
> +++ b/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h
> @@ -564,13 +564,15 @@
>   * @param[out] err
>   *   Pointer to a flag. Set non-zero value if pkts array has at least one error
>   *   packet to handle.
> + * @param[out] no_cq
> + *  Pointer to a boolean. Set true if no new CQE seen.
>   *
>   * @return
>   *   Number of packets received including errors (<= pkts_n).
>   */
>  static inline uint16_t
>  rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
> -	    uint64_t *err)
> +	    uint64_t *err, bool *no_cq)
>  {
>  	const uint16_t q_n = 1 << rxq->cqe_n;
>  	const uint16_t q_mask = q_n - 1;
> @@ -663,8 +665,10 @@
>  	/* Not to cross queue end. */
>  	pkts_n = RTE_MIN(pkts_n, q_n - elts_idx);
>  	pkts_n = RTE_MIN(pkts_n, q_n - cq_idx);
> -	if (!pkts_n)
> +	if (!pkts_n) {
> +		*no_cq = !rcvd_pkt;
>  		return rcvd_pkt;
> +	}
>  	/* At this point, there shouldn't be any remaining packets. */
>  	MLX5_ASSERT(rxq->decompressed == 0);
> 
> @@ -1079,8 +1083,10 @@
>  			break;
>  	}
>  	/* If no new CQE seen, return without updating cq_db. */
> -	if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP))
> +	if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) {
> +		*no_cq = true;
>  		return rcvd_pkt;
> +	}
>  	/* Update the consumer indexes for non-compressed CQEs. */
>  	MLX5_ASSERT(nocmp_n <= pkts_n);
>  	rxq->cq_ci += nocmp_n;
> @@ -1108,6 +1114,7 @@
>  	}
>  	rte_compiler_barrier();
>  	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
> +	*no_cq = !rcvd_pkt;
>  	return rcvd_pkt;
>  }
> 
> diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
> index ecafbf8..3007c03 100644
> --- a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
> +++ b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
> @@ -378,13 +378,15 @@
>   * @param[out] err
>   *   Pointer to a flag. Set non-zero value if pkts array has at least one error
>   *   packet to handle.
> + * @param[out] no_cq
> + *   Pointer to a boolean. Set true if no new CQE seen.
>   *
>   * @return
>   *   Number of packets received including errors (<= pkts_n).
>   */
>  static inline uint16_t
>  rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
> -	    uint64_t *err)
> +	    uint64_t *err, bool *no_cq)
>  {
>  	const uint16_t q_n = 1 << rxq->cqe_n;
>  	const uint16_t q_mask = q_n - 1;
> @@ -485,8 +487,10 @@
>  	/* Not to cross queue end. */
>  	pkts_n = RTE_MIN(pkts_n, q_n - elts_idx);
>  	pkts_n = RTE_MIN(pkts_n, q_n - cq_idx);
> -	if (!pkts_n)
> +	if (!pkts_n) {
> +		*no_cq = !rcvd_pkt;
>  		return rcvd_pkt;
> +	}
>  	/* At this point, there shouldn't be any remained packets. */
>  	MLX5_ASSERT(rxq->decompressed == 0);
>  	/*
> @@ -745,8 +749,10 @@
>  			break;
>  	}
>  	/* If no new CQE seen, return without updating cq_db. */
> -	if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP))
> +	if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) {
> +		*no_cq = true;
>  		return rcvd_pkt;
> +	}
>  	/* Update the consumer indexes for non-compressed CQEs. */
>  	MLX5_ASSERT(nocmp_n <= pkts_n);
>  	rxq->cq_ci += nocmp_n;
> @@ -774,6 +780,7 @@
>  	}
>  	rte_cio_wmb();
>  	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
> +	*no_cq = !rcvd_pkt;
>  	return rcvd_pkt;
>  }
> 
> diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
> index 6847ae7..da5960a 100644
> --- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
> +++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
> @@ -385,13 +385,15 @@
>   * @param[out] err
>   *   Pointer to a flag. Set non-zero value if pkts array has at least one error
>   *   packet to handle.
> + * @param[out] no_cq
> + *   Pointer to a boolean. Set true if no new CQE seen.
>   *
>   * @return
>   *   Number of packets received including errors (<= pkts_n).
>   */
>  static inline uint16_t
>  rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
> -	    uint64_t *err)
> +	    uint64_t *err, bool *no_cq)
>  {
>  	const uint16_t q_n = 1 << rxq->cqe_n;
>  	const uint16_t q_mask = q_n - 1;
> @@ -473,8 +475,10 @@
>  	/* Not to cross queue end. */
>  	pkts_n = RTE_MIN(pkts_n, q_n - elts_idx);
>  	pkts_n = RTE_MIN(pkts_n, q_n - cq_idx);
> -	if (!pkts_n)
> +	if (!pkts_n) {
> +		*no_cq = !rcvd_pkt;
>  		return rcvd_pkt;
> +	}
>  	/* At this point, there shouldn't be any remained packets. */
>  	MLX5_ASSERT(rxq->decompressed == 0);
>  	/*
> @@ -696,8 +700,10 @@
>  			break;
>  	}
>  	/* If no new CQE seen, return without updating cq_db. */
> -	if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP))
> +	if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) {
> +		*no_cq = true;
>  		return rcvd_pkt;
> +	}
>  	/* Update the consumer indexes for non-compressed CQEs. */
>  	MLX5_ASSERT(nocmp_n <= pkts_n);
>  	rxq->cq_ci += nocmp_n;
> @@ -725,6 +731,7 @@
>  	}
>  	rte_compiler_barrier();
>  	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
> +	*no_cq = !rcvd_pkt;
>  	return rcvd_pkt;
>  }
> 
> --
> 1.8.3.1



More information about the dev mailing list