[dpdk-dev] [PATCH 2/4] net/mlx5: enable MPRQ multi-stride operations

Slava Ovsiienko viacheslavo at mellanox.com
Thu Apr 2 12:01:07 CEST 2020


> -----Original Message-----
> From: Alexander Kozyrev <akozyrev at mellanox.com>
> Sent: Wednesday, April 1, 2020 0:53
> To: dev at dpdk.org
> Cc: Raslan Darawsheh <rasland at mellanox.com>; Matan Azrad
> <matan at mellanox.com>; Slava Ovsiienko <viacheslavo at mellanox.com>;
> ferruh.yigit at intel.com; Thomas Monjalon <thomas at monjalon.net>
> Subject: [PATCH 2/4] net/mlx5: enable MPRQ multi-stride operations
> 
> The MPRQ feature should be updated to allow a packet to be received into
> multiple strides in order to support an MTU exceeding 8KB.
> Special care is needed to prevent headroom corruption in multi-stride
> mode, since the PMD borrows the headroom space from the tail of the
> preceding stride. When a packet overlaps the next packet's headroom, copy
> the whole packet into a separate mbuf, or copy just the overlapping data
> if Rx scattering is enabled by the application.
> 
> Signed-off-by: Alexander Kozyrev <akozyrev at mellanox.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo at mellanox.com>

> ---
>  drivers/net/mlx5/mlx5_rxq.c  | 25 ++++------------
> drivers/net/mlx5/mlx5_rxtx.c | 68 +++++++++++++++++++-------------------------
>  drivers/net/mlx5/mlx5_rxtx.h |  2 +-
>  3 files changed, 35 insertions(+), 60 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
> index 85fcfe6..a64f536 100644
> --- a/drivers/net/mlx5/mlx5_rxq.c
> +++ b/drivers/net/mlx5/mlx5_rxq.c
> @@ -1793,9 +1793,7 @@ struct mlx5_rxq_ctrl *
>  	struct mlx5_priv *priv = dev->data->dev_private;
>  	struct mlx5_rxq_ctrl *tmpl;
>  	unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
> -	unsigned int mprq_stride_size;
>  	struct mlx5_dev_config *config = &priv->config;
> -	unsigned int strd_headroom_en;
>  	/*
>  	 * Always allocate extra slots, even if eventually
>  	 * the vector Rx will not be used.
> @@ -1841,27 +1839,13 @@ struct mlx5_rxq_ctrl *
>  	tmpl->socket = socket;
>  	if (dev->data->dev_conf.intr_conf.rxq)
>  		tmpl->irq = 1;
> -	/*
> -	 * LRO packet may consume all the stride memory, hence we cannot
> -	 * guaranty head-room near the packet memory in the stride.
> -	 * In this case scatter is, for sure, enabled and an empty mbuf may be
> -	 * added in the start for the head-room.
> -	 */
> -	if (lro_on_queue && RTE_PKTMBUF_HEADROOM > 0 &&
> -	    non_scatter_min_mbuf_size > mb_len) {
> -		strd_headroom_en = 0;
> -		mprq_stride_size = RTE_MIN(max_rx_pkt_len,
> -					1u << config-
> >mprq.max_stride_size_n);
> -	} else {
> -		strd_headroom_en = 1;
> -		mprq_stride_size = non_scatter_min_mbuf_size;
> -	}
>  	if (!config->mprq.stride_num_n)
>  		config->mprq.stride_num_n = MLX5_MPRQ_STRIDE_NUM_N;
>  	if (!config->mprq.stride_size_n)
> -		config->mprq.stride_size_n = (mprq_stride_size <=
> +		config->mprq.stride_size_n = (non_scatter_min_mbuf_size <=
>  				(1U << config->mprq.max_stride_size_n)) ?
> -			log2above(mprq_stride_size) :
> MLX5_MPRQ_STRIDE_SIZE_N;
> +			log2above(non_scatter_min_mbuf_size) :
> +			MLX5_MPRQ_STRIDE_SIZE_N;
>  	/*
>  	 * This Rx queue can be configured as a Multi-Packet RQ if all of the
>  	 * following conditions are met:
> @@ -1877,7 +1861,8 @@ struct mlx5_rxq_ctrl *
>  		tmpl->rxq.strd_num_n = config->mprq.stride_num_n;
>  		tmpl->rxq.strd_sz_n = config->mprq.stride_size_n;
>  		tmpl->rxq.strd_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT;
> -		tmpl->rxq.strd_headroom_en = strd_headroom_en;
> +		tmpl->rxq.strd_scatter_en =
> +				!!(offloads & DEV_RX_OFFLOAD_SCATTER);
>  		tmpl->rxq.mprq_max_memcpy_len =
> RTE_MIN(first_mb_free_size,
>  				config->mprq.max_memcpy_len);
>  		max_lro_size = RTE_MIN(max_rx_pkt_len,
> diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
> index f3bf763..4c27952 100644
> --- a/drivers/net/mlx5/mlx5_rxtx.c
> +++ b/drivers/net/mlx5/mlx5_rxtx.c
> @@ -1658,21 +1658,20 @@ enum mlx5_txcmp_code {
>  	unsigned int i = 0;
>  	uint32_t rq_ci = rxq->rq_ci;
>  	uint16_t consumed_strd = rxq->consumed_strd;
> -	uint16_t headroom_sz = rxq->strd_headroom_en *
> RTE_PKTMBUF_HEADROOM;
>  	struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];
> 
>  	while (i < pkts_n) {
>  		struct rte_mbuf *pkt;
>  		void *addr;
>  		int ret;
> -		unsigned int len;
> +		uint32_t len;
>  		uint16_t strd_cnt;
>  		uint16_t strd_idx;
>  		uint32_t offset;
>  		uint32_t byte_cnt;
> +		int32_t hdrm_overlap;
>  		volatile struct mlx5_mini_cqe8 *mcqe = NULL;
>  		uint32_t rss_hash_res = 0;
> -		uint8_t lro_num_seg;
> 
>  		if (consumed_strd == strd_n) {
>  			/* Replace WQE only if the buffer is still in use. */
> @@ -1719,18 +1718,6 @@ enum mlx5_txcmp_code {
>  		MLX5_ASSERT(strd_idx < strd_n);
>  		MLX5_ASSERT(!((rte_be_to_cpu_16(cqe->wqe_id) ^ rq_ci) &
>  			    wq_mask));
> -		lro_num_seg = cqe->lro_num_seg;
> -		/*
> -		 * Currently configured to receive a packet per a stride. But if
> -		 * MTU is adjusted through kernel interface, device could
> -		 * consume multiple strides without raising an error. In this
> -		 * case, the packet should be dropped because it is bigger
> than
> -		 * the max_rx_pkt_len.
> -		 */
> -		if (unlikely(!lro_num_seg && strd_cnt > 1)) {
> -			++rxq->stats.idropped;
> -			continue;
> -		}
>  		pkt = rte_pktmbuf_alloc(rxq->mp);
>  		if (unlikely(pkt == NULL)) {
>  			++rxq->stats.rx_nombuf;
> @@ -1742,12 +1729,16 @@ enum mlx5_txcmp_code {
>  			len -= RTE_ETHER_CRC_LEN;
>  		offset = strd_idx * strd_sz + strd_shift;
>  		addr = RTE_PTR_ADD(mlx5_mprq_buf_addr(buf, strd_n),
> offset);
> +		hdrm_overlap = len + RTE_PKTMBUF_HEADROOM - strd_cnt *
> strd_sz;
>  		/*
>  		 * Memcpy packets to the target mbuf if:
>  		 * - The size of packet is smaller than
> mprq_max_memcpy_len.
>  		 * - Out of buffer in the Mempool for Multi-Packet RQ.
> +		 * - There is no space for a headroom and scatter is disabled.
>  		 */
> -		if (len <= rxq->mprq_max_memcpy_len || rxq->mprq_repl ==
> NULL) {
> +		if (len <= rxq->mprq_max_memcpy_len ||
> +		    rxq->mprq_repl == NULL ||
> +		    (hdrm_overlap > 0 && !rxq->strd_scatter_en)) {
>  			/*
>  			 * When memcpy'ing packet due to out-of-buffer, the
>  			 * packet must be smaller than the target mbuf.
> @@ -1769,7 +1760,7 @@ enum mlx5_txcmp_code {
>  			rte_atomic16_add_return(&buf->refcnt, 1);
>  			MLX5_ASSERT((uint16_t)rte_atomic16_read(&buf-
> >refcnt) <=
>  				    strd_n + 1);
> -			buf_addr = RTE_PTR_SUB(addr, headroom_sz);
> +			buf_addr = RTE_PTR_SUB(addr,
> RTE_PKTMBUF_HEADROOM);
>  			/*
>  			 * MLX5 device doesn't use iova but it is necessary in a
>  			 * case where the Rx packet is transmitted via a
> @@ -1788,43 +1779,42 @@ enum mlx5_txcmp_code {
>  			rte_pktmbuf_attach_extbuf(pkt, buf_addr, buf_iova,
>  						  buf_len, shinfo);
>  			/* Set mbuf head-room. */
> -			pkt->data_off = headroom_sz;
> +			SET_DATA_OFF(pkt, RTE_PKTMBUF_HEADROOM);
>  			MLX5_ASSERT(pkt->ol_flags ==
> EXT_ATTACHED_MBUF);
> -			/*
> -			 * Prevent potential overflow due to MTU change
> through
> -			 * kernel interface.
> -			 */
> -			if (unlikely(rte_pktmbuf_tailroom(pkt) < len)) {
> -				rte_pktmbuf_free_seg(pkt);
> -				++rxq->stats.idropped;
> -				continue;
> -			}
> +			MLX5_ASSERT(rte_pktmbuf_tailroom(pkt) <
> +				len - (hdrm_overlap > 0 ? hdrm_overlap : 0));
>  			DATA_LEN(pkt) = len;
>  			/*
> -			 * LRO packet may consume all the stride memory, in
> this
> -			 * case packet head-room space is not guaranteed so
> must
> -			 * to add an empty mbuf for the head-room.
> +			 * Copy the last fragment of a packet (up to
> headroom
> +			 * size bytes) in case there is a stride overlap with
> +			 * a next packet's headroom. Allocate a separate
> mbuf
> +			 * to store this fragment and link it. Scatter is on.
>  			 */
> -			if (!rxq->strd_headroom_en) {
> -				struct rte_mbuf *headroom_mbuf =
> -						rte_pktmbuf_alloc(rxq->mp);
> +			if (hdrm_overlap > 0) {
> +				MLX5_ASSERT(rxq->strd_scatter_en);
> +				struct rte_mbuf *seg =
> +					rte_pktmbuf_alloc(rxq->mp);
> 
> -				if (unlikely(headroom_mbuf == NULL)) {
> +				if (unlikely(seg == NULL)) {
>  					rte_pktmbuf_free_seg(pkt);
>  					++rxq->stats.rx_nombuf;
>  					break;
>  				}
> -				PORT(pkt) = rxq->port_id;
> -				NEXT(headroom_mbuf) = pkt;
> -				pkt = headroom_mbuf;
> +				SET_DATA_OFF(seg, 0);
> +				rte_memcpy(rte_pktmbuf_mtod(seg, void *),
> +					RTE_PTR_ADD(addr, len -
> hdrm_overlap),
> +					hdrm_overlap);
> +				DATA_LEN(seg) = hdrm_overlap;
> +				DATA_LEN(pkt) = len - hdrm_overlap;
> +				NEXT(pkt) = seg;
>  				NB_SEGS(pkt) = 2;
>  			}
>  		}
>  		rxq_cq_to_mbuf(rxq, pkt, cqe, rss_hash_res);
> -		if (lro_num_seg > 1) {
> +		if (cqe->lro_num_seg > 1) {
>  			mlx5_lro_update_hdr(addr, cqe, len);
>  			pkt->ol_flags |= PKT_RX_LRO;
> -			pkt->tso_segsz = strd_sz;
> +			pkt->tso_segsz = len / cqe->lro_num_seg;
>  		}
>  		PKT_LEN(pkt) = len;
>  		PORT(pkt) = rxq->port_id;
> diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
> index 939778a..d155c24 100644
> --- a/drivers/net/mlx5/mlx5_rxtx.h
> +++ b/drivers/net/mlx5/mlx5_rxtx.h
> @@ -119,7 +119,7 @@ struct mlx5_rxq_data {
>  	unsigned int strd_sz_n:4; /* Log 2 of stride size. */
>  	unsigned int strd_shift_en:1; /* Enable 2bytes shift on a stride. */
>  	unsigned int err_state:2; /* enum mlx5_rxq_err_state. */
> -	unsigned int strd_headroom_en:1; /* Enable mbuf headroom in
> MPRQ. */
> +	unsigned int strd_scatter_en:1; /* Scattered packets from a stride. */
>  	unsigned int lro:1; /* Enable LRO. */
>  	unsigned int :1; /* Remaining bits. */
>  	volatile uint32_t *rq_db;
> --
> 1.8.3.1



More information about the dev mailing list