[dpdk-dev] [PATCH v5] net/mlx4: support hardware TSO

Mordechay Haimovsky motih at mellanox.com
Mon Jul 9 18:22:45 CEST 2018


Replies inline.

> -----Original Message-----
> From: Matan Azrad
> Sent: Monday, July 9, 2018 4:07 PM
> To: Mordechay Haimovsky <motih at mellanox.com>; Adrien Mazarguil
> <adrien.mazarguil at 6wind.com>
> Cc: dev at dpdk.org
> Subject: RE: [PATCH v5] net/mlx4: support hardware TSO
> 
> 
> 
> Hi Moti
> 
> Please see some comments below.
> 
> From: Mordechay Haimovsky
> > Implement support for hardware TSO.
> >
> > Signed-off-by: Moti Haimovsky <motih at mellanox.com>
> > ---
> > v5:
> > * Modification to the code according to review inputs from Matan
> >   Azrad.
> > * Code optimization to the TSO header copy routine.
> > * Rearranged the TSO data-segments creation routine.
> > in reply to
> > 1530715998-15703-1-git-send-email-motih at mellanox.com
> >
> > v4:
> > * Bug fixes in filling TSO data segments.
> > * Modifications according to review inputs from Adrien Mazarguil
> >   and Matan Azrad.
> > in reply to
> > 1530190137-17848-1-git-send-email-motih at mellanox.com
> >
> > v3:
> > * Fixed compilation errors in compilers without GNU C extensions
> >   caused by a declaration of zero-length array in the code.
> > in reply to
> > 1530187032-6489-1-git-send-email-motih at mellanox.com
> >
> > v2:
> > * Fixed coding style warning.
> > in reply to
> > 1530184583-30166-1-git-send-email-motih at mellanox.com
> >
> > v1:
> > * Fixed coding style warnings.
> > in reply to
> > 1530181779-19716-1-git-send-email-motih at mellanox.com
> > ---
> >  doc/guides/nics/features/mlx4.ini |   1 +
> >  doc/guides/nics/mlx4.rst          |   3 +
> >  drivers/net/mlx4/Makefile         |   5 +
> >  drivers/net/mlx4/mlx4.c           |   9 +
> >  drivers/net/mlx4/mlx4.h           |   5 +
> >  drivers/net/mlx4/mlx4_prm.h       |  15 ++
> >  drivers/net/mlx4/mlx4_rxtx.c      | 372 +++++++++++++++++++++++++++++++++++++-
> >  drivers/net/mlx4/mlx4_rxtx.h      |   2 +-
> >  drivers/net/mlx4/mlx4_txq.c       |   8 +-
> >  9 files changed, 416 insertions(+), 4 deletions(-)
> >
> > diff --git a/doc/guides/nics/features/mlx4.ini b/doc/guides/nics/features/mlx4.ini
> > index f6efd21..98a3f61 100644
> > --- a/doc/guides/nics/features/mlx4.ini
> > +++ b/doc/guides/nics/features/mlx4.ini
> > @@ -13,6 +13,7 @@ Queue start/stop     = Y
> >  MTU update           = Y
> >  Jumbo frame          = Y
> >  Scattered Rx         = Y
> > +TSO                  = Y
> >  Promiscuous mode     = Y
> >  Allmulticast mode    = Y
> >  Unicast MAC filter   = Y
> > diff --git a/doc/guides/nics/mlx4.rst b/doc/guides/nics/mlx4.rst
> > index 491106a..12adaeb 100644
> > --- a/doc/guides/nics/mlx4.rst
> > +++ b/doc/guides/nics/mlx4.rst
> > @@ -142,6 +142,9 @@ Limitations
> >    The ability to enable/disable CRC stripping requires OFED version
> >    4.3-1.5.0.0 and above  or rdma-core version v18 and above.
> >
> > +- TSO (Transmit Segmentation Offload) is supported in OFED version
> > +  4.4 and above or in rdma-core version v18 and above.
> > +
> >  Prerequisites
> >  -------------
> >
> > diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile
> > index 73f9d40..63bc003 100644
> > --- a/drivers/net/mlx4/Makefile
> > +++ b/drivers/net/mlx4/Makefile
> > @@ -85,6 +85,11 @@ mlx4_autoconf.h.new: FORCE
> >  mlx4_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
> >  	$Q $(RM) -f -- '$@'
> >  	$Q : > '$@'
> > +	$Q sh -- '$<' '$@' \
> > +		HAVE_IBV_MLX4_WQE_LSO_SEG \
> > +		infiniband/mlx4dv.h \
> > +		type 'struct mlx4_wqe_lso_seg' \
> > +		$(AUTOCONF_OUTPUT)
> >
> >  # Create mlx4_autoconf.h or update it in case it differs from the new one.
> >
> > diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
> > index d151a90..5d8c76d 100644
> > --- a/drivers/net/mlx4/mlx4.c
> > +++ b/drivers/net/mlx4/mlx4.c
> > @@ -677,6 +677,15 @@ struct mlx4_conf {
> >  		priv->hw_fcs_strip = !!(device_attr_ex.raw_packet_caps &
> >  					IBV_RAW_PACKET_CAP_SCATTER_FCS);
> >  		DEBUG("FCS stripping toggling is %ssupported",
> >  		      priv->hw_fcs_strip ? "" : "not ");
> > +		priv->tso =
> > +			((device_attr_ex.tso_caps.max_tso > 0) &&
> > +			 (device_attr_ex.tso_caps.supported_qpts &
> > +			  (1 << IBV_QPT_RAW_PACKET)));
> > +		if (priv->tso)
> > +			priv->tso_max_payload_sz =
> > +					device_attr_ex.tso_caps.max_tso;
> > +		DEBUG("TSO is %ssupported",
> > +		      priv->tso ? "" : "not ");
> >  		/* Configure the first MAC address by default. */
> >  		err = mlx4_get_mac(priv, &mac.addr_bytes);
> >  		if (err) {
> > diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
> > index 300cb4d..89d8c38 100644
> > --- a/drivers/net/mlx4/mlx4.h
> > +++ b/drivers/net/mlx4/mlx4.h
> > @@ -47,6 +47,9 @@
> >  /** Interrupt alarm timeout value in microseconds. */
> >  #define MLX4_INTR_ALARM_TIMEOUT 100000
> >
> > +/* Maximum packet headers size (L2+L3+L4) for TSO. */
> > +#define MLX4_MAX_TSO_HEADER 192
> > +
> >  /** Port parameter. */
> >  #define MLX4_PMD_PORT_KVARG "port"
> >
> > @@ -90,6 +93,8 @@ struct priv {
> >  	uint32_t hw_csum:1; /**< Checksum offload is supported. */
> >  	uint32_t hw_csum_l2tun:1; /**< Checksum support for L2 tunnels. */
> >  	uint32_t hw_fcs_strip:1; /**< FCS stripping toggling is supported. */
> > +	uint32_t tso:1; /**< Transmit segmentation offload is supported. */
> > +	uint32_t tso_max_payload_sz; /**< Max supported TSO payload size. */
> >  	uint64_t hw_rss_sup; /**< Supported RSS hash fields (Verbs format). */
> >  	struct rte_intr_handle intr_handle; /**< Port interrupt handle. */
> >  	struct mlx4_drop *drop; /**< Shared resources for drop flow rules. */
> > diff --git a/drivers/net/mlx4/mlx4_prm.h b/drivers/net/mlx4/mlx4_prm.h
> > index b771d8c..aef77ba 100644
> > --- a/drivers/net/mlx4/mlx4_prm.h
> > +++ b/drivers/net/mlx4/mlx4_prm.h
> > @@ -19,6 +19,7 @@
> >  #ifdef PEDANTIC
> >  #pragma GCC diagnostic error "-Wpedantic"
> >  #endif
> > +#include "mlx4_autoconf.h"
> >
> >  /* ConnectX-3 Tx queue basic block. */
> >  #define MLX4_TXBB_SHIFT 6
> > @@ -40,6 +41,7 @@
> >  /* Work queue element (WQE) flags. */
> >  #define MLX4_WQE_CTRL_IIP_HDR_CSUM (1 << 28)
> >  #define MLX4_WQE_CTRL_IL4_HDR_CSUM (1 << 27)
> > +#define MLX4_WQE_CTRL_RR (1 << 6)
> >
> >  /* CQE checksum flags. */
> >  enum {
> > @@ -98,6 +100,19 @@ struct mlx4_cq {
> >  	int arm_sn; /**< Rx event counter. */
> >  };
> >
> > +#ifndef HAVE_IBV_MLX4_WQE_LSO_SEG
> > +/*
> > + * WQE LSO segment structure.
> > + * Defined here as backward compatibility for rdma-core v17 and below.
> > + * Similar definition is found in infiniband/mlx4dv.h in rdma-core v18
> > + * and above.
> > + */
> > +struct mlx4_wqe_lso_seg {
> > +	rte_be32_t mss_hdr_size;
> > +	rte_be32_t header[];
> > +};
> > +#endif
> > +
> >  /**
> >   * Retrieve a CQE entry from a CQ.
> >   *
> > diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c
> > index 78b6dd5..b695539 100644
> > --- a/drivers/net/mlx4/mlx4_rxtx.c
> > +++ b/drivers/net/mlx4/mlx4_rxtx.c
> > @@ -38,10 +38,29 @@
> >   * DWORD (32 byte) of a TXBB.
> >   */
> >  struct pv {
> > -	volatile struct mlx4_wqe_data_seg *dseg;
> > +	union {
> > +		volatile struct mlx4_wqe_data_seg *dseg;
> > +		volatile uint32_t *dst;
> > +	};
> >  	uint32_t val;
> >  };
> >
> > +/** A helper structure for TSO packet handling. */
> > +struct tso_info {
> > +	/** Pointer to the array of saved first DWORD (32 byte) of a TXBB. */
> > +	struct pv *pv;
> > +	/** Current entry in the pv array. */
> > +	int pv_counter;
> > +	/** Total size of the WQE including padding. */
> > +	uint32_t wqe_size;
> > +	/** Size of TSO header to prepend to each packet to send. */
> > +	uint16_t tso_header_size;
> > +	/** Total size of the TSO segment in the WQE. */
> > +	uint16_t wqe_tso_seg_size;
> > +	/** Raw WQE size in units of 16 Bytes and without padding. */
> > +	uint8_t fence_size;
> > +};
> > +
> >  /** A table to translate Rx completion flags to packet type. */
> >  uint32_t mlx4_ptype_table[0x100] __rte_cache_aligned = {
> >  	/*
> > @@ -368,6 +387,345 @@ struct pv {
> >  }
> >
> >  /**
> > + * Obtain and calculate TSO information needed for assembling a TSO WQE.
> > + *
> > + * @param buf
> > + *   Pointer to the first packet mbuf.
> > + * @param txq
> > + *   Pointer to Tx queue structure.
> > + * @param tinfo
> > + *   Pointer to a structure to fill the info with.
> > + *
> > + * @return
> > + *   0 on success, negative value upon error.
> > + */
> > +static inline int
> > +mlx4_tx_burst_tso_get_params(struct rte_mbuf *buf,
> > +			     struct txq *txq,
> > +			     struct tso_info *tinfo)
> > +{
> > +	struct mlx4_sq *sq = &txq->msq;
> > +	const uint8_t tunneled = txq->priv->hw_csum_l2tun &&
> > +				 (buf->ol_flags & PKT_TX_TUNNEL_MASK);
> > +
> > +	tinfo->tso_header_size = buf->l2_len + buf->l3_len + buf->l4_len;
> > +	if (tunneled)
> > +		tinfo->tso_header_size +=
> > +				buf->outer_l2_len + buf->outer_l3_len;
> > +	if (unlikely(buf->tso_segsz == 0 ||
> > +		     tinfo->tso_header_size == 0 ||
> > +		     tinfo->tso_header_size > MLX4_MAX_TSO_HEADER ||
> > +		     tinfo->tso_header_size > buf->data_len))
> > +		return -EINVAL;
> > +	/*
> > +	 * Calculate the WQE TSO segment size
> > +	 * Note:
> > +	 * 1. An LSO segment must be padded such that the subsequent data
> > +	 *    segment is 16-byte aligned.
> > +	 * 2. The start address of the TSO segment is always 16 Bytes aligned.
> > +	 */
> > +	tinfo->wqe_tso_seg_size = RTE_ALIGN(sizeof(struct mlx4_wqe_lso_seg) +
> > +					    tinfo->tso_header_size,
> > +					    sizeof(struct mlx4_wqe_data_seg));
> > +	tinfo->fence_size = ((sizeof(struct mlx4_wqe_ctrl_seg) +
> > +			     tinfo->wqe_tso_seg_size) >> MLX4_SEG_SHIFT) +
> > +			     buf->nb_segs;
> > +	tinfo->wqe_size =
> > +		RTE_ALIGN((uint32_t)(tinfo->fence_size << MLX4_SEG_SHIFT),
> > +			  MLX4_TXBB_SIZE);
> > +	/* Validate WQE size and WQE space in the send queue. */
> > +	if (sq->remain_size < tinfo->wqe_size ||
> > +	    tinfo->wqe_size > MLX4_MAX_WQE_SIZE)
> > +		return -ENOMEM;
> > +	/* Init pv. */
> > +	tinfo->pv = (struct pv *)txq->bounce_buf;
> > +	tinfo->pv_counter = 0;
> > +	return 0;
> > +}
> > +
> > +/**
> > + * Fill the TSO WQE data segments with info on the buffers to transmit.
> > + *
> > + * @param buf
> > + *   Pointer to the first packet mbuf.
> > + * @param txq
> > + *   Pointer to Tx queue structure.
> > + * @param tinfo
> > + *   Pointer to TSO info to use.
> > + * @param dseg
> > + *   Pointer to the first data segment in the TSO WQE.
> > + * @param ctrl
> > + *   Pointer to the control segment in the TSO WQE.
> > + *
> > + * @return
> > + *   Pointer to the next WQE control segment on success, NULL otherwise.
> > + */
> > +static inline volatile struct mlx4_wqe_ctrl_seg *
> > +mlx4_tx_burst_fill_tso_dsegs(struct rte_mbuf *buf,
> > +			     struct txq *txq,
> > +			     struct tso_info *tinfo,
> > +			     volatile struct mlx4_wqe_data_seg *dseg,
> > +			     volatile struct mlx4_wqe_ctrl_seg *ctrl)
> > +{
> > +	uint32_t lkey;
> > +	int nb_segs = buf->nb_segs;
> > +	int nb_segs_txbb;
> > +	struct mlx4_sq *sq = &txq->msq;
> > +	struct rte_mbuf *sbuf = buf;
> > +	struct pv *pv = tinfo->pv;
> > +	int *pv_counter = &tinfo->pv_counter;
> > +	volatile struct mlx4_wqe_ctrl_seg *ctrl_next =
> > +			(volatile struct mlx4_wqe_ctrl_seg *)
> > +				((volatile uint8_t *)ctrl + tinfo->wqe_size);
> > +	uint16_t sb_of = tinfo->tso_header_size;
> > +	uint16_t data_len;
> > +
> > +	do {
> > +		/* How many dseg entries do we have in the current TXBB? */
> > +		nb_segs_txbb = (MLX4_TXBB_SIZE -
> > +				((uintptr_t)dseg & (MLX4_TXBB_SIZE - 1))) >>
> > +			       MLX4_SEG_SHIFT;
> > +		switch (nb_segs_txbb) {
> > +		default:
> > +			/* Should never happen. */
> > +			rte_panic("%p: Invalid number of SGEs(%d) for a TXBB",
> > +			(void *)txq, nb_segs_txbb);
> > +			/* rte_panic never returns. */
> 
> Since this default case should not happen because of the above calculation, I
> think we don't need it.
> Just use "break" if the compiler complains about the lack of a default case.
> 
Although "default" is not mandatory in a switch statement, it is good practice to have it, even if just for code clarity,
so I will keep it there.
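For reference, the arithmetic that bounds nb_segs_txbb to 1..4 (a sketch, assuming MLX4_TXBB_SIZE is 64 and MLX4_SEG_SHIFT is 4, i.e. 16-byte segments, as in mlx4_prm.h):

	/* dseg is always 16-byte aligned inside a 64-byte TXBB. */
	nb_segs_txbb = (MLX4_TXBB_SIZE -
			((uintptr_t)dseg & (MLX4_TXBB_SIZE - 1))) >>
		       MLX4_SEG_SHIFT;
	/* dseg at TXBB offset  0: (64 -  0) >> 4 = 4 entries left. */
	/* dseg at TXBB offset 48: (64 - 48) >> 4 = 1 entry left.   */

So the default label is indeed unreachable; it only documents that fact.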

> > +		case 4:
> > +			/* Memory region key for this memory pool. */
> > +			lkey = mlx4_tx_mb2mr(txq, sbuf);
> > +			if (unlikely(lkey == (uint32_t)-1))
> > +				goto err;
> > +			dseg->addr =
> > +			      rte_cpu_to_be_64(rte_pktmbuf_mtod_offset(sbuf,
> > +								     uintptr_t,
> > +								     sb_of));
> > +			dseg->lkey = lkey;
> > +			/*
> > +			 * This data segment starts at the beginning of a new
> > +			 * TXBB, so we need to postpone its byte_count writing
> > +			 * for later.
> > +			 */
> > +			pv[*pv_counter].dseg = dseg;
> > +			/*
> > +			 * Zero length segment is treated as inline segment
> > +			 * with zero data.
> > +			 */
> > +			data_len = sbuf->data_len - sb_of;
> 
> Is there a chance that data_len will be negative, i.e. will it wrap around in this case?
Since we verify beforehand that all the L2, L3 and L4 headers reside in the same fragment, there is no reason for
data_len to become negative. This is why I use uint16_t, which is the same data type used in struct rte_mbuf
for representing data_len, and the same as what we do in mlx4_tx_burst_segs.
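To spell out the guarantee, this is the check in mlx4_tx_burst_tso_get_params() above that makes the subtraction safe (sketch):

	/* The whole L2+L3+L4 header must reside in the first mbuf. */
	if (unlikely(buf->tso_segsz == 0 ||
		     tinfo->tso_header_size == 0 ||
		     tinfo->tso_header_size > MLX4_MAX_TSO_HEADER ||
		     tinfo->tso_header_size > buf->data_len))
		return -EINVAL;
	/*
	 * Hence for the first mbuf data_len = sbuf->data_len - sb_of >= 0,
	 * and every following mbuf uses sb_of = 0, so the uint16_t can
	 * never wrap.
	 */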

> Maybe it is better to change it to int16_t and to replace the next check
> with:
> data_len > 0 ? data_len : 0x80000000
> 
I will keep this the way it is for two reasons:
1. It seems to me more cumbersome than what I wrote.
2. Code-consistency-wise, this is how we also wrote it in mlx4_tx_burst_segs,
     and what's good there is also good here.
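For context, the byte_count convention both functions rely on (a sketch; as I understand the PRM convention, bit 31 marks an inline segment, so 0x80000000 encodes an inline segment with zero data):

	/* A plain zero byte count must not be posted to the HW. */
	dseg->byte_count = rte_cpu_to_be_32(data_len ?
					    data_len :
					    0x80000000);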

> 
> And I think I found a way to remove the sb_of calculations for each segment:
> 
> Each iteration will compute the next segment's parameters, while only the
> pre-loop calculation for the first segment will take the header offset
> into account:
> 
> The parameters: data_len and sb_of.
> 
> So before the loop:
> sb_of = tinfo->tso_header_size;
> data_len = sbuf->data_len - sb_of;
> 
> And inside the loop (after the check of nb_segs):
> sb_of = 0;
> data_len = sbuf->data_len(the next sbuf);
> 
> So each iteration calculates the next segment's parameters, and we don't need
> the "- sb_of" subtraction per segment.
> 
NICE :)
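So for v6 each case would look roughly like this (a sketch of the suggested change, not the final code):

	/* Before the loop: only the first mbuf skips the TSO header. */
	sb_of = tinfo->tso_header_size;
	data_len = sbuf->data_len - sb_of;
	...
	/* Inside each case label: */
	lkey = mlx4_tx_mb2mr(txq, sbuf);
	if (unlikely(lkey == (uint32_t)-1))
		goto err;
	mlx4_fill_tx_data_seg(dseg, lkey,
			      rte_pktmbuf_mtod_offset(sbuf, uintptr_t, sb_of),
			      rte_cpu_to_be_32(data_len ?
					       data_len :
					       0x80000000));
	sbuf = sbuf->next;
	dseg++;
	if (--nb_segs == 0)
		return ctrl_next;
	/* Prepare the next segment's parameters; no "- sb_of" needed. */
	sb_of = 0;
	data_len = sbuf->data_len;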

> > +			pv[(*pv_counter)++].val =
> > +				rte_cpu_to_be_32(data_len ?
> > +						 data_len :
> > +						 0x80000000);
> > +			sb_of = 0;
> > +			sbuf = sbuf->next;
> > +			dseg++;
> > +			if (--nb_segs == 0)
> > +				return ctrl_next;
> > +			/* fallthrough */
> > +		case 3:
> > +			lkey = mlx4_tx_mb2mr(txq, sbuf);
> > +			if (unlikely(lkey == (uint32_t)-1))
> > +				goto err;
> > +			data_len = sbuf->data_len - sb_of;
> > +			mlx4_fill_tx_data_seg(dseg,
> > +					lkey,
> > +					rte_pktmbuf_mtod_offset(sbuf,
> > +								uintptr_t,
> > +								sb_of),
> > +					rte_cpu_to_be_32(data_len ?
> > +							 data_len :
> > +							 0x80000000));
> > +			sb_of = 0;
> > +			sbuf = sbuf->next;
> > +			dseg++;
> > +			if (--nb_segs == 0)
> > +				return ctrl_next;
> > +			/* fallthrough */
> > +		case 2:
> > +			lkey = mlx4_tx_mb2mr(txq, sbuf);
> > +			if (unlikely(lkey == (uint32_t)-1))
> > +				goto err;
> > +			data_len = sbuf->data_len - sb_of;
> > +			mlx4_fill_tx_data_seg(dseg,
> > +					lkey,
> > +					rte_pktmbuf_mtod_offset(sbuf,
> > +								uintptr_t,
> > +								sb_of),
> > +					rte_cpu_to_be_32(data_len ?
> > +							 data_len :
> > +							 0x80000000));
> > +			sb_of = 0;
> > +			sbuf = sbuf->next;
> > +			dseg++;
> > +			if (--nb_segs == 0)
> > +				return ctrl_next;
> > +			/* fallthrough */
> > +		case 1:
> > +			lkey = mlx4_tx_mb2mr(txq, sbuf);
> > +			if (unlikely(lkey == (uint32_t)-1))
> > +				goto err;
> > +			data_len = sbuf->data_len - sb_of;
> > +			mlx4_fill_tx_data_seg(dseg,
> > +					lkey,
> > +					rte_pktmbuf_mtod_offset(sbuf,
> > +								uintptr_t,
> > +								sb_of),
> > +					rte_cpu_to_be_32(data_len ?
> > +							 data_len :
> > +							 0x80000000));
> > +			sb_of = 0;
> > +			sbuf = sbuf->next;
> > +			dseg++;
> > +			if (--nb_segs == 0)
> > +				return ctrl_next;
> > +		}
> > +		/* Wrap dseg if it points at the end of the queue. */
> > +		if ((volatile uint8_t *)dseg >= sq->eob)
> > +			dseg = (volatile struct mlx4_wqe_data_seg *)
> > +					((volatile uint8_t *)dseg - sq->size);
> > +	} while (true);
> > +err:
> > +	return NULL;
> > +}
> > +
> > +/**
> > + * Fill the packet's l2, l3 and l4 headers to the WQE.
> > + *
> > + * This will be used as the header for each TSO segment that is transmitted.
> > + *
> > + * @param buf
> > + *   Pointer to the first packet mbuf.
> > + * @param txq
> > + *   Pointer to Tx queue structure.
> > + * @param tinfo
> > + *   Pointer to TSO info to use.
> > + * @param ctrl
> > + *   Pointer to the control segment in the TSO WQE.
> > + *
> > + * @return
> > + *   Pointer to the first data segment of the TSO WQE on success,
> > + *   NULL otherwise.
> > + */
> > +static inline volatile struct mlx4_wqe_data_seg *
> > +mlx4_tx_burst_fill_tso_hdr(struct rte_mbuf *buf,
> > +			   struct txq *txq,
> > +			   struct tso_info *tinfo,
> > +			   volatile struct mlx4_wqe_ctrl_seg *ctrl)
> > +{
> > +	volatile struct mlx4_wqe_lso_seg *tseg =
> > +		(volatile struct mlx4_wqe_lso_seg *)(ctrl + 1);
> > +	struct mlx4_sq *sq = &txq->msq;
> > +	struct pv *pv = tinfo->pv;
> > +	int *pv_counter = &tinfo->pv_counter;
> > +	int remain_size = tinfo->tso_header_size;
> > +	char *from = rte_pktmbuf_mtod(buf, char *);
> > +	uint16_t txbb_avail_space;
> > +	/* Union to overcome volatile constraints when copying TSO header. */
> > +	union {
> > +		volatile uint8_t *vto;
> > +		uint8_t *to;
> > +	} thdr = { .vto = (volatile uint8_t *)tseg->header, };
> > +
> > +	/*
> > +	 * TSO data always starts at offset 20 from the beginning of the TXBB
> > +	 * (16-byte ctrl + 4-byte TSO desc). Since each TXBB is 64-byte aligned
> > +	 * we can write the first 44 TSO header bytes without worrying about
> > +	 * TxQ wrapping or overwriting the first 32-bit word of the TXBB.
> > +	 */
> > +	txbb_avail_space = MLX4_TXBB_SIZE -
> > +			   (sizeof(struct mlx4_wqe_ctrl_seg) +
> > +			    sizeof(struct mlx4_wqe_lso_seg));
> 
> I think a better name would be txbb_tail_size.
I think that txbb_avail_space is good enough, so no change here.
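Just to make the value explicit, for the first TXBB it holds (simple arithmetic from the sizes in the patch):

	/*
	 * sizeof(struct mlx4_wqe_ctrl_seg) = 16 B,
	 * sizeof(struct mlx4_wqe_lso_seg)  =  4 B (flexible header[] adds 0),
	 * so 64 - (16 + 4) = 44 header bytes fit before the TXBB ends,
	 * matching the "first 44 TSO header bytes" comment above.
	 */
	txbb_avail_space = MLX4_TXBB_SIZE -
			   (sizeof(struct mlx4_wqe_ctrl_seg) +
			    sizeof(struct mlx4_wqe_lso_seg));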

> 
> > +	while (remain_size >= (int)(txbb_avail_space + sizeof(uint32_t))) {
> > +		/* Copy to end of txbb. */
> > +		rte_memcpy(thdr.to, from, txbb_avail_space);
> > +		from += txbb_avail_space;
> > +		thdr.to += txbb_avail_space;
> > +		/* New TXBB, Check for TxQ wrap. */
> > +		if (thdr.to >= sq->eob)
> > +			thdr.vto = sq->buf;
> > +		/* New TXBB, stash the first 32bits for later use. */
> > +		pv[*pv_counter].dst = (volatile uint32_t *)thdr.to;
> > +		pv[(*pv_counter)++].val = *(uint32_t *)from;
> > +		from += sizeof(uint32_t);
> > +		thdr.to += sizeof(uint32_t);
> > +		remain_size -= (txbb_avail_space + sizeof(uint32_t));
> 
> You don't need the () here.
True
> 
> > +		/* Avail space in new TXBB is TXBB size - 4 */
> > +		txbb_avail_space = MLX4_TXBB_SIZE - sizeof(uint32_t);
> > +	}
> > +	if (remain_size > txbb_avail_space) {
> > +		rte_memcpy(thdr.to, from, txbb_avail_space);
> > +		from += txbb_avail_space;
> > +		thdr.to += txbb_avail_space;
> > +		remain_size -= txbb_avail_space;
> > +		/* New TXBB, Check for TxQ wrap. */
> > +		if (thdr.to >= sq->eob)
> > +			thdr.vto = sq->buf;
> > +		pv[*pv_counter].dst = (volatile uint32_t *)thdr.to;
> > +		rte_memcpy(&pv[*pv_counter].val, from, remain_size);
> > +		(*pv_counter)++;
> > +	} else {
> 
> Here it should be else if (remain_size > 0).
True.
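So the tail copy in v6 becomes roughly (sketch):

	} else if (remain_size > 0) {
		/* The non-empty tail fits in the current TXBB. */
		rte_memcpy(thdr.to, from, remain_size);
	}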
> 
> > +		rte_memcpy(thdr.to, from, remain_size);
> > +	}
> > +
> > +	tseg->mss_hdr_size = rte_cpu_to_be_32((buf->tso_segsz << 16) |
> > +					      tinfo->tso_header_size);
> > +	/* Calculate data segment location */
> > +	return (volatile struct mlx4_wqe_data_seg *)
> > +				((uintptr_t)tseg + tinfo->wqe_tso_seg_size);
> > +}
> > +
> > +/**
> > + * Write data segments and header for TSO uni/multi segment packet.
> > + *
> > + * @param buf
> > + *   Pointer to the first packet mbuf.
> > + * @param txq
> > + *   Pointer to Tx queue structure.
> > + * @param ctrl
> > + *   Pointer to the WQE control segment.
> > + *
> > + * @return
> > + *   Pointer to the next WQE control segment on success, NULL otherwise.
> > + */
> > +static volatile struct mlx4_wqe_ctrl_seg *
> > +mlx4_tx_burst_tso(struct rte_mbuf *buf, struct txq *txq,
> > +		  volatile struct mlx4_wqe_ctrl_seg *ctrl)
> > +{
> > +	volatile struct mlx4_wqe_data_seg *dseg;
> > +	volatile struct mlx4_wqe_ctrl_seg *ctrl_next;
> > +	struct mlx4_sq *sq = &txq->msq;
> > +	struct tso_info tinfo;
> > +	struct pv *pv;
> > +	int pv_counter;
> > +	int ret;
> > +
> > +	ret = mlx4_tx_burst_tso_get_params(buf, txq, &tinfo);
> > +	if (unlikely(ret))
> > +		goto error;
> > +	dseg = mlx4_tx_burst_fill_tso_hdr(buf, txq, &tinfo, ctrl);
> > +	if (unlikely(dseg == NULL))
> > +		goto error;
> > +	if ((uintptr_t)dseg >= (uintptr_t)sq->eob)
> > +		dseg = (volatile struct mlx4_wqe_data_seg *)
> > +					((uintptr_t)dseg - sq->size);
> > +	ctrl_next = mlx4_tx_burst_fill_tso_dsegs(buf, txq, &tinfo, dseg, ctrl);
> > +	if (unlikely(ctrl_next == NULL))
> > +		goto error;
> > +	/* Write the first DWORD of each TXBB saved earlier. */
> > +	pv = tinfo.pv;
> > +	pv_counter = tinfo.pv_counter;
> > +	/* Need a barrier here before writing the first TXBB word. */
> > +	rte_io_wmb();
> 
> > +	for (--pv_counter; pv_counter >= 0; pv_counter--)
> 
> Since we don't need the first check, a do-while statement is better.
> To be fully safe you can add a likely() check before the memory barrier.
> 
I will add the if statement (the likely() check before the barrier), but will not change the loop,
as it is the same as in mlx4_tx_burst_segs and I do want to keep the code consistent.
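For completeness, the suggested form would be roughly (a sketch; I am only taking the likely() part):

	if (likely(pv_counter > 0)) {
		/* Barrier before writing the first word of each TXBB. */
		rte_io_wmb();
		do {
			--pv_counter;
			*pv[pv_counter].dst = pv[pv_counter].val;
		} while (pv_counter > 0);
	}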

> > +		*pv[pv_counter].dst = pv[pv_counter].val;
> > +	ctrl->fence_size = tinfo.fence_size;
> > +	sq->remain_size -= tinfo.wqe_size;
> > +	return ctrl_next;
> > +error:
> > +	txq->stats.odropped++;
> > +	return NULL;
> > +}
> > +
> > +/**
> >   * Write data segments of multi-segment packet.
> >   *
> >   * @param buf
> > @@ -560,6 +918,7 @@ struct pv {
> >  			uint16_t flags16[2];
> >  		} srcrb;
> >  		uint32_t lkey;
> > +		bool tso = txq->priv->tso && (buf->ol_flags & PKT_TX_TCP_SEG);
> >
> >  		/* Clean up old buffer. */
> >  		if (likely(elt->buf != NULL)) {
> > @@ -578,7 +937,16 @@ struct pv {
> >  			} while (tmp != NULL);
> >  		}
> >  		RTE_MBUF_PREFETCH_TO_FREE(elt_next->buf);
> > -		if (buf->nb_segs == 1) {
> > +		if (tso) {
> > +			/* Change opcode to TSO */
> > +			owner_opcode &= ~MLX4_OPCODE_CONFIG_CMD;
> > +			owner_opcode |= MLX4_OPCODE_LSO | MLX4_WQE_CTRL_RR;
> > +			ctrl_next = mlx4_tx_burst_tso(buf, txq, ctrl);
> > +			if (!ctrl_next) {
> > +				elt->buf = NULL;
> > +				break;
> > +			}
> > +		} else if (buf->nb_segs == 1) {
> >  			/* Validate WQE space in the send queue. */
> >  			if (sq->remain_size < MLX4_TXBB_SIZE) {
> >  				elt->buf = NULL;
> > diff --git a/drivers/net/mlx4/mlx4_rxtx.h b/drivers/net/mlx4/mlx4_rxtx.h
> > index 4c025e3..ffa8abf 100644
> > --- a/drivers/net/mlx4/mlx4_rxtx.h
> > +++ b/drivers/net/mlx4/mlx4_rxtx.h
> > @@ -90,7 +90,7 @@ struct mlx4_txq_stats {
> >  	unsigned int idx; /**< Mapping index. */
> >  	uint64_t opackets; /**< Total of successfully sent packets. */
> >  	uint64_t obytes; /**< Total of successfully sent bytes. */
> > -	uint64_t odropped; /**< Total of packets not sent when Tx ring full. */
> > +	uint64_t odropped; /**< Total number of packets failed to transmit. */
> >  };
> >
> >  /** Tx queue descriptor. */
> > diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c
> > index 6edaadb..9aa7440 100644
> > --- a/drivers/net/mlx4/mlx4_txq.c
> > +++ b/drivers/net/mlx4/mlx4_txq.c
> > @@ -116,8 +116,14 @@
> >  			     DEV_TX_OFFLOAD_UDP_CKSUM |
> >  			     DEV_TX_OFFLOAD_TCP_CKSUM);
> >  	}
> > -	if (priv->hw_csum_l2tun)
> > +	if (priv->tso)
> > +		offloads |= DEV_TX_OFFLOAD_TCP_TSO;
> > +	if (priv->hw_csum_l2tun) {
> >  		offloads |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
> > +		if (priv->tso)
> > +			offloads |= (DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
> > +				     DEV_TX_OFFLOAD_GRE_TNL_TSO);
> > +	}
> >  	return offloads;
> >  }
> >
> > --
> > 1.8.3.1


