[dpdk-dev] [PATCH v3] net/mlx4: support hardware TSO

Adrien Mazarguil adrien.mazarguil at 6wind.com
Thu Jun 28 16:15:54 CEST 2018


Hi Moti,

On Thu, Jun 28, 2018 at 03:48:57PM +0300, Moti Haimovsky wrote:
> Implement support for hardware TSO.
> 
> Signed-off-by: Moti Haimovsky <motih at mellanox.com>

I only glanced at the code but overall the TSO logic appears to be sound;
assuming it went through non-regression testing, I think it's OK. Please
see below for a bunch of cosmetic comments.

> ---
> v3:
> * Fixed compilation errors in compilers without GNU C extensions
>   caused by a declaration of zero-length array in the code.
> in reply to
> 1530187032-6489-1-git-send-email-motih at mellanox.com
> 
> v2:
> * Fixed coding style warning.
> in reply to
> 1530184583-30166-1-git-send-email-motih at mellanox.com
> 
> v1:
> * Fixed coding style warnings.
> in reply to
> 1530181779-19716-1-git-send-email-motih at mellanox.com
> ---
>  doc/guides/nics/features/mlx4.ini |   1 +
>  doc/guides/nics/mlx4.rst          |   3 +
>  drivers/net/mlx4/mlx4.c           |  16 ++
>  drivers/net/mlx4/mlx4.h           |   5 +
>  drivers/net/mlx4/mlx4_prm.h       |  12 ++
>  drivers/net/mlx4/mlx4_rxtx.c      | 372 +++++++++++++++++++++++++++++++++++++-
>  drivers/net/mlx4/mlx4_rxtx.h      |   2 +-
>  drivers/net/mlx4/mlx4_txq.c       |   8 +-
>  8 files changed, 415 insertions(+), 4 deletions(-)
> 
> diff --git a/doc/guides/nics/features/mlx4.ini b/doc/guides/nics/features/mlx4.ini
> index f6efd21..98a3f61 100644
> --- a/doc/guides/nics/features/mlx4.ini
> +++ b/doc/guides/nics/features/mlx4.ini
> @@ -13,6 +13,7 @@ Queue start/stop     = Y
>  MTU update           = Y
>  Jumbo frame          = Y
>  Scattered Rx         = Y
> +TSO                  = Y
>  Promiscuous mode     = Y
>  Allmulticast mode    = Y
>  Unicast MAC filter   = Y
> diff --git a/doc/guides/nics/mlx4.rst b/doc/guides/nics/mlx4.rst
> index 491106a..12adaeb 100644
> --- a/doc/guides/nics/mlx4.rst
> +++ b/doc/guides/nics/mlx4.rst
> @@ -142,6 +142,9 @@ Limitations
>    The ability to enable/disable CRC stripping requires OFED version
>    4.3-1.5.0.0 and above  or rdma-core version v18 and above.
>  
> +- TSO (Transmit Segmentation Offload) is supported in OFED version
> +  4.4 and above or in rdma-core version v18 and above.
> +
>  Prerequisites
>  -------------
>  
> diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
> index d151a90..61b7844 100644
> --- a/drivers/net/mlx4/mlx4.c
> +++ b/drivers/net/mlx4/mlx4.c
> @@ -519,6 +519,8 @@ struct mlx4_conf {
>  		.ports.present = 0,
>  	};
>  	unsigned int vf;
> +	struct rte_mbuf mbuf;
> +	uint64_t size_test = UINT_MAX;

This requires #include <limits.h>

>  	int i;
>  
>  	(void)pci_drv;
> @@ -677,6 +679,20 @@ struct mlx4_conf {
>  					IBV_RAW_PACKET_CAP_SCATTER_FCS);
>  		DEBUG("FCS stripping toggling is %ssupported",
>  		      priv->hw_fcs_strip ? "" : "not ");
> +		/*
> +		 * No TSO SIZE is defined in DPDK, need to figure it out
> +		 * in order to see if we can support it.
> +		 */
> +		mbuf.tso_segsz = size_test;

I understand that you expect UINT_MAX to be truncated to the size of the
underlying type, but this looks convoluted.

Keep it simple: if both the PMD and the HW support TSO, just enable the
capability. Ideally the maximum size should be provided by the application
during dev_configure(). If the API lacks such information, then this will
be checked during Tx, possibly causing tx_burst() to bail out early.
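
Something as simple as this would do (a sketch based on your own code
below, without the mbuf/UINT_MAX detour):

	priv->tso =
		(device_attr_ex.tso_caps.max_tso &&
		 (device_attr_ex.tso_caps.supported_qpts &
		  (1 << IBV_QPT_RAW_PACKET)));
	if (priv->tso)
		priv->tso_max_payload_sz =
			device_attr_ex.tso_caps.max_tso;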

> +		priv->tso =
> +			((device_attr_ex.tso_caps.max_tso >= mbuf.tso_segsz) &&
> +			 (device_attr_ex.tso_caps.supported_qpts &
> +			  (1 << IBV_QPT_RAW_PACKET)));
> +		if (priv->tso)
> +			priv->tso_max_payload_sz =
> +					device_attr_ex.tso_caps.max_tso;
> +		DEBUG("TSO is %ssupported",
> +		      priv->tso ? "" : "not ");
>  		/* Configure the first MAC address by default. */
>  		err = mlx4_get_mac(priv, &mac.addr_bytes);
>  		if (err) {
> diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
> index 300cb4d..742d741 100644
> --- a/drivers/net/mlx4/mlx4.h
> +++ b/drivers/net/mlx4/mlx4.h
> @@ -47,6 +47,9 @@
>  /** Interrupt alarm timeout value in microseconds. */
>  #define MLX4_INTR_ALARM_TIMEOUT 100000
>  
> +/* Maximum Packet headers size (L2+L3+L4) for TSO. */

Packet => packet

> +#define MLX4_MAX_TSO_HEADER 192  // TODO: find the real value
> +

No "//" comments. Can this TODO be fixed before applying this patch?

>  /** Port parameter. */
>  #define MLX4_PMD_PORT_KVARG "port"
>  
> @@ -90,6 +93,8 @@ struct priv {
>  	uint32_t hw_csum:1; /**< Checksum offload is supported. */
>  	uint32_t hw_csum_l2tun:1; /**< Checksum support for L2 tunnels. */
>  	uint32_t hw_fcs_strip:1; /**< FCS stripping toggling is supported. */
> +	uint32_t tso:1; /**< Transmit segmentation offload is supported */
> +	uint32_t tso_max_payload_sz; /* Max TSO payload size being supported */

Please use Doxygen format ("/**<").
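
I.e.:

	uint32_t tso:1; /**< Transmit segmentation offload is supported. */
	uint32_t tso_max_payload_sz; /**< Max supported TSO payload size. */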

>  	uint64_t hw_rss_sup; /**< Supported RSS hash fields (Verbs format). */
>  	struct rte_intr_handle intr_handle; /**< Port interrupt handle. */
>  	struct mlx4_drop *drop; /**< Shared resources for drop flow rules. */
> diff --git a/drivers/net/mlx4/mlx4_prm.h b/drivers/net/mlx4/mlx4_prm.h
> index e15a3c1..0484878 100644
> --- a/drivers/net/mlx4/mlx4_prm.h
> +++ b/drivers/net/mlx4/mlx4_prm.h
> @@ -40,6 +40,7 @@
>  /* Work queue element (WQE) flags. */
>  #define MLX4_WQE_CTRL_IIP_HDR_CSUM (1 << 28)
>  #define MLX4_WQE_CTRL_IL4_HDR_CSUM (1 << 27)
> +#define MLX4_WQE_CTRL_RR (1 << 6)
>  
>  /* CQE checksum flags. */
>  enum {
> @@ -97,6 +98,17 @@ struct mlx4_cq {
>  	int arm_sn; /**< Rx event counter. */
>  };
>  
> +/*
> + * WQE LSO segment structure.
> + * Defined here as backward compatibility for rdma-core v17 and below.
> + * Similar definition is found in infiniband/mlx4dv.h in rdma-core v18
> + * and above.
> + */
> +struct mlx4_wqe_lso_seg_ {

Is the purpose of the trailing underscore to avoid a conflict with v18+?

In that case, you should define this structure under #ifdef HAVE_SOMETHING
generated by auto-config-h.sh. See drivers/net/mlx5/Makefile.
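
Something along these lines, where HAVE_IBV_MLX4_WQE_LSO_SEG stands for
whatever macro name gets generated by that check (this sketch also folds
in the rte_be32_t change suggested below):

	#ifndef HAVE_IBV_MLX4_WQE_LSO_SEG
	/* WQE LSO segment, not exposed by mlx4dv.h before rdma-core v18. */
	struct mlx4_wqe_lso_seg {
		rte_be32_t mss_hdr_size;
		rte_be32_t header[];
	};
	#endif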

> +	__be32 mss_hdr_size;
> +	__be32 header[];
> +};

Replace __be32 with DPDK types, that is, rte_be32_t.

> +
>  /**
>   * Retrieve a CQE entry from a CQ.
>   *
> diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c
> index a92da66..992d193 100644
> --- a/drivers/net/mlx4/mlx4_rxtx.c
> +++ b/drivers/net/mlx4/mlx4_rxtx.c
> @@ -38,10 +38,25 @@
>   * DWORD (32 byte) of a TXBB.
>   */
>  struct pv {
> -	volatile struct mlx4_wqe_data_seg *dseg;
> +	union {
> +		volatile struct mlx4_wqe_data_seg *dseg;
> +		volatile uint32_t *dst;
> +	};
>  	uint32_t val;
>  };
>  
> +/** A helper struct for TSO packet handling. */

Since you chose Doxygen format, the description of each field below must
also start with "/**" when it appears before the field, or "/**<" when it
follows it. Also make sure sentences are properly capitalized and end with
a trailing period to keep the generated documentation neat.
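
I.e.:

	/** A helper struct for TSO packet handling. */
	struct tso_info {
		/** Total size of the WQE including padding. */
		uint32_t wqe_size;
		/** Size of TSO header to prepend to each packet to send. */
		uint16_t tso_header_sz;
		/** Total size of the TSO entry in the WQE. */
		uint16_t wqe_tso_seg_size;
		/** Raw WQE size in units of 16 bytes, without padding. */
		uint8_t fence_size;
	};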

> +struct tso_info {
> +	/* Total size of the WQE including padding */
> +	uint32_t wqe_size;
> +	/* size of TSO header to prepend to each packet to send */
> +	uint16_t tso_header_sz;
> +	/* Total size of the TSO entry in the WQE. */
> +	uint16_t wqe_tso_seg_size;
> +	/* Raw WQE size in units of 16 Bytes and without padding. */
> +	uint8_t fence_size;
> +};
> +
>  /** A table to translate Rx completion flags to packet type. */
>  uint32_t mlx4_ptype_table[0x100] __rte_cache_aligned = {
>  	/*
> @@ -377,6 +392,349 @@ struct pv {
>  }
>  
>  /**
> + * Obtain and calculate TSO information needed for assembling a TSO WQE.
> + *
> + * @param buf
> + *   Pointer to the first packet mbuf.
> + * @param txq
> + *   Pointer to Tx queue structure.
> + * @param tinfo
> + *   Pointer to a structure to fill the info with.
> + *
> + * @return
> + *   0 on success, negative value upon error.
> + */
> +static inline int
> +mlx4_tx_burst_tso_get_params(struct rte_mbuf *buf,
> +			     struct txq *txq,
> +			     struct tso_info *tinfo)
> +{
> +	struct mlx4_sq *sq = &txq->msq;
> +	const uint8_t tunneled = txq->priv->hw_csum_l2tun &&
> +				 (buf->ol_flags & PKT_TX_TUNNEL_MASK);
> +
> +	tinfo->tso_header_sz = buf->l2_len + buf->l3_len + buf->l4_len;
> +	if (tunneled)
> +		tinfo->tso_header_sz += buf->outer_l2_len + buf->outer_l3_len;
> +	if (unlikely(buf->tso_segsz == 0 || tinfo->tso_header_sz == 0)) {
> +		DEBUG("%p: Invalid TSO parameters", (void *)txq);

Please keep the data plane free of any error messages and other logs. Think
about millions of such messages occurring each second; they are not all
that useful.

> +		return -EINVAL;
> +	}
> +	/* First segment must contain all TSO headers. */
> +	if (unlikely(tinfo->tso_header_sz > MLX4_MAX_TSO_HEADER) ||
> +		     tinfo->tso_header_sz > buf->data_len) {
> +		DEBUG("%p: Invalid TSO header length", (void *)txq);

Ditto.

> +		return -EINVAL;
> +	}
> +	/*
> +	 * Calculate the WQE TSO segment size
> +	 * Note:
> +	 * 1. An LSO segment must be padded such that the subsequent data
> +	 *    segment is 16-byte aligned.
> +	 * 2. The start address of the TSO segment is always 16 Bytes aligned.
> +	 */
> +	tinfo->wqe_tso_seg_size = RTE_ALIGN(sizeof(struct mlx4_wqe_lso_seg_) +
> +					    tinfo->tso_header_sz,
> +					    sizeof(struct mlx4_wqe_data_seg));
> +	tinfo->fence_size = ((sizeof(struct mlx4_wqe_ctrl_seg) +
> +			     tinfo->wqe_tso_seg_size) >> MLX4_SEG_SHIFT) +
> +			     buf->nb_segs;
> +	tinfo->wqe_size =
> +		RTE_ALIGN((uint32_t)(tinfo->fence_size << MLX4_SEG_SHIFT),
> +			  MLX4_TXBB_SIZE);
> +	/* Validate WQE size and WQE space in the send queue. */
> +	if (sq->remain_size < tinfo->wqe_size ||
> +	    tinfo->wqe_size > MLX4_MAX_WQE_SIZE)
> +		return -ENOMEM;
> +	return 0;
> +}
> +
> +/**
> + * Fill the TSO WQE data segments with info on buffers to transmit .
> + *
> + * @param buf
> + *   Pointer to the first packet mbuf.
> + * @param txq
> + *   Pointer to Tx queue structure.
> + * @param tinfo
> + *   Pointer to TSO info to use.
> + * @param dseg
> + *   Pointer to the first data segment in the TSO WQE.
> + * @param pv
> + *   Pointer to a stash area for saving the first 32bit word of each TXBB
> + *   used for the TSO WQE.
> + * @param pv_counter
> + *   Current location in the stash.
> + *
> + * @return
> + *   0 on success, negative value upon error.
> + */
> +static inline int
> +mlx4_tx_burst_fill_tso_segs(struct rte_mbuf *buf,
> +			    struct txq *txq,
> +			    const struct tso_info *tinfo,
> +			    volatile struct mlx4_wqe_data_seg *dseg,
> +			    struct pv *pv, int *pv_counter)
> +{
> +	uint32_t lkey;
> +	int nb_segs = buf->nb_segs;
> +	int nb_segs_txbb;
> +	struct mlx4_sq *sq = &txq->msq;
> +	struct rte_mbuf *sbuf = buf;
> +	uint16_t sb_of = tinfo->tso_header_sz;
> +	uint16_t data_len;
> +
> +	while (nb_segs > 0) {
> +		/* Wrap dseg if it points at the end of the queue. */
> +		if ((volatile uint8_t *)dseg >= sq->eob)
> +			dseg = (volatile struct mlx4_wqe_data_seg *)
> +					(volatile uint8_t *)dseg - sq->size;
> +		/* how many dseg entries do we have in the current TXBB ? */
> +		nb_segs_txbb =
> +			(MLX4_TXBB_SIZE / sizeof(struct mlx4_wqe_data_seg)) -
> +			((uintptr_t)dseg & (MLX4_TXBB_SIZE - 1)) /
> +			sizeof(struct mlx4_wqe_data_seg);
> +		switch (nb_segs_txbb) {
> +		case 4:
> +			/* Memory region key for this memory pool. */
> +			lkey = mlx4_tx_mb2mr(txq, sbuf);
> +			if (unlikely(lkey == (uint32_t)-1))
> +				goto lkey_err;
> +			dseg->addr =
> +			    rte_cpu_to_be_64(rte_pktmbuf_mtod_offset(sbuf,
> +								     uintptr_t,
> +								     sb_of));
> +			dseg->lkey = lkey;
> +			/*
> +			 * This data segment starts at the beginning of a new
> +			 * TXBB, so we need to postpone its byte_count writing
> +			 * for later.
> +			 */
> +			pv[*pv_counter].dseg = dseg;
> +			/*
> +			 * Zero length segment is treated as inline segment
> +			 * with zero data.
> +			 */
> +			data_len = sbuf->data_len - sb_of;
> +			pv[(*pv_counter)++].val =
> +				rte_cpu_to_be_32(data_len ?
> +						 data_len :
> +						 0x80000000);
> +			sb_of = 0;
> +			sbuf = sbuf->next;
> +			dseg++;
> +			if (--nb_segs == 0)
> +				break;
> +			/* fallthrough */
> +		case 3:
> +			lkey = mlx4_tx_mb2mr(txq, sbuf);
> +			if (unlikely(lkey == (uint32_t)-1))
> +				goto lkey_err;
> +			data_len = sbuf->data_len - sb_of;
> +			mlx4_fill_tx_data_seg(dseg,
> +					lkey,
> +					rte_pktmbuf_mtod_offset(sbuf,
> +								uintptr_t,
> +								sb_of),
> +					rte_cpu_to_be_32(data_len ?
> +							 data_len :
> +							 0x80000000));
> +			sb_of = 0;
> +			sbuf = sbuf->next;
> +			dseg++;
> +			if (--nb_segs == 0)
> +				break;
> +			/* fallthrough */
> +		case 2:
> +			lkey = mlx4_tx_mb2mr(txq, sbuf);
> +			if (unlikely(lkey == (uint32_t)-1))
> +				goto lkey_err;
> +			data_len = sbuf->data_len - sb_of;
> +			mlx4_fill_tx_data_seg(dseg,
> +					lkey,
> +					rte_pktmbuf_mtod_offset(sbuf,
> +								uintptr_t,
> +								sb_of),
> +					rte_cpu_to_be_32(data_len ?
> +							 data_len :
> +							 0x80000000));
> +			sb_of = 0;
> +			sbuf = sbuf->next;
> +			dseg++;
> +			if (--nb_segs == 0)
> +				break;
> +			/* fallthrough */
> +		case 1:
> +			lkey = mlx4_tx_mb2mr(txq, sbuf);
> +			if (unlikely(lkey == (uint32_t)-1))
> +				goto lkey_err;
> +			data_len = sbuf->data_len - sb_of;
> +			mlx4_fill_tx_data_seg(dseg,
> +					lkey,
> +					rte_pktmbuf_mtod_offset(sbuf,
> +								uintptr_t,
> +								sb_of),
> +					rte_cpu_to_be_32(data_len ?
> +							 data_len :
> +							 0x80000000));
> +			sb_of = 0;
> +			sbuf = sbuf->next;
> +			dseg++;
> +			--nb_segs;
> +			break;
> +		default:
> +			/* Should never happen */

Then an assert() is in order if this can only happen due to a programming
mistake. Crashing the application early is the best approach.

> +			ERROR("%p: invalid number of txbb data segments %d",
> +			      (void *)txq, nb_segs_txbb);

Please remove this error message.

> +			return -EINVAL;

You could replace this with rte_panic(), present when compiled in DEBUG mode.
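
For instance, a sketch (assert() from <assert.h> is compiled out when
NDEBUG is defined):

	default:
		/* Should never happen. */
		assert(!"invalid number of dseg slots in a TXBB");
		return -EINVAL;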

> +		}
> +	}
> +	return 0;
> +lkey_err:
> +	DEBUG("%p: unable to get MP <-> MR association",
> +	      (void *)txq);

Ditto re log messages in the data plane (please check all remaining
occurrences).

> +	return -EFAULT;
> +}
> +
> +/**
> + * Fill the packet's l2, l3 and l4 headers to the WQE.
> + *  This will be used as the header for each TSO segment that is transmitted.

Extra space; I also suggest adding an empty line between them.

> + *
> + * @param buf
> + *   Pointer to the first packet mbuf.
> + * @param txq
> + *   Pointer to Tx queue structure.
> + * @param tinfo
> + *   Pointer to TSO info to use.
> + * @param tseg
> + *   Pointer to the TSO header field in the TSO WQE.
> + * @param pv
> + *   Pointer to a stash area for saving the first 32bit word of each TXBB
> + *   used for the TSO WQE.
> + * @param pv_counter
> + *   Current location in the stash.
> + *
> + * @return
> + *   0 on success, negative value upon error.
> + */
> +static inline int
> +mlx4_tx_burst_fill_tso_hdr(struct rte_mbuf *buf,
> +			   struct txq *txq,
> +			   const struct tso_info *tinfo,
> +			   volatile struct mlx4_wqe_lso_seg_ *tseg,
> +			    struct pv *pv, int *pv_counter)
> +{
> +	struct mlx4_sq *sq = &txq->msq;
> +	int remain_sz = tinfo->tso_header_sz;
> +	char *from = rte_pktmbuf_mtod(buf, char *);
> +	uint16_t txbb_avail_space;
> +	int copy_sz;
> +	/* Union to overcome volatile constraints when copying TSO header. */
> +	union {
> +		volatile uint8_t *vto;
> +		uint8_t *to;
> +	} thdr = { .vto = (volatile uint8_t *)tseg->header, };
> +
> +	/*
> +	 * TSO data always starts at offset 20 from the beginning of the TXBB
> +	 * (16 byte ctrl + 4byte TSO desc). Since each TXBB is 64Byte aligned
> +	 * we can write the first 44 TSO header bytes without worry for TxQ
> +	 * wrapping or overwriting the first TXBB 32bit word.
> +	 */
> +	txbb_avail_space = MLX4_TXBB_SIZE -
> +			   (sizeof(struct mlx4_wqe_ctrl_seg) +
> +			    sizeof(struct mlx4_wqe_lso_seg_));
> +	copy_sz = RTE_MIN(txbb_avail_space, remain_sz);
> +	rte_memcpy(thdr.to, from, copy_sz);
> +	remain_sz -= copy_sz;
> +	while (remain_sz > 0) {
> +		from += copy_sz;
> +		thdr.to += copy_sz;
> +		/* Start of TXBB need to check for TxQ wrap. */
> +		if (thdr.to >= sq->eob)
> +			thdr.vto = sq->buf;
> +		/* New TXBB, stash the first 32bits for later use. */
> +		pv[*pv_counter].dst = (volatile uint32_t *)thdr.vto;
> +		pv[(*pv_counter)++].val = *((uint32_t *)from);
> +		from += sizeof(uint32_t);
> +		thdr.to += sizeof(uint32_t);
> +		remain_sz -= sizeof(uint32_t);
> +		if (remain_sz <= 0)
> +			break;
> +		/* Now copy the rest */
> +		txbb_avail_space = MLX4_TXBB_SIZE - sizeof(uint32_t);
> +		copy_sz = RTE_MIN(txbb_avail_space, remain_sz);
> +		rte_memcpy(thdr.to, from, copy_sz);
> +		remain_sz -= copy_sz;
> +	}
> +	/* TODO: handle PID and IPID ? */

Who's supposed to answer this and when? Please clear TODOs or at least write
down a full description of the work that needs to be done, with any
suggestions you might have.

> +	tseg->mss_hdr_size = rte_cpu_to_be_32((buf->tso_segsz << 16) |
> +					      tinfo->tso_header_sz);
> +	return 0;
> +}
> +
> +/**
> + * Write data segments and header for TSO uni/multi segment packet.
> + *
> + * @param buf
> + *   Pointer to the first packet mbuf.
> + * @param txq
> + *   Pointer to Tx queue structure.
> + * @param ctrl
> + *   Pointer to the WQE control segment.
> + *
> + * @return
> + *   Pointer to the next WQE control segment on success, NULL otherwise.
> + */
> +static volatile struct mlx4_wqe_ctrl_seg *
> +mlx4_tx_burst_tso(struct rte_mbuf *buf, struct txq *txq,
> +		  volatile struct mlx4_wqe_ctrl_seg *ctrl)
> +{
> +	volatile struct mlx4_wqe_data_seg *dseg;
> +	volatile struct mlx4_wqe_lso_seg_ *tseg =
> +		(volatile struct mlx4_wqe_lso_seg_ *)(ctrl + 1);
> +	struct mlx4_sq *sq = &txq->msq;
> +	struct tso_info tinfo;
> +	struct pv *pv = (struct pv *)txq->bounce_buf;
> +	int pv_counter = 0;
> +	int ret;
> +
> +	ret = mlx4_tx_burst_tso_get_params(buf, txq, &tinfo);
> +	if (ret)
> +		goto error;
> +	ret = mlx4_tx_burst_fill_tso_hdr(buf, txq, &tinfo,
> +					 tseg, pv, &pv_counter);
> +	if (ret)
> +		goto error;
> +	/* Calculate data segment location */
> +	dseg = (volatile struct mlx4_wqe_data_seg *)
> +				((uintptr_t)tseg + tinfo.wqe_tso_seg_size);
> +	if ((uintptr_t)dseg >= (uintptr_t)sq->eob)
> +		dseg = (volatile struct mlx4_wqe_data_seg *)
> +					((uintptr_t)dseg - sq->size);
> +	ret = mlx4_tx_burst_fill_tso_segs(buf, txq, &tinfo,
> +					  dseg, pv, &pv_counter);
> +	if (ret)
> +		goto error;
> +	/* Write the first DWORD of each TXBB save earlier. */
> +	if (pv_counter) {
> +		/* Need a barrier here before writing the first TXBB word. */
> +		rte_io_wmb();
> +		for (--pv_counter; pv_counter  >= 0; pv_counter--)
> +			*pv[pv_counter].dst = pv[pv_counter].val;
> +	}
> +	ctrl->fence_size = tinfo.fence_size;
> +	sq->remain_size -= tinfo.wqe_size;
> +	/* Align next WQE address to the next TXBB. */
> +	return (volatile struct mlx4_wqe_ctrl_seg *)
> +		((volatile uint8_t *)ctrl + tinfo.wqe_size);
> +error:
> +	txq->stats.odropped++;
> +	rte_errno = ret;
> +	return NULL;
> +}
> +
> +/**
>   * Write data segments of multi-segment packet.
>   *
>   * @param buf
> @@ -569,6 +927,7 @@ struct pv {
>  			uint16_t flags16[2];
>  		} srcrb;
>  		uint32_t lkey;
> +		bool tso = txq->priv->tso && (buf->ol_flags & PKT_TX_TCP_SEG);
>  
>  		/* Clean up old buffer. */
>  		if (likely(elt->buf != NULL)) {
> @@ -587,7 +946,16 @@ struct pv {
>  			} while (tmp != NULL);
>  		}
>  		RTE_MBUF_PREFETCH_TO_FREE(elt_next->buf);
> -		if (buf->nb_segs == 1) {
> +		if (tso) {
> +			/* Change opcode to TSO */
> +			owner_opcode &= ~MLX4_OPCODE_CONFIG_CMD;
> +			owner_opcode |= MLX4_OPCODE_LSO | MLX4_WQE_CTRL_RR;
> +			ctrl_next = mlx4_tx_burst_tso(buf, txq, ctrl);
> +			if (!ctrl_next) {
> +				elt->buf = NULL;
> +				break;
> +			}
> +		} else if (buf->nb_segs == 1) {
>  			/* Validate WQE space in the send queue. */
>  			if (sq->remain_size < MLX4_TXBB_SIZE) {
>  				elt->buf = NULL;
> diff --git a/drivers/net/mlx4/mlx4_rxtx.h b/drivers/net/mlx4/mlx4_rxtx.h
> index 4c025e3..ffa8abf 100644
> --- a/drivers/net/mlx4/mlx4_rxtx.h
> +++ b/drivers/net/mlx4/mlx4_rxtx.h
> @@ -90,7 +90,7 @@ struct mlx4_txq_stats {
>  	unsigned int idx; /**< Mapping index. */
>  	uint64_t opackets; /**< Total of successfully sent packets. */
>  	uint64_t obytes; /**< Total of successfully sent bytes. */
> -	uint64_t odropped; /**< Total of packets not sent when Tx ring full. */
> +	uint64_t odropped; /**< Total number of packets failed to transmit. */
>  };
>  
>  /** Tx queue descriptor. */
> diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c
> index 6edaadb..9aa7440 100644
> --- a/drivers/net/mlx4/mlx4_txq.c
> +++ b/drivers/net/mlx4/mlx4_txq.c
> @@ -116,8 +116,14 @@
>  			     DEV_TX_OFFLOAD_UDP_CKSUM |
>  			     DEV_TX_OFFLOAD_TCP_CKSUM);
>  	}
> -	if (priv->hw_csum_l2tun)
> +	if (priv->tso)
> +		offloads |= DEV_TX_OFFLOAD_TCP_TSO;
> +	if (priv->hw_csum_l2tun) {
>  		offloads |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
> +		if (priv->tso)
> +			offloads |= (DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
> +				     DEV_TX_OFFLOAD_GRE_TNL_TSO);
> +	}
>  	return offloads;
>  }
>  
> -- 
> 1.8.3.1
> 

-- 
Adrien Mazarguil
6WIND

