[dpdk-dev] [PATCH v3] net/mlx4: support hardware TSO

Matan Azrad matan at mellanox.com
Thu Jun 28 17:19:33 CEST 2018


Hi Moti

I started to review it but have not finished it all yet :)
Please see some comments/questions below.
I will continue the review on the next version, after the comments below and Adrien's comments are addressed.

From: Mordechay Haimovsky
> +		 * No TSO SIZE is defined in DPDK, need to figure it out
> +		 * in order to see if we can support it.
> +		 */
> +		mbuf.tso_segsz = size_test;
> +		priv->tso =
> +			((device_attr_ex.tso_caps.max_tso >= mbuf.tso_segsz)
> &&
> +			 (device_attr_ex.tso_caps.supported_qpts &
> +			  (1 << IBV_QPT_RAW_PACKET)));
> +		if (priv->tso)
> +			priv->tso_max_payload_sz =
> +					device_attr_ex.tso_caps.max_tso;

Do all of the tso_caps fields exist in old rdma-core versions?

> +		DEBUG("TSO is %ssupported",
> +		      priv->tso ? "" : "not ");
>  		/* Configure the first MAC address by default. */
>  		err = mlx4_get_mac(priv, &mac.addr_bytes);
>  		if (err) {
...
> +mlx4_tx_burst_tso_get_params(struct rte_mbuf *buf,
> +			     struct txq *txq,
> +			     struct tso_info *tinfo)
> +{
> +	struct mlx4_sq *sq = &txq->msq;
> +	const uint8_t tunneled = txq->priv->hw_csum_l2tun &&
> +				 (buf->ol_flags & PKT_TX_TUNNEL_MASK);
> +
> +	tinfo->tso_header_sz = buf->l2_len + buf->l3_len + buf->l4_len;
> +	if (tunneled)
> +		tinfo->tso_header_sz += buf->outer_l2_len + buf-
> >outer_l3_len;

Do those tunnel header sizes include the outer and inner VLAN sizes?

> +	if (unlikely(buf->tso_segsz == 0 || tinfo->tso_header_sz == 0)) {
> +		DEBUG("%p: Invalid TSO parameters", (void *)txq);
> +		return -EINVAL;
> +	}
> +	/* First segment must contain all TSO headers. */
> +	if (unlikely(tinfo->tso_header_sz > MLX4_MAX_TSO_HEADER) ||
> +		     tinfo->tso_header_sz > buf->data_len) {
> +		DEBUG("%p: Invalid TSO header length", (void *)txq);
> +		return -EINVAL;
> +	}
> +	/*
> +	 * Calculate the WQE TSO segment size
> +	 * Note:
> +	 * 1. An LSO segment must be padded such that the subsequent data
> +	 *    segment is 16-byte aligned.
> +	 * 2. The start address of the TSO segment is always 16 Bytes aligned.
> +	 */
...
> +static inline int
> +mlx4_tx_burst_fill_tso_segs(struct rte_mbuf *buf,
> +			    struct txq *txq,
> +			    const struct tso_info *tinfo,
> +			    volatile struct mlx4_wqe_data_seg *dseg,
> +			    struct pv *pv, int *pv_counter)
> +{
> +	uint32_t lkey;
> +	int nb_segs = buf->nb_segs;
> +	int nb_segs_txbb;
> +	struct mlx4_sq *sq = &txq->msq;
> +	struct rte_mbuf *sbuf = buf;
> +	uint16_t sb_of = tinfo->tso_header_sz;
> +	uint16_t data_len;
> +
> +	while (nb_segs > 0) {
> +		/* Wrap dseg if it points at the end of the queue. */
> +		if ((volatile uint8_t *)dseg >= sq->eob)
> +			dseg = (volatile struct mlx4_wqe_data_seg *)
> +					(volatile uint8_t *)dseg - sq->size;

I think we don't need this check on the first iteration, so it may be better to move it to the end of the loop.

> +		/* how many dseg entries do we have in the current TXBB ? */
> +		nb_segs_txbb =
> +			(MLX4_TXBB_SIZE / sizeof(struct mlx4_wqe_data_seg))
> -
> +			((uintptr_t)dseg & (MLX4_TXBB_SIZE - 1)) /
> +			sizeof(struct mlx4_wqe_data_seg);
> +		switch (nb_segs_txbb) {

I think this switch statement is broken: what happens if we have only 1/2/3 segments but we are at the start of a TXBB?
You are going to write the first byte of the TXBB first, no?

> +		case 4:
> +			/* Memory region key for this memory pool. */
> +			lkey = mlx4_tx_mb2mr(txq, sbuf);
> +			if (unlikely(lkey == (uint32_t)-1))
> +				goto lkey_err;
> +			dseg->addr =
> +			    rte_cpu_to_be_64(rte_pktmbuf_mtod_offset(sbuf,
> +								     uintptr_t,
> +								     sb_of));
> +			dseg->lkey = lkey;
> +			/*
> +			 * This data segment starts at the beginning of a new
> +			 * TXBB, so we need to postpone its byte_count writing
> +			 * for later.
> +			 */
> +			pv[*pv_counter].dseg = dseg;
> +			/*
> +			 * Zero length segment is treated as inline segment
> +			 * with zero data.
> +			 */
> +			data_len = sbuf->data_len - sb_of;
> +			pv[(*pv_counter)++].val =
> +				rte_cpu_to_be_32(data_len ?
> +						 data_len :
> +						 0x80000000);
> +			sb_of = 0;
> +			sbuf = sbuf->next;
> +			dseg++;
> +			if (--nb_segs == 0)
> +				break;
> +			/* fallthrough */
> +		case 3:
> +			lkey = mlx4_tx_mb2mr(txq, sbuf);
> +			if (unlikely(lkey == (uint32_t)-1))
> +				goto lkey_err;
> +			data_len = sbuf->data_len - sb_of;
> +			mlx4_fill_tx_data_seg(dseg,
> +					lkey,
> +					rte_pktmbuf_mtod_offset(sbuf,
> +								uintptr_t,
> +								sb_of),
> +					rte_cpu_to_be_32(data_len ?
> +							 data_len :
> +							 0x80000000));
> +			sb_of = 0;
> +			sbuf = sbuf->next;
> +			dseg++;
> +			if (--nb_segs == 0)
> +				break;
> +			/* fallthrough */
> +		case 2:
> +			lkey = mlx4_tx_mb2mr(txq, sbuf);
> +			if (unlikely(lkey == (uint32_t)-1))
> +				goto lkey_err;
> +			data_len = sbuf->data_len - sb_of;
> +			mlx4_fill_tx_data_seg(dseg,
> +					lkey,
> +					rte_pktmbuf_mtod_offset(sbuf,
> +								uintptr_t,
> +								sb_of),
> +					rte_cpu_to_be_32(data_len ?
> +							 data_len :
> +							 0x80000000));
> +			sb_of = 0;
> +			sbuf = sbuf->next;
> +			dseg++;
> +			if (--nb_segs == 0)
> +				break;
> +			/* fallthrough */
> +		case 1:
> +			lkey = mlx4_tx_mb2mr(txq, sbuf);
> +			if (unlikely(lkey == (uint32_t)-1))
> +				goto lkey_err;
> +			data_len = sbuf->data_len - sb_of;
> +			mlx4_fill_tx_data_seg(dseg,
> +					lkey,
> +					rte_pktmbuf_mtod_offset(sbuf,
> +								uintptr_t,
> +								sb_of),
> +					rte_cpu_to_be_32(data_len ?
> +							 data_len :
> +							 0x80000000));
> +			sb_of = 0;
> +			sbuf = sbuf->next;
> +			dseg++;
> +			--nb_segs;
> +			break;
> +		default:
> +			/* Should never happen */
> +			ERROR("%p: invalid number of txbb data segments
> %d",
> +			      (void *)txq, nb_segs_txbb);
> +			return -EINVAL;
> +		}
> +	}
> +	return 0;
> +lkey_err:
> +	DEBUG("%p: unable to get MP <-> MR association",
> +	      (void *)txq);
> +	return -EFAULT;
> +}

Matan


More information about the dev mailing list