[PATCH v9 09/14] net/idpf: add support for Rx/Tx offloading
    Andrew Rybchenko 
    andrew.rybchenko at oktetlabs.ru
       
    Fri Oct 21 10:29:47 CEST 2022
    
    
  
On 10/21/22 08:18, Junfeng Guo wrote:
> Add Rx/Tx offloading support, including TSO and CHKSUM.
Typically code path for Rx and Tx offload are absolutely
different. So, I'm wondering why both are implemented in
one patch. If they are really independent, please, split
it into Rx and Tx patches to make it easier to review.
> 
> Signed-off-by: Beilei Xing <beilei.xing at intel.com>
> Signed-off-by: Xiaoyun Li <xiaoyun.li at intel.com>
> Signed-off-by: Junfeng Guo <junfeng.guo at intel.com>
> ---
>   doc/guides/nics/features/idpf.ini |   3 +
>   drivers/net/idpf/idpf_ethdev.c    |  10 ++
>   drivers/net/idpf/idpf_rxtx.c      | 212 +++++++++++++++++++++++++++++-
>   drivers/net/idpf/idpf_rxtx.h      |  19 +++
>   4 files changed, 242 insertions(+), 2 deletions(-)
> 
> diff --git a/doc/guides/nics/features/idpf.ini b/doc/guides/nics/features/idpf.ini
> index 79ffafc0d4..36dec39b9f 100644
> --- a/doc/guides/nics/features/idpf.ini
> +++ b/doc/guides/nics/features/idpf.ini
> @@ -10,6 +10,9 @@
>   Queue start/stop     = Y
>   Runtime Rx queue setup = Y
>   Runtime Tx queue setup = Y
> +TSO                  = P
> +L3 checksum offload  = P
> +L4 checksum offload  = P
>   Packet type parsing  = Y
>   Multiprocess aware   = Y
>   FreeBSD              = Y
> diff --git a/drivers/net/idpf/idpf_ethdev.c b/drivers/net/idpf/idpf_ethdev.c
> index c6d6c87274..ffde2ce7d1 100644
> --- a/drivers/net/idpf/idpf_ethdev.c
> +++ b/drivers/net/idpf/idpf_ethdev.c
> @@ -86,6 +86,16 @@ idpf_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
>   	dev_info->max_mac_addrs = IDPF_NUM_MACADDR_MAX;
>   	dev_info->dev_capa = RTE_ETH_DEV_CAPA_RUNTIME_RX_QUEUE_SETUP |
>   		RTE_ETH_DEV_CAPA_RUNTIME_TX_QUEUE_SETUP;
> +	dev_info->rx_offload_capa =
> +		RTE_ETH_RX_OFFLOAD_IPV4_CKSUM		|
> +		RTE_ETH_RX_OFFLOAD_UDP_CKSUM		|
> +		RTE_ETH_RX_OFFLOAD_TCP_CKSUM		|
> +		RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM;
> +
> +	dev_info->tx_offload_capa =
> +		RTE_ETH_TX_OFFLOAD_TCP_TSO		|
> +		RTE_ETH_TX_OFFLOAD_MULTI_SEGS		|
I guess this one is supported from the very begining
implementation of your datapath. If so, it should be reported
in the patch which adds the datapath implementation.
> +		RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE;
RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE is not mentined anywhereelse
in the patch. Is it actually supported? How?
>   
>   	dev_info->default_rxconf = (struct rte_eth_rxconf) {
>   		.rx_free_thresh = IDPF_DEFAULT_RX_FREE_THRESH,
> diff --git a/drivers/net/idpf/idpf_rxtx.c b/drivers/net/idpf/idpf_rxtx.c
> index 6280b7887b..31e266bbec 100644
> --- a/drivers/net/idpf/idpf_rxtx.c
> +++ b/drivers/net/idpf/idpf_rxtx.c
> @@ -1269,6 +1269,47 @@ idpf_stop_queues(struct rte_eth_dev *dev)
>   	}
>   }
>   
> +#define IDPF_RX_FLEX_DESC_ADV_STATUS0_XSUM_S			\
> +	(BIT(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_IPE_S) |	\
> +	 BIT(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_L4E_S) |	\
> +	 BIT(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_EIPE_S) |	\
> +	 BIT(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_EUDPE_S))
> +
> +static inline uint64_t
> +idpf_splitq_rx_csum_offload(uint8_t err)
> +{
> +	uint64_t flags = 0;
> +
> +	if (unlikely(!(err & BIT(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_L3L4P_S))))
> +		return flags;
> +
> +	if (likely((err & IDPF_RX_FLEX_DESC_ADV_STATUS0_XSUM_S) == 0)) {
> +		flags |= (RTE_MBUF_F_RX_IP_CKSUM_GOOD |
> +			  RTE_MBUF_F_RX_L4_CKSUM_GOOD);
> +		return flags;
> +	}
> +
> +	if (unlikely(err & BIT(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_IPE_S)))
> +		flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD;
> +	else
> +		flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD;
> +
> +	if (unlikely(err & BIT(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_L4E_S)))
> +		flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
> +	else
> +		flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;
> +
> +	if (unlikely(err & BIT(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_EIPE_S)))
> +		flags |= RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD;
> +
> +	if (unlikely(err & BIT(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_EUDPE_S)))
> +		flags |= RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD;
> +	else
> +		flags |= RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD;
> +
> +	return flags;
> +}
> +
>   static void
>   idpf_split_rx_bufq_refill(struct idpf_rx_queue *rx_bufq)
>   {
> @@ -1343,9 +1384,11 @@ idpf_splitq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
>   	uint16_t pktlen_gen_bufq_id;
>   	struct idpf_rx_queue *rxq;
>   	const uint32_t *ptype_tbl;
> +	uint8_t status_err0_qw1;
>   	struct rte_mbuf *rxm;
>   	uint16_t rx_id_bufq1;
>   	uint16_t rx_id_bufq2;
> +	uint64_t pkt_flags;
>   	uint16_t pkt_len;
>   	uint16_t bufq_id;
>   	uint16_t gen_id;
> @@ -1420,6 +1463,11 @@ idpf_splitq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
>   				   VIRTCHNL2_RX_FLEX_DESC_ADV_PTYPE_M) >>
>   				  VIRTCHNL2_RX_FLEX_DESC_ADV_PTYPE_S];
>   
> +		status_err0_qw1 = rx_desc->status_err0_qw1;
> +		pkt_flags = idpf_splitq_rx_csum_offload(status_err0_qw1);
> +
> +		rxm->ol_flags |= pkt_flags;
> +
>   		rx_pkts[nb_rx++] = rxm;
>   	}
>   
> @@ -1496,6 +1544,49 @@ idpf_split_tx_free(struct idpf_tx_queue *cq)
>   	cq->tx_tail = next;
>   }
>   
> +/* Check if the context descriptor is needed for TX offloading */
> +static inline uint16_t
> +idpf_calc_context_desc(uint64_t flags)
> +{
> +	if (flags & RTE_MBUF_F_TX_TCP_SEG)
> +		return 1;
> +
> +	return 0;
> +}
> +
> +/* set TSO context descriptor
> + */
> +static inline void
> +idpf_set_splitq_tso_ctx(struct rte_mbuf *mbuf,
> +			union idpf_tx_offload tx_offload,
> +			volatile union idpf_flex_tx_ctx_desc *ctx_desc)
> +{
> +	uint16_t cmd_dtype;
> +	uint32_t tso_len;
> +	uint8_t hdr_len;
> +
> +	if (!tx_offload.l4_len) {
> +		PMD_TX_LOG(DEBUG, "L4 length set to 0");
> +		return;
> +	}
> +
> +	hdr_len = tx_offload.l2_len +
> +		tx_offload.l3_len +
> +		tx_offload.l4_len;
> +	cmd_dtype = IDPF_TX_DESC_DTYPE_FLEX_TSO_CTX |
> +		IDPF_TX_FLEX_CTX_DESC_CMD_TSO;
> +	tso_len = mbuf->pkt_len - hdr_len;
> +
> +	ctx_desc->tso.qw1.cmd_dtype = rte_cpu_to_le_16(cmd_dtype);
> +	ctx_desc->tso.qw0.hdr_len = hdr_len;
> +	ctx_desc->tso.qw0.mss_rt =
> +		rte_cpu_to_le_16((uint16_t)mbuf->tso_segsz &
> +				 IDPF_TXD_FLEX_CTX_MSS_RT_M);
> +	ctx_desc->tso.qw0.flex_tlen =
> +		rte_cpu_to_le_32(tso_len &
> +				 IDPF_TXD_FLEX_CTX_MSS_RT_M);
> +}
> +
>   uint16_t
>   idpf_splitq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
>   		      uint16_t nb_pkts)
> @@ -1504,11 +1595,14 @@ idpf_splitq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
>   	volatile struct idpf_flex_tx_sched_desc *txr;
>   	volatile struct idpf_flex_tx_sched_desc *txd;
>   	struct idpf_tx_entry *sw_ring;
> +	union idpf_tx_offload tx_offload = {0};
>   	struct idpf_tx_entry *txe, *txn;
>   	uint16_t nb_used, tx_id, sw_id;
>   	struct rte_mbuf *tx_pkt;
>   	uint16_t nb_to_clean;
>   	uint16_t nb_tx = 0;
> +	uint64_t ol_flags;
> +	uint16_t nb_ctx;
>   
>   	if (unlikely(!txq) || unlikely(!txq->q_started))
>   		return nb_tx;
> @@ -1538,8 +1632,29 @@ idpf_splitq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
>   
>   		if (txq->nb_free < tx_pkt->nb_segs)
>   			break;
> -		nb_used = tx_pkt->nb_segs;
>   
> +		ol_flags = tx_pkt->ol_flags;
> +		tx_offload.l2_len = tx_pkt->l2_len;
> +		tx_offload.l3_len = tx_pkt->l3_len;
> +		tx_offload.l4_len = tx_pkt->l4_len;
> +		tx_offload.tso_segsz = tx_pkt->tso_segsz;
> +		/* Calculate the number of context descriptors needed. */
> +		nb_ctx = idpf_calc_context_desc(ol_flags);
> +		nb_used = tx_pkt->nb_segs + nb_ctx;
> +
> +		/* context descriptor */
> +		if (nb_ctx) {
> +			volatile union idpf_flex_tx_ctx_desc *ctx_desc =
> +			(volatile union idpf_flex_tx_ctx_desc *)&txr[tx_id];
> +
> +			if (ol_flags & RTE_MBUF_F_TX_TCP_SEG)
> +				idpf_set_splitq_tso_ctx(tx_pkt, tx_offload,
> +							ctx_desc);
> +
> +			tx_id++;
> +			if (tx_id == txq->nb_tx_desc)
> +				tx_id = 0;
> +		}
>   
>   		do {
>   			txd = &txr[tx_id];
> @@ -1566,6 +1681,8 @@ idpf_splitq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
>   
>   		if (unlikely(!(tx_id % 32)))
>   			txd->qw1.cmd_dtype |= IDPF_TXD_FLEX_FLOW_CMD_RE;
> +		if (ol_flags & IDPF_TX_CKSUM_OFFLOAD_MASK)
> +			txd->qw1.cmd_dtype |= IDPF_TXD_FLEX_FLOW_CMD_CS_EN;
>   		txq->nb_free = (uint16_t)(txq->nb_free - nb_used);
>   		txq->nb_used = (uint16_t)(txq->nb_used + nb_used);
>   	}
> @@ -1580,6 +1697,48 @@ idpf_splitq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
>   	return nb_tx;
>   }
>   
> +#define IDPF_RX_FLEX_DESC_STATUS0_XSUM_S			\
> +	(BIT(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) |	\
> +	 BIT(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_L4E_S) |	\
> +	 BIT(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S) |	\
> +	 BIT(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S))
> +
> +/* Translate the rx descriptor status and error fields to pkt flags */
> +static inline uint64_t
> +idpf_rxd_to_pkt_flags(uint16_t status_error)
> +{
> +	uint64_t flags = 0;
> +
> +	if (unlikely(!(status_error & BIT(VIRTCHNL2_RX_FLEX_DESC_STATUS0_L3L4P_S))))
> +		return flags;
> +
> +	if (likely((status_error & IDPF_RX_FLEX_DESC_STATUS0_XSUM_S) == 0)) {
> +		flags |= (RTE_MBUF_F_RX_IP_CKSUM_GOOD |
Strictly speaking description of the
RTE_MBUF_F_RX_IP_CKSUM_GOOD says that IP checksum in the
patcket is valid, but there is no checksum in IPv6 header.
So, we can't say that it is valid for IPv6...
> +			  RTE_MBUF_F_RX_L4_CKSUM_GOOD);
> +		return flags;
> +	}
> +
> +	if (unlikely(status_error & BIT(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_IPE_S)))
> +		flags |= RTE_MBUF_F_RX_IP_CKSUM_BAD;
> +	else
> +		flags |= RTE_MBUF_F_RX_IP_CKSUM_GOOD;
> +
> +	if (unlikely(status_error & BIT(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_L4E_S)))
> +		flags |= RTE_MBUF_F_RX_L4_CKSUM_BAD;
> +	else
> +		flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;
> +
> +	if (unlikely(status_error & BIT(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S)))
> +		flags |= RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD;
> +
> +	if (unlikely(status_error & BIT(VIRTCHNL2_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S)))
> +		flags |= RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD;
> +	else
> +		flags |= RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD;
> +
> +	return flags;
> +}
> +
>   static inline void
>   idpf_update_rx_tail(struct idpf_rx_queue *rxq, uint16_t nb_hold,
>   		    uint16_t rx_id)
> @@ -1613,6 +1772,7 @@ idpf_singleq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
>   	struct rte_mbuf *rxm;
>   	struct rte_mbuf *nmb;
>   	uint16_t rx_status0;
> +	uint64_t pkt_flags;
>   	uint64_t dma_addr;
>   	uint16_t nb_rx;
>   
> @@ -1682,10 +1842,13 @@ idpf_singleq_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
>   		rxm->data_len = rx_packet_len;
>   		rxm->port = rxq->port_id;
>   		rxm->ol_flags = 0;
> +		pkt_flags = idpf_rxd_to_pkt_flags(rx_status0);
>   		rxm->packet_type =
>   			ptype_tbl[(uint8_t)(rte_cpu_to_le_16(rxd.flex_nic_wb.ptype_flex_flags0) &
>   				VIRTCHNL2_RX_FLEX_DESC_PTYPE_M)];
>   
> +		rxm->ol_flags |= pkt_flags;
> +
>   
>   		rx_pkts[nb_rx++] = rxm;
>   	}
> @@ -1748,14 +1911,17 @@ idpf_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
>   {
>   	volatile struct idpf_flex_tx_desc *txd;
>   	volatile struct idpf_flex_tx_desc *txr;
> +	union idpf_tx_offload tx_offload = {0};
>   	struct idpf_tx_entry *txe, *txn;
>   	struct idpf_tx_entry *sw_ring;
>   	struct idpf_tx_queue *txq;
>   	struct rte_mbuf *tx_pkt;
>   	struct rte_mbuf *m_seg;
>   	uint64_t buf_dma_addr;
> +	uint64_t ol_flags;
>   	uint16_t tx_last;
>   	uint16_t nb_used;
> +	uint16_t nb_ctx;
>   	uint16_t td_cmd;
>   	uint16_t tx_id;
>   	uint16_t nb_tx;
> @@ -1782,11 +1948,19 @@ idpf_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
>   		tx_pkt = *tx_pkts++;
>   		RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);
>   
> +		ol_flags = tx_pkt->ol_flags;
> +		tx_offload.l2_len = tx_pkt->l2_len;
> +		tx_offload.l3_len = tx_pkt->l3_len;
> +		tx_offload.l4_len = tx_pkt->l4_len;
> +		tx_offload.tso_segsz = tx_pkt->tso_segsz;
> +		/* Calculate the number of context descriptors needed. */
> +		nb_ctx = idpf_calc_context_desc(ol_flags);
> +
>   		/* The number of descriptors that must be allocated for
>   		 * a packet equals to the number of the segments of that
>   		 * packet plus 1 context descriptor if needed.
>   		 */
> -		nb_used = (uint16_t)(tx_pkt->nb_segs);
> +		nb_used = (uint16_t)(tx_pkt->nb_segs + nb_ctx);
>   		tx_last = (uint16_t)(tx_id + nb_used - 1);
>   
>   		/* Circular ring */
> @@ -1814,6 +1988,28 @@ idpf_singleq_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
>   			}
>   		}
>   
> +		if (nb_ctx) {
> +			/* Setup TX context descriptor if required */
> +			volatile union idpf_flex_tx_ctx_desc *ctx_txd =
> +				(volatile union idpf_flex_tx_ctx_desc *)
> +							&txr[tx_id];
> +
> +			txn = &sw_ring[txe->next_id];
> +			RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
> +			if (txe->mbuf) {
> +				rte_pktmbuf_free_seg(txe->mbuf);
> +				txe->mbuf = NULL;
> +			}
> +
> +			/* TSO enabled */
> +			if (ol_flags & RTE_MBUF_F_TX_TCP_SEG)
> +				idpf_set_splitq_tso_ctx(tx_pkt, tx_offload,
> +							ctx_txd);
> +
> +			txe->last_id = tx_last;
> +			tx_id = txe->next_id;
> +			txe = txn;
> +		}
>   
>   		m_seg = tx_pkt;
>   		do {
> @@ -1896,11 +2092,23 @@ idpf_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
>   			return i;
>   		}
>   
> +		if (ol_flags & IDPF_TX_OFFLOAD_NOTSUP_MASK) {
> +			rte_errno = ENOTSUP;
> +			return i;
> +		}
> +
>   		if (m->pkt_len < IDPF_MIN_FRAME_SIZE) {
>   			rte_errno = EINVAL;
>   			return i;
>   		}
>   
> +#ifdef RTE_LIBRTE_ETHDEV_DEBUG
> +		ret = rte_validate_tx_offload(m);
> +		if (ret != 0) {
> +			rte_errno = -ret;
> +			return i;
> +		}
> +#endif
>   		ret = rte_net_intel_cksum_prepare(m);
>   		if (ret != 0) {
>   			rte_errno = -ret;
> diff --git a/drivers/net/idpf/idpf_rxtx.h b/drivers/net/idpf/idpf_rxtx.h
> index bd3ebe2f50..a2394ffa2c 100644
> --- a/drivers/net/idpf/idpf_rxtx.h
> +++ b/drivers/net/idpf/idpf_rxtx.h
> @@ -44,6 +44,25 @@
>   #define IDPF_MAX_TSO_FRAME_SIZE	262143
>   #define IDPF_TX_MAX_MTU_SEG     10
>   
> +#define IDPF_TX_CKSUM_OFFLOAD_MASK (		\
> +		RTE_MBUF_F_TX_IP_CKSUM |	\
> +		RTE_MBUF_F_TX_L4_MASK |		\
> +		RTE_MBUF_F_TX_TCP_SEG)
> +
> +#define IDPF_TX_OFFLOAD_MASK (			\
> +		RTE_MBUF_F_TX_OUTER_IPV6 |	\
> +		RTE_MBUF_F_TX_OUTER_IPV4 |	\
> +		RTE_MBUF_F_TX_IPV6 |		\
> +		RTE_MBUF_F_TX_IPV4 |		\
> +		RTE_MBUF_F_TX_VLAN |		\
VLAN insertion offload is not supported yet.
> +		RTE_MBUF_F_TX_IP_CKSUM |	\
> +		RTE_MBUF_F_TX_L4_MASK |		\
> +		RTE_MBUF_F_TX_TCP_SEG |		\
> +		RTE_ETH_TX_OFFLOAD_SECURITY)
What is RTE_ETH_TX_OFFLOAD_SECURITY doing in mbuf flags?
> +
> +#define IDPF_TX_OFFLOAD_NOTSUP_MASK \
> +		(RTE_MBUF_F_TX_OFFLOAD_MASK ^ IDPF_TX_OFFLOAD_MASK)
> +
>   #define IDPF_GET_PTYPE_SIZE(p) \
>   	(sizeof(struct virtchnl2_ptype) + \
>   	(((p)->proto_id_count ? ((p)->proto_id_count - 1) : 0) * sizeof((p)->proto_id[0])))
    
    
More information about the dev
mailing list