[PATCH v11 2/4] net/i40e: implement mbufs recycle mode

Konstantin Ananyev konstantin.v.ananyev at yandex.ru
Fri Sep 1 01:49:51 CEST 2023


31/08/2023 18:24, Konstantin Ananyev пишет:
> 
> 
>>>
>>> Define specific function implementation for i40e driver.
>>> Currently, mbufs recycle mode can support 128bit vector path and avx2 path.
>>> And can be enabled both in fast free and no fast free mode.
>>>
>>> Suggested-by: Honnappa Nagarahalli <honnappa.nagarahalli at arm.com>
>>> Signed-off-by: Feifei Wang <feifei.wang2 at arm.com>
>>> Reviewed-by: Ruifeng Wang <ruifeng.wang at arm.com>
>>> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli at arm.com>
>>> ---
>>>   drivers/net/i40e/i40e_ethdev.c                |   1 +
>>>   drivers/net/i40e/i40e_ethdev.h                |   2 +
>>>   .../net/i40e/i40e_recycle_mbufs_vec_common.c  | 147
>>> ++++++++++++++++++
>>>   drivers/net/i40e/i40e_rxtx.c                  |  32 ++++
>>>   drivers/net/i40e/i40e_rxtx.h                  |   4 +
>>>   drivers/net/i40e/meson.build                  |   1 +
>>>   6 files changed, 187 insertions(+)
>>>   create mode 100644 drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
>>>
>>> diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
>>> index 8271bbb394..50ba9aac94 100644
>>> --- a/drivers/net/i40e/i40e_ethdev.c
>>> +++ b/drivers/net/i40e/i40e_ethdev.c
>>> @@ -496,6 +496,7 @@ static const struct eth_dev_ops i40e_eth_dev_ops = {
>>>   	.flow_ops_get                 = i40e_dev_flow_ops_get,
>>>   	.rxq_info_get                 = i40e_rxq_info_get,
>>>   	.txq_info_get                 = i40e_txq_info_get,
>>> +	.recycle_rxq_info_get         = i40e_recycle_rxq_info_get,
>>>   	.rx_burst_mode_get            = i40e_rx_burst_mode_get,
>>>   	.tx_burst_mode_get            = i40e_tx_burst_mode_get,
>>>   	.timesync_enable              = i40e_timesync_enable,
>>> diff --git a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h
>>> index 6f65d5e0ac..af758798e1 100644
>>> --- a/drivers/net/i40e/i40e_ethdev.h
>>> +++ b/drivers/net/i40e/i40e_ethdev.h
>>> @@ -1355,6 +1355,8 @@ void i40e_rxq_info_get(struct rte_eth_dev *dev,
>>> uint16_t queue_id,
>>>   	struct rte_eth_rxq_info *qinfo);
>>>   void i40e_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
>>>   	struct rte_eth_txq_info *qinfo);
>>> +void i40e_recycle_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
>>> +	struct rte_eth_recycle_rxq_info *recycle_rxq_info);
>>>   int i40e_rx_burst_mode_get(struct rte_eth_dev *dev, uint16_t queue_id,
>>>   			   struct rte_eth_burst_mode *mode);  int
>>> i40e_tx_burst_mode_get(struct rte_eth_dev *dev, uint16_t queue_id, diff --
>>> git a/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
>>> b/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
>>> new file mode 100644
>>> index 0000000000..5663ecccde
>>> --- /dev/null
>>> +++ b/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
>>> @@ -0,0 +1,147 @@
>>> +/* SPDX-License-Identifier: BSD-3-Clause
>>> + * Copyright (c) 2023 Arm Limited.
>>> + */
>>> +
>>> +#include <stdint.h>
>>> +#include <ethdev_driver.h>
>>> +
>>> +#include "base/i40e_prototype.h"
>>> +#include "base/i40e_type.h"
>>> +#include "i40e_ethdev.h"
>>> +#include "i40e_rxtx.h"
>>> +
>>> +#pragma GCC diagnostic ignored "-Wcast-qual"
>>> +
>>> +void
>>> +i40e_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t
>>> +nb_mbufs) {
>>> +	struct i40e_rx_queue *rxq = rx_queue;
>>> +	struct i40e_rx_entry *rxep;
>>> +	volatile union i40e_rx_desc *rxdp;
>>> +	uint16_t rx_id;
>>> +	uint64_t paddr;
>>> +	uint64_t dma_addr;
>>> +	uint16_t i;
>>> +
>>> +	rxdp = rxq->rx_ring + rxq->rxrearm_start;
>>> +	rxep = &rxq->sw_ring[rxq->rxrearm_start];
>>> +
>>> +	for (i = 0; i < nb_mbufs; i++) {
>>> +		/* Initialize rxdp descs. */
>>> +		paddr = (rxep[i].mbuf)->buf_iova +
>>> RTE_PKTMBUF_HEADROOM;
>>> +		dma_addr = rte_cpu_to_le_64(paddr);
>>> +		/* flush desc with pa dma_addr */
>>> +		rxdp[i].read.hdr_addr = 0;
>>> +		rxdp[i].read.pkt_addr = dma_addr;
>>> +	}
>>> +
>>> +	/* Update the descriptor initializer index */
>>> +	rxq->rxrearm_start += nb_mbufs;
>>> +	rx_id = rxq->rxrearm_start - 1;
>>> +
>>> +	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
>>> +		rxq->rxrearm_start = 0;
>>> +		rx_id = rxq->nb_rx_desc - 1;
>>> +	}
>>> +
>>> +	rxq->rxrearm_nb -= nb_mbufs;
>>> +
>>> +	rte_io_wmb();
>>> +	/* Update the tail pointer on the NIC */
>>> +	I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id); }
>>> +
>>> +uint16_t
>>> +i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
>>> +	struct rte_eth_recycle_rxq_info *recycle_rxq_info) {
>>> +	struct i40e_tx_queue *txq = tx_queue;
>>> +	struct i40e_tx_entry *txep;
>>> +	struct rte_mbuf **rxep;
>>> +	int i, n;
>>> +	uint16_t nb_recycle_mbufs;
>>> +	uint16_t avail = 0;
>>> +	uint16_t mbuf_ring_size = recycle_rxq_info->mbuf_ring_size;
>>> +	uint16_t mask = recycle_rxq_info->mbuf_ring_size - 1;
>>> +	uint16_t refill_requirement = recycle_rxq_info->refill_requirement;
>>> +	uint16_t refill_head = *recycle_rxq_info->refill_head;
>>> +	uint16_t receive_tail = *recycle_rxq_info->receive_tail;
>>> +
>>> +	/* Get available recycling Rx buffers. */
>>> +	avail = (mbuf_ring_size - (refill_head - receive_tail)) & mask;
>>> +
>>> +	/* Check Tx free thresh and Rx available space. */
>>> +	if (txq->nb_tx_free > txq->tx_free_thresh || avail <= txq->tx_rs_thresh)
>>> +		return 0;
>>> +
>>> +	/* check DD bits on threshold descriptor */
>>> +	if ((txq->tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
>>> +
>>> 	rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
>>> +
>>> 	rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
>>> +		return 0;
>>> +
>>> +	n = txq->tx_rs_thresh;
>>> +	nb_recycle_mbufs = n;
>>> +
>>> +	/* Mbufs recycle mode can only support no ring buffer wrapping
>>> around.
>>> +	 * Two case for this:
>>> +	 *
>>> +	 * case 1: The refill head of Rx buffer ring needs to be aligned with
>>> +	 * mbuf ring size. In this case, the number of Tx freeing buffers
>>> +	 * should be equal to refill_requirement.
>>> +	 *
>>> +	 * case 2: The refill head of Rx ring buffer does not need to be aligned
>>> +	 * with mbuf ring size. In this case, the update of refill head can not
>>> +	 * exceed the Rx mbuf ring size.
>>> +	 */
>>> +	if (refill_requirement != n ||
>>> +		(!refill_requirement && (refill_head + n > mbuf_ring_size)))
>>> +		return 0;
>>> +
>>> +	/* First buffer to free from S/W ring is at index
>>> +	 * tx_next_dd - (tx_rs_thresh-1).
>>> +	 */
>>> +	txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
>>> +	rxep = recycle_rxq_info->mbuf_ring;
>>> +	rxep += refill_head;
>>> +
>>> +	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
>>> +		/* Avoid txq contains buffers from unexpected mempool. */
>>> +		if (unlikely(recycle_rxq_info->mp
>>> +					!= txep[0].mbuf->pool))
>>> +			return 0;
>>> +
>>> +		/* Directly put mbufs from Tx to Rx. */
>>> +		for (i = 0; i < n; i++)
>>> +			rxep[i] = txep[i].mbuf;
>>> +	} else {
>>> +		for (i = 0; i < n; i++) {
>>> +			rxep[i] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
>>> +
>>> +			/* If Tx buffers are not the last reference or from
>>> +			 * unexpected mempool, previous copied buffers are
>>> +			 * considered as invalid.
>>> +			 */
>>> +			if (unlikely((rxep[i] == NULL && refill_requirement) ||
>> [Konstantin]
>> Could you pls remind me why it is ok to have rxep[i]==NULL when
>> refill_requirement is not set?
>>
>> If refill_requirement is not zero, it means each Tx freed buffer must be valid and can be put into the Rx
>> sw_ring. Then the refill head of the Rx buffer ring can be aligned with the mbuf ring size. Briefly speaking,
>> the number of valid Tx freed buffers must be equal to the Rx refill_requirement. For example, the i40e driver.
>>
>> If refill_requirement is zero, it means that the refill head of the Rx buffer ring does not need to be aligned
>> with the mbuf ring size; thus if Tx has n valid freed buffers, we just need to put these n buffers into the Rx sw_ring,
>> and n does not need to be equal to the Rx rearm number setting. For example, the mlx5 driver.
>>
>> In conclusion, the above difference is because PMD drivers have different strategies to update their Rx rearm (refill) head.
>> For the i40e driver, if rearm_head exceeds 1024, it will be set to 0, because the number of buffers in each rearm is a fixed value by default.
>> For the mlx5 driver, rearm_head can exceed 1024, and a mask is used to get the real index. Thus its rearm number can vary.
> 
> Ok, but if rte_pktmbuf_prefree_seg(txep[i].mbuf) returns NULL, it means that this mbuf is not free yet and can't be reused.
> Shouldn't we then set nb_recycle_mbufs = 0 in that case too?
> Or probably would be enough to skip that mbuf?
> Might be something like that:
> 
> for (i = 0, j = 0; i < n; i++) {
> 
> 	rxep[j] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
> 	if (rxep[j] == NULL || recycle_rxq_info->mp != rxep[j]->pool) {
> 		if (refill_requirement) {
> 			nb_recycle_mbufs = 0;
> 			break;
> 		}
> 	} else
> 		j++;
> }
> 
> /* now j contains actual number of recycled mbufs */
> 
> ?


After another thought, it might be easier and cleaner just to:
if (rxep[j] == NULL || recycle_rxq_info->mp != rxep[j]->pool)
	nb_recycle_mbufs = 0;

Anyway, from my understanding - if rte_pktmbuf_prefree_seg(mbuf) returns 
NULL, then we can't recycle that mbuf.	


> 
>>
>>> +					recycle_rxq_info->mp != txep[i].mbuf-
>>>> pool))
>>> +				nb_recycle_mbufs = 0;
>>> +		}
>>> +		/* If Tx buffers are not the last reference or
>>> +		 * from unexpected mempool, all recycled buffers
>>> +		 * are put into mempool.
>>> +		 */
>>> +		if (nb_recycle_mbufs == 0)
>>> +			for (i = 0; i < n; i++) {
>>> +				if (rxep[i] != NULL)
>>> +					rte_mempool_put(rxep[i]->pool,
>>> rxep[i]);
>>> +			}
>>> +	}
>>> +
>>> +	/* Update counters for Tx. */
>>> +	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
>>> +	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
>>> +	if (txq->tx_next_dd >= txq->nb_tx_desc)
>>> +		txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
>>> +
>>> +	return nb_recycle_mbufs;
>>> +}
>>> diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c index
>>> b4f65b58fa..a9c9eb331c 100644
>>> --- a/drivers/net/i40e/i40e_rxtx.c
>>> +++ b/drivers/net/i40e/i40e_rxtx.c
>>> @@ -3199,6 +3199,30 @@ i40e_txq_info_get(struct rte_eth_dev *dev,
>>> uint16_t queue_id,
>>>   	qinfo->conf.offloads = txq->offloads;
>>>   }
>>>
>>> +void
>>> +i40e_recycle_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
>>> +	struct rte_eth_recycle_rxq_info *recycle_rxq_info) {
>>> +	struct i40e_rx_queue *rxq;
>>> +	struct i40e_adapter *ad =
>>> +		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
>>> +
>>> +	rxq = dev->data->rx_queues[queue_id];
>>> +
>>> +	recycle_rxq_info->mbuf_ring = (void *)rxq->sw_ring;
>>> +	recycle_rxq_info->mp = rxq->mp;
>>> +	recycle_rxq_info->mbuf_ring_size = rxq->nb_rx_desc;
>>> +	recycle_rxq_info->receive_tail = &rxq->rx_tail;
>>> +
>>> +	if (ad->rx_vec_allowed) {
>>> +		recycle_rxq_info->refill_requirement =
>>> RTE_I40E_RXQ_REARM_THRESH;
>>> +		recycle_rxq_info->refill_head = &rxq->rxrearm_start;
>>> +	} else {
>>> +		recycle_rxq_info->refill_requirement = rxq->rx_free_thresh;
>>> +		recycle_rxq_info->refill_head = &rxq->rx_free_trigger;
>>> +	}
>>> +}
>>> +
>>>   #ifdef RTE_ARCH_X86
>>>   static inline bool
>>>   get_avx_supported(bool request_avx512)
>>> @@ -3293,6 +3317,8 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
>>>   				dev->rx_pkt_burst = ad->rx_use_avx2 ?
>>>   					i40e_recv_scattered_pkts_vec_avx2 :
>>>   					i40e_recv_scattered_pkts_vec;
>>> +				dev->recycle_rx_descriptors_refill =
>>> +					i40e_recycle_rx_descriptors_refill_vec;
>>>   			}
>>>   		} else {
>>>   			if (ad->rx_use_avx512) {
>>> @@ -3311,9 +3337,12 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
>>>   				dev->rx_pkt_burst = ad->rx_use_avx2 ?
>>>   					i40e_recv_pkts_vec_avx2 :
>>>   					i40e_recv_pkts_vec;
>>> +				dev->recycle_rx_descriptors_refill =
>>> +					i40e_recycle_rx_descriptors_refill_vec;
>>>   			}
>>>   		}
>>>   #else /* RTE_ARCH_X86 */
>>> +		dev->recycle_rx_descriptors_refill =
>>> +i40e_recycle_rx_descriptors_refill_vec;
>>>   		if (dev->data->scattered_rx) {
>>>   			PMD_INIT_LOG(DEBUG,
>>>   				     "Using Vector Scattered Rx (port %d).", @@
>>> -3481,15 +3510,18 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
>>>   				dev->tx_pkt_burst = ad->tx_use_avx2 ?
>>>   						    i40e_xmit_pkts_vec_avx2 :
>>>   						    i40e_xmit_pkts_vec;
>>> +				dev->recycle_tx_mbufs_reuse =
>>> i40e_recycle_tx_mbufs_reuse_vec;
>>>   			}
>>>   #else /* RTE_ARCH_X86 */
>>>   			PMD_INIT_LOG(DEBUG, "Using Vector Tx (port %d).",
>>>   				     dev->data->port_id);
>>>   			dev->tx_pkt_burst = i40e_xmit_pkts_vec;
>>> +			dev->recycle_tx_mbufs_reuse =
>>> i40e_recycle_tx_mbufs_reuse_vec;
>>>   #endif /* RTE_ARCH_X86 */
>>>   		} else {
>>>   			PMD_INIT_LOG(DEBUG, "Simple tx finally be used.");
>>>   			dev->tx_pkt_burst = i40e_xmit_pkts_simple;
>>> +			dev->recycle_tx_mbufs_reuse =
>>> i40e_recycle_tx_mbufs_reuse_vec;
>>>   		}
>>>   		dev->tx_pkt_prepare = i40e_simple_prep_pkts;
>>>   	} else {
>>> diff --git a/drivers/net/i40e/i40e_rxtx.h b/drivers/net/i40e/i40e_rxtx.h index
>>> a8686224e5..b191f23e1f 100644
>>> --- a/drivers/net/i40e/i40e_rxtx.h
>>> +++ b/drivers/net/i40e/i40e_rxtx.h
>>> @@ -236,6 +236,10 @@ uint32_t i40e_dev_rx_queue_count(void
>>> *rx_queue);  int i40e_dev_rx_descriptor_status(void *rx_queue, uint16_t
>>> offset);  int i40e_dev_tx_descriptor_status(void *tx_queue, uint16_t offset);
>>>
>>> +uint16_t i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
>>> +		struct rte_eth_recycle_rxq_info *recycle_rxq_info); void
>>> +i40e_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t
>>> +nb_mbufs);
>>> +
>>>   uint16_t i40e_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
>>>   			    uint16_t nb_pkts);
>>>   uint16_t i40e_recv_scattered_pkts_vec(void *rx_queue, diff --git
>>> a/drivers/net/i40e/meson.build b/drivers/net/i40e/meson.build index
>>> 8e53b87a65..3b1a233c84 100644
>>> --- a/drivers/net/i40e/meson.build
>>> +++ b/drivers/net/i40e/meson.build
>>> @@ -34,6 +34,7 @@ sources = files(
>>>           'i40e_tm.c',
>>>           'i40e_hash.c',
>>>           'i40e_vf_representor.c',
>>> +	'i40e_recycle_mbufs_vec_common.c',
>>>           'rte_pmd_i40e.c',
>>>   )
>>>
>>> --
>>> 2.25.1
> 



More information about the dev mailing list