[PATCH v11 03/18] net/idpf: add Tx queue setup

Andrew Rybchenko andrew.rybchenko at oktetlabs.ru
Tue Oct 25 11:40:11 CEST 2022


On 10/24/22 16:12, Junfeng Guo wrote:
> Add support for tx_queue_setup ops.
> 
> In the single queue model, the same descriptor queue is used by SW to
> post buffer descriptors to HW and by HW to post completed descriptors
> to SW.
> 
> In the split queue model, "RX buffer queues" are used to pass
> descriptor buffers from SW to HW while Rx queues are used only to
> pass the descriptor completions, that is, descriptors that point
> to completed buffers, from HW to SW. This is contrary to the single
> queue model in which Rx queues are used for both purposes.
> 
> Signed-off-by: Beilei Xing <beilei.xing at intel.com>
> Signed-off-by: Xiaoyun Li <xiaoyun.li at intel.com>
> Signed-off-by: Junfeng Guo <junfeng.guo at intel.com>

[snip]

> diff --git a/drivers/net/idpf/idpf_ethdev.c b/drivers/net/idpf/idpf_ethdev.c
> index 75695085f8..c0307128be 100644
> --- a/drivers/net/idpf/idpf_ethdev.c
> +++ b/drivers/net/idpf/idpf_ethdev.c
> @@ -10,6 +10,7 @@
>   #include <rte_dev.h>
>   
>   #include "idpf_ethdev.h"
> +#include "idpf_rxtx.h"
>   
>   #define IDPF_TX_SINGLE_Q	"tx_single"
>   #define IDPF_RX_SINGLE_Q	"rx_single"
> @@ -36,6 +37,7 @@ static void idpf_adapter_rel(struct idpf_adapter *adapter);
>   static const struct eth_dev_ops idpf_eth_dev_ops = {
>   	.dev_configure			= idpf_dev_configure,
>   	.dev_close			= idpf_dev_close,
> +	.tx_queue_setup			= idpf_tx_queue_setup,
>   	.dev_infos_get			= idpf_dev_info_get,
>   };
>   
> @@ -54,11 +56,23 @@ idpf_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
>   	dev_info->min_mtu = RTE_ETHER_MIN_MTU;
>   
>   	dev_info->max_mac_addrs = IDPF_NUM_MACADDR_MAX;
> -	dev_info->dev_capa = 0;
> +	dev_info->dev_capa = RTE_ETH_DEV_CAPA_RUNTIME_TX_QUEUE_SETUP;

I've played with two Intel PMDs, i40e and ice, and both have
bugs in runtime queue setup. That's why I'm trying to review
it carefully in a new driver. So, I'd like to repeat once
again: the feature does not make sense without device start
support, since the corresponding code cannot be in place and
cannot be reviewed here.


>   	dev_info->rx_offload_capa = 0;
> -

Unrelated change

>   	dev_info->tx_offload_capa = 0;
>   
> +	dev_info->default_txconf = (struct rte_eth_txconf) {
> +		.tx_free_thresh = IDPF_DEFAULT_TX_FREE_THRESH,
> +		.tx_rs_thresh = IDPF_DEFAULT_TX_RS_THRESH,
> +		.offloads = 0,

There is no need to initialize the field to zero explicitly,
since the compiler does it for you implicitly (see the note below).
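
With a compound literal, members that are not named in the
initializer are zero-initialized anyway, so the following is
equivalent (untested, just to illustrate):

	dev_info->default_txconf = (struct rte_eth_txconf) {
		.tx_free_thresh = IDPF_DEFAULT_TX_FREE_THRESH,
		.tx_rs_thresh = IDPF_DEFAULT_TX_RS_THRESH,
		/* .offloads is implicitly zero */
	};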

> +	};
> +
> +	dev_info->tx_desc_lim = (struct rte_eth_desc_lim) {
> +		.nb_max = IDPF_MAX_RING_DESC,
> +		.nb_min = IDPF_MIN_RING_DESC,
> +		.nb_align = IDPF_ALIGN_RING_DESC,
> +	};
> +
> +
>   	return 0;
>   }
>   
> diff --git a/drivers/net/idpf/idpf_rxtx.c b/drivers/net/idpf/idpf_rxtx.c
> new file mode 100644
> index 0000000000..df0ed772c1
> --- /dev/null
> +++ b/drivers/net/idpf/idpf_rxtx.c
> @@ -0,0 +1,371 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2022 Intel Corporation
> + */
> +
> +#include <ethdev_driver.h>
> +#include <rte_net.h>
> +
> +#include "idpf_ethdev.h"
> +#include "idpf_rxtx.h"
> +
> +static inline int

There is no point in making it inline; it is control path code.
Please leave it to the compiler to do its job.

> +check_tx_thresh(uint16_t nb_desc, uint16_t tx_rs_thresh,
> +		uint16_t tx_free_thresh)

[snip]

> +static inline void
> +reset_split_tx_descq(struct idpf_tx_queue *txq)

Same here, since it looks like control path as well.

[snip]

> +{
> +	struct idpf_tx_entry *txe;
> +	uint32_t i, size;
> +	uint16_t prev;
> +
> +	if (txq == NULL) {
> +		PMD_DRV_LOG(DEBUG, "Pointer to txq is NULL");
> +		return;
> +	}
> +
> +	size = sizeof(struct idpf_flex_tx_sched_desc) * txq->nb_tx_desc;
> +	for (i = 0; i < size; i++)
> +		((volatile char *)txq->desc_ring)[i] = 0;

Please add a comment explaining why volatile is required here;
a possible wording is sketched below.
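
Something along these lines, assuming the intent is simply to
keep the compiler from eliding or reordering the stores to
memory that the hardware also accesses:

	/*
	 * Zero the ring through a volatile pointer so the compiler
	 * cannot elide or reorder the stores to memory that is also
	 * accessed by the hardware via DMA.
	 */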

> +
> +	txe = txq->sw_ring;
> +	prev = (uint16_t)(txq->sw_nb_desc - 1);
> +	for (i = 0; i < txq->sw_nb_desc; i++) {
> +		txe[i].mbuf = NULL;
> +		txe[i].last_id = i;
> +		txe[prev].next_id = i;
> +		prev = i;
> +	}
> +
> +	txq->tx_tail = 0;
> +	txq->nb_used = 0;
> +
> +	/* Use this as next to clean for split desc queue */
> +	txq->last_desc_cleaned = 0;
> +	txq->sw_tail = 0;
> +	txq->nb_free = txq->nb_tx_desc - 1;
> +}
> +
> +static inline void
> +reset_split_tx_complq(struct idpf_tx_queue *cq)

Same here.

> +{
> +	uint32_t i, size;
> +
> +	if (cq == NULL) {
> +		PMD_DRV_LOG(DEBUG, "Pointer to complq is NULL");
> +		return;
> +	}
> +
> +	size = sizeof(struct idpf_splitq_tx_compl_desc) * cq->nb_tx_desc;
> +	for (i = 0; i < size; i++)
> +		((volatile char *)cq->compl_ring)[i] = 0;

Same here.

> +
> +	cq->tx_tail = 0;
> +	cq->expected_gen_id = 1;
> +}
> +
> +static inline void
> +reset_single_tx_queue(struct idpf_tx_queue *txq)

Same here.

> +{
> +	struct idpf_tx_entry *txe;
> +	uint32_t i, size;
> +	uint16_t prev;
> +
> +	if (txq == NULL) {
> +		PMD_DRV_LOG(DEBUG, "Pointer to txq is NULL");
> +		return;
> +	}
> +
> +	txe = txq->sw_ring;
> +	size = sizeof(struct idpf_flex_tx_desc) * txq->nb_tx_desc;
> +	for (i = 0; i < size; i++)
> +		((volatile char *)txq->tx_ring)[i] = 0;

Same here.

> +
> +	prev = (uint16_t)(txq->nb_tx_desc - 1);
> +	for (i = 0; i < txq->nb_tx_desc; i++) {
> +		txq->tx_ring[i].qw1.cmd_dtype =
> +			rte_cpu_to_le_16(IDPF_TX_DESC_DTYPE_DESC_DONE);
> +		txe[i].mbuf =  NULL;
> +		txe[i].last_id = i;
> +		txe[prev].next_id = i;
> +		prev = i;
> +	}
> +
> +	txq->tx_tail = 0;
> +	txq->nb_used = 0;
> +
> +	txq->last_desc_cleaned = txq->nb_tx_desc - 1;
> +	txq->nb_free = txq->nb_tx_desc - 1;
> +
> +	txq->next_dd = txq->rs_thresh - 1;
> +	txq->next_rs = txq->rs_thresh - 1;
> +}
> +
> +static int
> +idpf_tx_split_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
> +			  uint16_t nb_desc, unsigned int socket_id,
> +			  const struct rte_eth_txconf *tx_conf)
> +{
> +	struct idpf_vport *vport = dev->data->dev_private;
> +	struct idpf_adapter *adapter = vport->adapter;
> +	uint16_t tx_rs_thresh, tx_free_thresh;
> +	struct idpf_hw *hw = &adapter->hw;
> +	struct idpf_tx_queue *txq, *cq;
> +	const struct rte_memzone *mz;
> +	uint32_t ring_size;
> +	uint64_t offloads;
> +
> +	offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;
> +
> +	if (nb_desc % IDPF_ALIGN_RING_DESC != 0 ||
> +	    nb_desc > IDPF_MAX_RING_DESC ||
> +	    nb_desc < IDPF_MIN_RING_DESC) {
> +		PMD_INIT_LOG(ERR, "Number (%u) of transmit descriptors is invalid",
> +			     nb_desc);
> +		return -EINVAL;
> +	}

You don't need to check this here, since ethdev already does it
for you before invoking the driver operation (see the check
quoted below).
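
From memory, rte_eth_tx_queue_setup() validates nb_tx_desc
against the limits reported via dev_info before calling the
driver, roughly like this (paraphrased, logging omitted):

	if (nb_tx_desc > dev_info.tx_desc_lim.nb_max ||
	    nb_tx_desc < dev_info.tx_desc_lim.nb_min ||
	    nb_tx_desc % dev_info.tx_desc_lim.nb_align != 0)
		return -EINVAL;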

> +
> +	tx_rs_thresh = (uint16_t)((tx_conf->tx_rs_thresh) ?

Compare vs 0 explicitly (see the sketch after this hunk).

> +		tx_conf->tx_rs_thresh : IDPF_DEFAULT_TX_RS_THRESH);
> +	tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?

Same here: compare vs 0 explicitly.

> +		tx_conf->tx_free_thresh : IDPF_DEFAULT_TX_FREE_THRESH);
> +	if (check_tx_thresh(nb_desc, tx_rs_thresh, tx_free_thresh) != 0)
> +		return -EINVAL;
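
Something like this (untested, just to show the explicit
comparison):

	tx_rs_thresh = (uint16_t)((tx_conf->tx_rs_thresh != 0) ?
		tx_conf->tx_rs_thresh : IDPF_DEFAULT_TX_RS_THRESH);
	tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh != 0) ?
		tx_conf->tx_free_thresh : IDPF_DEFAULT_TX_FREE_THRESH);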
> +
> +	/* Allocate the TX queue data structure. */
> +	txq = rte_zmalloc_socket("idpf split txq",
> +				 sizeof(struct idpf_tx_queue),
> +				 RTE_CACHE_LINE_SIZE,
> +				 socket_id);
> +	if (txq == NULL) {
> +		PMD_INIT_LOG(ERR, "Failed to allocate memory for tx queue structure");
> +		return -ENOMEM;
> +	}
> +
> +	txq->nb_tx_desc = nb_desc;
> +	txq->rs_thresh = tx_rs_thresh;
> +	txq->free_thresh = tx_free_thresh;
> +	txq->queue_id = vport->chunks_info.tx_start_qid + queue_idx;
> +	txq->port_id = dev->data->port_id;
> +	txq->offloads = offloads;
> +	txq->tx_deferred_start = tx_conf->tx_deferred_start;

Deferred start is not supported at this point, so a request with
deferred start enabled should be rejected, e.g. as sketched below.
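
Something along these lines (untested sketch):

	if (tx_conf->tx_deferred_start != 0) {
		PMD_INIT_LOG(ERR, "Tx deferred start is not supported");
		return -EINVAL;
	}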

> +
> +	/* Allocate software ring */
> +	txq->sw_nb_desc = 2 * nb_desc;
> +	txq->sw_ring =
> +		rte_zmalloc_socket("idpf split tx sw ring",
> +				   sizeof(struct idpf_tx_entry) *
> +				   txq->sw_nb_desc,
> +				   RTE_CACHE_LINE_SIZE,
> +				   socket_id);
> +	if (txq->sw_ring == NULL) {
> +		PMD_INIT_LOG(ERR, "Failed to allocate memory for SW TX ring");
> +		rte_free(txq);
> +		return -ENOMEM;
> +	}
> +
> +	/* Allocate TX hardware ring descriptors. */
> +	ring_size = sizeof(struct idpf_flex_tx_sched_desc) * txq->nb_tx_desc;
> +	ring_size = RTE_ALIGN(ring_size, IDPF_DMA_MEM_ALIGN);
> +	mz = rte_eth_dma_zone_reserve(dev, "split_tx_ring", queue_idx,
> +				      ring_size, IDPF_RING_BASE_ALIGN,
> +				      socket_id);
> +	if (mz == NULL) {
> +		PMD_INIT_LOG(ERR, "Failed to reserve DMA memory for TX");
> +		rte_free(txq->sw_ring);
> +		rte_free(txq);
> +		return -ENOMEM;
> +	}
> +	txq->tx_ring_phys_addr = mz->iova;
> +	txq->desc_ring = (struct idpf_flex_tx_sched_desc *)mz->addr;
> +
> +	txq->mz = mz;
> +	reset_split_tx_descq(txq);
> +	txq->qtx_tail = hw->hw_addr + (vport->chunks_info.tx_qtail_start +
> +			queue_idx * vport->chunks_info.tx_qtail_spacing);
> +
> +	/* Allocate the TX completion queue data structure. */
> +	txq->complq = rte_zmalloc_socket("idpf splitq cq",
> +					 sizeof(struct idpf_tx_queue),
> +					 RTE_CACHE_LINE_SIZE,
> +					 socket_id);
> +	cq = txq->complq;
> +	if (cq == NULL) {
> +		PMD_INIT_LOG(ERR, "Failed to allocate memory for tx queue structure");

Free the previously allocated resources, including txq->complq.
However, I'd recommend using goto-style cleanup in this case,
e.g. as sketched below.
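
A rough sketch (untested; the label names and the local 'ret'
variable are mine):

	if (cq == NULL) {
		PMD_INIT_LOG(ERR, "Failed to allocate memory for tx queue structure");
		ret = -ENOMEM;
		goto err_cq_alloc;
	}
	...
	mz = rte_eth_dma_zone_reserve(dev, "tx_split_compl_ring", queue_idx,
				      ring_size, IDPF_RING_BASE_ALIGN,
				      socket_id);
	if (mz == NULL) {
		PMD_INIT_LOG(ERR, "Failed to reserve DMA memory for TX completion queue");
		ret = -ENOMEM;
		goto err_cq_mz;
	}
	...
	return 0;

err_cq_mz:
	rte_free(txq->complq);
err_cq_alloc:
	rte_memzone_free(txq->mz);
	rte_free(txq->sw_ring);
	rte_free(txq);
	return ret;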

> +		return -ENOMEM;
> +	}
> +	cq->nb_tx_desc = 2 * nb_desc;
> +	cq->queue_id = vport->chunks_info.tx_compl_start_qid + queue_idx;
> +	cq->port_id = dev->data->port_id;
> +	cq->txqs = dev->data->tx_queues;
> +	cq->tx_start_qid = vport->chunks_info.tx_start_qid;
> +
> +	ring_size = sizeof(struct idpf_splitq_tx_compl_desc) * cq->nb_tx_desc;
> +	ring_size = RTE_ALIGN(ring_size, IDPF_DMA_MEM_ALIGN);
> +	mz = rte_eth_dma_zone_reserve(dev, "tx_split_compl_ring", queue_idx,
> +				      ring_size, IDPF_RING_BASE_ALIGN,
> +				      socket_id);
> +	if (mz == NULL) {
> +		PMD_INIT_LOG(ERR, "Failed to reserve DMA memory for TX completion queue");
> +		rte_free(txq->sw_ring);
> +		rte_free(txq);

Free txq->complq and txq->mz here as well; the goto-style cleanup
sketched above would handle this path too.

> +		return -ENOMEM;
> +	}
> +	cq->tx_ring_phys_addr = mz->iova;
> +	cq->compl_ring = (struct idpf_splitq_tx_compl_desc *)mz->addr;
> +	cq->mz = mz;
> +	reset_split_tx_complq(cq);
> +
> +	txq->q_set = true;
> +	dev->data->tx_queues[queue_idx] = txq;
> +
> +	return 0;
> +}
> +
> +static int
> +idpf_tx_single_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
> +			   uint16_t nb_desc, unsigned int socket_id,
> +			   const struct rte_eth_txconf *tx_conf)
> +{
> +	struct idpf_vport *vport = dev->data->dev_private;
> +	struct idpf_adapter *adapter = vport->adapter;
> +	uint16_t tx_rs_thresh, tx_free_thresh;
> +	struct idpf_hw *hw = &adapter->hw;
> +	const struct rte_memzone *mz;
> +	struct idpf_tx_queue *txq;
> +	uint32_t ring_size;
> +	uint64_t offloads;
> +
> +	offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;
> +
> +	if (nb_desc % IDPF_ALIGN_RING_DESC != 0 ||
> +	    nb_desc > IDPF_MAX_RING_DESC ||
> +	    nb_desc < IDPF_MIN_RING_DESC) {
> +		PMD_INIT_LOG(ERR, "Number (%u) of transmit descriptors is invalid",
> +			     nb_desc);
> +		return -EINVAL;
> +	}

You don't need to check this here either; as noted above, ethdev
already validates nb_desc before invoking the driver operation.

> +
> +	tx_rs_thresh = (uint16_t)((tx_conf->tx_rs_thresh) ?
> +		tx_conf->tx_rs_thresh : IDPF_DEFAULT_TX_RS_THRESH);
> +	tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
> +		tx_conf->tx_free_thresh : IDPF_DEFAULT_TX_FREE_THRESH);
> +	if (check_tx_thresh(nb_desc, tx_rs_thresh, tx_free_thresh) != 0)
> +		return -EINVAL;
> +
> +	/* Allocate the TX queue data structure. */
> +	txq = rte_zmalloc_socket("idpf txq",
> +				 sizeof(struct idpf_tx_queue),
> +				 RTE_CACHE_LINE_SIZE,
> +				 socket_id);
> +	if (txq == NULL) {
> +		PMD_INIT_LOG(ERR, "Failed to allocate memory for tx queue structure");
> +		return -ENOMEM;
> +	}
> +
> +	/* TODO: vlan offload */
> +
> +	txq->nb_tx_desc = nb_desc;
> +	txq->rs_thresh = tx_rs_thresh;
> +	txq->free_thresh = tx_free_thresh;
> +	txq->queue_id = vport->chunks_info.tx_start_qid + queue_idx;
> +	txq->port_id = dev->data->port_id;
> +	txq->offloads = offloads;
> +	txq->tx_deferred_start = tx_conf->tx_deferred_start;

Same here: deferred start should be rejected until it is supported.

> +
> +	/* Allocate software ring */
> +	txq->sw_ring =
> +		rte_zmalloc_socket("idpf tx sw ring",
> +				   sizeof(struct idpf_tx_entry) * nb_desc,
> +				   RTE_CACHE_LINE_SIZE,
> +				   socket_id);
> +	if (txq->sw_ring == NULL) {
> +		PMD_INIT_LOG(ERR, "Failed to allocate memory for SW TX ring");
> +		rte_free(txq);
> +		return -ENOMEM;
> +	}
> +
> +	/* Allocate TX hardware ring descriptors. */
> +	ring_size = sizeof(struct idpf_flex_tx_desc) * nb_desc;
> +	ring_size = RTE_ALIGN(ring_size, IDPF_DMA_MEM_ALIGN);
> +	mz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx,
> +				      ring_size, IDPF_RING_BASE_ALIGN,
> +				      socket_id);
> +	if (mz == NULL) {
> +		PMD_INIT_LOG(ERR, "Failed to reserve DMA memory for TX");
> +		rte_free(txq->sw_ring);
> +		rte_free(txq);
> +		return -ENOMEM;
> +	}
> +
> +	txq->tx_ring_phys_addr = mz->iova;
> +	txq->tx_ring = (struct idpf_flex_tx_desc *)mz->addr;
> +
> +	txq->mz = mz;
> +	reset_single_tx_queue(txq);
> +	txq->q_set = true;
> +	dev->data->tx_queues[queue_idx] = txq;
> +	txq->qtx_tail = hw->hw_addr + (vport->chunks_info.tx_qtail_start +
> +			queue_idx * vport->chunks_info.tx_qtail_spacing);

I'm sorry, but it looks like too much code is duplicated between
the single and split queue setup. A shared helper function could
avoid it, e.g. along the lines sketched below.
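
A rough sketch of what the common part could look like (untested;
the helper name and parameters are mine):

static struct idpf_tx_queue *
idpf_tx_queue_alloc(struct rte_eth_dev *dev, uint16_t queue_idx,
		    uint16_t nb_desc, unsigned int socket_id,
		    const struct rte_eth_txconf *tx_conf,
		    uint32_t desc_size, const char *ring_name)
{
	struct idpf_vport *vport = dev->data->dev_private;
	const struct rte_memzone *mz;
	struct idpf_tx_queue *txq;
	uint32_t ring_size;

	/* Queue structure common to both models */
	txq = rte_zmalloc_socket("idpf txq", sizeof(*txq),
				 RTE_CACHE_LINE_SIZE, socket_id);
	if (txq == NULL)
		return NULL;

	txq->nb_tx_desc = nb_desc;
	txq->queue_id = vport->chunks_info.tx_start_qid + queue_idx;
	txq->port_id = dev->data->port_id;
	txq->offloads = tx_conf->offloads |
			dev->data->dev_conf.txmode.offloads;

	/* Descriptor ring; only the descriptor size and name differ */
	ring_size = RTE_ALIGN(desc_size * nb_desc, IDPF_DMA_MEM_ALIGN);
	mz = rte_eth_dma_zone_reserve(dev, ring_name, queue_idx, ring_size,
				      IDPF_RING_BASE_ALIGN, socket_id);
	if (mz == NULL) {
		rte_free(txq);
		return NULL;
	}
	txq->tx_ring_phys_addr = mz->iova;
	txq->mz = mz;

	return txq;
}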

> +
> +	return 0;
> +}
> +
> +int
> +idpf_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
> +		    uint16_t nb_desc, unsigned int socket_id,
> +		    const struct rte_eth_txconf *tx_conf)
> +{
> +	struct idpf_vport *vport = dev->data->dev_private;
> +
> +	if (vport->txq_model == VIRTCHNL2_QUEUE_MODEL_SINGLE)
> +		return idpf_tx_single_queue_setup(dev, queue_idx, nb_desc,
> +						  socket_id, tx_conf);
> +	else
> +		return idpf_tx_split_queue_setup(dev, queue_idx, nb_desc,
> +						 socket_id, tx_conf);
> +}

[snip]


