[dpdk-dev] [PATCH 01/13] ethdev: support setup function for hairpin queue

Andrew Rybchenko arybchenko at solarflare.com
Thu Sep 26 14:18:36 CEST 2019


On 9/26/19 9:28 AM, Ori Kam wrote:
> This commit introduce the RX/TX hairpin setup function.

RX/TX should be Rx/Tx here and everywhere below.

> Hairpin is RX/TX queue that is used by the nic in order to offload
> wire to wire traffic.
>
> Each hairpin queue is binded to one or more queues from other type.
> For example TX hairpin queue should be binded to at least 1 RX hairpin
> queue and vice versa.

How should an application find out that hairpin queues are supported?
How many?
How should an application find out which ports/queues could be used for pinning?
Is a hair-pinning domain on the device level sufficient to expose limitations?

> Signed-off-by: Ori Kam <orika at mellanox.com>
> ---
>   lib/librte_ethdev/rte_ethdev.c           | 213 +++++++++++++++++++++++++++++++
>   lib/librte_ethdev/rte_ethdev.h           | 145 +++++++++++++++++++++
>   lib/librte_ethdev/rte_ethdev_core.h      |  18 +++
>   lib/librte_ethdev/rte_ethdev_version.map |   4 +
>   4 files changed, 380 insertions(+)
>
> diff --git a/lib/librte_ethdev/rte_ethdev.c b/lib/librte_ethdev/rte_ethdev.c
> index 30b0c78..4021f38 100644
> --- a/lib/librte_ethdev/rte_ethdev.c
> +++ b/lib/librte_ethdev/rte_ethdev.c
> @@ -1701,6 +1701,115 @@ struct rte_eth_dev *
>   }
>   
>   int
> +rte_eth_rx_hairpin_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
> +			       uint16_t nb_rx_desc, unsigned int socket_id,
> +			       const struct rte_eth_rxconf *rx_conf,
> +			       const struct rte_eth_hairpin_conf *hairpin_conf)

The code below duplicates rte_eth_rx_queue_setup() a lot, which is very
bad from a maintenance point of view. There is a similar problem with the
Tx hairpin queue setup.

> +{
> +	int ret;
> +	struct rte_eth_dev *dev;
> +	struct rte_eth_dev_info dev_info;
> +	struct rte_eth_rxconf local_conf;
> +	void **rxq;
> +
> +	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
> +
> +	dev = &rte_eth_devices[port_id];
> +	if (rx_queue_id >= dev->data->nb_rx_queues) {
> +		RTE_ETHDEV_LOG(ERR, "Invalid RX queue_id=%u\n", rx_queue_id);
> +		return -EINVAL;
> +	}
> +
> +	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP);
> +	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_hairpin_queue_setup,
> +				-ENOTSUP);
> +
> +	rte_eth_dev_info_get(port_id, &dev_info);
> +
> +	/* Use default specified by driver, if nb_rx_desc is zero */
> +	if (nb_rx_desc == 0) {
> +		nb_rx_desc = dev_info.default_rxportconf.ring_size;
> +		/* If driver default is also zero, fall back on EAL default */
> +		if (nb_rx_desc == 0)
> +			nb_rx_desc = RTE_ETH_DEV_FALLBACK_RX_RINGSIZE;
> +	}
> +
> +	if (nb_rx_desc > dev_info.rx_desc_lim.nb_max ||
> +			nb_rx_desc < dev_info.rx_desc_lim.nb_min ||
> +			nb_rx_desc % dev_info.rx_desc_lim.nb_align != 0) {
> +
> +		RTE_ETHDEV_LOG(ERR,
> +			       "Invalid value for nb_rx_desc(=%hu), should be: "
> +			       "<= %hu, >= %hu, and a product of %hu\n",
> +			nb_rx_desc, dev_info.rx_desc_lim.nb_max,
> +			dev_info.rx_desc_lim.nb_min,
> +			dev_info.rx_desc_lim.nb_align);
> +		return -EINVAL;
> +	}
> +
> +	if (dev->data->dev_started &&
> +		!(dev_info.dev_capa &
> +			RTE_ETH_DEV_CAPA_RUNTIME_RX_QUEUE_SETUP))
> +		return -EBUSY;
> +
> +	if (dev->data->dev_started &&
> +		(dev->data->rx_queue_state[rx_queue_id] !=
> +			RTE_ETH_QUEUE_STATE_STOPPED))
> +		return -EBUSY;
> +
> +	rxq = dev->data->rx_queues;
> +	if (rxq[rx_queue_id]) {
> +		RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_release,
> +					-ENOTSUP);
> +		(*dev->dev_ops->rx_queue_release)(rxq[rx_queue_id]);
> +		rxq[rx_queue_id] = NULL;
> +	}
> +
> +	if (rx_conf == NULL)
> +		rx_conf = &dev_info.default_rxconf;
> +
> +	local_conf = *rx_conf;
> +
> +	/*
> +	 * If an offloading has already been enabled in
> +	 * rte_eth_dev_configure(), it has been enabled on all queues,
> +	 * so there is no need to enable it in this queue again.
> +	 * The local_conf.offloads input to underlying PMD only carries
> +	 * those offloadings which are only enabled on this queue and
> +	 * not enabled on all queues.
> +	 */
> +	local_conf.offloads &= ~dev->data->dev_conf.rxmode.offloads;
> +
> +	/*
> +	 * New added offloadings for this queue are those not enabled in
> +	 * rte_eth_dev_configure() and they must be per-queue type.
> +	 * A pure per-port offloading can't be enabled on a queue while
> +	 * disabled on another queue. A pure per-port offloading can't
> +	 * be enabled for any queue as new added one if it hasn't been
> +	 * enabled in rte_eth_dev_configure().
> +	 */
> +	if ((local_conf.offloads & dev_info.rx_queue_offload_capa) !=
> +	     local_conf.offloads) {
> +		RTE_ETHDEV_LOG(ERR,
> +			"Ethdev port_id=%d rx_queue_id=%d, "
> +			"new added offloads 0x%"PRIx64" must be "
> +			"within per-queue offload capabilities "
> +			"0x%"PRIx64" in %s()\n",
> +			port_id, rx_queue_id, local_conf.offloads,
> +			dev_info.rx_queue_offload_capa,
> +			__func__);
> +		return -EINVAL;
> +	}
> +
> +	ret = (*dev->dev_ops->rx_hairpin_queue_setup)(dev, rx_queue_id,
> +						      nb_rx_desc, socket_id,
> +						      &local_conf,
> +						      hairpin_conf);
> +
> +	return eth_err(port_id, ret);
> +}
> +
> +int
>   rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
>   		       uint16_t nb_tx_desc, unsigned int socket_id,
>   		       const struct rte_eth_txconf *tx_conf)
> @@ -1799,6 +1908,110 @@ struct rte_eth_dev *
>   		       tx_queue_id, nb_tx_desc, socket_id, &local_conf));
>   }
>   
> +int
> +rte_eth_tx_hairpin_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
> +			       uint16_t nb_tx_desc, unsigned int socket_id,
> +			       const struct rte_eth_txconf *tx_conf,
> +			       const struct rte_eth_hairpin_conf *hairpin_conf)
> +{
> +	struct rte_eth_dev *dev;
> +	struct rte_eth_dev_info dev_info;
> +	struct rte_eth_txconf local_conf;
> +	void **txq;
> +
> +	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
> +
> +	dev = &rte_eth_devices[port_id];
> +	if (tx_queue_id >= dev->data->nb_tx_queues) {
> +		RTE_ETHDEV_LOG(ERR, "Invalid TX queue_id=%u\n", tx_queue_id);
> +		return -EINVAL;
> +	}
> +
> +	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP);
> +	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_hairpin_queue_setup,
> +				-ENOTSUP);
> +
> +	rte_eth_dev_info_get(port_id, &dev_info);
> +
> +	/* Use default specified by driver, if nb_tx_desc is zero */
> +	if (nb_tx_desc == 0) {
> +		nb_tx_desc = dev_info.default_txportconf.ring_size;
> +		/* If driver default is zero, fall back on EAL default */
> +		if (nb_tx_desc == 0)
> +			nb_tx_desc = RTE_ETH_DEV_FALLBACK_TX_RINGSIZE;
> +	}
> +	if (nb_tx_desc > dev_info.tx_desc_lim.nb_max ||
> +	    nb_tx_desc < dev_info.tx_desc_lim.nb_min ||
> +	    nb_tx_desc % dev_info.tx_desc_lim.nb_align != 0) {
> +		RTE_ETHDEV_LOG(ERR,
> +			       "Invalid value for nb_tx_desc(=%hu), "
> +			       "should be: <= %hu, >= %hu, and a product of "
> +			       " %hu\n",
> +			       nb_tx_desc, dev_info.tx_desc_lim.nb_max,
> +			       dev_info.tx_desc_lim.nb_min,
> +			       dev_info.tx_desc_lim.nb_align);
> +		return -EINVAL;
> +	}
> +
> +	if (dev->data->dev_started &&
> +		!(dev_info.dev_capa &
> +		  RTE_ETH_DEV_CAPA_RUNTIME_TX_QUEUE_SETUP))
> +		return -EBUSY;
> +
> +	if (dev->data->dev_started &&
> +		(dev->data->tx_queue_state[tx_queue_id] !=
> +		 RTE_ETH_QUEUE_STATE_STOPPED))
> +		return -EBUSY;
> +
> +	txq = dev->data->tx_queues;
> +	if (txq[tx_queue_id]) {
> +		RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_release,
> +					-ENOTSUP);
> +		(*dev->dev_ops->tx_queue_release)(txq[tx_queue_id]);
> +		txq[tx_queue_id] = NULL;
> +	}
> +
> +	if (tx_conf == NULL)
> +		tx_conf = &dev_info.default_txconf;
> +
> +	local_conf = *tx_conf;
> +
> +	/*
> +	 * If an offloading has already been enabled in
> +	 * rte_eth_dev_configure(), it has been enabled on all queues,
> +	 * so there is no need to enable it in this queue again.
> +	 * The local_conf.offloads input to underlying PMD only carries
> +	 * those offloadings which are only enabled on this queue and
> +	 * not enabled on all queues.
> +	 */
> +	local_conf.offloads &= ~dev->data->dev_conf.txmode.offloads;
> +
> +	/*
> +	 * New added offloadings for this queue are those not enabled in
> +	 * rte_eth_dev_configure() and they must be per-queue type.
> +	 * A pure per-port offloading can't be enabled on a queue while
> +	 * disabled on another queue. A pure per-port offloading can't
> +	 * be enabled for any queue as new added one if it hasn't been
> +	 * enabled in rte_eth_dev_configure().
> +	 */
> +	if ((local_conf.offloads & dev_info.tx_queue_offload_capa) !=
> +	     local_conf.offloads) {
> +		RTE_ETHDEV_LOG(ERR,
> +			       "Ethdev port_id=%d tx_queue_id=%d, new added "
> +			       "offloads 0x%"PRIx64" must be within "
> +			       "per-queue offload capabilities 0x%"PRIx64" "
> +			       "in %s()\n",
> +			       port_id, tx_queue_id, local_conf.offloads,
> +			       dev_info.tx_queue_offload_capa,
> +			       __func__);
> +		return -EINVAL;
> +	}
> +
> +	return eth_err(port_id, (*dev->dev_ops->tx_hairpin_queue_setup)
> +		       (dev, tx_queue_id, nb_tx_desc, socket_id, &local_conf,
> +			hairpin_conf));
> +}
> +
>   void
>   rte_eth_tx_buffer_drop_callback(struct rte_mbuf **pkts, uint16_t unsent,
>   		void *userdata __rte_unused)
> diff --git a/lib/librte_ethdev/rte_ethdev.h b/lib/librte_ethdev/rte_ethdev.h
> index 475dbda..b3b1597 100644
> --- a/lib/librte_ethdev/rte_ethdev.h
> +++ b/lib/librte_ethdev/rte_ethdev.h
> @@ -803,6 +803,30 @@ struct rte_eth_txconf {
>   	uint64_t offloads;
>   };
>   
> +#define RTE_ETH_MAX_HAIRPIN_PEERS 32
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
> + *
> + * A structure used to hold hairpin peer data.
> + */
> +struct rte_eth_hairpin_peer {
> +	uint16_t port; /**< Peer port. */
> +	uint16_t queue; /**< Peer queue. */
> +};
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
> + *
> + * A structure used to configure hairpin binding.
> + */
> +struct rte_eth_hairpin_conf {
> +	uint16_t peer_n; /**< The number of peers. */
> +	struct rte_eth_hairpin_peer peers[RTE_ETH_MAX_HAIRPIN_PEERS];
> +};
> +
>   /**
>    * A structure contains information about HW descriptor ring limitations.
>    */
> @@ -1769,6 +1793,60 @@ int rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
>   		struct rte_mempool *mb_pool);
>   
>   /**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
> + *
> + * Allocate and set up a hairpin receive queue for an Ethernet device.
> + *
> + * The function set up the selected queue to be used in hairpin.
> + *
> + * @param port_id
> + *   The port identifier of the Ethernet device.
> + * @param rx_queue_id
> + *   The index of the receive queue to set up.
> + *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
> + *   to rte_eth_dev_configure().

May any Rx queue be set up as a hairpin queue?
Can it still be used for regular traffic?

> + * @param nb_rx_desc
> + *   The number of receive descriptors to allocate for the receive ring.

Does it still make sense for a hairpin queue?

> + * @param socket_id
> + *   The *socket_id* argument is the socket identifier in case of NUMA.
> + *   The value can be *SOCKET_ID_ANY* if there is no NUMA constraint for
> + *   the DMA memory allocated for the receive descriptors of the ring.

Is it still required to be provided for a hairpin Rx queue?

> + * @param rx_conf
> + *   The pointer to the configuration data to be used for the receive queue.
> + *   NULL value is allowed, in which case default RX configuration
> + *   will be used.
> + *   The *rx_conf* structure contains an *rx_thresh* structure with the values
> + *   of the Prefetch, Host, and Write-Back threshold registers of the receive
> + *   ring.
> + *   In addition it contains the hardware offloads features to activate using
> + *   the DEV_RX_OFFLOAD_* flags.
> + *   If an offloading set in rx_conf->offloads
> + *   hasn't been set in the input argument eth_conf->rxmode.offloads
> + *   to rte_eth_dev_configure(), it is a new added offloading, it must be
> + *   per-queue type and it is enabled for the queue.
> + *   No need to repeat any bit in rx_conf->offloads which has already been
> + *   enabled in rte_eth_dev_configure() at port level. An offloading enabled
> + *   at port level can't be disabled at queue level.

Which offloads still make sense in the case of a hairpin Rx queue?
What about thresholds and drop enable?

> + * @param hairpin_conf
> + *   The pointer to the hairpin binding configuration.
> + * @return
> + *   - 0: Success, receive queue correctly set up.
> + *   - -EINVAL: The size of network buffers which can be allocated from the
> + *      memory pool does not fit the various buffer sizes allowed by the
> + *      device controller.
> + *   - -ENOMEM: Unable to allocate the receive ring descriptors or to
> + *      allocate network memory buffers from the memory pool when
> + *      initializing receive descriptors.
> + */
> +__rte_experimental
> +int rte_eth_rx_hairpin_queue_setup
> +	(uint16_t port_id, uint16_t rx_queue_id,
> +	 uint16_t nb_rx_desc, unsigned int socket_id,
> +	 const struct rte_eth_rxconf *rx_conf,
> +	 const struct rte_eth_hairpin_conf *hairpin_conf);
> +
> +/**
>    * Allocate and set up a transmit queue for an Ethernet device.
>    *
>    * @param port_id
> @@ -1821,6 +1899,73 @@ int rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
>   		const struct rte_eth_txconf *tx_conf);
>   
>   /**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
> + *
> + * Allocate and set up a transmit hairpin queue for an Ethernet device.
> + *
> + * @param port_id
> + *   The port identifier of the Ethernet device.
> + * @param tx_queue_id
> + *   The index of the transmit queue to set up.
> + *   The value must be in the range [0, nb_tx_queue - 1] previously supplied
> + *   to rte_eth_dev_configure().

May any Tx queue be set up as a hairpin queue?

> + * @param nb_tx_desc
> + *   The number of transmit descriptors to allocate for the transmit ring.

Is it really required for hairpin queue? Are min/max/align limits still 
the same?

> + * @param socket_id
> + *   The *socket_id* argument is the socket identifier in case of NUMA.
> + *   Its value can be *SOCKET_ID_ANY* if there is no NUMA constraint for
> + *   the DMA memory allocated for the transmit descriptors of the ring.

Does it still make sense for a Tx hairpin queue?

> + * @param tx_conf
> + *   The pointer to the configuration data to be used for the transmit queue.
> + *   NULL value is allowed, in which case default RX configuration
> + *   will be used.
> + *   The *tx_conf* structure contains the following data:
> + *   - The *tx_thresh* structure with the values of the Prefetch, Host, and
> + *     Write-Back threshold registers of the transmit ring.
> + *     When setting Write-Back threshold to the value greater then zero,
> + *     *tx_rs_thresh* value should be explicitly set to one.
> + *   - The *tx_free_thresh* value indicates the [minimum] number of network
> + *     buffers that must be pending in the transmit ring to trigger their
> + *     [implicit] freeing by the driver transmit function.
> + *   - The *tx_rs_thresh* value indicates the [minimum] number of transmit
> + *     descriptors that must be pending in the transmit ring before setting the
> + *     RS bit on a descriptor by the driver transmit function.
> + *     The *tx_rs_thresh* value should be less or equal then
> + *     *tx_free_thresh* value, and both of them should be less then
> + *     *nb_tx_desc* - 3.

I'm not sure that everything above makes sense for hairpin Tx queue.

> + *   - The *txq_flags* member contains flags to pass to the TX queue setup
> + *     function to configure the behavior of the TX queue. This should be set
> + *     to 0 if no special configuration is required.
> + *     This API is obsolete and will be deprecated. Applications
> + *     should set it to ETH_TXQ_FLAGS_IGNORE and use
> + *     the offloads field below.

There has been no txq_flags for a long time already. So, I'm wondering when it
was copied from rte_eth_tx_queue_setup().

> + *   - The *offloads* member contains Tx offloads to be enabled.
> + *     If an offloading set in tx_conf->offloads
> + *     hasn't been set in the input argument eth_conf->txmode.offloads
> + *     to rte_eth_dev_configure(), it is a new added offloading, it must be
> + *     per-queue type and it is enabled for the queue.
> + *     No need to repeat any bit in tx_conf->offloads which has already been
> + *     enabled in rte_eth_dev_configure() at port level. An offloading enabled
> + *     at port level can't be disabled at queue level.

Which offloads do really make sense and valid to use for hairpin Tx queues?
Do we need separate caps for hairpin offloads?

> + *
> + *     Note that setting *tx_free_thresh* or *tx_rs_thresh* value to 0 forces
> + *     the transmit function to use default values.
> + * @param hairpin_conf
> + *   The hairpin binding configuration.
> + *
> + * @return
> + *   - 0: Success, the transmit queue is correctly set up.
> + *   - -ENOMEM: Unable to allocate the transmit ring descriptors.
> + */
> +__rte_experimental
> +int rte_eth_tx_hairpin_queue_setup
> +	(uint16_t port_id, uint16_t tx_queue_id,
> +	 uint16_t nb_tx_desc, unsigned int socket_id,
> +	 const struct rte_eth_txconf *tx_conf,
> +	 const struct rte_eth_hairpin_conf *hairpin_conf);
> +
> +/**
>    * Return the NUMA socket to which an Ethernet device is connected
>    *
>    * @param port_id
>

[snip]


More information about the dev mailing list