[dpdk-dev] [PATCH v2 1/6] ethdev: add Tx preparation

Ananyev, Konstantin konstantin.ananyev at intel.com
Mon Sep 19 15:03:07 CEST 2016


Hi Tomasz,

> 
> Added API for `rte_eth_tx_prep`
> 
> uint16_t rte_eth_tx_prep(uint8_t port_id, uint16_t queue_id,
> 	struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
> 
> Added fields to the `struct rte_eth_desc_lim`:
> 
> 	uint16_t nb_seg_max;
> 		/**< Max number of segments per whole packet. */
> 
> 	uint16_t nb_mtu_seg_max;
> 		/**< Max number of segments per one MTU */
> 
> Created `rte_pkt.h` header with common used functions:
> 
> int rte_validate_tx_offload(struct rte_mbuf *m)
> 	to validate general requirements for tx offload in packet such a
> 	flag completness. In current implementation this funtion is called
> 	optionaly when RTE_LIBRTE_ETHDEV_DEBUG is enabled.
> 
> int rte_phdr_cksum_fix(struct rte_mbuf *m)
> 	to fix pseudo header checksum for TSO and non-TSO tcp/udp packets
> 	before hardware tx checksum offload.
> 	 - for non-TSO tcp/udp packets full pseudo-header checksum is
> 	   counted and set.
> 	 - for TSO the IP payload length is not included.
> 
> Signed-off-by: Tomasz Kulasek <tomaszx.kulasek at intel.com>
> ---
>  config/common_base            |    1 +
>  lib/librte_ether/rte_ethdev.h |   85 ++++++++++++++++++++++++++
>  lib/librte_mbuf/rte_mbuf.h    |    8 +++
>  lib/librte_net/Makefile       |    2 +-
>  lib/librte_net/rte_pkt.h      |  132 +++++++++++++++++++++++++++++++++++++++++
>  5 files changed, 227 insertions(+), 1 deletion(-)
>  create mode 100644 lib/librte_net/rte_pkt.h
> 
> diff --git a/config/common_base b/config/common_base
> index 7830535..7ada9e0 100644
> --- a/config/common_base
> +++ b/config/common_base
> @@ -120,6 +120,7 @@ CONFIG_RTE_MAX_QUEUES_PER_PORT=1024
>  CONFIG_RTE_LIBRTE_IEEE1588=n
>  CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16
>  CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y
> +CONFIG_RTE_ETHDEV_TX_PREP=y
> 
>  #
>  # Support NIC bypass logic
> diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
> index b0fe033..4fa674d 100644
> --- a/lib/librte_ether/rte_ethdev.h
> +++ b/lib/librte_ether/rte_ethdev.h
> @@ -182,6 +182,7 @@ extern "C" {
>  #include <rte_pci.h>
>  #include <rte_dev.h>
>  #include <rte_devargs.h>
> +#include <rte_errno.h>
>  #include "rte_ether.h"
>  #include "rte_eth_ctrl.h"
>  #include "rte_dev_info.h"
> @@ -696,6 +697,8 @@ struct rte_eth_desc_lim {
>  	uint16_t nb_max;   /**< Max allowed number of descriptors. */
>  	uint16_t nb_min;   /**< Min allowed number of descriptors. */
>  	uint16_t nb_align; /**< Number of descriptors should be aligned to. */
> +	uint16_t nb_seg_max;     /**< Max number of segments per whole packet. */
> +	uint16_t nb_mtu_seg_max; /**< Max number of segments per one MTU */
>  };
> 
>  /**
> @@ -1181,6 +1184,11 @@ typedef uint16_t (*eth_tx_burst_t)(void *txq,
>  				   uint16_t nb_pkts);
>  /**< @internal Send output packets on a transmit queue of an Ethernet device. */
> 
> +typedef uint16_t (*eth_tx_prep_t)(void *txq,
> +				   struct rte_mbuf **tx_pkts,
> +				   uint16_t nb_pkts);
> +/**< @internal Prepare output packets on a transmit queue of an Ethernet device. */
> +
>  typedef int (*flow_ctrl_get_t)(struct rte_eth_dev *dev,
>  			       struct rte_eth_fc_conf *fc_conf);
>  /**< @internal Get current flow control parameter on an Ethernet device */
> @@ -1626,6 +1634,7 @@ enum rte_eth_dev_type {
>  struct rte_eth_dev {
>  	eth_rx_burst_t rx_pkt_burst; /**< Pointer to PMD receive function. */
>  	eth_tx_burst_t tx_pkt_burst; /**< Pointer to PMD transmit function. */
> +	eth_tx_prep_t tx_pkt_prep; /**< Pointer to PMD transmit prepare function. */
>  	struct rte_eth_dev_data *data;  /**< Pointer to device data */
>  	const struct eth_driver *driver;/**< Driver for this device */
>  	const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
> @@ -2833,6 +2842,82 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id,
>  	return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts);
>  }
> 
> +/**
> + * Process a burst of output packets on a transmit queue of an Ethernet device.
> + *
> + * The rte_eth_tx_prep() function is invoked to prepare output packets to be
> + * transmitted on the output queue *queue_id* of the Ethernet device designated
> + * by its *port_id*.
> + * The *nb_pkts* parameter is the number of packets to be prepared which are
> + * supplied in the *tx_pkts* array of *rte_mbuf* structures, each of them
> + * allocated from a pool created with rte_pktmbuf_pool_create().
> + * For each packet to send, the rte_eth_tx_prep() function performs
> + * the following operations:
> + *
> + * - Check if packet meets devices requirements for tx offloads.
> + *
> + * - Check limitations about number of segments.
> + *
> + * - Check additional requirements when debug is enabled.
> + *
> + * - Update and/or reset required checksums when tx offload is set for packet.
> + *
> + * The rte_eth_tx_prep() function returns the number of packets ready to be
> + * sent. A return value equal to *nb_pkts* means that all packets are valid and
> + * ready to be sent.
> + *
> + * @param port_id
> + *   The port identifier of the Ethernet device.
> + * @param queue_id
> + *   The index of the transmit queue through which output packets must be
> + *   sent.
> + *   The value must be in the range [0, nb_tx_queue - 1] previously supplied
> + *   to rte_eth_dev_configure().
> + * @param tx_pkts
> + *   The address of an array of *nb_pkts* pointers to *rte_mbuf* structures
> + *   which contain the output packets.
> + * @param nb_pkts
> + *   The maximum number of packets to process.
> + * @return
> + *   The number of packets correct and ready to be sent. The return value can be
> + *   less than the value of the *tx_pkts* parameter when some packet doesn't
> + *   meet devices requirements with rte_errno set appropriately.
> + */
> +
> +#ifdef RTE_ETHDEV_TX_PREP

Sorry for being a bit late on that discussion, but what the point of having
that config macro (RTE_ETHDEV_TX_PREP ) at all?
As I can see right now, if driver doesn't setup tx_pkt_prep, then nb_pkts
would be return anyway...

BTW, there is my another question - should it be that way?
Shouldn't we return 0 (and set rte_errno=ENOTSUP) here if dev->tx_pk_prep == NULL?

> +
> +static inline uint16_t
> +rte_eth_tx_prep(uint8_t port_id, uint16_t queue_id, struct rte_mbuf **tx_pkts,
> +		uint16_t nb_pkts)
> +{
> +	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
> +
> +	if (!dev->tx_pkt_prep)
> +		return nb_pkts;
> +
> +#ifdef RTE_LIBRTE_ETHDEV_DEBUG
> +	if (queue_id >= dev->data->nb_tx_queues) {
> +		RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", queue_id);
> +		rte_errno = -EINVAL;
> +		return 0;
> +	}
> +#endif
> +
> +	return (*dev->tx_pkt_prep)(dev->data->tx_queues[queue_id],
> +			tx_pkts, nb_pkts);
> +}
> +
> +#else
> +
> +static inline uint16_t
> +rte_eth_tx_prep(uint8_t port_id __rte_unused, uint16_t queue_id __rte_unused,
> +		struct rte_mbuf **tx_pkts __rte_unused, uint16_t nb_pkts)
> +{
> +	return nb_pkts;
> +}
> +
> +#endif
> +
>  typedef void (*buffer_tx_error_fn)(struct rte_mbuf **unsent, uint16_t count,
>  		void *userdata);
> 
> diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
> index 7ea66ed..72fd352 100644
> --- a/lib/librte_mbuf/rte_mbuf.h
> +++ b/lib/librte_mbuf/rte_mbuf.h
> @@ -211,6 +211,14 @@ extern "C" {
>   */
>  #define PKT_TX_OUTER_IPV4   (1ULL << 59)
> 
> +#define PKT_TX_OFFLOAD_MASK (    \
> +		PKT_TX_IP_CKSUM |        \
> +		PKT_TX_L4_MASK |         \
> +		PKT_TX_OUTER_IP_CKSUM |  \
> +		PKT_TX_TCP_SEG |         \
> +		PKT_TX_QINQ_PKT |        \
> +		PKT_TX_VLAN_PKT)
> +
>  /**
>   * Packet outer header is IPv6. This flag must be set when using any
>   * outer offload feature (L4 checksum) to tell the NIC that the outer
> diff --git a/lib/librte_net/Makefile b/lib/librte_net/Makefile
> index ad2e482..b5abe84 100644
> --- a/lib/librte_net/Makefile
> +++ b/lib/librte_net/Makefile
> @@ -34,7 +34,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
>  CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
> 
>  # install includes
> -SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include := rte_ip.h rte_tcp.h rte_udp.h rte_sctp.h rte_icmp.h rte_arp.h
> +SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include := rte_ip.h rte_tcp.h rte_udp.h rte_sctp.h rte_icmp.h rte_arp.h rte_pkt.h
> 
> 
>  include $(RTE_SDK)/mk/rte.install.mk
> diff --git a/lib/librte_net/rte_pkt.h b/lib/librte_net/rte_pkt.h
> new file mode 100644
> index 0000000..a3c3e3c
> --- /dev/null
> +++ b/lib/librte_net/rte_pkt.h
> @@ -0,0 +1,132 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2016 Intel Corporation. All rights reserved.
> + *   All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of Intel Corporation nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#ifndef _RTE_PKT_H_
> +#define _RTE_PKT_H_
> +
> +#include <rte_ip.h>
> +#include <rte_udp.h>
> +#include <rte_tcp.h>
> +#include <rte_sctp.h>
> +
> +/**
> + * Validate general requirements for tx offload in packet.
> + */
> +static inline int
> +rte_validate_tx_offload(struct rte_mbuf *m)
> +{
> +	uint64_t ol_flags = m->ol_flags;
> +
> +	/* Does packet set any of available offloads? */
> +	if (!(ol_flags & PKT_TX_OFFLOAD_MASK))
> +		return 0;
> +
> +	/* IP checksum can be counted only for IPv4 packet */
> +	if ((ol_flags & PKT_TX_IP_CKSUM) && (ol_flags & PKT_TX_IPV6))
> +		return -EINVAL;
> +
> +	if (ol_flags & (PKT_TX_L4_MASK | PKT_TX_TCP_SEG))
> +		/* IP type not set */
> +		if (!(ol_flags & (PKT_TX_IPV4 | PKT_TX_IPV6)))
> +			return -EINVAL;
> +
> +	if (ol_flags & PKT_TX_TCP_SEG) {
> +
> +		/* PKT_TX_IP_CKSUM offload not set for IPv4 TSO packet */
> +		if ((ol_flags & PKT_TX_IPV4) && !(ol_flags & PKT_TX_IP_CKSUM))
> +			return -EINVAL;
> +
> +		if (m->tso_segsz == 0)
> +			return -EINVAL;
> +
> +	}


I suppose these 2 if(ol_flags & PKT_TX_TCP_SEG) above could be united into one.

> +
> +	/* PKT_TX_OUTER_IP_CKSUM set for non outer IPv4 packet. */
> +	if ((ol_flags & PKT_TX_OUTER_IP_CKSUM) && !(ol_flags & PKT_TX_OUTER_IPV4))
> +		return -EINVAL;
> +
> +	return 0;
> +}
> +
> +/**
> + * Fix pseudo header checksum for TSO and non-TSO tcp/udp packets before
> + * hardware tx checksum.
> + * For non-TSO tcp/udp packets full pseudo-header checksum is counted and set.
> + * For TSO the IP payload length is not included.
> + */
> +static inline int
> +rte_phdr_cksum_fix(struct rte_mbuf *m)
> +{
> +	struct ipv4_hdr *ipv4_hdr;
> +	struct ipv6_hdr *ipv6_hdr;
> +	struct tcp_hdr *tcp_hdr;
> +	struct udp_hdr *udp_hdr;
> +
> +	if (m->ol_flags & PKT_TX_IPV4) {
> +		ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
> +
> +		if (m->ol_flags & PKT_TX_IP_CKSUM)
> +			ipv4_hdr->hdr_checksum = 0;
> +
> +		if ((m->ol_flags & PKT_TX_UDP_CKSUM) == PKT_TX_UDP_CKSUM) {
> +			/* non-TSO udp */
> +			udp_hdr = rte_pktmbuf_mtod_offset(m, struct udp_hdr *, m->l2_len +
> +					m->l3_len);
> +			udp_hdr->dgram_cksum = rte_ipv4_phdr_cksum(ipv4_hdr, m->ol_flags);
> +		} else if ((m->ol_flags & PKT_TX_TCP_CKSUM) ||
> +				(m->ol_flags & PKT_TX_TCP_SEG)) {
> +			/* non-TSO tcp or TSO */
> +			tcp_hdr = rte_pktmbuf_mtod_offset(m, struct tcp_hdr *, m->l2_len +
> +					m->l3_len);
> +			tcp_hdr->cksum = rte_ipv4_phdr_cksum(ipv4_hdr, m->ol_flags);
> +		}
> +	} else if (m->ol_flags & PKT_TX_IPV6) {
> +		ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *, m->l2_len);
> +
> +		if ((m->ol_flags & PKT_TX_UDP_CKSUM) == PKT_TX_UDP_CKSUM) {
> +			/* non-TSO udp */
> +			udp_hdr = rte_pktmbuf_mtod_offset(m, struct udp_hdr *, m->l2_len +
> +					m->l3_len);
> +			udp_hdr->dgram_cksum = rte_ipv6_phdr_cksum(ipv6_hdr, m->ol_flags);
> +		} else if ((m->ol_flags & PKT_TX_TCP_CKSUM) ||
> +				(m->ol_flags & PKT_TX_TCP_SEG)) {
> +			/* non-TSO tcp or TSO */
> +			tcp_hdr = rte_pktmbuf_mtod_offset(m, struct tcp_hdr *, m->l2_len +
> +					m->l3_len);
> +			tcp_hdr->cksum = rte_ipv6_phdr_cksum(ipv6_hdr, m->ol_flags);
> +		}
> +	}
> +	return 0;
> +}

We probably need to take into account that in some cases outer_l*_len could be setup here
(in case of tunneled packets).

Konstantin

> +
> +#endif /* _RTE_PKT_H_ */
> --
> 1.7.9.5



More information about the dev mailing list