[dpdk-dev] [PATCH v3] i40e: fix the VXLAN TSO issue
Ananyev, Konstantin
konstantin.ananyev at intel.com
Tue Jul 19 12:29:52 CEST 2016
>
> Problem:
> When using the TSO + VXLAN feature in i40e, the outer UDP length fields in the multiple UDP segments which are TSOed by the i40e will
> have a wrong value.
>
> Fix this problem by adding the tunnel type field in the i40e descriptor which was missed before.
>
> Fixes: 77b8301733c3 ("i40e: VXLAN Tx checksum offload")
>
> Signed-off-by: Zhe Tao <zhe.tao at intel.com>
> ---
> v2: edited the comments
> v3: added external IP offload flag when TSO is enabled for tunnelling packets
>
> app/test-pmd/csumonly.c | 29 +++++++++++++++++++++--------
> drivers/net/i40e/i40e_rxtx.c | 12 +++++++++---
> lib/librte_mbuf/rte_mbuf.h | 16 +++++++++++++++-
> 3 files changed, 45 insertions(+), 12 deletions(-)
>
> diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c index ac4bd8f..aaa006f 100644
> --- a/app/test-pmd/csumonly.c
> +++ b/app/test-pmd/csumonly.c
> @@ -204,7 +204,8 @@ parse_ethernet(struct ether_hdr *eth_hdr, struct testpmd_offload_info *info) static void parse_vxlan(struct
> udp_hdr *udp_hdr,
> struct testpmd_offload_info *info,
> - uint32_t pkt_type)
> + uint32_t pkt_type,
> + uint64_t *ol_flags)
> {
> struct ether_hdr *eth_hdr;
>
> @@ -215,6 +216,7 @@ parse_vxlan(struct udp_hdr *udp_hdr,
> RTE_ETH_IS_TUNNEL_PKT(pkt_type) == 0)
> return;
>
> + *ol_flags |= PKT_TX_TUNNEL_VXLAN;
Hmm, I don't actually see much difference between that version and the previous one.
Regarding your comment on V2:
" this flag is for tunnelling type, and CTD is based on whether we need to do the
external ip offload and TSO. So this flag will not cause one extra CTD."
I think CTD selection should be based not only on whether EIP cksum is enabled or not.
You can have tunneled packet with TSO on over IPv6, right?
I think for i40e we need CTD each time PKT_TX_TUNNEL_ is on.
> info->is_tunnel = 1;
> info->outer_ethertype = info->ethertype;
> info->outer_l2_len = info->l2_len;
> @@ -231,7 +233,9 @@ parse_vxlan(struct udp_hdr *udp_hdr,
>
> /* Parse a gre header */
> static void
> -parse_gre(struct simple_gre_hdr *gre_hdr, struct testpmd_offload_info *info)
> +parse_gre(struct simple_gre_hdr *gre_hdr,
> + struct testpmd_offload_info *info,
> + uint64_t *ol_flags)
> {
> struct ether_hdr *eth_hdr;
> struct ipv4_hdr *ipv4_hdr;
> @@ -242,6 +246,8 @@ parse_gre(struct simple_gre_hdr *gre_hdr, struct testpmd_offload_info *info)
> if ((gre_hdr->flags & _htons(~GRE_SUPPORTED_FIELDS)) != 0)
> return;
>
> + *ol_flags |= PKT_TX_TUNNEL_GRE;
> +
> gre_len += sizeof(struct simple_gre_hdr);
>
> if (gre_hdr->flags & _htons(GRE_KEY_PRESENT)) @@ -417,7 +423,7 @@ process_inner_cksums(void *l3_hdr, const struct
> testpmd_offload_info *info,
> * packet */
> static uint64_t
> process_outer_cksums(void *outer_l3_hdr, struct testpmd_offload_info *info,
> - uint16_t testpmd_ol_flags)
> + uint16_t testpmd_ol_flags, uint64_t orig_ol_flags)
> {
> struct ipv4_hdr *ipv4_hdr = outer_l3_hdr;
> struct ipv6_hdr *ipv6_hdr = outer_l3_hdr; @@ -428,7 +434,8 @@ process_outer_cksums(void *outer_l3_hdr, struct
> testpmd_offload_info *info,
> ipv4_hdr->hdr_checksum = 0;
> ol_flags |= PKT_TX_OUTER_IPV4;
>
> - if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_OUTER_IP_CKSUM)
> + if ((testpmd_ol_flags & TESTPMD_TX_OFFLOAD_OUTER_IP_CKSUM) ||
> + (info->tso_segsz != 0))
> ol_flags |= PKT_TX_OUTER_IP_CKSUM;
Why do you need to always raise OUTER_IP_CKSUM when TSO is enabled?
> else
> ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr); @@ -442,6 +449,9 @@ process_outer_cksums(void
> *outer_l3_hdr, struct testpmd_offload_info *info,
> * hardware supporting it today, and no API for it. */
>
> udp_hdr = (struct udp_hdr *)((char *)outer_l3_hdr + info->outer_l3_len);
> + if ((orig_ol_flags & PKT_TX_TCP_SEG) &&
> + ((orig_ol_flags & PKT_TX_TUNNEL_MASK) == PKT_TX_TUNNEL_VXLAN))
> + udp_hdr->dgram_cksum = 0;
> /* do not recalculate udp cksum if it was 0 */
> if (udp_hdr->dgram_cksum != 0) {
> udp_hdr->dgram_cksum = 0;
> @@ -705,15 +715,18 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
> if (info.l4_proto == IPPROTO_UDP) {
> struct udp_hdr *udp_hdr;
> udp_hdr = (struct udp_hdr *)((char *)l3_hdr +
> - info.l3_len);
> - parse_vxlan(udp_hdr, &info, m->packet_type);
> + info.l3_len);
> + parse_vxlan(udp_hdr, &info, m->packet_type,
> + &ol_flags);
> } else if (info.l4_proto == IPPROTO_GRE) {
> struct simple_gre_hdr *gre_hdr;
> gre_hdr = (struct simple_gre_hdr *)
> ((char *)l3_hdr + info.l3_len);
> - parse_gre(gre_hdr, &info);
> + parse_gre(gre_hdr, &info, &ol_flags);
> } else if (info.l4_proto == IPPROTO_IPIP) {
> void *encap_ip_hdr;
> +
> + ol_flags |= PKT_TX_TUNNEL_IPIP;
> encap_ip_hdr = (char *)l3_hdr + info.l3_len;
> parse_encap_ip(encap_ip_hdr, &info);
> }
> @@ -745,7 +758,7 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
> * processed in hardware. */
> if (info.is_tunnel == 1) {
> ol_flags |= process_outer_cksums(outer_l3_hdr, &info,
> - testpmd_ol_flags);
> + testpmd_ol_flags, ol_flags);
> }
>
> /* step 4: fill the mbuf meta data (flags and header lengths) */ diff --git a/drivers/net/i40e/i40e_rxtx.c
> b/drivers/net/i40e/i40e_rxtx.c index 049a813..4c987f2 100644
> --- a/drivers/net/i40e/i40e_rxtx.c
> +++ b/drivers/net/i40e/i40e_rxtx.c
> @@ -801,6 +801,12 @@ i40e_txd_enable_checksum(uint64_t ol_flags,
> union i40e_tx_offload tx_offload,
> uint32_t *cd_tunneling)
> {
> + /* Tx pkts tunnel type*/
> + if ((ol_flags & PKT_TX_TUNNEL_MASK) == PKT_TX_TUNNEL_VXLAN)
> + *cd_tunneling |= I40E_TXD_CTX_UDP_TUNNELING;
> + else if ((ol_flags & PKT_TX_TUNNEL_MASK) == PKT_TX_TUNNEL_GRE)
> + *cd_tunneling |= I40E_TXD_CTX_GRE_TUNNELING;
> +
> /* UDP tunneling packet TX checksum offload */
> if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
I believe the problem is still there: you set up EIPLEN and NATLEN only when
PKT_TX_OUTER_IP_CKSUM is on.
Same story with MACLEN: you set it up with tx_offload.outer_l2_len
only when PKT_TX_OUTER_IP_CKSUM is on:
if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
*td_offset |= (tx_offload.outer_l2_len >> 1)
<< I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
*cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4;
else if (ol_flags & PKT_TX_OUTER_IPV4)
*cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
else if (ol_flags & PKT_TX_OUTER_IPV6)
*cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6;
/* Now set the ctx descriptor fields */
*cd_tunneling |= (tx_offload.outer_l3_len >> 2) <<
I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT |
(tx_offload.l2_len >> 1) <<
I40E_TXD_CTX_QW0_NATLEN_SHIFT;
} else
*td_offset |= (tx_offload.l2_len >> 1)
<< I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
So if a user would like to enable TSO for tunneled packets with an outer IPv6 header,
I suppose it wouldn't work, right?
Again, people can choose to set PKT_TX_TUNNEL_VXLAN for non-TSO packets
and not ask for OUTER_IP_CKSUM.
I think it needs to be something like that:
if ((ol_flags & PKT_TX_OUTER_IP_CKSUM) ||
    ((ol_flags & PKT_TX_TUNNEL_MASK) == PKT_TX_TUNNEL_VXLAN) ||
    ((ol_flags & PKT_TX_TUNNEL_MASK) == PKT_TX_TUNNEL_GRE)) {
...
}
Also, I think i40e_calc_context_desc() needs to be modified so that it returns 1
when a tunneling flag (VXLAN, GRE) is on.
Another thing: if we introduce new ol_flags PKT_TX_TUNNEL_*,
don't we need to update dev_info.tx_offload_capa, so the user can
query whether the device supports them or not?
Konstantin
>
> @@ -1510,7 +1516,8 @@ i40e_calc_context_desc(uint64_t flags)
>
> /* set i40e TSO context descriptor */
> static inline uint64_t
> -i40e_set_tso_ctx(struct rte_mbuf *mbuf, union i40e_tx_offload tx_offload)
> +i40e_set_tso_ctx(struct rte_mbuf *mbuf,
> + union i40e_tx_offload tx_offload)
> {
> uint64_t ctx_desc = 0;
> uint32_t cd_cmd, hdr_len, cd_tso_len;
> @@ -1521,7 +1528,7 @@ i40e_set_tso_ctx(struct rte_mbuf *mbuf, union i40e_tx_offload tx_offload)
> }
>
> /**
> - * in case of tunneling packet, the outer_l2_len and
> + * in case of non tunneling packet, the outer_l2_len and
> * outer_l3_len must be 0.
> */
> hdr_len = tx_offload.outer_l2_len +
> @@ -1537,7 +1544,6 @@ i40e_set_tso_ctx(struct rte_mbuf *mbuf, union i40e_tx_offload tx_offload)
> I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
> ((uint64_t)mbuf->tso_segsz <<
> I40E_TXD_CTX_QW1_MSS_SHIFT);
> -
> return ctx_desc;
> }
>
> diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h index 15e3a10..90812ea 100644
> --- a/lib/librte_mbuf/rte_mbuf.h
> +++ b/lib/librte_mbuf/rte_mbuf.h
> @@ -133,6 +133,17 @@ extern "C" {
> /* add new TX flags here */
>
> /**
> + * Bits 45:48 used for the tunnel type.
> + * When doing Tx offload like TSO or checksum, the HW needs to
> +configure the
> + * tunnel type into the HW descriptors.
> + */
> +#define PKT_TX_TUNNEL_VXLAN (1ULL << 45)
> +#define PKT_TX_TUNNEL_GRE (2ULL << 45)
> +#define PKT_TX_TUNNEL_IPIP (3ULL << 45)
> +/* add new TX TUNNEL type here */
> +#define PKT_TX_TUNNEL_MASK (0xFULL << 45)
> +
> +/**
> * Second VLAN insertion (QinQ) flag.
> */
> #define PKT_TX_QINQ_PKT (1ULL << 49) /**< TX packet with double VLAN inserted. */
> @@ -867,7 +878,10 @@ struct rte_mbuf {
> union {
> uint64_t tx_offload; /**< combined for easy fetch */
> struct {
> - uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
> + uint64_t l2_len:7;
> + /**< L2 (MAC) Header Length if it isn't a tunneling pkt.
> + * for tunnel it is outer L4 len+tunnel len+inner L2 len
> + */
> uint64_t l3_len:9; /**< L3 (IP) Header Length. */
> uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
> uint64_t tso_segsz:16; /**< TCP TSO segment size */
> --
> 2.1.4
More information about the dev
mailing list