[dpdk-dev] [PATCH v3] i40: fix the VXLAN TSO issue

Tan, Jianfeng jianfeng.tan at intel.com
Tue Jul 26 14:22:16 CEST 2016


Hi Konstantin,


On 7/19/2016 6:29 PM, Ananyev, Konstantin wrote:
>
>> Problem:
>> When using the TSO + VXLAN feature in i40e, the outer UDP length fields in the multiple UDP segments which are TSOed by the i40e will
>> have a wrong value.
>>
>> Fix this problem by adding the tunnel type field in the i40e descriptor which was missed before.
>>
>> Fixes: 77b8301733c3 ("i40e: VXLAN Tx checksum offload")
>>
>> Signed-off-by: Zhe Tao <zhe.tao at intel.com>
>> ---
>> v2: edited the comments
>> v3: added external IP offload flag when TSO is enabled for tunnelling packets
>>
>>   app/test-pmd/csumonly.c      | 29 +++++++++++++++++++++--------
>>   drivers/net/i40e/i40e_rxtx.c | 12 +++++++++---
>>   lib/librte_mbuf/rte_mbuf.h   | 16 +++++++++++++++-
>>   3 files changed, 45 insertions(+), 12 deletions(-)
>>
>> diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c index ac4bd8f..aaa006f 100644
>> --- a/app/test-pmd/csumonly.c
>> +++ b/app/test-pmd/csumonly.c
>> @@ -204,7 +204,8 @@ parse_ethernet(struct ether_hdr *eth_hdr, struct testpmd_offload_info *info)  static void  parse_vxlan(struct
>> udp_hdr *udp_hdr,
>>   	    struct testpmd_offload_info *info,
>> -	    uint32_t pkt_type)
>> +	    uint32_t pkt_type,
>> +	    uint64_t *ol_flags)
>>   {
>>   	struct ether_hdr *eth_hdr;
>>
>> @@ -215,6 +216,7 @@ parse_vxlan(struct udp_hdr *udp_hdr,
>>   		RTE_ETH_IS_TUNNEL_PKT(pkt_type) == 0)
>>   		return;
>>
>> +	*ol_flags |= PKT_TX_TUNNEL_VXLAN;
>
> Hmm, I don't actually see much difference between that version and the previous one.
>   Regarding your comment on V2:
> " this flag is for tunnelling type, and CTD is based on whether we need to do the
> external ip offload and TSO.so this flag will not cause one extra CTD."
> I think CTD selection should be based not only on is EIP cksum is enabled or not.
> You can have tunneled packet with TSO on over IPv6, right?
> I think for i40e we need CTD each time PKT_TX_TUNNEL_ is on.

Yes, I agree. Depending on whether EIP cksum is enabled to fill 
tunneling parameters is not correct under the case: EIP cksum is not 
needed, tso is needed (tunneling parameter will not be filled). And app 
should not set PKT_TX_TUNNEL_ if there's no TSO and no checksum offload 
to avoid extra filling of tunneling parameters.

Another corner case is that, EIP cksum not needed, TSO not needed, but 
inner L3/L4 checksum needed, so:
(1) App to set PKT_TX_TUNNEL_ on, but in this case, no extra CTD will be 
used according to i40e_calc_context_desc(), so it cannot work right.
(2) App to unset PKT_TX_TUNNEL_, will not work either.

How do you think?
(Sorry, the answer is in your following comments. We need to change 
i40e_calc_context_desc() to return 1 if PKT_TX_TUNNEL_ is on.)


>
>
>>   	info->is_tunnel = 1;
>>   	info->outer_ethertype = info->ethertype;
>>   	info->outer_l2_len = info->l2_len;
>> @@ -231,7 +233,9 @@ parse_vxlan(struct udp_hdr *udp_hdr,
>>
>>   /* Parse a gre header */
>>   static void
>> -parse_gre(struct simple_gre_hdr *gre_hdr, struct testpmd_offload_info *info)
>> +parse_gre(struct simple_gre_hdr *gre_hdr,
>> +	  struct testpmd_offload_info *info,
>> +	  uint64_t *ol_flags)
>>   {
>>   	struct ether_hdr *eth_hdr;
>>   	struct ipv4_hdr *ipv4_hdr;
>> @@ -242,6 +246,8 @@ parse_gre(struct simple_gre_hdr *gre_hdr, struct testpmd_offload_info *info)
>>   	if ((gre_hdr->flags & _htons(~GRE_SUPPORTED_FIELDS)) != 0)
>>   		return;
>>
>> +	*ol_flags |= PKT_TX_TUNNEL_GRE;
>> +
>>   	gre_len += sizeof(struct simple_gre_hdr);
>>
>>   	if (gre_hdr->flags & _htons(GRE_KEY_PRESENT)) @@ -417,7 +423,7 @@ process_inner_cksums(void *l3_hdr, const struct
>> testpmd_offload_info *info,
>>    * packet */
>>   static uint64_t
>>   process_outer_cksums(void *outer_l3_hdr, struct testpmd_offload_info *info,
>> -	uint16_t testpmd_ol_flags)
>> +	uint16_t testpmd_ol_flags, uint64_t orig_ol_flags)
>>   {
>>   	struct ipv4_hdr *ipv4_hdr = outer_l3_hdr;
>>   	struct ipv6_hdr *ipv6_hdr = outer_l3_hdr; @@ -428,7 +434,8 @@ process_outer_cksums(void *outer_l3_hdr, struct
>> testpmd_offload_info *info,
>>   		ipv4_hdr->hdr_checksum = 0;
>>   		ol_flags |= PKT_TX_OUTER_IPV4;
>>
>> -		if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_OUTER_IP_CKSUM)
>> +		if ((testpmd_ol_flags & TESTPMD_TX_OFFLOAD_OUTER_IP_CKSUM) ||
>> +		    (info->tso_segsz != 0))
>>   			ol_flags |= PKT_TX_OUTER_IP_CKSUM;
> Why do you need to always raise OUTER_IP_CKSUM when TSO is enabled?

I believe it's not necessary if we fill tunneling parameters according 
to PKT_TX_TUNNEL_ as you suggest above.

>
>>   		else
>>   			ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr); @@ -442,6 +449,9 @@ process_outer_cksums(void
>> *outer_l3_hdr, struct testpmd_offload_info *info,
>>   	 * hardware supporting it today, and no API for it. */
>>
>>   	udp_hdr = (struct udp_hdr *)((char *)outer_l3_hdr + info->outer_l3_len);
>> +	if ((orig_ol_flags & PKT_TX_TCP_SEG) &&
>> +	    ((orig_ol_flags & PKT_TX_TUNNEL_MASK) == PKT_TX_TUNNEL_VXLAN))
>> +		udp_hdr->dgram_cksum = 0;
>>   	/* do not recalculate udp cksum if it was 0 */
>>   	if (udp_hdr->dgram_cksum != 0) {
>>   		udp_hdr->dgram_cksum = 0;
>> @@ -705,15 +715,18 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
>>   			if (info.l4_proto == IPPROTO_UDP) {
>>   				struct udp_hdr *udp_hdr;
>>   				udp_hdr = (struct udp_hdr *)((char *)l3_hdr +
>> -					info.l3_len);
>> -				parse_vxlan(udp_hdr, &info, m->packet_type);
>> +					   info.l3_len);
>> +				parse_vxlan(udp_hdr, &info, m->packet_type,
>> +					    &ol_flags);
>>   			} else if (info.l4_proto == IPPROTO_GRE) {
>>   				struct simple_gre_hdr *gre_hdr;
>>   				gre_hdr = (struct simple_gre_hdr *)
>>   					((char *)l3_hdr + info.l3_len);
>> -				parse_gre(gre_hdr, &info);
>> +				parse_gre(gre_hdr, &info, &ol_flags);
>>   			} else if (info.l4_proto == IPPROTO_IPIP) {
>>   				void *encap_ip_hdr;
>> +
>> +				ol_flags |= PKT_TX_TUNNEL_IPIP;
>>   				encap_ip_hdr = (char *)l3_hdr + info.l3_len;
>>   				parse_encap_ip(encap_ip_hdr, &info);
>>   			}
>> @@ -745,7 +758,7 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
>>   		 * processed in hardware. */
>>   		if (info.is_tunnel == 1) {
>>   			ol_flags |= process_outer_cksums(outer_l3_hdr, &info,
>> -				testpmd_ol_flags);
>> +				testpmd_ol_flags, ol_flags);
>>   		}
>>
>>   		/* step 4: fill the mbuf meta data (flags and header lengths) */ diff --git a/drivers/net/i40e/i40e_rxtx.c
>> b/drivers/net/i40e/i40e_rxtx.c index 049a813..4c987f2 100644
>> --- a/drivers/net/i40e/i40e_rxtx.c
>> +++ b/drivers/net/i40e/i40e_rxtx.c
>> @@ -801,6 +801,12 @@ i40e_txd_enable_checksum(uint64_t ol_flags,
>>   			union i40e_tx_offload tx_offload,
>>   			uint32_t *cd_tunneling)
>>   {
>> +	/* Tx pkts tunnel type*/
>> +	if ((ol_flags & PKT_TX_TUNNEL_MASK) == PKT_TX_TUNNEL_VXLAN)
>> +		*cd_tunneling |= I40E_TXD_CTX_UDP_TUNNELING;
>> +	else if ((ol_flags & PKT_TX_TUNNEL_MASK) == PKT_TX_TUNNEL_GRE)
>> +		*cd_tunneling |= I40E_TXD_CTX_GRE_TUNNELING;
>> +
>>   	/* UDP tunneling packet TX checksum offload */
>>   	if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
>
> I believe the problem is still there: you setup EIPLEN and NATLEN only when
> PKT_TX_OUTER_IP_CKSUM is on.
> Same story with MACLEN, you setup it with tx_offload.outer_l2_len,
> only when PKT_TX_OUTER_IP_CKSUM is on:
>
> if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
>
>                  *td_offset |= (tx_offload.outer_l2_len >> 1)
>                                  << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
>
>                  if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
>                          *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4;
>                  else if (ol_flags & PKT_TX_OUTER_IPV4)
>                          *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
>                  else if (ol_flags & PKT_TX_OUTER_IPV6)
>                          *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6;
>
>                  /* Now set the ctx descriptor fields */
>                  *cd_tunneling |= (tx_offload.outer_l3_len >> 2) <<
>                                  I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT |
>                                  (tx_offload.l2_len >> 1) <<
>                                  I40E_TXD_CTX_QW0_NATLEN_SHIFT;
>
>          } else
>                  *td_offset |= (tx_offload.l2_len >> 1)
>                          << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
>
> So if user would like to enable TSO for tunneled packets over outer IPv6 packets,
> I suppose it wouldn't work, right?
> Again people can choose to setup PKT_TX_TUNNEL_VXLAN for non-tso packets,
> and don't ask for OUTER_IP_CHECKSUM.
>   
> I think it needs to be something like that:
>
> if (ol_flags & PKT_TX_OUTER_IP_CKSUM) ||
> 	(ol_flags & PKT_TX_TUNNEL_MASK) == PKT_TX_TUNNEL_VXLAN ||
> 	ol_flags & PKT_TX_TUNNEL_MASK) == PKT_TX_TUNNEL_GRE) {
> 	...
> }

Is "ol_flags & PKT_TX_OUTER_IP_CKSUM" necessary? Tunneling type is 
indispensable to make PKT_TX_OUTER_IP_CKSUM work. I mean something like 
this:

    if ((ol_flags & PKT_TX_TUNNEL_MASK) == PKT_TX_TUNNEL_VXLAN ||
         (ol_flags & PKT_TX_TUNNEL_MASK) == PKT_TX_TUNNEL_GRE) {
         ...
    }

>
> Also, I think to modify i40e_calc_context_desc(), so it return 1,
> when tunneling flags (VXLAN, GRE) is on.

Agreed.

>
> Another thing, if we introduce new ol_flags PKT_TX_TUNNEL_*,
> don't we need to update dev_info.tx_offload_capa, so user can
> query does device support that or not?

Agreed.

Thanks,
Jianfeng

>
> Konstantin
>
>> @@ -1510,7 +1516,8 @@ i40e_calc_context_desc(uint64_t flags)
>>
>>   /* set i40e TSO context descriptor */
>>   static inline uint64_t
>> -i40e_set_tso_ctx(struct rte_mbuf *mbuf, union i40e_tx_offload tx_offload)
>> +i40e_set_tso_ctx(struct rte_mbuf *mbuf,
>> +		 union i40e_tx_offload tx_offload)
>>   {
>>   	uint64_t ctx_desc = 0;
>>   	uint32_t cd_cmd, hdr_len, cd_tso_len;
>> @@ -1521,7 +1528,7 @@ i40e_set_tso_ctx(struct rte_mbuf *mbuf, union i40e_tx_offload tx_offload)
>>   	}
>>
>>   	/**
>> -	 * in case of tunneling packet, the outer_l2_len and
>> +	 * in case of non tunneling packet, the outer_l2_len and
>>   	 * outer_l3_len must be 0.
>>   	 */
>>   	hdr_len = tx_offload.outer_l2_len +
>> @@ -1537,7 +1544,6 @@ i40e_set_tso_ctx(struct rte_mbuf *mbuf, union i40e_tx_offload tx_offload)
>>   		 I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
>>   		((uint64_t)mbuf->tso_segsz <<
>>   		 I40E_TXD_CTX_QW1_MSS_SHIFT);
>> -
>>   	return ctx_desc;
>>   }
>>
>> diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h index 15e3a10..90812ea 100644
>> --- a/lib/librte_mbuf/rte_mbuf.h
>> +++ b/lib/librte_mbuf/rte_mbuf.h
>> @@ -133,6 +133,17 @@ extern "C" {
>>   /* add new TX flags here */
>>
>>   /**
>> + * Bits 45:48 used for the tunnel type.
>> + * When doing Tx offload like TSO or checksum, the HW needs to
>> +configure the
>> + * tunnel type into the HW descriptors.
>> + */
>> +#define PKT_TX_TUNNEL_VXLAN   (1ULL << 45)
>> +#define PKT_TX_TUNNEL_GRE   (2ULL << 45)
>> +#define PKT_TX_TUNNEL_IPIP    (3ULL << 45)
>> +/* add new TX TUNNEL type here */
>> +#define PKT_TX_TUNNEL_MASK    (0xFULL << 45)
>> +
>> +/**
>>    * Second VLAN insertion (QinQ) flag.
>>    */
>>   #define PKT_TX_QINQ_PKT    (1ULL << 49)   /**< TX packet with double VLAN inserted. */
>> @@ -867,7 +878,10 @@ struct rte_mbuf {
>>   	union {
>>   		uint64_t tx_offload;       /**< combined for easy fetch */
>>   		struct {
>> -			uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
>> +			uint64_t l2_len:7;
>> +			/**< L2 (MAC) Header Length if it isn't a tunneling pkt.
>> +			 * for tunnel it is outer L4 len+tunnel len+inner L2 len
>> +			 */
>>   			uint64_t l3_len:9; /**< L3 (IP) Header Length. */
>>   			uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
>>   			uint64_t tso_segsz:16; /**< TCP TSO segment size */
>> --
>> 2.1.4



More information about the dev mailing list