[dpdk-dev] [PATCH v5 3/3] ip_frag: extend IPv6 fragment header retrieval

Ananyev, Konstantin konstantin.ananyev at intel.com
Thu Nov 8 00:00:19 CET 2018


Hi Cody,

>Hey Konstantin,
>Thanks for reviewing -- I see your point about this patch only removing one of the places where the code makes assumptions about the position of the fragmentation header.

>Unfortunately at the moment I don't have the resources to dedicate to writing the complete solution and doing a performance check, so I think I should withdraw the patch.

Ok, NP that's understandable.
Again I provided my comments quite late.

> I hope it at least serves as a blueprint in case someone comes back to it.

Yes, I think it definitely will be useful.

>I might be able to come back to it eventually, but likely not soon.

Hopefully you'll will :)
Thanks
Konstantin

> 
> Add the ability to parse IPv6 extenders to find the
> IPv6 fragment header, and update callers.
> 
> According to RFC 8200, there is no guarantee that the IPv6
> Fragment extension header will come before any other extension
> header, even though it is recommended.
> 
> Signed-off-by: Cody Doucette <doucette at bu.edu>
> Signed-off-by: Qiaobin Fu <qiaobinf at bu.edu>
> Reviewed-by: Michel Machado <michel at digirati.com.br>
> ---
>  examples/ip_reassembly/main.c               |  6 ++--
>  lib/librte_ip_frag/rte_ip_frag.h            | 23 ++++++-------
>  lib/librte_ip_frag/rte_ip_frag_version.map  |  1 +
>  lib/librte_ip_frag/rte_ipv6_fragmentation.c | 38 +++++++++++++++++++++
>  lib/librte_ip_frag/rte_ipv6_reassembly.c    |  4 +--
>  lib/librte_port/rte_port_ras.c              |  6 ++--
>  6 files changed, 59 insertions(+), 19 deletions(-)
> 
> diff --git a/examples/ip_reassembly/main.c b/examples/ip_reassembly/main.c
> index 17b55d4c7..3a827bd6c 100644
> --- a/examples/ip_reassembly/main.c
> +++ b/examples/ip_reassembly/main.c
> @@ -365,12 +365,14 @@ reassemble(struct rte_mbuf *m, uint16_t portid, uint32_t queue,
>               eth_hdr->ether_type = rte_be_to_cpu_16(ETHER_TYPE_IPv4);
>       } else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) {
>               /* if packet is IPv6 */
> -             struct ipv6_extension_fragment *frag_hdr;
> +             const struct ipv6_extension_fragment *frag_hdr;
> +             struct ipv6_extension_fragment frag_hdr_buf;
>               struct ipv6_hdr *ip_hdr;
> 
>               ip_hdr = (struct ipv6_hdr *)(eth_hdr + 1);
> 
> -             frag_hdr = rte_ipv6_frag_get_ipv6_fragment_header(ip_hdr);
> +             frag_hdr = rte_ipv6_frag_get_ipv6_fragment_header(m,
> +                     ip_hdr, &frag_hdr_buf);

I looked at the patch once again, and it seems incomplete to me.
Sorry for late comments.
Yes, right now te_ipv6_frag_get_ipv6_fragment_header can properly
retrieve ipv6 fragment info, but it is not enough to make things work
for situation when we have packet with frag header not the first and only
extension header.
In the same function, few lines below, we setup l3_len based on that assumption:

m->l3_len = sizeof(*ip_hdr) + sizeof(*frag_hdr);

mo = rte_ipv6_frag_reassemble_packet(tbl, dr, m, tms, ip_hdr, frag_hdr);

And inside rte_ipv6_frag_reassemble_packet() we still assume the same:
...
frag_hdr = (struct ipv6_extension_fragment *) (ip_hdr + 1);
ip_hdr->proto = frag_hdr->next_header;

I think we need a function that would allow us to get offset of frag_hdr. 
Actually probably we can have a generic one here, that can return offset for
any requested ext header or total length of ipv6 header.
Something like that:

struct rte_ipv6_get_xhdr_ofs {
   uint16_t find_proto;    /* header proto to find */
   uint16_t  next_proto;  /* next header proto */
   uint32_t next_ofs;       /* offset to start search */
};

struct int 
rte_ipv6_get_xhdr_ofs(struct rte_mbuf *pkt, rte_ipv6_get_xhdr_ofs *find); 

that would go through ipv6 ext headers till either requested proto is found, or end of IPv6 header. 
Then user can do something like that:

/* find fragment extention */
ipv6_get_xhdr_ofs ofs = {
        .find_proto = IPPROTO_FRAGMENT,
        .next_proto = ipv6_hdr->proto,
        .ofs = sizeof(struct ipv6_hdr),
};
rc = rte_ipv6_get_xhdr_ofs(pkt, &ofs);
if(rc == 0) 
     frag_hdr = rte_pktmbuf_mtod_offset(m, .., ofs.ofs);
...     

/* get size of IPv6 header plus all known extensions */
ipv6_get_xhdr_ofs ofs = {
        .find_proto = IPPROTO_MAX,
        .next_proto = ipv6_hdr->proto,
        .ofs = sizeof(struct ipv6_hdr),
};
rc = rte_ipv6_get_xhdr_ofs(pkt, &ofs);     


> 
>               if (frag_hdr != NULL) {
>                       struct rte_mbuf *mo;
> diff --git a/lib/librte_ip_frag/rte_ip_frag.h b/lib/librte_ip_frag/rte_ip_frag.h
> index 7f425f610..6fc8106bc 100644
> --- a/lib/librte_ip_frag/rte_ip_frag.h
> +++ b/lib/librte_ip_frag/rte_ip_frag.h
> @@ -211,28 +211,25 @@ rte_ipv6_fragment_packet(struct rte_mbuf *pkt_in,
>  struct rte_mbuf *rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
>               struct rte_ip_frag_death_row *dr,
>               struct rte_mbuf *mb, uint64_t tms, struct ipv6_hdr *ip_hdr,
> -             struct ipv6_extension_fragment *frag_hdr);
> +             const struct ipv6_extension_fragment *frag_hdr);
> 
>  /**
>   * Return a pointer to the packet's fragment header, if found.
> - * It only looks at the extension header that's right after the fixed IPv6
> - * header, and doesn't follow the whole chain of extension headers.
>   *
> - * @param hdr
> + * @param pkt
> + *   Pointer to the mbuf of the packet.
> + * @param ip_hdr
>   *   Pointer to the IPv6 header.
> + * @param frag_hdr
> + *   A pointer to the buffer where the fragment header
> + *   will be copied if it is not contiguous in mbuf data.
>   * @return
>   *   Pointer to the IPv6 fragment extension header, or NULL if it's not
>   *   present.
>   */
> -static inline struct ipv6_extension_fragment *
> -rte_ipv6_frag_get_ipv6_fragment_header(struct ipv6_hdr *hdr)
> -{
> -     if (hdr->proto == IPPROTO_FRAGMENT) {
> -             return (struct ipv6_extension_fragment *) ++hdr;
> -     }
> -     else
> -             return NULL;
> -}
> +const struct ipv6_extension_fragment *rte_ipv6_frag_get_ipv6_fragment_header(
> +             struct rte_mbuf *pkt, const struct ipv6_hdr *ip_hdr,
> +             struct ipv6_extension_fragment *frag_hdr);

Another thing - wouldn't it be ab API/ABI breakage?
One more question - making it non-inline - how much it would  affect performance?
My guess - no big difference, but did you check?
Konstantin

> 
>  /**
>   * IPv4 fragmentation.
> diff --git a/lib/librte_ip_frag/rte_ip_frag_version.map b/lib/librte_ip_frag/rte_ip_frag_version.map
> index d40d5515f..8b4c82d08 100644
> --- a/lib/librte_ip_frag/rte_ip_frag_version.map
> +++ b/lib/librte_ip_frag/rte_ip_frag_version.map
> @@ -8,6 +8,7 @@ DPDK_2.0 {
>       rte_ipv4_fragment_packet;
>       rte_ipv6_frag_reassemble_packet;
>       rte_ipv6_fragment_packet;
> +     rte_ipv6_frag_get_ipv6_fragment_header;
> 
>       local: *;
>  };
> diff --git a/lib/librte_ip_frag/rte_ipv6_fragmentation.c b/lib/librte_ip_frag/rte_ipv6_fragmentation.c
> index 62a7e4e83..bd847dd3d 100644
> --- a/lib/librte_ip_frag/rte_ipv6_fragmentation.c
> +++ b/lib/librte_ip_frag/rte_ipv6_fragmentation.c
> @@ -176,3 +176,41 @@ rte_ipv6_fragment_packet(struct rte_mbuf *pkt_in,
> 
>       return out_pkt_pos;
>  }
> +
> +const struct ipv6_extension_fragment *
> +rte_ipv6_frag_get_ipv6_fragment_header(struct rte_mbuf *pkt,
> +     const struct ipv6_hdr *ip_hdr,
> +     struct ipv6_extension_fragment *frag_hdr)
> +{
> +     size_t offset = sizeof(struct ipv6_hdr);
> +     uint8_t nexthdr = ip_hdr->proto;
> +
> +     while (ipv6_ext_hdr(nexthdr)) {
> +             struct ipv6_opt_hdr opt;
> +             const struct ipv6_opt_hdr *popt = rte_pktmbuf_read(pkt,
> +                     offset, sizeof(opt), &opt);
> +             if (popt == NULL)
> +                     return NULL;
> +
> +             switch (nexthdr) {
> +             case IPPROTO_NONE:
> +                     return NULL;
> +
> +             case IPPROTO_FRAGMENT:
> +                     return rte_pktmbuf_read(pkt, offset,
> +                             sizeof(*frag_hdr), frag_hdr);
> +
> +             case IPPROTO_AH:
> +                     offset += (popt->hdrlen + 2) << 2;
> +                     break;
> +
> +             default:
> +                     offset += (popt->hdrlen + 1) << 3;
> +                     break;
> +             }
> +
> +             nexthdr = popt->nexthdr;
> +     }
> +
> +     return NULL;
> +}
> diff --git a/lib/librte_ip_frag/rte_ipv6_reassembly.c b/lib/librte_ip_frag/rte_ipv6_reassembly.c
> index db249fe60..b2d67a3f0 100644
> --- a/lib/librte_ip_frag/rte_ipv6_reassembly.c
> +++ b/lib/librte_ip_frag/rte_ipv6_reassembly.c
> @@ -135,8 +135,8 @@ ipv6_frag_reassemble(struct ip_frag_pkt *fp)
>  #define FRAG_OFFSET(x) (rte_cpu_to_be_16(x) >> 3)
>  struct rte_mbuf *
>  rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
> -             struct rte_ip_frag_death_row *dr, struct rte_mbuf *mb, uint64_t tms,
> -             struct ipv6_hdr *ip_hdr, struct ipv6_extension_fragment *frag_hdr)
> +     struct rte_ip_frag_death_row *dr, struct rte_mbuf *mb, uint64_t tms,
> +     struct ipv6_hdr *ip_hdr, const struct ipv6_extension_fragment *frag_hdr)
>  {
>       struct ip_frag_pkt *fp;
>       struct ip_frag_key key;
> diff --git a/lib/librte_port/rte_port_ras.c b/lib/librte_port/rte_port_ras.c
> index c8b2e19bf..28764f744 100644
> --- a/lib/librte_port/rte_port_ras.c
> +++ b/lib/librte_port/rte_port_ras.c
> @@ -184,9 +184,11 @@ process_ipv6(struct rte_port_ring_writer_ras *p, struct rte_mbuf *pkt)
>       /* Assume there is no ethernet header */
>       struct ipv6_hdr *pkt_hdr = rte_pktmbuf_mtod(pkt, struct ipv6_hdr *);
> 
> -     struct ipv6_extension_fragment *frag_hdr;
> +     const struct ipv6_extension_fragment *frag_hdr;
> +     struct ipv6_extension_fragment frag_hdr_buf;
>       uint16_t frag_data = 0;
> -     frag_hdr = rte_ipv6_frag_get_ipv6_fragment_header(pkt_hdr);
> +     frag_hdr = rte_ipv6_frag_get_ipv6_fragment_header(pkt, pkt_hdr,
> +             &frag_hdr_buf);
>       if (frag_hdr != NULL)
>               frag_data = rte_be_to_cpu_16(frag_hdr->frag_data);
> 
> --
> 2.17.1


More information about the dev mailing list