[dpdk-dev] [PATCH v9 1/6] gso: add Generic Segmentation Offload API framework

Mark Kavanagh mark.b.kavanagh at intel.com
Thu Oct 5 22:36:43 CEST 2017


From: Jiayu Hu <jiayu.hu at intel.com>

Generic Segmentation Offload (GSO) is a SW technique to split large
packets into small ones. Akin to TSO, GSO enables applications to
operate on large packets, thus reducing per-packet processing overhead.

To enable more flexibility to applications, DPDK GSO is implemented
as a standalone library. Applications explicitly use the GSO library
to segment packets. Segmenting a packet requires two steps. The first
is to set the proper flags in mbuf->ol_flags, where the flags are the
same as those used for TSO. The second is to call the segmentation API,
rte_gso_segment(). This patch introduces the GSO API framework to DPDK.

rte_gso_segment() splits an input packet into small ones in each
invocation. The GSO library refers to these small packets generated
by rte_gso_segment() as GSO segments. Each of the newly-created GSO
segments is organized as a two-segment MBUF, where the first segment is a
standard MBUF, which stores a copy of packet header, and the second is an
indirect MBUF which points to a section of data in the input packet.
rte_gso_segment() reduces the refcnt of the input packet by 1. Therefore,
when all GSO segments are freed, the input packet is freed automatically.
Additionally, since each GSO segment has multiple MBUFs (i.e. 2 MBUFs),
the driver of the interface which the GSO segments are sent to should
support transmission of multi-segment packets.

The GSO framework clears the PKT_TX_TCP_SEG flag for both the input
packet, and all produced GSO segments in the event of success, since
segmentation in hardware is no longer required at that point.

Signed-off-by: Jiayu Hu <jiayu.hu at intel.com>
Signed-off-by: Mark Kavanagh <mark.b.kavanagh at intel.com>
---
 config/common_base                     |   5 ++
 doc/api/doxy-api-index.md              |   1 +
 doc/api/doxy-api.conf                  |   1 +
 doc/guides/rel_notes/release_17_11.rst |   1 +
 lib/Makefile                           |   2 +
 lib/librte_gso/Makefile                |  49 +++++++++++
 lib/librte_gso/rte_gso.c               |  52 ++++++++++++
 lib/librte_gso/rte_gso.h               | 143 +++++++++++++++++++++++++++++++++
 lib/librte_gso/rte_gso_version.map     |   7 ++
 mk/rte.app.mk                          |   1 +
 10 files changed, 262 insertions(+)
 create mode 100644 lib/librte_gso/Makefile
 create mode 100644 lib/librte_gso/rte_gso.c
 create mode 100644 lib/librte_gso/rte_gso.h
 create mode 100644 lib/librte_gso/rte_gso_version.map

diff --git a/config/common_base b/config/common_base
index 12f6be9..58ca5c0 100644
--- a/config/common_base
+++ b/config/common_base
@@ -653,6 +653,11 @@ CONFIG_RTE_LIBRTE_IP_FRAG_TBL_STAT=n
 CONFIG_RTE_LIBRTE_GRO=y
 
 #
+# Compile GSO library
+#
+CONFIG_RTE_LIBRTE_GSO=y
+
+#
 # Compile librte_meter
 #
 CONFIG_RTE_LIBRTE_METER=y
diff --git a/doc/api/doxy-api-index.md b/doc/api/doxy-api-index.md
index 19e0d4f..6512918 100644
--- a/doc/api/doxy-api-index.md
+++ b/doc/api/doxy-api-index.md
@@ -101,6 +101,7 @@ The public API headers are grouped by topics:
   [TCP]                (@ref rte_tcp.h),
   [UDP]                (@ref rte_udp.h),
   [GRO]                (@ref rte_gro.h),
+  [GSO]                (@ref rte_gso.h),
   [frag/reass]         (@ref rte_ip_frag.h),
   [LPM IPv4 route]     (@ref rte_lpm.h),
   [LPM IPv6 route]     (@ref rte_lpm6.h),
diff --git a/doc/api/doxy-api.conf b/doc/api/doxy-api.conf
index 823554f..408f2e6 100644
--- a/doc/api/doxy-api.conf
+++ b/doc/api/doxy-api.conf
@@ -47,6 +47,7 @@ INPUT                   = doc/api/doxy-api-index.md \
                           lib/librte_ether \
                           lib/librte_eventdev \
                           lib/librte_gro \
+                          lib/librte_gso \
                           lib/librte_hash \
                           lib/librte_ip_frag \
                           lib/librte_jobstats \
diff --git a/doc/guides/rel_notes/release_17_11.rst b/doc/guides/rel_notes/release_17_11.rst
index f6f9169..5bb36b7 100644
--- a/doc/guides/rel_notes/release_17_11.rst
+++ b/doc/guides/rel_notes/release_17_11.rst
@@ -174,6 +174,7 @@ The libraries prepended with a plus sign were incremented in this version.
      librte_ethdev.so.7
      librte_eventdev.so.2
      librte_gro.so.1
+   + librte_gso.so.1
      librte_hash.so.2
      librte_ip_frag.so.1
      librte_jobstats.so.1
diff --git a/lib/Makefile b/lib/Makefile
index 86caba1..3d123f4 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -108,6 +108,8 @@ DIRS-$(CONFIG_RTE_LIBRTE_REORDER) += librte_reorder
 DEPDIRS-librte_reorder := librte_eal librte_mempool librte_mbuf
 DIRS-$(CONFIG_RTE_LIBRTE_PDUMP) += librte_pdump
 DEPDIRS-librte_pdump := librte_eal librte_mempool librte_mbuf librte_ether
+DIRS-$(CONFIG_RTE_LIBRTE_GSO) += librte_gso
+DEPDIRS-librte_gso := librte_eal librte_mbuf librte_ether librte_net
 
 ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
 DIRS-$(CONFIG_RTE_LIBRTE_KNI) += librte_kni
diff --git a/lib/librte_gso/Makefile b/lib/librte_gso/Makefile
new file mode 100644
index 0000000..aeaacbc
--- /dev/null
+++ b/lib/librte_gso/Makefile
@@ -0,0 +1,49 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2017 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_gso.a
+
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
+
+EXPORT_MAP := rte_gso_version.map
+
+LIBABIVER := 1
+
+#source files
+SRCS-$(CONFIG_RTE_LIBRTE_GSO) += rte_gso.c
+
+# install this header file
+SYMLINK-$(CONFIG_RTE_LIBRTE_GSO)-include += rte_gso.h
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_gso/rte_gso.c b/lib/librte_gso/rte_gso.c
new file mode 100644
index 0000000..b773636
--- /dev/null
+++ b/lib/librte_gso/rte_gso.c
@@ -0,0 +1,52 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <errno.h>
+
+#include "rte_gso.h"
+
+int
+rte_gso_segment(struct rte_mbuf *pkt,
+		const struct rte_gso_ctx *gso_ctx,
+		struct rte_mbuf **pkts_out,
+		uint16_t nb_pkts_out)
+{
+	if (pkt == NULL || pkts_out == NULL || gso_ctx == NULL ||
+			nb_pkts_out < 1)
+		return -EINVAL; /* need a packet, a context and >= 1 output slot */
+
+	pkt->ol_flags &= (~PKT_TX_TCP_SEG); /* clear the TCP segmentation request */
+	pkts_out[0] = pkt; /* framework only: input packet is the sole output */
+
+	return 1; /* number of entries written to pkts_out */
+}
diff --git a/lib/librte_gso/rte_gso.h b/lib/librte_gso/rte_gso.h
new file mode 100644
index 0000000..7d343d7
--- /dev/null
+++ b/lib/librte_gso/rte_gso.h
@@ -0,0 +1,143 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_GSO_H_
+#define _RTE_GSO_H_
+
+/**
+ * @file
+ * Interface to GSO library
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <rte_mbuf.h>
+
+/* GSO flags for rte_gso_ctx. */
+#define RTE_GSO_FLAG_IPID_FIXED (1ULL << 0)
+/**< Use fixed IP ids for output GSO segments. Setting
+ * !RTE_GSO_FLAG_IPID_FIXED indicates using incremental IP ids.
+ */
+
+/**
+ * GSO context structure, passed to rte_gso_segment().
+ */
+struct rte_gso_ctx {
+	struct rte_mempool *direct_pool;
+	/**< MBUF pool for allocating direct buffers, which are used
+	 * to store packet headers for GSO segments.
+	 */
+	struct rte_mempool *indirect_pool;
+	/**< MBUF pool for allocating indirect buffers, which are used
+	 * to locate packet payloads for GSO segments. The indirect
+	 * buffer doesn't contain any data, but simply points to an
+	 * offset within the packet to segment.
+	 */
+	uint64_t flag;
+	/**< Flag that controls specific attributes of output segments,
+	 * such as the type of IP ID generated (i.e. fixed or incremental).
+	 */
+	uint32_t gso_types;
+	/**< Bit mask of required GSO types. The GSO library reuses
+	 * the macros that describe device TX offloading
+	 * capabilities (i.e. DEV_TX_OFFLOAD_*_TSO) as the values of
+	 * gso_types.
+	 *
+	 * For example, if applications want to segment TCP/IPv4
+	 * packets, set DEV_TX_OFFLOAD_TCP_TSO in gso_types.
+	 */
+	uint16_t gso_size;
+	/**< Maximum size of an output GSO segment, including packet
+	 * header and payload, measured in bytes.
+	 */
+};
+
+/**
+ * Segmentation function, which supports processing of both single- and
+ * multi- MBUF packets.
+ *
+ * Note that we refer to the packets that are segmented from the input
+ * packet as 'GSO segments'. rte_gso_segment() doesn't check if the
+ * input packet has correct checksums, and doesn't update checksums for
+ * output GSO segments. Additionally, it doesn't process IP fragment
+ * packets.
+ *
+ * Before calling rte_gso_segment(), applications must set proper ol_flags
+ * for the packet. The GSO library uses the same macros as those of TSO.
+ * For example, set PKT_TX_TCP_SEG and PKT_TX_IPV4 in ol_flags to segment
+ * a TCP/IPv4 packet. If rte_gso_segment() succeeds, the PKT_TX_TCP_SEG
+ * flag is removed for all GSO segments and the input packet.
+ *
+ * Each of the newly-created GSO segments is organized as a two-segment
+ * MBUF, where the first segment is a standard MBUF, which stores a copy
+ * of packet header, and the second is an indirect MBUF which points to
+ * a section of data in the input packet. Since each GSO segment has
+ * multiple MBUFs (i.e. typically 2 MBUFs), the driver of the interface which
+ * the GSO segments are sent to should support transmission of multi-segment
+ * packets.
+ *
+ * If the input packet is GSO'd, its mbuf refcnt reduces by 1. Therefore,
+ * when all GSO segments are freed, the input packet is freed automatically.
+ *
+ * If the memory space in pkts_out or MBUF pools is insufficient, this
+ * function fails and returns a negative errno value. Otherwise, GSO
+ * succeeds, and this function returns the number of output GSO segments
+ * filled in pkts_out.
+ *
+ * @param pkt
+ *  The packet mbuf to segment.
+ * @param ctx
+ *  GSO context object pointer.
+ * @param pkts_out
+ *  Pointer array used to store the MBUF addresses of output GSO
+ *  segments, when rte_gso_segment() succeeds.
+ * @param nb_pkts_out
+ *  The max number of items that pkts_out can keep.
+ *
+ * @return
+ *  - The number of GSO segments filled in pkts_out on success.
+ *  - Return -ENOMEM if run out of memory in MBUF pools.
+ *  - Return -EINVAL for invalid parameters.
+ */
+int rte_gso_segment(struct rte_mbuf *pkt,
+		const struct rte_gso_ctx *ctx,
+		struct rte_mbuf **pkts_out,
+		uint16_t nb_pkts_out);
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_GSO_H_ */
diff --git a/lib/librte_gso/rte_gso_version.map b/lib/librte_gso/rte_gso_version.map
new file mode 100644
index 0000000..e1fd453
--- /dev/null
+++ b/lib/librte_gso/rte_gso_version.map
@@ -0,0 +1,7 @@
+DPDK_17.11 {
+	global:
+
+	rte_gso_segment;
+
+	local: *;
+};
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index c25fdd9..d4c9873 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -66,6 +66,7 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_PDUMP)          += -lrte_pdump
 _LDLIBS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR)    += -lrte_distributor
 _LDLIBS-$(CONFIG_RTE_LIBRTE_IP_FRAG)        += -lrte_ip_frag
 _LDLIBS-$(CONFIG_RTE_LIBRTE_GRO)            += -lrte_gro
+_LDLIBS-$(CONFIG_RTE_LIBRTE_GSO)            += -lrte_gso
 _LDLIBS-$(CONFIG_RTE_LIBRTE_METER)          += -lrte_meter
 _LDLIBS-$(CONFIG_RTE_LIBRTE_SCHED)          += -lrte_sched
 _LDLIBS-$(CONFIG_RTE_LIBRTE_LPM)            += -lrte_lpm
-- 
1.9.3



More information about the dev mailing list