[dpdk-dev] [PATCH 1/2] gro: TCP/IPV4 GRO codes cleanup

Jiayu Hu jiayu.hu at intel.com
Sat Nov 25 04:17:39 CET 2017


This patch updates TCP/IPv4 GRO as follows:
- remove IP identification check when merging TCP/IPv4 packets
- extract common internal functions for supporting tunneled GRO
- rename internal functions and variables for better understanding
- update comments

Signed-off-by: Jiayu Hu <jiayu.hu at intel.com>
---
 lib/librte_gro/gro_tcp4.c | 294 +++++++++++++---------------------------------
 lib/librte_gro/gro_tcp4.h | 236 ++++++++++++++++++++++++++-----------
 lib/librte_gro/rte_gro.c  |  60 +++++-----
 3 files changed, 278 insertions(+), 312 deletions(-)

diff --git a/lib/librte_gro/gro_tcp4.c b/lib/librte_gro/gro_tcp4.c
index 61a0423..a560a84 100644
--- a/lib/librte_gro/gro_tcp4.c
+++ b/lib/librte_gro/gro_tcp4.c
@@ -34,8 +34,6 @@
 #include <rte_mbuf.h>
 #include <rte_cycles.h>
 #include <rte_ethdev.h>
-#include <rte_ip.h>
-#include <rte_tcp.h>
 
 #include "gro_tcp4.h"
 
@@ -72,20 +70,20 @@ gro_tcp4_tbl_create(uint16_t socket_id,
 	}
 	tbl->max_item_num = entries_num;
 
-	size = sizeof(struct gro_tcp4_key) * entries_num;
-	tbl->keys = rte_zmalloc_socket(__func__,
+	size = sizeof(struct gro_tcp4_flow) * entries_num;
+	tbl->flows = rte_zmalloc_socket(__func__,
 			size,
 			RTE_CACHE_LINE_SIZE,
 			socket_id);
-	if (tbl->keys == NULL) {
+	if (tbl->flows == NULL) {
 		rte_free(tbl->items);
 		rte_free(tbl);
 		return NULL;
 	}
-	/* INVALID_ARRAY_INDEX indicates empty key */
+	/* INVALID_ARRAY_INDEX indicates an empty flow */
 	for (i = 0; i < entries_num; i++)
-		tbl->keys[i].start_index = INVALID_ARRAY_INDEX;
-	tbl->max_key_num = entries_num;
+		tbl->flows[i].start_index = INVALID_ARRAY_INDEX;
+	tbl->max_flow_num = entries_num;
 
 	return tbl;
 }
@@ -97,111 +95,11 @@ gro_tcp4_tbl_destroy(void *tbl)
 
 	if (tcp_tbl) {
 		rte_free(tcp_tbl->items);
-		rte_free(tcp_tbl->keys);
+		rte_free(tcp_tbl->flows);
 	}
 	rte_free(tcp_tbl);
 }
 
-/*
- * merge two TCP/IPv4 packets without updating checksums.
- * If cmp is larger than 0, append the new packet to the
- * original packet. Otherwise, pre-pend the new packet to
- * the original packet.
- */
-static inline int
-merge_two_tcp4_packets(struct gro_tcp4_item *item_src,
-		struct rte_mbuf *pkt,
-		uint16_t ip_id,
-		uint32_t sent_seq,
-		int cmp)
-{
-	struct rte_mbuf *pkt_head, *pkt_tail, *lastseg;
-	uint16_t tcp_datalen;
-
-	if (cmp > 0) {
-		pkt_head = item_src->firstseg;
-		pkt_tail = pkt;
-	} else {
-		pkt_head = pkt;
-		pkt_tail = item_src->firstseg;
-	}
-
-	/* check if the packet length will be beyond the max value */
-	tcp_datalen = pkt_tail->pkt_len - pkt_tail->l2_len -
-		pkt_tail->l3_len - pkt_tail->l4_len;
-	if (pkt_head->pkt_len - pkt_head->l2_len + tcp_datalen >
-			TCP4_MAX_L3_LENGTH)
-		return 0;
-
-	/* remove packet header for the tail packet */
-	rte_pktmbuf_adj(pkt_tail,
-			pkt_tail->l2_len +
-			pkt_tail->l3_len +
-			pkt_tail->l4_len);
-
-	/* chain two packets together */
-	if (cmp > 0) {
-		item_src->lastseg->next = pkt;
-		item_src->lastseg = rte_pktmbuf_lastseg(pkt);
-		/* update IP ID to the larger value */
-		item_src->ip_id = ip_id;
-	} else {
-		lastseg = rte_pktmbuf_lastseg(pkt);
-		lastseg->next = item_src->firstseg;
-		item_src->firstseg = pkt;
-		/* update sent_seq to the smaller value */
-		item_src->sent_seq = sent_seq;
-	}
-	item_src->nb_merged++;
-
-	/* update mbuf metadata for the merged packet */
-	pkt_head->nb_segs += pkt_tail->nb_segs;
-	pkt_head->pkt_len += pkt_tail->pkt_len;
-
-	return 1;
-}
-
-static inline int
-check_seq_option(struct gro_tcp4_item *item,
-		struct tcp_hdr *tcp_hdr,
-		uint16_t tcp_hl,
-		uint16_t tcp_dl,
-		uint16_t ip_id,
-		uint32_t sent_seq)
-{
-	struct rte_mbuf *pkt0 = item->firstseg;
-	struct ipv4_hdr *ipv4_hdr0;
-	struct tcp_hdr *tcp_hdr0;
-	uint16_t tcp_hl0, tcp_dl0;
-	uint16_t len;
-
-	ipv4_hdr0 = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt0, char *) +
-			pkt0->l2_len);
-	tcp_hdr0 = (struct tcp_hdr *)((char *)ipv4_hdr0 + pkt0->l3_len);
-	tcp_hl0 = pkt0->l4_len;
-
-	/* check if TCP option fields equal. If not, return 0. */
-	len = RTE_MAX(tcp_hl, tcp_hl0) - sizeof(struct tcp_hdr);
-	if ((tcp_hl != tcp_hl0) ||
-			((len > 0) && (memcmp(tcp_hdr + 1,
-					tcp_hdr0 + 1,
-					len) != 0)))
-		return 0;
-
-	/* check if the two packets are neighbors */
-	tcp_dl0 = pkt0->pkt_len - pkt0->l2_len - pkt0->l3_len - tcp_hl0;
-	if ((sent_seq == (item->sent_seq + tcp_dl0)) &&
-			(ip_id == (item->ip_id + 1)))
-		/* append the new packet */
-		return 1;
-	else if (((sent_seq + tcp_dl) == item->sent_seq) &&
-			((ip_id + item->nb_merged) == item->ip_id))
-		/* pre-pend the new packet */
-		return -1;
-	else
-		return 0;
-}
-
 static inline uint32_t
 find_an_empty_item(struct gro_tcp4_tbl *tbl)
 {
@@ -215,13 +113,13 @@ find_an_empty_item(struct gro_tcp4_tbl *tbl)
 }
 
 static inline uint32_t
-find_an_empty_key(struct gro_tcp4_tbl *tbl)
+find_an_empty_flow(struct gro_tcp4_tbl *tbl)
 {
 	uint32_t i;
-	uint32_t max_key_num = tbl->max_key_num;
+	uint32_t max_flow_num = tbl->max_flow_num;
 
-	for (i = 0; i < max_key_num; i++)
-		if (tbl->keys[i].start_index == INVALID_ARRAY_INDEX)
+	for (i = 0; i < max_flow_num; i++)
+		if (tbl->flows[i].start_index == INVALID_ARRAY_INDEX)
 			return i;
 	return INVALID_ARRAY_INDEX;
 }
@@ -229,7 +127,6 @@ find_an_empty_key(struct gro_tcp4_tbl *tbl)
 static inline uint32_t
 insert_new_item(struct gro_tcp4_tbl *tbl,
 		struct rte_mbuf *pkt,
-		uint16_t ip_id,
 		uint32_t sent_seq,
 		uint32_t prev_idx,
 		uint64_t start_time)
@@ -245,7 +142,6 @@ insert_new_item(struct gro_tcp4_tbl *tbl,
 	tbl->items[item_idx].start_time = start_time;
 	tbl->items[item_idx].next_pkt_idx = INVALID_ARRAY_INDEX;
 	tbl->items[item_idx].sent_seq = sent_seq;
-	tbl->items[item_idx].ip_id = ip_id;
 	tbl->items[item_idx].nb_merged = 1;
 	tbl->item_num++;
 
@@ -265,7 +161,7 @@ delete_item(struct gro_tcp4_tbl *tbl, uint32_t item_idx,
 {
 	uint32_t next_idx = tbl->items[item_idx].next_pkt_idx;
 
-	/* set NULL to firstseg to indicate it's an empty item */
+	/* NULL indicates an empty item */
 	tbl->items[item_idx].firstseg = NULL;
 	tbl->item_num--;
 	if (prev_item_idx != INVALID_ARRAY_INDEX)
@@ -275,53 +171,33 @@ delete_item(struct gro_tcp4_tbl *tbl, uint32_t item_idx,
 }
 
 static inline uint32_t
-insert_new_key(struct gro_tcp4_tbl *tbl,
-		struct tcp4_key *key_src,
+insert_new_flow(struct gro_tcp4_tbl *tbl,
+		struct tcp4_flow_key *src,
 		uint32_t item_idx)
 {
-	struct tcp4_key *key_dst;
-	uint32_t key_idx;
+	struct tcp4_flow_key *dst;
+	uint32_t flow_idx;
 
-	key_idx = find_an_empty_key(tbl);
-	if (key_idx == INVALID_ARRAY_INDEX)
+	flow_idx = find_an_empty_flow(tbl);
+	if (flow_idx == INVALID_ARRAY_INDEX)
 		return INVALID_ARRAY_INDEX;
 
-	key_dst = &(tbl->keys[key_idx].key);
+	dst = &(tbl->flows[flow_idx].key);
 
-	ether_addr_copy(&(key_src->eth_saddr), &(key_dst->eth_saddr));
-	ether_addr_copy(&(key_src->eth_daddr), &(key_dst->eth_daddr));
-	key_dst->ip_src_addr = key_src->ip_src_addr;
-	key_dst->ip_dst_addr = key_src->ip_dst_addr;
-	key_dst->recv_ack = key_src->recv_ack;
-	key_dst->src_port = key_src->src_port;
-	key_dst->dst_port = key_src->dst_port;
+	ether_addr_copy(&(src->eth_saddr), &(dst->eth_saddr));
+	ether_addr_copy(&(src->eth_daddr), &(dst->eth_daddr));
+	dst->ip_src_addr = src->ip_src_addr;
+	dst->ip_dst_addr = src->ip_dst_addr;
+	dst->recv_ack = src->recv_ack;
+	dst->src_port = src->src_port;
+	dst->dst_port = src->dst_port;
 
-	/* non-INVALID_ARRAY_INDEX value indicates this key is valid */
-	tbl->keys[key_idx].start_index = item_idx;
-	tbl->key_num++;
+	tbl->flows[flow_idx].start_index = item_idx;
+	tbl->flow_num++;
 
-	return key_idx;
+	return flow_idx;
 }
 
-static inline int
-is_same_key(struct tcp4_key k1, struct tcp4_key k2)
-{
-	if (is_same_ether_addr(&k1.eth_saddr, &k2.eth_saddr) == 0)
-		return 0;
-
-	if (is_same_ether_addr(&k1.eth_daddr, &k2.eth_daddr) == 0)
-		return 0;
-
-	return ((k1.ip_src_addr == k2.ip_src_addr) &&
-			(k1.ip_dst_addr == k2.ip_dst_addr) &&
-			(k1.recv_ack == k2.recv_ack) &&
-			(k1.src_port == k2.src_port) &&
-			(k1.dst_port == k2.dst_port));
-}
-
-/*
- * update packet length for the flushed packet.
- */
 static inline void
 update_header(struct gro_tcp4_item *item)
 {
@@ -343,30 +219,32 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
 	struct ipv4_hdr *ipv4_hdr;
 	struct tcp_hdr *tcp_hdr;
 	uint32_t sent_seq;
-	uint16_t tcp_dl, ip_id;
+	uint16_t tcp_dl, hdr_len;
 
-	struct tcp4_key key;
+	struct tcp4_flow_key key;
 	uint32_t cur_idx, prev_idx, item_idx;
-	uint32_t i, max_key_num;
+	uint32_t i, max_flow_num;
 	int cmp;
 
 	eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
 	ipv4_hdr = (struct ipv4_hdr *)((char *)eth_hdr + pkt->l2_len);
 	tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len);
+	hdr_len = pkt->l2_len + pkt->l3_len + pkt->l4_len;
 
 	/*
-	 * if FIN, SYN, RST, PSH, URG, ECE or
-	 * CWR is set, return immediately.
+	 * Don't process the packet which has FIN, SYN, RST, PSH, URG, ECE
+	 * or CWR set.
 	 */
 	if (tcp_hdr->tcp_flags != TCP_ACK_FLAG)
 		return -1;
-	/* if payload length is 0, return immediately */
-	tcp_dl = rte_be_to_cpu_16(ipv4_hdr->total_length) - pkt->l3_len -
-		pkt->l4_len;
-	if (tcp_dl == 0)
+	/*
+	 * Don't process the packet whose payload length is less than or
+	 * equal to 0.
+	 */
+	tcp_dl = pkt->pkt_len - hdr_len;
+	if (tcp_dl <= 0)
 		return -1;
 
-	ip_id = rte_be_to_cpu_16(ipv4_hdr->packet_id);
 	sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq);
 
 	ether_addr_copy(&(eth_hdr->s_addr), &(key.eth_saddr));
@@ -377,49 +255,51 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
 	key.dst_port = tcp_hdr->dst_port;
 	key.recv_ack = tcp_hdr->recv_ack;
 
-	/* search for a key */
-	max_key_num = tbl->max_key_num;
-	for (i = 0; i < max_key_num; i++) {
-		if ((tbl->keys[i].start_index != INVALID_ARRAY_INDEX) &&
-				is_same_key(tbl->keys[i].key, key))
+	/* Search for a matched flow. */
+	max_flow_num = tbl->max_flow_num;
+	for (i = 0; i < max_flow_num; i++) {
+		if ((tbl->flows[i].start_index != INVALID_ARRAY_INDEX) &&
+				is_same_tcp4_flow(tbl->flows[i].key, key))
 			break;
 	}
 
-	/* can't find a key, so insert a new key and a new item. */
-	if (i == tbl->max_key_num) {
-		item_idx = insert_new_item(tbl, pkt, ip_id, sent_seq,
+	/*
+	 * Fail to find a matched flow. Insert a new flow and store the
+	 * packet into the flow.
+	 */
+	if (i == tbl->max_flow_num) {
+		item_idx = insert_new_item(tbl, pkt, sent_seq,
 				INVALID_ARRAY_INDEX, start_time);
 		if (item_idx == INVALID_ARRAY_INDEX)
 			return -1;
-		if (insert_new_key(tbl, &key, item_idx) ==
+		if (insert_new_flow(tbl, &key, item_idx) ==
 				INVALID_ARRAY_INDEX) {
-			/*
-			 * fail to insert a new key, so
-			 * delete the inserted item
-			 */
+			/* Fail to insert a new flow. */
 			delete_item(tbl, item_idx, INVALID_ARRAY_INDEX);
 			return -1;
 		}
 		return 0;
 	}
 
-	/* traverse all packets in the item group to find one to merge */
-	cur_idx = tbl->keys[i].start_index;
+	/*
+	 * Check all packets in the flow and try to find a neighbor for
+	 * the input packet.
+	 */
+	cur_idx = tbl->flows[i].start_index;
 	prev_idx = cur_idx;
 	do {
 		cmp = check_seq_option(&(tbl->items[cur_idx]), tcp_hdr,
-				pkt->l4_len, tcp_dl, ip_id, sent_seq);
+				pkt->l4_len, tcp_dl, sent_seq, 0);
 		if (cmp) {
 			if (merge_two_tcp4_packets(&(tbl->items[cur_idx]),
-						pkt, ip_id,
-						sent_seq, cmp))
+						pkt, sent_seq, cmp, 0))
 				return 1;
 			/*
-			 * fail to merge two packets since the packet
-			 * length will be greater than the max value.
-			 * So insert the packet into the item group.
+			 * Fail to merge the two packets, as the packet
+			 * length is greater than the max value. Store
+			 * the packet into the flow.
 			 */
-			if (insert_new_item(tbl, pkt, ip_id, sent_seq,
+			if (insert_new_item(tbl, pkt, sent_seq,
 						prev_idx, start_time) ==
 					INVALID_ARRAY_INDEX)
 				return -1;
@@ -429,11 +309,8 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
 		cur_idx = tbl->items[cur_idx].next_pkt_idx;
 	} while (cur_idx != INVALID_ARRAY_INDEX);
 
-	/*
-	 * can't find a packet in the item group to merge,
-	 * so insert the packet into the item group.
-	 */
-	if (insert_new_item(tbl, pkt, ip_id, sent_seq, prev_idx,
+	/* Fail to find a neighbor, so store the packet into the flow. */
+	if (insert_new_item(tbl, pkt, sent_seq, prev_idx,
 				start_time) == INVALID_ARRAY_INDEX)
 		return -1;
 
@@ -448,44 +325,33 @@ gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl,
 {
 	uint16_t k = 0;
 	uint32_t i, j;
-	uint32_t max_key_num = tbl->max_key_num;
+	uint32_t max_flow_num = tbl->max_flow_num;
 
-	for (i = 0; i < max_key_num; i++) {
-		/* all keys have been checked, return immediately */
-		if (tbl->key_num == 0)
+	for (i = 0; i < max_flow_num; i++) {
+		if (unlikely(tbl->flow_num == 0))
 			return k;
 
-		j = tbl->keys[i].start_index;
+		j = tbl->flows[i].start_index;
 		while (j != INVALID_ARRAY_INDEX) {
 			if (tbl->items[j].start_time <= flush_timestamp) {
 				out[k++] = tbl->items[j].firstseg;
 				if (tbl->items[j].nb_merged > 1)
 					update_header(&(tbl->items[j]));
 				/*
-				 * delete the item and get
-				 * the next packet index
+				 * Delete the packet and get the next
+				 * packet in the flow.
 				 */
-				j = delete_item(tbl, j,
-						INVALID_ARRAY_INDEX);
+				j = delete_item(tbl, j, INVALID_ARRAY_INDEX);
+				tbl->flows[i].start_index = j;
+				if (j == INVALID_ARRAY_INDEX)
+					tbl->flow_num--;
 
-				/*
-				 * delete the key as all of
-				 * packets are flushed
-				 */
-				if (j == INVALID_ARRAY_INDEX) {
-					tbl->keys[i].start_index =
-						INVALID_ARRAY_INDEX;
-					tbl->key_num--;
-				} else
-					/* update start_index of the key */
-					tbl->keys[i].start_index = j;
-
-				if (k == nb_out)
+				if (unlikely(k == nb_out))
 					return k;
 			} else
 				/*
-				 * left packets of this key won't be
-				 * timeout, so go to check other keys.
+				 * The remaining packets in this flow won't
+				 * have timed out. Go to check other flows.
 				 */
 				break;
 		}
diff --git a/lib/librte_gro/gro_tcp4.h b/lib/librte_gro/gro_tcp4.h
index 0a81716..de9925e 100644
--- a/lib/librte_gro/gro_tcp4.h
+++ b/lib/librte_gro/gro_tcp4.h
@@ -33,17 +33,20 @@
 #ifndef _GRO_TCP4_H_
 #define _GRO_TCP4_H_
 
+#include <rte_ip.h>
+#include <rte_tcp.h>
+
 #define INVALID_ARRAY_INDEX 0xffffffffUL
 #define GRO_TCP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
 
 /*
- * the max L3 length of a TCP/IPv4 packet. The L3 length
- * is the sum of ipv4 header, tcp header and L4 payload.
+ * The max length of an IPv4 packet, which includes the length of the L3
+ * header, the L4 header and the payload.
  */
-#define TCP4_MAX_L3_LENGTH UINT16_MAX
+#define MAX_IPV4_PKT_LENGTH UINT16_MAX
 
-/* criteria of mergeing packets */
-struct tcp4_key {
+/* Header fields representing a TCP/IPv4 flow. */
+struct tcp4_flow_key {
 	struct ether_addr eth_saddr;
 	struct ether_addr eth_daddr;
 	uint32_t ip_src_addr;
@@ -54,43 +57,39 @@ struct tcp4_key {
 	uint16_t dst_port;
 };
 
-struct gro_tcp4_key {
-	struct tcp4_key key;
+struct gro_tcp4_flow {
+	struct tcp4_flow_key key;
 	/*
-	 * the index of the first packet in the item group.
-	 * If the value is INVALID_ARRAY_INDEX, it means
-	 * the key is empty.
+	 * The index of the first packet in the flow.
+	 * INVALID_ARRAY_INDEX indicates an empty flow.
 	 */
 	uint32_t start_index;
 };
 
 struct gro_tcp4_item {
 	/*
-	 * first segment of the packet. If the value
+	 * First segment of the packet. If the value
 	 * is NULL, it means the item is empty.
 	 */
 	struct rte_mbuf *firstseg;
-	/* last segment of the packet */
+	/* Last segment of the packet */
 	struct rte_mbuf *lastseg;
 	/*
-	 * the time when the first packet is inserted
+	 * The time when the first packet is inserted
 	 * into the table. If a packet in the table is
 	 * merged with an incoming packet, this value
-	 * won't be updated. We set this value only
-	 * when the first packet is inserted into the
-	 * table.
+	 * won't be updated.
 	 */
 	uint64_t start_time;
 	/*
-	 * we use next_pkt_idx to chain the packets that
-	 * have same key value but can't be merged together.
+	 * next_pkt_idx is used to chain the packets that
+	 * are in the same flow but can't be merged together
+	 * (e.g. caused by packet reordering).
 	 */
 	uint32_t next_pkt_idx;
-	/* the sequence number of the packet */
+	/* TCP sequence number of the packet */
 	uint32_t sent_seq;
-	/* the IP ID of the packet */
-	uint16_t ip_id;
-	/* the number of merged packets */
+	/* The number of merged packets */
 	uint16_t nb_merged;
 };
 
@@ -100,31 +99,31 @@ struct gro_tcp4_item {
 struct gro_tcp4_tbl {
 	/* item array */
 	struct gro_tcp4_item *items;
-	/* key array */
-	struct gro_tcp4_key *keys;
+	/* flow array */
+	struct gro_tcp4_flow *flows;
 	/* current item number */
 	uint32_t item_num;
-	/* current key num */
-	uint32_t key_num;
+	/* current flow num */
+	uint32_t flow_num;
 	/* item array size */
 	uint32_t max_item_num;
-	/* key array size */
-	uint32_t max_key_num;
+	/* flow array size */
+	uint32_t max_flow_num;
 };
 
 /**
  * This function creates a TCP/IPv4 reassembly table.
  *
  * @param socket_id
- *  socket index for allocating TCP/IPv4 reassemble table
+ *  Socket index for allocating the TCP/IPv4 reassemble table
  * @param max_flow_num
- *  the maximum number of flows in the TCP/IPv4 GRO table
+ *  The maximum number of flows in the TCP/IPv4 GRO table
  * @param max_item_per_flow
- *  the maximum packet number per flow.
+ *  The maximum number of packets per flow
  *
  * @return
- *  if create successfully, return a pointer which points to the
- *  created TCP/IPv4 GRO table. Otherwise, return NULL.
+ *  - Return the table pointer on success.
+ *  - Return NULL on failure.
  */
 void *gro_tcp4_tbl_create(uint16_t socket_id,
 		uint16_t max_flow_num,
@@ -134,62 +133,53 @@ void *gro_tcp4_tbl_create(uint16_t socket_id,
  * This function destroys a TCP/IPv4 reassembly table.
  *
  * @param tbl
- *  a pointer points to the TCP/IPv4 reassembly table.
+ *  Pointer pointing to the TCP/IPv4 reassembly table.
  */
 void gro_tcp4_tbl_destroy(void *tbl);
 
 /**
- * This function searches for a packet in the TCP/IPv4 reassembly table
- * to merge with the inputted one. To merge two packets is to chain them
- * together and update packet headers. Packets, whose SYN, FIN, RST, PSH
- * CWR, ECE or URG bit is set, are returned immediately. Packets which
- * only have packet headers (i.e. without data) are also returned
- * immediately. Otherwise, the packet is either merged, or inserted into
- * the table. Besides, if there is no available space to insert the
- * packet, this function returns immediately too.
+ * This function merges a TCP/IPv4 packet. It doesn't process a packet
+ * that has SYN, FIN, RST, PSH, CWR, ECE or URG set, or that has no
+ * payload. It returns the packet if there is no available space in the
+ * table.
  *
- * This function assumes the inputted packet is with correct IPv4 and
- * TCP checksums. And if two packets are merged, it won't re-calculate
- * IPv4 and TCP checksums. Besides, if the inputted packet is IP
- * fragmented, it assumes the packet is complete (with TCP header).
+ * This function doesn't check if the packet has correct checksums.
+ * Additionally, it doesn't re-calculate checksums for the merged packet.
+ * If the input packet is IP fragmented, it assumes the packet is complete.
  *
  * @param pkt
- *  packet to reassemble.
+ *  Packet to reassemble
  * @param tbl
- *  a pointer that points to a TCP/IPv4 reassembly table.
+ *  Pointer pointing to the TCP/IPv4 reassembly table
  * @start_time
- *  the start time that the packet is inserted into the table
+ *  The time when the packet is inserted into the table
  *
  * @return
- *  if the packet doesn't have data, or SYN, FIN, RST, PSH, CWR, ECE
- *  or URG bit is set, or there is no available space in the table to
- *  insert a new item or a new key, return a negative value. If the
- *  packet is merged successfully, return an positive value. If the
- *  packet is inserted into the table, return 0.
+ *  - Return a positive value if the input packet is merged.
+ *  - Return zero if the input packet isn't merged but stored in the table.
+ *  - Return a negative value if the packet is neither merged nor stored.
  */
 int32_t gro_tcp4_reassemble(struct rte_mbuf *pkt,
 		struct gro_tcp4_tbl *tbl,
 		uint64_t start_time);
 
 /**
- * This function flushes timeout packets in a TCP/IPv4 reassembly table
- * to applications, and without updating checksums for merged packets.
- * The max number of flushed timeout packets is the element number of
- * the array which is used to keep flushed packets.
+ * This function flushes timed-out packets from a TCP/IPv4 reassembly
+ * table, without updating checksums.
  *
  * @param tbl
- *  a pointer that points to a TCP GRO table.
+ *  Pointer pointing to a TCP/IPv4 reassembly table
  * @param flush_timestamp
- *  this function flushes packets which are inserted into the table
- *  before or at the flush_timestamp.
+ *  Flush packets which are inserted into the table before or at the
+ *  flush_timestamp
  * @param out
- *  pointer array which is used to keep flushed packets.
+ *  Pointer array used to keep flushed packets
  * @param nb_out
- *  the element number of out. It's also the max number of timeout
+ *  The number of elements in 'out'. It also determines the maximum number of
  *  packets that can be flushed finally.
  *
  * @return
- *  the number of packets that are returned.
+ *  The number of flushed packets
  */
 uint16_t gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl,
 		uint64_t flush_timestamp,
@@ -201,10 +191,124 @@ uint16_t gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl,
  * reassembly table.
  *
  * @param tbl
- *  pointer points to a TCP/IPv4 reassembly table.
+ *  Pointer pointing to a TCP/IPv4 reassembly table
  *
  * @return
- *  the number of packets in the table
+ *  The number of packets in the table
  */
 uint32_t gro_tcp4_tbl_pkt_count(void *tbl);
+
+/*
+ * Check if two TCP/IPv4 packets belong to the same flow.
+ */
+static inline int
+is_same_tcp4_flow(struct tcp4_flow_key k1, struct tcp4_flow_key k2)
+{
+	if (is_same_ether_addr(&k1.eth_saddr, &k2.eth_saddr) == 0)
+		return 0;
+
+	if (is_same_ether_addr(&k1.eth_daddr, &k2.eth_daddr) == 0)
+		return 0;
+
+	return ((k1.ip_src_addr == k2.ip_src_addr) &&
+			(k1.ip_dst_addr == k2.ip_dst_addr) &&
+			(k1.recv_ack == k2.recv_ack) &&
+			(k1.src_port == k2.src_port) &&
+			(k1.dst_port == k2.dst_port));
+}
+
+/*
+ * Check if two TCP/IPv4 packets are neighbors.
+ */
+static inline int
+check_seq_option(struct gro_tcp4_item *item,
+		struct tcp_hdr *tcph,
+		uint16_t tcp_hl,
+		uint16_t tcp_dl,
+		uint32_t sent_seq,
+		uint16_t l2_offset)
+{
+	struct rte_mbuf *pkt_orig = item->firstseg;
+	struct ipv4_hdr *iph_orig;
+	struct tcp_hdr *tcph_orig;
+	uint16_t len, l4_len_orig;
+
+	iph_orig = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt_orig, char *) +
+			l2_offset + pkt_orig->l2_len);
+	tcph_orig = (struct tcp_hdr *)((char *)iph_orig + pkt_orig->l3_len);
+	l4_len_orig = pkt_orig->l4_len;
+
+	/* Check if the TCP option fields are equal */
+	len = RTE_MAX(tcp_hl, l4_len_orig) - sizeof(struct tcp_hdr);
+	if ((tcp_hl != l4_len_orig) || ((len > 0) &&
+				(memcmp(tcph + 1, tcph_orig + 1,
+					len) != 0)))
+		return 0;
+
+	/* Check if the two packets are neighbors */
+	len = pkt_orig->pkt_len - l2_offset - pkt_orig->l2_len -
+		pkt_orig->l3_len - l4_len_orig;
+	if (sent_seq == item->sent_seq + len)
+		/* Append the new packet */
+		return 1;
+	else if (sent_seq + tcp_dl == item->sent_seq)
+		/* Pre-pend the new packet */
+		return -1;
+	else
+		return 0;
+}
+
+/*
+ * Merge two TCP/IPv4 packets without updating checksums.
+ * If cmp is larger than 0, append the new packet to the
+ * original packet. Otherwise, pre-pend the new packet to
+ * the original packet.
+ */
+static inline int
+merge_two_tcp4_packets(struct gro_tcp4_item *item,
+		struct rte_mbuf *pkt,
+		uint32_t sent_seq,
+		int cmp,
+		uint16_t l2_offset)
+{
+	struct rte_mbuf *pkt_head, *pkt_tail, *lastseg;
+	uint16_t hdr_len;
+
+	if (cmp > 0) {
+		pkt_head = item->firstseg;
+		pkt_tail = pkt;
+	} else {
+		pkt_head = pkt;
+		pkt_tail = item->firstseg;
+	}
+
+	/* Check if the length is greater than the max value */
+	hdr_len = l2_offset + pkt_head->l2_len + pkt_head->l3_len +
+		pkt_head->l4_len;
+	if (pkt_head->pkt_len - l2_offset - pkt_head->l2_len +
+			pkt_tail->pkt_len - hdr_len > MAX_IPV4_PKT_LENGTH)
+		return 0;
+
+	/* Remove packet header for the tail packet */
+	rte_pktmbuf_adj(pkt_tail, hdr_len);
+
+	/* Chain two packets together */
+	if (cmp > 0) {
+		item->lastseg->next = pkt;
+		item->lastseg = rte_pktmbuf_lastseg(pkt);
+	} else {
+		lastseg = rte_pktmbuf_lastseg(pkt);
+		lastseg->next = item->firstseg;
+		item->firstseg = pkt;
+		/* Update sent_seq to the smaller value */
+		item->sent_seq = sent_seq;
+	}
+	item->nb_merged++;
+
+	/* Update mbuf metadata for the merged packet */
+	pkt_head->nb_segs += pkt_tail->nb_segs;
+	pkt_head->pkt_len += pkt_tail->pkt_len;
+
+	return 1;
+}
 #endif
diff --git a/lib/librte_gro/rte_gro.c b/lib/librte_gro/rte_gro.c
index 7853246..dfee932 100644
--- a/lib/librte_gro/rte_gro.c
+++ b/lib/librte_gro/rte_gro.c
@@ -51,6 +51,9 @@ static gro_tbl_destroy_fn tbl_destroy_fn[RTE_GRO_TYPE_MAX_NUM] = {
 static gro_tbl_pkt_count_fn tbl_pkt_count_fn[RTE_GRO_TYPE_MAX_NUM] = {
 			gro_tcp4_tbl_pkt_count, NULL};
 
+#define IS_IPV4_TCP_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \
+		((ptype & RTE_PTYPE_L4_TCP) == RTE_PTYPE_L4_TCP))
+
 /*
  * GRO context structure, which is used to merge packets. It keeps
  * many reassembly tables of desired GRO types. Applications need to
@@ -131,62 +134,55 @@ rte_gro_reassemble_burst(struct rte_mbuf **pkts,
 		uint16_t nb_pkts,
 		const struct rte_gro_param *param)
 {
-	uint16_t i;
-	uint16_t nb_after_gro = nb_pkts;
-	uint32_t item_num;
-
-	/* allocate a reassembly table for TCP/IPv4 GRO */
+	/* Allocate a reassembly table for TCP/IPv4 GRO. */
 	struct gro_tcp4_tbl tcp_tbl;
-	struct gro_tcp4_key tcp_keys[RTE_GRO_MAX_BURST_ITEM_NUM];
+	struct gro_tcp4_flow tcp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
 	struct gro_tcp4_item tcp_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {{0} };
 
 	struct rte_mbuf *unprocess_pkts[nb_pkts];
-	uint16_t unprocess_num = 0;
-	int32_t ret;
 	uint64_t current_time;
+	uint32_t item_num;
+	int32_t ret;
+	uint16_t i, unprocess_num = 0, nb_after_gro = nb_pkts;
 
 	if ((param->gro_types & RTE_GRO_TCP_IPV4) == 0)
 		return nb_pkts;
 
-	/* get the actual number of packets */
+	/* Get the actual number of packets. */
 	item_num = RTE_MIN(nb_pkts, (param->max_flow_num *
-			param->max_item_per_flow));
+				param->max_item_per_flow));
 	item_num = RTE_MIN(item_num, RTE_GRO_MAX_BURST_ITEM_NUM);
 
 	for (i = 0; i < item_num; i++)
-		tcp_keys[i].start_index = INVALID_ARRAY_INDEX;
+		tcp_flows[i].start_index = INVALID_ARRAY_INDEX;
 
-	tcp_tbl.keys = tcp_keys;
+	tcp_tbl.flows = tcp_flows;
 	tcp_tbl.items = tcp_items;
-	tcp_tbl.key_num = 0;
+	tcp_tbl.flow_num = 0;
 	tcp_tbl.item_num = 0;
-	tcp_tbl.max_key_num = item_num;
+	tcp_tbl.max_flow_num = item_num;
 	tcp_tbl.max_item_num = item_num;
 
 	current_time = rte_rdtsc();
 
 	for (i = 0; i < nb_pkts; i++) {
-		if ((pkts[i]->packet_type & (RTE_PTYPE_L3_IPV4 |
-					RTE_PTYPE_L4_TCP)) ==
-				(RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP)) {
-			ret = gro_tcp4_reassemble(pkts[i],
-					&tcp_tbl,
+		if (IS_IPV4_TCP_PKT(pkts[i]->packet_type)) {
+			ret = gro_tcp4_reassemble(pkts[i], &tcp_tbl,
 					current_time);
 			if (ret > 0)
-				/* merge successfully */
+				/* Merged successfully */
 				nb_after_gro--;
-			else if (ret < 0) {
-				unprocess_pkts[unprocess_num++] =
-					pkts[i];
-			}
+			else if (ret < 0)
+				unprocess_pkts[unprocess_num++] = pkts[i];
 		} else
 			unprocess_pkts[unprocess_num++] = pkts[i];
 	}
 
-	/* re-arrange GROed packets */
 	if (nb_after_gro < nb_pkts) {
+		/* Flush packets from the tables. */
 		i = gro_tcp4_tbl_timeout_flush(&tcp_tbl, current_time,
 				pkts, nb_pkts);
+		/* Copy unprocessed packets. */
 		if (unprocess_num > 0) {
 			memcpy(&pkts[i], unprocess_pkts,
 					sizeof(struct rte_mbuf *) *
@@ -202,10 +198,11 @@ rte_gro_reassemble(struct rte_mbuf **pkts,
 		uint16_t nb_pkts,
 		void *ctx)
 {
-	uint16_t i, unprocess_num = 0;
 	struct rte_mbuf *unprocess_pkts[nb_pkts];
 	struct gro_ctx *gro_ctx = ctx;
+	void *tbl;
 	uint64_t current_time;
+	uint16_t i, unprocess_num = 0;
 
 	if ((gro_ctx->gro_types & RTE_GRO_TCP_IPV4) == 0)
 		return nb_pkts;
@@ -213,12 +210,9 @@ rte_gro_reassemble(struct rte_mbuf **pkts,
 	current_time = rte_rdtsc();
 
 	for (i = 0; i < nb_pkts; i++) {
-		if ((pkts[i]->packet_type & (RTE_PTYPE_L3_IPV4 |
-					RTE_PTYPE_L4_TCP)) ==
-				(RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP)) {
-			if (gro_tcp4_reassemble(pkts[i],
-						gro_ctx->tbls
-						[RTE_GRO_TCP_IPV4_INDEX],
+		if (IS_IPV4_TCP_PKT(pkts[i]->packet_type)) {
+			tbl = gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX];
+			if (gro_tcp4_reassemble(pkts[i], tbl,
 						current_time) < 0)
 				unprocess_pkts[unprocess_num++] = pkts[i];
 		} else
@@ -252,6 +246,7 @@ rte_gro_timeout_flush(void *ctx,
 				flush_timestamp,
 				out, max_nb_out);
 	}
+
 	return 0;
 }
 
@@ -274,5 +269,6 @@ rte_gro_get_pkt_count(void *ctx)
 			continue;
 		item_num += pkt_count_fn(gro_ctx->tbls[i]);
 	}
+
 	return item_num;
 }
-- 
2.7.4



More information about the dev mailing list