[dpdk-dev] [PATCH 1/2] gro: TCP/IPV4 GRO codes cleanup
Jiayu Hu
jiayu.hu at intel.com
Sat Nov 25 04:17:39 CET 2017
This patch updates TCP/IPv4 GRO as follows:
- remove IP identification check when merging TCP/IPv4 packets
- extract common internal functions for supporting tunneled GRO
- rename internal functions and variables for better understanding
- update comments
Signed-off-by: Jiayu Hu <jiayu.hu at intel.com>
---
lib/librte_gro/gro_tcp4.c | 294 +++++++++++++---------------------------------
lib/librte_gro/gro_tcp4.h | 236 ++++++++++++++++++++++++++-----------
lib/librte_gro/rte_gro.c | 60 +++++-----
3 files changed, 278 insertions(+), 312 deletions(-)
diff --git a/lib/librte_gro/gro_tcp4.c b/lib/librte_gro/gro_tcp4.c
index 61a0423..a560a84 100644
--- a/lib/librte_gro/gro_tcp4.c
+++ b/lib/librte_gro/gro_tcp4.c
@@ -34,8 +34,6 @@
#include <rte_mbuf.h>
#include <rte_cycles.h>
#include <rte_ethdev.h>
-#include <rte_ip.h>
-#include <rte_tcp.h>
#include "gro_tcp4.h"
@@ -72,20 +70,20 @@ gro_tcp4_tbl_create(uint16_t socket_id,
}
tbl->max_item_num = entries_num;
- size = sizeof(struct gro_tcp4_key) * entries_num;
- tbl->keys = rte_zmalloc_socket(__func__,
+ size = sizeof(struct gro_tcp4_flow) * entries_num;
+ tbl->flows = rte_zmalloc_socket(__func__,
size,
RTE_CACHE_LINE_SIZE,
socket_id);
- if (tbl->keys == NULL) {
+ if (tbl->flows == NULL) {
rte_free(tbl->items);
rte_free(tbl);
return NULL;
}
- /* INVALID_ARRAY_INDEX indicates empty key */
+ /* INVALID_ARRAY_INDEX indicates an empty flow */
for (i = 0; i < entries_num; i++)
- tbl->keys[i].start_index = INVALID_ARRAY_INDEX;
- tbl->max_key_num = entries_num;
+ tbl->flows[i].start_index = INVALID_ARRAY_INDEX;
+ tbl->max_flow_num = entries_num;
return tbl;
}
@@ -97,111 +95,11 @@ gro_tcp4_tbl_destroy(void *tbl)
if (tcp_tbl) {
rte_free(tcp_tbl->items);
- rte_free(tcp_tbl->keys);
+ rte_free(tcp_tbl->flows);
}
rte_free(tcp_tbl);
}
-/*
- * merge two TCP/IPv4 packets without updating checksums.
- * If cmp is larger than 0, append the new packet to the
- * original packet. Otherwise, pre-pend the new packet to
- * the original packet.
- */
-static inline int
-merge_two_tcp4_packets(struct gro_tcp4_item *item_src,
- struct rte_mbuf *pkt,
- uint16_t ip_id,
- uint32_t sent_seq,
- int cmp)
-{
- struct rte_mbuf *pkt_head, *pkt_tail, *lastseg;
- uint16_t tcp_datalen;
-
- if (cmp > 0) {
- pkt_head = item_src->firstseg;
- pkt_tail = pkt;
- } else {
- pkt_head = pkt;
- pkt_tail = item_src->firstseg;
- }
-
- /* check if the packet length will be beyond the max value */
- tcp_datalen = pkt_tail->pkt_len - pkt_tail->l2_len -
- pkt_tail->l3_len - pkt_tail->l4_len;
- if (pkt_head->pkt_len - pkt_head->l2_len + tcp_datalen >
- TCP4_MAX_L3_LENGTH)
- return 0;
-
- /* remove packet header for the tail packet */
- rte_pktmbuf_adj(pkt_tail,
- pkt_tail->l2_len +
- pkt_tail->l3_len +
- pkt_tail->l4_len);
-
- /* chain two packets together */
- if (cmp > 0) {
- item_src->lastseg->next = pkt;
- item_src->lastseg = rte_pktmbuf_lastseg(pkt);
- /* update IP ID to the larger value */
- item_src->ip_id = ip_id;
- } else {
- lastseg = rte_pktmbuf_lastseg(pkt);
- lastseg->next = item_src->firstseg;
- item_src->firstseg = pkt;
- /* update sent_seq to the smaller value */
- item_src->sent_seq = sent_seq;
- }
- item_src->nb_merged++;
-
- /* update mbuf metadata for the merged packet */
- pkt_head->nb_segs += pkt_tail->nb_segs;
- pkt_head->pkt_len += pkt_tail->pkt_len;
-
- return 1;
-}
-
-static inline int
-check_seq_option(struct gro_tcp4_item *item,
- struct tcp_hdr *tcp_hdr,
- uint16_t tcp_hl,
- uint16_t tcp_dl,
- uint16_t ip_id,
- uint32_t sent_seq)
-{
- struct rte_mbuf *pkt0 = item->firstseg;
- struct ipv4_hdr *ipv4_hdr0;
- struct tcp_hdr *tcp_hdr0;
- uint16_t tcp_hl0, tcp_dl0;
- uint16_t len;
-
- ipv4_hdr0 = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt0, char *) +
- pkt0->l2_len);
- tcp_hdr0 = (struct tcp_hdr *)((char *)ipv4_hdr0 + pkt0->l3_len);
- tcp_hl0 = pkt0->l4_len;
-
- /* check if TCP option fields equal. If not, return 0. */
- len = RTE_MAX(tcp_hl, tcp_hl0) - sizeof(struct tcp_hdr);
- if ((tcp_hl != tcp_hl0) ||
- ((len > 0) && (memcmp(tcp_hdr + 1,
- tcp_hdr0 + 1,
- len) != 0)))
- return 0;
-
- /* check if the two packets are neighbors */
- tcp_dl0 = pkt0->pkt_len - pkt0->l2_len - pkt0->l3_len - tcp_hl0;
- if ((sent_seq == (item->sent_seq + tcp_dl0)) &&
- (ip_id == (item->ip_id + 1)))
- /* append the new packet */
- return 1;
- else if (((sent_seq + tcp_dl) == item->sent_seq) &&
- ((ip_id + item->nb_merged) == item->ip_id))
- /* pre-pend the new packet */
- return -1;
- else
- return 0;
-}
-
static inline uint32_t
find_an_empty_item(struct gro_tcp4_tbl *tbl)
{
@@ -215,13 +113,13 @@ find_an_empty_item(struct gro_tcp4_tbl *tbl)
}
static inline uint32_t
-find_an_empty_key(struct gro_tcp4_tbl *tbl)
+find_an_empty_flow(struct gro_tcp4_tbl *tbl)
{
uint32_t i;
- uint32_t max_key_num = tbl->max_key_num;
+ uint32_t max_flow_num = tbl->max_flow_num;
- for (i = 0; i < max_key_num; i++)
- if (tbl->keys[i].start_index == INVALID_ARRAY_INDEX)
+ for (i = 0; i < max_flow_num; i++)
+ if (tbl->flows[i].start_index == INVALID_ARRAY_INDEX)
return i;
return INVALID_ARRAY_INDEX;
}
@@ -229,7 +127,6 @@ find_an_empty_key(struct gro_tcp4_tbl *tbl)
static inline uint32_t
insert_new_item(struct gro_tcp4_tbl *tbl,
struct rte_mbuf *pkt,
- uint16_t ip_id,
uint32_t sent_seq,
uint32_t prev_idx,
uint64_t start_time)
@@ -245,7 +142,6 @@ insert_new_item(struct gro_tcp4_tbl *tbl,
tbl->items[item_idx].start_time = start_time;
tbl->items[item_idx].next_pkt_idx = INVALID_ARRAY_INDEX;
tbl->items[item_idx].sent_seq = sent_seq;
- tbl->items[item_idx].ip_id = ip_id;
tbl->items[item_idx].nb_merged = 1;
tbl->item_num++;
@@ -265,7 +161,7 @@ delete_item(struct gro_tcp4_tbl *tbl, uint32_t item_idx,
{
uint32_t next_idx = tbl->items[item_idx].next_pkt_idx;
- /* set NULL to firstseg to indicate it's an empty item */
+ /* NULL indicates an empty item */
tbl->items[item_idx].firstseg = NULL;
tbl->item_num--;
if (prev_item_idx != INVALID_ARRAY_INDEX)
@@ -275,53 +171,33 @@ delete_item(struct gro_tcp4_tbl *tbl, uint32_t item_idx,
}
static inline uint32_t
-insert_new_key(struct gro_tcp4_tbl *tbl,
- struct tcp4_key *key_src,
+insert_new_flow(struct gro_tcp4_tbl *tbl,
+ struct tcp4_flow_key *src,
uint32_t item_idx)
{
- struct tcp4_key *key_dst;
- uint32_t key_idx;
+ struct tcp4_flow_key *dst;
+ uint32_t flow_idx;
- key_idx = find_an_empty_key(tbl);
- if (key_idx == INVALID_ARRAY_INDEX)
+ flow_idx = find_an_empty_flow(tbl);
+ if (flow_idx == INVALID_ARRAY_INDEX)
return INVALID_ARRAY_INDEX;
- key_dst = &(tbl->keys[key_idx].key);
+ dst = &(tbl->flows[flow_idx].key);
- ether_addr_copy(&(key_src->eth_saddr), &(key_dst->eth_saddr));
- ether_addr_copy(&(key_src->eth_daddr), &(key_dst->eth_daddr));
- key_dst->ip_src_addr = key_src->ip_src_addr;
- key_dst->ip_dst_addr = key_src->ip_dst_addr;
- key_dst->recv_ack = key_src->recv_ack;
- key_dst->src_port = key_src->src_port;
- key_dst->dst_port = key_src->dst_port;
+ ether_addr_copy(&(src->eth_saddr), &(dst->eth_saddr));
+ ether_addr_copy(&(src->eth_daddr), &(dst->eth_daddr));
+ dst->ip_src_addr = src->ip_src_addr;
+ dst->ip_dst_addr = src->ip_dst_addr;
+ dst->recv_ack = src->recv_ack;
+ dst->src_port = src->src_port;
+ dst->dst_port = src->dst_port;
- /* non-INVALID_ARRAY_INDEX value indicates this key is valid */
- tbl->keys[key_idx].start_index = item_idx;
- tbl->key_num++;
+ tbl->flows[flow_idx].start_index = item_idx;
+ tbl->flow_num++;
- return key_idx;
+ return flow_idx;
}
-static inline int
-is_same_key(struct tcp4_key k1, struct tcp4_key k2)
-{
- if (is_same_ether_addr(&k1.eth_saddr, &k2.eth_saddr) == 0)
- return 0;
-
- if (is_same_ether_addr(&k1.eth_daddr, &k2.eth_daddr) == 0)
- return 0;
-
- return ((k1.ip_src_addr == k2.ip_src_addr) &&
- (k1.ip_dst_addr == k2.ip_dst_addr) &&
- (k1.recv_ack == k2.recv_ack) &&
- (k1.src_port == k2.src_port) &&
- (k1.dst_port == k2.dst_port));
-}
-
-/*
- * update packet length for the flushed packet.
- */
static inline void
update_header(struct gro_tcp4_item *item)
{
@@ -343,30 +219,32 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
struct ipv4_hdr *ipv4_hdr;
struct tcp_hdr *tcp_hdr;
uint32_t sent_seq;
- uint16_t tcp_dl, ip_id;
+ uint16_t tcp_dl, hdr_len;
- struct tcp4_key key;
+ struct tcp4_flow_key key;
uint32_t cur_idx, prev_idx, item_idx;
- uint32_t i, max_key_num;
+ uint32_t i, max_flow_num;
int cmp;
eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
ipv4_hdr = (struct ipv4_hdr *)((char *)eth_hdr + pkt->l2_len);
tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len);
+ hdr_len = pkt->l2_len + pkt->l3_len + pkt->l4_len;
/*
- * if FIN, SYN, RST, PSH, URG, ECE or
- * CWR is set, return immediately.
+ * Don't process the packet which has FIN, SYN, RST, PSH, URG, ECE
+ * or CWR set.
*/
if (tcp_hdr->tcp_flags != TCP_ACK_FLAG)
return -1;
- /* if payload length is 0, return immediately */
- tcp_dl = rte_be_to_cpu_16(ipv4_hdr->total_length) - pkt->l3_len -
- pkt->l4_len;
- if (tcp_dl == 0)
+ /*
+ * Don't process the packet whose payload length is less than or
+ * equal to 0.
+ */
+ tcp_dl = pkt->pkt_len - hdr_len;
+ if (tcp_dl <= 0)
return -1;
- ip_id = rte_be_to_cpu_16(ipv4_hdr->packet_id);
sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq);
ether_addr_copy(&(eth_hdr->s_addr), &(key.eth_saddr));
@@ -377,49 +255,51 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
key.dst_port = tcp_hdr->dst_port;
key.recv_ack = tcp_hdr->recv_ack;
- /* search for a key */
- max_key_num = tbl->max_key_num;
- for (i = 0; i < max_key_num; i++) {
- if ((tbl->keys[i].start_index != INVALID_ARRAY_INDEX) &&
- is_same_key(tbl->keys[i].key, key))
+ /* Search for a matched flow. */
+ max_flow_num = tbl->max_flow_num;
+ for (i = 0; i < max_flow_num; i++) {
+ if ((tbl->flows[i].start_index != INVALID_ARRAY_INDEX) &&
+ is_same_tcp4_flow(tbl->flows[i].key, key))
break;
}
- /* can't find a key, so insert a new key and a new item. */
- if (i == tbl->max_key_num) {
- item_idx = insert_new_item(tbl, pkt, ip_id, sent_seq,
+ /*
+ * Fail to find a matched flow. Insert a new flow and store the
+ * packet into the flow.
+ */
+ if (i == tbl->max_flow_num) {
+ item_idx = insert_new_item(tbl, pkt, sent_seq,
INVALID_ARRAY_INDEX, start_time);
if (item_idx == INVALID_ARRAY_INDEX)
return -1;
- if (insert_new_key(tbl, &key, item_idx) ==
+ if (insert_new_flow(tbl, &key, item_idx) ==
INVALID_ARRAY_INDEX) {
- /*
- * fail to insert a new key, so
- * delete the inserted item
- */
+ /* Fail to insert a new flow. */
delete_item(tbl, item_idx, INVALID_ARRAY_INDEX);
return -1;
}
return 0;
}
- /* traverse all packets in the item group to find one to merge */
- cur_idx = tbl->keys[i].start_index;
+ /*
+ * Check all packets in the flow and try to find a neighbor for
+ * the input packet.
+ */
+ cur_idx = tbl->flows[i].start_index;
prev_idx = cur_idx;
do {
cmp = check_seq_option(&(tbl->items[cur_idx]), tcp_hdr,
- pkt->l4_len, tcp_dl, ip_id, sent_seq);
+ pkt->l4_len, tcp_dl, sent_seq, 0);
if (cmp) {
if (merge_two_tcp4_packets(&(tbl->items[cur_idx]),
- pkt, ip_id,
- sent_seq, cmp))
+ pkt, sent_seq, cmp, 0))
return 1;
/*
- * fail to merge two packets since the packet
- * length will be greater than the max value.
- * So insert the packet into the item group.
+ * Fail to merge the two packets, as the packet
+ * length is greater than the max value. Store
+ * the packet into the flow.
*/
- if (insert_new_item(tbl, pkt, ip_id, sent_seq,
+ if (insert_new_item(tbl, pkt, sent_seq,
prev_idx, start_time) ==
INVALID_ARRAY_INDEX)
return -1;
@@ -429,11 +309,8 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,
cur_idx = tbl->items[cur_idx].next_pkt_idx;
} while (cur_idx != INVALID_ARRAY_INDEX);
- /*
- * can't find a packet in the item group to merge,
- * so insert the packet into the item group.
- */
- if (insert_new_item(tbl, pkt, ip_id, sent_seq, prev_idx,
+ /* Fail to find a neighbor, so store the packet into the flow. */
+ if (insert_new_item(tbl, pkt, sent_seq, prev_idx,
start_time) == INVALID_ARRAY_INDEX)
return -1;
@@ -448,44 +325,33 @@ gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl,
{
uint16_t k = 0;
uint32_t i, j;
- uint32_t max_key_num = tbl->max_key_num;
+ uint32_t max_flow_num = tbl->max_flow_num;
- for (i = 0; i < max_key_num; i++) {
- /* all keys have been checked, return immediately */
- if (tbl->key_num == 0)
+ for (i = 0; i < max_flow_num; i++) {
+ if (unlikely(tbl->flow_num == 0))
return k;
- j = tbl->keys[i].start_index;
+ j = tbl->flows[i].start_index;
while (j != INVALID_ARRAY_INDEX) {
if (tbl->items[j].start_time <= flush_timestamp) {
out[k++] = tbl->items[j].firstseg;
if (tbl->items[j].nb_merged > 1)
update_header(&(tbl->items[j]));
/*
- * delete the item and get
- * the next packet index
+ * Delete the packet and get the next
+ * packet in the flow.
*/
- j = delete_item(tbl, j,
- INVALID_ARRAY_INDEX);
+ j = delete_item(tbl, j, INVALID_ARRAY_INDEX);
+ tbl->flows[i].start_index = j;
+ if (j == INVALID_ARRAY_INDEX)
+ tbl->flow_num--;
- /*
- * delete the key as all of
- * packets are flushed
- */
- if (j == INVALID_ARRAY_INDEX) {
- tbl->keys[i].start_index =
- INVALID_ARRAY_INDEX;
- tbl->key_num--;
- } else
- /* update start_index of the key */
- tbl->keys[i].start_index = j;
-
- if (k == nb_out)
+ if (unlikely(k == nb_out))
return k;
} else
/*
- * left packets of this key won't be
- * timeout, so go to check other keys.
+ * The remaining packets in this flow haven't
+ * timed out. Go on to check other flows.
*/
break;
}
diff --git a/lib/librte_gro/gro_tcp4.h b/lib/librte_gro/gro_tcp4.h
index 0a81716..de9925e 100644
--- a/lib/librte_gro/gro_tcp4.h
+++ b/lib/librte_gro/gro_tcp4.h
@@ -33,17 +33,20 @@
#ifndef _GRO_TCP4_H_
#define _GRO_TCP4_H_
+#include <rte_ip.h>
+#include <rte_tcp.h>
+
#define INVALID_ARRAY_INDEX 0xffffffffUL
#define GRO_TCP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
/*
- * the max L3 length of a TCP/IPv4 packet. The L3 length
- * is the sum of ipv4 header, tcp header and L4 payload.
+ * The max length of an IPv4 packet, which includes the length of L3
+ * header, L4 header and the payload.
*/
-#define TCP4_MAX_L3_LENGTH UINT16_MAX
+#define MAX_IPV4_PKT_LENGTH UINT16_MAX
-/* criteria of mergeing packets */
-struct tcp4_key {
+/* Header fields representing a TCP/IPv4 flow. */
+struct tcp4_flow_key {
struct ether_addr eth_saddr;
struct ether_addr eth_daddr;
uint32_t ip_src_addr;
@@ -54,43 +57,39 @@ struct tcp4_key {
uint16_t dst_port;
};
-struct gro_tcp4_key {
- struct tcp4_key key;
+struct gro_tcp4_flow {
+ struct tcp4_flow_key key;
/*
- * the index of the first packet in the item group.
- * If the value is INVALID_ARRAY_INDEX, it means
- * the key is empty.
+ * The index of the first packet in the flow.
+ * INVALID_ARRAY_INDEX indicates an empty flow.
*/
uint32_t start_index;
};
struct gro_tcp4_item {
/*
- * first segment of the packet. If the value
+ * First segment of the packet. If the value
* is NULL, it means the item is empty.
*/
struct rte_mbuf *firstseg;
- /* last segment of the packet */
+ /* Last segment of the packet */
struct rte_mbuf *lastseg;
/*
- * the time when the first packet is inserted
+ * The time when the first packet is inserted
* into the table. If a packet in the table is
* merged with an incoming packet, this value
- * won't be updated. We set this value only
- * when the first packet is inserted into the
- * table.
+ * won't be updated.
*/
uint64_t start_time;
/*
- * we use next_pkt_idx to chain the packets that
- * have same key value but can't be merged together.
+ * next_pkt_idx is used to chain the packets that
+ * are in the same flow but can't be merged together
+ * (i.e. caused by packet reordering).
*/
uint32_t next_pkt_idx;
- /* the sequence number of the packet */
+ /* TCP sequence number of the packet */
uint32_t sent_seq;
- /* the IP ID of the packet */
- uint16_t ip_id;
- /* the number of merged packets */
+ /* The number of merged packets */
uint16_t nb_merged;
};
@@ -100,31 +99,31 @@ struct gro_tcp4_item {
struct gro_tcp4_tbl {
/* item array */
struct gro_tcp4_item *items;
- /* key array */
- struct gro_tcp4_key *keys;
+ /* flow array */
+ struct gro_tcp4_flow *flows;
/* current item number */
uint32_t item_num;
- /* current key num */
- uint32_t key_num;
+ /* current flow num */
+ uint32_t flow_num;
/* item array size */
uint32_t max_item_num;
- /* key array size */
- uint32_t max_key_num;
+ /* flow array size */
+ uint32_t max_flow_num;
};
/**
* This function creates a TCP/IPv4 reassembly table.
*
* @param socket_id
- * socket index for allocating TCP/IPv4 reassemble table
+ * Socket index for allocating the TCP/IPv4 reassemble table
* @param max_flow_num
- * the maximum number of flows in the TCP/IPv4 GRO table
+ * The maximum number of flows in the TCP/IPv4 GRO table
* @param max_item_per_flow
- * the maximum packet number per flow.
+ * The maximum number of packets per flow
*
* @return
- * if create successfully, return a pointer which points to the
- * created TCP/IPv4 GRO table. Otherwise, return NULL.
+ * - Return the table pointer on success.
+ * - Return NULL on failure.
*/
void *gro_tcp4_tbl_create(uint16_t socket_id,
uint16_t max_flow_num,
@@ -134,62 +133,53 @@ void *gro_tcp4_tbl_create(uint16_t socket_id,
* This function destroys a TCP/IPv4 reassembly table.
*
* @param tbl
- * a pointer points to the TCP/IPv4 reassembly table.
+ * Pointer pointing to the TCP/IPv4 reassembly table.
*/
void gro_tcp4_tbl_destroy(void *tbl);
/**
- * This function searches for a packet in the TCP/IPv4 reassembly table
- * to merge with the inputted one. To merge two packets is to chain them
- * together and update packet headers. Packets, whose SYN, FIN, RST, PSH
- * CWR, ECE or URG bit is set, are returned immediately. Packets which
- * only have packet headers (i.e. without data) are also returned
- * immediately. Otherwise, the packet is either merged, or inserted into
- * the table. Besides, if there is no available space to insert the
- * packet, this function returns immediately too.
+ * This function merges a TCP/IPv4 packet. It doesn't process a packet
+ * which has SYN, FIN, RST, PSH, CWR, ECE or URG set, or which has no
+ * payload. It returns the packet if there is no available space in the
+ * table.
*
- * This function assumes the inputted packet is with correct IPv4 and
- * TCP checksums. And if two packets are merged, it won't re-calculate
- * IPv4 and TCP checksums. Besides, if the inputted packet is IP
- * fragmented, it assumes the packet is complete (with TCP header).
+ * This function doesn't check if the packet has correct checksums.
+ * Additionally, it doesn't re-calculate checksums for the merged packet.
+ * If the input packet is IP fragmented, it assumes the packet is complete.
*
* @param pkt
- * packet to reassemble.
+ * Packet to reassemble
* @param tbl
- * a pointer that points to a TCP/IPv4 reassembly table.
+ * Pointer pointing to the TCP/IPv4 reassembly table
+ * @param start_time
- * the start time that the packet is inserted into the table
+ * The time when the packet is inserted into the table
*
* @return
- * if the packet doesn't have data, or SYN, FIN, RST, PSH, CWR, ECE
- * or URG bit is set, or there is no available space in the table to
- * insert a new item or a new key, return a negative value. If the
- * packet is merged successfully, return an positive value. If the
- * packet is inserted into the table, return 0.
+ * - Return a positive value if the input packet is merged.
+ * - Return zero if the input packet isn't merged but stored in the table.
+ * - Return a negative value if the packet is neither merged nor stored.
*/
int32_t gro_tcp4_reassemble(struct rte_mbuf *pkt,
struct gro_tcp4_tbl *tbl,
uint64_t start_time);
/**
- * This function flushes timeout packets in a TCP/IPv4 reassembly table
- * to applications, and without updating checksums for merged packets.
- * The max number of flushed timeout packets is the element number of
- * the array which is used to keep flushed packets.
+ * This function flushes timed-out packets from a TCP/IPv4 reassembly
+ * table, without updating checksums.
*
* @param tbl
- * a pointer that points to a TCP GRO table.
+ * Pointer pointing to a TCP/IPv4 reassembly table
* @param flush_timestamp
- * this function flushes packets which are inserted into the table
- * before or at the flush_timestamp.
+ * Flush packets which are inserted into the table before or at the
+ * flush_timestamp
* @param out
- * pointer array which is used to keep flushed packets.
+ * Pointer array used to keep flushed packets
* @param nb_out
- * the element number of out. It's also the max number of timeout
+ * The element number in 'out'. It also determines the maximum number of
* packets that can be flushed finally.
*
* @return
- * the number of packets that are returned.
+ * The number of flushed packets
*/
uint16_t gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl,
uint64_t flush_timestamp,
@@ -201,10 +191,124 @@ uint16_t gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl,
* reassembly table.
*
* @param tbl
- * pointer points to a TCP/IPv4 reassembly table.
+ * Pointer pointing to a TCP/IPv4 reassembly table
*
* @return
- * the number of packets in the table
+ * The number of packets in the table
*/
uint32_t gro_tcp4_tbl_pkt_count(void *tbl);
+
+/*
+ * Check if two TCP/IPv4 packets belong to the same flow.
+ */
+static inline int
+is_same_tcp4_flow(struct tcp4_flow_key k1, struct tcp4_flow_key k2)
+{
+ if (is_same_ether_addr(&k1.eth_saddr, &k2.eth_saddr) == 0)
+ return 0;
+
+ if (is_same_ether_addr(&k1.eth_daddr, &k2.eth_daddr) == 0)
+ return 0;
+
+ return ((k1.ip_src_addr == k2.ip_src_addr) &&
+ (k1.ip_dst_addr == k2.ip_dst_addr) &&
+ (k1.recv_ack == k2.recv_ack) &&
+ (k1.src_port == k2.src_port) &&
+ (k1.dst_port == k2.dst_port));
+}
+
+/*
+ * Check if two TCP/IPv4 packets are neighbors.
+ */
+static inline int
+check_seq_option(struct gro_tcp4_item *item,
+ struct tcp_hdr *tcph,
+ uint16_t tcp_hl,
+ uint16_t tcp_dl,
+ uint32_t sent_seq,
+ uint16_t l2_offset)
+{
+ struct rte_mbuf *pkt_orig = item->firstseg;
+ struct ipv4_hdr *iph_orig;
+ struct tcp_hdr *tcph_orig;
+ uint16_t len, l4_len_orig;
+
+ iph_orig = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt_orig, char *) +
+ l2_offset + pkt_orig->l2_len);
+ tcph_orig = (struct tcp_hdr *)((char *)iph_orig + pkt_orig->l3_len);
+ l4_len_orig = pkt_orig->l4_len;
+
+ /* Check if TCP option fields equal */
+ len = RTE_MAX(tcp_hl, l4_len_orig) - sizeof(struct tcp_hdr);
+ if ((tcp_hl != l4_len_orig) || ((len > 0) &&
+ (memcmp(tcph + 1, tcph_orig + 1,
+ len) != 0)))
+ return 0;
+
+ /* Check if the two packets are neighbors */
+ len = pkt_orig->pkt_len - l2_offset - pkt_orig->l2_len -
+ pkt_orig->l3_len - l4_len_orig;
+ if (sent_seq == item->sent_seq + len)
+ /* Append the new packet */
+ return 1;
+ else if (sent_seq + tcp_dl == item->sent_seq)
+ /* Pre-pend the new packet */
+ return -1;
+ else
+ return 0;
+}
+
+/*
+ * Merge two TCP/IPv4 packets without updating checksums.
+ * If cmp is larger than 0, append the new packet to the
+ * original packet. Otherwise, pre-pend the new packet to
+ * the original packet.
+ */
+static inline int
+merge_two_tcp4_packets(struct gro_tcp4_item *item,
+ struct rte_mbuf *pkt,
+ uint32_t sent_seq,
+ int cmp,
+ uint16_t l2_offset)
+{
+ struct rte_mbuf *pkt_head, *pkt_tail, *lastseg;
+ uint16_t hdr_len;
+
+ if (cmp > 0) {
+ pkt_head = item->firstseg;
+ pkt_tail = pkt;
+ } else {
+ pkt_head = pkt;
+ pkt_tail = item->firstseg;
+ }
+
+ /* Check if the length is greater than the max value */
+ hdr_len = l2_offset + pkt_head->l2_len + pkt_head->l3_len +
+ pkt_head->l4_len;
+ if (pkt_head->pkt_len - l2_offset - pkt_head->l2_len +
+ pkt_tail->pkt_len - hdr_len > MAX_IPV4_PKT_LENGTH)
+ return 0;
+
+ /* Remove packet header for the tail packet */
+ rte_pktmbuf_adj(pkt_tail, hdr_len);
+
+ /* Chain two packets together */
+ if (cmp > 0) {
+ item->lastseg->next = pkt;
+ item->lastseg = rte_pktmbuf_lastseg(pkt);
+ } else {
+ lastseg = rte_pktmbuf_lastseg(pkt);
+ lastseg->next = item->firstseg;
+ item->firstseg = pkt;
+ /* Update sent_seq to the smaller value */
+ item->sent_seq = sent_seq;
+ }
+ item->nb_merged++;
+
+ /* Update mbuf metadata for the merged packet */
+ pkt_head->nb_segs += pkt_tail->nb_segs;
+ pkt_head->pkt_len += pkt_tail->pkt_len;
+
+ return 1;
+}
#endif
diff --git a/lib/librte_gro/rte_gro.c b/lib/librte_gro/rte_gro.c
index 7853246..dfee932 100644
--- a/lib/librte_gro/rte_gro.c
+++ b/lib/librte_gro/rte_gro.c
@@ -51,6 +51,9 @@ static gro_tbl_destroy_fn tbl_destroy_fn[RTE_GRO_TYPE_MAX_NUM] = {
static gro_tbl_pkt_count_fn tbl_pkt_count_fn[RTE_GRO_TYPE_MAX_NUM] = {
gro_tcp4_tbl_pkt_count, NULL};
+#define IS_IPV4_TCP_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \
+ ((ptype & RTE_PTYPE_L4_TCP) == RTE_PTYPE_L4_TCP))
+
/*
* GRO context structure, which is used to merge packets. It keeps
* many reassembly tables of desired GRO types. Applications need to
@@ -131,62 +134,55 @@ rte_gro_reassemble_burst(struct rte_mbuf **pkts,
uint16_t nb_pkts,
const struct rte_gro_param *param)
{
- uint16_t i;
- uint16_t nb_after_gro = nb_pkts;
- uint32_t item_num;
-
- /* allocate a reassembly table for TCP/IPv4 GRO */
+ /* Allocate a reassembly table for TCP/IPv4 GRO. */
struct gro_tcp4_tbl tcp_tbl;
- struct gro_tcp4_key tcp_keys[RTE_GRO_MAX_BURST_ITEM_NUM];
+ struct gro_tcp4_flow tcp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
struct gro_tcp4_item tcp_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {{0} };
struct rte_mbuf *unprocess_pkts[nb_pkts];
- uint16_t unprocess_num = 0;
- int32_t ret;
uint64_t current_time;
+ uint32_t item_num;
+ int32_t ret;
+ uint16_t i, unprocess_num = 0, nb_after_gro = nb_pkts;
if ((param->gro_types & RTE_GRO_TCP_IPV4) == 0)
return nb_pkts;
- /* get the actual number of packets */
+ /* Get the actual number of packets. */
item_num = RTE_MIN(nb_pkts, (param->max_flow_num *
- param->max_item_per_flow));
+ param->max_item_per_flow));
item_num = RTE_MIN(item_num, RTE_GRO_MAX_BURST_ITEM_NUM);
for (i = 0; i < item_num; i++)
- tcp_keys[i].start_index = INVALID_ARRAY_INDEX;
+ tcp_flows[i].start_index = INVALID_ARRAY_INDEX;
- tcp_tbl.keys = tcp_keys;
+ tcp_tbl.flows = tcp_flows;
tcp_tbl.items = tcp_items;
- tcp_tbl.key_num = 0;
+ tcp_tbl.flow_num = 0;
tcp_tbl.item_num = 0;
- tcp_tbl.max_key_num = item_num;
+ tcp_tbl.max_flow_num = item_num;
tcp_tbl.max_item_num = item_num;
current_time = rte_rdtsc();
for (i = 0; i < nb_pkts; i++) {
- if ((pkts[i]->packet_type & (RTE_PTYPE_L3_IPV4 |
- RTE_PTYPE_L4_TCP)) ==
- (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP)) {
- ret = gro_tcp4_reassemble(pkts[i],
- &tcp_tbl,
+ if (IS_IPV4_TCP_PKT(pkts[i]->packet_type)) {
+ ret = gro_tcp4_reassemble(pkts[i], &tcp_tbl,
current_time);
if (ret > 0)
- /* merge successfully */
+ /* Merged successfully */
nb_after_gro--;
- else if (ret < 0) {
- unprocess_pkts[unprocess_num++] =
- pkts[i];
- }
+ else if (ret < 0)
+ unprocess_pkts[unprocess_num++] = pkts[i];
} else
unprocess_pkts[unprocess_num++] = pkts[i];
}
- /* re-arrange GROed packets */
if (nb_after_gro < nb_pkts) {
+ /* Flush packets from the tables. */
i = gro_tcp4_tbl_timeout_flush(&tcp_tbl, current_time,
pkts, nb_pkts);
+ /* Copy unprocessed packets. */
if (unprocess_num > 0) {
memcpy(&pkts[i], unprocess_pkts,
sizeof(struct rte_mbuf *) *
@@ -202,10 +198,11 @@ rte_gro_reassemble(struct rte_mbuf **pkts,
uint16_t nb_pkts,
void *ctx)
{
- uint16_t i, unprocess_num = 0;
struct rte_mbuf *unprocess_pkts[nb_pkts];
struct gro_ctx *gro_ctx = ctx;
+ void *tbl;
uint64_t current_time;
+ uint16_t i, unprocess_num = 0;
if ((gro_ctx->gro_types & RTE_GRO_TCP_IPV4) == 0)
return nb_pkts;
@@ -213,12 +210,9 @@ rte_gro_reassemble(struct rte_mbuf **pkts,
current_time = rte_rdtsc();
for (i = 0; i < nb_pkts; i++) {
- if ((pkts[i]->packet_type & (RTE_PTYPE_L3_IPV4 |
- RTE_PTYPE_L4_TCP)) ==
- (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP)) {
- if (gro_tcp4_reassemble(pkts[i],
- gro_ctx->tbls
- [RTE_GRO_TCP_IPV4_INDEX],
+ if (IS_IPV4_TCP_PKT(pkts[i]->packet_type)) {
+ tbl = gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX];
+ if (gro_tcp4_reassemble(pkts[i], tbl,
current_time) < 0)
unprocess_pkts[unprocess_num++] = pkts[i];
} else
@@ -252,6 +246,7 @@ rte_gro_timeout_flush(void *ctx,
flush_timestamp,
out, max_nb_out);
}
+
return 0;
}
@@ -274,5 +269,6 @@ rte_gro_get_pkt_count(void *ctx)
continue;
item_num += pkt_count_fn(gro_ctx->tbls[i]);
}
+
return item_num;
}
--
2.7.4
More information about the dev
mailing list