<div dir="ltr"><div>Please find the attached pcap files for the testing done. <br></div><div><br></div><div>Thanks,</div><div>Kumara.<br></div></div><br><div class="gmail_quote"><div dir="ltr" class="gmail_attr">On Thu, Oct 13, 2022 at 3:49 PM Kumara Parameshwaran <<a href="mailto:kumaraparamesh92@gmail.com">kumaraparamesh92@gmail.com</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">From: Kumara Parameshwaran <<a href="mailto:kumaraparamesh92@gmail.com" target="_blank">kumaraparamesh92@gmail.com</a>><br>
<br>
When a TCP packet contains flags like PSH it is returned<br>
immediately to the application though there might be packets of<br>
the same flow in the GRO table. If PSH flag is set on a segment<br>
packets up to the segment should be delivered immediately. But the<br>
current implementation delivers the last arrived packet with PSH flag<br>
set, causing re-ordering.<br>
<br>
With this patch, if a packet contains flags other than a pure ACK and there are<br>
no previous packets for the flow, the packet is returned<br>
immediately; otherwise it is merged with the previous segment and the<br>
flags of the last segment are set on the entire merged segment.<br>
This is the behaviour with the Linux stack as well.<br>
<br>
Signed-off-by: Kumara Parameshwaran <<a href="mailto:kumaraparamesh92@gmail.com" target="_blank">kumaraparamesh92@gmail.com</a>><br>
---<br>
v1:<br>
If the received packet is not a pure ACK packet, we check if<br>
there are any previous packets in the flow; if present, we include<br>
the received packet in the coalescing logic as well and apply the flags<br>
of the last received packet to the entire segment, which avoids<br>
re-ordering.<br>
<br>
Lets say a case where P1(PSH), P2(ACK), P3(ACK) are received in burst mode,<br>
P1 contains the PSH flag, and since there are no prior packets in the flow,<br>
it is copied to unprocess_packets; P2(ACK) and P3(ACK) are merged together.<br>
In the existing case the P2,P3 would be delivered as single segment first and the<br>
unprocess_packets will be copied later, which will cause reordering. With the patch,<br>
the unprocessed packets are copied first and then the packets from the GRO table.<br>
<br>
Testing done<br>
The csum test-pmd was modified to support the following:<br>
GET request of 10MB from client to server via test-pmd (static ARP entries added in client<br>
and server). Enable GRO and TSO in test-pmd, where the packets received from the client MAC<br>
would be sent to the server MAC and vice versa.<br>
In the above testing, without the patch the client observed re-ordering of 25 packets,<br>
and with the patch there was no packet re-ordering observed.<br>
<br>
lib/gro/gro_tcp4.c | 35 ++++++++++++++++++++++++++++-------<br>
lib/gro/rte_gro.c | 18 +++++++++---------<br>
2 files changed, 37 insertions(+), 16 deletions(-)<br>
<br>
diff --git a/lib/gro/gro_tcp4.c b/lib/gro/gro_tcp4.c<br>
index 8f5e800250..9ed891c253 100644<br>
--- a/lib/gro/gro_tcp4.c<br>
+++ b/lib/gro/gro_tcp4.c<br>
@@ -188,6 +188,19 @@ update_header(struct gro_tcp4_item *item)<br>
pkt->l2_len);<br>
}<br>
<br>
+static inline void<br>
+update_tcp_hdr_flags(struct rte_tcp_hdr *tcp_hdr, struct rte_mbuf *pkt)<br>
+{<br>
+ struct rte_ether_hdr *eth_hdr;<br>
+ struct rte_ipv4_hdr *ipv4_hdr;<br>
+ struct rte_tcp_hdr *merged_tcp_hdr;<br>
+<br>
+ eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *);<br>
+ ipv4_hdr = (struct rte_ipv4_hdr *)((char *)eth_hdr + pkt->l2_len);<br>
+ merged_tcp_hdr = (struct rte_tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len);<br>
+ merged_tcp_hdr->tcp_flags |= tcp_hdr->tcp_flags;<br>
+}<br>
+<br>
int32_t<br>
gro_tcp4_reassemble(struct rte_mbuf *pkt,<br>
struct gro_tcp4_tbl *tbl,<br>
@@ -206,6 +219,7 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,<br>
uint32_t i, max_flow_num, remaining_flow_num;<br>
int cmp;<br>
uint8_t find;<br>
+ uint32_t start_idx;<br>
<br>
/*<br>
* Don't process the packet whose TCP header length is greater<br>
@@ -219,12 +233,6 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,<br>
tcp_hdr = (struct rte_tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len);<br>
hdr_len = pkt->l2_len + pkt->l3_len + pkt->l4_len;<br>
<br>
- /*<br>
- * Don't process the packet which has FIN, SYN, RST, PSH, URG, ECE<br>
- * or CWR set.<br>
- */<br>
- if (tcp_hdr->tcp_flags != RTE_TCP_ACK_FLAG)<br>
- return -1;<br>
/*<br>
* Don't process the packet whose payload length is less than or<br>
* equal to 0.<br>
@@ -263,12 +271,21 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,<br>
if (tbl->flows[i].start_index != INVALID_ARRAY_INDEX) {<br>
if (is_same_tcp4_flow(tbl->flows[i].key, key)) {<br>
find = 1;<br>
+ start_idx = tbl->flows[i].start_index;<br>
break;<br>
}<br>
remaining_flow_num--;<br>
}<br>
}<br>
<br>
+ if (tcp_hdr->tcp_flags != RTE_TCP_ACK_FLAG) {<br>
+ if (find)<br>
+ /* Since PSH flag is set, start time will be set to 0 so it will be flushed immediately */<br>
+ tbl->items[start_idx].start_time = 0;<br>
+ else<br>
+ return -1;<br>
+ }<br>
+<br>
/*<br>
* Fail to find a matched flow. Insert a new flow and store the<br>
* packet into the flow.<br>
@@ -303,8 +320,12 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt,<br>
is_atomic);<br>
if (cmp) {<br>
if (merge_two_tcp4_packets(&(tbl->items[cur_idx]),<br>
- pkt, cmp, sent_seq, ip_id, 0))<br>
+ pkt, cmp, sent_seq, ip_id, 0)) {<br>
+ if (tbl->items[cur_idx].start_time == 0)<br>
+ update_tcp_hdr_flags(tcp_hdr, tbl->items[cur_idx].firstseg);<br>
return 1;<br>
+ }<br>
+<br>
/*<br>
* Fail to merge the two packets, as the packet<br>
* length is greater than the max value. Store<br>
diff --git a/lib/gro/rte_gro.c b/lib/gro/rte_gro.c<br>
index e35399fd42..87c5502dce 100644<br>
--- a/lib/gro/rte_gro.c<br>
+++ b/lib/gro/rte_gro.c<br>
@@ -283,10 +283,17 @@ rte_gro_reassemble_burst(struct rte_mbuf **pkts,<br>
if ((nb_after_gro < nb_pkts)<br>
|| (unprocess_num < nb_pkts)) {<br>
i = 0;<br>
+ /* Copy unprocessed packets */<br>
+ if (unprocess_num > 0) {<br>
+ memcpy(&pkts[i], unprocess_pkts,<br>
+ sizeof(struct rte_mbuf *) *<br>
+ unprocess_num);<br>
+ i = unprocess_num;<br>
+ }<br>
/* Flush all packets from the tables */<br>
if (do_vxlan_tcp_gro) {<br>
- i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tcp_tbl,<br>
- 0, pkts, nb_pkts);<br>
+ i += gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tcp_tbl,<br>
+ 0, &pkts[i], nb_pkts - i);<br>
}<br>
<br>
if (do_vxlan_udp_gro) {<br>
@@ -304,13 +311,6 @@ rte_gro_reassemble_burst(struct rte_mbuf **pkts,<br>
i += gro_udp4_tbl_timeout_flush(&udp_tbl, 0,<br>
&pkts[i], nb_pkts - i);<br>
}<br>
- /* Copy unprocessed packets */<br>
- if (unprocess_num > 0) {<br>
- memcpy(&pkts[i], unprocess_pkts,<br>
- sizeof(struct rte_mbuf *) *<br>
- unprocess_num);<br>
- }<br>
- nb_after_gro = i + unprocess_num;<br>
}<br>
<br>
return nb_after_gro;<br>
-- <br>
2.25.1<br>
<br>
</blockquote></div>