[dpdk-dev] [PATCH v2 1/2] net/mlx5: fix erroneous index handling for Tx ring

Yongseok Koh yskoh at mellanox.com
Tue May 9 22:49:30 CEST 2017


In case of resource deficiency on Tx, mlx5_tx_burst() breaks the loop
without rolling back consumed resources (txq->wqes[] and txq->elts[]). This
can make the application crash because unposted mbufs can be freed while
completions are being processed. Other Tx functions don't have this issue.

Fixes: 3f13f8c23a7c ("net/mlx5: support hardware TSO")
Fixes: f04f1d51564b ("net/mlx5: fix Tx WQE corruption caused by starvation")
CC: stable at dpdk.org

Reported-by: Hanoch Haim <hhaim at cisco.com>
Signed-off-by: Yongseok Koh <yskoh at mellanox.com>
---
 drivers/net/mlx5/mlx5_rxtx.c | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)
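
Not part of the patch itself: below is a minimal sketch of the
deferred-commit pattern the fix applies, using made-up types and names
(tx_ring, tx_post_pkt, elts_free) purely for illustration. The point is
to commit ring indices only after a whole packet has been posted, so a
resource shortage leaves nothing half-consumed for the completion path
to free.

#include <stdbool.h>
#include <stdint.h>

struct tx_ring {
	uint16_t elts_n;    /* ring size, must be a power of two */
	uint16_t elts_head; /* producer index covering committed packets only */
	uint16_t elts_free; /* number of free slots */
	void **elts;        /* buffers owned by the ring until completion */
};

/*
 * Post one multi-segment packet. On resource deficiency return false
 * without touching the committed ring state, so a later completion
 * never frees buffers that were not actually handed to hardware.
 */
static bool
tx_post_pkt(struct tx_ring *r, void **segs, uint16_t nb_segs)
{
	uint16_t head = r->elts_head; /* work on a local copy */
	uint16_t i;

	if (r->elts_free < nb_segs)
		return false; /* nothing consumed, caller may retry later */
	for (i = 0; i < nb_segs; ++i) {
		r->elts[head] = segs[i];
		head = (head + 1) & (r->elts_n - 1);
	}
	/* Commit only once every segment has been posted. */
	r->elts_head = head;
	r->elts_free -= nb_segs;
	return true;
}

The actual change below follows the same idea: segments are counted in a
local sg and folded into j only after the whole packet is posted, and the
doorbell/completion request uses last_wqe, the last WQE known to be fully
written, instead of a wqe pointer that may refer to an aborted entry.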

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 6254228a9..cf63434d5 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -504,6 +504,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 	uint16_t max_wqe;
 	unsigned int comp;
 	volatile struct mlx5_wqe_v *wqe = NULL;
+	volatile struct mlx5_wqe_ctrl *last_wqe = NULL;
 	unsigned int segs_n = 0;
 	struct rte_mbuf *buf = NULL;
 	uint8_t *raw;
@@ -524,6 +525,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		volatile rte_v128u32_t *dseg = NULL;
 		uint32_t length;
 		unsigned int ds = 0;
+		unsigned int sg = 0; /* counter of additional segs attached. */
 		uintptr_t addr;
 		uint64_t naddr;
 		uint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE + 2;
@@ -815,12 +817,14 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		};
 		(*txq->elts)[elts_head] = buf;
 		elts_head = (elts_head + 1) & (elts_n - 1);
-		++j;
-		--segs_n;
-		if (segs_n)
+		++sg;
+		/* Advance counter only if all segs are successfully posted. */
+		if (sg < segs_n) {
 			goto next_seg;
-		else
+		} else {
 			--pkts_n;
+			j += sg;
+		}
 next_pkt:
 		++i;
 		/* Initialize known and common part of the WQE structure. */
@@ -853,6 +857,8 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		}
 next_wqe:
 		txq->wqe_ci += (ds + 3) / 4;
+		/* Save the last successful WQE for completion request */
+		last_wqe = (volatile struct mlx5_wqe_ctrl *)wqe;
 #ifdef MLX5_PMD_SOFT_COUNTERS
 		/* Increment sent bytes counter. */
 		txq->stats.obytes += total_length;
@@ -861,16 +867,14 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 	/* Take a shortcut if nothing must be sent. */
 	if (unlikely((i + k) == 0))
 		return 0;
+	txq->elts_head = (txq->elts_head + i + j) & (elts_n - 1);
 	/* Check whether completion threshold has been reached. */
 	comp = txq->elts_comp + i + j + k;
 	if (comp >= MLX5_TX_COMP_THRESH) {
-		volatile struct mlx5_wqe_ctrl *w =
-			(volatile struct mlx5_wqe_ctrl *)wqe;
-
 		/* Request completion on last WQE. */
-		w->ctrl2 = htonl(8);
+		last_wqe->ctrl2 = htonl(8);
 		/* Save elts_head in unused "immediate" field of WQE. */
-		w->ctrl3 = elts_head;
+		last_wqe->ctrl3 = txq->elts_head;
 		txq->elts_comp = 0;
 	} else {
 		txq->elts_comp = comp;
@@ -880,8 +884,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 	txq->stats.opackets += i;
 #endif
 	/* Ring QP doorbell. */
-	mlx5_tx_dbrec(txq, (volatile struct mlx5_wqe *)wqe);
-	txq->elts_head = elts_head;
+	mlx5_tx_dbrec(txq, (volatile struct mlx5_wqe *)last_wqe);
 	return i;
 }
 
-- 
2.11.0


