[dpdk-dev] [PATCH v2 3/6] net/mlx5: fix completion queue drain loop

Viacheslav Ovsiienko viacheslavo at mellanox.com
Mon Aug 5 15:03:51 CEST 2019


The completion loop speed optimizations for error-free
operations are done - no CQE field fetch on each loop
iteration. Also, code size is oprimized - the flush
buffers routine is invoked once.

Fixes: 318ea4cfa1b1 ("net/mlx5: fix Tx completion descriptors fetching loop")

Signed-off-by: Viacheslav Ovsiienko <viacheslavo at mellanox.com>
Acked-by: Matan Azrad <matan at mellanox.com>
---
 drivers/net/mlx5/mlx5_rxtx.c | 98 +++++++++++++++++++++++++++++---------------
 drivers/net/mlx5/mlx5_rxtx.h |  8 ++--
 2 files changed, 68 insertions(+), 38 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 1ec3793..a890f41 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -654,9 +654,10 @@ enum mlx5_txcmp_code {
  *   Pointer to the error CQE.
  *
  * @return
- *   The last Tx buffer element to free.
+ *   Negative value if queue recovery failed,
+ *   the last Tx buffer element to free otherwise.
  */
-uint16_t
+int
 mlx5_tx_error_cqe_handle(struct mlx5_txq_data *restrict txq,
 			 volatile struct mlx5_err_cqe *err_cqe)
 {
@@ -706,6 +707,7 @@ enum mlx5_txcmp_code {
 			return txq->elts_head;
 		}
 		/* Recovering failed - try again later on the same WQE. */
+		return -1;
 	} else {
 		txq->cq_ci++;
 	}
@@ -2010,6 +2012,45 @@ enum mlx5_txcmp_code {
 }
 
 /**
+ * Update completion queue consuming index via doorbell
+ * and flush the completed data buffers.
+ *
+ * @param txq
+ *   Pointer to TX queue structure.
+ * @param valid CQE pointer
+ *   if not NULL update txq->wqe_pi and flush the buffers
+ * @param itail
+ *   if not negative - flush the buffers till this index.
+ * @param olx
+ *   Configured Tx offloads mask. It is fully defined at
+ *   compile time and may be used for optimization.
+ */
+static __rte_always_inline void
+mlx5_tx_comp_flush(struct mlx5_txq_data *restrict txq,
+		   volatile struct mlx5_cqe *last_cqe,
+		   int itail,
+		   unsigned int olx __rte_unused)
+{
+	uint16_t tail;
+
+	if (likely(last_cqe != NULL)) {
+		txq->wqe_pi = rte_be_to_cpu_16(last_cqe->wqe_counter);
+		tail = ((volatile struct mlx5_wqe_cseg *)
+			(txq->wqes + (txq->wqe_pi & txq->wqe_m)))->misc;
+	} else if (itail >= 0) {
+		tail = (uint16_t)itail;
+	} else {
+		return;
+	}
+	rte_compiler_barrier();
+	*txq->cq_db = rte_cpu_to_be_32(txq->cq_ci);
+	if (likely(tail != txq->elts_tail)) {
+		mlx5_tx_free_elts(txq, tail, olx);
+		assert(tail == txq->elts_tail);
+	}
+}
+
+/**
  * Manage TX completions. This routine checks the CQ for
  * arrived CQEs, deduces the last accomplished WQE in SQ,
  * updates SQ producing index and frees all completed mbufs.
@@ -2028,10 +2069,11 @@ enum mlx5_txcmp_code {
 			  unsigned int olx __rte_unused)
 {
 	unsigned int count = MLX5_TX_COMP_MAX_CQE;
-	bool update = false;
-	uint16_t tail = txq->elts_tail;
+	volatile struct mlx5_cqe *last_cqe = NULL;
 	int ret;
 
+	static_assert(MLX5_CQE_STATUS_HW_OWN < 0, "Must be negative value");
+	static_assert(MLX5_CQE_STATUS_SW_OWN < 0, "Must be negative value");
 	do {
 		volatile struct mlx5_cqe *cqe;
 
@@ -2043,32 +2085,30 @@ enum mlx5_txcmp_code {
 				assert(ret == MLX5_CQE_STATUS_HW_OWN);
 				break;
 			}
-			/* Some error occurred, try to restart. */
+			/*
+			 * Some error occurred, try to restart.
+			 * We have no barrier after WQE related Doorbell
+			 * written, make sure all writes are completed
+			 * here, before we might perform SQ reset.
+			 */
 			rte_wmb();
-			tail = mlx5_tx_error_cqe_handle
+			ret = mlx5_tx_error_cqe_handle
 				(txq, (volatile struct mlx5_err_cqe *)cqe);
-			if (likely(tail != txq->elts_tail)) {
-				mlx5_tx_free_elts(txq, tail, olx);
-				assert(tail == txq->elts_tail);
-			}
-			/* Allow flushing all CQEs from the queue. */
-			count = txq->cqe_s;
-		} else {
-			volatile struct mlx5_wqe_cseg *cseg;
-
-			/* Normal transmit completion. */
-			++txq->cq_ci;
-			rte_cio_rmb();
-			txq->wqe_pi = rte_be_to_cpu_16(cqe->wqe_counter);
-			cseg = (volatile struct mlx5_wqe_cseg *)
-				(txq->wqes + (txq->wqe_pi & txq->wqe_m));
-			tail = cseg->misc;
+			/*
+			 * Flush buffers, update consuming index
+			 * if recovery succeeded. Otherwise
+			 * just try to recover later.
+			 */
+			last_cqe = NULL;
+			break;
 		}
+		/* Normal transmit completion. */
+		++txq->cq_ci;
+		last_cqe = cqe;
 #ifndef NDEBUG
 		if (txq->cq_pi)
 			--txq->cq_pi;
 #endif
-		update = true;
 	/*
 	 * We have to restrict the amount of processed CQEs
 	 * in one tx_burst routine call. The CQ may be large
@@ -2078,17 +2118,7 @@ enum mlx5_txcmp_code {
 	 * latency.
 	 */
 	} while (--count);
-	if (likely(tail != txq->elts_tail)) {
-		/* Free data buffers from elts. */
-		mlx5_tx_free_elts(txq, tail, olx);
-		assert(tail == txq->elts_tail);
-	}
-	if (likely(update)) {
-		/* Update the consumer index. */
-		rte_compiler_barrier();
-		*txq->cq_db =
-		rte_cpu_to_be_32(txq->cq_ci);
-	}
+	mlx5_tx_comp_flush(txq, last_cqe, ret, olx);
 }
 
 /**
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index c209d99..aaa02a2 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -400,7 +400,7 @@ struct mlx5_txq_ctrl *mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx,
 void mlx5_set_ptype_table(void);
 void mlx5_set_cksum_table(void);
 void mlx5_set_swp_types_table(void);
-__rte_noinline uint16_t mlx5_tx_error_cqe_handle
+__rte_noinline int mlx5_tx_error_cqe_handle
 				(struct mlx5_txq_data *restrict txq,
 				 volatile struct mlx5_err_cqe *err_cqe);
 uint16_t mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n);
@@ -499,9 +499,9 @@ int mlx5_dma_unmap(struct rte_pci_device *pdev, void *addr, uint64_t iova,
 
 /* CQE status. */
 enum mlx5_cqe_status {
-	MLX5_CQE_STATUS_SW_OWN,
-	MLX5_CQE_STATUS_HW_OWN,
-	MLX5_CQE_STATUS_ERR,
+	MLX5_CQE_STATUS_SW_OWN = -1,
+	MLX5_CQE_STATUS_HW_OWN = -2,
+	MLX5_CQE_STATUS_ERR = -3,
 };
 
 /**
-- 
1.8.3.1



More information about the dev mailing list