[dpdk-stable] [PATCH v1 7/9] net/mlx5: handle Tx completion with error

Matan Azrad <matan at mellanox.com>
Thu May 30 12:20:37 CEST 2019


When WQEs are posted to the HW to send packets, the PMD may get a
completion report with an error from the HW, a.k.a. an error CQE, which
is associated with the bad WQE.

The error reason may be a bad address, a wrong lkey, bad sizes, etc.,
wrongly configured by the PMD or by the user.

Checking for all the possible mistakes in order to prevent error CQEs
doesn't make sense due to the performance impact and the huge
complexity.

An error CQE moves the SQ to the error state, which causes all the
subsequently posted WQEs to be completed with a flush-error CQE
forever.
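
The handler added below tells the two cases apart by the CQE syndrome:
only a non-flush syndrome identifies the root-cause WQE, while flush
errors are just skipped. A minimal sketch of that check, reusing the
mlx5 PRM definitions (the helper name is illustrative only):

	/* Illustrative helper: is this error CQE the root cause, or just
	 * the flush of a WQE posted after the SQ entered the error state?
	 */
	static inline int
	is_root_cause_err(volatile struct mlx5_err_cqe *err_cqe)
	{
		return err_cqe->syndrome != MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
	}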

Currently, the PMD doesn't handle Tx error CQEs and may even crash when
one of them appears.

Extend the Tx data-path to detect these error CQEs, to report them in
the statistics error counters, and to recover the SQ by moving its
state back to ready and adjusting the management variables
appropriately.
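
The SQ recovery is a standard Verbs QP state walk back to RTS, as done
by tx_recover_qp() in this patch. A trimmed sketch (helper name is
illustrative; the PMD itself goes through the mlx5_glue layer and also
resets wqe_ci/wqe_pi/elts_comp, while here plain libibverbs calls are
shown and error handling is reduced):

	#include <infiniband/verbs.h>

	/* Walk the Tx QP RESET -> INIT -> RTR -> RTS after an error. */
	static int
	sq_back_to_ready(struct ibv_qp *qp)
	{
		struct ibv_qp_attr mod = {
			.qp_state = IBV_QPS_RESET,
			.port_num = 1, /* same fixed port as in the patch */
		};

		if (ibv_modify_qp(qp, &mod, IBV_QP_STATE))
			return -1;
		mod.qp_state = IBV_QPS_INIT;
		if (ibv_modify_qp(qp, &mod, IBV_QP_STATE | IBV_QP_PORT))
			return -1;
		mod.qp_state = IBV_QPS_RTR;
		if (ibv_modify_qp(qp, &mod, IBV_QP_STATE))
			return -1;
		mod.qp_state = IBV_QPS_RTS;
		return ibv_modify_qp(qp, &mod, IBV_QP_STATE);
	}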

Sometimes the root cause of an error CQE is very hard to debug and may
even be related to corner cases which are not easily reproducible;
hence, a dump file with debug information is created for the first
error CQEs, up to a limit that can be configured by a PMD probe
parameter.
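
Assuming the probe parameter matches the config field used below
(max_dump_files_num), it would be passed as a device argument at probe
time, e.g. (the PCI address is a placeholder):

	testpmd -w 0000:03:00.0,max_dump_files_num=32 -- -i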

Cc: stable at dpdk.org

Signed-off-by: Matan Azrad <matan at mellanox.com>
---
 drivers/net/mlx5/mlx5_prm.h           |  11 +++
 drivers/net/mlx5/mlx5_rxtx.c          | 166 ++++++++++++++++++++++++++++++++--
 drivers/net/mlx5/mlx5_rxtx.h          |  81 ++++++++++-------
 drivers/net/mlx5/mlx5_rxtx_vec_neon.h |  10 +-
 drivers/net/mlx5/mlx5_rxtx_vec_sse.h  |  10 +-
 drivers/net/mlx5/mlx5_txq.c           |   4 +-
 6 files changed, 231 insertions(+), 51 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h
index 8c42380..22db86b 100644
--- a/drivers/net/mlx5/mlx5_prm.h
+++ b/drivers/net/mlx5/mlx5_prm.h
@@ -153,6 +153,17 @@
 /* Maximum number of DS in WQE. */
 #define MLX5_DSEG_MAX 63
 
+/* The completion mode offset in the WQE control segment line 2. */
+#define MLX5_COMP_MODE_OFFSET 2
+
+/* Completion mode. */
+enum mlx5_completion_mode {
+	MLX5_COMP_ONLY_ERR = 0x0,
+	MLX5_COMP_ONLY_FIRST_ERR = 0x1,
+	MLX5_COMP_ALWAYS = 0x2,
+	MLX5_COMP_CQE_AND_EQE = 0x3,
+};
+
 /* Subset of struct mlx5_wqe_eth_seg. */
 struct mlx5_wqe_eth_seg_small {
 	uint32_t rsvd0;
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 5369fc1..36e2dd3 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -570,6 +570,141 @@
 }
 
 /**
+ * Move QP from error state to running state.
+ *
+ * @param txq
+ *   Pointer to TX queue structure.
+ * @param qp
+ *   The qp pointer for recovery.
+ *
+ * @return
+ *   0 on success, else errno value.
+ */
+static int
+tx_recover_qp(struct mlx5_txq_data *txq, struct ibv_qp *qp)
+{
+	int ret;
+	struct ibv_qp_attr mod = {
+					.qp_state = IBV_QPS_RESET,
+					.port_num = 1,
+				};
+	ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE);
+	if (ret) {
+		DRV_LOG(ERR, "Cannot change the Tx QP state to RESET %d\n",
+			ret);
+		return ret;
+	}
+	mod.qp_state = IBV_QPS_INIT;
+	ret = mlx5_glue->modify_qp(qp, &mod,
+				   (IBV_QP_STATE | IBV_QP_PORT));
+	if (ret) {
+		DRV_LOG(ERR, "Cannot change Tx QP state to INIT %d\n", ret);
+		return ret;
+	}
+	mod.qp_state = IBV_QPS_RTR;
+	ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE);
+	if (ret) {
+		DRV_LOG(ERR, "Cannot change Tx QP state to RTR %d\n", ret);
+		return ret;
+	}
+	mod.qp_state = IBV_QPS_RTS;
+	ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE);
+	if (ret) {
+		DRV_LOG(ERR, "Cannot change Tx QP state to RTS %d\n", ret);
+		return ret;
+	}
+	txq->wqe_ci = 0;
+	txq->wqe_pi = 0;
+	txq->elts_comp = 0;
+	return 0;
+}
+
+/* Return 1 if the error CQE is signed, otherwise sign it and return 0. */
+static int
+check_err_cqe_seen(volatile struct mlx5_err_cqe *err_cqe)
+{
+	static const uint8_t magic[] = "seen";
+	int ret = 1;
+	unsigned int i;
+
+	for (i = 0; i < sizeof(magic); ++i)
+		if (!ret || err_cqe->rsvd1[i] != magic[i]) {
+			ret = 0;
+			err_cqe->rsvd1[i] = magic[i];
+		}
+	return ret;
+}
+
+/**
+ * Handle error CQE.
+ *
+ * @param txq
+ *   Pointer to TX queue structure.
+ * @param err_cqe
+ *   Pointer to the error CQE.
+ *
+ * @return
+ *   The last Tx buffer element to free.
+ */
+uint16_t
+mlx5_tx_error_cqe_handle(struct mlx5_txq_data *txq,
+			 volatile struct mlx5_err_cqe *err_cqe)
+{
+	if (err_cqe->syndrome != MLX5_CQE_SYNDROME_WR_FLUSH_ERR) {
+		const uint16_t wqe_m = ((1 << txq->wqe_n) - 1);
+		struct mlx5_txq_ctrl *txq_ctrl =
+				container_of(txq, struct mlx5_txq_ctrl, txq);
+		uint16_t new_wqe_pi = rte_be_to_cpu_16(err_cqe->wqe_counter);
+		int seen = check_err_cqe_seen(err_cqe);
+
+		if (!seen && txq_ctrl->dump_file_n <
+		    txq_ctrl->priv->config.max_dump_files_num) {
+			MKSTR(err_str, "Unexpected CQE error syndrome "
+			      "0x%02x CQN = %u SQN = %u wqe_counter = %u "
+			      "wq_ci = %u cq_ci = %u", err_cqe->syndrome,
+			      txq_ctrl->cqn, txq->qp_num_8s >> 8,
+			      rte_be_to_cpu_16(err_cqe->wqe_counter),
+			      txq->wqe_ci, txq->cq_ci);
+			MKSTR(name, "dpdk_mlx5_port_%u_txq_%u_index_%u_%u",
+			      PORT_ID(txq_ctrl->priv), txq->idx,
+			      txq_ctrl->dump_file_n, (uint32_t)rte_rdtsc());
+			mlx5_dump_debug_information(name, NULL, err_str, 0);
+			mlx5_dump_debug_information(name, "MLX5 Error CQ:",
+						    (const void *)((uintptr_t)
+						    &(*txq->cqes)[0]),
+						    sizeof(*err_cqe) *
+						    (1 << txq->cqe_n));
+			mlx5_dump_debug_information(name, "MLX5 Error SQ:",
+						    (const void *)((uintptr_t)
+						    tx_mlx5_wqe(txq, 0)),
+						    MLX5_WQE_SIZE *
+						    (1 << txq->wqe_n));
+			txq_ctrl->dump_file_n++;
+		}
+		if (!seen)
+			/*
+			 * Count errors in WQE units.
+			 * Later it can be improved to count error packets,
+			 * for example, by parsing the SQ to find how many
+			 * packets should be counted for each WQE.
+			 */
+			txq->stats.oerrors += ((txq->wqe_ci & wqe_m) -
+						new_wqe_pi) & wqe_m;
+		if ((rte_eal_process_type() == RTE_PROC_PRIMARY) &&
+		    tx_recover_qp(txq, txq_ctrl->ibv->qp) == 0) {
+			txq->cq_ci++;
+			/* Release all the remaining buffers. */
+			return txq->elts_head;
+		}
+		/* Recovering failed - try again later on the same WQE. */
+	} else {
+		txq->cq_ci++;
+	}
+	/* Do not release buffers. */
+	return txq->elts_tail;
+}
+
+/**
  * DPDK callback for TX.
  *
  * @param dpdk_txq
@@ -709,7 +844,9 @@
 				wqe->ctrl = (rte_v128u32_t){
 					rte_cpu_to_be_32(txq->wqe_ci << 8),
 					rte_cpu_to_be_32(txq->qp_num_8s | 1),
-					0,
+					rte_cpu_to_be_32
+						(MLX5_COMP_ONLY_FIRST_ERR <<
+						 MLX5_COMP_MODE_OFFSET),
 					0,
 				};
 				ds = 1;
@@ -882,7 +1019,8 @@
 				rte_cpu_to_be_32((txq->wqe_ci << 8) |
 						 MLX5_OPCODE_TSO),
 				rte_cpu_to_be_32(txq->qp_num_8s | ds),
-				0,
+				rte_cpu_to_be_32(MLX5_COMP_ONLY_FIRST_ERR <<
+						 MLX5_COMP_MODE_OFFSET),
 				0,
 			};
 			wqe->eseg = (rte_v128u32_t){
@@ -897,7 +1035,8 @@
 				rte_cpu_to_be_32((txq->wqe_ci << 8) |
 						 MLX5_OPCODE_SEND),
 				rte_cpu_to_be_32(txq->qp_num_8s | ds),
-				0,
+				rte_cpu_to_be_32(MLX5_COMP_ONLY_FIRST_ERR <<
+						 MLX5_COMP_MODE_OFFSET),
 				0,
 			};
 			wqe->eseg = (rte_v128u32_t){
@@ -926,7 +1065,8 @@
 		/* A CQE slot must always be available. */
 		assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));
 		/* Request completion on last WQE. */
-		last_wqe->ctrl2 = rte_cpu_to_be_32(8);
+		last_wqe->ctrl2 = rte_cpu_to_be_32(MLX5_COMP_ALWAYS <<
+						   MLX5_COMP_MODE_OFFSET);
 		/* Save elts_head in unused "immediate" field of WQE. */
 		last_wqe->ctrl3 = txq->elts_head;
 		txq->elts_comp = 0;
@@ -973,7 +1113,8 @@
 	mpw->wqe->ctrl[0] = rte_cpu_to_be_32((MLX5_OPC_MOD_MPW << 24) |
 					     (txq->wqe_ci << 8) |
 					     MLX5_OPCODE_TSO);
-	mpw->wqe->ctrl[2] = 0;
+	mpw->wqe->ctrl[2] = rte_cpu_to_be_32(MLX5_COMP_ONLY_FIRST_ERR <<
+					     MLX5_COMP_MODE_OFFSET);
 	mpw->wqe->ctrl[3] = 0;
 	mpw->data.dseg[0] = (volatile struct mlx5_wqe_data_seg *)
 		(((uintptr_t)mpw->wqe) + (2 * MLX5_WQE_DWORD_SIZE));
@@ -1145,7 +1286,8 @@
 		/* A CQE slot must always be available. */
 		assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));
 		/* Request completion on last WQE. */
-		wqe->ctrl[2] = rte_cpu_to_be_32(8);
+		wqe->ctrl[2] = rte_cpu_to_be_32(MLX5_COMP_ALWAYS <<
+						MLX5_COMP_MODE_OFFSET);
 		/* Save elts_head in unused "immediate" field of WQE. */
 		wqe->ctrl[3] = elts_head;
 		txq->elts_comp = 0;
@@ -1189,7 +1331,8 @@
 	mpw->wqe->ctrl[0] = rte_cpu_to_be_32((MLX5_OPC_MOD_MPW << 24) |
 					     (txq->wqe_ci << 8) |
 					     MLX5_OPCODE_TSO);
-	mpw->wqe->ctrl[2] = 0;
+	mpw->wqe->ctrl[2] = rte_cpu_to_be_32(MLX5_COMP_ONLY_FIRST_ERR <<
+					     MLX5_COMP_MODE_OFFSET);
 	mpw->wqe->ctrl[3] = 0;
 	mpw->wqe->eseg.mss = rte_cpu_to_be_16(length);
 	mpw->wqe->eseg.inline_hdr_sz = 0;
@@ -1447,7 +1590,8 @@
 		/* A CQE slot must always be available. */
 		assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));
 		/* Request completion on last WQE. */
-		wqe->ctrl[2] = rte_cpu_to_be_32(8);
+		wqe->ctrl[2] = rte_cpu_to_be_32(MLX5_COMP_ALWAYS <<
+						MLX5_COMP_MODE_OFFSET);
 		/* Save elts_head in unused "immediate" field of WQE. */
 		wqe->ctrl[3] = elts_head;
 		txq->elts_comp = 0;
@@ -1491,7 +1635,8 @@
 		rte_cpu_to_be_32((MLX5_OPC_MOD_ENHANCED_MPSW << 24) |
 				 (txq->wqe_ci << 8) |
 				 MLX5_OPCODE_ENHANCED_MPSW);
-	mpw->wqe->ctrl[2] = 0;
+	mpw->wqe->ctrl[2] = rte_cpu_to_be_32(MLX5_COMP_ONLY_FIRST_ERR <<
+					     MLX5_COMP_MODE_OFFSET);
 	mpw->wqe->ctrl[3] = 0;
 	memset((void *)(uintptr_t)&mpw->wqe->eseg, 0, MLX5_WQE_DWORD_SIZE);
 	if (unlikely(padding)) {
@@ -1738,7 +1883,8 @@
 		/* A CQE slot must always be available. */
 		assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));
 		/* Request completion on last WQE. */
-		wqe->ctrl[2] = rte_cpu_to_be_32(8);
+		wqe->ctrl[2] = rte_cpu_to_be_32(MLX5_COMP_ALWAYS <<
+						MLX5_COMP_MODE_OFFSET);
 		/* Save elts_head in unused "immediate" field of WQE. */
 		wqe->ctrl[3] = elts_head;
 		txq->elts_comp = 0;
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index d944fbe..f4538eb 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -248,6 +248,8 @@ struct mlx5_txq_ctrl {
 	struct mlx5_priv *priv; /* Back pointer to private data. */
 	off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
 	void *bf_reg; /* BlueFlame register from Verbs. */
+	uint32_t cqn; /* CQ number. */
+	uint16_t dump_file_n; /* Number of dump files. */
 };
 
 #define MLX5_TX_BFREG(txq) \
@@ -353,6 +355,8 @@ uint16_t mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
 				  uint16_t pkts_n);
 uint16_t mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts,
 			    uint16_t pkts_n);
+__rte_noinline uint16_t mlx5_tx_error_cqe_handle(struct mlx5_txq_data *txq,
+					volatile struct mlx5_err_cqe *err_cqe);
 uint16_t mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n);
 void mlx5_rxq_initialize(struct mlx5_rxq_data *rxq);
 __rte_noinline int mlx5_rx_err_handle(struct mlx5_rxq_data *rxq,
@@ -508,6 +512,51 @@ enum mlx5_cqe_status {
 }
 
 /**
+ * Handle the next CQE.
+ *
+ * @param txq
+ *   Pointer to TX queue structure.
+ *
+ * @return
+ *   The last Tx buffer element to free.
+ */
+static __rte_always_inline uint16_t
+mlx5_tx_cqe_handle(struct mlx5_txq_data *txq)
+{
+	const unsigned int cqe_n = 1 << txq->cqe_n;
+	const unsigned int cqe_cnt = cqe_n - 1;
+	uint16_t last_elts;
+	union {
+		volatile struct mlx5_cqe *cqe;
+		volatile struct mlx5_err_cqe *err_cqe;
+	} u = {
+		.cqe =  &(*txq->cqes)[txq->cq_ci & cqe_cnt],
+	};
+	int ret = check_cqe(u.cqe, cqe_n, txq->cq_ci);
+
+	if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
+		if (unlikely(ret == MLX5_CQE_STATUS_ERR))
+			last_elts = mlx5_tx_error_cqe_handle(txq, u.err_cqe);
+		else
+			/* Do not release buffers. */
+			return txq->elts_tail;
+	} else {
+		uint16_t new_wqe_pi = rte_be_to_cpu_16(u.cqe->wqe_counter);
+		volatile struct mlx5_wqe_ctrl *ctrl =
+				(volatile struct mlx5_wqe_ctrl *)
+					tx_mlx5_wqe(txq, new_wqe_pi);
+
+		/* Release completion burst buffers. */
+		last_elts = ctrl->ctrl3;
+		txq->wqe_pi = new_wqe_pi;
+		txq->cq_ci++;
+	}
+	rte_compiler_barrier();
+	*txq->cq_db = rte_cpu_to_be_32(txq->cq_ci);
+	return last_elts;
+}
+
+/**
  * Manage TX completions.
  *
  * When sending a burst, mlx5_tx_burst() posts several WRs.
@@ -520,39 +569,13 @@ enum mlx5_cqe_status {
 {
 	const uint16_t elts_n = 1 << txq->elts_n;
 	const uint16_t elts_m = elts_n - 1;
-	const unsigned int cqe_n = 1 << txq->cqe_n;
-	const unsigned int cqe_cnt = cqe_n - 1;
 	uint16_t elts_free = txq->elts_tail;
 	uint16_t elts_tail;
-	uint16_t cq_ci = txq->cq_ci;
-	volatile struct mlx5_cqe *cqe = NULL;
-	volatile struct mlx5_wqe_ctrl *ctrl;
 	struct rte_mbuf *m, *free[elts_n];
 	struct rte_mempool *pool = NULL;
 	unsigned int blk_n = 0;
 
-	cqe = &(*txq->cqes)[cq_ci & cqe_cnt];
-	if (unlikely(check_cqe(cqe, cqe_n, cq_ci)))
-		return;
-#ifndef NDEBUG
-	if ((MLX5_CQE_OPCODE(cqe->op_own) == MLX5_CQE_RESP_ERR) ||
-	    (MLX5_CQE_OPCODE(cqe->op_own) == MLX5_CQE_REQ_ERR)) {
-		if (!check_cqe_seen(cqe)) {
-			DRV_LOG(ERR, "unexpected error CQE, Tx stopped");
-			rte_hexdump(stderr, "MLX5 TXQ:",
-				    (const void *)((uintptr_t)txq->wqes),
-				    ((1 << txq->wqe_n) *
-				     MLX5_WQE_SIZE));
-		}
-		return;
-	}
-#endif /* NDEBUG */
-	++cq_ci;
-	rte_cio_rmb();
-	txq->wqe_pi = rte_be_to_cpu_16(cqe->wqe_counter);
-	ctrl = (volatile struct mlx5_wqe_ctrl *)
-		tx_mlx5_wqe(txq, txq->wqe_pi);
-	elts_tail = ctrl->ctrl3;
+	elts_tail = mlx5_tx_cqe_handle(txq);
 	assert((elts_tail & elts_m) < (1 << txq->wqe_n));
 	/* Free buffers. */
 	while (elts_free != elts_tail) {
@@ -583,11 +606,7 @@ enum mlx5_cqe_status {
 		++elts_free;
 	}
 #endif
-	txq->cq_ci = cq_ci;
 	txq->elts_tail = elts_tail;
-	/* Update the consumer index. */
-	rte_compiler_barrier();
-	*txq->cq_db = rte_cpu_to_be_32(cq_ci);
 }
 
 /**
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
index 6a1b2bb..fd64a6e 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
@@ -165,7 +165,7 @@
 		ctrl = vreinterpretq_u8_u32((uint32x4_t) {
 				MLX5_OPC_MOD_MPW << 24 |
 				txq->wqe_ci << 8 | MLX5_OPCODE_TSO,
-				txq->qp_num_8s | ds, 0, 0});
+				txq->qp_num_8s | ds, 4, 0});
 		ctrl = vqtbl1q_u8(ctrl, ctrl_shuf_m);
 		vst1q_u8((void *)t_wqe, ctrl);
 		/* Fill ESEG in the header. */
@@ -182,7 +182,8 @@
 	if (txq->elts_comp >= MLX5_TX_COMP_THRESH) {
 		/* A CQE slot must always be available. */
 		assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));
-		wqe->ctrl[2] = rte_cpu_to_be_32(8);
+		wqe->ctrl[2] = rte_cpu_to_be_32(MLX5_COMP_ALWAYS <<
+						MLX5_COMP_MODE_OFFSET);
 		wqe->ctrl[3] = txq->elts_head;
 		txq->elts_comp = 0;
 	}
@@ -229,7 +230,7 @@
 	unsigned int pos;
 	uint16_t max_elts;
 	uint16_t max_wqe;
-	uint32_t comp_req = 0;
+	uint32_t comp_req;
 	const uint16_t wq_n = 1 << txq->wqe_n;
 	const uint16_t wq_mask = wq_n - 1;
 	uint16_t wq_idx = txq->wqe_ci & wq_mask;
@@ -284,12 +285,13 @@
 	}
 	if (txq->elts_comp + pkts_n < MLX5_TX_COMP_THRESH) {
 		txq->elts_comp += pkts_n;
+		comp_req = MLX5_COMP_ONLY_FIRST_ERR << MLX5_COMP_MODE_OFFSET;
 	} else {
 		/* A CQE slot must always be available. */
 		assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));
 		/* Request a completion. */
 		txq->elts_comp = 0;
-		comp_req = 8;
+		comp_req = MLX5_COMP_ALWAYS << MLX5_COMP_MODE_OFFSET;
 	}
 	/* Fill CTRL in the header. */
 	ctrl = vreinterpretq_u8_u32((uint32x4_t) {
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
index cc2f251..a495cd9 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
@@ -163,7 +163,7 @@
 		} while (--segs_n);
 		++wqe_ci;
 		/* Fill CTRL in the header. */
-		ctrl = _mm_set_epi32(0, 0, txq->qp_num_8s | ds,
+		ctrl = _mm_set_epi32(0, 4, txq->qp_num_8s | ds,
 				     MLX5_OPC_MOD_MPW << 24 |
 				     txq->wqe_ci << 8 | MLX5_OPCODE_TSO);
 		ctrl = _mm_shuffle_epi8(ctrl, shuf_mask_ctrl);
@@ -182,7 +182,8 @@
 	if (txq->elts_comp >= MLX5_TX_COMP_THRESH) {
 		/* A CQE slot must always be available. */
 		assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));
-		wqe->ctrl[2] = rte_cpu_to_be_32(8);
+		wqe->ctrl[2] = rte_cpu_to_be_32(MLX5_COMP_ALWAYS <<
+						MLX5_COMP_MODE_OFFSET);
 		wqe->ctrl[3] = txq->elts_head;
 		txq->elts_comp = 0;
 	}
@@ -229,7 +230,7 @@
 	unsigned int pos;
 	uint16_t max_elts;
 	uint16_t max_wqe;
-	uint32_t comp_req = 0;
+	uint32_t comp_req;
 	const uint16_t wq_n = 1 << txq->wqe_n;
 	const uint16_t wq_mask = wq_n - 1;
 	uint16_t wq_idx = txq->wqe_ci & wq_mask;
@@ -284,12 +285,13 @@
 	}
 	if (txq->elts_comp + pkts_n < MLX5_TX_COMP_THRESH) {
 		txq->elts_comp += pkts_n;
+		comp_req = MLX5_COMP_ONLY_FIRST_ERR << MLX5_COMP_MODE_OFFSET;
 	} else {
 		/* A CQE slot must always be available. */
 		assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));
 		/* Request a completion. */
 		txq->elts_comp = 0;
-		comp_req = 8;
+		comp_req = MLX5_COMP_ALWAYS << MLX5_COMP_MODE_OFFSET;
 	}
 	/* Fill CTRL in the header. */
 	ctrl = _mm_set_epi32(txq->elts_head, comp_req,
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index b281c45..ff6c564 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -430,8 +430,7 @@ struct mlx5_txq_ibv *
 	attr.cq = (struct ibv_cq_init_attr_ex){
 		.comp_mask = 0,
 	};
-	cqe_n = ((desc / MLX5_TX_COMP_THRESH) - 1) ?
-		((desc / MLX5_TX_COMP_THRESH) - 1) : 1;
+	cqe_n = desc / MLX5_TX_COMP_THRESH + 1;
 	if (is_empw_burst_func(tx_pkt_burst))
 		cqe_n += MLX5_TX_COMP_THRESH_INLINE_DIV;
 	tmpl.cq = mlx5_glue->create_cq(priv->sh->ctx, cqe_n, NULL, NULL, 0);
@@ -563,6 +562,7 @@ struct mlx5_txq_ibv *
 	txq_ibv->cq = tmpl.cq;
 	rte_atomic32_inc(&txq_ibv->refcnt);
 	txq_ctrl->bf_reg = qp.bf.reg;
+	txq_ctrl->cqn = cq_info.cqn;
 	txq_uar_init(txq_ctrl);
 	if (qp.comp_mask & MLX5DV_QP_MASK_UAR_MMAP_OFFSET) {
 		txq_ctrl->uar_mmap_offset = qp.uar_mmap_offset;
-- 
1.8.3.1


