[dpdk-stable] [PATCH v1 8/9] net/mlx5: recover secondary process Rx errors

Matan Azrad matan at mellanox.com
Thu May 30 12:20:38 CEST 2019


The RQ error recovery mechanism in the PMD invokes Verbs functions to
modify the RQ state in order to reset the RQ and then reactivate it.

These Verbs functions are not allowed to be invoked from a secondary
process, hence the PMD skips the recovery when the error is captured by
a queue owned by a secondary process.

Using the DPDK IPC mechanism, the secondary process can request Verbs
queue state modifications to be done synchronously by the primary
process.

Add support for Rx error recovery in secondary processes.

Cc: stable at dpdk.org

Signed-off-by: Matan Azrad <matan at mellanox.com>
---
 drivers/net/mlx5/mlx5.h         | 11 +++++
 drivers/net/mlx5/mlx5_mp.c      | 46 +++++++++++++++++++
 drivers/net/mlx5/mlx5_rxtx.c    | 98 +++++++++++++++++++++++++++++++++--------
 drivers/net/mlx5/mlx5_rxtx.h    |  3 ++
 drivers/net/mlx5/mlx5_trigger.c |  1 +
 5 files changed, 141 insertions(+), 18 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 4c339d0..85a6d02 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -61,6 +61,13 @@ enum mlx5_mp_req_type {
 	MLX5_MP_REQ_CREATE_MR,
 	MLX5_MP_REQ_START_RXTX,
 	MLX5_MP_REQ_STOP_RXTX,
+	MLX5_MP_REQ_QUEUE_STATE_MODIFY,
+};
+
+struct mlx5_mp_arg_queue_state_modify {
+	uint8_t is_wq; /* Set if WQ. */
+	uint16_t queue_id; /* DPDK queue ID. */
+	enum ibv_wq_state state; /* WQ requested state. */
 };
 
 /* Pameters for IPC. */
@@ -71,6 +78,8 @@ struct mlx5_mp_param {
 	RTE_STD_C11
 	union {
 		uintptr_t addr; /* MLX5_MP_REQ_CREATE_MR */
+		struct mlx5_mp_arg_queue_state_modify state_modify;
+		/* MLX5_MP_REQ_QUEUE_STATE_MODIFY */
 	} args;
 };
 
@@ -542,6 +551,8 @@ int mlx5_ctrl_flow(struct rte_eth_dev *dev,
 void mlx5_mp_req_stop_rxtx(struct rte_eth_dev *dev);
 int mlx5_mp_req_mr_create(struct rte_eth_dev *dev, uintptr_t addr);
 int mlx5_mp_req_verbs_cmd_fd(struct rte_eth_dev *dev);
+int mlx5_mp_req_queue_state_modify(struct rte_eth_dev *dev,
+				   struct mlx5_mp_arg_queue_state_modify *sm);
 void mlx5_mp_init_primary(void);
 void mlx5_mp_uninit_primary(void);
 void mlx5_mp_init_secondary(void);
diff --git a/drivers/net/mlx5/mlx5_mp.c b/drivers/net/mlx5/mlx5_mp.c
index cea74ad..3ccae51 100644
--- a/drivers/net/mlx5/mlx5_mp.c
+++ b/drivers/net/mlx5/mlx5_mp.c
@@ -85,6 +85,12 @@
 		res->result = 0;
 		ret = rte_mp_reply(&mp_res, peer);
 		break;
+	case MLX5_MP_REQ_QUEUE_STATE_MODIFY:
+		mp_init_msg(dev, &mp_res, param->type);
+		res->result = mlx5_queue_state_modify_primary
+					(dev, &param->args.state_modify);
+		ret = rte_mp_reply(&mp_res, peer);
+		break;
 	default:
 		rte_errno = EINVAL;
 		DRV_LOG(ERR, "port %u invalid mp request type",
@@ -271,6 +277,46 @@
 }
 
 /**
+ * Request Verbs queue state modification to the primary process.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet structure.
+ * @param sm
+ *   State modify parameters.
+ *
+ * @return
+ *   0 on success, non-zero otherwise (-rte_errno when the IPC request fails).
+ */
+int
+mlx5_mp_req_queue_state_modify(struct rte_eth_dev *dev,
+			       struct mlx5_mp_arg_queue_state_modify *sm)
+{
+	struct rte_mp_msg mp_req;
+	struct rte_mp_msg *mp_res;
+	struct rte_mp_reply mp_rep;
+	struct mlx5_mp_param *req = (struct mlx5_mp_param *)mp_req.param;
+	struct mlx5_mp_param *res;
+	struct timespec ts = {.tv_sec = MLX5_MP_REQ_TIMEOUT_SEC, .tv_nsec = 0};
+	int ret;
+
+	assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
+	mp_init_msg(dev, &mp_req, MLX5_MP_REQ_QUEUE_STATE_MODIFY);
+	req->args.state_modify = *sm;
+	ret = rte_mp_request_sync(&mp_req, &mp_rep, &ts);
+	if (ret) {
+		DRV_LOG(ERR, "port %u request to primary process failed",
+			dev->data->port_id);
+		return -rte_errno;
+	}
+	assert(mp_rep.nb_received == 1);
+	mp_res = &mp_rep.msgs[0];
+	res = (struct mlx5_mp_param *)mp_res->param;
+	ret = res->result;
+	free(mp_rep.msgs);
+	return ret;
+}
+
+/**
  * Request Verbs command file descriptor for mmap to the primary process.
  *
  * @param[in] dev
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 36e2dd3..cb3baad 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -2031,6 +2031,75 @@
 }
 
 /**
+ * Modify a Verbs queue state.
+ * This must be called from the primary process.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ * @param sm
+ *   State modify request parameters.
+ *
+ * @return
+ *   0 in case of success else non-zero value and rte_errno is set.
+ */
+int
+mlx5_queue_state_modify_primary(struct rte_eth_dev *dev,
+			const struct mlx5_mp_arg_queue_state_modify *sm)
+{
+	int ret;
+	struct mlx5_priv *priv = dev->data->dev_private;
+
+	if (sm->is_wq) {
+		struct ibv_wq_attr mod = {
+			.attr_mask = IBV_WQ_ATTR_STATE,
+			.wq_state = sm->state,
+		};
+		struct mlx5_rxq_data *rxq = (*priv->rxqs)[sm->queue_id];
+		struct mlx5_rxq_ctrl *rxq_ctrl =
+			container_of(rxq, struct mlx5_rxq_ctrl, rxq);
+
+		ret = mlx5_glue->modify_wq(rxq_ctrl->ibv->wq, &mod);
+		if (ret) {
+			DRV_LOG(ERR, "Cannot change Rx WQ state to %u  - %s\n",
+					sm->state, strerror(errno));
+			rte_errno = errno;
+			return ret;
+		}
+	}
+	return 0;
+}
+
+/**
+ * Modify a Verbs queue state.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ * @param sm
+ *   State modify request parameters.
+ *
+ * @return
+ *   0 in case of success else non-zero value.
+ */
+static int
+mlx5_queue_state_modify(struct rte_eth_dev *dev,
+			struct mlx5_mp_arg_queue_state_modify *sm)
+{
+	int ret = 0;
+
+	switch (rte_eal_process_type()) {
+	case RTE_PROC_PRIMARY:
+		ret = mlx5_queue_state_modify_primary(dev, sm);
+		break;
+	case RTE_PROC_SECONDARY:
+		ret = mlx5_mp_req_queue_state_modify(dev, sm);
+		break;
+	default:
+		break;
+	}
+	return ret;
+}
+
+/**
  * Handle a Rx error.
  * The function inserts the RQ state to reset when the first error CQE is
  * shown, then drains the CQ by the caller function loop. When the CQ is empty,
@@ -2053,15 +2122,13 @@
 	const unsigned int wqe_n = 1 << rxq->elts_n;
 	struct mlx5_rxq_ctrl *rxq_ctrl =
 			container_of(rxq, struct mlx5_rxq_ctrl, rxq);
-	struct ibv_wq_attr mod = {
-		.attr_mask = IBV_WQ_ATTR_STATE,
-	};
 	union {
 		volatile struct mlx5_cqe *cqe;
 		volatile struct mlx5_err_cqe *err_cqe;
 	} u = {
 		.cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_mask],
 	};
+	struct mlx5_mp_arg_queue_state_modify sm;
 	int ret;
 
 	switch (rxq->err_state) {
@@ -2069,21 +2136,17 @@
 		rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_RESET;
 		/* Fall-through */
 	case MLX5_RXQ_ERR_STATE_NEED_RESET:
-		if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+		sm.is_wq = 1;
+		sm.queue_id = rxq->idx;
+		sm.state = IBV_WQS_RESET;
+		if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv), &sm))
 			return -1;
-		mod.wq_state = IBV_WQS_RESET;
-		ret = mlx5_glue->modify_wq(rxq_ctrl->ibv->wq, &mod);
-		if (ret) {
-			DRV_LOG(ERR, "Cannot change Rx WQ state to RESET %s\n",
-				strerror(errno));
-			return -1;
-		}
 		if (rxq_ctrl->dump_file_n <
 		    rxq_ctrl->priv->config.max_dump_files_num) {
 			MKSTR(err_str, "Unexpected CQE error syndrome "
 			      "0x%02x CQN = %u RQN = %u wqe_counter = %u"
 			      " rq_ci = %u cq_ci = %u", u.err_cqe->syndrome,
-			      rxq->cqn, rxq_ctrl->ibv->wq->wq_num,
+			      rxq->cqn, rxq_ctrl->wqn,
 			      rte_be_to_cpu_16(u.err_cqe->wqe_counter),
 			      rxq->rq_ci << rxq->sges_n, rxq->cq_ci);
 			MKSTR(name, "dpdk_mlx5_port_%u_rxq_%u_%u",
@@ -2113,13 +2176,12 @@
 			 */
 			*rxq->rq_db = rte_cpu_to_be_32(0);
 			rte_cio_wmb();
-			mod.wq_state = IBV_WQS_RDY;
-			ret = mlx5_glue->modify_wq(rxq_ctrl->ibv->wq, &mod);
-			if (ret) {
-				DRV_LOG(ERR, "Cannot change Rx WQ state to RDY"
-					" %s\n", strerror(errno));
+			sm.is_wq = 1;
+			sm.queue_id = rxq->idx;
+			sm.state = IBV_WQS_RDY;
+			if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv),
+						    &sm))
 				return -1;
-			}
 			if (mbuf_prepare) {
 				const uint16_t q_mask = wqe_n - 1;
 				uint16_t elt_idx;
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index f4538eb..92fba29 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -161,6 +161,7 @@ struct mlx5_rxq_ctrl {
 	unsigned int irq:1; /* Whether IRQ is enabled. */
 	uint32_t flow_mark_n; /* Number of Mark/Flag flows using this Queue. */
 	uint32_t flow_tunnels_n[MLX5_FLOW_TUNNEL]; /* Tunnels counters. */
+	uint32_t wqn; /* WQ number. */
 	uint16_t dump_file_n; /* Number of dump files. */
 };
 
@@ -374,6 +375,8 @@ uint16_t removed_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts,
 uint32_t mlx5_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id);
 void mlx5_dump_debug_information(const char *path, const char *title,
 				 const void *buf, unsigned int len);
+int mlx5_queue_state_modify_primary(struct rte_eth_dev *dev,
+			const struct mlx5_mp_arg_queue_state_modify *sm);
 
 /* Vectorized version of mlx5_rxtx.c */
 int mlx5_check_raw_vec_tx_support(struct rte_eth_dev *dev);
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index b7fde35..b6af539 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -126,6 +126,7 @@
 		rxq_ctrl->ibv = mlx5_rxq_ibv_new(dev, i);
 		if (!rxq_ctrl->ibv)
 			goto error;
+		rxq_ctrl->wqn = rxq_ctrl->ibv->wq->wq_num;
 	}
 	return 0;
 error:
-- 
1.8.3.1



More information about the stable mailing list