[dpdk-dev] [PATCH 3/4] net/mlx5: preserve indirect actions across port restart

Dmitry Kozlyuk dkozlyuk at nvidia.com
Tue Jul 27 09:31:19 CEST 2021


MLX5 PMD uses reference counting to manage RX queue resources.
After port stop, shared RSS actions kept references to RX queues,
preventing resource release. As a result, the internal PMD mempool
for such queues was exhausted after a number of port restarts.
Diagnostic message from rte_eth_dev_start():

    Rx queue allocation failed: Cannot allocate memory

Dereference RX queues used by indirect actions on port stop (detach)
and restore references on port start (attach) in order to allow RX queue
resource release, but keep indirect actions across the port restart.
Replace queue IDs in HW with the drop queue ID on detach and restore
the actual queue IDs on attach.

Fixes: 4b61b8774be9 ("ethdev: introduce indirect flow action")
Cc: bingz at nvidia.com
Cc: stable at dpdk.org

Signed-off-by: Dmitry Kozlyuk <dkozlyuk at nvidia.com>
Acked-by: Matan Azrad <matan at nvidia.com>
---
 drivers/net/mlx5/mlx5_flow.c    | 194 ++++++++++++++++++++++++++++----
 drivers/net/mlx5/mlx5_flow.h    |   2 +
 drivers/net/mlx5/mlx5_rx.h      |   4 +
 drivers/net/mlx5/mlx5_rxq.c     |  99 ++++++++++++++--
 drivers/net/mlx5/mlx5_trigger.c |  10 ++
 5 files changed, 275 insertions(+), 34 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index e8d2678877..5343720ec9 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -1524,6 +1524,58 @@ mlx5_flow_validate_action_queue(const struct rte_flow_action *action,
 	return 0;
 }
 
+/**
+ * Validate queue numbers for device RSS.
+ *
+ * @param[in] dev
+ *   Configured device.
+ * @param[in] queues
+ *   Array of queue numbers.
+ * @param[in] queues_n
+ *   Size of the @p queues array.
+ * @param[out] error
+ *   On error, filled with a textual error description.
+ * @param[out] queue_idx
+ *   On error, filled with an offending queue index in @p queues array.
+ *
+ * @return
+ *   0 on success, -EINVAL or -ENOTSUP on error (rte_errno is not set).
+ */
+static int
+mlx5_validate_rss_queues(const struct rte_eth_dev *dev,
+			 const uint16_t *queues, uint32_t queues_n,
+			 const char **error, uint32_t *queue_idx)
+{
+	const struct mlx5_priv *priv = dev->data->dev_private;
+	enum mlx5_rxq_type rxq_type = MLX5_RXQ_TYPE_UNDEFINED;
+	uint32_t i;
+
+	for (i = 0; i != queues_n; ++i) {
+		struct mlx5_rxq_ctrl *rxq_ctrl;
+
+		if (queues[i] >= priv->rxqs_n) {
+			*error = "queue index out of range";
+			*queue_idx = i;
+			return -EINVAL;
+		}
+		if (!(*priv->rxqs)[queues[i]]) {
+			*error =  "queue is not configured";
+			*queue_idx = i;
+			return -EINVAL;
+		}
+		rxq_ctrl = container_of((*priv->rxqs)[queues[i]],
+					struct mlx5_rxq_ctrl, rxq);
+		if (i == 0)
+			rxq_type = rxq_ctrl->type;
+		if (rxq_type != rxq_ctrl->type) {
+			*error = "combining hairpin and regular RSS queues is not supported";
+			*queue_idx = i;
+			return -ENOTSUP;
+		}
+	}
+	return 0;
+}
+
 /*
  * Validate the rss action.
  *
@@ -1544,8 +1596,9 @@ mlx5_validate_action_rss(struct rte_eth_dev *dev,
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
 	const struct rte_flow_action_rss *rss = action->conf;
-	enum mlx5_rxq_type rxq_type = MLX5_RXQ_TYPE_UNDEFINED;
-	unsigned int i;
+	int ret;
+	const char *message;
+	uint32_t queue_idx;
 
 	if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
 	    rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
@@ -1609,27 +1662,12 @@ mlx5_validate_action_rss(struct rte_eth_dev *dev,
 		return rte_flow_error_set(error, EINVAL,
 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
 					  NULL, "No queues configured");
-	for (i = 0; i != rss->queue_num; ++i) {
-		struct mlx5_rxq_ctrl *rxq_ctrl;
-
-		if (rss->queue[i] >= priv->rxqs_n)
-			return rte_flow_error_set
-				(error, EINVAL,
-				 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
-				 &rss->queue[i], "queue index out of range");
-		if (!(*priv->rxqs)[rss->queue[i]])
-			return rte_flow_error_set
-				(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_CONF,
-				 &rss->queue[i], "queue is not configured");
-		rxq_ctrl = container_of((*priv->rxqs)[rss->queue[i]],
-					struct mlx5_rxq_ctrl, rxq);
-		if (i == 0)
-			rxq_type = rxq_ctrl->type;
-		if (rxq_type != rxq_ctrl->type)
-			return rte_flow_error_set
-				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION_CONF,
-				 &rss->queue[i],
-				 "combining hairpin and regular RSS queues is not supported");
+	ret = mlx5_validate_rss_queues(dev, rss->queue, rss->queue_num,
+				       &message, &queue_idx);
+	if (ret != 0) {
+		return rte_flow_error_set(error, -ret,
+					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+					  &rss->queue[queue_idx], message);
 	}
 	return 0;
 }
@@ -8493,6 +8531,116 @@ mlx5_action_handle_flush(struct rte_eth_dev *dev)
 	return ret;
 }
 
+/**
+ * Validate existing indirect actions against current device configuration
+ * and attach them to device resources.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ *
+ * @return
+ *   0 on success, negative errno otherwise (rte_errno not set on validation error).
+ */
+int
+mlx5_action_handle_attach(struct rte_eth_dev *dev)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_indexed_pool *ipool =
+			priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS];
+	struct mlx5_shared_action_rss *shared_rss, *shared_rss_last;
+	int ret = 0;
+	uint32_t idx;
+
+	ILIST_FOREACH(ipool, priv->rss_shared_actions, idx, shared_rss, next) {
+		struct mlx5_ind_table_obj *ind_tbl = shared_rss->ind_tbl;
+		const char *message;
+		uint32_t queue_idx;
+
+		ret = mlx5_validate_rss_queues(dev, ind_tbl->queues,
+					       ind_tbl->queues_n,
+					       &message, &queue_idx);
+		if (ret != 0) {
+			DRV_LOG(ERR, "Port %u cannot use queue %u in RSS: %s",
+				dev->data->port_id, ind_tbl->queues[queue_idx],
+				message);
+			break;
+		}
+	}
+	if (ret != 0)
+		return ret;
+	ILIST_FOREACH(ipool, priv->rss_shared_actions, idx, shared_rss, next) {
+		struct mlx5_ind_table_obj *ind_tbl = shared_rss->ind_tbl;
+
+		ret = mlx5_ind_table_obj_attach(dev, ind_tbl);
+		if (ret != 0) {
+			DRV_LOG(ERR, "Port %u could not attach "
+				"indirection table obj %p",
+				dev->data->port_id, (void *)ind_tbl);
+			goto error;
+		}
+	}
+	return 0;
+error:
+	shared_rss_last = shared_rss;
+	ILIST_FOREACH(ipool, priv->rss_shared_actions, idx, shared_rss, next) {
+		struct mlx5_ind_table_obj *ind_tbl = shared_rss->ind_tbl;
+
+		if (shared_rss == shared_rss_last)
+			break;
+		if (mlx5_ind_table_obj_detach(dev, ind_tbl) != 0)
+			DRV_LOG(CRIT, "Port %u could not detach "
+				"indirection table obj %p on rollback",
+				dev->data->port_id, (void *)ind_tbl);
+	}
+	return ret;
+}
+
+/**
+ * Detach shared RSS indirection tables of the device; roll back on failure.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_action_handle_detach(struct rte_eth_dev *dev)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_indexed_pool *ipool =
+			priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS];
+	struct mlx5_shared_action_rss *shared_rss, *shared_rss_last;
+	int ret = 0;
+	uint32_t idx;
+
+	ILIST_FOREACH(ipool, priv->rss_shared_actions, idx, shared_rss, next) {
+		struct mlx5_ind_table_obj *ind_tbl = shared_rss->ind_tbl;
+
+		ret = mlx5_ind_table_obj_detach(dev, ind_tbl);
+		if (ret != 0) {
+			DRV_LOG(ERR, "Port %u could not detach "
+				"indirection table obj %p",
+				dev->data->port_id, (void *)ind_tbl);
+			goto error;
+		}
+	}
+	return 0;
+error:
+	shared_rss_last = shared_rss;
+	ILIST_FOREACH(ipool, priv->rss_shared_actions, idx, shared_rss, next) {
+		struct mlx5_ind_table_obj *ind_tbl = shared_rss->ind_tbl;
+
+		if (shared_rss == shared_rss_last)
+			break;
+		if (mlx5_ind_table_obj_attach(dev, ind_tbl) != 0)
+			DRV_LOG(CRIT, "Port %u could not attach "
+				"indirection table obj %p on rollback",
+				dev->data->port_id, (void *)ind_tbl);
+	}
+	return ret;
+}
+
 #ifndef HAVE_MLX5DV_DR
 #define MLX5_DOMAIN_SYNC_FLOW ((1 << 0) | (1 << 1))
 #else
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index da39eeb596..251d643f8c 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -1575,6 +1575,8 @@ struct mlx5_flow_meter_sub_policy *mlx5_flow_meter_sub_policy_rss_prepare
 void mlx5_flow_destroy_sub_policy_with_rxq(struct rte_eth_dev *dev,
 		struct mlx5_flow_meter_policy *mtr_policy);
 int mlx5_flow_dv_discover_counter_offset_support(struct rte_eth_dev *dev);
+int mlx5_action_handle_attach(struct rte_eth_dev *dev);
+int mlx5_action_handle_detach(struct rte_eth_dev *dev);
 int mlx5_action_handle_flush(struct rte_eth_dev *dev);
 void mlx5_release_tunnel_hub(struct mlx5_dev_ctx_shared *sh, uint16_t port_id);
 int mlx5_alloc_tunnel_hub(struct mlx5_dev_ctx_shared *sh);
diff --git a/drivers/net/mlx5/mlx5_rx.h b/drivers/net/mlx5/mlx5_rx.h
index 3f2b99fb65..7319ad0264 100644
--- a/drivers/net/mlx5/mlx5_rx.h
+++ b/drivers/net/mlx5/mlx5_rx.h
@@ -222,6 +222,10 @@ int mlx5_ind_table_obj_modify(struct rte_eth_dev *dev,
 			      struct mlx5_ind_table_obj *ind_tbl,
 			      uint16_t *queues, const uint32_t queues_n,
 			      bool standalone);
+int mlx5_ind_table_obj_attach(struct rte_eth_dev *dev,
+			      struct mlx5_ind_table_obj *ind_tbl);
+int mlx5_ind_table_obj_detach(struct rte_eth_dev *dev,
+			      struct mlx5_ind_table_obj *ind_tbl);
 struct mlx5_list_entry *mlx5_hrxq_create_cb(void *tool_ctx, void *cb_ctx);
 int mlx5_hrxq_match_cb(void *tool_ctx, struct mlx5_list_entry *entry,
 		       void *cb_ctx);
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 49165f482e..1140f6067e 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -2024,6 +2024,26 @@ mlx5_ind_table_obj_new(struct rte_eth_dev *dev, const uint16_t *queues,
 	return ind_tbl;
 }
 
+static int
+mlx5_ind_table_obj_check_standalone(struct rte_eth_dev *dev __rte_unused,
+				    struct mlx5_ind_table_obj *ind_tbl)
+{
+	uint32_t refcnt;
+
+	refcnt = __atomic_load_n(&ind_tbl->refcnt, __ATOMIC_RELAXED);
+	if (refcnt <= 1)
+		return 0;
+	/*
+	 * Only indirection tables with at most 1 reference may be
+	 * modified: reject any other table with EINVAL.
+	 */
+	DRV_LOG(DEBUG,
+		"Port %u cannot modify indirection table %p (refcnt %u > 1).",
+		dev->data->port_id, (void *)ind_tbl, refcnt);
+	rte_errno = EINVAL;
+	return -rte_errno;
+}
+
 /**
  * Modify an indirection table.
  *
@@ -2056,18 +2076,8 @@ mlx5_ind_table_obj_modify(struct rte_eth_dev *dev,
 
 	MLX5_ASSERT(standalone);
 	RTE_SET_USED(standalone);
-	if (__atomic_load_n(&ind_tbl->refcnt, __ATOMIC_RELAXED) > 1) {
-		/*
-		 * Modification of indirection ntables having more than 1
-		 * reference unsupported. Intended for standalone indirection
-		 * tables only.
-		 */
-		DRV_LOG(DEBUG,
-			"Port %u cannot modify indirection table (refcnt> 1).",
-			dev->data->port_id);
-		rte_errno = EINVAL;
+	if (mlx5_ind_table_obj_check_standalone(dev, ind_tbl) < 0)
 		return -rte_errno;
-	}
 	for (i = 0; i != queues_n; ++i) {
 		if (!mlx5_rxq_get(dev, queues[i])) {
 			ret = -rte_errno;
@@ -2093,6 +2103,73 @@ mlx5_ind_table_obj_modify(struct rte_eth_dev *dev,
 	return ret;
 }
 
+/**
+ * Attach an indirection table to its queues.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ * @param ind_tbl
+ *   Indirection table to attach.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_ind_table_obj_attach(struct rte_eth_dev *dev,
+			  struct mlx5_ind_table_obj *ind_tbl)
+{
+	unsigned int i;
+	int ret;
+
+	ret = mlx5_ind_table_obj_modify(dev, ind_tbl, ind_tbl->queues,
+					ind_tbl->queues_n, true);
+	if (ret != 0) {
+		DRV_LOG(ERR, "Port %u could not modify indirect table obj %p",
+			dev->data->port_id, (void *)ind_tbl);
+		return ret;
+	}
+	for (i = 0; i < ind_tbl->queues_n; i++)
+		mlx5_rxq_get(dev, ind_tbl->queues[i]);
+	return 0;
+}
+
+/**
+ * Detach an indirection table from its queues.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ * @param ind_tbl
+ *   Indirection table to detach.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_ind_table_obj_detach(struct rte_eth_dev *dev,
+			  struct mlx5_ind_table_obj *ind_tbl)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	const unsigned int n = rte_is_power_of_2(ind_tbl->queues_n) ?
+			       log2above(ind_tbl->queues_n) :
+			       log2above(priv->config.ind_table_max_size);
+	unsigned int i;
+	int ret;
+
+	ret = mlx5_ind_table_obj_check_standalone(dev, ind_tbl);
+	if (ret != 0)
+		return ret;
+	MLX5_ASSERT(priv->obj_ops.ind_table_modify);
+	ret = priv->obj_ops.ind_table_modify(dev, n, NULL, 0, ind_tbl);
+	if (ret != 0) {
+		DRV_LOG(ERR, "Port %u could not modify indirect table obj %p",
+			dev->data->port_id, (void *)ind_tbl);
+		return ret;
+	}
+	for (i = 0; i < ind_tbl->queues_n; i++)
+		mlx5_rxq_release(dev, ind_tbl->queues[i]);
+	return ret;
+}
+
 int
 mlx5_hrxq_match_cb(void *tool_ctx __rte_unused, struct mlx5_list_entry *entry,
 		   void *cb_ctx)
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index a9d5d58fd9..6761a84a68 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -14,6 +14,7 @@
 #include <mlx5_malloc.h>
 
 #include "mlx5.h"
+#include "mlx5_flow.h"
 #include "mlx5_mr.h"
 #include "mlx5_rx.h"
 #include "mlx5_tx.h"
@@ -1115,6 +1116,14 @@ mlx5_dev_start(struct rte_eth_dev *dev)
 	mlx5_rxq_timestamp_set(dev);
 	/* Set a mask and offset of scheduling on timestamp into Tx queues. */
 	mlx5_txq_dynf_timestamp_set(dev);
+	/* Attach indirection table objects detached on port stop. */
+	ret = mlx5_action_handle_attach(dev);
+	if (ret) {
+		DRV_LOG(ERR,
+			"port %u failed to attach indirect actions: %s",
+			dev->data->port_id, rte_strerror(rte_errno));
+		goto error;
+	}
 	/*
 	 * In non-cached mode, it only needs to start the default mreg copy
 	 * action and no flow created by application exists anymore.
@@ -1187,6 +1196,7 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
 	/* All RX queue flags will be cleared in the flush interface. */
 	mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, true);
 	mlx5_flow_meter_rxq_flush(dev);
+	mlx5_action_handle_detach(dev);
 	mlx5_rx_intr_vec_disable(dev);
 	priv->sh->port[priv->dev_port - 1].ih_port_id = RTE_MAX_ETHPORTS;
 	priv->sh->port[priv->dev_port - 1].devx_ih_port_id = RTE_MAX_ETHPORTS;
-- 
2.25.1



More information about the dev mailing list