[dpdk-dev] [PATCH v3 14/30] net/mlx5: make indirection tables shareable

Nelio Laranjeiro nelio.laranjeiro at 6wind.com
Mon Oct 9 16:44:50 CEST 2017


On the Verbs side, an indirection table holds the list of final work
queues used to spread packets on behalf of a higher level queue.  This
indirection table can be shared among the hash Rx queues which point to
it.

Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro at 6wind.com>
Acked-by: Yongseok Koh <yskoh at mellanox.com>
---
 drivers/net/mlx5/mlx5.c       |   3 +
 drivers/net/mlx5/mlx5.h       |   2 +
 drivers/net/mlx5/mlx5_flow.c  |  83 ++++++++++-------------
 drivers/net/mlx5/mlx5_rxq.c   | 153 ++++++++++++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_rxtx.h  |  17 +++++
 drivers/net/mlx5/mlx5_utils.h |   2 +
 6 files changed, 214 insertions(+), 46 deletions(-)
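
Note for readers skimming the diff: the new helpers
mlx5_priv_ind_table_ibv_{get,new,release}() implement a get-or-create
pattern around a reference-counted list, which is what lets several
hash Rx queues share one Verbs indirection table.  The standalone
sketch below models only that pattern; the ind_tbl type and the helper
names are simplified stand-ins for illustration, not the driver's
actual structures.

#include <assert.h>
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/queue.h>

struct ind_tbl {
	LIST_ENTRY(ind_tbl) next; /* List hook, as in mlx5_ind_table_ibv. */
	int refcnt; /* Reference counter. */
	uint16_t queues_n; /* Number of queues in the list. */
	uint16_t queues[]; /* Queue list. */
};

static LIST_HEAD(, ind_tbl) ind_tbls = LIST_HEAD_INITIALIZER(ind_tbls);

/* Look up a table matching the queue list, take a reference if found. */
static struct ind_tbl *
ind_tbl_get(const uint16_t queues[], uint16_t queues_n)
{
	struct ind_tbl *tbl;

	LIST_FOREACH(tbl, &ind_tbls, next)
		if (tbl->queues_n == queues_n &&
		    !memcmp(tbl->queues, queues,
			    queues_n * sizeof(queues[0])))
			break;
	if (tbl)
		++tbl->refcnt;
	return tbl;
}

/* Create a table, insert it in the list, return it with refcnt == 1. */
static struct ind_tbl *
ind_tbl_new(const uint16_t queues[], uint16_t queues_n)
{
	struct ind_tbl *tbl;

	tbl = calloc(1, sizeof(*tbl) + queues_n * sizeof(queues[0]));
	if (!tbl)
		return NULL;
	memcpy(tbl->queues, queues, queues_n * sizeof(queues[0]));
	tbl->queues_n = queues_n;
	tbl->refcnt = 1;
	LIST_INSERT_HEAD(&ind_tbls, tbl, next);
	return tbl;
}

/* Drop one reference, destroy on last release, else report EBUSY. */
static int
ind_tbl_release(struct ind_tbl *tbl)
{
	if (--tbl->refcnt == 0) {
		LIST_REMOVE(tbl, next);
		free(tbl);
		return 0;
	}
	return EBUSY;
}

int
main(void)
{
	const uint16_t queues[] = { 0, 1, 2, 3 };
	struct ind_tbl *a;
	struct ind_tbl *b;

	/* First user: the lookup misses, so the table is created. */
	a = ind_tbl_get(queues, 4);
	if (!a)
		a = ind_tbl_new(queues, 4);
	/* Second user with the same queue list shares the same table. */
	b = ind_tbl_get(queues, 4);
	assert(a && b == a && a->refcnt == 2);
	if (ind_tbl_release(a) == EBUSY)
		printf("still referenced after first release\n");
	if (ind_tbl_release(b) == 0)
		printf("destroyed on last release\n");
	return 0;
}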

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index c2c3d1b..46b4067 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -235,6 +235,9 @@ mlx5_dev_close(struct rte_eth_dev *dev)
 	if (priv->reta_idx != NULL)
 		rte_free(priv->reta_idx);
 	priv_socket_uninit(priv);
+	ret = mlx5_priv_ind_table_ibv_verify(priv);
+	if (ret)
+		WARN("%p: some Indirection table still remain", (void *)priv);
 	ret = mlx5_priv_rxq_ibv_verify(priv);
 	if (ret)
 		WARN("%p: some Verbs Rx queue still remain", (void *)priv);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index d0ef21a..ab17ce6 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -151,6 +151,8 @@ struct priv {
 	LIST_HEAD(rxqibv, mlx5_rxq_ibv) rxqsibv; /* Verbs Rx queues. */
 	LIST_HEAD(txq, mlx5_txq_ctrl) txqsctrl; /* DPDK Tx queues. */
 	LIST_HEAD(txqibv, mlx5_txq_ibv) txqsibv; /* Verbs Tx queues. */
+	/* Verbs Indirection tables. */
+	LIST_HEAD(ind_tables, mlx5_ind_table_ibv) ind_tbls;
 	uint32_t link_speed_capa; /* Link speed capabilities. */
 	struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */
 	rte_spinlock_t lock; /* Lock for control functions. */
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 362ec91..dc9adeb 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -90,7 +90,7 @@ mlx5_flow_create_vxlan(const struct rte_flow_item *item,
 struct rte_flow {
 	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
 	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
-	struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
+	struct mlx5_ind_table_ibv *ind_table; /**< Indirection table. */
 	struct ibv_qp *qp; /**< Verbs queue pair. */
 	struct ibv_flow *ibv_flow; /**< Verbs flow. */
 	struct ibv_wq *wq; /**< Verbs work queue. */
@@ -98,8 +98,6 @@ struct rte_flow {
 	uint32_t mark:1; /**< Set if the flow is marked. */
 	uint32_t drop:1; /**< Drop queue. */
 	uint64_t hash_fields; /**< Fields that participate in the hash. */
-	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< List of queues. */
-	uint16_t queues_n; /**< Number of queues in the list. */
 };
 
 /** Static initializer for items. */
@@ -1089,9 +1087,6 @@ priv_flow_create_action_queue(struct priv *priv,
 {
 	struct rte_flow *rte_flow;
 	unsigned int i;
-	unsigned int j;
-	const unsigned int wqs_n = 1 << log2above(flow->actions.queues_n);
-	struct ibv_wq *wqs[wqs_n];
 
 	assert(priv->pd);
 	assert(priv->ctx);
@@ -1102,36 +1097,29 @@ priv_flow_create_action_queue(struct priv *priv,
 				   NULL, "cannot allocate flow memory");
 		return NULL;
 	}
-	for (i = 0; i < flow->actions.queues_n; ++i) {
-		struct mlx5_rxq_ibv *rxq_ibv =
-			mlx5_priv_rxq_ibv_get(priv, flow->actions.queues[i]);
-
-		wqs[i] = rxq_ibv->wq;
-		rte_flow->queues[i] = flow->actions.queues[i];
-		++rte_flow->queues_n;
-		(*priv->rxqs)[flow->actions.queues[i]]->mark |=
-			flow->actions.mark;
-	}
-	/* finalise indirection table. */
-	for (j = 0; i < wqs_n; ++i, ++j) {
-		wqs[i] = wqs[j];
-		if (j == flow->actions.queues_n)
-			j = 0;
+	for (i = 0; i != flow->actions.queues_n; ++i) {
+		struct mlx5_rxq_data *q =
+			(*priv->rxqs)[flow->actions.queues[i]];
+
+		q->mark |= flow->actions.mark;
 	}
 	rte_flow->mark = flow->actions.mark;
 	rte_flow->ibv_attr = flow->ibv_attr;
 	rte_flow->hash_fields = flow->hash_fields;
-	rte_flow->ind_table = ibv_create_rwq_ind_table(
-		priv->ctx,
-		&(struct ibv_rwq_ind_table_init_attr){
-			.log_ind_tbl_size = log2above(flow->actions.queues_n),
-			.ind_tbl = wqs,
-			.comp_mask = 0,
-		});
+	rte_flow->ind_table =
+		mlx5_priv_ind_table_ibv_get(priv, flow->actions.queues,
+					    flow->actions.queues_n);
 	if (!rte_flow->ind_table) {
-		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
-				   NULL, "cannot allocate indirection table");
-		goto error;
+		rte_flow->ind_table =
+			mlx5_priv_ind_table_ibv_new(priv, flow->actions.queues,
+						    flow->actions.queues_n);
+		if (!rte_flow->ind_table) {
+			rte_flow_error_set(error, ENOMEM,
+					   RTE_FLOW_ERROR_TYPE_HANDLE,
+					   NULL,
+					   "cannot allocate indirection table");
+			goto error;
+		}
 	}
 	rte_flow->qp = ibv_create_qp_ex(
 		priv->ctx,
@@ -1148,7 +1136,7 @@ priv_flow_create_action_queue(struct priv *priv,
 				.rx_hash_key = rss_hash_default_key,
 				.rx_hash_fields_mask = rte_flow->hash_fields,
 			},
-			.rwq_ind_tbl = rte_flow->ind_table,
+			.rwq_ind_tbl = rte_flow->ind_table->ind_table,
 			.pd = priv->pd
 		});
 	if (!rte_flow->qp) {
@@ -1171,7 +1159,7 @@ priv_flow_create_action_queue(struct priv *priv,
 	if (rte_flow->qp)
 		ibv_destroy_qp(rte_flow->qp);
 	if (rte_flow->ind_table)
-		ibv_destroy_rwq_ind_table(rte_flow->ind_table);
+		mlx5_priv_ind_table_ibv_release(priv, rte_flow->ind_table);
 	rte_free(rte_flow);
 	return NULL;
 }
@@ -1297,13 +1285,10 @@ priv_flow_destroy(struct priv *priv,
 		goto free;
 	if (flow->qp)
 		claim_zero(ibv_destroy_qp(flow->qp));
-	if (flow->ind_table)
-		claim_zero(ibv_destroy_rwq_ind_table(flow->ind_table));
-	for (i = 0; i != flow->queues_n; ++i) {
+	for (i = 0; i != flow->ind_table->queues_n; ++i) {
 		struct rte_flow *tmp;
-		struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[flow->queues[i]];
-		struct mlx5_rxq_ctrl *rxq_ctrl =
-			container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
+		struct mlx5_rxq_data *rxq_data =
+			(*priv->rxqs)[flow->ind_table->queues[i]];
 
 		/*
 		 * To remove the mark from the queue, the queue must not be
@@ -1319,14 +1304,17 @@ priv_flow_destroy(struct priv *priv,
 					continue;
 				if (!tmp->mark)
 					continue;
-				for (j = 0; (j != tmp->queues_n) && !mark; j++)
-					if (tmp->queues[j] == flow->queues[i])
+				for (j = 0;
+				     (j != tmp->ind_table->queues_n) && !mark;
+				     j++)
+					if (tmp->ind_table->queues[j] ==
+					    flow->ind_table->queues[i])
 						mark = 1;
 			}
 			rxq_data->mark = mark;
 		}
-		mlx5_priv_rxq_ibv_release(priv, rxq_ctrl->ibv);
 	}
+	mlx5_priv_ind_table_ibv_release(priv, flow->ind_table);
 free:
 	rte_free(flow->ibv_attr);
 	DEBUG("Flow destroyed %p", (void *)flow);
@@ -1518,9 +1506,10 @@ priv_flow_stop(struct priv *priv)
 		flow->ibv_flow = NULL;
 		if (flow->mark) {
 			unsigned int n;
+			struct mlx5_ind_table_ibv *ind_tbl = flow->ind_table;
 
-			for (n = 0; n < flow->queues_n; ++n)
-				(*priv->rxqs)[flow->queues[n]]->mark = 0;
+			for (n = 0; n < ind_tbl->queues_n; ++n)
+				(*priv->rxqs)[ind_tbl->queues[n]]->mark = 0;
 		}
 		DEBUG("Flow %p removed", (void *)flow);
 	}
@@ -1562,8 +1551,10 @@ priv_flow_start(struct priv *priv)
 		if (flow->mark) {
 			unsigned int n;
 
-			for (n = 0; n < flow->queues_n; ++n)
-				(*priv->rxqs)[flow->queues[n]]->mark = 1;
+			for (n = 0; n < flow->ind_table->queues_n; ++n) {
+				uint16_t idx = flow->ind_table->queues[n];
+				(*priv->rxqs)[idx]->mark = 1;
+			}
 		}
 	}
 	return 0;
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 87efeed..4a53282 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -1622,3 +1622,156 @@ mlx5_priv_rxq_verify(struct priv *priv)
 	}
 	return ret;
 }
+
+/**
+ * Create an indirection table.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param queues
+ *   Queues to add to the indirection table.
+ * @param queues_n
+ *   Number of queues in the array.
+ *
+ * @return
+ *   A new indirection table on success, NULL otherwise.
+ */
+struct mlx5_ind_table_ibv*
+mlx5_priv_ind_table_ibv_new(struct priv *priv, uint16_t queues[],
+			    uint16_t queues_n)
+{
+	struct mlx5_ind_table_ibv *ind_tbl;
+	const unsigned int wq_n = rte_is_power_of_2(queues_n) ?
+		log2above(queues_n) :
+		log2above(priv->ind_table_max_size);
+	struct ibv_wq *wq[1 << wq_n];
+	unsigned int i;
+	unsigned int j;
+
+	ind_tbl = rte_calloc(__func__, 1, sizeof(*ind_tbl) +
+			     queues_n * sizeof(uint16_t), 0);
+	if (!ind_tbl)
+		return NULL;
+	for (i = 0; i != queues_n; ++i) {
+		struct mlx5_rxq_ctrl *rxq =
+			mlx5_priv_rxq_get(priv, queues[i]);
+
+		if (!rxq)
+			goto error;
+		wq[i] = rxq->ibv->wq;
+		ind_tbl->queues[i] = queues[i];
+	}
+	ind_tbl->queues_n = queues_n;
+	/* Finalise indirection table. */
+	for (j = 0; i != (unsigned int)(1 << wq_n); ++i, ++j)
+		wq[i] = wq[j];
+	ind_tbl->ind_table = ibv_create_rwq_ind_table(
+		priv->ctx,
+		&(struct ibv_rwq_ind_table_init_attr){
+			.log_ind_tbl_size = wq_n,
+			.ind_tbl = wq,
+			.comp_mask = 0,
+		});
+	if (!ind_tbl->ind_table)
+		goto error;
+	rte_atomic32_inc(&ind_tbl->refcnt);
+	LIST_INSERT_HEAD(&priv->ind_tbls, ind_tbl, next);
+	DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv,
+	      (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
+	return ind_tbl;
+error:
+	rte_free(ind_tbl);
+	DEBUG("%p cannot create indirection table", (void *)priv);
+	return NULL;
+}
+
+/**
+ * Get an indirection table.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param queues
+ *   Queues to look up in the indirection table.
+ * @param queues_n
+ *   Number of queues in the array.
+ *
+ * @return
+ *   An indirection table if found, NULL otherwise.
+ */
+struct mlx5_ind_table_ibv*
+mlx5_priv_ind_table_ibv_get(struct priv *priv, uint16_t queues[],
+			    uint16_t queues_n)
+{
+	struct mlx5_ind_table_ibv *ind_tbl;
+
+	LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
+		if ((ind_tbl->queues_n == queues_n) &&
+		    (memcmp(ind_tbl->queues, queues,
+			    ind_tbl->queues_n * sizeof(ind_tbl->queues[0]))
+		     == 0))
+			break;
+	}
+	if (ind_tbl) {
+		unsigned int i;
+
+		rte_atomic32_inc(&ind_tbl->refcnt);
+		DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv,
+		      (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
+		for (i = 0; i != ind_tbl->queues_n; ++i)
+			mlx5_priv_rxq_get(priv, ind_tbl->queues[i]);
+	}
+	return ind_tbl;
+}
+
+/**
+ * Release an indirection table.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param ind_table
+ *   Indirection table to release.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+int
+mlx5_priv_ind_table_ibv_release(struct priv *priv,
+				struct mlx5_ind_table_ibv *ind_tbl)
+{
+	unsigned int i;
+
+	DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv,
+	      (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
+	if (rte_atomic32_dec_and_test(&ind_tbl->refcnt))
+		claim_zero(ibv_destroy_rwq_ind_table(ind_tbl->ind_table));
+	for (i = 0; i != ind_tbl->queues_n; ++i)
+		claim_nonzero(mlx5_priv_rxq_release(priv, ind_tbl->queues[i]));
+	if (!rte_atomic32_read(&ind_tbl->refcnt)) {
+		LIST_REMOVE(ind_tbl, next);
+		rte_free(ind_tbl);
+		return 0;
+	}
+	return EBUSY;
+}
+
+/**
+ * Verify the Verbs indirection table list is empty.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ *
+ * @return the number of objects not released.
+ */
+int
+mlx5_priv_ind_table_ibv_verify(struct priv *priv)
+{
+	struct mlx5_ind_table_ibv *ind_tbl;
+	int ret = 0;
+
+	LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
+		DEBUG("%p: Verbs indirection table %p still referenced",
+		      (void *)priv, (void *)ind_tbl);
+		++ret;
+	}
+	return ret;
+}
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 44cfef5..b7c75bf 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -156,6 +156,15 @@ struct mlx5_rxq_ctrl {
 	unsigned int irq:1; /* Whether IRQ is enabled. */
 };
 
+/* Indirection table. */
+struct mlx5_ind_table_ibv {
+	LIST_ENTRY(mlx5_ind_table_ibv) next; /* Pointer to the next element. */
+	rte_atomic32_t refcnt; /* Reference counter. */
+	struct ibv_rwq_ind_table *ind_table; /* Indirection table. */
+	uint16_t queues_n; /* Number of queues in the list. */
+	uint16_t queues[]; /* Queue list. */
+};
+
 /* Hash RX queue types. */
 enum hash_rxq_type {
 	HASH_RXQ_TCPV4,
@@ -345,6 +354,14 @@ int mlx5_priv_rxq_release(struct priv *, uint16_t);
 int mlx5_priv_rxq_releasable(struct priv *, uint16_t);
 int mlx5_priv_rxq_verify(struct priv *);
 int rxq_alloc_elts(struct mlx5_rxq_ctrl *);
+struct mlx5_ind_table_ibv *mlx5_priv_ind_table_ibv_new(struct priv *,
+						       uint16_t [],
+						       uint16_t);
+struct mlx5_ind_table_ibv *mlx5_priv_ind_table_ibv_get(struct priv *,
+						       uint16_t [],
+						       uint16_t);
+int mlx5_priv_ind_table_ibv_release(struct priv *, struct mlx5_ind_table_ibv *);
+int mlx5_priv_ind_table_ibv_verify(struct priv *);
 
 /* mlx5_txq.c */
 
diff --git a/drivers/net/mlx5/mlx5_utils.h b/drivers/net/mlx5/mlx5_utils.h
index a824787..218ae83 100644
--- a/drivers/net/mlx5/mlx5_utils.h
+++ b/drivers/net/mlx5/mlx5_utils.h
@@ -128,11 +128,13 @@ pmd_drv_log_basename(const char *s)
 
 #define DEBUG(...) PMD_DRV_LOG(DEBUG, __VA_ARGS__)
 #define claim_zero(...) assert((__VA_ARGS__) == 0)
+#define claim_nonzero(...) assert((__VA_ARGS__) != 0)
 
 #else /* NDEBUG */
 
 #define DEBUG(...) (void)0
 #define claim_zero(...) (__VA_ARGS__)
+#define claim_nonzero(...) (__VA_ARGS__)
 
 #endif /* NDEBUG */
 
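
A note on the sizing logic in mlx5_priv_ind_table_ibv_new(): when the
number of queues is not a power of two, the Verbs table is stretched to
the device's maximum indirection table size and the queue list is
replicated to fill it, which keeps the RSS spread even.  The standalone
sketch below walks through that finalisation step; the local
log2above() and the device limit of 512 are illustrative stand-ins
chosen here, not values taken from the driver.

#include <stdint.h>
#include <stdio.h>

/* Simplified log2above(): smallest n such that (1 << n) >= v. */
static unsigned int
log2above(unsigned int v)
{
	unsigned int n = 0;

	while ((1u << n) < v)
		++n;
	return n;
}

static int
is_power_of_2(unsigned int v)
{
	return v && !(v & (v - 1));
}

int
main(void)
{
	const unsigned int ind_table_max_size = 512; /* Assumed device limit. */
	const uint16_t queues[] = { 0, 1, 2 }; /* 3 queues: not a power of 2. */
	const unsigned int queues_n = 3;
	const unsigned int wq_n = is_power_of_2(queues_n) ?
		log2above(queues_n) :
		log2above(ind_table_max_size);
	uint16_t tbl[1 << wq_n];
	unsigned int i;
	unsigned int j;

	/* Fill the first queues_n entries, then duplicate the pattern. */
	for (i = 0; i != queues_n; ++i)
		tbl[i] = queues[i];
	for (j = 0; i != (1u << wq_n); ++i, ++j)
		tbl[i] = tbl[j];
	/* Prints log_ind_tbl_size=9 and the repeating 0 1 2 pattern. */
	printf("log_ind_tbl_size=%u, first entries: %d %d %d %d %d %d\n",
	       wq_n, tbl[0], tbl[1], tbl[2], tbl[3], tbl[4], tbl[5]);
	return 0;
}
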
-- 
2.1.4