[PATCH 3/3] drivers: enhance the Tx queue affinity

Jiawei Wang jiaweiw at nvidia.com
Fri Feb 3 06:21:07 CET 2023


Previous patch supports the tx phy affinity configuration in the Tx
queue API, it supports to set the affinity value per Queue.

This patch updates TIS creation with tx_phy_affinity value of
Tx queue, TIS index 1 goes to hardware port 0, TIS index 2 goes to
hardware port 1, and TIS index 0 is reserved for default HWS hash mode.

Signed-off-by: Jiawei Wang <jiaweiw at nvidia.com>
---
 drivers/common/mlx5/mlx5_prm.h |  8 -------
 drivers/net/mlx5/mlx5.c        | 43 +++++++++++++++-------------------
 drivers/net/mlx5/mlx5_devx.c   | 24 ++++++++++---------
 drivers/net/mlx5/mlx5_ethdev.c |  1 +
 drivers/net/mlx5/mlx5_tx.h     |  1 +
 drivers/net/mlx5/mlx5_txq.c    |  8 +++++++
 6 files changed, 42 insertions(+), 43 deletions(-)

diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index 8bbb800206..ded001d0b2 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -2331,14 +2331,6 @@ struct mlx5_ifc_query_nic_vport_context_in_bits {
 	u8 reserved_at_68[0x18];
 };
 
-/*
- * lag_tx_port_affinity: 0 auto-selection, 1 PF1, 2 PF2 vice versa.
- * Each TIS binds to one PF by setting lag_tx_port_affinity (>0).
- * Once LAG enabled, we create multiple TISs and bind each one to
- * different PFs, then TIS[i] gets affinity i+1 and goes to PF i+1.
- */
-#define MLX5_IFC_LAG_MAP_TIS_AFFINITY(index, num) ((num) ? \
-						    (index) % (num) + 1 : 0)
 struct mlx5_ifc_tisc_bits {
 	u8 strict_lag_tx_port_affinity[0x1];
 	u8 reserved_at_1[0x3];
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index b8643cebdd..c75c98b8b0 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -1162,9 +1162,9 @@ mlx5_dev_ctx_shared_mempool_subscribe(struct rte_eth_dev *dev)
 static int
 mlx5_setup_tis(struct mlx5_dev_ctx_shared *sh)
 {
-	int i;
 	struct mlx5_devx_lag_context lag_ctx = { 0 };
 	struct mlx5_devx_tis_attr tis_attr = { 0 };
+	int i;
 
 	tis_attr.transport_domain = sh->td->id;
 	if (sh->bond.n_port) {
@@ -1178,35 +1178,30 @@ mlx5_setup_tis(struct mlx5_dev_ctx_shared *sh)
 			DRV_LOG(ERR, "Failed to query lag affinity.");
 			return -1;
 		}
-		if (sh->lag.affinity_mode == MLX5_LAG_MODE_TIS) {
-			for (i = 0; i < sh->bond.n_port; i++) {
-				tis_attr.lag_tx_port_affinity =
-					MLX5_IFC_LAG_MAP_TIS_AFFINITY(i,
-							sh->bond.n_port);
-				sh->tis[i] = mlx5_devx_cmd_create_tis(sh->cdev->ctx,
-						&tis_attr);
-				if (!sh->tis[i]) {
-					DRV_LOG(ERR, "Failed to TIS %d/%d for bonding device"
-						" %s.", i, sh->bond.n_port,
-						sh->ibdev_name);
-					return -1;
-				}
-			}
+		if (sh->lag.affinity_mode == MLX5_LAG_MODE_TIS)
 			DRV_LOG(DEBUG, "LAG number of ports : %d, affinity_1 & 2 : pf%d & %d.\n",
 				sh->bond.n_port, lag_ctx.tx_remap_affinity_1,
 				lag_ctx.tx_remap_affinity_2);
-			return 0;
-		}
-		if (sh->lag.affinity_mode == MLX5_LAG_MODE_HASH)
+		else if (sh->lag.affinity_mode == MLX5_LAG_MODE_HASH)
 			DRV_LOG(INFO, "Device %s enabled HW hash based LAG.",
 					sh->ibdev_name);
 	}
-	tis_attr.lag_tx_port_affinity = 0;
-	sh->tis[0] = mlx5_devx_cmd_create_tis(sh->cdev->ctx, &tis_attr);
-	if (!sh->tis[0]) {
-		DRV_LOG(ERR, "Failed to TIS 0 for bonding device"
-			" %s.", sh->ibdev_name);
-		return -1;
+	for (i = 0; i <= sh->bond.n_port; i++) {
+		/*
+		 * lag_tx_port_affinity: 0 auto-selection, 1 PF1, 2 PF2 vice versa.
+		 * Each TIS binds to one PF by setting lag_tx_port_affinity (> 0).
+		 * Once LAG enabled, we create multiple TISs and bind each one to
+		 * different PFs, then TIS[i+1] gets affinity i+1 and goes to PF i+1.
+		 * TIS[0] is reserved for HW Hash mode.
+		 */
+		tis_attr.lag_tx_port_affinity = i;
+		sh->tis[i] = mlx5_devx_cmd_create_tis(sh->cdev->ctx, &tis_attr);
+		if (!sh->tis[i]) {
+			DRV_LOG(ERR, "Failed to create TIS %d/%d for [bonding] device"
+				" %s.", i, sh->bond.n_port,
+				sh->ibdev_name);
+			return -1;
+		}
 	}
 	return 0;
 }
diff --git a/drivers/net/mlx5/mlx5_devx.c b/drivers/net/mlx5/mlx5_devx.c
index f6e1943fd7..a3fe0b3b98 100644
--- a/drivers/net/mlx5/mlx5_devx.c
+++ b/drivers/net/mlx5/mlx5_devx.c
@@ -1190,17 +1190,19 @@ static uint32_t
 mlx5_get_txq_tis_num(struct rte_eth_dev *dev, uint16_t queue_idx)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	int tis_idx;
-
-	if (priv->sh->bond.n_port && priv->sh->lag.affinity_mode ==
-			MLX5_LAG_MODE_TIS) {
-		tis_idx = (priv->lag_affinity_idx + queue_idx) %
-			priv->sh->bond.n_port;
-		DRV_LOG(INFO, "port %d txq %d gets affinity %d and maps to PF %d.",
-			dev->data->port_id, queue_idx, tis_idx + 1,
-			priv->sh->lag.tx_remap_affinity[tis_idx]);
-	} else {
-		tis_idx = 0;
+	struct mlx5_txq_data *txq_data = (*priv->txqs)[queue_idx];
+	int tis_idx = 0;
+
+	if (priv->sh->bond.n_port) {
+		if (txq_data->tx_phy_affinity) {
+			tis_idx = txq_data->tx_phy_affinity;
+		} else if (priv->sh->lag.affinity_mode == MLX5_LAG_MODE_TIS) {
+			tis_idx = (priv->lag_affinity_idx + queue_idx) %
+				priv->sh->bond.n_port + 1;
+			DRV_LOG(INFO, "port %d txq %d gets affinity %d and maps to PF %d.",
+				dev->data->port_id, queue_idx, tis_idx,
+				priv->sh->lag.tx_remap_affinity[tis_idx - 1]);
+		}
 	}
 	MLX5_ASSERT(priv->sh->tis[tis_idx]);
 	return priv->sh->tis[tis_idx]->id;
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 4a85415ff3..354bb4f022 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -352,6 +352,7 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
 	info->switch_info.domain_id = priv->domain_id;
 	info->switch_info.port_id = priv->representor_id;
 	info->switch_info.rx_domain = 0; /* No sub Rx domains. */
+	info->nb_phy_ports = priv->sh->bond.n_port;
 	if (priv->representor) {
 		uint16_t port_id;
 
diff --git a/drivers/net/mlx5/mlx5_tx.h b/drivers/net/mlx5/mlx5_tx.h
index a056be7ca8..674c2aebe5 100644
--- a/drivers/net/mlx5/mlx5_tx.h
+++ b/drivers/net/mlx5/mlx5_tx.h
@@ -144,6 +144,7 @@ struct mlx5_txq_data {
 	uint16_t inlen_send; /* Ordinary send data inline size. */
 	uint16_t inlen_empw; /* eMPW max packet size to inline. */
 	uint16_t inlen_mode; /* Minimal data length to inline. */
+	uint8_t tx_phy_affinity; /* TxQ affinity configuration. */
 	uint32_t qp_num_8s; /* QP number shifted by 8. */
 	uint64_t offloads; /* Offloads for Tx Queue. */
 	struct mlx5_mr_ctrl mr_ctrl; /* MR control descriptor. */
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 5543f2c570..4e53706807 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -392,6 +392,13 @@ mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		container_of(txq, struct mlx5_txq_ctrl, txq);
 	int res;
 
+	if (conf->tx_phy_affinity > priv->num_lag_ports) {
+		rte_errno = EINVAL;
+		DRV_LOG(ERR, "port %u unable to setup Tx queue index %u"
+			" affinity is %u exceeds the maximum %u", dev->data->port_id,
+			idx, conf->tx_phy_affinity, priv->num_lag_ports);
+		return -rte_errno;
+	}
 	res = mlx5_tx_queue_pre_setup(dev, idx, &desc);
 	if (res)
 		return res;
@@ -1095,6 +1102,7 @@ mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	tmpl->txq.elts_m = desc - 1;
 	tmpl->txq.port_id = dev->data->port_id;
 	tmpl->txq.idx = idx;
+	tmpl->txq.tx_phy_affinity = conf->tx_phy_affinity;
 	txq_set_params(tmpl);
 	if (txq_adjust_params(tmpl))
 		goto error;
-- 
2.18.1



More information about the dev mailing list