[PATCH] net/mlx5: use port index as representor index
Dariusz Sosnowski
dsosnowski at nvidia.com
Fri May 22 12:19:33 CEST 2026
Since the offending commit, mlx5 driver supports probing
representors on BlueField DPUs with Socket Direct (SD).
Such a card can be connected to 2 different CPUs on the host system.
On DPU, user would see the following network devices:
- p0 and p1 - physical ports
- pf0hpf and pf2hpf - PF0 on CPU 0 and CPU 1 respectively
- pf1hpf and pf3hpf - PF1 on CPU 0 and CPU 1 respectively
mlx5 driver finds the relevant netdev by matching information
provided in representor devarg to phys_port_name
reported by Linux kernel.
For the above interfaces phys_port_name's would be reported
and probed as:
- p0 -> p0, no need for representor devarg
- p1 -> p1, with representor=pf1
- pf0hpf -> c1pf0, with representor=c1pf0vf65535
- pf1hpf -> c1pf1, with representor=c1pf1vf65535
- pf2hpf -> c2pf0, with representor=c2pf0vf65535
- pf3hpf -> c2pf1, with representor=c2pf1vf65535
Although hot-plugging all these representors is successful,
RTE_ETH_FOREACH_MATCHING_DEV() macro would not find DPDK ports.
This is caused by missing information reported by mlx5 driver,
through rte_eth_representor_info_get() API.
Specifically, mlx5 driver did not report controller index for all
representor ranges.
Until now mlx5 driver used static encoding for 16-bit representor_id:
- 2 bits for representor type
- 2 bits for PF index
- 12 bits for representor index (either VF or SF number)
Controller index was not encoded. This caused the mentioned issue
and, on top of that, the static encoding:
- limits the number of PFs
- limits the number of SFs
This patch changes the mlx5 driver logic for
rte_eth_representor_info_get().
Instead of static encoding:
- representor_id's will be dynamically assigned
to each probed representor.
- rte_eth_representor_info_get() will report N ranges:
- N == number of probed ports on single embedded switch
- Each range will define single representor_id
for given controller/PF/VF/SF.
Fixes: 2f7cdd821b1b ("net/mlx5: fix probing to allow BlueField Socket Direct")
Cc: stable at dpdk.org
Signed-off-by: Dariusz Sosnowski <dsosnowski at nvidia.com>
Acked-by: Bing Zhao <bingz at nvidia.com>
---
drivers/net/mlx5/linux/mlx5_os.c | 6 +-
drivers/net/mlx5/mlx5.h | 19 +++
drivers/net/mlx5/mlx5_ethdev.c | 284 +++++++++++++++++++------------
3 files changed, 199 insertions(+), 110 deletions(-)
diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 0fc721592b..5305523c1b 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -1677,9 +1677,13 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
err = ENOMEM;
goto error;
}
+ priv->port_info.type = spawn->info.name_type;
+ priv->port_info.ctrl_num = spawn->info.ctrl_num;
+ priv->port_info.pf_num = spawn->info.pf_num;
+ priv->port_info.port_num = spawn->info.port_name;
if (priv->representor) {
eth_dev->data->dev_flags |= RTE_ETH_DEV_REPRESENTOR;
- eth_dev->data->representor_id = priv->representor_id;
+ eth_dev->data->representor_id = eth_dev->data->port_id;
MLX5_ETH_FOREACH_DEV(port_id, dpdk_dev) {
struct mlx5_priv *opriv =
rte_eth_devices[port_id].data->dev_private;
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 49a0c03544..23803b450b 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -1984,6 +1984,24 @@ struct mlx5_quota_ctx {
struct mlx5_indexed_pool *quota_ipool; /* Manage quota objects */
};
+/*
+ * Stores info parsed from phys_port_name related to given DPDK port.
+ * Together, the fields below identify which controller/PF/VF/SF
+ * (or uplink) the port stands for.
+ */
+struct mlx5_representor_info {
+ enum mlx5_nl_phys_port_name_type type; /* Kind of port described by phys_port_name. */
+ /* PCI controller index. 0 if no controller was reported in phys_port_name. */
+ int32_t ctrl_num;
+ /* PF index. */
+ int32_t pf_num;
+ /*
+ * Representor number, interpreted according to 'type':
+ *
+ * - For VF/SF - VF/SF index.
+ * - For PFHPF (host PF) - -1.
+ * - For uplink - physical port index.
+ * - For others - VF representor is assumed, so VF index.
+ */
+ int32_t port_num;
+};
+
struct mlx5_nta_sample_ctx;
struct mlx5_priv {
struct rte_eth_dev_data *dev_data; /* Pointer to device data. */
@@ -2019,6 +2037,7 @@ struct mlx5_priv {
uint32_t vport_meta_tag; /* Used for vport index match ove VF LAG. */
uint32_t vport_meta_mask; /* Used for vport index field match mask. */
uint16_t representor_id; /* UINT16_MAX if not a representor. */
+ struct mlx5_representor_info port_info;
int32_t pf_bond; /* >=0, representor owner PF index in bonding. */
int32_t mpesw_owner; /* >=0, representor owner PF index in MPESW. */
int32_t mpesw_port; /* Related port index of MPESW device. < 0 - no MPESW. */
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index a29cdeeb50..e14b7f148b 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -345,6 +345,23 @@ mlx5_dev_get_max_wq_size(struct mlx5_dev_ctx_shared *sh)
return max_wqe;
}
+/**
+ * Resolve the switch port ID advertised for a DPDK port.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ *
+ * @return
+ *   DPDK port ID of the device when it is a representor,
+ *   UINT16_MAX otherwise.
+ */
+static uint16_t
+mlx5_dev_switch_info_port_id_get(struct rte_eth_dev *dev)
+{
+	return rte_eth_dev_is_repr(dev) ? dev->data->port_id : UINT16_MAX;
+}
+
/**
* DPDK callback to get information about the device.
*
@@ -401,7 +418,7 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
info->dev_capa |= RTE_ETH_DEV_CAPA_RXQ_SHARE;
info->switch_info.name = dev->data->name;
info->switch_info.domain_id = priv->domain_id;
- info->switch_info.port_id = priv->representor_id;
+ info->switch_info.port_id = mlx5_dev_switch_info_port_id_get(dev);
info->switch_info.rx_domain = 0; /* No sub Rx domains. */
if (priv->representor) {
uint16_t port_id;
@@ -472,14 +489,162 @@ mlx5_representor_id_encode(const struct mlx5_switch_info *info,
return MLX5_REPRESENTOR_ID(pf, type, repr);
}
+/*
+ * Number of representor ranges reported for a single port:
+ * two for a PFHPF port, none for an uplink that is not a representor,
+ * one for everything else.
+ */
+static unsigned int
+mlx5_representor_info_count_one(struct mlx5_priv *priv)
+{
+	const enum mlx5_nl_phys_port_name_type type = priv->port_info.type;
+
+	if (type == MLX5_PHYS_PORT_NAME_TYPE_PFHPF)
+		return 2;
+	/* Only representor uplinks should be reported */
+	if (type == MLX5_PHYS_PORT_NAME_TYPE_UPLINK && !priv->representor)
+		return 0;
+	return 1;
+}
+
+/*
+ * Sum the representor ranges of every port that sits on the same device,
+ * shares the same shared context and has the same switch domain as @p dev.
+ */
+static unsigned int
+mlx5_representor_info_count(struct rte_eth_dev *dev)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	unsigned int total = 0;
+	uint16_t id;
+
+	MLX5_ETH_FOREACH_DEV(id, dev->device) {
+		struct mlx5_priv *other = rte_eth_devices[id].data->dev_private;
+
+		if (other != NULL &&
+		    other->sh == priv->sh &&
+		    other->domain_id == priv->domain_id)
+			total += mlx5_representor_info_count_one(other);
+	}
+
+	return total;
+}
+
+/*
+ * Append the representor range(s) describing one probed port to the
+ * ranges array of @p info and advance info->nb_ranges accordingly.
+ *
+ * Every range covers exactly one representor ID, which is the DPDK port ID
+ * of the port. A host PF (PFHPF) produces two ranges: VF and SF variants.
+ * Nothing is appended when the port is not reported at all or when the
+ * remaining room in the array cannot hold all of the port's ranges.
+ */
+static void
+mlx5_representor_info_fill_one(struct mlx5_priv *priv,
+			       struct rte_eth_representor_info *info)
+{
+	struct rte_eth_representor_range *range;
+	unsigned int count;
+
+	count = mlx5_representor_info_count_one(priv);
+	if (count == 0)
+		return;
+
+	if (info->nb_ranges + count > info->nb_ranges_alloc) {
+		DRV_LOG(ERR, "port %u representor info already full", priv->dev_data->port_id);
+		return;
+	}
+
+	range = &info->ranges[info->nb_ranges];
+
+	switch (priv->port_info.type) {
+	case MLX5_PHYS_PORT_NAME_TYPE_UPLINK:
+		/* Uplink representor: reported as PF, indexed by physical port. */
+		range->type = RTE_ETH_REPRESENTOR_PF;
+		range->controller = priv->port_info.ctrl_num;
+		range->pf = priv->port_info.port_num;
+		range->id_base = priv->dev_data->port_id;
+		range->id_end = range->id_base;
+		snprintf(range->name, sizeof(range->name), "pf%d", range->pf);
+		break;
+	case MLX5_PHYS_PORT_NAME_TYPE_PFSF:
+		/* SF representor. */
+		range->type = RTE_ETH_REPRESENTOR_SF;
+		range->controller = priv->port_info.ctrl_num;
+		range->pf = priv->port_info.pf_num;
+		range->sf = priv->port_info.port_num;
+		range->id_base = priv->dev_data->port_id;
+		range->id_end = range->id_base;
+		snprintf(range->name, sizeof(range->name), "pf%dsf", range->pf);
+		break;
+	case MLX5_PHYS_PORT_NAME_TYPE_PFHPF:
+		/*
+		 * Host PF can be probed either through VF(0xffff) or SF(0xffff).
+		 * Firstly fill in VF variant.
+		 */
+		range->type = RTE_ETH_REPRESENTOR_VF;
+		range->controller = priv->port_info.ctrl_num;
+		range->pf = priv->port_info.pf_num;
+		range->vf = UINT16_MAX;
+		range->id_base = priv->dev_data->port_id;
+		range->id_end = range->id_base;
+		snprintf(range->name, sizeof(range->name), "pf%dvf", range->pf);
+
+		/* Advance to the second range, used for the SF variant. */
+		range++;
+
+		/* Secondly, fill in SF variant. */
+		range->type = RTE_ETH_REPRESENTOR_SF;
+		range->controller = priv->port_info.ctrl_num;
+		range->pf = priv->port_info.pf_num;
+		range->sf = UINT16_MAX;
+		range->id_base = priv->dev_data->port_id;
+		range->id_end = range->id_base;
+		snprintf(range->name, sizeof(range->name), "pf%dsf", range->pf);
+		break;
+	case MLX5_PHYS_PORT_NAME_TYPE_PFVF:
+		/* FALLTHROUGH */
+	case MLX5_PHYS_PORT_NAME_TYPE_NOTSET:
+		/* FALLTHROUGH */
+	case MLX5_PHYS_PORT_NAME_TYPE_LEGACY:
+		/* FALLTHROUGH */
+	case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN:
+		/* FALLTHROUGH */
+	default:
+		/*
+		 * mlx5_representor_info_count_one() counts one range for any
+		 * other type, so the range must be filled here as well:
+		 * a VF representor is assumed.
+		 */
+		range->type = RTE_ETH_REPRESENTOR_VF;
+		range->controller = priv->port_info.ctrl_num;
+		range->pf = priv->port_info.pf_num;
+		range->vf = priv->port_info.port_num;
+		range->id_base = priv->dev_data->port_id;
+		range->id_end = range->id_base;
+		snprintf(range->name, sizeof(range->name), "pf%dvf", range->pf);
+		break;
+	}
+
+	info->nb_ranges += count;
+}
+
+/*
+ * Fill @p info with the representor ranges of every probed port that sits
+ * on the same device, shares the same shared context and has the same
+ * switch domain as @p dev.
+ *
+ * info->nb_ranges receives the number of ranges actually written, which is
+ * bounded by info->nb_ranges_alloc.
+ *
+ * Returns the number of ranges available from the device. This can be
+ * larger than info->nb_ranges when the caller's array is too small,
+ * letting the caller learn how many entries to allocate.
+ */
+static unsigned int
+mlx5_representor_info_fill(struct rte_eth_dev *dev,
+			   struct rte_eth_representor_info *info)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	uint16_t port_id;
+
+	/* nb_ranges is an output: do not rely on the caller having zeroed it. */
+	info->nb_ranges = 0;
+	info->controller = priv->port_info.ctrl_num;
+	/* NOTE(review): assumes dev->device is a PCI device - confirm for other buses. */
+	info->pf = RTE_DEV_TO_PCI(dev->device)->addr.function;
+
+	MLX5_ETH_FOREACH_DEV(port_id, dev->device) {
+		struct mlx5_priv *opriv = rte_eth_devices[port_id].data->dev_private;
+
+		if (!opriv ||
+		    opriv->sh != priv->sh ||
+		    opriv->domain_id != priv->domain_id)
+			continue;
+
+		mlx5_representor_info_fill_one(opriv, info);
+	}
+
+	return mlx5_representor_info_count(dev);
+}
+
/**
* DPDK callback to get information about representor.
*
- * Representor ID bits definition:
- * vf/sf: 12
- * type: 2
- * pf: 2
- *
* @param dev
* Pointer to Ethernet device structure.
* @param[out] info
@@ -492,110 +657,11 @@ int
mlx5_representor_info_get(struct rte_eth_dev *dev,
struct rte_eth_representor_info *info)
{
- struct mlx5_priv *priv = dev->data->dev_private;
- /* Representor types: PF, VF, HPF at VF, SF and HPF at SF, total 5. */
- int n_type = RTE_ETH_REPRESENTOR_PF + 2; /* Maximal type + 2 for HPFs. */
- int n_pf = 8; /* Maximal number of PFs. */
- int i = 0, pf;
- int n_entries;
-
if (info == NULL)
- goto out;
-
- n_entries = n_type * n_pf;
- if ((uint32_t)n_entries > info->nb_ranges_alloc)
- n_entries = info->nb_ranges_alloc;
-
- info->controller = 0;
- info->pf = 0;
- if (mlx5_is_port_on_mpesw_device(priv)) {
- info->pf = priv->mpesw_port;
- for (i = 0; i < n_pf; i++) {
- /* PF range, both ports will show the same information. */
- info->ranges[i].type = RTE_ETH_REPRESENTOR_PF;
- info->ranges[i].controller = 0;
- info->ranges[i].pf = priv->mpesw_owner + i + 1;
- info->ranges[i].vf = 0;
- /*
- * The representor indexes should be the values set of "priv->mpesw_port".
- * In the real case now, only 1 PF/UPLINK representor is supported.
- * The port index will always be the value of "owner + 1".
- */
- info->ranges[i].id_base =
- MLX5_REPRESENTOR_ID(priv->mpesw_owner,
- info->ranges[i].type,
- info->ranges[i].pf);
- info->ranges[i].id_end =
- MLX5_REPRESENTOR_ID(priv->mpesw_owner,
- info->ranges[i].type,
- info->ranges[i].pf);
- snprintf(info->ranges[i].name,
- sizeof(info->ranges[i].name),
- "pf%d", info->ranges[i].pf);
- }
- } else if (priv->pf_bond >= 0)
- info->pf = priv->pf_bond;
- for (pf = 0; pf < n_pf; ++pf) {
- /* VF range. */
- info->ranges[i].type = RTE_ETH_REPRESENTOR_VF;
- info->ranges[i].controller = 0;
- info->ranges[i].pf = pf;
- info->ranges[i].vf = 0;
- info->ranges[i].id_base =
- MLX5_REPRESENTOR_ID(pf, info->ranges[i].type, 0);
- info->ranges[i].id_end =
- MLX5_REPRESENTOR_ID(pf, info->ranges[i].type, -1);
- snprintf(info->ranges[i].name,
- sizeof(info->ranges[i].name), "pf%dvf", pf);
- i++;
- if (i == n_entries)
- break;
- /* HPF range of VF type. */
- info->ranges[i].type = RTE_ETH_REPRESENTOR_VF;
- info->ranges[i].controller = 0;
- info->ranges[i].pf = pf;
- info->ranges[i].vf = UINT16_MAX;
- info->ranges[i].id_base =
- MLX5_REPRESENTOR_ID(pf, info->ranges[i].type, -1);
- info->ranges[i].id_end =
- MLX5_REPRESENTOR_ID(pf, info->ranges[i].type, -1);
- snprintf(info->ranges[i].name,
- sizeof(info->ranges[i].name), "pf%dvf", pf);
- i++;
- if (i == n_entries)
- break;
- /* SF range. */
- info->ranges[i].type = RTE_ETH_REPRESENTOR_SF;
- info->ranges[i].controller = 0;
- info->ranges[i].pf = pf;
- info->ranges[i].vf = 0;
- info->ranges[i].id_base =
- MLX5_REPRESENTOR_ID(pf, info->ranges[i].type, 0);
- info->ranges[i].id_end =
- MLX5_REPRESENTOR_ID(pf, info->ranges[i].type, -1);
- snprintf(info->ranges[i].name,
- sizeof(info->ranges[i].name), "pf%dsf", pf);
- i++;
- if (i == n_entries)
- break;
- /* HPF range of SF type. */
- info->ranges[i].type = RTE_ETH_REPRESENTOR_SF;
- info->ranges[i].controller = 0;
- info->ranges[i].pf = pf;
- info->ranges[i].vf = UINT16_MAX;
- info->ranges[i].id_base =
- MLX5_REPRESENTOR_ID(pf, info->ranges[i].type, -1);
- info->ranges[i].id_end =
- MLX5_REPRESENTOR_ID(pf, info->ranges[i].type, -1);
- snprintf(info->ranges[i].name,
- sizeof(info->ranges[i].name), "pf%dsf", pf);
- i++;
- if (i == n_entries)
- break;
- }
- info->nb_ranges = i;
-out:
- return n_type * n_pf;
+ return mlx5_representor_info_count(dev);
+
+ return mlx5_representor_info_fill(dev, info);
+
}
/**
--
2.47.3
More information about the stable
mailing list