|SUCCESS| net/mlx5: use port index as representor index
dpdklab at iol.unh.edu
dpdklab at iol.unh.edu
Fri May 22 14:56:59 CEST 2026
Test-Label: ai-code-review
Test-Status: SUCCESS
https://dpdk.org/patch/164262
_AI Code Review_
=== Patch Review: series.patch (via Claude) ===
Review date: 2026-05-22
git sha: 2f7cdd821b1b (from commit message reference)
Author: Dariusz Sosnowski <dsosnowski at nvidia.com>
Subject: net/mlx5: use port index as representor index
This patch changes how the mlx5 driver assigns and reports representor
IDs to address issues with hot-plugging representors on BlueField DPUs
with Socket Direct (SD). The driver now uses dynamic representor_id
assignment instead of static encoding, with representor info reporting
individual ranges for each probed port on the embedded switch.
> diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
> index 0fc721592b..5305523c1b 100644
> --- a/drivers/net/mlx5/linux/mlx5_os.c
> +++ b/drivers/net/mlx5/linux/mlx5_os.c
[ ... ]
> @@ -1677,9 +1677,13 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
> err = ENOMEM;
> goto error;
> }
> + priv->port_info.type = spawn->info.name_type;
> + priv->port_info.ctrl_num = spawn->info.ctrl_num;
> + priv->port_info.pf_num = spawn->info.pf_num;
> + priv->port_info.port_num = spawn->info.port_name;
> if (priv->representor) {
> eth_dev->data->dev_flags |= RTE_ETH_DEV_REPRESENTOR;
> - eth_dev->data->representor_id = priv->representor_id;
> + eth_dev->data->representor_id = eth_dev->data->port_id;
Does this assignment violate the representor_id semantics? The
representor_id field is typically used to uniquely identify a
representor within its domain, while port_id is a global DPDK port
identifier. Can this collision cause issues in applications expecting
representor_id to have domain-local meaning?
> diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
> index 49a0c03544..23803b450b 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
[ ... ]
> @@ -1984,6 +1984,24 @@ struct mlx5_quota_ctx {
> struct mlx5_indexed_pool *quota_ipool; /* Manage quota objects */
> };
>
> +/* Stores info parsed from phys_port_name related to given DPDK port. */
> +struct mlx5_representor_info {
> + enum mlx5_nl_phys_port_name_type type;
> + /* PCI controller index. 0 if no controller was reported in phys_port_name. */
> + int32_t ctrl_num;
> + /* PF index. */
> + int32_t pf_num;
> + /*
> + * Representor number:
> + *
> + * - For VF/SF - VF/SF index.
> + * - For PFHPF - -1.
> + * - For uplink - physical port index.
> + * - For others - VF representor is assumed, so VF index.
> + */
> + int32_t port_num;
> +};
Can port_num be negative given the PFHPF case uses -1? Should this field
use a signed type or should PFHPF use a different sentinel like UINT32_MAX?
> diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
> index a29cdeeb50..e14b7f148b 100644
> --- a/drivers/net/mlx5/mlx5_ethdev.c
> +++ b/drivers/net/mlx5/mlx5_ethdev.c
[ ... ]
> @@ -345,6 +345,23 @@ mlx5_dev_get_max_wq_size(struct mlx5_dev_ctx_shared *sh)
> return max_wqe;
> }
>
> +/**
> + * Get switch port ID for given DPDK port.
> + *
> + * @param dev
> + * Pointer to Ethernet device structure.
> + * @return
> + * Switch port ID reported through rte_eth_dev_info_get().
> + */
> +static uint16_t
> +mlx5_dev_switch_info_port_id_get(struct rte_eth_dev *dev)
> +{
> + if (rte_eth_dev_is_repr(dev))
> + return dev->data->port_id;
> +
> + return UINT16_MAX;
> +}
Is rte_eth_dev_is_repr() performing a NULL check on dev->data? If not,
does this function need to validate the dev pointer before dereferencing?
[ ... ]
> @@ -472,14 +489,162 @@ mlx5_representor_id_encode(const struct mlx5_switch_info *info,
> return MLX5_REPRESENTOR_ID(pf, type, repr);
> }
>
> +static unsigned int
> +mlx5_representor_info_count_one(struct mlx5_priv *priv)
> +{
> + switch (priv->port_info.type) {
> + case MLX5_PHYS_PORT_NAME_TYPE_PFHPF:
> + return 2;
> + case MLX5_PHYS_PORT_NAME_TYPE_UPLINK:
> + /* Only representor uplinks should be reported */
> + if (!priv->representor)
> + return 0;
> + return 1;
> + case MLX5_PHYS_PORT_NAME_TYPE_NOTSET:
> + /* FALLTHROUGH */
> + case MLX5_PHYS_PORT_NAME_TYPE_LEGACY:
> + /* FALLTHROUGH */
> + case MLX5_PHYS_PORT_NAME_TYPE_PFVF:
> + /* FALLTHROUGH */
> + case MLX5_PHYS_PORT_NAME_TYPE_PFSF:
> + /* FALLTHROUGH */
> + case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN:
> + /* FALLTHROUGH */
> + default:
> + return 1;
> + }
> +}
The FALLTHROUGH comments violate the coding style which requires
fallthrough cases to be on one line separated by /. Should these be
written as:
case MLX5_PHYS_PORT_NAME_TYPE_NOTSET:
case MLX5_PHYS_PORT_NAME_TYPE_LEGACY:
instead?
> +static unsigned int
> +mlx5_representor_info_count(struct rte_eth_dev *dev)
> +{
> + struct mlx5_priv *priv = dev->data->dev_private;
> + uint16_t port_id;
> + unsigned int count = 0;
> +
> + MLX5_ETH_FOREACH_DEV(port_id, dev->device) {
> + struct mlx5_priv *opriv = rte_eth_devices[port_id].data->dev_private;
> +
> + if (!opriv ||
> + opriv->sh != priv->sh ||
> + opriv->domain_id != priv->domain_id)
> + continue;
Does this code need to check whether rte_eth_devices[port_id].data is
valid before dereferencing dev_private? Can the data pointer be NULL in
this iteration?
> + count += mlx5_representor_info_count_one(opriv);
> + }
> +
> + return count;
> +}
> +
> +static void
> +mlx5_representor_info_fill_one(struct mlx5_priv *priv,
> + struct rte_eth_representor_info *info)
> +{
> + struct rte_eth_representor_range *range;
> + unsigned int count;
> +
> + count = mlx5_representor_info_count_one(priv);
> + if (count == 0)
> + return;
> +
> + if (info->nb_ranges + count > info->nb_ranges_alloc) {
> + DRV_LOG(ERR, "port %u representor info already full", priv->dev_data->port_id);
> + return;
> + }
Can this overflow check overflow during the addition? Would a safer
check be:
if (count > info->nb_ranges_alloc - info->nb_ranges) {
> + range = &info->ranges[info->nb_ranges];
> +
> + switch (priv->port_info.type) {
> + case MLX5_PHYS_PORT_NAME_TYPE_UPLINK:
> + range->type = RTE_ETH_REPRESENTOR_PF;
> + range->controller = priv->port_info.ctrl_num;
> + range->pf = priv->port_info.port_num;
> + range->id_base = priv->dev_data->port_id;
> + range->id_end = range->id_base;
> + snprintf(range->name, sizeof(range->name), "pf%d", range->pf);
> + break;
> + case MLX5_PHYS_PORT_NAME_TYPE_PFSF:
> + /* Secondly, fill in SF variant. */
> + range->type = RTE_ETH_REPRESENTOR_SF;
> + range->controller = priv->port_info.ctrl_num;
> + range->pf = priv->port_info.pf_num;
> + range->sf = priv->port_info.port_num;
> + range->id_base = priv->dev_data->port_id;
> + range->id_end = range->id_base;
> + snprintf(range->name, sizeof(range->name), "pf%dsf", range->pf);
> + break;
> + case MLX5_PHYS_PORT_NAME_TYPE_PFHPF:
> + /*
> + * Host PF can be probed either through VF(0xffff) or SF(0xffff).
> + * Firstly fill in VF variant.
> + */
> + range->type = RTE_ETH_REPRESENTOR_VF;
> + range->controller = priv->port_info.ctrl_num;
> + range->pf = priv->port_info.pf_num;
> + range->vf = UINT16_MAX;
> + range->id_base = priv->dev_data->port_id;
> + range->id_end = range->id_base;
> + snprintf(range->name, sizeof(range->name), "pf%dvf", range->pf);
> +
> + /* Move the SF variant. */
> + range++;
Does this code verify that moving range forward will not exceed
info->ranges[info->nb_ranges_alloc]? The earlier bounds check confirms
count entries are available, but can this increment still overflow the
array?
> + /* Fill in SF variant. */
> + range->type = RTE_ETH_REPRESENTOR_SF;
> + range->controller = priv->port_info.ctrl_num;
> + range->pf = priv->port_info.pf_num;
> + range->sf = UINT16_MAX;
> + range->id_base = priv->dev_data->port_id;
> + range->id_end = range->id_base;
> + snprintf(range->name, sizeof(range->name), "pf%dsf", range->pf);
> + break;
> + case MLX5_PHYS_PORT_NAME_TYPE_PFVF:
> + /* FALLTHROUGH */
> + case MLX5_PHYS_PORT_NAME_TYPE_NOTSET:
> + /* FALLTHROUGH */
> + case MLX5_PHYS_PORT_NAME_TYPE_LEGACY:
> + /* FALLTHROUGH */
> + case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN:
> + range->type = RTE_ETH_REPRESENTOR_VF;
> + range->controller = priv->port_info.ctrl_num;
> + range->pf = priv->port_info.pf_num;
> + range->vf = priv->port_info.port_num;
> + range->id_base = priv->dev_data->port_id;
> + range->id_end = range->id_base;
> + snprintf(range->name, sizeof(range->name), "pf%dvf", range->pf);
> + break;
> + }
> +
> + info->nb_ranges += count;
> +}
Same FALLTHROUGH style issue as earlier.
> +static unsigned int
> +mlx5_representor_info_fill(struct rte_eth_dev *dev,
> + struct rte_eth_representor_info *info)
> +{
> + struct mlx5_priv *priv = dev->data->dev_private;
> + uint16_t port_id;
> +
> + info->controller = priv->port_info.ctrl_num;
> + info->pf = RTE_DEV_TO_PCI(dev->device)->addr.function;
> +
> + MLX5_ETH_FOREACH_DEV(port_id, dev->device) {
> + struct mlx5_priv *opriv = rte_eth_devices[port_id].data->dev_private;
> +
> + if (!opriv ||
> + opriv->sh != priv->sh ||
> + opriv->domain_id != priv->domain_id)
> + continue;
Same question as earlier: Does this code need to validate
rte_eth_devices[port_id].data before accessing dev_private?
More information about the test-report
mailing list