[dpdk-dev] [PATCH v4 07/10] net/mlx5: probe all port representors

Shahaf Shuler shahafs at mellanox.com
Mon Jul 9 13:57:29 CEST 2018


Hi Adrien,


Thursday, July 5, 2018 11:46 AM, Adrien Mazarguil:
> Subject: [PATCH v4 07/10] net/mlx5: probe all port representors
> 
> Probe existing port representors in addition to their master device and
> associate them automatically.
> 
> To avoid collision between Ethernet devices, they are named as follows:
> 
> - "{DBDF}" for master/switch devices.
> - "{DBDF}_representor_{rep}" with "rep" starting from 0 for port
>   representors.
> 
> (Patch based on prior work from Yuanhan Liu)
> 
> Signed-off-by: Adrien Mazarguil <adrien.mazarguil at 6wind.com>
> Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro at 6wind.com>
> Reviewed-by: Xueming Li <xuemingl at mellanox.com>
> Cc: Xueming Li <xuemingl at mellanox.com>
> Cc: Shahaf Shuler <shahafs at mellanox.com>
> --
> v4 changes:
> 
> - Fixed domain ID release once the last port using it is closed. Closed
>   devices are not necessarily detached, their presence is not a good
>   indicator. Code was modified to check if they still use their domain IDs
>   before deciding to release it.
> 
> v3 changes:
> 
> - Nelio introduced mlx5_dev_to_port_id() to prevent the master device
> from
>   releasing a domain ID while representors are still bound. It is now
>   released by the last device closed.
> - Reverted to original naming convention as requested by Xueming and
>   Shahaf; "net_" prefix and "_0" suffix were dropped.
> - mlx5_dev_spawn() (previously mlx5_dev_spawn_one()) now decides on
> its own
>   whether underlying device is a representor.
> - Devices can now be probed in any order and not necessarily all at once;
>   representors can exist without a master device.
> - mlx5_pci_probe() iterates on the list of devices directly instead of
>   relying on an intermediate function (previously mlx5_dev_spawn()).
> - mlx5_get_ifname() was rewritten to rely on mlx5_nl_ifindex() when faced
>   with a representor.
> - Since it is not necessarily present, master device is now dynamically
>   retrieved in mlx5_dev_infos_get().
> 
> v2 changes:
> 
> - Added representor information to dev_infos_get(). DPDK port ID of master
>   device is now stored in the private structure to retrieve it
>   conveniently.
> - Master device is assigned dummy representor ID value -1 to better
>   distinguish from the the first actual representor reported by
>   dev_infos_get() as those are indexed from 0.
> - Added RTE_ETH_DEV_REPRESENTOR device flag.
> ---
>  drivers/net/mlx5/mlx5.c        | 134 ++++++++++++++++++++++++++++-------
> -
>  drivers/net/mlx5/mlx5.h        |  12 +++-
>  drivers/net/mlx5/mlx5_ethdev.c | 133
> +++++++++++++++++++++++++++++++----
>  drivers/net/mlx5/mlx5_mac.c    |   2 +-
>  drivers/net/mlx5/mlx5_stats.c  |   6 +-
>  5 files changed, 238 insertions(+), 49 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index
> d06ba9886..c02afbb82 100644
> --- a/drivers/net/mlx5/mlx5.c
> +++ b/drivers/net/mlx5/mlx5.c
> @@ -307,7 +307,27 @@ mlx5_dev_close(struct rte_eth_dev *dev)
>  	if (ret)
>  		DRV_LOG(WARNING, "port %u some flows still remain",
>  			dev->data->port_id);
> +	if (priv->domain_id !=
> RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID) {
> +		unsigned int c = 0;
> +		unsigned int i = mlx5_dev_to_port_id(dev->device, NULL, 0);
> +		uint16_t port_id[i];
> +
> +		i = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, i),
> i);
> +		while (i--) {
> +			struct priv *opriv =
> +				rte_eth_devices[port_id[i]].data-
> >dev_private;
> +
> +			if (!opriv ||
> +			    opriv->domain_id != priv->domain_id ||
> +			    &rte_eth_devices[port_id[i]] == dev)
> +				continue;
> +			++c;
> +		}
> +		if (!c)
> +			claim_zero(rte_eth_switch_domain_free(priv-
> >domain_id));
> +	}
>  	memset(priv, 0, sizeof(*priv));
> +	priv->domain_id = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID;
>  }
> 
>  const struct eth_dev_ops mlx5_dev_ops = { @@ -647,6 +667,8 @@
> mlx5_uar_init_secondary(struct rte_eth_dev *dev)
>   *   Verbs device.
>   * @param vf
>   *   If nonzero, enable VF-specific features.
> + * @param[in] switch_info
> + *   Switch properties of Ethernet device.
>   *
>   * @return
>   *   A valid Ethernet device object on success, NULL otherwise and rte_errno
> @@ -655,7 +677,8 @@ mlx5_uar_init_secondary(struct rte_eth_dev *dev)
> static struct rte_eth_dev *  mlx5_dev_spawn(struct rte_device *dpdk_dev,
>  	       struct ibv_device *ibv_dev,
> -	       int vf)
> +	       int vf,
> +	       const struct mlx5_switch_info *switch_info)
>  {
>  	struct ibv_context *ctx;
>  	struct ibv_device_attr_ex attr;
> @@ -697,6 +720,8 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
> #endif
>  	struct ether_addr mac;
>  	char name[RTE_ETH_NAME_MAX_LEN];
> +	int own_domain_id = 0;
> +	unsigned int i;
> 
>  	/* Prepare shared data between primary and secondary process. */
>  	mlx5_prepare_shared_data();
> @@ -805,7 +830,12 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
>  		DEBUG("ibv_query_device_ex() failed");
>  		goto error;
>  	}
> -	rte_strlcpy(name, dpdk_dev->name, sizeof(name));
> +	if (!switch_info->representor)
> +		rte_strlcpy(name, dpdk_dev->name, sizeof(name));
> +	else
> +		snprintf(name, sizeof(name), "%s_representor_%u",
> +			 dpdk_dev->name, switch_info->port_name);
> +	DRV_LOG(DEBUG, "naming Ethernet device \"%s\"", name);
>  	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
>  		eth_dev = rte_eth_dev_attach_secondary(name);
>  		if (eth_dev == NULL) {
> @@ -874,6 +904,8 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
>  		goto error;
>  	}
>  	priv->ctx = ctx;
> +	strncpy(priv->ibdev_name, priv->ctx->device->name,
> +		sizeof(priv->ibdev_name));
>  	strncpy(priv->ibdev_path, priv->ctx->device->ibdev_path,
>  		sizeof(priv->ibdev_path));
>  	priv->device_attr = attr;
> @@ -883,6 +915,41 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
>  	priv->nl_socket_rdma = mlx5_nl_init(0, NETLINK_RDMA);
>  	priv->nl_socket_route =	mlx5_nl_init(RTMGRP_LINK,
> NETLINK_ROUTE);
>  	priv->nl_sn = 0;
> +	priv->representor = !!switch_info->representor;
> +	priv->domain_id = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID;
> +	priv->representor_id =
> +		switch_info->representor ? switch_info->port_name : -1;
> +	/*
> +	 * Look for sibling devices in order to reuse their switch domain
> +	 * if any, otherwise allocate one.
> +	 */
> +	i = mlx5_dev_to_port_id(dpdk_dev, NULL, 0);
> +	if (i > 0) {
> +		uint16_t port_id[i];
> +
> +		i = RTE_MIN(mlx5_dev_to_port_id(dpdk_dev, port_id, i), i);
> +		while (i--) {
> +			const struct priv *opriv =
> +				rte_eth_devices[port_id[i]].data-
> >dev_private;
> +
> +			if (!opriv ||
> +			    opriv->domain_id ==
> +			    RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID)
> +				continue;
> +			priv->domain_id = opriv->domain_id;

It looks like for the second port it will use the domain_id of the first port. Is that what you intent? 

Note - I couldn't test it due to compilation errors:

/.autodirect/swgwork/shahafs/workspace/dpdk.org/drivers/net/mlx5/mlx5_nl.c: In function 'mlx5_nl_switch_info_cb':
/.autodirect/swgwork/shahafs/workspace/dpdk.org/drivers/net/mlx5/mlx5_nl.c:843:8: error: 'IFLA_PHYS_PORT_NAME' undecl
ared (first use in this function)
   case IFLA_PHYS_PORT_NAME:
        ^
/.autodirect/swgwork/shahafs/workspace/dpdk.org/drivers/net/mlx5/mlx5_nl.c:843:8: note: each undeclared identifier is
 reported only once for each function it appears in
/.autodirect/swgwork/shahafs/workspace/dpdk.org/drivers/net/mlx5/mlx5_nl.c:851:8: error: 'IFLA_PHYS_SWITCH_ID' undecl
ared (first use in this function)
   case IFLA_PHYS_SWITCH_ID:
        ^

My system info:
NAME="Red Hat Enterprise Linux Server"
VERSION="7.3 (Maipo)"
ID="rhel"
ID_LIKE="fedora"
VERSION_ID="7.3"
PRETTY_NAME="Red Hat Enterprise Linux Server 7.3 (Maipo)"
ANSI_COLOR="0;31"
CPE_NAME="cpe:/o:redhat:enterprise_linux:7.3:GA:server"
HOME_URL="https://www.redhat.com/"
BUG_REPORT_URL="https://bugzilla.redhat.com/"

REDHAT_BUGZILLA_PRODUCT="Red Hat Enterprise Linux 7"
REDHAT_BUGZILLA_PRODUCT_VERSION=7.3
REDHAT_SUPPORT_PRODUCT="Red Hat Enterprise Linux"
REDHAT_SUPPORT_PRODUCT_VERSION="7.3"
Red Hat Enterprise Linux Server release 7.3 (Maipo)
Red Hat Enterprise Linux Server release 7.3 (Maipo)


> +			break;
> +		}
> +	}
> +	if (priv->domain_id ==
> RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID) {
> +		err = rte_eth_switch_domain_alloc(&priv->domain_id);
> +		if (err) {
> +			err = rte_errno;
> +			DRV_LOG(ERR, "unable to allocate switch domain:
> %s",
> +				strerror(rte_errno));
> +			goto error;
> +		}
> +		own_domain_id = 1;
> +	}
>  	err = mlx5_args(&config, dpdk_dev->devargs);
>  	if (err) {
>  		err = rte_errno;
> @@ -966,6 +1033,8 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
>  		err = ENOMEM;
>  		goto error;
>  	}
> +	if (priv->representor)
> +		eth_dev->data->dev_flags |= RTE_ETH_DEV_REPRESENTOR;
>  	eth_dev->data->dev_private = priv;
>  	priv->dev_data = eth_dev->data;
>  	eth_dev->data->mac_addrs = priv->mac;
> @@ -1084,6 +1153,8 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
>  			close(priv->nl_socket_route);
>  		if (priv->nl_socket_rdma >= 0)
>  			close(priv->nl_socket_rdma);
> +		if (own_domain_id)
> +			claim_zero(rte_eth_switch_domain_free(priv-
> >domain_id));
>  		rte_free(priv);
>  	}
>  	if (pd)
> @@ -1100,7 +1171,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
>  /**
>   * DPDK callback to register a PCI device.
>   *
> - * This function spawns an Ethernet device out of a given PCI device.
> + * This function spawns Ethernet devices out of a given PCI device.
>   *
>   * @param[in] pci_drv
>   *   PCI driver structure (mlx5_driver).
> @@ -1115,7 +1186,6 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv
> __rte_unused,
>  	       struct rte_pci_device *pci_dev)  {
>  	struct ibv_device **ibv_list;
> -	struct rte_eth_dev *eth_dev = NULL;
>  	unsigned int n = 0;
>  	int vf;
>  	int ret;
> @@ -1150,9 +1220,11 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv
> __rte_unused,
> 
>  	unsigned int ifindex[n];
>  	struct mlx5_switch_info info[n];
> +	struct rte_eth_dev *eth_list[n];
>  	int nl_route = n ? mlx5_nl_init(0, NETLINK_ROUTE) : -1;
>  	int nl_rdma = n ? mlx5_nl_init(0, NETLINK_RDMA) : -1;
>  	unsigned int i;
> +	unsigned int u;
> 
>  	/*
>  	 * The existence of several matching entries (n > 1) means port @@ -
> 1187,28 +1259,14 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv
> __rte_unused,
>  		close(nl_rdma);
>  	if (nl_route >= 0)
>  		close(nl_route);
> -	/* Look for master device. */
> -	for (i = 0; i != n; ++i) {
> -		if (!info[i].master)
> -			continue;
> -		/* Make it the first entry. */
> -		if (i == 0)
> -			break;
> -		ibv_match[n] = ibv_match[0];
> -		ibv_match[0] = ibv_match[i];
> -		ibv_match[n] = NULL;
> -		break;
> -	}
> -	if (n && i == n) {
> -		if (n == 1 && !info[0].representor) {
> +	/* Count unidentified devices. */
> +	for (u = 0, i = 0; i != n; ++i)
> +		if (!info[i].master && !info[i].representor)
> +			++u;
> +	if (u) {
> +		if (n == 1 && u == 1) {
>  			/* Case #2. */
>  			DRV_LOG(INFO, "no switch support detected");
> -		} else if (n == 1) {
> -			/* Case #3. */
> -			DRV_LOG(ERR,
> -				"device looks like a port representor, this is"
> -				" not supported yet");
> -			n = 0;
>  		} else {
>  			/* Case #3. */
>  			DRV_LOG(ERR,
> @@ -1227,8 +1285,19 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv
> __rte_unused,
>  	default:
>  		vf = 0;
>  	}
> -	if (n)
> -		eth_dev = mlx5_dev_spawn(&pci_dev->device,
> ibv_match[0], vf);
> +	for (i = 0; i != n; ++i) {
> +		uint32_t restore;
> +
> +		eth_list[i] = mlx5_dev_spawn(&pci_dev->device,
> ibv_match[i],
> +					     vf, &info[i]);
> +		if (!eth_list[i])
> +			break;
> +		restore = eth_list[i]->data->dev_flags;
> +		rte_eth_copy_pci_info(eth_list[i], pci_dev);
> +		/* Restore non-PCI flags cleared by the above call. */
> +		eth_list[i]->data->dev_flags |= restore;
> +		rte_eth_dev_probing_finish(eth_list[i]);
> +	}
>  	mlx5_glue->free_device_list(ibv_list);
>  	if (!n) {
>  		DRV_LOG(WARNING,
> @@ -1238,7 +1307,7 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv
> __rte_unused,
>  			pci_dev->addr.devid, pci_dev->addr.function);
>  		rte_errno = ENOENT;
>  		ret = -rte_errno;
> -	} else if (!eth_dev) {
> +	} else if (i != n) {
>  		DRV_LOG(ERR,
>  			"probe of PCI device " PCI_PRI_FMT " aborted after"
>  			" encountering an error: %s",
> @@ -1246,9 +1315,16 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv
> __rte_unused,
>  			pci_dev->addr.devid, pci_dev->addr.function,
>  			strerror(rte_errno));
>  		ret = -rte_errno;
> +		/* Roll back. */
> +		while (i--) {
> +			mlx5_dev_close(eth_list[i]);
> +			if (rte_eal_process_type() == RTE_PROC_PRIMARY)
> +				rte_free(eth_list[i]->data->dev_private);
> +			claim_zero(rte_eth_dev_release_port(eth_list[i]));
> +		}
> +		/* Restore original error. */
> +		rte_errno = -ret;
>  	} else {
> -		rte_eth_copy_pci_info(eth_dev, pci_dev);
> -		rte_eth_dev_probing_finish(eth_dev);
>  		ret = 0;
>  	}
>  	return ret;
> diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index
> 704046270..cc01310e0 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -159,6 +159,7 @@ struct priv {
>  	struct ibv_context *ctx; /* Verbs context. */
>  	struct ibv_device_attr_ex device_attr; /* Device properties. */
>  	struct ibv_pd *pd; /* Protection Domain. */
> +	char ibdev_name[IBV_SYSFS_NAME_MAX]; /* IB device name. */


Why we need a dedicated entry for the ibdev_name? it is already part of priv->ctx->device->name. 

>  	char ibdev_path[IBV_SYSFS_PATH_MAX]; /* IB device path for
> secondary */
>  	struct ether_addr mac[MLX5_MAX_MAC_ADDRESSES]; /* MAC
> addresses. */
>  	BITFIELD_DECLARE(mac_own, uint64_t,
> MLX5_MAX_MAC_ADDRESSES); @@ -168,6 +169,9 @@ struct priv {
>  	/* Device properties. */
>  	uint16_t mtu; /* Configured MTU. */
>  	unsigned int isolated:1; /* Whether isolated mode is enabled. */
> +	unsigned int representor:1; /* Device is a port representor. */
> +	uint16_t domain_id; /* Switch domain identifier. */
> +	int32_t representor_id; /* Port representor identifier. */
>  	/* RX/TX queues. */
>  	unsigned int rxqs_n; /* RX queues array size. */
>  	unsigned int txqs_n; /* TX queues array size. */ @@ -217,9 +221,12
> @@ int mlx5_getenv_int(const char *);
> 
>  /* mlx5_ethdev.c */
> 
> +int mlx5_get_master_ifname(const struct rte_eth_dev *dev,
> +			   char (*ifname)[IF_NAMESIZE]);
>  int mlx5_get_ifname(const struct rte_eth_dev *dev, char
> (*ifname)[IF_NAMESIZE]);  int mlx5_ifindex(const struct rte_eth_dev *dev);
> -int mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr);
> +int mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr,
> +	       int master);
>  int mlx5_get_mtu(struct rte_eth_dev *dev, uint16_t *mtu);  int
> mlx5_set_flags(struct rte_eth_dev *dev, unsigned int keep,
>  		   unsigned int flags);
> @@ -244,6 +251,9 @@ int mlx5_set_link_up(struct rte_eth_dev *dev);  int
> mlx5_is_removed(struct rte_eth_dev *dev);  eth_tx_burst_t
> mlx5_select_tx_function(struct rte_eth_dev *dev);  eth_rx_burst_t
> mlx5_select_rx_function(struct rte_eth_dev *dev);
> +unsigned int mlx5_dev_to_port_id(const struct rte_device *dev,
> +				 uint16_t *port_list,
> +				 unsigned int port_list_n);
> 
>  /* mlx5_mac.c */
> 
> diff --git a/drivers/net/mlx5/mlx5_ethdev.c
> b/drivers/net/mlx5/mlx5_ethdev.c index 819f5baad..05f66f7b6 100644
> --- a/drivers/net/mlx5/mlx5_ethdev.c
> +++ b/drivers/net/mlx5/mlx5_ethdev.c
> @@ -27,6 +27,7 @@
>  #include <time.h>
> 
>  #include <rte_atomic.h>
> +#include <rte_common.h>
>  #include <rte_ethdev_driver.h>
>  #include <rte_bus_pci.h>
>  #include <rte_mbuf.h>
> @@ -93,7 +94,7 @@ struct ethtool_link_settings {  #endif
> 
>  /**
> - * Get interface name from private structure.
> + * Get master interface name from private structure.
>   *
>   * @param[in] dev
>   *   Pointer to Ethernet device.
> @@ -104,7 +105,8 @@ struct ethtool_link_settings {
>   *   0 on success, a negative errno value otherwise and rte_errno is set.
>   */
>  int
> -mlx5_get_ifname(const struct rte_eth_dev *dev, char
> (*ifname)[IF_NAMESIZE])
> +mlx5_get_master_ifname(const struct rte_eth_dev *dev,
> +		       char (*ifname)[IF_NAMESIZE])
>  {
>  	struct priv *priv = dev->data->dev_private;
>  	DIR *dir;
> @@ -179,6 +181,39 @@ mlx5_get_ifname(const struct rte_eth_dev *dev,
> char (*ifname)[IF_NAMESIZE])  }
> 
>  /**
> + * Get interface name from private structure.
> + *
> + * This is a port representor-aware version of mlx5_get_master_ifname().
> + *
> + * @param[in] dev
> + *   Pointer to Ethernet device.
> + * @param[out] ifname
> + *   Interface name output buffer.
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_errno is set.
> + */
> +int
> +mlx5_get_ifname(const struct rte_eth_dev *dev, char
> +(*ifname)[IF_NAMESIZE]) {
> +	struct priv *priv = dev->data->dev_private;
> +	unsigned int ifindex =
> +		priv->nl_socket_rdma >= 0 ?
> +		mlx5_nl_ifindex(priv->nl_socket_rdma, priv->ibdev_name) :
> 0;
> +
> +	if (!ifindex) {
> +		if (!priv->representor)
> +			return mlx5_get_master_ifname(dev, ifname);
> +		rte_errno = ENXIO;
> +		return -rte_errno;
> +	}
> +	if (if_indextoname(ifindex, &(*ifname)[0]))
> +		return 0;
> +	rte_errno = errno;
> +	return -rte_errno;
> +}
> +
> +/**
>   * Get the interface index from device name.
>   *
>   * @param[in] dev
> @@ -214,12 +249,16 @@ mlx5_ifindex(const struct rte_eth_dev *dev)
>   *   Request number to pass to ioctl().
>   * @param[out] ifr
>   *   Interface request structure output buffer.
> + * @param master
> + *   When device is a port representor, perform request on master device
> + *   instead.
>   *
>   * @return
>   *   0 on success, a negative errno value otherwise and rte_errno is set.
>   */
>  int
> -mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr)
> +mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr,
> +	   int master)
>  {
>  	int sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
>  	int ret = 0;
> @@ -228,7 +267,10 @@ mlx5_ifreq(const struct rte_eth_dev *dev, int req,
> struct ifreq *ifr)
>  		rte_errno = errno;
>  		return -rte_errno;
>  	}
> -	ret = mlx5_get_ifname(dev, &ifr->ifr_name);
> +	if (master)
> +		ret = mlx5_get_master_ifname(dev, &ifr->ifr_name);
> +	else
> +		ret = mlx5_get_ifname(dev, &ifr->ifr_name);
>  	if (ret)
>  		goto error;
>  	ret = ioctl(sock, req, ifr);
> @@ -258,7 +300,7 @@ int
>  mlx5_get_mtu(struct rte_eth_dev *dev, uint16_t *mtu)  {
>  	struct ifreq request;
> -	int ret = mlx5_ifreq(dev, SIOCGIFMTU, &request);
> +	int ret = mlx5_ifreq(dev, SIOCGIFMTU, &request, 0);
> 
>  	if (ret)
>  		return ret;
> @@ -282,7 +324,7 @@ mlx5_set_mtu(struct rte_eth_dev *dev, uint16_t
> mtu)  {
>  	struct ifreq request = { .ifr_mtu = mtu, };
> 
> -	return mlx5_ifreq(dev, SIOCSIFMTU, &request);
> +	return mlx5_ifreq(dev, SIOCSIFMTU, &request, 0);
>  }
> 
>  /**
> @@ -302,13 +344,13 @@ int
>  mlx5_set_flags(struct rte_eth_dev *dev, unsigned int keep, unsigned int
> flags)  {
>  	struct ifreq request;
> -	int ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &request);
> +	int ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &request, 0);
> 
>  	if (ret)
>  		return ret;
>  	request.ifr_flags &= keep;
>  	request.ifr_flags |= flags & ~keep;
> -	return mlx5_ifreq(dev, SIOCSIFFLAGS, &request);
> +	return mlx5_ifreq(dev, SIOCSIFFLAGS, &request, 0);
>  }
> 
>  /**
> @@ -477,6 +519,30 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev,
> struct rte_eth_dev_info *info)
>  	info->speed_capa = priv->link_speed_capa;
>  	info->flow_type_rss_offloads = ~MLX5_RSS_HF_MASK;
>  	mlx5_set_default_params(dev, info);
> +	info->switch_info.name = dev->data->name;
> +	info->switch_info.domain_id = priv->domain_id;
> +	info->switch_info.port_id = priv->representor_id;
> +	if (priv->representor) {
> +		unsigned int i = mlx5_dev_to_port_id(dev->device, NULL, 0);
> +		uint16_t port_id[i];
> +
> +		i = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, i),
> i);
> +		while (i--) {
> +			struct priv *opriv =
> +				rte_eth_devices[port_id[i]].data-
> >dev_private;
> +
> +			if (!opriv ||
> +			    opriv->representor ||
> +			    opriv->domain_id != priv->domain_id)
> +				continue;
> +			/*
> +			 * Override switch name with that of the master
> +			 * device.
> +			 */
> +			info->switch_info.name = opriv->dev_data->name;
> +			break;

According to this logic it means once the master device is closed, all the representors are no longer belong to the same switch (switch name of each is different) which is not correct.
According to your notes it is possible to close master w/o closing the representor. 

Why not just storing the master switch name when probing the representor and to use it as is on the dev_info? 

> +		}
> +	}
>  }
> 
>  /**
> @@ -540,7 +606,7 @@ mlx5_link_update_unlocked_gset(struct rte_eth_dev
> *dev,
>  	int link_speed = 0;
>  	int ret;
> 
> -	ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr);
> +	ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr, 1);
>  	if (ret) {
>  		DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed:
> %s",
>  			dev->data->port_id, strerror(rte_errno)); @@ -550,7
> +616,7 @@ mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev,
>  	dev_link.link_status = ((ifr.ifr_flags & IFF_UP) &&
>  				(ifr.ifr_flags & IFF_RUNNING));
>  	ifr.ifr_data = (void *)&edata;
> -	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
> +	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr, 1);
>  	if (ret) {
>  		DRV_LOG(WARNING,
>  			"port %u ioctl(SIOCETHTOOL, ETHTOOL_GSET) failed:
> %s", @@ -611,7 +677,7 @@ mlx5_link_update_unlocked_gs(struct
> rte_eth_dev *dev,
>  	uint64_t sc;
>  	int ret;
> 
> -	ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr);
> +	ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr, 1);
>  	if (ret) {
>  		DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed:
> %s",
>  			dev->data->port_id, strerror(rte_errno)); @@ -621,7
> +687,7 @@ mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev,
>  	dev_link.link_status = ((ifr.ifr_flags & IFF_UP) &&
>  				(ifr.ifr_flags & IFF_RUNNING));
>  	ifr.ifr_data = (void *)&gcmd;
> -	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
> +	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr, 1);
>  	if (ret) {
>  		DRV_LOG(DEBUG,
>  			"port %u ioctl(SIOCETHTOOL,
> ETHTOOL_GLINKSETTINGS)"
> @@ -638,7 +704,7 @@ mlx5_link_update_unlocked_gs(struct rte_eth_dev
> *dev,
> 
>  	*ecmd = gcmd;
>  	ifr.ifr_data = (void *)ecmd;
> -	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
> +	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr, 1);
>  	if (ret) {
>  		DRV_LOG(DEBUG,
>  			"port %u ioctl(SIOCETHTOOL,
> ETHTOOL_GLINKSETTINGS)"
> @@ -801,7 +867,7 @@ mlx5_dev_get_flow_ctrl(struct rte_eth_dev *dev,
> struct rte_eth_fc_conf *fc_conf)
>  	int ret;
> 
>  	ifr.ifr_data = (void *)ðpause;
> -	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
> +	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr, 1);
>  	if (ret) {
>  		DRV_LOG(WARNING,
>  			"port %u ioctl(SIOCETHTOOL,
> ETHTOOL_GPAUSEPARAM) failed:"
> @@ -854,7 +920,7 @@ mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev,
> struct rte_eth_fc_conf *fc_conf)
>  		ethpause.tx_pause = 1;
>  	else
>  		ethpause.tx_pause = 0;
> -	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
> +	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr, 0);
>  	if (ret) {
>  		DRV_LOG(WARNING,
>  			"port %u ioctl(SIOCETHTOOL,
> ETHTOOL_SPAUSEPARAM)"
> @@ -1193,3 +1259,40 @@ mlx5_is_removed(struct rte_eth_dev *dev)
>  		return 1;
>  	return 0;
>  }
> +
> +/**
> + * Get port ID list of mlx5 instances sharing a common device.
> + *
> + * @param[in] dev
> + *   Device to look for.
> + * @param[out] port_list
> + *   Result buffer for collected port IDs.
> + * @param port_list_n
> + *   Maximum number of entries in result buffer. If 0, @p port_list can be
> + *   NULL.
> + *
> + * @return
> + *   Number of matching instances regardless of the @p port_list_n
> + *   parameter, 0 if none were found.
> + */
> +unsigned int
> +mlx5_dev_to_port_id(const struct rte_device *dev, uint16_t *port_list,
> +		    unsigned int port_list_n)
> +{
> +	uint16_t id;
> +	unsigned int n = 0;
> +
> +	RTE_ETH_FOREACH_DEV(id) {
> +		struct rte_eth_dev *ldev = &rte_eth_devices[id];
> +
> +		if (!ldev->device ||
> +		    !ldev->device->driver ||
> +		    strcmp(ldev->device->driver->name,
> MLX5_DRIVER_NAME) ||
> +		    ldev->device != dev)
> +			continue;
> +		if (n < port_list_n)
> +			port_list[n] = id;
> +		n++;
> +	}
> +	return n;
> +}
> diff --git a/drivers/net/mlx5/mlx5_mac.c b/drivers/net/mlx5/mlx5_mac.c
> index 672a47619..12ee37f55 100644
> --- a/drivers/net/mlx5/mlx5_mac.c
> +++ b/drivers/net/mlx5/mlx5_mac.c
> @@ -49,7 +49,7 @@ mlx5_get_mac(struct rte_eth_dev *dev, uint8_t
> (*mac)[ETHER_ADDR_LEN])
>  	struct ifreq request;
>  	int ret;
> 
> -	ret = mlx5_ifreq(dev, SIOCGIFHWADDR, &request);
> +	ret = mlx5_ifreq(dev, SIOCGIFHWADDR, &request, 0);
>  	if (ret)
>  		return ret;
>  	memcpy(mac, request.ifr_hwaddr.sa_data, ETHER_ADDR_LEN); diff -
> -git a/drivers/net/mlx5/mlx5_stats.c b/drivers/net/mlx5/mlx5_stats.c index
> 875dd1027..91f3d474a 100644
> --- a/drivers/net/mlx5/mlx5_stats.c
> +++ b/drivers/net/mlx5/mlx5_stats.c
> @@ -146,7 +146,7 @@ mlx5_read_dev_counters(struct rte_eth_dev *dev,
> uint64_t *stats)
>  	et_stats->cmd = ETHTOOL_GSTATS;
>  	et_stats->n_stats = xstats_ctrl->stats_n;
>  	ifr.ifr_data = (caddr_t)et_stats;
> -	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
> +	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr, 1);
>  	if (ret) {
>  		DRV_LOG(WARNING,
>  			"port %u unable to read statistic values from device",
> @@ -194,7 +194,7 @@ mlx5_ethtool_get_stats_n(struct rte_eth_dev *dev)
> {
> 
>  	drvinfo.cmd = ETHTOOL_GDRVINFO;
>  	ifr.ifr_data = (caddr_t)&drvinfo;
> -	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
> +	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr, 1);
>  	if (ret) {
>  		DRV_LOG(WARNING, "port %u unable to query number of
> statistics",
>  			dev->data->port_id);
> @@ -244,7 +244,7 @@ mlx5_xstats_init(struct rte_eth_dev *dev)
>  	strings->string_set = ETH_SS_STATS;
>  	strings->len = dev_stats_n;
>  	ifr.ifr_data = (caddr_t)strings;
> -	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
> +	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr, 1);
>  	if (ret) {
>  		DRV_LOG(WARNING, "port %u unable to get statistic
> names",
>  			dev->data->port_id);
> --
> 2.11.0


More information about the dev mailing list