[dpdk-dev] [PATCH v2 01/13] net/mlx5: add representor recognition on kernels 5.x

Shahaf Shuler shahafs at mellanox.com
Tue Mar 26 13:20:10 CET 2019


Monday, March 25, 2019 7:03 PM, Viacheslav Ovsiienko:
> Subject: [PATCH v2 01/13] net/mlx5: add representor recognition on kernels
> 5.x
> 
> The master device and VF representors were distinguished by presence of
> port name, master device did not have one. The new Linux kernels starting
> from 5.0 provide the port name for master device and the implemented
> representor recognizing method does not work.
> The new recognizing method is based on quiering the VF number, created on
> the base of the device.
> 
> The IFLA_NUM_VF attribute is returned by kernel if IFLA_EXT_MASK
> attribute is specified in the Netlink request message.
> 
> Also the presence of device symlink in device sysfs folder is added to
> distinguish representors with sysfs based method.
> 
> Signed-off-by: Viacheslav Ovsiienko <viacheslavo at mellanox.com>
> ---
>  drivers/net/mlx5/Makefile      | 10 ++++++++++
>  drivers/net/mlx5/meson.build   |  4 ++++
>  drivers/net/mlx5/mlx5.c        | 17 ++++++++++++++---
>  drivers/net/mlx5/mlx5.h        |  1 +
>  drivers/net/mlx5/mlx5_ethdev.c | 14 ++++++++++++--
>  drivers/net/mlx5/mlx5_nl.c     | 37
> ++++++++++++++++++++++++++++++++++---
>  6 files changed, 75 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile index
> 1ed299d..3dd7e38 100644
> --- a/drivers/net/mlx5/Makefile
> +++ b/drivers/net/mlx5/Makefile
> @@ -231,6 +231,16 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-
> config-h.sh
>  		enum RDMA_NLDEV_ATTR_NDEV_INDEX \
>  		$(AUTOCONF_OUTPUT)
>  	$Q sh -- '$<' '$@' \
> +		HAVE_IFLA_NUM_VF \
> +		linux/if_link.h \
> +		enum IFLA_NUM_VF \
> +		$(AUTOCONF_OUTPUT)
> +	$Q sh -- '$<' '$@' \
> +		HAVE_IFLA_EXT_MASK \
> +		linux/if_link.h \
> +		enum IFLA_EXT_MASK \
> +		$(AUTOCONF_OUTPUT)
> +	$Q sh -- '$<' '$@' \
>  		HAVE_IFLA_PHYS_SWITCH_ID \
>  		linux/if_link.h \
>  		enum IFLA_PHYS_SWITCH_ID \
> diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build
> index 0cf2f08..e3cb9bc 100644
> --- a/drivers/net/mlx5/meson.build
> +++ b/drivers/net/mlx5/meson.build
> @@ -133,6 +133,10 @@ if build
>  		'ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT' ],
>  		[ 'HAVE_ETHTOOL_LINK_MODE_100G', 'linux/ethtool.h',
>  		'ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT' ],
> +		[ 'HAVE_IFLA_NUM_VF', 'linux/if_link.h',
> +		'IFLA_NUM_VF' ],
> +		[ 'HAVE_IFLA_EXT_MASK', 'linux/if_link.h',
> +		'IFLA_EXT_MASK' ],
>  		[ 'HAVE_IFLA_PHYS_SWITCH_ID', 'linux/if_link.h',
>  		'IFLA_PHYS_SWITCH_ID' ],
>  		[ 'HAVE_IFLA_PHYS_PORT_NAME', 'linux/if_link.h', diff --git
> a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index ad1975c..032f1f2
> 100644
> --- a/drivers/net/mlx5/mlx5.c
> +++ b/drivers/net/mlx5/mlx5.c
> @@ -13,7 +13,6 @@
>  #include <errno.h>
>  #include <net/if.h>
>  #include <sys/mman.h>
> -#include <linux/netlink.h>
>  #include <linux/rtnetlink.h>
> 
>  /* Verbs header. */
> @@ -1001,9 +1000,21 @@
>  	priv->nl_socket_route =	mlx5_nl_init(NETLINK_ROUTE);
>  	priv->nl_sn = 0;
>  	priv->representor = !!switch_info->representor;
> +	priv->master = !!switch_info->master;
>  	priv->domain_id = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID;
> -	priv->representor_id =
> -		switch_info->representor ? switch_info->port_name : -1;
> +	/*
> +	 * Currently we support sinlge E-Switch per PF configurations
> +	 * only and representors_id field contains the vport index for
> +	 * corresponding VF, deduced from representor port name.
> +	 * For exapmple, let's have the IB device port 10, it has
> +	 * attached network device eth0, which has port name attribute
> +	 * pf0vf2, we can deduce the VF number as 2, and set vport index
> +	 * as 3 (2+1). This assigning schema should be changed if the
> +	 * multiple E-Switch instance per PF configurations or/and PCI
> +	 * subfunctions dded.
> +	 */
> +	priv->representor_id = switch_info->representor ?
> +			       switch_info->port_name + 1 : -1;

The representor_id field is also being used on the info query from user. We shouldn't report back the vport id of the representor rather its index according to phys_port_name.
You can define a different field, maybe called vport. 

>  	/*
>  	 * Look for sibling devices in order to reuse their switch domain
>  	 * if any, otherwise allocate one.
> diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index
> a88cb4a..58bc37f 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -214,6 +214,7 @@ struct mlx5_priv {
>  	uint16_t mtu; /* Configured MTU. */
>  	unsigned int isolated:1; /* Whether isolated mode is enabled. */
>  	unsigned int representor:1; /* Device is a port representor. */
> +	unsigned int master:1; /* Device is a E-Switch master. */
>  	uint16_t domain_id; /* Switch domain identifier. */
>  	int32_t representor_id; /* Port representor identifier. */
>  	/* RX/TX queues. */
> diff --git a/drivers/net/mlx5/mlx5_ethdev.c
> b/drivers/net/mlx5/mlx5_ethdev.c index 2e8a296..64c6b6f 100644
> --- a/drivers/net/mlx5/mlx5_ethdev.c
> +++ b/drivers/net/mlx5/mlx5_ethdev.c
> @@ -1362,8 +1362,10 @@ int mlx5_fw_version_get(struct rte_eth_dev
> *dev, char *fw_ver, size_t fw_size)
>  		.port_name = 0,
>  		.switch_id = 0,
>  	};
> +	DIR *dir;
>  	bool port_name_set = false;
>  	bool port_switch_id_set = false;
> +	bool device_dir = false;
>  	char c;
>  	int ret;
> 
> @@ -1376,6 +1378,8 @@ int mlx5_fw_version_get(struct rte_eth_dev *dev,
> char *fw_ver, size_t fw_size)
>  	      ifname);
>  	MKSTR(phys_switch_id, "/sys/class/net/%s/phys_switch_id",
>  	      ifname);
> +	MKSTR(pci_device, "/sys/class/net/%s/device",
> +	      ifname);
> 
>  	file = fopen(phys_port_name, "rb");
>  	if (file != NULL) {
> @@ -1394,8 +1398,14 @@ int mlx5_fw_version_get(struct rte_eth_dev
> *dev, char *fw_ver, size_t fw_size)
>  		fscanf(file, "%" SCNx64 "%c", &data.switch_id, &c) == 2 &&
>  		c == '\n';
>  	fclose(file);
> -	data.master = port_switch_id_set && !port_name_set;
> -	data.representor = port_switch_id_set && port_name_set;
> +	dir = opendir(pci_device);
> +	if (dir != NULL) {
> +		closedir(dir);
> +		device_dir = true;
> +	}
> +	data.master = port_switch_id_set && (!port_name_set ||
> device_dir);
> +	data.representor = port_switch_id_set && port_name_set &&
> !device_dir;
> +	assert(!(data.master && data.representor));
>  	*info = data;
>  	return 0;
>  }
> diff --git a/drivers/net/mlx5/mlx5_nl.c b/drivers/net/mlx5/mlx5_nl.c index
> 8a10109..9d0fcba 100644
> --- a/drivers/net/mlx5/mlx5_nl.c
> +++ b/drivers/net/mlx5/mlx5_nl.c
> @@ -65,6 +65,12 @@
>  #endif
> 
>  /* These are normally found in linux/if_link.h. */
> +#ifndef HAVE_IFLA_NUM_VF
> +#define IFLA_NUM_VF 21
> +#endif
> +#ifndef HAVE_IFLA_EXT_MASK
> +#define IFLA_EXT_MASK 29
> +#endif
>  #ifndef HAVE_IFLA_PHYS_SWITCH_ID
>  #define IFLA_PHYS_SWITCH_ID 36
>  #endif
> @@ -837,6 +843,7 @@ struct mlx5_nl_ifindex_data {
>  	size_t off = NLMSG_LENGTH(sizeof(struct ifinfomsg));
>  	bool port_name_set = false;
>  	bool switch_id_set = false;
> +	bool num_vf_set = false;
> 
>  	if (nh->nlmsg_type != RTM_NEWLINK)
>  		goto error;
> @@ -848,6 +855,9 @@ struct mlx5_nl_ifindex_data {
>  		if (ra->rta_len > nh->nlmsg_len - off)
>  			goto error;
>  		switch (ra->rta_type) {
> +		case IFLA_NUM_VF:
> +			num_vf_set = true;
> +			break;
>  		case IFLA_PHYS_PORT_NAME:
>  			port_name_set =
>  				mlx5_translate_port_name((char *)payload,
> @@ -864,8 +874,20 @@ struct mlx5_nl_ifindex_data {
>  		}
>  		off += RTA_ALIGN(ra->rta_len);
>  	}
> -	info.master = switch_id_set && !port_name_set;
> -	info.representor = switch_id_set && port_name_set;
> +	if (switch_id_set) {
> +		if (info.port_name_new) {
> +			/* New representors naming schema. */
> +			if (port_name_set) {
> +				info.master = (info.port_name == -1);
> +				info.representor = (info.port_name != -1);
> +			}
> +		} else {
> +			/* Legacy representors naming schema. */
> +			info.master = (!port_name_set || num_vf_set);
> +			info.representor = port_name_set && !num_vf_set;
> +		}
> +	}
> +	assert(!(data.master && data.representor));
>  	memcpy(arg, &info, sizeof(info));
>  	return 0;
>  error:
> @@ -893,9 +915,13 @@ struct mlx5_nl_ifindex_data {
>  	struct {
>  		struct nlmsghdr nh;
>  		struct ifinfomsg info;
> +		struct rtattr rta;
> +		uint32_t extmask;
>  	} req = {
>  		.nh = {
> -			.nlmsg_len = NLMSG_LENGTH(sizeof(req.info)),
> +			.nlmsg_len = NLMSG_LENGTH
> +					(sizeof(req.info) +
> +					 RTA_LENGTH(sizeof(uint32_t))),
>  			.nlmsg_type = RTM_GETLINK,
>  			.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
>  		},
> @@ -903,6 +929,11 @@ struct mlx5_nl_ifindex_data {
>  			.ifi_family = AF_UNSPEC,
>  			.ifi_index = ifindex,
>  		},
> +		.rta = {
> +			.rta_type = IFLA_EXT_MASK,
> +			.rta_len = RTA_LENGTH(sizeof(int32_t)),
> +		},
> +		.extmask = RTE_LE32(1),
>  	};
>  	int ret;
> 
> --
> 1.8.3.1



More information about the dev mailing list