[dpdk-dev] [PATCH v2 01/13] net/mlx5: add representor recognition on kernels 5.x
Shahaf Shuler
shahafs at mellanox.com
Tue Mar 26 13:20:10 CET 2019
Monday, March 25, 2019 7:03 PM, Viacheslav Ovsiienko:
> Subject: [PATCH v2 01/13] net/mlx5: add representor recognition on kernels
> 5.x
>
> The master device and VF representors were distinguished by presence of
> port name, master device did not have one. The new Linux kernels starting
> from 5.0 provide the port name for master device and the implemented
> representor recognizing method does not work.
> The new recognizing method is based on querying the VF number, created on
> the base of the device.
>
> The IFLA_NUM_VF attribute is returned by kernel if IFLA_EXT_MASK
> attribute is specified in the Netlink request message.
>
> Also the presence of device symlink in device sysfs folder is added to
> distinguish representors with sysfs based method.
>
> Signed-off-by: Viacheslav Ovsiienko <viacheslavo at mellanox.com>
> ---
> drivers/net/mlx5/Makefile | 10 ++++++++++
> drivers/net/mlx5/meson.build | 4 ++++
> drivers/net/mlx5/mlx5.c | 17 ++++++++++++++---
> drivers/net/mlx5/mlx5.h | 1 +
> drivers/net/mlx5/mlx5_ethdev.c | 14 ++++++++++++--
> drivers/net/mlx5/mlx5_nl.c | 37
> ++++++++++++++++++++++++++++++++++---
> 6 files changed, 75 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile index
> 1ed299d..3dd7e38 100644
> --- a/drivers/net/mlx5/Makefile
> +++ b/drivers/net/mlx5/Makefile
> @@ -231,6 +231,16 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-
> config-h.sh
> enum RDMA_NLDEV_ATTR_NDEV_INDEX \
> $(AUTOCONF_OUTPUT)
> $Q sh -- '$<' '$@' \
> + HAVE_IFLA_NUM_VF \
> + linux/if_link.h \
> + enum IFLA_NUM_VF \
> + $(AUTOCONF_OUTPUT)
> + $Q sh -- '$<' '$@' \
> + HAVE_IFLA_EXT_MASK \
> + linux/if_link.h \
> + enum IFLA_EXT_MASK \
> + $(AUTOCONF_OUTPUT)
> + $Q sh -- '$<' '$@' \
> HAVE_IFLA_PHYS_SWITCH_ID \
> linux/if_link.h \
> enum IFLA_PHYS_SWITCH_ID \
> diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build
> index 0cf2f08..e3cb9bc 100644
> --- a/drivers/net/mlx5/meson.build
> +++ b/drivers/net/mlx5/meson.build
> @@ -133,6 +133,10 @@ if build
> 'ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT' ],
> [ 'HAVE_ETHTOOL_LINK_MODE_100G', 'linux/ethtool.h',
> 'ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT' ],
> + [ 'HAVE_IFLA_NUM_VF', 'linux/if_link.h',
> + 'IFLA_NUM_VF' ],
> + [ 'HAVE_IFLA_EXT_MASK', 'linux/if_link.h',
> + 'IFLA_EXT_MASK' ],
> [ 'HAVE_IFLA_PHYS_SWITCH_ID', 'linux/if_link.h',
> 'IFLA_PHYS_SWITCH_ID' ],
> [ 'HAVE_IFLA_PHYS_PORT_NAME', 'linux/if_link.h', diff --git
> a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index ad1975c..032f1f2
> 100644
> --- a/drivers/net/mlx5/mlx5.c
> +++ b/drivers/net/mlx5/mlx5.c
> @@ -13,7 +13,6 @@
> #include <errno.h>
> #include <net/if.h>
> #include <sys/mman.h>
> -#include <linux/netlink.h>
> #include <linux/rtnetlink.h>
>
> /* Verbs header. */
> @@ -1001,9 +1000,21 @@
> priv->nl_socket_route = mlx5_nl_init(NETLINK_ROUTE);
> priv->nl_sn = 0;
> priv->representor = !!switch_info->representor;
> + priv->master = !!switch_info->master;
> priv->domain_id = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID;
> - priv->representor_id =
> - switch_info->representor ? switch_info->port_name : -1;
> + /*
> + * Currently we support single E-Switch per PF configurations
> + * only and representor_id field contains the vport index for
> + * corresponding VF, deduced from representor port name.
> + * For example, let's have the IB device port 10, it has
> + * attached network device eth0, which has port name attribute
> + * pf0vf2, we can deduce the VF number as 2, and set vport index
> + * as 3 (2+1). This assigning schema should be changed if the
> + * multiple E-Switch instance per PF configurations or/and PCI
> + * subfunctions are added.
> + */
> + priv->representor_id = switch_info->representor ?
> + switch_info->port_name + 1 : -1;
The representor_id field is also used in the info query from the user. We shouldn't report back the vport id of the representor, but rather its index according to phys_port_name.
You can define a different field, maybe called vport.
> /*
> * Look for sibling devices in order to reuse their switch domain
> * if any, otherwise allocate one.
> diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index
> a88cb4a..58bc37f 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -214,6 +214,7 @@ struct mlx5_priv {
> uint16_t mtu; /* Configured MTU. */
> unsigned int isolated:1; /* Whether isolated mode is enabled. */
> unsigned int representor:1; /* Device is a port representor. */
> + unsigned int master:1; /* Device is a E-Switch master. */
> uint16_t domain_id; /* Switch domain identifier. */
> int32_t representor_id; /* Port representor identifier. */
> /* RX/TX queues. */
> diff --git a/drivers/net/mlx5/mlx5_ethdev.c
> b/drivers/net/mlx5/mlx5_ethdev.c index 2e8a296..64c6b6f 100644
> --- a/drivers/net/mlx5/mlx5_ethdev.c
> +++ b/drivers/net/mlx5/mlx5_ethdev.c
> @@ -1362,8 +1362,10 @@ int mlx5_fw_version_get(struct rte_eth_dev
> *dev, char *fw_ver, size_t fw_size)
> .port_name = 0,
> .switch_id = 0,
> };
> + DIR *dir;
> bool port_name_set = false;
> bool port_switch_id_set = false;
> + bool device_dir = false;
> char c;
> int ret;
>
> @@ -1376,6 +1378,8 @@ int mlx5_fw_version_get(struct rte_eth_dev *dev,
> char *fw_ver, size_t fw_size)
> ifname);
> MKSTR(phys_switch_id, "/sys/class/net/%s/phys_switch_id",
> ifname);
> + MKSTR(pci_device, "/sys/class/net/%s/device",
> + ifname);
>
> file = fopen(phys_port_name, "rb");
> if (file != NULL) {
> @@ -1394,8 +1398,14 @@ int mlx5_fw_version_get(struct rte_eth_dev
> *dev, char *fw_ver, size_t fw_size)
> fscanf(file, "%" SCNx64 "%c", &data.switch_id, &c) == 2 &&
> c == '\n';
> fclose(file);
> - data.master = port_switch_id_set && !port_name_set;
> - data.representor = port_switch_id_set && port_name_set;
> + dir = opendir(pci_device);
> + if (dir != NULL) {
> + closedir(dir);
> + device_dir = true;
> + }
> + data.master = port_switch_id_set && (!port_name_set ||
> device_dir);
> + data.representor = port_switch_id_set && port_name_set &&
> !device_dir;
> + assert(!(data.master && data.representor));
> *info = data;
> return 0;
> }
> diff --git a/drivers/net/mlx5/mlx5_nl.c b/drivers/net/mlx5/mlx5_nl.c index
> 8a10109..9d0fcba 100644
> --- a/drivers/net/mlx5/mlx5_nl.c
> +++ b/drivers/net/mlx5/mlx5_nl.c
> @@ -65,6 +65,12 @@
> #endif
>
> /* These are normally found in linux/if_link.h. */
> +#ifndef HAVE_IFLA_NUM_VF
> +#define IFLA_NUM_VF 21
> +#endif
> +#ifndef HAVE_IFLA_EXT_MASK
> +#define IFLA_EXT_MASK 29
> +#endif
> #ifndef HAVE_IFLA_PHYS_SWITCH_ID
> #define IFLA_PHYS_SWITCH_ID 36
> #endif
> @@ -837,6 +843,7 @@ struct mlx5_nl_ifindex_data {
> size_t off = NLMSG_LENGTH(sizeof(struct ifinfomsg));
> bool port_name_set = false;
> bool switch_id_set = false;
> + bool num_vf_set = false;
>
> if (nh->nlmsg_type != RTM_NEWLINK)
> goto error;
> @@ -848,6 +855,9 @@ struct mlx5_nl_ifindex_data {
> if (ra->rta_len > nh->nlmsg_len - off)
> goto error;
> switch (ra->rta_type) {
> + case IFLA_NUM_VF:
> + num_vf_set = true;
> + break;
> case IFLA_PHYS_PORT_NAME:
> port_name_set =
> mlx5_translate_port_name((char *)payload,
> @@ -864,8 +874,20 @@ struct mlx5_nl_ifindex_data {
> }
> off += RTA_ALIGN(ra->rta_len);
> }
> - info.master = switch_id_set && !port_name_set;
> - info.representor = switch_id_set && port_name_set;
> + if (switch_id_set) {
> + if (info.port_name_new) {
> + /* New representors naming schema. */
> + if (port_name_set) {
> + info.master = (info.port_name == -1);
> + info.representor = (info.port_name != -1);
> + }
> + } else {
> + /* Legacy representors naming schema. */
> + info.master = (!port_name_set || num_vf_set);
> + info.representor = port_name_set && !num_vf_set;
> + }
> + }
> + assert(!(data.master && data.representor));
> memcpy(arg, &info, sizeof(info));
> return 0;
> error:
> @@ -893,9 +915,13 @@ struct mlx5_nl_ifindex_data {
> struct {
> struct nlmsghdr nh;
> struct ifinfomsg info;
> + struct rtattr rta;
> + uint32_t extmask;
> } req = {
> .nh = {
> - .nlmsg_len = NLMSG_LENGTH(sizeof(req.info)),
> + .nlmsg_len = NLMSG_LENGTH
> + (sizeof(req.info) +
> + RTA_LENGTH(sizeof(uint32_t))),
> .nlmsg_type = RTM_GETLINK,
> .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
> },
> @@ -903,6 +929,11 @@ struct mlx5_nl_ifindex_data {
> .ifi_family = AF_UNSPEC,
> .ifi_index = ifindex,
> },
> + .rta = {
> + .rta_type = IFLA_EXT_MASK,
> + .rta_len = RTA_LENGTH(sizeof(int32_t)),
> + },
> + .extmask = RTE_LE32(1),
> };
> int ret;
>
> --
> 1.8.3.1
More information about the dev
mailing list