[dpdk-dev] [PATCH 1/1] net/mlx5: add support for PF representor

Slava Ovsiienko viacheslavo at mellanox.com
Mon Apr 15 11:11:53 CEST 2019


Hi, Shahaf

> -----Original Message-----
> From: Shahaf Shuler
> Sent: Sunday, April 14, 2019 10:43
> To: Slava Ovsiienko <viacheslavo at mellanox.com>; dev at dpdk.org
> Subject: RE: [dpdk-dev] [PATCH 1/1] net/mlx5: add support for PF
> representor
> 
> Hi Slava,
> 
> Friday, April 12, 2019 6:48 PM, Viacheslav Ovsiienko:
> > Subject: [dpdk-dev] [PATCH 1/1] net/mlx5: add support for PF
> > representor
> >
> > On BlueField platform we have the new entity - PF representor.
> > This one represents the PCI PF attached to external host on the side of
> ARM.
> > The traffic sent by the external host to the NIC via PF will be seem
> > by ARM on this PF representor.
> >
> > This patch extends port recognizing capability on the base of physical
> > port name. The following naming formats are supported:
> >
> >   - missing physical port name (no sysfs/netlink key) at all,
> >     this is old style (before kernel 5.0) format, master assumed
> >   - 1 (decimal digits) - old style (before kernel 5.0) format,
> >     exists only for representors, master does not have physical
> >     port name at all (see above)
> >   - p0 ("p" followed by decimal digits), new style (kernel version
> >     is 5.0 or higher, Mellanox OFED 4.6 or higher) name format
> >     for uplink representor, plays the role of master
> >   - pf0vf0 ("pf" followed by PF index concatenated with "vf"
> >     followed by VF index),  new style (kernel version  is 5.0
> >     or higher, Mellanox OFED 4.6 or higher) name format for
> >     VF representor. If index of VF is "-1" it is a special case
> >     of host PF representor, this representor must be indexed in
> >     devargs as 65535, for example representor=[0-3,65535] will
> >     allow representors for VF0, VF1, VF2, VF3 and host PF.
> >     Note: do not specify representor=[0-65535] it causes devargs
> >     processing error, because number of ports (rte_eth_dev) is
> >     limited.
> >
> 
> The above is a bit complex to understand and in fact we have 2 modes:
> 1. legacy - phys_port_name are numbers. Master doesn't have
> phys_port_name 2. modern - phys_port_name are strings.
> uplink representor is p%d
> representors are (including PF representor) pf%dvf%d. the vf index for the PF
> representor is 65535.

OK, I'll try to update the commit message to make it more clear.
> 
> > Applications should distinguish representors and master devices
> > exclusively by device flag RTE_ETH_DEV_REPRESENTOR and do not rely on
> > switch port_id (mlx5 PMD deduces ones from representor_id) values
> > returned by
> > dev_infos_get() API.
> >
> 
> Please also reference the kernel commit which introduce the name change.
OK.

> 
> > Signed-off-by: Viacheslav Ovsiienko <viacheslavo at mellanox.com>
> > ---
> >  drivers/net/mlx5/mlx5.h        | 11 ++++++-
> >  drivers/net/mlx5/mlx5_ethdev.c | 68 +++++++++++++++++++++++++++----
> > -----------
> >  drivers/net/mlx5/mlx5_nl.c     | 42 +++++++++++++++++---------
> >  3 files changed, 82 insertions(+), 39 deletions(-)
> >
> > diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index
> > 8eb1019..81c02ce 100644
> > --- a/drivers/net/mlx5/mlx5.h
> > +++ b/drivers/net/mlx5/mlx5.h
> > @@ -80,11 +80,20 @@ struct mlx5_mp_param {
> >  /** Key string for IPC. */
> >  #define MLX5_MP_NAME "net_mlx5_mp"
> >
> > +/* Recognized Infiniband device physical port name types. */ enum
> > +mlx5_phys_port_name_type {
> > +	MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN = 0, /* Unrecognized.
> > */
> > +	MLX5_PHYS_PORT_NAME_TYPE_LEGACY, /* before kernel ver < 5.0
> > */
> > +	MLX5_PHYS_PORT_NAME_TYPE_UPLINK, /* p0, kernel ver >= 5.0 */
> > +	MLX5_PHYS_PORT_NAME_TYPE_PFVF, /* pf0vf0, kernel ver >= 5.0
> > */ };
> > +
> >  /** Switch information returned by mlx5_nl_switch_info(). */  struct
> > mlx5_switch_info {
> >  	uint32_t master:1; /**< Master device. */
> >  	uint32_t representor:1; /**< Representor device. */
> > -	uint32_t port_name_new:1; /**< Rep. port name is in new format.
> > */
> > +	enum mlx5_phys_port_name_type name_type; /** < Port name
> > type. */
> > +	int32_t pf_num; /**< PF number (valid for pfxvfx format only). */
> >  	int32_t port_name; /**< Representor port name. */
> >  	uint64_t switch_id; /**< Switch identifier. */  }; diff --git
> > a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
> > index 3992918..371989f 100644
> > --- a/drivers/net/mlx5/mlx5_ethdev.c
> > +++ b/drivers/net/mlx5/mlx5_ethdev.c
> > @@ -1395,12 +1395,11 @@ int mlx5_fw_version_get(struct rte_eth_dev
> > *dev, char *fw_ver, size_t fw_size)
> >  	struct mlx5_switch_info data = {
> >  		.master = 0,
> >  		.representor = 0,
> > -		.port_name_new = 0,
> > +		.name_type = MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN,
> >  		.port_name = 0,
> >  		.switch_id = 0,
> >  	};
> >  	DIR *dir;
> > -	bool port_name_set = false;
> >  	bool port_switch_id_set = false;
> >  	bool device_dir = false;
> >  	char c;
> > @@ -1423,8 +1422,7 @@ int mlx5_fw_version_get(struct rte_eth_dev
> *dev,
> > char *fw_ver, size_t fw_size)
> >  		ret = fscanf(file, "%s", port_name);
> >  		fclose(file);
> >  		if (ret == 1)
> > -			port_name_set =
> > mlx5_translate_port_name(port_name,
> > -								 &data);
> > +			mlx5_translate_port_name(port_name, &data);
> >  	}
> >  	file = fopen(phys_switch_id, "rb");
> >  	if (file == NULL) {
> > @@ -1440,8 +1438,15 @@ int mlx5_fw_version_get(struct rte_eth_dev
> > *dev, char *fw_ver, size_t fw_size)
> >  		closedir(dir);
> >  		device_dir = true;
> >  	}
> > -	data.master = port_switch_id_set && (!port_name_set ||
> > device_dir);
> > -	data.representor = port_switch_id_set && port_name_set &&
> > !device_dir;
> > +	if (port_switch_id_set) {
> > +		data.master =
> > +			device_dir ||
> > +			data.name_type ==
> > MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN ||
> > +			data.name_type ==
> > MLX5_PHYS_PORT_NAME_TYPE_UPLINK;
> > +		data.representor = !device_dir &&
> > +			(data.name_type ==
> > MLX5_PHYS_PORT_NAME_TYPE_LEGACY ||
> > +			 data.name_type ==
> > MLX5_PHYS_PORT_NAME_TYPE_PFVF);
> 
> 
> Why we need to split the logic of the master/representor detection between
> the mlx5_translate_port_name and the caller function?

We have two stages:
- gathering the port attributes (name, vf_num, switchdev_id, presence of device directory, etc.)  in callbacks
- analyzing the parameters on gathering completion. We need all the parameters to make a reliable master/representor
 recognition.

Translate routine is called on gathering stage and just recognizes the name format, extracts useful values (pf/vf num) and stores the results in compact form. It is easier than storing the entire port name. 

> 
> The way I envision it is mlx5_tranlate_port_name receives the
> phys_port_name and maybe more metadata and return the port
This "metadata" may be not gathered yet at the moment of port name processing.

> classification (master/representor) and the representor/pf number.
> No need for data.master = some_logic(translate_port_name_info).
> 
> Inside the translate function I would expect to have 2 smaller function:
> 1. to handle the modern format (strings) 2. to handle the legacy format
> (integers)

Actually this patch is also some kind of preparation for coming subfuctions.
If we have set of mutual exclusive formats the switch/case seems to be the most
native way to treat these ones. I'd prefer to keep switch/case. Comments are clear
and it is easy to understand where legacy/new format is treated. And it is
easy to extend for coming subfunctions.

I think we should isolate master/representor recognition logic into separate
routine, "translate" routine is for parameter gathering stage, "recognize" for
analyzing.


> 
> > +	}
> >  	*info = data;
> >  	assert(!(data.master && data.representor));
> >  	if (data.master && data.representor) { @@ -1459,10 +1464,11 @@
> int
> > mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver, size_t
> > fw_size)
> >   * @param[in] port_name_in
> >   *   String representing the port name.
> >   * @param[out] port_info_out
> > - *   Port information, including port name as a number.
> > + *   Port information, including port name as a number and port name
> > + *   type if recognized
> >   *
> >   * @return
> > - *   true on success, false otherwise.
> > + *   true on success (if name format recognized), false otherwise.
> >   */
> >  bool
> >  mlx5_translate_port_name(const char *port_name_in, @@ -1470,25
> > +1476,39 @@ int mlx5_fw_version_get(struct rte_eth_dev *dev, char
> > *fw_ver, size_t fw_size)  {
> >  	char pf_c1, pf_c2, vf_c1, vf_c2;
> >  	char *end;
> > -	int32_t pf_num;
> > -	bool port_name_set = false;
> > +	int sc_items;
> >
> >  	/*
> >  	 * Check for port-name as a string of the form pf0vf0
> > -	 * (support kernel ver >= 5.0)
> > +	 * (support kernel ver >= 5.0 or OFED ver >= 4.6).
> >  	 */
> > -	port_name_set =	(sscanf(port_name_in, "%c%c%d%c%c%d",
> > &pf_c1, &pf_c2,
> > -				&pf_num, &vf_c1, &vf_c2,
> > -				&port_info_out->port_name) == 6);
> > -	if (port_name_set) {
> > -		port_info_out->port_name_new = 1;
> > -	} else {
> > -		/* Check for port-name as a number (support kernel ver <
> > 5.0 */
> > -		errno = 0;
> > -		port_info_out->port_name = strtol(port_name_in, &end, 0);
> > -		if (!errno &&
> > -		    (size_t)(end - port_name_in) == strlen(port_name_in))
> > -			port_name_set = true;
> > +	sc_items = sscanf(port_name_in, "%c%c%d%c%c%d",
> > +			  &pf_c1, &pf_c2, &port_info_out->pf_num,
> > +			  &vf_c1, &vf_c2, &port_info_out->port_name);
> > +	if (sc_items == 6 &&
> > +	    pf_c1 == 'p' && pf_c2 == 'f' &&
> > +	    vf_c1 == 'v' && vf_c2 == 'f') {
> > +		port_info_out->name_type =
> > MLX5_PHYS_PORT_NAME_TYPE_PFVF;
> > +		return true;
> > +	}
> > +	/*
> > +	 * Check for port-name as a string of the form p0
> > +	 * (support kernel ver >= 5.0, or OFED ver >= 4.6).
> > +	 */
> > +	sc_items = sscanf(port_name_in, "%c%d",
> > +			  &pf_c1, &port_info_out->port_name);
> > +	if (sc_items == 2 && pf_c1 == 'p') {
> > +		port_info_out->name_type =
> > MLX5_PHYS_PORT_NAME_TYPE_UPLINK;
> > +		return true;
> > +	}
> > +	/* Check for port-name as a number (support kernel ver < 5.0 */
> > +	errno = 0;
> > +	port_info_out->port_name = strtol(port_name_in, &end, 0);
> > +	if (!errno &&
> > +	    (size_t)(end - port_name_in) == strlen(port_name_in)) {
> > +		port_info_out->name_type =
> > MLX5_PHYS_PORT_NAME_TYPE_LEGACY;
> > +		return  true;
> >  	}
> > -	return port_name_set;
> > +	port_info_out->name_type =
> > MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN;
> > +	return false;
> >  }
> > diff --git a/drivers/net/mlx5/mlx5_nl.c b/drivers/net/mlx5/mlx5_nl.c
> > index
> > fd9226b..669de76 100644
> > --- a/drivers/net/mlx5/mlx5_nl.c
> > +++ b/drivers/net/mlx5/mlx5_nl.c
> > @@ -887,12 +887,11 @@ struct mlx5_nl_ifindex_data {
> >  	struct mlx5_switch_info info = {
> >  		.master = 0,
> >  		.representor = 0,
> > -		.port_name_new = 0,
> > +		.name_type = MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN,
> >  		.port_name = 0,
> >  		.switch_id = 0,
> >  	};
> >  	size_t off = NLMSG_LENGTH(sizeof(struct ifinfomsg));
> > -	bool port_name_set = false;
> >  	bool switch_id_set = false;
> >  	bool num_vf_set = false;
> >
> > @@ -910,9 +909,7 @@ struct mlx5_nl_ifindex_data {
> >  			num_vf_set = true;
> >  			break;
> >  		case IFLA_PHYS_PORT_NAME:
> > -			port_name_set =
> > -				mlx5_translate_port_name((char *)payload,
> > -							 &info);
> > +			mlx5_translate_port_name((char *)payload, &info);
> >  			break;
> >  		case IFLA_PHYS_SWITCH_ID:
> >  			info.switch_id = 0;
> > @@ -926,16 +923,33 @@ struct mlx5_nl_ifindex_data {
> >  		off += RTA_ALIGN(ra->rta_len);
> >  	}
> >  	if (switch_id_set) {
> > -		if (info.port_name_new) {
> > -			/* New representors naming schema. */
> > -			if (port_name_set) {
> > -				info.master = (info.port_name == -1);
> > -				info.representor = (info.port_name != -1);
> > -			}
> > -		} else {
> > +		/* We have some E-Switch configuration. */
> > +		switch (info.name_type) {
> > +		case MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN:
> > +			/*
> > +			 * Name is not recognized or not set,
> > +			 * it can not be representor, check
> > +			 * VF number to see if it is a master.
> > +			 */
> > +			info.master = num_vf_set;
> > +			break;
> > +		case MLX5_PHYS_PORT_NAME_TYPE_LEGACY:
> >  			/* Legacy representors naming schema. */
> > -			info.master = (!port_name_set || num_vf_set);
> > -			info.representor = port_name_set && !num_vf_set;
> > +			info.representor = !num_vf_set;
> > +			break;
> > +		case MLX5_PHYS_PORT_NAME_TYPE_UPLINK:
> > +			/* New uplink naming schema. */
> > +			info.master = 1;
> > +			break;
> > +		case MLX5_PHYS_PORT_NAME_TYPE_PFVF:
> > +			/* New representors naming schema. */
> > +			info.representor = 1;
> > +			break;
> > +		}
> > +		if (!info.master && !info.representor) {
> > +			DRV_LOG(INFO,
> > +				"unable to recognize master/representors"
> > +				" on the device in switch domain");
> 
> Same comment as above. Would like to avoid this switch case outside of the
> translate function .
Don't you mind if we provide separated analyzing routine ?

> 
> >  		}
> >  	}
> >  	assert(!(info.master && info.representor));
> > --
> > 1.8.3.1

With best regards,
Slava


More information about the dev mailing list