[dpdk-dev] [PATCH v2 07/14] net/mlx5: support SubFunction

Xueming Li xuemingl at nvidia.com
Tue Jul 13 15:14:30 CEST 2021


This patch introduces SF support. Similar to VF, SF on auxiliary bus is
a portion of hardware PF, no representor or bonding parameters for SF.

Devargs to support SF:
-a auxiliary:mlx5_core.sf.8,dv_flow_en=1

New global syntax to support SF:
-a bus=auxiliary,name=mlx5_core.sf.8/class=eth/driver=mlx5,dv_flow_en=1

Signed-off-by: Xueming Li <xuemingl at nvidia.com>
---
 doc/guides/nics/mlx5.rst                |  54 +++++----
 drivers/net/mlx5/linux/mlx5_ethdev_os.c |  12 +-
 drivers/net/mlx5/linux/mlx5_os.c        | 145 +++++++++++++++++-------
 drivers/net/mlx5/linux/mlx5_os.h        |   2 +
 drivers/net/mlx5/mlx5.c                 |  23 +++-
 drivers/net/mlx5/mlx5.h                 |   2 +
 drivers/net/mlx5/mlx5_mac.c             |   2 +-
 drivers/net/mlx5/mlx5_rxmode.c          |   8 +-
 drivers/net/mlx5/mlx5_trigger.c         |   2 +-
 drivers/net/mlx5/windows/mlx5_os.c      |  12 +-
 10 files changed, 188 insertions(+), 74 deletions(-)

diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index 199beb2549..852fa21f5e 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -111,6 +111,11 @@ Features
 - Flow integrity offload API.
 - Connection tracking.
 - Sub-Function representors.
+- Sub-Function.
+
+Limitations
+-----------
+
 
 Limitations
 -----------
@@ -1470,40 +1475,51 @@ the DPDK application.
 
         echo switchdev > /sys/class/net/<net device>/compat/devlink/mode
 
-Sub-Function representor
-------------------------
+Sub-Function support
+--------------------
 
 Sub-Function is a portion of the PCI device, a SF netdev has its own
-dedicated queues(txq, rxq). A SF netdev supports E-Switch representation
-offload similar to existing PF and VF representors. A SF shares PCI
-level resources with other SFs and/or with its parent PCI function.
+dedicated queues(txq, rxq). A SF shares PCI level resources with other SFs
+and/or with its parent PCI function.
+
+0. Requirement::
+
+        OFED version >= 5.4-0.3.3.0
 
 1. Configure SF feature::
 
-        mlxconfig -d <mst device> set PF_BAR2_SIZE=<0/1/2/3> PF_BAR2_ENABLE=1
+        # Run mlxconfig on both PFs on host and ECPFs on BlueField.
+        mlxconfig -d <mst device> set PER_PF_NUM_SF=1 PF_TOTAL_SF=252 PF_SF_BAR_SIZE=12
 
-        Value of PF_BAR2_SIZE:
+2. Enable switchdev mode::
 
-            0: 8 SFs
-            1: 16 SFs
-            2: 32 SFs
-            3: 64 SFs
+        mlxdevm dev eswitch set pci/<DBDF> mode switchdev
 
-2. Reset the FW::
+3. Add SF port::
 
-        mlxfwreset -d <mst device> reset
+        mlxdevm port add pci/<DBDF> flavour pcisf pfnum 0 sfnum <sfnum>
 
-3. Enable switchdev mode::
+        Get SFID from output: pci/<DBDF>/<SFID>
 
-        echo switchdev > /sys/class/net/<net device>/compat/devlink/mode
+4. Modify MAC address::
+
+        mlxdevm port function set pci/<DBDF>/<SFID> hw_addr <MAC>
+
+5. Activate SF port::
+
+        mlxdevm port function set pci/<DBDF>/<ID> state active
+
+6. Devargs to probe SF device::
 
-4. Create SF::
+        auxiliary:mlx5_core.sf.<num>,dv_flow_en=1
 
-        mlnx-sf -d <PCI_BDF> -a create
+Sub-Function representor support
+--------------------------------
 
-5. Probe SF representor::
+A SF netdev supports E-Switch representation offload similar to existing PF
+and VF representors. Use <sfnum> to probe SF representor.
 
-        testpmd> port attach <PCI_BDF>,representor=sf0,dv_flow_en=1
+        testpmd> port attach <PCI_BDF>,representor=sf<sfnum>,dv_flow_en=1
 
 Performance tuning
 ------------------
diff --git a/drivers/net/mlx5/linux/mlx5_ethdev_os.c b/drivers/net/mlx5/linux/mlx5_ethdev_os.c
index b05b9fc950..f34133e2c6 100644
--- a/drivers/net/mlx5/linux/mlx5_ethdev_os.c
+++ b/drivers/net/mlx5/linux/mlx5_ethdev_os.c
@@ -128,6 +128,17 @@ struct ethtool_link_settings {
 #define ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT 2 /* 66 - 64 */
 #endif
 
+/* Get interface index from SubFunction device name. */
+int
+mlx5_auxiliary_get_ifindex(const char *sf_name)
+{
+	char if_name[IF_NAMESIZE] = { 0 };
+
+	if (mlx5_auxiliary_get_child_name(sf_name, "/net",
+					  if_name, sizeof(if_name)) != 0)
+		return -rte_errno;
+	return if_nametoindex(if_name);
+}
 
 /**
  * Get interface name from private structure.
@@ -1619,4 +1630,3 @@ mlx5_get_mac(struct rte_eth_dev *dev, uint8_t (*mac)[RTE_ETHER_ADDR_LEN])
 	memcpy(mac, request.ifr_hwaddr.sa_data, RTE_ETHER_ADDR_LEN);
 	return 0;
 }
-
diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 1df5348455..4b3feb48a4 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -20,6 +20,7 @@
 #include <ethdev_pci.h>
 #include <rte_pci.h>
 #include <rte_bus_pci.h>
+#include <rte_bus_auxiliary.h>
 #include <rte_common.h>
 #include <rte_kvargs.h>
 #include <rte_rwlock.h>
@@ -1984,6 +1985,27 @@ mlx5_device_bond_pci_match(const struct ibv_device *ibv_dev,
 	return pf;
 }
 
+static void
+mlx5_os_config_default(struct mlx5_dev_config *config)
+{
+	memset(config, 0, sizeof(*config));
+	config->mps = MLX5_ARG_UNSET;
+	config->dbnc = MLX5_ARG_UNSET;
+	config->rx_vec_en = 1;
+	config->txq_inline_max = MLX5_ARG_UNSET;
+	config->txq_inline_min = MLX5_ARG_UNSET;
+	config->txq_inline_mpw = MLX5_ARG_UNSET;
+	config->txqs_inline = MLX5_ARG_UNSET;
+	config->vf_nl_en = 1;
+	config->mr_ext_memseg_en = 1;
+	config->mprq.max_memcpy_len = MLX5_MPRQ_MEMCPY_DEFAULT_LEN;
+	config->mprq.min_rxqs_num = MLX5_MPRQ_MIN_RXQS;
+	config->dv_esw_en = 1;
+	config->dv_flow_en = 1;
+	config->decap_en = 1;
+	config->log_hp_size = MLX5_ARG_UNSET;
+}
+
 /**
  * Register a PCI device within bonding.
  *
@@ -2408,23 +2430,8 @@ mlx5_os_pci_probe_pf(struct rte_pci_device *pci_dev,
 		uint32_t restore;
 
 		/* Default configuration. */
-		memset(&dev_config, 0, sizeof(struct mlx5_dev_config));
+		mlx5_os_config_default(&dev_config);
 		dev_config.vf = dev_config_vf;
-		dev_config.mps = MLX5_ARG_UNSET;
-		dev_config.dbnc = MLX5_ARG_UNSET;
-		dev_config.rx_vec_en = 1;
-		dev_config.txq_inline_max = MLX5_ARG_UNSET;
-		dev_config.txq_inline_min = MLX5_ARG_UNSET;
-		dev_config.txq_inline_mpw = MLX5_ARG_UNSET;
-		dev_config.txqs_inline = MLX5_ARG_UNSET;
-		dev_config.vf_nl_en = 1;
-		dev_config.mr_ext_memseg_en = 1;
-		dev_config.mprq.max_memcpy_len = MLX5_MPRQ_MEMCPY_DEFAULT_LEN;
-		dev_config.mprq.min_rxqs_num = MLX5_MPRQ_MIN_RXQS;
-		dev_config.dv_esw_en = 1;
-		dev_config.dv_flow_en = 1;
-		dev_config.decap_en = 1;
-		dev_config.log_hp_size = MLX5_ARG_UNSET;
 		dev_config.allow_duplicate_pattern = 1;
 		list[i].numa_node = pci_dev->device.numa_node;
 		list[i].eth_dev = mlx5_dev_spawn(&pci_dev->device,
@@ -2483,6 +2490,35 @@ mlx5_os_pci_probe_pf(struct rte_pci_device *pci_dev,
 	return ret;
 }
 
+static int
+mlx5_os_parse_eth_devargs(struct rte_device *dev,
+			  struct rte_eth_devargs *eth_da)
+{
+	int ret = 0;
+
+	if (dev->devargs == NULL)
+		return 0;
+	memset(eth_da, 0, sizeof(*eth_da));
+	/* Parse representor information first from class argument. */
+	if (dev->devargs->cls_str)
+		ret = rte_eth_devargs_parse(dev->devargs->cls_str, eth_da);
+	if (ret != 0) {
+		DRV_LOG(ERR, "failed to parse device arguments: %s",
+			dev->devargs->cls_str);
+		return -rte_errno;
+	}
+	if (eth_da->type == RTE_ETH_REPRESENTOR_NONE) {
+		/* Parse legacy device argument */
+		ret = rte_eth_devargs_parse(dev->devargs->args, eth_da);
+		if (ret) {
+			DRV_LOG(ERR, "failed to parse device arguments: %s",
+				dev->devargs->args);
+			return -rte_errno;
+		}
+	}
+	return 0;
+}
+
 /**
  * Callback to register a PCI device.
  *
@@ -2497,31 +2533,13 @@ mlx5_os_pci_probe_pf(struct rte_pci_device *pci_dev,
 static int
 mlx5_os_pci_probe(struct rte_pci_device *pci_dev)
 {
-	struct rte_eth_devargs eth_da = { .type = RTE_ETH_REPRESENTOR_NONE };
+	struct rte_eth_devargs eth_da = { .nb_ports = 0 };
 	int ret = 0;
 	uint16_t p;
 
-	if (pci_dev->device.devargs) {
-		/* Parse representor information from device argument. */
-		if (pci_dev->device.devargs->cls_str)
-			ret = rte_eth_devargs_parse
-				(pci_dev->device.devargs->cls_str, &eth_da);
-		if (ret) {
-			DRV_LOG(ERR, "failed to parse device arguments: %s",
-				pci_dev->device.devargs->cls_str);
-			return -rte_errno;
-		}
-		if (eth_da.type == RTE_ETH_REPRESENTOR_NONE) {
-			/* Support legacy device argument */
-			ret = rte_eth_devargs_parse
-				(pci_dev->device.devargs->args, &eth_da);
-			if (ret) {
-				DRV_LOG(ERR, "failed to parse device arguments: %s",
-					pci_dev->device.devargs->args);
-				return -rte_errno;
-			}
-		}
-	}
+	ret = mlx5_os_parse_eth_devargs(&pci_dev->device, &eth_da);
+	if (ret != 0)
+		return ret;
 
 	if (eth_da.nb_ports > 0) {
 		/* Iterate all port if devargs pf is range: "pf[0-1]vf[...]". */
@@ -2534,10 +2552,56 @@ mlx5_os_pci_probe(struct rte_pci_device *pci_dev)
 	return ret;
 }
 
+/* Probe a single SF device on auxiliary bus, no representor support. */
+static int
+mlx5_os_auxiliary_probe(struct rte_device *dev)
+{
+	struct rte_eth_devargs eth_da = { .nb_ports = 0 };
+	struct mlx5_dev_config config;
+	struct mlx5_dev_spawn_data spawn = { .pf_bond = -1 };
+	struct rte_auxiliary_device *adev = RTE_DEV_TO_AUXILIARY(dev);
+	struct rte_eth_dev *eth_dev;
+	int ret = 0;
+
+	/* Parse ethdev devargs. */
+	ret = mlx5_os_parse_eth_devargs(dev, &eth_da);
+	if (ret != 0)
+		return ret;
+	/* Set default config data. */
+	mlx5_os_config_default(&config);
+	config.sf = 1;
+	/* Init spawn data. */
+	spawn.max_port = 1;
+	spawn.phys_port = 1;
+	spawn.phys_dev = mlx5_os_get_ibv_dev(dev);
+	if (spawn.phys_dev == NULL)
+		return -rte_errno;
+	ret = mlx5_auxiliary_get_ifindex(dev->name);
+	if (ret < 0) {
+		DRV_LOG(ERR, "failed to get ethdev ifindex: %s", dev->name);
+		return ret;
+	}
+	spawn.ifindex = ret;
+	spawn.numa_node = dev->numa_node;
+	/* Spawn device. */
+	eth_dev = mlx5_dev_spawn(dev, &spawn, &config, &eth_da);
+	if (eth_dev == NULL)
+		return -rte_errno;
+	/* Post create. */
+	eth_dev->intr_handle = &adev->intr_handle;
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+		eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
+		eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_RMV;
+		eth_dev->data->numa_node = dev->numa_node;
+	}
+	rte_eth_dev_probing_finish(eth_dev);
+	return 0;
+}
+
 /**
  * Net class driver callback to probe a device.
  *
- * This function probe PCI bus device(s).
+ * This function probe PCI bus device(s) or a single SF on auxiliary bus.
  *
  * @param[in] dev
  *   Pointer to the generic device.
@@ -2560,7 +2624,8 @@ mlx5_os_net_probe(struct rte_device *dev)
 	}
 	if (mlx5_dev_is_pci(dev))
 		return mlx5_os_pci_probe(RTE_DEV_TO_PCI(dev));
-	return 0;
+	else
+		return mlx5_os_auxiliary_probe(dev);
 }
 
 static int
diff --git a/drivers/net/mlx5/linux/mlx5_os.h b/drivers/net/mlx5/linux/mlx5_os.h
index af7cbeb418..2991d37df2 100644
--- a/drivers/net/mlx5/linux/mlx5_os.h
+++ b/drivers/net/mlx5/linux/mlx5_os.h
@@ -19,4 +19,6 @@ enum {
 
 #define MLX5_NAMESIZE IF_NAMESIZE
 
+int mlx5_auxiliary_get_ifindex(const char *sf_name);
+
 #endif /* RTE_PMD_MLX5_OS_H_ */
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 96e8d189ba..818e37fd48 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -399,6 +399,24 @@ mlx5_is_hpf(struct rte_eth_dev *dev)
 	       MLX5_REPRESENTOR_REPR(-1) == repr;
 }
 
+/**
+ * Decide whether representor ID is a SF port representor.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ *
+ * @return
+ *   Non-zero if HPF, otherwise 0.
+ */
+bool
+mlx5_is_sf_repr(struct rte_eth_dev *dev)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	int type = MLX5_REPRESENTOR_TYPE(priv->representor_id);
+
+	return priv->representor != 0 && type == RTE_ETH_REPRESENTOR_SF;
+}
+
 /**
  * Initialize the ASO aging management structure.
  *
@@ -2335,7 +2353,10 @@ mlx5_eth_find_next(uint16_t port_id, struct rte_device *odev)
 		    (dev->device == odev ||
 		     (dev->device->driver &&
 		     dev->device->driver->name &&
-		     !strcmp(dev->device->driver->name, MLX5_PCI_DRIVER_NAME))))
+		     ((strcmp(dev->device->driver->name,
+			      MLX5_PCI_DRIVER_NAME) == 0) ||
+		      (strcmp(dev->device->driver->name,
+			      MLX5_AUXILIARY_DRIVER_NAME) == 0)))))
 			break;
 		port_id++;
 	}
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 1b5e31ac40..cc22b5ddfa 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -243,6 +243,7 @@ struct mlx5_dev_config {
 	unsigned int hw_fcs_strip:1; /* FCS stripping is supported. */
 	unsigned int hw_padding:1; /* End alignment padding is supported. */
 	unsigned int vf:1; /* This is a VF. */
+	unsigned int sf:1; /* This is a SF. */
 	unsigned int tunnel_en:1;
 	/* Whether tunnel stateless offloads are supported. */
 	unsigned int mpls_en:1; /* MPLS over GRE/UDP is enabled. */
@@ -1464,6 +1465,7 @@ int mlx5_udp_tunnel_port_add(struct rte_eth_dev *dev,
 uint16_t mlx5_eth_find_next(uint16_t port_id, struct rte_device *odev);
 int mlx5_dev_close(struct rte_eth_dev *dev);
 bool mlx5_is_hpf(struct rte_eth_dev *dev);
+bool mlx5_is_sf_repr(struct rte_eth_dev *dev);
 void mlx5_age_event_prepare(struct mlx5_dev_ctx_shared *sh);
 
 /* Macro to iterate over all valid ports for mlx5 driver. */
diff --git a/drivers/net/mlx5/mlx5_mac.c b/drivers/net/mlx5/mlx5_mac.c
index 19981d26d8..a791fedc91 100644
--- a/drivers/net/mlx5/mlx5_mac.c
+++ b/drivers/net/mlx5/mlx5_mac.c
@@ -159,7 +159,7 @@ mlx5_mac_addr_set(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr)
 	 * Configuring the VF instead of its representor,
 	 * need to skip the special case of HPF on Bluefield.
 	 */
-	if (priv->representor && !mlx5_is_hpf(dev)) {
+	if (priv->representor && !mlx5_is_hpf(dev) && !mlx5_is_sf_repr(dev)) {
 		DRV_LOG(DEBUG, "VF represented by port %u setting primary MAC address",
 			dev->data->port_id);
 		if (priv->pf_bond >= 0) {
diff --git a/drivers/net/mlx5/mlx5_rxmode.c b/drivers/net/mlx5/mlx5_rxmode.c
index 25fb47c9ed..7f19b235c2 100644
--- a/drivers/net/mlx5/mlx5_rxmode.c
+++ b/drivers/net/mlx5/mlx5_rxmode.c
@@ -36,7 +36,7 @@ mlx5_promiscuous_enable(struct rte_eth_dev *dev)
 			dev->data->port_id);
 		return 0;
 	}
-	if (priv->config.vf) {
+	if (priv->config.vf || priv->config.sf) {
 		ret = mlx5_os_set_promisc(dev, 1);
 		if (ret)
 			return ret;
@@ -69,7 +69,7 @@ mlx5_promiscuous_disable(struct rte_eth_dev *dev)
 	int ret;
 
 	dev->data->promiscuous = 0;
-	if (priv->config.vf) {
+	if (priv->config.vf || priv->config.sf) {
 		ret = mlx5_os_set_promisc(dev, 0);
 		if (ret)
 			return ret;
@@ -109,7 +109,7 @@ mlx5_allmulticast_enable(struct rte_eth_dev *dev)
 			dev->data->port_id);
 		return 0;
 	}
-	if (priv->config.vf) {
+	if (priv->config.vf || priv->config.sf) {
 		ret = mlx5_os_set_allmulti(dev, 1);
 		if (ret)
 			goto error;
@@ -142,7 +142,7 @@ mlx5_allmulticast_disable(struct rte_eth_dev *dev)
 	int ret;
 
 	dev->data->all_multicast = 0;
-	if (priv->config.vf) {
+	if (priv->config.vf || priv->config.sf) {
 		ret = mlx5_os_set_allmulti(dev, 0);
 		if (ret)
 			goto error;
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 6d2351f5a8..a9d5d58fd9 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -1259,7 +1259,7 @@ mlx5_traffic_enable(struct rte_eth_dev *dev)
 		}
 		mlx5_txq_release(dev, i);
 	}
-	if (priv->config.dv_esw_en && !priv->config.vf) {
+	if (priv->config.dv_esw_en && !priv->config.vf && !priv->config.sf) {
 		if (mlx5_flow_create_esw_table_zero_flow(dev))
 			priv->fdb_def_rule = 1;
 		else
diff --git a/drivers/net/mlx5/windows/mlx5_os.c b/drivers/net/mlx5/windows/mlx5_os.c
index bf20adaa30..ee09acc96b 100644
--- a/drivers/net/mlx5/windows/mlx5_os.c
+++ b/drivers/net/mlx5/windows/mlx5_os.c
@@ -922,20 +922,18 @@ mlx5_match_devx_devices_to_addr(struct devx_device_bdf *devx_bdf,
 /**
  * DPDK callback to register a PCI device.
  *
- * This function spawns Ethernet devices out of a given PCI device.
+ * This function spawns Ethernet devices out of a given device.
  *
- * @param[in] pci_drv
- *   PCI driver structure (mlx5_driver).
- * @param[in] pci_dev
- *   PCI device information.
+ * @param[in] dev
+ *   Pointer to the generic device.
  *
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
-mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
-		  struct rte_pci_device *pci_dev)
+mlx5_os_net_probe(struct rte_device *dev)
 {
+	struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev);
 	struct devx_device_bdf *devx_bdf_devs, *orig_devx_bdf_devs;
 	/*
 	 * Number of found IB Devices matching with requested PCI BDF.
-- 
2.25.1



More information about the dev mailing list