[dpdk-dev] [PATCH v10 2/2] eal: support for VFIO-PCI VF token

Haiyue Wang haiyue.wang at intel.com
Sun Apr 26 03:55:14 CEST 2020


The kernel module vfio-pci introduces the VF token to enable SR-IOV
support since 5.7.

The VF token can be set by a vfio-pci based PF driver and must be known
by the vfio-pci based VF driver in order to gain access to the device.

Signed-off-by: Haiyue Wang <haiyue.wang at intel.com>
Acked-by: Vamsi Attunuru <vattunuru at marvell.com>
Tested-by: Vamsi Attunuru <vattunuru at marvell.com>
---
 doc/guides/linux_gsg/linux_drivers.rst | 41 +++++++++++++-
 doc/guides/rel_notes/release_20_05.rst |  6 +++
 drivers/bus/pci/linux/pci_vfio.c       | 74 +++++++++++++++++++++++++-
 lib/librte_eal/freebsd/eal.c           |  3 +-
 lib/librte_eal/include/rte_vfio.h      | 26 ++++++++-
 lib/librte_eal/linux/eal_vfio.c        | 20 +++++--
 lib/librte_eal/rte_eal_version.map     |  8 ++-
 7 files changed, 169 insertions(+), 9 deletions(-)

diff --git a/doc/guides/linux_gsg/linux_drivers.rst b/doc/guides/linux_gsg/linux_drivers.rst
index 238f3e900..b42fd708b 100644
--- a/doc/guides/linux_gsg/linux_drivers.rst
+++ b/doc/guides/linux_gsg/linux_drivers.rst
@@ -72,11 +72,50 @@ Note that in order to use VFIO, your kernel must support it.
 VFIO kernel modules have been included in the Linux kernel since version 3.6.0 and are usually present by default,
 however please consult your distributions documentation to make sure that is the case.
 
+The ``vfio-pci`` module since Linux version 5.7 supports the creation of virtual
+functions. After the PF is bound to vfio-pci module, the user can create the VFs
+by sysfs interface, and these VFs are bound to vfio-pci module automatically.
+
+When the PF is bound to vfio-pci, it has initial VF token generated by random. For
+security reason, this token is write only, the user can't read it from the kernel
+directly. For accessing the VF, the user needs to start the PF with token parameter
+to setup a VF token (uuid format), then the VF can be accessed with this new known
+VF token.
+
+Also if the DPDK application running on the PF device exits, the user wants to start
+the PF with another different VF token value, it has no issue if no application like
+DPDK or KVM runs on VFs. Otherwise, the PF will fail to start until all VFs are free
+to use, after that, the user can select a new VF token to start the PF device.
+
+DPDK will use the keyword ``vf_token`` as the device argument to pass the VF token
+value to PF and its related VFs, the PMD should not use it, and this argument will
+be pruned from the device argument list, so the PMD can parse its own valid device
+arguments successfully.
+
+.. code-block:: console
+
+    1. Generate the VF token by uuid command
+        14d63f20-8445-11ea-8900-1f9ce7d5650d
+
+    2. sudo modprobe vfio-pci enable_sriov=1
+
+    2. ./usertools/dpdk-devbind.py -b vfio-pci 0000:86:00.0
+
+    3. echo 2 > /sys/bus/pci/devices/0000:86:00.0/sriov_numvfs
+
+    4. Start the PF:
+        ./x86_64-native-linux-gcc/app/testpmd -l 22-25 -n 4 \
+         -w 86:00.0,vf_token=14d63f20-8445-11ea-8900-1f9ce7d5650d --file-prefix=pf -- -i
+
+    5. Start the VF:
+        ./x86_64-native-linux-gcc/app/testpmd -l 26-29 -n 4 \
+         -w 86:02.0,vf_token=14d63f20-8445-11ea-8900-1f9ce7d5650d --file-prefix=vf0 -- -i
+
 Also, to use VFIO, both kernel and BIOS must support and be configured to use IO virtualization (such as Intel® VT-d).
 
 .. note::
 
-    ``vfio-pci`` module doesn't support the creation of virtual functions.
+    ``vfio-pci`` module doesn't support the creation of virtual functions before Linux version 5.7.
 
 For proper operation of VFIO when running DPDK applications as a non-privileged user, correct permissions should also be set up.
 This can be done by using the DPDK setup script (called dpdk-setup.sh and located in the usertools directory).
diff --git a/doc/guides/rel_notes/release_20_05.rst b/doc/guides/rel_notes/release_20_05.rst
index b124c3f28..722c61e67 100644
--- a/doc/guides/rel_notes/release_20_05.rst
+++ b/doc/guides/rel_notes/release_20_05.rst
@@ -93,6 +93,12 @@ New Features
   * Added new query: ``rte_flow_get_aged_flows`` to get the aged-out flows
     contexts from the port.
 
+* **Added the support for vfio-pci new VF token interface.**
+
+  Since Linux version 5.7, vfio-pci supports a shared VF token (UUID) to represent
+  the trust between SR-IOV PF and the created VFs. Update the method to gain access
+  to the PF and VFs devices by appending the VF token parameter.
+
 * **Updated Amazon ena driver.**
 
   Updated ena PMD with new features and improvements, including:
diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c
index 64cd84a68..efb64e2ba 100644
--- a/drivers/bus/pci/linux/pci_vfio.c
+++ b/drivers/bus/pci/linux/pci_vfio.c
@@ -11,6 +11,7 @@
 #include <sys/mman.h>
 #include <stdbool.h>
 
+#include <rte_devargs.h>
 #include <rte_log.h>
 #include <rte_pci.h>
 #include <rte_bus_pci.h>
@@ -644,12 +645,72 @@ pci_vfio_msix_is_mappable(int vfio_dev_fd, int msix_region)
 	return ret;
 }
 
+static int
+vfio_pci_vf_token_arg(struct rte_devargs *devargs, rte_uuid_t uuid)
+{
+#define VF_TOKEN_ARG "vf_token="
+	char c, *p, *vf_token;
+
+	memset(uuid, 0, sizeof(rte_uuid_t));
+
+	if (devargs == NULL)
+		return 0;
+
+	p = strstr(devargs->args, VF_TOKEN_ARG);
+	if (!p)
+		return 0;
+
+	vf_token = p + strlen(VF_TOKEN_ARG);
+	if (strlen(vf_token) < (RTE_UUID_STRLEN - 1)) {
+		RTE_LOG(ERR, EAL, "The VF token length is too short\n");
+		return -1;
+	}
+
+	c = vf_token[RTE_UUID_STRLEN - 1];
+	if (c != '\0' && c != ',') {
+		RTE_LOG(ERR, EAL,
+			"The VF token ends with a invalid character : %c\n", c);
+		return -1;
+	}
+
+	vf_token[RTE_UUID_STRLEN - 1] = '\0';
+	if (rte_uuid_parse(vf_token, uuid)) {
+		RTE_LOG(ERR, EAL,
+			"The VF token is invalid : %s\n", vf_token);
+		vf_token[RTE_UUID_STRLEN - 1] = c;
+		return -1;
+	}
+
+	RTE_LOG(DEBUG, EAL,
+		"The VF token is found : %s\n", vf_token);
+
+	vf_token[RTE_UUID_STRLEN - 1] = c;
+
+	/* This VF token will be treated as a invalid device argument if the
+	 * PMD calls the rte_devargs parse API with its own valid argument list,
+	 * so it needs to purge this vfio-pci specific argument.
+	 */
+	if (c != '\0') {
+		/* 1. Handle the case : 'vf_token=uuid,arg1=val1' */
+		memmove(p, vf_token + RTE_UUID_STRLEN,
+			strlen(vf_token + RTE_UUID_STRLEN) + 1);
+	} else {
+		/* 2. Handle the case : 'arg1=val1,vf_token=uuid' */
+		if (p != devargs->args)
+			p--;
+
+		*p = '\0';
+	}
+
+	return 0;
+}
 
 static int
 pci_vfio_map_resource_primary(struct rte_pci_device *dev)
 {
 	struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
 	char pci_addr[PATH_MAX] = {0};
+	rte_uuid_t vf_token;
 	int vfio_dev_fd;
 	struct rte_pci_addr *loc = &dev->addr;
 	int i, ret;
@@ -668,8 +729,12 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev)
 	snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
 			loc->domain, loc->bus, loc->devid, loc->function);
 
+	ret = vfio_pci_vf_token_arg(dev->device.devargs, vf_token);
+	if (ret)
+		return ret;
+
 	ret = rte_vfio_setup_device(rte_pci_get_sysfs_path(), pci_addr,
-					&vfio_dev_fd, &device_info);
+					&vfio_dev_fd, &device_info, vf_token);
 	if (ret)
 		return ret;
 
@@ -798,6 +863,7 @@ pci_vfio_map_resource_secondary(struct rte_pci_device *dev)
 {
 	struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
 	char pci_addr[PATH_MAX] = {0};
+	rte_uuid_t vf_token;
 	int vfio_dev_fd;
 	struct rte_pci_addr *loc = &dev->addr;
 	int i, ret;
@@ -830,8 +896,12 @@ pci_vfio_map_resource_secondary(struct rte_pci_device *dev)
 		return -1;
 	}
 
+	ret = vfio_pci_vf_token_arg(dev->device.devargs, vf_token);
+	if (ret)
+		return ret;
+
 	ret = rte_vfio_setup_device(rte_pci_get_sysfs_path(), pci_addr,
-					&vfio_dev_fd, &device_info);
+					&vfio_dev_fd, &device_info, vf_token);
 	if (ret)
 		return ret;
 
diff --git a/lib/librte_eal/freebsd/eal.c b/lib/librte_eal/freebsd/eal.c
index 540b7d38c..e42d3e12e 100644
--- a/lib/librte_eal/freebsd/eal.c
+++ b/lib/librte_eal/freebsd/eal.c
@@ -1005,7 +1005,8 @@ rte_eal_vfio_intr_mode(void)
 int rte_vfio_setup_device(__rte_unused const char *sysfs_base,
 		      __rte_unused const char *dev_addr,
 		      __rte_unused int *vfio_dev_fd,
-		      __rte_unused struct vfio_device_info *device_info)
+		      __rte_unused struct vfio_device_info *device_info,
+		      __rte_unused rte_uuid_t vf_token)
 {
 	return -1;
 }
diff --git a/lib/librte_eal/include/rte_vfio.h b/lib/librte_eal/include/rte_vfio.h
index 20ed8c45a..430f98770 100644
--- a/lib/librte_eal/include/rte_vfio.h
+++ b/lib/librte_eal/include/rte_vfio.h
@@ -16,6 +16,9 @@ extern "C" {
 
 #include <stdint.h>
 
+#include <rte_compat.h>
+#include <rte_uuid.h>
+
 /*
  * determine if VFIO is present on the system
  */
@@ -102,13 +105,34 @@ struct vfio_device_info;
  * @param device_info
  *   Device information.
  *
+ * @param vf_token
+ *   Before linux 5.7, the PF bound to vfio-pci doesn't support SR-IOV to
+ *   create VFs for security reason. Now the VF token is introduced to work
+ *   as some degree of trust or collaboration between PF and VFs.
+ *
+ *   A). as VF device, if the PF is a vfio device and it is bound to the
+ *   vfio-pci driver, the user needs to provide a VF token to access the
+ *   device, in the form of appending a vf_token to the device name, for
+ *   example:
+ *     "-w 04:10.0,vf_token=bd8d9d2b-5a5f-4f5a-a211-f591514ba1f3"
+ *
+ *   B). as PF device, When presented with a PF which has VFs in use, the
+ *   user must also provide the current VF token to prove collaboration with
+ *   existing VF users.  If VFs are not in use, the VF token provided for the
+ *   PF device will act to set the VF token.
+ *
+ *   The vf_token can be zero uuid, which will be ignored to pass into the
+ *   vfio-pci module.
+ *
  * @return
  *   0 on success.
  *   <0 on failure.
  *   >1 if the device cannot be managed this way.
  */
+__rte_internal
 int rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
-		int *vfio_dev_fd, struct vfio_device_info *device_info);
+		int *vfio_dev_fd, struct vfio_device_info *device_info,
+		rte_uuid_t vf_token);
 
 /**
  * Release a device mapped to a VFIO-managed I/O MMU group.
diff --git a/lib/librte_eal/linux/eal_vfio.c b/lib/librte_eal/linux/eal_vfio.c
index d26e1649a..e8d7cbda5 100644
--- a/lib/librte_eal/linux/eal_vfio.c
+++ b/lib/librte_eal/linux/eal_vfio.c
@@ -702,7 +702,8 @@ rte_vfio_clear_group(int vfio_group_fd)
 
 int
 rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
-		int *vfio_dev_fd, struct vfio_device_info *device_info)
+		int *vfio_dev_fd, struct vfio_device_info *device_info,
+		rte_uuid_t vf_token)
 {
 	struct vfio_group_status group_status = {
 			.argsz = sizeof(group_status)
@@ -712,6 +713,7 @@ rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
 	int vfio_container_fd;
 	int vfio_group_fd;
 	int iommu_group_num;
+	char dev[PATH_MAX];
 	int i, ret;
 
 	/* get group number */
@@ -895,8 +897,19 @@ rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
 				t->type_id, t->name);
 	}
 
+	if (!rte_uuid_is_null(vf_token)) {
+		char vf_token_str[RTE_UUID_STRLEN];
+
+		rte_uuid_unparse(vf_token, vf_token_str, sizeof(vf_token_str));
+		snprintf(dev, sizeof(dev),
+			 "%s vf_token=%s", dev_addr, vf_token_str);
+	} else {
+		snprintf(dev, sizeof(dev),
+			 "%s", dev_addr);
+	}
+
 	/* get a file descriptor for the device */
-	*vfio_dev_fd = ioctl(vfio_group_fd, VFIO_GROUP_GET_DEVICE_FD, dev_addr);
+	*vfio_dev_fd = ioctl(vfio_group_fd, VFIO_GROUP_GET_DEVICE_FD, dev);
 	if (*vfio_dev_fd < 0) {
 		/* if we cannot get a device fd, this implies a problem with
 		 * the VFIO group or the container not having IOMMU configured.
@@ -2083,7 +2096,8 @@ int
 rte_vfio_setup_device(__rte_unused const char *sysfs_base,
 		__rte_unused const char *dev_addr,
 		__rte_unused int *vfio_dev_fd,
-		__rte_unused struct vfio_device_info *device_info)
+		__rte_unused struct vfio_device_info *device_info,
+		__rte_unused rte_uuid_t vf_token)
 {
 	return -1;
 }
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index 6088e7f6c..5bcdccae6 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -213,7 +213,6 @@ DPDK_20.0 {
 	rte_vfio_is_enabled;
 	rte_vfio_noiommu_is_enabled;
 	rte_vfio_release_device;
-	rte_vfio_setup_device;
 	rte_vlog;
 	rte_zmalloc;
 	rte_zmalloc_socket;
@@ -388,3 +387,10 @@ EXPERIMENTAL {
 	rte_trace_regexp;
 	rte_trace_save;
 };
+
+INTERNAL {
+	global:
+
+	# added in 20.05
+	rte_vfio_setup_device;
+};
-- 
2.26.2



More information about the dev mailing list