[dpdk-dev] [PATCH v2 9/9] pci: implement automatic bind/unbind

David Marchand david.marchand at 6wind.com
Fri Jan 29 15:49:13 CET 2016


Reuse the pci hook to implement automatic bind / unbind.
The more I look at this, the more I think this should go to the PMDs
themselves (with per-device options to control this), with EAL offering
helpers to achieve this.

Signed-off-by: David Marchand <david.marchand at 6wind.com>
---
 lib/librte_eal/bsdapp/eal/eal_pci.c        |  25 ++++
 lib/librte_eal/common/eal_common_options.c |   8 ++
 lib/librte_eal/common/eal_common_pci.c     |  79 +++++++++++
 lib/librte_eal/common/eal_options.h        |   2 +
 lib/librte_eal/common/eal_private.h        |  38 ++++++
 lib/librte_eal/common/include/rte_pci.h    |   7 +-
 lib/librte_eal/linuxapp/eal/eal_pci.c      | 210 +++++++++++++++++++++++++++++
 7 files changed, 367 insertions(+), 2 deletions(-)

diff --git a/lib/librte_eal/bsdapp/eal/eal_pci.c b/lib/librte_eal/bsdapp/eal/eal_pci.c
index e95249b..130f7e9 100644
--- a/lib/librte_eal/bsdapp/eal/eal_pci.c
+++ b/lib/librte_eal/bsdapp/eal/eal_pci.c
@@ -91,6 +91,31 @@ pci_unbind_kernel_driver(struct rte_pci_device *dev __rte_unused)
 	return -ENOTSUP;
 }
 
+int
+pci_rebind_device(const struct rte_pci_device *dev __rte_unused,
+		  const char *driver __rte_unused)
+{
+	RTE_LOG(ERR, EAL, "Rebinding device to pci kernel drivers is not implemented for BSD\n");
+	return -ENOTSUP;
+}
+
+int
+pci_mapping_driver_bound(const struct rte_pci_device *dev)
+{
+	int ret;
+
+	switch (dev->kdrv) {
+	case RTE_KDRV_NIC_UIO:
+		ret = 1;
+		break;
+	default:
+		ret = 0;
+		break;
+	}
+
+	return ret;
+}
+
 /* Map pci device */
 int
 pci_map_device(struct rte_pci_device *dev)
diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index 29942ea..b646abd 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -54,6 +54,7 @@
 #include "eal_internal_cfg.h"
 #include "eal_options.h"
 #include "eal_filesystem.h"
+#include "eal_private.h"
 
 #define BITS_PER_HEX 4
 
@@ -95,6 +96,7 @@ eal_long_options[] = {
 	{OPT_VFIO_INTR,         1, NULL, OPT_VFIO_INTR_NUM        },
 	{OPT_VMWARE_TSC_MAP,    0, NULL, OPT_VMWARE_TSC_MAP_NUM   },
 	{OPT_XEN_DOM0,          0, NULL, OPT_XEN_DOM0_NUM         },
+	{OPT_PCI_UIO_AUTOBIND,  1, NULL, OPT_PCI_UIO_AUTOBIND_NUM },
 	{0,                     0, NULL, 0                        }
 };
 
@@ -897,6 +899,10 @@ eal_parse_common_option(int opt, const char *optarg,
 		}
 		break;
 
+	case OPT_PCI_UIO_AUTOBIND_NUM:
+		pci_init_autobind(optarg);
+		break;
+
 	/* don't know what to do, leave this to caller */
 	default:
 		return 1;
@@ -1019,5 +1025,7 @@ eal_common_usage(void)
 	       "  --"OPT_NO_PCI"            Disable PCI\n"
 	       "  --"OPT_NO_HPET"           Disable HPET\n"
 	       "  --"OPT_NO_SHCONF"         No shared config (mmap'd files)\n"
+	       "  --"OPT_PCI_UIO_AUTOBIND"  Set default kernel driver to bind pci devices,\n"
+	       "                            when their associated pmd requires uio\n"
 	       "\n", RTE_MAX_LCORE);
 }
diff --git a/lib/librte_eal/common/eal_common_pci.c b/lib/librte_eal/common/eal_common_pci.c
index 4a0ec73..04a2490 100644
--- a/lib/librte_eal/common/eal_common_pci.c
+++ b/lib/librte_eal/common/eal_common_pci.c
@@ -89,6 +89,8 @@ struct pci_device_list pci_device_list =
 
 enum rte_eal_pci_hook {
 	RTE_EAL_PCI_SCAN,
+	RTE_EAL_PCI_ATTACH,
+	RTE_EAL_PCI_DETACH,
 };
 
 enum rte_eal_pci_hook_return {
@@ -132,6 +134,8 @@ blacklist_pci_hook(enum rte_eal_pci_hook h,
 			ret = RTE_EAL_PCI_HOOK_OK;
 		break;
 	}
+	case RTE_EAL_PCI_ATTACH:
+	case RTE_EAL_PCI_DETACH:
 	default:
 		/* nothing to do here, just say ok */
 		ret = RTE_EAL_PCI_HOOK_OK;
@@ -141,6 +145,61 @@ blacklist_pci_hook(enum rte_eal_pci_hook h,
 	return ret;
 }
 
+static char *uio_default_driver;
+
+static int
+autobind_uio_pci_hook(enum rte_eal_pci_hook h,
+		      struct rte_pci_driver *dr,
+		      struct rte_pci_device *dev)
+{
+	int ret;
+
+	/* run blacklist_pci_hook first so that blacklisting still applies */
+	ret = blacklist_pci_hook(h, dr, dev);
+	if (ret != RTE_EAL_PCI_HOOK_OK)
+		goto exit;
+
+	switch (h) {
+	case RTE_EAL_PCI_ATTACH:
+	{
+		/* either nothing needed, or already bound */
+		if (!(dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) ||
+		    pci_mapping_driver_bound(dev)) {
+			ret = RTE_EAL_PCI_HOOK_OK;
+			goto exit;
+		}
+
+		if (pci_rebind_device(dev, uio_default_driver) < 0 ||
+		    pci_refresh_device(&dev->addr) < 0)
+			ret = RTE_EAL_PCI_HOOK_ERROR;
+		else
+			ret = RTE_EAL_PCI_HOOK_OK;
+
+		break;
+	}
+	case RTE_EAL_PCI_DETACH:
+	{
+		if (!(dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING)) {
+			ret = RTE_EAL_PCI_HOOK_OK;
+			goto exit;
+		}
+
+		pci_rebind_device(dev, "");
+		ret = RTE_EAL_PCI_HOOK_OK;
+
+		break;
+	}
+	case RTE_EAL_PCI_SCAN:
+	default:
+		/* nothing to do here, just say ok */
+		ret = RTE_EAL_PCI_HOOK_OK;
+		break;
+	}
+
+exit:
+	return ret;
+}
+
 static rte_eal_pci_hook_t *pci_hook = &blacklist_pci_hook;
 
 static struct rte_devargs *pci_devargs_lookup(struct rte_pci_device *dev)
@@ -269,6 +328,12 @@ pci_probe_device(struct rte_pci_driver *dr, struct rte_pci_device *dev)
 	RTE_LOG(DEBUG, EAL, "  probe driver: %x:%x %s\n", dev->id.vendor_id,
 		dev->id.device_id, dr->name);
 
+	ret = pci_hook(RTE_EAL_PCI_ATTACH, dr, dev);
+	if (ret != RTE_EAL_PCI_HOOK_OK) {
+		RTE_LOG(DEBUG, EAL, "  attach hook refused dev, err=%d\n", ret);
+		return -1;
+	}
+
 	if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) {
 #ifdef RTE_PCI_CONFIG
 		/*
@@ -332,6 +397,15 @@ pci_find_driver(struct rte_pci_device *dev)
 	return dr;
 }
 
+void pci_init_autobind(const char *name)
+{
+	if (uio_default_driver)
+		free(uio_default_driver);
+	uio_default_driver = strdup(name);
+	if (uio_default_driver)
+		pci_hook = &autobind_uio_pci_hook;
+}
+
 /*
  * Find the pci device specified by pci address, then invoke probe function of
  * the driver of the devive.
@@ -382,6 +456,7 @@ int
 rte_eal_pci_detach(const struct rte_pci_addr *addr)
 {
 	struct rte_pci_device *dev;
+	struct rte_pci_driver *dr;
 
 	if (addr == NULL)
 		return -1;
@@ -394,9 +469,13 @@ rte_eal_pci_detach(const struct rte_pci_addr *addr)
 		dev->addr.domain, dev->addr.bus, dev->addr.devid,
 		dev->addr.function, dev->numa_node);
 
+	dr = dev->driver;
+
 	if (pci_detach_device(dev) < 0)
 		goto err_return;
 
+	pci_hook(RTE_EAL_PCI_DETACH, dr, dev);
+
 	TAILQ_REMOVE(&pci_device_list, dev, next);
 	return 0;
 
diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
index a881c62..182bc3d 100644
--- a/lib/librte_eal/common/eal_options.h
+++ b/lib/librte_eal/common/eal_options.h
@@ -83,6 +83,8 @@ enum {
 	OPT_VMWARE_TSC_MAP_NUM,
 #define OPT_XEN_DOM0          "xen-dom0"
 	OPT_XEN_DOM0_NUM,
+#define OPT_PCI_UIO_AUTOBIND  "pci-uio-autobind"
+	OPT_PCI_UIO_AUTOBIND_NUM,
 	OPT_LONG_MAX_NUM
 };
 
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index e44a448..9019144 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -215,6 +215,44 @@ int pci_unbind_kernel_driver(struct rte_pci_device *dev);
 int pci_map_device(struct rte_pci_device *dev);
 
 /**
+ * Tell the autobind system which kernel driver to bind devices to.
+ *
+ * This function is private to EAL.
+ *
+ * @param name
+ *    The kernel driver to bind to.
+ */
+void pci_init_autobind(const char *name);
+
+/**
+ * Helper used by autobind system to check if device is already bound
+ * to an adequate driver.
+ *
+ * This function is private to EAL.
+ *
+ * @param dev
+ *	The PCI device object.
+ * @return
+ *   0 if the device is not bound to an adequate driver, positive if it is.
+ */
+int pci_mapping_driver_bound(const struct rte_pci_device *dev);
+
+/**
+ * Rebind a pci device to a kernel driver.
+ *
+ * This function is private to EAL.
+ *
+ * @param dev
+ *	The PCI device object.
+ * @param name
+ *    The kernel driver to bind to; passing an empty name tries to rebind
+ *    the device to its original kernel driver.
+ * @return
+ *   0 on success, negative on error
+ */
+int pci_rebind_device(const struct rte_pci_device *dev, const char *name);
+
+/**
  * Unmap this device
  *
  * This function is private to EAL.
diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/librte_eal/common/include/rte_pci.h
index 9edd5f5..75cab50 100644
--- a/lib/librte_eal/common/include/rte_pci.h
+++ b/lib/librte_eal/common/include/rte_pci.h
@@ -90,8 +90,11 @@ TAILQ_HEAD(pci_driver_list, rte_pci_driver); /**< PCI drivers in D-linked Q. */
 extern struct pci_driver_list pci_driver_list; /**< Global list of PCI drivers. */
 extern struct pci_device_list pci_device_list; /**< Global list of PCI devices. */
 
-/** Pathname of PCI devices directory. */
-#define SYSFS_PCI_DEVICES "/sys/bus/pci/devices"
+/* FIXME: this is linux stuff, should not be in common/ */
+/** Pathname of PCI directories. */
+#define SYSFS_PCI "/sys/bus/pci"
+#define SYSFS_PCI_DEVICES SYSFS_PCI"/devices"
+#define SYSFS_PCI_DRIVERS SYSFS_PCI"/drivers"
 
 /** Formatting string for PCI device identifier: Ex: 0000:00:01.0 */
 #define PCI_PRI_FMT "%.4" PRIx16 ":%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
index c3118fc..e711eea 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -94,6 +94,197 @@ error:
 }
 
 static int
+pci_rebind_device_override(const struct rte_pci_device *dev, const char *driver)
+{
+	int n;
+	FILE *f = NULL;
+	char filename[PATH_MAX];
+	char buf[BUFSIZ];
+	const struct rte_pci_addr *loc = &dev->addr;
+
+	snprintf(filename, sizeof(filename),
+		 SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/driver_override",
+		 loc->domain, loc->bus, loc->devid, loc->function);
+
+	n = snprintf(buf, sizeof(buf), "%s\n", driver);
+	if (n < 0 || n >= (int)sizeof(buf)) {
+		RTE_LOG(ERR, EAL, "%s(): snprintf failed\n", __func__);
+		goto error;
+	}
+
+	f = fopen(filename, "w");
+	if (!f || fwrite(buf, n, 1, f) == 0) {
+		RTE_LOG(ERR, EAL, "%s(): could not write to %s\n",
+			__func__, filename);
+		goto error;
+	}
+
+	fclose(f);
+
+	snprintf(filename, sizeof(filename),
+		 SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/driver/unbind",
+		 loc->domain, loc->bus, loc->devid, loc->function);
+
+	n = snprintf(buf, sizeof(buf), PCI_PRI_FMT "\n",
+		     loc->domain, loc->bus, loc->devid, loc->function);
+	if (n < 0 || n >= (int)sizeof(buf)) {
+		RTE_LOG(ERR, EAL, "%s(): snprintf failed\n", __func__);
+		goto error;
+	}
+
+	f = fopen(filename, "w");
+	/* open may fail if the device is bound to no driver; skip unbind then */
+	if (f) {
+		if (fwrite(buf, n, 1, f) == 0) {
+			RTE_LOG(ERR, EAL, "%s(): could not write to %s\n",
+				__func__, filename);
+			goto error;
+		}
+		fclose(f);
+	}
+
+	snprintf(filename, sizeof(filename), SYSFS_PCI "/drivers_probe");
+
+	f = fopen(filename, "w");
+	if (!f || fwrite(buf, n, 1, f) == 0) {
+		RTE_LOG(ERR, EAL, "%s(): could not write to %s\n", __func__,
+			filename);
+		goto error;
+	}
+
+	fclose(f);
+
+	return 0;
+error:
+	if (f)
+		fclose(f);
+	return -1;
+}
+
+static int
+pci_rebind_device_legacy(const struct rte_pci_device *dev, const char *driver)
+{
+	int n;
+	FILE *f = NULL;
+	char filename[PATH_MAX];
+	char buf[BUFSIZ];
+	const struct rte_pci_addr *loc = &dev->addr;
+
+	if (driver[0] != '\0') {
+		snprintf(filename, sizeof(filename),
+			 SYSFS_PCI_DRIVERS "/%s/new_id", driver);
+
+		n = snprintf(buf, sizeof(buf), "%4.4x %4.4x\n",
+			     dev->id.vendor_id, dev->id.device_id);
+		if (n < 0 || n >= (int)sizeof(buf)) {
+			RTE_LOG(ERR, EAL, "%s(): snprintf failed\n", __func__);
+			goto error;
+		}
+
+		f = fopen(filename, "w");
+		if (!f || fwrite(buf, n, 1, f) == 0) {
+			RTE_LOG(ERR, EAL, "%s(): could not write to %s\n",
+				__func__, filename);
+			goto error;
+		}
+
+		fclose(f);
+	}
+
+	snprintf(filename, sizeof(filename),
+		 SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/driver/unbind",
+		 loc->domain, loc->bus, loc->devid, loc->function);
+
+	n = snprintf(buf, sizeof(buf), PCI_PRI_FMT "\n",
+		     loc->domain, loc->bus, loc->devid, loc->function);
+	if (n < 0 || n >= (int)sizeof(buf)) {
+		RTE_LOG(ERR, EAL, "%s(): snprintf failed\n", __func__);
+		goto error;
+	}
+
+	f = fopen(filename, "w");
+	/* open may fail if the device is bound to no driver; skip unbind then */
+	if (f) {
+		if (fwrite(buf, n, 1, f) == 0) {
+			RTE_LOG(ERR, EAL, "%s(): could not write to %s\n",
+				__func__, filename);
+			goto error;
+		}
+		fclose(f);
+	}
+
+	if (driver[0] != '\0') {
+		snprintf(filename, sizeof(filename),
+			 SYSFS_PCI_DRIVERS "/%s/bind", driver);
+
+		f = fopen(filename, "w");
+		if (!f || fwrite(buf, n, 1, f) == 0) {
+			RTE_LOG(ERR, EAL, "%s(): could not write to %s\n",
+				__func__, filename);
+			goto error;
+		}
+
+		fclose(f);
+
+		snprintf(filename, sizeof(filename),
+			 SYSFS_PCI_DRIVERS "/%s/remove_id", driver);
+
+		n = snprintf(buf, sizeof(buf), "%4.4x %4.4x\n",
+			     dev->id.vendor_id, dev->id.device_id);
+		if (n < 0 || n >= (int)sizeof(buf)) {
+			RTE_LOG(ERR, EAL, "%s(): snprintf failed\n", __func__);
+			goto error;
+		}
+
+		f = fopen(filename, "w");
+		if (!f || fwrite(buf, n, 1, f) == 0) {
+			RTE_LOG(ERR, EAL, "%s(): could not write to %s\n",
+				__func__, filename);
+			goto error;
+		}
+
+		fclose(f);
+	} else {
+		snprintf(filename, sizeof(filename),
+			 SYSFS_PCI "/drivers_probe");
+
+		f = fopen(filename, "w");
+		if (!f || fwrite(buf, n, 1, f) == 0) {
+			RTE_LOG(ERR, EAL, "%s(): could not write to %s\n",
+				__func__, filename);
+			goto error;
+		}
+
+		fclose(f);
+	}
+
+	return 0;
+error:
+	if (f)
+		fclose(f);
+	return -1;
+}
+
+int pci_rebind_device(const struct rte_pci_device *dev, const char *driver)
+{
+	FILE *f;
+	char filename[PATH_MAX];
+	const struct rte_pci_addr *loc = &dev->addr;
+
+	snprintf(filename, sizeof(filename),
+		 SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/driver_override",
+		 loc->domain, loc->bus, loc->devid, loc->function);
+
+	f = fopen(filename, "w");
+	if (f) {
+		fclose(f);
+		return pci_rebind_device_override(dev, driver);
+	} else {
+		return pci_rebind_device_legacy(dev, driver);
+	}
+}
+
+static int
 pci_parse_sysfs_driver(const char *filename, struct rte_pci_device *dev)
 {
 	int count;
@@ -127,6 +318,25 @@ pci_parse_sysfs_driver(const char *filename, struct rte_pci_device *dev)
 	return 0;
 }
 
+int
+pci_mapping_driver_bound(const struct rte_pci_device *dev)
+{
+	int ret;
+
+	switch (dev->kdrv) {
+	case RTE_KDRV_VFIO:
+	case RTE_KDRV_IGB_UIO:
+	case RTE_KDRV_UIO_GENERIC:
+		ret = 1;
+		break;
+	default:
+		ret = 0;
+		break;
+	}
+
+	return ret;
+}
+
 /* Map pci device */
 int
 pci_map_device(struct rte_pci_device *dev)
-- 
1.9.1



More information about the dev mailing list