[dpdk-dev] [PATCH v4 3/7] hv: add basic vmbus support
    Stephen Hemminger 
    stephen at networkplumber.org
       
    Tue Apr 21 19:32:40 CEST 2015
    
    
  
The hyper-v device driver forces the base EAL code to change
to support multiple bus types. This is done by changing the pci_device
in the ether driver to a generic union.
As much as possible this is done in a backwards source compatible
way. It will break ABI for device drivers.
Signed-off-by: Stephen Hemminger <stephen at networkplumber.org>
---
 lib/librte_eal/common/Makefile             |   2 +-
 lib/librte_eal/common/eal_common_options.c |   5 +
 lib/librte_eal/common/eal_internal_cfg.h   |   1 +
 lib/librte_eal/common/eal_options.h        |   2 +
 lib/librte_eal/common/eal_private.h        |  10 +
 lib/librte_eal/common/include/rte_vmbus.h  | 159 +++++++
 lib/librte_eal/linuxapp/eal/Makefile       |   3 +
 lib/librte_eal/linuxapp/eal/eal.c          |  11 +
 lib/librte_eal/linuxapp/eal/eal_vmbus.c    | 641 +++++++++++++++++++++++++++++
 lib/librte_ether/rte_ethdev.c              | 128 +++++-
 lib/librte_ether/rte_ethdev.h              |  15 +-
 11 files changed, 968 insertions(+), 9 deletions(-)
 create mode 100644 lib/librte_eal/common/include/rte_vmbus.h
 create mode 100644 lib/librte_eal/linuxapp/eal/eal_vmbus.c
diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
index 3ea3bbf..202485e 100644
--- a/lib/librte_eal/common/Makefile
+++ b/lib/librte_eal/common/Makefile
@@ -33,7 +33,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
 
 INC := rte_branch_prediction.h rte_common.h
 INC += rte_debug.h rte_eal.h rte_errno.h rte_launch.h rte_lcore.h
-INC += rte_log.h rte_memory.h rte_memzone.h rte_pci.h
+INC += rte_log.h rte_memory.h rte_memzone.h rte_pci.h rte_vmbus.h
 INC += rte_pci_dev_ids.h rte_per_lcore.h rte_random.h
 INC += rte_rwlock.h rte_tailq.h rte_interrupts.h rte_alarm.h
 INC += rte_string_fns.h rte_version.h
diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index 8fcb1ab..76a3394 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -80,6 +80,7 @@ eal_long_options[] = {
 	{OPT_NO_HPET,           0, NULL, OPT_NO_HPET_NUM          },
 	{OPT_NO_HUGE,           0, NULL, OPT_NO_HUGE_NUM          },
 	{OPT_NO_PCI,            0, NULL, OPT_NO_PCI_NUM           },
+	{OPT_NO_VMBUS,		0, NULL, OPT_NO_VMBUS_NUM	  },
 	{OPT_NO_SHCONF,         0, NULL, OPT_NO_SHCONF_NUM        },
 	{OPT_PCI_BLACKLIST,     1, NULL, OPT_PCI_BLACKLIST_NUM    },
 	{OPT_PCI_WHITELIST,     1, NULL, OPT_PCI_WHITELIST_NUM    },
@@ -726,6 +727,10 @@ eal_parse_common_option(int opt, const char *optarg,
 		conf->no_pci = 1;
 		break;
 
+	case OPT_NO_VMBUS_NUM:
+		conf->no_vmbus = 1;
+		break;
+
 	case OPT_NO_HPET_NUM:
 		conf->no_hpet = 1;
 		break;
diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
index e2ecb0d..0e7de34 100644
--- a/lib/librte_eal/common/eal_internal_cfg.h
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -66,6 +66,7 @@ struct internal_config {
 	volatile unsigned no_hugetlbfs;   /**< true to disable hugetlbfs */
 	volatile unsigned xen_dom0_support; /**< support app running on Xen Dom0*/
 	volatile unsigned no_pci;         /**< true to disable PCI */
+	volatile unsigned no_vmbus;	  /**< true to disable VMBUS */
 	volatile unsigned no_hpet;        /**< true to disable HPET */
 	volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping
 										* instead of native TSC */
diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
index f6714d9..54f03dc 100644
--- a/lib/librte_eal/common/eal_options.h
+++ b/lib/librte_eal/common/eal_options.h
@@ -67,6 +67,8 @@ enum {
 	OPT_NO_HUGE_NUM,
 #define OPT_NO_PCI            "no-pci"
 	OPT_NO_PCI_NUM,
+#define OPT_NO_VMBUS          "no-vmbus"
+	OPT_NO_VMBUS_NUM,
 #define OPT_NO_SHCONF         "no-shconf"
 	OPT_NO_SHCONF_NUM,
 #define OPT_SOCKET_MEM        "socket-mem"
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index 4acf5a0..039e9f3 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -180,6 +180,16 @@ int rte_eal_pci_close_one_driver(struct rte_pci_driver *dr,
 		struct rte_pci_device *dev);
 
 /**
+ * VMBUS related functions and structures
+ */
+int rte_eal_vmbus_init(void);
+
+struct rte_vmbus_driver;
+struct rte_vmbus_device;
+
+int rte_eal_vmbus_probe_one_driver(struct rte_vmbus_driver *dr,
+		struct rte_vmbus_device *dev);
+/**
  * Init tail queues for non-EAL library structures. This is to allow
  * the rings, mempools, etc. lists to be shared among multiple processes
  *
diff --git a/lib/librte_eal/common/include/rte_vmbus.h b/lib/librte_eal/common/include/rte_vmbus.h
new file mode 100644
index 0000000..e632572
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_vmbus.h
@@ -0,0 +1,159 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2013-2015 Brocade Communications Systems, Inc.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef _RTE_VMBUS_H_
+#define _RTE_VMBUS_H_
+
+/**
+ * @file
+ *
+ * RTE VMBUS Interface
+ */
+
+#include <sys/queue.h>
+
+/** Pathname of VMBUS devices directory. */
+#define SYSFS_VMBUS_DEVICES "/sys/bus/vmbus/devices"
+
+/** Formatting string for VMBUS device identifier: Ex: vmbus_0_9 */
+#define VMBUS_PRI_FMT "vmbus_0_%u"
+
+#define VMBUS_ID_ANY 0xFFFF
+
+#define VMBUS_NETWORK_DEVICE "{f8615163-df3e-46c5-913f-f2d2f965ed0e}"
+
+/** Maximum number of VMBUS resources. */
+#define VMBUS_MAX_RESOURCE 7
+
+/**
+ * A structure describing an ID for a VMBUS driver. Each driver provides a
+ * table of these IDs for each device that it supports.
+ */
+struct rte_vmbus_id {
+	uint16_t device_id;           /**< VMBUS Device ID */
+	uint16_t sysfs_num;           /**< vmbus_0_X */
+};
+
+/**
+ * A structure describing a VMBUS memory resource.
+ */
+struct rte_vmbus_resource {
+	uint64_t phys_addr;   /**< Physical address, 0 if no resource. */
+	uint64_t len;         /**< Length of the resource. */
+	void *addr;           /**< Virtual address, NULL when not mapped. */
+};
+
+/**
+ * A structure describing a VMBUS device.
+ */
+struct rte_vmbus_device {
+	TAILQ_ENTRY(rte_vmbus_device) next;     /**< Next probed VMBUS device. */
+	struct rte_vmbus_id id;                 /**< VMBUS ID. */
+	const struct rte_vmbus_driver *driver;  /**< Associated driver */
+	int numa_node;                          /**< NUMA node connection */
+	unsigned int blacklisted:1;             /**< Device is blacklisted */
+	struct rte_vmbus_resource mem_resource[VMBUS_MAX_RESOURCE];   /**< VMBUS Memory Resource */
+	uint32_t vmbus_monitor_id;              /**< VMBus monitor ID for device */
+	int uio_fd;                             /**< UIO device file descriptor */
+};
+
+/** Macro used to help building up tables of device IDs */
+#define RTE_VMBUS_DEVICE(dev)          \
+	.device_id = (dev)
+
+struct rte_vmbus_driver;
+
+/**
+ * Initialisation function for the driver called during VMBUS probing.
+ */
+typedef int (vmbus_devinit_t)(struct rte_vmbus_driver *, struct rte_vmbus_device *);
+
+/**
+ * Uninitialisation function for the driver called during hotplugging.
+ */
+typedef int (vmbus_devuninit_t)(struct rte_vmbus_device *);
+
+/**
+ * A structure describing a VMBUS driver.
+ */
+struct rte_vmbus_driver {
+	TAILQ_ENTRY(rte_vmbus_driver) next;     /**< Next in list. */
+	const char *name;                       /**< Driver name. */
+	vmbus_devinit_t *devinit;               /**< Device init. function. */
+	vmbus_devuninit_t *devuninit;           /**< Device uninit function. */
+	const struct rte_vmbus_id *id_table;    /**< ID table, ends with a VMBUS_ID_ANY entry. */
+	uint32_t drv_flags;                     /**< Flags controlling handling of device. */
+	const char *module_name;		/**< Associated kernel module */
+};
+
+/**
+ * Probe the VMBUS device for registered drivers.
+ *
+ * Scan the content of the vmbus, and call the devinit() function for
+ * all registered drivers that have a matching entry in their id_table
+ * for discovered devices.
+ *
+ * @return
+ *   - 0 on success.
+ *   - Negative on error.
+ */
+int rte_eal_vmbus_probe(void);
+
+/**
+ * Dump the content of the vmbus.
+ */
+void rte_eal_vmbus_dump(void);
+
+/**
+ * Register a VMBUS driver.
+ *
+ * @param driver
+ *   A pointer to a rte_vmbus_driver structure describing the driver
+ *   to be registered.
+ */
+void rte_eal_vmbus_register(struct rte_vmbus_driver *driver);
+
+/**
+ * Unregister a VMBUS driver.
+ *
+ * @param driver
+ *   A pointer to a rte_vmbus_driver structure describing the driver
+ *   to be unregistered.
+ */
+void rte_eal_vmbus_unregister(struct rte_vmbus_driver *driver);
+
+int vmbus_uio_map_resource(struct rte_vmbus_device *dev);
+
+#endif /* _RTE_VMBUS_H_ */
diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
index 01f7b70..acd5127 100644
--- a/lib/librte_eal/linuxapp/eal/Makefile
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -74,6 +74,9 @@ SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_alarm.c
 ifeq ($(CONFIG_RTE_LIBRTE_IVSHMEM),y)
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_ivshmem.c
 endif
+ifeq ($(CONFIG_RTE_LIBRTE_HV_PMD),y)
+SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_vmbus.c
+endif
 
 # from common dir
 SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_memzone.c
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index bd770cf..86d0e31 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -70,6 +70,7 @@
 #include <rte_cpuflags.h>
 #include <rte_interrupts.h>
 #include <rte_pci.h>
+#include <rte_vmbus.h>
 #include <rte_devargs.h>
 #include <rte_common.h>
 #include <rte_version.h>
@@ -796,6 +797,11 @@ rte_eal_init(int argc, char **argv)
 
 	rte_eal_mcfg_complete();
 
+#ifdef RTE_LIBRTE_HV_PMD
+	if (rte_eal_vmbus_init() < 0)
+		RTE_LOG(ERR, EAL, "Cannot init VMBUS\n");
+#endif
+
 	TAILQ_FOREACH(solib, &solib_list, next) {
 		RTE_LOG(INFO, EAL, "open shared lib %s\n", solib->name);
 		solib->lib_handle = dlopen(solib->name, RTLD_NOW);
@@ -845,6 +851,11 @@ rte_eal_init(int argc, char **argv)
 	if (rte_eal_pci_probe())
 		rte_panic("Cannot probe PCI\n");
 
+#ifdef RTE_LIBRTE_HV_PMD
+	if (rte_eal_vmbus_probe() < 0)
+		rte_panic("Cannot probe VMBUS\n");
+#endif
+
 	return fctret;
 }
 
diff --git a/lib/librte_eal/linuxapp/eal/eal_vmbus.c b/lib/librte_eal/linuxapp/eal/eal_vmbus.c
new file mode 100644
index 0000000..165edd6
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_vmbus.c
@@ -0,0 +1,641 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2013-2015 Brocade Communications Systems, Inc.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <string.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/queue.h>
+
+#include <rte_log.h>
+#include <rte_vmbus.h>
+#include <rte_common.h>
+#include <rte_tailq.h>
+#include <rte_eal.h>
+#include <rte_malloc.h>
+
+#include "eal_filesystem.h"
+#include "eal_private.h"
+
+#define PROC_MODULES "/proc/modules"
+#define VMBUS_DRV_PATH "/sys/bus/vmbus/drivers/%s"
+
+TAILQ_HEAD(vmbus_device_list, rte_vmbus_device); /**< VMBUS devices in D-linked Q. */
+TAILQ_HEAD(vmbus_driver_list, rte_vmbus_driver); /**< VMBUS drivers in D-linked Q. */
+
+static struct vmbus_driver_list vmbus_driver_list =
+	TAILQ_HEAD_INITIALIZER(vmbus_driver_list);
+static struct vmbus_device_list vmbus_device_list =
+	TAILQ_HEAD_INITIALIZER(vmbus_device_list);
+
+struct uio_map {
+	void *addr;
+	uint64_t offset;
+	uint64_t size;
+	uint64_t phaddr;
+};
+
+/*
+ * For multi-process we need to reproduce all vmbus mappings in secondary
+ * processes, so save them in a tailq.
+ */
+struct uio_resource {
+	TAILQ_ENTRY(uio_resource) next;
+
+	struct rte_vmbus_id vmbus_addr;
+	char path[PATH_MAX];
+	size_t nb_maps;
+	struct uio_map maps[VMBUS_MAX_RESOURCE];
+};
+
+/*
+ * parse a sysfs file containing one integer value
+ * different to the eal version, as it needs to work with 64-bit values
+ */
+static int
+vmbus_parse_sysfs_value(const char *filename, uint64_t *val)
+{
+	FILE *f;
+	char buf[BUFSIZ];
+	char *end = NULL;
+
+	f = fopen(filename, "r");
+	if (f == NULL) {
+		RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n",
+				__func__, filename);
+		return -1;
+	}
+
+	if (fgets(buf, sizeof(buf), f) == NULL) {
+		RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n",
+				__func__, filename);
+		fclose(f);
+		return -1;
+	}
+	*val = strtoull(buf, &end, 0);
+	if ((buf[0] == '\0') || (end == NULL) || (*end != '\n')) {
+		RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs value %s\n",
+				__func__, filename);
+		fclose(f);
+		return -1;
+	}
+	fclose(f);
+	return 0;
+}
+
+#define OFF_MAX              ((uint64_t)(off_t)-1)
+static ssize_t
+vmbus_uio_get_mappings(const char *devname, struct uio_map maps[], size_t nb_maps)
+{
+	size_t i;
+	char dirname[PATH_MAX];
+	char filename[PATH_MAX];
+	uint64_t offset, size;
+
+	for (i = 0; i != nb_maps; i++) {
+
+		/* check if map directory exists */
+		snprintf(dirname, sizeof(dirname),
+				"%s/maps/map%zu", devname, i);
+
+		RTE_LOG(DEBUG, EAL, "Scanning maps in %s\n", (char *)dirname);
+
+		if (access(dirname, F_OK) != 0)
+			break;
+
+		/* get mapping offset */
+		snprintf(filename, sizeof(filename),
+				"%s/offset", dirname);
+		if (vmbus_parse_sysfs_value(filename, &offset) < 0) {
+			RTE_LOG(ERR, EAL,
+					"%s(): cannot parse offset of %s\n",
+					__func__, dirname);
+			return -1;
+		}
+
+		/* get mapping size */
+		snprintf(filename, sizeof(filename),
+				"%s/size", dirname);
+		if (vmbus_parse_sysfs_value(filename, &size) < 0) {
+			RTE_LOG(ERR, EAL,
+					"%s(): cannot parse size of %s\n",
+					__func__, dirname);
+			return -1;
+		}
+
+		/* get mapping physical address */
+		snprintf(filename, sizeof(filename),
+				"%s/addr", dirname);
+		if (vmbus_parse_sysfs_value(filename, &maps[i].phaddr) < 0) {
+			RTE_LOG(ERR, EAL,
+					"%s(): cannot parse addr of %s\n",
+					__func__, dirname);
+			return -1;
+		}
+
+		if ((offset > OFF_MAX) || (size > SIZE_MAX)) {
+			RTE_LOG(ERR, EAL,
+					"%s(): offset/size exceed system max value\n",
+					__func__);
+			return -1;
+		}
+
+		maps[i].offset = offset;
+		maps[i].size = size;
+	}
+	return i;
+}
+
+/* maximum time to wait for /dev/uioX to appear */
+#define UIO_DEV_WAIT_TIMEOUT 3 /* seconds */
+
+/* map a particular resource from a file */
+static void *
+vmbus_map_resource(struct rte_vmbus_device *dev, void *requested_addr,
+		const char *devname, off_t offset, size_t size)
+{
+	int fd;
+	void *mapaddr;
+
+	if (dev->uio_fd <= 0)
+		fd = open(devname, O_RDWR);
+	else
+		fd = dev->uio_fd;
+
+	if (fd < 0) {
+		RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
+				devname, strerror(errno));
+		goto fail;
+	}
+
+	dev->uio_fd = fd;
+	/* Map the memory resource of device */
+	mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE,
+			MAP_SHARED, fd, offset);
+	if (mapaddr == MAP_FAILED ||
+			(requested_addr != NULL && mapaddr != requested_addr)) {
+		RTE_LOG(ERR, EAL,
+			"%s(): cannot mmap(%s(%d), %p, 0x%lx, 0x%lx):"
+			" %s (%p)\n", __func__, devname, fd, requested_addr,
+			(unsigned long)size, (unsigned long)offset,
+			strerror(errno), mapaddr);
+		close(fd);
+		goto fail;
+	}
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+		close(fd);
+
+	RTE_LOG(DEBUG, EAL, "  VMBUS memory mapped at %p\n", mapaddr);
+
+	return mapaddr;
+
+fail:
+	return NULL;
+}
+
+/* map the resources of a vmbus device in virtual memory */
+int
+vmbus_uio_map_resource(struct rte_vmbus_device *dev)
+{
+	int i;
+	struct dirent *e;
+	DIR *dir;
+	char dirname[PATH_MAX];
+	char dirname2[PATH_MAX];
+	char devname[PATH_MAX]; /* contains the /dev/uioX */
+	void *mapaddr;
+	unsigned uio_num;
+	uint64_t phaddr;
+	uint64_t offset;
+	uint64_t pagesz;
+	ssize_t nb_maps;
+	struct rte_vmbus_id *loc = &dev->id;
+	struct uio_resource *uio_res;
+	struct uio_map *maps;
+
+	/* depending on kernel version, uio can be located in uio/uioX
+	 * or uio:uioX */
+	snprintf(dirname, sizeof(dirname),
+			"/sys/bus/vmbus/devices/" VMBUS_PRI_FMT "/uio", loc->sysfs_num);
+
+	dir = opendir(dirname);
+	if (dir == NULL) {
+		/* retry with the parent directory */
+		snprintf(dirname, sizeof(dirname),
+				"/sys/bus/vmbus/devices/" VMBUS_PRI_FMT, loc->sysfs_num);
+		dir = opendir(dirname);
+
+		if (dir == NULL) {
+			RTE_LOG(ERR, EAL, "Cannot opendir %s\n", dirname);
+			return -1;
+		}
+	}
+
+	/* take the first file starting with "uio" */
+	while ((e = readdir(dir)) != NULL) {
+		/* format could be uio%d ...*/
+		int shortprefix_len = sizeof("uio") - 1;
+		/* ... or uio:uio%d */
+		int longprefix_len = sizeof("uio:uio") - 1;
+		char *endptr;
+
+		if (strncmp(e->d_name, "uio", 3) != 0)
+			continue;
+
+		/* first try uio%d */
+		errno = 0;
+		uio_num = strtoull(e->d_name + shortprefix_len, &endptr, 10);
+		if (errno == 0 && endptr != e->d_name) {
+			snprintf(dirname2, sizeof(dirname2),
+					"%s/uio%u", dirname, uio_num);
+			break;
+		}
+
+		/* then try uio:uio%d */
+		errno = 0;
+		uio_num = strtoull(e->d_name + longprefix_len, &endptr, 10);
+		if (errno == 0 && endptr != e->d_name) {
+			snprintf(dirname2, sizeof(dirname2),
+					"%s/uio:uio%u", dirname, uio_num);
+			break;
+		}
+	}
+	closedir(dir);
+
+	/* No uio resource found */
+	if (e == NULL) {
+		RTE_LOG(WARNING, EAL,
+			VMBUS_PRI_FMT" not managed by UIO driver, skipping\n",
+			loc->sysfs_num);
+		return -1;
+	}
+
+	/* allocate the mapping details for secondary processes*/
+	uio_res = rte_zmalloc("UIO_RES", sizeof(*uio_res), 0);
+	if (uio_res == NULL) {
+		RTE_LOG(ERR, EAL,
+				"%s(): cannot store uio mmap details\n", __func__);
+		return -1;
+	}
+
+	snprintf(devname, sizeof(devname), "/dev/uio%u", uio_num);
+	snprintf(uio_res->path, sizeof(uio_res->path), "%s", devname);
+	memcpy(&uio_res->vmbus_addr, &dev->id, sizeof(uio_res->vmbus_addr));
+
+	/* collect info about device mappings */
+	nb_maps = vmbus_uio_get_mappings(dirname2, uio_res->maps,
+			sizeof(uio_res->maps) / sizeof(uio_res->maps[0]));
+	if (nb_maps < 0)
+		return nb_maps;
+
+	RTE_LOG(DEBUG, EAL, "Found %d memory maps for device "VMBUS_PRI_FMT"\n",
+			(int)nb_maps, loc->sysfs_num);
+
+	uio_res->nb_maps = nb_maps;
+
+	pagesz = sysconf(_SC_PAGESIZE);
+
+	maps = uio_res->maps;
+	for (i = 0; i != VMBUS_MAX_RESOURCE; i++) {
+		phaddr = maps[i].phaddr;
+		if (phaddr == 0)
+			continue;
+
+		RTE_LOG(DEBUG, EAL, "	mem_map%d: addr=0x%lx len = %lu\n",
+				i,
+				maps[i].phaddr,
+				maps[i].size);
+
+		if (i != nb_maps) {
+			offset = i * pagesz;
+			mapaddr = vmbus_map_resource(dev, NULL, devname, (off_t)offset,
+					(size_t)maps[i].size);
+			if (mapaddr == NULL)
+				return -1;
+
+			/* Important: offset for mapping can be non-zero, pad the addr */
+			mapaddr = ((char *)mapaddr + maps[i].offset);
+			maps[i].addr = mapaddr;
+			maps[i].offset = offset;
+			dev->mem_resource[i].addr = mapaddr;
+			dev->mem_resource[i].phys_addr = phaddr;
+			dev->mem_resource[i].len = maps[i].size;
+		}
+	}
+
+	return 0;
+}
+
+/* Compare two VMBUS device addresses. */
+static int
+vmbus_compare(struct rte_vmbus_id *id, struct rte_vmbus_id *id2)
+{
+	return id->device_id > id2->device_id;
+}
+
+/* Scan one vmbus sysfs entry, and fill the devices list from it. */
+static int
+vmbus_scan_one(const char *name)
+{
+	char filename[PATH_MAX];
+	char buf[BUFSIZ];
+	char dirname[PATH_MAX];
+	unsigned long tmp;
+	unsigned int sysfs_num;
+	struct rte_vmbus_device *dev;
+	FILE *f;
+
+	dev = rte_zmalloc("vmbus_device", sizeof(*dev), 0);
+	if (dev == NULL)
+		return -1;
+
+	snprintf(dirname, sizeof(dirname), "%s/%s",
+		 SYSFS_VMBUS_DEVICES, name);
+
+	/* parse directory name in sysfs.  this does not always reflect
+	 * the device id read below.
+	 */
+	if (sscanf(name, VMBUS_PRI_FMT, &sysfs_num) != 1) {
+		RTE_LOG(ERR, EAL, "Unable to parse vmbus sysfs name\n");
+		rte_free(dev);
+		return -1;
+	}
+	dev->id.sysfs_num = sysfs_num;
+
+	/* get device id */
+	snprintf(filename, sizeof(filename), "%s/id", dirname);
+	if (eal_parse_sysfs_value(filename, &tmp) < 0) {
+		rte_free(dev);
+		return -1;
+	}
+	dev->id.device_id = (uint16_t)tmp;
+
+	/* get monitor id */
+	snprintf(filename, sizeof(filename), "%s/monitor_id", dirname);
+	if (eal_parse_sysfs_value(filename, &tmp) < 0) {
+		rte_free(dev);
+		return -1;
+	}
+	dev->vmbus_monitor_id = tmp;
+
+	/* compare class_id of device with {f8615163-df3e-46c5-913f-f2d2f965ed0e} */
+	snprintf(filename, sizeof(filename), "%s/class_id", dirname);
+	f = fopen(filename, "r");
+	if (f == NULL) {
+		RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n",
+				__func__, filename);
+		rte_free(dev);
+		return -1;
+	}
+	if (fgets(buf, sizeof(buf), f) == NULL) {
+		RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n",
+				__func__, filename);
+		fclose(f);
+		rte_free(dev);
+		return -1;
+	}
+	fclose(f);
+
+	if (strncmp(buf, VMBUS_NETWORK_DEVICE, strlen(VMBUS_NETWORK_DEVICE))) {
+		RTE_LOG(DEBUG, EAL, "%s(): skip vmbus_0_%u with class_id = %s",
+				__func__, dev->id.sysfs_num, buf);
+		rte_free(dev);
+		return 0;
+	}
+
+	/* device is valid, add in list (sorted) */
+	RTE_LOG(DEBUG, EAL, "Adding vmbus device %d\n", dev->id.device_id);
+	if (!TAILQ_EMPTY(&vmbus_device_list)) {
+		struct rte_vmbus_device *dev2 = NULL;
+
+		TAILQ_FOREACH(dev2, &vmbus_device_list, next) {
+			if (vmbus_compare(&dev->id, &dev2->id))
+				continue;
+
+			TAILQ_INSERT_BEFORE(dev2, dev, next);
+			return 0;
+		}
+	}
+
+	TAILQ_INSERT_TAIL(&vmbus_device_list, dev, next);
+
+	return 0;
+}
+
+static int
+check_vmbus_device(const char *buf, int bufsize)
+{
+	char *n, *buf_copy, *endp;
+	unsigned long err;
+
+	/* the format is 'vmbus_0_%d' */
+	n = strrchr(buf, '_');
+	if (n == NULL)
+		return -1;
+	n++;
+	buf_copy = strndup(n, bufsize);
+	if (buf_copy == NULL) {
+		RTE_LOG(ERR, EAL, "%s(): failed to strndup: %s\n",
+				__func__, strerror(errno));
+		return -1;
+	}
+
+	err = strtoul(buf_copy, &endp, 10);
+	free(buf_copy);
+
+	if (*endp != '\0' || (err == ULONG_MAX && errno == ERANGE)) {
+		RTE_LOG(ERR, EAL, "%s(): can't parse devid: %s\n",
+				__func__, strerror(errno));
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Scan the content of the vmbus, and add the devices to the devices list
+ */
+static int
+vmbus_scan(void)
+{
+	struct dirent *e;
+	DIR *dir;
+
+	dir = opendir(SYSFS_VMBUS_DEVICES);
+	if (dir == NULL) {
+		if (errno == ENOENT)
+			return 0;
+
+		RTE_LOG(ERR, EAL, "%s(): opendir failed: %s\n",
+			__func__, strerror(errno));
+		return -1;
+	}
+
+	while ((e = readdir(dir)) != NULL) {
+		if (e->d_name[0] == '.')
+			continue;
+
+		if (check_vmbus_device(e->d_name, sizeof(e->d_name)))
+			continue;
+
+		if (vmbus_scan_one(e->d_name) < 0)
+			goto error;
+	}
+	closedir(dir);
+	return 0;
+
+ error:
+	closedir(dir);
+	return -1;
+}
+
+/* Init the VMBUS EAL subsystem */
+int rte_eal_vmbus_init(void)
+{
+	/* VMBUS can be disabled */
+	if (internal_config.no_vmbus)
+		return 0;
+
+	if (vmbus_scan() < 0) {
+		RTE_LOG(ERR, EAL, "%s(): Cannot scan vmbus\n", __func__);
+		return -1;
+	}
+	return 0;
+}
+
+/* Below is PROBE part of eal_vmbus library */
+
+/*
+ * If device ID match, call the devinit() function of the driver.
+ */
+int
+rte_eal_vmbus_probe_one_driver(struct rte_vmbus_driver *dr,
+		struct rte_vmbus_device *dev)
+{
+	const struct rte_vmbus_id *id_table;
+
+	for (id_table = dr->id_table; id_table->device_id != VMBUS_ID_ANY; id_table++) {
+		const struct rte_vmbus_id *loc = &dev->id;
+
+		RTE_LOG(DEBUG, EAL, "VMBUS device "VMBUS_PRI_FMT"\n",
+				loc->sysfs_num);
+		RTE_LOG(DEBUG, EAL, "  probe driver: %s\n", dr->name);
+
+		/* no initialization when blacklisted, return without error */
+		if (dev->blacklisted) {
+			RTE_LOG(DEBUG, EAL, "  Device is blacklisted, not initializing\n");
+			return 0;
+		}
+
+		/* map the resources */
+		if (vmbus_uio_map_resource(dev) < 0)
+			return -1;
+
+		/* reference driver structure */
+		dev->driver = dr;
+
+		/* call the driver devinit() function */
+		return dr->devinit(dr, dev);
+	}
+
+	/* return positive value if driver is not found */
+	return 1;
+}
+
+/*
+ * call the devinit() function of all
+ * registered drivers for the vmbus device. Return -1 if no driver is
+ * found for this class of vmbus device.
+ * The present assumption is that we have drivers only for vmbus network
+ * devices. That's why we don't check driver's id_table now.
+ */
+static int
+vmbus_probe_all_drivers(struct rte_vmbus_device *dev)
+{
+	struct rte_vmbus_driver *dr = NULL;
+	int ret;
+
+	TAILQ_FOREACH(dr, &vmbus_driver_list, next) {
+		ret = rte_eal_vmbus_probe_one_driver(dr, dev);
+		if (ret < 0) {
+			/* negative value is an error */
+			RTE_LOG(ERR, EAL, "Failed to probe driver %s\n", dr->name);
+			break;
+		}
+		if (ret > 0) {
+			/* positive value means driver not found */
+			RTE_LOG(DEBUG, EAL, "Driver %s not found", dr->name);
+			continue;
+		}
+
+		RTE_LOG(DEBUG, EAL, "OK. Driver was found and probed.\n");
+		return 0;
+	}
+	return -1;
+}
+
+
+/*
+ * Scan the vmbus, and call the devinit() function for
+ * all registered drivers that have a matching entry in its id_table
+ * for discovered devices.
+ */
+int
+rte_eal_vmbus_probe(void)
+{
+	struct rte_vmbus_device *dev = NULL;
+
+	TAILQ_FOREACH(dev, &vmbus_device_list, next) {
+		RTE_LOG(DEBUG, EAL, "Probing driver for device %d ...\n",
+				dev->id.device_id);
+		vmbus_probe_all_drivers(dev);
+	}
+	return 0;
+}
+
+/* register vmbus driver */
+void
+rte_eal_vmbus_register(struct rte_vmbus_driver *driver)
+{
+	TAILQ_INSERT_TAIL(&vmbus_driver_list, driver, next);
+}
+
+/* unregister vmbus driver */
+void
+rte_eal_vmbus_unregister(struct rte_vmbus_driver *driver)
+{
+	TAILQ_REMOVE(&vmbus_driver_list, driver, next);
+}
+
diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 9577d17..9093966 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -379,6 +379,98 @@ rte_eth_dev_uninit(struct rte_pci_device *pci_dev)
 	return 0;
 }
 
+#ifdef RTE_LIBRTE_HV_PMD
+static int
+rte_vmbus_dev_init(struct rte_vmbus_driver *vmbus_drv,
+		   struct rte_vmbus_device *vmbus_dev)
+{
+	struct eth_driver  *eth_drv = (struct eth_driver *)vmbus_drv;
+	struct rte_eth_dev *eth_dev;
+	char ethdev_name[RTE_ETH_NAME_MAX_LEN];
+	int diag;
+
+	snprintf(ethdev_name, RTE_ETH_NAME_MAX_LEN, "%u_%u",
+		 vmbus_dev->id.device_id, vmbus_dev->id.sysfs_num);
+
+	eth_dev = rte_eth_dev_allocate(ethdev_name, RTE_ETH_DEV_PCI);
+	if (eth_dev == NULL)
+		return -ENOMEM;
+
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+		eth_dev->data->dev_private = rte_zmalloc("ethdev private structure",
+				  eth_drv->dev_private_size,
+				  RTE_CACHE_LINE_SIZE);
+		if (eth_dev->data->dev_private == NULL)
+			rte_panic("Cannot allocate memzone for private port data\n");
+	}
+	eth_dev->vmbus_dev = vmbus_dev;
+	eth_dev->driver = eth_drv;
+	eth_dev->data->rx_mbuf_alloc_failed = 0;
+
+	/* init user callbacks */
+	TAILQ_INIT(&(eth_dev->link_intr_cbs));
+
+	/*
+	 * Set the default maximum frame size.
+	 */
+	eth_dev->data->mtu = ETHER_MTU;
+
+	/* Invoke PMD device initialization function */
+	diag = (*eth_drv->eth_dev_init)(eth_dev);
+	if (diag == 0)
+		return 0;
+
+	PMD_DEBUG_TRACE("driver %s: eth_dev_init(device_id=0x%x)"
+			" failed\n", vmbus_drv->name,
+			(unsigned) vmbus_dev->id.device_id);
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+		rte_free(eth_dev->data->dev_private);
+	nb_ports--;
+	return diag;
+}
+
+
+static int
+rte_vmbus_dev_uninit(struct rte_vmbus_device *vmbus_dev)
+{
+	const struct eth_driver *eth_drv;
+	struct rte_eth_dev *eth_dev;
+	char ethdev_name[RTE_ETH_NAME_MAX_LEN];
+	int ret;
+
+	if (vmbus_dev == NULL)
+		return -EINVAL;
+
+	snprintf(ethdev_name, RTE_ETH_NAME_MAX_LEN, "%u_%u",
+		 vmbus_dev->id.device_id, vmbus_dev->id.sysfs_num);
+
+	eth_dev = rte_eth_dev_allocated(ethdev_name);
+	if (eth_dev == NULL)
+		return -ENODEV;
+
+	eth_drv = (const struct eth_driver *)vmbus_dev->driver;
+
+	/* Invoke PMD device uninit function */
+	if (*eth_drv->eth_dev_uninit) {
+		ret = (*eth_drv->eth_dev_uninit)(eth_dev);
+		if (ret)
+			return ret;
+	}
+
+	/* free ether device */
+	rte_eth_dev_release_port(eth_dev);
+
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+		rte_free(eth_dev->data->dev_private);
+
+	eth_dev->pci_dev = NULL;
+	eth_dev->driver = NULL;
+	eth_dev->data = NULL;
+
+	return 0;
+}
+#endif
+
 /**
  * Register an Ethernet [Poll Mode] driver.
  *
@@ -396,9 +488,22 @@ rte_eth_dev_uninit(struct rte_pci_device *pci_dev)
 void
 rte_eth_driver_register(struct eth_driver *eth_drv)
 {
-	eth_drv->pci_drv.devinit = rte_eth_dev_init;
-	eth_drv->pci_drv.devuninit = rte_eth_dev_uninit;
-	rte_eal_pci_register(&eth_drv->pci_drv);
+	switch (eth_drv->bus_type) {
+	case RTE_BUS_PCI:
+		eth_drv->pci_drv.devinit = rte_eth_dev_init;
+		eth_drv->pci_drv.devuninit = rte_eth_dev_uninit;
+		rte_eal_pci_register(&eth_drv->pci_drv);
+		break;
+#ifdef RTE_LIBRTE_HV_PMD
+	case RTE_BUS_VMBUS:
+		eth_drv->vmbus_drv.devinit = rte_vmbus_dev_init;
+		eth_drv->vmbus_drv.devuninit = rte_vmbus_dev_uninit;
+		rte_eal_vmbus_register(&eth_drv->vmbus_drv);
+		break;
+#endif
+	default:
+		rte_panic("unknown bus type %u\n", eth_drv->bus_type);
+	}
 }
 
 static int
@@ -1351,6 +1456,9 @@ rte_eth_has_link_state(uint8_t port_id)
 	}
 	dev = &rte_eth_devices[port_id];
 
+	if (dev->driver->bus_type != RTE_BUS_PCI)
+		return 0;
+
 	return (dev->pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) != 0;
 }
 
@@ -1901,9 +2009,17 @@ rte_eth_dev_info_get(uint8_t port_id, struct rte_eth_dev_info *dev_info)
 
 	FUNC_PTR_OR_RET(*dev->dev_ops->dev_infos_get);
 	(*dev->dev_ops->dev_infos_get)(dev, dev_info);
-	dev_info->pci_dev = dev->pci_dev;
-	if (dev->driver)
-		dev_info->driver_name = dev->driver->pci_drv.name;
+
+	if (dev->driver) {
+		switch (dev->driver->bus_type) {
+		case RTE_BUS_PCI:
+			dev_info->driver_name = dev->driver->pci_drv.name;
+			dev_info->pci_dev = dev->pci_dev;
+			break;
+		case RTE_BUS_VMBUS:
+			dev_info->driver_name = dev->driver->vmbus_drv.name;
+		}
+	}
 }
 
 void
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 991023b..9e08f3e 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -178,6 +178,7 @@ extern "C" {
 #include <rte_log.h>
 #include <rte_interrupts.h>
 #include <rte_pci.h>
+#include <rte_vmbus.h>
 #include <rte_dev.h>
 #include <rte_devargs.h>
 #include <rte_mbuf.h>
@@ -1477,7 +1478,10 @@ struct rte_eth_dev {
 	struct rte_eth_dev_data *data;  /**< Pointer to device data */
 	const struct eth_driver *driver;/**< Driver for this device */
 	const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
-	struct rte_pci_device *pci_dev; /**< PCI info. supplied by probing */
+	union {
+		struct rte_pci_device *pci_dev; /**< PCI info. supplied by probing */
+		struct rte_vmbus_device *vmbus_dev; /**< VMBUS info. supplied by probing */
+	};
 	/** User application callbacks for NIC interrupts */
 	struct rte_eth_dev_cb_list link_intr_cbs;
 	/**
@@ -1696,7 +1700,14 @@ typedef int (*eth_dev_uninit_t)(struct rte_eth_dev *eth_dev);
  * - The size of the private data to allocate for each matching device.
  */
 struct eth_driver {
-	struct rte_pci_driver pci_drv;    /**< The PMD is also a PCI driver. */
+	union {
+		struct rte_pci_driver pci_drv;    /**< The PMD is also a PCI driver. */
+		struct rte_vmbus_driver vmbus_drv;/**< The PMD is also a VMBUS drv. */
+	};
+	enum {
+		RTE_BUS_PCI=0,
+		RTE_BUS_VMBUS
+	} bus_type;			  /**< Device bus type. */
 	eth_dev_init_t eth_dev_init;      /**< Device init function. */
 	eth_dev_uninit_t eth_dev_uninit;  /**< Device uninit function. */
 	unsigned int dev_private_size;    /**< Size of device private data. */
-- 
2.1.4
    
    
More information about the dev
mailing list