[dpdk-dev] [PATCH 5/5] uio: integrate MSI-X support

Stephen Hemminger stephen at networkplumber.org
Mon May 18 19:40:14 CEST 2015


Add the new uio_msi driver as a supported driver model.

Signed-off-by: Stephen Hemminger <stephen at networkplumber.org>
---
 lib/librte_eal/common/include/rte_pci.h            |  1 +
 lib/librte_eal/linuxapp/eal/eal_interrupts.c       | 94 +++++++++++++++++++---
 lib/librte_eal/linuxapp/eal/eal_pci.c              |  4 +
 lib/librte_eal/linuxapp/eal/eal_pci_uio.c          | 59 ++++++++++++--
 lib/librte_eal/linuxapp/eal/eal_uio_msi.h          | 26 ++++++
 .../linuxapp/eal/include/exec-env/rte_interrupts.h |  1 +
 lib/librte_ether/rte_ethdev.c                      |  1 +
 tools/dpdk_nic_bind.py                             |  2 +-
 8 files changed, 166 insertions(+), 22 deletions(-)
 create mode 100644 lib/librte_eal/linuxapp/eal/eal_uio_msi.h

diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/librte_eal/common/include/rte_pci.h
index 223d3cd..106f4f7 100644
--- a/lib/librte_eal/common/include/rte_pci.h
+++ b/lib/librte_eal/common/include/rte_pci.h
@@ -147,6 +147,7 @@ enum rte_kernel_driver {
 	RTE_KDRV_IGB_UIO,
 	RTE_KDRV_VFIO,
 	RTE_KDRV_UIO_GENERIC,
+	RTE_KDRV_UIO_MSIX,
 };
 
 /**
diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
index fd97fc4..8cdab58 100644
--- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c
+++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
@@ -66,6 +66,7 @@
 
 #include "eal_private.h"
 #include "eal_vfio.h"
+#include "eal_uio_msi.h"
 
 #define EAL_INTR_EPOLL_WAIT_FOREVER (-1)
 
@@ -89,9 +90,7 @@ union intr_pipefds{
  */
 union rte_intr_read_buffer {
 	int uio_intr_count;              /* for uio device */
-#ifdef VFIO_PRESENT
-	uint64_t vfio_intr_count;        /* for vfio device */
-#endif
+	uint64_t eventfd_count;		 /* for vfio and uio-msi */
 	uint64_t timerfd_num;            /* for timerfd */
 	char charbuf[16];                /* for others */
 };
@@ -356,6 +355,67 @@ vfio_disable_msix(struct rte_intr_handle *intr_handle) {
 }
 #endif
 
+/* Bind an eventfd to each MSI-X vector; return 0, or -1 on ioctl failure. */
+static int
+uio_msix_enable(struct rte_intr_handle *intr_handle)
+{
+	int i, max_intr;
+
+	if (!intr_handle->max_intr ||
+	    intr_handle->max_intr > RTE_MAX_RXTX_INTR_VEC_ID)
+		max_intr = RTE_MAX_RXTX_INTR_VEC_ID + 1;
+	else
+		max_intr = intr_handle->max_intr;
+
+	/* Actual number of MSI-X interrupts might be less than requested */
+	for (i = 0; i < max_intr; i++) {
+		struct uio_msi_irq_set irqs = {
+			.vec = i,
+			.fd = intr_handle->fd,	/* last vector signals the main fd */
+		};
+
+		if (i < max_intr - 1)	/* never index efds[] with the last vector */
+			irqs.fd = intr_handle->efds[i];
+
+		if (ioctl(intr_handle->vfio_dev_fd, UIO_MSI_IRQ_SET, &irqs) < 0) {
+			RTE_LOG(ERR, EAL,
+				"Error enabling MSI-X event %u fd %d (%s)\n",
+				irqs.vec, irqs.fd, strerror(errno));
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/* Disable every MSI-X vector by passing fd -1; return 0, or -1 on failure. */
+static int
+uio_msix_disable(struct rte_intr_handle *intr_handle)
+{
+	int i, max_intr;
+
+	if (!intr_handle->max_intr ||
+	    intr_handle->max_intr > RTE_MAX_RXTX_INTR_VEC_ID)
+		max_intr = RTE_MAX_RXTX_INTR_VEC_ID + 1;
+	else
+		max_intr = intr_handle->max_intr;
+
+	for (i = 0; i < max_intr; i++) {
+		struct uio_msi_irq_set irqs = {
+			.vec = i,
+			.fd = -1,	/* -1 in place of an eventfd */
+		};
+
+		if (ioctl(intr_handle->vfio_dev_fd, UIO_MSI_IRQ_SET, &irqs) < 0) {
+			RTE_LOG(ERR, EAL,
+				"Error disabling MSI-X event %u (%s)\n",
+				i, strerror(errno));
+			return -1;	/* stops at the first vector that fails */
+		}
+	}
+	return 0;
+}
+
 static int
 uio_intx_intr_disable(struct rte_intr_handle *intr_handle)
 {
@@ -584,6 +644,10 @@ rte_intr_enable(struct rte_intr_handle *intr_handle)
 		if (uio_intx_intr_enable(intr_handle))
 			return -1;
 		break;
+	case RTE_INTR_HANDLE_UIO_MSIX:
+		if (uio_msix_enable(intr_handle))
+			return -1;
+		break;
 	/* not used at this moment */
 	case RTE_INTR_HANDLE_ALARM:
 		return -1;
@@ -628,6 +692,10 @@ rte_intr_disable(struct rte_intr_handle *intr_handle)
 		if (uio_intx_intr_disable(intr_handle))
 			return -1;
 		break;
+	case RTE_INTR_HANDLE_UIO_MSIX:
+		if (uio_msix_disable(intr_handle))
+			return -1;
+		break;
 	/* not used at this moment */
 	case RTE_INTR_HANDLE_ALARM:
 		return -1;
@@ -696,16 +764,19 @@ eal_intr_process_interrupts(struct epoll_event *events, int nfds)
 		case RTE_INTR_HANDLE_UIO:
 			bytes_read = sizeof(buf.uio_intr_count);
 			break;
+
 		case RTE_INTR_HANDLE_ALARM:
 			bytes_read = sizeof(buf.timerfd_num);
 			break;
-#ifdef VFIO_PRESENT
+
+		case RTE_INTR_HANDLE_UIO_MSIX:
+#ifdef RTE_EAL_VFIO
 		case RTE_INTR_HANDLE_VFIO_MSIX:
 		case RTE_INTR_HANDLE_VFIO_MSI:
 		case RTE_INTR_HANDLE_VFIO_LEGACY:
-			bytes_read = sizeof(buf.vfio_intr_count);
-			break;
 #endif
+			bytes_read = sizeof(buf.eventfd_count);
+			break;
 		default:
 			bytes_read = 1;
 			break;
@@ -895,17 +966,14 @@ static void
 eal_intr_proc_rxtx_intr(int fd, struct rte_intr_handle *intr_handle)
 {
 	union rte_intr_read_buffer buf;
-	int bytes_read = 1;
+	int bytes_read = sizeof(buf.eventfd_count);
 
-	if (intr_handle->type != RTE_INTR_HANDLE_VFIO_MSIX) {
-		RTE_LOG(ERR, EAL, "intr type should be VFIO_MSIX\n");
+	if (intr_handle->type != RTE_INTR_HANDLE_VFIO_MSIX &&
+	    intr_handle->type != RTE_INTR_HANDLE_UIO_MSIX) {
+		RTE_LOG(ERR, EAL, "intr type should be VFIO_MSIX or UIO_MSIX\n");
 		return;
 	}
 
-#ifdef VFIO_PRESENT
-	bytes_read = sizeof(buf.vfio_intr_count);
-#endif
-
 	/**
 	 * read out to clear the ready-to-be-read flag
 	 * for epoll_wait.
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
index d2adc66..814dc7c 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -345,6 +345,8 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus,
 			dev->kdrv = RTE_KDRV_IGB_UIO;
 		else if (!strcmp(driver, "uio_pci_generic"))
 			dev->kdrv = RTE_KDRV_UIO_GENERIC;
+		else if (!strcmp(driver, "uio_msi"))
+			dev->kdrv = RTE_KDRV_UIO_MSIX;
 		else
 			dev->kdrv = RTE_KDRV_UNKNOWN;
 	} else if (ret < 0) {
@@ -576,6 +578,7 @@ pci_map_device(struct rte_pci_device *dev)
 			ret = pci_vfio_map_resource(dev);
 #endif
 		break;
+	case RTE_KDRV_UIO_MSIX:
 	case RTE_KDRV_IGB_UIO:
 	case RTE_KDRV_UIO_GENERIC:
 		/* map resources for devices that use uio */
@@ -603,6 +606,7 @@ pci_unmap_device(struct rte_pci_device *dev)
 	case RTE_KDRV_VFIO:
 		RTE_LOG(ERR, EAL, "Hotplug doesn't support vfio yet\n");
 		break;
+	case RTE_KDRV_UIO_MSIX:
 	case RTE_KDRV_IGB_UIO:
 	case RTE_KDRV_UIO_GENERIC:
 		/* unmap resources for devices that use uio */
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
index b5116a7..7eee828 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
@@ -38,6 +38,7 @@
 #include <sys/stat.h>
 #include <sys/mman.h>
 #include <linux/pci_regs.h>
+#include <sys/eventfd.h>
 
 #include <rte_log.h>
 #include <rte_pci.h>
@@ -259,13 +260,42 @@ pci_get_uio_dev(struct rte_pci_device *dev, char *dstbuf,
 	return uio_num;
 }
 
+static int
+pci_uio_msix_init(struct rte_pci_device *dev)
+{
+	int i, fd;
+
+	/* create the eventfds for MSI-X: first the handle's main interrupt fd */
+	fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+	if (fd < 0) {
+		RTE_LOG(ERR, EAL, "  cannot set up irq eventfd (%s)\n",
+			strerror(errno));
+		return -1;
+	}
+	dev->intr_handle.fd = fd;
+
+	/* an additional eventfd for each vector, stored in efds[] */
+	for (i = 0; i < RTE_MAX_RXTX_INTR_VEC_ID; i++) {
+		fd = eventfd(0, EFD_NONBLOCK|EFD_CLOEXEC);
+		if (fd < 0) {
+			RTE_LOG(ERR, EAL,
+				" cannot set up eventfd (%s)\n",
+				strerror(errno));
+			return -1;	/* NOTE(review): earlier eventfds are not closed */
+		}
+
+		dev->intr_handle.efds[i] = fd;
+	}
+
+	return 0;	/* all eventfds created and recorded in intr_handle */
+}
+
 /* map the PCI resource of a PCI device in virtual memory */
 int
 pci_uio_map_resource(struct rte_pci_device *dev)
 {
-	int i, map_idx;
+	int i, fd, map_idx;
 	char dirname[PATH_MAX];
-	char cfgname[PATH_MAX];
 	char devname[PATH_MAX]; /* contains the /dev/uioX */
 	void *mapaddr;
 	int uio_num;
@@ -274,11 +304,15 @@ pci_uio_map_resource(struct rte_pci_device *dev)
 	struct mapped_pci_resource *uio_res;
 	struct mapped_pci_res_list *uio_res_list = RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list);
 	struct pci_map *maps;
+	char cfgname[PATH_MAX];
 
 	dev->intr_handle.fd = -1;
-	dev->intr_handle.uio_cfg_fd = -1;
+	dev->intr_handle.vfio_dev_fd = -1;
 	dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
 
+	for (i = 0; i < RTE_MAX_RXTX_INTR_VEC_ID; i++)
+		dev->intr_handle.efds[i] = -1;
+
 	/* secondary processes - use already recorded details */
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return pci_uio_map_secondary(dev);
@@ -293,15 +327,15 @@ pci_uio_map_resource(struct rte_pci_device *dev)
 	snprintf(devname, sizeof(devname), "/dev/uio%u", uio_num);
 
 	/* save fd if in primary process */
-	dev->intr_handle.fd = open(devname, O_RDWR);
-	if (dev->intr_handle.fd < 0) {
+	fd = open(devname, O_RDWR);
+	if (fd < 0) {
 		RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
 			devname, strerror(errno));
 		return -1;
 	}
 
 	snprintf(cfgname, sizeof(cfgname),
-			"/sys/class/uio/uio%u/device/config", uio_num);
+		 "/sys/class/uio/uio%u/device/config", uio_num);
 	dev->intr_handle.uio_cfg_fd = open(cfgname, O_RDWR);
 	if (dev->intr_handle.uio_cfg_fd < 0) {
 		RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
@@ -309,9 +343,17 @@ pci_uio_map_resource(struct rte_pci_device *dev)
 		return -1;
 	}
 
-	if (dev->kdrv == RTE_KDRV_IGB_UIO)
+	if (dev->kdrv == RTE_KDRV_UIO_MSIX) {
+		dev->intr_handle.vfio_dev_fd = fd;
+		dev->intr_handle.type = RTE_INTR_HANDLE_UIO_MSIX;
+		if (pci_uio_msix_init(dev) < 0)
+			return -1;
+	} else if (dev->kdrv == RTE_KDRV_IGB_UIO) {
+		dev->intr_handle.fd = fd;
 		dev->intr_handle.type = RTE_INTR_HANDLE_UIO;
-	else {
+	} else {
+
+		dev->intr_handle.fd = fd;
 		dev->intr_handle.type = RTE_INTR_HANDLE_UIO_INTX;
 
 		/* set bus master that is not done by uio_pci_generic */
@@ -460,6 +502,7 @@ pci_uio_unmap_resource(struct rte_pci_device *dev)
 
 	/* close fd if in primary process */
 	close(dev->intr_handle.fd);
+	close(dev->intr_handle.uio_cfg_fd);
 
 	dev->intr_handle.fd = -1;
 	dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
diff --git a/lib/librte_eal/linuxapp/eal/eal_uio_msi.h b/lib/librte_eal/linuxapp/eal/eal_uio_msi.h
new file mode 100644
index 0000000..f01f302
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_uio_msi.h
@@ -0,0 +1,26 @@
+/*
+ * UIO_MSI API definition
+ *
+ * Copyright (c) 2015 by Brocade Communications Systems, Inc.
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef EAL_UIO_MSI_H
+#define EAL_UIO_MSI_H
+
+/* Driver is not upstream yet. */
+
+#include <sys/ioctl.h>
+
+struct uio_msi_irq_set {
+	uint32_t vec;	/* vector number (EAL passes the MSI-X index) */
+	int fd;		/* eventfd for the vector; EAL passes -1 to disable */
+};
+
+#define UIO_MSI_BASE	0x86
+#define UIO_MSI_IRQ_SET	_IOW('I', UIO_MSI_BASE+1, struct uio_msi_irq_set)
+
+#endif
diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
index 9843001..d3cf680 100644
--- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
+++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
@@ -44,6 +44,7 @@ enum rte_intr_handle_type {
 	RTE_INTR_HANDLE_UNKNOWN = 0,
 	RTE_INTR_HANDLE_UIO,          /**< uio device handle */
 	RTE_INTR_HANDLE_UIO_INTX,     /**< uio generic handle */
+	RTE_INTR_HANDLE_UIO_MSIX,     /**< uio with MSI-X support */
 	RTE_INTR_HANDLE_VFIO_LEGACY,  /**< vfio device handle (legacy) */
 	RTE_INTR_HANDLE_VFIO_MSI,     /**< vfio device handle (MSI) */
 	RTE_INTR_HANDLE_VFIO_MSIX,    /**< vfio device handle (MSIX) */
diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index cf9a79a..3fbc4a1 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -515,6 +515,7 @@ rte_eth_dev_is_detachable(uint8_t port_id)
 		switch (rte_eth_devices[port_id].pci_dev->kdrv) {
 		case RTE_KDRV_IGB_UIO:
 		case RTE_KDRV_UIO_GENERIC:
+		case RTE_KDRV_UIO_MSIX:
 			break;
 		case RTE_KDRV_VFIO:
 		default:
diff --git a/tools/dpdk_nic_bind.py b/tools/dpdk_nic_bind.py
index 8523f82..20b4b06 100755
--- a/tools/dpdk_nic_bind.py
+++ b/tools/dpdk_nic_bind.py
@@ -43,7 +43,7 @@ ETHERNET_CLASS = "0200"
 # Each device within this is itself a dictionary of device properties
 devices = {}
 # list of supported DPDK drivers
-dpdk_drivers = [ "igb_uio", "vfio-pci", "uio_pci_generic" ]
+dpdk_drivers = [ "igb_uio", "vfio-pci", "uio_pci_generic", "uio_msi" ]
 
 # command-line arg flags
 b_flag = None
-- 
2.1.4



More information about the dev mailing list