[dpdk-dev] [PATCH 2/2] uio: new driver to support PCI MSI-X

Stephen Hemminger stephen at networkplumber.org
Thu Oct 1 00:28:58 CEST 2015


This driver allows using PCI device with Message Signalled Interrupt
from userspace. The API is similar to the igb_uio driver used by the DPDK.
Via ioctl it provides a mechanism to map MSI-X interrupts into event
file descriptors similar to VFIO.

VFIO is a better choice if IOMMU is available, but often userspace drivers
have to work in environments where IOMMU support (real or emulated) is
not available.  All UIO drivers that support DMA are not secure against
rogue userspace applications programming DMA hardware to access
private memory; this driver is no less secure than existing code.

Signed-off-by: Stephen Hemminger <stephen at networkplumber.org>
---
 drivers/uio/Kconfig          |   9 ++
 drivers/uio/Makefile         |   1 +
 drivers/uio/uio_msi.c        | 378 +++++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/Kbuild    |   1 +
 include/uapi/linux/uio_msi.h |  22 +++
 5 files changed, 411 insertions(+)
 create mode 100644 drivers/uio/uio_msi.c
 create mode 100644 include/uapi/linux/uio_msi.h

diff --git a/drivers/uio/Kconfig b/drivers/uio/Kconfig
index 52c98ce..04adfa0 100644
--- a/drivers/uio/Kconfig
+++ b/drivers/uio/Kconfig
@@ -93,6 +93,15 @@ config UIO_PCI_GENERIC
 	  primarily, for virtualization scenarios.
 	  If you compile this as a module, it will be called uio_pci_generic.
 
+config UIO_PCI_MSI
+       tristate "Generic driver supporting MSI-x on PCI Express cards"
+	depends on PCI
+	help
+	  Generic driver that provides Message Signalled IRQ events
+	  similar to VFIO. If IOMMMU is available please use VFIO
+	  instead since it provides more security.
+	  If you compile this as a module, it will be called uio_msi.
+
 config UIO_NETX
 	tristate "Hilscher NetX Card driver"
 	depends on PCI
diff --git a/drivers/uio/Makefile b/drivers/uio/Makefile
index 8560dad..62fc44b 100644
--- a/drivers/uio/Makefile
+++ b/drivers/uio/Makefile
@@ -9,3 +9,4 @@ obj-$(CONFIG_UIO_NETX)	+= uio_netx.o
 obj-$(CONFIG_UIO_PRUSS)         += uio_pruss.o
 obj-$(CONFIG_UIO_MF624)         += uio_mf624.o
 obj-$(CONFIG_UIO_FSL_ELBC_GPCM)	+= uio_fsl_elbc_gpcm.o
+obj-$(CONFIG_UIO_PCI_MSI)	+= uio_msi.o
diff --git a/drivers/uio/uio_msi.c b/drivers/uio/uio_msi.c
new file mode 100644
index 0000000..802b5c4
--- /dev/null
+++ b/drivers/uio/uio_msi.c
@@ -0,0 +1,378 @@
+/*-
+ *
+ * Copyright (c) 2015 by Brocade Communications Systems, Inc.
+ * Author: Stephen Hemminger <stephen at networkplumber.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 only.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/device.h>
+#include <linux/interrupt.h>
+#include <linux/eventfd.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/uio_driver.h>
+#include <linux/msi.h>
+#include <linux/uio_msi.h>
+
+#define DRIVER_VERSION		"0.1.1"
+#define MAX_MSIX_VECTORS	64
+
+/* MSI-X vector information */
+struct uio_msi_pci_dev {
+	struct uio_info info;		/* UIO driver info */
+	struct pci_dev *pdev;		/* PCI device */
+	struct mutex	mutex;		/* open/release/ioctl mutex */
+	int		ref_cnt;	/* references to device */
+	unsigned int	max_vectors;	/* MSI-X slots available */
+	struct msix_entry *msix;	/* MSI-X vector table */
+	struct uio_msi_irq_ctx {
+		struct eventfd_ctx *trigger; /* vector to eventfd */
+		char *name;		/* name in /proc/interrupts */
+	} *ctx;
+};
+
+static irqreturn_t uio_intx_irqhandler(int irq, void *arg)
+{
+	struct uio_msi_pci_dev *udev = arg;
+
+	if (pci_check_and_mask_intx(udev->pdev)) {
+		eventfd_signal(udev->ctx->trigger, 1);
+		return IRQ_HANDLED;
+	}
+
+	return IRQ_NONE;
+}
+
+static irqreturn_t uio_msi_irqhandler(int irq, void *arg)
+{
+	struct eventfd_ctx *trigger = arg;
+
+	eventfd_signal(trigger, 1);
+	return IRQ_HANDLED;
+}
+
+/* set the mapping between vector # and existing eventfd. */
+static int set_irq_eventfd(struct uio_msi_pci_dev *udev, u32 vec, int fd)
+{
+	struct eventfd_ctx *trigger;
+	int irq, err;
+
+	if (vec >= udev->max_vectors) {
+		dev_notice(&udev->pdev->dev, "vec %u >= num_vec %u\n",
+			   vec, udev->max_vectors);
+		return -ERANGE;
+	}
+
+	irq = udev->msix[vec].vector;
+	trigger = udev->ctx[vec].trigger;
+	if (trigger) {
+		/* Clearup existing irq mapping */
+		free_irq(irq, trigger);
+		eventfd_ctx_put(trigger);
+		udev->ctx[vec].trigger = NULL;
+	}
+
+	/* Passing -1 is used to disable interrupt */
+	if (fd < 0)
+		return 0;
+
+	trigger = eventfd_ctx_fdget(fd);
+	if (IS_ERR(trigger)) {
+		err = PTR_ERR(trigger);
+		dev_notice(&udev->pdev->dev,
+			   "eventfd ctx get failed: %d\n", err);
+		return err;
+	}
+
+	if (udev->msix)
+		err = request_irq(irq, uio_msi_irqhandler, 0,
+				  udev->ctx[vec].name, trigger);
+	else
+		err = request_irq(irq, uio_intx_irqhandler, IRQF_SHARED,
+				  udev->ctx[vec].name, udev);
+
+	if (err) {
+		dev_notice(&udev->pdev->dev,
+			   "request irq failed: %d\n", err);
+		eventfd_ctx_put(trigger);
+		return err;
+	}
+
+	udev->ctx[vec].trigger = trigger;
+	return 0;
+}
+
+static int
+uio_msi_ioctl(struct uio_info *info, unsigned int cmd, unsigned long arg)
+{
+	struct uio_msi_pci_dev *udev
+		= container_of(info, struct uio_msi_pci_dev, info);
+	struct uio_msi_irq_set hdr;
+	int err;
+
+	switch (cmd) {
+	case UIO_MSI_IRQ_SET:
+		if (copy_from_user(&hdr, (void __user *)arg, sizeof(hdr)))
+			return -EFAULT;
+
+		mutex_lock(&udev->mutex);
+		err = set_irq_eventfd(udev, hdr.vec, hdr.fd);
+		mutex_unlock(&udev->mutex);
+		break;
+	default:
+		err = -EOPNOTSUPP;
+	}
+	return err;
+}
+
+/* Opening the UIO device for first time enables MSI-X */
+static int
+uio_msi_open(struct uio_info *info, struct inode *inode)
+{
+	struct uio_msi_pci_dev *udev
+		= container_of(info, struct uio_msi_pci_dev, info);
+	int err = 0;
+
+	mutex_lock(&udev->mutex);
+	if (udev->ref_cnt++ == 0) {
+		if (udev->msix)
+			err = pci_enable_msix(udev->pdev, udev->msix,
+					      udev->max_vectors);
+	}
+	mutex_unlock(&udev->mutex);
+
+	return err;
+}
+
+/* Last close of the UIO device releases/disables all IRQ's */
+static int
+uio_msi_release(struct uio_info *info, struct inode *inode)
+{
+	struct uio_msi_pci_dev *udev
+		= container_of(info, struct uio_msi_pci_dev, info);
+	int i;
+
+	mutex_lock(&udev->mutex);
+	if (--udev->ref_cnt == 0) {
+		for (i = 0; i < udev->max_vectors; i++) {
+			int irq = udev->msix[i].vector;
+			struct eventfd_ctx *trigger = udev->ctx[i].trigger;
+
+			if (!trigger)
+				continue;
+
+			free_irq(irq, trigger);
+			eventfd_ctx_put(trigger);
+			udev->ctx[i].trigger = NULL;
+		}
+
+		if (udev->msix)
+			pci_disable_msix(udev->pdev);
+	}
+	mutex_unlock(&udev->mutex);
+
+	return 0;
+}
+
+/* Unmap previously ioremap'd resources */
+static void
+release_iomaps(struct uio_mem *mem)
+{
+	int i;
+
+	for (i = 0; i < MAX_UIO_MAPS; i++, mem++) {
+		if (mem->internal_addr)
+			iounmap(mem->internal_addr);
+	}
+}
+
+static int
+setup_maps(struct pci_dev *pdev, struct uio_info *info)
+{
+	int i, m = 0, p = 0, err;
+	static const char * const bar_names[] = {
+		"BAR0",	"BAR1",	"BAR2",	"BAR3",	"BAR4",	"BAR5",
+	};
+
+	for (i = 0; i < ARRAY_SIZE(bar_names); i++) {
+		unsigned long start = pci_resource_start(pdev, i);
+		unsigned long flags = pci_resource_flags(pdev, i);
+		unsigned long len = pci_resource_len(pdev, i);
+
+		if (start == 0 || len == 0)
+			continue;
+
+		if (flags & IORESOURCE_MEM) {
+			void __iomem *addr;
+
+			if (m >= MAX_UIO_MAPS)
+				continue;
+
+			addr = ioremap(start, len);
+			if (addr == NULL) {
+				err = -EINVAL;
+				goto fail;
+			}
+
+			info->mem[m].name = bar_names[i];
+			info->mem[m].addr = start;
+			info->mem[m].internal_addr = addr;
+			info->mem[m].size = len;
+			info->mem[m].memtype = UIO_MEM_PHYS;
+			++m;
+		} else if (flags & IORESOURCE_IO) {
+			if (p >= MAX_UIO_PORT_REGIONS)
+				continue;
+
+			info->port[p].name = bar_names[i];
+			info->port[p].start = start;
+			info->port[p].size = len;
+			info->port[p].porttype = UIO_PORT_X86;
+			++p;
+		}
+	}
+
+	return 0;
+ fail:
+	for (i = 0; i < m; i++)
+		iounmap(info->mem[i].internal_addr);
+	return err;
+}
+
+static int uio_msi_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	struct uio_msi_pci_dev *udev;
+	int i, err, vectors;
+
+	udev = kzalloc(sizeof(struct uio_msi_pci_dev), GFP_KERNEL);
+	if (!udev)
+		return -ENOMEM;
+
+	err = pci_enable_device(pdev);
+	if (err != 0) {
+		dev_err(&pdev->dev, "cannot enable PCI device\n");
+		goto fail_free;
+	}
+
+	err = pci_request_regions(pdev, "uio_msi");
+	if (err != 0) {
+		dev_err(&pdev->dev, "Cannot request regions\n");
+		goto fail_disable;
+	}
+
+	pci_set_master(pdev);
+
+	/* remap resources */
+	err = setup_maps(pdev, &udev->info);
+	if (err)
+		goto fail_release_iomem;
+
+	/* fill uio infos */
+	udev->info.name = "uio_msi";
+	udev->info.version = DRIVER_VERSION;
+	udev->info.priv = udev;
+	udev->pdev = pdev;
+	udev->info.ioctl = uio_msi_ioctl;
+	udev->info.open = uio_msi_open;
+	udev->info.release = uio_msi_release;
+	udev->info.irq = UIO_IRQ_CUSTOM;
+	mutex_init(&udev->mutex);
+
+	vectors = pci_msix_vec_count(pdev);
+	if (vectors > 0) {
+		udev->max_vectors = min_t(u16, vectors, MAX_MSIX_VECTORS);
+		dev_info(&pdev->dev, "using up to %u MSI-X vectors\n",
+			udev->max_vectors);
+
+		err = -ENOMEM;
+		udev->msix = kcalloc(udev->max_vectors,
+				     sizeof(struct msix_entry), GFP_KERNEL);
+		if (!udev->msix)
+			goto fail_release_iomem;
+	} else if (!pci_intx_mask_supported(pdev)) {
+		dev_err(&pdev->dev,
+			"device does not support MSI-X or INTX\n");
+		err = -EINVAL;
+		goto fail_release_iomem;
+	} else {
+		dev_notice(&pdev->dev, "using INTX\n");
+		udev->info.irq_flags = IRQF_SHARED;
+		udev->max_vectors = 1;
+	}
+
+	udev->ctx = kcalloc(udev->max_vectors,
+			    sizeof(struct uio_msi_irq_ctx), GFP_KERNEL);
+	if (!udev->ctx)
+		goto fail_free_msix;
+
+	for (i = 0; i < udev->max_vectors; i++) {
+		udev->msix[i].entry = i;
+
+		udev->ctx[i].name = kasprintf(GFP_KERNEL,
+					      KBUILD_MODNAME "[%d](%s)",
+					      i, pci_name(pdev));
+		if (!udev->ctx[i].name)
+			goto fail_free_ctx;
+	}
+
+	/* register uio driver */
+	err = uio_register_device(&pdev->dev, &udev->info);
+	if (err != 0)
+		goto fail_free_ctx;
+
+	pci_set_drvdata(pdev, udev);
+	return 0;
+
+fail_free_ctx:
+	for (i = 0; i < udev->max_vectors; i++)
+		kfree(udev->ctx[i].name);
+	kfree(udev->ctx);
+fail_free_msix:
+	kfree(udev->msix);
+fail_release_iomem:
+	release_iomaps(udev->info.mem);
+	pci_release_regions(pdev);
+fail_disable:
+	pci_disable_device(pdev);
+fail_free:
+	kfree(udev);
+
+	pr_notice("%s ret %d\n", __func__, err);
+	return err;
+}
+
+static void uio_msi_remove(struct pci_dev *pdev)
+{
+	struct uio_info *info = pci_get_drvdata(pdev);
+	struct uio_msi_pci_dev *udev
+		= container_of(info, struct uio_msi_pci_dev, info);
+	int i;
+
+	uio_unregister_device(info);
+	release_iomaps(info->mem);
+
+	pci_release_regions(pdev);
+	for (i = 0; i < udev->max_vectors; i++)
+		kfree(udev->ctx[i].name);
+	kfree(udev->ctx);
+	kfree(udev->msix);
+	pci_disable_device(pdev);
+
+	pci_set_drvdata(pdev, NULL);
+	kfree(udev);
+}
+
+static struct pci_driver uio_msi_pci_driver = {
+	.name = "uio_msi",
+	.probe = uio_msi_probe,
+	.remove = uio_msi_remove,
+};
+
+module_pci_driver(uio_msi_pci_driver);
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Stephen Hemminger <stephen at networkplumber.org>");
+MODULE_DESCRIPTION("UIO driver for MSI PCI devices");
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index f7b2db4..d9497691 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -411,6 +411,7 @@ header-y += udp.h
 header-y += uhid.h
 header-y += uinput.h
 header-y += uio.h
+header-y += uio_msi.h
 header-y += ultrasound.h
 header-y += un.h
 header-y += unistd.h
diff --git a/include/uapi/linux/uio_msi.h b/include/uapi/linux/uio_msi.h
new file mode 100644
index 0000000..297de00
--- /dev/null
+++ b/include/uapi/linux/uio_msi.h
@@ -0,0 +1,22 @@
+/*
+ * UIO_MSI API definition
+ *
+ * Copyright (c) 2015 by Brocade Communications Systems, Inc.
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef _UIO_PCI_MSI_H
+#define _UIO_PCI_MSI_H
+
+struct uio_msi_irq_set {
+	u32 vec;
+	int fd;
+};
+
+#define UIO_MSI_BASE	0x86
+#define UIO_MSI_IRQ_SET	_IOW('I', UIO_MSI_BASE+1, struct uio_msi_irq_set)
+
+#endif
-- 
2.1.4



More information about the dev mailing list