[dpdk-dev] [PATCH] igb_uio: stop device when closing /dev/uioX

Jianfeng Tan jianfeng.tan at intel.com
Fri Dec 2 17:45:46 CET 2016


Depends-on: http://dpdk.org/dev/patchwork/patch/17493/

When a DPDK application grab a PCI device, device and driver work
together to Rx/Tx packets. If the DPDK app crashes or gets killed,
there's no chance for DPDK driver to stop the device, which means
rte_eth_dev_stop() will not be called.

This could result in problems. In virtio's case, the device (the
vhost backend), will keep working. If packets come, the vhost will
copy them into the vring, which is negotiated with the previous DPDK
app. But the resources, especially hugepages, are recycled by VM
kernel. What's worse, the memory could be allocated for other usage,
and re-written. This leads to an uncertain situation. Like this post
has reported, https://bugs.launchpad.net/qemu/+bug/1558175, QEMU's
vhost finds out wrong value, and exits the whole QEMU process.

To make it right, we need to restart (or reset) the device and make
the device go into the initial state, when uio-generic or igb_uio
recycles the PCI device. There's a chance in uio framework to disable
devices when /dev/uioX gets closed. Here we enable the pci device
in open() hook and disable it in release() hook.

However, if device is not enabled in probe() phase any more, we can
not register irq in probe() through uio_register_device(). To address
that, we invoke request_irq() to register callback directly.

Similar change needs to be done in uio-generic driver. And vfio-pci
seems to have done that already.

Signed-off-by: Jianfeng Tan <jianfeng.tan at intel.com>
---
 lib/librte_eal/linuxapp/igb_uio/igb_uio.c | 171 +++++++++++++++++++-----------
 1 file changed, 108 insertions(+), 63 deletions(-)

diff --git a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
index e9d78fb..048390d 100644
--- a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
+++ b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
@@ -157,8 +157,10 @@ igbuio_pci_irqcontrol(struct uio_info *info, s32 irq_state)
  * If yes, disable it here and will be enable later.
  */
 static irqreturn_t
-igbuio_pci_irqhandler(int irq, struct uio_info *info)
+igbuio_pci_irqhandler(int irq, void *dev_id)
 {
+	struct uio_device *idev = (struct uio_device *)dev_id;
+	struct uio_info *info = idev->info;
 	struct rte_uio_pci_dev *udev = info->priv;
 
 	/* Legacy mode need to mask in hardware */
@@ -166,6 +168,8 @@ igbuio_pci_irqhandler(int irq, struct uio_info *info)
 	    !pci_check_and_mask_intx(udev->pdev))
 		return IRQ_NONE;
 
+	uio_event_notify(info);
+
 	/* Message signal mode, no share IRQ and automasked */
 	return IRQ_HANDLED;
 }
@@ -216,20 +220,58 @@ igbuio_dom0_pci_mmap(struct uio_info *info, struct vm_area_struct *vma)
 }
 #endif
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0)
-static int __devinit
-#else
 static int
-#endif
-igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
+igbuio_intr_init(struct uio_info *info)
 {
-	struct rte_uio_pci_dev *udev;
+	struct rte_uio_pci_dev *udev = info->priv;
+	struct pci_dev *dev = udev->pdev;
 	struct msix_entry msix_entry;
-	int err;
+	int ret = 0;
 
-	udev = kzalloc(sizeof(struct rte_uio_pci_dev), GFP_KERNEL);
-	if (!udev)
-		return -ENOMEM;
+	switch (igbuio_intr_mode_preferred) {
+	case RTE_INTR_MODE_MSIX:
+		/* Only 1 msi-x vector needed */
+		msix_entry.entry = 0;
+		if (pci_enable_msix(dev, &msix_entry, 1) == 0) {
+			dev_dbg(&dev->dev, "using MSI-X");
+			info->irq = msix_entry.vector;
+			udev->mode = RTE_INTR_MODE_MSIX;
+			break;
+		}
+		/* fall back to INTX */
+	case RTE_INTR_MODE_LEGACY:
+		if (pci_intx_mask_supported(dev)) {
+			dev_dbg(&dev->dev, "using INTX");
+			info->irq_flags = IRQF_SHARED;
+			info->irq = dev->irq;
+			udev->mode = RTE_INTR_MODE_LEGACY;
+			break;
+		}
+		dev_notice(&dev->dev, "PCI INTX mask not supported\n");
+		ret = -EOPNOTSUPP;
+		/* fall back to no IRQ */
+	default:
+		udev->mode = RTE_INTR_MODE_NONE;
+		info->irq = 0;
+	}
+
+	if (info->irq) {
+		ret = request_irq(info->irq, igbuio_pci_irqhandler,
+				  info->irq_flags, info->name,
+				  info->uio_dev);
+		if (ret && udev->mode == RTE_INTR_MODE_MSIX)
+			pci_disable_msix(udev->pdev);
+	}
+
+	return ret;
+}
+
+static int
+igbuio_pci_open(struct uio_info *info, struct inode *inode)
+{
+	struct rte_uio_pci_dev *udev = info->priv;
+	struct pci_dev *dev = udev->pdev;
+	int err;
 
 	/*
 	 * enable device: ask low-level code to enable I/O and
@@ -238,30 +280,77 @@ igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
 	err = pci_enable_device(dev);
 	if (err != 0) {
 		dev_err(&dev->dev, "Cannot enable PCI device\n");
-		goto fail_free;
+		return err;
 	}
 
 	/* enable bus mastering on the device */
 	pci_set_master(dev);
 
 	/* set 64-bit DMA mask */
-	err = pci_set_dma_mask(dev,  DMA_BIT_MASK(64));
-	if (err != 0) {
+	err = pci_set_dma_mask(dev, DMA_BIT_MASK(64));
+	if (err) {
 		dev_err(&dev->dev, "Cannot set DMA mask\n");
-		goto fail_disable_dev;
+		goto error;
 	}
 
 	err = pci_set_consistent_dma_mask(dev, DMA_BIT_MASK(64));
-	if (err != 0) {
+	if (err) {
 		dev_err(&dev->dev, "Cannot set consistent DMA mask\n");
-		goto fail_disable_dev;
+		goto error;
+	}
+
+	err = igbuio_intr_init(info);
+	if (err) {
+		dev_err(&dev->dev, "Enable interrupt fails\n");
+		goto error;
+	} else {
+		dev_info(&dev->dev, "uio device registered with irq %lx\n",
+			 udev->info.irq);
+	}
+
+	return 0;
+error:
+	pci_disable_device(dev);
+	return err;
+}
+
+static int
+igbuio_pci_release(struct uio_info *info, struct inode *inode)
+{
+	struct rte_uio_pci_dev *udev = info->priv;
+	struct pci_dev *dev = udev->pdev;
+
+	dev_info(&udev->pdev->dev, "will be disable\n");
+	if (info->irq) {
+		free_irq(info->irq, info->uio_dev);
+		info->irq = 0;
 	}
+	if (udev->mode == RTE_INTR_MODE_MSIX)
+		pci_disable_msix(dev);
+	pci_disable_device(udev->pdev);
+	return 0;
+}
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0)
+static int __devinit
+#else
+static int
+#endif
+igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
+{
+	struct rte_uio_pci_dev *udev;
+	int err;
+
+	udev = kzalloc(sizeof(struct rte_uio_pci_dev), GFP_KERNEL);
+	if (!udev)
+		return -ENOMEM;
 
 	/* fill uio infos */
 	udev->info.name = "igb_uio";
 	udev->info.version = "0.1";
-	udev->info.handler = igbuio_pci_irqhandler;
 	udev->info.irqcontrol = igbuio_pci_irqcontrol;
+	udev->info.release = igbuio_pci_release;
+	udev->info.open = igbuio_pci_open;
 #ifdef CONFIG_XEN_DOM0
 	/* check if the driver run on Xen Dom0 */
 	if (xen_initial_domain())
@@ -270,42 +359,9 @@ igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
 	udev->info.priv = udev;
 	udev->pdev = dev;
 
-	switch (igbuio_intr_mode_preferred) {
-	case RTE_INTR_MODE_MSIX:
-		/* Only 1 msi-x vector needed */
-		msix_entry.entry = 0;
-		if (pci_enable_msix(dev, &msix_entry, 1) == 0) {
-			dev_dbg(&dev->dev, "using MSI-X");
-			udev->info.irq = msix_entry.vector;
-			udev->mode = RTE_INTR_MODE_MSIX;
-			break;
-		}
-		/* fall back to INTX */
-	case RTE_INTR_MODE_LEGACY:
-		if (pci_intx_mask_supported(dev)) {
-			dev_dbg(&dev->dev, "using INTX");
-			udev->info.irq_flags = IRQF_SHARED;
-			udev->info.irq = dev->irq;
-			udev->mode = RTE_INTR_MODE_LEGACY;
-			break;
-		}
-		dev_notice(&dev->dev, "PCI INTX mask not supported\n");
-		/* fall back to no IRQ */
-	case RTE_INTR_MODE_NONE:
-		udev->mode = RTE_INTR_MODE_NONE;
-		udev->info.irq = 0;
-		break;
-
-	default:
-		dev_err(&dev->dev, "invalid IRQ mode %u",
-			igbuio_intr_mode_preferred);
-		err = -EINVAL;
-		goto fail_disable_dev;
-	}
-
 	err = sysfs_create_group(&dev->dev.kobj, &dev_attr_grp);
 	if (err != 0)
-		goto fail_disable_irq;
+		goto fail_free;
 
 	/* register uio driver */
 	err = uio_register_device(&dev->dev, &udev->info);
@@ -314,18 +370,10 @@ igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
 
 	pci_set_drvdata(dev, udev);
 
-	dev_info(&dev->dev, "uio device registered with irq %lx\n",
-		 udev->info.irq);
-
 	return 0;
 
 fail_remove_group:
 	sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp);
-fail_disable_irq:
-	if (udev->mode == RTE_INTR_MODE_MSIX)
-		pci_disable_msix(udev->pdev);
-fail_disable_dev:
-	pci_disable_device(dev);
 fail_free:
 	kfree(udev);
 
@@ -339,9 +387,6 @@ igbuio_pci_remove(struct pci_dev *dev)
 
 	sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp);
 	uio_unregister_device(&udev->info);
-	if (udev->mode == RTE_INTR_MODE_MSIX)
-		pci_disable_msix(dev);
-	pci_disable_device(dev);
 	pci_set_drvdata(dev, NULL);
 	kfree(udev);
 }
-- 
2.7.4



More information about the dev mailing list