[dpdk-dev] [RFC] igb_uio: rework interrupt using pci_intx_mask

Stephen Hemminger stephen at networkplumber.org
Thu Jun 6 00:53:42 CEST 2013


The code to do interrupts in the igb_uio is broken on later kernel versions
because pci_cfg_access_lock must not be used from interrupt context. But there is
a much better way of doing all this now that a generic access to INTX
is available. Use pci_intx routines in later kernels, and provide backwards
compatible version for older kernels.

For enabling/disabling interrupts, better to program interrupt controller
via disable/enable irq rather than messing with PCI directly.

The driver was also broken for any mode other MSI-X. It did not do the
proper fallback to MSI then legacy mode if the platform does not support
MSI. This happens under virtualization environments where MSI is not
available. And the unwind code needed to handle the case of what ever
interrupt mode is used.

This version of the code is backported from our kernel modules, and
compile tested only. The pci_intx compatibility code is new and someone
who runs with older kernel should test it.

--- a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c	2013-06-05 14:41:46.588241089 -0700
+++ b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c	2013-06-05 15:46:01.299710839 -0700
@@ -28,14 +28,6 @@
 #include <linux/msi.h>
 #include <linux/version.h>
 
-/* Some function names changes between 3.2.0 and 3.3.0... */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3,3,0)
-#define PCI_LOCK pci_block_user_cfg_access
-#define PCI_UNLOCK pci_unblock_user_cfg_access
-#else
-#define PCI_LOCK pci_cfg_access_lock
-#define PCI_UNLOCK pci_cfg_access_unlock
-#endif
 
 /**
  * MSI-X related macros, copy from linux/pci_regs.h in kernel 2.6.39,
@@ -56,8 +48,7 @@
 enum igbuio_intr_mode {
 	IGBUIO_LEGACY_INTR_MODE = 0,
 	IGBUIO_MSI_INTR_MODE,
-	IGBUIO_MSIX_INTR_MODE,
-	IGBUIO_INTR_MODE_MAX
+	IGBUIO_MSIX_INTR_MODE
 };
 
 /**
@@ -66,8 +57,9 @@ enum igbuio_intr_mode {
 struct rte_uio_pci_dev {
 	struct uio_info info;
 	struct pci_dev *pdev;
-	spinlock_t lock; /* spinlock for accessing PCI config space or msix data in multi tasks/isr */
+	spinlock_t lock;
 	enum igbuio_intr_mode mode;
+	unsigned long flags;
 	struct msix_entry \
 		msix_entries[IGBUIO_NUM_MSI_VECTORS]; /* pointer to the msix vectors to be allocated later */
 };
@@ -87,70 +79,35 @@ igbuio_get_uio_pci_dev(struct uio_info *
 	return container_of(info, struct rte_uio_pci_dev, info);
 }
 
-/**
- * It masks the msix on/off of generating MSI-X messages.
- */
-static int
-igbuio_msix_mask_irq(struct msi_desc *desc, int32_t state)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,3,0)
+/* backport of intx mask support */
+static bool pci_intx_mask_supported(struct pci_dev *dev)
 {
-	uint32_t mask_bits = desc->masked;
-	unsigned offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
-						PCI_MSIX_ENTRY_VECTOR_CTRL;
-
-	if (state != 0)
-		mask_bits &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
-	else
-		mask_bits |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
-
-	if (mask_bits != desc->masked) {
-		writel(mask_bits, desc->mask_base + offset);
-		readl(desc->mask_base);
-		desc->masked = mask_bits;
-	}
-
-	return 0;
+	/* assume since this is known HW, that intx works. */
+	return true;
 }
 
-/**
- * This function sets/clears the masks for generating LSC interrupts.
- *
- * @param info
- *   The pointer to struct uio_info.
- * @param on
- *   The on/off flag of masking LSC.
- * @return
- *   -On success, zero value.
- *   -On failure, a negative value.
- */
-static int
-igbuio_set_interrupt_mask(struct rte_uio_pci_dev *udev, int32_t state)
+static bool pci_check_and_mask_intx(struct pci_dev *pdev)
 {
-	struct pci_dev *pdev = udev->pdev;
-
-	if (udev->mode == IGBUIO_MSIX_INTR_MODE) {
-		struct msi_desc *desc;
-
-		list_for_each_entry(desc, &pdev->msi_list, list) {
-			igbuio_msix_mask_irq(desc, state);
-		}
-	}
-	else if (udev->mode == IGBUIO_LEGACY_INTR_MODE) {
-		uint32_t status;
-		uint16_t old, new;
-
-		pci_read_config_dword(pdev, PCI_COMMAND, &status);
+	u32 status;
+	u16 old, new;
+	bool ret = false;
+
+	pci_block_user_cfg_access(pdev);
+	pci_read_config_dword(pdev, PCI_COMMAND, &status);
+	if (status & PCI_STATUS_INTERRUPT) {
 		old = status;
-		if (state != 0)
-			new = old & (~PCI_COMMAND_INTX_DISABLE);
-		else
-			new = old | PCI_COMMAND_INTX_DISABLE;
+		new = old | PCI_COMMAND_INTX_DISABLE;
 
 		if (old != new)
 			pci_write_config_word(pdev, PCI_COMMAND, new);
+		ret = true;
 	}
+	pci_unblock_user_cfg_access(pdev);
 
-	return 0;
+	return ret;
 }
+#endif
 
 /**
  * This is the irqcontrol callback to be registered to uio_info.
@@ -173,11 +130,13 @@ igbuio_pci_irqcontrol(struct uio_info *i
 	struct pci_dev *pdev = udev->pdev;
 
 	spin_lock_irqsave(&udev->lock, flags);
-	PCI_LOCK(pdev);
-
-	igbuio_set_interrupt_mask(udev, irq_state);
-
-	PCI_UNLOCK(pdev);
+	if (irq_state) {
+		if (test_and_clear_bit(0, &udev->flags))
+			enable_irq(info->irq);
+	} else {
+		if (!test_and_set_bit(0, &udev->flags))
+			disable_irq(info->irq);
+	}
 	spin_unlock_irqrestore(&udev->lock, flags);
 
 	return 0;
@@ -190,35 +149,25 @@ igbuio_pci_irqcontrol(struct uio_info *i
 static irqreturn_t
 igbuio_pci_irqhandler(int irq, struct uio_info *info)
 {
-	irqreturn_t ret = IRQ_NONE;
-	unsigned long flags;
 	struct rte_uio_pci_dev *udev = igbuio_get_uio_pci_dev(info);
-	struct pci_dev *pdev = udev->pdev;
-	uint32_t cmd_status_dword;
-	uint16_t status;
 
-	spin_lock_irqsave(&udev->lock, flags);
-	/* block userspace PCI config reads/writes */
-	PCI_LOCK(pdev);
-
-	/* for legacy mode, interrupt maybe shared */
-	if (udev->mode == IGBUIO_LEGACY_INTR_MODE) {
-		pci_read_config_dword(pdev, PCI_COMMAND, &cmd_status_dword);
-		status = cmd_status_dword >> 16;
-		/* interrupt is not ours, goes to out */
-		if (!(status & PCI_STATUS_INTERRUPT))
-			goto done;
-	}
+	/* Legacy mode need to mask in hardware */
+	if (udev->mode == IGBUIO_LEGACY_INTR_MODE &&
+	    !pci_check_and_mask_intx(udev->pdev))
+		return IRQ_NONE;
 
-	igbuio_set_interrupt_mask(udev, 0);
-	ret = IRQ_HANDLED;
-done:
-	/* unblock userspace PCI config reads/writes */
-	PCI_UNLOCK(pdev);
-	spin_unlock_irqrestore(&udev->lock, flags);
-	printk(KERN_INFO "irq 0x%x %s\n", irq, (ret == IRQ_HANDLED) ? "handled" : "not handled");
+	/* Message signal mode, no share IRQ and automasked */
+	return IRQ_HANDLED;
+}
 
-	return ret;
+/* Unwind irq setup */
+static void
+igbuio_irq_release(struct rte_uio_pci_dev *udev)
+{
+	if (udev->mode == IGBUIO_MSIX_INTR_MODE)
+		pci_disable_msix(udev->pdev);
+	else if (udev->mode == IGBUIO_MSI_INTR_MODE)
+		pci_disable_msi(udev->pdev);
 }
 
 /* Remap pci resources described by bar #pci_bar in uio resource n. */
@@ -310,35 +259,42 @@ igbuio_pci_probe(struct pci_dev *dev, co
 	udev->mode = 0; /* set the default value for interrupt mode */
 	spin_lock_init(&udev->lock);
 
-	/* check if it need to try msix first */
-	if (igbuio_intr_mode_preferred == IGBUIO_MSIX_INTR_MODE) {
+	switch (igbuio_intr_mode_preferred) {
+	case IGBUIO_MSIX_INTR_MODE: {
 		int vector;
 
-		for (vector = 0; vector < IGBUIO_NUM_MSI_VECTORS; vector ++)
+		for (vector = 0; vector < IGBUIO_NUM_MSI_VECTORS; vector++)
 			udev->msix_entries[vector].entry = vector;
 
-		if (pci_enable_msix(udev->pdev, udev->msix_entries, IGBUIO_NUM_MSI_VECTORS) == 0) {
+		if (!pci_enable_msix(dev, udev->msix_entries,
+				     IGBUIO_NUM_MSI_VECTORS)) {
+			dev_dbg(&dev->dev, "using MSI-X\n");
 			udev->mode = IGBUIO_MSIX_INTR_MODE;
+			udev->info.irq_flags = 0;
+			udev->info.irq = udev->msix_entries[0].vector;
+			break;
 		}
-		else {
-			pci_disable_msix(udev->pdev);
-			printk(KERN_INFO "fail to enable pci msix, or not enough msix entries\n");
-		}
+		/* fall through */
+		dev_dbg(&dev->dev, "unable to enable pci msix\n");
 	}
-	switch (udev->mode) {
-	case IGBUIO_MSIX_INTR_MODE:
-		udev->info.irq_flags = 0;
-		udev->info.irq = udev->msix_entries[0].vector;
-		break;
 	case IGBUIO_MSI_INTR_MODE:
-		break;
+		if (!pci_enable_msi(dev)) {
+			dev_dbg(&dev->dev, "using MSI\n");
+			udev->mode = IGBUIO_MSI_INTR_MODE;
+			break;
+		}
+		/* fall through */
 	case IGBUIO_LEGACY_INTR_MODE:
+		dev_dbg(&dev->dev, "using legacy mode\n");
+		if (!pci_intx_mask_supported(dev)) {
+			dev_err(&dev->dev, "pci_intx mask not supported\n");
+			goto fail_release_iomem;
+		}
 		udev->info.irq_flags = IRQF_SHARED;
-		udev->info.irq = dev->irq;
-		break;
-	default:
+		udev->mode = IGBUIO_LEGACY_INTR_MODE;
 		break;
 	}
+	disable_irq(dev->irq);
 
 	pci_set_drvdata(dev, udev);
 	igbuio_pci_irqcontrol(&udev->info, 0);
@@ -353,8 +309,7 @@ igbuio_pci_probe(struct pci_dev *dev, co
 
 fail_release_iomem:
 	igbuio_pci_release_iomem(&udev->info);
-	if (udev->mode == IGBUIO_MSIX_INTR_MODE)
-		pci_disable_msix(udev->pdev);
+	igbuio_irq_release(udev);
 fail_release_regions:
 	pci_release_regions(dev);
 fail_disable:
@@ -371,8 +326,7 @@ igbuio_pci_remove(struct pci_dev *dev)
 	struct uio_info *info = pci_get_drvdata(dev);
 
 	uio_unregister_device(info);
-	if (((struct rte_uio_pci_dev *)info->priv)->mode == IGBUIO_MSIX_INTR_MODE)
-		pci_disable_msix(dev);
+	igbuio_irq_release(igbuio_get_uio_pci_dev(info));
 	pci_release_regions(dev);
 	pci_disable_device(dev);
 	pci_set_drvdata(dev, NULL);


More information about the dev mailing list