[dpdk-dev] [PATCH v1 3/5] igb: enable rx queue interrupts for PF

Danny Zhou danny.zhou at intel.com
Wed Jan 28 10:50:29 CET 2015


Signed-off-by: Danny Zhou <danny.zhou at intel.com>
---
 lib/librte_pmd_e1000/e1000/e1000_hw.h |   3 +
 lib/librte_pmd_e1000/e1000_ethdev.h   |   6 +
 lib/librte_pmd_e1000/igb_ethdev.c     | 265 ++++++++++++++++++++++++++++++----
 3 files changed, 249 insertions(+), 25 deletions(-)

diff --git a/lib/librte_pmd_e1000/e1000/e1000_hw.h b/lib/librte_pmd_e1000/e1000/e1000_hw.h
index 4dd92a3..9b999ec 100644
--- a/lib/librte_pmd_e1000/e1000/e1000_hw.h
+++ b/lib/librte_pmd_e1000/e1000/e1000_hw.h
@@ -780,6 +780,9 @@ struct e1000_mac_info {
 	u16 mta_reg_count;
 	u16 uta_reg_count;
 
+	u32 max_rx_queues;
+	u32 max_tx_queues;
+
 	/* Maximum size of the MTA register table in all supported adapters */
 	#define MAX_MTA_REG 128
 	u32 mta_shadow[MAX_MTA_REG];
diff --git a/lib/librte_pmd_e1000/e1000_ethdev.h b/lib/librte_pmd_e1000/e1000_ethdev.h
index d155e77..713ca11 100644
--- a/lib/librte_pmd_e1000/e1000_ethdev.h
+++ b/lib/librte_pmd_e1000/e1000_ethdev.h
@@ -34,6 +34,8 @@
 #ifndef _E1000_ETHDEV_H_
 #define _E1000_ETHDEV_H_
 
+#include <rte_spinlock.h>
+
 /* need update link, bit flag */
 #define E1000_FLAG_NEED_LINK_UPDATE (uint32_t)(1 << 0)
 #define E1000_FLAG_MAILBOX          (uint32_t)(1 << 1)
@@ -105,10 +107,14 @@
 #define E1000_FTQF_QUEUE_SHIFT           16
 #define E1000_FTQF_QUEUE_ENABLE          0x00000100
 
+/* maximum number of other interrupts besides Rx & Tx interrupts */
+#define E1000_MAX_OTHER_INTR		1
+
 /* structure for interrupt relative data */
 struct e1000_interrupt {
 	uint32_t flags;
 	uint32_t mask;
+	rte_spinlock_t lock; /* taken around rx queue EIMS/EIMC writes */
 };
 
 /* local vfta copy */
diff --git a/lib/librte_pmd_e1000/igb_ethdev.c b/lib/librte_pmd_e1000/igb_ethdev.c
index 2a268b8..2a9bf00 100644
--- a/lib/librte_pmd_e1000/igb_ethdev.c
+++ b/lib/librte_pmd_e1000/igb_ethdev.c
@@ -97,6 +97,7 @@ static int  eth_igb_flow_ctrl_get(struct rte_eth_dev *dev,
 static int  eth_igb_flow_ctrl_set(struct rte_eth_dev *dev,
 				struct rte_eth_fc_conf *fc_conf);
 static int eth_igb_lsc_interrupt_setup(struct rte_eth_dev *dev);
+static int eth_igb_rxq_interrupt_setup(struct rte_eth_dev *dev);
 static int eth_igb_interrupt_get_status(struct rte_eth_dev *dev);
 static int eth_igb_interrupt_action(struct rte_eth_dev *dev);
 static void eth_igb_interrupt_handler(struct rte_intr_handle *handle,
@@ -191,6 +192,12 @@ static int eth_igb_filter_ctrl(struct rte_eth_dev *dev,
 		     enum rte_filter_op filter_op,
 		     void *arg);
 
+static int eth_igb_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id);
+static int eth_igb_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id);
+static void eth_igb_assign_vector(struct e1000_hw *hw, s8 direction, u8 queue, u8 msix_vector);
+static void eth_igb_configure_msix(struct  e1000_hw *hw);
+static void eth_igb_write_ivar(struct e1000_hw *hw, u8 msix_vector, u8 index, u8 offset);
+
 /*
  * Define VF Stats MACRO for Non "cleared on read" register
  */
@@ -250,6 +257,8 @@ static struct eth_dev_ops eth_igb_ops = {
 	.vlan_tpid_set        = eth_igb_vlan_tpid_set,
 	.vlan_offload_set     = eth_igb_vlan_offload_set,
 	.rx_queue_setup       = eth_igb_rx_queue_setup,
+	.rx_queue_intr_enable = eth_igb_rx_queue_intr_enable,
+	.rx_queue_intr_disable = eth_igb_rx_queue_intr_disable,
 	.rx_queue_release     = eth_igb_rx_queue_release,
 	.rx_queue_count       = eth_igb_rx_queue_count,
 	.rx_descriptor_done   = eth_igb_rx_descriptor_done,
@@ -592,6 +601,16 @@ eth_igb_dev_init(__attribute__((unused)) struct eth_driver *eth_drv,
 		     eth_dev->data->port_id, pci_dev->id.vendor_id,
 		     pci_dev->id.device_id);
 
+	/* set the max number of interrupts requested from vfio */
+	struct rte_eth_dev_info dev_info;
+
+	memset(&dev_info, 0, sizeof(dev_info));
+	eth_igb_infos_get(eth_dev, &dev_info);
+
+	hw->mac.max_rx_queues = dev_info.max_rx_queues;
+
+	pci_dev->intr_handle.max_intr = hw->mac.max_rx_queues + E1000_MAX_OTHER_INTR;
+
 	rte_intr_callback_register(&(pci_dev->intr_handle),
 		eth_igb_interrupt_handler, (void *)eth_dev);
 
@@ -754,7 +773,7 @@ eth_igb_start(struct rte_eth_dev *dev)
 {
 	struct e1000_hw *hw =
 		E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-	int ret, i, mask;
+	int ret, mask;
 	uint32_t ctrl_ext;
 
 	PMD_INIT_FUNC_TRACE();
@@ -794,6 +813,9 @@ eth_igb_start(struct rte_eth_dev *dev)
 	/* configure PF module if SRIOV enabled */
 	igb_pf_host_configure(dev);
 
+	/* configure msix for sleep until rx interrupt */
+	eth_igb_configure_msix(hw);
+
 	/* Configure for OS presence */
 	igb_init_manageability(hw);
 
@@ -821,33 +843,9 @@ eth_igb_start(struct rte_eth_dev *dev)
 		igb_vmdq_vlan_hw_filter_enable(dev);
 	}
 
-	/*
-	 * Configure the Interrupt Moderation register (EITR) with the maximum
-	 * possible value (0xFFFF) to minimize "System Partial Write" issued by
-	 * spurious [DMA] memory updates of RX and TX ring descriptors.
-	 *
-	 * With a EITR granularity of 2 microseconds in the 82576, only 7/8
-	 * spurious memory updates per second should be expected.
-	 * ((65535 * 2) / 1000.1000 ~= 0.131 second).
-	 *
-	 * Because interrupts are not used at all, the MSI-X is not activated
-	 * and interrupt moderation is controlled by EITR[0].
-	 *
-	 * Note that having [almost] disabled memory updates of RX and TX ring
-	 * descriptors through the Interrupt Moderation mechanism, memory
-	 * updates of ring descriptors are now moderated by the configurable
-	 * value of Write-Back Threshold registers.
-	 */
 	if ((hw->mac.type == e1000_82576) || (hw->mac.type == e1000_82580) ||
 		(hw->mac.type == e1000_i350) || (hw->mac.type == e1000_i210) ||
 		(hw->mac.type == e1000_i211)) {
-		uint32_t ivar;
-
-		/* Enable all RX & TX queues in the IVAR registers */
-		ivar = (uint32_t) ((E1000_IVAR_VALID << 16) | E1000_IVAR_VALID);
-		for (i = 0; i < 8; i++)
-			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, i, ivar);
-
 		/* Configure EITR with the maximum possible value (0xFFFF) */
 		E1000_WRITE_REG(hw, E1000_EITR(0), 0xFFFF);
 	}
@@ -901,6 +899,10 @@ eth_igb_start(struct rte_eth_dev *dev)
 	if (dev->data->dev_conf.intr_conf.lsc != 0)
 		ret = eth_igb_lsc_interrupt_setup(dev);
 
+	/* check if rxq interrupt is enabled */
+	if (dev->data->dev_conf.intr_conf.rxq != 0)
+		eth_igb_rxq_interrupt_setup(dev);
+
 	/* resume enabled intr since hw reset */
 	igb_intr_enable(dev);
 
@@ -1791,6 +1793,35 @@ eth_igb_lsc_interrupt_setup(struct rte_eth_dev *dev)
 		E1000_DEV_PRIVATE_TO_INTR(dev->data->dev_private);
 
 	intr->mask |= E1000_ICR_LSC;
+	rte_spinlock_init(&(intr->lock));
+
+	return 0;
+}
+
+/*
+ * It enables the rx queue interrupts by setting the low max_rx_queues bits
+ * of EIMS.  eth_igb_start() calls it whenever intr_conf.rxq is non-zero.
+ *
+ * @param dev
+ *  Pointer to struct rte_eth_dev.
+ *
+ * @return
+ *  - Always zero.
+ */
+static int eth_igb_rxq_interrupt_setup(struct rte_eth_dev *dev)
+{
+	uint32_t mask, regval;
+	struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	struct rte_eth_dev_info dev_info;
+
+	memset(&dev_info, 0, sizeof(dev_info));
+	eth_igb_infos_get(dev, &dev_info);
+	/* a shift by 32 (no rx queues) would be undefined, so build from 1 */
+	mask = 0xFFFFFFFF;
+	if (dev_info.max_rx_queues < 32)
+		mask = ((uint32_t)1 << dev_info.max_rx_queues) - 1;
+	regval = E1000_READ_REG(hw, E1000_EIMS);
+	E1000_WRITE_REG(hw, E1000_EIMS, regval | mask);
 
 	return 0;
 }
@@ -3256,5 +3287,189 @@ static struct rte_driver pmd_igbvf_drv = {
 	.init = rte_igbvf_pmd_init,
 };
 
+/* Disable one rx queue's interrupt by writing its bit to EIMC.
+ * Returns 0 on success, -1 when queue_id is out of range. */
+static int
+eth_igb_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
+{
+	struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	struct e1000_interrupt *intr =
+		E1000_DEV_PRIVATE_TO_INTR(dev->data->dev_private);
+	if (queue_id >= 32 || queue_id >= hw->mac.max_rx_queues)
+		return -1;	/* no EIMC bit for this queue */
+	rte_spinlock_lock(&(intr->lock));
+	E1000_WRITE_REG(hw, E1000_EIMC, (uint32_t)1 << queue_id);
+	E1000_WRITE_FLUSH(hw);
+	rte_spinlock_unlock(&(intr->lock));
+	return 0;
+}
+
+/* Enable one rx queue's interrupt by setting its bit in EIMS.
+ * Returns 0 on success, -1 when queue_id is out of range. */
+static int
+eth_igb_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
+{
+	struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	struct e1000_interrupt *intr =
+		E1000_DEV_PRIVATE_TO_INTR(dev->data->dev_private);
+	uint32_t regval;
+	if (queue_id >= 32 || queue_id >= hw->mac.max_rx_queues)
+		return -1;	/* no EIMS bit for this queue */
+	rte_spinlock_lock(&(intr->lock));
+	regval = E1000_READ_REG(hw, E1000_EIMS);
+	E1000_WRITE_REG(hw, E1000_EIMS, regval | ((uint32_t)1 << queue_id));
+	E1000_WRITE_FLUSH(hw);
+	rte_spinlock_unlock(&(intr->lock));
+	return 0;
+}
+
+/* Store msix_vector together with the valid bit in the 8-bit field that
+ * starts at bit 'offset' of IVAR register 'index'; other fields are kept. */
+static void
+eth_igb_write_ivar(struct e1000_hw *hw, u8 msix_vector, u8 index, u8 offset)
+{
+	uint32_t ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
+
+	/* drop the old field; shift as uint32_t so reaching bit 31 is defined */
+	ivar &= ~((uint32_t)0xFF << offset);
+	ivar |= (uint32_t)(msix_vector | E1000_IVAR_VALID) << offset;
+
+	E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
+}
+
+/*
+ * Bind one queue to an MSI-X vector.  direction 0 selects the rx queue,
+ * direction 1 the tx queue.  With any other direction nothing is written
+ * to IVAR; on 82575 the vector's MSIXBM register is written with 0.
+ */
+static void
+eth_igb_assign_vector(struct e1000_hw *hw, s8 direction, u8 queue, u8 msix_vector)
+{
+	const int is_rx = (direction == 0);
+	const int is_tx = (direction == 1);
+	uint32_t bitmap = 0;
+	u8 row, shift;
+
+	switch (hw->mac.type) {
+	case e1000_82575:
+		/* The 82575 assigns vectors using a bitmask, which matches
+		 * the bitmask for the EICR/EIMS/EIMC registers: the queue's
+		 * bit is written into the MSIXBM register of the vector.
+		 */
+		if (is_rx)
+			bitmap = E1000_EICR_RX_QUEUE0 << queue;
+		else if (is_tx)
+			bitmap = E1000_EICR_TX_QUEUE0 << queue;
+		E1000_WRITE_REG(hw, E1000_MSIXBM(msix_vector), bitmap);
+		return;
+	case e1000_82576:
+		/* 82576 uses a table of 2 columns with 8 rows, ordered
+		 * column-major: the lower 3 queue bits are the row index,
+		 * the 4th bit selects the column (16 bits apart).
+		 */
+		row = queue & 0x7;
+		shift = (queue & 0x8) << 1;
+		break;
+	case e1000_82580:
+	case e1000_i350:
+	case e1000_i354:
+	case e1000_i210:
+	case e1000_i211:
+		/* 82580 and newer: same table but row-major: bit 0 picks
+		 * the column, the remaining bits pick the row.
+		 */
+		row = queue >> 1;
+		shift = (queue & 0x1) << 4;
+		break;
+	default:
+		return;
+	}
+
+	/* within a column, the tx entry sits 8 bits above the rx entry */
+	if (is_rx)
+		eth_igb_write_ivar(hw, msix_vector, row, shift);
+	else if (is_tx)
+		eth_igb_write_ivar(hw, msix_vector, row, shift + 8);
+}
+
+/*
+ * eth_igb_configure_msix - Configure MSI-X hardware
+ * @hw: board private structure
+ * Routes rx queue N to MSI-X vector N; on 82576 and newer the other causes
+ * (e.g. link changes) use the next vector, hw->mac.max_rx_queues.
+ */
+static void
+eth_igb_configure_msix(struct e1000_hw *hw)
+{
+	uint32_t queue_id;
+	uint32_t tmp, regval, mask;
+	uint32_t max_rx_queues = hw->mac.max_rx_queues;
+
+	/* set vector for other causes, i.e. link changes */
+	switch (hw->mac.type) {
+	case e1000_82575:
+		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
+		/* enable MSI-X PBA support */
+		tmp |= E1000_CTRL_EXT_PBA_CLR;
+		/* Auto-Mask interrupts upon ICR read */
+		tmp |= E1000_CTRL_EXT_EIAME;
+		tmp |= E1000_CTRL_EXT_IRCA;
+		E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
+		/* enable msix_other interrupt.  NOTE(review): the rx queue loop
+		 * below also writes MSIXBM(0) on 82575 - confirm no conflict.
+		 */
+		E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0), 0, E1000_EIMS_OTHER);
+		regval = E1000_READ_REG(hw, E1000_EIAC);
+		E1000_WRITE_REG(hw, E1000_EIAC, regval | E1000_EIMS_OTHER);
+		/* update EIMS from EIMS itself, as the 82576+ branch does */
+		regval = E1000_READ_REG(hw, E1000_EIMS);
+		E1000_WRITE_REG(hw, E1000_EIMS, regval | E1000_EIMS_OTHER);
+		break;
+	case e1000_82576:
+	case e1000_82580:
+	case e1000_i350:
+	case e1000_i354:
+	case e1000_i210:
+	case e1000_i211:
+		/* Turn on MSI-X capability first, or our settings won't stick.
+		 * And it will take days to debug.
+		 */
+		E1000_WRITE_REG(hw, E1000_GPIE, E1000_GPIE_MSIX_MODE |
+							E1000_GPIE_PBA | E1000_GPIE_EIAME |
+							E1000_GPIE_NSICR);
+		/* enable msix_other interrupt on the vector after the queues */
+		mask = (uint32_t)1 << max_rx_queues;
+		regval = E1000_READ_REG(hw, E1000_EIAC);
+		E1000_WRITE_REG(hw, E1000_EIAC, regval | mask);
+		regval = E1000_READ_REG(hw, E1000_EIMS);
+		E1000_WRITE_REG(hw, E1000_EIMS, regval | mask);
+		tmp = (max_rx_queues | E1000_IVAR_VALID) << 8;
+		E1000_WRITE_REG(hw, E1000_IVAR_MISC, tmp);
+		break;
+	default:
+		/* do nothing, since nothing else supports MSI-X */
+		break;
+	}
+
+	/* use EIAM and EIAC to auto-mask and auto-clear when MSI-X interrupt is
+	 * asserted (saves a register write per interrupt).  One bit per rx
+	 * queue; built from 1 because a shift by 32 (no queues) is undefined.
+	 */
+	mask = 0xFFFFFFFF;
+	if (max_rx_queues < 32)
+		mask = ((uint32_t)1 << max_rx_queues) - 1;
+	regval = E1000_READ_REG(hw, E1000_EIAC);
+	E1000_WRITE_REG(hw, E1000_EIAC, regval | mask);
+	regval = E1000_READ_REG(hw, E1000_EIAM);
+	E1000_WRITE_REG(hw, E1000_EIAM, regval | mask);
+
+	/* stay below max_rx_queues: on 82576+ that vector is the other causes' */
+	for (queue_id = 0; queue_id < max_rx_queues &&
+			queue_id < VFIO_MAX_QUEUE_ID; queue_id++)
+		eth_igb_assign_vector(hw, 0, queue_id, queue_id);
+	E1000_WRITE_FLUSH(hw);
+}
+
+
 PMD_REGISTER_DRIVER(pmd_igb_drv);
 PMD_REGISTER_DRIVER(pmd_igbvf_drv);
-- 
1.8.1.4



More information about the dev mailing list