[dpdk-dev] [PATCH v3] net/i40e: remove VF interrupt handler

Qi Zhang qi.z.zhang at intel.com
Wed Jun 27 15:15:27 CEST 2018


For i40evf, the internal rx interrupt and the adminq interrupt share the same
source, which causes a lot of CPU cycles to be wasted in the interrupt handler
on the rx path. Customers who require low latency (and therefore set
I40E_ITR_INTERVAL to a small value) have complained that they then suffer
from a tremendous amount of interrupt handling that eats significant CPU
resources.

This patch disables the PCI interrupt and removes the interrupt handler,
replacing it with a low-frequency (50ms) interrupt polling daemon
which is implemented by registering an alarm callback periodically. This
saves CPU time significantly: on a typical x86 server with a 2.1GHz CPU
and a low-latency configuration (32us), we saw the CPU usage reported by the
top command drop from 20% to 0% on the management core in testpmd.

Also, with the new method we can remove the compile option I40E_ITR_INTERVAL,
which was previously used to balance low latency against low CPU usage.
We no longer need it, since we can now achieve both at the same time.

Suggested-by: Jingjing Wu <jingjing.wu at intel.com>
Signed-off-by: Qi Zhang <qi.z.zhang at intel.com>
---

v3:
- update doc
- update comment in i40evf_dev_start.

v2:
- update doc

 config/common_base                |  2 --
 doc/guides/nics/i40e.rst          | 15 -------------
 drivers/net/i40e/i40e_ethdev.c    |  3 +--
 drivers/net/i40e/i40e_ethdev.h    | 22 ++++++++++----------
 drivers/net/i40e/i40e_ethdev_vf.c | 44 +++++++++++++--------------------------
 5 files changed, 27 insertions(+), 59 deletions(-)

diff --git a/config/common_base b/config/common_base
index fcf3a1f6f..d3f91dfe0 100644
--- a/config/common_base
+++ b/config/common_base
@@ -264,8 +264,6 @@ CONFIG_RTE_LIBRTE_I40E_INC_VECTOR=y
 CONFIG_RTE_LIBRTE_I40E_16BYTE_RX_DESC=n
 CONFIG_RTE_LIBRTE_I40E_QUEUE_NUM_PER_PF=64
 CONFIG_RTE_LIBRTE_I40E_QUEUE_NUM_PER_VM=4
-# interval up to 8160 us, aligned to 2 (or default value)
-CONFIG_RTE_LIBRTE_I40E_ITR_INTERVAL=-1
 
 #
 # Compile burst-oriented FM10K PMD
diff --git a/doc/guides/nics/i40e.rst b/doc/guides/nics/i40e.rst
index 18549bf5a..d5b02c95f 100644
--- a/doc/guides/nics/i40e.rst
+++ b/doc/guides/nics/i40e.rst
@@ -96,11 +96,6 @@ Please note that enabling debugging options may affect system performance.
 
   Number of queues reserved for each VMDQ Pool.
 
-- ``CONFIG_RTE_LIBRTE_I40E_ITR_INTERVAL`` (default ``-1``)
-
-  Interrupt Throttling interval.
-
-
 Runtime Config Options
 ~~~~~~~~~~~~~~~~~~~~~~
 
@@ -595,16 +590,6 @@ Use 16 Bytes RX Descriptor Size
 As i40e PMD supports both 16 and 32 bytes RX descriptor sizes, and 16 bytes size can provide helps to high performance of small packets.
 Configuration of ``CONFIG_RTE_LIBRTE_I40E_16BYTE_RX_DESC`` in config files can be changed to use 16 bytes size RX descriptors.
 
-High Performance and per Packet Latency Tradeoff
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Due to the hardware design, the interrupt signal inside NIC is needed for per
-packet descriptor write-back. The minimum interval of interrupts could be set
-at compile time by ``CONFIG_RTE_LIBRTE_I40E_ITR_INTERVAL`` in configuration files.
-Though there is a default configuration, the interval could be tuned by the
-users with that configuration item depends on what the user cares about more,
-performance or per packet latency.
-
 Example of getting best performance with l3fwd example
 ------------------------------------------------------
 
diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index e06e0a20b..c47b9f5f7 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -1833,8 +1833,7 @@ __vsi_queues_bind_intr(struct i40e_vsi *vsi, uint16_t msix_vect,
 	/* Write first RX queue to Link list register as the head element */
 	if (vsi->type != I40E_VSI_SRIOV) {
 		uint16_t interval =
-			i40e_calc_itr_interval(RTE_LIBRTE_I40E_ITR_INTERVAL, 1,
-					       pf->support_multi_driver);
+			i40e_calc_itr_interval(1, pf->support_multi_driver);
 
 		if (msix_vect == I40E_MISC_VEC_ID) {
 			I40E_WRITE_REG(hw, I40E_PFINT_LNKLST0,
diff --git a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h
index 79bfc67fc..cb5e5b5d8 100644
--- a/drivers/net/i40e/i40e_ethdev.h
+++ b/drivers/net/i40e/i40e_ethdev.h
@@ -184,7 +184,7 @@ enum i40e_flxpld_layer_idx {
 #define I40E_ITR_INDEX_NONE             3
 #define I40E_QUEUE_ITR_INTERVAL_DEFAULT 32 /* 32 us */
 #define I40E_QUEUE_ITR_INTERVAL_MAX     8160 /* 8160 us */
-#define I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT 8160 /* 8160 us */
+#define I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT 32 /* 32 us */
 /* Special FW support this floating VEB feature */
 #define FLOATING_VEB_SUPPORTED_FW_MAJ 5
 #define FLOATING_VEB_SUPPORTED_FW_MIN 0
@@ -1318,17 +1318,17 @@ i40e_align_floor(int n)
 }
 
 static inline uint16_t
-i40e_calc_itr_interval(int16_t interval, bool is_pf, bool is_multi_drv)
+i40e_calc_itr_interval(bool is_pf, bool is_multi_drv)
 {
-	if (interval < 0 || interval > I40E_QUEUE_ITR_INTERVAL_MAX) {
-		if (is_multi_drv) {
-			interval = I40E_QUEUE_ITR_INTERVAL_MAX;
-		} else {
-			if (is_pf)
-				interval = I40E_QUEUE_ITR_INTERVAL_DEFAULT;
-			else
-				interval = I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT;
-		}
+	uint16_t interval = 0;
+
+	if (is_multi_drv) {
+		interval = I40E_QUEUE_ITR_INTERVAL_MAX;
+	} else {
+		if (is_pf)
+			interval = I40E_QUEUE_ITR_INTERVAL_DEFAULT;
+		else
+			interval = I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT;
 	}
 
 	/* Convert to hardware count, as writing each 1 represents 2 us */
diff --git a/drivers/net/i40e/i40e_ethdev_vf.c b/drivers/net/i40e/i40e_ethdev_vf.c
index 86b38d202..7000b8aba 100644
--- a/drivers/net/i40e/i40e_ethdev_vf.c
+++ b/drivers/net/i40e/i40e_ethdev_vf.c
@@ -44,6 +44,8 @@
 #define I40EVF_BUSY_WAIT_COUNT 50
 #define MAX_RESET_WAIT_CNT     20
 
+#define I40EVF_ALARM_INTERVAL 50000 /* us */
+
 struct i40evf_arq_msg_info {
 	enum virtchnl_ops ops;
 	enum i40e_status_code result;
@@ -1133,7 +1135,7 @@ i40evf_init_vf(struct rte_eth_dev *dev)
 	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 	struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
 	uint16_t interval =
-		i40e_calc_itr_interval(RTE_LIBRTE_I40E_ITR_INTERVAL, 0, 0);
+		i40e_calc_itr_interval(0, 0);
 
 	vf->adapter = I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
 	vf->dev_data = dev->data;
@@ -1370,7 +1372,7 @@ i40evf_handle_aq_msg(struct rte_eth_dev *dev)
  *  void
  */
 static void
-i40evf_dev_interrupt_handler(void *param)
+i40evf_dev_alarm_handler(void *param)
 {
 	struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
 	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
@@ -1399,6 +1401,8 @@ i40evf_dev_interrupt_handler(void *param)
 
 done:
 	i40evf_enable_irq0(hw);
+	rte_eal_alarm_set(I40EVF_ALARM_INTERVAL,
+			  i40evf_dev_alarm_handler, dev);
 }
 
 static int
@@ -1442,12 +1446,8 @@ i40evf_dev_init(struct rte_eth_dev *eth_dev)
 		return -1;
 	}
 
-	/* register callback func to eal lib */
-	rte_intr_callback_register(&pci_dev->intr_handle,
-		i40evf_dev_interrupt_handler, (void *)eth_dev);
-
-	/* enable uio intr after callback register */
-	rte_intr_enable(&pci_dev->intr_handle);
+	rte_eal_alarm_set(I40EVF_ALARM_INTERVAL,
+			  i40evf_dev_alarm_handler, eth_dev);
 
 	/* configure and enable device interrupt */
 	i40evf_enable_irq0(hw);
@@ -1836,7 +1836,7 @@ i40evf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
 	struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
 	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 	uint16_t interval =
-		i40e_calc_itr_interval(RTE_LIBRTE_I40E_ITR_INTERVAL, 0, 0);
+		i40e_calc_itr_interval(0, 0);
 	uint16_t msix_intr;
 
 	msix_intr = intr_handle->intr_vec[queue_id];
@@ -1859,8 +1859,6 @@ i40evf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
 
 	I40EVF_WRITE_FLUSH(hw);
 
-	rte_intr_enable(&pci_dev->intr_handle);
-
 	return 0;
 }
 
@@ -2016,17 +2014,9 @@ i40evf_dev_start(struct rte_eth_dev *dev)
 		goto err_mac;
 	}
 
-	/* When a VF port is bound to VFIO-PCI, only miscellaneous interrupt
-	 * is mapped to VFIO vector 0 in i40evf_dev_init( ).
-	 * If previous VFIO interrupt mapping set in i40evf_dev_init( ) is
-	 * not cleared, it will fail when rte_intr_enable( ) tries to map Rx
-	 * queue interrupt to other VFIO vectors.
-	 * So clear uio/vfio intr/evevnfd first to avoid failure.
-	 */
-	if (dev->data->dev_conf.intr_conf.rxq != 0) {
-		rte_intr_disable(intr_handle);
+	/* only enable interrupt in rx interrupt mode */
+	if (dev->data->dev_conf.intr_conf.rxq != 0)
 		rte_intr_enable(intr_handle);
-	}
 
 	i40evf_enable_queues_intr(dev);
 
@@ -2050,6 +2040,9 @@ i40evf_dev_stop(struct rte_eth_dev *dev)
 
 	PMD_INIT_FUNC_TRACE();
 
+	if (dev->data->dev_conf.intr_conf.rxq != 0)
+		rte_intr_disable(intr_handle);
+
 	if (hw->adapter_stopped == 1)
 		return;
 	i40evf_stop_queues(dev);
@@ -2284,9 +2277,8 @@ static void
 i40evf_dev_close(struct rte_eth_dev *dev)
 {
 	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
-	struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
 
+	rte_eal_alarm_cancel(i40evf_dev_alarm_handler, dev);
 	i40evf_dev_stop(dev);
 	i40e_dev_free_queues(dev);
 	/*
@@ -2299,12 +2291,6 @@ i40evf_dev_close(struct rte_eth_dev *dev)
 
 	i40evf_reset_vf(hw);
 	i40e_shutdown_adminq(hw);
-	/* disable uio intr before callback unregister */
-	rte_intr_disable(intr_handle);
-
-	/* unregister callback func from eal lib */
-	rte_intr_callback_unregister(intr_handle,
-				     i40evf_dev_interrupt_handler, dev);
 	i40evf_disable_irq0(hw);
 }
 
-- 
2.13.6



More information about the dev mailing list