[dpdk-dev] [PATCH] net/i40e: remove VF interrupt handler

Zhang, Qi Z qi.z.zhang at intel.com
Wed Jun 27 05:59:17 CEST 2018



> -----Original Message-----
> From: Xing, Beilei
> Sent: Wednesday, June 27, 2018 11:48 AM
> To: Zhang, Qi Z <qi.z.zhang at intel.com>
> Cc: Wu, Jingjing <jingjing.wu at intel.com>; Yu, De <de.yu at intel.com>;
> dev at dpdk.org
> Subject: RE: [PATCH] net/i40e: remove VF interrupt handler
> 
> 
> 
> > -----Original Message-----
> > From: Zhang, Qi Z
> > Sent: Thursday, June 7, 2018 9:32 AM
> > To: Xing, Beilei <beilei.xing at intel.com>
> > Cc: Wu, Jingjing <jingjing.wu at intel.com>; Yu, De <de.yu at intel.com>;
> > dev at dpdk.org; Zhang, Qi Z <qi.z.zhang at intel.com>
> > Subject: [PATCH] net/i40e: remove VF interrupt handler
> >
> > For i40evf, the internal rx interrupt and the adminq interrupt share the
> > same source, which causes a lot of CPU cycles to be wasted in the
> > interrupt handler on the rx path. Customers who require low latency
> > (setting I40E_ITR_INTERVAL to a small value) have complained that they
> > suffer from tremendous interrupt handling that eats significant CPU resources.
> >
> > The patch disables the pci interrupt and removes the interrupt handler,
> > replacing it with a low frequency (50ms) interrupt polling daemon which
> > is implemented by periodically registering an alarm callback. This saves
> > CPU time significantly: on a typical x86 server with a 2.1GHz CPU, with a
> > low latency configuration (32us), we saw CPU usage from the top command
> > reduced from 20% to 0% on the management core in testpmd.
> >
> > Also with the new method we can remove compile option:
> > I40E_ITR_INTERVAL which is used to balance between low latency and low
> > CPU usage previously.
> > Now we don't need it since we can reach both at same time.
> >
> > Suggested-by: Jingjing Wu <jingjing.wu at intel.com>
> > Signed-off-by: Qi Zhang <qi.z.zhang at intel.com>
> > ---
> >  config/common_base                |  2 --
> >  drivers/net/i40e/i40e_ethdev.c    |  3 +--
> >  drivers/net/i40e/i40e_ethdev.h    | 22 +++++++++++-----------
> >  drivers/net/i40e/i40e_ethdev_vf.c | 36
> > ++++++++++++++----------------------
> >  4 files changed, 26 insertions(+), 37 deletions(-)
> >
> > diff --git a/config/common_base b/config/common_base index
> > 6b0d1cbbb..9e21c6865 100644
> > --- a/config/common_base
> > +++ b/config/common_base
> > @@ -264,8 +264,6 @@ CONFIG_RTE_LIBRTE_I40E_INC_VECTOR=y
> >  CONFIG_RTE_LIBRTE_I40E_16BYTE_RX_DESC=n
> >  CONFIG_RTE_LIBRTE_I40E_QUEUE_NUM_PER_PF=64
> >  CONFIG_RTE_LIBRTE_I40E_QUEUE_NUM_PER_VM=4
> > -# interval up to 8160 us, aligned to 2 (or default value)
> > -CONFIG_RTE_LIBRTE_I40E_ITR_INTERVAL=-1
> 
> Seems " High Performance and per Packet Latency Tradeoff" is missed.

Ok, that should be removed also, thanks!
> 
> >
> >  #
> >  # Compile burst-oriented FM10K PMD
> > diff --git a/drivers/net/i40e/i40e_ethdev.c
> > b/drivers/net/i40e/i40e_ethdev.c index 13c5d3296..c8f9566e0 100644
> > --- a/drivers/net/i40e/i40e_ethdev.c
> > +++ b/drivers/net/i40e/i40e_ethdev.c
> > @@ -1829,8 +1829,7 @@ __vsi_queues_bind_intr(struct i40e_vsi *vsi,
> > uint16_t msix_vect,
> >  	/* Write first RX queue to Link list register as the head element */
> >  	if (vsi->type != I40E_VSI_SRIOV) {
> >  		uint16_t interval =
> > -
> > 	i40e_calc_itr_interval(RTE_LIBRTE_I40E_ITR_INTERVAL, 1,
> > -					       pf->support_multi_driver);
> > +			i40e_calc_itr_interval(1, pf->support_multi_driver);
> >
> >  		if (msix_vect == I40E_MISC_VEC_ID) {
> >  			I40E_WRITE_REG(hw, I40E_PFINT_LNKLST0, diff --git
> > a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h
> > index 11c4c76bd..599993dac 100644
> > --- a/drivers/net/i40e/i40e_ethdev.h
> > +++ b/drivers/net/i40e/i40e_ethdev.h
> > @@ -178,7 +178,7 @@ enum i40e_flxpld_layer_idx {
> >  #define I40E_ITR_INDEX_NONE             3
> >  #define I40E_QUEUE_ITR_INTERVAL_DEFAULT 32 /* 32 us */
> >  #define I40E_QUEUE_ITR_INTERVAL_MAX     8160 /* 8160 us */
> > -#define I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT 8160 /* 8160 us */
> > +#define I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT 32 /* 32 us */
> >  /* Special FW support this floating VEB feature */  #define
> > FLOATING_VEB_SUPPORTED_FW_MAJ 5  #define
> FLOATING_VEB_SUPPORTED_FW_MIN
> > 0 @@ -1328,17 +1328,17 @@ i40e_align_floor(int n)  }
> >
> >  static inline uint16_t
> > -i40e_calc_itr_interval(int16_t interval, bool is_pf, bool
> > is_multi_drv)
> > +i40e_calc_itr_interval(bool is_pf, bool is_multi_drv)
> >  {
> > -	if (interval < 0 || interval > I40E_QUEUE_ITR_INTERVAL_MAX) {
> > -		if (is_multi_drv) {
> > -			interval = I40E_QUEUE_ITR_INTERVAL_MAX;
> > -		} else {
> > -			if (is_pf)
> > -				interval =
> > I40E_QUEUE_ITR_INTERVAL_DEFAULT;
> > -			else
> > -				interval =
> > I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT;
> > -		}
> > +	uint16_t interval = 0;
> > +
> > +	if (is_multi_drv) {
> > +		interval = I40E_QUEUE_ITR_INTERVAL_MAX;
> > +	} else {
> > +		if (is_pf)
> > +			interval = I40E_QUEUE_ITR_INTERVAL_DEFAULT;
> > +		else
> > +			interval = I40E_VF_QUEUE_ITR_INTERVAL_DEFAULT;
> >  	}
> >
> >  	/* Convert to hardware count, as writing each 1 represents 2 us */
> > diff --git a/drivers/net/i40e/i40e_ethdev_vf.c
> > b/drivers/net/i40e/i40e_ethdev_vf.c
> > index 804e44530..ad5c069e8 100644
> > --- a/drivers/net/i40e/i40e_ethdev_vf.c
> > +++ b/drivers/net/i40e/i40e_ethdev_vf.c
> > @@ -44,6 +44,8 @@
> >  #define I40EVF_BUSY_WAIT_COUNT 50
> >  #define MAX_RESET_WAIT_CNT     20
> >
> > +#define I40EVF_ALARM_INTERVAL 50000 /* us */
> > +
> >  struct i40evf_arq_msg_info {
> >  	enum virtchnl_ops ops;
> >  	enum i40e_status_code result;
> > @@ -1133,7 +1135,7 @@ i40evf_init_vf(struct rte_eth_dev *dev)
> >  	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data-
> > >dev_private);
> >  	struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data-
> > >dev_private);
> >  	uint16_t interval =
> > -		i40e_calc_itr_interval(RTE_LIBRTE_I40E_ITR_INTERVAL, 0, 0);
> > +		i40e_calc_itr_interval(0, 0);
> >
> >  	vf->adapter = I40E_DEV_PRIVATE_TO_ADAPTER(dev->data-
> > >dev_private);
> >  	vf->dev_data = dev->data;
> > @@ -1370,7 +1372,7 @@ i40evf_handle_aq_msg(struct rte_eth_dev *dev)
> >   *  void
> >   */
> >  static void
> > -i40evf_dev_interrupt_handler(void *param)
> > +i40evf_dev_alarm_handler(void *param)
> >  {
> >  	struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
> >  	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data-
> > >dev_private);
> > @@ -1399,6 +1401,8 @@ i40evf_dev_interrupt_handler(void *param)
> >
> >  done:
> >  	i40evf_enable_irq0(hw);
> > +	rte_eal_alarm_set(I40EVF_ALARM_INTERVAL,
> > +			  i40evf_dev_alarm_handler, dev);
> >  }
> >
> >  static int
> > @@ -1442,12 +1446,8 @@ i40evf_dev_init(struct rte_eth_dev *eth_dev)
> >  		return -1;
> >  	}
> >
> > -	/* register callback func to eal lib */
> > -	rte_intr_callback_register(&pci_dev->intr_handle,
> > -		i40evf_dev_interrupt_handler, (void *)eth_dev);
> > -
> > -	/* enable uio intr after callback register */
> > -	rte_intr_enable(&pci_dev->intr_handle);
> > +	rte_eal_alarm_set(I40EVF_ALARM_INTERVAL,
> > +			  i40evf_dev_alarm_handler, eth_dev);
> >
> >  	/* configure and enable device interrupt */
> >  	i40evf_enable_irq0(hw);
> > @@ -1836,7 +1836,7 @@ i40evf_dev_rx_queue_intr_enable(struct
> > rte_eth_dev *dev, uint16_t queue_id)
> >  	struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
> >  	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data-
> > >dev_private);
> >  	uint16_t interval =
> > -		i40e_calc_itr_interval(RTE_LIBRTE_I40E_ITR_INTERVAL, 0, 0);
> > +		i40e_calc_itr_interval(0, 0);
> >  	uint16_t msix_intr;
> >
> >  	msix_intr = intr_handle->intr_vec[queue_id]; @@ -1859,8 +1859,6 @@
> > i40evf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t
> > queue_id)
> >
> >  	I40EVF_WRITE_FLUSH(hw);
> >
> > -	rte_intr_enable(&pci_dev->intr_handle);
> > -
> >  	return 0;
> >  }
> >
> > @@ -2023,10 +2021,8 @@ i40evf_dev_start(struct rte_eth_dev *dev)
> >  	 * queue interrupt to other VFIO vectors.
> >  	 * So clear uio/vfio intr/evevnfd first to avoid failure.
> >  	 */
> > -	if (dev->data->dev_conf.intr_conf.rxq != 0) {
> > -		rte_intr_disable(intr_handle);
> 
> Do we need to delete "rte_intr_disable" here? If so, should the comments be
> changed?

The reason we have rte_intr_disable here is that there is an rte_intr_enable in i40evf_dev_init, which may cause the problem described in the comment above this code section:

		/* When a VF port is bound to VFIO-PCI, only miscellaneous interrupt
         * is mapped to VFIO vector 0 in i40evf_dev_init( ).
         * If previous VFIO interrupt mapping set in i40evf_dev_init( ) is
         * not cleared, it will fail when rte_intr_enable( ) tries to map Rx
         * queue interrupt to other VFIO vectors.
         * So clear uio/vfio intr/evevnfd first to avoid failure.
         */
Now that rte_intr_enable has been removed from i40evf_dev_init, rte_intr_disable can be removed as well, but it seems I need to remove the comment too.

Thanks
Qi

> 
> > +	if (dev->data->dev_conf.intr_conf.rxq != 0)
> >  		rte_intr_enable(intr_handle);
> > -	}
> >
> >  	i40evf_enable_queues_intr(dev);
> >
> > @@ -2050,6 +2046,9 @@ i40evf_dev_stop(struct rte_eth_dev *dev)
> >
> >  	PMD_INIT_FUNC_TRACE();
> >
> > +	if (dev->data->dev_conf.intr_conf.rxq != 0)
> > +		rte_intr_disable(intr_handle);
> > +
> >  	if (hw->adapter_stopped == 1)
> >  		return;
> >  	i40evf_stop_queues(dev);
> > @@ -2285,9 +2284,8 @@ static void
> >  i40evf_dev_close(struct rte_eth_dev *dev)  {
> >  	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data-
> > >dev_private);
> > -	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
> > -	struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
> >
> > +	rte_eal_alarm_cancel(i40evf_dev_alarm_handler, dev);
> >  	i40evf_dev_stop(dev);
> >  	i40e_dev_free_queues(dev);
> >  	/*
> > @@ -2300,12 +2298,6 @@ i40evf_dev_close(struct rte_eth_dev *dev)
> >
> >  	i40evf_reset_vf(hw);
> >  	i40e_shutdown_adminq(hw);
> > -	/* disable uio intr before callback unregister */
> > -	rte_intr_disable(intr_handle);
> > -
> > -	/* unregister callback func from eal lib */
> > -	rte_intr_callback_unregister(intr_handle,
> > -				     i40evf_dev_interrupt_handler, dev);
> >  	i40evf_disable_irq0(hw);
> >  }
> >
> > --
> > 2.13.6



More information about the dev mailing list