[dpdk-dev] [PATCH v3] net/mlx5: support device removal event

Matan Azrad matan at mellanox.com
Mon Sep 4 19:52:55 CEST 2017


Hi Adrien,

> -----Original Message-----
> From: Adrien Mazarguil [mailto:adrien.mazarguil at 6wind.com]
> Sent: Monday, September 4, 2017 6:33 PM
> To: Matan Azrad <matan at mellanox.com>
> Cc: Nélio Laranjeiro <nelio.laranjeiro at 6wind.com>; dev at dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v3] net/mlx5: support device removal event
> 
> Hi Matan,
> 
> One comment I have is, while this patch adds support for RMV, it also silently
> addresses a bug (see large comment you added to
> priv_link_status_update()).
> 
> This should be split in two commits, with the fix part coming first and CC
> stable at dpdk.org, and a second commit adding RMV support proper.
> 

Actually, the mlx4 bug did not appear in the previous mlx5 code,
probably because the RMV interrupt was not implemented in mlx5 before this patch.
The big comment just explains the inconsistent link issue and was added
here since Nelio and I think the new function, priv_link_status_update(),
justifies this comment for future review.

> More below.
> 
> On Mon, Sep 04, 2017 at 04:55:53PM +0300, Matan Azrad wrote:
> > Extend the LSC event handling to support the device removal as well.
> > The Verbs library may send several related events, which are different
> > from LSC event.
> >
> > The mlx5 event handling has been made capable of receiving and
> > signaling several event types at once.
> >
> > This support includes next:
> > 1. Removal event detection according to the user configuration.
> > 2. Calling to all registered mlx5 removal callbacks.
> > 3. Capabilities extension to include removal interrupt handling.
> >
> > Signed-off-by: Matan Azrad <matan at mellanox.com>
> > ---
> >  drivers/net/mlx5/mlx5.c        |   2 +-
> >  drivers/net/mlx5/mlx5_ethdev.c | 103
> > +++++++++++++++++++++++++++++------------
> >  2 files changed, 74 insertions(+), 31 deletions(-)
> >
> > Changes:
> > V2:
> > Replace link status update function name.
> > add inconsistent link workaround comment.
> >
> > V3:
> > Fix indentations.
> > Accurate inconsistent link comment.
> >
> >
> > diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index
> > bd66a7c..1a3d7f1 100644
> > --- a/drivers/net/mlx5/mlx5.c
> > +++ b/drivers/net/mlx5/mlx5.c
> > @@ -865,7 +865,7 @@ static struct rte_pci_driver mlx5_driver = {
> >  	},
> >  	.id_table = mlx5_pci_id_map,
> >  	.probe = mlx5_pci_probe,
> > -	.drv_flags = RTE_PCI_DRV_INTR_LSC,
> > +	.drv_flags = RTE_PCI_DRV_INTR_LSC | RTE_PCI_DRV_INTR_RMV,
> >  };
> >
> >  /**
> > diff --git a/drivers/net/mlx5/mlx5_ethdev.c
> > b/drivers/net/mlx5/mlx5_ethdev.c index 57f6237..cdbd723 100644
> > --- a/drivers/net/mlx5/mlx5_ethdev.c
> > +++ b/drivers/net/mlx5/mlx5_ethdev.c
> > @@ -1112,47 +1112,84 @@ mlx5_ibv_device_to_pci_addr(const struct
> > ibv_device *device,  }
> >
> >  /**
> > - * Link status handler.
> > + * Update the link status.
> >   *
> >   * @param priv
> >   *   Pointer to private structure.
> > - * @param dev
> > - *   Pointer to the rte_eth_dev structure.
> >   *
> >   * @return
> > - *   Nonzero if the callback process can be called immediately.
> > + *   Zero if the callback process can be called immediately.
> >   */
> >  static int
> > -priv_dev_link_status_handler(struct priv *priv, struct rte_eth_dev
> > *dev)
> > +priv_link_status_update(struct priv *priv) {
> > +	struct rte_eth_link *link = &priv->dev->data->dev_link;
> > +
> > +	mlx5_link_update(priv->dev, 0);
> > +	if (((link->link_speed == 0) && link->link_status) ||
> > +		((link->link_speed != 0) && !link->link_status)) {
> > +		/*
> > +		 * Inconsistent status.
> > +		 * The link status is read from Ethtool through an IOCTL,
> > +		 * but as the application may work in polling mode it
> > +		 * may get the port event before the Kernel driver had
> > +		 * time to process it. PMD then request the link from
> > +		 * the kernel but the event is still not processed (due
> > +		 * to more urgent interrupts) and finally the PMD may
> > +		 * get an inconsistent link.
> > +		 * Setting alarm for later checking.
> > +		 */
> 
> While adding a comment is nice, there's too much info in there. From the
> PMD standpoint, what happens is the interrupt occurs much before the
> kernel netdevice exposes the new status, so it needs to be checked later.
> Can you sum it up in fewer words?
> 

Yes, sure :)

> > +		if (!priv->pending_alarm) {
> > +			priv->pending_alarm = 1;
> > +			rte_eal_alarm_set(MLX5_ALARM_TIMEOUT_US,
> > +					  mlx5_dev_link_status_handler,
> > +					  priv->dev);
> > +		}
> > +		return 1;
> > +	} else if (unlikely(priv->pending_alarm)) {
> > +		/* In case of link interrupt while link alarm was setting. */
> > +		priv->pending_alarm = 0;
> > +		rte_eal_alarm_cancel(mlx5_dev_link_status_handler, priv-
> >dev);
> > +	}
> > +	return 0;
> > +}
> > +
> > +/**
> > + * Device status handler.
> > + *
> > + * @param priv
> > + *   Pointer to private structure.
> > + * @param events
> > + *   Pointer to event flags holder.
> > + *
> > + * @return
> > + *   Events bitmap of callback process which can be called immediately.
> > + */
> > +static uint32_t
> > +priv_dev_status_handler(struct priv *priv)
> >  {
> >  	struct ibv_async_event event;
> > -	struct rte_eth_link *link = &dev->data->dev_link;
> > -	int ret = 0;
> > +	uint32_t ret = 0;
> >
> >  	/* Read all message and acknowledge them. */
> >  	for (;;) {
> >  		if (ibv_get_async_event(priv->ctx, &event))
> >  			break;
> > -
> > -		if (event.event_type != IBV_EVENT_PORT_ACTIVE &&
> > -		    event.event_type != IBV_EVENT_PORT_ERR)
> > +		if ((event.event_type == IBV_EVENT_PORT_ACTIVE ||
> > +			event.event_type == IBV_EVENT_PORT_ERR) &&
> > +			(priv->dev->data->dev_conf.intr_conf.lsc == 1))
> > +			ret |= (1 << RTE_ETH_EVENT_INTR_LSC);
> > +		else if (event.event_type == IBV_EVENT_DEVICE_FATAL &&
> > +			priv->dev->data->dev_conf.intr_conf.rmv == 1)
> > +			ret |= (1 << RTE_ETH_EVENT_INTR_RMV);
> > +		else
> >  			DEBUG("event type %d on port %d not handled",
> >  			      event.event_type, event.element.port_num);
> 
> What you also need to mention in the commit log of the fix is that splitting
> priv_dev_status_handler() and priv_link_status_update() addresses another
> bug here: this loop consumed *all* events, even during alarms. An alarm
> occurring for a LSC event could eat a RMV event that the application would
> never receive. This also affects mlx4, for which I intend to submit a fix soon.
> 

I think this issue is also only an mlx4 bug:
since the previous mlx5 code supported only the LSC event,
none of these problems were present there.

> >  		ibv_ack_async_event(&event);
> >  	}
> > -	mlx5_link_update(dev, 0);
> > -	if (((link->link_speed == 0) && link->link_status) ||
> > -	    ((link->link_speed != 0) && !link->link_status)) {
> > -		if (!priv->pending_alarm) {
> > -			/* Inconsistent status, check again later. */
> > -			priv->pending_alarm = 1;
> > -			rte_eal_alarm_set(MLX5_ALARM_TIMEOUT_US,
> > -					  mlx5_dev_link_status_handler,
> > -					  dev);
> > -		}
> > -	} else {
> > -		ret = 1;
> > -	}
> > +	if (ret & (1 << RTE_ETH_EVENT_INTR_LSC))
> > +		if (priv_link_status_update(priv))
> > +			ret &= ~(1 << RTE_ETH_EVENT_INTR_LSC);
> >  	return ret;
> >  }
> >
> > @@ -1172,9 +1209,9 @@ mlx5_dev_link_status_handler(void *arg)
> >  	priv_lock(priv);
> >  	assert(priv->pending_alarm == 1);
> >  	priv->pending_alarm = 0;
> > -	ret = priv_dev_link_status_handler(priv, dev);
> > +	ret = priv_link_status_update(priv);
> >  	priv_unlock(priv);
> > -	if (ret)
> > +	if (!ret)
> >  		_rte_eth_dev_callback_process(dev,
> RTE_ETH_EVENT_INTR_LSC, NULL,
> >  					      NULL);
> >  }
> > @@ -1192,14 +1229,17 @@ mlx5_dev_interrupt_handler(void *cb_arg)  {
> >  	struct rte_eth_dev *dev = cb_arg;
> >  	struct priv *priv = dev->data->dev_private;
> > -	int ret;
> > +	uint32_t events;
> >
> >  	priv_lock(priv);
> > -	ret = priv_dev_link_status_handler(priv, dev);
> > +	events = priv_dev_status_handler(priv);
> >  	priv_unlock(priv);
> > -	if (ret)
> > +	if (events & (1 << RTE_ETH_EVENT_INTR_LSC))
> >  		_rte_eth_dev_callback_process(dev,
> RTE_ETH_EVENT_INTR_LSC, NULL,
> >  					      NULL);
> > +	if (events & (1 << RTE_ETH_EVENT_INTR_RMV))
> > +		_rte_eth_dev_callback_process(dev,
> RTE_ETH_EVENT_INTR_RMV, NULL,
> > +					      NULL);
> >  }
> >
> >  /**
> > @@ -1213,7 +1253,8 @@ mlx5_dev_interrupt_handler(void *cb_arg)  void
> > priv_dev_interrupt_handler_uninstall(struct priv *priv, struct
> > rte_eth_dev *dev)  {
> > -	if (!dev->data->dev_conf.intr_conf.lsc)
> > +	if (!dev->data->dev_conf.intr_conf.lsc &&
> > +		!dev->data->dev_conf.intr_conf.rmv)
> >  		return;
> >  	rte_intr_callback_unregister(&priv->intr_handle,
> >  				     mlx5_dev_interrupt_handler,
> > @@ -1238,7 +1279,8 @@ priv_dev_interrupt_handler_install(struct priv
> > *priv, struct rte_eth_dev *dev)  {
> >  	int rc, flags;
> >
> > -	if (!dev->data->dev_conf.intr_conf.lsc)
> > +	if (!dev->data->dev_conf.intr_conf.lsc &&
> > +		!dev->data->dev_conf.intr_conf.rmv)
> >  		return;
> >  	assert(priv->ctx->async_fd > 0);
> >  	flags = fcntl(priv->ctx->async_fd, F_GETFL); @@ -1246,6 +1288,7 @@
> > priv_dev_interrupt_handler_install(struct priv *priv, struct rte_eth_dev
> *dev)
> >  	if (rc < 0) {
> >  		INFO("failed to change file descriptor async event queue");
> >  		dev->data->dev_conf.intr_conf.lsc = 0;
> > +		dev->data->dev_conf.intr_conf.rmv = 0;
> >  	} else {
> >  		priv->intr_handle.fd = priv->ctx->async_fd;
> >  		priv->intr_handle.type = RTE_INTR_HANDLE_EXT;
> > --
> > 2.7.4
> >
> 
> --
> Adrien Mazarguil
> 6WIND

Thanks,
Matan Azrad


More information about the dev mailing list