[dpdk-dev] [PATCH v2] net/mlx5: fix device removal handler for multiport device

Shahaf Shuler shahafs at mellanox.com
Sun May 12 14:15:15 CEST 2019


Sunday, May 12, 2019 11:32 AM, Viacheslav Ovsiienko:
> Subject: [dpdk-dev] [PATCH v2] net/mlx5: fix device removal handler for
> multiport device
> 
> The IBV_EVENT_DEVICE_FATAL event is generated by the driver once for the
> entire multiport Infiniband device, not once for each existing port.
> The port index in the event is zero, which causes the device removal
> event to be dropped. We should invoke the removal event processing
> routine for each port we have installed a handler for.
> 
> Fixes: 028b2a28c3cb ("net/mlx5: update event handler for multiport IB
> devices")
> 
> Signed-off-by: Viacheslav Ovsiienko <viacheslavo at mellanox.com>

Acked-by: Shahaf Shuler <shahafs at mellanox.com>

Thomas, Ferruh,
This one is a critical fix for mlx5. Without it, failsafe support on Azure will break.

Could you consider integrating it?

> ---
> v2: - address comments
>     - more detailed debug messages in the event handler
>     - removed port specific IBV_EVENT_DEVICE_FATAL handling code
> 
> v1:
> https://eur03.safelinks.protection.outlook.com/?url=http%3A%2F%2Fpatch
> es.dpdk.org%2Fpatch%2F53371%2F&data=02%7C01%7Cshahafs%40mel
> lanox.com%7C46fcede947654c45106e08d6d6b462e5%7Ca652971c7d2e4d9ba
> 6a4d149256f461b%7C0%7C0%7C636932467570850420&sdata=%2FN%2B
> D0OWf5y0hgtlvWj7om9qZrQPPIbmGXDIfsgqeUtY%3D&reserved=0
> 
>  drivers/net/mlx5/mlx5_ethdev.c | 77
> ++++++++++++++++++++++++++++++++++--------
>  1 file changed, 62 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5_ethdev.c
> b/drivers/net/mlx5/mlx5_ethdev.c index 80ee98f..a8a7ece 100644
> --- a/drivers/net/mlx5/mlx5_ethdev.c
> +++ b/drivers/net/mlx5/mlx5_ethdev.c
> @@ -1116,6 +1116,35 @@ int mlx5_fw_version_get(struct rte_eth_dev
> *dev, char *fw_ver, size_t fw_size)  }
> 
>  /**
> + * Handle asynchronous removal event for entire multiport device.
> + *
> + * @param sh
> + *   Infiniband device shared context.
> + */
> +static void
> +mlx5_dev_interrupt_device_fatal(struct mlx5_ibv_shared *sh) {
> +	uint32_t i;
> +
> +	for (i = 0; i < sh->max_port; ++i) {
> +		struct rte_eth_dev *dev;
> +
> +		if (sh->port[i].ih_port_id >= RTE_MAX_ETHPORTS) {
> +			/*
> +			 * Either the port does not exist or no
> +			 * handler is installed for this port.
> +			 */
> +			continue;
> +		}
> +		dev = &rte_eth_devices[sh->port[i].ih_port_id];
> +		assert(dev);
> +		if (dev->data->dev_conf.intr_conf.rmv)
> +			_rte_eth_dev_callback_process
> +				(dev, RTE_ETH_EVENT_INTR_RMV, NULL);
> +	}
> +}
> +
> +/**
>   * Handle shared asynchronous events the NIC (removal event
>   * and link status change). Supports multiport IB device.
>   *
> @@ -1137,21 +1166,46 @@ int mlx5_fw_version_get(struct rte_eth_dev
> *dev, char *fw_ver, size_t fw_size)
>  			break;
>  		/* Retrieve and check IB port index. */
>  		tmp = (uint32_t)event.element.port_num;
> -		assert(tmp && (tmp <= sh->max_port));
> -		if (!tmp ||
> -		    tmp > sh->max_port ||
> -		    sh->port[tmp - 1].ih_port_id >= RTE_MAX_ETHPORTS) {
> +		if (!tmp && event.event_type ==
> IBV_EVENT_DEVICE_FATAL) {
>  			/*
> -			 * Invalid IB port index or no handler
> -			 * installed for this port.
> +			 * The DEVICE_FATAL event is called once for
> +			 * entire device without port specifying.
> +			 * We should notify all existing ports.
>  			 */
>  			mlx5_glue->ack_async_event(&event);
> +			mlx5_dev_interrupt_device_fatal(sh);
> +			continue;
> +		}
> +		assert(tmp && (tmp <= sh->max_port));
> +		if (!tmp) {
> +			/* Unsupported device level event. */
> +			mlx5_glue->ack_async_event(&event);
> +			DRV_LOG(DEBUG,
> +				"unsupported common event (type %d)",
> +				event.event_type);
> +			continue;
> +		}
> +		if (tmp > sh->max_port) {
> +			/* Invalid IB port index. */
> +			mlx5_glue->ack_async_event(&event);
> +			DRV_LOG(DEBUG,
> +				"cannot handle an event (type %d)"
> +				"due to invalid IB port index (%u)",
> +				event.event_type, tmp);
> +			continue;
> +		}
> +		if (sh->port[tmp - 1].ih_port_id >= RTE_MAX_ETHPORTS) {
> +			/* No handler installed. */
> +			mlx5_glue->ack_async_event(&event);
> +			DRV_LOG(DEBUG,
> +				"cannot handle an event (type %d)"
> +				"due to no handler installed for port %u",
> +				event.event_type, tmp);
>  			continue;
>  		}
>  		/* Retrieve ethernet device descriptor. */
>  		tmp = sh->port[tmp - 1].ih_port_id;
>  		dev = &rte_eth_devices[tmp];
> -		tmp = 0;
>  		assert(dev);
>  		if ((event.event_type == IBV_EVENT_PORT_ACTIVE ||
>  		     event.event_type == IBV_EVENT_PORT_ERR) && @@ -
> 1165,15 +1219,8 @@ int mlx5_fw_version_get(struct rte_eth_dev *dev, char
> *fw_ver, size_t fw_size)
>  				(dev, RTE_ETH_EVENT_INTR_LSC, NULL);
>  			continue;
>  		}
> -		if (event.event_type == IBV_EVENT_DEVICE_FATAL &&
> -		    dev->data->dev_conf.intr_conf.rmv) {
> -			mlx5_glue->ack_async_event(&event);
> -			_rte_eth_dev_callback_process
> -				(dev, RTE_ETH_EVENT_INTR_RMV, NULL);
> -			continue;
> -		}
>  		DRV_LOG(DEBUG,
> -			"port %u event type %d on not handled",
> +			"port %u cannot handle an unknown event (type
> %d)",
>  			dev->data->port_id, event.event_type);
>  		mlx5_glue->ack_async_event(&event);
>  	}
> --
> 1.8.3.1



More information about the dev mailing list