[dpdk-dev] [PATCH v4] ethdev: fix port data mismatched in multiple process model

Ferruh Yigit ferruh.yigit at intel.com
Thu Jan 19 19:39:13 CET 2017


On 1/9/2017 7:50 AM, Yuanhan Liu wrote:
> Assume we have two virtio ports, 00:03.0 and 00:04.0. The first one is
> managed by the kernel driver, while the later one is managed by DPDK.
> 
> Now we start the primary process. 00:03.0 will be skipped by DPDK virtio
> PMD driver (since it's being used by the kernel). 00:04.0 would be
> successfully initiated by DPDK virtio PMD (if nothing abnormal happens).
> After that, we would get a port id 0, and all the related info needed
> by virtio (virtio_hw) is stored at rte_eth_dev_data[0].
> 
> Then we start the secondary process. As usual, 00:03.0 will be firstly
> probed. It firstly tries to get a local eth_dev structure for it (by
> rte_eth_dev_allocate):
> 
>         port_id = rte_eth_dev_find_free_port();
>         ...
> 
>         eth_dev = &rte_eth_devices[port_id];
>         eth_dev->data = &rte_eth_dev_data[port_id];
>         ...
> 
>         return eth_dev;
> 
> Since it's a first PCI device, port_id will be 0. eth_dev->data would
> then point to rte_eth_dev_data[0]. And here things start going wrong,
> as rte_eth_dev_data[0] actually stores the virtio_hw for 00:04.0.
> 
> That said, in the secondary process, DPDK will continue to drive PCI
> device 00.03.0 (despite the fact it's been managed by kernel), with
> the info from PCI device 00:04.0. Which is wrong.
> 
> The fix is to attach the port already registered by the primary process:
> iterate the rte_eth_dev_data[], and get the port id who's PCI ID matches
> the current PCI device.
> 
> This would let us maintain same port ID for the same PCI device, keeping
> the chance of referencing to wrong data minimal.
> 
> Fixes: af75078fece3 ("first public release")
> 
> Cc: stable at dpdk.org
> Cc: Thomas Monjalon <thomas.monjalon at 6wind.com>
> Cc: Bruce Richardson <bruce.richardson at intel.com>
> Cc: Ferruh Yigit <ferruh.yigit at intel.com>
> Signed-off-by: Yuanhan Liu <yuanhan.liu at linux.intel.com>
> ---
> 
> v4: - assign eth_dev in the common eth_dev init help function
>       it also renamed to eth_dev_get, to not confuse with the
>       eth_dev_init callback.
>     - move primoary process specific assignments to rte_eth_dev_allocate
>     - drop the virtio example in comment
>     - combine two code block for primary into one
> 
> v3: - do not move rte_eth_dev_data_alloc to pci_probe
>     - rename eth_dev_attach to eth_dev_attach_secondary
>     - introduce eth_dev_init() for common eth_dev struct initiation
>     - move comment block inside the "if" block
> ---
>  lib/librte_ether/rte_ethdev.c | 71 +++++++++++++++++++++++++++++++++++++------
>  1 file changed, 62 insertions(+), 9 deletions(-)
> 
> diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
> index fde8112..1453df1 100644
> --- a/lib/librte_ether/rte_ethdev.c
> +++ b/lib/librte_ether/rte_ethdev.c
> @@ -189,6 +189,19 @@ struct rte_eth_dev *
>  	return RTE_MAX_ETHPORTS;
>  }
>  
> +static struct rte_eth_dev *
> +eth_dev_get(uint8_t port_id)
> +{
> +	struct rte_eth_dev *eth_dev = &rte_eth_devices[port_id];
> +
> +	eth_dev->data = &rte_eth_dev_data[port_id];
> +	eth_dev->attached = DEV_ATTACHED;
> +	eth_dev_last_created_port = port_id;
> +	nb_ports++;
> +
> +	return eth_dev;
> +}
> +
>  struct rte_eth_dev *
>  rte_eth_dev_allocate(const char *name)
>  {
> @@ -210,13 +223,41 @@ struct rte_eth_dev *
>  		return NULL;
>  	}
>  
> -	eth_dev = &rte_eth_devices[port_id];
> -	eth_dev->data = &rte_eth_dev_data[port_id];
> +	eth_dev = eth_dev_get(port_id);

There can be a merge issue here, please help me understand.

In repo, different from seen here, this patch does this here:
  -       eth_dev = &rte_eth_devices[port_id];
  -       eth_dev->data = &rte_eth_dev_data[port_id];
  -       memset(eth_dev->data, 0, sizeof(*eth_dev->data));
  +       memset(&rte_eth_devices[port_id], 0, sizeof(*eth_dev->data));
  +       eth_dev = eth_dev_get(port_id);

Which no more resets the eth_dev->data, but rte_eth_devices[port_id]
(with sizeof(*eth_dev->data))

memset(eth_dev->data) added by Jan Blunck on comment:
7f95f78a8aea ("ethdev: clear data when allocating device")

most probably it should stay as "memset(eth_dev->data)", but if not,
please aware that commit 7f95f78a8aea removed some assignment from
drivers relying this memset, they needs to be added back.

>  	snprintf(eth_dev->data->name, sizeof(eth_dev->data->name), "%s", name);
>  	eth_dev->data->port_id = port_id;
> -	eth_dev->attached = DEV_ATTACHED;
> -	eth_dev_last_created_port = port_id;
> -	nb_ports++;
> +
> +	return eth_dev;
> +}
> +
> +/*
> + * Attach to a port already registered by the primary process, which
> + * makes sure that the same device would have the same port id both
> + * in the primary and secondary process.
> + */
> +static struct rte_eth_dev *
> +eth_dev_attach_secondary(const char *name)
> +{
> +	uint8_t i;
> +	struct rte_eth_dev *eth_dev;
> +
> +	if (rte_eth_dev_data == NULL)
> +		rte_eth_dev_data_alloc();
> +
> +	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
> +		if (strcmp(rte_eth_dev_data[i].name, name) == 0)
> +			break;
> +	}
> +	if (i == RTE_MAX_ETHPORTS) {
> +		RTE_PMD_DEBUG_TRACE(
> +			"device %s is not driven by the primary process\n",
> +			name);
> +		return NULL;
> +	}
> +
> +	eth_dev = eth_dev_get(i);
> +	RTE_ASSERT(eth_dev->data->port_id == i);
> +
>  	return eth_dev;
>  }
>  
> @@ -246,16 +287,28 @@ struct rte_eth_dev *
>  	rte_eal_pci_device_name(&pci_dev->addr, ethdev_name,
>  			sizeof(ethdev_name));
>  
> -	eth_dev = rte_eth_dev_allocate(ethdev_name);
> -	if (eth_dev == NULL)
> -		return -ENOMEM;
> -
>  	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
> +		eth_dev = rte_eth_dev_allocate(ethdev_name);
> +		if (eth_dev == NULL)
> +			return -ENOMEM;
> +
>  		eth_dev->data->dev_private = rte_zmalloc("ethdev private structure",
>  				  eth_drv->dev_private_size,
>  				  RTE_CACHE_LINE_SIZE);
>  		if (eth_dev->data->dev_private == NULL)
>  			rte_panic("Cannot allocate memzone for private port data\n");
> +	} else {
> +		eth_dev = eth_dev_attach_secondary(ethdev_name);
> +		if (eth_dev == NULL) {
> +			/*
> +			 * if we failed to attach a device, it means the
> +			 * device is skipped in primary process, due to
> +			 * some errors. If so, we return a positive value,
> +			 * to let EAL skip it for the secondary process
> +			 * as well.
> +			 */
> +			return 1;
> +		}
>  	}
>  	eth_dev->pci_dev = pci_dev;
>  	eth_dev->driver = eth_drv;
> 



More information about the dev mailing list