[dpdk-dev] [PATCH v3 2/5] Link Bonding PMD Library (librte_eal/librte_ether link bonding support changes)

Neil Horman nhorman at tuxdriver.com
Fri Jun 13 18:08:07 CEST 2014


On Fri, Jun 13, 2014 at 03:41:59PM +0100, Declan Doherty wrote:
> Updating functionality in EAL to support adding link bonding
> devices via –vdev option. Link bonding devices will be
> initialized after all physical devices have been probed and
> initialized.
> 
> Signed-off-by: Declan Doherty <declan.doherty at intel.com>
> ---
>  lib/librte_eal/common/eal_common_dev.c      | 66 +++++++++++++++++++++++++++--
>  lib/librte_eal/common/eal_common_pci.c      |  6 +++
>  lib/librte_eal/common/include/eal_private.h |  7 +++
>  lib/librte_eal/common/include/rte_dev.h     |  1 +
>  lib/librte_ether/rte_ethdev.c               | 34 +++++++++++++--
>  lib/librte_ether/rte_ethdev.h               |  7 ++-
>  lib/librte_pmd_pcap/rte_eth_pcap.c          | 22 +++++-----
>  lib/librte_pmd_ring/rte_eth_ring.c          | 32 +++++++-------
>  lib/librte_pmd_ring/rte_eth_ring.h          |  3 +-
>  lib/librte_pmd_xenvirt/rte_eth_xenvirt.c    |  2 +-
>  10 files changed, 144 insertions(+), 36 deletions(-)
> 
> diff --git a/lib/librte_eal/common/eal_common_dev.c b/lib/librte_eal/common/eal_common_dev.c
> index eae5656..b50c908 100644
> --- a/lib/librte_eal/common/eal_common_dev.c
> +++ b/lib/librte_eal/common/eal_common_dev.c
> @@ -75,14 +75,28 @@ rte_eal_dev_init(void)
>  
>  	/* call the init function for each virtual device */
>  	TAILQ_FOREACH(devargs, &devargs_list, next) {
> +		uint8_t bdev = 0;
>  
>  		if (devargs->type != RTE_DEVTYPE_VIRTUAL)
>  			continue;
>  
>  		TAILQ_FOREACH(driver, &dev_driver_list, next) {
> -			if (driver->type != PMD_VDEV)
> +			/* RTE_DEVTYPE_VIRTUAL can only be a virtual or bonded device*/
> +			if (driver->type != PMD_VDEV && driver->type != PMD_BDEV)
>  				continue;
>  
> +			/*
> +			 * Bonded devices are not initialize here, we do it later in
> +			 * rte_eal_bonded_dev_init() after all physical devices have been
> +			 * probed and initialized
> +			 */
> +			if (driver->type == PMD_BDEV &&
> +					!strncmp(driver->name, devargs->virtual.drv_name,
> +							strlen(driver->name))) {
> +				bdev = 1;
> +				break;
> +			}
> +
I really don't think you need to add a new device type for bonded devs.  Its got
no specific hardware that it drives, and you configure it with a --vdev command,
so treat it as one here.  I understand that you need to pass additional
information about slaves to a bonded device, which is fine, but you can do that
with kvargs pretty easily, at which point its just another vdev. The only other
requirement is that you initilize the bonded vdev after the slave vdevs have
been created, which you can do by any of several methods (a priority field to
indicate that bonded drivers should be initilized last/later, a deferral return
code from the init routine, or by dead reckoning via the careful construction of
the application command line (placed the bonded --vdev option last on the
command line argument list at run time).


>  			/* search a driver prefix in virtual device name */
>  			if (!strncmp(driver->name, devargs->virtual.drv_name,
>  					strlen(driver->name))) {
> @@ -92,9 +106,9 @@ rte_eal_dev_init(void)
>  			}
>  		}
>  
> -		if (driver == NULL) {
> -			rte_panic("no driver found for %s\n",
> -				  devargs->virtual.drv_name);
> +		if (driver == NULL && !bdev) {
> +			rte_panic("no driver found for %s and is not a bonded vdev %d\n",
> +				  devargs->virtual.drv_name, bdev);
>  		}
>  	}
>  
> @@ -107,3 +121,47 @@ rte_eal_dev_init(void)
>  	}
>  	return 0;
>  }
> +
> +#ifdef RTE_LIBRTE_PMD_BOND
> +int
> +rte_eal_bonded_dev_init(void)
> +{
> +	struct rte_devargs *devargs;
> +	struct rte_driver *driver;
> +
> +	TAILQ_FOREACH(devargs, &devargs_list, next) {
> +		int vdev = 0;
> +
> +		if (devargs->type != RTE_DEVTYPE_VIRTUAL)
> +			continue;
> +
> +		TAILQ_FOREACH(driver, &dev_driver_list, next) {
> +			if (driver->type != PMD_VDEV && driver->type != PMD_BDEV)
> +				continue;
> +
> +			/* Virtual devices have already been initialized so we skip them
> +			 * here*/
> +			if (driver->type == PMD_VDEV &&
> +					!strncmp(driver->name, devargs->virtual.drv_name,
> +							strlen(driver->name))) {
> +				vdev = 1;
> +				break;
> +			}
> +
> +			/* search a driver prefix in bonded device name */
> +			if (!strncmp(driver->name, devargs->virtual.drv_name,
> +					strlen(driver->name))) {
> +				driver->init(devargs->virtual.drv_name, devargs->args);
> +				break;
> +			}
> +		}
> +
> +		if (driver == NULL && !vdev) {
> +			rte_panic("no driver found for %s\n",
> +					devargs->virtual.drv_name);
> +		}
> +	}
> +	return 0;
> +}
> +#endif
> +
If you treat bonded devices as vdevs, you can remove this function entirely.

> diff --git a/lib/librte_eal/common/eal_common_pci.c b/lib/librte_eal/common/eal_common_pci.c
> index 4d877ea..9b584f5 100644
> --- a/lib/librte_eal/common/eal_common_pci.c
> +++ b/lib/librte_eal/common/eal_common_pci.c
> @@ -166,7 +166,13 @@ rte_eal_pci_probe(void)
>  				 dev->addr.devid, dev->addr.function);
>  	}
>  
> +#ifdef RTE_LIBRTE_PMD_BOND
> +	/* After all physical PCI devices have been probed and initialized then we
> +	 * initialize the bonded devices */
> +	return rte_eal_bonded_dev_init();
> +#else
This is the wrong place for this, bonded devices are not pci devices, this
doesn't belong in the pci device probe path.  If you treat the bonded devices as
vdevs and handle the ordering as described above, you won't need this anyway.
 
>  	return 0;
> +#endif
>  }
>  
>  /* dump one device */
> diff --git a/lib/librte_eal/common/include/eal_private.h b/lib/librte_eal/common/include/eal_private.h
> index 232fcec..f6081bb 100644
> --- a/lib/librte_eal/common/include/eal_private.h
> +++ b/lib/librte_eal/common/include/eal_private.h
> @@ -203,4 +203,11 @@ int rte_eal_alarm_init(void);
>   */
>  int rte_eal_dev_init(void);
>  
> +#ifdef RTE_LIBRTE_PMD_BOND
> +/**
> + * Initialize the bonded devices
> + */
> +int rte_eal_bonded_dev_init(void);
> +#endif
> +
>  #endif /* _EAL_PRIVATE_H_ */
> diff --git a/lib/librte_eal/common/include/rte_dev.h b/lib/librte_eal/common/include/rte_dev.h
> index f7e3a10..f0a780a 100644
> --- a/lib/librte_eal/common/include/rte_dev.h
> +++ b/lib/librte_eal/common/include/rte_dev.h
> @@ -62,6 +62,7 @@ typedef int (rte_dev_init_t)(const char *name, const char *args);
>  enum pmd_type {
>  	PMD_VDEV = 0,
>  	PMD_PDEV = 1,
> +	PMD_BDEV = 2,	/**< Poll Mode Driver Bonded Device*/
>  };
Can drop this as noted above.

>  
>  /**
> diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
> index 8011b8b..4c2f1d3 100644
> --- a/lib/librte_ether/rte_ethdev.c
> +++ b/lib/librte_ether/rte_ethdev.c
> @@ -64,6 +64,7 @@
>  #include <rte_mbuf.h>
>  #include <rte_errno.h>
>  #include <rte_spinlock.h>
> +#include <rte_string_fns.h>
>  
>  #include "rte_ether.h"
>  #include "rte_ethdev.h"
> @@ -152,8 +153,21 @@ rte_eth_dev_data_alloc(void)
>  				RTE_MAX_ETHPORTS * sizeof(*rte_eth_dev_data));
>  }
>  
> +static int
> +rte_eth_dev_name_unique(const char* name)
> +{
> +	unsigned i;
> +
> +	for (i = 0; i < nb_ports; i++) {
> +		if (strcmp(rte_eth_devices[i].data->name, name) == 0)
> +			return -1;
> +	}
> +
> +	return 0;
> +}
> +
>  struct rte_eth_dev *
> -rte_eth_dev_allocate(void)
> +rte_eth_dev_allocate(const char* name)
>  {
>  	struct rte_eth_dev *eth_dev;
>  
> @@ -165,23 +179,37 @@ rte_eth_dev_allocate(void)
>  	if (rte_eth_dev_data == NULL)
>  		rte_eth_dev_data_alloc();
>  
> +	if (rte_eth_dev_name_unique(name)) {
> +		PMD_DEBUG_TRACE("Ethernet Device with name %s already allocated!\n");
> +		return NULL;
> +	}
> +
This seems fairly racy if you allow dynamic device creation at run time from the
application, if multiple threads attempt to create bonds in parallel.


>  	eth_dev = &rte_eth_devices[nb_ports];
>  	eth_dev->data = &rte_eth_dev_data[nb_ports];
> +	rte_snprintf(eth_dev->data->name , sizeof(eth_dev->data->name ),
> +			"%s", name);
>  	eth_dev->data->port_id = nb_ports++;
>  	return eth_dev;
>  }
>  
>  static int
>  rte_eth_dev_init(struct rte_pci_driver *pci_drv,
> -		 struct rte_pci_device *pci_dev)
> +		struct rte_pci_device *pci_dev)
>  {
>  	struct eth_driver    *eth_drv;
>  	struct rte_eth_dev *eth_dev;
> +	char ethdev_name[RTE_ETH_NAME_MAX_LEN];
> +
>  	int diag;
>  
>  	eth_drv = (struct eth_driver *)pci_drv;
>  
> -	eth_dev = rte_eth_dev_allocate();
> +	/* Create unique ethdev name by concatenating drive name and number of
> +	 * ports */
> +	rte_snprintf(ethdev_name, RTE_ETH_NAME_MAX_LEN, "%d:%d.%d",
> +			pci_dev->addr.bus, pci_dev->addr.devid, pci_dev->addr.function);
> +
> +	eth_dev = rte_eth_dev_allocate(ethdev_name);
>  	if (eth_dev == NULL)
>  		return -ENOMEM;
>  
> diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
> index 67eda50..27ed0ab 100644
> --- a/lib/librte_ether/rte_ethdev.h
> +++ b/lib/librte_ether/rte_ethdev.h
> @@ -1233,6 +1233,8 @@ struct rte_eth_dev_sriov {
>  };
>  #define RTE_ETH_DEV_SRIOV(dev)         ((dev)->data->sriov)
>  
> +#define RTE_ETH_NAME_MAX_LEN (32)
> +
>  /**
>   * @internal
>   * The data part, with no function pointers, associated with each ethernet device.
> @@ -1241,6 +1243,8 @@ struct rte_eth_dev_sriov {
>   * processes in a multi-process configuration.
>   */
>  struct rte_eth_dev_data {
> +	char name[RTE_ETH_NAME_MAX_LEN]; /**< Unique identifier name */
> +
>  	void **rx_queues; /**< Array of pointers to RX queues. */
>  	void **tx_queues; /**< Array of pointers to TX queues. */
>  	uint16_t nb_rx_queues; /**< Number of RX queues. */
> @@ -1293,10 +1297,11 @@ extern uint8_t rte_eth_dev_count(void);
>   * Allocates a new ethdev slot for an ethernet device and returns the pointer
>   * to that slot for the driver to use.
>   *
> + * @param	name	Unique identifier name for each Ethernet device
>   * @return
>   *   - Slot in the rte_dev_devices array for a new device;
>   */
> -struct rte_eth_dev *rte_eth_dev_allocate(void);
> +struct rte_eth_dev *rte_eth_dev_allocate(const char *name);
>  
>  struct eth_driver;


>  /**
> diff --git a/lib/librte_pmd_pcap/rte_eth_pcap.c b/lib/librte_pmd_pcap/rte_eth_pcap.c
Hmm, we're modifying other pmds for the naming feature, I think it would be best
split out into a separate patch.  Something entitled "support unique interface
naming for virtual pmds" or something.



More information about the dev mailing list