[dpdk-dev] [PATCH 04/14] vhost: make vDPA framework bus agnostic

Maxime Coquelin maxime.coquelin at redhat.com
Mon Jun 22 14:34:58 CEST 2020



On 6/22/20 11:49 AM, Adrian Moreno wrote:
> 
> 
> On 6/19/20 5:11 PM, Maxime Coquelin wrote:
>>
>>
>> On 6/19/20 1:14 PM, Adrian Moreno wrote:
>>>
>>>
>>> On 6/11/20 11:37 PM, Maxime Coquelin wrote:
>>>> This patch makes the vDPA framework to no more
>>>> support only PCI devices, but any devices by relying
>>>> on the generic device name as identifier.
>>>>
>>>> Signed-off-by: Maxime Coquelin <maxime.coquelin at redhat.com>
>>>> ---
>>>>  drivers/vdpa/ifc/ifcvf_vdpa.c          |  6 +-
>>>>  drivers/vdpa/mlx5/mlx5_vdpa.c          |  8 +--
>>>>  drivers/vdpa/mlx5/mlx5_vdpa.h          |  2 +-
>>>>  examples/vdpa/main.c                   | 49 ++++++++--------
>>>>  lib/librte_vhost/rte_vdpa.h            | 42 +++++++-------
>>>>  lib/librte_vhost/rte_vhost_version.map |  1 +
>>>>  lib/librte_vhost/vdpa.c                | 79 +++++++++++---------------
>>>>  7 files changed, 85 insertions(+), 102 deletions(-)
>>>>
>>>> diff --git a/drivers/vdpa/ifc/ifcvf_vdpa.c b/drivers/vdpa/ifc/ifcvf_vdpa.c
>>>> index ec97178dcb..1fec1f1baf 100644
>>>> --- a/drivers/vdpa/ifc/ifcvf_vdpa.c
>>>> +++ b/drivers/vdpa/ifc/ifcvf_vdpa.c
>>>> @@ -47,7 +47,6 @@ static const char * const ifcvf_valid_arguments[] = {
>>>>  static int ifcvf_vdpa_logtype;
>>>>  
>>>>  struct ifcvf_internal {
>>>> -	struct rte_vdpa_dev_addr dev_addr;
>>>>  	struct rte_pci_device *pdev;
>>>>  	struct ifcvf_hw hw;
>>>>  	int vfio_container_fd;
>>>> @@ -1176,8 +1175,6 @@ ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
>>>>  		(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
>>>>  		(1ULL << VHOST_F_LOG_ALL);
>>>>  
>>>> -	internal->dev_addr.pci_addr = pci_dev->addr;
>>>> -	internal->dev_addr.type = VDPA_ADDR_PCI;
>>>>  	list->internal = internal;
>>>>  
>>>>  	if (rte_kvargs_count(kvlist, IFCVF_SW_FALLBACK_LM)) {
>>>> @@ -1188,8 +1185,7 @@ ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
>>>>  	}
>>>>  	internal->sw_lm = sw_fallback_lm;
>>>>  
>>>> -	internal->did = rte_vdpa_register_device(&internal->dev_addr,
>>>> -				&ifcvf_ops);
>>>> +	internal->did = rte_vdpa_register_device(&pci_dev->device, &ifcvf_ops);
>>>>  	if (internal->did < 0) {
>>>>  		DRV_LOG(ERR, "failed to register device %s", pci_dev->name);
>>>>  		goto error;
>>>> diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.c b/drivers/vdpa/mlx5/mlx5_vdpa.c
>>>> index 1113d6cef0..e8255c7d7e 100644
>>>> --- a/drivers/vdpa/mlx5/mlx5_vdpa.c
>>>> +++ b/drivers/vdpa/mlx5/mlx5_vdpa.c
>>>> @@ -501,14 +501,13 @@ mlx5_vdpa_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
>>>>  	priv->caps = attr.vdpa;
>>>>  	priv->log_max_rqt_size = attr.log_max_rqt_size;
>>>>  	priv->ctx = ctx;
>>>> -	priv->dev_addr.pci_addr = pci_dev->addr;
>>>> -	priv->dev_addr.type = VDPA_ADDR_PCI;
>>>> +	priv->pci_dev = pci_dev;
>>>>  	priv->var = mlx5_glue->dv_alloc_var(ctx, 0);
>>>>  	if (!priv->var) {
>>>>  		DRV_LOG(ERR, "Failed to allocate VAR %u.\n", errno);
>>>>  		goto error;
>>>>  	}
>>>> -	priv->id = rte_vdpa_register_device(&priv->dev_addr, &mlx5_vdpa_ops);
>>>> +	priv->id = rte_vdpa_register_device(&pci_dev->device, &mlx5_vdpa_ops);
>>>>  	if (priv->id < 0) {
>>>>  		DRV_LOG(ERR, "Failed to register vDPA device.");
>>>>  		rte_errno = rte_errno ? rte_errno : EINVAL;
>>>> @@ -550,8 +549,7 @@ mlx5_vdpa_pci_remove(struct rte_pci_device *pci_dev)
>>>>  
>>>>  	pthread_mutex_lock(&priv_list_lock);
>>>>  	TAILQ_FOREACH(priv, &priv_list, next) {
>>>> -		if (memcmp(&priv->dev_addr.pci_addr, &pci_dev->addr,
>>>> -			   sizeof(pci_dev->addr)) == 0) {
>>>> +		if (priv->pci_dev == pci_dev) {
>>>>  			found = 1;
>>>>  			break;
>>>>  		}
>>>> diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.h b/drivers/vdpa/mlx5/mlx5_vdpa.h
>>>> index fcc216ac78..50ee3c5870 100644
>>>> --- a/drivers/vdpa/mlx5/mlx5_vdpa.h
>>>> +++ b/drivers/vdpa/mlx5/mlx5_vdpa.h
>>>> @@ -104,7 +104,7 @@ struct mlx5_vdpa_priv {
>>>>  	int id; /* vDPA device id. */
>>>>  	int vid; /* vhost device id. */
>>>>  	struct ibv_context *ctx; /* Device context. */
>>>> -	struct rte_vdpa_dev_addr dev_addr;
>>>> +	struct rte_pci_device *pci_dev;
>>>>  	struct mlx5_hca_vdpa_attr caps;
>>>>  	uint32_t pdn; /* Protection Domain number. */
>>>>  	struct ibv_pd *pd;
>>>> diff --git a/examples/vdpa/main.c b/examples/vdpa/main.c
>>>> index d9a9112b16..c12da69574 100644
>>>> --- a/examples/vdpa/main.c
>>>> +++ b/examples/vdpa/main.c
>>>> @@ -271,10 +271,14 @@ static void cmd_list_vdpa_devices_parsed(
>>>>  	uint32_t queue_num;
>>>>  	uint64_t features;
>>>>  	struct rte_vdpa_device *vdev;
>>>> -	struct rte_pci_addr addr;
>>>> +	struct rte_device *dev;
>>>> +	struct rte_dev_iterator dev_iter;
>>>>  
>>>> -	cmdline_printf(cl, "device id\tdevice address\tqueue num\tsupported features\n");
>>>> -	for (did = 0; did < dev_total; did++) {
>>>> +	cmdline_printf(cl, "device id\tdevice name\tqueue num\tsupported features\n");
>>>> +	RTE_DEV_FOREACH(dev, "class=vdpa", &dev_iter) {
>>>> +		did = rte_vdpa_find_device_id_by_name(dev->name);
>>>> +		if (did < 0)
>>>> +			continue;
>>>>  		vdev = rte_vdpa_get_device(did);
>>>>  		if (!vdev)
>>>>  			continue;
>>>> @@ -290,11 +294,8 @@ static void cmd_list_vdpa_devices_parsed(
>>>>  				"for device id %d.\n", did);
>>>>  			continue;
>>>>  		}
>>>> -		addr = vdev->addr.pci_addr;
>>>> -		cmdline_printf(cl,
>>>> -			"%d\t\t" PCI_PRI_FMT "\t%" PRIu32 "\t\t0x%" PRIx64 "\n",
>>>> -			did, addr.domain, addr.bus, addr.devid,
>>>> -			addr.function, queue_num, features);
>>>> +		cmdline_printf(cl, "%d\t\t%s\t\t%" PRIu32 "\t\t0x%" PRIx64 "\n",
>>>> +			did, dev->name, queue_num, features);
>>>>  	}
>>>>  }
>>>>  
>>>> @@ -324,17 +325,12 @@ static void cmd_create_vdpa_port_parsed(void *parsed_result,
>>>>  {
>>>>  	int did;
>>>>  	struct cmd_create_result *res = parsed_result;
>>>> -	struct rte_vdpa_dev_addr addr;
>>>>  
>>>>  	rte_strscpy(vports[devcnt].ifname, res->socket_path, MAX_PATH_LEN);
>>>> -	if (rte_pci_addr_parse(res->bdf, &addr.pci_addr) != 0) {
>>>> -		cmdline_printf(cl, "Unable to parse the given bdf.\n");
>>>> -		return;
>>>> -	}
>>>> -	addr.type = VDPA_ADDR_PCI;
>>>> -	did = rte_vdpa_find_device_id(&addr);
>>>> +	did = rte_vdpa_find_device_id_by_name(res->bdf);
>>>>  	if (did < 0) {
>>>> -		cmdline_printf(cl, "Unable to find vdpa device id.\n");
>>>> +		cmdline_printf(cl, "Unable to find vdpa device id for %s.\n",
>>>> +				res->bdf);
>>>>  		return;
>>>>  	}
>>>>  
>>>> @@ -400,9 +396,11 @@ int
>>>>  main(int argc, char *argv[])
>>>>  {
>>>>  	char ch;
>>>> -	int i;
>>>> +	int did;
>>>>  	int ret;
>>>>  	struct cmdline *cl;
>>>> +	struct rte_device *dev;
>>>> +	struct rte_dev_iterator dev_iter;
>>>>  
>>>>  	ret = rte_eal_init(argc, argv);
>>>>  	if (ret < 0)
>>>> @@ -428,13 +426,18 @@ main(int argc, char *argv[])
>>>>  		cmdline_interact(cl);
>>>>  		cmdline_stdin_exit(cl);
>>>>  	} else {
>>>> -		for (i = 0; i < RTE_MIN(MAX_VDPA_SAMPLE_PORTS, dev_total);
>>>> -				i++) {
>>>> -			vports[i].did = i;
>>>> -			snprintf(vports[i].ifname, MAX_PATH_LEN, "%s%d",
>>>> -					iface, i);
>>>> +		RTE_DEV_FOREACH(dev, "class=vdpa", &dev_iter) {
>>>> +			did = rte_vdpa_find_device_id_by_name(dev->name);
>>>> +			if (did < 0) {
>>>> +				rte_panic("Failed to find device id for %s\n",
>>>> +						dev->name);
>>>> +			}
>>>> +			vports[devcnt].did = did;
>>>> +			snprintf(vports[devcnt].ifname, MAX_PATH_LEN, "%s%d",
>>>> +					iface, devcnt);
>>>>  
>>>> -			start_vdpa(&vports[i]);
>>>> +			start_vdpa(&vports[devcnt]);
>>>> +			devcnt++;
>>>>  		}
>>>>  
>>>>  		printf("enter \'q\' to quit\n");
>>>> diff --git a/lib/librte_vhost/rte_vdpa.h b/lib/librte_vhost/rte_vdpa.h
>>>> index 3c400ee79b..33037d39ea 100644
>>>> --- a/lib/librte_vhost/rte_vdpa.h
>>>> +++ b/lib/librte_vhost/rte_vdpa.h
>>>> @@ -18,25 +18,6 @@
>>>>  
>>>>  #define MAX_VDPA_NAME_LEN 128
>>>>  
>>>> -enum vdpa_addr_type {
>>>> -	VDPA_ADDR_PCI,
>>>> -	VDPA_ADDR_MAX
>>>> -};
>>>> -
>>>> -/**
>>>> - * vdpa device address
>>>> - */
>>>> -struct rte_vdpa_dev_addr {
>>>> -	/** vdpa address type */
>>>> -	enum vdpa_addr_type type;
>>>> -
>>>> -	/** vdpa pci address */
>>>> -	union {
>>>> -		uint8_t __dummy[64];
>>>> -		struct rte_pci_addr pci_addr;
>>>> -	};
>>>> -};
>>>> -
>>>>  /**
>>>>   * vdpa device operations
>>>>   */
>>>> @@ -81,8 +62,8 @@ struct rte_vdpa_dev_ops {
>>>>   * vdpa device structure includes device address and device operations.
>>>>   */
>>>>  struct rte_vdpa_device {
>>>> -	/** vdpa device address */
>>>> -	struct rte_vdpa_dev_addr addr;
>>>> +	/** Generic device information */
>>>> +	struct rte_device *device;
>>>>  	/** vdpa device operations */
>>>>  	struct rte_vdpa_dev_ops *ops;
>>>>  } __rte_cache_aligned;
>>>> @@ -102,7 +83,7 @@ struct rte_vdpa_device {
>>>>   */
>>>>  __rte_experimental
>>>>  int
>>>> -rte_vdpa_register_device(struct rte_vdpa_dev_addr *addr,
>>>> +rte_vdpa_register_device(struct rte_device *rte_dev,
>>>>  		struct rte_vdpa_dev_ops *ops);
>>>>  
>>>>  /**
>>>> @@ -120,6 +101,21 @@ __rte_experimental
>>>>  int
>>>>  rte_vdpa_unregister_device(int did);
>>>>  
>>>> +/**
>>>> + * @warning
>>>> + * @b EXPERIMENTAL: this API may change without prior notice
>>>> + *
>>>> + * Find the device id of a vdpa device from its name
>>>> + *
>>>> + * @param name
>>>> + *  the vdpa device name
>>>> + * @return
>>>> + *  device id on success, -1 on failure
>>>> + */
>>>> +__rte_experimental
>>>> +int
>>>> +rte_vdpa_find_device_id_by_name(const char *name);
>>>> +
>>>>  /**
>>>>   * @warning
>>>>   * @b EXPERIMENTAL: this API may change without prior notice
>>>> @@ -133,7 +129,7 @@ rte_vdpa_unregister_device(int did);
>>>>   */
>>>>  __rte_experimental
>>>>  int
>>>> -rte_vdpa_find_device_id(struct rte_vdpa_dev_addr *addr);
>>>> +rte_vdpa_find_device_id(struct rte_vdpa_device *dev);
>>>>  
>>>>  /**
>>>>   * @warning
>>>> diff --git a/lib/librte_vhost/rte_vhost_version.map b/lib/librte_vhost/rte_vhost_version.map
>>>> index 051d08c120..1abfff8a0c 100644
>>>> --- a/lib/librte_vhost/rte_vhost_version.map
>>>> +++ b/lib/librte_vhost/rte_vhost_version.map
>>>> @@ -66,4 +66,5 @@ EXPERIMENTAL {
>>>>  	rte_vhost_get_vhost_ring_inflight;
>>>>  	rte_vhost_get_vring_base_from_inflight;
>>>>  	rte_vhost_slave_config_change;
>>>> +	rte_vdpa_find_device_id_by_name;
>>>>  };
>>>> diff --git a/lib/librte_vhost/vdpa.c b/lib/librte_vhost/vdpa.c
>>>> index 61ab9aadb4..5abc5a2a7c 100644
>>>> --- a/lib/librte_vhost/vdpa.c
>>>> +++ b/lib/librte_vhost/vdpa.c
>>>> @@ -18,43 +18,22 @@
>>>>  static struct rte_vdpa_device vdpa_devices[MAX_VHOST_DEVICE];
>>>>  static uint32_t vdpa_device_num;
>>>>  
>>>> -static bool
>>>> -is_same_vdpa_device(struct rte_vdpa_dev_addr *a,
>>>> -		struct rte_vdpa_dev_addr *b)
>>>> -{
>>>> -	bool ret = true;
>>>> -
>>>> -	if (a->type != b->type)
>>>> -		return false;
>>>> -
>>>> -	switch (a->type) {
>>>> -	case VDPA_ADDR_PCI:
>>>> -		if (a->pci_addr.domain != b->pci_addr.domain ||
>>>> -				a->pci_addr.bus != b->pci_addr.bus ||
>>>> -				a->pci_addr.devid != b->pci_addr.devid ||
>>>> -				a->pci_addr.function != b->pci_addr.function)
>>>> -			ret = false;
>>>> -		break;
>>>> -	default:
>>>> -		break;
>>>> -	}
>>>> -
>>>> -	return ret;
>>>> -}
>>>> -
>>>>  int
>>>> -rte_vdpa_register_device(struct rte_vdpa_dev_addr *addr,
>>>> +rte_vdpa_register_device(struct rte_device *rte_dev,
>>>>  		struct rte_vdpa_dev_ops *ops)
>>>>  {
>>>>  	struct rte_vdpa_device *dev;
>>>>  	int i;
>>>>  
>>>> -	if (vdpa_device_num >= MAX_VHOST_DEVICE || addr == NULL || ops == NULL)
>>>> +	if (vdpa_device_num >= MAX_VHOST_DEVICE || ops == NULL)
>>>>  		return -1;
>>>>  
>>>>  	for (i = 0; i < MAX_VHOST_DEVICE; i++) {
>>>>  		dev = &vdpa_devices[i];
>>>> -		if (dev->ops && is_same_vdpa_device(&dev->addr, addr))
>>>> +		if (dev->ops == NULL)
>>>> +			continue;
>>>> +
>>>> +		if (dev->device == rte_dev)
>>>>  			return -1;
>>>>  	}
>>>
>>> If we change the order of the two "if" statemets and replace "continue" with
>>> "break", we can remove the for loop that follows:
>>>
>>> 	for (i = 0; i < MAX_VHOST_DEVICE; i++) {
>>> 		if (vdpa_devices[i].ops == NULL)
>>> 			break;
>>> 	}
>>
>> Do you mean like this?
>>
>> 	for (i = 0; i < MAX_VHOST_DEVICE; i++) {
>> 		if (dev->device == rte_dev)
>> 			return -1;
>>
>> 		if (vdpa_devices[i].ops == NULL)
>> 			break;
>> 	}
>>
>> If so the behaviour will be different, because you can have holes in the
>> array if a device is unregistered.
>>
> Right, did not account for the unregistered holes. Still the double iteration
> could be avoided by storing a pointer/index in the stack but maybe not needed in
> this patch.

Yes, certainly. Now these iterations are just an intermediate step of
the series. The end result gets rid of these loops, so maybe not worth
the effort of optimizing it.

>> With above change it would stop looking if device is already registered
>> at the first hole, so could end into double registration of the same
>> device.
>>
>>>
>>>>  
>>>> @@ -67,7 +46,7 @@ rte_vdpa_register_device(struct rte_vdpa_dev_addr *addr,
>>>>  		return -1;
>>>>  
>>>>  	dev = &vdpa_devices[i];
>>>> -	memcpy(&dev->addr, addr, sizeof(struct rte_vdpa_dev_addr));
>>>> +	dev->device = rte_dev;
>>>>  	dev->ops = ops;
>>>>  	vdpa_device_num++;
>>>>  
>>>> @@ -87,12 +66,33 @@ rte_vdpa_unregister_device(int did)
>>>>  }
>>>>  
>>>>  int
>>>> -rte_vdpa_find_device_id(struct rte_vdpa_dev_addr *addr)
>>>> +rte_vdpa_find_device_id(struct rte_vdpa_device *dev)
>>>> +{
>>>> +	struct rte_vdpa_device *tmp_dev;
>>>> +	int i;
>>>> +
>>>> +	if (dev == NULL)
>>>> +		return -1;
>>>> +
>>>> +	for (i = 0; i < MAX_VHOST_DEVICE; ++i) {
>>>> +		tmp_dev = &vdpa_devices[i];
>>>> +		if (tmp_dev->ops == NULL)
>>>> +			continue;
>>>> +
>>>> +		if (tmp_dev == dev)
>>>> +			return i;
>>>> +	}
>>>> +
>>>> +	return -1;
>>>> +}
>>>> +
>>>> +int
>>>> +rte_vdpa_find_device_id_by_name(const char *name)
>>>>  {
>>>>  	struct rte_vdpa_device *dev;
>>>>  	int i;
>>>>  
>>>> -	if (addr == NULL)
>>>> +	if (name == NULL)
>>>>  		return -1;
>>>>  
>>>>  	for (i = 0; i < MAX_VHOST_DEVICE; ++i) {
>>>> @@ -100,7 +100,7 @@ rte_vdpa_find_device_id(struct rte_vdpa_dev_addr *addr)
>>>>  		if (dev->ops == NULL)
>>>>  			continue;
>>>>  
>>>> -		if (is_same_vdpa_device(&dev->addr, addr))
>>>> +		if (strcmp(dev->device->name, name) == 0)
>>>>  			return i;
>>>>  	}
>>>>  
>>>> @@ -236,21 +236,10 @@ static int
>>>>  vdpa_dev_match(struct rte_vdpa_device *dev,
>>>>  	      const struct rte_device *rte_dev)
>>>>  {
>>>> -	struct rte_vdpa_dev_addr addr;
>>>> +	if (dev->device == rte_dev)
>>>> +		return 0;
>>>>  
>>>> -	/*  Only PCI bus supported for now */
>>>> -	if (strcmp(rte_dev->bus->name, "pci") != 0)
>>>> -		return -1;
>>>> -
>>>> -	addr.type = VDPA_ADDR_PCI;
>>>> -
>>>> -	if (rte_pci_addr_parse(rte_dev->name, &addr.pci_addr) != 0)
>>>> -		return -1;
>>>> -
>>>> -	if (!is_same_vdpa_device(&dev->addr, &addr))
>>>> -		return -1;
>>>> -
>>>> -	return 0;
>>>> +	return -1;
>>>>  }
>>>>  
>>>>  /* Generic rte_vdpa_dev comparison function. */
>>>>
>>>
>>
> 



More information about the dev mailing list