[dpdk-dev] [PATCH v7 04/14] eal/pci: Consolidate pci address comparison APIs

Tetsuya Mukawa mukawa at igel.co.jp
Wed Feb 11 09:14:23 CET 2015


On 2015/02/11 15:29, Qiu, Michael wrote:
> On 2/11/2015 12:57 PM, Tetsuya Mukawa wrote:
>> On 2015/02/11 13:53, Tetsuya Mukawa wrote:
>>> On 2015/02/11 12:27, Qiu, Michael wrote:
>>>> On 2/10/2015 11:11 PM, Iremonger, Bernard wrote:
>>>>>> -----Original Message-----
>>>>>> From: Qiu, Michael
>>>>>> Sent: Monday, February 9, 2015 1:10 PM
>>>>>> To: Tetsuya Mukawa; dev at dpdk.org
>>>>>> Cc: Iremonger, Bernard
>>>>>> Subject: Re: [PATCH v7 04/14] eal/pci: Consolidate pci address comparison APIs
>>>>>>
>>>>>> On 2/9/2015 4:31 PM, Tetsuya Mukawa wrote:
>>>>>>> This patch replaces pci_addr_comparison() and memcmp() of pci
>>>>>>> addresses by eal_compare_pci_addr().
>>>>>>>
>>>>>>> v5:
>>>>>>> - Fix pci_scan_one to handle pt_driver correctly.
>>>>>>> v4:
>>>>>>> - Fix calculation method of eal_compare_pci_addr().
>>>>>>> - Add parameter checking.
>>>>>>>
>>>>>>> Signed-off-by: Tetsuya Mukawa <mukawa at igel.co.jp>
>>>>>>> ---
>>>>>>>  lib/librte_eal/bsdapp/eal/eal_pci.c       | 25 ++++++++---------------
>>>>>>>  lib/librte_eal/common/eal_common_pci.c    |  2 +-
>>>>>>>  lib/librte_eal/common/include/rte_pci.h   | 34 +++++++++++++++++++++++++++++++
>>>>>>>  lib/librte_eal/linuxapp/eal/eal_pci.c     | 25 ++++++++---------------
>>>>>>>  lib/librte_eal/linuxapp/eal/eal_pci_uio.c |  2 +-
>>>>>>>  5 files changed, 54 insertions(+), 34 deletions(-)
>>>>>>>
>>>>>>> diff --git a/lib/librte_eal/bsdapp/eal/eal_pci.c
>>>>>>> b/lib/librte_eal/bsdapp/eal/eal_pci.c
>>>>>>> index 74ecce7..c844d58 100644
>>>>>>> --- a/lib/librte_eal/bsdapp/eal/eal_pci.c
>>>>>>> +++ b/lib/librte_eal/bsdapp/eal/eal_pci.c
>>>>>>> @@ -270,20 +270,6 @@ pci_uio_map_resource(struct rte_pci_device *dev)
>>>>>>>  	return (0);
>>>>>>>  }
>>>>>>>
>>>>>>> -/* Compare two PCI device addresses. */ -static int
>>>>>>> -pci_addr_comparison(struct rte_pci_addr *addr, struct rte_pci_addr
>>>>>>> *addr2) -{
>>>>>>> -	uint64_t dev_addr = (addr->domain << 24) + (addr->bus << 16) + (addr->devid << 8) + addr-
>>>>>>> function;
>>>>>>> -	uint64_t dev_addr2 = (addr2->domain << 24) + (addr2->bus << 16) + (addr2->devid << 8) +
>>>>>> addr2->function;
>>>>>>> -
>>>>>>> -	if (dev_addr > dev_addr2)
>>>>>>> -		return 1;
>>>>>>> -	else
>>>>>>> -		return 0;
>>>>>>> -}
>>>>>>> -
>>>>>>> -
>>>>>>>  /* Scan one pci sysfs entry, and fill the devices list from it. */
>>>>>>> static int  pci_scan_one(int dev_pci_fd, struct pci_conf *conf) @@
>>>>>>> -356,13 +342,20 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
>>>>>>>  	}
>>>>>>>  	else {
>>>>>>>  		struct rte_pci_device *dev2 = NULL;
>>>>>>> +		int ret;
>>>>>>>
>>>>>>>  		TAILQ_FOREACH(dev2, &pci_device_list, next) {
>>>>>>> -			if (pci_addr_comparison(&dev->addr, &dev2->addr))
>>>>>>> +			ret = eal_compare_pci_addr(&dev->addr, &dev2->addr);
>>>>>>> +			if (ret > 0)
>>>>>>>  				continue;
>>>>>>> -			else {
>>>>>>> +			else if (ret < 0) {
>>>>>>>  				TAILQ_INSERT_BEFORE(dev2, dev, next);
>>>>>>>  				return 0;
>>>>>>> +			} else { /* already registered */
>>>>>>> +				/* update pt_driver */
>>>>>>> +				dev2->pt_driver = dev->pt_driver;
>>>>>>> +				free(dev);
>>>>>>> +				return 0;
>>>>>>>  			}
>>>>>>>  		}
>>>>>>>  		TAILQ_INSERT_TAIL(&pci_device_list, dev, next); diff --git
>>>>>>> a/lib/librte_eal/common/eal_common_pci.c
>>>>>>> b/lib/librte_eal/common/eal_common_pci.c
>>>>>>> index f3c7f71..a89f5c3 100644
>>>>>>> --- a/lib/librte_eal/common/eal_common_pci.c
>>>>>>> +++ b/lib/librte_eal/common/eal_common_pci.c
>>>>>>> @@ -93,7 +93,7 @@ static struct rte_devargs *pci_devargs_lookup(struct rte_pci_device *dev)
>>>>>>>  		if (devargs->type != RTE_DEVTYPE_BLACKLISTED_PCI &&
>>>>>>>  			devargs->type != RTE_DEVTYPE_WHITELISTED_PCI)
>>>>>>>  			continue;
>>>>>>> -		if (!memcmp(&dev->addr, &devargs->pci.addr, sizeof(dev->addr)))
>>>>>>> +		if (!eal_compare_pci_addr(&dev->addr, &devargs->pci.addr))
>>>>>>>  			return devargs;
>>>>>>>  	}
>>>>>>>  	return NULL;
>>>>>>> diff --git a/lib/librte_eal/common/include/rte_pci.h
>>>>>>> b/lib/librte_eal/common/include/rte_pci.h
>>>>>>> index 7f2d699..4814cd7 100644
>>>>>>> --- a/lib/librte_eal/common/include/rte_pci.h
>>>>>>> +++ b/lib/librte_eal/common/include/rte_pci.h
>>>>>>> @@ -269,6 +269,40 @@ eal_parse_pci_DomBDF(const char *input, struct
>>>>>>> rte_pci_addr *dev_addr)  }  #undef GET_PCIADDR_FIELD
>>>>>>>
>>>>>>> +/* Compare two PCI device addresses. */
>>>>>>> +/**
>>>>>>> + * Utility function to compare two PCI device addresses.
>>>>>>> + *
>>>>>>> + * @param addr
>>>>>>> + *	The PCI Bus-Device-Function address to compare
>>>>>>> + * @param addr2
>>>>>>> + *	The PCI Bus-Device-Function address to compare
>>>>>>> + * @return
>>>>>>> + *	0 on equal PCI address.
>>>>>>> + *	Positive on addr is greater than addr2.
>>>>>>> + *	Negative on addr is less than addr2, or error.
>>>>>>> + */
>>>>>>> +static inline int
>>>>>>> +eal_compare_pci_addr(struct rte_pci_addr *addr, struct rte_pci_addr
>>>>>>> +*addr2) {
>>>>>>> +	uint64_t dev_addr, dev_addr2;
>>>>>>> +
>>>>>>> +	if ((addr == NULL) || (addr2 == NULL))
>>>>>>> +		return -1;
>>>>>>> +
>>>>>>> +	dev_addr = (addr->domain << 24) | (addr->bus << 16) |
>>>>>>> +				(addr->devid << 8) | addr->function;
>>>>>>> +	dev_addr2 = (addr2->domain << 24) | (addr2->bus << 16) |
>>>>>>> +				(addr2->devid << 8) | addr2->function;
>>>>>>> +
>>>>>>> +	if (dev_addr > dev_addr2)
>>>>>>> +		return 1;
>>>>>>> +	else if (dev_addr < dev_addr2)
>>>>>>> +		return -1;
>>>>>>> +	else
>>>>>>> +		return 0;
>>>>>>> +}
>>>>>>> +
>>>>>>>  /**
>>>>>>>   * Probe the PCI bus for registered drivers.
>>>>>>>   *
>>>>>>> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>>>>> b/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>>>>> index c0ca5a5..d847102 100644
>>>>>>> --- a/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>>>>> +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>>>>> @@ -229,20 +229,6 @@ error:
>>>>>>>  	return -1;
>>>>>>>  }
>>>>>>>
>>>>>>> -/* Compare two PCI device addresses. */ -static int
>>>>>>> -pci_addr_comparison(struct rte_pci_addr *addr, struct rte_pci_addr
>>>>>>> *addr2) -{
>>>>>>> -	uint64_t dev_addr = (addr->domain << 24) + (addr->bus << 16) + (addr->devid << 8) + addr-
>>>>>>> function;
>>>>>>> -	uint64_t dev_addr2 = (addr2->domain << 24) + (addr2->bus << 16) + (addr2->devid << 8) +
>>>>>> addr2->function;
>>>>>>> -
>>>>>>> -	if (dev_addr > dev_addr2)
>>>>>>> -		return 1;
>>>>>>> -	else
>>>>>>> -		return 0;
>>>>>>> -}
>>>>>>> -
>>>>>>> -
>>>>>>>  /* Scan one pci sysfs entry, and fill the devices list from it. */
>>>>>>> static int  pci_scan_one(const char *dirname, uint16_t domain, uint8_t
>>>>>>> bus, @@ -353,13 +339,20 @@ pci_scan_one(const char *dirname, uint16_t
>>>>>>> domain, uint8_t bus,
>>>>>>>  	}
>>>>>>>  	else {
>>>>>>>  		struct rte_pci_device *dev2 = NULL;
>>>>>>> +		int ret;
>>>>>>>
>>>>>>>  		TAILQ_FOREACH(dev2, &pci_device_list, next) {
>>>>>>> -			if (pci_addr_comparison(&dev->addr, &dev2->addr))
>>>>>>> +			ret = eal_compare_pci_addr(&dev->addr, &dev2->addr);
>>>>>>> +			if (ret > 0)
>>>>>>>  				continue;
>>>>>>> -			else {
>>>>>>> +			else if (ret < 0) {
>>>>>>>  				TAILQ_INSERT_BEFORE(dev2, dev, next);
>>>>>>>  				return 0;
>>>>>>> +			} else { /* already registered */
>>>>>>> +				/* update pt_driver */
>>>>>>> +				dev2->pt_driver = dev->pt_driver;
>>>>> Hi Tetsuya,
>>>>>
>>>>> I am seeing a problem with the librte_pmd_ixgbe code where dev->max_vfs is being lost in some scenarios.
>>>>> The following line should be added here:
>>>>>       dev2->max_vfs = dev->max_vfs;
>>>>>
>>>>> numa_mode should probably be updated too (although it is not causing a problem at present).
>>>>>       dev2->numa_mode = dev->numa_mode;
>>>> I'm very curious, why those field miss? I haven't see any places clear
>>>> this field.
>>>>
>>>> What is the root cause?
>>> Hi Michael,
>>>
>>> Here is my guess.
>>> The above function creates pci device list.
>> I am sorry. I forgot to add below information.
>>
>> "max_vfs" or "numa_node" value is came from sysfs when the above
>> function is processed.
> Yes, but it has already been registered, why it missed?

Yes, it has been registered already, but probably should be updated.
I guess sysfs value will be changed when igb_uio starts managing the device.

ex)
1. Boot linux
2. start a dpdk application with no port.
3. pci device list is registered.
 - Here, "max_vfs" is came from sysfs. Or there is no such a entry.
4. igb_uio binds the device.
5.  I guess max_vfs value of sysfs is changed. Or max_vfs entry is created.
6. The dpdk application calls hotplug function.
 - Here, I guess we need to update "max_vfs" value.

Above is a just my assumption.
It may be good to wait for Bernard's reply.

Thanks,
Tetsuya

> Thanks,
> Michael
>>> And current DPDK implementation assumes all devices needed to be managed
>>> are under igb_uio or vfio when above code is processed.
>>> To add hotplug function, we also need to think some devices will start
>>> to be managed under igb_uio or vfio after initializing pci device list.
>>> Anyway, I guess "max_vfs" value will be changed when igb_uio or vfio
>>> manages the device.
>>>
>>> Hi Bernard,
>>>
>>> Could you please check "max_vfs" and "num_node" values, then check the
>>> values again after the device is managed by igb_uio or vfio?
>>> In my environment, it seems max_vfs is created by igb_uio.
>>> But my NIC doesn't have VF, so behavior might be different in your
>>> environment.
>>> I guess "numa_node" should not be changed theoretically.
>>>
>>> If my guess is correct, how about replacing following values?
>>> - driver
>>> - max_vfs
>>> - resource
>>> - (numa_node)
>>> Except for above value, I guess other value shouldn't be changed even
>>> after the device is managed by igb_uio or vfio.
>>>
>>> Thanks,
>>> Tetsuya
>>>
>>>> Thanks,
>>>> Michael
>>>>
>>>>> Regards,
>>>>>
>>>>> Bernard.
>>>>>
>>>>>
>>>>>
>>>>>
>>>>>>> +				free(dev);
>>>>>>> +				return 0;
>>>>>>>  			}
>>>>>>>  		}
>>>>>>>  		TAILQ_INSERT_TAIL(&pci_device_list, dev, next); diff --git
>>>>>>> a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>>>>>> b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>>>>>> index e53f06b..1da3507 100644
>>>>>>> --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>>>>>> +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>>>>>> @@ -123,7 +123,7 @@ pci_uio_map_secondary(struct rte_pci_device *dev)
>>>>>>>  	TAILQ_FOREACH(uio_res, pci_res_list, next) {
>>>>>>>
>>>>>>>  		/* skip this element if it doesn't match our PCI address */
>>>>>>> -		if (memcmp(&uio_res->pci_addr, &dev->addr, sizeof(dev->addr)))
>>>>>>> +		if (eal_compare_pci_addr(&uio_res->pci_addr, &dev->addr))
>>>>>>>  			continue;
>>>>>>>
>>>>>>>  		for (i = 0; i != uio_res->nb_maps; i++) {
>>>>>> Acked-by: Michael Qiu <michael.qiu at intel.com>




More information about the dev mailing list