[dpdk-dev] [PATCH v7 04/14] eal/pci: Consolidate pci address comparison APIs
Qiu, Michael
michael.qiu at intel.com
Wed Feb 11 07:29:07 CET 2015
On 2/11/2015 12:57 PM, Tetsuya Mukawa wrote:
> On 2015/02/11 13:53, Tetsuya Mukawa wrote:
>> On 2015/02/11 12:27, Qiu, Michael wrote:
>>> On 2/10/2015 11:11 PM, Iremonger, Bernard wrote:
>>>>> -----Original Message-----
>>>>> From: Qiu, Michael
>>>>> Sent: Monday, February 9, 2015 1:10 PM
>>>>> To: Tetsuya Mukawa; dev at dpdk.org
>>>>> Cc: Iremonger, Bernard
>>>>> Subject: Re: [PATCH v7 04/14] eal/pci: Consolidate pci address comparison APIs
>>>>>
>>>>> On 2/9/2015 4:31 PM, Tetsuya Mukawa wrote:
>>>>>> This patch replaces pci_addr_comparison() and memcmp() of pci
>>>>>> addresses by eal_compare_pci_addr().
>>>>>>
>>>>>> v5:
>>>>>> - Fix pci_scan_one to handle pt_driver correctly.
>>>>>> v4:
>>>>>> - Fix calculation method of eal_compare_pci_addr().
>>>>>> - Add parameter checking.
>>>>>>
>>>>>> Signed-off-by: Tetsuya Mukawa <mukawa at igel.co.jp>
>>>>>> ---
>>>>>> lib/librte_eal/bsdapp/eal/eal_pci.c | 25 ++++++++---------------
>>>>>> lib/librte_eal/common/eal_common_pci.c | 2 +-
>>>>>> lib/librte_eal/common/include/rte_pci.h | 34 +++++++++++++++++++++++++++++++
>>>>>> lib/librte_eal/linuxapp/eal/eal_pci.c | 25 ++++++++---------------
>>>>>> lib/librte_eal/linuxapp/eal/eal_pci_uio.c | 2 +-
>>>>>> 5 files changed, 54 insertions(+), 34 deletions(-)
>>>>>>
>>>>>> diff --git a/lib/librte_eal/bsdapp/eal/eal_pci.c
>>>>>> b/lib/librte_eal/bsdapp/eal/eal_pci.c
>>>>>> index 74ecce7..c844d58 100644
>>>>>> --- a/lib/librte_eal/bsdapp/eal/eal_pci.c
>>>>>> +++ b/lib/librte_eal/bsdapp/eal/eal_pci.c
>>>>>> @@ -270,20 +270,6 @@ pci_uio_map_resource(struct rte_pci_device *dev)
>>>>>> return (0);
>>>>>> }
>>>>>>
>>>>>> -/* Compare two PCI device addresses. */ -static int
>>>>>> -pci_addr_comparison(struct rte_pci_addr *addr, struct rte_pci_addr
>>>>>> *addr2) -{
>>>>>> - uint64_t dev_addr = (addr->domain << 24) + (addr->bus << 16) + (addr->devid << 8) + addr-
>>>>>> function;
>>>>>> - uint64_t dev_addr2 = (addr2->domain << 24) + (addr2->bus << 16) + (addr2->devid << 8) +
>>>>> addr2->function;
>>>>>> -
>>>>>> - if (dev_addr > dev_addr2)
>>>>>> - return 1;
>>>>>> - else
>>>>>> - return 0;
>>>>>> -}
>>>>>> -
>>>>>> -
>>>>>> /* Scan one pci sysfs entry, and fill the devices list from it. */
>>>>>> static int pci_scan_one(int dev_pci_fd, struct pci_conf *conf) @@
>>>>>> -356,13 +342,20 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
>>>>>> }
>>>>>> else {
>>>>>> struct rte_pci_device *dev2 = NULL;
>>>>>> + int ret;
>>>>>>
>>>>>> TAILQ_FOREACH(dev2, &pci_device_list, next) {
>>>>>> - if (pci_addr_comparison(&dev->addr, &dev2->addr))
>>>>>> + ret = eal_compare_pci_addr(&dev->addr, &dev2->addr);
>>>>>> + if (ret > 0)
>>>>>> continue;
>>>>>> - else {
>>>>>> + else if (ret < 0) {
>>>>>> TAILQ_INSERT_BEFORE(dev2, dev, next);
>>>>>> return 0;
>>>>>> + } else { /* already registered */
>>>>>> + /* update pt_driver */
>>>>>> + dev2->pt_driver = dev->pt_driver;
>>>>>> + free(dev);
>>>>>> + return 0;
>>>>>> }
>>>>>> }
>>>>>> TAILQ_INSERT_TAIL(&pci_device_list, dev, next); diff --git
>>>>>> a/lib/librte_eal/common/eal_common_pci.c
>>>>>> b/lib/librte_eal/common/eal_common_pci.c
>>>>>> index f3c7f71..a89f5c3 100644
>>>>>> --- a/lib/librte_eal/common/eal_common_pci.c
>>>>>> +++ b/lib/librte_eal/common/eal_common_pci.c
>>>>>> @@ -93,7 +93,7 @@ static struct rte_devargs *pci_devargs_lookup(struct rte_pci_device *dev)
>>>>>> if (devargs->type != RTE_DEVTYPE_BLACKLISTED_PCI &&
>>>>>> devargs->type != RTE_DEVTYPE_WHITELISTED_PCI)
>>>>>> continue;
>>>>>> - if (!memcmp(&dev->addr, &devargs->pci.addr, sizeof(dev->addr)))
>>>>>> + if (!eal_compare_pci_addr(&dev->addr, &devargs->pci.addr))
>>>>>> return devargs;
>>>>>> }
>>>>>> return NULL;
>>>>>> diff --git a/lib/librte_eal/common/include/rte_pci.h
>>>>>> b/lib/librte_eal/common/include/rte_pci.h
>>>>>> index 7f2d699..4814cd7 100644
>>>>>> --- a/lib/librte_eal/common/include/rte_pci.h
>>>>>> +++ b/lib/librte_eal/common/include/rte_pci.h
>>>>>> @@ -269,6 +269,40 @@ eal_parse_pci_DomBDF(const char *input, struct
>>>>>> rte_pci_addr *dev_addr) } #undef GET_PCIADDR_FIELD
>>>>>>
>>>>>> +/* Compare two PCI device addresses. */
>>>>>> +/**
>>>>>> + * Utility function to compare two PCI device addresses.
>>>>>> + *
>>>>>> + * @param addr
>>>>>> + * The PCI Bus-Device-Function address to compare
>>>>>> + * @param addr2
>>>>>> + * The PCI Bus-Device-Function address to compare
>>>>>> + * @return
>>>>>> + * 0 on equal PCI address.
>>>>>> + * Positive on addr is greater than addr2.
>>>>>> + * Negative on addr is less than addr2, or error.
>>>>>> + */
>>>>>> +static inline int
>>>>>> +eal_compare_pci_addr(struct rte_pci_addr *addr, struct rte_pci_addr
>>>>>> +*addr2) {
>>>>>> + uint64_t dev_addr, dev_addr2;
>>>>>> +
>>>>>> + if ((addr == NULL) || (addr2 == NULL))
>>>>>> + return -1;
>>>>>> +
>>>>>> + dev_addr = (addr->domain << 24) | (addr->bus << 16) |
>>>>>> + (addr->devid << 8) | addr->function;
>>>>>> + dev_addr2 = (addr2->domain << 24) | (addr2->bus << 16) |
>>>>>> + (addr2->devid << 8) | addr2->function;
>>>>>> +
>>>>>> + if (dev_addr > dev_addr2)
>>>>>> + return 1;
>>>>>> + else if (dev_addr < dev_addr2)
>>>>>> + return -1;
>>>>>> + else
>>>>>> + return 0;
>>>>>> +}
>>>>>> +
>>>>>> /**
>>>>>> * Probe the PCI bus for registered drivers.
>>>>>> *
>>>>>> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>>>> b/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>>>> index c0ca5a5..d847102 100644
>>>>>> --- a/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>>>> +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>>>> @@ -229,20 +229,6 @@ error:
>>>>>> return -1;
>>>>>> }
>>>>>>
>>>>>> -/* Compare two PCI device addresses. */ -static int
>>>>>> -pci_addr_comparison(struct rte_pci_addr *addr, struct rte_pci_addr
>>>>>> *addr2) -{
>>>>>> - uint64_t dev_addr = (addr->domain << 24) + (addr->bus << 16) + (addr->devid << 8) + addr-
>>>>>> function;
>>>>>> - uint64_t dev_addr2 = (addr2->domain << 24) + (addr2->bus << 16) + (addr2->devid << 8) +
>>>>> addr2->function;
>>>>>> -
>>>>>> - if (dev_addr > dev_addr2)
>>>>>> - return 1;
>>>>>> - else
>>>>>> - return 0;
>>>>>> -}
>>>>>> -
>>>>>> -
>>>>>> /* Scan one pci sysfs entry, and fill the devices list from it. */
>>>>>> static int pci_scan_one(const char *dirname, uint16_t domain, uint8_t
>>>>>> bus, @@ -353,13 +339,20 @@ pci_scan_one(const char *dirname, uint16_t
>>>>>> domain, uint8_t bus,
>>>>>> }
>>>>>> else {
>>>>>> struct rte_pci_device *dev2 = NULL;
>>>>>> + int ret;
>>>>>>
>>>>>> TAILQ_FOREACH(dev2, &pci_device_list, next) {
>>>>>> - if (pci_addr_comparison(&dev->addr, &dev2->addr))
>>>>>> + ret = eal_compare_pci_addr(&dev->addr, &dev2->addr);
>>>>>> + if (ret > 0)
>>>>>> continue;
>>>>>> - else {
>>>>>> + else if (ret < 0) {
>>>>>> TAILQ_INSERT_BEFORE(dev2, dev, next);
>>>>>> return 0;
>>>>>> + } else { /* already registered */
>>>>>> + /* update pt_driver */
>>>>>> + dev2->pt_driver = dev->pt_driver;
>>>> Hi Tetsuya,
>>>>
>>>> I am seeing a problem with the librte_pmd_ixgbe code where dev->max_vfs is being lost in some scenarios.
>>>> The following line should be added here:
>>>> dev2->max_vfs = dev->max_vfs;
>>>>
>>>> numa_mode should probably be updated too (although it is not causing a problem at present).
>>>> dev2->numa_mode = dev->numa_mode;
>>> I'm very curious, why those field miss? I haven't see any places clear
>>> this field.
>>>
>>> What is the root cause?
>> Hi Michael,
>>
>> Here is my guess.
>> The above function creates pci device list.
> I am sorry. I forgot to add below information.
>
> "max_vfs" or "numa_node" value is came from sysfs when the above
> function is processed.
Yes, but it has already been registered, why it missed?
Thanks,
Michael
>> And current DPDK implementation assumes all devices needed to be managed
>> are under igb_uio or vfio when above code is processed.
>> To add hotplug function, we also need to think some devices will start
>> to be managed under igb_uio or vfio after initializing pci device list.
>> Anyway, I guess "max_vfs" value will be changed when igb_uio or vfio
>> manages the device.
>>
>> Hi Bernard,
>>
>> Could you please check "max_vfs" and "num_node" values, then check the
>> values again after the device is managed by igb_uio or vfio?
>> In my environment, it seems max_vfs is created by igb_uio.
>> But my NIC doesn't have VF, so behavior might be different in your
>> environment.
>> I guess "numa_node" should not be changed theoretically.
>>
>> If my guess is correct, how about replacing following values?
>> - driver
>> - max_vfs
>> - resource
>> - (numa_node)
>> Except for above value, I guess other value shouldn't be changed even
>> after the device is managed by igb_uio or vfio.
>>
>> Thanks,
>> Tetsuya
>>
>>> Thanks,
>>> Michael
>>>
>>>> Regards,
>>>>
>>>> Bernard.
>>>>
>>>>
>>>>
>>>>
>>>>>> + free(dev);
>>>>>> + return 0;
>>>>>> }
>>>>>> }
>>>>>> TAILQ_INSERT_TAIL(&pci_device_list, dev, next); diff --git
>>>>>> a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>>>>> b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>>>>> index e53f06b..1da3507 100644
>>>>>> --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>>>>> +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>>>>> @@ -123,7 +123,7 @@ pci_uio_map_secondary(struct rte_pci_device *dev)
>>>>>> TAILQ_FOREACH(uio_res, pci_res_list, next) {
>>>>>>
>>>>>> /* skip this element if it doesn't match our PCI address */
>>>>>> - if (memcmp(&uio_res->pci_addr, &dev->addr, sizeof(dev->addr)))
>>>>>> + if (eal_compare_pci_addr(&uio_res->pci_addr, &dev->addr))
>>>>>> continue;
>>>>>>
>>>>>> for (i = 0; i != uio_res->nb_maps; i++) {
>>>>> Acked-by: Michael Qiu <michael.qiu at intel.com>
>
More information about the dev
mailing list