[dpdk-dev] [PATCH v4] pci: support both PIO and MMIO BAR for legacy virtio on x86
谢华伟(此时此刻)
huawei.xhw at alibaba-inc.com
Wed Oct 21 14:32:01 CEST 2020
On 2020/10/21 19:49, Ferruh Yigit wrote:
> On 10/13/2020 9:41 AM, 谢华伟(此时此刻) wrote:
>> From: "huawei.xhw" <huawei.xhw at alibaba-inc.com>
>>
>> Legacy virtio-pci only supports PIO BAR resource. As we need to
>> create lots of
>> virtio devices and PIO resource on x86 is very limited, we expose
>> MMIO BAR.
>>
>> Kernel supports both PIO and MMIO BAR for legacy virtio-pci device.
>> We handle
>> different types of BAR in a similar way.
>>
>> In previous implementation, with igb_uio we get PIO address from igb_uio
>> sysfs entry; with uio_pci_generic, we get PIO address from
>> /proc/ioports.
>> For PIO/MMIO RW, there are different paths for different drivers and archs.
>> For VFIO, PIO/MMIO RW goes through a syscall, which has a big performance
>> issue.
>> On X86, it assumes only PIO is supported.
>>
>> All of the above is far too convoluted.
>> This patch unifies the way to get both PIO and MMIO address for
>> different driver
>> and arch, all from standard resource attr under pci sysfs.
>>
>
> As mentioned above, this patch does multiple things.
>
> The main target is, as far as I understand, you have a legacy virtio
> device which supports "memory-mapped I/O" and "port-mapped I/O", but
> virtio logic forces legacy devices to use the PIO but you want to be
> able to use the MMIO with this device.
yes.
>
> The solution below is adding MMIO support in the PIO function, and
> distinguishing MMIO from PIO based on an address check.
Yes, the kernel does this in a similar way.
>
>
> Instead of this, can't this be resolved on the virtio side, e.g. if
> the legacy device supports MMIO (detect this somehow), use MMIO
> instead of hacking the PIO mapping to support MMIO?
I understand your concern.
1>
If we move it, I think we should move all of that PCI code to the virtio side,
not just the MMIO part.
Without my patch, that PCI code is virtio-pci device specific, not
generic.
With this patch, the PCI ioport map/rw code could also be used for
other devices if they support both PIO and MMIO.
Either option is OK with me. I hope I have made myself clear.
2> I don't think this is a hack. For rte_pci_ioport_map/read/write, if
an ioport can be either PIO or MMIO, then everything is reasonable.
Take how the kernel does the port mapping, for example:
vp_dev->ioaddr = pci_iomap(pci_dev, 0, 0);
Here "io" doesn't mean PIO only; it could also be MMIO. The kernel then uses
ioread/iowrite to access the PIO/MMIO port.
Actually, our interface is pretty much the same.
I think this patch extends rather than hacks the ioport interface to
support MMIO.
>
> I have other concerns, especially about merging the VFIO mapping too, but let's
> clarify the above first.
The VFIO change doesn't affect other drivers, only virtio.
igb_uio, uio_pci_generic and vfio-pci all use the same way to map/rw
ioport.
>
> Thanks,
> ferruh
>
>
>
>> We distinguish PIO and MMIO by their address, as the kernel does. It
>> is ugly but works.
>>
>> Signed-off-by: huawei.xhw <huawei.xhw at alibaba-inc.com>
>> ---
>> drivers/bus/pci/linux/pci.c | 89 +--------------------
>> drivers/bus/pci/linux/pci_uio.c | 167
>> +++++++++++++++++++++++++++-------------
>> 2 files changed, 119 insertions(+), 137 deletions(-)
>>
>> diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
>> index bf27594..885e54e 100644
>> --- a/drivers/bus/pci/linux/pci.c
>> +++ b/drivers/bus/pci/linux/pci.c
>> @@ -687,71 +687,6 @@ int rte_pci_write_config(const struct
>> rte_pci_device *device,
>> }
>> }
>> -#if defined(RTE_ARCH_X86)
>> -static int
>> -pci_ioport_map(struct rte_pci_device *dev, int bar __rte_unused,
>> - struct rte_pci_ioport *p)
>> -{
>> - uint16_t start, end;
>> - FILE *fp;
>> - char *line = NULL;
>> - char pci_id[16];
>> - int found = 0;
>> - size_t linesz;
>> -
>> - if (rte_eal_iopl_init() != 0) {
>> - RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for
>> PCI device %s\n",
>> - __func__, dev->name);
>> - return -1;
>> - }
>> -
>> - snprintf(pci_id, sizeof(pci_id), PCI_PRI_FMT,
>> - dev->addr.domain, dev->addr.bus,
>> - dev->addr.devid, dev->addr.function);
>> -
>> - fp = fopen("/proc/ioports", "r");
>> - if (fp == NULL) {
>> - RTE_LOG(ERR, EAL, "%s(): can't open ioports\n", __func__);
>> - return -1;
>> - }
>> -
>> - while (getdelim(&line, &linesz, '\n', fp) > 0) {
>> - char *ptr = line;
>> - char *left;
>> - int n;
>> -
>> - n = strcspn(ptr, ":");
>> - ptr[n] = 0;
>> - left = &ptr[n + 1];
>> -
>> - while (*left && isspace(*left))
>> - left++;
>> -
>> - if (!strncmp(left, pci_id, strlen(pci_id))) {
>> - found = 1;
>> -
>> - while (*ptr && isspace(*ptr))
>> - ptr++;
>> -
>> - sscanf(ptr, "%04hx-%04hx", &start, &end);
>> -
>> - break;
>> - }
>> - }
>> -
>> - free(line);
>> - fclose(fp);
>> -
>> - if (!found)
>> - return -1;
>> -
>> - p->base = start;
>> - RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%x\n", start);
>> -
>> - return 0;
>> -}
>> -#endif
>> -
>> int
>> rte_pci_ioport_map(struct rte_pci_device *dev, int bar,
>> struct rte_pci_ioport *p)
>> @@ -762,18 +697,12 @@ int rte_pci_write_config(const struct
>> rte_pci_device *device,
>> #ifdef VFIO_PRESENT
>> case RTE_PCI_KDRV_VFIO:
>> if (pci_vfio_is_enabled())
>> - ret = pci_vfio_ioport_map(dev, bar, p);
>> + ret = pci_uio_ioport_map(dev, bar, p);
>> break;
>> #endif
>> case RTE_PCI_KDRV_IGB_UIO:
>> - ret = pci_uio_ioport_map(dev, bar, p);
>> - break;
>> case RTE_PCI_KDRV_UIO_GENERIC:
>> -#if defined(RTE_ARCH_X86)
>> - ret = pci_ioport_map(dev, bar, p);
>> -#else
>> ret = pci_uio_ioport_map(dev, bar, p);
>> -#endif
>> break;
>> default:
>> break;
>> @@ -792,12 +721,10 @@ int rte_pci_write_config(const struct
>> rte_pci_device *device,
>> switch (p->dev->kdrv) {
>> #ifdef VFIO_PRESENT
>> case RTE_PCI_KDRV_VFIO:
>> - pci_vfio_ioport_read(p, data, len, offset);
>> + pci_uio_ioport_read(p, data, len, offset);
>> break;
>> #endif
>> case RTE_PCI_KDRV_IGB_UIO:
>> - pci_uio_ioport_read(p, data, len, offset);
>> - break;
>> case RTE_PCI_KDRV_UIO_GENERIC:
>> pci_uio_ioport_read(p, data, len, offset);
>> break;
>> @@ -813,12 +740,10 @@ int rte_pci_write_config(const struct
>> rte_pci_device *device,
>> switch (p->dev->kdrv) {
>> #ifdef VFIO_PRESENT
>> case RTE_PCI_KDRV_VFIO:
>> - pci_vfio_ioport_write(p, data, len, offset);
>> + pci_uio_ioport_write(p, data, len, offset);
>> break;
>> #endif
>> case RTE_PCI_KDRV_IGB_UIO:
>> - pci_uio_ioport_write(p, data, len, offset);
>> - break;
>> case RTE_PCI_KDRV_UIO_GENERIC:
>> pci_uio_ioport_write(p, data, len, offset);
>> break;
>> @@ -836,18 +761,12 @@ int rte_pci_write_config(const struct
>> rte_pci_device *device,
>> #ifdef VFIO_PRESENT
>> case RTE_PCI_KDRV_VFIO:
>> if (pci_vfio_is_enabled())
>> - ret = pci_vfio_ioport_unmap(p);
>> + ret = pci_uio_ioport_unmap(p);
>> break;
>> #endif
>> case RTE_PCI_KDRV_IGB_UIO:
>> - ret = pci_uio_ioport_unmap(p);
>> - break;
>> case RTE_PCI_KDRV_UIO_GENERIC:
>> -#if defined(RTE_ARCH_X86)
>> - ret = 0;
>> -#else
>> ret = pci_uio_ioport_unmap(p);
>> -#endif
>> break;
>> default:
>> break;
>> diff --git a/drivers/bus/pci/linux/pci_uio.c
>> b/drivers/bus/pci/linux/pci_uio.c
>> index f3305a2..0062ac0 100644
>> --- a/drivers/bus/pci/linux/pci_uio.c
>> +++ b/drivers/bus/pci/linux/pci_uio.c
>> @@ -22,6 +22,7 @@
>> #include <rte_bus_pci.h>
>> #include <rte_common.h>
>> #include <rte_malloc.h>
>> +#include <rte_bus.h>
>> #include "eal_filesystem.h"
>> #include "pci_init.h"
>> @@ -373,52 +374,83 @@
>> pci_uio_ioport_map(struct rte_pci_device *dev, int bar,
>> struct rte_pci_ioport *p)
>> {
>> + FILE *f = NULL;
>> char dirname[PATH_MAX];
>> char filename[PATH_MAX];
>> + char buf[BUFSIZ];
>> + uint64_t phys_addr, end_addr, flags;
>> int uio_num;
>> - unsigned long start;
>> + unsigned long base;
>> + bool iobar;
>> + int i;
>> - if (rte_eal_iopl_init() != 0) {
>> - RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for
>> PCI device %s\n",
>> - __func__, dev->name);
>> + /* open and read addresses of the corresponding resource in
>> sysfs */
>> + snprintf(filename, sizeof(filename), "%s/" PCI_PRI_FMT "/resource",
>> + rte_pci_get_sysfs_path(), dev->addr.domain, dev->addr.bus,
>> + dev->addr.devid, dev->addr.function);
>> + f = fopen(filename, "r");
>> + if (f == NULL) {
>> + RTE_LOG(ERR, EAL, "Cannot open sysfs resource: %s\n",
>> + strerror(errno));
>> return -1;
>> }
>> - uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 0);
>> - if (uio_num < 0)
>> - return -1;
>> + for (i = 0; i < bar + 1; i++) {
>> + if (fgets(buf, sizeof(buf), f) == NULL) {
>> + RTE_LOG(ERR, EAL, "Cannot read sysfs resource\n");
>> + goto error;
>> + }
>> + }
>> + if (pci_parse_one_sysfs_resource(buf, sizeof(buf), &phys_addr,
>> + &end_addr, &flags) < 0)
>> + goto error;
>> - /* get portio start */
>> - snprintf(filename, sizeof(filename),
>> - "%s/portio/port%d/start", dirname, bar);
>> - if (eal_parse_sysfs_value(filename, &start) < 0) {
>> - RTE_LOG(ERR, EAL, "%s(): cannot parse portio start\n",
>> - __func__);
>> - return -1;
>> + if (flags & IORESOURCE_IO) {
>> + iobar = 1;
>> + base = (unsigned long)phys_addr;
>> + RTE_LOG(INFO, EAL, "%s(): PIO BAR %08lx detected\n",
>> __func__, base);
>> + } else if (flags & IORESOURCE_MEM) {
>> + iobar = 0;
>> + base = (unsigned long)dev->mem_resource[bar].addr;
>> + RTE_LOG(INFO, EAL, "%s(): MMIO BAR %08lx detected\n",
>> __func__, base);
>> + } else {
>> + RTE_LOG(ERR, EAL, "%s(): unknown BAR type\n", __func__);
>> + goto error;
>> + }
>> +
>> + if (iobar && rte_eal_iopl_init() != 0) {
>> + RTE_LOG(ERR, EAL, "%s(): insufficient ioport permissions for
>> PCI device %s\n",
>> + __func__, dev->name);
>> + goto error;
>> }
>> - /* ensure we don't get anything funny here, read/write will cast to
>> - * uin16_t */
>> - if (start > UINT16_MAX)
>> - return -1;
>> /* FIXME only for primary process ? */
>> - if (dev->intr_handle.type == RTE_INTR_HANDLE_UNKNOWN) {
>> + if (dev->intr_handle.type == RTE_INTR_HANDLE_UNKNOWN &&
>> + dev->kdrv == RTE_PCI_KDRV_UIO_GENERIC) {
>> + uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 0);
>> + if (uio_num < 0)
>> + goto error;
>> snprintf(filename, sizeof(filename), "/dev/uio%u", uio_num);
>> dev->intr_handle.fd = open(filename, O_RDWR);
>> if (dev->intr_handle.fd < 0) {
>> RTE_LOG(ERR, EAL, "Cannot open %s: %s\n",
>> filename, strerror(errno));
>> - return -1;
>> + goto error;
>> }
>> dev->intr_handle.type = RTE_INTR_HANDLE_UIO;
>> }
>> - RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%lx\n", start);
>> + RTE_LOG(DEBUG, EAL, "PCI IO port found start=0x%lx\n", base);
>> - p->base = start;
>> + p->base = base;
>> p->len = 0;
>> + fclose(f);
>> return 0;
>> +error:
>> + if (f)
>> + fclose(f);
>> + return -1;
>> }
>> #else
>> int
>> @@ -489,6 +521,61 @@
>> }
>> #endif
>> +#define PIO_MAX 0x10000
>> +static inline uint8_t ioread8(void *addr)
>> +{
>> + uint8_t val;
>> +
>> + val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
>> + *(volatile uint8_t *)addr :
>> + inb((unsigned long)addr);
>> +
>> + return val;
>> +}
>> +
>> +static inline uint16_t ioread16(void *addr)
>> +{
>> + uint16_t val;
>> +
>> + val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
>> + *(volatile uint16_t *)addr :
>> + inw((unsigned long)addr);
>> +
>> + return val;
>> +}
>> +
>> +static inline uint32_t ioread32(void *addr)
>> +{
>> + uint32_t val;
>> +
>> + val = (uint64_t)(uintptr_t)addr >= PIO_MAX ?
>> + *(volatile uint32_t *)addr :
>> + inl((unsigned long)addr);
>> +
>> + return val;
>> +}
>> +
>> +static inline void iowrite8(uint8_t val, void *addr)
>> +{
>> + (uint64_t)(uintptr_t)addr >= PIO_MAX ?
>> + *(volatile uint8_t *)addr = val :
>> + outb(val, (unsigned long)addr);
>> +}
>> +
>> +static inline void iowrite16(uint16_t val, void *addr)
>> +{
>> + (uint64_t)(uintptr_t)addr >= PIO_MAX ?
>> + *(volatile uint16_t *)addr = val :
>> + outw(val, (unsigned long)addr);
>> +}
>> +
>> +static inline void iowrite32(uint32_t val, void *addr)
>> +{
>> + (uint64_t)(uintptr_t)addr >= PIO_MAX ?
>> + *(volatile uint32_t *)addr = val :
>> + outl(val, (unsigned long)addr);
>> +}
>> +
>> void
>> pci_uio_ioport_read(struct rte_pci_ioport *p,
>> void *data, size_t len, off_t offset)
>> @@ -500,25 +587,13 @@
>> for (d = data; len > 0; d += size, reg += size, len -= size) {
>> if (len >= 4) {
>> size = 4;
>> -#if defined(RTE_ARCH_X86)
>> - *(uint32_t *)d = inl(reg);
>> -#else
>> - *(uint32_t *)d = *(volatile uint32_t *)reg;
>> -#endif
>> + *(uint32_t *)d = ioread32((void *)reg);
>> } else if (len >= 2) {
>> size = 2;
>> -#if defined(RTE_ARCH_X86)
>> - *(uint16_t *)d = inw(reg);
>> -#else
>> - *(uint16_t *)d = *(volatile uint16_t *)reg;
>> -#endif
>> + *(uint16_t *)d = ioread16((void *)reg);
>> } else {
>> size = 1;
>> -#if defined(RTE_ARCH_X86)
>> - *d = inb(reg);
>> -#else
>> - *d = *(volatile uint8_t *)reg;
>> -#endif
>> + *d = ioread8((void *)reg);
>> }
>> }
>> }
>> @@ -534,25 +609,13 @@
>> for (s = data; len > 0; s += size, reg += size, len -= size) {
>> if (len >= 4) {
>> size = 4;
>> -#if defined(RTE_ARCH_X86)
>> - outl_p(*(const uint32_t *)s, reg);
>> -#else
>> - *(volatile uint32_t *)reg = *(const uint32_t *)s;
>> -#endif
>> + iowrite32(*(const uint32_t *)s, (void *)reg);
>> } else if (len >= 2) {
>> size = 2;
>> -#if defined(RTE_ARCH_X86)
>> - outw_p(*(const uint16_t *)s, reg);
>> -#else
>> - *(volatile uint16_t *)reg = *(const uint16_t *)s;
>> -#endif
>> + iowrite16(*(const uint16_t *)s, (void *)reg);
>> } else {
>> size = 1;
>> -#if defined(RTE_ARCH_X86)
>> - outb_p(*s, reg);
>> -#else
>> - *(volatile uint8_t *)reg = *s;
>> -#endif
>> + iowrite8(*s, (void *)reg);
>> }
>> }
>> }
>>
More information about the dev
mailing list