[dpdk-dev] [PATCH] eal: sPAPR IOMMU support in pci probing for vfio-pci in ppc64le
Burakov, Anatoly
anatoly.burakov at intel.com
Thu Mar 2 16:18:59 CET 2017
> From: Gowrishankar [mailto:gowrishankar.m at linux.vnet.ibm.com]
> Sent: Friday, February 10, 2017 6:18 AM
> To: dev at dpdk.org
> Cc: Chao Zhu <chaozhu at linux.vnet.ibm.com>; Thomas Monjalon
> <thomas.monjalon at 6wind.com>; Burakov, Anatoly
> <anatoly.burakov at intel.com>; Pradeep <pradeep at us.ibm.com>;
> Gowrishankar Muthukrishnan <gowrishankar.m at linux.vnet.ibm.com>
> Subject: [PATCH] eal: sPAPR IOMMU support in pci probing for vfio-pci in
> ppc64le
>
> From: Gowrishankar Muthukrishnan <gowrishankar.m at linux.vnet.ibm.com>
>
> The changes below add pci probing support for vfio-pci devices in power8.
>
> Signed-off-by: Gowrishankar Muthukrishnan
> <gowrishankar.m at linux.vnet.ibm.com>
> Acked-by: Chao Zhu <chaozhu at linux.vnet.ibm.com>
> ---
> lib/librte_eal/linuxapp/eal/eal_vfio.c | 88
> ++++++++++++++++++++++++++++++++++
> lib/librte_eal/linuxapp/eal/eal_vfio.h | 1 +
> 2 files changed, 89 insertions(+)
>
> diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c
> b/lib/librte_eal/linuxapp/eal/eal_vfio.c
> index 702f7a2..1d4fea6 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_vfio.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c
> @@ -50,12 +50,15 @@
> static struct vfio_config vfio_cfg;
>
> static int vfio_type1_dma_map(int);
> +static int vfio_spapr_dma_map(int);
> static int vfio_noiommu_dma_map(int);
>
> /* IOMMU types we support */
> static const struct vfio_iommu_type iommu_types[] = {
> /* x86 IOMMU, otherwise known as type 1 */
> { RTE_VFIO_TYPE1, "Type 1", &vfio_type1_dma_map},
> + /* ppc64 IOMMU, otherwise known as spapr */
> + { RTE_VFIO_SPAPR, "sPAPR", &vfio_spapr_dma_map},
> /* IOMMU-less mode */
> { RTE_VFIO_NOIOMMU, "No-IOMMU",
> &vfio_noiommu_dma_map}, }; @@ -540,6 +543,91 @@ int
> vfio_setup_device(const char *sysfs_base, const char *dev_addr, }
>
> static int
> +vfio_spapr_dma_map(int vfio_container_fd) {
> + const struct rte_memseg *ms = rte_eal_get_physmem_layout();
> + int i, ret;
> +
> + struct vfio_iommu_spapr_register_memory reg = {
> + .argsz = sizeof(reg),
> + .flags = 0
> + };
> + struct vfio_iommu_spapr_tce_info info = {
> + .argsz = sizeof(info),
> + };
> + struct vfio_iommu_spapr_tce_create create = {
> + .argsz = sizeof(create),
> + };
> + struct vfio_iommu_spapr_tce_remove remove = {
> + .argsz = sizeof(remove),
> + };
> +
> + /* query spapr iommu info */
> + ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO,
> &info);
> + if (ret) {
> + RTE_LOG(ERR, EAL, " cannot get iommu info, "
> + "error %i (%s)\n", errno, strerror(errno));
> + return -1;
> + }
> +
> + /* remove default DMA of 32 bit window */
> + remove.start_addr = info.dma32_window_start;
> + ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_REMOVE,
> &remove);
> + if (ret) {
> + RTE_LOG(ERR, EAL, " cannot remove default DMA window, "
> + "error %i (%s)\n", errno, strerror(errno));
> + return -1;
> + }
> +
> + /* calculate window size based on number of hugepages configured
> */
> + create.window_size = rte_eal_get_physmem_size();
> + create.page_shift = __builtin_ctzll(ms->hugepage_sz);
> + create.levels = 2;
> +
> + ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_CREATE,
> &create);
> + if (ret) {
> + RTE_LOG(ERR, EAL, " cannot create new DMA window, "
> + "error %i (%s)\n", errno, strerror(errno));
> + return -1;
> + }
> +
> + /* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
> + for (i = 0; i < RTE_MAX_MEMSEG; i++) {
> + struct vfio_iommu_type1_dma_map dma_map;
> +
> + if (ms[i].addr == NULL)
> + break;
> +
> + reg.vaddr = (uintptr_t) ms[i].addr;
> + reg.size = ms[i].len;
> + ret = ioctl(vfio_container_fd,
> VFIO_IOMMU_SPAPR_REGISTER_MEMORY, &reg);
> + if (ret) {
> + RTE_LOG(ERR, EAL, " cannot register vaddr for
> IOMMU, "
> + "error %i (%s)\n", errno,
> strerror(errno));
> + return -1;
> + }
> +
> + memset(&dma_map, 0, sizeof(dma_map));
> + dma_map.argsz = sizeof(struct
> vfio_iommu_type1_dma_map);
> + dma_map.vaddr = ms[i].addr_64;
> + dma_map.size = ms[i].len;
> + dma_map.iova = ms[i].phys_addr;
> + dma_map.flags = VFIO_DMA_MAP_FLAG_READ |
> VFIO_DMA_MAP_FLAG_WRITE;
> +
> + ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA,
> &dma_map);
> +
> + if (ret) {
> + RTE_LOG(ERR, EAL, " cannot set up DMA remapping,
> "
> + "error %i (%s)\n", errno,
> strerror(errno));
> + return -1;
> + }
> +
> + }
> +
> + return 0;
> +}
> +
> +static int
> vfio_noiommu_dma_map(int __rte_unused vfio_container_fd) {
> /* No-IOMMU mode does not need DMA mapping */ diff --git
> a/lib/librte_eal/linuxapp/eal/eal_vfio.h
> b/lib/librte_eal/linuxapp/eal/eal_vfio.h
> index 29f7f3e..533b854 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_vfio.h
> +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h
> @@ -53,6 +53,7 @@
> #endif
>
> #define RTE_VFIO_TYPE1 VFIO_TYPE1_IOMMU
> +#define RTE_VFIO_SPAPR VFIO_SPAPR_TCE_v2_IOMMU
Does this VFIO type exist for all kernel versions starting with 3.6? It may be worth adding a kernel version check, like the NOIOMMU type has.
>
> #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0) #define
> RTE_VFIO_NOIOMMU 8
> --
> 1.9.1
More information about the dev
mailing list