[dpdk-dev] [PATCH 19.11] vfio: fix DMA mapping of externally allocated heaps

David Marchand david.marchand at redhat.com
Wed Nov 6 22:53:43 CET 2019


On Tue, Nov 5, 2019 at 4:15 PM Anatoly Burakov
<anatoly.burakov at intel.com> wrote:
>
> Currently, externally created heaps are supposed to be automatically
> mapped for VFIO DMA by EAL, however they only do so if, at the time of
> heap creation, VFIO is initialized and has at least one device
> available. If no devices are available at the time of heap creation (or
> if devices were available, but were since hot-unplugged, thus dropping
> all VFIO container mappings), then VFIO mapping code would have skipped
> over externally allocated heaps.
>
> The fix is two-fold. First, we allow externally allocated memory
> segments to be marked as "heap" segments. This allows us to distinguish
> between external memory segments that were created via heap API, from
> those that were created via rte_extmem_register() API.
>
> Then, we fix the VFIO code to only skip non-heap external segments.
> Also, since external heaps are not guaranteed to have valid IOVA
> addresses, we will skip those which have invalid IOVA addresses as well.
>
> Fixes: 0f526d674f8e ("malloc: separate creating memseg list and malloc heap")
>
> Signed-off-by: Anatoly Burakov <anatoly.burakov at intel.com>
> ---
>
> Notes:
>     This cannot be backported to older releases as it breaks the
>     API and ABI. A separate fix is in the works for stable.

I'd say we still have to Cc: stable, on the principle so that the
stable maintainers know there is an issue on this commit.

Kevin, wdyt?

>
>  lib/librte_eal/common/include/rte_memory.h |  1 +
>  lib/librte_eal/common/rte_malloc.c         |  1 +
>  lib/librte_eal/freebsd/eal/eal_memory.c    |  1 +
>  lib/librte_eal/linux/eal/eal_memory.c      |  3 ++
>  lib/librte_eal/linux/eal/eal_vfio.c        | 46 +++++++++++++++++++---
>  5 files changed, 47 insertions(+), 5 deletions(-)
>
> diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h
> index 38e00e382c..bf81a2faa8 100644
> --- a/lib/librte_eal/common/include/rte_memory.h
> +++ b/lib/librte_eal/common/include/rte_memory.h
> @@ -81,6 +81,7 @@ struct rte_memseg_list {
>         volatile uint32_t version; /**< version number for multiprocess sync. */
>         size_t len; /**< Length of memory area covered by this memseg list. */
>         unsigned int external; /**< 1 if this list points to external memory */
> +       unsigned int heap; /**< 1 if this list points to a heap */
>         struct rte_fbarray memseg_arr;
>  };
>
> diff --git a/lib/librte_eal/common/rte_malloc.c b/lib/librte_eal/common/rte_malloc.c
> index 044d3a9078..413e4aa004 100644
> --- a/lib/librte_eal/common/rte_malloc.c
> +++ b/lib/librte_eal/common/rte_malloc.c
> @@ -396,6 +396,7 @@ rte_malloc_heap_memory_add(const char *heap_name, void *va_addr, size_t len,
>
>         rte_spinlock_lock(&heap->lock);
>         ret = malloc_heap_add_external_memory(heap, msl);
> +       msl->heap = 1; /* mark it as heap segment */
>         rte_spinlock_unlock(&heap->lock);
>
>  unlock:
> diff --git a/lib/librte_eal/freebsd/eal/eal_memory.c b/lib/librte_eal/freebsd/eal/eal_memory.c
> index 7fe3178898..a97d8f0f0c 100644
> --- a/lib/librte_eal/freebsd/eal/eal_memory.c
> +++ b/lib/librte_eal/freebsd/eal/eal_memory.c
> @@ -93,6 +93,7 @@ rte_eal_hugepage_init(void)
>                 msl->page_sz = page_sz;
>                 msl->len = internal_config.memory;
>                 msl->socket_id = 0;
> +               msl->heap = 1;
>
>                 /* populate memsegs. each memseg is 1 page long */
>                 for (cur_seg = 0; cur_seg < n_segs; cur_seg++) {
> diff --git a/lib/librte_eal/linux/eal/eal_memory.c b/lib/librte_eal/linux/eal/eal_memory.c
> index accfd2e232..43e4ffc757 100644
> --- a/lib/librte_eal/linux/eal/eal_memory.c
> +++ b/lib/librte_eal/linux/eal/eal_memory.c
> @@ -831,6 +831,7 @@ alloc_memseg_list(struct rte_memseg_list *msl, uint64_t page_sz,
>         msl->page_sz = page_sz;
>         msl->socket_id = socket_id;
>         msl->base_va = NULL;
> +       msl->heap = 1; /* mark it as a heap segment */
>
>         RTE_LOG(DEBUG, EAL, "Memseg list allocated: 0x%zxkB at socket %i\n",
>                         (size_t)page_sz >> 10, socket_id);
> @@ -1405,6 +1406,7 @@ eal_legacy_hugepage_init(void)
>                 msl->page_sz = page_sz;
>                 msl->socket_id = 0;
>                 msl->len = internal_config.memory;
> +               msl->heap = 1;
>
>                 /* we're in single-file segments mode, so only the segment list
>                  * fd needs to be set up.
> @@ -1677,6 +1679,7 @@ eal_legacy_hugepage_init(void)
>                 mem_sz = msl->len;
>                 munmap(msl->base_va, mem_sz);
>                 msl->base_va = NULL;
> +               msl->heap = 0;
>
>                 /* destroy backing fbarray */
>                 rte_fbarray_destroy(&msl->memseg_arr);
> diff --git a/lib/librte_eal/linux/eal/eal_vfio.c b/lib/librte_eal/linux/eal/eal_vfio.c
> index d9541b1220..d5a2bbea0d 100644
> --- a/lib/librte_eal/linux/eal/eal_vfio.c
> +++ b/lib/librte_eal/linux/eal/eal_vfio.c
> @@ -1250,7 +1250,16 @@ type1_map(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
>  {
>         int *vfio_container_fd = arg;
>
> -       if (msl->external)
> +       /* skip external memory that isn't a heap */
> +       if (msl->external && !msl->heap)
> +               return 0;
> +
> +       /* skip any segments with invalid IOVA addresses */
> +       if (ms->iova == RTE_BAD_IOVA)
> +               return 0;
> +
> +       /* if IOVA mode is VA, we've already mapped the internal segments */
> +       if (!msl->external && rte_eal_iova_mode() == RTE_IOVA_VA)
>                 return 0;
>
>         return vfio_type1_dma_mem_map(*vfio_container_fd, ms->addr_64, ms->iova,
> @@ -1313,12 +1322,18 @@ vfio_type1_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
>  static int
>  vfio_type1_dma_map(int vfio_container_fd)
>  {
> +       int ret;
>         if (rte_eal_iova_mode() == RTE_IOVA_VA) {
>                 /* with IOVA as VA mode, we can get away with mapping contiguous
>                  * chunks rather than going page-by-page.
>                  */
> -               return rte_memseg_contig_walk(type1_map_contig,
> +               ret = rte_memseg_contig_walk(type1_map_contig,

We can move the ret variable declaration in this block.
I can do when applying.

>                                 &vfio_container_fd);
> +               if (ret)
> +                       return ret;
> +               /* we have to continue the walk because we've skipped the
> +                * external segments during the config walk.
> +                */
>         }
>         return rte_memseg_walk(type1_map, &vfio_container_fd);
>  }
> @@ -1410,7 +1425,15 @@ vfio_spapr_map_walk(const struct rte_memseg_list *msl,
>  {
>         struct spapr_remap_walk_param *param = arg;
>
> -       if (msl->external || ms->addr_64 == param->addr_64)
> +       /* skip external memory that isn't a heap */
> +       if (msl->external && !msl->heap)
> +               return 0;
> +
> +       /* skip any segments with invalid IOVA addresses */
> +       if (ms->iova == RTE_BAD_IOVA)
> +               return 0;
> +
> +       if (ms->addr_64 == param->addr_64)
>                 return 0;
>
>         return vfio_spapr_dma_do_map(param->vfio_container_fd, ms->addr_64, ms->iova,
> @@ -1423,7 +1446,15 @@ vfio_spapr_unmap_walk(const struct rte_memseg_list *msl,
>  {
>         struct spapr_remap_walk_param *param = arg;
>
> -       if (msl->external || ms->addr_64 == param->addr_64)
> +       /* skip external memory that isn't a heap */
> +       if (msl->external && !msl->heap)
> +               return 0;
> +
> +       /* skip any segments with invalid IOVA addresses */
> +       if (ms->iova == RTE_BAD_IOVA)
> +               return 0;
> +
> +       if (ms->addr_64 == param->addr_64)
>                 return 0;
>
>         return vfio_spapr_dma_do_map(param->vfio_container_fd, ms->addr_64, ms->iova,
> @@ -1443,7 +1474,12 @@ vfio_spapr_window_size_walk(const struct rte_memseg_list *msl,
>         struct spapr_walk_param *param = arg;
>         uint64_t max = ms->iova + ms->len;
>
> -       if (msl->external)
> +       /* skip external memory that isn't a heap */
> +       if (msl->external && !msl->heap)
> +               return 0;
> +
> +       /* skip any segments with invalid IOVA addresses */
> +       if (ms->iova == RTE_BAD_IOVA)
>                 return 0;
>
>         /* do not iterate ms we haven't mapped yet  */
> --
> 2.17.1



-- 
David Marchand


More information about the dev mailing list