[dpdk-dev] [PATCH 2/2] vfio: fix partial DMA unmapping for VFIO type1
Burakov, Anatoly
anatoly.burakov at intel.com
Wed Oct 14 17:07:10 CEST 2020
On 12-Oct-20 9:11 AM, Nithin Dabilpuram wrote:
> Partial unmapping is not supported for VFIO IOMMU type1
> by kernel. Though kernel gives return as zero, the unmapped size
> returned will not be same as expected. So check for
> returned unmap size and return error.
>
> For case of DMA map/unmap triggered by heap allocations,
> maintain granularity of memseg page size so that heap
> expansion and contraction does not have this issue.
This is quite unfortunate, because there was a different bug that had to
do with kernel having a very limited number of mappings available [1],
as a result of which the page concatenation code was added.
It should therefore be documented that the dma_entry_limit parameter
should be adjusted should the user run out of the DMA entries.
[1]
https://lore.kernel.org/lkml/155414977872.12780.13728555131525362206.stgit@gimli.home/T/
>
> For user requested DMA map/unmap disallow partial unmapping
> for VFIO type1.
>
> Fixes: 73a639085938 ("vfio: allow to map other memory regions")
> Cc: anatoly.burakov at intel.com
> Cc: stable at dpdk.org
>
> Signed-off-by: Nithin Dabilpuram <ndabilpuram at marvell.com>
> ---
> lib/librte_eal/linux/eal_vfio.c | 34 ++++++++++++++++++++++++++++------
> lib/librte_eal/linux/eal_vfio.h | 1 +
> 2 files changed, 29 insertions(+), 6 deletions(-)
>
> diff --git a/lib/librte_eal/linux/eal_vfio.c b/lib/librte_eal/linux/eal_vfio.c
> index d26e164..ef95259 100644
> --- a/lib/librte_eal/linux/eal_vfio.c
> +++ b/lib/librte_eal/linux/eal_vfio.c
> @@ -69,6 +69,7 @@ static const struct vfio_iommu_type iommu_types[] = {
> {
> .type_id = RTE_VFIO_TYPE1,
> .name = "Type 1",
> + .partial_unmap = false,
> .dma_map_func = &vfio_type1_dma_map,
> .dma_user_map_func = &vfio_type1_dma_mem_map
> },
> @@ -76,6 +77,7 @@ static const struct vfio_iommu_type iommu_types[] = {
> {
> .type_id = RTE_VFIO_SPAPR,
> .name = "sPAPR",
> + .partial_unmap = true,
> .dma_map_func = &vfio_spapr_dma_map,
> .dma_user_map_func = &vfio_spapr_dma_mem_map
> },
> @@ -83,6 +85,7 @@ static const struct vfio_iommu_type iommu_types[] = {
> {
> .type_id = RTE_VFIO_NOIOMMU,
> .name = "No-IOMMU",
> + .partial_unmap = true,
> .dma_map_func = &vfio_noiommu_dma_map,
> .dma_user_map_func = &vfio_noiommu_dma_mem_map
> },
> @@ -525,12 +528,19 @@ vfio_mem_event_callback(enum rte_mem_event type, const void *addr, size_t len,
> /* for IOVA as VA mode, no need to care for IOVA addresses */
> if (rte_eal_iova_mode() == RTE_IOVA_VA && msl->external == 0) {
> uint64_t vfio_va = (uint64_t)(uintptr_t)addr;
> - if (type == RTE_MEM_EVENT_ALLOC)
> - vfio_dma_mem_map(default_vfio_cfg, vfio_va, vfio_va,
> - len, 1);
> - else
> - vfio_dma_mem_map(default_vfio_cfg, vfio_va, vfio_va,
> - len, 0);
> + uint64_t page_sz = msl->page_sz;
> +
> + /* Maintain granularity of DMA map/unmap to memseg size */
> + for (; cur_len < len; cur_len += page_sz) {
> + if (type == RTE_MEM_EVENT_ALLOC)
> + vfio_dma_mem_map(default_vfio_cfg, vfio_va,
> + vfio_va, page_sz, 1);
> + else
> + vfio_dma_mem_map(default_vfio_cfg, vfio_va,
> + vfio_va, page_sz, 0);
> + vfio_va += page_sz;
> + }
> +
You'd also have to revert d1c7c0cdf7bac5eb40d3a2a690453aefeee5887b
because currently the PA path will opportunistically concantenate
contiguous segments into single mapping too.
> return;
> }
>
> @@ -1383,6 +1393,12 @@ vfio_type1_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
> RTE_LOG(ERR, EAL, " cannot clear DMA remapping, error %i (%s)\n",
> errno, strerror(errno));
> return -1;
> + } else if (dma_unmap.size != len) {
> + RTE_LOG(ERR, EAL, " unexpected size %"PRIu64" of DMA "
> + "remapping cleared instead of %"PRIu64"\n",
> + (uint64_t)dma_unmap.size, len);
> + rte_errno = EIO;
> + return -1;
> }
> }
>
> @@ -1853,6 +1869,12 @@ container_dma_unmap(struct vfio_config *vfio_cfg, uint64_t vaddr, uint64_t iova,
> /* we're partially unmapping a previously mapped region, so we
> * need to split entry into two.
> */
> + if (!vfio_cfg->vfio_iommu_type->partial_unmap) {
> + RTE_LOG(DEBUG, EAL, "DMA partial unmap unsupported\n");
> + rte_errno = ENOTSUP;
> + ret = -1;
> + goto out;
> + }
How would we ever arrive here if we never do more than 1 page worth of
memory anyway? I don't think this is needed.
> if (user_mem_maps->n_maps == VFIO_MAX_USER_MEM_MAPS) {
> RTE_LOG(ERR, EAL, "Not enough space to store partial mapping\n");
> rte_errno = ENOMEM;
> diff --git a/lib/librte_eal/linux/eal_vfio.h b/lib/librte_eal/linux/eal_vfio.h
> index cb2d35f..6ebaca6 100644
> --- a/lib/librte_eal/linux/eal_vfio.h
> +++ b/lib/librte_eal/linux/eal_vfio.h
> @@ -113,6 +113,7 @@ typedef int (*vfio_dma_user_func_t)(int fd, uint64_t vaddr, uint64_t iova,
> struct vfio_iommu_type {
> int type_id;
> const char *name;
> + bool partial_unmap;
> vfio_dma_user_func_t dma_user_map_func;
> vfio_dma_func_t dma_map_func;
> };
>
--
Thanks,
Anatoly
More information about the dev
mailing list