[PATCH v2 2/2] vhost: fix physical address mapping

Maxime Coquelin maxime.coquelin at redhat.com
Tue Feb 1 09:51:22 CET 2022



On 1/19/22 16:10, xuan.ding at intel.com wrote:
> From: Xuan Ding <xuan.ding at intel.com>
> 
> When choosing IOVA as PA mode, IOVA is likely to be discontinuous,
> which requires page by page mapping for DMA devices. To be consistent,
> this patch implements page by page mapping instead of mapping at the
> region granularity for both IOVA as VA and PA mode.
> 
> Fixes: 7c61fa08b716 ("vhost: enable IOMMU for async vhost")
> 
> Signed-off-by: Xuan Ding <xuan.ding at intel.com>
> Signed-off-by: Yuan Wang <yuanx.wang at intel.com>
> ---
>   lib/vhost/vhost.h      |   1 +
>   lib/vhost/vhost_user.c | 116 ++++++++++++++++++++---------------------
>   2 files changed, 57 insertions(+), 60 deletions(-)
> 
> diff --git a/lib/vhost/vhost.h b/lib/vhost/vhost.h
> index ca7f58039d..9521ae56da 100644
> --- a/lib/vhost/vhost.h
> +++ b/lib/vhost/vhost.h
> @@ -355,6 +355,7 @@ struct vring_packed_desc_event {
>   struct guest_page {
>   	uint64_t guest_phys_addr;
>   	uint64_t host_iova;
> +	uint64_t host_user_addr;
>   	uint64_t size;
>   };
>   
> diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
> index 95c9df697e..48c08716ba 100644
> --- a/lib/vhost/vhost_user.c
> +++ b/lib/vhost/vhost_user.c
> @@ -143,57 +143,56 @@ get_blk_size(int fd)
>   	return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize;
>   }
>   
> -static int
> -async_dma_map(struct rte_vhost_mem_region *region, bool do_map)
> +static void
> +async_dma_map(struct virtio_net *dev, bool do_map)
>   {
> -	uint64_t host_iova;
>   	int ret = 0;
> -
> -	host_iova = rte_mem_virt2iova((void *)(uintptr_t)region->host_user_addr);
> +	uint32_t i;
> +	struct guest_page *page;

Please add an empty line here, between the declarations and the code below.
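Just to illustrate the layout I mean (same declarations as in your patch, only the blank line added):

	int ret = 0;
	uint32_t i;
	struct guest_page *page;

	if (do_map) {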

>   	if (do_map) {
> -		/* Add mapped region into the default container of DPDK. */
> -		ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
> -						 region->host_user_addr,
> -						 host_iova,
> -						 region->size);
> -		if (ret) {
> -			/*
> -			 * DMA device may bind with kernel driver, in this case,
> -			 * we don't need to program IOMMU manually. However, if no
> -			 * device is bound with vfio/uio in DPDK, and vfio kernel
> -			 * module is loaded, the API will still be called and return
> -			 * with ENODEV/ENOSUP.
> -			 *
> -			 * DPDK vfio only returns ENODEV/ENOSUP in very similar
> -			 * situations(vfio either unsupported, or supported
> -			 * but no devices found). Either way, no mappings could be
> -			 * performed. We treat it as normal case in async path.
> -			 */
> -			if (rte_errno == ENODEV || rte_errno == ENOTSUP)
> -				return 0;
> -
> -			VHOST_LOG_CONFIG(ERR, "DMA engine map failed\n");
> -			/* DMA mapping errors won't stop VHST_USER_SET_MEM_TABLE. */
> -			return 0;
> +		for (i = 0; i < dev->nr_guest_pages; i++) {
> +			page = &dev->guest_pages[i];
> +			ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
> +							 page->host_user_addr,
> +							 page->host_iova,
> +							 page->size);
> +			if (ret) {
> +				/*
> +				 * DMA device may bind with kernel driver, in this case,
> +				 * we don't need to program IOMMU manually. However, if no
> +				 * device is bound with vfio/uio in DPDK, and vfio kernel
> +				 * module is loaded, the API will still be called and return
> +				 * with ENODEV.
> +				 *
> +				 * DPDK vfio only returns ENODEV in very similar situations
> +				 * (vfio either unsupported, or supported but no devices found).
> +				 * Either way, no mappings could be performed. We treat it as
> +				 * normal case in async path. This is a workaround.
> +				 */
> +				if (rte_errno == ENODEV)
> +					return;
> +
> +				/* DMA mapping errors won't stop VHOST_USER_SET_MEM_TABLE. */
> +				VHOST_LOG_CONFIG(ERR, "DMA engine map failed\n");
> +			}
>   		}
>   
>   	} else {
> -		/* Remove mapped region from the default container of DPDK. */
> -		ret = rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
> -						   region->host_user_addr,
> -						   host_iova,
> -						   region->size);
> -		if (ret) {
> -			/* like DMA map, ignore the kernel driver case when unmap. */
> -			if (rte_errno == EINVAL)
> -				return 0;
> -
> -			VHOST_LOG_CONFIG(ERR, "DMA engine unmap failed\n");
> -			return ret;
> +		for (i = 0; i < dev->nr_guest_pages; i++) {
> +			page = &dev->guest_pages[i];
> +			ret = rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
> +							   page->host_user_addr,
> +							   page->host_iova,
> +							   page->size);
> +			if (ret) {
> +				/* like DMA map, ignore the kernel driver case when unmap. */
> +				if (rte_errno == EINVAL)
> +					return;
> +
> +				VHOST_LOG_CONFIG(ERR, "DMA engine unmap failed\n");
> +			}
>   		}
>   	}
> -
> -	return ret;
>   }
>   
>   static void
> @@ -205,12 +204,12 @@ free_mem_region(struct virtio_net *dev)
>   	if (!dev || !dev->mem)
>   		return;
>   
> +	if (dev->async_copy && rte_vfio_is_enabled("vfio"))
> +		async_dma_map(dev, false);
> +
>   	for (i = 0; i < dev->mem->nregions; i++) {
>   		reg = &dev->mem->regions[i];
>   		if (reg->host_user_addr) {
> -			if (dev->async_copy && rte_vfio_is_enabled("vfio"))
> -				async_dma_map(reg, false);
> -
>   			munmap(reg->mmap_addr, reg->mmap_size);
>   			close(reg->fd);
>   		}
> @@ -978,7 +977,7 @@ vhost_user_set_vring_base(struct virtio_net **pdev,
>   
>   static int
>   add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
> -		   uint64_t host_iova, uint64_t size)
> +		   uint64_t host_iova, uint64_t host_user_addr, uint64_t size)
>   {
>   	struct guest_page *page, *last_page;
>   	struct guest_page *old_pages;
> @@ -999,8 +998,9 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
>   	if (dev->nr_guest_pages > 0) {
>   		last_page = &dev->guest_pages[dev->nr_guest_pages - 1];
>   		/* merge if the two pages are continuous */
> -		if (host_iova == last_page->host_iova +
> -				      last_page->size) {
> +		if (host_iova == last_page->host_iova + last_page->size
> +		&& guest_phys_addr == last_page->guest_phys_addr + last_page->size
> +		&& host_user_addr == last_page->host_user_addr + last_page->size) {

The indentation of the continuation lines looks wrong, please align them with the rest of the condition as done elsewhere in the file.
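Something along these lines would read better (just a sketch, the exact continuation style should follow the DPDK coding guidelines):

		if (host_iova == last_page->host_iova + last_page->size &&
				guest_phys_addr == last_page->guest_phys_addr + last_page->size &&
				host_user_addr == last_page->host_user_addr + last_page->size) {
			last_page->size += size;
			return 0;
		}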

>   			last_page->size += size;
>   			return 0;
>   		}
> @@ -1009,6 +1009,7 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr,
>   	page = &dev->guest_pages[dev->nr_guest_pages++];
>   	page->guest_phys_addr = guest_phys_addr;
>   	page->host_iova  = host_iova;
> +	page->host_user_addr = host_user_addr;
>   	page->size = size;
>   
>   	return 0;
> @@ -1028,7 +1029,8 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg,
>   	size = page_size - (guest_phys_addr & (page_size - 1));
>   	size = RTE_MIN(size, reg_size);
>   
> -	if (add_one_guest_page(dev, guest_phys_addr, host_iova, size) < 0)
> +	if (add_one_guest_page(dev, guest_phys_addr, host_iova,
> +			       host_user_addr, size) < 0)
>   		return -1;
>   
>   	host_user_addr  += size;
> @@ -1040,7 +1042,7 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg,
>   		host_iova = rte_mem_virt2iova((void *)(uintptr_t)
>   						  host_user_addr);
>   		if (add_one_guest_page(dev, guest_phys_addr, host_iova,
> -				size) < 0)
> +				       host_user_addr, size) < 0)
>   			return -1;
>   
>   		host_user_addr  += size;
> @@ -1215,7 +1217,6 @@ vhost_user_mmap_region(struct virtio_net *dev,
>   	uint64_t mmap_size;
>   	uint64_t alignment;
>   	int populate;
> -	int ret;
>   
>   	/* Check for memory_size + mmap_offset overflow */
>   	if (mmap_offset >= -region->size) {
> @@ -1274,14 +1275,6 @@ vhost_user_mmap_region(struct virtio_net *dev,
>   			VHOST_LOG_CONFIG(ERR, "adding guest pages to region failed.\n");
>   			return -1;
>   		}
> -
> -		if (rte_vfio_is_enabled("vfio")) {
> -			ret = async_dma_map(region, true);
> -			if (ret) {
> -				VHOST_LOG_CONFIG(ERR, "Configure IOMMU for DMA engine failed\n");
> -				return -1;
> -			}
> -		}
>   	}
>   
>   	VHOST_LOG_CONFIG(INFO,
> @@ -1420,6 +1413,9 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
>   		dev->mem->nregions++;
>   	}
>   
> +	if (dev->async_copy && rte_vfio_is_enabled("vfio"))
> +		async_dma_map(dev, true);
> +
>   	if (vhost_user_postcopy_register(dev, main_fd, msg) < 0)
>   		goto free_mem_table;
>   

Overall, the patch looks good.
Please fix the small nits above, and add a Fixes tag and Cc stable on patch 1.
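For reference, the trailers on patch 1 would look something like the below (the commit id and subject are placeholders here, please use those of the commit being fixed):

Fixes: <commit-id> ("<original commit subject>")
Cc: stable@dpdk.org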

Thanks,
Maxime