[PATCH v11 3/5] vhost_user: support function defines for back-end
Bathija, Pravin
Pravin.Bathija at dell.com
Tue May 12 06:26:54 CEST 2026
Hi Stephen, Maxime,
Any feedback on the latest patch-set, v12? Please help merge this into mainline so we can catch the upcoming SPDK release.
Thanks,
Pravin
Internal Use - Confidential
> -----Original Message-----
> From: Bathija, Pravin
> Sent: Tuesday, May 5, 2026 9:00 PM
> To: 'Maxime Coquelin' <maxime.coquelin at redhat.com>
> Cc: dev at dpdk.org; stephen at networkplumber.org; thomas at monjalon.net;
> fengchengwen at huawei.com
> Subject: RE: [PATCH v11 3/5] vhost_user: support function defines for back-end
>
> Hi Maxime,
>
> My responses are inline. I have also submitted patch-set v12 with the changes.
>
> From: Maxime Coquelin <maxime.coquelin at redhat.com>
> Sent: Tuesday, May 5, 2026 2:48 AM
> To: Bathija, Pravin <Pravin.Bathija at dell.com>
> Cc: dev at dpdk.org; stephen at networkplumber.org; thomas at monjalon.net;
> fengchengwen at huawei.com
> Subject: Re: [PATCH v11 3/5] vhost_user: support function defines for back-end
>
> [EXTERNAL EMAIL]
>
>
> On Tue, May 5, 2026 at 7:53 AM <mailto:pravin.bathija at dell.com> wrote:
> From: Pravin M Bathija <mailto:pravin.bathija at dell.com>
>
> Here we define support functions which are called from the various vhost-user
> back-end message functions like set memory table, get memory slots, add
> memory region, remove memory region. These are essentially common
> functions to initialize memory, unmap a set of memory regions, perform
> register copy, align memory addresses, dma map/unmap a single memory
> region and remove guest pages by removing all entries belonging to a given
> memory region.
>
> Signed-off-by: Pravin M Bathija <mailto:pravin.bathija at dell.com>
> ---
> lib/vhost/vhost_user.c | 146 ++++++++++++++++++++++++++++++++++++++--
> -
> 1 file changed, 136 insertions(+), 10 deletions(-)
>
> diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index
> 4bfb13fb98..1f96ecf963 100644
> --- a/lib/vhost/vhost_user.c
> +++ b/lib/vhost/vhost_user.c
> @@ -171,6 +171,52 @@ get_blk_size(int fd)
> return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize;
> }
>
> +static int
> +async_dma_map_region(struct virtio_net *dev, struct
> +rte_vhost_mem_region *reg, bool do_map) {
> + uint32_t i;
> + int ret;
> + uint64_t reg_start = reg->host_user_addr;
> + uint64_t reg_end = reg_start + reg->size;
> +
> + for (i = 0; i < dev->nr_guest_pages; i++) {
> + struct guest_page *page = &dev->guest_pages[i];
> +
> + /* Only process pages belonging to this region */
> + if (page->host_user_addr < reg_start ||
> + page->host_user_addr >= reg_end)
> + continue;
> +
> + if (do_map) {
> + ret =
> +rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
> + page->host_user_addr,
> + page->host_iova,
> + page->size);
> + if (ret) {
> + if (rte_errno == ENODEV)
> + return 0;
> +
> + VHOST_CONFIG_LOG(dev->ifname, ERR, "DMA
> +engine map failed");
> + return -1;
> + }
> + } else {
> + ret =
> +rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
> + page->host_user_addr,
> + page->host_iova,
> + page->size);
> + if (ret) {
> + if (rte_errno == EINVAL)
> + return 0;
> +
> + VHOST_CONFIG_LOG(dev->ifname, ERR, "DMA
> +engine unmap failed");
> + return -1;
> + }
> + }
> + }
> +
> + return 0;
> +}
> +
> static void
> async_dma_map(struct virtio_net *dev, bool do_map)
> {
> @@ -225,7 +271,17 @@ async_dma_map(struct virtio_net *dev, bool
> do_map)
> }
>
> I think async_dma_map and async_dma_map_region should be refactored to
> avoid code duplication. What about something like this:
>
> static void
> async_dma_map(struct virtio_net *dev, bool do_map)
> {
> uint32_t i;
> struct rte_vhost_mem_region *reg;
>
> for (i = 0; i < VHOST_MEMORY_MAX_NREGIONS; i++) {
> reg = &dev->mem->regions[i];
> if (reg->host_user_addr == 0)
> continue;
> async_dma_map_region(dev, reg, do_map);
> }
> }
>
> Also, duplicating code and stripping comments is not ideal, as the comments are
> important (e.g. to understand why we can ignore ENODEV and EINVAL).
>
> DMA refactoring: async_dma_map() now delegates to
> async_dma_map_region(), eliminating the duplicated DMA map/unmap logic.
> The original comments explaining ENODEV/EINVAL handling have been
> restored in async_dma_map_region().
>
>
> static void
> -free_mem_region(struct virtio_net *dev)
> +free_mem_region(struct rte_vhost_mem_region *reg) {
> + if (reg != NULL && reg->mmap_addr) {
> + munmap(reg->mmap_addr, reg->mmap_size);
> + close(reg->fd);
> + memset(reg, 0, sizeof(struct rte_vhost_mem_region));
> + }
> +}
> +
> +static void
> +free_all_mem_regions(struct virtio_net *dev)
> {
> uint32_t i;
> struct rte_vhost_mem_region *reg; @@ -236,12 +292,10 @@
> free_mem_region(struct virtio_net *dev)
> if (dev->async_copy && rte_vfio_is_enabled("vfio"))
> async_dma_map(dev, false);
>
> - for (i = 0; i < dev->mem->nregions; i++) {
> + for (i = 0; i < VHOST_MEMORY_MAX_NREGIONS; i++) {
> reg = &dev->mem->regions[i];
> - if (reg->host_user_addr) {
> - munmap(reg->mmap_addr, reg->mmap_size);
> - close(reg->fd);
> - }
> + if (reg->mmap_addr)
> + free_mem_region(reg);
> }
> }
>
> @@ -255,7 +309,7 @@ vhost_backend_cleanup(struct virtio_net *dev)
> vdpa_dev->ops->dev_cleanup(dev->vid);
>
> if (dev->mem) {
> - free_mem_region(dev);
> + free_all_mem_regions(dev);
> rte_free(dev->mem);
> dev->mem = NULL;
> }
> @@ -704,7 +758,7 @@ numa_realloc(struct virtio_net **pdev, struct
> vhost_virtqueue **pvq)
> vhost_devices[dev->vid] = dev;
>
> mem_size = sizeof(struct rte_vhost_memory) +
> - sizeof(struct rte_vhost_mem_region) * dev->mem->nregions;
> + sizeof(struct rte_vhost_mem_region) *
> +VHOST_MEMORY_MAX_NREGIONS;
> mem = rte_realloc_socket(dev->mem, mem_size, 0, node);
> if (!mem) {
> VHOST_CONFIG_LOG(dev->ifname, ERR, @@ -808,8 +862,10 @@
> hua_to_alignment(struct rte_vhost_memory *mem, void *ptr)
> uint32_t i;
> uintptr_t hua = (uintptr_t)ptr;
>
> - for (i = 0; i < mem->nregions; i++) {
> + for (i = 0; i < VHOST_MEMORY_MAX_NREGIONS; i++) {
> r = &mem->regions[i];
> + if (r->host_user_addr == 0)
> + continue;
> if (hua >= r->host_user_addr &&
> hua < r->host_user_addr + r->size) {
> return get_blk_size(r->fd); @@ -1136,6 +1192,24 @@
> add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg,
> return 0;
> }
>
> +static void
> +remove_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region
> +*reg) {
> + uint64_t reg_start = reg->host_user_addr;
> + uint64_t reg_end = reg_start + reg->size;
> + uint32_t i, j = 0;
> +
> + for (i = 0; i < dev->nr_guest_pages; i++) {
> + if (dev->guest_pages[i].host_user_addr >= reg_start &&
> + dev->guest_pages[i].host_user_addr < reg_end)
> + continue;
> + if (j != i)
> + dev->guest_pages[j] = dev->guest_pages[i];
> + j++;
> + }
> + dev->nr_guest_pages = j;
> +}
> +
> #ifdef RTE_LIBRTE_VHOST_DEBUG
> /* TODO: enable it only in debug mode? */
> static void
> @@ -1246,10 +1320,14 @@ vhost_user_postcopy_register(struct virtio_net
> *dev, int main_fd,
> * DPDK's virtual address with Qemu, so that Qemu can
> * retrieve the region offset when handling userfaults.
> */
> + int reg_msg_index = 0;
> memory = &ctx->msg.payload.memory;
> for (i = 0; i < memory->nregions; i++) {
> reg = &dev->mem->regions[i];
> - memory->regions[i].userspace_addr = reg->host_user_addr;
> + if (reg->host_user_addr == 0)
> + continue;
> + memory->regions[reg_msg_index].userspace_addr =
> +reg->host_user_addr;
> + reg_msg_index++;
> }
>
> /* Send the addresses back to qemu */ @@ -1278,6 +1356,8 @@
> vhost_user_postcopy_register(struct virtio_net *dev, int main_fd,
> /* Now userfault register and we can use the memory */
> for (i = 0; i < memory->nregions; i++) {
> reg = &dev->mem->regions[i];
> + if (reg->host_user_addr == 0)
> + continue;
> if (vhost_user_postcopy_region_register(dev, reg) < 0)
> return -1;
> }
> @@ -1382,6 +1462,52 @@ vhost_user_mmap_region(struct virtio_net *dev,
> return 0;
> }
>
> +static int
> +vhost_user_initialize_memory(struct virtio_net **pdev) {
> + struct virtio_net *dev = *pdev;
> + int numa_node = SOCKET_ID_ANY;
> +
> + if (dev->mem != NULL) {
> + VHOST_CONFIG_LOG(dev->ifname, ERR,
> + "memory already initialized, free it first");
> + return -1;
> + }
> +
> + /*
> + * If VQ 0 has already been allocated, try to allocate on the
> +same
> + * NUMA node. It can be reallocated later in numa_realloc().
> + */
> + if (dev->nr_vring > 0)
> + numa_node = dev->virtqueue[0]->numa_node;
> +
> + dev->nr_guest_pages = 0;
> + if (dev->guest_pages == NULL) {
> + dev->max_guest_pages = 8;
> + dev->guest_pages = rte_zmalloc_socket(NULL,
> + dev->max_guest_pages *
> + sizeof(struct guest_page),
> + RTE_CACHE_LINE_SIZE,
> + numa_node);
> + if (dev->guest_pages == NULL) {
> + VHOST_CONFIG_LOG(dev->ifname, ERR,
> + "failed to allocate memory for
> +dev->guest_pages");
> + return -1;
> + }
> + }
> +
> + dev->mem = rte_zmalloc_socket("vhost-mem-table", sizeof(struct
> +rte_vhost_memory) +
> + sizeof(struct rte_vhost_mem_region) *
> +VHOST_MEMORY_MAX_NREGIONS, 0, numa_node);
> + if (dev->mem == NULL) {
> + VHOST_CONFIG_LOG(dev->ifname, ERR, "failed to allocate
> +memory for dev->mem");
> + rte_free(dev->guest_pages);
> + dev->guest_pages = NULL;
> + return -1;
> + }
> +
> + return 0;
> +}
> +
>
> I think it should be in a dedicated patch, and in that same patch
> vhost_user_set_mem_table() would make use of it.
> The idea is to make it straightforward to see that you are doing a refactoring, and to easily
> check that the code you are extracting out from
> vhost_user_set_mem_table() into a new function has not been changed in
> between.
>
> vhost_user_initialize_memory() patch placement: Moved from patch 3 to patch
> 4, grouped with the
> vhost_user_set_mem_table() refactoring that uses it. This makes the
> extraction clearer to review as a pure refactor
> without mixing it with other changes.
>
> static int
> vhost_user_set_mem_table(struct virtio_net **pdev,
> struct vhu_msg_context *ctx,
> --
> 2.43.0
More information about the dev
mailing list