<div dir="ltr"><div dir="ltr"><br></div><br><div class="gmail_quote gmail_quote_container"><div dir="ltr" class="gmail_attr">On Tue, May 5, 2026 at 7:53 AM <<a href="mailto:pravin.bathija@dell.com">pravin.bathija@dell.com</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">From: Pravin M Bathija <<a href="mailto:pravin.bathija@dell.com" target="_blank">pravin.bathija@dell.com</a>><br>
<br>
Here we define support functions which are called from the various<br>
vhost-user back-end message functions like set memory table, get<br>
memory slots, add memory region, remove memory region.  These are<br>
essentially common functions to initialize memory, unmap a set of<br>
memory regions, perform register copy, align memory addresses,<br>
dma map/unmap a single memory region and remove guest pages by<br>
removing all entries belonging to a given memory region.<br>
<br>
Signed-off-by: Pravin M Bathija <<a href="mailto:pravin.bathija@dell.com" target="_blank">pravin.bathija@dell.com</a>><br>
---<br>
 lib/vhost/vhost_user.c | 146 ++++++++++++++++++++++++++++++++++++++---<br>
 1 file changed, 136 insertions(+), 10 deletions(-)<br>
<br>
diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c<br>
index 4bfb13fb98..1f96ecf963 100644<br>
--- a/lib/vhost/vhost_user.c<br>
+++ b/lib/vhost/vhost_user.c<br>
@@ -171,6 +171,52 @@ get_blk_size(int fd)<br>
        return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize;<br>
 }<br>
<br>
+static int<br>
+async_dma_map_region(struct virtio_net *dev, struct rte_vhost_mem_region *reg, bool do_map)<br>
+{<br>
+       uint32_t i;<br>
+       int ret;<br>
+       uint64_t reg_start = reg->host_user_addr;<br>
+       uint64_t reg_end = reg_start + reg->size;<br>
+<br>
+       for (i = 0; i < dev->nr_guest_pages; i++) {<br>
+               struct guest_page *page = &dev->guest_pages[i];<br>
+<br>
+               /* Only process pages belonging to this region */<br>
+               if (page->host_user_addr < reg_start ||<br>
+                   page->host_user_addr >= reg_end)<br>
+                       continue;<br>
+<br>
+               if (do_map) {<br>
+                       ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,<br>
+                                       page->host_user_addr,<br>
+                                       page->host_iova,<br>
+                                       page->size);<br>
+                       if (ret) {<br>
+                               if (rte_errno == ENODEV)<br>
+                                       return 0;<br>
+<br>
+                               VHOST_CONFIG_LOG(dev->ifname, ERR, "DMA engine map failed");<br>
+                               return -1;<br>
+                       }<br>
+               } else {<br>
+                       ret = rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,<br>
+                                       page->host_user_addr,<br>
+                                       page->host_iova,<br>
+                                       page->size);<br>
+                       if (ret) {<br>
+                               if (rte_errno == EINVAL)<br>
+                                       return 0;<br>
+<br>
+                               VHOST_CONFIG_LOG(dev->ifname, ERR, "DMA engine unmap failed");<br>
+                               return -1;<br>
+                       }<br>
+               }<br>
+       }<br>
+<br>
+       return 0;<br>
+}<br>
+<br>
 static void<br>
 async_dma_map(struct virtio_net *dev, bool do_map)<br>
 {<br>
@@ -225,7 +271,17 @@ async_dma_map(struct virtio_net *dev, bool do_map)<br>
 }<br></blockquote><div><br></div><div>I think async_dma_map and async_dma_map_region should be refactored to avoid code duplication.</div><div>What about something like this:</div><div><br></div><div>  static void<br>  async_dma_map(struct virtio_net *dev, bool do_map)<br>  {<br>      uint32_t i;<br>      struct rte_vhost_mem_region *reg;<br><br>      for (i = 0; i < VHOST_MEMORY_MAX_NREGIONS; i++) {<br>          reg = &dev->mem->regions[i];<br>          if (reg->host_user_addr == 0)<br>              continue;<br>          async_dma_map_region(dev, reg, do_map);<br>      }<br>  }</div><div> </div><div>Also, duplicating code and stripping comments is not ideal, as those comments are important</div><div>(i.e. to understand why we can ignore ENODEV and EINVAL)</div><div><br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
<br>
 static void<br>
-free_mem_region(struct virtio_net *dev)<br>
+free_mem_region(struct rte_vhost_mem_region *reg)<br>
+{<br>
+       if (reg != NULL && reg->mmap_addr) {<br>
+               munmap(reg->mmap_addr, reg->mmap_size);<br>
+               close(reg->fd);<br>
+               memset(reg, 0, sizeof(struct rte_vhost_mem_region));<br>
+       }<br>
+}<br>
+<br>
+static void<br>
+free_all_mem_regions(struct virtio_net *dev)<br>
 {<br>
        uint32_t i;<br>
        struct rte_vhost_mem_region *reg;<br>
@@ -236,12 +292,10 @@ free_mem_region(struct virtio_net *dev)<br>
        if (dev->async_copy && rte_vfio_is_enabled("vfio"))<br>
                async_dma_map(dev, false);<br>
<br>
-       for (i = 0; i < dev->mem->nregions; i++) {<br>
+       for (i = 0; i < VHOST_MEMORY_MAX_NREGIONS; i++) {<br>
                reg = &dev->mem->regions[i];<br>
-               if (reg->host_user_addr) {<br>
-                       munmap(reg->mmap_addr, reg->mmap_size);<br>
-                       close(reg->fd);<br>
-               }<br>
+               if (reg->mmap_addr)<br>
+                       free_mem_region(reg);<br>
        }<br>
 }<br>
<br>
@@ -255,7 +309,7 @@ vhost_backend_cleanup(struct virtio_net *dev)<br>
                vdpa_dev->ops->dev_cleanup(dev->vid);<br>
<br>
        if (dev->mem) {<br>
-               free_mem_region(dev);<br>
+               free_all_mem_regions(dev);<br>
                rte_free(dev->mem);<br>
                dev->mem = NULL;<br>
        }<br>
@@ -704,7 +758,7 @@ numa_realloc(struct virtio_net **pdev, struct vhost_virtqueue **pvq)<br>
        vhost_devices[dev->vid] = dev;<br>
<br>
        mem_size = sizeof(struct rte_vhost_memory) +<br>
-               sizeof(struct rte_vhost_mem_region) * dev->mem->nregions;<br>
+               sizeof(struct rte_vhost_mem_region) * VHOST_MEMORY_MAX_NREGIONS;<br>
        mem = rte_realloc_socket(dev->mem, mem_size, 0, node);<br>
        if (!mem) {<br>
                VHOST_CONFIG_LOG(dev->ifname, ERR,<br>
@@ -808,8 +862,10 @@ hua_to_alignment(struct rte_vhost_memory *mem, void *ptr)<br>
        uint32_t i;<br>
        uintptr_t hua = (uintptr_t)ptr;<br>
<br>
-       for (i = 0; i < mem->nregions; i++) {<br>
+       for (i = 0; i < VHOST_MEMORY_MAX_NREGIONS; i++) {<br>
                r = &mem->regions[i];<br>
+               if (r->host_user_addr == 0)<br>
+                       continue;<br>
                if (hua >= r->host_user_addr &&<br>
                        hua < r->host_user_addr + r->size) {<br>
                        return get_blk_size(r->fd);<br>
@@ -1136,6 +1192,24 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg,<br>
        return 0;<br>
 }<br>
<br>
+static void<br>
+remove_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg)<br>
+{<br>
+       uint64_t reg_start = reg->host_user_addr;<br>
+       uint64_t reg_end = reg_start + reg->size;<br>
+       uint32_t i, j = 0;<br>
+<br>
+       for (i = 0; i < dev->nr_guest_pages; i++) {<br>
+               if (dev->guest_pages[i].host_user_addr >= reg_start &&<br>
+                   dev->guest_pages[i].host_user_addr < reg_end)<br>
+                       continue;<br>
+               if (j != i)<br>
+                       dev->guest_pages[j] = dev->guest_pages[i];<br>
+               j++;<br>
+       }<br>
+       dev->nr_guest_pages = j;<br>
+}<br>
+<br>
 #ifdef RTE_LIBRTE_VHOST_DEBUG<br>
 /* TODO: enable it only in debug mode? */<br>
 static void<br>
@@ -1246,10 +1320,14 @@ vhost_user_postcopy_register(struct virtio_net *dev, int main_fd,<br>
         * DPDK's virtual address with Qemu, so that Qemu can<br>
         * retrieve the region offset when handling userfaults.<br>
         */<br>
+       int reg_msg_index = 0;<br>
        memory = &ctx->msg.payload.memory;<br>
        for (i = 0; i < memory->nregions; i++) {<br>
                reg = &dev->mem->regions[i];<br>
-               memory->regions[i].userspace_addr = reg->host_user_addr;<br>
+               if (reg->host_user_addr == 0)<br>
+                       continue;<br>
+               memory->regions[reg_msg_index].userspace_addr = reg->host_user_addr;<br>
+               reg_msg_index++;<br>
        }<br>
<br>
        /* Send the addresses back to qemu */<br>
@@ -1278,6 +1356,8 @@ vhost_user_postcopy_register(struct virtio_net *dev, int main_fd,<br>
        /* Now userfault register and we can use the memory */<br>
        for (i = 0; i < memory->nregions; i++) {<br>
                reg = &dev->mem->regions[i];<br>
+               if (reg->host_user_addr == 0)<br>
+                       continue;<br>
                if (vhost_user_postcopy_region_register(dev, reg) < 0)<br>
                        return -1;<br>
        }<br>
@@ -1382,6 +1462,52 @@ vhost_user_mmap_region(struct virtio_net *dev,<br>
        return 0;<br>
 }<br>
<br>
+static int<br>
+vhost_user_initialize_memory(struct virtio_net **pdev)<br>
+{<br>
+       struct virtio_net *dev = *pdev;<br>
+       int numa_node = SOCKET_ID_ANY;<br>
+<br>
+       if (dev->mem != NULL) {<br>
+               VHOST_CONFIG_LOG(dev->ifname, ERR,<br>
+                       "memory already initialized, free it first");<br>
+               return -1;<br>
+       }<br>
+<br>
+       /*<br>
+        * If VQ 0 has already been allocated, try to allocate on the same<br>
+        * NUMA node. It can be reallocated later in numa_realloc().<br>
+        */<br>
+       if (dev->nr_vring > 0)<br>
+               numa_node = dev->virtqueue[0]->numa_node;<br>
+<br>
+       dev->nr_guest_pages = 0;<br>
+       if (dev->guest_pages == NULL) {<br>
+               dev->max_guest_pages = 8;<br>
+               dev->guest_pages = rte_zmalloc_socket(NULL,<br>
+                                       dev->max_guest_pages *<br>
+                                       sizeof(struct guest_page),<br>
+                                       RTE_CACHE_LINE_SIZE,<br>
+                                       numa_node);<br>
+               if (dev->guest_pages == NULL) {<br>
+                       VHOST_CONFIG_LOG(dev->ifname, ERR,<br>
+                               "failed to allocate memory for dev->guest_pages");<br>
+                       return -1;<br>
+               }<br>
+       }<br>
+<br>
+       dev->mem = rte_zmalloc_socket("vhost-mem-table", sizeof(struct rte_vhost_memory) +<br>
+               sizeof(struct rte_vhost_mem_region) * VHOST_MEMORY_MAX_NREGIONS, 0, numa_node);<br>
+       if (dev->mem == NULL) {<br>
+               VHOST_CONFIG_LOG(dev->ifname, ERR, "failed to allocate memory for dev->mem");<br>
+               rte_free(dev->guest_pages);<br>
+               dev->guest_pages = NULL;<br>
+               return -1;<br>
+       }<br>
+<br>
+       return 0;<br>
+}<br>
+<br></blockquote><div><br></div><div>I think it should be in a dedicated patch, and vhost_user_set_mem_table() should make use of it in that same patch.</div><div>The idea is to make it straightforward that you are doing a refactoring, and to easily check that the code you are extracting out from</div><div>vhost_user_set_mem_table() into a new function has not been changed in-between.</div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
 static int<br>
 vhost_user_set_mem_table(struct virtio_net **pdev,<br>
                        struct vhu_msg_context *ctx,<br>
-- <br>
2.43.0<br>
<br>
</blockquote></div></div>