<div dir="ltr"><div dir="ltr"><br></div><br><div class="gmail_quote gmail_quote_container"><div dir="ltr" class="gmail_attr">On Tue, May 5, 2026 at 7:53 AM <<a href="mailto:pravin.bathija@dell.com">pravin.bathija@dell.com</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">From: Pravin M Bathija <<a href="mailto:pravin.bathija@dell.com" target="_blank">pravin.bathija@dell.com</a>><br>
<br>
Here we define support functions which are called from the various<br>
vhost-user back-end message functions like set memory table, get<br>
memory slots, add memory region, remove memory region. These are<br>
essentially common functions to initialize memory, unmap a set of<br>
memory regions, perform register copy, align memory addresses,<br>
dma map/unmap a single memory region and remove guest pages by<br>
removing all entries belonging to a given memory region.<br>
<br>
Signed-off-by: Pravin M Bathija <<a href="mailto:pravin.bathija@dell.com" target="_blank">pravin.bathija@dell.com</a>><br>
---<br>
lib/vhost/vhost_user.c | 146 ++++++++++++++++++++++++++++++++++++++---<br>
1 file changed, 136 insertions(+), 10 deletions(-)<br>
<br>
diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c<br>
index 4bfb13fb98..1f96ecf963 100644<br>
--- a/lib/vhost/vhost_user.c<br>
+++ b/lib/vhost/vhost_user.c<br>
@@ -171,6 +171,52 @@ get_blk_size(int fd)<br>
return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize;<br>
}<br>
<br>
+static int<br>
+async_dma_map_region(struct virtio_net *dev, struct rte_vhost_mem_region *reg, bool do_map)<br>
+{<br>
+ uint32_t i;<br>
+ int ret;<br>
+ uint64_t reg_start = reg->host_user_addr;<br>
+ uint64_t reg_end = reg_start + reg->size;<br>
+<br>
+ for (i = 0; i < dev->nr_guest_pages; i++) {<br>
+ struct guest_page *page = &dev->guest_pages[i];<br>
+<br>
+ /* Only process pages belonging to this region */<br>
+ if (page->host_user_addr < reg_start ||<br>
+ page->host_user_addr >= reg_end)<br>
+ continue;<br>
+<br>
+ if (do_map) {<br>
+ ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,<br>
+ page->host_user_addr,<br>
+ page->host_iova,<br>
+ page->size);<br>
+ if (ret) {<br>
+ if (rte_errno == ENODEV)<br>
+ return 0;<br>
+<br>
+ VHOST_CONFIG_LOG(dev->ifname, ERR, "DMA engine map failed");<br>
+ return -1;<br>
+ }<br>
+ } else {<br>
+ ret = rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,<br>
+ page->host_user_addr,<br>
+ page->host_iova,<br>
+ page->size);<br>
+ if (ret) {<br>
+ if (rte_errno == EINVAL)<br>
+ return 0;<br>
+<br>
+ VHOST_CONFIG_LOG(dev->ifname, ERR, "DMA engine unmap failed");<br>
+ return -1;<br>
+ }<br>
+ }<br>
+ }<br>
+<br>
+ return 0;<br>
+}<br>
+<br>
static void<br>
async_dma_map(struct virtio_net *dev, bool do_map)<br>
{<br>
@@ -225,7 +271,17 @@ async_dma_map(struct virtio_net *dev, bool do_map)<br>
}<br></blockquote><div><br></div><div>I think async_dma_map and async_dma_map_region should be refactored to avoid code duplication.</div><div>What about something like this:</div><div><br></div><div> static void<br> async_dma_map(struct virtio_net *dev, bool do_map)<br> {<br> uint32_t i;<br> struct rte_vhost_mem_region *reg;<br><br> for (i = 0; i < VHOST_MEMORY_MAX_NREGIONS; i++) {<br> reg = &dev->mem->regions[i];<br> if (reg->host_user_addr == 0)<br> continue;<br> async_dma_map_region(dev, reg, do_map);<br> }<br> }</div><div> </div><div>Also, duplicating code and stripping comments is not ideal, as the comments are important</div><div>(e.g. to understand why we can ignore ENODEV and EINVAL).</div><div><br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
<br>
static void<br>
-free_mem_region(struct virtio_net *dev)<br>
+free_mem_region(struct rte_vhost_mem_region *reg)<br>
+{<br>
+ if (reg != NULL && reg->mmap_addr) {<br>
+ munmap(reg->mmap_addr, reg->mmap_size);<br>
+ close(reg->fd);<br>
+ memset(reg, 0, sizeof(struct rte_vhost_mem_region));<br>
+ }<br>
+}<br>
+<br>
+static void<br>
+free_all_mem_regions(struct virtio_net *dev)<br>
{<br>
uint32_t i;<br>
struct rte_vhost_mem_region *reg;<br>
@@ -236,12 +292,10 @@ free_mem_region(struct virtio_net *dev)<br>
if (dev->async_copy && rte_vfio_is_enabled("vfio"))<br>
async_dma_map(dev, false);<br>
<br>
- for (i = 0; i < dev->mem->nregions; i++) {<br>
+ for (i = 0; i < VHOST_MEMORY_MAX_NREGIONS; i++) {<br>
reg = &dev->mem->regions[i];<br>
- if (reg->host_user_addr) {<br>
- munmap(reg->mmap_addr, reg->mmap_size);<br>
- close(reg->fd);<br>
- }<br>
+ if (reg->mmap_addr)<br>
+ free_mem_region(reg);<br>
}<br>
}<br>
<br>
@@ -255,7 +309,7 @@ vhost_backend_cleanup(struct virtio_net *dev)<br>
vdpa_dev->ops->dev_cleanup(dev->vid);<br>
<br>
if (dev->mem) {<br>
- free_mem_region(dev);<br>
+ free_all_mem_regions(dev);<br>
rte_free(dev->mem);<br>
dev->mem = NULL;<br>
}<br>
@@ -704,7 +758,7 @@ numa_realloc(struct virtio_net **pdev, struct vhost_virtqueue **pvq)<br>
vhost_devices[dev->vid] = dev;<br>
<br>
mem_size = sizeof(struct rte_vhost_memory) +<br>
- sizeof(struct rte_vhost_mem_region) * dev->mem->nregions;<br>
+ sizeof(struct rte_vhost_mem_region) * VHOST_MEMORY_MAX_NREGIONS;<br>
mem = rte_realloc_socket(dev->mem, mem_size, 0, node);<br>
if (!mem) {<br>
VHOST_CONFIG_LOG(dev->ifname, ERR,<br>
@@ -808,8 +862,10 @@ hua_to_alignment(struct rte_vhost_memory *mem, void *ptr)<br>
uint32_t i;<br>
uintptr_t hua = (uintptr_t)ptr;<br>
<br>
- for (i = 0; i < mem->nregions; i++) {<br>
+ for (i = 0; i < VHOST_MEMORY_MAX_NREGIONS; i++) {<br>
r = &mem->regions[i];<br>
+ if (r->host_user_addr == 0)<br>
+ continue;<br>
if (hua >= r->host_user_addr &&<br>
hua < r->host_user_addr + r->size) {<br>
return get_blk_size(r->fd);<br>
@@ -1136,6 +1192,24 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg,<br>
return 0;<br>
}<br>
<br>
+static void<br>
+remove_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg)<br>
+{<br>
+ uint64_t reg_start = reg->host_user_addr;<br>
+ uint64_t reg_end = reg_start + reg->size;<br>
+ uint32_t i, j = 0;<br>
+<br>
+ for (i = 0; i < dev->nr_guest_pages; i++) {<br>
+ if (dev->guest_pages[i].host_user_addr >= reg_start &&<br>
+ dev->guest_pages[i].host_user_addr < reg_end)<br>
+ continue;<br>
+ if (j != i)<br>
+ dev->guest_pages[j] = dev->guest_pages[i];<br>
+ j++;<br>
+ }<br>
+ dev->nr_guest_pages = j;<br>
+}<br>
+<br>
#ifdef RTE_LIBRTE_VHOST_DEBUG<br>
/* TODO: enable it only in debug mode? */<br>
static void<br>
@@ -1246,10 +1320,14 @@ vhost_user_postcopy_register(struct virtio_net *dev, int main_fd,<br>
* DPDK's virtual address with Qemu, so that Qemu can<br>
* retrieve the region offset when handling userfaults.<br>
*/<br>
+ int reg_msg_index = 0;<br>
memory = &ctx->msg.payload.memory;<br>
for (i = 0; i < memory->nregions; i++) {<br>
reg = &dev->mem->regions[i];<br>
- memory->regions[i].userspace_addr = reg->host_user_addr;<br>
+ if (reg->host_user_addr == 0)<br>
+ continue;<br>
+ memory->regions[reg_msg_index].userspace_addr = reg->host_user_addr;<br>
+ reg_msg_index++;<br>
}<br>
<br>
/* Send the addresses back to qemu */<br>
@@ -1278,6 +1356,8 @@ vhost_user_postcopy_register(struct virtio_net *dev, int main_fd,<br>
/* Now userfault register and we can use the memory */<br>
for (i = 0; i < memory->nregions; i++) {<br>
reg = &dev->mem->regions[i];<br>
+ if (reg->host_user_addr == 0)<br>
+ continue;<br>
if (vhost_user_postcopy_region_register(dev, reg) < 0)<br>
return -1;<br>
}<br>
@@ -1382,6 +1462,52 @@ vhost_user_mmap_region(struct virtio_net *dev,<br>
return 0;<br>
}<br>
<br>
+static int<br>
+vhost_user_initialize_memory(struct virtio_net **pdev)<br>
+{<br>
+ struct virtio_net *dev = *pdev;<br>
+ int numa_node = SOCKET_ID_ANY;<br>
+<br>
+ if (dev->mem != NULL) {<br>
+ VHOST_CONFIG_LOG(dev->ifname, ERR,<br>
+ "memory already initialized, free it first");<br>
+ return -1;<br>
+ }<br>
+<br>
+ /*<br>
+ * If VQ 0 has already been allocated, try to allocate on the same<br>
+ * NUMA node. It can be reallocated later in numa_realloc().<br>
+ */<br>
+ if (dev->nr_vring > 0)<br>
+ numa_node = dev->virtqueue[0]->numa_node;<br>
+<br>
+ dev->nr_guest_pages = 0;<br>
+ if (dev->guest_pages == NULL) {<br>
+ dev->max_guest_pages = 8;<br>
+ dev->guest_pages = rte_zmalloc_socket(NULL,<br>
+ dev->max_guest_pages *<br>
+ sizeof(struct guest_page),<br>
+ RTE_CACHE_LINE_SIZE,<br>
+ numa_node);<br>
+ if (dev->guest_pages == NULL) {<br>
+ VHOST_CONFIG_LOG(dev->ifname, ERR,<br>
+ "failed to allocate memory for dev->guest_pages");<br>
+ return -1;<br>
+ }<br>
+ }<br>
+<br>
+ dev->mem = rte_zmalloc_socket("vhost-mem-table", sizeof(struct rte_vhost_memory) +<br>
+ sizeof(struct rte_vhost_mem_region) * VHOST_MEMORY_MAX_NREGIONS, 0, numa_node);<br>
+ if (dev->mem == NULL) {<br>
+ VHOST_CONFIG_LOG(dev->ifname, ERR, "failed to allocate memory for dev->mem");<br>
+ rte_free(dev->guest_pages);<br>
+ dev->guest_pages = NULL;<br>
+ return -1;<br>
+ }<br>
+<br>
+ return 0;<br>
+}<br>
+<br></blockquote><div><br></div><div>I think it should be in a dedicated patch, and in the same patch vhost_user_set_mem_table() would make use of it.</div><div>The idea is to make it straightforward that you are doing a refactoring, and to easily check that the code you are extracting out from</div><div>vhost_user_set_mem_table() into a new function has not been changed in-between.</div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
static int<br>
vhost_user_set_mem_table(struct virtio_net **pdev,<br>
struct vhu_msg_context *ctx,<br>
-- <br>
2.43.0<br>
<br>
</blockquote></div></div>