[PATCH v8] vhost: fix use-after-free in fdset during shutdown
Yehor Malikov
malikovyehor at gmail.com
Thu Feb 5 19:30:04 CET 2026
From: Yehor Malikov <Yehor.Malikov at solidigm.com>
The fdset_event_dispatch thread runs in a loop checking the destroy
flag after each epoll_wait iteration. During process exit,
rte_eal_cleanup() releases resources (like hugepages via
rte_eal_memory_detach) while the fdset thread is still running.
This race condition can lead to use-after-free errors if the thread
accesses memory that has been freed.
Standard destructors (RTE_FINI) run after rte_eal_cleanup() returns,
which is too late to prevent this race.
To address this, introduce a mechanism to register cleanup callbacks
that run within rte_eal_cleanup() before memory is detached:
1. Add rte_eal_cleanup_register() API to EAL.
2. Implement fdset_deinit() in vhost to signal the dispatch thread to
stop and wait synchronously for it to exit.
3. Add rte_vhost_cleanup(), which stops the vhost-user and VDUSE fdset
threads, and register it as an EAL cleanup callback at constructor
time (RTE_INIT) to ensure proper shutdown ordering via EAL.
Fixes: e68a6feaa3b3 ("vhost: improve fdset initialization")
Signed-off-by: Yehor Malikov <Yehor.Malikov at solidigm.com>
---
.mailmap | 1 +
lib/eal/common/eal_common_cleanup.c | 48 +++++++++++++++++++++++++++++
lib/eal/common/eal_private.h | 5 +++
lib/eal/common/meson.build | 1 +
lib/eal/freebsd/eal.c | 1 +
lib/eal/include/rte_eal.h | 15 +++++++++
lib/eal/linux/eal.c | 1 +
lib/eal/windows/eal.c | 1 +
lib/vhost/fd_man.c | 16 ++++++++++
lib/vhost/fd_man.h | 1 +
lib/vhost/rte_vhost.h | 15 +++++++++
lib/vhost/socket.c | 35 ++++++++++++++++++++-
lib/vhost/vduse.c | 12 +++++++-
lib/vhost/vduse.h | 1 +
14 files changed, 151 insertions(+), 2 deletions(-)
create mode 100644 lib/eal/common/eal_common_cleanup.c
diff --git a/.mailmap b/.mailmap
index fc53ed2a55..711a6ceff5 100644
--- a/.mailmap
+++ b/.mailmap
@@ -1840,6 +1840,7 @@ Yaroslav Brustinov <ybrustin at cisco.com>
Yash Sharma <ysharma at marvell.com>
Yasufumi Ogawa <ogawa.yasufumi at lab.ntt.co.jp> <yasufum.o at gmail.com>
+Yehor Malikov <Yehor.Malikov at solidigm.com>
Yelena Krivosheev <yelena at marvell.com>
Yerden Zhumabekov <e_zhumabekov at sts.kz> <yerden.zhumabekov at sts.kz>
Yevgeny Kliteynik <kliteyn at nvidia.com>
Yi Chen <chenyi221 at huawei.com>
diff --git a/lib/eal/common/eal_common_cleanup.c b/lib/eal/common/eal_common_cleanup.c
new file mode 100644
index 0000000000..933484e254
--- /dev/null
+++ b/lib/eal/common/eal_common_cleanup.c
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2025 Red Hat, Inc.
+ */
+
+#include <stdlib.h>
+#include <rte_spinlock.h>
+#include <rte_eal.h>
+
+#include <eal_export.h>
+#include "eal_private.h"
+
+/* Capacity of the cleanup callback table; registrations beyond it fail. */
+#define MAX_CLEANUP_CALLBACKS 16
+
+/* Registered callbacks, stored in registration order. */
+static rte_eal_cleanup_callback_t cleanup_callbacks[MAX_CLEANUP_CALLBACKS];
+/* Number of occupied slots at the start of cleanup_callbacks[]. */
+static int num_cleanup_callbacks;
+/* Serializes access to cleanup_callbacks[] and num_cleanup_callbacks. */
+static rte_spinlock_t cleanup_lock = RTE_SPINLOCK_INITIALIZER;
+
+/*
+ * Append a callback to the cleanup table.
+ *
+ * Returns 0 when the callback was stored, -1 when callback is NULL or
+ * the table already holds MAX_CLEANUP_CALLBACKS entries.
+ */
+RTE_EXPORT_SYMBOL(rte_eal_cleanup_register)
+int
+rte_eal_cleanup_register(rte_eal_cleanup_callback_t callback)
+{
+	int status;
+
+	if (callback == NULL)
+		return -1;
+
+	rte_spinlock_lock(&cleanup_lock);
+	if (num_cleanup_callbacks >= MAX_CLEANUP_CALLBACKS) {
+		/* Table is full: reject the registration. */
+		status = -1;
+	} else {
+		cleanup_callbacks[num_cleanup_callbacks] = callback;
+		num_cleanup_callbacks++;
+		status = 0;
+	}
+	rte_spinlock_unlock(&cleanup_lock);
+
+	return status;
+}
+
+/*
+ * Invoke every registered cleanup callback, in registration order.
+ *
+ * The table is copied while holding cleanup_lock and the callbacks are
+ * invoked after the lock has been released. A callback may block for a
+ * long time or call rte_eal_cleanup_register() itself; running it with
+ * the (non-recursive) spinlock held would make other registrants
+ * busy-wait or deadlock the calling thread.
+ *
+ * Entries are never NULL because rte_eal_cleanup_register() rejects
+ * NULL callbacks, so no per-entry check is needed.
+ */
+void
+eal_cleanup_callbacks_run(void)
+{
+	rte_eal_cleanup_callback_t snapshot[MAX_CLEANUP_CALLBACKS];
+	int count;
+	int i;
+
+	rte_spinlock_lock(&cleanup_lock);
+	count = num_cleanup_callbacks;
+	for (i = 0; i < count; i++)
+		snapshot[i] = cleanup_callbacks[i];
+	rte_spinlock_unlock(&cleanup_lock);
+
+	for (i = 0; i < count; i++)
+		snapshot[i]();
+}
diff --git a/lib/eal/common/eal_private.h b/lib/eal/common/eal_private.h
index e032dd10c9..3443960f13 100644
--- a/lib/eal/common/eal_private.h
+++ b/lib/eal/common/eal_private.h
@@ -479,6 +479,11 @@ struct rte_bus *rte_bus_find_by_device_name(const char *str);
*/
int eal_bus_cleanup(void);
+/**
+ * Call all registered cleanup callbacks.
+ *
+ * Callbacks added with rte_eal_cleanup_register() are invoked in
+ * registration order. rte_eal_cleanup() calls this function before
+ * eal_bus_cleanup().
+ */
+void eal_cleanup_callbacks_run(void);
+
/**
* Create the unix channel for primary/secondary communication.
*
diff --git a/lib/eal/common/meson.build b/lib/eal/common/meson.build
index e273745e93..c36a82c34b 100644
--- a/lib/eal/common/meson.build
+++ b/lib/eal/common/meson.build
@@ -8,6 +8,7 @@ cflags += [ '-DABI_VERSION="@0@"'.format(abi_version) ]
sources += files(
'eal_common_bus.c',
'eal_common_class.c',
+ 'eal_common_cleanup.c',
'eal_common_config.c',
'eal_common_debug.c',
'eal_common_dev.c',
diff --git a/lib/eal/freebsd/eal.c b/lib/eal/freebsd/eal.c
index 6215245ad5..fdb8927b2b 100644
--- a/lib/eal/freebsd/eal.c
+++ b/lib/eal/freebsd/eal.c
@@ -777,6 +777,7 @@ rte_eal_cleanup(void)
struct internal_config *internal_conf =
eal_get_internal_configuration();
+ eal_cleanup_callbacks_run();
rte_service_finalize();
eal_bus_cleanup();
rte_mp_channel_cleanup();
diff --git a/lib/eal/include/rte_eal.h b/lib/eal/include/rte_eal.h
index 7241f3be5d..06d2820010 100644
--- a/lib/eal/include/rte_eal.h
+++ b/lib/eal/include/rte_eal.h
@@ -126,6 +126,21 @@ int rte_eal_init(int argc, char **argv);
*/
int rte_eal_cleanup(void);
+/**
+ * Cleanup callback function type.
+ *
+ * A cleanup callback takes no argument and returns no value.
+ */
+typedef void (*rte_eal_cleanup_callback_t)(void);
+
+/* NOTE(review): new public API; presumably it should be tagged
+ * __rte_experimental - confirm against the DPDK ABI policy.
+ */
+/**
+ * Register a function to be called during rte_eal_cleanup().
+ *
+ * Callbacks are invoked in registration order. The number of callbacks
+ * that can be registered is fixed (currently 16) and no unregister
+ * function is provided.
+ *
+ * @param callback
+ * The cleanup callback function to register. Must not be NULL.
+ * @return
+ * 0 on success, -1 if callback is NULL or the callback table is full.
+ */
+int rte_eal_cleanup_register(rte_eal_cleanup_callback_t callback);
+
/**
* Check if a primary process is currently alive
*
diff --git a/lib/eal/linux/eal.c b/lib/eal/linux/eal.c
index b12f325ddd..ba552f60c8 100644
--- a/lib/eal/linux/eal.c
+++ b/lib/eal/linux/eal.c
@@ -976,6 +976,7 @@ rte_eal_cleanup(void)
internal_conf->hugepage_file.unlink_existing)
rte_memseg_walk(mark_freeable, NULL);
+ eal_cleanup_callbacks_run();
rte_service_finalize();
eal_bus_cleanup();
vfio_mp_sync_cleanup();
diff --git a/lib/eal/windows/eal.c b/lib/eal/windows/eal.c
index 21fe7cb1d9..3ac61bfd2a 100644
--- a/lib/eal/windows/eal.c
+++ b/lib/eal/windows/eal.c
@@ -142,6 +142,7 @@ rte_eal_cleanup(void)
struct internal_config *internal_conf =
eal_get_internal_configuration();
+ eal_cleanup_callbacks_run();
eal_intr_thread_cancel();
eal_mem_virt2iova_cleanup();
eal_bus_cleanup();
diff --git a/lib/vhost/fd_man.c b/lib/vhost/fd_man.c
index f9147edee7..9790c8a586 100644
--- a/lib/vhost/fd_man.c
+++ b/lib/vhost/fd_man.c
@@ -149,6 +149,22 @@ fdset_init(const char *name)
return NULL;
}
+/*
+ * Stop the event dispatch thread of an fdset and wait for it to exit.
+ *
+ * Sets the destroy flag and then joins the dispatch thread. A join
+ * failure is logged and otherwise ignored. This function does not free
+ * pfdset or close any file descriptor; it only guarantees that, on a
+ * successful join, the dispatch thread is no longer running on return.
+ *
+ * pfdset may be NULL, in which case the call is a no-op.
+ */
+void
+fdset_deinit(struct fdset *pfdset)
+{
+	if (pfdset == NULL)
+		return;
+
+	/* Signal the dispatch thread to stop */
+	pfdset->destroy = true;
+
+	/* Wait for the dispatch thread to exit; its exit value is not needed */
+	if (rte_thread_join(pfdset->tid, NULL) != 0)
+		VHOST_FDMAN_LOG(ERR, "Failed to join %s event dispatch thread", pfdset->name);
+}
+
static int
fdset_insert_entry(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)
{
diff --git a/lib/vhost/fd_man.h b/lib/vhost/fd_man.h
index eadcc6fb42..c9e51badaa 100644
--- a/lib/vhost/fd_man.h
+++ b/lib/vhost/fd_man.h
@@ -15,6 +15,7 @@ struct fdset;
typedef void (*fd_cb)(int fd, void *dat, int *close);
struct fdset *fdset_init(const char *name);
+void fdset_deinit(struct fdset *pfdset);
int fdset_add(struct fdset *pfdset, int fd,
fd_cb rcb, fd_cb wcb, void *dat);
diff --git a/lib/vhost/rte_vhost.h b/lib/vhost/rte_vhost.h
index 2f7c4c0080..31cbbe6039 100644
--- a/lib/vhost/rte_vhost.h
+++ b/lib/vhost/rte_vhost.h
@@ -464,6 +464,21 @@ int rte_vhost_driver_register(const char *path, uint64_t flags);
/* Unregister vhost driver. This is only meaningful to vhost user. */
int rte_vhost_driver_unregister(const char *path);
+/* NOTE(review): new public API; presumably it should be tagged
+ * __rte_experimental - confirm against the DPDK ABI policy.
+ */
+/**
+ * Cleanup vhost library resources.
+ *
+ * This function stops the fdset event dispatch threads for both vhost-user
+ * and VDUSE backends. The vhost library registers it as an EAL cleanup
+ * callback, so rte_eal_cleanup() invokes it automatically; applications
+ * may also call it explicitly before rte_eal_cleanup().
+ *
+ * The function is safe to call multiple times or even if no vhost driver
+ * was ever started.
+ *
+ * @return
+ * 0 on success
+ */
+int rte_vhost_cleanup(void);
+
/**
* Set the vdpa device id, enforce single connection per socket
*
diff --git a/lib/vhost/socket.c b/lib/vhost/socket.c
index ae95e7e6b0..dc9fd16c62 100644
--- a/lib/vhost/socket.c
+++ b/lib/vhost/socket.c
@@ -76,6 +76,8 @@ struct vhost_user_connection {
};
#define MAX_VHOST_SOCKET 1024
+#define VHOST_USER_FDSET_NAME "vhost-evt"
+
struct vhost_user {
struct vhost_user_socket *vsockets[MAX_VHOST_SOCKET];
struct fdset *fdset;
@@ -1198,7 +1200,7 @@ rte_vhost_driver_start(const char *path)
vsocket->extbuf, vsocket->linearbuf);
if (vhost_user.fdset == NULL) {
- vhost_user.fdset = fdset_init("vhost-evt");
+ vhost_user.fdset = fdset_init(VHOST_USER_FDSET_NAME);
if (vhost_user.fdset == NULL) {
VHOST_CONFIG_LOG(path, ERR, "failed to init Vhost-user fdset");
return -1;
@@ -1210,3 +1212,34 @@ rte_vhost_driver_start(const char *path)
else
return vhost_user_start_client(vsocket);
}
+
+/* Stop the vhost-user fdset dispatch thread, if an fdset was created. */
+static void
+vhost_user_fdset_cleanup(void)
+{
+	struct fdset *fdset = vhost_user.fdset;
+
+	/* Nothing to stop when no vhost-user fdset exists. */
+	if (fdset == NULL)
+		return;
+
+	fdset_deinit(fdset);
+	vhost_user.fdset = NULL;
+}
+
+/*
+ * Stop the fdset dispatch threads of both vhost backends.
+ *
+ * Each helper does nothing when its fdset pointer is NULL and resets the
+ * pointer afterwards, so repeated calls are harmless. Always returns 0.
+ */
+RTE_EXPORT_SYMBOL(rte_vhost_cleanup)
+int
+rte_vhost_cleanup(void)
+{
+ /* vhost-user backend first, then VDUSE. */
+ vhost_user_fdset_cleanup();
+ vduse_fdset_cleanup();
+
+ return 0;
+}
+
+/*
+ * Adapter with the void(void) signature required by
+ * rte_eal_cleanup_register(); discards the return value of
+ * rte_vhost_cleanup().
+ */
+static void
+vhost_cleanup_handler(void)
+{
+ rte_vhost_cleanup();
+}
+
+/*
+ * Constructor: register the vhost cleanup handler with EAL so that it
+ * runs from rte_eal_cleanup(). A registration failure is only logged.
+ */
+RTE_INIT(vhost_cleanup_register)
+{
+	/* Use the file's VHOST_CONFIG_LOG helper rather than raw RTE_LOG. */
+	if (rte_eal_cleanup_register(vhost_cleanup_handler) < 0)
+		VHOST_CONFIG_LOG("vhost", ERR, "Failed to register vhost cleanup");
+}
diff --git a/lib/vhost/vduse.c b/lib/vhost/vduse.c
index 0b5d158fee..5c9e77f11e 100644
--- a/lib/vhost/vduse.c
+++ b/lib/vhost/vduse.c
@@ -27,6 +27,7 @@
#define VHOST_VDUSE_API_VERSION 0
#define VDUSE_CTRL_PATH "/dev/vduse/control"
+#define VDUSE_FDSET_NAME "vduse-evt"
struct vduse {
struct fdset *fdset;
@@ -685,7 +686,7 @@ vduse_device_create(const char *path, bool compliant_ol_flags, bool extbuf, bool
bool reconnect = false;
if (vduse.fdset == NULL) {
- vduse.fdset = fdset_init("vduse-evt");
+ vduse.fdset = fdset_init(VDUSE_FDSET_NAME);
if (vduse.fdset == NULL) {
VHOST_CONFIG_LOG(path, ERR, "failed to init VDUSE fdset");
return -1;
@@ -942,3 +943,12 @@ vduse_device_destroy(const char *path)
return 0;
}
+
+/* Stop the VDUSE fdset dispatch thread, if an fdset was created. */
+void
+vduse_fdset_cleanup(void)
+{
+	struct fdset *fdset = vduse.fdset;
+
+	/* Nothing to stop when no VDUSE fdset exists. */
+	if (fdset == NULL)
+		return;
+
+	fdset_deinit(fdset);
+	vduse.fdset = NULL;
+}
diff --git a/lib/vhost/vduse.h b/lib/vhost/vduse.h
index b2515bb9df..53f16b0238 100644
--- a/lib/vhost/vduse.h
+++ b/lib/vhost/vduse.h
@@ -11,5 +11,6 @@
int vduse_device_create(const char *path, bool compliant_ol_flags, bool extbuf, bool linearbuf);
int vduse_device_destroy(const char *path);
+void vduse_fdset_cleanup(void);
#endif /* _VDUSE_H */
--
2.52.0
More information about the dev
mailing list