patch 'net/netvsc: retry VF hotplug until device disappears' has been queued to stable release 24.11.7
luca.boccassi at gmail.com
luca.boccassi at gmail.com
Thu Jun 11 15:19:37 CEST 2026
Hi,
FYI, your patch has been queued to stable release 24.11.7
Note it hasn't been pushed to http://dpdk.org/browse/dpdk-stable yet.
It will be pushed if I get no objections before 06/13/26. So please
shout if anyone has objections.
Also note that after the patch there's a diff of the upstream commit vs the
patch applied to the branch. This will indicate if there was any rebasing
needed to apply to the stable branch. If there were code changes for rebasing
(i.e., not only metadata diffs), please double check that the rebase was
correctly done.
Queued patches are on a temporary branch at:
https://github.com/bluca/dpdk-stable
This queued commit can be viewed at:
https://github.com/bluca/dpdk-stable/commit/185e3d2e419e10f0cda0c171759d3db13bdcdcb6
Thanks.
Luca Boccassi
---
From 185e3d2e419e10f0cda0c171759d3db13bdcdcb6 Mon Sep 17 00:00:00 2001
From: Long Li <longli at microsoft.com>
Date: Fri, 15 May 2026 12:28:35 -0700
Subject: [PATCH] net/netvsc: retry VF hotplug until device disappears
[ upstream commit 03d66f18ddcbcfeac71e65bec2c27f973ea6f9a8 ]
After PCI rescan on Azure, the MANA kernel driver can take over 100
seconds to probe and create the /sys/bus/pci/devices/<dev>/net directory.
The previous fixed retry limit (NETVSC_MAX_HOTADD_RETRY=10, ~12 seconds)
was insufficient, causing VF re-attach to fail with 'Failed to parse PCI
device' on systems with slow MANA driver initialization.
Replace the fixed retry limit with an indefinite retry that only gives up
when the PCI device itself disappears from sysfs. This is safe because:
- The retry uses rte_eal_alarm callbacks which are serialized on the EAL
interrupt thread, preventing races with VF remove or device close paths.
- Device close (eth_hn_dev_uninit) explicitly cancels all pending hotplug
alarms via rte_eal_alarm_cancel and frees the context.
- If the PCI device is removed while retrying, access() detects the
missing sysfs path and stops immediately.
A NOTICE log every 30 retries (~30s) provides visibility into long waits,
while the per-retry message stays at DEBUG level to avoid flooding the log.
Fixes: a2a23a794b3a ("net/netvsc: support VF device hot add/remove")
Signed-off-by: Long Li <longli at microsoft.com>
---
drivers/net/netvsc/hn_ethdev.c | 41 +++++++++++++++++++++++++---------
1 file changed, 31 insertions(+), 10 deletions(-)
diff --git a/drivers/net/netvsc/hn_ethdev.c b/drivers/net/netvsc/hn_ethdev.c
index da6368457c..d967e1444a 100644
--- a/drivers/net/netvsc/hn_ethdev.c
+++ b/drivers/net/netvsc/hn_ethdev.c
@@ -82,8 +82,8 @@ struct netvsc_mp_param {
#define NETVSC_ARG_TXBREAK "tx_copybreak"
#define NETVSC_ARG_RX_EXTMBUF_ENABLE "rx_extmbuf_enable"
-/* The max number of retry when hot adding a VF device */
-#define NETVSC_MAX_HOTADD_RETRY 10
+/* Retry interval for hot-add VF device (microseconds) */
+#define NETVSC_HOTADD_RETRY_INTERVAL 1000000
struct hn_xstats_name_off {
char name[RTE_ETH_XSTATS_NAME_SIZE];
@@ -604,19 +604,39 @@ static void netvsc_hotplug_retry(void *args)
PMD_DRV_LOG(DEBUG, "%s: retry count %d",
__func__, hot_ctx->eal_hot_plug_retry);
- if (hot_ctx->eal_hot_plug_retry++ > NETVSC_MAX_HOTADD_RETRY) {
- PMD_DRV_LOG(NOTICE, "Failed to parse PCI device retry=%d",
- hot_ctx->eal_hot_plug_retry);
+ hot_ctx->eal_hot_plug_retry++;
+
+ /* Check if PCI device still exists — if it disappeared, give up.
+ * Otherwise keep retrying indefinitely until the net directory
+ * appears. This is safe because:
+ * - MANA driver probe can take >100s after PCI rescan
+ * - The retry uses rte_eal_alarm callbacks serialized on the
+ * EAL interrupt thread, preventing races with device close
+ * - Device close cancels pending alarms and frees the context
+ * - If the PCI device is removed, the access() check below
+ * detects the missing sysfs path and stops immediately
+ */
+ snprintf(buf, sizeof(buf), "/sys/bus/pci/devices/%s", d->name);
+ if (access(buf, F_OK) != 0) {
+ PMD_DRV_LOG(NOTICE,
+ "PCI device %s no longer exists, giving up after %d retries",
+ d->name, hot_ctx->eal_hot_plug_retry);
goto free_hotadd_ctx;
}
snprintf(buf, sizeof(buf), "/sys/bus/pci/devices/%s/net", d->name);
di = opendir(buf);
if (!di) {
- PMD_DRV_LOG(DEBUG, "%s: can't open directory %s, "
- "retrying in 1 second", __func__, buf);
- /* The device is still being initialized, retry after 1 second */
- rte_eal_alarm_set(1000000, netvsc_hotplug_retry, hot_ctx);
+ if (hot_ctx->eal_hot_plug_retry % 30 == 0)
+ PMD_DRV_LOG(NOTICE,
+ "%s: waiting for %s (retry %d, %ds elapsed)",
+ __func__, buf, hot_ctx->eal_hot_plug_retry,
+ hot_ctx->eal_hot_plug_retry);
+ else
+ PMD_DRV_LOG(DEBUG, "%s: can't open directory %s, "
+ "retrying in 1 second", __func__, buf);
+ rte_eal_alarm_set(NETVSC_HOTADD_RETRY_INTERVAL,
+ netvsc_hotplug_retry, hot_ctx);
return;
}
@@ -726,7 +746,8 @@ netvsc_hotadd_callback(const char *device_name, enum rte_dev_event_type type,
rte_spinlock_lock(&hv->hotadd_lock);
LIST_INSERT_HEAD(&hv->hotadd_list, hot_ctx, list);
rte_spinlock_unlock(&hv->hotadd_lock);
- rte_eal_alarm_set(1000000, netvsc_hotplug_retry, hot_ctx);
+ rte_eal_alarm_set(NETVSC_HOTADD_RETRY_INTERVAL,
+ netvsc_hotplug_retry, hot_ctx);
return;
}
--
2.47.3
---
Diff of the applied patch vs upstream commit (please double-check if non-empty):
---
--- - 2026-06-11 14:20:02.466229056 +0100
+++ 0028-net-netvsc-retry-VF-hotplug-until-device-disappears.patch 2026-06-11 14:20:01.194745497 +0100
@@ -1 +1 @@
-From 03d66f18ddcbcfeac71e65bec2c27f973ea6f9a8 Mon Sep 17 00:00:00 2001
+From 185e3d2e419e10f0cda0c171759d3db13bdcdcb6 Mon Sep 17 00:00:00 2001
@@ -5,0 +6,2 @@
+[ upstream commit 03d66f18ddcbcfeac71e65bec2c27f973ea6f9a8 ]
+
@@ -26 +27,0 @@
-Cc: stable at dpdk.org
@@ -34 +35 @@
-index b8880edb4c..85e500c178 100644
+index da6368457c..d967e1444a 100644
@@ -37 +38 @@
-@@ -89,8 +89,8 @@ struct netvsc_mp_param {
+@@ -82,8 +82,8 @@ struct netvsc_mp_param {
@@ -48 +49 @@
-@@ -622,19 +622,39 @@ static void netvsc_hotplug_retry(void *args)
+@@ -604,19 +604,39 @@ static void netvsc_hotplug_retry(void *args)
@@ -95 +96 @@
-@@ -758,7 +778,8 @@ netvsc_hotadd_callback(const char *device_name, enum rte_dev_event_type type,
+@@ -726,7 +746,8 @@ netvsc_hotadd_callback(const char *device_name, enum rte_dev_event_type type,
More information about the stable
mailing list