patch 'net/mlx5: fix device start error handling' has been queued to stable release 23.11.6

Shani Peretz shperetz at nvidia.com
Thu Dec 25 10:18:53 CET 2025


Hi,

FYI, your patch has been queued to stable release 23.11.6

Note it hasn't been pushed to http://dpdk.org/browse/dpdk-stable yet.
It will be pushed if I get no objections before 12/30/25. So please
shout if anyone has objections.

Also note that after the patch there's a diff of the upstream commit vs the
patch applied to the branch. This will indicate if there was any rebasing
needed to apply to the stable branch. If there were code changes for rebasing
(i.e., not only metadata diffs), please double check that the rebase was
correctly done.

Queued patches are on a temporary branch at:
https://github.com/shanipr/dpdk-stable

This queued commit can be viewed at:
https://github.com/shanipr/dpdk-stable/commit/efb731220b65055d70e0c7718c94d88741fe91f6

Thanks.

Shani

---
From efb731220b65055d70e0c7718c94d88741fe91f6 Mon Sep 17 00:00:00 2001
From: Maayan Kashani <mkashani at nvidia.com>
Date: Tue, 25 Nov 2025 13:09:27 +0200
Subject: [PATCH] net/mlx5: fix device start error handling

[ upstream commit 860f6c63dbc1 ]

When mlx5_dev_start() fails partway through initialization, the error
cleanup code unconditionally calls cleanup functions for all steps,
including those that were never successfully initialized. This causes
state corruption leading to incorrect behavior on subsequent start
attempts.

The issue manifests as:
1. First start attempt fails with -ENOMEM (expected)
2. Second start attempt returns -EINVAL instead of -ENOMEM
3. With flow isolated mode, second attempt incorrectly succeeds,
   leading to segfault in rte_eth_rx_burst()

Root cause: The single error label cleanup path calls functions like
mlx5_traffic_disable() and mlx5_flow_stop_default() even when their
corresponding initialization functions (mlx5_traffic_enable() and
mlx5_flow_start_default()) were never called due to earlier failure.

For example, when mlx5_rxq_start() fails:
- mlx5_traffic_enable() at line 1403 never executes
- mlx5_flow_start_default() at line 1420 never executes
- But cleanup unconditionally calls:
  * mlx5_traffic_disable() - destroys control flows list
  * mlx5_flow_stop_default() - corrupts flow metadata state

This corrupts the device state, causing subsequent start attempts to
fail with different errors or, in isolated mode, to incorrectly succeed
with an improperly initialized device.

Fix by replacing the single error label with cascading error labels
(Linux kernel style). Each label cleans up only its corresponding step,
then falls through to clean up earlier steps.
This ensures only successfully initialized steps are cleaned up,
maintaining device state consistency across failed start attempts.

Bugzilla ID: 1419
Fixes: 8db7e3b69822 ("net/mlx5: change operations for non-cached flows")
Cc: stable at dpdk.org

Signed-off-by: Maayan Kashani <mkashani at nvidia.com>
Acked-by: Dariusz Sosnowski <dsosnowski at nvidia.com>
---
 drivers/net/mlx5/mlx5_trigger.c | 60 +++++++++++++++++++++++----------
 1 file changed, 42 insertions(+), 18 deletions(-)

diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 3bda84e963..7332a79609 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -1133,6 +1133,11 @@ mlx5_hw_representor_port_allowed_start(struct rte_eth_dev *dev)
 
 #endif
 
+#define SAVE_RTE_ERRNO_AND_STOP(ret, dev) do {	\
+	ret = rte_errno;			\
+	(dev)->data->dev_started = 0;		\
+} while (0)
+
 /**
  * DPDK callback to start the device.
  *
@@ -1203,19 +1208,23 @@ continue_dev_start:
 	if (ret) {
 		DRV_LOG(ERR, "port %u Tx packet pacing init failed: %s",
 			dev->data->port_id, strerror(rte_errno));
+		SAVE_RTE_ERRNO_AND_STOP(ret, dev);
 		goto error;
 	}
 	if (mlx5_devx_obj_ops_en(priv->sh) &&
 	    priv->obj_ops.lb_dummy_queue_create) {
 		ret = priv->obj_ops.lb_dummy_queue_create(dev);
-		if (ret)
-			goto error;
+		if (ret) {
+			SAVE_RTE_ERRNO_AND_STOP(ret, dev);
+			goto txpp_stop;
+		}
 	}
 	ret = mlx5_txq_start(dev);
 	if (ret) {
 		DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
 			dev->data->port_id, strerror(rte_errno));
-		goto error;
+		SAVE_RTE_ERRNO_AND_STOP(ret, dev);
+		goto lb_dummy_queue_release;
 	}
 	if (priv->config.std_delay_drop || priv->config.hp_delay_drop) {
 		if (!priv->sh->dev_cap.vf && !priv->sh->dev_cap.sf &&
@@ -1239,7 +1248,8 @@ continue_dev_start:
 	if (ret) {
 		DRV_LOG(ERR, "port %u Rx queue allocation failed: %s",
 			dev->data->port_id, strerror(rte_errno));
-		goto error;
+		SAVE_RTE_ERRNO_AND_STOP(ret, dev);
+		goto txq_stop;
 	}
 	/*
 	 * Such step will be skipped if there is no hairpin TX queue configured
@@ -1249,7 +1259,8 @@ continue_dev_start:
 	if (ret) {
 		DRV_LOG(ERR, "port %u hairpin auto binding failed: %s",
 			dev->data->port_id, strerror(rte_errno));
-		goto error;
+		SAVE_RTE_ERRNO_AND_STOP(ret, dev);
+		goto rxq_stop;
 	}
 	/* Set started flag here for the following steps like control flow. */
 	dev->data->dev_started = 1;
@@ -1257,7 +1268,8 @@ continue_dev_start:
 	if (ret) {
 		DRV_LOG(ERR, "port %u Rx interrupt vector creation failed",
 			dev->data->port_id);
-		goto error;
+		SAVE_RTE_ERRNO_AND_STOP(ret, dev);
+		goto rxq_stop;
 	}
 	mlx5_os_stats_init(dev);
 	/*
@@ -1269,7 +1281,8 @@ continue_dev_start:
 		DRV_LOG(ERR,
 			"port %u failed to attach indirect actions: %s",
 			dev->data->port_id, rte_strerror(rte_errno));
-		goto error;
+		SAVE_RTE_ERRNO_AND_STOP(ret, dev);
+		goto rx_intr_vec_disable;
 	}
 #ifdef HAVE_MLX5_HWS_SUPPORT
 	if (priv->sh->config.dv_flow_en == 2) {
@@ -1277,7 +1290,8 @@ continue_dev_start:
 		if (ret) {
 			DRV_LOG(ERR, "port %u failed to update HWS tables",
 				dev->data->port_id);
-			goto error;
+			SAVE_RTE_ERRNO_AND_STOP(ret, dev);
+			goto action_handle_detach;
 		}
 	}
 #endif
@@ -1285,7 +1299,8 @@ continue_dev_start:
 	if (ret) {
 		DRV_LOG(ERR, "port %u failed to set defaults flows",
 			dev->data->port_id);
-		goto error;
+		SAVE_RTE_ERRNO_AND_STOP(ret, dev);
+		goto action_handle_detach;
 	}
 	/* Set dynamic fields and flags into Rx queues. */
 	mlx5_flow_rxq_dynf_set(dev);
@@ -1302,12 +1317,14 @@ continue_dev_start:
 	if (ret) {
 		DRV_LOG(DEBUG, "port %u failed to start default actions: %s",
 			dev->data->port_id, strerror(rte_errno));
-		goto error;
+		SAVE_RTE_ERRNO_AND_STOP(ret, dev);
+		goto traffic_disable;
 	}
 	if (mlx5_dev_ctx_shared_mempool_subscribe(dev) != 0) {
 		DRV_LOG(ERR, "port %u failed to subscribe for mempool life cycle: %s",
 			dev->data->port_id, rte_strerror(rte_errno));
-		goto error;
+		SAVE_RTE_ERRNO_AND_STOP(ret, dev);
+		goto stop_default;
 	}
 	rte_wmb();
 	dev->tx_pkt_burst = mlx5_select_tx_function(dev);
@@ -1334,18 +1351,25 @@ continue_dev_start:
 		priv->sh->port[priv->dev_port - 1].devx_ih_port_id =
 					(uint32_t)dev->data->port_id;
 	return 0;
-error:
-	ret = rte_errno; /* Save rte_errno before cleanup. */
-	/* Rollback. */
-	dev->data->dev_started = 0;
+stop_default:
 	mlx5_flow_stop_default(dev);
+traffic_disable:
 	mlx5_traffic_disable(dev);
-	mlx5_txq_stop(dev);
+action_handle_detach:
+	mlx5_action_handle_detach(dev);
+rx_intr_vec_disable:
+	mlx5_rx_intr_vec_disable(dev);
+rxq_stop:
 	mlx5_rxq_stop(dev);
+txq_stop:
+	mlx5_txq_stop(dev);
+lb_dummy_queue_release:
 	if (priv->obj_ops.lb_dummy_queue_release)
 		priv->obj_ops.lb_dummy_queue_release(dev);
-	mlx5_txpp_stop(dev); /* Stop last. */
-	rte_errno = ret; /* Restore rte_errno. */
+txpp_stop:
+	mlx5_txpp_stop(dev);
+error:
+	rte_errno = ret;
 	return -rte_errno;
 }
 
-- 
2.43.0

---
  Diff of the applied patch vs upstream commit (please double-check if non-empty):
---
--- -	2025-12-25 11:16:40.880491332 +0200
+++ 0092-net-mlx5-fix-device-start-error-handling.patch	2025-12-25 11:16:36.261892000 +0200
@@ -1 +1 @@
-From 860f6c63dbc1cc6ae6bbaca886c04b88d43a2236 Mon Sep 17 00:00:00 2001
+From efb731220b65055d70e0c7718c94d88741fe91f6 Mon Sep 17 00:00:00 2001
@@ -3 +3 @@
-Date: Thu, 13 Nov 2025 21:37:11 +0200
+Date: Tue, 25 Nov 2025 13:09:27 +0200
@@ -5,0 +6,2 @@
+[ upstream commit 860f6c63dbc1 ]
+
@@ -47,2 +49,2 @@
- drivers/net/mlx5/mlx5_trigger.c | 66 +++++++++++++++++++++++----------
- 1 file changed, 46 insertions(+), 20 deletions(-)
+ drivers/net/mlx5/mlx5_trigger.c | 60 +++++++++++++++++++++++----------
+ 1 file changed, 42 insertions(+), 18 deletions(-)
@@ -51 +53 @@
-index c0fb98e78a..9aa36ae9a8 100644
+index 3bda84e963..7332a79609 100644
@@ -54,3 +56,3 @@
-@@ -1226,6 +1226,11 @@ static void mlx5_dev_free_consec_tx_mem(struct rte_eth_dev *dev, bool on_stop)
- 	}
- }
+@@ -1133,6 +1133,11 @@ mlx5_hw_representor_port_allowed_start(struct rte_eth_dev *dev)
+ 
+ #endif
@@ -66 +68 @@
-@@ -1316,25 +1321,30 @@ continue_dev_start:
+@@ -1203,19 +1208,23 @@ continue_dev_start:
@@ -83,8 +84,0 @@
- 	ret = mlx5_dev_allocate_consec_tx_mem(dev);
- 	if (ret) {
- 		DRV_LOG(ERR, "port %u Tx queues memory allocation failed: %s",
- 			dev->data->port_id, strerror(rte_errno));
--		goto error;
-+		SAVE_RTE_ERRNO_AND_STOP(ret, dev);
-+		goto lb_dummy_queue_release;
- 	}
@@ -97 +91 @@
-+		goto free_consec_tx_mem;
++		goto lb_dummy_queue_release;
@@ -101 +95 @@
-@@ -1358,7 +1368,8 @@ continue_dev_start:
+@@ -1239,7 +1248,8 @@ continue_dev_start:
@@ -111 +105 @@
-@@ -1368,7 +1379,8 @@ continue_dev_start:
+@@ -1249,7 +1259,8 @@ continue_dev_start:
@@ -121 +115 @@
-@@ -1376,7 +1388,8 @@ continue_dev_start:
+@@ -1257,7 +1268,8 @@ continue_dev_start:
@@ -131 +125 @@
-@@ -1388,7 +1401,8 @@ continue_dev_start:
+@@ -1269,7 +1281,8 @@ continue_dev_start:
@@ -141 +135 @@
-@@ -1396,7 +1410,8 @@ continue_dev_start:
+@@ -1277,7 +1290,8 @@ continue_dev_start:
@@ -151 +145 @@
-@@ -1404,7 +1419,8 @@ continue_dev_start:
+@@ -1285,7 +1299,8 @@ continue_dev_start:
@@ -161 +155 @@
-@@ -1421,12 +1437,14 @@ continue_dev_start:
+@@ -1302,12 +1317,14 @@ continue_dev_start:
@@ -176,3 +170,3 @@
- 	if (mlx5_flow_is_steering_disabled())
- 		mlx5_flow_rxq_mark_flag_set(dev);
-@@ -1455,19 +1473,27 @@ continue_dev_start:
+ 	rte_wmb();
+ 	dev->tx_pkt_burst = mlx5_select_tx_function(dev);
+@@ -1334,18 +1351,25 @@ continue_dev_start:
@@ -199,2 +192,0 @@
-+free_consec_tx_mem:
-+	mlx5_dev_free_consec_tx_mem(dev, false);
@@ -204 +195,0 @@
--	mlx5_dev_free_consec_tx_mem(dev, false);


More information about the stable mailing list