patch 'net/nbl: improve mailbox exception handling' has been queued to stable release 25.11.1
Kevin Traynor
ktraynor at redhat.com
Thu Feb 26 14:09:36 CET 2026
Hi,
FYI, your patch has been queued to stable release 25.11.1
Note it hasn't been pushed to http://dpdk.org/browse/dpdk-stable yet.
It will be pushed if I get no objections before 03/02/26. So please
shout if anyone has objections.
Also note that after the patch there's a diff of the upstream commit vs the
patch applied to the branch. This will indicate if there was any rebasing
needed to apply to the stable branch. If there were code changes for rebasing
(ie: not only metadata diffs), please double check that the rebase was
correctly done.
Queued patches are on a temporary branch at:
https://github.com/kevintraynor/dpdk-stable
This queued commit can be viewed at:
https://github.com/kevintraynor/dpdk-stable/commit/80472b9632d810c0ea7568dce802a7d4dc8842f0
Thanks.
Kevin
---
>From 80472b9632d810c0ea7568dce802a7d4dc8842f0 Mon Sep 17 00:00:00 2001
From: Dimon Zhao <dimon.zhao at nebula-matrix.com>
Date: Mon, 26 Jan 2026 18:52:20 -0800
Subject: [PATCH] net/nbl: improve mailbox exception handling
[ upstream commit 903bec3c46e89c76403ecbb5c6a23266a601d873 ]
Key changes:
1. Replace simple 'acked' flag with a state machine to improve mailbox
reliability. New states: IDLE, WAITING, ACKING, ACKED, TIMEOUT.
2. Validate message ID and type before processing acknowledgments to
prevent incorrect ACK matching.
3. Handle timeout scenarios: discard ACKs received after timeout to
prevent use of already released buffers.
4. Check mailbox status before sending: if the send mailbox descriptor
is found to be in use or waiting for an ACK during Tx,
the message will not be sent.
Fixes: a1c5ffa13b2c ("net/nbl: add channel layer")
Signed-off-by: Dimon Zhao <dimon.zhao at nebula-matrix.com>
---
drivers/net/nbl/nbl_hw/nbl_channel.c | 94 ++++++++++++++++++++++------
drivers/net/nbl/nbl_hw/nbl_channel.h | 11 +++-
2 files changed, 85 insertions(+), 20 deletions(-)
diff --git a/drivers/net/nbl/nbl_hw/nbl_channel.c b/drivers/net/nbl/nbl_hw/nbl_channel.c
index f81c4c8591..52ad71c833 100644
--- a/drivers/net/nbl/nbl_hw/nbl_channel.c
+++ b/drivers/net/nbl/nbl_hw/nbl_channel.c
@@ -23,4 +23,5 @@ static int nbl_chan_init_tx_queue(union nbl_chan_info *chan_info)
struct nbl_chan_ring *txq = &chan_info->mailbox.txq;
size_t size = chan_info->mailbox.num_txq_entries * sizeof(struct nbl_chan_tx_desc);
+ int i;
txq->desc = nbl_alloc_dma_mem(&txq->desc_mem, size);
@@ -37,4 +38,8 @@ static int nbl_chan_init_tx_queue(union nbl_chan_info *chan_info)
}
+ for (i = 0; i < chan_info->mailbox.num_txq_entries; i++)
+ rte_atomic_store_explicit(&chan_info->mailbox.wait[i].status, NBL_MBX_STATUS_IDLE,
+ rte_memory_order_relaxed);
+
size = (u64)chan_info->mailbox.num_txq_entries * (u64)chan_info->mailbox.txq_buf_size;
txq->buf = nbl_alloc_dma_mem(&txq->buf_mem, size);
@@ -254,22 +259,53 @@ static void nbl_chan_recv_ack_msg(void *priv, uint16_t srcid, uint16_t msgid,
uint32_t ack_msgtype;
uint32_t copy_len;
+ int status = NBL_MBX_STATUS_WAITING;
chan_info = NBL_CHAN_MGT_TO_CHAN_INFO(chan_mgt);
ack_msgtype = *payload;
ack_msgid = *(payload + 1);
+
+ if (ack_msgid >= chan_mgt->chan_info->mailbox.num_txq_entries) {
+ NBL_LOG(ERR, "Invalid msg id %u, msg type %u", ack_msgid, ack_msgtype);
+ return;
+ }
+
wait_head = &chan_info->mailbox.wait[ack_msgid];
+ if (wait_head->msg_type != ack_msgtype) {
+ NBL_LOG(ERR, "Invalid msg id %u, msg type %u, wait msg type %u",
+ ack_msgid, ack_msgtype, wait_head->msg_type);
+ return;
+ }
+
+ if (!rte_atomic_compare_exchange_strong_explicit(&wait_head->status, &status,
+ NBL_MBX_STATUS_ACKING,
+ rte_memory_order_acq_rel,
+ rte_memory_order_relaxed)) {
+ NBL_LOG(ERR, "Invalid wait status %d msg id %u, msg type %u",
+ wait_head->status, ack_msgid, ack_msgtype);
+ return;
+ }
+
wait_head->ack_err = *(payload + 2);
NBL_LOG(DEBUG, "recv ack, srcid:%u, msgtype:%u, msgid:%u, ack_msgid:%u,"
- " data_len:%u, ack_data_len:%u",
- srcid, ack_msgtype, msgid, ack_msgid, data_len, wait_head->ack_data_len);
+ " data_len:%u, ack_data_len:%u, ack_err:%d",
+ srcid, ack_msgtype, msgid, ack_msgid, data_len,
+ wait_head->ack_data_len, wait_head->ack_err);
if (wait_head->ack_err >= 0 && (data_len > 3 * sizeof(uint32_t))) {
/* the mailbox msg parameter structure may change */
+ if (data_len - 3 * sizeof(uint32_t) != wait_head->ack_data_len)
+ NBL_LOG(INFO, "payload_len does not match ack_len!,"
+ " srcid:%u, msgtype:%u, msgid:%u, ack_msgid %u,"
+ " data_len:%u, ack_data_len:%u",
+ srcid, ack_msgtype, msgid,
+ ack_msgid, data_len, wait_head->ack_data_len);
copy_len = RTE_MIN(wait_head->ack_data_len, data_len - 3 * sizeof(uint32_t));
- memcpy(wait_head->ack_data, payload + 3, copy_len);
+ if (copy_len)
+ memcpy(wait_head->ack_data, payload + 3, copy_len);
}
- rte_atomic_store_explicit(&wait_head->acked, 1, rte_memory_order_release);
+ rte_atomic_store_explicit(&wait_head->status, NBL_MBX_STATUS_ACKED,
+ rte_memory_order_release);
}
@@ -372,10 +408,25 @@ static uint16_t nbl_chan_update_txqueue(union nbl_chan_info *chan_info,
struct nbl_chan_ring *txq;
struct nbl_chan_tx_desc *tx_desc;
+ struct nbl_chan_waitqueue_head *wait_head;
uint64_t pa;
void *va;
uint16_t next_to_use;
+ int status;
+
+ if (arg_len > NBL_CHAN_BUF_LEN - sizeof(*tx_desc)) {
+ NBL_LOG(ERR, "arg_len: %zu, too long!", arg_len);
+ return 0xFFFF;
+ }
txq = &chan_info->mailbox.txq;
next_to_use = txq->next_to_use;
+
+ wait_head = &chan_info->mailbox.wait[next_to_use];
+ status = rte_atomic_load_explicit(&wait_head->status, rte_memory_order_acquire);
+ if (status != NBL_MBX_STATUS_IDLE && status != NBL_MBX_STATUS_TIMEOUT) {
+ NBL_LOG(ERR, "too much inflight mailbox msg, msg id %u", next_to_use);
+ return 0xFFFF;
+ }
+
va = (u8 *)txq->buf + (u64)next_to_use * (u64)chan_info->mailbox.txq_buf_size;
pa = txq->buf_mem.pa + (u64)next_to_use * (u64)chan_info->mailbox.txq_buf_size;
@@ -385,8 +436,4 @@ static uint16_t nbl_chan_update_txqueue(union nbl_chan_info *chan_info,
tx_desc->msg_type = msg_type;
tx_desc->msgid = next_to_use;
- if (arg_len > NBL_CHAN_BUF_LEN - sizeof(*tx_desc)) {
- NBL_LOG(ERR, "arg_len: %zu, too long!", arg_len);
- return -1;
- }
if (arg_len > NBL_CHAN_TX_DESC_EMBEDDED_DATA_LEN) {
@@ -419,4 +466,5 @@ static int nbl_chan_send_msg(void *priv, struct nbl_chan_send_info *chan_send)
int ret;
int retry_time = 0;
+ int status = NBL_MBX_STATUS_WAITING;
if (chan_mgt->state)
@@ -432,6 +480,5 @@ static int nbl_chan_send_msg(void *priv, struct nbl_chan_send_info *chan_send)
if (msgid == 0xFFFF) {
rte_spinlock_unlock(&chan_info->mailbox.txq_lock);
- NBL_LOG(ERR, "chan tx queue full, send msgtype:%u"
- " to dstid:%u failed",
+ NBL_LOG(ERR, "chan tx queue full, send msgtype:%u to dstid:%u failed",
chan_send->msg_type, chan_send->dstid);
return -ECOMM;
@@ -447,21 +494,32 @@ static int nbl_chan_send_msg(void *priv, struct nbl_chan_send_info *chan_send)
wait_head->ack_data = chan_send->resp;
wait_head->ack_data_len = chan_send->resp_len;
- rte_atomic_store_explicit(&wait_head->acked, 0, rte_memory_order_relaxed);
wait_head->msg_type = chan_send->msg_type;
- rte_wmb();
+ rte_atomic_store_explicit(&wait_head->status, NBL_MBX_STATUS_WAITING,
+ rte_memory_order_release);
nbl_chan_kick_tx_ring(chan_mgt, chan_info);
rte_spinlock_unlock(&chan_info->mailbox.txq_lock);
while (1) {
- if (rte_atomic_load_explicit(&wait_head->acked, rte_memory_order_acquire))
- return wait_head->ack_err;
+ if (rte_atomic_load_explicit(&wait_head->status, rte_memory_order_acquire) ==
+ NBL_MBX_STATUS_ACKED) {
+ ret = wait_head->ack_err;
+ rte_atomic_store_explicit(&wait_head->status, NBL_MBX_STATUS_IDLE,
+ rte_memory_order_release);
+ return ret;
+ }
rte_delay_us(50);
retry_time++;
- if (retry_time > NBL_CHAN_RETRY_TIMES)
- return -EIO;
+ if (retry_time > NBL_CHAN_RETRY_TIMES &&
+ rte_atomic_compare_exchange_strong_explicit(&wait_head->status,
+ &status, NBL_MBX_STATUS_TIMEOUT,
+ rte_memory_order_acq_rel,
+ rte_memory_order_relaxed)) {
+ NBL_LOG(ERR, "send msgtype:%u msgid %u to dstid:%u timeout",
+ chan_send->msg_type, msgid, chan_send->dstid);
+ break;
+ }
}
-
- return 0;
+ return -EIO;
}
diff --git a/drivers/net/nbl/nbl_hw/nbl_channel.h b/drivers/net/nbl/nbl_hw/nbl_channel.h
index f8fa89af51..21fce7478d 100644
--- a/drivers/net/nbl/nbl_hw/nbl_channel.h
+++ b/drivers/net/nbl/nbl_hw/nbl_channel.h
@@ -42,4 +42,12 @@ enum {
};
+enum {
+ NBL_MBX_STATUS_IDLE = 0,
+ NBL_MBX_STATUS_WAITING,
+ NBL_MBX_STATUS_ACKING,
+ NBL_MBX_STATUS_ACKED,
+ NBL_MBX_STATUS_TIMEOUT,
+};
+
struct __rte_packed_begin nbl_chan_tx_desc {
uint16_t flags;
@@ -75,6 +83,5 @@ struct nbl_chan_ring {
struct nbl_chan_waitqueue_head {
char *ack_data;
- RTE_ATOMIC(int)
- acked;
+ RTE_ATOMIC(int) status;
int ack_err;
uint16_t ack_data_len;
--
2.53.0
---
Diff of the applied patch vs upstream commit (please double-check if non-empty):
---
--- - 2026-02-26 10:16:50.745078057 +0000
+++ 0094-net-nbl-improve-mailbox-exception-handling.patch 2026-02-26 10:16:47.038318539 +0000
@@ -1 +1 @@
-From 903bec3c46e89c76403ecbb5c6a23266a601d873 Mon Sep 17 00:00:00 2001
+From 80472b9632d810c0ea7568dce802a7d4dc8842f0 Mon Sep 17 00:00:00 2001
@@ -5,0 +6,2 @@
+[ upstream commit 903bec3c46e89c76403ecbb5c6a23266a601d873 ]
+
@@ -18 +19,0 @@
-Cc: stable at dpdk.org
More information about the stable
mailing list