|SUCCESS| [2/2] net/netvsc: handle VF recovery events for service reset
dpdklab at iol.unh.edu
dpdklab at iol.unh.edu
Wed Apr 29 20:16:07 CEST 2026
Test-Label: ai-code-review
Test-Status: SUCCESS
https://dpdk.org/patch/163337
_AI Code Review_
=== Patch Review: series.patch (via Claude) ===
Review date: 2026-04-29
# Review of DPDK MANA Device Reset Support Patches
git sha: From patchwork Wed Apr 29 08:49:54 2026
Author: Wei Hu <weh at microsoft.com>
Subject: [PATCH 1/2] net/mana: add device reset support
This patch adds hardware reset event handling to the MANA driver with
automatic teardown and recovery using RCU synchronization and a two-phase
reset flow.
> diff --git a/drivers/net/mana/mana.c b/drivers/net/mana/mana.c
> index 67396cda1f..031c6a73be 100644
> --- a/drivers/net/mana/mana.c
> +++ b/drivers/net/mana/mana.c
[ ... ]
> @@ -103,6 +105,23 @@ mana_dev_configure(struct rte_eth_dev *dev)
> RTE_ETH_RX_OFFLOAD_VLAN_STRIP);
>
> priv->num_queues = dev->data->nb_rx_queues;
> + DRV_LOG(DEBUG, "priv %p, port %u, dev port %u, num_queues: %u",
> + priv, priv->port_id, priv->dev_port, priv->num_queues);
> +
> + /*
> + * Now we know the total number of rx and tx queues.
> + * Register the rcu qsv thread.
> + */
Does this comment accurately describe what happens during device
reconfiguration? The comment says "Register the rcu qsv thread" but
the code is registering thread IDs for data path threads, not creating
threads.
> + for (unsigned int i = 0; i < (unsigned int)(2 * priv->num_queues); i++) {
Can this cast be avoided? The loop variable is already unsigned int and
priv->num_queues should be an unsigned type. Does num_queues need to be
cast here?
> + if (rte_rcu_qsbr_thread_register(priv->dev_state_qsv, i) != 0) {
> + DRV_LOG(ERR, "Failed to register rcu qsv thread %d of total %d",
> + i, 2 * priv->num_queues - 1);
> + return -EINVAL;
> + }
Does this code leak previously registered threads when registration fails
midway through the loop? Should there be cleanup code to unregister threads
0 through i-1 before returning?
[ ... ]
> @@ -188,8 +207,10 @@ mana_dev_start(struct rte_eth_dev *dev)
> struct mana_priv *priv = dev->data->dev_private;
>
> rte_spinlock_init(&priv->mr_btree_lock);
> +
> ret = mana_mr_btree_init(&priv->mr_btree, MANA_MR_BTREE_CACHE_N,
> dev->device->numa_node);
> +
Are these blank lines intentional? The coding style guide discourages
multiple consecutive blank lines.
[ ... ]
> @@ -215,7 +236,7 @@ mana_dev_start(struct rte_eth_dev *dev)
> DRV_LOG(INFO, "TX/RX queues have started");
>
> /* Enable datapath for secondary processes */
> - mana_mp_req_on_rxtx(dev, MANA_MP_REQ_START_RXTX);
> + (void)mana_mp_req_on_rxtx(dev, MANA_MP_REQ_START_RXTX);
Does this code intentionally ignore errors from mana_mp_req_on_rxtx()? If
the secondary process data path fails to start, should the function
continue normally or return an error?
[ ... ]
> @@ -243,25 +264,31 @@ mana_dev_stop(struct rte_eth_dev *dev)
> int ret;
> struct mana_priv *priv = dev->data->dev_private;
>
> - rxq_intr_disable(priv);
> + enum mana_device_state state = rte_atomic_load_explicit(
> + &priv->dev_state, rte_memory_order_acquire);
Does the coding style allow declarations after statements? This should be
at the top of the function block.
> + if (state == MANA_DEV_ACTIVE ||
> + state == MANA_DEV_RESET_FAILED) {
> + rxq_intr_disable(priv);
> + DRV_LOG(DEBUG, "rxq_intr_disable called");
> + }
[ ... ]
> @@ -276,30 +303,47 @@ mana_dev_close(struct rte_eth_dev *dev)
> struct mana_priv *priv = dev->data->dev_private;
> int ret;
>
> + DRV_LOG(DEBUG, "Free MR for priv %p", priv);
> mana_remove_all_mr(priv);
>
> - ret = mana_intr_uninstall(priv);
> - if (ret)
> - return ret;
> + enum mana_device_state state = rte_atomic_load_explicit(
> + &priv->dev_state, rte_memory_order_acquire);
Same declaration-after-statement issue. Does this violate coding style?
> + if (state == MANA_DEV_ACTIVE ||
> + state == MANA_DEV_RESET_FAILED) {
> + ret = mana_intr_uninstall(priv);
> + if (ret)
> + return ret;
> + }
>
> if (priv->ib_parent_pd) {
> - int err = ibv_dealloc_pd(priv->ib_parent_pd);
> - if (err)
> - DRV_LOG(ERR, "Failed to deallocate parent PD: %d", err);
> + ret = ibv_dealloc_pd(priv->ib_parent_pd);
> + if (ret)
> + DRV_LOG(ERR,
> + "Failed to deallocate parent PD: %d", ret);
> +
Another unnecessary blank line?
[ ... ]
> @@ -552,6 +617,29 @@ mana_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
> return ret;
> }
>
> +static int
> +mana_dev_tx_queue_setup_lock(struct rte_eth_dev *dev, uint16_t queue_idx,
> + uint16_t nb_desc, unsigned int socket_id,
> + const struct rte_eth_txconf *tx_conf)
> +{
> + struct mana_priv *priv = dev->data->dev_private;
> + int ret;
> +
> + if (rte_spinlock_trylock(&priv->reset_ops_lock)) {
> + if (rte_atomic_load_explicit(&priv->dev_state,
> + rte_memory_order_acquire) != MANA_DEV_ACTIVE) {
> + rte_spinlock_unlock(&priv->reset_ops_lock);
> + return -EBUSY;
> + }
> + ret = mana_dev_tx_queue_setup(dev, queue_idx,
> + nb_desc, socket_id, tx_conf);
> + rte_spinlock_unlock(&priv->reset_ops_lock);
> + } else {
> + ret = -EBUSY;
> + }
> + return ret;
> +}
Does this pattern appear many times in the code? The MANA_OPS_*_LOCK macros
seem designed to avoid this duplication. Can this function be generated
with one of those macros, or does it need custom parameters?
[ ... ]
> @@ -820,33 +932,217 @@ mana_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
> return mana_ifreq(priv, SIOCSIFMTU, &request);
> }
>
> +#define MANA_OPS_1_LOCK(_func) \
> +static int \
> +_func##_lock(struct rte_eth_dev *dev) \
> +{ \
> + struct mana_priv *priv = dev->data->dev_private; \
> + int ret; \
> + if (rte_spinlock_trylock(&priv->reset_ops_lock)) { \
> + if (rte_atomic_load_explicit(&priv->dev_state, \
> + rte_memory_order_acquire) != \
> + MANA_DEV_ACTIVE) { \
> + rte_spinlock_unlock(&priv->reset_ops_lock); \
> + return -EBUSY; \
> + } \
> + ret = _func(dev); \
> + rte_spinlock_unlock(&priv->reset_ops_lock); \
> + } else { \
> + ret = -EBUSY; \
> + } \
> + return ret; \
> +}
Does this macro follow DPDK coding style for macros? Complex macros should
use do-while(0) wrapping when they contain multiple statements. Although
this generates a complete function, does the indentation and line
continuation match the style guide?
[ ... ]
> @@ -1031,28 +1327,410 @@ mana_ibv_device_to_pci_addr(const struct ibv_device *device,
> return 0;
> }
>
> +static int mana_pci_probe(struct rte_pci_driver *pci_drv,
> + struct rte_pci_device *pci_dev);
> +static void mana_intr_handler(void *arg);
> +static void mana_reset_exit(struct mana_priv *priv);
> +
> +/* Delay before initiating reset exit after reset enter completes */
> +#define MANA_RESET_TIMER_US (15 * 1000000ULL) /* 15 seconds */
Does this 15 second delay need to be configurable? Hard-coded timeouts can
be problematic across different hardware configurations.
[ ... ]
> +static void
> +mana_reset_enter(struct mana_priv *priv)
> +{
> + int ret;
> + uint64_t ticket;
> + struct rte_eth_dev *dev = &rte_eth_devices[priv->port_id];
> +
> + rte_atomic_store_explicit(&priv->dev_state, MANA_DEV_RESET_ENTER,
> + rte_memory_order_release);
> +
> + DRV_LOG(DEBUG, "Entering into device reset state");
> + DRV_LOG(DEBUG, "Resetting dev = %p, priv = %p", dev, priv);
> +
> + ticket = rte_rcu_qsbr_start(priv->dev_state_qsv);
> +
> + while (rte_rcu_qsbr_check(priv->dev_state_qsv, ticket, false) == 0)
> + rte_pause();
Does this busy-wait loop handle the case where threads are stalled
indefinitely? Should there be a maximum iteration count or timeout to
prevent hanging?
[ ... ]
> + for (int i = 0; i < priv->num_queues; i++) {
> + struct mana_rxq *rxq = dev->data->rx_queues[i];
> + struct mana_txq *txq = dev->data->tx_queues[i];
> +
> + DRV_LOG(DEBUG, "Free MR for priv = %p, rxq %u, txq %u",
> + priv, rxq->rxq_idx, txq->rxq_idx);
> + mana_mr_btree_free(&rxq->mr_btree);
> + mana_mr_btree_free(&txq->mr_btree);
> + }
Does this code verify that rx_queues[i] and tx_queues[i] are non-NULL
before dereferencing them? Can queues be partially configured during reset?
[ ... ]
> +static uint32_t
> +mana_reset_exit_delay(void *arg)
> +{
> + struct mana_priv *priv = (struct mana_priv *)arg;
> + uint32_t ret = 0;
> + int i;
> + struct rte_eth_dev *dev;
> + struct rte_pci_device *pci_dev;
Declaration after statement issue again. Does this need to be fixed?
> +
> + DRV_LOG(DEBUG, "Delayed mana device reset complete processing");
> +
> + /* If the app called dev_stop/dev_close during the timer window,
> + * state is no longer RESET_EXIT. Nothing to do.
> + */
Comment style: Does DPDK require "/*" on its own line for multi-line
comments?
[ ... ]
> + for (i = 0; i < priv->num_queues; i++) {
> + struct mana_rxq *rxq = dev->data->rx_queues[i];
> + struct mana_txq *txq = dev->data->tx_queues[i];
> +
> + ret = mana_mr_btree_init(&rxq->mr_btree,
> + MANA_MR_BTREE_PER_QUEUE_N,
> + rxq->socket);
> + if (ret) {
> + DRV_LOG(ERR, "Failed to init RXQ %d MR btree "
> + "on socket %u, ret %d", i, rxq->socket, ret);
> + goto mr_init_failed_rxq;
> + }
> +
> + ret = mana_mr_btree_init(&txq->mr_btree,
> + MANA_MR_BTREE_PER_QUEUE_N,
> + txq->socket);
> + if (ret) {
> + DRV_LOG(ERR, "Failed to init TXQ %d MR btree "
> + "on socket %u, ret %d", i, txq->socket, ret);
> + goto mr_init_failed_txq;
> + }
> + }
Same question as above: does this verify queues are non-NULL before use?
[ ... ]
> +mr_init_failed_all:
> + i = priv->num_queues;
> + goto mr_init_failed_rxq;
> +
> +mr_init_failed_txq:
> + /* RXQ btree at index i was initialized, free it */
> + mana_mr_btree_free(&((struct mana_rxq *)
> + dev->data->rx_queues[i])->mr_btree);
> +
> +mr_init_failed_rxq:
> + /* Free all fully initialized btrees for indices < i */
> + for (int j = 0; j < i; j++) {
> + struct mana_rxq *rxq = dev->data->rx_queues[j];
More information about the test-report
mailing list