|SUCCESS| [v3, 2/2] net/mana: add documentation for device reset support

dpdklab at iol.unh.edu dpdklab at iol.unh.edu
Fri May 22 12:56:56 CEST 2026


Test-Label: ai-code-review
Test-Status: SUCCESS
https://dpdk.org/patch/164261

_AI Code Review_

=== Patch Review: series.patch (via Claude) ===
Review date: 2026-05-22

I'll review this DPDK patch for the MANA driver's device reset support feature. Let me analyze it systematically for correctness bugs, coding style, and guideline compliance.

```
commit 898d9314d3cff39cebf4df67396cda1f (hypothetical - from patch context)
Author: Wei Hu <weh at microsoft.com>

[PATCH v3 1/2] net/mana: add device reset support

Add support for handling hardware reset events in the MANA driver.
When the MANA kernel driver receives a hardware service event, it
initiates a device reset and notifies userspace via
IBV_EVENT_DEVICE_FATAL. The DPDK driver handles this by performing
an automatic teardown and recovery sequence.

> diff --git a/drivers/net/mana/mana.c b/drivers/net/mana/mana.c
> index 67396cda1f..898d9314d3 100644
> --- a/drivers/net/mana/mana.c
> +++ b/drivers/net/mana/mana.c
> @@ -13,7 +13,10 @@
>  #include <ethdev_pci.h>
>  #include <rte_kvargs.h>
>  #include <rte_eal_paging.h>
> +#include <rte_alarm.h>
>  #include <rte_pci.h>
> +#include <rte_rcu_qsbr.h>
> +#include <rte_lock_annotations.h>

The rte_alarm.h is included but rte_alarm functions don't appear to be
used in the code. Can this include be removed?

> @@ -103,6 +106,23 @@ mana_dev_configure(struct rte_eth_dev *dev)
>  			      RTE_ETH_RX_OFFLOAD_VLAN_STRIP);
>  
>  	priv->num_queues = dev->data->nb_rx_queues;
> +	DRV_LOG(DEBUG, "priv %p, port %u, dev port %u, num_queues: %u",
> +		priv, priv->port_id, priv->dev_port, priv->num_queues);
> +
> +	/*
> +	 * Register data path thread IDs (rx and tx) with the RCU
> +	 * quiescent state variable for device state synchronization.
> +	 */
> +	for (int i = 0; i < 2 * priv->num_queues; i++) {
> +		if (rte_rcu_qsbr_thread_register(priv->dev_state_qsv, i) != 0) {
> +			DRV_LOG(ERR, "Failed to register rcu qsv thread %d of total %d",
> +				i, 2 * priv->num_queues - 1);
> +			return -EINVAL;
> +		}

Does this code need to unregister previously registered threads when
registration fails mid-loop? If thread 5 fails to register but 0-4
succeeded, are threads 0-4 leaked?

> @@ -214,8 +234,8 @@ mana_dev_start(struct rte_eth_dev *dev)
>  
>  	DRV_LOG(INFO, "TX/RX queues have started");
>  
> -	/* Enable datapath for secondary processes */
> -	mana_mp_req_on_rxtx(dev, MANA_MP_REQ_START_RXTX);
> +	/* Intentionally ignore errors -- secondary may not be running */
> +	(void)mana_mp_req_on_rxtx(dev, MANA_MP_REQ_START_RXTX);

The comment uses an em dash character. Should this be plain ASCII "--"
for consistency with typical DPDK code?

> @@ -242,26 +262,33 @@ mana_dev_stop(struct rte_eth_dev *dev)
>  {
>  	int ret;
>  	struct mana_priv *priv = dev->data->dev_private;
> -
> -	rxq_intr_disable(priv);
> +	enum mana_device_state state;
> +
> +	state = rte_atomic_load_explicit(&priv->dev_state,
> +					 rte_memory_order_acquire);
> +	if (state == MANA_DEV_ACTIVE ||
> +	    state == MANA_DEV_RESET_FAILED) {
> +		rxq_intr_disable(priv);
> +		DRV_LOG(DEBUG, "rxq_intr_disable called");
> +	}

Does this code check dev_state without the reset_ops_lock? Could state
transition to RESET_FAILED between the check and rxq_intr_disable call,
potentially causing rxq_intr_disable to operate on partially torn-down
state?

[ ... ]

> +/*
> + * Reset thread: sleeps for the reset timer period, then performs
> + * the reset exit sequence. Runs on a control thread so it can call
> + * rte_intr_callback_unregister (which fails from alarm/intr thread).
> + */
> +static uint32_t
> +mana_reset_thread(void *arg)
> +	__rte_no_thread_safety_analysis
> +{
> +	struct mana_priv *priv = (struct mana_priv *)arg;
> +	struct timespec ts;
> +
> +	DRV_LOG(INFO, "Reset thread started, waiting %us",
> +		(unsigned int)(MANA_RESET_TIMER_US / 1000000));
> +
> +	/* Wait on condvar with timeout -- can be woken early by PCI remove */
> +	clock_gettime(CLOCK_REALTIME, &ts);
> +	ts.tv_sec += MANA_RESET_TIMER_US / 1000000;
> +
> +	pthread_mutex_lock(&priv->reset_cond_mutex);
> +	pthread_cond_timedwait(&priv->reset_cond, &priv->reset_cond_mutex, &ts);
> +	pthread_mutex_unlock(&priv->reset_cond_mutex);

Does this code check the return value of pthread_cond_timedwait? If it
returns an error (EINVAL for bad ts, etc.), should the reset proceed or
abort?

> +static void
> +mana_reset_enter(struct mana_priv *priv)
> +	__rte_no_thread_safety_analysis
> +{
> +	int ret;
> +	uint64_t ticket;
> +	struct rte_eth_dev *dev = &rte_eth_devices[priv->port_id];
> +
> +	/*
> +	 * Lock ownership for reset_ops_lock through the reset path:
> +	 *
> +	 *   mana_intr_handler     -- acquires the lock
> +	 *   mana_reset_enter      -- called with lock held, releases it
> +	 *                           after spawning the reset thread
> +	 *   mana_reset_thread     -- re-acquires the lock after the
> +	 *                           recovery delay
> +	 *   mana_reset_exit       -- called with lock held, passes it
> +	 *                           to mana_reset_exit_delay
> +	 *   mana_reset_exit_delay -- called with lock held, releases it
> +	 *                           on completion
> +	 */
> +
> +	rte_atomic_store_explicit(&priv->dev_state, MANA_DEV_RESET_ENTER,
> +				     rte_memory_order_release);

Does this atomic store need a preceding compiler barrier or fence? The
data path reads dev_state with acquire ordering, but other state
modified before this store (like queue pointers) might be reordered
after it by the compiler despite the release ordering.

> +	for (int i = 0; i < priv->num_queues; i++) {
> +		struct mana_rxq *rxq = dev->data->rx_queues[i];
> +		struct mana_txq *txq = dev->data->tx_queues[i];
> +
> +		DRV_LOG(DEBUG, "Free MR for priv = %p, rxq %u, txq %u",
> +			priv, rxq->rxq_idx, txq->rxq_idx);
> +		mana_mr_btree_free(&rxq->mr_btree);
> +		mana_mr_btree_free(&txq->mr_btree);
> +	}

Does this code verify that rxq and txq pointers are non-NULL before
dereferencing? If queue setup failed or was never called, could these be
NULL?

[ ... ]

> +static uint32_t
> +mana_reset_exit_delay(void *arg)
> +	__rte_no_thread_safety_analysis
> +{
> +	struct mana_priv *priv = (struct mana_priv *)arg;
> +	uint32_t ret = 0;
> +	int i;
> +	struct rte_eth_dev *dev;
> +	struct rte_pci_device *pci_dev;

The ret variable is initialized to 0 but later assigned both int return
values and used as uint32_t return. Does this cause sign extension
issues when returning negative error codes?

> +	ret = ibv_close_device(priv->ib_ctx);
> +	priv->ib_ctx = NULL;
> +	if (ret) {
> +		DRV_LOG(ERR, "Failed to close ibv device %d", ret);
> +		rte_atomic_store_explicit(&priv->dev_state, MANA_DEV_RESET_FAILED,
> +				     rte_memory_order_release);
> +		goto out;
> +	}
> +
> +	ret = mana_pci_probe(NULL, pci_dev);
> +	if (ret) {
> +		DRV_LOG(ERR, "Failed to probe mana pci dev ret %d", ret);
> +		rte_atomic_store_explicit(&priv->dev_state, MANA_DEV_RESET_FAILED,
> +				     rte_memory_order_release);
> +		goto out;
> +	}

Does priv remain valid after ibv_close_device and mana_pci_probe? If
mana_pci_probe allocates a new priv, is the old priv leaked? The code
appears to assume priv survives the reprobe.

> +	for (i = 0; i < priv->num_queues; i++) {
> +		struct mana_rxq *rxq = dev->data->rx_queues[i];
> +		struct mana_txq *txq = dev->data->tx_queues[i];
> +
> +		ret = mana_mr_btree_init(&rxq->mr_btree,
> +					 MANA_MR_BTREE_PER_QUEUE_N,
> +					 rxq->socket);
> +		if (ret) {
> +			DRV_LOG(ERR, "Failed to init RXQ %d MR btree "
> +				"on socket %u, ret %d", i, rxq->socket, ret);
> +			goto mr_init_failed_rxq;
> +		}

Same question about NULL rxq/txq pointers as in mana_reset_enter.

[ ... ]

> +static void
> +mana_intr_handler(void *arg)
> +	__rte_no_thread_safety_analysis
> +{
> +	struct mana_priv *priv = arg;
> +	struct ibv_context *ctx = priv->ib_ctx;
> -	struct ibv_async_event event;
> +	struct ibv_async_event event = { 0 };
> +	struct rte_eth_dev *dev;
>  
>  	/* Read and ack all messages from IB device */
>  	while (true) {
>  		if (ibv_get_async_event(ctx, &event))
>  			break;
>  
> -		if (event.event_type == IBV_EVENT_DEVICE_FATAL) {
> -			struct rte_eth_dev *dev;
> -
> -			dev = &rte_eth_devices[priv->port_id];
> -			if (dev->data->dev_conf.intr_conf.rmv)
> +		switch (event.event_type) {
> +		case IBV_EVENT_DEVICE_FATAL:
> +			DRV_LOG(INFO, "IBV_EVENT_DEVICE_FATAL received, dev_state=%d",
> +				(int)rte_atomic_load_explicit(&priv->dev_state,
> +				rte_memory_order_acquire));
> +			if (rte_atomic_load_explicit(&priv->dev_state,
> +			    rte_memory_order_acquire) == MANA_DEV_ACTIVE) {
> +				rte_spinlock_lock(&priv->reset_ops_lock);
> +
> +				/* Re-check after lock to avoid racing with
> +				 * mana_pci_remove_event_cb which may have
> +				 * set RESET_FAILED while we waited.
> +				 */
> +				if (rte_atomic_load_explicit(&priv->dev_state,
> +				    rte_memory_order_acquire) !=
> +				    MANA_DEV_ACTIVE) {
> +					rte_spinlock_unlock(
> +						&priv->reset_ops_lock);
> +					break;
> +				}
> +				mana_reset_enter(priv);

Does this code risk deadlock? mana_intr_handler runs on EAL interrupt
thread. mana_reset_enter spawns mana_reset_thread which calls
mana_reset_exit, which calls rte_intr_callback_unregister. Does
rte_intr_callback_unregister block waiting for this handler to complete?

[ ... ]

> @@ -1127,6 +1864,16 @@ mana_intr_install(struct rte_eth_dev *eth_dev, struct mana_priv *priv)
>  		goto free_intr;
>  	}
>  
> +	/* Register for PCI device removal events to distinguish
> +	 * PCI hot-remove from service reset. This requires the
> +	 * application to call rte_dev_event_monitor_start() for
> +	 * events to be delivered (e.g. testpmd --hot-plug-handling).
> +	 */
> +	ret = rte_dev_event_callback_register(eth_dev->device->name,
> +					      mana_pci_remove_event_cb, priv);
> +	if (ret)
> +		DRV_LOG(WARNING, "Failed to register PCI remove event callback");
> +

Does this code leak resources if rte_dev_event_callback_register fails?
The function continues instead of going to the error path after
registering the interrupt callback.

[ ... ]

> diff --git a/drivers/net/mana/mana.h b/drivers/net/mana/mana.h
> index 79cc47b6ab..df4832fd75 100644
> --- a/drivers/net/mana/mana.h
> +++ b/drivers/net/mana/mana.h
> @@ -5,6 +5,8 @@
>  #ifndef __MANA_H__
>  #define __MANA_H__
>  
> +#include <pthread.h>
> +
>  #define	PCI_VENDOR_ID_MICROSOFT		0x1414
>  #define PCI_DEVICE_ID_MICROSOFT_


More information about the test-report mailing list