[dpdk-dev] [PATCH 3/3] net/bnxt: check for chip reset in dev stop/close ops

Somnath Kotur somnath.kotur at broadcom.com
Thu Dec 24 10:35:41 CET 2020


While the error recovery thread is running, an application
can invoke dev_stop or dev_close_op thus triggering a race
and unwanted consequences if dev_close is invoked while the
recovery is not yet completed.
Fix by having another lock to synchronize between the 2 threads and
return EGAIN if adapter is in the middle of recovery when dev_stop
or dev_close ops are invoked

Signed-off-by: Somnath Kotur <somnath.kotur at broadcom.com>
---
 drivers/net/bnxt/bnxt.h        |  5 ++++
 drivers/net/bnxt/bnxt_cpr.c    |  2 ++
 drivers/net/bnxt/bnxt_ethdev.c | 49 +++++++++++++++++++++++++++++-----
 3 files changed, 49 insertions(+), 7 deletions(-)

diff --git a/drivers/net/bnxt/bnxt.h b/drivers/net/bnxt/bnxt.h
index 8374e9fadc..7c135370f0 100644
--- a/drivers/net/bnxt/bnxt.h
+++ b/drivers/net/bnxt/bnxt.h
@@ -719,6 +719,11 @@ struct bnxt {
 	 * health_check_lock
 	 */
 	pthread_mutex_t			health_check_lock;
+	/* synchronize between dev_stop/dev_close_op and
+	 * error recovery thread triggered as part of
+	 * HWRM_ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY
+	 */
+	pthread_mutex_t			err_recovery_lock;
 	uint16_t			max_req_len;
 	uint16_t			max_resp_len;
 	uint16_t                        hwrm_max_ext_req_len;
diff --git a/drivers/net/bnxt/bnxt_cpr.c b/drivers/net/bnxt/bnxt_cpr.c
index ee96ae81bf..6e172a9eea 100644
--- a/drivers/net/bnxt/bnxt_cpr.c
+++ b/drivers/net/bnxt/bnxt_cpr.c
@@ -133,6 +133,7 @@ void bnxt_handle_async_event(struct bnxt *bp,
 			return;
 		}
 
+		pthread_mutex_lock(&bp->err_recovery_lock);
 		event_data = rte_le_to_cpu_32(async_cmp->event_data1);
 		/* timestamp_lo/hi values are in units of 100ms */
 		bp->fw_reset_max_msecs = async_cmp->timestamp_hi ?
@@ -152,6 +153,7 @@ void bnxt_handle_async_event(struct bnxt *bp,
 		}
 
 		bp->flags |= BNXT_FLAG_FW_RESET;
+		pthread_mutex_unlock(&bp->err_recovery_lock);
 		rte_eal_alarm_set(US_PER_MS, bnxt_dev_reset_and_resume,
 				  (void *)bp);
 		break;
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index b6a9db1b66..a6794a417d 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -1276,8 +1276,7 @@ static void bnxt_free_switch_domain(struct bnxt *bp)
 	}
 }
 
-/* Unload the driver, release resources */
-static int bnxt_dev_stop_op(struct rte_eth_dev *eth_dev)
+static int bnxt_dev_stop(struct rte_eth_dev *eth_dev)
 {
 	struct bnxt *bp = eth_dev->data->dev_private;
 	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
@@ -1345,6 +1344,22 @@ static int bnxt_dev_stop_op(struct rte_eth_dev *eth_dev)
 	return 0;
 }
 
+/* Unload the driver, release resources */
+static int bnxt_dev_stop_op(struct rte_eth_dev *eth_dev)
+{
+	struct bnxt *bp = eth_dev->data->dev_private;
+
+	pthread_mutex_lock(&bp->err_recovery_lock);
+	if (bp->flags & BNXT_FLAG_FW_RESET) {
+		PMD_DRV_LOG(ERR,
+			    "Adapter recovering from error..Please retry\n");
+		return -EAGAIN;
+	}
+	pthread_mutex_unlock(&bp->err_recovery_lock);
+
+	return bnxt_dev_stop(eth_dev);
+}
+
 static int bnxt_dev_start_op(struct rte_eth_dev *eth_dev)
 {
 	struct bnxt *bp = eth_dev->data->dev_private;
@@ -1411,7 +1426,7 @@ static int bnxt_dev_start_op(struct rte_eth_dev *eth_dev)
 	return 0;
 
 error:
-	bnxt_dev_stop_op(eth_dev);
+	bnxt_dev_stop(eth_dev);
 	return rc;
 }
 
@@ -1421,6 +1436,7 @@ bnxt_uninit_locks(struct bnxt *bp)
 	pthread_mutex_destroy(&bp->flow_lock);
 	pthread_mutex_destroy(&bp->def_cp_lock);
 	pthread_mutex_destroy(&bp->health_check_lock);
+	pthread_mutex_destroy(&bp->err_recovery_lock);
 	if (bp->rep_info) {
 		pthread_mutex_destroy(&bp->rep_info->vfr_lock);
 		pthread_mutex_destroy(&bp->rep_info->vfr_start_lock);
@@ -1435,13 +1451,21 @@ static int bnxt_dev_close_op(struct rte_eth_dev *eth_dev)
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
 
+	pthread_mutex_lock(&bp->err_recovery_lock);
+	if (bp->flags & BNXT_FLAG_FW_RESET) {
+		PMD_DRV_LOG(ERR,
+			    "Adapter recovering from error...Please retry\n");
+		return -EAGAIN;
+	}
+	pthread_mutex_unlock(&bp->err_recovery_lock);
+
 	/* cancel the recovery handler before remove dev */
 	rte_eal_alarm_cancel(bnxt_dev_reset_and_resume, (void *)bp);
 	rte_eal_alarm_cancel(bnxt_dev_recover, (void *)bp);
 	bnxt_cancel_fc_thread(bp);
 
 	if (eth_dev->data->dev_started)
-		ret = bnxt_dev_stop_op(eth_dev);
+		ret = bnxt_dev_stop(eth_dev);
 
 	bnxt_free_switch_domain(bp);
 
@@ -3655,7 +3679,7 @@ static void bnxt_dev_cleanup(struct bnxt *bp)
 	bp->eth_dev->data->dev_link.link_status = 0;
 	bp->link_info->link_up = 0;
 	if (bp->eth_dev->data->dev_started)
-		bnxt_dev_stop_op(bp->eth_dev);
+		bnxt_dev_stop(bp->eth_dev);
 
 	bnxt_uninit_resources(bp, true);
 }
@@ -3756,6 +3780,7 @@ static void bnxt_dev_recover(void *arg)
 	int timeout = bp->fw_reset_max_msecs;
 	int rc = 0;
 
+	pthread_mutex_lock(&bp->err_recovery_lock);
 	/* Clear Error flag so that device re-init should happen */
 	bp->flags &= ~BNXT_FLAG_FATAL_ERROR;
 
@@ -3792,12 +3817,15 @@ static void bnxt_dev_recover(void *arg)
 		goto err_start;
 
 	PMD_DRV_LOG(INFO, "Recovered from FW reset\n");
+	pthread_mutex_unlock(&bp->err_recovery_lock);
+
 	return;
 err_start:
-	bnxt_dev_stop_op(bp->eth_dev);
+	bnxt_dev_stop(bp->eth_dev);
 err:
 	bp->flags |= BNXT_FLAG_FATAL_ERROR;
 	bnxt_uninit_resources(bp, false);
+	pthread_mutex_unlock(&bp->err_recovery_lock);
 	PMD_DRV_LOG(ERR, "Failed to recover from FW reset\n");
 }
 
@@ -4733,8 +4761,15 @@ bnxt_init_locks(struct bnxt *bp)
 	}
 
 	err = pthread_mutex_init(&bp->health_check_lock, NULL);
-	if (err)
+	if (err) {
 		PMD_DRV_LOG(ERR, "Unable to initialize health_check_lock\n");
+		return err;
+	}
+
+	err = pthread_mutex_init(&bp->err_recovery_lock, NULL);
+	if (err)
+		PMD_DRV_LOG(ERR, "Unable to initialize err_recovery_lock\n");
+
 	return err;
 }
 
-- 
2.28.0.497.g54e85e7



More information about the dev mailing list