[dpdk-dev] [PATCH v3 57/61] net/qede/base: prevent race condition during unload

Rasesh Mody rasesh.mody at cavium.com
Fri Mar 24 08:28:47 CET 2017


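Merge ecore_hw_stop() and ecore_hw_reset() into a single function,
ecore_hw_stop(). Prevent a race condition between MFW attentions and the
PF stop command during the unload flow, which causes an ASSERT: the
UNLOAD_REQ command is now sent to the MFW, followed by a DPC sync, before
the PF stop command is issued.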

Signed-off-by: Rasesh Mody <rasesh.mody at cavium.com>
---
 drivers/net/qede/base/bcm_osal.h      |    1 +
 drivers/net/qede/base/ecore_dev.c     |  175 ++++++++++++++++-----------------
 drivers/net/qede/base/ecore_dev_api.h |    9 --
 drivers/net/qede/base/ecore_mcp.c     |   12 +++
 drivers/net/qede/base/ecore_mcp.h     |   11 +++
 drivers/net/qede/base/ecore_spq.c     |    3 +
 drivers/net/qede/qede_main.c          |   18 +---
 7 files changed, 116 insertions(+), 113 deletions(-)

diff --git a/drivers/net/qede/base/bcm_osal.h b/drivers/net/qede/base/bcm_osal.h
index 052a0cf..32c9b25 100644
--- a/drivers/net/qede/base/bcm_osal.h
+++ b/drivers/net/qede/base/bcm_osal.h
@@ -168,6 +168,7 @@ typedef pthread_mutex_t osal_mutex_t;
 #define OSAL_DPC_ALLOC(hwfn) OSAL_ALLOC(hwfn, GFP, sizeof(osal_dpc_t))
 #define OSAL_DPC_INIT(dpc, hwfn) nothing
 #define OSAL_POLL_MODE_DPC(hwfn) nothing
+#define OSAL_DPC_SYNC(hwfn) nothing
 
 /* Lists */
 
diff --git a/drivers/net/qede/base/ecore_dev.c b/drivers/net/qede/base/ecore_dev.c
index 2a621f7..d8e4ca2 100644
--- a/drivers/net/qede/base/ecore_dev.c
+++ b/drivers/net/qede/base/ecore_dev.c
@@ -2050,7 +2050,7 @@ enum _ecore_status_t ecore_hw_init(struct ecore_dev *p_dev,
 
 		if (mfw_rc != ECORE_SUCCESS) {
 			DP_NOTICE(p_hwfn, true,
-				  "Failed sending LOAD_DONE command\n");
+				  "Failed sending a LOAD_DONE command\n");
 			return mfw_rc;
 		}
 
@@ -2139,32 +2139,77 @@ void ecore_hw_timers_stop_all(struct ecore_dev *p_dev)
 	}
 }
 
+static enum _ecore_status_t ecore_verify_reg_val(struct ecore_hwfn *p_hwfn,
+						 struct ecore_ptt *p_ptt,
+						 u32 addr, u32 expected_val)
+{
+	u32 val = ecore_rd(p_hwfn, p_ptt, addr);
+
+	if (val != expected_val) {
+		DP_NOTICE(p_hwfn, true,
+			  "Value at address 0x%08x is 0x%08x while the expected value is 0x%08x\n",
+			  addr, val, expected_val);
+		return ECORE_UNKNOWN_ERROR;
+	}
+
+	return ECORE_SUCCESS;
+}
+
 enum _ecore_status_t ecore_hw_stop(struct ecore_dev *p_dev)
 {
-	enum _ecore_status_t rc = ECORE_SUCCESS, t_rc;
+	struct ecore_hwfn *p_hwfn;
+	struct ecore_ptt *p_ptt;
+	enum _ecore_status_t rc, rc2 = ECORE_SUCCESS;
 	int j;
 
 	for_each_hwfn(p_dev, j) {
-		struct ecore_hwfn *p_hwfn = &p_dev->hwfns[j];
-		struct ecore_ptt *p_ptt = p_hwfn->p_main_ptt;
+		p_hwfn = &p_dev->hwfns[j];
+		p_ptt = p_hwfn->p_main_ptt;
 
 		DP_VERBOSE(p_hwfn, ECORE_MSG_IFDOWN, "Stopping hw/fw\n");
 
 		if (IS_VF(p_dev)) {
 			ecore_vf_pf_int_cleanup(p_hwfn);
+			rc = ecore_vf_pf_reset(p_hwfn);
+			if (rc != ECORE_SUCCESS) {
+				DP_NOTICE(p_hwfn, true,
+					  "ecore_vf_pf_reset failed. rc = %d.\n",
+					  rc);
+				rc2 = ECORE_UNKNOWN_ERROR;
+			}
 			continue;
 		}
 
 		/* mark the hw as uninitialized... */
 		p_hwfn->hw_init_done = false;
 
+		/* Send unload command to MCP */
+		if (!p_dev->recov_in_prog) {
+			rc = ecore_mcp_unload_req(p_hwfn, p_ptt);
+			if (rc != ECORE_SUCCESS) {
+				DP_NOTICE(p_hwfn, true,
+					  "Failed sending a UNLOAD_REQ command. rc = %d.\n",
+					  rc);
+				rc2 = ECORE_UNKNOWN_ERROR;
+			}
+		}
+
+		OSAL_DPC_SYNC(p_hwfn);
+
+		/* After this point no MFW attentions are expected, e.g. prevent
+		 * race between pf stop and dcbx pf update.
+		 */
+
 		rc = ecore_sp_pf_stop(p_hwfn);
-		if (rc)
+		if (rc != ECORE_SUCCESS) {
 			DP_NOTICE(p_hwfn, true,
-				  "Failed to close PF against FW. Continue to stop HW to prevent illegal host access by the device\n");
+				  "Failed to close PF against FW [rc = %d]. Continue to stop HW to prevent illegal host access by the device.\n",
+				  rc);
+			rc2 = ECORE_UNKNOWN_ERROR;
+		}
 
 		/* perform debug action after PF stop was sent */
-		OSAL_AFTER_PF_STOP((void *)p_hwfn->p_dev, p_hwfn->my_id);
+		OSAL_AFTER_PF_STOP((void *)p_dev, p_hwfn->my_id);
 
 		/* close NIG to BRB gate */
 		ecore_wr(p_hwfn, p_ptt,
@@ -2191,20 +2236,48 @@ enum _ecore_status_t ecore_hw_stop(struct ecore_dev *p_dev)
 		ecore_int_igu_init_pure_rt(p_hwfn, p_ptt, false, true);
 		/* Need to wait 1ms to guarantee SBs are cleared */
 		OSAL_MSLEEP(1);
-	}
+
+		if (!p_dev->recov_in_prog) {
+			ecore_verify_reg_val(p_hwfn, p_ptt,
+					     QM_REG_USG_CNT_PF_TX, 0);
+			ecore_verify_reg_val(p_hwfn, p_ptt,
+					     QM_REG_USG_CNT_PF_OTHER, 0);
+			/* @@@TBD - assert on incorrect xCFC values (10.b) */
+		}
+
+		/* Disable PF in HW blocks */
+		ecore_wr(p_hwfn, p_ptt, DORQ_REG_PF_DB_ENABLE, 0);
+		ecore_wr(p_hwfn, p_ptt, QM_REG_PF_EN, 0);
+
+		if (!p_dev->recov_in_prog) {
+			rc = ecore_mcp_unload_done(p_hwfn, p_ptt);
+			if (rc != ECORE_SUCCESS) {
+				DP_NOTICE(p_hwfn, true,
+					  "Failed sending a UNLOAD_DONE command. rc = %d.\n",
+					  rc);
+				rc2 = ECORE_UNKNOWN_ERROR;
+			}
+		}
+	} /* hwfn loop */
 
 	if (IS_PF(p_dev)) {
+		p_hwfn = ECORE_LEADING_HWFN(p_dev);
+		p_ptt = ECORE_LEADING_HWFN(p_dev)->p_main_ptt;
+
 		/* Disable DMAE in PXP - in CMT, this should only be done for
 		 * first hw-function, and only after all transactions have
 		 * stopped for all active hw-functions.
 		 */
-		t_rc = ecore_change_pci_hwfn(&p_dev->hwfns[0],
-					     p_dev->hwfns[0].p_main_ptt, false);
-		if (t_rc != ECORE_SUCCESS)
-			rc = t_rc;
+		rc = ecore_change_pci_hwfn(p_hwfn, p_ptt, false);
+		if (rc != ECORE_SUCCESS) {
+			DP_NOTICE(p_hwfn, true,
+				  "ecore_change_pci_hwfn failed. rc = %d.\n",
+				  rc);
+			rc2 = ECORE_UNKNOWN_ERROR;
+		}
 	}
 
-	return rc;
+	return rc2;
 }
 
 void ecore_hw_stop_fastpath(struct ecore_dev *p_dev)
@@ -2265,82 +2338,6 @@ void ecore_hw_start_fastpath(struct ecore_hwfn *p_hwfn)
 		 NIG_REG_RX_LLH_BRB_GATE_DNTFWD_PERPF, 0x0);
 }
 
-static enum _ecore_status_t ecore_reg_assert(struct ecore_hwfn *p_hwfn,
-					     struct ecore_ptt *p_ptt, u32 reg,
-					     bool expected)
-{
-	u32 assert_val = ecore_rd(p_hwfn, p_ptt, reg);
-
-	if (assert_val != expected) {
-		DP_NOTICE(p_hwfn, true, "Value at address 0x%08x != 0x%08x\n",
-			  reg, expected);
-		return ECORE_UNKNOWN_ERROR;
-	}
-
-	return 0;
-}
-
-enum _ecore_status_t ecore_hw_reset(struct ecore_dev *p_dev)
-{
-	enum _ecore_status_t rc = ECORE_SUCCESS;
-	u32 unload_resp, unload_param;
-	int i;
-
-	for_each_hwfn(p_dev, i) {
-		struct ecore_hwfn *p_hwfn = &p_dev->hwfns[i];
-
-		if (IS_VF(p_dev)) {
-			rc = ecore_vf_pf_reset(p_hwfn);
-			if (rc)
-				return rc;
-			continue;
-		}
-
-		DP_VERBOSE(p_hwfn, ECORE_MSG_IFDOWN, "Resetting hw/fw\n");
-
-		/* Check for incorrect states */
-		if (!p_dev->recov_in_prog) {
-			ecore_reg_assert(p_hwfn, p_hwfn->p_main_ptt,
-					 QM_REG_USG_CNT_PF_TX, 0);
-			ecore_reg_assert(p_hwfn, p_hwfn->p_main_ptt,
-					 QM_REG_USG_CNT_PF_OTHER, 0);
-			/* @@@TBD - assert on incorrect xCFC values (10.b) */
-		}
-
-		/* Disable PF in HW blocks */
-		ecore_wr(p_hwfn, p_hwfn->p_main_ptt, DORQ_REG_PF_DB_ENABLE, 0);
-		ecore_wr(p_hwfn, p_hwfn->p_main_ptt, QM_REG_PF_EN, 0);
-
-		if (p_dev->recov_in_prog) {
-			DP_VERBOSE(p_hwfn, ECORE_MSG_IFDOWN,
-				   "Recovery is in progress -> skip sending unload_req/done\n");
-			break;
-		}
-
-		/* Send unload command to MCP */
-		rc = ecore_mcp_cmd(p_hwfn, p_hwfn->p_main_ptt,
-				   DRV_MSG_CODE_UNLOAD_REQ,
-				   DRV_MB_PARAM_UNLOAD_WOL_MCP,
-				   &unload_resp, &unload_param);
-		if (rc != ECORE_SUCCESS) {
-			DP_NOTICE(p_hwfn, true,
-				  "ecore_hw_reset: UNLOAD_REQ failed\n");
-			/* @@TBD - what to do? for now, assume ENG. */
-			unload_resp = FW_MSG_CODE_DRV_UNLOAD_ENGINE;
-		}
-
-		rc = ecore_mcp_unload_done(p_hwfn, p_hwfn->p_main_ptt);
-		if (rc != ECORE_SUCCESS) {
-			DP_NOTICE(p_hwfn,
-				  true, "ecore_hw_reset: UNLOAD_DONE failed\n");
-			/* @@@TBD - Should it really ASSERT here ? */
-			return rc;
-		}
-	}
-
-	return rc;
-}
-
 /* Free hwfn memory and resources acquired in hw_hwfn_prepare */
 static void ecore_hw_hwfn_free(struct ecore_hwfn *p_hwfn)
 {
diff --git a/drivers/net/qede/base/ecore_dev_api.h b/drivers/net/qede/base/ecore_dev_api.h
index ce764d2..e64a768 100644
--- a/drivers/net/qede/base/ecore_dev_api.h
+++ b/drivers/net/qede/base/ecore_dev_api.h
@@ -151,15 +151,6 @@ void ecore_prepare_hibernate(struct ecore_dev *p_dev);
  */
 void ecore_hw_start_fastpath(struct ecore_hwfn *p_hwfn);
 
-/**
- * @brief ecore_hw_reset -
- *
- * @param p_dev
- *
- * @return enum _ecore_status_t
- */
-enum _ecore_status_t ecore_hw_reset(struct ecore_dev *p_dev);
-
 enum ecore_hw_prepare_result {
 	ECORE_HW_PREPARE_SUCCESS,
 
diff --git a/drivers/net/qede/base/ecore_mcp.c b/drivers/net/qede/base/ecore_mcp.c
index b53210f..1c5f24c 100644
--- a/drivers/net/qede/base/ecore_mcp.c
+++ b/drivers/net/qede/base/ecore_mcp.c
@@ -891,6 +891,18 @@ enum _ecore_status_t ecore_mcp_load_req(struct ecore_hwfn *p_hwfn,
 	return ECORE_SUCCESS;
 }
 
+enum _ecore_status_t ecore_mcp_unload_req(struct ecore_hwfn *p_hwfn,
+					  struct ecore_ptt *p_ptt)
+{
+	u32 wol_param, mcp_resp, mcp_param;
+
+	/* @DPDK */
+	wol_param = DRV_MB_PARAM_UNLOAD_WOL_MCP;
+
+	return ecore_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_UNLOAD_REQ, wol_param,
+			     &mcp_resp, &mcp_param);
+}
+
 enum _ecore_status_t ecore_mcp_unload_done(struct ecore_hwfn *p_hwfn,
 					   struct ecore_ptt *p_ptt)
 {
diff --git a/drivers/net/qede/base/ecore_mcp.h b/drivers/net/qede/base/ecore_mcp.h
index 350d8a2..37d1835 100644
--- a/drivers/net/qede/base/ecore_mcp.h
+++ b/drivers/net/qede/base/ecore_mcp.h
@@ -171,6 +171,17 @@ enum _ecore_status_t ecore_mcp_load_req(struct ecore_hwfn *p_hwfn,
 					struct ecore_load_req_params *p_params);
 
 /**
+ * @brief Sends a UNLOAD_REQ message to the MFW
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ *
+ * @return enum _ecore_status_t - ECORE_SUCCESS - Operation was successful.
+ */
+enum _ecore_status_t ecore_mcp_unload_req(struct ecore_hwfn *p_hwfn,
+					  struct ecore_ptt *p_ptt);
+
+/**
  * @brief Sends a UNLOAD_DONE message to the MFW
  *
  * @param p_hwfn
diff --git a/drivers/net/qede/base/ecore_spq.c b/drivers/net/qede/base/ecore_spq.c
index 016de74..3c1d05b 100644
--- a/drivers/net/qede/base/ecore_spq.c
+++ b/drivers/net/qede/base/ecore_spq.c
@@ -190,6 +190,9 @@ static void ecore_spq_hw_initialize(struct ecore_hwfn *p_hwfn,
 
 	p_cxt = cxt_info.p_cxt;
 
+	/* @@@TBD we zero the context until we have ilt_reset implemented. */
+	OSAL_MEM_ZERO(p_cxt, sizeof(*p_cxt));
+
 	if (ECORE_IS_BB(p_hwfn->p_dev) || ECORE_IS_AH(p_hwfn->p_dev)) {
 		SET_FIELD(p_cxt->xstorm_ag_context.flags10,
 			  E4_XSTORM_CORE_CONN_AG_CTX_DQ_CF_EN, 1);
diff --git a/drivers/net/qede/qede_main.c b/drivers/net/qede/qede_main.c
index 326e56f..74856c5 100644
--- a/drivers/net/qede/qede_main.c
+++ b/drivers/net/qede/qede_main.c
@@ -636,19 +636,6 @@ static int qed_nic_stop(struct ecore_dev *edev)
 	return rc;
 }
 
-static int qed_nic_reset(struct ecore_dev *edev)
-{
-	int rc;
-
-	rc = ecore_hw_reset(edev);
-	if (rc)
-		return rc;
-
-	ecore_resc_free(edev);
-
-	return 0;
-}
-
 static int qed_slowpath_stop(struct ecore_dev *edev)
 {
 #ifdef CONFIG_QED_SRIOV
@@ -667,10 +654,11 @@ static int qed_slowpath_stop(struct ecore_dev *edev)
 		if (IS_QED_ETH_IF(edev))
 			qed_sriov_disable(edev, true);
 #endif
-		qed_nic_stop(edev);
 	}
 
-	qed_nic_reset(edev);
+	qed_nic_stop(edev);
+
+	ecore_resc_free(edev);
 	qed_stop_iov_task(edev);
 
 	return 0;
-- 
1.7.10.3



More information about the dev mailing list