[dpdk-dev] [PATCH v4 46/58] net/txgbe: add priority flow control support

Jiawen Wu jiawenwu at trustnetic.com
Mon Oct 19 10:54:03 CEST 2020


Add priority flow control support.

Signed-off-by: Jiawen Wu <jiawenwu at trustnetic.com>
---
 drivers/net/txgbe/base/txgbe_dcb.c    | 148 ++++++++++++++++++++++++++
 drivers/net/txgbe/base/txgbe_dcb.h    |   2 +
 drivers/net/txgbe/base/txgbe_dcb_hw.c |  73 +++++++++++++
 drivers/net/txgbe/txgbe_ethdev.c      |  54 ++++++++++
 drivers/net/txgbe/txgbe_rxtx.c        |  22 +++-
 5 files changed, 298 insertions(+), 1 deletion(-)

diff --git a/drivers/net/txgbe/base/txgbe_dcb.c b/drivers/net/txgbe/base/txgbe_dcb.c
index da6a3a7c8..7e9a16cfe 100644
--- a/drivers/net/txgbe/base/txgbe_dcb.c
+++ b/drivers/net/txgbe/base/txgbe_dcb.c
@@ -7,6 +7,146 @@
 #include "txgbe_dcb.h"
 #include "txgbe_dcb_hw.h"
 
+/**
+ *  txgbe_pfc_enable - Enable flow control
+ *  @hw: pointer to hardware structure
+ *  @tc_num: traffic class number
+ *  Enable flow control according to the current settings.
+ */
+int
+txgbe_dcb_pfc_enable(struct txgbe_hw *hw, uint8_t tc_num)
+{
+	int ret_val = 0;
+	uint32_t mflcn_reg, fccfg_reg;
+	uint32_t pause_time;
+	uint32_t fcrtl, fcrth;
+	uint8_t i;
+	uint8_t nb_rx_en;
+
+	/* Validate the water mark configuration */
+	if (!hw->fc.pause_time) {
+		ret_val = TXGBE_ERR_INVALID_LINK_SETTINGS;
+		goto out;
+	}
+
+	/* Low water mark of zero causes XOFF floods */
+	if (hw->fc.current_mode & txgbe_fc_tx_pause) {
+		 /* High/Low water can not be 0 */
+		if (!hw->fc.high_water[tc_num] ||
+		    !hw->fc.low_water[tc_num]) {
+			PMD_INIT_LOG(ERR, "Invalid water mark configuration");
+			ret_val = TXGBE_ERR_INVALID_LINK_SETTINGS;
+			goto out;
+		}
+
+		if (hw->fc.low_water[tc_num] >= hw->fc.high_water[tc_num]) {
+			PMD_INIT_LOG(ERR, "Invalid water mark configuration");
+			ret_val = TXGBE_ERR_INVALID_LINK_SETTINGS;
+			goto out;
+		}
+	}
+	/* Negotiate the fc mode to use */
+	txgbe_fc_autoneg(hw);
+
+	/* Disable any previous flow control settings */
+	mflcn_reg = rd32(hw, TXGBE_RXFCCFG);
+	mflcn_reg &= ~(TXGBE_RXFCCFG_FC | TXGBE_RXFCCFG_PFC);
+
+	fccfg_reg = rd32(hw, TXGBE_TXFCCFG);
+	fccfg_reg &= ~(TXGBE_TXFCCFG_FC | TXGBE_TXFCCFG_PFC);
+
+	switch (hw->fc.current_mode) {
+	case txgbe_fc_none:
+		/*
+		 * If the count of enabled RX Priority Flow control > 1,
+		 * and the TX pause can not be disabled
+		 */
+		nb_rx_en = 0;
+		for (i = 0; i < TXGBE_DCB_TC_MAX; i++) {
+			uint32_t reg = rd32(hw, TXGBE_FCWTRHI(i));
+			if (reg & TXGBE_FCWTRHI_XOFF)
+				nb_rx_en++;
+		}
+		if (nb_rx_en > 1)
+			fccfg_reg |= TXGBE_TXFCCFG_PFC;
+		break;
+	case txgbe_fc_rx_pause:
+		/*
+		 * Rx Flow control is enabled and Tx Flow control is
+		 * disabled by software override. Since there really
+		 * isn't a way to advertise that we are capable of RX
+		 * Pause ONLY, we will advertise that we support both
+		 * symmetric and asymmetric Rx PAUSE.  Later, we will
+		 * disable the adapter's ability to send PAUSE frames.
+		 */
+		mflcn_reg |= TXGBE_RXFCCFG_PFC;
+		/*
+		 * If the count of enabled RX Priority Flow control > 1,
+		 * and the TX pause can not be disabled
+		 */
+		nb_rx_en = 0;
+		for (i = 0; i < TXGBE_DCB_TC_MAX; i++) {
+			uint32_t reg = rd32(hw, TXGBE_FCWTRHI(i));
+			if (reg & TXGBE_FCWTRHI_XOFF)
+				nb_rx_en++;
+		}
+		if (nb_rx_en > 1)
+			fccfg_reg |= TXGBE_TXFCCFG_PFC;
+		break;
+	case txgbe_fc_tx_pause:
+		/*
+		 * Tx Flow control is enabled, and Rx Flow control is
+		 * disabled by software override.
+		 */
+		fccfg_reg |= TXGBE_TXFCCFG_PFC;
+		break;
+	case txgbe_fc_full:
+		/* Flow control (both Rx and Tx) is enabled by SW override. */
+		mflcn_reg |= TXGBE_RXFCCFG_PFC;
+		fccfg_reg |= TXGBE_TXFCCFG_PFC;
+		break;
+	default:
+		PMD_DRV_LOG(DEBUG, "Flow control param set incorrectly");
+		ret_val = TXGBE_ERR_CONFIG;
+		goto out;
+	}
+
+	/* Set 802.3x based flow control settings. */
+	wr32(hw, TXGBE_RXFCCFG, mflcn_reg);
+	wr32(hw, TXGBE_TXFCCFG, fccfg_reg);
+
+	/* Set up and enable Rx high/low water mark thresholds, enable XON. */
+	if ((hw->fc.current_mode & txgbe_fc_tx_pause) &&
+		hw->fc.high_water[tc_num]) {
+		fcrtl = TXGBE_FCWTRLO_TH(hw->fc.low_water[tc_num]) |
+			TXGBE_FCWTRLO_XON;
+		fcrth = TXGBE_FCWTRHI_TH(hw->fc.high_water[tc_num]) |
+			TXGBE_FCWTRHI_XOFF;
+	} else {
+		/*
+		 * In order to prevent Tx hangs when the internal Tx
+		 * switch is enabled we must set the high water mark
+		 * to the maximum FCRTH value.  This allows the Tx
+		 * switch to function even under heavy Rx workloads.
+		 */
+		fcrtl = 0;
+		fcrth = rd32(hw, TXGBE_PBRXSIZE(tc_num)) - 32;
+	}
+	wr32(hw, TXGBE_FCWTRLO(tc_num), fcrtl);
+	wr32(hw, TXGBE_FCWTRHI(tc_num), fcrth);
+
+	/* Configure pause time (2 TCs per register) */
+	pause_time = TXGBE_RXFCFSH_TIME(hw->fc.pause_time);
+	for (i = 0; i < (TXGBE_DCB_TC_MAX / 2); i++)
+		wr32(hw, TXGBE_FCXOFFTM(i), pause_time * 0x00010001);
+
+	/* Configure flow control refresh threshold value */
+	wr32(hw, TXGBE_RXFCRFSH, pause_time / 2);
+
+out:
+	return ret_val;
+}
+
 /**
  * txgbe_dcb_calculate_tc_credits_cee - Calculates traffic class credits
  * @hw: pointer to hardware structure
@@ -210,3 +350,11 @@ void txgbe_dcb_unpack_map_cee(struct txgbe_dcb_config *cfg, int direction,
 		map[up] = txgbe_dcb_get_tc_from_up(cfg, direction, up);
 }
 
+/* Helper routines to abstract HW specifics from DCB netlink ops */
+s32 txgbe_dcb_config_pfc(struct txgbe_hw *hw, u8 pfc_en, u8 *map)
+{
+	int ret = TXGBE_ERR_PARAM;
+	ret = txgbe_dcb_config_pfc_raptor(hw, pfc_en, map);
+	return ret;
+}
+
diff --git a/drivers/net/txgbe/base/txgbe_dcb.h b/drivers/net/txgbe/base/txgbe_dcb.h
index f08e0301d..cd87cf305 100644
--- a/drivers/net/txgbe/base/txgbe_dcb.h
+++ b/drivers/net/txgbe/base/txgbe_dcb.h
@@ -92,6 +92,8 @@ struct txgbe_dcb_config {
 	bool vt_mode;
 };
 
+int txgbe_dcb_pfc_enable(struct txgbe_hw *hw, u8 tc_num);
+
 /* DCB credits calculation */
 s32 txgbe_dcb_calculate_tc_credits_cee(struct txgbe_hw *hw,
 				   struct txgbe_dcb_config *dcb_config,
diff --git a/drivers/net/txgbe/base/txgbe_dcb_hw.c b/drivers/net/txgbe/base/txgbe_dcb_hw.c
index 3c759c0c3..42742d04f 100644
--- a/drivers/net/txgbe/base/txgbe_dcb_hw.c
+++ b/drivers/net/txgbe/base/txgbe_dcb_hw.c
@@ -180,6 +180,79 @@ s32 txgbe_dcb_config_tx_data_arbiter_raptor(struct txgbe_hw *hw, u16 *refill,
 	return 0;
 }
 
+/**
+ * txgbe_dcb_config_pfc_raptor - Configure priority flow control
+ * @hw: pointer to hardware structure
+ * @pfc_en: enabled pfc bitmask
+ * @map: priority to tc assignments indexed by priority
+ *
+ * Configure Priority Flow Control (PFC) for each traffic class.
+ */
+s32 txgbe_dcb_config_pfc_raptor(struct txgbe_hw *hw, u8 pfc_en, u8 *map)
+{
+	u32 i, j, fcrtl, reg;
+	u8 max_tc = 0;
+
+	/* Enable Transmit Priority Flow Control */
+	wr32(hw, TXGBE_TXFCCFG, TXGBE_TXFCCFG_PFC);
+
+	/* Enable Receive Priority Flow Control */
+	wr32m(hw, TXGBE_RXFCCFG, TXGBE_RXFCCFG_PFC,
+		pfc_en ? TXGBE_RXFCCFG_PFC : 0);
+
+	for (i = 0; i < TXGBE_DCB_UP_MAX; i++) {
+		if (map[i] > max_tc)
+			max_tc = map[i];
+	}
+
+	/* Configure PFC Tx thresholds per TC */
+	for (i = 0; i <= max_tc; i++) {
+		int enabled = 0;
+
+		for (j = 0; j < TXGBE_DCB_UP_MAX; j++) {
+			if (map[j] == i && (pfc_en & (1 << j))) {
+				enabled = 1;
+				break;
+			}
+		}
+
+		if (enabled) {
+			reg = TXGBE_FCWTRHI_TH(hw->fc.high_water[i]) |
+			      TXGBE_FCWTRHI_XOFF;
+			fcrtl = TXGBE_FCWTRLO_TH(hw->fc.low_water[i]) |
+				TXGBE_FCWTRLO_XON;
+			wr32(hw, TXGBE_FCWTRLO(i), fcrtl);
+		} else {
+			/*
+			 * In order to prevent Tx hangs when the internal Tx
+			 * switch is enabled we must set the high water mark
+			 * to the Rx packet buffer size - 24KB.  This allows
+			 * the Tx switch to function even under heavy Rx
+			 * workloads.
+			 */
+			reg = rd32(hw, TXGBE_PBRXSIZE(i)) - 24576;
+			wr32(hw, TXGBE_FCWTRLO(i), 0);
+		}
+
+		wr32(hw, TXGBE_FCWTRHI(i), reg);
+	}
+
+	for (; i < TXGBE_DCB_TC_MAX; i++) {
+		wr32(hw, TXGBE_FCWTRLO(i), 0);
+		wr32(hw, TXGBE_FCWTRHI(i), 0);
+	}
+
+	/* Configure pause time (2 TCs per register) */
+	reg = hw->fc.pause_time | (hw->fc.pause_time << 16);
+	for (i = 0; i < (TXGBE_DCB_TC_MAX / 2); i++)
+		wr32(hw, TXGBE_FCXOFFTM(i), reg);
+
+	/* Configure flow control refresh threshold value */
+	wr32(hw, TXGBE_RXFCRFSH, hw->fc.pause_time / 2);
+
+	return 0;
+}
+
 /**
  * txgbe_dcb_config_tc_stats_raptor - Config traffic class statistics
  * @hw: pointer to hardware structure
diff --git a/drivers/net/txgbe/txgbe_ethdev.c b/drivers/net/txgbe/txgbe_ethdev.c
index a2cc7f715..d0238e5bc 100644
--- a/drivers/net/txgbe/txgbe_ethdev.c
+++ b/drivers/net/txgbe/txgbe_ethdev.c
@@ -2800,6 +2800,59 @@ txgbe_flow_ctrl_set(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
 	return -EIO;
 }
 
+static int
+txgbe_priority_flow_ctrl_set(struct rte_eth_dev *dev,
+		struct rte_eth_pfc_conf *pfc_conf)
+{
+	int err;
+	uint32_t rx_buf_size;
+	uint32_t max_high_water;
+	uint8_t tc_num;
+	uint8_t  map[TXGBE_DCB_UP_MAX] = { 0 };
+	struct txgbe_hw *hw = TXGBE_DEV_HW(dev);
+	struct txgbe_dcb_config *dcb_config = TXGBE_DEV_DCB_CONFIG(dev);
+
+	enum txgbe_fc_mode rte_fcmode_2_txgbe_fcmode[] = {
+		txgbe_fc_none,
+		txgbe_fc_rx_pause,
+		txgbe_fc_tx_pause,
+		txgbe_fc_full
+	};
+
+	PMD_INIT_FUNC_TRACE();
+
+	txgbe_dcb_unpack_map_cee(dcb_config, TXGBE_DCB_RX_CONFIG, map);
+	tc_num = map[pfc_conf->priority];
+	rx_buf_size = rd32(hw, TXGBE_PBRXSIZE(tc_num));
+	PMD_INIT_LOG(DEBUG, "Rx packet buffer size = 0x%x", rx_buf_size);
+	/*
+	 * At least reserve one Ethernet frame for watermark
+	 * high_water/low_water in kilo bytes for txgbe
+	 */
+	max_high_water = (rx_buf_size - RTE_ETHER_MAX_LEN) >> 10;
+	if (pfc_conf->fc.high_water > max_high_water ||
+	    pfc_conf->fc.high_water <= pfc_conf->fc.low_water) {
+		PMD_INIT_LOG(ERR, "Invalid high/low water setup value in KB");
+		PMD_INIT_LOG(ERR, "High_water must <= 0x%x", max_high_water);
+		return -EINVAL;
+	}
+
+	hw->fc.requested_mode = rte_fcmode_2_txgbe_fcmode[pfc_conf->fc.mode];
+	hw->fc.pause_time = pfc_conf->fc.pause_time;
+	hw->fc.send_xon = pfc_conf->fc.send_xon;
+	hw->fc.low_water[tc_num] =  pfc_conf->fc.low_water;
+	hw->fc.high_water[tc_num] = pfc_conf->fc.high_water;
+
+	err = txgbe_dcb_pfc_enable(hw, tc_num);
+
+	/* Not negotiated is not an error case */
+	if (err == 0 || err == TXGBE_ERR_FC_NOT_NEGOTIATED)
+		return 0;
+
+	PMD_INIT_LOG(ERR, "txgbe_dcb_pfc_enable = 0x%x", err);
+	return -EIO;
+}
+
 int
 txgbe_dev_rss_reta_update(struct rte_eth_dev *dev,
 			  struct rte_eth_rss_reta_entry64 *reta_conf,
@@ -3287,6 +3340,7 @@ static const struct eth_dev_ops txgbe_eth_dev_ops = {
 	.tx_queue_release           = txgbe_dev_tx_queue_release,
 	.flow_ctrl_get              = txgbe_flow_ctrl_get,
 	.flow_ctrl_set              = txgbe_flow_ctrl_set,
+	.priority_flow_ctrl_set     = txgbe_priority_flow_ctrl_set,
 	.mac_addr_add               = txgbe_add_rar,
 	.mac_addr_remove            = txgbe_remove_rar,
 	.mac_addr_set               = txgbe_set_default_mac_addr,
diff --git a/drivers/net/txgbe/txgbe_rxtx.c b/drivers/net/txgbe/txgbe_rxtx.c
index 2480ddf1e..9b628b181 100644
--- a/drivers/net/txgbe/txgbe_rxtx.c
+++ b/drivers/net/txgbe/txgbe_rxtx.c
@@ -3132,7 +3132,7 @@ txgbe_dcb_hw_configure(struct rte_eth_dev *dev,
 			struct txgbe_dcb_config *dcb_config)
 {
 	int     ret = 0;
-	uint8_t i, nb_tcs;
+	uint8_t i, pfc_en, nb_tcs;
 	uint16_t pbsize, rx_buffer_size;
 	uint8_t config_dcb_rx = 0;
 	uint8_t config_dcb_tx = 0;
@@ -3306,6 +3306,26 @@ txgbe_dcb_hw_configure(struct rte_eth_dev *dev,
 	/* Configure queue statistics registers */
 	txgbe_dcb_config_tc_stats_raptor(hw, dcb_config);
 
+	/* Check if the PFC is supported */
+	if (dev->data->dev_conf.dcb_capability_en & ETH_DCB_PFC_SUPPORT) {
+		pbsize = (uint16_t)(rx_buffer_size / nb_tcs);
+		for (i = 0; i < nb_tcs; i++) {
+			/* If the TC count is 8,
+			 * and the default high_water is 48,
+			 * the low_water is 16 as default.
+			 */
+			hw->fc.high_water[i] = (pbsize * 3) / 4;
+			hw->fc.low_water[i] = pbsize / 4;
+			/* Enable pfc for this TC */
+			tc = &dcb_config->tc_config[i];
+			tc->pfc = txgbe_dcb_pfc_enabled;
+		}
+		txgbe_dcb_unpack_pfc_cee(dcb_config, map, &pfc_en);
+		if (dcb_config->num_tcs.pfc_tcs == ETH_4_TCS)
+			pfc_en &= 0x0F;
+		ret = txgbe_dcb_config_pfc(hw, pfc_en, map);
+	}
+
 	return ret;
 }
 
-- 
2.18.4





More information about the dev mailing list