[dpdk-dev] [PATCH 4/4] igb: automatic link recovery on VF

Wenzhuo Lu wenzhuo.lu at intel.com
Wed May 4 23:10:46 CEST 2016


When the physical link goes down and recovers later,
the VF link cannot recover until the user stops and
starts the port manually.
This patch implements automatic recovery of the VF
port.
The automatic recovery is based on the link up/down
message received from the PF. When the VF receives the
link up/down message, it replaces the RX/TX and
operation functions with fake ones to stop RX/TX
and any further operations, and then resets the VF port.
After the port has been reset successfully, the
RX/TX and operation functions are restored.

Signed-off-by: Wenzhuo Lu <wenzhuo.lu at intel.com>
---
 doc/guides/rel_notes/release_16_07.rst |  2 +-
 drivers/net/e1000/e1000_ethdev.h       | 14 ++++++
 drivers/net/e1000/igb_ethdev.c         | 87 +++++++++++++++++++++++++++++++++-
 drivers/net/e1000/igb_rxtx.c           | 38 +++++++++++++++
 4 files changed, 139 insertions(+), 2 deletions(-)

diff --git a/doc/guides/rel_notes/release_16_07.rst b/doc/guides/rel_notes/release_16_07.rst
index d80f449..8144450 100644
--- a/doc/guides/rel_notes/release_16_07.rst
+++ b/doc/guides/rel_notes/release_16_07.rst
@@ -40,7 +40,7 @@ This section should contain new features added in this release. Sample format:
   VF. To handle this link up/down event, add the mailbox interruption
   support to receive the message.
 
-* **Added the support of automatic link recovery for ixgbe VF.**
+* **Added the support of automatic link recovery for ixgbe/igb VF.**
 
   When the physical link becomes down and recover later, VF will receive
   the mailbox message for that. VF handles this message by resetting the
diff --git a/drivers/net/e1000/e1000_ethdev.h b/drivers/net/e1000/e1000_ethdev.h
index e8bf8da..1b71f9b 100644
--- a/drivers/net/e1000/e1000_ethdev.h
+++ b/drivers/net/e1000/e1000_ethdev.h
@@ -34,6 +34,7 @@
 #ifndef _E1000_ETHDEV_H_
 #define _E1000_ETHDEV_H_
 #include <rte_time.h>
+#include <rte_spinlock.h>
 
 /* need update link, bit flag */
 #define E1000_FLAG_NEED_LINK_UPDATE (uint32_t)(1 << 0)
@@ -261,6 +262,9 @@ struct e1000_adapter {
 	struct rte_timecounter  systime_tc;
 	struct rte_timecounter  rx_tstamp_tc;
 	struct rte_timecounter  tx_tstamp_tc;
+	eth_rx_burst_t rx_backup;
+	eth_tx_burst_t tx_backup;
+	rte_spinlock_t vf_reset_lock;
 };
 
 #define E1000_DEV_PRIVATE(adapter) \
@@ -316,6 +320,14 @@ uint16_t eth_igb_xmit_pkts(void *txq, struct rte_mbuf **tx_pkts,
 uint16_t eth_igb_recv_pkts(void *rxq, struct rte_mbuf **rx_pkts,
 		uint16_t nb_pkts);
 
+uint16_t eth_igbvf_xmit_pkts_fake(void *txq,
+				  struct rte_mbuf **tx_pkts,
+				  uint16_t nb_pkts);
+
+uint16_t eth_igbvf_recv_pkts_fake(void *rxq,
+				  struct rte_mbuf **rx_pkts,
+				  uint16_t nb_pkts);
+
 uint16_t eth_igb_recv_scattered_pkts(void *rxq,
 		struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
 
@@ -388,4 +400,6 @@ void em_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 
 void igb_pf_host_uninit(struct rte_eth_dev *dev);
 
+void igbvf_dev_link_up_down_handler(struct rte_eth_dev *dev);
+
 #endif /* _E1000_ETHDEV_H_ */
diff --git a/drivers/net/e1000/igb_ethdev.c b/drivers/net/e1000/igb_ethdev.c
index b0e5e6a..5dc3182 100644
--- a/drivers/net/e1000/igb_ethdev.c
+++ b/drivers/net/e1000/igb_ethdev.c
@@ -411,6 +411,8 @@ static const struct eth_dev_ops igbvf_eth_dev_ops = {
 	.get_reg              = igbvf_get_regs,
 };
 
+static const struct eth_dev_ops igbvf_eth_dev_ops_fake = {NULL};
+
 /* store statistics names and its offset in stats structure */
 struct rte_igb_xstats_name_off {
 	char name[RTE_ETH_XSTATS_NAME_SIZE];
@@ -911,6 +913,7 @@ eth_igbvf_dev_init(struct rte_eth_dev *eth_dev)
 	eth_dev->dev_ops = &igbvf_eth_dev_ops;
 	eth_dev->rx_pkt_burst = &eth_igb_recv_pkts;
 	eth_dev->tx_pkt_burst = &eth_igb_xmit_pkts;
+	rte_spinlock_init(&adapter->vf_reset_lock);
 
 	/* for secondary processes, we don't initialise any further as primary
 	 * has already done this work. Only check we don't need a different
@@ -2641,6 +2644,8 @@ eth_igbvf_interrupt_get_status(struct rte_eth_dev *dev)
 
 void igbvf_mbx_process(struct rte_eth_dev *dev)
 {
+	struct e1000_adapter *adapter =
+		(struct e1000_adapter *)dev->data->dev_private;
 	struct e1000_hw *hw =
 		E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 	struct e1000_mbx_info *mbx = &hw->mbx;
@@ -2650,8 +2655,88 @@ void igbvf_mbx_process(struct rte_eth_dev *dev)
 		return;
 
 	/* PF reset VF event */
-	if (in_msg == E1000_PF_CONTROL_MSG)
+	if (in_msg == E1000_PF_CONTROL_MSG) {
 		_rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RESET);
+
+		/* Stop the ops and rx/tx */
+		if (dev->data->dev_started) {
+			PMD_DRV_LOG(DEBUG, "Link up/down event detected.");
+			dev->dev_ops = &igbvf_eth_dev_ops_fake;
+
+			adapter->rx_backup = dev->rx_pkt_burst;
+			adapter->tx_backup = dev->tx_pkt_burst;
+			dev->rx_pkt_burst = eth_igbvf_recv_pkts_fake;
+			dev->tx_pkt_burst = eth_igbvf_xmit_pkts_fake;
+		}
+	}
+}
+
+/* Restart the VF port, then restore the real RX/TX functions and ops. */
+void
+igbvf_dev_link_up_down_handler(struct rte_eth_dev *dev)
+{
+	struct e1000_adapter *adapter =
+		(struct e1000_adapter *)dev->data->dev_private;
+	struct e1000_hw *hw =
+		E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	int diag;
+	uint32_t eiam;
+
+	/* Reference igbvf_intr_enable */
+	uint32_t eiam_mbx = 1 << E1000_VTIVAR_MISC_MAILBOX;
+
+	/* Only one worker core needs to perform the VF reset */
+	if (!rte_spinlock_trylock(&adapter->vf_reset_lock))
+		return;
+
+	/**
+	 * After the fake recv/xmit is installed, a worker thread may still be
+	 * running inside the real RX/TX func, so wait long enough to assume
+	 * all of them have left it. The assumption is that each execution of
+	 * the RX and TX func takes less than 100us.
+	 */
+	rte_delay_us(100);
+
+	do {
+		dev->data->dev_started = 0;
+		igbvf_dev_stop(dev);
+		rte_delay_us(1000000);
+
+		diag = igbvf_dev_start(dev);
+		if (diag) {
+			PMD_INIT_LOG(ERR, "Igb VF reset: "
+				     "Failed to start device.");
+			/* Drop the lock so a later poll can retry */
+			rte_spinlock_unlock(&adapter->vf_reset_lock);
+			return;
+		}
+		dev->data->dev_started = 1;
+		eth_igbvf_stats_reset(dev);
+		if (dev->data->dev_conf.intr_conf.lsc == 0)
+			diag = eth_igb_link_update(dev, 0);
+		if (diag) {
+			PMD_INIT_LOG(INFO, "Igb VF reset: "
+				     "Failed to update link.");
+		}
+
+		/**
+		 * When the PF link is down, the VF may be unable to operate
+		 * its registers. Check whether the register write took effect;
+		 * if not, repeat stop/start until the PF link is up again.
+		 */
+		eiam = E1000_READ_REG(hw, E1000_EIAM);
+	} while (!(eiam & eiam_mbx));
+
+	dev->rx_pkt_burst = adapter->rx_backup;
+	dev->tx_pkt_burst = adapter->tx_backup;
+	dev->dev_ops = &igbvf_eth_dev_ops;
+
+	/**
+	 * Wait a while to ensure the other worker threads are running the
+	 * real rx/tx func, so none of them enters here and resets again.
+	 */
+	rte_delay_us(100);
+	rte_spinlock_unlock(&adapter->vf_reset_lock);
+}
 
 static int
diff --git a/drivers/net/e1000/igb_rxtx.c b/drivers/net/e1000/igb_rxtx.c
index 4a987e3..5e3b47b 100644
--- a/drivers/net/e1000/igb_rxtx.c
+++ b/drivers/net/e1000/igb_rxtx.c
@@ -117,6 +117,7 @@ struct igb_rx_queue {
 	struct igb_rx_entry *sw_ring;   /**< address of RX software ring. */
 	struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
 	struct rte_mbuf *pkt_last_seg;  /**< Last segment of current packet. */
+	struct rte_eth_dev  *dev; /**< device this queue belongs to. */
 	uint16_t            nb_rx_desc; /**< number of RX descriptors. */
 	uint16_t            rx_tail;    /**< current value of RDT register. */
 	uint16_t            nb_rx_hold; /**< number of held free RX desc. */
@@ -185,6 +186,7 @@ struct igb_tx_queue {
 	uint64_t               tx_ring_phys_addr; /**< TX ring DMA address. */
 	struct igb_tx_entry    *sw_ring; /**< virtual address of SW ring. */
 	volatile uint32_t      *tdt_reg_addr; /**< Address of TDT register. */
+	struct rte_eth_dev     *dev; /**< device this queue belongs to. */
 	uint32_t               txd_type;      /**< Device-specific TXD type */
 	uint16_t               nb_tx_desc;    /**< number of TX descriptors. */
 	uint16_t               tx_tail; /**< Current value of TDT register. */
@@ -1344,6 +1346,7 @@ eth_igb_tx_queue_setup(struct rte_eth_dev *dev,
 		return -ENOMEM;
 	}
 
+	txq->dev = dev;
 	txq->nb_tx_desc = nb_desc;
 	txq->pthresh = tx_conf->tx_thresh.pthresh;
 	txq->hthresh = tx_conf->tx_thresh.hthresh;
@@ -1461,6 +1464,7 @@ eth_igb_rx_queue_setup(struct rte_eth_dev *dev,
 			  RTE_CACHE_LINE_SIZE);
 	if (rxq == NULL)
 		return -ENOMEM;
+	rxq->dev = dev;
 	rxq->mb_pool = mp;
 	rxq->nb_rx_desc = nb_desc;
 	rxq->pthresh = rx_conf->rx_thresh.pthresh;
@@ -2524,3 +2528,37 @@ igb_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 	qinfo->conf.tx_thresh.hthresh = txq->hthresh;
 	qinfo->conf.tx_thresh.wthresh = txq->wthresh;
 }
+
+/**
+ * Fake TX burst function used while handling link up/down.
+ * Runs the link up/down handler and transmits nothing.
+ */
+uint16_t
+eth_igbvf_xmit_pkts_fake(void *tx_queue,
+			 struct rte_mbuf __rte_unused **tx_pkts,
+			 uint16_t __rte_unused nb_pkts)
+{
+	struct igb_tx_queue *queue = tx_queue;
+
+	/* The owning device is taken from the queue itself. */
+	igbvf_dev_link_up_down_handler(queue->dev);
+
+	return 0;
+}
+
+/**
+ * Fake RX burst function used while handling link up/down.
+ * Runs the link up/down handler and receives nothing.
+ */
+uint16_t
+eth_igbvf_recv_pkts_fake(void *rx_queue,
+			 struct rte_mbuf __rte_unused **rx_pkts,
+			 uint16_t __rte_unused nb_pkts)
+{
+	struct igb_rx_queue *queue = rx_queue;
+
+	/* The owning device is taken from the queue itself. */
+	igbvf_dev_link_up_down_handler(queue->dev);
+
+	return 0;
+}
-- 
1.9.3



More information about the dev mailing list