[dpdk-dev] [PATCH v2 6/8] igb: implement device reset on VF

Zhe Tao zhe.tao at intel.com
Tue Jun 7 07:45:16 CEST 2016


From: Wenzhuo Lu <wenzhuo.lu at intel.com>

Implement the device reset function.
1. Add the fake RX/TX functions.
2. The reset function stops RX/TX by replacing the
   RX/TX functions with the fake ones and taking the
   per-queue locks to make sure any in-progress RX/TX
   has finished (see the sketch below).
3. After RX/TX has stopped, reset the VF port, then
   release the locks and restore the RX/TX functions.
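
The regular RX/TX paths are expected to hold the same per-queue
lock while a burst runs (those lock-taking burst functions are
added in a separate patch of this series). A minimal sketch of
that pairing, using a hypothetical wrapper name rather than the
exact code from that patch:

	static uint16_t
	igbvf_recv_pkts_locked(void *rx_queue, struct rte_mbuf **rx_pkts,
			       uint16_t nb_pkts)
	{
		struct igb_rx_queue *rxq = rx_queue;
		uint16_t nb_rx;

		/* A burst runs under rx_lock, so once the reset path
		 * has taken every queue lock no regular RX/TX can be
		 * in flight, and the fake functions keep new callers
		 * away from the hardware in the meantime.
		 */
		rte_spinlock_lock(&rxq->rx_lock);
		nb_rx = eth_igb_recv_pkts(rx_queue, rx_pkts, nb_pkts);
		rte_spinlock_unlock(&rxq->rx_lock);
		return nb_rx;
	}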

BTW: the definitions of some structures are moved from the .c
file to the .h file.
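
For context, an application triggers this path through the device
reset API mentioned in the release note. A usage sketch, assuming
the ethdev wrapper added earlier in this series is named
rte_eth_dev_reset() (the wrapper itself is not part of this patch):

	/* E.g. after the mailbox interrupt reports a PF link
	 * up/down event, reset the VF port. */
	uint8_t port_id = 0; /* hypothetical port number */

	if (rte_eth_dev_reset(port_id) != 0)
		printf("VF port %u: device reset failed\n",
		       (unsigned)port_id);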

Signed-off-by: Wenzhuo Lu <wenzhuo.lu at intel.com>
Signed-off-by: zhe.tao <zhe.tao at intel.com>
---
 doc/guides/rel_notes/release_16_07.rst |   2 +-
 drivers/net/e1000/e1000_ethdev.h       | 116 ++++++++++++++++++++++++++++++
 drivers/net/e1000/igb_ethdev.c         | 104 +++++++++++++++++++++++++++
 drivers/net/e1000/igb_rxtx.c           | 128 ++++++---------------------------
 4 files changed, 243 insertions(+), 107 deletions(-)

diff --git a/doc/guides/rel_notes/release_16_07.rst b/doc/guides/rel_notes/release_16_07.rst
index d36c4b1..a4c0cc3 100644
--- a/doc/guides/rel_notes/release_16_07.rst
+++ b/doc/guides/rel_notes/release_16_07.rst
@@ -53,7 +53,7 @@ New Features
   VF. To handle this link up/down event, add the mailbox interruption
   support to receive the message.
 
-* **Added device reset support for ixgbe VF.**
+* **Added device reset support for ixgbe/igb VF.**
 
   Added the device reset API. APP can call this API to reset the VF port
   when it's not working.
diff --git a/drivers/net/e1000/e1000_ethdev.h b/drivers/net/e1000/e1000_ethdev.h
index 6a42994..4ae03ce 100644
--- a/drivers/net/e1000/e1000_ethdev.h
+++ b/drivers/net/e1000/e1000_ethdev.h
@@ -34,6 +34,7 @@
 #ifndef _E1000_ETHDEV_H_
 #define _E1000_ETHDEV_H_
 #include <rte_time.h>
+#include <rte_spinlock.h>
 
 /* need update link, bit flag */
 #define E1000_FLAG_NEED_LINK_UPDATE (uint32_t)(1 << 0)
@@ -261,6 +262,113 @@ struct e1000_adapter {
 	struct rte_timecounter  systime_tc;
 	struct rte_timecounter  rx_tstamp_tc;
 	struct rte_timecounter  tx_tstamp_tc;
+	eth_rx_burst_t rx_backup;
+	eth_tx_burst_t tx_backup;
+};
+
+/**
+ * Structure associated with each descriptor of the RX ring of a RX queue.
+ */
+struct igb_rx_entry {
+	struct rte_mbuf *mbuf; /**< mbuf associated with RX descriptor. */
+};
+
+/**
+ * Structure associated with each descriptor of the TX ring of a TX queue.
+ */
+struct igb_tx_entry {
+	struct rte_mbuf *mbuf; /**< mbuf associated with TX desc, if any. */
+	uint16_t next_id; /**< Index of next descriptor in ring. */
+	uint16_t last_id; /**< Index of last scattered descriptor. */
+};
+
+/**
+ * Hardware context number
+ */
+enum igb_advctx_num {
+	IGB_CTX_0    = 0, /**< CTX0    */
+	IGB_CTX_1    = 1, /**< CTX1    */
+	IGB_CTX_NUM  = 2, /**< CTX_NUM */
+};
+
+/** Offload features */
+union igb_tx_offload {
+	uint64_t data;
+	struct {
+		uint64_t l3_len:9; /**< L3 (IP) Header Length. */
+		uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
+		uint64_t vlan_tci:16;  /**< VLAN Tag Control Identifier(CPU order). */
+		uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
+		uint64_t tso_segsz:16; /**< TCP TSO segment size. */
+
+		/* uint64_t unused:8; */
+	};
+};
+
+/**
+ * Structure to check if a new context needs to be built
+ */
+struct igb_advctx_info {
+	uint64_t flags;           /**< ol_flags related to context build. */
+	/** tx offload: vlan, tso, l2-l3-l4 lengths. */
+	union igb_tx_offload tx_offload;
+	/** compare mask for tx offload. */
+	union igb_tx_offload tx_offload_mask;
+};
+
+/**
+ * Structure associated with each RX queue.
+ */
+struct igb_rx_queue {
+	struct rte_mempool  *mb_pool;   /**< mbuf pool to populate RX ring. */
+	volatile union e1000_adv_rx_desc *rx_ring; /**< RX ring virtual address. */
+	uint64_t            rx_ring_phys_addr; /**< RX ring DMA address. */
+	volatile uint32_t   *rdt_reg_addr; /**< RDT register address. */
+	volatile uint32_t   *rdh_reg_addr; /**< RDH register address. */
+	struct igb_rx_entry *sw_ring;   /**< address of RX software ring. */
+	struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
+	struct rte_mbuf *pkt_last_seg;  /**< Last segment of current packet. */
+	rte_spinlock_t      rx_lock; /**< Lock for packet reception. */
+	uint16_t            nb_rx_desc; /**< number of RX descriptors. */
+	uint16_t            rx_tail;    /**< current value of RDT register. */
+	uint16_t            nb_rx_hold; /**< number of held free RX desc. */
+	uint16_t            rx_free_thresh; /**< max free RX desc to hold. */
+	uint16_t            queue_id;   /**< RX queue index. */
+	uint16_t            reg_idx;    /**< RX queue register index. */
+	uint8_t             port_id;    /**< Device port identifier. */
+	uint8_t             pthresh;    /**< Prefetch threshold register. */
+	uint8_t             hthresh;    /**< Host threshold register. */
+	uint8_t             wthresh;    /**< Write-back threshold register. */
+	uint8_t             crc_len;    /**< 0 if CRC stripped, 4 otherwise. */
+	uint8_t             drop_en;  /**< If not 0, set SRRCTL.Drop_En. */
+};
+
+/**
+ * Structure associated with each TX queue.
+ */
+struct igb_tx_queue {
+	volatile union e1000_adv_tx_desc *tx_ring; /**< TX ring address */
+	uint64_t               tx_ring_phys_addr; /**< TX ring DMA address. */
+	struct igb_tx_entry    *sw_ring; /**< virtual address of SW ring. */
+	volatile uint32_t      *tdt_reg_addr; /**< Address of TDT register. */
+	rte_spinlock_t         tx_lock; /**< Lock for packet transmission. */
+	uint32_t               txd_type;      /**< Device-specific TXD type */
+	uint16_t               nb_tx_desc;    /**< number of TX descriptors. */
+	uint16_t               tx_tail; /**< Current value of TDT register. */
+	uint16_t               tx_head;
+	/**< Index of first used TX descriptor. */
+	uint16_t               queue_id; /**< TX queue index. */
+	uint16_t               reg_idx;  /**< TX queue register index. */
+	uint8_t                port_id;  /**< Device port identifier. */
+	uint8_t                pthresh;  /**< Prefetch threshold register. */
+	uint8_t                hthresh;  /**< Host threshold register. */
+	uint8_t                wthresh;  /**< Write-back threshold register. */
+	uint32_t               ctx_curr;
+	/**< Current used hardware descriptor. */
+	uint32_t               ctx_start;
+	/**< Start context position for transmit queue. */
+	struct igb_advctx_info ctx_cache[IGB_CTX_NUM];
+	/**< Hardware context history.*/
 };
 
 #define E1000_DEV_PRIVATE(adapter) \
@@ -316,6 +424,14 @@ uint16_t eth_igb_xmit_pkts(void *txq, struct rte_mbuf **tx_pkts,
 uint16_t eth_igb_recv_pkts(void *rxq, struct rte_mbuf **rx_pkts,
 		uint16_t nb_pkts);
 
+uint16_t eth_igbvf_xmit_pkts_fake(void *txq,
+				  struct rte_mbuf **tx_pkts,
+				  uint16_t nb_pkts);
+
+uint16_t eth_igbvf_recv_pkts_fake(void *rxq,
+				  struct rte_mbuf **rx_pkts,
+				  uint16_t nb_pkts);
+
 uint16_t eth_igb_recv_scattered_pkts(void *rxq,
 		struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
 
diff --git a/drivers/net/e1000/igb_ethdev.c b/drivers/net/e1000/igb_ethdev.c
index 8aad741..4b78a25 100644
--- a/drivers/net/e1000/igb_ethdev.c
+++ b/drivers/net/e1000/igb_ethdev.c
@@ -268,6 +268,7 @@ static void eth_igb_configure_msix_intr(struct rte_eth_dev *dev);
 static void eth_igbvf_interrupt_handler(struct rte_intr_handle *handle,
 					void *param);
 static void igbvf_mbx_process(struct rte_eth_dev *dev);
+static int igbvf_dev_reset(struct rte_eth_dev *dev);
 
 /*
  * Define VF Stats MACRO for Non "cleared on read" register
@@ -409,6 +410,7 @@ static const struct eth_dev_ops igbvf_eth_dev_ops = {
 	.mac_addr_set         = igbvf_default_mac_addr_set,
 	.get_reg_length       = igbvf_get_reg_length,
 	.get_reg              = igbvf_get_regs,
+	.dev_reset            = igbvf_dev_reset,
 };
 
 /* store statistics names and its offset in stats structure */
@@ -2663,6 +2665,108 @@ void igbvf_mbx_process(struct rte_eth_dev *dev)
 }
 
 static int
+igbvf_dev_reset(struct rte_eth_dev *dev)
+{
+	struct e1000_adapter *adapter =
+		(struct e1000_adapter *)dev->data->dev_private;
+	struct e1000_hw *hw =
+		E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	int diag = 0;
+	uint32_t eiam;
+	uint16_t i;
+	struct igb_rx_queue *rxq;
+	struct igb_tx_queue *txq;
+	/* See igbvf_intr_enable(). */
+	uint32_t eiam_mbx = 1 << E1000_VTIVAR_MISC_MAILBOX;
+
+	/* Nothing needs to be done if the device is not started. */
+	if (!dev->data->dev_started)
+		return 0;
+
+	PMD_DRV_LOG(DEBUG, "Start resetting the VF port.");
+
+	/**
+	 * Stop RX/TX by swapping in the fake functions and taking
+	 * the per-queue locks to wait out any in-flight bursts.
+	 */
+	adapter->rx_backup = dev->rx_pkt_burst;
+	adapter->tx_backup = dev->tx_pkt_burst;
+	dev->rx_pkt_burst = eth_igbvf_recv_pkts_fake;
+	dev->tx_pkt_burst = eth_igbvf_xmit_pkts_fake;
+
+	if (dev->data->rx_queues)
+		for (i = 0; i < dev->data->nb_rx_queues; i++) {
+			rxq = dev->data->rx_queues[i];
+			rte_spinlock_lock(&rxq->rx_lock);
+		}
+
+	if (dev->data->tx_queues)
+		for (i = 0; i < dev->data->nb_tx_queues; i++) {
+			txq = dev->data->tx_queues[i];
+			rte_spinlock_lock(&txq->tx_lock);
+		}
+
+	/* Perform the VF reset. */
+	do {
+		dev->data->dev_started = 0;
+		igbvf_dev_stop(dev);
+		if (dev->data->dev_conf.intr_conf.lsc == 0)
+			diag = eth_igb_link_update(dev, 0);
+		if (diag) {
+			PMD_INIT_LOG(INFO, "Igb VF reset: "
+				     "Failed to update link.");
+		}
+		rte_delay_ms(1000);
+
+		diag = igbvf_dev_start(dev);
+		if (diag) {
+			PMD_INIT_LOG(ERR, "Igb VF reset: "
+				     "Failed to start device.");
+			return diag;
+		}
+		dev->data->dev_started = 1;
+		eth_igbvf_stats_reset(dev);
+		if (dev->data->dev_conf.intr_conf.lsc == 0)
+			diag = eth_igb_link_update(dev, 0);
+		if (diag) {
+			PMD_INIT_LOG(INFO, "Igb VF reset: "
+				     "Failed to update link.");
+		}
+
+		/**
+		 * When the PF link is down, there is a chance
+		 * that the VF cannot access its registers. Check
+		 * whether the register was written successfully.
+		 * If not, repeat the stop/start sequence until
+		 * the PF link is up, in other words, until the
+		 * registers can be written.
+		 */
+		eiam = E1000_READ_REG(hw, E1000_EIAM);
+	} while (!(eiam & eiam_mbx));
+
+	/**
+	 * Release the locks for queues.
+	 * Restore the RX/TX functions.
+	 */
+	if (dev->data->rx_queues)
+		for (i = 0; i < dev->data->nb_rx_queues; i++) {
+			rxq = dev->data->rx_queues[i];
+			rte_spinlock_unlock(&rxq->rx_lock);
+		}
+
+	if (dev->data->tx_queues)
+		for (i = 0; i < dev->data->nb_tx_queues; i++) {
+			txq = dev->data->tx_queues[i];
+			rte_spinlock_unlock(&txq->tx_lock);
+		}
+
+	dev->rx_pkt_burst = adapter->rx_backup;
+	dev->tx_pkt_burst = adapter->tx_backup;
+
+	return 0;
+}
+
+static int
 eth_igbvf_interrupt_action(struct rte_eth_dev *dev)
 {
 	struct e1000_interrupt *intr =
diff --git a/drivers/net/e1000/igb_rxtx.c b/drivers/net/e1000/igb_rxtx.c
index 7e97330..5af7173 100644
--- a/drivers/net/e1000/igb_rxtx.c
+++ b/drivers/net/e1000/igb_rxtx.c
@@ -67,7 +67,6 @@
 #include <rte_tcp.h>
 #include <rte_sctp.h>
 #include <rte_string_fns.h>
-#include <rte_spinlock.h>
 
 #include "e1000_logs.h"
 #include "base/e1000_api.h"
@@ -80,72 +79,6 @@
 		PKT_TX_L4_MASK |		 \
 		PKT_TX_TCP_SEG)
 
-/**
- * Structure associated with each descriptor of the RX ring of a RX queue.
- */
-struct igb_rx_entry {
-	struct rte_mbuf *mbuf; /**< mbuf associated with RX descriptor. */
-};
-
-/**
- * Structure associated with each descriptor of the TX ring of a TX queue.
- */
-struct igb_tx_entry {
-	struct rte_mbuf *mbuf; /**< mbuf associated with TX desc, if any. */
-	uint16_t next_id; /**< Index of next descriptor in ring. */
-	uint16_t last_id; /**< Index of last scattered descriptor. */
-};
-
-/**
- * Structure associated with each RX queue.
- */
-struct igb_rx_queue {
-	struct rte_mempool  *mb_pool;   /**< mbuf pool to populate RX ring. */
-	volatile union e1000_adv_rx_desc *rx_ring; /**< RX ring virtual address. */
-	uint64_t            rx_ring_phys_addr; /**< RX ring DMA address. */
-	volatile uint32_t   *rdt_reg_addr; /**< RDT register address. */
-	volatile uint32_t   *rdh_reg_addr; /**< RDH register address. */
-	struct igb_rx_entry *sw_ring;   /**< address of RX software ring. */
-	struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
-	struct rte_mbuf *pkt_last_seg;  /**< Last segment of current packet. */
-	rte_spinlock_t      rx_lock; /**< Lock for packet receiption. */
-	uint16_t            nb_rx_desc; /**< number of RX descriptors. */
-	uint16_t            rx_tail;    /**< current value of RDT register. */
-	uint16_t            nb_rx_hold; /**< number of held free RX desc. */
-	uint16_t            rx_free_thresh; /**< max free RX desc to hold. */
-	uint16_t            queue_id;   /**< RX queue index. */
-	uint16_t            reg_idx;    /**< RX queue register index. */
-	uint8_t             port_id;    /**< Device port identifier. */
-	uint8_t             pthresh;    /**< Prefetch threshold register. */
-	uint8_t             hthresh;    /**< Host threshold register. */
-	uint8_t             wthresh;    /**< Write-back threshold register. */
-	uint8_t             crc_len;    /**< 0 if CRC stripped, 4 otherwise. */
-	uint8_t             drop_en;  /**< If not 0, set SRRCTL.Drop_En. */
-};
-
-/**
- * Hardware context number
- */
-enum igb_advctx_num {
-	IGB_CTX_0    = 0, /**< CTX0    */
-	IGB_CTX_1    = 1, /**< CTX1    */
-	IGB_CTX_NUM  = 2, /**< CTX_NUM */
-};
-
-/** Offload features */
-union igb_tx_offload {
-	uint64_t data;
-	struct {
-		uint64_t l3_len:9; /**< L3 (IP) Header Length. */
-		uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
-		uint64_t vlan_tci:16;  /**< VLAN Tag Control Identifier(CPU order). */
-		uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
-		uint64_t tso_segsz:16; /**< TCP TSO segment size. */
-
-		/* uint64_t unused:8; */
-	};
-};
-
 /*
  * Compare mask for igb_tx_offload.data,
  * should be in sync with igb_tx_offload layout.
@@ -158,45 +91,6 @@ union igb_tx_offload {
 #define TX_TSO_CMP_MASK	\
 	(TX_MACIP_LEN_CMP_MASK | TX_TCP_LEN_CMP_MASK | TX_TSO_MSS_CMP_MASK)
 
-/**
- * Strucutre to check if new context need be built
- */
-struct igb_advctx_info {
-	uint64_t flags;           /**< ol_flags related to context build. */
-	/** tx offload: vlan, tso, l2-l3-l4 lengths. */
-	union igb_tx_offload tx_offload;
-	/** compare mask for tx offload. */
-	union igb_tx_offload tx_offload_mask;
-};
-
-/**
- * Structure associated with each TX queue.
- */
-struct igb_tx_queue {
-	volatile union e1000_adv_tx_desc *tx_ring; /**< TX ring address */
-	uint64_t               tx_ring_phys_addr; /**< TX ring DMA address. */
-	struct igb_tx_entry    *sw_ring; /**< virtual address of SW ring. */
-	rte_spinlock_t         tx_lock; /**< Lock for packet transmission. */
-	volatile uint32_t      *tdt_reg_addr; /**< Address of TDT register. */
-	uint32_t               txd_type;      /**< Device-specific TXD type */
-	uint16_t               nb_tx_desc;    /**< number of TX descriptors. */
-	uint16_t               tx_tail; /**< Current value of TDT register. */
-	uint16_t               tx_head;
-	/**< Index of first used TX descriptor. */
-	uint16_t               queue_id; /**< TX queue index. */
-	uint16_t               reg_idx;  /**< TX queue register index. */
-	uint8_t                port_id;  /**< Device port identifier. */
-	uint8_t                pthresh;  /**< Prefetch threshold register. */
-	uint8_t                hthresh;  /**< Host threshold register. */
-	uint8_t                wthresh;  /**< Write-back threshold register. */
-	uint32_t               ctx_curr;
-	/**< Current used hardware descriptor. */
-	uint32_t               ctx_start;
-	/**< Start context position for transmit queue. */
-	struct igb_advctx_info ctx_cache[IGB_CTX_NUM];
-	/**< Hardware context history.*/
-};
-
 #if 1
 #define RTE_PMD_USE_PREFETCH
 #endif
@@ -2530,3 +2424,25 @@ igb_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 	qinfo->conf.tx_thresh.hthresh = txq->hthresh;
 	qinfo->conf.tx_thresh.wthresh = txq->wthresh;
 }
+
+/**
+ * A fake function to stop transmission.
+ */
+uint16_t
+eth_igbvf_xmit_pkts_fake(void __rte_unused *tx_queue,
+			 struct rte_mbuf __rte_unused **tx_pkts,
+			 uint16_t __rte_unused nb_pkts)
+{
+	return 0;
+}
+
+/**
+ * A fake function to stop reception.
+ */
+uint16_t
+eth_igbvf_recv_pkts_fake(void __rte_unused *rx_queue,
+			 struct rte_mbuf __rte_unused **rx_pkts,
+			 uint16_t __rte_unused nb_pkts)
+{
+	return 0;
+}
-- 
2.1.4


