[dpdk-dev] [PATCH v2 3/4] vmxnet3: add TSO support

Yong Wang yongwang at vmware.com
Tue Jan 5 03:28:18 CET 2016


This commit adds vmxnet3 TSO support.

Verified with test-pmd (set fwd csum) that both TSO and non-TSO
packets can be successfully transmitted and that all segments of a
TSO packet are correct on the receiver side.

Signed-off-by: Yong Wang <yongwang at vmware.com>
---
 doc/guides/rel_notes/release_2_3.rst |   3 +
 drivers/net/vmxnet3/vmxnet3_ring.h   |  13 ----
 drivers/net/vmxnet3/vmxnet3_rxtx.c   | 117 ++++++++++++++++++++++++++---------
 3 files changed, 92 insertions(+), 41 deletions(-)

diff --git a/doc/guides/rel_notes/release_2_3.rst b/doc/guides/rel_notes/release_2_3.rst
index 58205fe..ae487bb 100644
--- a/doc/guides/rel_notes/release_2_3.rst
+++ b/doc/guides/rel_notes/release_2_3.rst
@@ -24,6 +24,9 @@ Drivers
 
   Support TCP/UDP checksum offload.
 
+* **vmxnet3: add TSO support.**
+
+
 Libraries
 ~~~~~~~~~
 
diff --git a/drivers/net/vmxnet3/vmxnet3_ring.h b/drivers/net/vmxnet3/vmxnet3_ring.h
index 612487e..15b19e1 100644
--- a/drivers/net/vmxnet3/vmxnet3_ring.h
+++ b/drivers/net/vmxnet3/vmxnet3_ring.h
@@ -130,18 +130,6 @@ struct vmxnet3_txq_stats {
 	uint64_t	tx_ring_full;
 };
 
-typedef struct vmxnet3_tx_ctx {
-	int      ip_type;
-	bool     is_vlan;
-	bool     is_cso;
-
-	uint16_t evl_tag;		/* only valid when is_vlan == TRUE */
-	uint32_t eth_hdr_size;  /* only valid for pkts requesting tso or csum
-							 * offloading */
-	uint32_t ip_hdr_size;
-	uint32_t l4_hdr_size;
-} vmxnet3_tx_ctx_t;
-
 typedef struct vmxnet3_tx_queue {
 	struct vmxnet3_hw            *hw;
 	struct vmxnet3_cmd_ring      cmd_ring;
@@ -155,7 +143,6 @@ typedef struct vmxnet3_tx_queue {
 	uint8_t                      port_id;       /**< Device port identifier. */
 } vmxnet3_tx_queue_t;
 
-
 struct vmxnet3_rxq_stats {
 	uint64_t                     drop_total;
 	uint64_t                     drop_err;
diff --git a/drivers/net/vmxnet3/vmxnet3_rxtx.c b/drivers/net/vmxnet3/vmxnet3_rxtx.c
index 08e6115..1dd793e 100644
--- a/drivers/net/vmxnet3/vmxnet3_rxtx.c
+++ b/drivers/net/vmxnet3/vmxnet3_rxtx.c
@@ -295,27 +295,46 @@ vmxnet3_dev_clear_queues(struct rte_eth_dev *dev)
 	}
 }
 
+static int
+vmxnet3_unmap_pkt(uint16_t eop_idx, vmxnet3_tx_queue_t *txq)
+{
+	int completed = 0;
+	struct rte_mbuf *mbuf;
+
+	/* Release cmd_ring descriptor and free mbuf */
+	VMXNET3_ASSERT(txq->cmd_ring.base[eop_idx].txd.eop == 1);
+
+	mbuf = txq->cmd_ring.buf_info[eop_idx].m;
+	if (unlikely(mbuf == NULL))
+		rte_panic("EOP desc does not point to a valid mbuf");
+	else
+		rte_pktmbuf_free(mbuf);
+
+	txq->cmd_ring.buf_info[eop_idx].m = NULL;
+
+	while (txq->cmd_ring.next2comp != eop_idx) {
+		/* no out-of-order completion */
+		VMXNET3_ASSERT(txq->cmd_ring.base[txq->cmd_ring.next2comp].txd.cq == 0);
+		vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
+		completed++;
+	}
+
+	/* Mark the txd for which tcd was generated as completed */
+	vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
+
+	return completed + 1;
+}
+
 static void
 vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
 {
 	int completed = 0;
-	struct rte_mbuf *mbuf;
 	vmxnet3_comp_ring_t *comp_ring = &txq->comp_ring;
 	struct Vmxnet3_TxCompDesc *tcd = (struct Vmxnet3_TxCompDesc *)
 		(comp_ring->base + comp_ring->next2proc);
 
 	while (tcd->gen == comp_ring->gen) {
-		/* Release cmd_ring descriptor and free mbuf */
-		VMXNET3_ASSERT(txq->cmd_ring.base[tcd->txdIdx].txd.eop == 1);
-		while (txq->cmd_ring.next2comp != tcd->txdIdx) {
-			mbuf = txq->cmd_ring.buf_info[txq->cmd_ring.next2comp].m;
-			txq->cmd_ring.buf_info[txq->cmd_ring.next2comp].m = NULL;
-			rte_pktmbuf_free_seg(mbuf);
-
-			/* Mark the txd for which tcd was generated as completed */
-			vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
-			completed++;
-		}
+		completed += vmxnet3_unmap_pkt(tcd->txdIdx, txq);
 
 		vmxnet3_comp_ring_adv_next2proc(comp_ring);
 		tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
@@ -325,6 +344,13 @@ vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
 	PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
 }
 
+/* The number of descriptors that are needed for a packet. */
+static unsigned
+txd_estimate(const struct rte_mbuf *m)
+{
+	return m->nb_segs;
+}
+
 uint16_t
 vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		  uint16_t nb_pkts)
@@ -351,21 +377,42 @@ vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		struct rte_mbuf *txm = tx_pkts[nb_tx];
 		struct rte_mbuf *m_seg = txm;
 		int copy_size = 0;
+		bool tso = (txm->ol_flags & PKT_TX_TCP_SEG) != 0;
+		unsigned count = txd_estimate(txm);
+
+		avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
+		if (count > avail) {
+			/* Is command ring full? */
+			if (unlikely(avail == 0)) {
+				PMD_TX_LOG(DEBUG, "No free ring descriptors");
+				txq->stats.tx_ring_full++;
+				txq->stats.drop_total += (nb_pkts - nb_tx);
+				break;
+			}
 
-		/* Is this packet execessively fragmented, then drop */
-		if (unlikely(txm->nb_segs > VMXNET3_MAX_TXD_PER_PKT)) {
-			++txq->stats.drop_too_many_segs;
-			++txq->stats.drop_total;
+			/* Command ring is not full but cannot handle the
+			 * multi-segmented packet. Let's try the next packet
+			 * in this case.
+			 */
+			PMD_TX_LOG(DEBUG, "Running out of ring descriptors "
+				"(avail %d needed %d)\n", avail, count);
+			txq->stats.drop_total++;
+			if (tso)
+				txq->stats.drop_tso++;
 			rte_pktmbuf_free(txm);
-			++nb_tx;
+			nb_tx++;
 			continue;
 		}
 
-		/* Is command ring full? */
-		avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
-		if (txm->nb_segs > avail) {
-			++txq->stats.tx_ring_full;
-			break;
+		/* Drop non-TSO packet that is excessively fragmented */
+		if (unlikely(!tso && count > VMXNET3_MAX_TXD_PER_PKT)) {
+			PMD_TX_LOG(ERROR, "Non-TSO packet cannot occupy more than %d tx "
+				"descriptors. Packet dropped.\n", VMXNET3_MAX_TXD_PER_PKT);
+			txq->stats.drop_too_many_segs++;
+			txq->stats.drop_total++;
+			rte_pktmbuf_free(txm);
+			nb_tx++;
+			continue;
 		}
 
 		if (rte_pktmbuf_pkt_len(txm) <= VMXNET3_HDR_COPY_SIZE) {
@@ -382,11 +429,11 @@ vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		do {
 			/* Remember the transmit buffer for cleanup */
 			tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
-			tbi->m = m_seg;
 
 			/* NB: the following assumes that VMXNET3 maximum
-			   transmit buffer size (16K) is greater than
-			   maximum sizeof mbuf segment size. */
+			 * transmit buffer size (16K) is greater than
+			 * the maximum mbuf segment size.
+			 */
 			gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
 			if (copy_size)
 				gdesc->txd.addr = rte_cpu_to_le_64(txq->data_ring.basePA +
@@ -405,6 +452,8 @@ vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 			dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
 		} while ((m_seg = m_seg->next) != NULL);
 
+		/* set the last buf_info for the pkt */
+		tbi->m = txm;
 		/* Update the EOP descriptor */
 		gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;
 
@@ -415,7 +464,17 @@ vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 			gdesc->txd.tci = txm->vlan_tci;
 		}
 
-		if (txm->ol_flags & PKT_TX_L4_MASK) {
+		if (tso) {
+			uint16_t mss = txm->tso_segsz;
+
+			VMXNET3_ASSERT(mss > 0);
+
+			gdesc->txd.hlen = txm->l2_len + txm->l3_len + txm->l4_len;
+			gdesc->txd.om = VMXNET3_OM_TSO;
+			gdesc->txd.msscof = mss;
+
+			deferred += (rte_pktmbuf_pkt_len(txm) - gdesc->txd.hlen + mss - 1) / mss;
+		} else if (txm->ol_flags & PKT_TX_L4_MASK) {
 			gdesc->txd.om = VMXNET3_OM_CSUM;
 			gdesc->txd.hlen = txm->l2_len + txm->l3_len;
 
@@ -431,13 +490,15 @@ vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 					   txm->ol_flags & PKT_TX_L4_MASK);
 				abort();
 			}
+			deferred++;
 		} else {
 			gdesc->txd.hlen = 0;
 			gdesc->txd.om = VMXNET3_OM_NONE;
 			gdesc->txd.msscof = 0;
+			deferred++;
 		}
 
-		txq_ctrl->txNumDeferred = rte_cpu_to_le_32(++deferred);
+		txq_ctrl->txNumDeferred = rte_cpu_to_le_32(deferred);
 
 		/* flip the GEN bit on the SOP */
 		rte_compiler_barrier();
@@ -634,7 +695,7 @@ vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 		if (unlikely(rxd->btype != VMXNET3_RXD_BTYPE_HEAD)) {
 			PMD_RX_LOG(DEBUG,
 				   "Alert : Misbehaving device, incorrect "
-				   " buffer type used. iPacket dropped.");
+				   " buffer type used. Packet dropped.");
 			rte_pktmbuf_free_seg(rbi->m);
 			goto rcd_done;
 		}
-- 
1.9.1



More information about the dev mailing list