[dpdk-dev] [PATCH v2 04/16] fm10k: add func to re-allocate mbuf for RX ring

Chen Jing D(Mark) jing.d.chen at intel.com
Thu Oct 22 11:44:52 CEST 2015


From: "Chen Jing D(Mark)" <jing.d.chen at intel.com>

Add function fm10k_rxq_rearm() to re-allocate mbufs for the used
descriptors in the RX HW ring.

Signed-off-by: Chen Jing D(Mark) <jing.d.chen at intel.com>
---
 drivers/net/fm10k/fm10k.h          |    9 ++++
 drivers/net/fm10k/fm10k_ethdev.c   |    3 +
 drivers/net/fm10k/fm10k_rxtx_vec.c |   90 ++++++++++++++++++++++++++++++++++++
 3 files changed, 102 insertions(+), 0 deletions(-)

diff --git a/drivers/net/fm10k/fm10k.h b/drivers/net/fm10k/fm10k.h
index 362a2d0..5df7960 100644
--- a/drivers/net/fm10k/fm10k.h
+++ b/drivers/net/fm10k/fm10k.h
@@ -123,6 +123,12 @@
 #define FM10K_VFTA_BIT(vlan_id)    (1 << ((vlan_id) & 0x1F))
 #define FM10K_VFTA_IDX(vlan_id)    ((vlan_id) >> 5)
 
+#define RTE_FM10K_RXQ_REARM_THRESH      32 /* mbufs re-armed per rearm call */
+#define RTE_FM10K_VPMD_TX_BURST         32
+#define RTE_FM10K_MAX_RX_BURST          RTE_FM10K_RXQ_REARM_THRESH
+#define RTE_FM10K_TX_MAX_FREE_BUF_SZ    64
+#define RTE_FM10K_DESCS_PER_LOOP    4
+
 struct fm10k_macvlan_filter_info {
 	uint16_t vlan_num;       /* Total VLAN number */
 	uint16_t mac_num;        /* Total mac number */
@@ -178,6 +184,9 @@ struct fm10k_rx_queue {
 	volatile uint32_t *tail_ptr;
 	uint16_t nb_desc;
 	uint16_t queue_id;
+	/* Below 2 fields only valid in case vPMD is applied. */
+	uint16_t rxrearm_nb;     /* number of remaining to be re-armed */
+	uint16_t rxrearm_start;  /* the idx we start the re-arming from */
 	uint8_t port_id;
 	uint8_t drop_en;
 	uint8_t rx_deferred_start; /* don't start this queue in dev start. */
diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c
index 1bc1e7c..24f936a 100644
--- a/drivers/net/fm10k/fm10k_ethdev.c
+++ b/drivers/net/fm10k/fm10k_ethdev.c
@@ -121,6 +121,9 @@ rx_queue_reset(struct fm10k_rx_queue *q)
 	q->next_alloc = 0;
 	q->next_trigger = q->alloc_thresh - 1;
 	FM10K_PCI_REG_WRITE(q->tail_ptr, q->nb_desc - 1);
+	q->rxrearm_start = 0;
+	q->rxrearm_nb = 0;
+
 	return 0;
 }
 
diff --git a/drivers/net/fm10k/fm10k_rxtx_vec.c b/drivers/net/fm10k/fm10k_rxtx_vec.c
index 34b677b..75533f9 100644
--- a/drivers/net/fm10k/fm10k_rxtx_vec.c
+++ b/drivers/net/fm10k/fm10k_rxtx_vec.c
@@ -64,3 +64,93 @@ fm10k_rxq_vec_setup(struct fm10k_rx_queue *rxq)
 	rxq->mbuf_initializer = *(uint64_t *)p;
 	return 0;
 }
+
+static inline void
+fm10k_rxq_rearm(struct fm10k_rx_queue *rxq)
+{
+	int i;
+	uint16_t rx_id;
+	volatile union fm10k_rx_desc *rxdp;
+	struct rte_mbuf **mb_alloc = &rxq->sw_ring[rxq->rxrearm_start];
+	struct rte_mbuf *mb0, *mb1;
+	__m128i head_off = _mm_set_epi64x(
+			RTE_PKTMBUF_HEADROOM + FM10K_RX_DATABUF_ALIGN - 1,
+			RTE_PKTMBUF_HEADROOM + FM10K_RX_DATABUF_ALIGN - 1);
+	__m128i dma_addr0, dma_addr1;
+	/* Mask: 512B-align the low lane (HW rule), clear the high lane */
+	const __m128i hba_msk = _mm_set_epi64x(0,
+				UINT64_MAX - FM10K_RX_DATABUF_ALIGN + 1);
+
+	rxdp = rxq->hw_ring + rxq->rxrearm_start;
+
+	/* Bulk-get mbufs into the SW ring; on failure count it and bail */
+	if (rte_mempool_get_bulk(rxq->mp,
+				 (void *)mb_alloc,
+				 RTE_FM10K_RXQ_REARM_THRESH) < 0) {
+		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
+			RTE_FM10K_RXQ_REARM_THRESH;
+		return;
+	}
+
+	/* Set up mbufs and descriptors, two of each per iteration */
+	for (i = 0; i < RTE_FM10K_RXQ_REARM_THRESH; i += 2, mb_alloc += 2) {
+		__m128i vaddr0, vaddr1;
+		uintptr_t p0, p1;
+
+		mb0 = mb_alloc[0];
+		mb1 = mb_alloc[1];
+
+		/* Reset the mbuf from the per-queue template.
+		 * Only 6 bytes actually need re-arming, but RX rewrites
+		 * the ol_flags that follow anyway, so a single 8-byte
+		 * store is used: 6 bytes of rearm_data plus the first
+		 * 2 bytes of ol_flags.
+		 */
+		p0 = (uintptr_t)&mb0->rearm_data;
+		*(uint64_t *)p0 = rxq->mbuf_initializer;
+		p1 = (uintptr_t)&mb1->rearm_data;
+		*(uint64_t *)p1 = rxq->mbuf_initializer;
+
+		/* one 16B load: buf_addr (low 64b), buf_physaddr (high 64b) */
+		vaddr0 = _mm_loadu_si128((__m128i *)&(mb0->buf_addr));
+		vaddr1 = _mm_loadu_si128((__m128i *)&(mb1->buf_addr));
+
+		/* copy the physical address into both 64-bit lanes */
+		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
+		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);
+
+		/* add headroom + (align - 1) so the mask below rounds up */
+		dma_addr0 = _mm_add_epi64(dma_addr0, head_off);
+		dma_addr1 = _mm_add_epi64(dma_addr1, head_off);
+
+		/* Mask to the 512-byte alignment the HW requires; the same
+		 * mask also sets the Header Buffer Address to zero.
+		 */
+		dma_addr0 = _mm_and_si128(dma_addr0, hba_msk);
+		dma_addr1 = _mm_and_si128(dma_addr1, hba_msk);
+
+		/* write the DMA address pair into the HW descriptor */
+		_mm_store_si128((__m128i *)&rxdp++->q, dma_addr0);
+		_mm_store_si128((__m128i *)&rxdp++->q, dma_addr1);
+
+		/* align the virtual-address data offset the same way */
+		mb0->data_off = (uint16_t)(RTE_PTR_ALIGN((char *)mb0->buf_addr
+				+ RTE_PKTMBUF_HEADROOM, FM10K_RX_DATABUF_ALIGN)
+				- (char *)mb0->buf_addr);
+		mb1->data_off = (uint16_t)(RTE_PTR_ALIGN((char *)mb1->buf_addr
+				+ RTE_PKTMBUF_HEADROOM, FM10K_RX_DATABUF_ALIGN)
+				- (char *)mb1->buf_addr);
+	}
+
+	rxq->rxrearm_start += RTE_FM10K_RXQ_REARM_THRESH;
+	if (rxq->rxrearm_start >= rxq->nb_desc)
+		rxq->rxrearm_start = 0;
+
+	rxq->rxrearm_nb -= RTE_FM10K_RXQ_REARM_THRESH;
+
+	rx_id = (uint16_t) ((rxq->rxrearm_start == 0) ?
+			     (rxq->nb_desc - 1) : (rxq->rxrearm_start - 1));
+
+	/* Tell the NIC the last re-armed descriptor index via the tail */
+	FM10K_PCI_REG_WRITE(rxq->tail_ptr, rx_id);
+}
-- 
1.7.7.6



More information about the dev mailing list