[dpdk-dev] [PATCH v4 4/5] net/ice: fix vector rx burst for ice
Jeff Guo
jia.guo at intel.com
Thu Sep 17 09:58:33 CEST 2020
The burst size limit in the vector Rx path is removed, since the driver
should retrieve as many received packets as possible. The scattered
receive path now also uses a wrapper function to maximize the burst.
Also do some code cleanup in the vector Rx path.
Bugzilla ID: 516
Fixes: c68a52b8b38c ("net/ice: support vector SSE in Rx")
Fixes: ae60d3c9b227 ("net/ice: support Rx AVX2 vector")
Signed-off-by: Jeff Guo <jia.guo at intel.com>
Tested-by: Yingya Han <yingyax.han at intel.com>
---
drivers/net/ice/ice_rxtx.h | 1 +
drivers/net/ice/ice_rxtx_vec_avx2.c | 23 ++++++------
drivers/net/ice/ice_rxtx_vec_sse.c | 56 +++++++++++++++++++----------
3 files changed, 49 insertions(+), 31 deletions(-)
diff --git a/drivers/net/ice/ice_rxtx.h b/drivers/net/ice/ice_rxtx.h
index 2fdcfb7d0..3ef5f300d 100644
--- a/drivers/net/ice/ice_rxtx.h
+++ b/drivers/net/ice/ice_rxtx.h
@@ -35,6 +35,7 @@
#define ICE_MAX_RX_BURST ICE_RXQ_REARM_THRESH
#define ICE_TX_MAX_FREE_BUF_SZ 64
#define ICE_DESCS_PER_LOOP 4
+#define ICE_DESCS_PER_LOOP_AVX 8
#define ICE_FDIR_PKT_LEN 512
diff --git a/drivers/net/ice/ice_rxtx_vec_avx2.c b/drivers/net/ice/ice_rxtx_vec_avx2.c
index be50677c2..843e4f32a 100644
--- a/drivers/net/ice/ice_rxtx_vec_avx2.c
+++ b/drivers/net/ice/ice_rxtx_vec_avx2.c
@@ -29,7 +29,7 @@ ice_rxq_rearm(struct ice_rx_queue *rxq)
__m128i dma_addr0;
dma_addr0 = _mm_setzero_si128();
- for (i = 0; i < ICE_DESCS_PER_LOOP; i++) {
+ for (i = 0; i < ICE_DESCS_PER_LOOP_AVX; i++) {
rxep[i].mbuf = &rxq->fake_mbuf;
_mm_store_si128((__m128i *)&rxdp[i].read,
dma_addr0);
@@ -132,12 +132,17 @@ ice_rxq_rearm(struct ice_rx_queue *rxq)
ICE_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
}
+/**
+ * vPMD raw receive routine; only accepts nb_pkts >= ICE_DESCS_PER_LOOP_AVX
+ *
+ * Notice:
+ * - nb_pkts < ICE_DESCS_PER_LOOP_AVX, just return no packet
+ * - nb_pkts is floor-aligned to a multiple of ICE_DESCS_PER_LOOP_AVX
+ */
static inline uint16_t
_ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts, uint8_t *split_packet)
{
-#define ICE_DESCS_PER_LOOP_AVX 8
-
const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
const __m256i mbuf_init = _mm256_set_epi64x(0, 0,
0, rxq->mbuf_initializer);
@@ -603,10 +608,6 @@ _ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
return received;
}
-/**
- * Notice:
- * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
- */
uint16_t
ice_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts)
@@ -616,8 +617,6 @@ ice_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
/**
* vPMD receive routine that reassembles single burst of 32 scattered packets
- * Notice:
- * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
*/
static uint16_t
ice_recv_scattered_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
@@ -626,6 +625,9 @@ ice_recv_scattered_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
struct ice_rx_queue *rxq = rx_queue;
uint8_t split_flags[ICE_VPMD_RX_BURST] = {0};
+ /* split_flags can only hold up to ICE_VPMD_RX_BURST entries */
+ nb_pkts = RTE_MIN(nb_pkts, ICE_VPMD_RX_BURST);
+
/* get some new buffers */
uint16_t nb_bufs = _ice_recv_raw_pkts_vec_avx2(rxq, rx_pkts, nb_pkts,
split_flags);
@@ -657,9 +659,6 @@ ice_recv_scattered_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
/**
* vPMD receive routine that reassembles scattered packets.
- * Main receive routine that can handle arbitrary burst sizes
- * Notice:
- * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
*/
uint16_t
ice_recv_scattered_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
diff --git a/drivers/net/ice/ice_rxtx_vec_sse.c b/drivers/net/ice/ice_rxtx_vec_sse.c
index 382ef31f3..c03e24092 100644
--- a/drivers/net/ice/ice_rxtx_vec_sse.c
+++ b/drivers/net/ice/ice_rxtx_vec_sse.c
@@ -205,10 +205,11 @@ ice_rx_desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts,
}
/**
+ * vPMD raw receive routine; only accepts nb_pkts >= ICE_DESCS_PER_LOOP
+ *
* Notice:
* - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
- * - nb_pkts > ICE_VPMD_RX_BURST, only scan ICE_VPMD_RX_BURST
- * numbers of DD bits
+ * - nb_pkts is floor-aligned to a multiple of ICE_DESCS_PER_LOOP
*/
static inline uint16_t
_ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
@@ -264,9 +265,6 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
const __m128i eop_check = _mm_set_epi64x(0x0000000200000002LL,
0x0000000200000002LL);
- /* nb_pkts shall be less equal than ICE_MAX_RX_BURST */
- nb_pkts = RTE_MIN(nb_pkts, ICE_MAX_RX_BURST);
-
/* nb_pkts has to be floor-aligned to ICE_DESCS_PER_LOOP */
nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, ICE_DESCS_PER_LOOP);
@@ -441,12 +439,6 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
return nb_pkts_recd;
}
-/**
- * Notice:
- * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
- * - nb_pkts > ICE_VPMD_RX_BURST, only scan ICE_VPMD_RX_BURST
- * numbers of DD bits
- */
uint16_t
ice_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts)
@@ -454,19 +446,19 @@ ice_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
return _ice_recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL);
}
-/* vPMD receive routine that reassembles scattered packets
- * Notice:
- * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
- * - nb_pkts > ICE_VPMD_RX_BURST, only scan ICE_VPMD_RX_BURST
- * numbers of DD bits
+/**
+ * vPMD receive routine that reassembles single burst of 32 scattered packets
*/
-uint16_t
-ice_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
- uint16_t nb_pkts)
+static uint16_t
+ice_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts)
{
struct ice_rx_queue *rxq = rx_queue;
uint8_t split_flags[ICE_VPMD_RX_BURST] = {0};
+ /* split_flags can only hold up to ICE_VPMD_RX_BURST entries */
+ nb_pkts = RTE_MIN(nb_pkts, ICE_VPMD_RX_BURST);
+
/* get some new buffers */
uint16_t nb_bufs = _ice_recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
split_flags);
@@ -496,6 +488,32 @@ ice_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
&split_flags[i]);
}
+/**
+ * vPMD receive routine that reassembles scattered packets.
+ */
+uint16_t
+ice_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts)
+{
+ uint16_t retval = 0;
+
+ while (nb_pkts > ICE_VPMD_RX_BURST) {
+ uint16_t burst;
+
+ burst = ice_recv_scattered_burst_vec(rx_queue,
+ rx_pkts + retval,
+ ICE_VPMD_RX_BURST);
+ retval += burst;
+ nb_pkts -= burst;
+ if (burst < ICE_VPMD_RX_BURST)
+ return retval;
+ }
+
+ return retval + ice_recv_scattered_burst_vec(rx_queue,
+ rx_pkts + retval,
+ nb_pkts);
+}
+
static inline void
ice_vtx1(volatile struct ice_tx_desc *txdp, struct rte_mbuf *pkt,
uint64_t flags)
--
2.20.1
More information about the dev
mailing list