[PATCH 01/10] net/bnxt: vector mode implementation for V3 packets
Mohammad Shuab Siddique
mohammad-shuab.siddique at broadcom.com
Thu Jun 4 05:18:42 CEST 2026
From: Keegan Freyhof <keegan.freyhof at broadcom.com>
Add an AVX2 vector mode receive path for V3 (Thor 2) completions,
including reporting of the VLAN TCI.
Signed-off-by: Keegan Freyhof <keegan.freyhof at broadcom.com>
Signed-off-by: Mohammad Shuab Siddique <mohammad-shuab.siddique at broadcom.com>
---
drivers/net/bnxt/bnxt_ethdev.c | 7 +-
drivers/net/bnxt/bnxt_rxr.h | 2 +
drivers/net/bnxt/bnxt_rxtx_vec_avx2.c | 402 ++++++++++++++++++++++++
drivers/net/bnxt/bnxt_rxtx_vec_common.h | 36 +++
4 files changed, 446 insertions(+), 1 deletion(-)
diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index c45afdb20a..5bd51de3cd 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -1493,7 +1493,12 @@ bnxt_receive_function(struct rte_eth_dev *eth_dev)
bp->flags |= BNXT_FLAG_RX_VECTOR_PKT_MODE;
if (bnxt_compressed_rx_cqe_mode_enabled(bp))
return bnxt_crx_pkts_vec_avx2;
- return bnxt_recv_pkts_vec_avx2;
+ if (BNXT_TRUFLOW_EN(bp) && bnxt_ulp_explicit_mark_enabled(bp))
+ goto use_scalar_rx;
+ if (BNXT_CHIP_P7(bp))
+ return bnxt_recv_pkts_vec_avx2_v3;
+ else
+ return bnxt_recv_pkts_vec_avx2;
}
#endif
if (rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) {
diff --git a/drivers/net/bnxt/bnxt_rxr.h b/drivers/net/bnxt/bnxt_rxr.h
index 2a28fa2073..352d509210 100644
--- a/drivers/net/bnxt/bnxt_rxr.h
+++ b/drivers/net/bnxt/bnxt_rxr.h
@@ -165,6 +165,8 @@ int bnxt_rxq_vec_setup(struct bnxt_rx_queue *rxq);
#if defined(RTE_ARCH_X86)
uint16_t bnxt_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts);
+uint16_t bnxt_recv_pkts_vec_avx2_v3(void *rx_queue, struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts);
uint16_t bnxt_crx_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts);
#endif
diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_avx2.c b/drivers/net/bnxt/bnxt_rxtx_vec_avx2.c
index 35550534de..46b51b20e4 100644
--- a/drivers/net/bnxt/bnxt_rxtx_vec_avx2.c
+++ b/drivers/net/bnxt/bnxt_rxtx_vec_avx2.c
@@ -903,3 +903,405 @@ bnxt_xmit_pkts_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
return nb_sent;
}
+
+
+/*
+ * V3 (Thor2) RX burst processing - AVX2 vectorized implementation
+ *
+ * V3 completions have a different layout for checksum and VLAN handling
+ * compared to the standard and compressed completion formats.
+ *
+ * Processes completions in groups of BNXT_RX_DESCS_PER_LOOP_VEC256 and
+ * writes the rearm data, ol_flags and rx descriptor fields of each mbuf
+ * with one 256-bit store per packet.
+ *
+ * rx_queue: the struct bnxt_rx_queue to poll.
+ * rx_pkts:  output array of mbuf pointers; a whole group of pointers is
+ *           stored per loop iteration, even if fewer packets turn out to be
+ *           valid.
+ * nb_pkts:  maximum number of packets requested; rounded down to a multiple
+ *           of BNXT_RX_DESCS_PER_LOOP_VEC256, so smaller requests return 0.
+ *
+ * Returns the number of packets received.
+ *
+ * Lane order: every per-packet vector built below with the
+ * unpack{lo,hi}_epi32 + unpack{lo,hi}_epi64 sequence holds the eight packets
+ * in the order 0, 2, 4, 6 | 1, 3, 5, 7 (low lane | high lane). All values
+ * that get merged with each other or blended into the mbufs must use that
+ * same order.
+ */
+static uint16_t
+recv_burst_vec_avx2_v3(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+	struct bnxt_rx_queue *rxq = rx_queue;
+	struct bnxt_vnic_info *vnic = rxq->vnic;
+	const __m256i mbuf_init =
+		_mm256_set_epi64x(0, 0, 0, rxq->mbuf_initializer);
+	struct bnxt_cp_ring_info *cpr = rxq->cp_ring;
+	struct bnxt_rx_ring_info *rxr = rxq->rx_ring;
+	uint16_t cp_ring_size = cpr->cp_ring_struct->ring_size;
+	uint16_t rx_ring_size = rxr->rx_ring_struct->ring_size;
+	struct cmpl_base *cp_desc_ring = cpr->cp_desc_ring;
+	uint64_t valid, desc_valid_mask = ~0ULL;
+	const __m256i info3_v_mask = _mm256_set1_epi32(CMPL_BASE_V);
+	uint32_t raw_cons = cpr->cp_raw_cons;
+	uint32_t cons, mbcons;
+	int nb_rx_pkts = 0;
+	int i;
+	const __m256i valid_target =
+		_mm256_set1_epi32(!!(raw_cons & cp_ring_size));
+
+	/*
+	 * Shuffle mask for V3 descriptors to rearrange fields into mbuf layout.
+	 */
+	const __m256i shuf_msk =
+		_mm256_set_epi8(15, 14, 13, 12,          /* rss */
+				0xFF, 0xFF,              /* vlan_tci (filled separately) */
+				3, 2,                    /* data_len */
+				0xFF, 0xFF, 3, 2,        /* pkt_len */
+				0xFF, 0xFF, 0xFF, 0xFF,  /* pkt_type (zeroes) */
+				15, 14, 13, 12,          /* rss */
+				0xFF, 0xFF,              /* vlan_tci (filled separately) */
+				3, 2,                    /* data_len */
+				0xFF, 0xFF, 3, 2,        /* pkt_len */
+				0xFF, 0xFF, 0xFF, 0xFF); /* pkt_type (zeroes) */
+
+	/*
+	 * Shuffle mask for the high completion: moves flags2 (low 16 bits) into
+	 * dword 1 and errors_v2/metadata0 into dword 2, zeroing dwords 0 and 3.
+	 * NOTE(review): bytes 4-5 of the high completion are also copied into
+	 * the upper half of dword 1 but are not consumed below.
+	 */
+	const __m256i dsc_shuf_msk =
+		_mm256_set_epi8(0xff, 0xff, 0xff, 0xff,  /* Zeroes */
+				11, 10,                  /* metadata0 (vlan_tci) */
+				9, 8,                    /* errors_v2 */
+				5, 4,                    /* hi completion bytes 4-5 */
+				1, 0,                    /* flags2 low */
+				0xff, 0xff, 0xff, 0xff,  /* Zeroes */
+				0xff, 0xff, 0xff, 0xff,  /* Zeroes */
+				11, 10,                  /* metadata0 (vlan_tci) */
+				9, 8,                    /* errors_v2 */
+				5, 4,                    /* hi completion bytes 4-5 */
+				1, 0,                    /* flags2 low */
+				0xff, 0xff, 0xff, 0xff); /* Zeroes */
+
+	const __m256i flags_type_mask =
+		_mm256_set1_epi32(RX_PKT_V3_CMPL_FLAGS_ITYPE_MASK);
+	const __m256i flags2_ip_type_mask =
+		_mm256_set1_epi32(RX_PKT_V3_CMPL_HI_FLAGS2_IP_TYPE);
+	const __m256i rss_mask =
+		_mm256_set1_epi32(RX_PKT_V3_CMPL_FLAGS_RSS_VALID);
+	const __m256i metadata1_valid_mask =
+		_mm256_set1_epi32(RX_PKT_V3_CMPL_METADATA1_VALID);
+	const __m256i vlan_tci_mask =
+		_mm256_set1_epi32(RX_PKT_V3_CMPL_HI_METADATA0_VID_MASK |
+				  RX_PKT_V3_CMPL_HI_METADATA0_DE |
+				  RX_PKT_V3_CMPL_HI_METADATA0_PRI_MASK);
+	const __m256i cs_err_mask =
+		_mm256_set1_epi32(RX_PKT_CMPL_ERRORS_T_L4_CS_ERROR |
+				  RX_PKT_CMPL_ERRORS_T_IP_CS_ERROR |
+				  RX_PKT_CMPL_ERRORS_L4_CS_ERROR |
+				  RX_PKT_CMPL_ERRORS_IP_CS_ERROR);
+	const __m256i cs_calc_mask =
+		_mm256_set1_epi32(RX_PKT_CMPL_CALC);
+
+	/*
+	 * Element of a 0,2,4,6|1,3,5,7 ordered vector that belongs to the
+	 * packet stored in the low lane (lower four indices) and in the high
+	 * lane (upper four indices) of each mbuf pair register.
+	 */
+	const __m256i tci_perm_01 = _mm256_set_epi32(4, 4, 4, 4, 0, 0, 0, 0);
+	const __m256i tci_perm_23 = _mm256_set_epi32(5, 5, 5, 5, 1, 1, 1, 1);
+	const __m256i tci_perm_45 = _mm256_set_epi32(6, 6, 6, 6, 2, 2, 2, 2);
+	const __m256i tci_perm_67 = _mm256_set_epi32(7, 7, 7, 7, 3, 3, 3, 3);
+
+	__m256i t0, t1, flags_type, flags2, errors, metadata1;
+	__m256i ptype_idx, ptypes, vlan_tci, vlan_flags;
+	__m256i mbuf01, mbuf23, mbuf45, mbuf67;
+	__m256i rearm0, rearm1, rearm2, rearm3, rearm4, rearm5, rearm6, rearm7;
+	__m256i ol_flags, ol_flags_hi;
+	__m256i rss_flags;
+
+	/* Validate ptype table indexing at build time. */
+	bnxt_check_ptype_constants();
+
+	if (unlikely(!rxq->rx_started))
+		return 0;
+
+	if (rxq->rxrearm_nb >= rxq->rx_free_thresh)
+		bnxt_rxq_rearm(rxq, rxr);
+
+	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, BNXT_RX_DESCS_PER_LOOP_VEC256);
+
+	/*
+	 * Nothing can be delivered for a request smaller than one vector
+	 * group. Returning here also keeps the desc_valid_mask shift below
+	 * strictly smaller than the width of the mask.
+	 */
+	if (nb_pkts == 0)
+		return 0;
+
+	cons = raw_cons & (cp_ring_size - 1);
+	mbcons = (raw_cons / 2) & (rx_ring_size - 1);
+
+	/* Return immediately if there is not at least one completed packet. */
+	if (!bnxt_cpr_cmp_valid(&cp_desc_ring[cons], raw_cons, cp_ring_size))
+		return 0;
+
+	/* Ensure that we do not go past the ends of the rings. */
+	nb_pkts = RTE_MIN(nb_pkts, RTE_MIN(rx_ring_size - mbcons,
+					   (cp_ring_size - cons) / 2));
+	/*
+	 * If we are at the end of the ring, ensure that descriptors after the
+	 * last valid entry are not treated as valid. Otherwise, force the
+	 * maximum number of packets to be a multiple of the per-loop count.
+	 */
+	if (nb_pkts < BNXT_RX_DESCS_PER_LOOP_VEC256) {
+		desc_valid_mask >>=
+			CHAR_BIT * (BNXT_RX_DESCS_PER_LOOP_VEC256 - nb_pkts);
+	} else {
+		nb_pkts =
+			RTE_ALIGN_FLOOR(nb_pkts, BNXT_RX_DESCS_PER_LOOP_VEC256);
+	}
+
+	for (i = 0; i < nb_pkts; i += BNXT_RX_DESCS_PER_LOOP_VEC256,
+				 cons += BNXT_RX_DESCS_PER_LOOP_VEC256 * 2,
+				 mbcons += BNXT_RX_DESCS_PER_LOOP_VEC256) {
+		__m256i desc0, desc1, desc2, desc3, desc4, desc5, desc6, desc7;
+		__m256i rxcmp0_1, rxcmp2_3, rxcmp4_5, rxcmp6_7, info3_v;
+		__m256i meta0_err, cs_calc, cs_none;
+		uint32_t num_valid;
+
+		/* Copy the mbuf pointers for this group to the output array. */
+		t0 = _mm256_loadu_si256((void *)&rxr->rx_buf_ring[mbcons]);
+		_mm256_storeu_si256((void *)&rx_pkts[i], t0);
+#ifdef RTE_ARCH_X86_64
+		t0 = _mm256_loadu_si256((void *)&rxr->rx_buf_ring[mbcons + 4]);
+		_mm256_storeu_si256((void *)&rx_pkts[i + 4], t0);
+#endif
+
+		/*
+		 * Load eight receive completion descriptors into 256-bit
+		 * registers. Loads are issued in reverse order for consistent state.
+		 */
+		desc7 = _mm256_load_si256((void *)&cp_desc_ring[cons + 14]);
+		rte_compiler_barrier();
+		desc6 = _mm256_load_si256((void *)&cp_desc_ring[cons + 12]);
+		rte_compiler_barrier();
+		desc5 = _mm256_load_si256((void *)&cp_desc_ring[cons + 10]);
+		rte_compiler_barrier();
+		desc4 = _mm256_load_si256((void *)&cp_desc_ring[cons + 8]);
+		rte_compiler_barrier();
+		desc3 = _mm256_load_si256((void *)&cp_desc_ring[cons + 6]);
+		rte_compiler_barrier();
+		desc2 = _mm256_load_si256((void *)&cp_desc_ring[cons + 4]);
+		rte_compiler_barrier();
+		desc1 = _mm256_load_si256((void *)&cp_desc_ring[cons + 2]);
+		rte_compiler_barrier();
+		desc0 = _mm256_load_si256((void *)&cp_desc_ring[cons + 0]);
+
+		/*
+		 * Pack needed fields from each descriptor pair.
+		 * For V3: extract rxcmp (low) for flags_type, len, rss
+		 * and rxcmp1 (hi) for flags2, metadata0, errors_v2.
+		 * After the blend, each 128-bit lane holds:
+		 *   dword 0: flags_type | len       (low completion)
+		 *   dword 1: flags2 low 16 bits     (high completion)
+		 *   dword 2: errors_v2 | metadata0  (high completion)
+		 *   dword 3: rss hash               (low completion)
+		 */
+		t0 = _mm256_permute2f128_si256(desc6, desc7, 0x20);
+		t1 = _mm256_permute2f128_si256(desc6, desc7, 0x31);
+		t1 = _mm256_shuffle_epi8(t1, dsc_shuf_msk);
+		rxcmp6_7 = _mm256_blend_epi32(t0, t1, 0x66);
+
+		t0 = _mm256_permute2f128_si256(desc4, desc5, 0x20);
+		t1 = _mm256_permute2f128_si256(desc4, desc5, 0x31);
+		t1 = _mm256_shuffle_epi8(t1, dsc_shuf_msk);
+		rxcmp4_5 = _mm256_blend_epi32(t0, t1, 0x66);
+
+		t0 = _mm256_permute2f128_si256(desc2, desc3, 0x20);
+		t1 = _mm256_permute2f128_si256(desc2, desc3, 0x31);
+		t1 = _mm256_shuffle_epi8(t1, dsc_shuf_msk);
+		rxcmp2_3 = _mm256_blend_epi32(t0, t1, 0x66);
+
+		t0 = _mm256_permute2f128_si256(desc0, desc1, 0x20);
+		t1 = _mm256_permute2f128_si256(desc0, desc1, 0x31);
+		t1 = _mm256_shuffle_epi8(t1, dsc_shuf_msk);
+		rxcmp0_1 = _mm256_blend_epi32(t0, t1, 0x66);
+
+		/* Extract flags_type from low completion for eight packets */
+		t0 = _mm256_unpacklo_epi32(rxcmp0_1, rxcmp2_3);
+		t1 = _mm256_unpacklo_epi32(rxcmp4_5, rxcmp6_7);
+		flags_type = _mm256_unpacklo_epi64(t0, t1);
+
+		/* Compute ptype_idx from flags_type itype field */
+		ptype_idx = _mm256_and_si256(flags_type, flags_type_mask);
+		ptype_idx = _mm256_srli_epi32(ptype_idx,
+					      RX_PKT_V3_CMPL_FLAGS_ITYPE_SFT -
+					      BNXT_PTYPE_TBL_TYPE_SFT);
+
+		/* Extract flags2 from high completion (same t0/t1 as above) */
+		flags2 = _mm256_unpackhi_epi64(t0, t1);
+
+		t0 = _mm256_srli_epi32(_mm256_and_si256(flags2, flags2_ip_type_mask),
+				       RX_PKT_V3_CMPL_FLAGS2_IP_TYPE_SFT -
+				       BNXT_PTYPE_TBL_IP_VER_SFT);
+		ptype_idx = _mm256_or_si256(ptype_idx, t0);
+
+		/*
+		 * Extract metadata1 (contains VLAN valid bit) from LOW completion.
+		 * metadata1_payload_offset is at word 2 of rxcmp (low 128 bits of
+		 * desc); the blend above replaced that word in rxcmpN_M, so it is
+		 * taken from the raw descriptors. The descriptors are paired as
+		 * (0,2),(4,6) and (1,3),(5,7) so that the result uses the same
+		 * 0,2,4,6|1,3,5,7 lane order as ptype_idx and ol_flags.
+		 */
+		{
+			__m128i lo, hi;
+
+			lo = _mm_unpacklo_epi64(
+				_mm_unpackhi_epi32(_mm256_castsi256_si128(desc0),
+						   _mm256_castsi256_si128(desc2)),
+				_mm_unpackhi_epi32(_mm256_castsi256_si128(desc4),
+						   _mm256_castsi256_si128(desc6)));
+			hi = _mm_unpacklo_epi64(
+				_mm_unpackhi_epi32(_mm256_castsi256_si128(desc1),
+						   _mm256_castsi256_si128(desc3)),
+				_mm_unpackhi_epi32(_mm256_castsi256_si128(desc5),
+						   _mm256_castsi256_si128(desc7)));
+			metadata1 = _mm256_inserti128_si256(
+					_mm256_castsi128_si256(lo), hi, 1);
+		}
+		metadata1 = _mm256_srli_epi32(metadata1, 16);
+
+		t0 = _mm256_srli_epi32(_mm256_and_si256(metadata1, metadata1_valid_mask),
+				       RX_PKT_V3_CMPL_METADATA1_VALID_SFT -
+				       BNXT_PTYPE_TBL_VLAN_SFT);
+		ptype_idx = _mm256_or_si256(ptype_idx, t0);
+
+		/*
+		 * Load ptypes for eight packets using gather.
+		 */
+		ptypes = _mm256_i32gather_epi32((int *)bnxt_ptype_table,
+						ptype_idx, sizeof(uint32_t));
+
+		/* Extract RSS valid flags for eight packets */
+		rss_flags = _mm256_and_si256(flags_type, rss_mask);
+		rss_flags = _mm256_srli_epi32(rss_flags, 9);
+
+		/* Extract metadata0 (contains vlan_tci) and errors from high completion */
+		t0 = _mm256_unpackhi_epi32(rxcmp0_1, rxcmp2_3);
+		t1 = _mm256_unpackhi_epi32(rxcmp4_5, rxcmp6_7);
+		meta0_err = _mm256_unpacklo_epi64(t0, t1);
+
+		/* Extract vlan_tci from high 16 bits of meta0_err (metadata0) */
+		vlan_tci = _mm256_and_si256(_mm256_srli_epi32(meta0_err, 16), vlan_tci_mask);
+
+		/* Reduce the VLAN valid bit to 0/1 per packet. */
+		vlan_flags = _mm256_and_si256(metadata1, metadata1_valid_mask);
+		vlan_flags = _mm256_min_epu32(vlan_flags, _mm256_set1_epi32(1));
+
+		/* With VLAN stripping enabled, also set bit 6 for tagged packets. */
+		if (vnic->vlan_strip) {
+			vlan_flags = _mm256_or_si256(vlan_flags,
+						     _mm256_slli_epi32(vlan_flags, 6));
+		}
+
+		/*
+		 * Checksum flags. The four checksum error bits form the index
+		 * into errors_to_olflags_v3; the gather stride is the size of
+		 * one table entry so that index N reads entry N.
+		 * cs_none is all-ones for packets where no checksum was
+		 * calculated; their checksum flags are cleared so the packet is
+		 * not reported as having good checksums.
+		 */
+		errors = _mm256_srli_epi32(_mm256_and_si256(meta0_err, cs_err_mask), 4);
+
+		cs_calc = _mm256_and_si256(flags2, cs_calc_mask);
+		cs_none = _mm256_cmpeq_epi32(cs_calc, _mm256_setzero_si256());
+		errors = _mm256_andnot_si256(cs_none, errors);
+		ol_flags = _mm256_i32gather_epi32((const int *)errors_to_olflags_v3,
+						  errors,
+						  sizeof(errors_to_olflags_v3[0]));
+		ol_flags = _mm256_andnot_si256(cs_none, ol_flags);
+
+		/*
+		 * Pack the valid bits of the eight descriptors into one byte
+		 * per packet and count them. With the 0,2,4,6|1,3,5,7 lane
+		 * order, packing each lane to 16-bit values and merging the
+		 * high lane shifted by one byte yields bytes in packet order
+		 * 0..7, which is what desc_valid_mask expects.
+		 */
+		info3_v = _mm256_and_si256(meta0_err, info3_v_mask);
+		info3_v = _mm256_xor_si256(info3_v, valid_target);
+
+		info3_v = _mm256_packs_epi32(info3_v, _mm256_setzero_si256());
+		valid = _mm_cvtsi128_si64(_mm256_extracti128_si256(info3_v, 1));
+		valid = (valid << CHAR_BIT) |
+			_mm_cvtsi128_si64(_mm256_castsi256_si128(info3_v));
+		num_valid = rte_popcount64(valid & desc_valid_mask);
+
+		if (num_valid == 0)
+			break;
+
+		/* Rearrange len and rss hash into the mbuf field layout. */
+		mbuf01 = _mm256_shuffle_epi8(rxcmp0_1, shuf_msk);
+		mbuf23 = _mm256_shuffle_epi8(rxcmp2_3, shuf_msk);
+		mbuf45 = _mm256_shuffle_epi8(rxcmp4_5, shuf_msk);
+		mbuf67 = _mm256_shuffle_epi8(rxcmp6_7, shuf_msk);
+
+		/* Blend the packet type into dword 0 of each lane. */
+		mbuf01 = _mm256_blend_epi32(mbuf01, ptypes, 0x11);
+		mbuf23 = _mm256_blend_epi32(mbuf23,
+					    _mm256_srli_si256(ptypes, 4), 0x11);
+		mbuf45 = _mm256_blend_epi32(mbuf45,
+					    _mm256_srli_si256(ptypes, 8), 0x11);
+		mbuf67 = _mm256_blend_epi32(mbuf67,
+					    _mm256_srli_si256(ptypes, 12), 0x11);
+
+		/*
+		 * Blend vlan_tci into bytes 10-11 of each lane: broadcast the
+		 * packet's TCI across its lane, then shift it into position.
+		 */
+		mbuf01 = _mm256_blend_epi16(mbuf01,
+			_mm256_slli_si256(_mm256_permutevar8x32_epi32(vlan_tci,
+					  tci_perm_01), 10), 0x20);
+		mbuf23 = _mm256_blend_epi16(mbuf23,
+			_mm256_slli_si256(_mm256_permutevar8x32_epi32(vlan_tci,
+					  tci_perm_23), 10), 0x20);
+		mbuf45 = _mm256_blend_epi16(mbuf45,
+			_mm256_slli_si256(_mm256_permutevar8x32_epi32(vlan_tci,
+					  tci_perm_45), 10), 0x20);
+		mbuf67 = _mm256_blend_epi16(mbuf67,
+			_mm256_slli_si256(_mm256_permutevar8x32_epi32(vlan_tci,
+					  tci_perm_67), 10), 0x20);
+
+		/*
+		 * Build the 32-byte image stored at rearm_data: the mbuf
+		 * initializer in the low lane and the descriptor fields in the
+		 * high lane.
+		 */
+		rearm0 = _mm256_permute2f128_si256(mbuf_init, mbuf01, 0x20);
+		rearm1 = _mm256_blend_epi32(mbuf_init, mbuf01, 0xF0);
+		rearm2 = _mm256_permute2f128_si256(mbuf_init, mbuf23, 0x20);
+		rearm3 = _mm256_blend_epi32(mbuf_init, mbuf23, 0xF0);
+
+		ol_flags = _mm256_or_si256(ol_flags, rss_flags);
+		ol_flags = _mm256_or_si256(ol_flags, vlan_flags);
+		ol_flags_hi = _mm256_permute2f128_si256(ol_flags,
+							ol_flags, 0x11);
+
+		/* Move each packet's ol_flags into dword 2 of its image. */
+		rearm0 = _mm256_blend_epi32(rearm0,
+					    _mm256_slli_si256(ol_flags, 8),
+					    0x04);
+		rearm1 = _mm256_blend_epi32(rearm1,
+					    _mm256_slli_si256(ol_flags_hi, 8),
+					    0x04);
+		rearm2 = _mm256_blend_epi32(rearm2,
+					    _mm256_slli_si256(ol_flags, 4),
+					    0x04);
+		rearm3 = _mm256_blend_epi32(rearm3,
+					    _mm256_slli_si256(ol_flags_hi, 4),
+					    0x04);
+
+		_mm256_storeu_si256((void *)&rx_pkts[i + 0]->rearm_data,
+				    rearm0);
+		_mm256_storeu_si256((void *)&rx_pkts[i + 1]->rearm_data,
+				    rearm1);
+		_mm256_storeu_si256((void *)&rx_pkts[i + 2]->rearm_data,
+				    rearm2);
+		_mm256_storeu_si256((void *)&rx_pkts[i + 3]->rearm_data,
+				    rearm3);
+
+		rearm4 = _mm256_permute2f128_si256(mbuf_init, mbuf45, 0x20);
+		rearm5 = _mm256_blend_epi32(mbuf_init, mbuf45, 0xF0);
+		rearm6 = _mm256_permute2f128_si256(mbuf_init, mbuf67, 0x20);
+		rearm7 = _mm256_blend_epi32(mbuf_init, mbuf67, 0xF0);
+
+		rearm4 = _mm256_blend_epi32(rearm4, ol_flags, 0x04);
+		rearm5 = _mm256_blend_epi32(rearm5, ol_flags_hi, 0x04);
+		rearm6 = _mm256_blend_epi32(rearm6,
+					    _mm256_srli_si256(ol_flags, 4),
+					    0x04);
+		rearm7 = _mm256_blend_epi32(rearm7,
+					    _mm256_srli_si256(ol_flags_hi, 4),
+					    0x04);
+
+		_mm256_storeu_si256((void *)&rx_pkts[i + 4]->rearm_data,
+				    rearm4);
+		_mm256_storeu_si256((void *)&rx_pkts[i + 5]->rearm_data,
+				    rearm5);
+		_mm256_storeu_si256((void *)&rx_pkts[i + 6]->rearm_data,
+				    rearm6);
+		_mm256_storeu_si256((void *)&rx_pkts[i + 7]->rearm_data,
+				    rearm7);
+
+		nb_rx_pkts += num_valid;
+		if (num_valid < BNXT_RX_DESCS_PER_LOOP_VEC256)
+			break;
+	}
+
+	if (nb_rx_pkts) {
+		rxr->rx_raw_prod = RING_ADV(rxr->rx_raw_prod, nb_rx_pkts);
+
+		rxq->rxrearm_nb += nb_rx_pkts;
+		cpr->cp_raw_cons += 2 * nb_rx_pkts;
+		bnxt_db_cq(cpr);
+	}
+
+	return nb_rx_pkts;
+}
+
+/*
+ * AVX2 vector receive entry point for V3 completions.
+ *
+ * Splits a large request into chunks of rx_free_thresh packets and hands
+ * each chunk to recv_burst_vec_avx2_v3(), stopping as soon as a chunk comes
+ * back short. Any remainder (or the whole request, if it does not exceed
+ * rx_free_thresh) is handled by one final burst call.
+ *
+ * rx_queue: the struct bnxt_rx_queue to poll.
+ * rx_pkts:  output array with room for at least nb_pkts mbuf pointers.
+ * nb_pkts:  maximum number of packets to receive.
+ *
+ * Returns the number of packets stored in rx_pkts.
+ */
+uint16_t
+bnxt_recv_pkts_vec_avx2_v3(void *rx_queue, struct rte_mbuf **rx_pkts,
+			   uint16_t nb_pkts)
+{
+	struct bnxt_rx_queue *rxq = rx_queue;
+	uint32_t expected_burst = rxq->rx_free_thresh;
+	uint16_t cnt = 0;
+
+	/*
+	 * A zero chunk size would make no progress and never satisfy the
+	 * short-burst exit below, so chunking is skipped in that case.
+	 */
+	while (expected_burst != 0 && nb_pkts > expected_burst) {
+		uint16_t burst;
+
+		burst = recv_burst_vec_avx2_v3(rx_queue, rx_pkts + cnt, expected_burst);
+
+		cnt += burst;
+		nb_pkts -= burst;
+
+		/* A short chunk means there are no more completions to collect. */
+		if (burst < expected_burst)
+			return cnt;
+	}
+	return cnt + recv_burst_vec_avx2_v3(rx_queue, rx_pkts + cnt, nb_pkts);
+}
+
diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_common.h b/drivers/net/bnxt/bnxt_rxtx_vec_common.h
index e185005293..e8da010dc3 100644
--- a/drivers/net/bnxt/bnxt_rxtx_vec_common.h
+++ b/drivers/net/bnxt/bnxt_rxtx_vec_common.h
@@ -177,4 +177,40 @@ bnxt_tx_cmp_vec(struct bnxt_tx_queue *txq, uint32_t nr_pkts)
}
txr->tx_raw_cons = raw_cons;
}
+
+/*
+ * Checksum ol_flags for V3 completions, indexed by the four checksum error
+ * bits of the completion. As the entries show, index bit 0 marks a bad inner
+ * IP checksum, bit 1 a bad inner L4 checksum, bit 2 a bad outer IP checksum
+ * and bit 3 a bad outer L4 checksum.
+ *
+ * The table is read with a 32-bit gather that uses a 4-byte stride, so the
+ * entries must be 32 bits wide; with 64-bit entries index N would read half
+ * of entry N / 2. Every flag value stored here must fit in 32 bits.
+ *
+ * NOTE(review): entries with bit 2 set and bit 0 clear (4, 6, 12, 14) do not
+ * report RTE_MBUF_F_RX_IP_CKSUM_GOOD, unlike entries 0, 2, 8 and 10 - confirm
+ * that this is intended.
+ */
+static const uint32_t errors_to_olflags_v3[16] = {
+	RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_GOOD |
+		RTE_MBUF_F_RX_IP_CKSUM_GOOD,
+	RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_GOOD |
+		RTE_MBUF_F_RX_IP_CKSUM_BAD,
+	RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_BAD |
+		RTE_MBUF_F_RX_IP_CKSUM_GOOD,
+	RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_BAD |
+		RTE_MBUF_F_RX_IP_CKSUM_BAD,
+	RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD | RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD |
+		RTE_MBUF_F_RX_L4_CKSUM_GOOD,
+	RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD | RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD |
+		RTE_MBUF_F_RX_L4_CKSUM_GOOD | RTE_MBUF_F_RX_IP_CKSUM_BAD,
+	RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD | RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD |
+		RTE_MBUF_F_RX_L4_CKSUM_BAD,
+	RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD | RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD |
+		RTE_MBUF_F_RX_L4_CKSUM_BAD | RTE_MBUF_F_RX_IP_CKSUM_BAD,
+	RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_GOOD |
+		RTE_MBUF_F_RX_IP_CKSUM_GOOD,
+	RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_GOOD |
+		RTE_MBUF_F_RX_IP_CKSUM_BAD,
+	RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD |
+		RTE_MBUF_F_RX_IP_CKSUM_GOOD,
+	RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD |
+		RTE_MBUF_F_RX_IP_CKSUM_BAD,
+	RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD | RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD |
+		RTE_MBUF_F_RX_L4_CKSUM_GOOD,
+	RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD | RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD |
+		RTE_MBUF_F_RX_L4_CKSUM_GOOD | RTE_MBUF_F_RX_IP_CKSUM_BAD,
+	RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD | RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD |
+		RTE_MBUF_F_RX_L4_CKSUM_BAD,
+	RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD | RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD |
+		RTE_MBUF_F_RX_L4_CKSUM_BAD | RTE_MBUF_F_RX_IP_CKSUM_BAD
+};
+
#endif /* _BNXT_RXTX_VEC_COMMON_H_ */
--
2.47.3
More information about the dev
mailing list