[PATCH 07/10] net/bnxt: optimization of the AVX2 RX paths

Mohammad Shuab Siddique mohammad-shuab.siddique at broadcom.com
Thu Jun 4 05:18:48 CEST 2026


From: Keegan Freyhof <keegan.freyhof at broadcom.com>

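Remove dead code and rename some variables to make them more
descriptive. Also optimize the V3 path so that it stays within 16
registers, and change the errors_to_olflags_v3 table to uint32_t
entries to match the 32-bit gather that indexes it.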

Signed-off-by: Keegan Freyhof <keegan.freyhof at broadcom.com>
Signed-off-by: Mohammad Shuab Siddique <mohammad-shuab.siddique at broadcom.com>
---
 drivers/net/bnxt/bnxt_rxtx_vec_avx2.c   | 240 +++++++++++-------------
 drivers/net/bnxt/bnxt_rxtx_vec_common.h |   2 +-
 2 files changed, 114 insertions(+), 128 deletions(-)

diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_avx2.c b/drivers/net/bnxt/bnxt_rxtx_vec_avx2.c
index 46b51b20e4..5e22b4fc11 100644
--- a/drivers/net/bnxt/bnxt_rxtx_vec_avx2.c
+++ b/drivers/net/bnxt/bnxt_rxtx_vec_avx2.c
@@ -187,15 +187,12 @@ recv_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 		t0 = _mm256_unpacklo_epi32(rxcmp0_1, rxcmp2_3);
 		t1 = _mm256_unpacklo_epi32(rxcmp4_5, rxcmp6_7);
 		flags_type = _mm256_unpacklo_epi64(t0, t1);
+		flags2 = _mm256_unpackhi_epi64(t0, t1);
 		ptype_idx = _mm256_and_si256(flags_type, flags_type_mask);
 		ptype_idx = _mm256_srli_epi32(ptype_idx,
 					      RX_PKT_CMPL_FLAGS_ITYPE_SFT -
 					      BNXT_PTYPE_TBL_TYPE_SFT);
 
-		t0 = _mm256_unpacklo_epi32(rxcmp0_1, rxcmp2_3);
-		t1 = _mm256_unpacklo_epi32(rxcmp4_5, rxcmp6_7);
-		flags2 = _mm256_unpackhi_epi64(t0, t1);
-
 		t0 = _mm256_srli_epi32(_mm256_and_si256(flags2, flags2_mask1),
 				       RX_PKT_CMPL_FLAGS2_META_FORMAT_SFT -
 				       BNXT_PTYPE_TBL_VLAN_SFT);
@@ -251,9 +248,6 @@ recv_burst_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 		 * bits and count the number of set bits in order to determine
 		 * the number of valid descriptors.
 		 */
-		const __m256i perm_msk =
-				_mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0);
-		info3_v = _mm256_permutevar8x32_epi32(errors_v2, perm_msk);
 		info3_v = _mm256_and_si256(errors_v2, info3_v_mask);
 		info3_v = _mm256_xor_si256(info3_v, valid_target);
 
@@ -904,7 +898,6 @@ bnxt_xmit_pkts_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
 	return nb_sent;
 }
 
-
 /*
  * V3 (Thor2) RX burst processing - AVX2 vectorized implementation
  *
@@ -924,7 +917,6 @@ recv_burst_vec_avx2_v3(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pk
 	uint16_t rx_ring_size = rxr->rx_ring_struct->ring_size;
 	struct cmpl_base *cp_desc_ring = cpr->cp_desc_ring;
 	uint64_t valid, desc_valid_mask = ~0ULL;
-	const __m256i info3_v_mask = _mm256_set1_epi32(CMPL_BASE_V);
 	uint32_t raw_cons = cpr->cp_raw_cons;
 	uint32_t cons, mbcons;
 	int nb_rx_pkts = 0;
@@ -937,12 +929,12 @@ recv_burst_vec_avx2_v3(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pk
 	 */
 	const __m256i shuf_msk =
 		_mm256_set_epi8(15, 14, 13, 12,          /* rss */
-				0xFF, 0xFF,              /* vlan_tci (filled separately) */
+				11, 10,                  /* vlan_tci */
 				3, 2,                    /* data_len */
 				0xFF, 0xFF, 3, 2,        /* pkt_len */
 				0xFF, 0xFF, 0xFF, 0xFF,  /* pkt_type (zeroes) */
 				15, 14, 13, 12,          /* rss */
-				0xFF, 0xFF,              /* vlan_tci (filled separately) */
+				11, 10,                  /* vlan_tci */
 				3, 2,                    /* data_len */
 				0xFF, 0xFF, 3, 2,        /* pkt_len */
 				0xFF, 0xFF, 0xFF, 0xFF); /* pkt_type (zeroes) */
@@ -952,40 +944,24 @@ recv_burst_vec_avx2_v3(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pk
 		_mm256_set_epi8(0xff, 0xff, 0xff, 0xff,  /* Zeroes */
 				11, 10,                  /* metadata0 (vlan_tci) */
 				9, 8,                    /* errors_v2 */
-				5, 4,                    /* metadata1 (payload_offset) */
+				5, 4,                    /* metadata2 */
 				1, 0,                    /* flags2 low */
 				0xff, 0xff, 0xff, 0xff,  /* Zeroes */
 				0xff, 0xff, 0xff, 0xff,  /* Zeroes */
 				11, 10,                  /* metadata0 (vlan_tci) */
 				9, 8,                    /* errors_v2 */
-				5, 4,                    /* metadata1 (payload_offset) */
+				5, 4,                    /* metadata2 */
 				1, 0,                    /* flags2 low */
 				0xff, 0xff, 0xff, 0xff); /* Zeroes */
+	const __m256i mask_1s =
+		_mm256_set1_epi32(0x1);
+	const __m256i mask_fs =
+		_mm256_set1_epi32(0xf);
 
-	const __m256i flags_type_mask =
-		_mm256_set1_epi32(RX_PKT_V3_CMPL_FLAGS_ITYPE_MASK);
-	const __m256i flags2_ip_type_mask =
-		_mm256_set1_epi32(RX_PKT_V3_CMPL_HI_FLAGS2_IP_TYPE);
-	const __m256i rss_mask =
-		_mm256_set1_epi32(RX_PKT_V3_CMPL_FLAGS_RSS_VALID);
-	const __m256i metadata1_valid_mask =
-		_mm256_set1_epi32(RX_PKT_V3_CMPL_METADATA1_VALID);
-	const __m256i vlan_tci_mask =
-		_mm256_set1_epi32(RX_PKT_V3_CMPL_HI_METADATA0_VID_MASK |
-				  RX_PKT_V3_CMPL_HI_METADATA0_DE |
-				  RX_PKT_V3_CMPL_HI_METADATA0_PRI_MASK);
-	const __m256i cs_err_mask =
-		_mm256_set1_epi32(RX_PKT_CMPL_ERRORS_T_L4_CS_ERROR |
-				  RX_PKT_CMPL_ERRORS_T_IP_CS_ERROR |
-				  RX_PKT_CMPL_ERRORS_L4_CS_ERROR |
-				  RX_PKT_CMPL_ERRORS_IP_CS_ERROR);
-	const __m256i cs_calc_mask =
-		_mm256_set1_epi32(RX_PKT_CMPL_CALC);
-
-	__m256i t0, t1, flags_type, flags2, errors, metadata1;
-	__m256i ptype_idx, ptypes, vlan_tci, vlan_flags;
-	__m256i mbuf01, mbuf23, mbuf45, mbuf67;
 	__m256i rearm0, rearm1, rearm2, rearm3, rearm4, rearm5, rearm6, rearm7;
+	__m256i t0, t1, flags_type, flags2, errors_csum_idx, metadata1;
+	__m256i mbuf01, mbuf23, mbuf45, mbuf67;
+	__m256i ptype_idx, ptypes, vlan_flags;
 	__m256i ol_flags, ol_flags_hi;
 	__m256i rss_flags;
 
@@ -1025,7 +1001,8 @@ recv_burst_vec_avx2_v3(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pk
 				  mbcons += BNXT_RX_DESCS_PER_LOOP_VEC256) {
 		__m256i desc0, desc1, desc2, desc3, desc4, desc5, desc6, desc7;
 		__m256i rxcmp0_1, rxcmp2_3, rxcmp4_5, rxcmp6_7, info3_v;
-		__m256i errors_v2, meta0_err, cs_calc, cs_valid;
+		__m256i md1_0123, lo2_3, md1_4567, lo6_7;
+		__m256i errors_v2, cs_calc, cs_valid;
 		uint32_t num_valid;
 
 		t0 = _mm256_loadu_si256((void *)&rxr->rx_buf_ring[mbcons]);
@@ -1057,119 +1034,134 @@ recv_burst_vec_avx2_v3(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pk
 
 		/*
 		 * Pack needed fields from each descriptor pair.
-		 * For V3: extract rxcmp (low) for flags_type, len, rss
+		 * extract rxcmp (low) for flags_type, len, rss
 		 * and rxcmp1 (hi) for flags2, metadata0, metadata1, errors_v2
+		 * metadata1 is extracted incrementally from the low
+		 * completion halves to reduce register pressure
 		 */
 		t0 = _mm256_permute2f128_si256(desc6, desc7, 0x20);
 		t1 = _mm256_permute2f128_si256(desc6, desc7, 0x31);
 		t1 = _mm256_shuffle_epi8(t1, dsc_shuf_msk);
 		rxcmp6_7 = _mm256_blend_epi32(t0, t1, 0x66);
+		lo6_7 = t0;
 
 		t0 = _mm256_permute2f128_si256(desc4, desc5, 0x20);
 		t1 = _mm256_permute2f128_si256(desc4, desc5, 0x31);
 		t1 = _mm256_shuffle_epi8(t1, dsc_shuf_msk);
 		rxcmp4_5 = _mm256_blend_epi32(t0, t1, 0x66);
+		md1_4567 = _mm256_unpackhi_epi32(t0, lo6_7);
 
 		t0 = _mm256_permute2f128_si256(desc2, desc3, 0x20);
 		t1 = _mm256_permute2f128_si256(desc2, desc3, 0x31);
 		t1 = _mm256_shuffle_epi8(t1, dsc_shuf_msk);
 		rxcmp2_3 = _mm256_blend_epi32(t0, t1, 0x66);
+		lo2_3 = t0;
 
 		t0 = _mm256_permute2f128_si256(desc0, desc1, 0x20);
 		t1 = _mm256_permute2f128_si256(desc0, desc1, 0x31);
 		t1 = _mm256_shuffle_epi8(t1, dsc_shuf_msk);
 		rxcmp0_1 = _mm256_blend_epi32(t0, t1, 0x66);
+		md1_0123 = _mm256_unpackhi_epi32(t0, lo2_3);
+		metadata1 = _mm256_unpacklo_epi64(md1_0123, md1_4567);
 
-		/* Extract flags_type from low completion for eight packets */
-		t0 = _mm256_unpacklo_epi32(rxcmp0_1, rxcmp2_3);
-		t1 = _mm256_unpacklo_epi32(rxcmp4_5, rxcmp6_7);
-		flags_type = _mm256_unpacklo_epi64(t0, t1);
-
-		/* Compute ptype_idx from flags_type itype field */
-		ptype_idx = _mm256_and_si256(flags_type, flags_type_mask);
-		ptype_idx = _mm256_srli_epi32(ptype_idx,
-					      RX_PKT_V3_CMPL_FLAGS_ITYPE_SFT -
-					      BNXT_PTYPE_TBL_TYPE_SFT);
-
-		/* Extract flags2 from high completion */
+		/* Extract flags2 from high completion for eight packets */
 		t0 = _mm256_unpacklo_epi32(rxcmp0_1, rxcmp2_3);
 		t1 = _mm256_unpacklo_epi32(rxcmp4_5, rxcmp6_7);
 		flags2 = _mm256_unpackhi_epi64(t0, t1);
+		/* mask_fs used in place of RX_PKT_CMPL_CALC */
+		cs_calc = _mm256_and_si256(flags2, mask_fs);
+		cs_valid = _mm256_cmpeq_epi32(cs_calc, _mm256_setzero_si256());
 
-		t0 = _mm256_srli_epi32(_mm256_and_si256(flags2, flags2_ip_type_mask),
-				       RX_PKT_V3_CMPL_FLAGS2_IP_TYPE_SFT -
-				       BNXT_PTYPE_TBL_IP_VER_SFT);
-		ptype_idx = _mm256_or_si256(ptype_idx, t0);
-
-		/*
-		 * Extract metadata1 (contains VLAN valid bit) from LOW completion.
-		 * metadata1_payload_offset is at word 2 of rxcmp (low 128 bits of desc).
+		/* Extract metadata0 and errors from high completion */
+		t0 = _mm256_unpackhi_epi32(rxcmp0_1, rxcmp2_3);
+		t1 = _mm256_unpackhi_epi32(rxcmp4_5, rxcmp6_7);
+		errors_v2 = _mm256_unpacklo_epi64(t0, t1);
+		/* mask_fs used in place of RX_PKT_CMPL_ERRORS_T_L4_CS_ERROR |
+		 * RX_PKT_CMPL_ERRORS_T_IP_CS_ERROR | RX_PKT_CMPL_ERRORS_L4_CS_ERROR |
+		 * RX_PKT_CMPL_ERRORS_IP_CS_ERROR
 		 */
-		{
-			__m128i m01, m23, hi;
-			hi =
-		_mm_unpacklo_epi64(_mm_unpackhi_epi32(_mm256_castsi256_si128(desc4),
-						    _mm256_castsi256_si128(desc5)),
-				 _mm_unpackhi_epi32(_mm256_castsi256_si128(desc6),
-						    _mm256_castsi256_si128(desc7)));
-			m01 = _mm_unpackhi_epi32(_mm256_castsi256_si128(desc0),
-						 _mm256_castsi256_si128(desc1));
-			m23 = _mm_unpackhi_epi32(_mm256_castsi256_si128(desc2),
-						 _mm256_castsi256_si128(desc3));
-			metadata1 =
-			_mm256_inserti128_si256(_mm256_castsi128_si256(_mm_unpacklo_epi64(m01,
-								       m23)), hi, 1);
-		}
-		metadata1 = _mm256_srli_epi32(metadata1, 16);
-
-		t0 = _mm256_srli_epi32(_mm256_and_si256(metadata1, metadata1_valid_mask),
-				       RX_PKT_V3_CMPL_METADATA1_VALID_SFT -
-				       BNXT_PTYPE_TBL_VLAN_SFT);
-		ptype_idx = _mm256_or_si256(ptype_idx, t0);
+		errors_csum_idx = _mm256_srli_epi32(_mm256_and_si256(errors_v2,
+						    _mm256_slli_epi32(mask_fs, 4)), 4);
+		errors_csum_idx = _mm256_andnot_si256(cs_valid, errors_csum_idx);
 
 		/*
-		 * Load ptypes for eight packets using gather.
+		 * Load ol_flags for eight packets using gather. Gather
+		 * operations have extremely high latency (~19 cycles),
+		 * execution and use of result should be separated as much
+		 * as possible.
 		 */
-		ptypes = _mm256_i32gather_epi32((int *)bnxt_ptype_table,
-						ptype_idx, sizeof(uint32_t));
-
-		/* Extract RSS valid flags for eight packets */
-		rss_flags = _mm256_and_si256(flags_type, rss_mask);
-		rss_flags = _mm256_srli_epi32(rss_flags, 9);
-
-		/* Extract metadata0 (contains vlan_tci) and errors from high completion */
-		t0 = _mm256_unpackhi_epi32(rxcmp0_1, rxcmp2_3);
-		t1 = _mm256_unpackhi_epi32(rxcmp4_5, rxcmp6_7);
-		meta0_err = _mm256_unpacklo_epi64(t0, t1);
+		ol_flags = _mm256_i32gather_epi32((const int *)errors_to_olflags_v3,
+						  errors_csum_idx, sizeof(uint32_t));
 
-		/* Extract vlan_tci from high 16 bits of meta0_err (metadata0) */
-		vlan_tci = _mm256_and_si256(_mm256_srli_epi32(meta0_err, 16), vlan_tci_mask);
+		/* Extract if the packet is VLAN and the VLAN tci */
+		metadata1 = _mm256_srli_epi32(metadata1, 16);
+		/* mask_1s used in place of RX_PKT_V3_CMPL_METADATA1_VALID */
+		ptype_idx = _mm256_srli_epi32(_mm256_and_si256(metadata1,
+					      _mm256_slli_epi32(mask_1s, 15)),
+					      RX_PKT_V3_CMPL_METADATA1_VALID_SFT -
+					      BNXT_PTYPE_TBL_VLAN_SFT);
 
-		vlan_flags = _mm256_and_si256(metadata1, metadata1_valid_mask);
-		vlan_flags = _mm256_min_epu32(vlan_flags, _mm256_set1_epi32(1));
+		vlan_flags = _mm256_and_si256(metadata1, _mm256_slli_epi32(mask_1s, 15));
+		vlan_flags = _mm256_min_epu32(vlan_flags, mask_1s);
 
 		if (vnic->vlan_strip) {
 			vlan_flags = _mm256_or_si256(vlan_flags,
-				_mm256_slli_epi32(vlan_flags, 6));
+					_mm256_slli_epi32(vlan_flags, 6));
 		}
 
-		errors_v2 = meta0_err;
+		/* Extract flags_type from low completion for eight packets */
+		t0 = _mm256_unpacklo_epi32(rxcmp0_1, rxcmp2_3);
+		t1 = _mm256_unpacklo_epi32(rxcmp4_5, rxcmp6_7);
+		flags_type = _mm256_unpacklo_epi64(t0, t1);
 
-		errors = _mm256_srli_epi32(_mm256_and_si256(meta0_err, cs_err_mask), 4);
+		/* Compute ptype_idx from flags_type itype field
+		 * mask_fs is used in place of
+		 * RX_PKT_V3_CMPL_FLAGS_ITYPE_MASK
+		 */
+		t0 = _mm256_and_si256(flags_type,
+				      _mm256_slli_epi32(mask_fs, 12));
+		t0 = _mm256_srli_epi32(t0, RX_PKT_V3_CMPL_FLAGS_ITYPE_SFT -
+				       BNXT_PTYPE_TBL_TYPE_SFT);
+		ptype_idx = _mm256_or_si256(ptype_idx, t0);
 
-		cs_calc = _mm256_and_si256(flags2, cs_calc_mask);
+		/* Extract flags2 from high completion for eight packets
+		 * flags2 is re-extracted to save on registers
+		 */
+		t0 = _mm256_unpacklo_epi32(rxcmp0_1, rxcmp2_3);
+		t1 = _mm256_unpacklo_epi32(rxcmp4_5, rxcmp6_7);
+		flags2 = _mm256_unpackhi_epi64(t0, t1);
+
+		/* mask_fs is being used in place of
+		 * RX_PKT_CMPL_CALC
+		 */
+		cs_calc = _mm256_and_si256(flags2, mask_fs);
 		cs_valid = _mm256_cmpeq_epi32(cs_calc, _mm256_setzero_si256());
-		errors = _mm256_andnot_si256(cs_valid, errors);
-		ol_flags = _mm256_i32gather_epi32((const int *)errors_to_olflags_v3,
-						  errors, sizeof(uint32_t));
-		__m256i unknown_flags = _mm256_and_si256(cs_valid,
-				_mm256_set1_epi32(RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN));
-		ol_flags = _mm256_or_si256(ol_flags, unknown_flags);
+		ol_flags = _mm256_andnot_si256(cs_valid, ol_flags);
 
-		const __m256i perm_msk =
-				_mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0);
-		info3_v = _mm256_permutevar8x32_epi32(errors_v2, perm_msk);
-		info3_v = _mm256_and_si256(errors_v2, info3_v_mask);
+		/* mask_1s is being used in place of
+		 * RX_PKT_V3_CMPL_HI_FLAGS2_IP_TYPE
+		 */
+		t0 = _mm256_srli_epi32(_mm256_and_si256(flags2,
+				       _mm256_slli_epi32(mask_1s, 8)),
+				       RX_PKT_V3_CMPL_FLAGS2_IP_TYPE_SFT -
+				       BNXT_PTYPE_TBL_IP_VER_SFT);
+		ptype_idx = _mm256_or_si256(ptype_idx, t0);
+
+		/*
+		 * Load ptypes for eight packets using gather. Gather operations
+		 * have extremely high latency (~19 cycles), execution and use
+		 * of result should be separated as much as possible.
+		 */
+		ptypes = _mm256_i32gather_epi32((int *)bnxt_ptype_table,
+						ptype_idx, sizeof(uint32_t));
+
+		/*
+		 * Pack the 128-bit array of valid descriptor flags into 64
+		 * bits and count the number of set bits in order to determine
+		 * the number of valid descriptors.
+		 * mask_1s is used in place of CMPL_BASE_V
+		 */
+		info3_v = _mm256_and_si256(errors_v2, mask_1s);
 		info3_v = _mm256_xor_si256(info3_v, valid_target);
 
 		info3_v = _mm256_packs_epi32(info3_v, _mm256_setzero_si256());
@@ -1181,6 +1173,11 @@ recv_burst_vec_avx2_v3(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pk
 		if (num_valid == 0)
 			break;
 
+		/* Extract flags_type from low completion for eight packets */
+		t0 = _mm256_unpacklo_epi32(rxcmp0_1, rxcmp2_3);
+		t1 = _mm256_unpacklo_epi32(rxcmp4_5, rxcmp6_7);
+		flags_type = _mm256_unpacklo_epi64(t0, t1);
+
 		mbuf01 = _mm256_shuffle_epi8(rxcmp0_1, shuf_msk);
 		mbuf23 = _mm256_shuffle_epi8(rxcmp2_3, shuf_msk);
 		mbuf45 = _mm256_shuffle_epi8(rxcmp4_5, shuf_msk);
@@ -1194,29 +1191,18 @@ recv_burst_vec_avx2_v3(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pk
 		mbuf67 = _mm256_blend_epi32(mbuf67,
 					_mm256_srli_si256(ptypes, 12), 0x11);
 
-		const __m256i tci_perm_01 = _mm256_set_epi32(1, 1, 1, 1, 0, 0, 0, 0);
-		const __m256i tci_perm_23 = _mm256_set_epi32(3, 3, 3, 3, 2, 2, 2, 2);
-		const __m256i tci_perm_45 = _mm256_set_epi32(5, 5, 5, 5, 4, 4, 4, 4);
-		const __m256i tci_perm_67 = _mm256_set_epi32(7, 7, 7, 7, 6, 6, 6, 6);
-
-		mbuf01 = _mm256_blend_epi16(mbuf01,
-			_mm256_slli_si256(_mm256_permutevar8x32_epi32(vlan_tci,
-						tci_perm_01), 10), 0x20);
-		mbuf23 = _mm256_blend_epi16(mbuf23,
-			_mm256_slli_si256(_mm256_permutevar8x32_epi32(vlan_tci,
-						tci_perm_23), 10), 0x20);
-		mbuf45 = _mm256_blend_epi16(mbuf45,
-			_mm256_slli_si256(_mm256_permutevar8x32_epi32(vlan_tci,
-						tci_perm_45), 10), 0x20);
-		mbuf67 = _mm256_blend_epi16(mbuf67,
-			_mm256_slli_si256(_mm256_permutevar8x32_epi32(vlan_tci,
-						tci_perm_67), 10), 0x20);
-
 		rearm0 = _mm256_permute2f128_si256(mbuf_init, mbuf01, 0x20);
 		rearm1 = _mm256_blend_epi32(mbuf_init, mbuf01, 0xF0);
 		rearm2 = _mm256_permute2f128_si256(mbuf_init, mbuf23, 0x20);
 		rearm3 = _mm256_blend_epi32(mbuf_init, mbuf23, 0xF0);
 
+		/* Extract RSS valid flags for eight packets
+		 * mask_1s is being used in place of
+		 * RX_PKT_V3_CMPL_FLAGS_RSS_VALID
+		 */
+		rss_flags = _mm256_and_si256(flags_type,
+					     _mm256_slli_epi32(mask_1s, 10));
+		rss_flags = _mm256_srli_epi32(rss_flags, 9);
 		ol_flags = _mm256_or_si256(ol_flags, rss_flags);
 		ol_flags = _mm256_or_si256(ol_flags, vlan_flags);
 		ol_flags_hi = _mm256_permute2f128_si256(ol_flags,
diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_common.h b/drivers/net/bnxt/bnxt_rxtx_vec_common.h
index e8da010dc3..d8659d1001 100644
--- a/drivers/net/bnxt/bnxt_rxtx_vec_common.h
+++ b/drivers/net/bnxt/bnxt_rxtx_vec_common.h
@@ -178,7 +178,7 @@ bnxt_tx_cmp_vec(struct bnxt_tx_queue *txq, uint32_t nr_pkts)
 	txr->tx_raw_cons = raw_cons;
 }
 
-static const uint64_t errors_to_olflags_v3[16] = {
+static const uint32_t errors_to_olflags_v3[16] = {
 	RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_GOOD |
 	RTE_MBUF_F_RX_IP_CKSUM_GOOD,
 	RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_GOOD |
-- 
2.47.3



More information about the dev mailing list