|SUCCESS| [v1,3/3] net/iavf: add NEON support for Rx flex desc

dpdklab at iol.unh.edu dpdklab at iol.unh.edu
Fri Apr 17 15:50:36 CEST 2026


Test-Label: ai-code-review
Test-Status: SUCCESS
https://dpdk.org/patch/163173

_AI Code Review_

=== Patch Review: series.patch (via Claude) ===
Review date: 2026-04-17

git sha: (patch series starting at 20260417130833.2503592)
Author: Jay Wang <jay.wang2 at arm.com>
Subject: [PATCH v1 1/3] net/iavf: add Rx scattered function for 32B desc

This patch series adds NEON-optimized receive and transmit functions for
the Intel IAVF driver on AArch64, including support for scattered
packets and flexible descriptors.

> diff --git a/drivers/net/intel/iavf/iavf.h b/drivers/net/intel/iavf/iavf.h
> index 403c61e2e8..e4936f3566 100644
> --- a/drivers/net/intel/iavf/iavf.h
> +++ b/drivers/net/intel/iavf/iavf.h
> @@ -334,6 +334,7 @@ enum iavf_rx_func_type {
>  	IAVF_RX_BULK_ALLOC,
>  	IAVF_RX_BULK_ALLOC_FLEX_RXD,
>  	IAVF_RX_NEON,
> +	IAVF_RX_NEON_SCATTERED,

[ ... ]

> diff --git a/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c b/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c
> index 28c90b2a72..45e377d728 100644
> --- a/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c
> +++ b/drivers/net/intel/iavf/iavf_rxtx_vec_neon.c
> @@ -145,8 +145,6 @@ _recv_raw_pkts_vec(struct ci_rx_queue *__rte_restrict rxq,
>  		   struct rte_mbuf **__rte_restrict rx_pkts,
>  		   uint16_t nb_pkts, uint8_t *split_packet)
>  {
> -	RTE_SET_USED(split_packet);
> -

Why is the RTE_SET_USED(split_packet) macro removed? Does removing this
cause unused parameter warnings when split_packet is NULL in the non-
scattered path? Consider if this should be conditionally present based on
whether split_packet support is compiled in.

[ ... ]

> @@ -238,6 +243,13 @@ _recv_raw_pkts_vec(struct ci_rx_queue *__rte_restrict rxq,
>  		vst1q_u64((uint64_t *)&rx_pkts[pos], mbp1);
>  		vst1q_u64((uint64_t *)&rx_pkts[pos + 2], mbp2);
>  
> +		if (split_packet) {
> +			rte_mbuf_prefetch_part2(rx_pkts[pos]);
> +			rte_mbuf_prefetch_part2(rx_pkts[pos + 1]);
> +			rte_mbuf_prefetch_part2(rx_pkts[pos + 2]);
> +			rte_mbuf_prefetch_part2(rx_pkts[pos + 3]);
> +		}
> +

[ ... ]

> @@ -306,6 +318,32 @@ _recv_raw_pkts_vec(struct ci_rx_queue *__rte_restrict rxq,
>  		staterr = vzipq_u16(sterr_tmp1.val[1],
>  				    sterr_tmp2.val[1]).val[0];
>  
> +		/* C* extract and record EOP bit */
> +		if (split_packet) {
> +			uint8x16_t eop_shuf_mask = {
> +				0x00, 0x02, 0x04, 0x06,
> +				0xFF, 0xFF, 0xFF, 0xFF,
> +				0xFF, 0xFF, 0xFF, 0xFF,
> +				0xFF, 0xFF, 0xFF, 0xFF
> +			};
> +			uint8x16_t eop_bits;
> +
> +			/* and with mask to extract bits, flipping 1-0 */
> +			eop_bits = vmvnq_u8(vreinterpretq_u8_u16(staterr));
> +			eop_bits = vandq_u8(eop_bits, eop_check);
> +			/* the staterr values are not in order, as the count
> +			 * of dd bits doesn't care. However, for end of
> +			 * packet tracking, we do care, so shuffle. This also
> +			 * compresses the 32-bit values to 8-bit
> +			 */
> +			eop_bits = vqtbl1q_u8(eop_bits, eop_shuf_mask);
> +
> +			/* store the resulting 32-bit value */
> +			vst1q_lane_u32((uint32_t *)split_packet,
> +				vreinterpretq_u32_u8(eop_bits), 0);
> +			split_packet += IAVF_VPMD_DESCS_PER_LOOP;
> +		}
> +

Does this code correctly handle alignment of the split_packet pointer? The
cast to (uint32_t *) suggests 4-byte alignment is expected. Does the
caller guarantee proper alignment of the split_packet buffer?

[ ... ]

> @@ -341,6 +379,72 @@ iavf_recv_pkts_vec(void *__rte_restrict rx_queue,
>  	return _recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL);
>  }
>  
> +/*
> + * vPMD receive routine that reassembles single burst of 32 scattered
> + * packets.
> + *
> + * Notice:
> + * - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet
> + */
> +static __rte_always_inline uint16_t
> +iavf_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
> +		uint16_t nb_pkts)
> +{
> +	struct ci_rx_queue *rxq = rx_queue;
> +	uint8_t split_flags[IAVF_VPMD_RX_BURST] = {0};
> +
> +	/* get some new buffers */
> +	uint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
> +						split_flags);
> +
> +	if (nb_bufs == 0)
> +		return 0;
> +
> +	/* happy day case, full burst + no packets to be assembled */
> +	const uint64_t *split_fl64 = (uint64_t *)split_flags;
> +	if (!rxq->pkt_first_seg &&
> +			split_fl64[0] == 0 && split_fl64[1] == 0 &&
> +			split_fl64[2] == 0 && split_fl64[3] == 0)
> +		return nb_bufs;
> +
> +	/* reassmble any packets that need reassembly */
> +	unsigned int i = 0;
> +	if (!rxq->pkt_first_seg) {
> +		/* find the first split flag, and only reassmeble then */
> +		while (i < nb_bufs && !split_flags[i])
> +			i++;
> +		if (i == nb_bufs)
> +			return nb_bufs;
> +		rxq->pkt_first_seg = rx_pkts[i];
> +	}
> +	return i + ci_rx_reassemble_packets(&rx_pkts[i], nb_bufs - i,
> +			&split_flags[i], &rxq->pkt_first_seg, &rxq->pkt_last_seg,
> +			rxq->crc_len);
> +}

This code has typos in the comments: "reassmble" should be "reassemble"
(appears twice).

Regarding variable declarations: The variable 'i' is declared after code
statements, which violates C99 style used in DPDK. Does this follow the
coding standard? Variables should be declared at the beginning of their
scope.

[ ... ]

> diff --git a/drivers/net/intel/iavf/iavf_rxtx.h b/drivers/net/intel/iavf/iavf_rxtx.h
> index 80b06518b0..8b8e55e66f 100644
> --- a/drivers/net/intel/iavf/iavf_rxtx.h
> +++ b/drivers/net/intel/iavf/iavf_rxtx.h
> @@ -558,8 +558,6 @@ uint16_t iavf_recv_scattered_pkts_vec(void *rx_queue,
>  uint16_t iavf_recv_scattered_pkts_vec_flex_rxd(void *rx_queue,
>  					       struct rte_mbuf **rx_pkts,
>  					       uint16_t nb_pkts);
> -uint16_t iavf_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
> -				  uint16_t nb_pkts);

Why is the iavf_xmit_fixed_burst_vec() declaration removed from the
header? Does this function still exist? If it's being made static in
patch 2/3, the header change should be in that patch, not here.

[ ... ]

> +static __rte_always_inline void
> +iavf_vtx1(volatile struct ci_tx_desc *txdp, struct rte_mbuf *pkt,
> +	 uint64_t flags)
> +{
> +	uint64_t high_qw = (CI_TX_DESC_DTYPE_DATA |
> +		((uint64_t)flags << CI_TXD_QW1_CMD_S) |
> +		((uint64_t)pkt->data_len << CI_TXD_QW1_TX_BUF_SZ_S));
> +
> +	uint64x2_t descriptor = {rte_pktmbuf_iova(pkt), high_qw};
> +	vst1q_u64(RTE_CAST_PTR(uint64_t *, txdp), descriptor);
> +}

Does this initialization of the uint64x2_t vector using brace initializer
work correctly across all supported ARM64 compilers? Some compilers may
require explicit vector initialization functions like vcombine_u64() for
portability.

[ ... ]

> +static __rte_always_inline uint16_t
> +iavf_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
> +		uint16_t nb_pkts)
> +{
> +	struct ci_tx_queue *txq = (struct ci_tx_queue *)tx_queue;
> +	volatile struct ci_tx_desc *txdp;
> +	struct ci_tx_entry_vec *txep;
> +	uint16_t n, nb_commit, tx_id;
> +	uint64_t flags = CI_TX_DESC_CMD_DEFAULT;
> +	uint64_t rs = CI_TX_DESC_CMD_RS | CI_TX_DESC_CMD_DEFAULT;
> +	int i;
> +
> +	/* cross rx_thresh boundary is not allowed */
> +	nb_pkts = RTE_MIN(nb_pkts, txq->tx_rs_thresh);
> +
> +	if (txq->nb_tx_free < txq->tx_free_thresh)
> +		ci_tx_free_bufs_vec(txq, iavf_tx_desc_done, false);

[ ... ]

> +	nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
> +	nb_commit = nb_pkts;
> +	if (unlikely(nb_pkts == 0))
> +		return 0;

Does this function leak any mbufs if nb_pkts becomes 0 after the free
check? At this point tx_pkts[] have been passed in but not transmitted or
freed.

[ ... ]

> +static __rte_always_inline void
> +iavf_flex_rxd_to_olflags_v(struct ci_rx_queue *rxq, uint64x2_t descs[4],
> +		struct rte_mbuf **rx_pkts)
> +{
> +	const uint64x2_t mbuf_init = {rxq->mbuf_initializer, 0};
> +	uint64x2_t rearm0, rearm1, rearm2, rearm3;
> +
> +	uint32x4_t tmp_desc, flags, rss_vlan;

Is the declaration style consistent with DPDK coding standards? This
declares multiple variables of the same type on one line. According to
the coding guide, each declaration should be on its own line when
possible.

[ ... ]

> +	/* map the checksum, rss and vlan flags to the checksum, rss
> +	 * and vlan flags.
> +	 */
> +	const uint8x16_t cksum_flags = {

The multi-line comment style is incorrect. The opening /* should have the
comment start on the next line when the comment spans multiple lines.

[ ... ]

> +	const uint8x16_t rss_vlan_flags = {
> +		0, RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED,
> +		RTE_MBUF_F_RX_RSS_HASH | RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED,
> +		0, 0, 0, 0,
> +		0, 0, 0, 0,
> +		0, 0, 0, 0,
> +	};

Does this line exceed 80 characters? The second line appears to be quite
long and should be checked for proper wrapping.

[ ... ]

> +/**
> + * vPMD receive routine that reassembles single burst of 32 scattered packets
> + * for flex RxD
> + *
> + * Notice:
> + * - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet
> + */
> +static uint16_t
> +iavf_recv_scattered_burst_vec_flex_rxd(void *rx_queue,
> +				       struct rte_mbuf **rx_pkts,
> +				       uint16_t nb_pkts)
> +{
> +	struct ci_rx_queue *rxq = rx_queue;
> +	uint8_t split_flags[IAVF_VPMD_RX_BURST] = {0};
> +	unsigned int i = 0;
> +
> +	/* get some new buffers */
> +	uint16_t nb_bufs = _recv_raw_pkts_vec_flex_rxd(rxq, rx_pkts, nb_pkts,
> +					      split_flags);

Same issue as in patch 1: variable declaration after code. The variable
'nb_bufs' should be declared at the function start with other variables.


More information about the test-report mailing list