|SUCCESS| [v4,1/1] net/ice: add NEON-optimised Rx/Tx vector paths
dpdklab at iol.unh.edu
dpdklab at iol.unh.edu
Fri Apr 10 16:16:07 CEST 2026
Test-Label: ai-code-review
Test-Status: SUCCESS
https://dpdk.org/patch/162968
_AI Code Review_
=== Patch Review: series.patch (via Claude) ===
Review date: 2026-04-10
I'll review this patch, which adds NEON-optimized Rx/Tx vector paths to the Intel ICE driver for the ARM64 architecture.
commit: [Not provided in patch file]
Author: Jay Wang <jay.wang2 at arm.com>
[PATCH v4 1/1] net/ice: add NEON-optimised Rx/Tx vector paths
This patch adds the NEON-optimised Rx and Tx paths to the ice driver.
> diff --git a/drivers/net/intel/ice/ice_rxtx_vec_neon.c b/drivers/net/intel/ice/ice_rxtx_vec_neon.c
> new file mode 100644
> index 0000000000..6123e1ff8e
> --- /dev/null
> +++ b/drivers/net/intel/ice/ice_rxtx_vec_neon.c
[ ... ]
> +static __rte_always_inline void
> +ice_flex_rxd_to_olflags_v(struct ci_rx_queue *rxq, uint64x2_t descs[4],
> + struct rte_mbuf **rx_pkts)
> +{
[ ... ]
> + /* check the additional fdir_flags if fdir is enabled */
> + if (rxq->fdir_enabled) {
> + const uint32x4_t fdir_id0_1 =
> + vzip2q_u32(vreinterpretq_u32_u64(descs[0]),
> + vreinterpretq_u32_u64(descs[1]));
> + const uint32x4_t fdir_id2_3 =
> + vzip2q_u32(vreinterpretq_u32_u64(descs[2]),
> + vreinterpretq_u32_u64(descs[3]));
> + const uint32x4_t fdir_id0_3 =
> + vreinterpretq_u32_u64(vzip2q_u64(vreinterpretq_u64_u32(fdir_id0_1),
> + vreinterpretq_u64_u32(fdir_id2_3)));
> + const uint32x4_t fdir_flags =
> + ice_flex_rxd_to_fdir_flags_vec(fdir_id0_3);
> +
> + /* merge with fdir_flags */
> + flags = vorrq_u32(flags, fdir_flags);
> +
> + /* write fdir_id to mbuf */
> + rx_pkts[0]->hash.fdir.hi = vgetq_lane_u32(fdir_id0_3, 0);
> + rx_pkts[1]->hash.fdir.hi = vgetq_lane_u32(fdir_id0_3, 1);
> + rx_pkts[2]->hash.fdir.hi = vgetq_lane_u32(fdir_id0_3, 2);
> + rx_pkts[3]->hash.fdir.hi = vgetq_lane_u32(fdir_id0_3, 3);
> + }
Does this code corrupt RSS hash values that may have been written to the
mbuf hash union earlier in the function? The mbuf hash field is a union,
so writing to hash.fdir.hi could clobber a previously stored hash.rss
value — unless the two members occupy disjoint bytes of the union, which
should be confirmed against the rte_mbuf layout.
[ ... ]
> +static __rte_always_inline uint16_t
> +_ice_recv_raw_pkts_vec(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
> + uint16_t nb_pkts, uint8_t *split_packet)
> +{
[ ... ]
> +#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
> +
> + /**
> + * needs to load 2nd 16B of each desc for RSS hash parsing,
> + * will cause performance drop to get into this context.
> + */
> + if (rxq->ice_vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads &
> + RTE_ETH_RX_OFFLOAD_RSS_HASH) {
Does this check need to ensure rxq->ice_vsi and rxq->ice_vsi->adapter
are not NULL before dereferencing? Can this code path be reached with
these pointers uninitialized?
[ ... ]
> +static uint16_t
> +ice_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
> + uint16_t nb_pkts)
> +{
> + struct ci_rx_queue *rxq = rx_queue;
> + uint8_t split_flags[ICE_VPMD_RX_BURST] = {0};
> +
> + /* get some new buffers */
> + uint16_t nb_bufs = _ice_recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
> + split_flags);
> + if (nb_bufs == 0)
> + return 0;
> +
> + /* happy day case, full burst + no packets to be joined */
> + const uint64_t *split_fl64 = (uint64_t *)split_flags;
Does this cast introduce alignment issues? The split_flags array is
declared uint8_t but is read through a uint64_t pointer. Can this cause
an unaligned memory access on ARM64, and does reading uint8_t storage
through a uint64_t lvalue also violate C strict-aliasing rules?
[ ... ]
> +static __rte_always_inline void
> +ice_vtx1(volatile struct ci_tx_desc *txdp, struct rte_mbuf *pkt,
> + uint64_t flags)
> +{
> + uint64_t high_qw = (CI_TX_DESC_DTYPE_DATA |
> + ((uint64_t)flags << CI_TXD_QW1_CMD_S) |
> + ((uint64_t)pkt->data_len << CI_TXD_QW1_TX_BUF_SZ_S));
> +
> + uint64x2_t descriptor = {rte_pktmbuf_iova(pkt), high_qw};
> + vst1q_u64(RTE_CAST_PTR(uint64_t *, txdp), descriptor);
> +}
Does this function need to handle the case where pkt is NULL? Can
rte_pktmbuf_iova() be called safely on a NULL pointer?
[ ... ]
> +static __rte_always_inline uint16_t
> +ice_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
> + uint16_t nb_pkts)
> +{
> + struct ci_tx_queue *txq = (struct ci_tx_queue *)tx_queue;
> + volatile struct ci_tx_desc *txdp;
> + struct ci_tx_entry_vec *txep;
> + uint16_t n, nb_commit, tx_id;
> + uint64_t flags = CI_TX_DESC_CMD_DEFAULT;
> + uint64_t rs = CI_TX_DESC_CMD_RS | CI_TX_DESC_CMD_DEFAULT;
> + int i;
> +
> + /* cross rx_thresh boundary is not allowed */
> + nb_pkts = RTE_MIN(nb_pkts, txq->tx_rs_thresh);
> +
> + if (txq->nb_tx_free < txq->tx_free_thresh)
> + ci_tx_free_bufs_vec(txq, ice_tx_desc_done, false);
> +
> + nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
> + nb_commit = nb_pkts;
> + if (unlikely(nb_pkts == 0))
> + return 0;
Does this code handle the case where nb_tx_free remains zero even after
calling ci_tx_free_bufs_vec()? The function returns 0 in that case, so
there is no loop here — but could a caller that retries on a zero return
spin indefinitely if descriptors continuously fail to be freed?
[ ... ]
> diff --git a/drivers/net/intel/ice/ice_rxtx.c b/drivers/net/intel/ice/ice_rxtx.c
> index 31b74be9ba..12eac6e41e 100644
> --- a/drivers/net/intel/ice/ice_rxtx.c
> +++ b/drivers/net/intel/ice/ice_rxtx.c
[ ... ]
> @@ -3404,6 +3435,14 @@ ice_set_rx_function(struct rte_eth_dev *dev)
> for (i = 0; i < dev->data->nb_rx_queues; i++)
> if (dev->data->rx_queues[i])
> ice_rxq_vec_setup(dev->data->rx_queues[i]);
> +#elif defined(RTE_ARCH_ARM64)
> + int i;
> +
> + if (ice_rx_path_infos[ad->rx_func_type].features.simd_width >= RTE_VECT_SIMD_128)
> + /* Vector function selected. Prepare the rxq accordingly. */
> + for (i = 0; i < dev->data->nb_rx_queues; i++)
> + if (dev->data->rx_queues[i])
> + ice_rxq_vec_setup(dev->data->rx_queues[i]);
> #endif
Does this code check whether dev->data->rx_queues is NULL before
accessing it? Can this cause a NULL pointer dereference when
dev->data->nb_rx_queues is non-zero but rx_queues is not allocated?
[ ... ]
> @@ -3729,8 +3778,13 @@ ice_set_tx_function(struct rte_eth_dev *dev)
> ICE_TX_DEFAULT);
>
> out:
> +#if defined(RTE_ARCH_X86)
> ad->tx_vec_allowed =
> (ice_tx_path_infos[ad->tx_func_type].features.simd_width >= RTE_VECT_SIMD_256);
> +#elif defined(RTE_ARCH_ARM64)
> + ad->tx_vec_allowed =
> + (ice_tx_path_infos[ad->tx_func_type].features.simd_width >= RTE_VECT_SIMD_128);
> +#endif
Should there be a fallback case when neither RTE_ARCH_X86 nor
RTE_ARCH_ARM64 is defined? Does tx_vec_allowed remain uninitialized on
other architectures?
More information about the test-report
mailing list