|SUCCESS| [v5] net/ice: add NEON-optimised Rx/Tx vector paths
dpdklab at iol.unh.edu
dpdklab at iol.unh.edu
Wed Apr 15 23:54:04 CEST 2026
Test-Label: ai-code-review
Test-Status: SUCCESS
https://dpdk.org/patch/163099
_AI Code Review_
=== Patch Review: series.patch (via Claude) ===
Review date: 2026-04-15
Commit: [git sha would be extracted from patch]
Author: Jay Wang <jay.wang2 at arm.com>
Subject: [PATCH v5] net/ice: add NEON-optimised Rx/Tx vector paths
This patch adds the NEON-optimised Rx and Tx paths to the ice driver on
AArch64. The implementation provides vector operations for improved packet
processing performance on Arm platforms.
> diff --git a/.mailmap b/.mailmap
> index 0e0d83e1c6..873bc62d0d 100644
> --- a/.mailmap
> +++ b/.mailmap
> @@ -695,6 +695,7 @@ Javen Xu <javen_xu at realsil.com.cn>
> Jay Ding <jay.ding at broadcom.com>
> Jay Jayatheerthan <jay.jayatheerthan at intel.com>
> Jay Rolette <rolette at infiniteio.com>
> +Jay Wang <jay.wang2 at arm.com>
> Jay Zhou <jianjay.zhou at huawei.com>
> Jayaprakash Shanmugam <jayaprakash.shanmugam at intel.com>
> Jean Dao <jean.dao at 6wind.com>
[ ... ]
> diff --git a/drivers/net/intel/ice/ice_rxtx_vec_neon.c b/drivers/net/intel/ice/ice_rxtx_vec_neon.c
> new file mode 100644
> index 0000000000..1d17916852
> --- /dev/null
> +++ b/drivers/net/intel/ice/ice_rxtx_vec_neon.c
> @@ -0,0 +1,745 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2026 Intel Corporation
> + * Copyright(c) 2026 Arm Limited
> + */
> +
> +#include "ice_rxtx_vec_common.h"
> +
> +#include "../common/rx_vec_arm.h"
Does this code follow the DPDK coding style guide regarding header
include ordering? Per coding_style.rst, system headers should come
first, then DPDK headers, then driver headers. Here we have driver
headers first, then what appears to be a common header that may not
exist based on the file path.
> +
> +#include <rte_vect.h>
> +
> +#define ICE_UINT16_BIT (CHAR_BIT * sizeof(uint16_t))
Can this macro be replaced with a standard definition? The coding style
guide discourages defining constants that are derivable from standard
values.
> +
> +static __rte_always_inline uint32x4_t
> +ice_flex_rxd_to_fdir_flags_vec(const uint32x4_t fdir_id0_3)
> +{
> +#define FDID_MIS_MAGIC 0xFFFFFFFFu
Does this code leak the macro definition? According to coding_style.rst,
local macros should be undefined when no longer needed, particularly
those defined within functions.
> + RTE_BUILD_BUG_ON(RTE_MBUF_F_RX_FDIR != (1u << 2));
> + RTE_BUILD_BUG_ON(RTE_MBUF_F_RX_FDIR_ID != (1u << 13));
> +
> + const uint32x4_t pkt_fdir_bit = vdupq_n_u32((uint32_t)(RTE_MBUF_F_RX_FDIR |
> + RTE_MBUF_F_RX_FDIR_ID));
Does the indentation here follow the DPDK style? Continuation lines
should align with the opening parenthesis or be indented by 4 spaces.
[ ... ]
> +static __rte_always_inline void
> +ice_flex_rxd_to_olflags_v(struct ci_rx_queue *rxq, uint64x2_t descs[4],
> + struct rte_mbuf **rx_pkts)
Does the function parameter indentation follow the style guide? Function
parameters on subsequent lines should align with the opening parenthesis
or be indented consistently.
> +{
> + const uint64x2_t mbuf_init = {rxq->mbuf_initializer, 0};
[ ... ]
> + /* check the additional fdir_flags if fdir is enabled */
> + if (rxq->fdir_enabled) {
Is this conditional check thread-safe? Can rxq->fdir_enabled change
during execution without proper synchronization?
> + const uint32x4_t fdir_id0_1 =
> + vzip2q_u32(vreinterpretq_u32_u64(descs[0]),
> + vreinterpretq_u32_u64(descs[1]));
> + const uint32x4_t fdir_id2_3 =
> + vzip2q_u32(vreinterpretq_u32_u64(descs[2]),
> + vreinterpretq_u32_u64(descs[3]));
> + const uint32x4_t fdir_id0_3 =
> + vreinterpretq_u32_u64(vzip2q_u64(vreinterpretq_u64_u32(fdir_id0_1),
> + vreinterpretq_u64_u32(fdir_id2_3)));
[ ... ]
> +static __rte_always_inline uint16_t
> +_ice_recv_raw_pkts_vec(struct ci_rx_queue *rxq, struct rte_mbuf **rx_pkts,
> + uint16_t nb_pkts, uint8_t *split_packet)
> +{
> + volatile union ci_rx_flex_desc *rxdp;
> + struct ci_rx_entry *sw_ring;
> + uint16_t nb_pkts_recd;
> + int pos;
> + uint32_t *ptype_tbl = rxq->ice_vsi->adapter->ptype_tbl;
Can this pointer chain be null? Does this code verify that
rxq->ice_vsi->adapter is valid before dereferencing?
> +
> + uint16x8_t crc_adjust = {
> + 0, 0, /* ignore pkt_type field */
> + rxq->crc_len, /* sub crc on pkt_len */
> + 0, /* ignore high-16bits of pkt_len */
> + rxq->crc_len, /* sub crc on data_len */
> + 0, 0, 0 /* ignore non-length field */
> + };
[ ... ]
> + /* check to see if there is actually a packet available */
> + if (!(rxdp->wb.status_error0 &
> + rte_cpu_to_le_32(1 << ICE_RX_FLEX_DESC_STATUS0_DD_S)))
> + return 0;
[ ... ]
> + /* A.1 load descs[3-0] */
> + descs[3] = vld1q_u64(RTE_CAST_PTR(uint64_t *, rxdp + 3));
> + descs[2] = vld1q_u64(RTE_CAST_PTR(uint64_t *, rxdp + 2));
> + descs[1] = vld1q_u64(RTE_CAST_PTR(uint64_t *, rxdp + 1));
> + descs[0] = vld1q_u64(RTE_CAST_PTR(uint64_t *, rxdp + 0));
Does this code have whitespace alignment issues? There appear to be
extra spaces before the equals sign which may violate style guidelines.
> +
> + /* use acquire fence to order loads of descriptor qwords */
> + rte_atomic_thread_fence(rte_memory_order_acquire);
> + /* A.2 reload qword0 to make it ordered after qword1 load */
> + descs[3] = vld1q_lane_u64(RTE_CAST_PTR(uint64_t *, rxdp + 3),
> + descs[3], 0);
> + descs[2] = vld1q_lane_u64(RTE_CAST_PTR(uint64_t *, rxdp + 2),
> + descs[2], 0);
> + descs[1] = vld1q_lane_u64(RTE_CAST_PTR(uint64_t *, rxdp + 1),
> + descs[1], 0);
> + descs[0] = vld1q_lane_u64(RTE_CAST_PTR(uint64_t *, rxdp),
> + descs[0], 0);
[ ... ]
> +#ifndef RTE_NET_INTEL_USE_16BYTE_DESC
Does this code have whitespace issues? There appears to be a tab
character after ifndef which should be a space per style guidelines.
> +
> + /**
> + * needs to load 2nd 16B of each desc for RSS hash parsing,
> + * will cause performance drop to get into this context.
> + */
> + if (rxq->ice_vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads &
> + RTE_ETH_RX_OFFLOAD_RSS_HASH) {
[ ... ]
> +uint16_t
> +ice_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
> + uint16_t nb_pkts)
> +{
> + uint16_t retval = 0;
> +
> + while (nb_pkts > ICE_VPMD_RX_BURST) {
> + uint16_t burst;
> +
> + burst = ice_recv_scattered_burst_vec(rx_queue,
> + rx_pkts + retval,
> + ICE_VPMD_RX_BURST);
> + retval += burst;
> + nb_pkts -= burst;
> + if (burst < ICE_VPMD_RX_BURST)
> + return retval;
> + }
> +
> + return retval + ice_recv_scattered_burst_vec(rx_queue,
> + rx_pkts + retval,
> + nb_pkts);
> +}
[ ... ]
> +static __rte_always_inline uint16_t
> +ice_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
> + uint16_t nb_pkts)
> +{
> + struct ci_tx_queue *txq = (struct ci_tx_queue *)tx_queue;
> + volatile struct ci_tx_desc *txdp;
> + struct ci_tx_entry_vec *txep;
> + uint16_t n, nb_commit, tx_id;
> + uint64_t flags = CI_TX_DESC_CMD_DEFAULT;
> + uint64_t rs = CI_TX_DESC_CMD_RS | CI_TX_DESC_CMD_DEFAULT;
> + int i;
> +
> + /* cross rx_thresh boundary is not allowed */
> + nb_pkts = RTE_MIN(nb_pkts, txq->tx_rs_thresh);
[ ... ]
> + if (nb_commit >= n) {
> + ci_tx_backlog_entry_vec(txep, tx_pkts, n);
> +
> + for (i = 0; i < n - 1; ++i, ++tx_pkts, ++txdp)
> + ice_vtx1(txdp, *tx_pkts, flags);
> +
> + /* write with RS for the last descriptor in the segment */
> + ice_vtx1(txdp, *tx_pkts++, rs);
> +
> + nb_commit = (uint16_t)(nb_commit - n);
> +
> + tx_id = 0;
> + txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);
> +
> + /* avoid reach the end of ring */
> + txdp = &txq->ci_tx_ring[tx_id];
> + txep = &txq->sw_ring_vec[tx_id];
> + }
> +
> + ci_tx_backlog_entry_vec(txep, tx_pkts, nb_commit);
> +
> + ice_vtx(txdp, tx_pkts, nb_commit, flags);
> +
> + tx_id = (uint16_t)(tx_id + nb_commit);
> + if (tx_id > txq->tx_next_rs) {
> + txq->ci_tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
> + rte_cpu_to_le_64(((uint64_t)CI_TX_DESC_CMD_RS) <<
> + CI_TXD_QW1_CMD_S);
> + txq->tx_next_rs =
> + (uint16_t)(txq->tx_next_rs + txq->tx_rs_thresh);
> + }
> +
> + txq->tx_tail = tx_id;
> +
> + ICE_PCI_REG_WC_WRITE(txq->qtx_tail, txq->tx_tail);
> +
> + return nb_pkts;
> +}
[ ... ]
> +int __rte_cold
> +ice_rxq_vec_setup(struct ci_rx_queue *rxq)
> +{
> + rxq->vector_rx = 1;
Does this code use an implicit boolean comparison? Per coding_style.rst,
explicit comparisons should be used for integer values being assigned to
boolean-like fields.
> + rxq->mbuf_initializer = ci_rxq_mbuf_initializer(rxq->port_id);
> + return 0;
> +}
More information about the test-report
mailing list