|SUCCESS| [v4,2/2] node: lookup with RISC-V vector extension

dpdklab at iol.unh.edu dpdklab at iol.unh.edu
Tue Mar 31 17:11:57 CEST 2026


Test-Label: ai-code-review
Test-Status: SUCCESS
https://dpdk.org/patch/162784

_AI Code Review_

=== Patch Review: series.patch (via Claude) ===
Review date: 2026-03-31

Commit: series.patch (commit SHA not provided in patch)
Author: Sun Yuechi <sunyuechi at iscas.ac.cn>
Subject: [PATCH v4 1/2] eal/riscv: set default SIMD bitwidth to 128

This patch series enables RISC-V vector extension support for DPDK,
setting the default SIMD bitwidth to 128 and implementing vectorized
IPv4 lookup for the node library.

> diff --git a/lib/eal/riscv/include/rte_vect.h b/lib/eal/riscv/include/rte_vect.h
> index a4357e266a..4d16082449 100644
> --- a/lib/eal/riscv/include/rte_vect.h
> +++ b/lib/eal/riscv/include/rte_vect.h
> @@ -19,7 +19,7 @@
>  extern "C" {
>  #endif
>  
> -#define RTE_VECT_DEFAULT_SIMD_BITWIDTH RTE_VECT_SIMD_DISABLED
> +#define RTE_VECT_DEFAULT_SIMD_BITWIDTH RTE_VECT_SIMD_128
>  
>  typedef int32_t		xmm_t __attribute__((vector_size(16)));

Does this change require runtime detection of RISC-V V extension support?
The default is changed unconditionally, but not all RISC-V platforms may
have the V extension available.

[ ... ]

> diff --git a/lib/node/ip4_lookup_rvv.h b/lib/node/ip4_lookup_rvv.h
> new file mode 100644
> index 0000000000..0d9db2e19b
> --- /dev/null
> +++ b/lib/node/ip4_lookup_rvv.h
> @@ -0,0 +1,164 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS).
> + */
> +
> +#ifndef __INCLUDE_IP4_LOOKUP_RVV_H__
> +#define __INCLUDE_IP4_LOOKUP_RVV_H__

The header guard uses a reserved identifier pattern (leading underscores
followed by uppercase). Does this comply with the coding style guide?
Section 2.1.2.1 prohibits double underscore prefixes.

> +
> +static __rte_always_inline vuint32m8_t
> +bswap32_vec(vuint32m8_t v, size_t vl)
> +{
> +	vuint32m8_t low16 = __riscv_vor_vv_u32m8(
> +		__riscv_vsll_vx_u32m8(__riscv_vand_vx_u32m8(v, 0xFF, vl), 24, vl),
> +		__riscv_vsll_vx_u32m8(__riscv_vand_vx_u32m8(v, 0xFF00, vl), 8, vl),
> +		vl);
> +
> +	vuint32m8_t high16 = __riscv_vor_vv_u32m8(
> +		__riscv_vsrl_vx_u32m8(__riscv_vand_vx_u32m8(v, 0xFF0000, vl), 8, vl),
> +		__riscv_vsrl_vx_u32m8(v, 24, vl),
> +		vl);
> +
> +	return __riscv_vor_vv_u32m8(low16, high16, vl);
> +}

Does the bswap32_vec() implementation handle endianness correctly on all
RISC-V platforms? RISC-V can be configured as either little-endian or
big-endian.

> +
> +static __rte_always_inline void
> +rte_lpm_lookup_vec(const struct rte_lpm *lpm, const uint32_t *ips,
> +			uint32_t *hop, size_t vl, uint32_t defv)
> +{
> +	/* Load IP addresses (network byte order) */
> +	vuint32m8_t v_ip = bswap32_vec(__riscv_vle32_v_u32m8(ips, vl), vl);

The indentation appears inconsistent here. Does this line use tabs or
spaces for alignment? Coding style section 2.1.3.2 requires tabs for
indentation.

> +
> +	vuint32m8_t v_tbl24_byte_offset = __riscv_vsll_vx_u32m8(
> +			__riscv_vsrl_vx_u32m8(v_ip, 8, vl), 2, vl);
> +
> +	vuint32m8_t vtbl_entry = __riscv_vluxei32_v_u32m8(
> +		(const uint32_t *)lpm->tbl24, v_tbl24_byte_offset, vl);
> +
> +	vbool4_t mask = __riscv_vmseq_vx_u32m8_b4(
> +		__riscv_vand_vx_u32m8(vtbl_entry, RTE_LPM_VALID_EXT_ENTRY_BITMASK, vl),
> +		RTE_LPM_VALID_EXT_ENTRY_BITMASK, vl);
> +
> +	vuint32m8_t vtbl8_index = __riscv_vsll_vx_u32m8(
> +		__riscv_vadd_vv_u32m8(
> +			__riscv_vsll_vx_u32m8(
> +				__riscv_vand_vx_u32m8(vtbl_entry, 0x00FFFFFF, vl), 8, vl),
> +			__riscv_vand_vx_u32m8(v_ip, 0x000000FF, vl), vl),
> +		2, vl);
> +
> +	vtbl_entry = __riscv_vluxei32_v_u32m8_mu(
> +		mask, vtbl_entry, (const uint32_t *)(lpm->tbl8), vtbl8_index, vl);

Does this code validate that vtbl8_index stays within bounds of
lpm->tbl8? Can an out-of-bounds access occur with malformed or malicious
IP addresses?

> +
> +	vuint32m8_t vnext_hop = __riscv_vand_vx_u32m8(vtbl_entry, 0x00FFFFFF, vl);
> +	mask = __riscv_vmseq_vx_u32m8_b4(
> +		__riscv_vand_vx_u32m8(vtbl_entry, RTE_LPM_LOOKUP_SUCCESS, vl), 0, vl);
> +
> +	vnext_hop = __riscv_vmerge_vxm_u32m8(vnext_hop, defv, mask, vl);
> +
> +	__riscv_vse32_v_u32m8(hop, vnext_hop, vl);
> +}
> +
> +/* Can be increased further for VLEN > 256 */
> +#define RVV_MAX_BURST 64U
> +
> +static uint16_t
> +ip4_lookup_node_process_vec(struct rte_graph *graph, struct rte_node *node,
> +			void **objs, uint16_t nb_objs)
> +{
> +	struct rte_mbuf **pkts;
> +	struct rte_lpm *lpm = IP4_LOOKUP_NODE_LPM(node->ctx);
> +	const int dyn = IP4_LOOKUP_NODE_PRIV1_OFF(node->ctx);
> +	rte_edge_t next_index;
> +	void **to_next, **from;
> +	uint16_t last_spec = 0;
> +	uint16_t n_left_from;
> +	uint16_t held = 0;
> +	uint32_t drop_nh;
> +
> +	/* Temporary arrays for batch processing */
> +	uint32_t ips[RVV_MAX_BURST];
> +	uint32_t res[RVV_MAX_BURST];
> +	rte_edge_t next_hops[RVV_MAX_BURST];

These stack-allocated arrays total 516 bytes (64*4 + 64*4 + 64*2). Does
this exceed any stack size limits in typical DPDK deployments?

> +
> +	/* Speculative next */
> +	next_index = RTE_NODE_IP4_LOOKUP_NEXT_REWRITE;
> +	/* Drop node */
> +	drop_nh = ((uint32_t)RTE_NODE_IP4_LOOKUP_NEXT_PKT_DROP) << 16;
> +
> +	pkts = (struct rte_mbuf **)objs;
> +	from = objs;
> +	n_left_from = nb_objs;
> +
> +	/* Get stream for the speculated next node */
> +	to_next = rte_node_next_stream_get(graph, node, next_index, nb_objs);
> +
> +	while (n_left_from > 0) {
> +		rte_edge_t fix_spec = 0;
> +
> +		size_t vl = __riscv_vsetvl_e32m8(RTE_MIN(n_left_from, RVV_MAX_BURST));
> +
> +		/* Extract IP addresses and metadata from current batch */
> +		for (size_t i = 0; i < vl; i++) {
> +			struct rte_ipv4_hdr *ipv4_hdr =
> +				rte_pktmbuf_mtod_offset(pkts[i], struct rte_ipv4_hdr *,
> +						sizeof(struct rte_ether_hdr));
> +			ips[i] = ipv4_hdr->dst_addr;

Does this code check if the mbuf contains a valid IPv4 header at the
expected offset? Can this dereference invalid memory if the packet is
too short or malformed?

> +			node_mbuf_priv1(pkts[i], dyn)->cksum = ipv4_hdr->hdr_checksum;
> +			node_mbuf_priv1(pkts[i], dyn)->ttl = ipv4_hdr->time_to_live;
> +		}
> +
> +		/* Perform LPM lookup */
> +		rte_lpm_lookup_vec(lpm, ips, res, vl, drop_nh);
> +
> +		for (size_t i = 0; i < vl; i++) {
> +			/* Update statistics */
> +			if ((res[i] >> 16) == (drop_nh >> 16))
> +				NODE_INCREMENT_XSTAT_ID(node, 0, 1, 1);

Does the coding style permit implicit boolean comparisons? Section
2.1.4.2 requires explicit comparisons. Should this be:

    if ((res[i] >> 16) == (drop_nh >> 16) ? 1 : 0)

or is the macro handling this correctly?

> +
> +			/* Extract next hop and next node */
> +			node_mbuf_priv1(pkts[i], dyn)->nh = res[i] & 0xFFFF;
> +			next_hops[i] = res[i] >> 16;
> +
> +			/* Check speculation */
> +			fix_spec |= (next_index ^ next_hops[i]);
> +		}
> +
> +		if (unlikely(fix_spec)) {
> +			/* Copy successfully speculated packets before this batch */
> +			memcpy(to_next, from, last_spec * sizeof(from[0]));
> +			from += last_spec;
> +			to_next += last_spec;
> +			held += last_spec;
> +			last_spec = 0;
> +
> +			/* Process each packet in current batch individually */
> +			for (size_t i = 0; i < vl; i++) {
> +				if (next_index == next_hops[i]) {
> +					*to_next++ = from[i];
> +					held++;
> +				} else {
> +					rte_node_enqueue_x1(graph, node, next_hops[i], from[i]);
> +				}
> +			}
> +
> +			from += vl;
> +		} else {
> +			last_spec += vl;
> +		}
> +
> +		pkts += vl;
> +		n_left_from -= vl;
> +	}
> +
> +	/* Handle successfully speculated packets */
> +	if (likely(last_spec == nb_objs)) {
> +		rte_node_next_stream_move(graph, node, next_index);
> +		return nb_objs;
> +	}
> +
> +	held += last_spec;
> +	memcpy(to_next, from, last_spec * sizeof(from[0]));
> +	rte_node_next_stream_put(graph, node, next_index, held);
> +
> +	return nb_objs;
> +}
> +#endif

The function lacks any error handling. Does rte_node_next_stream_get()
always succeed? Should there be validation that to_next is not NULL?


More information about the test-report mailing list