[PATCH] node: lookup with RISC-V vector extension

Sun Yuechi sunyuechi at iscas.ac.cn
Sun Nov 16 16:50:01 CET 2025


Implement the ip4_lookup_node_process_vec function for the RISC-V
architecture using the RISC-V Vector Extension (RVV) instruction set.

Signed-off-by: Sun Yuechi <sunyuechi at iscas.ac.cn>
Signed-off-by: Zijian <zijian.oerv at isrc.iscas.ac.cn>
---
 lib/eal/riscv/include/rte_vect.h |   2 +-
 lib/node/ip4_lookup.c            |   5 +-
 lib/node/ip4_lookup_rvv.h        | 167 +++++++++++++++++++++++++++++++
 3 files changed, 172 insertions(+), 2 deletions(-)
 create mode 100644 lib/node/ip4_lookup_rvv.h

diff --git a/lib/eal/riscv/include/rte_vect.h b/lib/eal/riscv/include/rte_vect.h
index a4357e266a..4d16082449 100644
--- a/lib/eal/riscv/include/rte_vect.h
+++ b/lib/eal/riscv/include/rte_vect.h
@@ -19,7 +19,7 @@
 extern "C" {
 #endif
 
-#define RTE_VECT_DEFAULT_SIMD_BITWIDTH RTE_VECT_SIMD_DISABLED
+#define RTE_VECT_DEFAULT_SIMD_BITWIDTH RTE_VECT_SIMD_128
 
 typedef int32_t		xmm_t __attribute__((vector_size(16)));
 
diff --git a/lib/node/ip4_lookup.c b/lib/node/ip4_lookup.c
index 9673a0d78d..d3aed089f4 100644
--- a/lib/node/ip4_lookup.c
+++ b/lib/node/ip4_lookup.c
@@ -44,6 +44,8 @@ static struct ip4_lookup_node_main ip4_lookup_nm;
 #include "ip4_lookup_neon.h"
 #elif defined(RTE_ARCH_X86)
 #include "ip4_lookup_sse.h"
+#elif defined(RTE_ARCH_RISCV) && defined(RTE_RISCV_FEATURE_V)
+#include "ip4_lookup_rvv.h"
 #endif
 
 static uint16_t
@@ -211,7 +213,8 @@ ip4_lookup_node_init(const struct rte_graph *graph, struct rte_node *node)
 	IP4_LOOKUP_NODE_LPM(node->ctx) = ip4_lookup_nm.lpm_tbl[graph->socket];
 	IP4_LOOKUP_NODE_PRIV1_OFF(node->ctx) = dyn;
 
-#if defined(__ARM_NEON) || defined(RTE_ARCH_X86)
+#if defined(__ARM_NEON) || defined(RTE_ARCH_X86) || \
+	(defined(RTE_ARCH_RISCV) && defined(RTE_RISCV_FEATURE_V))
 	if (rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128)
 		node->process = ip4_lookup_node_process_vec;
 #endif
diff --git a/lib/node/ip4_lookup_rvv.h b/lib/node/ip4_lookup_rvv.h
new file mode 100644
index 0000000000..a74e4fa204
--- /dev/null
+++ b/lib/node/ip4_lookup_rvv.h
@@ -0,0 +1,167 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS).
+ */
+
+#ifndef __INCLUDE_IP4_LOOKUP_RVV_H__
+#define __INCLUDE_IP4_LOOKUP_RVV_H__
+
+#define RTE_LPM_LOOKUP_SUCCESS 0x01000000
+#define RTE_LPM_VALID_EXT_ENTRY_BITMASK 0x03000000
+
+/*
+ * Byte-swap every 32-bit lane of 'v' (network <-> host order on
+ * little-endian RISC-V), processing 'vl' lanes.
+ *
+ * Built only from shift/and/or intrinsics: 'low16' carries bytes 0-1
+ * moved up to positions 3-2, 'high16' carries bytes 2-3 moved down to
+ * positions 1-0; OR-ing the halves yields the fully swapped word.
+ */
+static __rte_always_inline vuint32m8_t
+bswap32_vec(vuint32m8_t v, size_t vl)
+{
+	vuint32m8_t low16 = __riscv_vor_vv_u32m8(
+		__riscv_vsll_vx_u32m8(__riscv_vand_vx_u32m8(v, 0xFF, vl), 24, vl),
+		__riscv_vsll_vx_u32m8(__riscv_vand_vx_u32m8(v, 0xFF00, vl), 8, vl),
+		vl),
+		vl);
+
+	vuint32m8_t high16 = __riscv_vor_vv_u32m8(
+		__riscv_vsrl_vx_u32m8(__riscv_vand_vx_u32m8(v, 0xFF0000, vl), 8, vl),
+		__riscv_vsrl_vx_u32m8(v, 24, vl),
+		vl);
+
+	return __riscv_vor_vv_u32m8(low16, high16, vl);
+}
+
+/*
+ * Vectorized LPM lookup: resolve 'vl' IPv4 destination addresses
+ * (network byte order in 'ips') to 32-bit result values written to
+ * 'hop'.  Lanes whose final entry lacks RTE_LPM_LOOKUP_SUCCESS are
+ * filled with 'defv' instead.
+ *
+ * Mirrors the scalar rte_lpm_lookup(): index tbl24 with the top 24
+ * address bits; entries with both VALID and EXT bits set are
+ * re-resolved through tbl8 using the low 8 address bits.
+ */
+static __rte_always_inline void
+rte_lpm_lookup_vec(const struct rte_lpm *lpm, const uint32_t *ips,
+			uint32_t *hop, size_t vl, uint32_t defv)
+{
+	/* Load IP addresses (network byte order) and convert to host order */
+	vuint32m8_t v_ip = bswap32_vec(__riscv_vle32_v_u32m8(ips, vl), vl);
+
+	/* tbl24 index = ip >> 8; << 2 scales it to the byte offset
+	 * expected by the indexed (vluxei32) gather below. */
+	vuint32m8_t v_tbl24_byte_offset = __riscv_vsll_vx_u32m8(
+			__riscv_vsrl_vx_u32m8(v_ip, 8, vl), 2, vl);
+
+	vuint32m8_t vtbl_entry = __riscv_vluxei32_v_u32m8(
+		(const uint32_t *)lpm->tbl24, v_tbl24_byte_offset, vl);
+
+	/* Lanes with both VALID and EXT_ENTRY bits set must go to tbl8 */
+	vbool4_t mask = __riscv_vmseq_vx_u32m8_b4(
+		__riscv_vand_vx_u32m8(vtbl_entry, RTE_LPM_VALID_EXT_ENTRY_BITMASK, vl),
+		RTE_LPM_VALID_EXT_ENTRY_BITMASK, vl);
+
+	/* tbl8 byte offset = ((group_index << 8) + (ip & 0xFF)) << 2,
+	 * where group_index is the low 24 bits of the tbl24 entry and
+	 * each tbl8 group holds 256 entries. */
+	vuint32m8_t vtbl8_index = __riscv_vsll_vx_u32m8(
+		__riscv_vadd_vv_u32m8(
+			__riscv_vsll_vx_u32m8(
+				__riscv_vand_vx_u32m8(vtbl_entry, 0x00FFFFFF, vl), 8, vl),
+			__riscv_vand_vx_u32m8(v_ip, 0x000000FF, vl), vl),
+		2, vl);
+
+	/* Masked gather: only extended lanes re-load from tbl8, other
+	 * lanes keep their tbl24 entry (mask-undisturbed policy). */
+	vtbl_entry = __riscv_vluxei32_v_u32m8_mu(
+		mask, vtbl_entry, (const uint32_t *)(lpm->tbl8), vtbl8_index, vl);
+
+	/* Low 24 bits of the entry hold the next hop / edge value */
+	vuint32m8_t vnext_hop = __riscv_vand_vx_u32m8(vtbl_entry, 0x00FFFFFF, vl);
+	mask = __riscv_vmseq_vx_u32m8_b4(
+		__riscv_vand_vx_u32m8(vtbl_entry, RTE_LPM_LOOKUP_SUCCESS, vl), 0, vl);
+
+	/* Unsuccessful lanes (SUCCESS bit clear) get the default value */
+	vnext_hop = __riscv_vmerge_vxm_u32m8(vnext_hop, defv, mask, vl);
+
+	__riscv_vse32_v_u32m8(hop, vnext_hop, vl);
+}
+
+/* Can be increased further for VLEN > 256 */
+#define RVV_MAX_BURST 64U
+
+/*
+ * RVV process callback for the ip4_lookup graph node.
+ *
+ * Packets are handled in batches of up to RVV_MAX_BURST: the IPv4
+ * destination addresses are gathered into 'ips', resolved with
+ * rte_lpm_lookup_vec(), and the per-packet results are written back to
+ * mbuf private data.  The standard graph-node speculation scheme is
+ * used: packets whose resolved edge matches the speculated 'next_index'
+ * stay in the pre-fetched next stream; on a mismatch the batch falls
+ * back to per-packet enqueue via rte_node_enqueue_x1().
+ *
+ * Returns the number of objects processed (always nb_objs).
+ */
+static uint16_t
+ip4_lookup_node_process_vec(struct rte_graph *graph, struct rte_node *node,
+			void **objs, uint16_t nb_objs)
+{
+	struct rte_mbuf **pkts;
+	struct rte_lpm *lpm = IP4_LOOKUP_NODE_LPM(node->ctx);
+	const int dyn = IP4_LOOKUP_NODE_PRIV1_OFF(node->ctx);
+	rte_edge_t next_index;
+	void **to_next, **from;
+	uint16_t last_spec = 0;	/* packets speculated correctly so far */
+	uint16_t n_left_from;
+	uint16_t held = 0;	/* packets already placed in the next stream */
+	uint32_t drop_nh;
+
+	/* Temporary arrays for batch processing */
+	uint32_t ips[RVV_MAX_BURST];
+	uint32_t res[RVV_MAX_BURST];
+	rte_edge_t next_hops[RVV_MAX_BURST];
+
+	/* Speculative next */
+	next_index = RTE_NODE_IP4_LOOKUP_NEXT_REWRITE;
+	/* Drop node */
+	drop_nh = ((uint32_t)RTE_NODE_IP4_LOOKUP_NEXT_PKT_DROP) << 16;
+
+	pkts = (struct rte_mbuf **)objs;
+	from = objs;
+	n_left_from = nb_objs;
+
+	/* Get stream for the speculated next node */
+	to_next = rte_node_next_stream_get(graph, node, next_index, nb_objs);
+
+	while (n_left_from > 0) {
+		rte_edge_t fix_spec = 0;
+
+		/* vl <= RVV_MAX_BURST: vsetvl never exceeds its AVL, so the
+		 * stack arrays above cannot overflow. */
+		size_t vl = __riscv_vsetvl_e32m8(RTE_MIN(n_left_from, RVV_MAX_BURST));
+
+		/* Extract IP addresses and metadata from current batch;
+		 * cksum/ttl are cached for downstream nodes (presumably the
+		 * rewrite node — matches the scalar/NEON/SSE variants). */
+		for (size_t i = 0; i < vl; i++) {
+			struct rte_ipv4_hdr *ipv4_hdr =
+				rte_pktmbuf_mtod_offset(pkts[i], struct rte_ipv4_hdr *,
+						sizeof(struct rte_ether_hdr));
+			ips[i] = ipv4_hdr->dst_addr;
+			node_mbuf_priv1(pkts[i], dyn)->cksum = ipv4_hdr->hdr_checksum;
+			node_mbuf_priv1(pkts[i], dyn)->ttl = ipv4_hdr->time_to_live;
+		}
+
+		/* Perform LPM lookup; failed lanes return drop_nh */
+		rte_lpm_lookup_vec(lpm, ips, res, vl, drop_nh);
+
+		for (size_t i = 0; i < vl; i++) {
+			/* Update statistics (xstat 0 counts LPM misses) */
+			if ((res[i] >> 16) == (drop_nh >> 16))
+				NODE_INCREMENT_XSTAT_ID(node, 0, 1, 1);
+
+			/* Extract next hop (low 16 bits) and next node edge
+			 * (high 16 bits) from the lookup result */
+			node_mbuf_priv1(pkts[i], dyn)->nh = res[i] & 0xFFFF;
+			next_hops[i] = res[i] >> 16;
+
+			/* Check speculation: non-zero XOR means at least one
+			 * packet resolved to a different edge */
+			fix_spec |= (next_index ^ next_hops[i]);
+		}
+
+		if (unlikely(fix_spec)) {
+			/* Copy successfully speculated packets before this batch */
+			rte_memcpy(to_next, from, last_spec * sizeof(from[0]));
+			from += last_spec;
+			to_next += last_spec;
+			held += last_spec;
+			last_spec = 0;
+
+			/* Process each packet in current batch individually */
+			for (size_t i = 0; i < vl; i++) {
+				if (next_index == next_hops[i]) {
+					*to_next++ = from[i];
+					held++;
+				} else {
+					rte_node_enqueue_x1(graph, node, next_hops[i], from[i]);
+				}
+			}
+
+			from += vl;
+		} else {
+			/* Whole batch matched the speculation; defer the copy */
+			last_spec += vl;
+		}
+
+		pkts += vl;
+		n_left_from -= vl;
+	}
+
+	/* Handle successfully speculated packets: if everything matched,
+	 * hand the untouched stream over without any copying */
+	if (likely(last_spec == nb_objs)) {
+		rte_node_next_stream_move(graph, node, next_index);
+		return nb_objs;
+	}
+
+	/* Mixed case: flush the remaining correctly speculated tail */
+	held += last_spec;
+	rte_memcpy(to_next, from, last_spec * sizeof(from[0]));
+	rte_node_next_stream_put(graph, node, next_index, held);
+
+	return nb_objs;
+}
+#endif
-- 
2.51.2



More information about the dev mailing list