[PATCH] node: lookup with RISC-V vector extension
Sun Yuechi
sunyuechi at iscas.ac.cn
Sun Nov 16 16:50:01 CET 2025
Implement the ip4_lookup_node_process_vec() function for the RISC-V
architecture using the RISC-V Vector (RVV) extension intrinsics.

The vector path converts destination addresses from network to host
byte order, resolves them through the LPM tbl24/tbl8 tables with
indexed loads, and reuses the speculative next-node pattern of the
existing NEON and SSE lookup variants.
Signed-off-by: Sun Yuechi <sunyuechi at iscas.ac.cn>
Signed-off-by: Zijian <zijian.oerv at isrc.iscas.ac.cn>
---
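Notes (for reviewers; git am drops everything after the '---' marker):

rte_lpm_lookup_vec() below vectorises the usual two-level LPM walk.
As a scalar reference, this is what it computes per element, using the
flag macros the new header defines and the rte_lpm tbl24/tbl8 fields
the vector code indexes. lpm_lookup_one() is a hypothetical helper
written only for illustration; it is not part of the patch:

	#include <stdint.h>
	#include <rte_lpm.h>

	/* Hypothetical scalar sketch of one lookup, for review only. */
	static inline uint32_t
	lpm_lookup_one(const struct rte_lpm *lpm, uint32_t ip, uint32_t defv)
	{
		/* tbl24 is indexed by the top 24 bits of the address */
		uint32_t e = ((const uint32_t *)lpm->tbl24)[ip >> 8];

		/* valid + ext flags both set: entry points into a tbl8 group */
		if ((e & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
				RTE_LPM_VALID_EXT_ENTRY_BITMASK) {
			uint32_t i8 = ((e & 0x00FFFFFF) << 8) + (ip & 0xFF);
			e = ((const uint32_t *)lpm->tbl8)[i8];
		}

		/* no matching route: fall back to the default (drop) hop */
		return (e & RTE_LPM_LOOKUP_SUCCESS) ? (e & 0x00FFFFFF) : defv;
	}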
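The burst loop strip-mines with vsetvl, so vl shrinks automatically on
the final iteration and no scalar tail loop is needed. A minimal
standalone sketch of that pattern (an assumed example, not taken from
the patch):

	#include <stdint.h>
	#include <stddef.h>
	#include <riscv_vector.h>

	/* Increment every element of a[], vl elements per iteration. */
	static void
	add_one(uint32_t *a, size_t n)
	{
		while (n > 0) {
			/* vl = min(n, VLMAX) for e32 elements at LMUL=8 */
			size_t vl = __riscv_vsetvl_e32m8(n);
			vuint32m8_t v = __riscv_vle32_v_u32m8(a, vl);
			v = __riscv_vadd_vx_u32m8(v, 1, vl);
			__riscv_vse32_v_u32m8(a, v, vl);
			a += vl;
			n -= vl;
		}
	}
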
 lib/eal/riscv/include/rte_vect.h |   2 +-
 lib/node/ip4_lookup.c            |   5 +-
 lib/node/ip4_lookup_rvv.h        | 167 +++++++++++++++++++++++++++++++
 3 files changed, 172 insertions(+), 2 deletions(-)
create mode 100644 lib/node/ip4_lookup_rvv.h
diff --git a/lib/eal/riscv/include/rte_vect.h b/lib/eal/riscv/include/rte_vect.h
index a4357e266a..4d16082449 100644
--- a/lib/eal/riscv/include/rte_vect.h
+++ b/lib/eal/riscv/include/rte_vect.h
@@ -19,7 +19,7 @@
extern "C" {
#endif
-#define RTE_VECT_DEFAULT_SIMD_BITWIDTH RTE_VECT_SIMD_DISABLED
+#define RTE_VECT_DEFAULT_SIMD_BITWIDTH RTE_VECT_SIMD_128
typedef int32_t xmm_t __attribute__((vector_size(16)));
diff --git a/lib/node/ip4_lookup.c b/lib/node/ip4_lookup.c
index 9673a0d78d..d3aed089f4 100644
--- a/lib/node/ip4_lookup.c
+++ b/lib/node/ip4_lookup.c
@@ -44,6 +44,8 @@ static struct ip4_lookup_node_main ip4_lookup_nm;
#include "ip4_lookup_neon.h"
#elif defined(RTE_ARCH_X86)
#include "ip4_lookup_sse.h"
+#elif defined(RTE_ARCH_RISCV) && defined(RTE_RISCV_FEATURE_V)
+#include "ip4_lookup_rvv.h"
#endif
static uint16_t
@@ -211,7 +213,8 @@ ip4_lookup_node_init(const struct rte_graph *graph, struct rte_node *node)
IP4_LOOKUP_NODE_LPM(node->ctx) = ip4_lookup_nm.lpm_tbl[graph->socket];
IP4_LOOKUP_NODE_PRIV1_OFF(node->ctx) = dyn;
-#if defined(__ARM_NEON) || defined(RTE_ARCH_X86)
+#if defined(__ARM_NEON) || defined(RTE_ARCH_X86) || \
+ (defined(RTE_ARCH_RISCV) && defined(RTE_RISCV_FEATURE_V))
if (rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128)
node->process = ip4_lookup_node_process_vec;
#endif
diff --git a/lib/node/ip4_lookup_rvv.h b/lib/node/ip4_lookup_rvv.h
new file mode 100644
index 0000000000..a74e4fa204
--- /dev/null
+++ b/lib/node/ip4_lookup_rvv.h
@@ -0,0 +1,167 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS).
+ */
+
+#ifndef __INCLUDE_IP4_LOOKUP_RVV_H__
+#define __INCLUDE_IP4_LOOKUP_RVV_H__
+
+#define RTE_LPM_LOOKUP_SUCCESS 0x01000000
+#define RTE_LPM_VALID_EXT_ENTRY_BITMASK 0x03000000
+
+static __rte_always_inline vuint32m8_t
+bswap32_vec(vuint32m8_t v, size_t vl)
+{
+ vuint32m8_t low16 = __riscv_vor_vv_u32m8(
+ __riscv_vsll_vx_u32m8(__riscv_vand_vx_u32m8(v, 0xFF, vl), 24, vl),
+ __riscv_vsll_vx_u32m8(__riscv_vand_vx_u32m8(v, 0xFF00, vl), 8, vl),
+ vl);
+
+ vuint32m8_t high16 = __riscv_vor_vv_u32m8(
+ __riscv_vsrl_vx_u32m8(__riscv_vand_vx_u32m8(v, 0xFF0000, vl), 8, vl),
+ __riscv_vsrl_vx_u32m8(v, 24, vl),
+ vl);
+
+ return __riscv_vor_vv_u32m8(low16, high16, vl);
+}
+
+static __rte_always_inline void
+rte_lpm_lookup_vec(const struct rte_lpm *lpm, const uint32_t *ips,
+ uint32_t *hop, size_t vl, uint32_t defv)
+{
+ /* Load IP addresses and swap from network to host byte order */
+ vuint32m8_t v_ip = bswap32_vec(__riscv_vle32_v_u32m8(ips, vl), vl);
+
+ vuint32m8_t v_tbl24_byte_offset = __riscv_vsll_vx_u32m8(
+ __riscv_vsrl_vx_u32m8(v_ip, 8, vl), 2, vl);
+
+ vuint32m8_t vtbl_entry = __riscv_vluxei32_v_u32m8(
+ (const uint32_t *)lpm->tbl24, v_tbl24_byte_offset, vl);
+
+ vbool4_t mask = __riscv_vmseq_vx_u32m8_b4(
+ __riscv_vand_vx_u32m8(vtbl_entry, RTE_LPM_VALID_EXT_ENTRY_BITMASK, vl),
+ RTE_LPM_VALID_EXT_ENTRY_BITMASK, vl);
+
+ vuint32m8_t vtbl8_index = __riscv_vsll_vx_u32m8(
+ __riscv_vadd_vv_u32m8(
+ __riscv_vsll_vx_u32m8(
+ __riscv_vand_vx_u32m8(vtbl_entry, 0x00FFFFFF, vl), 8, vl),
+ __riscv_vand_vx_u32m8(v_ip, 0x000000FF, vl), vl),
+ 2, vl);
+
+ vtbl_entry = __riscv_vluxei32_v_u32m8_mu(
+ mask, vtbl_entry, (const uint32_t *)(lpm->tbl8), vtbl8_index, vl);
+
+ vuint32m8_t vnext_hop = __riscv_vand_vx_u32m8(vtbl_entry, 0x00FFFFFF, vl);
+ mask = __riscv_vmseq_vx_u32m8_b4(
+ __riscv_vand_vx_u32m8(vtbl_entry, RTE_LPM_LOOKUP_SUCCESS, vl), 0, vl);
+
+ vnext_hop = __riscv_vmerge_vxm_u32m8(vnext_hop, defv, mask, vl);
+
+ __riscv_vse32_v_u32m8(hop, vnext_hop, vl);
+}
+
+/* Matches VLMAX for VLEN = 256 at e32/LMUL=8; can be increased for larger VLEN */
+#define RVV_MAX_BURST 64U
+
+static uint16_t
+ip4_lookup_node_process_vec(struct rte_graph *graph, struct rte_node *node,
+ void **objs, uint16_t nb_objs)
+{
+ struct rte_mbuf **pkts;
+ struct rte_lpm *lpm = IP4_LOOKUP_NODE_LPM(node->ctx);
+ const int dyn = IP4_LOOKUP_NODE_PRIV1_OFF(node->ctx);
+ rte_edge_t next_index;
+ void **to_next, **from;
+ uint16_t last_spec = 0;
+ uint16_t n_left_from;
+ uint16_t held = 0;
+ uint32_t drop_nh;
+
+ /* Temporary arrays for batch processing */
+ uint32_t ips[RVV_MAX_BURST];
+ uint32_t res[RVV_MAX_BURST];
+ rte_edge_t next_hops[RVV_MAX_BURST];
+
+ /* Speculative next */
+ next_index = RTE_NODE_IP4_LOOKUP_NEXT_REWRITE;
+ /* Drop node */
+ drop_nh = ((uint32_t)RTE_NODE_IP4_LOOKUP_NEXT_PKT_DROP) << 16;
+
+ pkts = (struct rte_mbuf **)objs;
+ from = objs;
+ n_left_from = nb_objs;
+
+ /* Get stream for the speculated next node */
+ to_next = rte_node_next_stream_get(graph, node, next_index, nb_objs);
+
+ while (n_left_from > 0) {
+ rte_edge_t fix_spec = 0;
+
+ size_t vl = __riscv_vsetvl_e32m8(RTE_MIN(n_left_from, RVV_MAX_BURST));
+
+ /* Extract IP addresses and metadata from current batch */
+ for (size_t i = 0; i < vl; i++) {
+ struct rte_ipv4_hdr *ipv4_hdr =
+ rte_pktmbuf_mtod_offset(pkts[i], struct rte_ipv4_hdr *,
+ sizeof(struct rte_ether_hdr));
+ ips[i] = ipv4_hdr->dst_addr;
+ node_mbuf_priv1(pkts[i], dyn)->cksum = ipv4_hdr->hdr_checksum;
+ node_mbuf_priv1(pkts[i], dyn)->ttl = ipv4_hdr->time_to_live;
+ }
+
+ /* Perform LPM lookup */
+ rte_lpm_lookup_vec(lpm, ips, res, vl, drop_nh);
+
+ for (size_t i = 0; i < vl; i++) {
+ /* Update statistics */
+ if ((res[i] >> 16) == (drop_nh >> 16))
+ NODE_INCREMENT_XSTAT_ID(node, 0, 1, 1);
+
+ /* Extract next hop and next node */
+ node_mbuf_priv1(pkts[i], dyn)->nh = res[i] & 0xFFFF;
+ next_hops[i] = res[i] >> 16;
+
+ /* Check speculation */
+ fix_spec |= (next_index ^ next_hops[i]);
+ }
+
+ if (unlikely(fix_spec)) {
+ /* Copy successfully speculated packets before this batch */
+ rte_memcpy(to_next, from, last_spec * sizeof(from[0]));
+ from += last_spec;
+ to_next += last_spec;
+ held += last_spec;
+ last_spec = 0;
+
+ /* Process each packet in current batch individually */
+ for (size_t i = 0; i < vl; i++) {
+ if (next_index == next_hops[i]) {
+ *to_next++ = from[i];
+ held++;
+ } else {
+ rte_node_enqueue_x1(graph, node, next_hops[i], from[i]);
+ }
+ }
+
+ from += vl;
+ } else {
+ last_spec += vl;
+ }
+
+ pkts += vl;
+ n_left_from -= vl;
+ }
+
+ /* Handle successfully speculated packets */
+ if (likely(last_spec == nb_objs)) {
+ rte_node_next_stream_move(graph, node, next_index);
+ return nb_objs;
+ }
+
+ held += last_spec;
+ rte_memcpy(to_next, from, last_spec * sizeof(from[0]));
+ rte_node_next_stream_put(graph, node, next_index, held);
+
+ return nb_objs;
+}
+#endif /* __INCLUDE_IP4_LOOKUP_RVV_H__ */
--
2.51.2