[PATCH 1/3] fib6: fix tbl8 reservation drift in trie

Vladimir Medvedkin vladimir.medvedkin at intel.com
Fri Jun 5 15:03:17 CEST 2026


From: Maxime Leroy <maxime at leroys.fr>

trie_modify() maintained rsvd_tbl8s by computing a depth_diff from
the current RIB topology at both ADD and DEL. The two values diverge
when the RIB changes between an ADD and its later DEL (a covering
parent added or removed), and rsvd_tbl8s eventually wraps to
UINT32_MAX, rejecting all subsequent /25+ ADDs with -ENOSPC.

A helper count_empty_levels() was added to fix the issue.

Fixes: c3e12e0f0354 ("fib: add dataplane algorithm for IPv6")
Cc: stable at dpdk.org

Signed-off-by: Maxime Leroy <maxime at leroys.fr>
Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin at intel.com>
---
 lib/fib/trie.c          | 81 +++++++++++++++++++++--------------------
 lib/rib/rib6_internal.h | 24 ++++++++++++
 lib/rib/rte_rib6.c      | 12 +-----
 3 files changed, 66 insertions(+), 51 deletions(-)
 create mode 100644 lib/rib/rib6_internal.h

diff --git a/lib/fib/trie.c b/lib/fib/trie.c
index fa5d9ec6b0..e9f1141cef 100644
--- a/lib/fib/trie.c
+++ b/lib/fib/trie.c
@@ -15,6 +15,7 @@
 #include <rte_fib6.h>
 #include "fib_log.h"
 #include "trie.h"
+#include <rib6_internal.h>
 
 #ifdef CC_AVX512_SUPPORT
 
@@ -534,19 +535,45 @@ modify_dp(struct rte_trie_tbl *dp, struct rte_rib6 *rib,
 	return 0;
 }
 
+/*
+ * Count bumber of TBL8s that can be freed after deleting a prefix or allocated
+ * after adding a prefix.
+ */
+static uint8_t
+count_empty_levels(struct rte_rib6 *rib, const struct rte_ipv6_addr *ip, uint8_t depth)
+{
+	struct rte_rib6_node *cur = rte_rib6_lookup_exact(rib, ip, depth);
+	/* expect prefix exists */
+	if (cur == NULL)
+		return 0;
+
+	/* more specifics present */
+	if (cur->left != NULL || cur->right != NULL)
+		return 0;
+
+	struct rte_rib6_node *parent = cur->parent;
+	/* we know parent->depth lt a target cur->depth
+	 * also, there exists tbl8 path up to RTE_ALIGN_CEIL(parent->depth, 8)
+	 */
+	depth = RTE_MAX(depth, 24);
+	uint8_t parent_depth = (parent) ? RTE_MAX(parent->depth, 24) : 24;
+	uint8_t depth_diff = (RTE_ALIGN_CEIL(depth, 8) - RTE_ALIGN_CEIL(parent_depth, 8)) >> 3;
+
+	return depth_diff;
+}
+
 int
 trie_modify(struct rte_fib6 *fib, const struct rte_ipv6_addr *ip,
 	uint8_t depth, uint64_t next_hop, int op)
 {
 	struct rte_trie_tbl *dp;
 	struct rte_rib6 *rib;
-	struct rte_rib6_node *tmp = NULL;
 	struct rte_rib6_node *node;
 	struct rte_rib6_node *parent;
-	struct rte_ipv6_addr ip_masked, tmp_ip;
+	struct rte_ipv6_addr ip_masked;
 	int ret = 0;
 	uint64_t par_nh, node_nh;
-	uint8_t tmp_depth, depth_diff = 0, parent_depth = 24;
+	uint8_t new_levels;
 
 	if ((fib == NULL) || (ip == NULL) || (depth > RTE_IPV6_MAX_DEPTH))
 		return -EINVAL;
@@ -559,37 +586,6 @@ trie_modify(struct rte_fib6 *fib, const struct rte_ipv6_addr *ip,
 	ip_masked = *ip;
 	rte_ipv6_addr_mask(&ip_masked, depth);
 
-	if (depth > 24) {
-		tmp = rte_rib6_get_nxt(rib, &ip_masked,
-			RTE_ALIGN_FLOOR(depth, 8), NULL,
-			RTE_RIB6_GET_NXT_ALL);
-		if (tmp && op == RTE_FIB6_DEL) {
-			/* in case of delete operation, skip the prefix we are going to delete */
-			rte_rib6_get_ip(tmp, &tmp_ip);
-			rte_rib6_get_depth(tmp, &tmp_depth);
-			if (rte_ipv6_addr_eq(&ip_masked, &tmp_ip) && depth == tmp_depth)
-				tmp = rte_rib6_get_nxt(rib, &ip_masked,
-					RTE_ALIGN_FLOOR(depth, 8), tmp, RTE_RIB6_GET_NXT_ALL);
-		}
-
-		if (tmp == NULL) {
-			tmp = rte_rib6_lookup(rib, ip);
-			/**
-			 * in case of delete operation, lookup returns the prefix
-			 * we are going to delete. Find the parent.
-			 */
-			if (tmp && op == RTE_FIB6_DEL)
-				tmp = rte_rib6_lookup_parent(tmp);
-
-			if (tmp != NULL) {
-				rte_rib6_get_depth(tmp, &tmp_depth);
-				parent_depth = RTE_MAX(tmp_depth, 24);
-			}
-			depth_diff = RTE_ALIGN_CEIL(depth, 8) -
-				RTE_ALIGN_CEIL(parent_depth, 8);
-			depth_diff = depth_diff >> 3;
-		}
-	}
 	node = rte_rib6_lookup_exact(rib, &ip_masked, depth);
 	switch (op) {
 	case RTE_FIB6_ADD:
@@ -603,12 +599,16 @@ trie_modify(struct rte_fib6 *fib, const struct rte_ipv6_addr *ip,
 			return 0;
 		}
 
-		if ((depth > 24) && (dp->rsvd_tbl8s + depth_diff > dp->number_tbl8s))
-			return -ENOSPC;
-
 		node = rte_rib6_insert(rib, &ip_masked, depth);
 		if (node == NULL)
 			return -rte_errno;
+
+		new_levels = count_empty_levels(rib, &ip_masked, depth);
+		if (dp->rsvd_tbl8s + new_levels > dp->number_tbl8s) {
+			rte_rib6_remove(rib, &ip_masked, depth);
+			return -ENOSPC;
+		}
+
 		rte_rib6_set_nh(node, next_hop);
 		parent = rte_rib6_lookup_parent(node);
 		if (parent != NULL) {
@@ -622,7 +622,7 @@ trie_modify(struct rte_fib6 *fib, const struct rte_ipv6_addr *ip,
 			return ret;
 		}
 successfully_added:
-		dp->rsvd_tbl8s += depth_diff;
+		dp->rsvd_tbl8s += new_levels;
 		return 0;
 	case RTE_FIB6_DEL:
 		if (node == NULL)
@@ -640,9 +640,10 @@ trie_modify(struct rte_fib6 *fib, const struct rte_ipv6_addr *ip,
 
 		if (ret != 0)
 			return ret;
-		rte_rib6_remove(rib, ip, depth);
 
-		dp->rsvd_tbl8s -= depth_diff;
+		dp->rsvd_tbl8s -= count_empty_levels(rib, &ip_masked, depth);
+		rte_rib6_remove(rib, &ip_masked, depth);
+
 		return 0;
 	default:
 		break;
diff --git a/lib/rib/rib6_internal.h b/lib/rib/rib6_internal.h
new file mode 100644
index 0000000000..674befc152
--- /dev/null
+++ b/lib/rib/rib6_internal.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Vladimir Medvedkin <medvedkinv at gmail.com>
+ * Copyright(c) 2019 Intel Corporation
+ */
+
+#ifndef _RIB6_INTERNAL_H_
+#define _RIB6_INTERNAL_H_
+
+#include <stdint.h>
+
+#include <rte_ip6.h>
+
+struct rte_rib6_node {
+	struct rte_rib6_node	*left;
+	struct rte_rib6_node	*right;
+	struct rte_rib6_node	*parent;
+	uint64_t		nh;
+	struct rte_ipv6_addr	ip;
+	uint8_t			depth;
+	uint8_t			flag;
+	uint64_t ext[];
+};
+
+#endif /* _RIB6_INTERNAL_H_ */
diff --git a/lib/rib/rte_rib6.c b/lib/rib/rte_rib6.c
index ec8ff68e87..f9023fca59 100644
--- a/lib/rib/rte_rib6.c
+++ b/lib/rib/rte_rib6.c
@@ -19,6 +19,7 @@
 #include <rte_rib6.h>
 
 #include "rib_log.h"
+#include "rib6_internal.h"
 
 #define RTE_RIB_VALID_NODE	1
 /* Maximum length of a RIB6 name. */
@@ -30,17 +31,6 @@ static struct rte_tailq_elem rte_rib6_tailq = {
 };
 EAL_REGISTER_TAILQ(rte_rib6_tailq)
 
-struct rte_rib6_node {
-	struct rte_rib6_node	*left;
-	struct rte_rib6_node	*right;
-	struct rte_rib6_node	*parent;
-	uint64_t		nh;
-	struct rte_ipv6_addr	ip;
-	uint8_t			depth;
-	uint8_t			flag;
-	uint64_t ext[];
-};
-
 struct rte_rib6 {
 	char		name[RTE_RIB6_NAMESIZE];
 	struct rte_rib6_node	*tree;
-- 
2.43.0



More information about the dev mailing list