[RFC PATCH 3/4] fib6: add multi-VRF support

Vladimir Medvedkin vladimir.medvedkin at intel.com
Sun Mar 22 16:42:14 CET 2026


Add VRF (Virtual Routing and Forwarding) support to the IPv6
FIB library, allowing multiple independent routing tables
within a single FIB instance. The existing single-VRF APIs
remain backward compatible: rte_fib6_add(), rte_fib6_delete()
and rte_fib6_lookup_bulk() now operate on VRF 0.

Introduce the max_vrfs and vrf_default_nh fields in
struct rte_fib6_conf (max_vrfs of 0 or 1 preserves the
single-VRF layout) and add four new experimental APIs:
- rte_fib6_vrf_add() and rte_fib6_vrf_delete() for per-VRF
  route management
- rte_fib6_vrf_lookup_bulk() for bulk lookups with a
  per-packet VRF ID
- rte_fib6_vrf_get_rib() to retrieve a per-VRF RIB handle

Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin at intel.com>
---
 lib/fib/rte_fib6.c    | 166 ++++++++++++++++++++++++++-----
 lib/fib/rte_fib6.h    |  88 ++++++++++++++++-
 lib/fib/trie.c        | 158 +++++++++++++++++++++--------
 lib/fib/trie.h        |  51 +++++++---
 lib/fib/trie_avx512.c | 225 ++++++++++++++++++++++++++++++++++++++----
 lib/fib/trie_avx512.h |  39 +++++++-
 6 files changed, 617 insertions(+), 110 deletions(-)

diff --git a/lib/fib/rte_fib6.c b/lib/fib/rte_fib6.c
index 770becdb61..0d2b2927d5 100644
--- a/lib/fib/rte_fib6.c
+++ b/lib/fib/rte_fib6.c
@@ -22,6 +22,8 @@
 #include "trie.h"
 #include "fib_log.h"
 
+#define FIB6_MAX_LOOKUP_BULK 64U
+
 TAILQ_HEAD(rte_fib6_list, rte_tailq_entry);
 static struct rte_tailq_elem rte_fib6_tailq = {
 	.name = "RTE_FIB6",
@@ -40,51 +42,61 @@ EAL_REGISTER_TAILQ(rte_fib6_tailq)
 struct rte_fib6 {
 	char			name[RTE_FIB6_NAMESIZE];
 	enum rte_fib6_type	type;	/**< Type of FIB struct */
-	struct rte_rib6		*rib;	/**< RIB helper datastructure */
-	void			*dp;	/**< pointer to the dataplane struct*/
-	rte_fib6_lookup_fn_t	lookup;	/**< FIB lookup function */
+	uint16_t		num_vrfs;	/**< Number of VRFs */
+	struct rte_rib6		**ribs;	/**< RIB helper datastructures per VRF */
+	void			*dp;	/**< pointer to the dataplane struct */
+	rte_fib6_lookup_fn_t	lookup;	/**< lookup function */
 	rte_fib6_modify_fn_t	modify; /**< modify FIB datastructure */
-	uint64_t		def_nh;
+	uint64_t		*def_nh;	/**< Per-VRF default next hop array */
 };
 
 static void
-dummy_lookup(void *fib_p, const struct rte_ipv6_addr *ips,
+dummy_lookup(void *fib_p, const uint16_t *vrf_ids,
+	const struct rte_ipv6_addr *ips,
 	uint64_t *next_hops, const unsigned int n)
 {
 	unsigned int i;
 	struct rte_fib6 *fib = fib_p;
 	struct rte_rib6_node *node;
+	struct rte_rib6 *rib;
 
 	for (i = 0; i < n; i++) {
-		node = rte_rib6_lookup(fib->rib, &ips[i]);
+		RTE_ASSERT(vrf_ids[i] < fib->num_vrfs);
+		rib = rte_fib6_vrf_get_rib(fib, vrf_ids[i]);
+		node = rte_rib6_lookup(rib, &ips[i]);
 		if (node != NULL)
 			rte_rib6_get_nh(node, &next_hops[i]);
 		else
-			next_hops[i] = fib->def_nh;
+			next_hops[i] = fib->def_nh[vrf_ids[i]];
 	}
 }
 
 static int
-dummy_modify(struct rte_fib6 *fib, const struct rte_ipv6_addr *ip,
-	uint8_t depth, uint64_t next_hop, int op)
+dummy_modify(struct rte_fib6 *fib, uint16_t vrf_id,
+	const struct rte_ipv6_addr *ip, uint8_t depth,
+	uint64_t next_hop, int op)
 {
 	struct rte_rib6_node *node;
+	struct rte_rib6 *rib;
 	if ((fib == NULL) || (depth > RTE_IPV6_MAX_DEPTH))
 		return -EINVAL;
+	rib = rte_fib6_vrf_get_rib(fib, vrf_id);
+	if (rib == NULL)
+		return -EINVAL;
 
-	node = rte_rib6_lookup_exact(fib->rib, ip, depth);
+	node = rte_rib6_lookup_exact(rib, ip, depth);
 
 	switch (op) {
 	case RTE_FIB6_ADD:
 		if (node == NULL)
-			node = rte_rib6_insert(fib->rib, ip, depth);
+			node = rte_rib6_insert(rib, ip, depth);
 		if (node == NULL)
 			return -rte_errno;
 		return rte_rib6_set_nh(node, next_hop);
 	case RTE_FIB6_DEL:
 		if (node == NULL)
 			return -ENOENT;
-		rte_rib6_remove(fib->rib, ip, depth);
+		rte_rib6_remove(rib, ip, depth);
 		return 0;
 	}
 	return -EINVAL;
@@ -113,7 +125,6 @@ init_dataplane(struct rte_fib6 *fib, __rte_unused int socket_id,
 	default:
 		return -EINVAL;
 	}
-	return 0;
 }
 
 RTE_EXPORT_SYMBOL(rte_fib6_add)
@@ -124,7 +135,7 @@ rte_fib6_add(struct rte_fib6 *fib, const struct rte_ipv6_addr *ip,
 	if ((fib == NULL) || (ip == NULL) || (fib->modify == NULL) ||
 			(depth > RTE_IPV6_MAX_DEPTH))
 		return -EINVAL;
-	return fib->modify(fib, ip, depth, next_hop, RTE_FIB6_ADD);
+	return fib->modify(fib, 0, ip, depth, next_hop, RTE_FIB6_ADD);
 }
 
 RTE_EXPORT_SYMBOL(rte_fib6_delete)
@@ -135,7 +146,7 @@ rte_fib6_delete(struct rte_fib6 *fib, const struct rte_ipv6_addr *ip,
 	if ((fib == NULL) || (ip == NULL) || (fib->modify == NULL) ||
 			(depth > RTE_IPV6_MAX_DEPTH))
 		return -EINVAL;
-	return fib->modify(fib, ip, depth, 0, RTE_FIB6_DEL);
+	return fib->modify(fib, 0, ip, depth, 0, RTE_FIB6_DEL);
 }
 
 RTE_EXPORT_SYMBOL(rte_fib6_lookup_bulk)
@@ -144,23 +155,72 @@ rte_fib6_lookup_bulk(struct rte_fib6 *fib,
 	const struct rte_ipv6_addr *ips,
 	uint64_t *next_hops, int n)
 {
+	static const uint16_t zero_vrf_ids[FIB6_MAX_LOOKUP_BULK];
+	unsigned int off = 0;
+	unsigned int total = (unsigned int)n;
+
 	FIB6_RETURN_IF_TRUE((fib == NULL) || (ips == NULL) ||
 		(next_hops == NULL) || (fib->lookup == NULL), -EINVAL);
-	fib->lookup(fib->dp, ips, next_hops, n);
+
+	while (off < total) {
+		unsigned int chunk = RTE_MIN(total - off,
+			FIB6_MAX_LOOKUP_BULK);
+		fib->lookup(fib->dp, zero_vrf_ids, ips + off,
+			next_hops + off, chunk);
+		off += chunk;
+	}
+	return 0;
+}
+
+RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_fib6_vrf_lookup_bulk, 26.07)
+int
+rte_fib6_vrf_lookup_bulk(struct rte_fib6 *fib, const uint16_t *vrf_ids,
+	const struct rte_ipv6_addr *ips, uint64_t *next_hops, int n)
+{
+	FIB6_RETURN_IF_TRUE((fib == NULL) || (vrf_ids == NULL) || (ips == NULL) ||
+		(next_hops == NULL) || (fib->lookup == NULL), -EINVAL);
+
+	fib->lookup(fib->dp, vrf_ids, ips, next_hops, (unsigned int)n);
+
 	return 0;
 }
 
+RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_fib6_vrf_add, 26.07)
+int
+rte_fib6_vrf_add(struct rte_fib6 *fib, uint16_t vrf_id,
+	const struct rte_ipv6_addr *ip, uint8_t depth, uint64_t next_hop)
+{
+	if ((fib == NULL) || (ip == NULL) || (fib->modify == NULL) ||
+			(depth > RTE_IPV6_MAX_DEPTH))
+		return -EINVAL;
+	return fib->modify(fib, vrf_id, ip, depth, next_hop, RTE_FIB6_ADD);
+}
+
+RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_fib6_vrf_delete, 26.07)
+int
+rte_fib6_vrf_delete(struct rte_fib6 *fib, uint16_t vrf_id,
+	const struct rte_ipv6_addr *ip, uint8_t depth)
+{
+	if ((fib == NULL) || (ip == NULL) || (fib->modify == NULL) ||
+			(depth > RTE_IPV6_MAX_DEPTH))
+		return -EINVAL;
+	return fib->modify(fib, vrf_id, ip, depth, 0, RTE_FIB6_DEL);
+}
+
 RTE_EXPORT_SYMBOL(rte_fib6_create)
 struct rte_fib6 *
 rte_fib6_create(const char *name, int socket_id, struct rte_fib6_conf *conf)
 {
 	char mem_name[RTE_FIB6_NAMESIZE];
+	char rib_name[RTE_FIB6_NAMESIZE];
 	int ret;
 	struct rte_fib6 *fib = NULL;
 	struct rte_rib6 *rib = NULL;
 	struct rte_tailq_entry *te;
 	struct rte_fib6_list *fib_list;
 	struct rte_rib6_conf rib_conf;
+	uint16_t num_vrfs;
+	uint16_t vrf;
 
 	/* Check user arguments. */
 	if ((name == NULL) || (conf == NULL) || (conf->max_routes < 0) ||
@@ -172,13 +232,41 @@ rte_fib6_create(const char *name, int socket_id, struct rte_fib6_conf *conf)
 	rib_conf.ext_sz = conf->rib_ext_sz;
 	rib_conf.max_nodes = conf->max_routes * 2;
 
-	rib = rte_rib6_create(name, socket_id, &rib_conf);
-	if (rib == NULL) {
-		FIB_LOG(ERR,
-			"Can not allocate RIB %s", name);
+	num_vrfs = (conf->max_vrfs == 0) ? 1 : conf->max_vrfs;
+
+	struct rte_rib6 **ribs = rte_zmalloc_socket("FIB6_RIBS",
+		num_vrfs * sizeof(*ribs), RTE_CACHE_LINE_SIZE, socket_id);
+	if (ribs == NULL) {
+		FIB_LOG(ERR, "FIB6 %s RIB array allocation failed", name);
+		rte_errno = ENOMEM;
+		return NULL;
+	}
+
+	uint64_t *def_nh = rte_zmalloc_socket("FIB6_DEF_NH",
+		num_vrfs * sizeof(*def_nh), RTE_CACHE_LINE_SIZE, socket_id);
+	if (def_nh == NULL) {
+		FIB_LOG(ERR, "FIB6 %s default nexthop array allocation failed", name);
+		rte_errno = ENOMEM;
+		rte_free(ribs);
 		return NULL;
 	}
 
+	for (vrf = 0; vrf < num_vrfs; vrf++) {
+		if (num_vrfs == 1)
+			snprintf(rib_name, sizeof(rib_name), "%s", name);
+		else
+			snprintf(rib_name, sizeof(rib_name), "%s_vrf%u", name, vrf);
+		rib = rte_rib6_create(rib_name, socket_id, &rib_conf);
+		if (rib == NULL) {
+			FIB_LOG(ERR, "Can not allocate RIB %s", rib_name);
+			rte_errno = ENOMEM;
+			goto free_ribs;
+		}
+		ribs[vrf] = rib;
+		def_nh[vrf] = (conf->vrf_default_nh != NULL) ?
+			conf->vrf_default_nh[vrf] : conf->default_nh;
+	}
+
 	snprintf(mem_name, sizeof(mem_name), "FIB6_%s", name);
 	fib_list = RTE_TAILQ_CAST(rte_fib6_tailq.head, rte_fib6_list);
 
@@ -214,15 +302,17 @@ rte_fib6_create(const char *name, int socket_id, struct rte_fib6_conf *conf)
 		goto free_te;
 	}
 
+	fib->num_vrfs = num_vrfs;
+	fib->ribs = ribs;
+	fib->def_nh = def_nh;
+
 	rte_strlcpy(fib->name, name, sizeof(fib->name));
-	fib->rib = rib;
 	fib->type = conf->type;
-	fib->def_nh = conf->default_nh;
 	ret = init_dataplane(fib, socket_id, conf);
 	if (ret < 0) {
 		FIB_LOG(ERR,
-			"FIB dataplane struct %s memory allocation failed",
-			name);
+			"FIB dataplane struct %s memory allocation failed with err %d",
+			name, ret);
 		rte_errno = -ret;
 		goto free_fib;
 	}
@@ -240,7 +330,12 @@ rte_fib6_create(const char *name, int socket_id, struct rte_fib6_conf *conf)
 	rte_free(te);
 exit:
 	rte_mcfg_tailq_write_unlock();
-	rte_rib6_free(rib);
+free_ribs:
+	for (vrf = 0; vrf < num_vrfs; vrf++)
+		rte_rib6_free(ribs[vrf]);
+
+	rte_free(def_nh);
+	rte_free(ribs);
 
 	return NULL;
 }
@@ -309,7 +404,13 @@ rte_fib6_free(struct rte_fib6 *fib)
 	rte_mcfg_tailq_write_unlock();
 
 	free_dataplane(fib);
-	rte_rib6_free(fib->rib);
+	if (fib->ribs != NULL) {
+		uint16_t vrf;
+		for (vrf = 0; vrf < fib->num_vrfs; vrf++)
+			rte_rib6_free(fib->ribs[vrf]);
+	}
+	rte_free(fib->ribs);
+	rte_free(fib->def_nh);
 	rte_free(fib);
 	rte_free(te);
 }
@@ -325,7 +426,18 @@ RTE_EXPORT_SYMBOL(rte_fib6_get_rib)
 struct rte_rib6 *
 rte_fib6_get_rib(struct rte_fib6 *fib)
 {
-	return (fib == NULL) ? NULL : fib->rib;
+	return (fib == NULL || fib->ribs == NULL) ? NULL : fib->ribs[0];
+}
+
+RTE_EXPORT_EXPERIMENTAL_SYMBOL(rte_fib6_vrf_get_rib, 26.07)
+struct rte_rib6 *
+rte_fib6_vrf_get_rib(struct rte_fib6 *fib, uint16_t vrf_id)
+{
+	if (fib == NULL || fib->ribs == NULL)
+		return NULL;
+	if (vrf_id >= fib->num_vrfs)
+		return NULL;
+	return fib->ribs[vrf_id];
 }
 
 RTE_EXPORT_SYMBOL(rte_fib6_select_lookup)
diff --git a/lib/fib/rte_fib6.h b/lib/fib/rte_fib6.h
index 4527328bf0..864ec08c4e 100644
--- a/lib/fib/rte_fib6.h
+++ b/lib/fib/rte_fib6.h
@@ -55,11 +55,11 @@ enum rte_fib6_type {
 };
 
 /** Modify FIB function */
-typedef int (*rte_fib6_modify_fn_t)(struct rte_fib6 *fib,
+typedef int (*rte_fib6_modify_fn_t)(struct rte_fib6 *fib, uint16_t vrf_id,
 	const struct rte_ipv6_addr *ip, uint8_t depth,
 	uint64_t next_hop, int op);
 /** FIB bulk lookup function */
-typedef void (*rte_fib6_lookup_fn_t)(void *fib,
+typedef void (*rte_fib6_lookup_fn_t)(void *fib, const uint16_t *vrf_ids,
 	const struct rte_ipv6_addr *ips,
 	uint64_t *next_hops, const unsigned int n);
 
@@ -97,6 +97,10 @@ struct rte_fib6_conf {
 			uint32_t	num_tbl8;
 		} trie;
 	};
+	/** Number of VRFs to support (0 or 1 = single VRF for backward compat) */
+	uint16_t max_vrfs;
+	/** Per-VRF default nexthops (NULL = use default_nh for all) */
+	uint64_t *vrf_default_nh;
 };
 
 /** FIB RCU QSBR configuration structure. */
@@ -215,6 +219,70 @@ rte_fib6_lookup_bulk(struct rte_fib6 *fib,
 	const struct rte_ipv6_addr *ips,
 	uint64_t *next_hops, int n);
 
+/**
+ * Add a route to the FIB with VRF ID.
+ *
+ * @param fib
+ *   FIB object handle
+ * @param vrf_id
+ *   VRF ID (0 to max_vrfs-1)
+ * @param ip
+ *   IPv6 prefix address to be added to the FIB
+ * @param depth
+ *   Prefix length
+ * @param next_hop
+ *   Next hop to be added to the FIB
+ * @return
+ *   0 on success, negative value otherwise
+ */
+__rte_experimental
+int
+rte_fib6_vrf_add(struct rte_fib6 *fib, uint16_t vrf_id,
+	const struct rte_ipv6_addr *ip, uint8_t depth, uint64_t next_hop);
+
+/**
+ * Delete a rule from the FIB with VRF ID.
+ *
+ * @param fib
+ *   FIB object handle
+ * @param vrf_id
+ *   VRF ID (0 to max_vrfs-1)
+ * @param ip
+ *   IPv6 prefix address to be deleted from the FIB
+ * @param depth
+ *   Prefix length
+ * @return
+ *   0 on success, negative value otherwise
+ */
+__rte_experimental
+int
+rte_fib6_vrf_delete(struct rte_fib6 *fib, uint16_t vrf_id,
+	const struct rte_ipv6_addr *ip, uint8_t depth);
+
+/**
+ * Lookup multiple IP addresses in the FIB with per-packet VRF IDs.
+ *
+ * @param fib
+ *   FIB object handle
+ * @param vrf_ids
+ *   Array of VRF IDs corresponding to ips[] (0 to max_vrfs-1)
+ * @param ips
+ *   Array of IPv6s to be looked up in the FIB
+ * @param next_hops
+ *   Next hop of the most specific rule found for IP.
+ *   This is an array of eight byte values.
+ *   If the lookup for the given IP failed, then corresponding element would
+ *   contain default nexthop value configured for that VRF.
+ * @param n
+ *   Number of elements in vrf_ids/ips/next_hops arrays to lookup.
+ * @return
+ *   -EINVAL for incorrect arguments, otherwise 0
+ */
+__rte_experimental
+int
+rte_fib6_vrf_lookup_bulk(struct rte_fib6 *fib, const uint16_t *vrf_ids,
+	const struct rte_ipv6_addr *ips, uint64_t *next_hops, int n);
+
 /**
  * Get pointer to the dataplane specific struct
  *
@@ -228,7 +296,7 @@ void *
 rte_fib6_get_dp(struct rte_fib6 *fib);
 
 /**
- * Get pointer to the RIB6
+ * Get pointer to the RIB6 for VRF 0
  *
  * @param fib
  *   FIB object handle
@@ -239,6 +307,20 @@ rte_fib6_get_dp(struct rte_fib6 *fib);
 struct rte_rib6 *
 rte_fib6_get_rib(struct rte_fib6 *fib);
 
+/**
+ * Get the RIB for a specific VRF.
+ *
+ * @param fib
+ *   FIB object handle
+ * @param vrf_id
+ *   VRF ID (0 to max_vrfs-1)
+ * @return
+ *   RIB for the specified VRF or NULL on error.
+ */
+__rte_experimental
+struct rte_rib6 *
+rte_fib6_vrf_get_rib(struct rte_fib6 *fib, uint16_t vrf_id);
+
 /**
  * Set lookup function based on type
  *
diff --git a/lib/fib/trie.c b/lib/fib/trie.c
index fa5d9ec6b0..2acc9d9526 100644
--- a/lib/fib/trie.c
+++ b/lib/fib/trie.c
@@ -30,22 +30,27 @@ enum edge {
 };
 
 static inline rte_fib6_lookup_fn_t
-get_scalar_fn(enum rte_fib_trie_nh_sz nh_sz)
+get_scalar_fn(const struct rte_trie_tbl *dp, enum rte_fib_trie_nh_sz nh_sz)
 {
+	bool single_vrf = dp->num_vrfs <= 1;
+
 	switch (nh_sz) {
 	case RTE_FIB6_TRIE_2B:
-		return rte_trie_lookup_bulk_2b;
+		return single_vrf ? rte_trie_lookup_bulk_2b :
+			rte_trie_lookup_bulk_vrf_2b;
 	case RTE_FIB6_TRIE_4B:
-		return rte_trie_lookup_bulk_4b;
+		return single_vrf ? rte_trie_lookup_bulk_4b :
+			rte_trie_lookup_bulk_vrf_4b;
 	case RTE_FIB6_TRIE_8B:
-		return rte_trie_lookup_bulk_8b;
+		return single_vrf ? rte_trie_lookup_bulk_8b :
+			rte_trie_lookup_bulk_vrf_8b;
 	default:
 		return NULL;
 	}
 }
 
 static inline rte_fib6_lookup_fn_t
-get_vector_fn(enum rte_fib_trie_nh_sz nh_sz)
+get_vector_fn(const struct rte_trie_tbl *dp, enum rte_fib_trie_nh_sz nh_sz)
 {
 #ifdef CC_AVX512_SUPPORT
 	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) <= 0 ||
@@ -53,13 +58,40 @@ get_vector_fn(enum rte_fib_trie_nh_sz nh_sz)
 			rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW) <= 0 ||
 			rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_512)
 		return NULL;
+
+	if (dp->num_vrfs <= 1) {
+		switch (nh_sz) {
+		case RTE_FIB6_TRIE_2B:
+			return rte_trie_vec_lookup_bulk_2b;
+		case RTE_FIB6_TRIE_4B:
+			return rte_trie_vec_lookup_bulk_4b;
+		case RTE_FIB6_TRIE_8B:
+			return rte_trie_vec_lookup_bulk_8b;
+		default:
+			return NULL;
+		}
+	}
+
+	if (dp->num_vrfs >= 256) {
+		switch (nh_sz) {
+		case RTE_FIB6_TRIE_2B:
+			return rte_trie_vec_lookup_bulk_vrf_2b_large;
+		case RTE_FIB6_TRIE_4B:
+			return rte_trie_vec_lookup_bulk_vrf_4b_large;
+		case RTE_FIB6_TRIE_8B:
+			return rte_trie_vec_lookup_bulk_vrf_8b_large;
+		default:
+			return NULL;
+		}
+	}
+
 	switch (nh_sz) {
 	case RTE_FIB6_TRIE_2B:
-		return rte_trie_vec_lookup_bulk_2b;
+		return rte_trie_vec_lookup_bulk_vrf_2b;
 	case RTE_FIB6_TRIE_4B:
-		return rte_trie_vec_lookup_bulk_4b;
+		return rte_trie_vec_lookup_bulk_vrf_4b;
 	case RTE_FIB6_TRIE_8B:
-		return rte_trie_vec_lookup_bulk_8b;
+		return rte_trie_vec_lookup_bulk_vrf_8b;
 	default:
 		return NULL;
 	}
@@ -83,12 +115,12 @@ trie_get_lookup_fn(void *p, enum rte_fib6_lookup_type type)
 
 	switch (type) {
 	case RTE_FIB6_LOOKUP_TRIE_SCALAR:
-		return get_scalar_fn(nh_sz);
+		return get_scalar_fn(dp, nh_sz);
 	case RTE_FIB6_LOOKUP_TRIE_VECTOR_AVX512:
-		return get_vector_fn(nh_sz);
+		return get_vector_fn(dp, nh_sz);
 	case RTE_FIB6_LOOKUP_DEFAULT:
-		ret_fn = get_vector_fn(nh_sz);
-		return (ret_fn != NULL) ? ret_fn : get_scalar_fn(nh_sz);
+		ret_fn = get_vector_fn(dp, nh_sz);
+		return (ret_fn != NULL) ? ret_fn : get_scalar_fn(dp, nh_sz);
 	default:
 		return NULL;
 	}
@@ -310,19 +342,22 @@ recycle_root_path(struct rte_trie_tbl *dp, const uint8_t *ip_part,
 }
 
 static inline int
-build_common_root(struct rte_trie_tbl *dp, const struct rte_ipv6_addr *ip,
-	int common_bytes, void **tbl)
+build_common_root(struct rte_trie_tbl *dp, uint16_t vrf_id,
+	const struct rte_ipv6_addr *ip, int common_bytes, void **tbl)
 {
 	void *tbl_ptr = NULL;
 	uint64_t *cur_tbl;
 	uint64_t val;
 	int i, j, idx, prev_idx = 0;
+	uint64_t idx_tbl;
+	uint64_t tbl24_base = (uint64_t)vrf_id * TRIE_TBL24_NUM_ENT;
 
 	cur_tbl = dp->tbl24;
 	for (i = 3, j = 0; i <= common_bytes; i++) {
 		idx = get_idx(ip, prev_idx, i - j, j);
-		val = get_tbl_val_by_idx(cur_tbl, idx, dp->nh_sz);
-		tbl_ptr = get_tbl_p_by_idx(cur_tbl, idx, dp->nh_sz);
+		idx_tbl = (cur_tbl == dp->tbl24) ? idx + tbl24_base : (uint32_t)idx;
+		val = get_tbl_val_by_idx(cur_tbl, idx_tbl, dp->nh_sz);
+		tbl_ptr = get_tbl_p_by_idx(cur_tbl, idx_tbl, dp->nh_sz);
 		if ((val & TRIE_EXT_ENT) != TRIE_EXT_ENT) {
 			idx = tbl8_alloc(dp, val);
 			if (unlikely(idx < 0))
@@ -336,8 +371,11 @@ build_common_root(struct rte_trie_tbl *dp, const struct rte_ipv6_addr *ip,
 		j = i;
 		cur_tbl = dp->tbl8;
 	}
-	*tbl = get_tbl_p_by_idx(cur_tbl, prev_idx * TRIE_TBL8_GRP_NUM_ENT,
-		dp->nh_sz);
+
+	uint64_t final_idx = (cur_tbl == dp->tbl24) ?
+		(prev_idx * TRIE_TBL8_GRP_NUM_ENT + tbl24_base) :
+		(prev_idx * TRIE_TBL8_GRP_NUM_ENT);
+	*tbl = get_tbl_p_by_idx(cur_tbl, final_idx, dp->nh_sz);
 	return 0;
 }
 
@@ -385,7 +423,8 @@ write_edge(struct rte_trie_tbl *dp, const uint8_t *ip_part, uint64_t next_hop,
 #define TBL8_LEN	(RTE_IPV6_ADDR_SIZE - TBL24_BYTES)
 
 static int
-install_to_dp(struct rte_trie_tbl *dp, const struct rte_ipv6_addr *ledge,
+install_to_dp(struct rte_trie_tbl *dp, uint16_t vrf_id,
+	const struct rte_ipv6_addr *ledge,
 	const struct rte_ipv6_addr *r, uint64_t next_hop)
 {
 	void *common_root_tbl;
@@ -409,7 +448,7 @@ install_to_dp(struct rte_trie_tbl *dp, const struct rte_ipv6_addr *ledge,
 			break;
 	}
 
-	ret = build_common_root(dp, ledge, common_bytes, &common_root_tbl);
+	ret = build_common_root(dp, vrf_id, ledge, common_bytes, &common_root_tbl);
 	if (unlikely(ret != 0))
 		return ret;
 	/*first uncommon tbl8 byte idx*/
@@ -455,7 +494,7 @@ install_to_dp(struct rte_trie_tbl *dp, const struct rte_ipv6_addr *ledge,
 
 	uint8_t	common_tbl8 = (common_bytes < TBL24_BYTES) ?
 			0 : common_bytes - (TBL24_BYTES - 1);
-	ent = get_tbl24_p(dp, ledge, dp->nh_sz);
+	ent = get_tbl24_p(dp, vrf_id, ledge, dp->nh_sz);
 	recycle_root_path(dp, ledge->a + TBL24_BYTES, common_tbl8, ent);
 	return 0;
 }
@@ -482,9 +521,8 @@ get_nxt_net(struct rte_ipv6_addr *ip, uint8_t depth)
 }
 
 static int
-modify_dp(struct rte_trie_tbl *dp, struct rte_rib6 *rib,
-	const struct rte_ipv6_addr *ip,
-	uint8_t depth, uint64_t next_hop)
+modify_dp(struct rte_trie_tbl *dp, struct rte_rib6 *rib, uint16_t vrf_id,
+	const struct rte_ipv6_addr *ip, uint8_t depth, uint64_t next_hop)
 {
 	struct rte_rib6_node *tmp = NULL;
 	struct rte_ipv6_addr ledge, redge;
@@ -507,7 +545,7 @@ modify_dp(struct rte_trie_tbl *dp, struct rte_rib6 *rib,
 				get_nxt_net(&ledge, tmp_depth);
 				continue;
 			}
-			ret = install_to_dp(dp, &ledge, &redge, next_hop);
+			ret = install_to_dp(dp, vrf_id, &ledge, &redge, next_hop);
 			if (ret != 0)
 				return ret;
 			get_nxt_net(&redge, tmp_depth);
@@ -525,7 +563,7 @@ modify_dp(struct rte_trie_tbl *dp, struct rte_rib6 *rib,
 					!rte_ipv6_addr_is_unspec(&ledge))
 				break;
 
-			ret = install_to_dp(dp, &ledge, &redge, next_hop);
+			ret = install_to_dp(dp, vrf_id, &ledge, &redge, next_hop);
 			if (ret != 0)
 				return ret;
 		}
@@ -535,7 +573,8 @@ modify_dp(struct rte_trie_tbl *dp, struct rte_rib6 *rib,
 }
 
 int
-trie_modify(struct rte_fib6 *fib, const struct rte_ipv6_addr *ip,
+trie_modify(struct rte_fib6 *fib, uint16_t vrf_id,
+	const struct rte_ipv6_addr *ip,
 	uint8_t depth, uint64_t next_hop, int op)
 {
 	struct rte_trie_tbl *dp;
@@ -552,9 +591,11 @@ trie_modify(struct rte_fib6 *fib, const struct rte_ipv6_addr *ip,
 		return -EINVAL;
 
 	dp = rte_fib6_get_dp(fib);
-	RTE_ASSERT(dp);
-	rib = rte_fib6_get_rib(fib);
-	RTE_ASSERT(rib);
+	rib = rte_fib6_vrf_get_rib(fib, vrf_id);
+	RTE_ASSERT((dp != NULL) && (rib != NULL));
+
+	if (vrf_id >= dp->num_vrfs)
+		return -EINVAL;
 
 	ip_masked = *ip;
 	rte_ipv6_addr_mask(&ip_masked, depth);
@@ -597,7 +638,7 @@ trie_modify(struct rte_fib6 *fib, const struct rte_ipv6_addr *ip,
 			rte_rib6_get_nh(node, &node_nh);
 			if (node_nh == next_hop)
 				return 0;
-			ret = modify_dp(dp, rib, &ip_masked, depth, next_hop);
+			ret = modify_dp(dp, rib, vrf_id, &ip_masked, depth, next_hop);
 			if (ret == 0)
 				rte_rib6_set_nh(node, next_hop);
 			return 0;
@@ -616,7 +657,7 @@ trie_modify(struct rte_fib6 *fib, const struct rte_ipv6_addr *ip,
 			if (par_nh == next_hop)
 				goto successfully_added;
 		}
-		ret = modify_dp(dp, rib, &ip_masked, depth, next_hop);
+		ret = modify_dp(dp, rib, vrf_id, &ip_masked, depth, next_hop);
 		if (ret != 0) {
 			rte_rib6_remove(rib, &ip_masked, depth);
 			return ret;
@@ -633,10 +674,11 @@ trie_modify(struct rte_fib6 *fib, const struct rte_ipv6_addr *ip,
 			rte_rib6_get_nh(parent, &par_nh);
 			rte_rib6_get_nh(node, &node_nh);
 			if (par_nh != node_nh)
-				ret = modify_dp(dp, rib, &ip_masked, depth,
+				ret = modify_dp(dp, rib, vrf_id, &ip_masked, depth,
 					par_nh);
 		} else
-			ret = modify_dp(dp, rib, &ip_masked, depth, dp->def_nh);
+			ret = modify_dp(dp, rib, vrf_id, &ip_masked, depth,
+					dp->def_nh[vrf_id]);
 
 		if (ret != 0)
 			return ret;
@@ -656,9 +698,11 @@ trie_create(const char *name, int socket_id,
 {
 	char mem_name[TRIE_NAMESIZE];
 	struct rte_trie_tbl *dp = NULL;
-	uint64_t	def_nh;
 	uint32_t	num_tbl8;
 	enum rte_fib_trie_nh_sz	nh_sz;
+	uint16_t	num_vrfs;
+	uint16_t	vrf;
+	uint64_t	tbl24_sz;
 
 	if ((name == NULL) || (conf == NULL) ||
 			(conf->trie.nh_sz < RTE_FIB6_TRIE_2B) ||
@@ -673,21 +717,28 @@ trie_create(const char *name, int socket_id,
 		return NULL;
 	}
 
-	def_nh = conf->default_nh;
 	nh_sz = conf->trie.nh_sz;
 	num_tbl8 = conf->trie.num_tbl8;
+	num_vrfs = (conf->max_vrfs == 0) ? 1 : conf->max_vrfs;
+	tbl24_sz = (uint64_t)num_vrfs * TRIE_TBL24_NUM_ENT * (1 << nh_sz);
+
+	if (conf->vrf_default_nh != NULL) {
+		for (vrf = 0; vrf < num_vrfs; vrf++) {
+			if (conf->vrf_default_nh[vrf] > get_max_nh(nh_sz)) {
+				rte_errno = EINVAL;
+				return NULL;
+			}
+		}
+	}
 
 	snprintf(mem_name, sizeof(mem_name), "DP_%s", name);
-	dp = rte_zmalloc_socket(name, sizeof(struct rte_trie_tbl) +
-		TRIE_TBL24_NUM_ENT * (1 << nh_sz) + sizeof(uint32_t),
+	dp = rte_zmalloc_socket(name, sizeof(struct rte_trie_tbl) + tbl24_sz,
 		RTE_CACHE_LINE_SIZE, socket_id);
 	if (dp == NULL) {
 		rte_errno = ENOMEM;
 		return dp;
 	}
 
-	write_to_dp(&dp->tbl24, (def_nh << 1), nh_sz, 1 << 24);
-
 	snprintf(mem_name, sizeof(mem_name), "TBL8_%p", dp);
 	dp->tbl8 = rte_zmalloc_socket(mem_name, TRIE_TBL8_GRP_NUM_ENT *
 			(1ll << nh_sz) * (num_tbl8 + 1),
@@ -697,9 +748,32 @@ trie_create(const char *name, int socket_id,
 		rte_free(dp);
 		return NULL;
 	}
-	dp->def_nh = def_nh;
+
+	snprintf(mem_name, sizeof(mem_name), "DEF_NH_%p", dp);
+	dp->def_nh = rte_zmalloc_socket(mem_name,
+		num_vrfs * sizeof(*dp->def_nh),
+		RTE_CACHE_LINE_SIZE, socket_id);
+	if (dp->def_nh == NULL) {
+		rte_errno = ENOMEM;
+		rte_free(dp->tbl8);
+		rte_free(dp);
+		return NULL;
+	}
+
+	for (vrf = 0; vrf < num_vrfs; vrf++) {
+		uint64_t vrf_def = (conf->vrf_default_nh != NULL) ?
+			conf->vrf_default_nh[vrf] : conf->default_nh;
+		uint8_t *tbl24_ptr = (uint8_t *)dp->tbl24 +
+			((uint64_t)vrf * TRIE_TBL24_NUM_ENT << nh_sz);
+
+		dp->def_nh[vrf] = vrf_def;
+		write_to_dp((void *)tbl24_ptr, (vrf_def << 1), nh_sz,
+			TRIE_TBL24_NUM_ENT);
+	}
+
 	dp->nh_sz = nh_sz;
 	dp->number_tbl8s = num_tbl8;
+	dp->num_vrfs = num_vrfs;
 
 	snprintf(mem_name, sizeof(mem_name), "TBL8_idxes_%p", dp);
 	dp->tbl8_pool = rte_zmalloc_socket(mem_name,
@@ -707,6 +781,7 @@ trie_create(const char *name, int socket_id,
 			RTE_CACHE_LINE_SIZE, socket_id);
 	if (dp->tbl8_pool == NULL) {
 		rte_errno = ENOMEM;
+		rte_free(dp->def_nh);
 		rte_free(dp->tbl8);
 		rte_free(dp);
 		return NULL;
@@ -725,6 +800,7 @@ trie_free(void *p)
 	rte_rcu_qsbr_dq_delete(dp->dq);
 	rte_free(dp->tbl8_pool);
 	rte_free(dp->tbl8);
+	rte_free(dp->def_nh);
 	rte_free(dp);
 }
 
diff --git a/lib/fib/trie.h b/lib/fib/trie.h
index c34cc2c057..ef9a1d50c6 100644
--- a/lib/fib/trie.h
+++ b/lib/fib/trie.h
@@ -9,6 +9,7 @@
 #include <stdalign.h>
 
 #include <rte_common.h>
+#include <rte_debug.h>
 #include <rte_fib6.h>
 
 /**
@@ -32,18 +33,19 @@
 struct rte_trie_tbl {
 	uint32_t	number_tbl8s;	/**< Total number of tbl8s */
 	uint32_t	rsvd_tbl8s;	/**< Number of reserved tbl8s */
-	uint32_t	cur_tbl8s;	/**< Current cumber of tbl8s */
-	uint64_t	def_nh;		/**< Default next hop */
+	uint32_t	cur_tbl8s;	/**< Current number of tbl8s */
+	uint16_t	num_vrfs;	/**< Number of VRFs */
 	enum rte_fib_trie_nh_sz	nh_sz;	/**< Size of nexthop entry */
-	uint64_t	*tbl8;		/**< tbl8 table. */
-	uint32_t	*tbl8_pool;	/**< bitmap containing free tbl8 idxes*/
-	uint32_t	tbl8_pool_pos;
 	/* RCU config. */
 	enum rte_fib6_qsbr_mode rcu_mode; /**< Blocking, defer queue. */
 	struct rte_rcu_qsbr *v; /**< RCU QSBR variable. */
 	struct rte_rcu_qsbr_dq *dq; /**< RCU QSBR defer queue. */
+	uint64_t	*def_nh;	/**< Per-VRF default next hop array */
+	uint64_t	*tbl8;		/**< tbl8 table for all VRFs */
+	uint32_t	*tbl8_pool;	/**< bitmap containing free tbl8 idxes */
+	uint32_t	tbl8_pool_pos;
 	/* tbl24 table. */
-	alignas(RTE_CACHE_LINE_SIZE) uint64_t	tbl24[];
+	alignas(RTE_CACHE_LINE_SIZE) uint64_t tbl24[];
 };
 
 static inline uint32_t
@@ -53,12 +55,15 @@ get_tbl24_idx(const struct rte_ipv6_addr *ip)
 }
 
 static inline void *
-get_tbl24_p(struct rte_trie_tbl *dp, const struct rte_ipv6_addr *ip, uint8_t nh_sz)
+get_tbl24_p(struct rte_trie_tbl *dp, uint16_t vrf_id,
+	const struct rte_ipv6_addr *ip, uint8_t nh_sz)
 {
 	uint32_t tbl24_idx;
+	uint64_t base;
 
 	tbl24_idx = get_tbl24_idx(ip);
-	return (void *)&((uint8_t *)dp->tbl24)[tbl24_idx << nh_sz];
+	base = (uint64_t)vrf_id * TRIE_TBL24_NUM_ENT;
+	return (void *)&((uint8_t *)dp->tbl24)[(base + tbl24_idx) << nh_sz];
 }
 
 static inline uint8_t
@@ -110,17 +115,26 @@ is_entry_extended(uint64_t ent)
 	return (ent & TRIE_EXT_ENT) == TRIE_EXT_ENT;
 }
 
-#define LOOKUP_FUNC(suffix, type, nh_sz)				\
+#define LOOKUP_FUNC(suffix, type, is_vrf)				\
 static inline void rte_trie_lookup_bulk_##suffix(void *p,		\
-	const struct rte_ipv6_addr *ips,				\
+	const uint16_t *vrf_ids, const struct rte_ipv6_addr *ips,	\
 	uint64_t *next_hops, const unsigned int n)			\
-{									\
+{\
 	struct rte_trie_tbl *dp = (struct rte_trie_tbl *)p;		\
 	uint64_t tmp;							\
 	uint32_t i, j;							\
+	uint32_t tbl24_idx;						\
+	uint64_t base;						\
+									\
+	if (!is_vrf)						\
+		RTE_SET_USED(vrf_ids);					\
 									\
 	for (i = 0; i < n; i++) {					\
-		tmp = ((type *)dp->tbl24)[get_tbl24_idx(&ips[i])];	\
+		uint16_t vrf_id = is_vrf ? vrf_ids[i] : 0;		\
+		RTE_ASSERT(vrf_id < dp->num_vrfs);			\
+		base = (uint64_t)vrf_id * TRIE_TBL24_NUM_ENT;	\
+		tbl24_idx = get_tbl24_idx(&ips[i]);			\
+		tmp = ((type *)dp->tbl24)[base + tbl24_idx];	\
 		j = 3;							\
 		while (is_entry_extended(tmp)) {			\
 			tmp = ((type *)dp->tbl8)[ips[i].a[j++] +	\
@@ -129,9 +143,13 @@ static inline void rte_trie_lookup_bulk_##suffix(void *p,		\
 		next_hops[i] = tmp >> 1;				\
 	}								\
 }
-LOOKUP_FUNC(2b, uint16_t, 1)
-LOOKUP_FUNC(4b, uint32_t, 2)
-LOOKUP_FUNC(8b, uint64_t, 3)
+
+LOOKUP_FUNC(2b, uint16_t, false)
+LOOKUP_FUNC(4b, uint32_t, false)
+LOOKUP_FUNC(8b, uint64_t, false)
+LOOKUP_FUNC(vrf_2b, uint16_t, true)
+LOOKUP_FUNC(vrf_4b, uint32_t, true)
+LOOKUP_FUNC(vrf_8b, uint64_t, true)
 
 void
 trie_free(void *p);
@@ -144,7 +162,8 @@ rte_fib6_lookup_fn_t
 trie_get_lookup_fn(void *p, enum rte_fib6_lookup_type type);
 
 int
-trie_modify(struct rte_fib6 *fib, const struct rte_ipv6_addr *ip,
+trie_modify(struct rte_fib6 *fib, uint16_t vrf_id,
+	const struct rte_ipv6_addr *ip,
 	uint8_t depth, uint64_t next_hop, int op);
 
 int
diff --git a/lib/fib/trie_avx512.c b/lib/fib/trie_avx512.c
index f49482a95d..19cd69e69c 100644
--- a/lib/fib/trie_avx512.c
+++ b/lib/fib/trie_avx512.c
@@ -8,6 +8,12 @@
 #include "trie.h"
 #include "trie_avx512.h"
 
+enum vrf_scale {
+	VRF_SCALE_SINGLE = 0,
+	VRF_SCALE_SMALL = 1,
+	VRF_SCALE_LARGE = 2,
+};
+
 static __rte_always_inline void
 transpose_x16(const struct rte_ipv6_addr *ips,
 	__m512i *first, __m512i *second, __m512i *third, __m512i *fourth)
@@ -67,8 +73,9 @@ transpose_x8(const struct rte_ipv6_addr *ips,
 }
 
 static __rte_always_inline void
-trie_vec_lookup_x16x2(void *p, const struct rte_ipv6_addr *ips,
-	uint64_t *next_hops, int size)
+trie_vec_lookup_x16x2(void *p, const uint16_t *vrf_ids,
+	const struct rte_ipv6_addr *ips, uint64_t *next_hops, int size,
+	enum vrf_scale vrf_scale)
 {
 	struct rte_trie_tbl *dp = (struct rte_trie_tbl *)p;
 	const __m512i zero = _mm512_set1_epi32(0);
@@ -79,6 +86,7 @@ trie_vec_lookup_x16x2(void *p, const struct rte_ipv6_addr *ips,
 	__m512i first_2, second_2, third_2, fourth_2;
 	__m512i idxes_1, res_1;
 	__m512i idxes_2, res_2;
+	__m512i vrf32_1, vrf32_2;
 	__m512i shuf_idxes;
 	__m512i tmp_1, tmp2_1, bytes_1, byte_chunk_1;
 	__m512i tmp_2, tmp2_2, bytes_2, byte_chunk_2;
@@ -109,6 +117,24 @@ trie_vec_lookup_x16x2(void *p, const struct rte_ipv6_addr *ips,
 	idxes_1 = _mm512_shuffle_epi8(first_1, bswap.z);
 	idxes_2 = _mm512_shuffle_epi8(first_2, bswap.z);
 
+	if (vrf_scale == VRF_SCALE_SINGLE) {
+		RTE_SET_USED(vrf_ids);
+	} else {
+		uint32_t j;
+
+		for (j = 0; j < 32; j++)
+			RTE_ASSERT(vrf_ids[j] < dp->num_vrfs);
+
+		vrf32_1 = _mm512_cvtepu16_epi32(
+			_mm256_loadu_si256((const void *)vrf_ids));
+		vrf32_2 = _mm512_cvtepu16_epi32(
+			_mm256_loadu_si256((const void *)(vrf_ids + 16)));
+		idxes_1 = _mm512_add_epi32(idxes_1,
+			_mm512_slli_epi32(vrf32_1, 24));
+		idxes_2 = _mm512_add_epi32(idxes_2,
+			_mm512_slli_epi32(vrf32_2, 24));
+	}
+
 	/**
 	 * lookup in tbl24
 	 * Put it inside branch to make compiller happy with -O0
@@ -213,13 +239,15 @@ trie_vec_lookup_x16x2(void *p, const struct rte_ipv6_addr *ips,
 }
 
 static void
-trie_vec_lookup_x8x2_8b(void *p, const struct rte_ipv6_addr *ips,
-	uint64_t *next_hops)
+trie_vec_lookup_x8x2(void *p, const uint16_t *vrf_ids,
+	const struct rte_ipv6_addr *ips, uint64_t *next_hops, int size,
+	enum vrf_scale vrf_scale)
 {
 	struct rte_trie_tbl *dp = (struct rte_trie_tbl *)p;
 	const __m512i zero = _mm512_set1_epi32(0);
 	const __m512i lsb = _mm512_set1_epi32(1);
 	const __m512i three_lsb = _mm512_set1_epi32(7);
+	__m512i res_msk;
 	/* IPv6 eight byte chunks */
 	__m512i first_1, second_1;
 	__m512i first_2, second_2;
@@ -228,6 +256,7 @@ trie_vec_lookup_x8x2_8b(void *p, const struct rte_ipv6_addr *ips,
 	__m512i shuf_idxes, base_idxes;
 	__m512i tmp_1, bytes_1, byte_chunk_1;
 	__m512i tmp_2, bytes_2, byte_chunk_2;
+	__m512i vrf64_1, vrf64_2;
 	const __rte_x86_zmm_t bswap = {
 		.u8 = { 2, 1, 0, 255, 255, 255, 255, 255,
 			10, 9, 8, 255, 255, 255, 255, 255,
@@ -244,6 +273,11 @@ trie_vec_lookup_x8x2_8b(void *p, const struct rte_ipv6_addr *ips,
 	__mmask8 msk_ext_1, new_msk_1;
 	__mmask8 msk_ext_2, new_msk_2;
 
+	if (size == sizeof(uint16_t))
+		res_msk = _mm512_set1_epi64(UINT16_MAX);
+	else if (size == sizeof(uint32_t))
+		res_msk = _mm512_set1_epi64(UINT32_MAX);
+	/* res_msk narrows 8-byte gathers down to the actual entry width */
 	transpose_x8(ips, &first_1, &second_1);
 	transpose_x8(ips + 8, &first_2, &second_2);
 
@@ -251,9 +285,39 @@ trie_vec_lookup_x8x2_8b(void *p, const struct rte_ipv6_addr *ips,
 	idxes_1 = _mm512_shuffle_epi8(first_1, bswap.z);
 	idxes_2 = _mm512_shuffle_epi8(first_2, bswap.z);
 
+	if (vrf_scale == VRF_SCALE_SINGLE) {
+		RTE_SET_USED(vrf_ids);
+	} else {
+		uint32_t j;
+		/* debug-only sanity check on the incoming VRF ids */
+		for (j = 0; j < 16; j++)
+			RTE_ASSERT(vrf_ids[j] < dp->num_vrfs);
+
+		vrf64_1 = _mm512_cvtepu16_epi64(
+			_mm_loadu_si128((const void *)vrf_ids));
+		vrf64_2 = _mm512_cvtepu16_epi64(
+			_mm_loadu_si128((const void *)(vrf_ids + 8)));
+		idxes_1 = _mm512_add_epi64(idxes_1,
+			_mm512_slli_epi64(vrf64_1, 24));
+		idxes_2 = _mm512_add_epi64(idxes_2,
+			_mm512_slli_epi64(vrf64_2, 24));
+	}
+	/* NOTE(review): sub-8B gathers load 8 bytes/entry - confirm tbl24/tbl8 allocations pad past the last 2B/4B entry */
 	/* lookup in tbl24 */
-	res_1 = _mm512_i64gather_epi64(idxes_1, (const void *)dp->tbl24, 8);
-	res_2 = _mm512_i64gather_epi64(idxes_2, (const void *)dp->tbl24, 8);
+	if (size == sizeof(uint16_t)) {
+		res_1 = _mm512_i64gather_epi64(idxes_1, (const void *)dp->tbl24, 2);
+		res_2 = _mm512_i64gather_epi64(idxes_2, (const void *)dp->tbl24, 2);
+		res_1 = _mm512_and_epi64(res_1, res_msk);
+		res_2 = _mm512_and_epi64(res_2, res_msk);
+	} else if (size == sizeof(uint32_t)) {
+		res_1 = _mm512_i64gather_epi64(idxes_1, (const void *)dp->tbl24, 4);
+		res_2 = _mm512_i64gather_epi64(idxes_2, (const void *)dp->tbl24, 4);
+		res_1 = _mm512_and_epi64(res_1, res_msk);
+		res_2 = _mm512_and_epi64(res_2, res_msk);
+	} else {
+		res_1 = _mm512_i64gather_epi64(idxes_1, (const void *)dp->tbl24, 8);
+		res_2 = _mm512_i64gather_epi64(idxes_2, (const void *)dp->tbl24, 8);
+	}
 	/* get extended entries indexes */
 	msk_ext_1 = _mm512_test_epi64_mask(res_1, lsb);
 	msk_ext_2 = _mm512_test_epi64_mask(res_2, lsb);
@@ -278,10 +342,26 @@ trie_vec_lookup_x8x2_8b(void *p, const struct rte_ipv6_addr *ips,
 				shuf_idxes);
 		idxes_1 = _mm512_maskz_add_epi64(msk_ext_1, idxes_1, bytes_1);
 		idxes_2 = _mm512_maskz_add_epi64(msk_ext_2, idxes_2, bytes_2);
-		tmp_1 = _mm512_mask_i64gather_epi64(zero, msk_ext_1,
+		if (size == sizeof(uint16_t)) {
+			tmp_1 = _mm512_mask_i64gather_epi64(zero, msk_ext_1,
+				idxes_1, (const void *)dp->tbl8, 2);
+			tmp_2 = _mm512_mask_i64gather_epi64(zero, msk_ext_2,
+				idxes_2, (const void *)dp->tbl8, 2);
+			tmp_1 = _mm512_and_epi64(tmp_1, res_msk);
+			tmp_2 = _mm512_and_epi64(tmp_2, res_msk);
+		} else if (size == sizeof(uint32_t)) {
+			tmp_1 = _mm512_mask_i64gather_epi64(zero, msk_ext_1,
+				idxes_1, (const void *)dp->tbl8, 4);
+			tmp_2 = _mm512_mask_i64gather_epi64(zero, msk_ext_2,
+				idxes_2, (const void *)dp->tbl8, 4);
+			tmp_1 = _mm512_and_epi64(tmp_1, res_msk);
+			tmp_2 = _mm512_and_epi64(tmp_2, res_msk);
+		} else {
+			tmp_1 = _mm512_mask_i64gather_epi64(zero, msk_ext_1,
 				idxes_1, (const void *)dp->tbl8, 8);
-		tmp_2 = _mm512_mask_i64gather_epi64(zero, msk_ext_2,
+			tmp_2 = _mm512_mask_i64gather_epi64(zero, msk_ext_2,
 				idxes_2, (const void *)dp->tbl8, 8);
+		}
 		new_msk_1 = _mm512_test_epi64_mask(tmp_1, lsb);
 		new_msk_2 = _mm512_test_epi64_mask(tmp_2, lsb);
 		res_1 = _mm512_mask_blend_epi64(msk_ext_1 ^ new_msk_1, res_1,
@@ -306,40 +386,145 @@ trie_vec_lookup_x8x2_8b(void *p, const struct rte_ipv6_addr *ips,
 }
 
 void
-rte_trie_vec_lookup_bulk_2b(void *p, const struct rte_ipv6_addr *ips,
+rte_trie_vec_lookup_bulk_2b(void *p, const uint16_t *vrf_ids,
+	const struct rte_ipv6_addr *ips,
 	uint64_t *next_hops, const unsigned int n)
 {
 	uint32_t i;
+	/* vrf_ids unused for SINGLE scale; guard in case legacy callers pass NULL */
 	for (i = 0; i < (n / 32); i++) {
-		trie_vec_lookup_x16x2(p, &ips[i * 32],
-				next_hops + i * 32, sizeof(uint16_t));
+		trie_vec_lookup_x16x2(p, vrf_ids ? vrf_ids + i * 32 : NULL,
+				&ips[i * 32], next_hops + i * 32,
+				sizeof(uint16_t), VRF_SCALE_SINGLE);
 	}
-	rte_trie_lookup_bulk_2b(p, &ips[i * 32],
+	rte_trie_lookup_bulk_2b(p, vrf_ids ? vrf_ids + i * 32 : NULL, &ips[i * 32],
 			next_hops + i * 32, n - i * 32);
 }
 
 void
-rte_trie_vec_lookup_bulk_4b(void *p, const struct rte_ipv6_addr *ips,
+rte_trie_vec_lookup_bulk_vrf_2b(void *p, const uint16_t *vrf_ids,
+	const struct rte_ipv6_addr *ips,
 	uint64_t *next_hops, const unsigned int n)
 {
 	uint32_t i;
+	/* 32 addresses and 32 VRF ids consumed per AVX-512 pass; scalar tail */
 	for (i = 0; i < (n / 32); i++) {
-		trie_vec_lookup_x16x2(p, &ips[i * 32],
-				next_hops + i * 32, sizeof(uint32_t));
+		trie_vec_lookup_x16x2(p, vrf_ids + i * 32, &ips[i * 32],
+				next_hops + i * 32, sizeof(uint16_t),
+				VRF_SCALE_SMALL);
 	}
-	rte_trie_lookup_bulk_4b(p, &ips[i * 32],
+	rte_trie_lookup_bulk_vrf_2b(p, vrf_ids + i * 32, &ips[i * 32],
 			next_hops + i * 32, n - i * 32);
 }
 
 void
-rte_trie_vec_lookup_bulk_8b(void *p, const struct rte_ipv6_addr *ips,
+rte_trie_vec_lookup_bulk_vrf_2b_large(void *p, const uint16_t *vrf_ids,
+	const struct rte_ipv6_addr *ips,
 	uint64_t *next_hops, const unsigned int n)
 {
 	uint32_t i;
+	/* large VRF count: 64-bit index path, 16 addresses per AVX-512 pass */
+	for (i = 0; i < (n / 16); i++) {
+		trie_vec_lookup_x8x2(p, vrf_ids + i * 16, &ips[i * 16],
+				next_hops + i * 16, sizeof(uint16_t),
+				VRF_SCALE_LARGE);
+	}
+	rte_trie_lookup_bulk_vrf_2b(p, vrf_ids + i * 16, &ips[i * 16],
+			next_hops + i * 16, n - i * 16);
+}
+
+void
+rte_trie_vec_lookup_bulk_4b(void *p, const uint16_t *vrf_ids,
+	const struct rte_ipv6_addr *ips,
+	uint64_t *next_hops, const unsigned int n)
+{
+	uint32_t i;
+	/* vrf_ids unused for SINGLE scale; guard in case legacy callers pass NULL */
+	for (i = 0; i < (n / 32); i++) {
+		trie_vec_lookup_x16x2(p, vrf_ids ? vrf_ids + i * 32 : NULL,
+				&ips[i * 32], next_hops + i * 32,
+				sizeof(uint32_t), VRF_SCALE_SINGLE);
+	}
+	rte_trie_lookup_bulk_4b(p, vrf_ids ? vrf_ids + i * 32 : NULL,
+			&ips[i * 32], next_hops + i * 32, n - i * 32);
+}
+
+void
+rte_trie_vec_lookup_bulk_vrf_4b(void *p, const uint16_t *vrf_ids,
+	const struct rte_ipv6_addr *ips,
+	uint64_t *next_hops, const unsigned int n)
+{
+	uint32_t i;
+	/* 32 addresses and 32 VRF ids consumed per AVX-512 pass; scalar tail */
+	for (i = 0; i < (n / 32); i++) {
+		trie_vec_lookup_x16x2(p, vrf_ids + i * 32, &ips[i * 32],
+				next_hops + i * 32, sizeof(uint32_t),
+				VRF_SCALE_SMALL);
+	}
+	rte_trie_lookup_bulk_vrf_4b(p, vrf_ids + i * 32, &ips[i * 32],
+			next_hops + i * 32, n - i * 32);
+}
+
+void
+rte_trie_vec_lookup_bulk_vrf_4b_large(void *p, const uint16_t *vrf_ids,
+	const struct rte_ipv6_addr *ips,
+	uint64_t *next_hops, const unsigned int n)
+{
+	uint32_t i;
+	/* large VRF count: 64-bit index path, 16 addresses per AVX-512 pass */
+	for (i = 0; i < (n / 16); i++) {
+		trie_vec_lookup_x8x2(p, vrf_ids + i * 16, &ips[i * 16],
+				next_hops + i * 16, sizeof(uint32_t),
+				VRF_SCALE_LARGE);
+	}
+	rte_trie_lookup_bulk_vrf_4b(p, vrf_ids + i * 16, &ips[i * 16],
+			next_hops + i * 16, n - i * 16);
+}
+
+void
+rte_trie_vec_lookup_bulk_8b(void *p, const uint16_t *vrf_ids,
+	const struct rte_ipv6_addr *ips,
+	uint64_t *next_hops, const unsigned int n)
+{
+	uint32_t i;
+	/* vrf_ids unused for SINGLE scale; guard in case legacy callers pass NULL */
+	for (i = 0; i < (n / 16); i++) {
+		trie_vec_lookup_x8x2(p, vrf_ids ? vrf_ids + i * 16 : NULL,
+				&ips[i * 16], next_hops + i * 16,
+				sizeof(uint64_t), VRF_SCALE_SINGLE);
+	}
+	rte_trie_lookup_bulk_8b(p, vrf_ids ? vrf_ids + i * 16 : NULL,
+			&ips[i * 16], next_hops + i * 16, n - i * 16);
+}
+
+void
+rte_trie_vec_lookup_bulk_vrf_8b(void *p, const uint16_t *vrf_ids,
+	const struct rte_ipv6_addr *ips,
+	uint64_t *next_hops, const unsigned int n)
+{
+	uint32_t i;
+	/* 8B entries always use the x8x2 path; 16 addresses per AVX-512 pass */
+	for (i = 0; i < (n / 16); i++) {
+		trie_vec_lookup_x8x2(p, vrf_ids + i * 16, &ips[i * 16],
+				next_hops + i * 16, sizeof(uint64_t),
+				VRF_SCALE_SMALL);
+	}
+	rte_trie_lookup_bulk_vrf_8b(p, vrf_ids + i * 16, &ips[i * 16],
+			next_hops + i * 16, n - i * 16);
+}
+
+void
+rte_trie_vec_lookup_bulk_vrf_8b_large(void *p, const uint16_t *vrf_ids,
+	const struct rte_ipv6_addr *ips,
+	uint64_t *next_hops, const unsigned int n)
+{
+	uint32_t i;
+	/* large VRF count: 64-bit index path, 16 addresses per AVX-512 pass */
 	for (i = 0; i < (n / 16); i++) {
-		trie_vec_lookup_x8x2_8b(p, &ips[i * 16],
-				next_hops + i * 16);
+		trie_vec_lookup_x8x2(p, vrf_ids + i * 16, &ips[i * 16],
+				next_hops + i * 16, sizeof(uint64_t),
+				VRF_SCALE_LARGE);
 	}
-	rte_trie_lookup_bulk_8b(p, &ips[i * 16],
+	rte_trie_lookup_bulk_vrf_8b(p, vrf_ids + i * 16, &ips[i * 16],
 			next_hops + i * 16, n - i * 16);
 }
diff --git a/lib/fib/trie_avx512.h b/lib/fib/trie_avx512.h
index 1028a4899f..190a5c5aa4 100644
--- a/lib/fib/trie_avx512.h
+++ b/lib/fib/trie_avx512.h
@@ -10,15 +10,55 @@
 struct rte_ipv6_addr;
 
+/*
+ * AVX512 trie lookup entry points, one set per next-hop entry size (N bytes).
+ * <N>b: single default VRF, vrf_ids is ignored.
+ * vrf_<N>b: per-packet VRF ids, VRF_SCALE_SMALL.
+ * vrf_<N>b_large: per-packet VRF ids, VRF_SCALE_LARGE.
+ * When used, vrf_ids must supply one id per address (n entries).
+ */
 void
-rte_trie_vec_lookup_bulk_2b(void *p, const struct rte_ipv6_addr *ips,
+rte_trie_vec_lookup_bulk_2b(void *p, const uint16_t *vrf_ids,
+	const struct rte_ipv6_addr *ips,
 	uint64_t *next_hops, const unsigned int n);
 
 void
-rte_trie_vec_lookup_bulk_4b(void *p, const struct rte_ipv6_addr *ips,
+rte_trie_vec_lookup_bulk_vrf_2b(void *p, const uint16_t *vrf_ids,
+	const struct rte_ipv6_addr *ips,
 	uint64_t *next_hops, const unsigned int n);
 
 void
-rte_trie_vec_lookup_bulk_8b(void *p, const struct rte_ipv6_addr *ips,
+rte_trie_vec_lookup_bulk_vrf_2b_large(void *p, const uint16_t *vrf_ids,
+	const struct rte_ipv6_addr *ips,
+	uint64_t *next_hops, const unsigned int n);
+
+void
+rte_trie_vec_lookup_bulk_4b(void *p, const uint16_t *vrf_ids,
+	const struct rte_ipv6_addr *ips,
+	uint64_t *next_hops, const unsigned int n);
+
+void
+rte_trie_vec_lookup_bulk_vrf_4b(void *p, const uint16_t *vrf_ids,
+	const struct rte_ipv6_addr *ips,
+	uint64_t *next_hops, const unsigned int n);
+
+void
+rte_trie_vec_lookup_bulk_vrf_4b_large(void *p, const uint16_t *vrf_ids,
+	const struct rte_ipv6_addr *ips,
+	uint64_t *next_hops, const unsigned int n);
+
+void
+rte_trie_vec_lookup_bulk_8b(void *p, const uint16_t *vrf_ids,
+	const struct rte_ipv6_addr *ips,
+	uint64_t *next_hops, const unsigned int n);
+
+void
+rte_trie_vec_lookup_bulk_vrf_8b(void *p, const uint16_t *vrf_ids,
+	const struct rte_ipv6_addr *ips,
+	uint64_t *next_hops, const unsigned int n);
+
+void
+rte_trie_vec_lookup_bulk_vrf_8b_large(void *p, const uint16_t *vrf_ids,
+	const struct rte_ipv6_addr *ips,
 	uint64_t *next_hops, const unsigned int n);
 
 #endif /* _TRIE_AVX512_H_ */
-- 
2.43.0



More information about the dev mailing list