[dpdk-dev] [PATCH v1 0/3] lpm: increase number of next hops for lpm (ipv4)

Matthew Hall mhall at mhcomputing.net
Sat Oct 24 08:09:30 CEST 2015


On 10/23/15 9:20 AM, Matthew Hall wrote:
> On Fri, Oct 23, 2015 at 03:51:48PM +0200, Michal Jastrzebski wrote:
>> From: Michal Kobylinski  <michalx.kobylinski at intel.com>
>>
>> The current DPDK implementation for LPM for IPv4 and IPv6 limits the
>> number of next hops to 256, as the next hop ID is an 8-bit long field.
>> Proposed extension increase number of next hops for IPv4 to 2^24 and
>> also allows 32-bits read/write operations.
>>
>> This patchset requires additional change to rte_table library to meet
>> ABI compatibility requirements. A v2 will be sent next week.
>
> I also have a patchset for this.
>
> I will send it out as well so we could compare.
>
> Matthew.

Sorry about the delay; I only work on DPDK in personal time and not as 
part of a job. My patchset is attached to this email.

One possible advantage with my patchset, compared to others, is that the 
space problem is fixed in both IPV4 and in IPV6, to prevent asymmetry 
between these two standards, which is something I try to avoid as much 
as humanly possible.

This is because my application code is green-field, so I absolutely 
don't want to put any ugly hacks or incompatibilities in this code if I 
can possibly avoid it.

Otherwise, I am not necessarily as expert about rte_lpm as some of the 
full-time guys, but I think with four or five of us in the thread 
hammering out patches we will be able to create something amazing 
together and I am very very very very very happy about this.

Matthew.
-------------- next part --------------
>From 6a8e3428344ed11af8a1999dcec5c31c10f37c3a Mon Sep 17 00:00:00 2001
From: Matthew Hall <mhall at mhcomputing.net>
Date: Sat, 27 Jun 2015 22:49:46 +0000
Subject: [PATCH 1/8] rte_lpm.h: use 24 bit extended next hop

Signed-off-by: Matthew Hall <mhall at mhcomputing.net>
---
 lib/librte_lpm/rte_lpm.h | 46 +++++++++++++++++++++++++++++-----------------
 1 file changed, 29 insertions(+), 17 deletions(-)

diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h
index c299ce2..c677c4a 100644
--- a/lib/librte_lpm/rte_lpm.h
+++ b/lib/librte_lpm/rte_lpm.h
@@ -82,32 +82,36 @@ extern "C" {
 #endif
 
 /** @internal bitmask with valid and ext_entry/valid_group fields set */
-#define RTE_LPM_VALID_EXT_ENTRY_BITMASK 0x0300
+#define RTE_LPM_VALID_EXT_ENTRY_BITMASK 0x03000000
+
+/** @internal bitmask with next_hop field set */
+#define RTE_LPM_NEXT_HOP_BITMASK        0x00FFFFFF
 
 /** Bitmask used to indicate successful lookup */
-#define RTE_LPM_LOOKUP_SUCCESS          0x0100
+#define RTE_LPM_LOOKUP_SUCCESS          0x01000000
+
 
 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
 /** @internal Tbl24 entry structure. */
 struct rte_lpm_tbl24_entry {
-	/* Stores Next hop or group index (i.e. gindex)into tbl8. */
+	/* Stores Next hop or group index (i.e. gindex) into tbl8. */
 	union {
-		uint8_t next_hop;
-		uint8_t tbl8_gindex;
-	};
+		uint32_t next_hop    :24;
+		uint32_t tbl8_gindex :24;
+	} __attribute__((__packed__));
 	/* Using single uint8_t to store 3 values. */
-	uint8_t valid     :1; /**< Validation flag. */
-	uint8_t ext_entry :1; /**< External entry. */
-	uint8_t depth     :6; /**< Rule depth. */
+	uint32_t valid     :1; /**< Validation flag. */
+	uint32_t ext_entry :1; /**< External entry. */
+	uint32_t depth     :6; /**< Rule depth. */
 };
 
 /** @internal Tbl8 entry structure. */
 struct rte_lpm_tbl8_entry {
-	uint8_t next_hop; /**< next hop. */
+	uint32_t next_hop   :24; /**< next hop. */
 	/* Using single uint8_t to store 3 values. */
-	uint8_t valid       :1; /**< Validation flag. */
-	uint8_t valid_group :1; /**< Group validation flag. */
-	uint8_t depth       :6; /**< Rule depth. */
+	uint8_t valid       :1;  /**< Validation flag. */
+	uint8_t valid_group :1;  /**< Group validation flag. */
+	uint8_t depth       :6;  /**< Rule depth. */
 };
 #else
 struct rte_lpm_tbl24_entry {
@@ -130,8 +134,8 @@ struct rte_lpm_tbl8_entry {
 
 /** @internal Rule structure. */
 struct rte_lpm_rule {
-	uint32_t ip; /**< Rule IP address. */
-	uint8_t  next_hop; /**< Rule next hop. */
+	uint32_t ip;       /**< Rule IP address. */
+	uint32_t next_hop; /**< Rule next hop. */
 };
 
 /** @internal Contains metadata about the rules table. */
@@ -219,7 +223,7 @@ rte_lpm_free(struct rte_lpm *lpm);
  *   0 on success, negative value otherwise
  */
 int
-rte_lpm_add(struct rte_lpm *lpm, uint32_t ip, uint8_t depth, uint8_t next_hop);
+rte_lpm_add(struct rte_lpm *lpm, uint32_t ip, uint8_t depth, uint32_t next_hop);
 
 /**
  * Check if a rule is present in the LPM table,
@@ -238,7 +242,7 @@ rte_lpm_add(struct rte_lpm *lpm, uint32_t ip, uint8_t depth, uint8_t next_hop);
  */
 int
 rte_lpm_is_rule_present(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
-uint8_t *next_hop);
+uint32_t *next_hop);
 
 /**
  * Delete a rule from the LPM table.
@@ -301,6 +305,8 @@ rte_lpm_lookup(struct rte_lpm *lpm, uint32_t ip, uint8_t *next_hop)
 	*next_hop = (uint8_t)tbl_entry;
 	return (tbl_entry & RTE_LPM_LOOKUP_SUCCESS) ? 0 : -ENOENT;
 }
+int
+rte_lpm_lookup(struct rte_lpm *lpm, uint32_t ip, uint32_t *next_hop);
 
 /**
  * Lookup multiple IP addresses in an LPM table. This may be implemented as a
@@ -360,6 +366,9 @@ rte_lpm_lookup_bulk_func(const struct rte_lpm *lpm, const uint32_t * ips,
 
 /* Mask four results. */
 #define	 RTE_LPM_MASKX4_RES	UINT64_C(0x00ff00ff00ff00ff)
+int
+rte_lpm_lookup_bulk(const struct rte_lpm *lpm, const uint32_t * ips,
+		uint32_t * next_hops, const unsigned n);
 
 /**
  * Lookup four IP addresses in an LPM table.
@@ -472,6 +481,9 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, __m128i ip, uint16_t hop[4],
 	hop[2] = (tbl[2] & RTE_LPM_LOOKUP_SUCCESS) ? (uint8_t)tbl[2] : defv;
 	hop[3] = (tbl[3] & RTE_LPM_LOOKUP_SUCCESS) ? (uint8_t)tbl[3] : defv;
 }
+void
+rte_lpm_lookupx4(const struct rte_lpm *lpm, __m128i ip, uint32_t hop[4],
+	uint32_t defv);
 
 #ifdef __cplusplus
 }
-- 
1.9.1

-------------- next part --------------
>From 7ee9f2e9a8853d49a332d971f5b56e79efccd71b Mon Sep 17 00:00:00 2001
From: Matthew Hall <mhall at mhcomputing.net>
Date: Sat, 27 Jun 2015 22:53:43 +0000
Subject: [PATCH 2/8] rte_lpm.h: disable inlining of rte_lpm lookup functions

Signed-off-by: Matthew Hall <mhall at mhcomputing.net>
---
 lib/librte_lpm/rte_lpm.h | 152 -----------------------------------------------
 1 file changed, 152 deletions(-)

diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h
index c677c4a..76282d8 100644
--- a/lib/librte_lpm/rte_lpm.h
+++ b/lib/librte_lpm/rte_lpm.h
@@ -280,31 +280,6 @@ rte_lpm_delete_all(struct rte_lpm *lpm);
  * @return
  *   -EINVAL for incorrect arguments, -ENOENT on lookup miss, 0 on lookup hit
  */
-static inline int
-rte_lpm_lookup(struct rte_lpm *lpm, uint32_t ip, uint8_t *next_hop)
-{
-	unsigned tbl24_index = (ip >> 8);
-	uint16_t tbl_entry;
-
-	/* DEBUG: Check user input arguments. */
-	RTE_LPM_RETURN_IF_TRUE(((lpm == NULL) || (next_hop == NULL)), -EINVAL);
-
-	/* Copy tbl24 entry */
-	tbl_entry = *(const uint16_t *)&lpm->tbl24[tbl24_index];
-
-	/* Copy tbl8 entry (only if needed) */
-	if (unlikely((tbl_entry & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
-			RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
-
-		unsigned tbl8_index = (uint8_t)ip +
-				((uint8_t)tbl_entry * RTE_LPM_TBL8_GROUP_NUM_ENTRIES);
-
-		tbl_entry = *(const uint16_t *)&lpm->tbl8[tbl8_index];
-	}
-
-	*next_hop = (uint8_t)tbl_entry;
-	return (tbl_entry & RTE_LPM_LOOKUP_SUCCESS) ? 0 : -ENOENT;
-}
 int
 rte_lpm_lookup(struct rte_lpm *lpm, uint32_t ip, uint32_t *next_hop);
 
@@ -328,41 +303,6 @@ rte_lpm_lookup(struct rte_lpm *lpm, uint32_t ip, uint32_t *next_hop);
  *  @return
  *   -EINVAL for incorrect arguments, otherwise 0
  */
-#define rte_lpm_lookup_bulk(lpm, ips, next_hops, n) \
-		rte_lpm_lookup_bulk_func(lpm, ips, next_hops, n)
-
-static inline int
-rte_lpm_lookup_bulk_func(const struct rte_lpm *lpm, const uint32_t * ips,
-		uint16_t * next_hops, const unsigned n)
-{
-	unsigned i;
-	unsigned tbl24_indexes[n];
-
-	/* DEBUG: Check user input arguments. */
-	RTE_LPM_RETURN_IF_TRUE(((lpm == NULL) || (ips == NULL) ||
-			(next_hops == NULL)), -EINVAL);
-
-	for (i = 0; i < n; i++) {
-		tbl24_indexes[i] = ips[i] >> 8;
-	}
-
-	for (i = 0; i < n; i++) {
-		/* Simply copy tbl24 entry to output */
-		next_hops[i] = *(const uint16_t *)&lpm->tbl24[tbl24_indexes[i]];
-
-		/* Overwrite output with tbl8 entry if needed */
-		if (unlikely((next_hops[i] & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
-				RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
-
-			unsigned tbl8_index = (uint8_t)ips[i] +
-					((uint8_t)next_hops[i] *
-					 RTE_LPM_TBL8_GROUP_NUM_ENTRIES);
-
-			next_hops[i] = *(const uint16_t *)&lpm->tbl8[tbl8_index];
-		}
-	}
-	return 0;
-}
 
 /* Mask four results. */
 #define	 RTE_LPM_MASKX4_RES	UINT64_C(0x00ff00ff00ff00ff)
@@ -389,98 +329,6 @@ rte_lpm_lookup_bulk(const struct rte_lpm *lpm, const uint32_t * ips,
  *   Default value to populate into corresponding element of hop[] array,
  *   if lookup would fail.
  */
-static inline void
-rte_lpm_lookupx4(const struct rte_lpm *lpm, __m128i ip, uint16_t hop[4],
-	uint16_t defv)
-{
-	__m128i i24;
-	rte_xmm_t i8;
-	uint16_t tbl[4];
-	uint64_t idx, pt;
-
-	const __m128i mask8 =
-		_mm_set_epi32(UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX);
-
-	/*
-	 * RTE_LPM_VALID_EXT_ENTRY_BITMASK for 4 LPM entries
-	 * as one 64-bit value (0x0300030003000300).
-	 */
-	const uint64_t mask_xv =
-		((uint64_t)RTE_LPM_VALID_EXT_ENTRY_BITMASK |
-		(uint64_t)RTE_LPM_VALID_EXT_ENTRY_BITMASK << 16 |
-		(uint64_t)RTE_LPM_VALID_EXT_ENTRY_BITMASK << 32 |
-		(uint64_t)RTE_LPM_VALID_EXT_ENTRY_BITMASK << 48);
-
-	/*
-	 * RTE_LPM_LOOKUP_SUCCESS for 4 LPM entries
-	 * as one 64-bit value (0x0100010001000100).
-	 */
-	const uint64_t mask_v =
-		((uint64_t)RTE_LPM_LOOKUP_SUCCESS |
-		(uint64_t)RTE_LPM_LOOKUP_SUCCESS << 16 |
-		(uint64_t)RTE_LPM_LOOKUP_SUCCESS << 32 |
-		(uint64_t)RTE_LPM_LOOKUP_SUCCESS << 48);
-
-	/* get 4 indexes for tbl24[]. */
-	i24 = _mm_srli_epi32(ip, CHAR_BIT);
-
-	/* extract values from tbl24[] */
-	idx = _mm_cvtsi128_si64(i24);
-	i24 = _mm_srli_si128(i24, sizeof(uint64_t));
-
-	tbl[0] = *(const uint16_t *)&lpm->tbl24[(uint32_t)idx];
-	tbl[1] = *(const uint16_t *)&lpm->tbl24[idx >> 32];
-
-	idx = _mm_cvtsi128_si64(i24);
-
-	tbl[2] = *(const uint16_t *)&lpm->tbl24[(uint32_t)idx];
-	tbl[3] = *(const uint16_t *)&lpm->tbl24[idx >> 32];
-
-	/* get 4 indexes for tbl8[]. */
-	i8.x = _mm_and_si128(ip, mask8);
-
-	pt = (uint64_t)tbl[0] |
-		(uint64_t)tbl[1] << 16 |
-		(uint64_t)tbl[2] << 32 |
-		(uint64_t)tbl[3] << 48;
-
-	/* search successfully finished for all 4 IP addresses. */
-	if (likely((pt & mask_xv) == mask_v)) {
-		uintptr_t ph = (uintptr_t)hop;
-		*(uint64_t *)ph = pt & RTE_LPM_MASKX4_RES;
-		return;
-	}
-
-	if (unlikely((pt & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
-			RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
-		i8.u32[0] = i8.u32[0] +
-			(uint8_t)tbl[0] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
-		tbl[0] = *(const uint16_t *)&lpm->tbl8[i8.u32[0]];
-	}
-	if (unlikely((pt >> 16 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
-			RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
-		i8.u32[1] = i8.u32[1] +
-			(uint8_t)tbl[1] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
-		tbl[1] = *(const uint16_t *)&lpm->tbl8[i8.u32[1]];
-	}
-	if (unlikely((pt >> 32 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
-			RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
-		i8.u32[2] = i8.u32[2] +
-			(uint8_t)tbl[2] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
-		tbl[2] = *(const uint16_t *)&lpm->tbl8[i8.u32[2]];
-	}
-	if (unlikely((pt >> 48 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
-			RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
-		i8.u32[3] = i8.u32[3] +
-			(uint8_t)tbl[3] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
-		tbl[3] = *(const uint16_t *)&lpm->tbl8[i8.u32[3]];
-	}
-
-	hop[0] = (tbl[0] & RTE_LPM_LOOKUP_SUCCESS) ? (uint8_t)tbl[0] : defv;
-	hop[1] = (tbl[1] & RTE_LPM_LOOKUP_SUCCESS) ? (uint8_t)tbl[1] : defv;
-	hop[2] = (tbl[2] & RTE_LPM_LOOKUP_SUCCESS) ? (uint8_t)tbl[2] : defv;
-	hop[3] = (tbl[3] & RTE_LPM_LOOKUP_SUCCESS) ? (uint8_t)tbl[3] : defv;
-}
 void
 rte_lpm_lookupx4(const struct rte_lpm *lpm, __m128i ip, uint32_t hop[4],
 	uint32_t defv);
-- 
1.9.1

-------------- next part --------------
>From e54e01b6edcc820230b7e47de40920a00031b6c1 Mon Sep 17 00:00:00 2001
From: Matthew Hall <mhall at mhcomputing.net>
Date: Sat, 27 Jun 2015 22:48:07 +0000
Subject: [PATCH 3/8] rte_lpm.c: use 24 bit extended next hop

Signed-off-by: Matthew Hall <mhall at mhcomputing.net>
---
 lib/librte_lpm/rte_lpm.c | 184 ++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 174 insertions(+), 10 deletions(-)

diff --git a/lib/librte_lpm/rte_lpm.c b/lib/librte_lpm/rte_lpm.c
index 163ba3c..d9cb007 100644
--- a/lib/librte_lpm/rte_lpm.c
+++ b/lib/librte_lpm/rte_lpm.c
@@ -159,8 +159,8 @@ rte_lpm_create(const char *name, int socket_id, int max_rules,
 
 	lpm_list = RTE_TAILQ_CAST(rte_lpm_tailq.head, rte_lpm_list);
 
-	RTE_BUILD_BUG_ON(sizeof(struct rte_lpm_tbl24_entry) != 2);
-	RTE_BUILD_BUG_ON(sizeof(struct rte_lpm_tbl8_entry) != 2);
+	/* RTE_BUILD_BUG_ON(sizeof(struct rte_lpm_tbl24_entry) != 2); */
+	/* RTE_BUILD_BUG_ON(sizeof(struct rte_lpm_tbl8_entry) != 2); */
 
 	/* Check user arguments. */
 	if ((name == NULL) || (socket_id < -1) || (max_rules == 0)){
@@ -261,7 +261,7 @@ rte_lpm_free(struct rte_lpm *lpm)
  */
 static inline int32_t
 rule_add(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth,
-	uint8_t next_hop)
+	uint32_t next_hop)
 {
 	uint32_t rule_gindex, rule_index, last_rule;
 	int i;
@@ -418,7 +418,7 @@ tbl8_free(struct rte_lpm_tbl8_entry *tbl8, uint32_t tbl8_group_start)
 
 static inline int32_t
 add_depth_small(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
-		uint8_t next_hop)
+		uint32_t next_hop)
 {
 	uint32_t tbl24_index, tbl24_range, tbl8_index, tbl8_group_end, i, j;
 
@@ -486,7 +486,7 @@ add_depth_small(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
 
 static inline int32_t
 add_depth_big(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth,
-		uint8_t next_hop)
+		uint32_t next_hop)
 {
 	uint32_t tbl24_index;
 	int32_t tbl8_group_index, tbl8_group_start, tbl8_group_end, tbl8_index,
@@ -621,7 +621,7 @@ add_depth_big(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth,
  */
 int
 rte_lpm_add(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
-		uint8_t next_hop)
+		uint32_t next_hop)
 {
 	int32_t rule_index, status = 0;
 	uint32_t ip_masked;
@@ -665,7 +665,7 @@ rte_lpm_add(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
  */
 int
 rte_lpm_is_rule_present(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
-uint8_t *next_hop)
+uint32_t *next_hop)
 {
 	uint32_t ip_masked;
 	int32_t rule_index;
@@ -681,7 +681,7 @@ uint8_t *next_hop)
 	rule_index = rule_find(lpm, ip_masked, depth);
 
 	if (rule_index >= 0) {
-		*next_hop = lpm->rules_tbl[rule_index].next_hop;
+		*next_hop = lpm->rules_tbl[rule_index].next_hop & RTE_LPM_NEXT_HOP_BITMASK;
 		return 1;
 	}
 
@@ -771,8 +771,7 @@ delete_depth_small(struct rte_lpm *lpm, uint32_t ip_masked,
 		struct rte_lpm_tbl8_entry new_tbl8_entry = {
 			.valid = VALID,
 			.depth = sub_rule_depth,
-			.next_hop = lpm->rules_tbl
-			[sub_rule_index].next_hop,
+			.next_hop = lpm->rules_tbl[sub_rule_index].next_hop,
 		};
 
 		for (i = tbl24_index; i < (tbl24_index + tbl24_range); i++) {
@@ -1012,3 +1011,168 @@ rte_lpm_delete_all(struct rte_lpm *lpm)
 	/* Delete all rules form the rules table. */
 	memset(lpm->rules_tbl, 0, sizeof(lpm->rules_tbl[0]) * lpm->max_rules);
 }
+
+int
+rte_lpm_lookup(struct rte_lpm *lpm, uint32_t ip, uint32_t *next_hop)
+{
+	unsigned tbl24_index = (ip >> 8);
+	uint32_t tbl_entry;
+
+	/* DEBUG: Check user input arguments. */
+	RTE_LPM_RETURN_IF_TRUE(((lpm == NULL) || (next_hop == NULL)), -EINVAL);
+
+	/* Copy tbl24 entry */
+	tbl_entry = *(const uint32_t *)&lpm->tbl24[tbl24_index];
+
+	/* Copy tbl8 entry (only if needed) */
+	if (unlikely((tbl_entry & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
+			RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
+
+		unsigned tbl8_index = (uint8_t)ip +
+				((uint32_t)tbl_entry * RTE_LPM_TBL8_GROUP_NUM_ENTRIES);
+
+		tbl_entry = *(const uint32_t *)&lpm->tbl8[tbl8_index];
+	}
+
+	*next_hop = (uint32_t)tbl_entry & RTE_LPM_NEXT_HOP_BITMASK;
+	return (tbl_entry & RTE_LPM_LOOKUP_SUCCESS) ? 0 : -ENOENT;
+}
+
+int
+rte_lpm_lookup_bulk(const struct rte_lpm *lpm, const uint32_t * ips,
+		uint32_t * next_hops, const unsigned n)
+{
+	unsigned i;
+	unsigned tbl24_indexes[n];
+
+	/* DEBUG: Check user input arguments. */
+	RTE_LPM_RETURN_IF_TRUE(((lpm == NULL) || (ips == NULL) ||
+			(next_hops == NULL)), -EINVAL);
+
+	for (i = 0; i < n; i++) {
+		tbl24_indexes[i] = ips[i] >> 8;
+	}
+
+	for (i = 0; i < n; i++) {
+		/* Simply copy tbl24 entry to output */
+		next_hops[i] = *(const uint32_t *)&lpm->tbl24[tbl24_indexes[i]];
+
+		/* Overwrite output with tbl8 entry if needed */
+		if (unlikely((next_hops[i] & RTE_LPM_VALID_EXT_ENTRY_BITMASK) ==
+				RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
+
+			unsigned tbl8_index = (uint8_t)ips[i] +
+					((uint32_t)next_hops[i] *
+					 RTE_LPM_TBL8_GROUP_NUM_ENTRIES);
+
+			next_hops[i] = *(const uint32_t *)&lpm->tbl8[tbl8_index] & RTE_LPM_NEXT_HOP_BITMASK;
+		}
+	}
+	return 0;
+}
+
+
+static
+__m128i _mm_not_si128(__m128i arg)
+{
+    __m128i minusone = _mm_set1_epi32(0xffffffff);
+    return _mm_xor_si128(arg, minusone);
+}
+
+/**
+ * Lookup four IP addresses in an LPM table.
+ *
+ * @param lpm
+ *   LPM object handle
+ * @param ip
+ *   Four IPs to be looked up in the LPM table
+ * @param hop
+ *   Next hop of the most specific rule found for IP (valid on lookup hit only).
+ *   This is an 4 elements array of two byte values.
+ *   If the lookup was succesfull for the given IP, then least significant byte
+ *   of the corresponding element is the  actual next hop and the most
+ *   significant byte is zero.
+ *   If the lookup for the given IP failed, then corresponding element would
+ *   contain default value, see description of then next parameter.
+ * @param defv
+ *   Default value to populate into corresponding element of hop[] array,
+ *   if lookup would fail.
+ */
+void
+rte_lpm_lookupx4(const struct rte_lpm *lpm, __m128i ip, uint32_t hop[4],
+	uint32_t defv)
+{
+	rte_xmm_t tbl24_i;
+	rte_xmm_t tbl8_i;
+	rte_xmm_t tbl_r;
+	rte_xmm_t tbl_h;
+	rte_xmm_t tbl_r_ok;
+
+	rte_xmm_t mask_8;
+	rte_xmm_t mask_ve;
+	rte_xmm_t mask_v;
+	rte_xmm_t mask_h;
+	rte_xmm_t mask_hi;
+
+	mask_8.x = _mm_set1_epi32(UINT8_MAX);
+
+	/*
+	 * RTE_LPM_VALID_EXT_ENTRY_BITMASK for 4 LPM entries
+	 * as one 64-bit value (0x0300030003000300).
+	 */
+	mask_ve.x = _mm_set1_epi32(RTE_LPM_VALID_EXT_ENTRY_BITMASK);
+
+	/*
+	 * RTE_LPM_LOOKUP_SUCCESS for 4 LPM entries
+	 * as one 64-bit value (0x0100010001000100).
+	 */
+	mask_v.x = _mm_set1_epi32(RTE_LPM_LOOKUP_SUCCESS);
+
+	mask_h.x = _mm_set1_epi32(RTE_LPM_NEXT_HOP_BITMASK);
+	mask_hi.x = _mm_not_si128(mask_h.x);
+
+	/* get 4 indexes for tbl24[]. */
+	tbl24_i.x = _mm_srli_epi32(ip, CHAR_BIT);
+
+	/* extract values from tbl24[] */
+	tbl_r.u32[0] = *(const uint32_t *) &lpm->tbl24[tbl24_i.u32[0]];
+	tbl_r.u32[1] = *(const uint32_t *) &lpm->tbl24[tbl24_i.u32[1]];
+	tbl_r.u32[2] = *(const uint32_t *) &lpm->tbl24[tbl24_i.u32[2]];
+	tbl_r.u32[3] = *(const uint32_t *) &lpm->tbl24[tbl24_i.u32[3]];
+
+	/* search successfully finished for all 4 IP addresses. */
+	tbl_r_ok.x = _mm_and_si128(tbl_r.x, mask_ve.x);
+	tbl_h.x = _mm_and_si128(tbl_r.x, mask_hi.x);
+	if (likely(_mm_test_all_ones(_mm_cmpeq_epi32(tbl_r_ok.x, mask_v.x)))) {
+		*(__m128i*) &hop = tbl_h.x;
+		return;
+	}
+
+	/* get 4 indexes for tbl8[]. */
+	tbl8_i.x = _mm_and_si128(ip, mask_8.x);
+
+	if (unlikely(tbl_r_ok.u32[0] == RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
+		tbl8_i.u32[0] = tbl8_i.u32[0] + tbl_h.u32[0] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+		tbl_r.u32[0] = *(const uint32_t *) &lpm->tbl8[tbl8_i.u32[0]];
+	}
+	if (unlikely(tbl_r_ok.u32[1] == RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
+		tbl8_i.u32[1] = tbl8_i.u32[1] + tbl_h.u32[1] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+		tbl_r.u32[1] = *(const uint32_t *) &lpm->tbl8[tbl8_i.u32[1]];
+	}
+	if (unlikely(tbl_r_ok.u32[2] == RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
+		tbl8_i.u32[2] = tbl8_i.u32[2] + tbl_h.u32[2] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+		tbl_r.u32[2] = *(const uint32_t *) &lpm->tbl8[tbl8_i.u32[2]];
+	}
+	if (unlikely(tbl_r_ok.u32[3] == RTE_LPM_VALID_EXT_ENTRY_BITMASK)) {
+		tbl8_i.u32[3] = tbl8_i.u32[3] + tbl_h.u32[3] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES;
+		tbl_r.u32[3] = *(const uint32_t *) &lpm->tbl8[tbl8_i.u32[3]];
+	}
+
+	tbl_r_ok.x = _mm_and_si128(tbl_r.x, mask_v.x);
+	tbl_h.x = _mm_and_si128(tbl_r.x, mask_h.x);
+
+	hop[0] = tbl_r_ok.u32[0] ? tbl_h.u32[0] : defv;
+	hop[1] = tbl_r_ok.u32[1] ? tbl_h.u32[1] : defv;
+	hop[2] = tbl_r_ok.u32[2] ? tbl_h.u32[2] : defv;
+	hop[3] = tbl_r_ok.u32[3] ? tbl_h.u32[3] : defv;
+}
-- 
1.9.1

-------------- next part --------------
>From 402d1bce8dd05b31fc6e457ca89bcd0b7160aa69 Mon Sep 17 00:00:00 2001
From: Matthew Hall <mhall at mhcomputing.net>
Date: Sat, 27 Jun 2015 22:54:41 +0000
Subject: [PATCH 4/8] rte_lpm6.{c,h}: use 24 bit extended next hop

Signed-off-by: Matthew Hall <mhall at mhcomputing.net>
---
 lib/librte_lpm/rte_lpm6.c | 27 ++++++++++++++-------------
 lib/librte_lpm/rte_lpm6.h |  8 ++++----
 2 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/lib/librte_lpm/rte_lpm6.c b/lib/librte_lpm/rte_lpm6.c
index 6c2b293..8d7602f 100644
--- a/lib/librte_lpm/rte_lpm6.c
+++ b/lib/librte_lpm/rte_lpm6.c
@@ -96,9 +96,9 @@ struct rte_lpm6_tbl_entry {
 
 /** Rules tbl entry structure. */
 struct rte_lpm6_rule {
-	uint8_t ip[RTE_LPM6_IPV6_ADDR_SIZE]; /**< Rule IP address. */
-	uint8_t next_hop; /**< Rule next hop. */
-	uint8_t depth; /**< Rule depth. */
+	uint8_t  ip[RTE_LPM6_IPV6_ADDR_SIZE]; /**< Rule IP address. */
+	uint32_t next_hop :24; /**< Rule next hop. */
+	uint32_t depth    :8; /**< Rule depth. */
 };
 
 /** LPM6 structure. */
@@ -157,7 +157,7 @@ rte_lpm6_create(const char *name, int socket_id,
 
 	lpm_list = RTE_TAILQ_CAST(rte_lpm6_tailq.head, rte_lpm6_list);
 
-	RTE_BUILD_BUG_ON(sizeof(struct rte_lpm6_tbl_entry) != sizeof(uint32_t));
+	/* RTE_BUILD_BUG_ON(sizeof(struct rte_lpm6_tbl_entry) != sizeof(uint32_t)); */
 
 	/* Check user arguments. */
 	if ((name == NULL) || (socket_id < -1) || (config == NULL) ||
@@ -295,7 +295,7 @@ rte_lpm6_free(struct rte_lpm6 *lpm)
  * the nexthop if so. Otherwise it adds a new rule if enough space is available.
  */
 static inline int32_t
-rule_add(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t next_hop, uint8_t depth)
+rule_add(struct rte_lpm6 *lpm, uint8_t *ip, uint32_t next_hop, uint8_t depth)
 {
 	uint32_t rule_index;
 
@@ -338,7 +338,7 @@ rule_add(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t next_hop, uint8_t depth)
  */
 static void
 expand_rule(struct rte_lpm6 *lpm, uint32_t tbl8_gindex, uint8_t depth,
-		uint8_t next_hop)
+		uint32_t next_hop)
 {
 	uint32_t tbl8_group_end, tbl8_gindex_next, j;
 
@@ -375,7 +375,7 @@ expand_rule(struct rte_lpm6 *lpm, uint32_t tbl8_gindex, uint8_t depth,
 static inline int
 add_step(struct rte_lpm6 *lpm, struct rte_lpm6_tbl_entry *tbl,
 		struct rte_lpm6_tbl_entry **tbl_next, uint8_t *ip, uint8_t bytes,
-		uint8_t first_byte, uint8_t depth, uint8_t next_hop)
+		uint8_t first_byte, uint8_t depth, uint32_t next_hop)
 {
 	uint32_t tbl_index, tbl_range, tbl8_group_start, tbl8_group_end, i;
 	int32_t tbl8_gindex;
@@ -506,7 +506,7 @@ add_step(struct rte_lpm6 *lpm, struct rte_lpm6_tbl_entry *tbl,
  */
 int
 rte_lpm6_add(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth,
-		uint8_t next_hop)
+		uint32_t next_hop)
 {
 	struct rte_lpm6_tbl_entry *tbl;
 	struct rte_lpm6_tbl_entry *tbl_next;
@@ -567,7 +567,7 @@ rte_lpm6_add(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth,
 static inline int
 lookup_step(const struct rte_lpm6 *lpm, const struct rte_lpm6_tbl_entry *tbl,
 		const struct rte_lpm6_tbl_entry **tbl_next, uint8_t *ip,
-		uint8_t first_byte, uint8_t *next_hop)
+		uint8_t first_byte, uint32_t *next_hop)
 {
 	uint32_t tbl8_index, tbl_entry;
 
@@ -596,7 +596,7 @@ lookup_step(const struct rte_lpm6 *lpm, const struct rte_lpm6_tbl_entry *tbl,
  * Looks up an IP
  */
 int
-rte_lpm6_lookup(const struct rte_lpm6 *lpm, uint8_t *ip, uint8_t *next_hop)
+rte_lpm6_lookup(const struct rte_lpm6 *lpm, uint8_t *ip, uint32_t *next_hop)
 {
 	const struct rte_lpm6_tbl_entry *tbl;
 	const struct rte_lpm6_tbl_entry *tbl_next;
@@ -630,13 +630,14 @@ rte_lpm6_lookup(const struct rte_lpm6 *lpm, uint8_t *ip, uint8_t *next_hop)
 int
 rte_lpm6_lookup_bulk_func(const struct rte_lpm6 *lpm,
 		uint8_t ips[][RTE_LPM6_IPV6_ADDR_SIZE],
-		int16_t * next_hops, unsigned n)
+		uint32_t * next_hops, unsigned n)
 {
 	unsigned i;
 	const struct rte_lpm6_tbl_entry *tbl;
 	const struct rte_lpm6_tbl_entry *tbl_next;
 	uint32_t tbl24_index;
-	uint8_t first_byte, next_hop;
+	uint8_t first_byte;
+	uint32_t next_hop;
 	int status;
 
 	/* DEBUG: Check user input arguments. */
@@ -697,7 +698,7 @@ rule_find(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth)
  */
 int
 rte_lpm6_is_rule_present(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth,
-uint8_t *next_hop)
+uint32_t *next_hop)
 {
 	uint8_t ip_masked[RTE_LPM6_IPV6_ADDR_SIZE];
 	int32_t rule_index;
diff --git a/lib/librte_lpm/rte_lpm6.h b/lib/librte_lpm/rte_lpm6.h
index cedcea8..dd90beb 100644
--- a/lib/librte_lpm/rte_lpm6.h
+++ b/lib/librte_lpm/rte_lpm6.h
@@ -121,7 +121,7 @@ rte_lpm6_free(struct rte_lpm6 *lpm);
  */
 int
 rte_lpm6_add(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth,
-		uint8_t next_hop);
+		uint32_t next_hop);
 
 /**
  * Check if a rule is present in the LPM table,
@@ -140,7 +140,7 @@ rte_lpm6_add(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth,
  */
 int
 rte_lpm6_is_rule_present(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth,
-uint8_t *next_hop);
+uint32_t *next_hop);
 
 /**
  * Delete a rule from the LPM table.
@@ -197,7 +197,7 @@ rte_lpm6_delete_all(struct rte_lpm6 *lpm);
  *   -EINVAL for incorrect arguments, -ENOENT on lookup miss, 0 on lookup hit
  */
 int
-rte_lpm6_lookup(const struct rte_lpm6 *lpm, uint8_t *ip, uint8_t *next_hop);
+rte_lpm6_lookup(const struct rte_lpm6 *lpm, uint8_t *ip, uint32_t *next_hop);
 
 /**
  * Lookup multiple IP addresses in an LPM table.
@@ -218,7 +218,7 @@ rte_lpm6_lookup(const struct rte_lpm6 *lpm, uint8_t *ip, uint8_t *next_hop);
 int
 rte_lpm6_lookup_bulk_func(const struct rte_lpm6 *lpm,
 		uint8_t ips[][RTE_LPM6_IPV6_ADDR_SIZE],
-		int16_t * next_hops, unsigned n);
+		uint32_t * next_hops, unsigned n);
 
 #ifdef __cplusplus
 }
-- 
1.9.1

-------------- next part --------------
>From e3ebfc026f7871d3014a0b9f8881579623b6592b Mon Sep 17 00:00:00 2001
From: Matthew Hall <mhall at mhcomputing.net>
Date: Sat, 27 Jun 2015 22:55:20 +0000
Subject: [PATCH 5/8] librte_table: use uint32_t for next hops from librte_lpm

Signed-off-by: Matthew Hall <mhall at mhcomputing.net>
---
 lib/librte_table/rte_table_lpm.c      | 6 +++---
 lib/librte_table/rte_table_lpm_ipv6.c | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/lib/librte_table/rte_table_lpm.c b/lib/librte_table/rte_table_lpm.c
index 849d899..2af2eee 100644
--- a/lib/librte_table/rte_table_lpm.c
+++ b/lib/librte_table/rte_table_lpm.c
@@ -202,7 +202,7 @@ rte_table_lpm_entry_add(
 	struct rte_table_lpm_key *ip_prefix = (struct rte_table_lpm_key *) key;
 	uint32_t nht_pos, nht_pos0_valid;
 	int status;
-	uint8_t nht_pos0 = 0;
+	uint32_t nht_pos0 = 0;
 
 	/* Check input parameters */
 	if (lpm == NULL) {
@@ -268,7 +268,7 @@ rte_table_lpm_entry_delete(
 {
 	struct rte_table_lpm *lpm = (struct rte_table_lpm *) table;
 	struct rte_table_lpm_key *ip_prefix = (struct rte_table_lpm_key *) key;
-	uint8_t nht_pos;
+	uint32_t nht_pos;
 	int status;
 
 	/* Check input parameters */
@@ -342,7 +342,7 @@ rte_table_lpm_lookup(
 			uint32_t ip = rte_bswap32(
 				RTE_MBUF_METADATA_UINT32(pkt, lpm->offset));
 			int status;
-			uint8_t nht_pos;
+			uint32_t nht_pos;
 
 			status = rte_lpm_lookup(lpm->lpm, ip, &nht_pos);
 			if (status == 0) {
diff --git a/lib/librte_table/rte_table_lpm_ipv6.c b/lib/librte_table/rte_table_lpm_ipv6.c
index e9bc6a7..81a948e 100644
--- a/lib/librte_table/rte_table_lpm_ipv6.c
+++ b/lib/librte_table/rte_table_lpm_ipv6.c
@@ -213,7 +213,7 @@ rte_table_lpm_ipv6_entry_add(
 		(struct rte_table_lpm_ipv6_key *) key;
 	uint32_t nht_pos, nht_pos0_valid;
 	int status;
-	uint8_t nht_pos0;
+	uint32_t nht_pos0;
 
 	/* Check input parameters */
 	if (lpm == NULL) {
@@ -280,7 +280,7 @@ rte_table_lpm_ipv6_entry_delete(
 	struct rte_table_lpm_ipv6 *lpm = (struct rte_table_lpm_ipv6 *) table;
 	struct rte_table_lpm_ipv6_key *ip_prefix =
 		(struct rte_table_lpm_ipv6_key *) key;
-	uint8_t nht_pos;
+	uint32_t nht_pos;
 	int status;
 
 	/* Check input parameters */
@@ -356,7 +356,7 @@ rte_table_lpm_ipv6_lookup(
 			uint8_t *ip = RTE_MBUF_METADATA_UINT8_PTR(pkt,
 				lpm->offset);
 			int status;
-			uint8_t nht_pos;
+			uint32_t nht_pos;
 
 			status = rte_lpm6_lookup(lpm->lpm, ip, &nht_pos);
 			if (status == 0) {
-- 
1.9.1

-------------- next part --------------
>From e975f935595c6e901522dbaf10be598573276eaa Mon Sep 17 00:00:00 2001
From: Matthew Hall <mhall at mhcomputing.net>
Date: Sat, 27 Jun 2015 22:34:18 +0000
Subject: [PATCH 6/8] test_lpm*.c: update tests to use 24 bit extended next hop

Signed-off-by: Matthew Hall <mhall at mhcomputing.net>
---
 app/test/test_lpm.c  |  54 +++++++++++++++-----------
 app/test/test_lpm6.c | 108 ++++++++++++++++++++++++++++++---------------------
 2 files changed, 95 insertions(+), 67 deletions(-)

diff --git a/app/test/test_lpm.c b/app/test/test_lpm.c
index 8b4ded9..44e2fb4 100644
--- a/app/test/test_lpm.c
+++ b/app/test/test_lpm.c
@@ -181,7 +181,8 @@ test3(void)
 {
 	struct rte_lpm *lpm = NULL;
 	uint32_t ip = IPv4(0, 0, 0, 0);
-	uint8_t depth = 24, next_hop = 100;
+	uint8_t depth = 24;
+	uint32_t next_hop = 100;
 	int32_t status = 0;
 
 	/* rte_lpm_add: lpm == NULL */
@@ -248,7 +249,7 @@ test5(void)
 #if defined(RTE_LIBRTE_LPM_DEBUG)
 	struct rte_lpm *lpm = NULL;
 	uint32_t ip = IPv4(0, 0, 0, 0);
-	uint8_t next_hop_return = 0;
+	uint32_t next_hop_return = 0;
 	int32_t status = 0;
 
 	/* rte_lpm_lookup: lpm == NULL */
@@ -278,7 +279,8 @@ test6(void)
 {
 	struct rte_lpm *lpm = NULL;
 	uint32_t ip = IPv4(0, 0, 0, 0);
-	uint8_t depth = 24, next_hop_add = 100, next_hop_return = 0;
+	uint8_t depth = 24;
+	uint32_t next_hop_add = 100, next_hop_return = 0;
 	int32_t status = 0;
 
 	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, MAX_RULES, 0);
@@ -309,10 +311,11 @@ int32_t
 test7(void)
 {
 	__m128i ipx4;
-	uint16_t hop[4];
+	uint32_t hop[4];
 	struct rte_lpm *lpm = NULL;
 	uint32_t ip = IPv4(0, 0, 0, 0);
-	uint8_t depth = 32, next_hop_add = 100, next_hop_return = 0;
+	uint8_t depth = 32;
+	uint32_t next_hop_add = 100, next_hop_return = 0;
 	int32_t status = 0;
 
 	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, MAX_RULES, 0);
@@ -355,10 +358,11 @@ int32_t
 test8(void)
 {
 	__m128i ipx4;
-	uint16_t hop[4];
+	uint32_t hop[4];
 	struct rte_lpm *lpm = NULL;
 	uint32_t ip1 = IPv4(127, 255, 255, 255), ip2 = IPv4(128, 0, 0, 0);
-	uint8_t depth, next_hop_add, next_hop_return;
+	uint8_t depth;
+	uint32_t next_hop_add, next_hop_return;
 	int32_t status = 0;
 
 	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, MAX_RULES, 0);
@@ -438,7 +442,8 @@ test9(void)
 {
 	struct rte_lpm *lpm = NULL;
 	uint32_t ip, ip_1, ip_2;
-	uint8_t depth, depth_1, depth_2, next_hop_add, next_hop_add_1,
+	uint8_t depth, depth_1, depth_2;
+	uint32_t next_hop_add, next_hop_add_1,
 		next_hop_add_2, next_hop_return;
 	int32_t status = 0;
 
@@ -602,7 +607,8 @@ test10(void)
 
 	struct rte_lpm *lpm = NULL;
 	uint32_t ip;
-	uint8_t depth, next_hop_add, next_hop_return;
+	uint8_t depth;
+	uint32_t next_hop_add, next_hop_return;
 	int32_t status = 0;
 
 	/* Add rule that covers a TBL24 range previously invalid & lookup
@@ -788,7 +794,8 @@ test11(void)
 
 	struct rte_lpm *lpm = NULL;
 	uint32_t ip;
-	uint8_t depth, next_hop_add, next_hop_return;
+	uint8_t depth;
+	uint32_t next_hop_add, next_hop_return;
 	int32_t status = 0;
 
 	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, MAX_RULES, 0);
@@ -851,10 +858,11 @@ int32_t
 test12(void)
 {
 	__m128i ipx4;
-	uint16_t hop[4];
+	uint32_t hop[4];
 	struct rte_lpm *lpm = NULL;
 	uint32_t ip, i;
-	uint8_t depth, next_hop_add, next_hop_return;
+	uint8_t depth;
+	uint32_t next_hop_add, next_hop_return;
 	int32_t status = 0;
 
 	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, MAX_RULES, 0);
@@ -904,7 +912,8 @@ test13(void)
 {
 	struct rte_lpm *lpm = NULL;
 	uint32_t ip, i;
-	uint8_t depth, next_hop_add_1, next_hop_add_2, next_hop_return;
+	uint8_t depth;
+	uint32_t next_hop_add_1, next_hop_add_2, next_hop_return;
 	int32_t status = 0;
 
 	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, MAX_RULES, 0);
@@ -966,7 +975,8 @@ test14(void)
 
 	struct rte_lpm *lpm = NULL;
 	uint32_t ip;
-	uint8_t depth, next_hop_add, next_hop_return;
+	uint8_t depth;
+	uint32_t next_hop_add, next_hop_return;
 	int32_t status = 0;
 
 	/* Add enough space for 256 rules for every depth */
@@ -1078,10 +1088,10 @@ test17(void)
 	const uint8_t d_ip_10_32 = 32,
 			d_ip_10_24 = 24,
 			d_ip_20_25 = 25;
-	const uint8_t next_hop_ip_10_32 = 100,
+	const uint32_t next_hop_ip_10_32 = 100,
 			next_hop_ip_10_24 = 105,
 			next_hop_ip_20_25 = 111;
-	uint8_t next_hop_return = 0;
+	uint32_t next_hop_return = 0;
 	int32_t status = 0;
 
 	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, MAX_RULES, 0);
@@ -1092,7 +1102,7 @@ test17(void)
 		return -1;
 
 	status = rte_lpm_lookup(lpm, ip_10_32, &next_hop_return);
-	uint8_t test_hop_10_32 = next_hop_return;
+	uint32_t test_hop_10_32 = next_hop_return;
 	TEST_LPM_ASSERT(status == 0);
 	TEST_LPM_ASSERT(next_hop_return == next_hop_ip_10_32);
 
@@ -1101,7 +1111,7 @@ test17(void)
 			return -1;
 
 	status = rte_lpm_lookup(lpm, ip_10_24, &next_hop_return);
-	uint8_t test_hop_10_24 = next_hop_return;
+	uint32_t test_hop_10_24 = next_hop_return;
 	TEST_LPM_ASSERT(status == 0);
 	TEST_LPM_ASSERT(next_hop_return == next_hop_ip_10_24);
 
@@ -1110,7 +1120,7 @@ test17(void)
 		return -1;
 
 	status = rte_lpm_lookup(lpm, ip_20_25, &next_hop_return);
-	uint8_t test_hop_20_25 = next_hop_return;
+	uint32_t test_hop_20_25 = next_hop_return;
 	TEST_LPM_ASSERT(status == 0);
 	TEST_LPM_ASSERT(next_hop_return == next_hop_ip_20_25);
 
@@ -1175,7 +1185,7 @@ perf_test(void)
 	struct rte_lpm *lpm = NULL;
 	uint64_t begin, total_time, lpm_used_entries = 0;
 	unsigned i, j;
-	uint8_t next_hop_add = 0xAA, next_hop_return = 0;
+	uint32_t next_hop_add = 0xAA, next_hop_return = 0;
 	int status = 0;
 	uint64_t cache_line_counter = 0;
 	int64_t count = 0;
@@ -1252,7 +1262,7 @@ perf_test(void)
 	count = 0;
 	for (i = 0; i < ITERATIONS; i ++) {
 		static uint32_t ip_batch[BATCH_SIZE];
-		uint16_t next_hops[BULK_SIZE];
+		uint32_t next_hops[BULK_SIZE];
 
 		/* Create array of random IP addresses */
 		for (j = 0; j < BATCH_SIZE; j ++)
@@ -1279,7 +1289,7 @@ perf_test(void)
 	count = 0;
 	for (i = 0; i < ITERATIONS; i++) {
 		static uint32_t ip_batch[BATCH_SIZE];
-		uint16_t next_hops[4];
+		uint32_t next_hops[4];
 
 		/* Create array of random IP addresses */
 		for (j = 0; j < BATCH_SIZE; j++)
diff --git a/app/test/test_lpm6.c b/app/test/test_lpm6.c
index 1f88d7a..d5ba20a 100644
--- a/app/test/test_lpm6.c
+++ b/app/test/test_lpm6.c
@@ -291,7 +291,8 @@ test4(void)
 	struct rte_lpm6_config config;
 
 	uint8_t ip[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-	uint8_t depth = 24, next_hop = 100;
+	uint8_t depth = 24;
+	uint32_t next_hop = 100;
 	int32_t status = 0;
 
 	config.max_rules = MAX_RULES;
@@ -367,7 +368,7 @@ test6(void)
 	struct rte_lpm6 *lpm = NULL;
 	struct rte_lpm6_config config;
 	uint8_t ip[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-	uint8_t next_hop_return = 0;
+	uint32_t next_hop_return = 0;
 	int32_t status = 0;
 
 	config.max_rules = MAX_RULES;
@@ -405,7 +406,7 @@ test7(void)
 	struct rte_lpm6 *lpm = NULL;
 	struct rte_lpm6_config config;
 	uint8_t ip[10][16];
-	int16_t next_hop_return[10];
+	uint32_t next_hop_return[10];
 	int32_t status = 0;
 
 	config.max_rules = MAX_RULES;
@@ -482,7 +483,8 @@ test9(void)
 	struct rte_lpm6 *lpm = NULL;
 	struct rte_lpm6_config config;
 	uint8_t ip[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-	uint8_t depth = 16, next_hop_add = 100, next_hop_return = 0;
+	uint8_t depth = 16;
+	uint32_t next_hop_add = 100, next_hop_return = 0;
 	int32_t status = 0;
 	uint8_t i;
 
@@ -526,7 +528,8 @@ test10(void)
 	struct rte_lpm6 *lpm = NULL;
 	struct rte_lpm6_config config;
 	uint8_t ip[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-	uint8_t depth, next_hop_add = 100;
+	uint8_t depth;
+	uint32_t next_hop_add = 100;
 	int32_t status = 0;
 	int i;
 
@@ -570,7 +573,8 @@ test11(void)
 	struct rte_lpm6 *lpm = NULL;
 	struct rte_lpm6_config config;
 	uint8_t ip[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-	uint8_t depth, next_hop_add = 100;
+	uint8_t depth;
+	uint32_t next_hop_add = 100;
 	int32_t status = 0;
 
 	config.max_rules = MAX_RULES;
@@ -630,7 +634,8 @@ test12(void)
 	struct rte_lpm6 *lpm = NULL;
 	struct rte_lpm6_config config;
 	uint8_t ip[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-	uint8_t depth, next_hop_add = 100;
+	uint8_t depth;
+	uint32_t next_hop_add = 100;
 	int32_t status = 0;
 
 	config.max_rules = MAX_RULES;
@@ -668,7 +673,8 @@ test13(void)
 	struct rte_lpm6 *lpm = NULL;
 	struct rte_lpm6_config config;
 	uint8_t ip[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-	uint8_t depth, next_hop_add = 100;
+	uint8_t depth;
+	uint32_t next_hop_add = 100;
 	int32_t status = 0;
 
 	config.max_rules = 2;
@@ -715,7 +721,8 @@ test14(void)
 	struct rte_lpm6 *lpm = NULL;
 	struct rte_lpm6_config config;
 	uint8_t ip[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-	uint8_t depth = 25, next_hop_add = 100;
+	uint8_t depth = 25;
+	uint32_t next_hop_add = 100;
 	int32_t status = 0;
 	int i, j;
 
@@ -767,7 +774,8 @@ test15(void)
 	struct rte_lpm6 *lpm = NULL;
 	struct rte_lpm6_config config;
 	uint8_t ip[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-	uint8_t depth = 24, next_hop_add = 100, next_hop_return = 0;
+	uint8_t depth = 24;
+	uint32_t next_hop_add = 100, next_hop_return = 0;
 	int32_t status = 0;
 
 	config.max_rules = MAX_RULES;
@@ -803,7 +811,8 @@ test16(void)
 	struct rte_lpm6 *lpm = NULL;
 	struct rte_lpm6_config config;
 	uint8_t ip[] = {12,12,1,0,0,0,0,0,0,0,0,0,0,0,0,0};
-	uint8_t depth = 128, next_hop_add = 100, next_hop_return = 0;
+	uint8_t depth = 128;
+	uint32_t next_hop_add = 100, next_hop_return = 0;
 	int32_t status = 0;
 
 	config.max_rules = MAX_RULES;
@@ -847,7 +856,8 @@ test17(void)
 	uint8_t ip1[] = {127,255,255,255,255,255,255,255,255,
 			255,255,255,255,255,255,255};
 	uint8_t ip2[] = {128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-	uint8_t depth, next_hop_add, next_hop_return;
+	uint8_t depth;
+	uint32_t next_hop_add, next_hop_return;
 	int32_t status = 0;
 
 	config.max_rules = MAX_RULES;
@@ -912,7 +922,8 @@ test18(void)
 	struct rte_lpm6 *lpm = NULL;
 	struct rte_lpm6_config config;
 	uint8_t ip[16], ip_1[16], ip_2[16];
-	uint8_t depth, depth_1, depth_2, next_hop_add, next_hop_add_1,
+	uint8_t depth, depth_1, depth_2;
+	uint32_t next_hop_add, next_hop_add_1,
 		next_hop_add_2, next_hop_return;
 	int32_t status = 0;
 
@@ -1074,7 +1085,8 @@ test19(void)
 	struct rte_lpm6 *lpm = NULL;
 	struct rte_lpm6_config config;
 	uint8_t ip[16];
-	uint8_t depth, next_hop_add, next_hop_return;
+	uint8_t depth;
+	uint32_t next_hop_add, next_hop_return;
 	int32_t status = 0;
 
 	config.max_rules = MAX_RULES;
@@ -1272,7 +1284,8 @@ test20(void)
 	struct rte_lpm6 *lpm = NULL;
 	struct rte_lpm6_config config;
 	uint8_t ip[16];
-	uint8_t depth, next_hop_add, next_hop_return;
+	uint8_t depth;
+	uint32_t next_hop_add, next_hop_return;
 	int32_t status = 0;
 
 	config.max_rules = MAX_RULES;
@@ -1339,8 +1352,9 @@ test21(void)
 	struct rte_lpm6 *lpm = NULL;
 	struct rte_lpm6_config config;
 	uint8_t ip_batch[4][16];
-	uint8_t depth, next_hop_add;
-	int16_t next_hop_return[4];
+	uint8_t depth;
+	uint32_t next_hop_add;
+	uint32_t next_hop_return[4];
 	int32_t status = 0;
 
 	config.max_rules = MAX_RULES;
@@ -1377,7 +1391,7 @@ test21(void)
 			next_hop_return, 4);
 	TEST_LPM_ASSERT(status == 0 && next_hop_return[0] == 100
 			&& next_hop_return[1] == 101 && next_hop_return[2] == 102
-			&& next_hop_return[3] == -1);
+			&& next_hop_return[3] == (uint32_t) -1);
 
 	rte_lpm6_free(lpm);
 
@@ -1397,8 +1411,9 @@ test22(void)
 	struct rte_lpm6 *lpm = NULL;
 	struct rte_lpm6_config config;
 	uint8_t ip_batch[5][16];
-	uint8_t depth[5], next_hop_add;
-	int16_t next_hop_return[5];
+	uint8_t depth[5];
+	uint32_t next_hop_add;
+	uint32_t next_hop_return[5];
 	int32_t status = 0;
 
 	config.max_rules = MAX_RULES;
@@ -1458,8 +1473,8 @@ test22(void)
 
 	status = rte_lpm6_lookup_bulk_func(lpm, ip_batch,
 			next_hop_return, 5);
-	TEST_LPM_ASSERT(status == 0 && next_hop_return[0] == -1
-			&& next_hop_return[1] == -1 && next_hop_return[2] == 103
+	TEST_LPM_ASSERT(status == 0 && next_hop_return[0] == (uint32_t) -1
+			&& next_hop_return[1] == (uint32_t) -1 && next_hop_return[2] == 103
 			&& next_hop_return[3] == 104 && next_hop_return[4] == 105);
 
 	/* Use the delete_bulk function to delete one more. Lookup again */
@@ -1469,8 +1484,8 @@ test22(void)
 
 	status = rte_lpm6_lookup_bulk_func(lpm, ip_batch,
 			next_hop_return, 5);
-	TEST_LPM_ASSERT(status == 0 && next_hop_return[0] == -1
-			&& next_hop_return[1] == -1 && next_hop_return[2] == -1
+	TEST_LPM_ASSERT(status == 0 && next_hop_return[0] == (uint32_t) -1
+			&& next_hop_return[1] == (uint32_t) -1 && next_hop_return[2] == (uint32_t) -1
 			&& next_hop_return[3] == 104 && next_hop_return[4] == 105);
 
 	/* Use the delete_bulk function to delete two, one invalid. Lookup again */
@@ -1482,9 +1497,9 @@ test22(void)
 	IPv6(ip_batch[4], 128, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
 	status = rte_lpm6_lookup_bulk_func(lpm, ip_batch,
 			next_hop_return, 5);
-	TEST_LPM_ASSERT(status == 0 && next_hop_return[0] == -1
-			&& next_hop_return[1] == -1 && next_hop_return[2] == -1
-			&& next_hop_return[3] == -1 && next_hop_return[4] == 105);
+	TEST_LPM_ASSERT(status == 0 && next_hop_return[0] == (uint32_t) -1
+			&& next_hop_return[1] == (uint32_t) -1 && next_hop_return[2] == (uint32_t) -1
+			&& next_hop_return[3] == (uint32_t) -1 && next_hop_return[4] == 105);
 
 	/* Use the delete_bulk function to delete the remaining one. Lookup again */
 
@@ -1493,9 +1508,9 @@ test22(void)
 
 	status = rte_lpm6_lookup_bulk_func(lpm, ip_batch,
 			next_hop_return, 5);
-	TEST_LPM_ASSERT(status == 0 && next_hop_return[0] == -1
-			&& next_hop_return[1] == -1 && next_hop_return[2] == -1
-			&& next_hop_return[3] == -1 && next_hop_return[4] == -1);
+	TEST_LPM_ASSERT(status == 0 && next_hop_return[0] == (uint32_t) -1
+			&& next_hop_return[1] == (uint32_t) -1 && next_hop_return[2] == (uint32_t) -1
+			&& next_hop_return[3] == (uint32_t) -1 && next_hop_return[4] == (uint32_t) -1);
 
 	rte_lpm6_free(lpm);
 
@@ -1514,7 +1529,8 @@ test23(void)
 	struct rte_lpm6_config config;
 	uint32_t i;
 	uint8_t ip[16];
-	uint8_t depth, next_hop_add, next_hop_return;
+	uint8_t depth;
+	uint32_t next_hop_add, next_hop_return;
 	int32_t status = 0;
 
 	config.max_rules = MAX_RULES;
@@ -1598,7 +1614,8 @@ test25(void)
 	struct rte_lpm6_config config;
 	uint8_t ip[16];
 	uint32_t i;
-	uint8_t depth, next_hop_add, next_hop_return, next_hop_expected;
+	uint8_t depth;
+	uint32_t next_hop_add, next_hop_return, next_hop_expected;
 	int32_t status = 0;
 
 	config.max_rules = MAX_RULES;
@@ -1646,12 +1663,12 @@ test26(void)
 	uint8_t ip_10_24[] = {10, 10, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
 	uint8_t ip_20_25[] = {10, 10, 20, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
 	uint8_t d_ip_10_32 = 32;
-	uint8_t	d_ip_10_24 = 24;
-	uint8_t	d_ip_20_25 = 25;
-	uint8_t next_hop_ip_10_32 = 100;
-	uint8_t	next_hop_ip_10_24 = 105;
-	uint8_t	next_hop_ip_20_25 = 111;
-	uint8_t next_hop_return = 0;
+	uint8_t d_ip_10_24 = 24;
+	uint8_t d_ip_20_25 = 25;
+	uint32_t next_hop_ip_10_32 = 100;
+	uint32_t next_hop_ip_10_24 = 105;
+	uint32_t next_hop_ip_20_25 = 111;
+	uint32_t next_hop_return = 0;
 	int32_t status = 0;
 
 	config.max_rules = MAX_RULES;
@@ -1666,7 +1683,7 @@ test26(void)
 		return -1;
 
 	status = rte_lpm6_lookup(lpm, ip_10_32, &next_hop_return);
-	uint8_t test_hop_10_32 = next_hop_return;
+	uint32_t test_hop_10_32 = next_hop_return;
 	TEST_LPM_ASSERT(status == 0);
 	TEST_LPM_ASSERT(next_hop_return == next_hop_ip_10_32);
 
@@ -1675,7 +1692,7 @@ test26(void)
 			return -1;
 
 	status = rte_lpm6_lookup(lpm, ip_10_24, &next_hop_return);
-	uint8_t test_hop_10_24 = next_hop_return;
+	uint32_t test_hop_10_24 = next_hop_return;
 	TEST_LPM_ASSERT(status == 0);
 	TEST_LPM_ASSERT(next_hop_return == next_hop_ip_10_24);
 
@@ -1684,7 +1701,7 @@ test26(void)
 		return -1;
 
 	status = rte_lpm6_lookup(lpm, ip_20_25, &next_hop_return);
-	uint8_t test_hop_20_25 = next_hop_return;
+	uint32_t test_hop_20_25 = next_hop_return;
 	TEST_LPM_ASSERT(status == 0);
 	TEST_LPM_ASSERT(next_hop_return == next_hop_ip_20_25);
 
@@ -1723,7 +1740,8 @@ test27(void)
 		struct rte_lpm6 *lpm = NULL;
 		struct rte_lpm6_config config;
 		uint8_t ip[] = {128,128,128,128,128,128,128,128,128,128,128,128,128,128,0,0};
-		uint8_t depth = 128, next_hop_add = 100, next_hop_return;
+		uint8_t depth = 128;
+		uint32_t next_hop_add = 100, next_hop_return;
 		int32_t status = 0;
 		int i, j;
 
@@ -1799,7 +1817,7 @@ perf_test(void)
 	struct rte_lpm6_config config;
 	uint64_t begin, total_time;
 	unsigned i, j;
-	uint8_t next_hop_add = 0xAA, next_hop_return = 0;
+	uint32_t next_hop_add = 0xAA, next_hop_return = 0;
 	int status = 0;
 	int64_t count = 0;
 
@@ -1856,7 +1874,7 @@ perf_test(void)
 	count = 0;
 
 	uint8_t ip_batch[NUM_IPS_ENTRIES][16];
-	int16_t next_hops[NUM_IPS_ENTRIES];
+	uint32_t next_hops[NUM_IPS_ENTRIES];
 
 	for (i = 0; i < NUM_IPS_ENTRIES; i++)
 		memcpy(ip_batch[i], large_ips_table[i].ip, 16);
@@ -1869,7 +1887,7 @@ perf_test(void)
 		total_time += rte_rdtsc() - begin;
 
 		for (j = 0; j < NUM_IPS_ENTRIES; j++)
-			if (next_hops[j] < 0)
+			if ((int32_t) next_hops[j] < 0)
 				count++;
 	}
 	printf("BULK LPM Lookup: %.1f cycles (fails = %.1f%%)\n",
-- 
1.9.1

-------------- next part --------------
>From cfaf9c28dbff3bec0c867aa4270b01b04bf50276 Mon Sep 17 00:00:00 2001
From: Matthew Hall <mhall at mhcomputing.net>
Date: Sat, 27 Jun 2015 22:42:42 +0000
Subject: [PATCH 7/8] examples: update examples to use 24 bit extended next hop

Signed-off-by: Matthew Hall <mhall at mhcomputing.net>
---
 examples/ip_reassembly/main.c    |  3 ++-
 examples/l3fwd-power/main.c      |  2 +-
 examples/l3fwd-vf/main.c         |  2 +-
 examples/l3fwd/main.c            | 16 ++++++++--------
 examples/load_balancer/runtime.c |  2 +-
 5 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/examples/ip_reassembly/main.c b/examples/ip_reassembly/main.c
index 741c398..86e33a7 100644
--- a/examples/ip_reassembly/main.c
+++ b/examples/ip_reassembly/main.c
@@ -347,7 +347,8 @@ reassemble(struct rte_mbuf *m, uint8_t portid, uint32_t queue,
 	struct rte_ip_frag_death_row *dr;
 	struct rx_queue *rxq;
 	void *d_addr_bytes;
-	uint8_t next_hop, dst_port;
+	uint32_t next_hop;
+	uint8_t dst_port;
 
 	rxq = &qconf->rx_queue_list[queue];
 
diff --git a/examples/l3fwd-power/main.c b/examples/l3fwd-power/main.c
index 8bb88ce..f647713 100644
--- a/examples/l3fwd-power/main.c
+++ b/examples/l3fwd-power/main.c
@@ -631,7 +631,7 @@ static inline uint8_t
 get_ipv4_dst_port(struct ipv4_hdr *ipv4_hdr, uint8_t portid,
 		lookup_struct_t *ipv4_l3fwd_lookup_struct)
 {
-	uint8_t next_hop;
+	uint32_t next_hop;
 
 	return (uint8_t) ((rte_lpm_lookup(ipv4_l3fwd_lookup_struct,
 			rte_be_to_cpu_32(ipv4_hdr->dst_addr), &next_hop) == 0)?
diff --git a/examples/l3fwd-vf/main.c b/examples/l3fwd-vf/main.c
index 01f610e..193c3ab 100644
--- a/examples/l3fwd-vf/main.c
+++ b/examples/l3fwd-vf/main.c
@@ -440,7 +440,7 @@ get_dst_port(struct ipv4_hdr *ipv4_hdr,  uint8_t portid, lookup_struct_t * l3fwd
 static inline uint8_t
 get_dst_port(struct ipv4_hdr *ipv4_hdr,  uint8_t portid, lookup_struct_t * l3fwd_lookup_struct)
 {
-	uint8_t next_hop;
+	uint32_t next_hop;
 
 	return (uint8_t) ((rte_lpm_lookup(l3fwd_lookup_struct,
 			rte_be_to_cpu_32(ipv4_hdr->dst_addr), &next_hop) == 0)?
diff --git a/examples/l3fwd/main.c b/examples/l3fwd/main.c
index 1f3e5c6..4f31e52 100644
--- a/examples/l3fwd/main.c
+++ b/examples/l3fwd/main.c
@@ -710,7 +710,7 @@ get_ipv6_dst_port(void *ipv6_hdr,  uint8_t portid, lookup_struct_t * ipv6_l3fwd_
 static inline uint8_t
 get_ipv4_dst_port(void *ipv4_hdr,  uint8_t portid, lookup_struct_t * ipv4_l3fwd_lookup_struct)
 {
-	uint8_t next_hop;
+	uint32_t next_hop;
 
 	return (uint8_t) ((rte_lpm_lookup(ipv4_l3fwd_lookup_struct,
 		rte_be_to_cpu_32(((struct ipv4_hdr *)ipv4_hdr)->dst_addr),
@@ -720,7 +720,7 @@ get_ipv4_dst_port(void *ipv4_hdr,  uint8_t portid, lookup_struct_t * ipv4_l3fwd_
 static inline uint8_t
 get_ipv6_dst_port(void *ipv6_hdr,  uint8_t portid, lookup6_struct_t * ipv6_l3fwd_lookup_struct)
 {
-	uint8_t next_hop;
+	uint32_t next_hop;
 	return (uint8_t) ((rte_lpm6_lookup(ipv6_l3fwd_lookup_struct,
 			((struct ipv6_hdr*)ipv6_hdr)->dst_addr, &next_hop) == 0)?
 			next_hop : portid);
@@ -1151,7 +1151,7 @@ l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid, struct lcore_conf *qcon
  * to BAD_PORT value.
  */
 static inline __attribute__((always_inline)) void
-rfc1812_process(struct ipv4_hdr *ipv4_hdr, uint16_t *dp, uint32_t ptype)
+rfc1812_process(struct ipv4_hdr *ipv4_hdr, uint32_t *dp, uint32_t ptype)
 {
 	uint8_t ihl;
 
@@ -1182,7 +1182,7 @@ static inline __attribute__((always_inline)) uint16_t
 get_dst_port(const struct lcore_conf *qconf, struct rte_mbuf *pkt,
 	uint32_t dst_ipv4, uint8_t portid)
 {
-	uint8_t next_hop;
+	uint32_t next_hop;
 	struct ipv6_hdr *ipv6_hdr;
 	struct ether_hdr *eth_hdr;
 
@@ -1205,7 +1205,7 @@ get_dst_port(const struct lcore_conf *qconf, struct rte_mbuf *pkt,
 
 static inline void
 process_packet(struct lcore_conf *qconf, struct rte_mbuf *pkt,
-	uint16_t *dst_port, uint8_t portid)
+	uint32_t *dst_port, uint8_t portid)
 {
 	struct ether_hdr *eth_hdr;
 	struct ipv4_hdr *ipv4_hdr;
@@ -1275,7 +1275,7 @@ processx4_step2(const struct lcore_conf *qconf,
 		uint32_t ipv4_flag,
 		uint8_t portid,
 		struct rte_mbuf *pkt[FWDSTEP],
-		uint16_t dprt[FWDSTEP])
+		uint32_t dprt[FWDSTEP])
 {
 	rte_xmm_t dst;
 	const  __m128i bswap_mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11,
@@ -1301,7 +1301,7 @@ processx4_step2(const struct lcore_conf *qconf,
  * Perform RFC1812 checks and updates for IPV4 packets.
  */
 static inline void
-processx4_step3(struct rte_mbuf *pkt[FWDSTEP], uint16_t dst_port[FWDSTEP])
+processx4_step3(struct rte_mbuf *pkt[FWDSTEP], uint32_t dst_port[FWDSTEP])
 {
 	__m128i te[FWDSTEP];
 	__m128i ve[FWDSTEP];
@@ -1527,7 +1527,7 @@ main_loop(__attribute__((unused)) void *dummy)
 	int32_t k;
 	uint16_t dlp;
 	uint16_t *lp;
-	uint16_t dst_port[MAX_PKT_BURST];
+	uint32_t dst_port[MAX_PKT_BURST];
 	__m128i dip[MAX_PKT_BURST / FWDSTEP];
 	uint32_t ipv4_flag[MAX_PKT_BURST / FWDSTEP];
 	uint16_t pnum[MAX_PKT_BURST + 1];
diff --git a/examples/load_balancer/runtime.c b/examples/load_balancer/runtime.c
index 2b265c2..6944325 100644
--- a/examples/load_balancer/runtime.c
+++ b/examples/load_balancer/runtime.c
@@ -525,7 +525,7 @@ app_lcore_worker(
 			struct rte_mbuf *pkt;
 			struct ipv4_hdr *ipv4_hdr;
 			uint32_t ipv4_dst, pos;
-			uint8_t port;
+			uint32_t port;
 
 			if (likely(j < bsz_rd - 1)) {
 				APP_WORKER_PREFETCH1(rte_pktmbuf_mtod(lp->mbuf_in.array[j+1], unsigned char *));
-- 
1.9.1

-------------- next part --------------
>From 2ded34ca11f61ed8a3bcfda6d13339e04b0430bf Mon Sep 17 00:00:00 2001
From: Matthew Hall <mhall at mhcomputing.net>
Date: Sun, 28 Jun 2015 22:52:45 +0000
Subject: [PATCH 8/8] Makefile: add -fno-strict-aliasing due to LPM casting
 logic

Signed-off-by: Matthew Hall <mhall at mhcomputing.net>
---
 lib/librte_lpm/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/librte_lpm/Makefile b/lib/librte_lpm/Makefile
index 688cfc9..20030b8 100644
--- a/lib/librte_lpm/Makefile
+++ b/lib/librte_lpm/Makefile
@@ -35,7 +35,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
 LIB = librte_lpm.a
 
 CFLAGS += -O3
-CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -fno-strict-aliasing
 
 EXPORT_MAP := rte_lpm_version.map
 
-- 
1.9.1



More information about the dev mailing list