[dpdk-dev] [PATCH 3/4] eal/arm: Enable lpm/table/pipeline libs

Jianbo Liu jianbo.liu at linaro.org
Tue Dec 1 19:41:15 CET 2015


Adds ARM NEON support for lpm.
And enables table/pipeline libraries which depend on lpm.

Signed-off-by: Jianbo Liu <jianbo.liu at linaro.org>
---
 config/defconfig_arm-armv7a-linuxapp-gcc          |  3 -
 config/defconfig_arm64-armv8a-linuxapp-gcc        |  3 -
 lib/librte_eal/common/include/arch/arm/rte_vect.h | 28 ++++++++++
 lib/librte_lpm/rte_lpm.h                          | 68 ++++++++++++++++-------
 4 files changed, 77 insertions(+), 25 deletions(-)

diff --git a/config/defconfig_arm-armv7a-linuxapp-gcc b/config/defconfig_arm-armv7a-linuxapp-gcc
index cbebd64..efffa1f 100644
--- a/config/defconfig_arm-armv7a-linuxapp-gcc
+++ b/config/defconfig_arm-armv7a-linuxapp-gcc
@@ -53,9 +53,6 @@ CONFIG_RTE_LIBRTE_KNI=n
 CONFIG_RTE_EAL_IGB_UIO=n
 
 # fails to compile on ARM
-CONFIG_RTE_LIBRTE_LPM=n
-CONFIG_RTE_LIBRTE_TABLE=n
-CONFIG_RTE_LIBRTE_PIPELINE=n
 CONFIG_RTE_SCHED_VECTOR=n
 
 # cannot use those on ARM
diff --git a/config/defconfig_arm64-armv8a-linuxapp-gcc b/config/defconfig_arm64-armv8a-linuxapp-gcc
index 504f3ed..57f7941 100644
--- a/config/defconfig_arm64-armv8a-linuxapp-gcc
+++ b/config/defconfig_arm64-armv8a-linuxapp-gcc
@@ -51,7 +51,4 @@ CONFIG_RTE_LIBRTE_IVSHMEM=n
 CONFIG_RTE_LIBRTE_FM10K_PMD=n
 CONFIG_RTE_LIBRTE_I40E_PMD=n
 
-CONFIG_RTE_LIBRTE_LPM=n
-CONFIG_RTE_LIBRTE_TABLE=n
-CONFIG_RTE_LIBRTE_PIPELINE=n
 CONFIG_RTE_SCHED_VECTOR=n
diff --git a/lib/librte_eal/common/include/arch/arm/rte_vect.h b/lib/librte_eal/common/include/arch/arm/rte_vect.h
index a33c054..7437711 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_vect.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_vect.h
@@ -41,6 +41,8 @@ extern "C" {
 
 typedef int32x4_t xmm_t;
 
+typedef int32x4_t __m128i;
+
 #define	XMM_SIZE	(sizeof(xmm_t))
 #define	XMM_MASK	(XMM_SIZE - 1)
 
@@ -53,6 +55,32 @@ typedef union rte_xmm {
 	double   pd[XMM_SIZE / sizeof(double)];
 } __attribute__((aligned(16))) rte_xmm_t;
 
+static __inline __m128i
+_mm_set_epi32(int i3, int i2, int i1, int i0)
+{
+	int32_t r[4] = {i0, i1, i2, i3};
+
+	return vld1q_s32(r);
+}
+
+static __inline __m128i
+_mm_loadu_si128(__m128i *p)
+{
+	return vld1q_s32((int32_t *)p);
+}
+
+static __inline __m128i
+_mm_set1_epi32(int i)
+{
+	return vdupq_n_s32(i);
+}
+
+static __inline __m128i
+_mm_and_si128(__m128i a, __m128i b)
+{
+	return vandq_s32(a, b);
+}
+
 #ifdef RTE_ARCH_ARM
 /* NEON intrinsic vqtbl1q_u8() is not supported in ARMv7-A(AArch32) */
 static __inline uint8x16_t
diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h
index c299ce2..c76c07d 100644
--- a/lib/librte_lpm/rte_lpm.h
+++ b/lib/librte_lpm/rte_lpm.h
@@ -361,6 +361,47 @@ rte_lpm_lookup_bulk_func(const struct rte_lpm *lpm, const uint32_t * ips,
 /* Mask four results. */
 #define	 RTE_LPM_MASKX4_RES	UINT64_C(0x00ff00ff00ff00ff)
 
+#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
+static inline void
+rte_lpm_tbl24_val4(const struct rte_lpm *lpm, int32x4_t ip, uint16_t tbl[4])
+{
+	uint32x4_t i24;
+	uint32_t idx[4];
+
+	/* get 4 indexes for tbl24[]. */
+	i24 = vshrq_n_u32(vreinterpretq_u32_s32(ip), CHAR_BIT);
+	vst1q_u32(idx, i24);
+
+	/* extract values from tbl24[] */
+	tbl[0] = *(const uint16_t *)&lpm->tbl24[idx[0]];
+	tbl[1] = *(const uint16_t *)&lpm->tbl24[idx[1]];
+	tbl[2] = *(const uint16_t *)&lpm->tbl24[idx[2]];
+	tbl[3] = *(const uint16_t *)&lpm->tbl24[idx[3]];
+}
+#else
+static inline void
+rte_lpm_tbl24_val4(const struct rte_lpm *lpm, __m128i ip, uint16_t tbl[4])
+{
+	__m128i i24;
+	uint64_t idx;
+
+	/* get 4 indexes for tbl24[]. */
+	i24 = _mm_srli_epi32(ip, CHAR_BIT);
+
+	/* extract values from tbl24[] */
+	idx = _mm_cvtsi128_si64(i24);
+	i24 = _mm_srli_si128(i24, sizeof(uint64_t));
+
+	tbl[0] = *(const uint16_t *)&lpm->tbl24[(uint32_t)idx];
+	tbl[1] = *(const uint16_t *)&lpm->tbl24[idx >> 32];
+
+	idx = _mm_cvtsi128_si64(i24);
+
+	tbl[2] = *(const uint16_t *)&lpm->tbl24[(uint32_t)idx];
+	tbl[3] = *(const uint16_t *)&lpm->tbl24[idx >> 32];
+}
+#endif
+
 /**
  * Lookup four IP addresses in an LPM table.
  *
@@ -381,17 +422,19 @@ rte_lpm_lookup_bulk_func(const struct rte_lpm *lpm, const uint32_t * ips,
  *   if lookup would fail.
  */
 static inline void
+#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
+rte_lpm_lookupx4(const struct rte_lpm *lpm, int32x4_t ip, uint16_t hop[4],
+	uint16_t defv)
+#else
 rte_lpm_lookupx4(const struct rte_lpm *lpm, __m128i ip, uint16_t hop[4],
 	uint16_t defv)
+#endif
 {
-	__m128i i24;
 	rte_xmm_t i8;
 	uint16_t tbl[4];
-	uint64_t idx, pt;
-
-	const __m128i mask8 =
-		_mm_set_epi32(UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX);
+	uint64_t pt;
 
+	const __m128i mask8 = _mm_set1_epi32(UINT8_MAX);
 	/*
 	 * RTE_LPM_VALID_EXT_ENTRY_BITMASK for 4 LPM entries
 	 * as one 64-bit value (0x0300030003000300).
@@ -412,20 +455,7 @@ rte_lpm_lookupx4(const struct rte_lpm *lpm, __m128i ip, uint16_t hop[4],
 		(uint64_t)RTE_LPM_LOOKUP_SUCCESS << 32 |
 		(uint64_t)RTE_LPM_LOOKUP_SUCCESS << 48);
 
-	/* get 4 indexes for tbl24[]. */
-	i24 = _mm_srli_epi32(ip, CHAR_BIT);
-
-	/* extract values from tbl24[] */
-	idx = _mm_cvtsi128_si64(i24);
-	i24 = _mm_srli_si128(i24, sizeof(uint64_t));
-
-	tbl[0] = *(const uint16_t *)&lpm->tbl24[(uint32_t)idx];
-	tbl[1] = *(const uint16_t *)&lpm->tbl24[idx >> 32];
-
-	idx = _mm_cvtsi128_si64(i24);
-
-	tbl[2] = *(const uint16_t *)&lpm->tbl24[(uint32_t)idx];
-	tbl[3] = *(const uint16_t *)&lpm->tbl24[idx >> 32];
+	rte_lpm_tbl24_val4(lpm, ip, tbl);
 
 	/* get 4 indexes for tbl8[]. */
 	i8.x = _mm_and_si128(ip, mask8);
-- 
1.8.3.1



More information about the dev mailing list