[dpdk-dev] [PATCH v1 1/2] rte_memcmp functions using Intel AVX and SSE intrinsics

Ravi Kerur <rkerur@gmail.com>
Tue Mar 8 00:00:43 CET 2016


v1:
        This patch adds memcmp functionality using AVX and SSE
        intrinsics provided by Intel. For the other architectures
        supported by DPDK, the regular memcmp() function is used.

        Compiled and tested on Ubuntu 14.04 (non-NUMA) and 15.10 (NUMA)
        systems.
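
        A minimal usage sketch (assuming the architecture header is
        exported as rte_memcmp.h, in the same way as rte_memcpy.h;
        the helper below is purely illustrative):

            #include <rte_memcmp.h>

            /* Return 1 when two keys of 'len' bytes match. */
            static int
            keys_equal(const void *k1, const void *k2, size_t len)
            {
                    /* rte_memcmp() follows memcmp() semantics:
                     * zero on equality, a signed result otherwise. */
                    return rte_memcmp(k1, k2, len) == 0;
            }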

Signed-off-by: Ravi Kerur <rkerur@gmail.com>
---
 .../common/include/arch/arm/rte_memcmp.h           |  60 ++
 .../common/include/arch/ppc_64/rte_memcmp.h        |  62 ++
 .../common/include/arch/tile/rte_memcmp.h          |  60 ++
 .../common/include/arch/x86/rte_memcmp.h           | 786 +++++++++++++++++++++
 lib/librte_eal/common/include/generic/rte_memcmp.h | 175 +++++
 5 files changed, 1143 insertions(+)
 create mode 100644 lib/librte_eal/common/include/arch/arm/rte_memcmp.h
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_memcmp.h
 create mode 100644 lib/librte_eal/common/include/arch/tile/rte_memcmp.h
 create mode 100644 lib/librte_eal/common/include/arch/x86/rte_memcmp.h
 create mode 100644 lib/librte_eal/common/include/generic/rte_memcmp.h

diff --git a/lib/librte_eal/common/include/arch/arm/rte_memcmp.h b/lib/librte_eal/common/include/arch/arm/rte_memcmp.h
new file mode 100644
index 0000000..fcbacb4
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/arm/rte_memcmp.h
@@ -0,0 +1,60 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 RehiveTech. All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of RehiveTech nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_MEMCMP_ARM_H_
+#define _RTE_MEMCMP_ARM_H_
+
+#include <stdint.h>
+#include <string.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_memcmp.h"
+
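+/*
+ * When n is a compile-time constant, call memcmp() directly so that
+ * the compiler can expand the comparison inline; otherwise go
+ * through the wrapper below.
+ */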
+#define rte_memcmp(src_1, src_2, n)          \
+	({ (__builtin_constant_p(n)) ?       \
+	memcmp((src_1), (src_2), (n)) :      \
+	rte_memcmp_func((src_1), (src_2), (n)); })
+
+static inline int
+rte_memcmp_func(const void *src_1, const void *src_2, size_t n)
+{
+	return memcmp(src_1, src_2, n);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMCMP_ARM_H_ */
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_memcmp.h b/lib/librte_eal/common/include/arch/ppc_64/rte_memcmp.h
new file mode 100644
index 0000000..5839a2d
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_memcmp.h
@@ -0,0 +1,62 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) IBM Corporation 2016.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of IBM Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_MEMCMP_PPC_64_H_
+#define _RTE_MEMCMP_PPC_64_H_
+
+#include <stdint.h>
+#include <string.h>
+/* To include altivec.h, the GCC version must be >= 4.8. */
+#include <altivec.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_memcmp.h"
+
+#define rte_memcmp(src_1, src_2, n)          \
+	({ (__builtin_constant_p(n)) ?       \
+	memcmp((src_1), (src_2), (n)) :      \
+	rte_memcmp_func((src_1), (src_2), (n)); })
+
+static inline int
+rte_memcmp_func(const void *src_1, const void *src_2, size_t n)
+{
+	return memcmp(src_1, src_2, n);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMCMP_PPC_64_H_ */
diff --git a/lib/librte_eal/common/include/arch/tile/rte_memcmp.h b/lib/librte_eal/common/include/arch/tile/rte_memcmp.h
new file mode 100644
index 0000000..de35ac5
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/tile/rte_memcmp.h
@@ -0,0 +1,60 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) EZchip Semiconductor Ltd. 2016.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of EZchip Semiconductor nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_MEMCMP_TILE_H_
+#define _RTE_MEMCMP_TILE_H_
+
+#include <stdint.h>
+#include <string.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_memcmp.h"
+
+#define rte_memcmp(src_1, src_2, n)          \
+	({ (__builtin_constant_p(n)) ?       \
+	memcmp((src_1), (src_2), (n)) :      \
+	rte_memcmp_func((src_1), (src_2), (n)); })
+
+static inline int
+rte_memcmp_func(const void *src_1, const void *src_2, size_t n)
+{
+	return memcmp(src_1, src_2, n);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMCMP_TILE_H_ */
diff --git a/lib/librte_eal/common/include/arch/x86/rte_memcmp.h b/lib/librte_eal/common/include/arch/x86/rte_memcmp.h
new file mode 100644
index 0000000..00d0d31
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/x86/rte_memcmp.h
@@ -0,0 +1,786 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MEMCMP_X86_64_H_
+#define _RTE_MEMCMP_X86_64_H_
+
+/**
+ * @file
+ *
+ * Functions for SSE/AVX/AVX2 implementation of memcmp().
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <stdlib.h>
+
+#include <rte_vect.h>
+#include <rte_branch_prediction.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Compare bytes between two locations. The locations must not overlap.
+ *
+ * @param src_1
+ *   Pointer to the first source of the data.
+ * @param src_2
+ *   Pointer to the second source of the data.
+ * @param n
+ *   Number of bytes to compare.
+ * @return
+ *   zero if src_1 equals src_2
+ *   a negative value if src_1 is less than src_2
+ *   a positive value if src_1 is greater than src_2
+ */
+static inline int
+rte_memcmp(const void *src_1, const void *src_2,
+		size_t n) __attribute__((always_inline));
+
+/**
+ * Return the difference of the first differing byte,
+ * or zero if the first n bytes are equal.
+ */
+static inline int
+rte_cmpffdb(const uint8_t *x, const uint8_t *y, size_t n)
+{
+	size_t i;
+
+	for (i = 0; i < n; i++)
+		if (x[i] != y[i])
+			return x[i] - y[i];
+	return 0;
+}
+
+/**
+ * Compare 0 to 15 bytes between two locations.
+ * Locations should not overlap.
+ */
+static inline int
+rte_memcmp_regular(const uint8_t *src_1u, const uint8_t *src_2u, size_t n)
+{
+	int ret = 1;
+
+	/*
+	 * n is less than 16: each set bit of n selects a
+	 * 1-, 2-, 4- or 8-byte chunk to compare.
+	 */
+	if (n & 0x01) {
+		ret = (*(const uint8_t *)src_1u ==
+			*(const uint8_t *)src_2u);
+
+		if (ret != 1)
+			goto exit_1;
+
+		n -= 0x1;
+		src_1u += 0x1;
+		src_2u += 0x1;
+	}
+
+	if (n & 0x02) {
+		ret = (*(const uint16_t *)src_1u ==
+			*(const uint16_t *)src_2u);
+
+		if (ret != 1)
+			goto exit_2;
+
+		n -= 0x2;
+		src_1u += 0x2;
+		src_2u += 0x2;
+	}
+
+	if (n & 0x04) {
+		ret = (*(const uint32_t *)src_1u ==
+			*(const uint32_t *)src_2u);
+
+		if (ret != 1)
+			goto exit_4;
+
+		n -= 0x4;
+		src_1u += 0x4;
+		src_2u += 0x4;
+	}
+
+	if (n & 0x08) {
+		ret = (*(const uint64_t *)src_1u ==
+			*(const uint64_t *)src_2u);
+
+		if (ret != 1)
+			goto exit_8;
+
+		n -= 0x8;
+		src_1u += 0x8;
+		src_2u += 0x8;
+	}
+
+	return !ret;
+
+exit_1:
+	return rte_cmpffdb(src_1u, src_2u, 1);
+exit_2:
+	return rte_cmpffdb(src_1u, src_2u, 2);
+exit_4:
+	return rte_cmpffdb(src_1u, src_2u, 4);
+exit_8:
+	return rte_cmpffdb(src_1u, src_2u, 8);
+}
+
+/**
+ * Compare 16 bytes between two locations.
+ * Locations should not overlap.
+ */
+static inline int
+rte_cmp16(const void *src_1, const void *src_2)
+{
+	__m128i xmm0, xmm1, xmm2;
+
+	xmm0 = _mm_lddqu_si128((const __m128i *)src_1);
+	xmm1 = _mm_lddqu_si128((const __m128i *)src_2);
+
+	xmm2 = _mm_xor_si128(xmm0, xmm1);
+
+	if (unlikely(!_mm_testz_si128(xmm2, xmm2))) {
+		__m128i idx =
+			_mm_setr_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+
+		/*
+		 * Reverse the byte order so that the first differing
+		 * byte ends up in the most significant movemask bit.
+		 */
+		xmm0 = _mm_shuffle_epi8(xmm0, idx);
+		xmm1 = _mm_shuffle_epi8(xmm1, idx);
+
+		/*
+		 * Flip the sign bits to compare unsigned bytes with
+		 * instructions for signed bytes.
+		 */
+		xmm0 = _mm_xor_si128(xmm0, _mm_set1_epi8(0x80));
+		xmm1 = _mm_xor_si128(xmm1, _mm_set1_epi8(0x80));
+
+		/*
+		 * The difference of the two masks is dominated by the
+		 * first differing byte, giving memcmp()-style ordering.
+		 */
+		return _mm_movemask_epi8(_mm_cmpgt_epi8(xmm0, xmm1)) -
+				_mm_movemask_epi8(_mm_cmpgt_epi8(xmm1, xmm0));
+	}
+
+	return 0;
+}
+
+/**
+ * AVX2 implementation below
+ */
+#ifdef RTE_MACHINE_CPUFLAG_AVX2
+
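+/**
+ * Compare 32 bytes between two locations.
+ * Locations should not overlap.
+ */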
+static inline int
+rte_cmp32(const void *src_1, const void *src_2)
+{
+	__m256i    ff = _mm256_set1_epi32(-1);
+	__m256i    idx = _mm256_setr_epi8(
+			15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+			15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+	__m256i    sign = _mm256_set1_epi32(0x80000000);
+	__m256i    mm11, mm21;
+	__m256i    eq, gt0, gt1;
+
+	mm11 = _mm256_lddqu_si256((const __m256i *)src_1);
+	mm21 = _mm256_lddqu_si256((const __m256i *)src_2);
+
+	eq = _mm256_cmpeq_epi32(mm11, mm21);
+	/* Not equal */
+	if (!_mm256_testc_si256(eq, ff)) {
+		mm11 = _mm256_shuffle_epi8(mm11, idx);
+		mm21 = _mm256_shuffle_epi8(mm21, idx);
+
+		mm11 = _mm256_xor_si256(mm11, sign);
+		mm21 = _mm256_xor_si256(mm21, sign);
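+		/* Swap the 128-bit lanes to complete the byte reversal. */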
+		mm11 = _mm256_permute2f128_si256(mm11, mm11, 0x01);
+		mm21 = _mm256_permute2f128_si256(mm21, mm21, 0x01);
+
+		gt0 = _mm256_cmpgt_epi32(mm11, mm21);
+		gt1 = _mm256_cmpgt_epi32(mm21, mm11);
+		return _mm256_movemask_ps(_mm256_castsi256_ps(gt0)) -
+				_mm256_movemask_ps(_mm256_castsi256_ps(gt1));
+	}
+
+	return 0;
+}
+
+/**
+ * Compare 48 bytes between two locations.
+ * Locations should not overlap.
+ */
+static inline int
+rte_cmp48(const void *src_1, const void *src_2)
+{
+	int ret;
+
+	ret = rte_cmp32((const uint8_t *)src_1 + 0 * 32,
+			(const uint8_t *)src_2 + 0 * 32);
+
+	if (unlikely(ret != 0))
+		return ret;
+
+	ret = rte_cmp16((const uint8_t *)src_1 + 1 * 32,
+			(const uint8_t *)src_2 + 1 * 32);
+	return ret;
+}
+
+/**
+ * Compare 64 bytes between two locations.
+ * Locations should not overlap.
+ */
+static inline int
+rte_cmp64(const void *src_1, const void *src_2)
+{
+	const __m256i *src1 = (const __m256i *)src_1;
+	const __m256i *src2 = (const __m256i *)src_2;
+
+	__m256i mm11 = _mm256_lddqu_si256(src1);
+	__m256i mm12 = _mm256_lddqu_si256(src1 + 1);
+	__m256i mm21 = _mm256_lddqu_si256(src2);
+	__m256i mm22 = _mm256_lddqu_si256(src2 + 1);
+
+	__m256i mm1 = _mm256_xor_si256(mm11, mm21);
+	__m256i mm2 = _mm256_xor_si256(mm12, mm22);
+	__m256i mm = _mm256_or_si256(mm1, mm2);
+
+	if (unlikely(!_mm256_testz_si256(mm, mm))) {
+
+		__m256i idx = _mm256_setr_epi8(
+				15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+				15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+		__m256i sign = _mm256_set1_epi32(0x80000000);
+		__m256i gt0, gt1;
+
+		/*
+		 * Find out which of the two 32-byte blocks
+		 * is different.
+		 */
+		if (_mm256_testz_si256(mm1, mm1)) {
+			mm11 = mm12;
+			mm21 = mm22;
+			mm1 = mm2;
+		}
+
+		mm11 = _mm256_shuffle_epi8(mm11, idx);
+		mm21 = _mm256_shuffle_epi8(mm21, idx);
+
+		mm11 = _mm256_xor_si256(mm11, sign);
+		mm21 = _mm256_xor_si256(mm21, sign);
+		mm11 = _mm256_permute2f128_si256(mm11, mm11, 0x01);
+		mm21 = _mm256_permute2f128_si256(mm21, mm21, 0x01);
+
+		gt0 = _mm256_cmpgt_epi32(mm11, mm21);
+		gt1 = _mm256_cmpgt_epi32(mm21, mm11);
+		return _mm256_movemask_ps(_mm256_castsi256_ps(gt0)) -
+				_mm256_movemask_ps(_mm256_castsi256_ps(gt1));
+	}
+
+	return 0;
+}
+
+/**
+ * Compare 128 bytes between two locations.
+ * Locations should not overlap.
+ */
+static inline int
+rte_cmp128(const void *src_1, const void *src_2)
+{
+	int ret;
+
+	ret = rte_cmp64((const uint8_t *)src_1 + 0 * 64,
+			(const uint8_t *)src_2 + 0 * 64);
+
+	if (unlikely(ret != 0))
+		return ret;
+
+	return rte_cmp64((const uint8_t *)src_1 + 1 * 64,
+			(const uint8_t *)src_2 + 1 * 64);
+}
+
+/**
+ * Compare 256 bytes between two locations.
+ * Locations should not overlap.
+ */
+static inline int
+rte_cmp256(const void *src_1, const void *src_2)
+{
+	int ret;
+
+	ret = rte_cmp64((const uint8_t *)src_1 + 0 * 64,
+			(const uint8_t *)src_2 + 0 * 64);
+
+	if (unlikely(ret != 0))
+		return ret;
+
+	ret = rte_cmp64((const uint8_t *)src_1 + 1 * 64,
+			(const uint8_t *)src_2 + 1 * 64);
+
+	if (unlikely(ret != 0))
+		return ret;
+
+	ret = rte_cmp64((const uint8_t *)src_1 + 2 * 64,
+			(const uint8_t *)src_2 + 2 * 64);
+
+	if (unlikely(ret != 0))
+		return ret;
+
+	return rte_cmp64((const uint8_t *)src_1 + 3 * 64,
+			(const uint8_t *)src_2 + 3 * 64);
+}
+
+/**
+ * Compare bytes between two locations. The locations must not overlap.
+ *
+ * @param src_1
+ *   Pointer to the first source of the data.
+ * @param src_2
+ *   Pointer to the second source of the data.
+ * @param n
+ *   Number of bytes to compare.
+ * @return
+ *   zero if src_1 equals src_2
+ *   a negative value if src_1 is less than src_2
+ *   a positive value if src_1 is greater than src_2
+ */
+static inline int
+rte_memcmp(const void *_src_1, const void *_src_2, size_t n)
+{
+	const uint8_t *src_1 = (const uint8_t *)_src_1;
+	const uint8_t *src_2 = (const uint8_t *)_src_2;
+	int ret = 0;
+
+	if (n < 16)
+		return rte_memcmp_regular(src_1, src_2, n);
+
+	if (n <= 32) {
+		ret = rte_cmp16(src_1, src_2);
+		if (unlikely(ret != 0))
+			return ret;
+
+		return rte_cmp16(src_1 - 16 + n, src_2 - 16 + n);
+	}
+
+	if (n <= 48) {
+		ret = rte_cmp32(src_1, src_2);
+		if (unlikely(ret != 0))
+			return ret;
+
+		return rte_cmp16(src_1 - 16 + n, src_2 - 16 + n);
+	}
+
+	if (n <= 64) {
+		ret = rte_cmp32(src_1, src_2);
+		if (unlikely(ret != 0))
+			return ret;
+
+		ret = rte_cmp16(src_1 + 32, src_2 + 32);
+
+		if (unlikely(ret != 0))
+			return ret;
+
+		return rte_cmp16(src_1 - 16 + n, src_2 - 16 + n);
+	}
+
+CMP_BLOCK_LESS_THAN_512:
+	if (n <= 512) {
+		if (n >= 256) {
+			ret = rte_cmp256(src_1, src_2);
+			if (unlikely(ret != 0))
+				return ret;
+			src_1 = src_1 + 256;
+			src_2 = src_2 + 256;
+			n -= 256;
+		}
+		if (n >= 128) {
+			ret = rte_cmp128(src_1, src_2);
+			if (unlikely(ret != 0))
+				return ret;
+			src_1 = src_1 + 128;
+			src_2 = src_2 + 128;
+			n -= 128;
+		}
+		if (n >= 64) {
+			n -= 64;
+			ret = rte_cmp64(src_1, src_2);
+			if (unlikely(ret != 0))
+				return ret;
+			src_1 = src_1 + 64;
+			src_2 = src_2 + 64;
+		}
+		if (n > 32) {
+			ret = rte_cmp32(src_1, src_2);
+			if (unlikely(ret != 0))
+				return ret;
+			ret = rte_cmp32(src_1 - 32 + n, src_2 - 32 + n);
+			return ret;
+		}
+		if (n > 0)
+			ret = rte_cmp32(src_1 - 32 + n, src_2 - 32 + n);
+
+		return ret;
+	}
+
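+	/*
+	 * Compare in 512-byte chunks, then jump back to finish the
+	 * remaining tail of at most 512 bytes.
+	 */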
+	while (n > 512) {
+		ret = rte_cmp256(src_1 + 0 * 256, src_2 + 0 * 256);
+		if (unlikely(ret != 0))
+			return ret;
+
+		ret = rte_cmp256(src_1 + 1 * 256, src_2 + 1 * 256);
+		if (unlikely(ret != 0))
+			return ret;
+
+		src_1 = src_1 + 512;
+		src_2 = src_2 + 512;
+		n -= 512;
+	}
+	goto CMP_BLOCK_LESS_THAN_512;
+}
+
+#else /* RTE_MACHINE_CPUFLAG_AVX2 */
+
+/**
+ * Compare 32 bytes between two locations.
+ * Locations should not overlap.
+ */
+static inline int
+rte_cmp32(const void *src_1, const void *src_2)
+{
+	const __m128i *src1 = (const __m128i *)src_1;
+	const __m128i *src2 = (const __m128i *)src_2;
+
+	__m128i mm11 = _mm_lddqu_si128(src1);
+	__m128i mm12 = _mm_lddqu_si128(src1 + 1);
+	__m128i mm21 = _mm_lddqu_si128(src2);
+	__m128i mm22 = _mm_lddqu_si128(src2 + 1);
+
+	__m128i mm1 = _mm_xor_si128(mm11, mm21);
+	__m128i mm2 = _mm_xor_si128(mm12, mm22);
+	__m128i mm = _mm_or_si128(mm1, mm2);
+
+	if (unlikely(!_mm_testz_si128(mm, mm))) {
+
+		__m128i idx =
+			_mm_setr_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+		/*
+		 * Find out which of the two 16-byte blocks
+		 * is different.
+		 */
+		if (_mm_testz_si128(mm1, mm1)) {
+			mm11 = mm12;
+			mm21 = mm22;
+			mm1 = mm2;
+		}
+
+		/*
+		 * Reverse byte order.
+		 */
+		mm11 = _mm_shuffle_epi8(mm11, idx);
+		mm21 = _mm_shuffle_epi8(mm21, idx);
+
+		/*
+		 * Compare unsigned bytes with instructions for
+		 * signed bytes.
+		 */
+		mm11 = _mm_xor_si128(mm11, _mm_set1_epi8(0x80));
+		mm21 = _mm_xor_si128(mm21, _mm_set1_epi8(0x80));
+
+		return _mm_movemask_epi8(_mm_cmpgt_epi8(mm11, mm21)) -
+				_mm_movemask_epi8(_mm_cmpgt_epi8(mm21, mm11));
+	}
+
+	return 0;
+}
+
+/**
+ * Compare 48 bytes between two locations.
+ * Locations should not overlap.
+ */
+static inline int
+rte_cmp48(const void *src_1, const void *src_2)
+{
+	int ret;
+
+	ret = rte_cmp16((const uint8_t *)src_1 + 0 * 16,
+			(const uint8_t *)src_2 + 0 * 16);
+
+	if (unlikely(ret != 0))
+		return ret;
+
+	ret = rte_cmp16((const uint8_t *)src_1 + 1 * 16,
+			(const uint8_t *)src_2 + 1 * 16);
+
+	if (unlikely(ret != 0))
+		return ret;
+
+	return rte_cmp16((const uint8_t *)src_1 + 2 * 16,
+			(const uint8_t *)src_2 + 2 * 16);
+}
+
+/**
+ * Compare 64 bytes between two locations.
+ * Locations should not overlap.
+ */
+static inline int
+rte_cmp64(const void *src_1, const void *src_2)
+{
+	int ret;
+
+	ret = rte_cmp16((const uint8_t *)src_1 + 0 * 16,
+			(const uint8_t *)src_2 + 0 * 16);
+
+	if (unlikely(ret != 0))
+		return ret;
+
+	ret = rte_cmp16((const uint8_t *)src_1 + 1 * 16,
+			(const uint8_t *)src_2 + 1 * 16);
+
+	if (unlikely(ret != 0))
+		return ret;
+
+	ret = rte_cmp16((const uint8_t *)src_1 + 2 * 16,
+			(const uint8_t *)src_2 + 2 * 16);
+
+	if (unlikely(ret != 0))
+		return ret;
+
+	return rte_cmp16((const uint8_t *)src_1 + 3 * 16,
+			(const uint8_t *)src_2 + 3 * 16);
+}
+
+/**
+ * Compare 128 bytes between two locations.
+ * Locations should not overlap.
+ */
+static inline int
+rte_cmp128(const void *src_1, const void *src_2)
+{
+	int ret;
+
+	ret = rte_cmp32((const uint8_t *)src_1 + 0 * 32,
+			(const uint8_t *)src_2 + 0 * 32);
+
+	if (unlikely(ret != 0))
+		return ret;
+
+	ret = rte_cmp32((const uint8_t *)src_1 + 1 * 32,
+			(const uint8_t *)src_2 + 1 * 32);
+
+	if (unlikely(ret != 0))
+		return ret;
+
+	ret = rte_cmp32((const uint8_t *)src_1 + 2 * 32,
+			(const uint8_t *)src_2 + 2 * 32);
+
+	if (unlikely(ret != 0))
+		return ret;
+
+	return rte_cmp32((const uint8_t *)src_1 + 3 * 32,
+			(const uint8_t *)src_2 + 3 * 32);
+}
+
+/**
+ * Compare 256 bytes between two locations.
+ * Locations should not overlap.
+ */
+static inline int
+rte_cmp256(const void *src_1, const void *src_2)
+{
+	int ret;
+
+	ret = rte_cmp32((const uint8_t *)src_1 + 0 * 32,
+			(const uint8_t *)src_2 + 0 * 32);
+
+	if (unlikely(ret != 0))
+		return ret;
+
+	ret = rte_cmp32((const uint8_t *)src_1 + 1 * 32,
+			(const uint8_t *)src_2 + 1 * 32);
+
+	if (unlikely(ret != 0))
+		return ret;
+
+	ret = rte_cmp32((const uint8_t *)src_1 + 2 * 32,
+			(const uint8_t *)src_2 + 2 * 32);
+
+	if (unlikely(ret != 0))
+		return ret;
+
+	ret = rte_cmp32((const uint8_t *)src_1 + 3 * 32,
+			(const uint8_t *)src_2 + 3 * 32);
+
+	if (unlikely(ret != 0))
+		return ret;
+
+	ret = rte_cmp32((const uint8_t *)src_1 + 4 * 32,
+			(const uint8_t *)src_2 + 4 * 32);
+
+	if (unlikely(ret != 0))
+		return ret;
+
+	ret = rte_cmp32((const uint8_t *)src_1 + 5 * 32,
+			(const uint8_t *)src_2 + 5 * 32);
+
+	if (unlikely(ret != 0))
+		return ret;
+
+	ret = rte_cmp32((const uint8_t *)src_1 + 6 * 32,
+			(const uint8_t *)src_2 + 6 * 32);
+
+	if (unlikely(ret != 0))
+		return ret;
+
+	return rte_cmp32((const uint8_t *)src_1 + 7 * 32,
+			(const uint8_t *)src_2 + 7 * 32);
+}
+
+/**
+ * Compare bytes between two locations. The locations must not overlap.
+ *
+ * @param src_1
+ *   Pointer to the first source of the data.
+ * @param src_2
+ *   Pointer to the second source of the data.
+ * @param n
+ *   Number of bytes to compare.
+ * @return
+ *   zero if src_1 equals src_2
+ *   a negative value if src_1 is less than src_2
+ *   a positive value if src_1 is greater than src_2
+ */
+static inline int
+rte_memcmp(const void *_src_1, const void *_src_2, size_t n)
+{
+	const uint8_t *src_1 = (const uint8_t *)_src_1;
+	const uint8_t *src_2 = (const uint8_t *)_src_2;
+	int ret = 0;
+
+	if (n < 16)
+		return rte_memcmp_regular(src_1, src_2, n);
+
+	if (n <= 32) {
+		ret = rte_cmp16(src_1, src_2);
+		if (unlikely(ret != 0))
+			return ret;
+
+		return rte_cmp16(src_1 - 16 + n, src_2 - 16 + n);
+	}
+
+	if (n <= 48) {
+		ret = rte_cmp32(src_1, src_2);
+		if (unlikely(ret != 0))
+			return ret;
+
+		return rte_cmp16(src_1 - 16 + n, src_2 - 16 + n);
+	}
+
+	if (n <= 64) {
+		ret = rte_cmp32(src_1, src_2);
+		if (unlikely(ret != 0))
+			return ret;
+
+		ret = rte_cmp16(src_1 + 32, src_2 + 32);
+
+		if (unlikely(ret != 0))
+			return ret;
+
+		return rte_cmp16(src_1 - 16 + n, src_2 - 16 + n);
+	}
+
+	if (n <= 512) {
+		if (n >= 256) {
+			ret = rte_cmp256(src_1, src_2);
+			if (unlikely(ret != 0))
+				return ret;
+
+			src_1 = src_1 + 256;
+			src_2 = src_2 + 256;
+			n -= 256;
+		}
+
+CMP_BLOCK_LESS_THAN_256:
+		if (n >= 128) {
+			ret = rte_cmp128(src_1, src_2);
+			if (unlikely(ret != 0))
+				return ret;
+
+			src_1 = src_1 + 128;
+			src_2 = src_2 + 128;
+			n -= 128;
+		}
+
+		if (n >= 64) {
+			ret = rte_cmp64(src_1, src_2);
+			if (unlikely(ret != 0))
+				return ret;
+
+			src_1 = src_1 + 64;
+			src_2 = src_2 + 64;
+			n -= 64;
+		}
+
+		if (n >= 32) {
+			ret = rte_cmp32(src_1, src_2);
+			if (unlikely(ret != 0))
+				return ret;
+			src_1 = src_1 + 32;
+			src_2 = src_2 + 32;
+			n -= 32;
+		}
+		if (n > 16) {
+			ret = rte_cmp16(src_1, src_2);
+			if (unlikely(ret != 0))
+				return ret;
+			ret = rte_cmp16(src_1 - 16 + n, src_2 - 16 + n);
+			return ret;
+		}
+		if (n > 0)
+			ret = rte_cmp16(src_1 - 16 + n, src_2 - 16 + n);
+
+		return ret;
+	}
+
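+	/*
+	 * Compare in 256-byte chunks, then jump back to finish the
+	 * remaining tail of fewer than 256 bytes.
+	 */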
+	for (; n >= 256; n -= 256) {
+		ret = rte_cmp256(src_1, src_2);
+		if (unlikely(ret != 0))
+			return ret;
+
+		src_1 = src_1 + 256;
+		src_2 = src_2 + 256;
+	}
+
+	goto CMP_BLOCK_LESS_THAN_256;
+}
+
+#endif /* RTE_MACHINE_CPUFLAG_AVX2 */
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMCMP_X86_64_H_ */
diff --git a/lib/librte_eal/common/include/generic/rte_memcmp.h b/lib/librte_eal/common/include/generic/rte_memcmp.h
new file mode 100644
index 0000000..1f8f2bd
--- /dev/null
+++ b/lib/librte_eal/common/include/generic/rte_memcmp.h
@@ -0,0 +1,175 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MEMCMP_H_
+#define _RTE_MEMCMP_H_
+
+/**
+ * @file
+ *
+ * Functions for vectorised implementation of memcmp().
+ */
+
+/**
+ * Find the first different bit for comparison.
+ */
+static inline int
+rte_cmpffd(uint32_t x, uint32_t y);
+
+/**
+ * Return the difference of the first differing byte,
+ * or zero if the first n bytes are equal.
+ */
+static inline int
+rte_cmpffdb(const uint8_t *x, const uint8_t *y, size_t n);
+
+/**
+ * Compare 16 bytes between two locations using optimised
+ * instructions. The locations should not overlap.
+ *
+ * @param src_1
+ *   Pointer to the first source of the data.
+ * @param src_2
+ *   Pointer to the second source of the data.
+ * @return
+ *   zero if src_1 equals src_2
+ *   a negative value if src_1 is less than src_2
+ *   a positive value if src_1 is greater than src_2
+ */
+static inline int
+rte_cmp16(const void *src_1, const void *src_2);
+
+/**
+ * Compare 32 bytes between two locations using optimised
+ * instructions. The locations should not overlap.
+ *
+ * @param src_1
+ *   Pointer to the first source of the data.
+ * @param src_2
+ *   Pointer to the second source of the data.
+ * @return
+ *   zero if src_1 equals src_2
+ *   a negative value if src_1 is less than src_2
+ *   a positive value if src_1 is greater than src_2
+ */
+static inline int
+rte_cmp32(const void *src_1, const void *src_2);
+
+/**
+ * Compare 64 bytes between two locations using optimised
+ * instructions. The locations should not overlap.
+ *
+ * @param src_1
+ *   Pointer to the first source of the data.
+ * @param src_2
+ *   Pointer to the second source of the data.
+ * @return
+ *   zero if src_1 equals src_2
+ *   a negative value if src_1 is less than src_2
+ *   a positive value if src_1 is greater than src_2
+ */
+static inline int
+rte_cmp64(const void *src_1, const void *src_2);
+
+/**
+ * Compare 48 bytes between two locations using optimised
+ * instructions. The locations should not overlap.
+ *
+ * @param src_1
+ *   Pointer to the first source of the data.
+ * @param src_2
+ *   Pointer to the second source of the data.
+ * @return
+ *   zero if src_1 equals src_2
+ *   a negative value if src_1 is less than src_2
+ *   a positive value if src_1 is greater than src_2
+ */
+static inline int
+rte_cmp48(const void *src_1, const void *src_2);
+
+/**
+ * Compare 128 bytes between two locations using
+ * optimised instructions. The locations should not overlap.
+ *
+ * @param src_1
+ *   Pointer to the first source of the data.
+ * @param src_2
+ *   Pointer to the second source of the data.
+ * @return
+ *   zero if src_1 equals src_2
+ *   a negative value if src_1 is less than src_2
+ *   a positive value if src_1 is greater than src_2
+ */
+static inline int
+rte_cmp128(const void *src_1, const void *src_2);
+
+/**
+ * Compare 256 bytes between two locations using
+ * optimised instructions. The locations should not overlap.
+ *
+ * @param src_1
+ *   Pointer to the first source of the data.
+ * @param src_2
+ *   Pointer to the second source of the data.
+ * @return
+ *   zero if src_1 equals src_2
+ *   a negative value if src_1 is less than src_2
+ *   a positive value if src_1 is greater than src_2
+ */
+static inline int
+rte_cmp256(const void *src_1, const void *src_2);
+
+#ifdef __DOXYGEN__
+
+/**
+ * Compare bytes between two locations. The locations must not overlap.
+ *
+ * @note This is implemented as a macro, so its address should not be taken
+ * and care is needed as parameter expressions may be evaluated multiple times.
+ *
+ * @param src_1
+ *   Pointer to the first source of the data.
+ * @param src_2
+ *   Pointer to the second source of the data.
+ * @param n
+ *   Number of bytes to compare.
+ * @return
+ *   zero if src_1 equals src_2
+ *   a negative value if src_1 is less than src_2
+ *   a positive value if src_1 is greater than src_2
+ */
+static int
+rte_memcmp(const void *src_1, const void *src_2, size_t n);
+
+#endif /* __DOXYGEN__ */
+
+/*
+ * memcmp() wrapper used by the rte_memcmp() macro when n is not a
+ * compile-time constant.
+ */
+static inline int
+rte_memcmp_func(const void *src_1, const void *src_2, size_t n)
+	__attribute__((always_inline));
+
+#endif /* _RTE_MEMCMP_H_ */
-- 
1.9.1


