[dpdk-dev] [PATCH v2] Implement memcmp using AVX/SSE instructions.

Ravi Kerur rkerur at gmail.com
Fri May 8 23:19:49 CEST 2015


This patch replaces memcmp in librte_hash with rte_memcmp which is
implemented with AVX/SSE instructions.

Preliminary results on an Intel(R) Core(TM) i7-4790 CPU @ 3.60GHz, Ubuntu
14.04 x86_64, show comparisons using AVX/SSE instructions taking 1/3rd the
CPU ticks for 16-, 32-, 48- and 64-byte comparisons. In addition,
hash_perf_autotest results show that using the new comparison function results
in faster completion of hash operations than the existing memcmp in all categories.

Signed-off-by: Ravi Kerur <rkerur at gmail.com>
---
 app/test/test_hash_perf.c                          |  36 +-
 .../common/include/arch/ppc_64/rte_memcmp.h        |  62 +++
 .../common/include/arch/x86/rte_memcmp.h           | 421 +++++++++++++++++++++
 lib/librte_eal/common/include/generic/rte_memcmp.h | 131 +++++++
 lib/librte_hash/rte_hash.c                         |  59 ++-
 5 files changed, 675 insertions(+), 34 deletions(-)
 create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_memcmp.h
 create mode 100644 lib/librte_eal/common/include/arch/x86/rte_memcmp.h
 create mode 100644 lib/librte_eal/common/include/generic/rte_memcmp.h

diff --git a/app/test/test_hash_perf.c b/app/test/test_hash_perf.c
index 6eabb21..6887629 100644
--- a/app/test/test_hash_perf.c
+++ b/app/test/test_hash_perf.c
@@ -440,7 +440,7 @@ run_single_tbl_perf_test(const struct rte_hash *h, hash_operation func,
 		uint32_t *invalid_pos_count)
 {
 	uint64_t begin, end, ticks = 0;
-	uint8_t *key = NULL;
+	uint8_t * volatile key = NULL;
 	uint32_t *bucket_occupancies = NULL;
 	uint32_t num_buckets, i, j;
 	int32_t pos;
@@ -547,30 +547,30 @@ run_tbl_perf_test(struct tbl_perf_test_params *params)
 	case ADD_UPDATE:
 		num_iterations = params->num_iterations;
 		params->num_iterations = params->entries;
-		run_single_tbl_perf_test(handle, rte_hash_add_key, params,
-				&avg_occupancy, &invalid_pos);
-		params->num_iterations = num_iterations;
 		ticks = run_single_tbl_perf_test(handle, rte_hash_add_key,
 				params, &avg_occupancy, &invalid_pos);
+		params->num_iterations = num_iterations;
+		ticks += run_single_tbl_perf_test(handle, rte_hash_add_key,
+				params, &avg_occupancy, &invalid_pos);
 		break;
 	case DELETE:
 		num_iterations = params->num_iterations;
 		params->num_iterations = params->entries;
-		run_single_tbl_perf_test(handle, rte_hash_add_key, params,
-				&avg_occupancy, &invalid_pos);
+		ticks = run_single_tbl_perf_test(handle, rte_hash_add_key,
+				params, &avg_occupancy, &invalid_pos);
 
 		params->num_iterations = num_iterations;
-		ticks = run_single_tbl_perf_test(handle, rte_hash_del_key,
+		ticks += run_single_tbl_perf_test(handle, rte_hash_del_key,
 				params, &avg_occupancy, &invalid_pos);
 		break;
 	case LOOKUP:
 		num_iterations = params->num_iterations;
 		params->num_iterations = params->entries;
-		run_single_tbl_perf_test(handle, rte_hash_add_key, params,
-				&avg_occupancy, &invalid_pos);
+		ticks = run_single_tbl_perf_test(handle, rte_hash_add_key,
+				params, &avg_occupancy, &invalid_pos);
 
 		params->num_iterations = num_iterations;
-		ticks = run_single_tbl_perf_test(handle, rte_hash_lookup,
+		ticks += run_single_tbl_perf_test(handle, rte_hash_lookup,
 				params, &avg_occupancy, &invalid_pos);
 		break;
 	default: return -1;
@@ -623,10 +623,15 @@ static int run_all_tbl_perf_tests(void)
 static void run_hash_func_test(rte_hash_function f, uint32_t init_val,
 		uint32_t key_len)
 {
-	static uint8_t key[RTE_HASH_KEY_LENGTH_MAX];
+	static uint8_t * volatile key;
 	uint64_t ticks = 0, start, end;
 	unsigned i, j;
 
+	key = rte_zmalloc("func hash key",
+			  key_len * sizeof(uint8_t), 16);
+	if (key == NULL)
+		return;
+
 	for (i = 0; i < HASHTEST_ITERATIONS; i++) {
 
 		for (j = 0; j < key_len; j++)
@@ -638,8 +643,11 @@ static void run_hash_func_test(rte_hash_function f, uint32_t init_val,
 		ticks += end - start;
 	}
 
-	printf("%-12s, %-18u, %-13u, %.02f\n", get_hash_name(f), (unsigned) key_len,
-			(unsigned) init_val, (double)ticks / HASHTEST_ITERATIONS);
+	rte_free(key);
+
+	printf("%-12s, %-18u, %-13u, %.02f\n",
+		get_hash_name(f), (unsigned) key_len, (unsigned) init_val,
+		(double)ticks / HASHTEST_ITERATIONS);
 }
 
 /*
@@ -687,7 +695,7 @@ fbk_hash_perf_test(void)
 		.socket_id = rte_socket_id(),
 	};
 	struct rte_fbk_hash_table *handle = NULL;
-	uint32_t *keys = NULL;
+	uint32_t * volatile keys = NULL;
 	unsigned indexes[TEST_SIZE];
 	uint64_t lookup_time = 0;
 	unsigned added = 0;
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_memcmp.h b/lib/librte_eal/common/include/arch/ppc_64/rte_memcmp.h
new file mode 100644
index 0000000..7f99ee1
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_memcmp.h
@@ -0,0 +1,62 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) IBM Corporation 2014.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of IBM Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _RTE_MEMCMP_PPC_64_H_
+#define _RTE_MEMCMP_PPC_64_H_
+
+#include <stdint.h>
+#include <string.h>
+/* To include altivec.h, the GCC version must be >= 4.8 */
+#include <altivec.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "generic/rte_memcmp.h"
+
/*
 * On ppc64, rte_memcmp() simply maps to libc memcmp(): 0 if the regions
 * are equal, otherwise a value ordered by the first differing byte.
 * For a compile-time-constant n the compiler can expand memcmp() inline.
 */
#define rte_memcmp(dst, src, n)              \
	({ (__builtin_constant_p(n)) ?       \
	memcmp((dst), (src), (n)) :          \
	rte_memcmp_func((dst), (src), (n)); })

static inline int
rte_memcmp_func(void *dst, const void *src, size_t n)
{
	/*
	 * Must return int, not bool: the generic prototype declares int,
	 * and a bool return would collapse memcmp()'s sign to 0/1,
	 * breaking ordering for any caller that sorts by the result.
	 * (bool also required <stdbool.h>, which this header never included.)
	 */
	return memcmp(dst, src, n);
}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMCMP_PPC_64_H_ */
diff --git a/lib/librte_eal/common/include/arch/x86/rte_memcmp.h b/lib/librte_eal/common/include/arch/x86/rte_memcmp.h
new file mode 100644
index 0000000..b2bdeec
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/x86/rte_memcmp.h
@@ -0,0 +1,421 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MEMCMP_X86_64_H_
+#define _RTE_MEMCMP_X86_64_H_
+
+/**
+ * @file
+ *
+ * Functions for SSE/AVX/AVX2 implementation of memcmp().
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h>
+#include <rte_vect.h>
+#include <rte_branch_prediction.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
/**
 * Compare bytes between two locations. The locations must not overlap.
 *
 * @param src_1
 *   Pointer to the first source of the data.
 * @param src_2
 *   Pointer to the second source of the data.
 * @param n
 *   Number of bytes to compare.
 * @return
 *   0 if the first n bytes are equal; a negative value if the first
 *   differing byte is lower in src_1, a positive value otherwise
 *   (memcmp() semantics — NOT a boolean).
 */
static inline int
rte_memcmp(const void *src_1, const void *src_2,
		size_t n) __attribute__((always_inline));
+
/**
 * Compare 16 bytes between two locations.
 * Locations should not overlap.
 *
 * @return 0 if equal, otherwise a negative/positive value ordered by the
 *   first differing byte (memcmp() semantics).
 */
static inline int
rte_cmp16(const void *src_1, const void *src_2)
{
	__m128i xmm0, xmm1, xmm2;

	xmm0 = _mm_lddqu_si128((const __m128i *)src_1);
	xmm1 = _mm_lddqu_si128((const __m128i *)src_2);
	xmm2 = _mm_xor_si128(xmm0, xmm1);

	/* Fast path: all 16 bytes equal. */
	if (unlikely(!_mm_testz_si128(xmm2, xmm2))) {
		/*
		 * Mismatch: fall back to byte-wise comparison for the
		 * ordering result. The previous code compared the halves
		 * as little-endian uint64_t values, which orders by the
		 * LAST byte of each word instead of the first differing
		 * byte (wrong sign vs memcmp), and the casts also broke
		 * strict aliasing.
		 */
		return memcmp(src_1, src_2, 16);
	}

	return 0;
}
+
+/**
+ * AVX2 implementation below
+ */
+#ifdef RTE_MACHINE_CPUFLAG_AVX2
+
/**
 * Compare 32 bytes between two locations.
 * Locations should not overlap.
 *
 * @return 0 if equal, otherwise negative/positive per memcmp() ordering.
 */
static inline int
rte_cmp32(const void *src_1, const void *src_2)
{
	const __m128i *src1 = (const __m128i *)src_1;
	const __m128i *src2 = (const __m128i *)src_2;

	__m128i mm11 = _mm_lddqu_si128(src1);
	__m128i mm12 = _mm_lddqu_si128(src1 + 1);
	__m128i mm21 = _mm_lddqu_si128(src2);
	__m128i mm22 = _mm_lddqu_si128(src2 + 1);

	__m128i mm1 = _mm_xor_si128(mm11, mm21);
	__m128i mm2 = _mm_xor_si128(mm12, mm22);
	__m128i mm = _mm_or_si128(mm1, mm2);

	/* Fast path: all 32 bytes equal. */
	if (unlikely(!_mm_testz_si128(mm, mm))) {
		/*
		 * Mismatch: byte-wise fallback keeps memcmp() ordering.
		 * The removed _mm_cmpgt_epi8() path compared bytes as
		 * SIGNED, which yields the wrong sign for any byte
		 * >= 0x80 (memcmp orders by unsigned char).
		 */
		return memcmp(src_1, src_2, 32);
	}

	return 0;
}
+
/**
 * Compare 64 bytes between two locations.
 * Locations should not overlap.
 *
 * @return 0 if equal, otherwise negative/positive per memcmp() ordering.
 */
static inline int
rte_cmp64(const void *src_1, const void *src_2)
{
	const __m256i *src1 = (const __m256i *)src_1;
	const __m256i *src2 = (const __m256i *)src_2;

	__m256i mm11 = _mm256_lddqu_si256(src1);
	__m256i mm12 = _mm256_lddqu_si256(src1 + 1);
	__m256i mm21 = _mm256_lddqu_si256(src2);
	__m256i mm22 = _mm256_lddqu_si256(src2 + 1);

	__m256i mm1 = _mm256_xor_si256(mm11, mm21);
	__m256i mm2 = _mm256_xor_si256(mm12, mm22);
	__m256i mm = _mm256_or_si256(mm1, mm2);

	/* Fast path: all 64 bytes equal. */
	if (unlikely(!_mm256_testz_si256(mm, mm))) {
		/*
		 * Mismatch: byte-wise fallback keeps memcmp() ordering.
		 * The removed _mm256_cmpgt_epi8() path compared bytes as
		 * SIGNED, which yields the wrong sign for any byte
		 * >= 0x80 (memcmp orders by unsigned char).
		 */
		return memcmp(src_1, src_2, 64);
	}

	return 0;
}
+
/**
 * Compare 128 bytes between two locations.
 * Locations should not overlap.
 *
 * @return 0 if equal, otherwise negative/positive per memcmp() ordering.
 */
static inline int
rte_cmp128(const void *src_1, const void *src_2)
{
	const __m256i *src1 = (const __m256i *)src_1;
	const __m256i *src2 = (const __m256i *)src_2;
	const size_t n = 2;
	size_t i;

	/* Two iterations of 64 bytes each. */
	for (i = 0; i < n; ++i, src1 += 2, src2 += 2) {
		__m256i mm11 = _mm256_lddqu_si256(src1);
		__m256i mm12 = _mm256_lddqu_si256(src1 + 1);
		__m256i mm21 = _mm256_lddqu_si256(src2);
		__m256i mm22 = _mm256_lddqu_si256(src2 + 1);

		__m256i mm1 = _mm256_xor_si256(mm11, mm21);
		__m256i mm2 = _mm256_xor_si256(mm12, mm22);
		__m256i mm = _mm256_or_si256(mm1, mm2);

		if (unlikely(!_mm256_testz_si256(mm, mm))) {
			/*
			 * Mismatch in this 64-byte chunk: byte-wise
			 * fallback keeps memcmp() ordering. The removed
			 * _mm256_cmpgt_epi8() path compared bytes as
			 * SIGNED, wrong for bytes >= 0x80.
			 */
			return memcmp((const uint8_t *)src_1 + i * 64,
					(const uint8_t *)src_2 + i * 64, 64);
		}
	}

	return 0;
}
+#else /* RTE_MACHINE_CPUFLAG_AVX2 */
+
/**
 * Compare 32 bytes between two locations.
 * Locations should not overlap.
 */
static inline int
rte_cmp32(const void *src_1, const void *src_2)
{
	const uint8_t *s1 = (const uint8_t *)src_1;
	const uint8_t *s2 = (const uint8_t *)src_2;
	int diff;

	/* First 16-byte half; only fall through when it matches. */
	diff = rte_cmp16(s1, s2);
	if (unlikely(diff != 0))
		return diff;

	/* Second 16-byte half decides the result. */
	return rte_cmp16(s1 + 16, s2 + 16);
}
+
/**
 * Compare 64 bytes between two locations.
 * Locations should not overlap.
 */
static inline int
rte_cmp64(const void *src_1, const void *src_2)
{
	const uint8_t *s1 = (const uint8_t *)src_1;
	const uint8_t *s2 = (const uint8_t *)src_2;
	int diff;

	/* First 32-byte half; only fall through when it matches. */
	diff = rte_cmp32(s1, s2);
	if (unlikely(diff != 0))
		return diff;

	/* Second 32-byte half decides the result. */
	return rte_cmp32(s1 + 32, s2 + 32);
}
+
/**
 * Compare 128 bytes between two locations.
 * Locations should not overlap.
 */
static inline int
rte_cmp128(const void *src_1, const void *src_2)
{
	const uint8_t *s1 = (const uint8_t *)src_1;
	const uint8_t *s2 = (const uint8_t *)src_2;
	int diff;

	/* First 64-byte half; only fall through when it matches. */
	diff = rte_cmp64(s1, s2);
	if (unlikely(diff != 0))
		return diff;

	/* Second 64-byte half decides the result. */
	return rte_cmp64(s1 + 64, s2 + 64);
}
+
+#endif /* RTE_MACHINE_CPUFLAG_AVX2 */
+
+
/**
 * Compare 48 bytes between two locations.
 * Locations should not overlap.
 */
static inline int
rte_cmp48(const void *src_1, const void *src_2)
{
	const uint8_t *s1 = (const uint8_t *)src_1;
	const uint8_t *s2 = (const uint8_t *)src_2;
	int diff;

	/* Leading 32 bytes; only fall through when they match. */
	diff = rte_cmp32(s1, s2);
	if (unlikely(diff != 0))
		return diff;

	/* Trailing 16 bytes decide the result. */
	return rte_cmp16(s1 + 32, s2 + 32);
}
+
/**
 * Compare the trailing 1..15 bytes of two buffers.
 *
 * @param src_1u
 *   Pointer to the first source of the data.
 * @param src_2u
 *   Pointer to the second source of the data.
 * @param n
 *   Number of bytes to compare (expected < 16, but any n is handled).
 * @return
 *   0 if equal; -1/+1 ordered by the first differing byte (memcmp()
 *   semantics). The previous version returned the result of comparing
 *   the POINTER ADDRESSES (src_1u < src_2u) on mismatch, which says
 *   nothing about the ordering of the contents; it also mixed boolean
 *   equality results into the ordering return value.
 */
static inline int
rte_memcmp_remainder(const uint8_t *src_1u, const uint8_t *src_2u, size_t n)
{
	size_t i;

	/*
	 * A plain byte loop is correct, avoids the strict-aliasing casts
	 * of the word-sized version, and is cheap for n < 16.
	 */
	for (i = 0; i < n; i++) {
		if (src_1u[i] != src_2u[i])
			return (src_1u[i] < src_2u[i]) ? -1 : 1;
	}

	return 0;
}
+
/**
 * Compare n bytes at src_1 and src_2.
 *
 * @return 0 if the regions are equal; a negative value if the first
 *   differing byte is lower in src_1, a positive value otherwise
 *   (memcmp() semantics).
 */
static inline int
rte_memcmp(const void *_src_1, const void *_src_2, size_t n)
{
	const uint8_t *src_1 = (const uint8_t *)_src_1;
	const uint8_t *src_2 = (const uint8_t *)_src_2;
	int ret = 0;

	/*
	 * Walk whole 128-byte blocks first. The previous code returned
	 * straight after a single rte_cmp128()/rte_cmp64() call whenever
	 * bit 0x80/0x40 of n was set, silently IGNORING the remaining
	 * bytes (e.g. n == 130 compared only the first 128 bytes).
	 */
	while (n >= 0x80) {
		ret = rte_cmp128(src_1, src_2);
		if (ret != 0)
			return ret;
		src_1 += 0x80;
		src_2 += 0x80;
		n -= 0x80;
	}

	if (n & 0x40) {
		ret = rte_cmp64(src_1, src_2);
		if (ret != 0)
			return ret;
		src_1 += 0x40;
		src_2 += 0x40;
	}

	if (n & 0x20) {
		ret = rte_cmp32(src_1, src_2);
		if (ret != 0)
			return ret;
		src_1 += 0x20;
		src_2 += 0x20;
	}

	if (n & 0x10) {
		ret = rte_cmp16(src_1, src_2);
		if (ret != 0)
			return ret;
		src_1 += 0x10;
		src_2 += 0x10;
	}

	/* Trailing 1..15 bytes. */
	if (n & 0x0f)
		ret = rte_memcmp_remainder(src_1, src_2, n & 0x0f);

	return ret;
}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMCMP_X86_64_H_ */
diff --git a/lib/librte_eal/common/include/generic/rte_memcmp.h b/lib/librte_eal/common/include/generic/rte_memcmp.h
new file mode 100644
index 0000000..db9626b
--- /dev/null
+++ b/lib/librte_eal/common/include/generic/rte_memcmp.h
@@ -0,0 +1,131 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MEMCMP_H_
+#define _RTE_MEMCMP_H_
+
+/**
+ * @file
+ *
+ * Functions for vectorised implementation of memcmp().
+ */
+
/**
 * Compare 16 bytes between two locations using optimised
 * instructions. The locations should not overlap.
 *
 * @param src_1
 *   Pointer to the first source of the data.
 * @param src_2
 *   Pointer to the second source of the data.
 */
static inline int
rte_cmp16(const void *src_1, const void *src_2);

/**
 * Compare 32 bytes between two locations using optimised
 * instructions. The locations should not overlap.
 *
 * @param src_1
 *   Pointer to the first source of the data.
 * @param src_2
 *   Pointer to the second source of the data.
 */
static inline int
rte_cmp32(const void *src_1, const void *src_2);

/**
 * Compare 64 bytes between two locations using optimised
 * instructions. The locations should not overlap.
 *
 * @param src_1
 *   Pointer to the first source of the data.
 * @param src_2
 *   Pointer to the second source of the data.
 */
static inline int
rte_cmp64(const void *src_1, const void *src_2);

/**
 * Compare 48 bytes between two locations using optimised
 * instructions. The locations should not overlap.
 *
 * @param src_1
 *   Pointer to the first source of the data.
 * @param src_2
 *   Pointer to the second source of the data.
 */
static inline int
rte_cmp48(const void *src_1, const void *src_2);

/**
 * Compare 128 bytes between two locations using optimised
 * instructions. The locations should not overlap.
 *
 * @param src_1
 *   Pointer to the first source of the data.
 * @param src_2
 *   Pointer to the second source of the data.
 */
static inline int
rte_cmp128(const void *src_1, const void *src_2);

#ifdef __DOXYGEN__

/**
 * Compare bytes between two locations. The locations must not overlap.
 *
 * @note On some architectures this is implemented as a macro, so its
 * address should not be taken and care is needed as parameter expressions
 * may be evaluated multiple times.
 *
 * @param dst
 *   Pointer to the first source of the data.
 * @param src
 *   Pointer to the second source of the data.
 * @param n
 *   Number of bytes to compare.
 * @return
 *   0 if the first n bytes are equal, non-zero otherwise
 *   (memcmp() semantics — NOT a boolean "match" flag).
 */
static int
rte_memcmp(const void *dst, const void *src, size_t n);

#endif /* __DOXYGEN__ */

/*
 * memcmp() function used by the rte_memcmp macro on architectures that
 * wrap the libc implementation.
 */
static inline int
rte_memcmp_func(void *dst, const void *src, size_t n) __attribute__((always_inline));
+
+#endif /* _RTE_MEMCMP_H_ */
diff --git a/lib/librte_hash/rte_hash.c b/lib/librte_hash/rte_hash.c
index 9245716..075da62 100644
--- a/lib/librte_hash/rte_hash.c
+++ b/lib/librte_hash/rte_hash.c
@@ -42,6 +42,7 @@
 #include <rte_memory.h>         /* for definition of RTE_CACHE_LINE_SIZE */
 #include <rte_log.h>
 #include <rte_memcpy.h>
+#include <rte_memcmp.h>
 #include <rte_prefetch.h>
 #include <rte_branch_prediction.h>
 #include <rte_memzone.h>
@@ -299,6 +300,7 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h,
 	uint8_t *key_bucket;
 	uint32_t bucket_index, i;
 	int32_t pos;
+	const void * volatile key_1 = key;
 
 	/* Get the hash signature and bucket index */
 	sig |= h->sig_msb;
@@ -308,10 +310,13 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h,
 
 	/* Check if key is already present in the hash */
 	for (i = 0; i < h->bucket_entries; i++) {
-		if ((sig == sig_bucket[i]) &&
-		    likely(memcmp(key, get_key_from_bucket(h, key_bucket, i),
-				  h->key_len) == 0)) {
-			return bucket_index * h->bucket_entries + i;
+		if (sig == sig_bucket[i]) {
+
+			const void * volatile key_2 =
+				get_key_from_bucket(h, key_bucket, i);
+
+			if (likely(rte_memcmp(key_1, key_2, h->key_len) == 0))
+				return bucket_index * h->bucket_entries + i;
 		}
 	}
 
@@ -350,6 +355,8 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h,
 	uint8_t *key_bucket;
 	uint32_t bucket_index, i;
 
+	const void * volatile key_1 = key;
+
 	/* Get the hash signature and bucket index */
 	sig = sig | h->sig_msb;
 	bucket_index = sig & h->bucket_bitmask;
@@ -358,11 +365,14 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h,
 
 	/* Check if key is already present in the hash */
 	for (i = 0; i < h->bucket_entries; i++) {
-		if ((sig == sig_bucket[i]) &&
-		    likely(memcmp(key, get_key_from_bucket(h, key_bucket, i),
-				  h->key_len) == 0)) {
-			sig_bucket[i] = NULL_SIGNATURE;
-			return bucket_index * h->bucket_entries + i;
+		if (sig == sig_bucket[i]) {
+			const void * volatile key_2 =
+				get_key_from_bucket(h, key_bucket, i);
+
+			if (likely(rte_memcmp(key_1, key_2, h->key_len) == 0)) {
+				sig_bucket[i] = NULL_SIGNATURE;
+				return bucket_index * h->bucket_entries + i;
+			}
 		}
 	}
 
@@ -392,6 +402,8 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h,
 	uint8_t *key_bucket;
 	uint32_t bucket_index, i;
 
+	const void * volatile key_1 = key;
+
 	/* Get the hash signature and bucket index */
 	sig |= h->sig_msb;
 	bucket_index = sig & h->bucket_bitmask;
@@ -400,10 +412,13 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h,
 
 	/* Check if key is already present in the hash */
 	for (i = 0; i < h->bucket_entries; i++) {
-		if ((sig == sig_bucket[i]) &&
-		    likely(memcmp(key, get_key_from_bucket(h, key_bucket, i),
-				  h->key_len) == 0)) {
-			return bucket_index * h->bucket_entries + i;
+		if (sig == sig_bucket[i]) {
+
+			const void * volatile key_2 =
+				get_key_from_bucket(h, key_bucket, i);
+
+			if (likely(rte_memcmp(key_1, key_2, h->key_len) == 0))
+				return bucket_index * h->bucket_entries + i;
 		}
 	}
 
@@ -456,13 +471,17 @@ rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
 		positions[i] = -ENOENT;
 
 		for (j = 0; j < h->bucket_entries; j++) {
-			if ((sigs[i] == sig_bucket[j]) &&
-			    likely(memcmp(keys[i],
-					  get_key_from_bucket(h, key_bucket, j),
-					  h->key_len) == 0)) {
-				positions[i] = bucket_index *
-					h->bucket_entries + j;
-				break;
+			if (sigs[i] == sig_bucket[j]) {
+
+				const void * volatile key_1 = keys[i];
+				const void * volatile key_2 =
+					get_key_from_bucket(h, key_bucket, j);
+				if (likely(rte_memcmp(key_1, key_2,
+							h->key_len) == 0)) {
+					positions[i] = bucket_index *
+							h->bucket_entries + j;
+					break;
+				}
 			}
 		}
 	}
-- 
1.9.1



More information about the dev mailing list