[dpdk-dev] [PATCH v6 4/7] member: add AVX for HT mode
    Yipeng Wang 
    yipeng1.wang at intel.com
       
    Wed Oct  4 05:12:22 CEST 2017
    
    
  
For key search, the signatures of all entries are compared against
the signature of the key that is being looked up. Since all
signatures are contiguously put in a bucket, they can be compared
with vector instructions (AVX2), achieving higher lookup performance.
This patch adds AVX2 implementation in a separate header file.
Signed-off-by: Yipeng Wang <yipeng1.wang at intel.com>
Reviewed-by: Pablo de Lara <pablo.de.lara.guarch at intel.com>
---
 lib/librte_member/rte_member_ht.c  | 142 +++++++++++++++++++++++++++++--------
 lib/librte_member/rte_member_x86.h | 107 ++++++++++++++++++++++++++++
 2 files changed, 218 insertions(+), 31 deletions(-)
 create mode 100644 lib/librte_member/rte_member_x86.h
diff --git a/lib/librte_member/rte_member_ht.c b/lib/librte_member/rte_member_ht.c
index e038987..59332d5 100644
--- a/lib/librte_member/rte_member_ht.c
+++ b/lib/librte_member/rte_member_ht.c
@@ -40,6 +40,10 @@
 #include "rte_member.h"
 #include "rte_member_ht.h"
 
+#if defined(RTE_ARCH_X86)
+#include "rte_member_x86.h"
+#endif
+
 /* Search bucket for entry with tmp_sig and update set_id */
 static inline int
 update_entry_search(uint32_t bucket_id, member_sig_t tmp_sig,
@@ -136,6 +140,13 @@ rte_member_create_ht(struct rte_member_setsum *ss,
 		for (j = 0; j < RTE_MEMBER_BUCKET_ENTRIES; j++)
 			buckets[i].sets[j] = RTE_MEMBER_NO_MATCH;
 	}
+#if defined(RTE_ARCH_X86)
+	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) &&
+			RTE_MEMBER_BUCKET_ENTRIES == 16)
+		ss->sig_cmp_fn = RTE_MEMBER_COMPARE_AVX2;
+	else
+#endif
+		ss->sig_cmp_fn = RTE_MEMBER_COMPARE_SCALAR;
 
 	RTE_MEMBER_LOG(DEBUG, "Hash table based filter created, "
 			"the table has %u entries, %u buckets\n",
@@ -193,11 +204,23 @@ rte_member_lookup_ht(const struct rte_member_setsum *ss,
 	*set_id = RTE_MEMBER_NO_MATCH;
 	get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig);
 
-	if (search_bucket_single(prim_bucket, tmp_sig, buckets,
-			set_id) ||
-			search_bucket_single(sec_bucket, tmp_sig,
-				buckets, set_id))
-		return 1;
+	switch (ss->sig_cmp_fn) {
+#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2)
+	case RTE_MEMBER_COMPARE_AVX2:
+		if (search_bucket_single_avx(prim_bucket, tmp_sig, buckets,
+				set_id) ||
+				search_bucket_single_avx(sec_bucket, tmp_sig,
+					buckets, set_id))
+			return 1;
+		break;
+#endif
+	default:
+		if (search_bucket_single(prim_bucket, tmp_sig, buckets,
+				set_id) ||
+				search_bucket_single(sec_bucket, tmp_sig,
+					buckets, set_id))
+			return 1;
+	}
 
 	return 0;
 }
@@ -221,13 +244,27 @@ rte_member_lookup_bulk_ht(const struct rte_member_setsum *ss,
 	}
 
 	for (i = 0; i < num_keys; i++) {
-		if (search_bucket_single(prim_buckets[i], tmp_sig[i],
-				buckets, &set_id[i]) ||
-				search_bucket_single(sec_buckets[i],
-				tmp_sig[i], buckets, &set_id[i]))
-			num_matches++;
-		else
-			set_id[i] = RTE_MEMBER_NO_MATCH;
+		switch (ss->sig_cmp_fn) {
+#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2)
+		case RTE_MEMBER_COMPARE_AVX2:
+			if (search_bucket_single_avx(prim_buckets[i],
+					tmp_sig[i], buckets, &set_id[i]) ||
+				search_bucket_single_avx(sec_buckets[i],
+					tmp_sig[i], buckets, &set_id[i]))
+				num_matches++;
+			else
+				set_id[i] = RTE_MEMBER_NO_MATCH;
+			break;
+#endif
+		default:
+			if (search_bucket_single(prim_buckets[i], tmp_sig[i],
+					buckets, &set_id[i]) ||
+					search_bucket_single(sec_buckets[i],
+					tmp_sig[i], buckets, &set_id[i]))
+				num_matches++;
+			else
+				set_id[i] = RTE_MEMBER_NO_MATCH;
+		}
 	}
 	return num_matches;
 }
@@ -244,12 +281,24 @@ rte_member_lookup_multi_ht(const struct rte_member_setsum *ss,
 
 	get_buckets_index(ss, key, &prim_bucket, &sec_bucket, &tmp_sig);
 
-	search_bucket_multi(prim_bucket, tmp_sig, buckets, &num_matches,
-			 match_per_key, set_id);
-	if (num_matches < match_per_key)
-		search_bucket_multi(sec_bucket, tmp_sig,
-			buckets, &num_matches, match_per_key, set_id);
-	return num_matches;
+	switch (ss->sig_cmp_fn) {
+#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2)
+	case RTE_MEMBER_COMPARE_AVX2:
+		search_bucket_multi_avx(prim_bucket, tmp_sig, buckets,
+			&num_matches, match_per_key, set_id);
+		if (num_matches < match_per_key)
+			search_bucket_multi_avx(sec_bucket, tmp_sig,
+				buckets, &num_matches, match_per_key, set_id);
+		return num_matches;
+#endif
+	default:
+		search_bucket_multi(prim_bucket, tmp_sig, buckets, &num_matches,
+				 match_per_key, set_id);
+		if (num_matches < match_per_key)
+			search_bucket_multi(sec_bucket, tmp_sig,
+				buckets, &num_matches, match_per_key, set_id);
+		return num_matches;
+	}
 }
 
 uint32_t
@@ -275,16 +324,34 @@ rte_member_lookup_multi_bulk_ht(const struct rte_member_setsum *ss,
 	for (i = 0; i < num_keys; i++) {
 		match_cnt_tmp = 0;
 
-		search_bucket_multi(prim_buckets[i], tmp_sig[i],
-			buckets, &match_cnt_tmp, match_per_key,
-			&set_ids[i*match_per_key]);
-		if (match_cnt_tmp < match_per_key)
-			search_bucket_multi(sec_buckets[i], tmp_sig[i],
+		switch (ss->sig_cmp_fn) {
+#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2)
+		case RTE_MEMBER_COMPARE_AVX2:
+			search_bucket_multi_avx(prim_buckets[i], tmp_sig[i],
 				buckets, &match_cnt_tmp, match_per_key,
 				&set_ids[i*match_per_key]);
-		match_count[i] = match_cnt_tmp;
-		if (match_cnt_tmp != 0)
-			num_matches++;
+			if (match_cnt_tmp < match_per_key)
+				search_bucket_multi_avx(sec_buckets[i],
+					tmp_sig[i], buckets, &match_cnt_tmp,
+					match_per_key,
+					&set_ids[i*match_per_key]);
+			match_count[i] = match_cnt_tmp;
+			if (match_cnt_tmp != 0)
+				num_matches++;
+			break;
+#endif
+		default:
+			search_bucket_multi(prim_buckets[i], tmp_sig[i],
+				buckets, &match_cnt_tmp, match_per_key,
+				&set_ids[i*match_per_key]);
+			if (match_cnt_tmp < match_per_key)
+				search_bucket_multi(sec_buckets[i], tmp_sig[i],
+					buckets, &match_cnt_tmp, match_per_key,
+					&set_ids[i*match_per_key]);
+			match_count[i] = match_cnt_tmp;
+			if (match_cnt_tmp != 0)
+				num_matches++;
+		}
 	}
 	return num_matches;
 }
@@ -315,11 +382,24 @@ try_insert(struct member_ht_bucket *buckets, uint32_t prim, uint32_t sec,
 
 static inline int
 try_update(struct member_ht_bucket *buckets, uint32_t prim, uint32_t sec,
-		member_sig_t sig, member_set_t set_id)
+		member_sig_t sig, member_set_t set_id,
+		enum rte_member_sig_compare_function cmp_fn)
 {
-	if (update_entry_search(prim, sig, buckets, set_id) ||
-		update_entry_search(sec, sig, buckets, set_id))
-		return 0;
+	switch (cmp_fn) {
+#if defined(RTE_ARCH_X86) && defined(RTE_MACHINE_CPUFLAG_AVX2)
+	case RTE_MEMBER_COMPARE_AVX2:
+		if (update_entry_search_avx(prim, sig, buckets, set_id) ||
+				update_entry_search_avx(sec, sig, buckets,
+					set_id))
+			return 0;
+		break;
+#endif
+	default:
+		if (update_entry_search(prim, sig, buckets, set_id) ||
+				update_entry_search(sec, sig, buckets,
+					set_id))
+			return 0;
+	}
 	return -1;
 }
 
@@ -430,7 +510,7 @@ rte_member_add_ht(const struct rte_member_setsum *ss,
 	 */
 	if (ss->cache) {
 		ret = try_update(buckets, prim_bucket, sec_bucket, tmp_sig,
-					set_id);
+					set_id, ss->sig_cmp_fn);
 		if (ret != -1)
 			return ret;
 	}
diff --git a/lib/librte_member/rte_member_x86.h b/lib/librte_member/rte_member_x86.h
new file mode 100644
index 0000000..d29dd3f
--- /dev/null
+++ b/lib/librte_member/rte_member_x86.h
@@ -0,0 +1,107 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_MEMBER_X86_H_
+#define _RTE_MEMBER_X86_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <x86intrin.h>
+
+#if defined(RTE_MACHINE_CPUFLAG_AVX2)
+
+static inline int
+update_entry_search_avx(uint32_t bucket_id, member_sig_t tmp_sig,
+		struct member_ht_bucket *buckets,
+		member_set_t set_id)
+{
+	uint32_t hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16(
+		_mm256_load_si256((__m256i const *)buckets[bucket_id].sigs),
+		_mm256_set1_epi16(tmp_sig)));
+	if (hitmask) {
+		uint32_t hit_idx = __builtin_ctzl(hitmask) >> 1;
+		buckets[bucket_id].sets[hit_idx] = set_id;
+		return 1;
+	}
+	return 0;
+}
+
+static inline int
+search_bucket_single_avx(uint32_t bucket_id, member_sig_t tmp_sig,
+		struct member_ht_bucket *buckets,
+		member_set_t *set_id)
+{
+	uint32_t hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16(
+		_mm256_load_si256((__m256i const *)buckets[bucket_id].sigs),
+		_mm256_set1_epi16(tmp_sig)));
+	while (hitmask) {
+		uint32_t hit_idx = __builtin_ctzl(hitmask) >> 1;
+		if (buckets[bucket_id].sets[hit_idx] != RTE_MEMBER_NO_MATCH) {
+			*set_id = buckets[bucket_id].sets[hit_idx];
+			return 1;
+		}
+		hitmask &= ~(3U << ((hit_idx) << 1));
+	}
+	return 0;
+}
+
+static inline void
+search_bucket_multi_avx(uint32_t bucket_id, member_sig_t tmp_sig,
+				struct member_ht_bucket *buckets,
+				uint32_t *counter,
+				uint32_t match_per_key,
+				member_set_t *set_id)
+{
+	uint32_t hitmask = _mm256_movemask_epi8((__m256i)_mm256_cmpeq_epi16(
+		_mm256_load_si256((__m256i const *)buckets[bucket_id].sigs),
+		_mm256_set1_epi16(tmp_sig)));
+	while (hitmask) {
+		uint32_t hit_idx = __builtin_ctzl(hitmask) >> 1;
+		if (buckets[bucket_id].sets[hit_idx] != RTE_MEMBER_NO_MATCH) {
+			set_id[*counter] = buckets[bucket_id].sets[hit_idx];
+			(*counter)++;
+			if (*counter >= match_per_key)
+				return;
+		}
+		hitmask &= ~(3U << ((hit_idx) << 1));
+	}
+}
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MEMBER_X86_H_ */
-- 
2.7.4
    
    
More information about the dev
mailing list