[dpdk-dev] [PATCH v3] hash table: add an iterator over conflicting entries

Wang, Yipeng1 yipeng1.wang at intel.com
Tue Sep 4 20:55:35 CEST 2018


Thanks for the patch.

Do we need both of the state and istate struct? struct rte_hash_iterator_state  seems not doing much.
How about we only have one "state" struct and just not expose the internals to the public API, similar to the
rte_hash struct or rte_member_setsum struct. 
And in _init function use rte_malloc to allocate the state and add a _free function to free it.

Thanks
Yipeng


>-----Original Message-----
>From: Qiaobin Fu [mailto:qiaobinf at bu.edu]
>Sent: Friday, August 31, 2018 9:51 AM
>To: Richardson, Bruce <bruce.richardson at intel.com>; De Lara Guarch, Pablo <pablo.de.lara.guarch at intel.com>
>Cc: dev at dpdk.org; doucette at bu.edu; Wiles, Keith <keith.wiles at intel.com>; Gobriel, Sameh <sameh.gobriel at intel.com>; Tai, Charlie
><charlie.tai at intel.com>; stephen at networkplumber.org; nd at arm.com; honnappa.nagarahalli at arm.com; Wang, Yipeng1
><yipeng1.wang at intel.com>; michel at digirati.com.br; qiaobinf at bu.edu
>Subject: [PATCH v3] hash table: add an iterator over conflicting entries
>
>Function rte_hash_iterate_conflict_entries() iterates over
>the entries that conflict with an incoming entry.
>
>Iterating over conflicting entries enables one to decide
>if the incoming entry is more valuable than the entries already
>in the hash table. This is particularly useful after
>an insertion failure.
>
>v3:
>* Make the rte_hash_iterate() API similar to
>  rte_hash_iterate_conflict_entries()
>
>v2:
>* Fix the style issue
>
>* Make the API more universal
>
>Signed-off-by: Qiaobin Fu <qiaobinf at bu.edu>
>Reviewed-by: Cody Doucette <doucette at bu.edu>
>Reviewed-by: Michel Machado <michel at digirati.com.br>
>Reviewed-by: Keith Wiles <keith.wiles at intel.com>
>Reviewed-by: Yipeng Wang <yipeng1.wang at intel.com>
>Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli at arm.com>
>---
> lib/librte_hash/Makefile             |   1 +
> lib/librte_hash/rte_cuckoo_hash.c    | 132 +++++++++++++++++++++++----
> lib/librte_hash/rte_hash.h           |  80 ++++++++++++++--
> lib/librte_hash/rte_hash_version.map |   8 ++
> test/test/test_hash.c                |   7 +-
> test/test/test_hash_multiwriter.c    |   8 +-
> test/test/test_hash_readwrite.c      |  16 ++--
> 7 files changed, 219 insertions(+), 33 deletions(-)
>
>diff --git a/lib/librte_hash/Makefile b/lib/librte_hash/Makefile
>index c8c435dfd..9be58a205 100644
>--- a/lib/librte_hash/Makefile
>+++ b/lib/librte_hash/Makefile
>@@ -6,6 +6,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
> # library name
> LIB = librte_hash.a
>
>+CFLAGS += -DALLOW_EXPERIMENTAL_API
> CFLAGS += -O3
> CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
> LDLIBS += -lrte_eal -lrte_ring
>diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c
>index f7b86c8c9..cf5b28196 100644
>--- a/lib/librte_hash/rte_cuckoo_hash.c
>+++ b/lib/librte_hash/rte_cuckoo_hash.c
>@@ -1300,45 +1300,143 @@ rte_hash_lookup_bulk_data(const struct rte_hash *h, const void **keys,
> 	return __builtin_popcountl(*hit_mask);
> }
>
>+/* istate stands for internal state. */
>+struct rte_hash_iterator_istate {
>+	const struct rte_hash *h;
>+	uint32_t              next;
>+	uint32_t              total_entries;
>+};
>+
>+int32_t
>+rte_hash_iterator_init(const struct rte_hash *h,
>+	struct rte_hash_iterator_state *state)
>+{
>+	struct rte_hash_iterator_istate *__state;
>+
>+	RETURN_IF_TRUE(((h == NULL) || (state == NULL)), -EINVAL);
>+
>+	__state = (struct rte_hash_iterator_istate *)state;
>+	__state->h = h;
>+	__state->next = 0;
>+	__state->total_entries = h->num_buckets * RTE_HASH_BUCKET_ENTRIES;
>+
>+	return 0;
>+}
>+
> int32_t
>-rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32_t *next)
>+rte_hash_iterate(
>+	struct rte_hash_iterator_state *state, const void **key, void **data)
> {
>+	struct rte_hash_iterator_istate *__state;
> 	uint32_t bucket_idx, idx, position;
> 	struct rte_hash_key *next_key;
>
>-	RETURN_IF_TRUE(((h == NULL) || (next == NULL)), -EINVAL);
>+	RETURN_IF_TRUE(((state == NULL) || (key == NULL) ||
>+		(data == NULL)), -EINVAL);
>+
>+	__state = (struct rte_hash_iterator_istate *)state;
>
>-	const uint32_t total_entries = h->num_buckets * RTE_HASH_BUCKET_ENTRIES;
> 	/* Out of bounds */
>-	if (*next >= total_entries)
>+	if (__state->next >= __state->total_entries)
> 		return -ENOENT;
>
> 	/* Calculate bucket and index of current iterator */
>-	bucket_idx = *next / RTE_HASH_BUCKET_ENTRIES;
>-	idx = *next % RTE_HASH_BUCKET_ENTRIES;
>+	bucket_idx = __state->next / RTE_HASH_BUCKET_ENTRIES;
>+	idx = __state->next % RTE_HASH_BUCKET_ENTRIES;
>
> 	/* If current position is empty, go to the next one */
>-	while (h->buckets[bucket_idx].key_idx[idx] == EMPTY_SLOT) {
>-		(*next)++;
>+	while (__state->h->buckets[bucket_idx].key_idx[idx] == EMPTY_SLOT) {
>+		__state->next++;
> 		/* End of table */
>-		if (*next == total_entries)
>+		if (__state->next == __state->total_entries)
> 			return -ENOENT;
>-		bucket_idx = *next / RTE_HASH_BUCKET_ENTRIES;
>-		idx = *next % RTE_HASH_BUCKET_ENTRIES;
>+		bucket_idx = __state->next / RTE_HASH_BUCKET_ENTRIES;
>+		idx = __state->next % RTE_HASH_BUCKET_ENTRIES;
> 	}
>-	__hash_rw_reader_lock(h);
>+	__hash_rw_reader_lock(__state->h);
> 	/* Get position of entry in key table */
>-	position = h->buckets[bucket_idx].key_idx[idx];
>-	next_key = (struct rte_hash_key *) ((char *)h->key_store +
>-				position * h->key_entry_size);
>+	position = __state->h->buckets[bucket_idx].key_idx[idx];
>+	next_key = (struct rte_hash_key *) ((char *)__state->h->key_store +
>+				position * __state->h->key_entry_size);
> 	/* Return key and data */
> 	*key = next_key->key;
> 	*data = next_key->pdata;
>
>-	__hash_rw_reader_unlock(h);
>+	__hash_rw_reader_unlock(__state->h);
>
> 	/* Increment iterator */
>-	(*next)++;
>+	__state->next++;
>
> 	return position - 1;
> }
>+
>+/* istate stands for internal state. */
>+struct rte_hash_iterator_conflict_entries_istate {
>+	const struct rte_hash *h;
>+	uint32_t              vnext;
>+	uint32_t              primary_bidx;
>+	uint32_t              secondary_bidx;
>+};
>+
>+int32_t __rte_experimental
>+rte_hash_iterator_conflict_entries_init_with_hash(const struct rte_hash *h,
>+	hash_sig_t sig, struct rte_hash_iterator_state *state)
>+{
>+	struct rte_hash_iterator_conflict_entries_istate *__state;
>+
>+	RETURN_IF_TRUE(((h == NULL) || (state == NULL)), -EINVAL);
>+
>+	__state = (struct rte_hash_iterator_conflict_entries_istate *)state;
>+	__state->h = h;
>+	__state->vnext = 0;
>+
>+	/* Get the primary bucket index given the precomputed hash value. */
>+	__state->primary_bidx = sig & h->bucket_bitmask;
>+	/* Get the secondary bucket index given the precomputed hash value. */
>+	__state->secondary_bidx =
>+		rte_hash_secondary_hash(sig) & h->bucket_bitmask;
>+
>+	return 0;
>+}
>+
>+int32_t __rte_experimental
>+rte_hash_iterate_conflict_entries(
>+	struct rte_hash_iterator_state *state, const void **key, void **data)
>+{
>+	struct rte_hash_iterator_conflict_entries_istate *__state;
>+
>+	RETURN_IF_TRUE(((state == NULL) || (key == NULL) ||
>+		(data == NULL)), -EINVAL);
>+
>+	__state = (struct rte_hash_iterator_conflict_entries_istate *)state;
>+
>+	while (__state->vnext < RTE_HASH_BUCKET_ENTRIES * 2) {
>+		uint32_t bidx = __state->vnext < RTE_HASH_BUCKET_ENTRIES ?
>+			__state->primary_bidx : __state->secondary_bidx;
>+		uint32_t next = __state->vnext & (RTE_HASH_BUCKET_ENTRIES - 1);
>+		uint32_t position = __state->h->buckets[bidx].key_idx[next];
>+		struct rte_hash_key *next_key;
>+
>+		/* Increment iterator. */
>+		__state->vnext++;
>+
>+		/*
>+		 * The test below is unlikely because this iterator is meant
>+		 * to be used after a failed insert.
>+		 */
>+		if (unlikely(position == EMPTY_SLOT))
>+			continue;
>+
>+		/* Get the entry in key table. */
>+		next_key = (struct rte_hash_key *) (
>+			(char *)__state->h->key_store +
>+			position * __state->h->key_entry_size);
>+		/* Return key and data. */
>+		*key = next_key->key;
>+		*data = next_key->pdata;
>+
>+		return position - 1;
>+	}
>+
>+	return -ENOENT;
>+}
>diff --git a/lib/librte_hash/rte_hash.h b/lib/librte_hash/rte_hash.h
>index 9e7d9315f..fdb01023e 100644
>--- a/lib/librte_hash/rte_hash.h
>+++ b/lib/librte_hash/rte_hash.h
>@@ -14,6 +14,8 @@
> #include <stdint.h>
> #include <stddef.h>
>
>+#include <rte_compat.h>
>+
> #ifdef __cplusplus
> extern "C" {
> #endif
>@@ -64,6 +66,16 @@ struct rte_hash_parameters {
> /** @internal A hash table structure. */
> struct rte_hash;
>
>+/**
>+ * @warning
>+ * @b EXPERIMENTAL: this API may change without prior notice.
>+ *
>+ * @internal A hash table iterator state structure.
>+ */
>+struct rte_hash_iterator_state {
>+	uint8_t space[64];
>+} __rte_cache_aligned;
>+
> /**
>  * Create a new hash table.
>  *
>@@ -443,26 +455,82 @@ rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
> 		      uint32_t num_keys, int32_t *positions);
>
> /**
>- * Iterate through the hash table, returning key-value pairs.
>+ * Initialize the iterator over the hash table.
>  *
>  * @param h
>- *   Hash table to iterate
>+ *   Hash table to iterate.
>+ * @param state
>+ *   Pointer to the iterator state.
>+ * @return
>+ *   - 0 if successful.
>+ *   - -EINVAL if the parameters are invalid.
>+ */
>+int32_t
>+rte_hash_iterator_init(const struct rte_hash *h,
>+	struct rte_hash_iterator_state *state);
>+
>+/**
>+ * Iterate through the hash table, returning key-value pairs.
>+ *
>+ * @param state
>+ *   Pointer to the iterator state.
>  * @param key
>  *   Output containing the key where current iterator
>  *   was pointing at
>  * @param data
>  *   Output containing the data associated with key.
>  *   Returns NULL if data was not stored.
>- * @param next
>- *   Pointer to iterator. Should be 0 to start iterating the hash table.
>- *   Iterator is incremented after each call of this function.
>  * @return
>  *   Position where key was stored, if successful.
>  *   - -EINVAL if the parameters are invalid.
>  *   - -ENOENT if end of the hash table.
>  */
> int32_t
>-rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32_t *next);
>+rte_hash_iterate(
>+	struct rte_hash_iterator_state *state, const void **key, void **data);
>+
>+/**
>+ * @warning
>+ * @b EXPERIMENTAL: this API may change without prior notice.
>+ *
>+ * Initialize the iterator over entries that conflict with a given hash.
>+ *
>+ * @param h
>+ *   Hash table to iterate.
>+ * @param sig
>+ *   Precomputed hash value with which the returning entries conflict.
>+ * @param state
>+ *   Pointer to the iterator state.
>+ * @return
>+ *   - 0 if successful.
>+ *   - -EINVAL if the parameters are invalid.
>+ */
>+int32_t __rte_experimental
>+rte_hash_iterator_conflict_entries_init_with_hash(const struct rte_hash *h,
>+	hash_sig_t sig, struct rte_hash_iterator_state *state);
>+
>+/**
>+ * @warning
>+ * @b EXPERIMENTAL: this API may change without prior notice.
>+ *
>+ * Iterate over entries that conflict with a given hash.
>+ *
>+ * @param state
>+ *   Pointer to the iterator state.
>+ * @param key
>+ *   Output containing the key at where the iterator is currently pointing.
>+ * @param data
>+ *   Output containing the data associated with key.
>+ *   Returns NULL if data was not stored.
>+ * @return
>+ *   Position where key was stored, if successful.
>+ *   - -EINVAL if the parameters are invalid.
>+ *   - -ENOENT if there is no more conflicting entries.
>+ */
>+int32_t __rte_experimental
>+rte_hash_iterate_conflict_entries(
>+	struct rte_hash_iterator_state *state, const void **key, void **data);
>+
> #ifdef __cplusplus
> }
> #endif
>diff --git a/lib/librte_hash/rte_hash_version.map b/lib/librte_hash/rte_hash_version.map
>index e216ac8e2..301d4638c 100644
>--- a/lib/librte_hash/rte_hash_version.map
>+++ b/lib/librte_hash/rte_hash_version.map
>@@ -24,6 +24,7 @@ DPDK_2.1 {
>
> 	rte_hash_add_key_data;
> 	rte_hash_add_key_with_hash_data;
>+	rte_hash_iterator_init;
> 	rte_hash_iterate;
> 	rte_hash_lookup_bulk_data;
> 	rte_hash_lookup_data;
>@@ -53,3 +54,10 @@ DPDK_18.08 {
> 	rte_hash_count;
>
> } DPDK_16.07;
>+
>+EXPERIMENTAL {
>+	global:
>+
>+	rte_hash_iterator_conflict_entries_init_with_hash;
>+	rte_hash_iterate_conflict_entries;
>+};
>diff --git a/test/test/test_hash.c b/test/test/test_hash.c
>index b3db9fd10..bf57004c3 100644
>--- a/test/test/test_hash.c
>+++ b/test/test/test_hash.c
>@@ -1170,8 +1170,8 @@ static int test_hash_iteration(void)
> 	void *next_data;
> 	void *data[NUM_ENTRIES];
> 	unsigned added_keys;
>-	uint32_t iter = 0;
> 	int ret = 0;
>+	struct rte_hash_iterator_state state;
>
> 	ut_params.entries = NUM_ENTRIES;
> 	ut_params.name = "test_hash_iteration";
>@@ -1180,6 +1180,9 @@ static int test_hash_iteration(void)
> 	handle = rte_hash_create(&ut_params);
> 	RETURN_IF_ERROR(handle == NULL, "hash creation failed");
>
>+	RETURN_IF_ERROR(rte_hash_iterator_init(handle, &state) != 0,
>+			"initialization of the hash iterator failed");
>+
> 	/* Add random entries until key cannot be added */
> 	for (added_keys = 0; added_keys < NUM_ENTRIES; added_keys++) {
> 		data[added_keys] = (void *) ((uintptr_t) rte_rand());
>@@ -1191,7 +1194,7 @@ static int test_hash_iteration(void)
> 	}
>
> 	/* Iterate through the hash table */
>-	while (rte_hash_iterate(handle, &next_key, &next_data, &iter) >= 0) {
>+	while (rte_hash_iterate(&state, &next_key, &next_data) >= 0) {
> 		/* Search for the key in the list of keys added */
> 		for (i = 0; i < NUM_ENTRIES; i++) {
> 			if (memcmp(next_key, keys[i], ut_params.key_len) == 0) {
>diff --git a/test/test/test_hash_multiwriter.c b/test/test/test_hash_multiwriter.c
>index 6a3eb10bd..48db8007d 100644
>--- a/test/test/test_hash_multiwriter.c
>+++ b/test/test/test_hash_multiwriter.c
>@@ -125,18 +125,22 @@ test_hash_multiwriter(void)
>
> 	const void *next_key;
> 	void *next_data;
>-	uint32_t iter = 0;
>
> 	uint32_t duplicated_keys = 0;
> 	uint32_t lost_keys = 0;
> 	uint32_t count;
>
>+	struct rte_hash_iterator_state state;
>+
> 	snprintf(name, 32, "test%u", calledCount++);
> 	hash_params.name = name;
>
> 	handle = rte_hash_create(&hash_params);
> 	RETURN_IF_ERROR(handle == NULL, "hash creation failed");
>
>+	RETURN_IF_ERROR(rte_hash_iterator_init(handle, &state) != 0,
>+			"initialization of the hash iterator failed");
>+
> 	tbl_multiwriter_test_params.h = handle;
> 	tbl_multiwriter_test_params.nb_tsx_insertion =
> 		nb_total_tsx_insertion / rte_lcore_count();
>@@ -203,7 +207,7 @@ test_hash_multiwriter(void)
> 		goto err3;
> 	}
>
>-	while (rte_hash_iterate(handle, &next_key, &next_data, &iter) >= 0) {
>+	while (rte_hash_iterate(&state, &next_key, &next_data) >= 0) {
> 		/* Search for the key in the list of keys added .*/
> 		i = *(const uint32_t *)next_key;
> 		tbl_multiwriter_test_params.found[i]++;
>diff --git a/test/test/test_hash_readwrite.c b/test/test/test_hash_readwrite.c
>index 55ae33d80..9cdab9992 100644
>--- a/test/test/test_hash_readwrite.c
>+++ b/test/test/test_hash_readwrite.c
>@@ -166,12 +166,13 @@ test_hash_readwrite_functional(int use_htm)
> 	unsigned int i;
> 	const void *next_key;
> 	void *next_data;
>-	uint32_t iter = 0;
>
> 	uint32_t duplicated_keys = 0;
> 	uint32_t lost_keys = 0;
> 	int use_jhash = 1;
>
>+	struct rte_hash_iterator_state state;
>+
> 	rte_atomic64_init(&gcycles);
> 	rte_atomic64_clear(&gcycles);
>
>@@ -188,6 +189,8 @@ test_hash_readwrite_functional(int use_htm)
> 		tbl_rw_test_param.num_insert
> 		* rte_lcore_count();
>
>+	rte_hash_iterator_init(tbl_rw_test_param.h, &state);
>+
> 	printf("++++++++Start function tests:+++++++++\n");
>
> 	/* Fire all threads. */
>@@ -195,8 +198,7 @@ test_hash_readwrite_functional(int use_htm)
> 				 NULL, CALL_MASTER);
> 	rte_eal_mp_wait_lcore();
>
>-	while (rte_hash_iterate(tbl_rw_test_param.h, &next_key,
>-			&next_data, &iter) >= 0) {
>+	while (rte_hash_iterate(&state, &next_key, &next_data) >= 0) {
> 		/* Search for the key in the list of keys added .*/
> 		i = *(const uint32_t *)next_key;
> 		tbl_rw_test_param.found[i]++;
>@@ -315,9 +317,10 @@ test_hash_readwrite_perf(struct perf *perf_results, int use_htm,
>
> 	const void *next_key;
> 	void *next_data;
>-	uint32_t iter = 0;
> 	int use_jhash = 0;
>
>+	struct rte_hash_iterator_state state;
>+
> 	uint32_t duplicated_keys = 0;
> 	uint32_t lost_keys = 0;
>
>@@ -336,6 +339,8 @@ test_hash_readwrite_perf(struct perf *perf_results, int use_htm,
> 	if (init_params(use_htm, use_jhash) != 0)
> 		goto err;
>
>+	rte_hash_iterator_init(tbl_rw_test_param.h, &state);
>+
> 	/*
> 	 * Do a readers finish faster or writers finish faster test.
> 	 * When readers finish faster, we timing the readers, and when writers
>@@ -484,8 +489,7 @@ test_hash_readwrite_perf(struct perf *perf_results, int use_htm,
>
> 		rte_eal_mp_wait_lcore();
>
>-		while (rte_hash_iterate(tbl_rw_test_param.h,
>-				&next_key, &next_data, &iter) >= 0) {
>+		while (rte_hash_iterate(&state, &next_key, &next_data) >= 0) {
> 			/* Search for the key in the list of keys added .*/
> 			i = *(const uint32_t *)next_key;
> 			tbl_rw_test_param.found[i]++;
>--
>2.17.1



More information about the dev mailing list