[dpdk-dev] [PATCH v3] hash table: add an iterator over conflicting entries

Gaëtan Rivet gaetan.rivet at 6wind.com
Sat Sep 1 00:53:18 CEST 2018


Hi Qiaobin,

This work seems interesting, but is difficult to follow because
the previous discussion is not referenced.

You can find a how-to there:

http://doc.dpdk.org/guides/contributing/patches.html#sending-patches

--in-reply-to is useful to check which comments were already made and
understand the work previously done on a patchset.

On Fri, Aug 31, 2018 at 04:51:01PM +0000, Qiaobin Fu wrote:
> Function rte_hash_iterate_conflict_entries() iterates over
> the entries that conflict with an incoming entry.
> 
> Iterating over conflicting entries enables one to decide
> if the incoming entry is more valuable than the entries already
> in the hash table. This is particularly useful after
> an insertion failure.
> 
> v3:
> * Make the rte_hash_iterate() API similar to
>   rte_hash_iterate_conflict_entries()
> 
> v2:
> * Fix the style issue
> 
> * Make the API more universal
> 
> Signed-off-by: Qiaobin Fu <qiaobinf at bu.edu>
> Reviewed-by: Cody Doucette <doucette at bu.edu>
> Reviewed-by: Michel Machado <michel at digirati.com.br>
> Reviewed-by: Keith Wiles <keith.wiles at intel.com>
> Reviewed-by: Yipeng Wang <yipeng1.wang at intel.com>
> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli at arm.com>
> ---
>  lib/librte_hash/Makefile             |   1 +
>  lib/librte_hash/rte_cuckoo_hash.c    | 132 +++++++++++++++++++++++----
>  lib/librte_hash/rte_hash.h           |  80 ++++++++++++++--
>  lib/librte_hash/rte_hash_version.map |   8 ++
>  test/test/test_hash.c                |   7 +-
>  test/test/test_hash_multiwriter.c    |   8 +-
>  test/test/test_hash_readwrite.c      |  16 ++--
>  7 files changed, 219 insertions(+), 33 deletions(-)
> 
> diff --git a/lib/librte_hash/Makefile b/lib/librte_hash/Makefile
> index c8c435dfd..9be58a205 100644
> --- a/lib/librte_hash/Makefile
> +++ b/lib/librte_hash/Makefile
> @@ -6,6 +6,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
>  # library name
>  LIB = librte_hash.a
>  
> +CFLAGS += -DALLOW_EXPERIMENTAL_API
>  CFLAGS += -O3
>  CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
>  LDLIBS += -lrte_eal -lrte_ring
> diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c
> index f7b86c8c9..cf5b28196 100644
> --- a/lib/librte_hash/rte_cuckoo_hash.c
> +++ b/lib/librte_hash/rte_cuckoo_hash.c
> @@ -1300,45 +1300,143 @@ rte_hash_lookup_bulk_data(const struct rte_hash *h, const void **keys,
>  	return __builtin_popcountl(*hit_mask);
>  }
>  
> +/* istate stands for internal state. */

Is a name requiring a comment to explain a good name?
Maybe rte_hash_iterator_priv?

> +struct rte_hash_iterator_istate {
> +	const struct rte_hash *h;
> +	uint32_t              next;
> +	uint32_t              total_entries;
> +};

You should check that your private structure does not grow beyond
the public one, using RTE_BUILD_BUG_ON(sizeof(priv) < sizeof(pub)) somewhere.

"rte_hash_iterator_[i]state" seems unnecessarily verbose.
The memory you are manipulating through this variable is already holding
the state of your iterator. It is useless to append "_state".

    struct rte_hash_iterator_priv *state;

is also clear and reads better.
On the other hand "h" is maybe not verbose enough. Why not "hash"?

Also, please do not align field names in a structure. It forces
future changes to either break the pattern or edit the whole structure
when someone attempts to insert a field with a name that is too long.

> +
> +int32_t
> +rte_hash_iterator_init(const struct rte_hash *h,
> +	struct rte_hash_iterator_state *state)
> +{
> +	struct rte_hash_iterator_istate *__state;

Please do not use the "__" prefix to convey that
you are using a private version of the structure.

You could use "istate" or "it", the common shorthand for
iterator handles.

> +
> +	RETURN_IF_TRUE(((h == NULL) || (state == NULL)), -EINVAL);
> +
> +	__state = (struct rte_hash_iterator_istate *)state;
> +	__state->h = h;
> +	__state->next = 0;
> +	__state->total_entries = h->num_buckets * RTE_HASH_BUCKET_ENTRIES;
> +
> +	return 0;
> +}
> +
>  int32_t
> -rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32_t *next)
> +rte_hash_iterate(
> +	struct rte_hash_iterator_state *state, const void **key, void **data)

Why an empty first line of parameters here?

rte_hash_iterate(struct rte_hash_iterator_state *state,
                 const void **key,
                 void **data)

reads better.

>  {
> +	struct rte_hash_iterator_istate *__state;
>  	uint32_t bucket_idx, idx, position;
>  	struct rte_hash_key *next_key;
>  
> -	RETURN_IF_TRUE(((h == NULL) || (next == NULL)), -EINVAL);
> +	RETURN_IF_TRUE(((state == NULL) || (key == NULL) ||
> +		(data == NULL)), -EINVAL);
> +
> +	__state = (struct rte_hash_iterator_istate *)state;
>  
> -	const uint32_t total_entries = h->num_buckets * RTE_HASH_BUCKET_ENTRIES;
>  	/* Out of bounds */
> -	if (*next >= total_entries)
> +	if (__state->next >= __state->total_entries)
>  		return -ENOENT;
>  
>  	/* Calculate bucket and index of current iterator */
> -	bucket_idx = *next / RTE_HASH_BUCKET_ENTRIES;
> -	idx = *next % RTE_HASH_BUCKET_ENTRIES;
> +	bucket_idx = __state->next / RTE_HASH_BUCKET_ENTRIES;
> +	idx = __state->next % RTE_HASH_BUCKET_ENTRIES;
>  
>  	/* If current position is empty, go to the next one */
> -	while (h->buckets[bucket_idx].key_idx[idx] == EMPTY_SLOT) {
> -		(*next)++;
> +	while (__state->h->buckets[bucket_idx].key_idx[idx] == EMPTY_SLOT) {
> +		__state->next++;
>  		/* End of table */
> -		if (*next == total_entries)
> +		if (__state->next == __state->total_entries)
>  			return -ENOENT;
> -		bucket_idx = *next / RTE_HASH_BUCKET_ENTRIES;
> -		idx = *next % RTE_HASH_BUCKET_ENTRIES;
> +		bucket_idx = __state->next / RTE_HASH_BUCKET_ENTRIES;
> +		idx = __state->next % RTE_HASH_BUCKET_ENTRIES;
>  	}
> -	__hash_rw_reader_lock(h);
> +	__hash_rw_reader_lock(__state->h);
>  	/* Get position of entry in key table */
> -	position = h->buckets[bucket_idx].key_idx[idx];
> -	next_key = (struct rte_hash_key *) ((char *)h->key_store +
> -				position * h->key_entry_size);
> +	position = __state->h->buckets[bucket_idx].key_idx[idx];
> +	next_key = (struct rte_hash_key *) ((char *)__state->h->key_store +
> +				position * __state->h->key_entry_size);
>  	/* Return key and data */
>  	*key = next_key->key;
>  	*data = next_key->pdata;
>  
> -	__hash_rw_reader_unlock(h);
> +	__hash_rw_reader_unlock(__state->h);
>  
>  	/* Increment iterator */
> -	(*next)++;
> +	__state->next++;
>  
>  	return position - 1;
>  }
> +
> +/* istate stands for internal state. */
> +struct rte_hash_iterator_conflict_entries_istate {

I find "conflict_entries" awkward, how about

rte_hash_dup_iterator

instead? It is shorter and conveys that you will iterate duplicate
entries.

> +	const struct rte_hash *h;
> +	uint32_t              vnext;
> +	uint32_t              primary_bidx;
> +	uint32_t              secondary_bidx;
> +};
> +
> +int32_t __rte_experimental
> +rte_hash_iterator_conflict_entries_init_with_hash(const struct rte_hash *h,

rte_hash_dup_iterator_init() maybe?

Why is _with_hash mentioned here? Is it possible to initialize this kind
of iterator without a reference to compare against? That this reference
is an rte_hash is already given by the parameter list.

In any case, 49 characters for a name is too long.

> +	hash_sig_t sig, struct rte_hash_iterator_state *state)
> +{
> +	struct rte_hash_iterator_conflict_entries_istate *__state;
> +
> +	RETURN_IF_TRUE(((h == NULL) || (state == NULL)), -EINVAL);
> +
> +	__state = (struct rte_hash_iterator_conflict_entries_istate *)state;
> +	__state->h = h;
> +	__state->vnext = 0;
> +
> +	/* Get the primary bucket index given the precomputed hash value. */
> +	__state->primary_bidx = sig & h->bucket_bitmask;
> +	/* Get the secondary bucket index given the precomputed hash value. */
> +	__state->secondary_bidx =
> +		rte_hash_secondary_hash(sig) & h->bucket_bitmask;
> +
> +	return 0;
> +}
> +
> +int32_t __rte_experimental
> +rte_hash_iterate_conflict_entries(
> +	struct rte_hash_iterator_state *state, const void **key, void **data)

How about "rte_hash_dup_next()"?
Also, please break the parameter list instead of having an empty first
line.

> +{
> +	struct rte_hash_iterator_conflict_entries_istate *__state;
> +
> +	RETURN_IF_TRUE(((state == NULL) || (key == NULL) ||
> +		(data == NULL)), -EINVAL);
> +
> +	__state = (struct rte_hash_iterator_conflict_entries_istate *)state;
> +
> +	while (__state->vnext < RTE_HASH_BUCKET_ENTRIES * 2) {
> +		uint32_t bidx = __state->vnext < RTE_HASH_BUCKET_ENTRIES ?
> +			__state->primary_bidx : __state->secondary_bidx;
> +		uint32_t next = __state->vnext & (RTE_HASH_BUCKET_ENTRIES - 1);
> +		uint32_t position = __state->h->buckets[bidx].key_idx[next];
> +		struct rte_hash_key *next_key;
> +
> +		/* Increment iterator. */
> +		__state->vnext++;
> +
> +		/*
> +		 * The test below is unlikely because this iterator is meant
> +		 * to be used after a failed insert.
> +		 */
> +		if (unlikely(position == EMPTY_SLOT))
> +			continue;
> +
> +		/* Get the entry in key table. */
> +		next_key = (struct rte_hash_key *) (
> +			(char *)__state->h->key_store +
> +			position * __state->h->key_entry_size);
> +		/* Return key and data. */
> +		*key = next_key->key;
> +		*data = next_key->pdata;
> +
> +		return position - 1;
> +	}
> +
> +	return -ENOENT;
> +}

[snip]

> diff --git a/lib/librte_hash/rte_hash_version.map b/lib/librte_hash/rte_hash_version.map
> index e216ac8e2..301d4638c 100644
> --- a/lib/librte_hash/rte_hash_version.map
> +++ b/lib/librte_hash/rte_hash_version.map
> @@ -24,6 +24,7 @@ DPDK_2.1 {
>  
>  	rte_hash_add_key_data;
>  	rte_hash_add_key_with_hash_data;
> +	rte_hash_iterator_init;
>  	rte_hash_iterate;
>  	rte_hash_lookup_bulk_data;
>  	rte_hash_lookup_data;
> @@ -53,3 +54,10 @@ DPDK_18.08 {
>  	rte_hash_count;
>  
>  } DPDK_16.07;
> +
> +EXPERIMENTAL {
> +	global:
> +
> +	rte_hash_iterator_conflict_entries_init_with_hash;
> +	rte_hash_iterate_conflict_entries;
> +};

A blank line may be missing here.

Regards,
-- 
Gaëtan Rivet
6WIND


More information about the dev mailing list