[dpdk-dev] [PATCH 2/6] hash: replace existing hash library with cuckoo hash implementation
    Bruce Richardson 
    bruce.richardson at intel.com
       
    Thu Jun 18 11:50:15 CEST 2015
    
    
  
On Fri, Jun 05, 2015 at 03:33:20PM +0100, Pablo de Lara wrote:
> This patch replaces the existing hash library with another approach,
> using the Cuckoo Hash method to resolve collisions (open addressing),
> which pushes items from a full bucket when a new entry tries
> to be added in it, storing the evicted entry in an alternative location,
> using a secondary hash function.
> 
> This gives the user the ability to store more entries when a bucket
> is full, in comparison with the previous implementation.
> Therefore, the unit test has been updated, as some scenarios have changed
> (such as the previous removed restriction).
> 
> Also note that the API has not been changed, although new fields
> have been added in the rte_hash structure.
> The main change when creating a new table is that the number of entries
> per bucket is fixed now, so its parameter is ignored now
> (still there to maintain the same parameters structure).
> 
> As a last note, the maximum burst size in lookup_burst function
> hash been increased to 64, to improve performance.
> 
> Signed-off-by: Pablo de Lara <pablo.de.lara.guarch at intel.com>
Hi Pablo,
Some review comments below.
/Bruce
> ---
>  app/test/test_hash.c       |  86 +----
>  lib/librte_hash/rte_hash.c | 797 ++++++++++++++++++++++++++++++++++-----------
>  lib/librte_hash/rte_hash.h | 157 +++++----
>  3 files changed, 721 insertions(+), 319 deletions(-)
> 
> diff --git a/app/test/test_hash.c b/app/test/test_hash.c
> index 1da27c5..4ef99ee 100644
> --- a/app/test/test_hash.c
> +++ b/app/test/test_hash.c
> @@ -169,7 +169,6 @@ static struct flow_key keys[5] = { {
>  /* Parameters used for hash table in unit test functions. Name set later. */
>  static struct rte_hash_parameters ut_params = {
>  	.entries = 64,
> -	.bucket_entries = 4,
>  	.key_len = sizeof(struct flow_key), /* 13 */
>  	.hash_func = rte_jhash,
>  	.hash_func_init_val = 0,
> @@ -527,21 +526,18 @@ static int test_five_keys(void)
>  /*
>   * Add keys to the same bucket until bucket full.
>   *	- add 5 keys to the same bucket (hash created with 4 keys per bucket):
> - *	  first 4 successful, 5th unsuccessful
> - *	- lookup the 5 keys: 4 hits, 1 miss
> - *	- add the 5 keys again: 4 OK, one error as bucket is full
> - *	- lookup the 5 keys: 4 hits (updated data), 1 miss
> - *	- delete the 5 keys: 5 OK (even if the 5th is not in the table)
> + *	  first 4 successful, 5th successful, pushing existing item in bucket
> + *	- lookup the 5 keys: 5 hits
> + *	- add the 5 keys again: 5 OK
> + *	- lookup the 5 keys: 5 hits (updated data)
> + *	- delete the 5 keys: 5 OK
>   *	- lookup the 5 keys: 5 misses
> - *	- add the 5th key: OK
> - *	- lookup the 5th key: hit
>   */
>  static int test_full_bucket(void)
>  {
>  	struct rte_hash_parameters params_pseudo_hash = {
>  		.name = "test4",
>  		.entries = 64,
> -		.bucket_entries = 4,
>  		.key_len = sizeof(struct flow_key), /* 13 */
>  		.hash_func = pseudo_hash,
>  		.hash_func_init_val = 0,
> @@ -555,7 +551,7 @@ static int test_full_bucket(void)
>  	handle = rte_hash_create(¶ms_pseudo_hash);
>  	RETURN_IF_ERROR(handle == NULL, "hash creation failed");
>  
> -	/* Fill bucket*/
> +	/* Fill bucket */
>  	for (i = 0; i < 4; i++) {
>  		pos[i] = rte_hash_add_key(handle, &keys[i]);
>  		print_key_info("Add", &keys[i], pos[i]);
> @@ -563,47 +559,36 @@ static int test_full_bucket(void)
>  			"failed to add key (pos[%u]=%d)", i, pos[i]);
>  		expected_pos[i] = pos[i];
>  	}
> -	/* This shouldn't work because the bucket is full */
> +	/* This should work and will push one of the items in the bucket because it is full */
>  	pos[4] = rte_hash_add_key(handle, &keys[4]);
>  	print_key_info("Add", &keys[4], pos[4]);
> -	RETURN_IF_ERROR(pos[4] != -ENOSPC,
> -			"fail: added key to full bucket (pos[4]=%d)", pos[4]);
> +	RETURN_IF_ERROR(pos[4] < 0,
> +			"failed to add key (pos[4]=%d)", pos[4]);
> +	expected_pos[5] = pos[5];
>  
>  	/* Lookup */
> -	for (i = 0; i < 4; i++) {
> +	for (i = 0; i < 5; i++) {
>  		pos[i] = rte_hash_lookup(handle, &keys[i]);
>  		print_key_info("Lkp", &keys[i], pos[i]);
>  		RETURN_IF_ERROR(pos[i] != expected_pos[i],
>  			"failed to find key (pos[%u]=%d)", i, pos[i]);
>  	}
> -	pos[4] = rte_hash_lookup(handle, &keys[4]);
> -	print_key_info("Lkp", &keys[4], pos[4]);
> -	RETURN_IF_ERROR(pos[4] != -ENOENT,
> -			"fail: found non-existent key (pos[4]=%d)", pos[4]);
>  
>  	/* Add - update */
> -	for (i = 0; i < 4; i++) {
> +	for (i = 0; i < 5; i++) {
>  		pos[i] = rte_hash_add_key(handle, &keys[i]);
>  		print_key_info("Add", &keys[i], pos[i]);
>  		RETURN_IF_ERROR(pos[i] != expected_pos[i],
>  			"failed to add key (pos[%u]=%d)", i, pos[i]);
>  	}
> -	pos[4] = rte_hash_add_key(handle, &keys[4]);
> -	print_key_info("Add", &keys[4], pos[4]);
> -	RETURN_IF_ERROR(pos[4] != -ENOSPC,
> -			"fail: added key to full bucket (pos[4]=%d)", pos[4]);
>  
>  	/* Lookup */
> -	for (i = 0; i < 4; i++) {
> +	for (i = 0; i < 5; i++) {
>  		pos[i] = rte_hash_lookup(handle, &keys[i]);
>  		print_key_info("Lkp", &keys[i], pos[i]);
>  		RETURN_IF_ERROR(pos[i] != expected_pos[i],
>  			"failed to find key (pos[%u]=%d)", i, pos[i]);
>  	}
> -	pos[4] = rte_hash_lookup(handle, &keys[4]);
> -	print_key_info("Lkp", &keys[4], pos[4]);
> -	RETURN_IF_ERROR(pos[4] != -ENOENT,
> -			"fail: found non-existent key (pos[4]=%d)", pos[4]);
>  
>  	/* Delete 1 key, check other keys are still found */
>  	pos[1] = rte_hash_del_key(handle, &keys[1]);
> @@ -623,35 +608,21 @@ static int test_full_bucket(void)
>  	RETURN_IF_ERROR(pos[1] < 0, "failed to add key (pos[1]=%d)", pos[1]);
>  
>  	/* Delete */
> -	for (i = 0; i < 4; i++) {
> +	for (i = 0; i < 5; i++) {
>  		pos[i] = rte_hash_del_key(handle, &keys[i]);
>  		print_key_info("Del", &keys[i], pos[i]);
>  		RETURN_IF_ERROR(pos[i] != expected_pos[i],
>  			"failed to delete key (pos[%u]=%d)", i, pos[i]);
>  	}
> -	pos[4] = rte_hash_del_key(handle, &keys[4]);
> -	print_key_info("Del", &keys[4], pos[4]);
> -	RETURN_IF_ERROR(pos[4] != -ENOENT,
> -			"fail: deleted non-existent key (pos[4]=%d)", pos[4]);
>  
>  	/* Lookup */
> -	for (i = 0; i < 4; i++) {
> +	for (i = 0; i < 5; i++) {
>  		pos[i] = rte_hash_lookup(handle, &keys[i]);
>  		print_key_info("Lkp", &keys[i], pos[i]);
>  		RETURN_IF_ERROR(pos[i] != -ENOENT,
>  			"fail: found non-existent key (pos[%u]=%d)", i, pos[i]);
>  	}
>  
> -	/* Add and lookup the 5th key */
> -	pos[4] = rte_hash_add_key(handle, &keys[4]);
> -	print_key_info("Add", &keys[4], pos[4]);
> -	RETURN_IF_ERROR(pos[4] < 0, "failed to add key (pos[4]=%d)", pos[4]);
> -	expected_pos[4] = pos[4];
> -	pos[4] = rte_hash_lookup(handle, &keys[4]);
> -	print_key_info("Lkp", &keys[4], pos[4]);
> -	RETURN_IF_ERROR(pos[4] != expected_pos[4],
> -			"failed to find key (pos[4]=%d)", pos[4]);
> -
>  	rte_hash_free(handle);
>  
>  	/* Cover the NULL case. */
> @@ -1017,18 +988,8 @@ static int test_hash_creation_with_bad_parameters(void)
>  	}
>  
>  	memcpy(¶ms, &ut_params, sizeof(params));
> -	params.name = "creation_with_bad_parameters_1";
> -	params.bucket_entries = RTE_HASH_BUCKET_ENTRIES_MAX + 1;
> -	handle = rte_hash_create(¶ms);
> -	if (handle != NULL) {
> -		rte_hash_free(handle);
> -		printf("Impossible creating hash sucessfully with bucket_entries in parameter exceeded\n");
> -		return -1;
> -	}
> -
> -	memcpy(¶ms, &ut_params, sizeof(params));
>  	params.name = "creation_with_bad_parameters_2";
> -	params.entries = params.bucket_entries - 1;
> +	params.entries = RTE_HASH_BUCKET_ENTRIES - 1;
>  	handle = rte_hash_create(¶ms);
>  	if (handle != NULL) {
>  		rte_hash_free(handle);
> @@ -1048,16 +1009,6 @@ static int test_hash_creation_with_bad_parameters(void)
>  
>  	memcpy(¶ms, &ut_params, sizeof(params));
>  	params.name = "creation_with_bad_parameters_4";
> -	params.bucket_entries = params.bucket_entries - 1;
> -	handle = rte_hash_create(¶ms);
> -	if (handle != NULL) {
> -		rte_hash_free(handle);
> -		printf("Impossible creating hash sucessfully if bucket_entries in parameter is not power of 2\n");
> -		return -1;
> -	}
> -
> -	memcpy(¶ms, &ut_params, sizeof(params));
> -	params.name = "creation_with_bad_parameters_5";
>  	params.key_len = 0;
>  	handle = rte_hash_create(¶ms);
>  	if (handle != NULL) {
> @@ -1067,7 +1018,7 @@ static int test_hash_creation_with_bad_parameters(void)
>  	}
>  
>  	memcpy(¶ms, &ut_params, sizeof(params));
> -	params.name = "creation_with_bad_parameters_6";
> +	params.name = "creation_with_bad_parameters_5";
>  	params.key_len = RTE_HASH_KEY_LENGTH_MAX + 1;
>  	handle = rte_hash_create(¶ms);
>  	if (handle != NULL) {
> @@ -1077,7 +1028,7 @@ static int test_hash_creation_with_bad_parameters(void)
>  	}
>  
>  	memcpy(¶ms, &ut_params, sizeof(params));
> -	params.name = "creation_with_bad_parameters_7";
> +	params.name = "creation_with_bad_parameters_6";
>  	params.socket_id = RTE_MAX_NUMA_NODES + 1;
>  	handle = rte_hash_create(¶ms);
>  	if (handle != NULL) {
> @@ -1158,7 +1109,6 @@ static uint8_t key[16] = {0x00, 0x01, 0x02, 0x03,
>  static struct rte_hash_parameters hash_params_ex = {
>  	.name = NULL,
>  	.entries = 64,
> -	.bucket_entries = 4,
>  	.key_len = 0,
>  	.hash_func = NULL,
>  	.hash_func_init_val = 0,
> diff --git a/lib/librte_hash/rte_hash.c b/lib/librte_hash/rte_hash.c
> index 9245716..cbfe17e 100644
> --- a/lib/librte_hash/rte_hash.c
> +++ b/lib/librte_hash/rte_hash.c
> @@ -1,7 +1,7 @@
>  /*-
>   *   BSD LICENSE
>   *
> - *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> + *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
>   *   All rights reserved.
>   *
>   *   Redistribution and use in source and binary forms, with or without
> @@ -83,64 +83,12 @@ EAL_REGISTER_TAILQ(rte_hash_tailq)
>  #define DEFAULT_HASH_FUNC       rte_jhash
>  #endif
>  
> -/* Signature bucket size is a multiple of this value */
> -#define SIG_BUCKET_ALIGNMENT    16
> -
> -/* Stoered key size is a multiple of this value */
> -#define KEY_ALIGNMENT           16
> +/* Bucket size is a multiple of this value */
> +#define BUCKET_ALIGNMENT	16
Why this value? Why not just CACHE_LINE_SIZE? In fact, given that the buckets
in this implementation are now a fixed size, this is unnecessary. The buckets are
always one cache line in size, due to __rte_cache_aligned on the definition.
>  
>  /* The high bit is always set in real signatures */
>  #define NULL_SIGNATURE          0
>  
> -/* Returns a pointer to the first signature in specified bucket. */
> -static inline hash_sig_t *
> -get_sig_tbl_bucket(const struct rte_hash *h, uint32_t bucket_index)
> -{
> -	return (hash_sig_t *)
> -			&(h->sig_tbl[bucket_index * h->sig_tbl_bucket_size]);
> -}
> -
> -/* Returns a pointer to the first key in specified bucket. */
> -static inline uint8_t *
> -get_key_tbl_bucket(const struct rte_hash *h, uint32_t bucket_index)
> -{
> -	return (uint8_t *) &(h->key_tbl[bucket_index * h->bucket_entries *
> -				     h->key_tbl_key_size]);
> -}
> -
> -/* Returns a pointer to a key at a specific position in a specified bucket. */
> -static inline void *
> -get_key_from_bucket(const struct rte_hash *h, uint8_t *bkt, uint32_t pos)
> -{
> -	return (void *) &bkt[pos * h->key_tbl_key_size];
> -}
> -
> -/* Does integer division with rounding-up of result. */
> -static inline uint32_t
> -div_roundup(uint32_t numerator, uint32_t denominator)
> -{
> -	return (numerator + denominator - 1) / denominator;
> -}
> -
> -/* Increases a size (if needed) to a multiple of alignment. */
> -static inline uint32_t
> -align_size(uint32_t val, uint32_t alignment)
> -{
> -	return alignment * div_roundup(val, alignment);
> -}
> -
> -/* Returns the index into the bucket of the first occurrence of a signature. */
> -static inline int
> -find_first(uint32_t sig, const uint32_t *sig_bucket, uint32_t num_sigs)
> -{
> -	uint32_t i;
> -	for (i = 0; i < num_sigs; i++) {
> -		if (sig == sig_bucket[i])
> -			return i;
> -	}
> -	return -1;
> -}
> -
>  struct rte_hash *
>  rte_hash_find_existing(const char *name)
>  {
> @@ -165,25 +113,49 @@ rte_hash_find_existing(const char *name)
>  	return h;
>  }
>  
> +/* Does integer division with rounding-up of result. */
> +static inline uint32_t
> +div_roundup(uint32_t numerator, uint32_t denominator)
> +{
> +	return (numerator + denominator - 1) / denominator;
> +}
> +
> +/* Increases a size (if needed) to a multiple of alignment. */
> +static inline uint32_t
> +align_size(uint32_t val, uint32_t alignment)
> +{
> +	return alignment * div_roundup(val, alignment);
> +}
There are already inline functions/macros for alignment in rte_common.h, that
should be used if possible.
> +
>  struct rte_hash *
>  rte_hash_create(const struct rte_hash_parameters *params)
>  {
>  	struct rte_hash *h = NULL;
>  	struct rte_tailq_entry *te;
> -	uint32_t num_buckets, sig_bucket_size, key_size,
> -		hash_tbl_size, sig_tbl_size, key_tbl_size, mem_size;
> +	uint32_t num_buckets, bucket_size,
> +		tbl_size, mem_size, hash_struct_size;
>  	char hash_name[RTE_HASH_NAMESIZE];
>  	struct rte_hash_list *hash_list;
> +	void *k = NULL;
> +	struct rte_ring *r = NULL;
> +	char ring_name[RTE_RING_NAMESIZE];
> +	unsigned key_entry_size;
> +	const unsigned anyalignment = 0;
> +	unsigned i;
> +	void *ptr;
>  
>  	hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
>  
> +	if (params == NULL) {
> +		RTE_LOG(ERR, HASH, "rte_hash_create has no parameters\n");
> +		return NULL;
> +	}
> +
>  	/* Check for valid parameters */
> -	if ((params == NULL) ||
> -			(params->entries > RTE_HASH_ENTRIES_MAX) ||
> -			(params->bucket_entries > RTE_HASH_BUCKET_ENTRIES_MAX) ||
> -			(params->entries < params->bucket_entries) ||
> +	if ((params->entries > RTE_HASH_ENTRIES_MAX) ||
> +			(params->entries < RTE_HASH_BUCKET_ENTRIES) ||
>  			!rte_is_power_of_2(params->entries) ||
> -			!rte_is_power_of_2(params->bucket_entries) ||
> +			!rte_is_power_of_2(RTE_HASH_BUCKET_ENTRIES) ||
>  			(params->key_len == 0) ||
>  			(params->key_len > RTE_HASH_KEY_LENGTH_MAX)) {
>  		rte_errno = EINVAL;
> @@ -194,19 +166,18 @@ rte_hash_create(const struct rte_hash_parameters *params)
>  	snprintf(hash_name, sizeof(hash_name), "HT_%s", params->name);
>  
>  	/* Calculate hash dimensions */
> -	num_buckets = params->entries / params->bucket_entries;
> -	sig_bucket_size = align_size(params->bucket_entries *
> -				     sizeof(hash_sig_t), SIG_BUCKET_ALIGNMENT);
> -	key_size =  align_size(params->key_len, KEY_ALIGNMENT);
> +	num_buckets = params->entries / RTE_HASH_BUCKET_ENTRIES;
> +
> +	bucket_size = align_size(sizeof(struct rte_hash_bucket), BUCKET_ALIGNMENT);
unnecessary, due to cache line ailgnment of the struct.
>  
> -	hash_tbl_size = align_size(sizeof(struct rte_hash), RTE_CACHE_LINE_SIZE);
> -	sig_tbl_size = align_size(num_buckets * sig_bucket_size,
> +	hash_struct_size = align_size(sizeof(struct rte_hash), RTE_CACHE_LINE_SIZE);
I don't think this is necessary. Better to make structure cache line aligned, and
then just use sizeof without any additional calculations on it's size.
> +	tbl_size = align_size(num_buckets * bucket_size,
>  				  RTE_CACHE_LINE_SIZE);
Again, unecessary alignment calculation.
> -	key_tbl_size = align_size(num_buckets * key_size *
> -				  params->bucket_entries, RTE_CACHE_LINE_SIZE);
>  
>  	/* Total memory required for hash context */
> -	mem_size = hash_tbl_size + sig_tbl_size + key_tbl_size;
> +	mem_size = hash_struct_size + tbl_size;
> +
> +	key_entry_size = params->key_len;
Is this not a case where we do want some sort of alignment on our keys, just to
reduce the chances of a key crossing a cache line boundary?
>  
>  	rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
>  
> @@ -216,6 +187,7 @@ rte_hash_create(const struct rte_hash_parameters *params)
>  		if (strncmp(params->name, h->name, RTE_HASH_NAMESIZE) == 0)
>  			break;
>  	}
> +	rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
>  	if (te != NULL)
>  		goto exit;
>  
> @@ -226,36 +198,73 @@ rte_hash_create(const struct rte_hash_parameters *params)
>  	}
>  
>  	h = (struct rte_hash *)rte_zmalloc_socket(hash_name, mem_size,
> -					   RTE_CACHE_LINE_SIZE, params->socket_id);
> +					RTE_CACHE_LINE_SIZE, params->socket_id);
> +
>  	if (h == NULL) {
>  		RTE_LOG(ERR, HASH, "memory allocation failed\n");
>  		rte_free(te);
>  		goto exit;
>  	}
>  
> +	k = rte_zmalloc_socket(NULL, key_entry_size * (params->entries + 1),
> +			anyalignment, params->socket_id);
> +
> +	if (k == NULL) {
> +		RTE_LOG(ERR, HASH, "memory allocation failed\n");
> +		h = NULL;
> +		rte_free(te);
> +		rte_free(h);
Rather than building-up an ever increasing list of memory to be freed on each
exit branch, define a separate error label and always free all the memory pointers
allocated during the run. The rte_free() API has no effect on NULL pointers, so
it can be safe to do so (assuming you NULL-initialize all vars first).
> +		goto exit;
> +	}
> +
> +	snprintf(ring_name, sizeof(ring_name), "HT_%s", params->name);
> +	r = rte_ring_lookup(ring_name);
> +	if (r != NULL) {
> +		/* clear the free ring */
> +		while (rte_ring_dequeue(r, &ptr) == 0)
> +			rte_pause();
> +	} else
> +		r = rte_ring_create(ring_name, rte_align32pow2(params->entries),
> +				params->socket_id, 0);
> +	if (r == NULL) {
> +		RTE_LOG(ERR, HASH, "memory allocation failed\n");
> +		rte_free(te);
> +		rte_free(h);
> +		h = NULL;
> +		rte_free(k);
> +		goto exit;
> +	}
> +
>  	/* Setup hash context */
>  	snprintf(h->name, sizeof(h->name), "%s", params->name);
>  	h->entries = params->entries;
> -	h->bucket_entries = params->bucket_entries;
> +	h->bucket_entries = RTE_HASH_BUCKET_ENTRIES;
>  	h->key_len = params->key_len;
> +	h->key_entry_size = key_entry_size;
>  	h->hash_func_init_val = params->hash_func_init_val;
> +	h->socket_id = params->socket_id;
> +
>  	h->num_buckets = num_buckets;
>  	h->bucket_bitmask = h->num_buckets - 1;
> -	h->sig_msb = 1 << (sizeof(hash_sig_t) * 8 - 1);
> -	h->sig_tbl = (uint8_t *)h + hash_tbl_size;
> -	h->sig_tbl_bucket_size = sig_bucket_size;
> -	h->key_tbl = h->sig_tbl + sig_tbl_size;
> -	h->key_tbl_key_size = key_size;
> +	h->buckets = (struct rte_hash_bucket *)((uint8_t *)h + hash_struct_size);
You can avoid the addition and typecasting by adding
	struct rte_hash_bucket buckets[0] __rte_cache_aligned;
as the last field in your rte_hash structure.
>  	h->hash_func = (params->hash_func == NULL) ?
>  		DEFAULT_HASH_FUNC : params->hash_func;
>  
> +	h->sig_msb = 1ULL << (sizeof(uint64_t) * 8 - 1);
8 == CHAR_BIT
> +	h->sig_secondary = 1ULL << (sizeof(uint64_t) * 8 - 2);
> +
h->sig_secondary = h->sig_msb >> 1; // ??
> +	h->key_store = k;
> +	h->free_slots = r;
>  	te->data = (void *) h;
>  
> -	TAILQ_INSERT_TAIL(hash_list, te, next);
> +	/* populate the free slots ring. Entry zero is reserved for key misses */
> +	for (i = 1; i < params->entries + 1; i++)
> +			rte_ring_sp_enqueue(r, (void *)((uintptr_t) i));
>  
> -exit:
> +	rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
> +	TAILQ_INSERT_TAIL(hash_list, te, next);
>  	rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
> -
> +exit:
>  	return h;
>  }
>  
> @@ -287,49 +296,164 @@ rte_hash_free(struct rte_hash *h)
>  
>  	rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
>  
> +	rte_free(h->key_store);
>  	rte_free(h);
>  	rte_free(te);
>  }
>  
>  static inline int32_t
> -__rte_hash_add_key_with_hash(const struct rte_hash *h,
> -				const void *key, hash_sig_t sig)
> +run_cuckoo(const struct rte_hash *h, struct rte_hash_bucket *bkt, uint32_t key_idx,
> +		uint64_t hash, uint64_t original_hash, const void *original_key)
>  {
> -	hash_sig_t *sig_bucket;
> -	uint8_t *key_bucket;
> -	uint32_t bucket_index, i;
> -	int32_t pos;
> -
> -	/* Get the hash signature and bucket index */
> -	sig |= h->sig_msb;
> -	bucket_index = sig & h->bucket_bitmask;
> -	sig_bucket = get_sig_tbl_bucket(h, bucket_index);
> -	key_bucket = get_key_tbl_bucket(h, bucket_index);
> -
> -	/* Check if key is already present in the hash */
> -	for (i = 0; i < h->bucket_entries; i++) {
> -		if ((sig == sig_bucket[i]) &&
> -		    likely(memcmp(key, get_key_from_bucket(h, key_bucket, i),
> -				  h->key_len) == 0)) {
> -			return bucket_index * h->bucket_entries + i;
> +	/* idx = 0 if primary, 1 if secondary */
> +	unsigned idx;
> +	static unsigned number_pushes;
> +	void *k, *keys = h->key_store;
> +	unsigned i, j;
> +
> +	uint64_t hash_stored;
> +	uint32_t key_idx_stored;
> +	uint32_t bucket_stored_idx;
> +	struct rte_hash_bucket *bkt_stored;
> +
> +	for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
> +		/* Check if slot is available */
> +		if (likely(bkt->signatures[i] == NULL_SIGNATURE)) {
> +			bkt->signatures[i] = hash;
> +			bkt->key_idx[i] = key_idx;
> +			number_pushes = 0;
> +			return bkt->key_idx[i];
> +		}
> +	}
> +
> +	/*
> +	 * If number of pushes has exceeded a certain limit, it
> +	 * is very likely that it has entered in a loop, need rehasing
> +	 */
> +	if (++number_pushes > 1 && hash == original_hash) {
> +		k = (char *)keys + key_idx * h->key_entry_size;
> +		if (!memcmp(k, original_key, h->key_len)) {
> +			rte_ring_sp_enqueue(h->free_slots, (void *)((uintptr_t)key_idx));
> +			number_pushes = 0;
> +			/*
> +			 * Indicates to the user that key could not be added,
> +			 * so he can rehash and add it again or decide not to.
> +			 */
> +			return -EAGAIN;
>  		}
>  	}
>  
> -	/* Check if any free slot within the bucket to add the new key */
> -	pos = find_first(NULL_SIGNATURE, sig_bucket, h->bucket_entries);
> +	/*
> +	 * Push existing item (search for bucket with space in alternative locations)
> +	 * to its alternative location
> +	 */
> +	for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
> +		/*
> +		 * Check if item was stored in its primary/secondary location,
> +		 * to get the hash in the alternative location
> +		 */
> +		idx = !(bkt->signatures[i] & (h->sig_secondary));
> +		key_idx_stored = bkt->key_idx[i];
> +		k = (char *)keys + key_idx_stored * h->key_entry_size;
> +
> +		if (idx == 0)
> +			hash_stored = rte_hash_hash(h, k);
> +		else
> +			hash_stored = rte_hash_secondary_hash(bkt->signatures[i]);
> +
> +		bucket_stored_idx = hash_stored & h->bucket_bitmask;
> +		bkt_stored = &h->buckets[bucket_stored_idx];
> +		hash_stored |= h->sig_msb;
> +
> +		if (idx != 0)
> +			hash_stored |= h->sig_secondary;
> +
> +		for (j = 0; j < RTE_HASH_BUCKET_ENTRIES; j++) {
> +			if (bkt_stored->signatures[j] == NULL_SIGNATURE)
> +				break;
> +		}
> +
> +		if (j != RTE_HASH_BUCKET_ENTRIES)
> +			break;
> +	}
> +
> +	/* Push existing item (if all alternative buckets are full, pick the last one) */
> +	if (i == RTE_HASH_BUCKET_ENTRIES)
> +		i -= 1;
> +
> +	bkt->signatures[i] = hash;
> +	bkt->key_idx[i] = key_idx;
> +
> +	/* There is an empty slot in the alternative bucket */
> +	if (j != RTE_HASH_BUCKET_ENTRIES) {
> +		bkt_stored->signatures[j] = hash_stored;
> +		bkt_stored->key_idx[j] = key_idx_stored;
> +
> +		number_pushes = 0;
> +		return bkt_stored->key_idx[j];
> +	} else
> +		return run_cuckoo(h, bkt_stored, key_idx_stored, hash_stored,
> +				original_hash, original_key);
> +}
> +
> +static inline int32_t
> +__rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
> +						hash_sig_t sig)
> +{
> +	uint64_t hash0, bucket_idx0, hash1, bucket_idx1;
> +	unsigned i;
> +	struct rte_hash_bucket *bkt0, *bkt1;
> +	void *k, *keys = h->key_store;
> +	void *slot_id;
> +
> +	hash0 = sig;
> +	bucket_idx0 = hash0 & h->bucket_bitmask;
> +	hash0 |= h->sig_msb;
> +
> +	bkt0 = &h->buckets[bucket_idx0];
>  
> -	if (unlikely(pos < 0))
> +	hash1 = rte_hash_secondary_hash(hash0);
Should this not be done before you go setting the msb on hash0?
> +
> +	bucket_idx1 = hash1 & h->bucket_bitmask;
> +	hash1 |= h->sig_msb;
> +	/* Secondary location, add an extra 1 in the second MSB */
> +	hash1 |= h->sig_secondary;
> +
> +	bkt1 = &h->buckets[bucket_idx1];
> +
> +	/* Check if key is already inserted in primary location */
> +	for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
> +		if (bkt0->signatures[i] == hash0) {
> +			k = (char *)keys + bkt0->key_idx[i] * h->key_entry_size;
> +			if (memcmp(key, k, h->key_len) == 0)
> +				return bkt0->key_idx[i];
> +		}
> +	}
> +
> +	/* Check if key is already inserted in secondary location */
> +	for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
> +		if (bkt1->signatures[i] == hash1) {
> +			k = (char *)keys + bkt1->key_idx[i] * h->key_entry_size;
> +			if (memcmp(key, k, h->key_len) == 0)
> +				return bkt1->key_idx[i];
> +		}
> +	}
> +
> +	k = (char *)keys + bkt0->key_idx[i] * h->key_entry_size;
Useless statement. You assign to k again just 3 lines further down.
> +	if (rte_ring_sc_dequeue(h->free_slots, &slot_id) != 0)
>  		return -ENOSPC;
>  
> -	/* Add the new key to the bucket */
> -	sig_bucket[pos] = sig;
> -	rte_memcpy(get_key_from_bucket(h, key_bucket, pos), key, h->key_len);
> -	return bucket_index * h->bucket_entries + pos;
> +	/* Copy key*/
> +	k = (char *)keys + (uintptr_t)slot_id * h->key_entry_size;
> +	memcpy(k, key, h->key_len);
> +
> +	/* Run cuckoo algorithm */
> +	return run_cuckoo(h, bkt0, (uint32_t)((uintptr_t) slot_id), hash0, hash0, key);
Is this not meant to be hash0, hash1 for parameters 4 & 5?
>  }
>  
>  int32_t
>  rte_hash_add_key_with_hash(const struct rte_hash *h,
> -				const void *key, hash_sig_t sig)
> +			const void *key, hash_sig_t sig)
>  {
>  	RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
>  	return __rte_hash_add_key_with_hash(h, key, sig);
> @@ -343,26 +467,45 @@ rte_hash_add_key(const struct rte_hash *h, const void *key)
>  }
>  
>  static inline int32_t
> -__rte_hash_del_key_with_hash(const struct rte_hash *h,
> -				const void *key, hash_sig_t sig)
> +__rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
> +					hash_sig_t sig)
>  {
> -	hash_sig_t *sig_bucket;
> -	uint8_t *key_bucket;
> -	uint32_t bucket_index, i;
> -
> -	/* Get the hash signature and bucket index */
> -	sig = sig | h->sig_msb;
> -	bucket_index = sig & h->bucket_bitmask;
> -	sig_bucket = get_sig_tbl_bucket(h, bucket_index);
> -	key_bucket = get_key_tbl_bucket(h, bucket_index);
> -
> -	/* Check if key is already present in the hash */
> -	for (i = 0; i < h->bucket_entries; i++) {
> -		if ((sig == sig_bucket[i]) &&
> -		    likely(memcmp(key, get_key_from_bucket(h, key_bucket, i),
> -				  h->key_len) == 0)) {
> -			sig_bucket[i] = NULL_SIGNATURE;
> -			return bucket_index * h->bucket_entries + i;
> +	uint64_t hash, bucket_idx;
> +	unsigned i;
> +	struct rte_hash_bucket *bkt;
> +	void *k, *keys = h->key_store;
> +
> +	hash = sig;
> +	bucket_idx = hash & h->bucket_bitmask;
> +	hash |= h->sig_msb;
> +
> +	bkt = &h->buckets[bucket_idx];
> +
> +	/* Check if key is in primary location */
> +	for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
> +		if (bkt->signatures[i] == hash) {
> +			k = (char *)keys + bkt->key_idx[i] * h->key_entry_size;
> +			if (memcmp(key, k, h->key_len) == 0)
> +				return bkt->key_idx[i];
> +		}
> +	}
> +
> +	/* Calculate secondary hash */
> +	hash = rte_hash_secondary_hash(hash);
> +
> +	bucket_idx = hash & h->bucket_bitmask;
> +	hash |= h->sig_msb;
> +	/* Secondary location, add an extra 1 in the second MSB */
> +	hash |= h->sig_secondary;
> +
> +	bkt = &h->buckets[bucket_idx];
> +
> +	/* Check if key is in secondary location */
> +	for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
> +		if (bkt->signatures[i] == hash) {
> +			k = (char *)keys + bkt->key_idx[i] * h->key_entry_size;
> +			if (memcmp(key, k, h->key_len) == 0)
> +				return bkt->key_idx[i];
>  		}
>  	}
>  
> @@ -370,40 +513,66 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h,
>  }
>  
>  int32_t
> -rte_hash_del_key_with_hash(const struct rte_hash *h,
> -				const void *key, hash_sig_t sig)
> +rte_hash_lookup_with_hash(const struct rte_hash *h,
> +			const void *key, hash_sig_t sig)
>  {
>  	RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
> -	return __rte_hash_del_key_with_hash(h, key, sig);
> +	return __rte_hash_lookup_with_hash(h, key, sig);
>  }
>  
>  int32_t
> -rte_hash_del_key(const struct rte_hash *h, const void *key)
> +rte_hash_lookup(const struct rte_hash *h, const void *key)
>  {
>  	RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
> -	return __rte_hash_del_key_with_hash(h, key, rte_hash_hash(h, key));
> +	return __rte_hash_lookup_with_hash(h, key, rte_hash_hash(h, key));
>  }
>  
>  static inline int32_t
> -__rte_hash_lookup_with_hash(const struct rte_hash *h,
> -			const void *key, hash_sig_t sig)
> +__rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
> +						hash_sig_t sig)
>  {
> -	hash_sig_t *sig_bucket;
> -	uint8_t *key_bucket;
> -	uint32_t bucket_index, i;
> -
> -	/* Get the hash signature and bucket index */
> -	sig |= h->sig_msb;
> -	bucket_index = sig & h->bucket_bitmask;
> -	sig_bucket = get_sig_tbl_bucket(h, bucket_index);
> -	key_bucket = get_key_tbl_bucket(h, bucket_index);
> -
> -	/* Check if key is already present in the hash */
> -	for (i = 0; i < h->bucket_entries; i++) {
> -		if ((sig == sig_bucket[i]) &&
> -		    likely(memcmp(key, get_key_from_bucket(h, key_bucket, i),
> -				  h->key_len) == 0)) {
> -			return bucket_index * h->bucket_entries + i;
> +	uint64_t hash, bucket_idx;
> +	unsigned i;
> +	struct rte_hash_bucket *bkt;
> +	void *k, *keys = h->key_store;
> +
> +	hash = sig;
> +	bucket_idx = hash & h->bucket_bitmask;
> +	hash |= h->sig_msb;
> +
> +	bkt = &h->buckets[bucket_idx];
> +
> +	/* Check if key is in primary location */
> +	for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
> +		if (bkt->signatures[i] == hash) {
> +			k = (char *)keys + bkt->key_idx[i] * h->key_entry_size;
> +			if (memcmp(key, k, h->key_len) == 0) {
> +				bkt->signatures[i] = NULL_SIGNATURE;
> +				rte_ring_sp_enqueue(h->free_slots,
> +						(void *)((uintptr_t)bkt->key_idx[i]));
> +				return bkt->key_idx[i];
> +			}
> +		}
> +	}
> +
> +	hash = rte_hash_secondary_hash(hash);
> +	bucket_idx = hash & h->bucket_bitmask;
> +	hash |= h->sig_msb;
> +	/* Secondary location, add an extra 1 in the second MSB */
> +	hash |= h->sig_secondary;
> +
> +	bkt = &h->buckets[bucket_idx];
> +
> +	/* Check if key is in secondary location */
> +	for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
> +		if (bkt->signatures[i] == hash) {
> +			k = (char *)keys + bkt->key_idx[i] * h->key_entry_size;
> +			if (memcmp(key, k, h->key_len) == 0) {
> +				bkt->signatures[i] = NULL_SIGNATURE;
> +				rte_ring_sp_enqueue(h->free_slots,
> +						(void *)((uintptr_t)bkt->key_idx[i]));
> +				return bkt->key_idx[i];
> +			}
>  		}
>  	}
>  
> @@ -411,61 +580,313 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h,
>  }
>  
>  int32_t
> -rte_hash_lookup_with_hash(const struct rte_hash *h,
> +rte_hash_del_key_with_hash(const struct rte_hash *h,
>  			const void *key, hash_sig_t sig)
>  {
>  	RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
> -	return __rte_hash_lookup_with_hash(h, key, sig);
> +	return __rte_hash_del_key_with_hash(h, key, sig);
>  }
>  
>  int32_t
> -rte_hash_lookup(const struct rte_hash *h, const void *key)
> +rte_hash_del_key(const struct rte_hash *h, const void *key)
>  {
>  	RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL);
> -	return __rte_hash_lookup_with_hash(h, key, rte_hash_hash(h, key));
> +	return __rte_hash_del_key_with_hash(h, key, rte_hash_hash(h, key));
>  }
>  
> -int
> -rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
> -		      uint32_t num_keys, int32_t *positions)
> +/* Lookup bulk stage 0: Calculate next primary/secondary hash value (from new key)  */
> +static inline void
> +lookup_stage0(unsigned *idx, uint64_t *lookup_mask,
> +		uint64_t *primary_hash,	uint64_t *secondary_hash,
> +		const void * const *keys, const struct rte_hash *h)
>  {
> -	uint32_t i, j, bucket_index;
> -	hash_sig_t sigs[RTE_HASH_LOOKUP_BULK_MAX];
> +	*idx = __builtin_ctzl(*lookup_mask);
> +	if (*lookup_mask == 0)
> +		*idx = 0;
>  
> -	RETURN_IF_TRUE(((h == NULL) || (keys == NULL) || (num_keys == 0) ||
> -			(num_keys > RTE_HASH_LOOKUP_BULK_MAX) ||
> -			(positions == NULL)), -EINVAL);
> +	*primary_hash = rte_hash_hash(h, keys[*idx]);
> +	*secondary_hash = rte_hash_secondary_hash(*primary_hash);
>  
> -	/* Get the hash signature and bucket index */
> -	for (i = 0; i < num_keys; i++) {
> -		sigs[i] = h->hash_func(keys[i], h->key_len,
> -				h->hash_func_init_val) | h->sig_msb;
> -		bucket_index = sigs[i] & h->bucket_bitmask;
> +	*primary_hash |= h->sig_msb;
> +
> +	*secondary_hash |= h->sig_msb;
> +	*secondary_hash |= h->sig_secondary;
> +
> +	*lookup_mask &= ~(1llu << *idx);
> +}
> +
> +
> +/* Lookup bulk stage 1: Prefetch primary/secondary buckets */
> +static inline void
> +lookup_stage1(uint64_t primary_hash, uint64_t secondary_hash,
> +		const struct rte_hash_bucket **primary_bkt,
> +		const struct rte_hash_bucket **secondary_bkt,
> +		const struct rte_hash *h)
> +{
> +	*primary_bkt = &h->buckets[primary_hash & h->bucket_bitmask];
> +	*secondary_bkt = &h->buckets[secondary_hash & h->bucket_bitmask];
> +
> +	rte_prefetch0(*primary_bkt);
> +	rte_prefetch0(*secondary_bkt);
> +}
>  
> -		/* Pre-fetch relevant buckets */
> -		rte_prefetch1((void *) get_sig_tbl_bucket(h, bucket_index));
> -		rte_prefetch1((void *) get_key_tbl_bucket(h, bucket_index));
> +/*
> + * Lookup bulk stage 2:  Search for match hashes in primary/secondary locations
> + * and prefetch first key slot
> + */
> +static inline void
> +lookup_stage2(unsigned idx, uint64_t primary_hash, uint64_t secondary_hash,
> +		const struct rte_hash_bucket *primary_bkt,
> +		const struct rte_hash_bucket *secondary_bkt,
> +		const void **key_slot,
> +		int32_t *positions,
> +		uint64_t *extra_hits_mask,
> +		const void *keys, const struct rte_hash *h)
> +{
> +	unsigned primary_hash_matches, secondary_hash_matches, key_idx, i;
> +	unsigned total_hash_matches;
> +
> +	total_hash_matches = 1 << (RTE_HASH_BUCKET_ENTRIES * 2);
> +	primary_hash_matches = 1 << RTE_HASH_BUCKET_ENTRIES;
> +	for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
> +		primary_hash_matches |= ((primary_hash == primary_bkt->signatures[i]) << i);
> +		total_hash_matches |= ((primary_hash == primary_bkt->signatures[i]) << i);
>  	}
>  
> -	/* Check if key is already present in the hash */
> -	for (i = 0; i < num_keys; i++) {
> -		bucket_index = sigs[i] & h->bucket_bitmask;
> -		hash_sig_t *sig_bucket = get_sig_tbl_bucket(h, bucket_index);
> -		uint8_t *key_bucket = get_key_tbl_bucket(h, bucket_index);
> -
> -		positions[i] = -ENOENT;
> -
> -		for (j = 0; j < h->bucket_entries; j++) {
> -			if ((sigs[i] == sig_bucket[j]) &&
> -			    likely(memcmp(keys[i],
> -					  get_key_from_bucket(h, key_bucket, j),
> -					  h->key_len) == 0)) {
> -				positions[i] = bucket_index *
> -					h->bucket_entries + j;
> -				break;
> -			}
> -		}
> +	key_idx = primary_bkt->key_idx[__builtin_ctzl(primary_hash_matches)];
> +
> +	secondary_hash_matches = 1 << RTE_HASH_BUCKET_ENTRIES;
> +	for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
> +		secondary_hash_matches |= ((secondary_hash == secondary_bkt->signatures[i]) << i);
> +		total_hash_matches |= ((secondary_hash == secondary_bkt->signatures[i])
> +						<< (i + RTE_HASH_BUCKET_ENTRIES));
> +	}
> +
> +	if (key_idx == 0)
> +		key_idx = secondary_bkt->key_idx[__builtin_ctzl(secondary_hash_matches)];
> +
> +	*key_slot = (const char *)keys + key_idx * h->key_entry_size;
> +
> +	rte_prefetch0(*key_slot);
> +	positions[idx] = key_idx;
> +
> +	*extra_hits_mask |= (uint64_t)(__builtin_popcount(primary_hash_matches) > 2) << idx;
> +	*extra_hits_mask |= (uint64_t)(__builtin_popcount(secondary_hash_matches) > 2) << idx;
> +	*extra_hits_mask |= (uint64_t)(__builtin_popcount(total_hash_matches) > 2) << idx;
> +
> +}
> +
> +
> +/* Lookup bulk stage 3: Check if key matches, update hit mask */
> +static inline void
> +lookup_stage3(unsigned idx, const void *key_slot,
> +		const void * const *keys, int32_t *positions,
> +		uint64_t *hits, const struct rte_hash *h)
> +{
> +	unsigned hit;
> +
> +	hit = !memcmp(key_slot, keys[idx], h->key_len);
> +	if (unlikely(hit == 0))
> +		positions[idx] = -ENOENT;
> +	*hits = (uint64_t)(hit) << idx;
> +}
> +
> +static inline int
> +__rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
> +		      uint32_t num_keys, int32_t *positions) {
> +
> +	uint64_t hits = 0;
> +	uint64_t next_mask = 0;
> +	uint64_t extra_hits_mask = 0;
> +	uint64_t lookup_mask;
> +	unsigned idx;
> +	const void *key_store = h->key_store;
> +
> +	unsigned idx00, idx01, idx10, idx11, idx20, idx21, idx30, idx31;
> +	const struct rte_hash_bucket *primary_bkt10, *primary_bkt11;
> +	const struct rte_hash_bucket *secondary_bkt10, *secondary_bkt11;
> +	const struct rte_hash_bucket *primary_bkt20, *primary_bkt21;
> +	const struct rte_hash_bucket *secondary_bkt20, *secondary_bkt21;
> +	const void *k_slot20, *k_slot21, *k_slot30, *k_slot31;
> +	uint64_t primary_hash00, primary_hash01;
> +	uint64_t secondary_hash00, secondary_hash01;
> +	uint64_t primary_hash10, primary_hash11;
> +	uint64_t secondary_hash10, secondary_hash11;
> +	uint64_t primary_hash20, primary_hash21;
> +	uint64_t secondary_hash20, secondary_hash21;
> +
> +	if (num_keys == RTE_HASH_LOOKUP_BULK_MAX)
> +		lookup_mask = 0xffffffffffffffff;
> +	else
> +		lookup_mask = (1 << num_keys) - 1;
> +
> +	lookup_stage0(&idx00, &lookup_mask, &primary_hash00,
> +			&secondary_hash00, keys, h);
> +	lookup_stage0(&idx01, &lookup_mask, &primary_hash01,
> +			&secondary_hash01, keys, h);
> +
> +	primary_hash10 = primary_hash00;
> +	primary_hash11 = primary_hash01;
> +	secondary_hash10 = secondary_hash00;
> +	secondary_hash11 = secondary_hash01;
> +	idx10 = idx00, idx11 = idx01;
> +
> +	lookup_stage0(&idx00, &lookup_mask, &primary_hash00,
> +			&secondary_hash00, keys, h);
> +	lookup_stage0(&idx01, &lookup_mask, &primary_hash01,
> +			&secondary_hash01, keys, h);
> +	lookup_stage1(primary_hash10, secondary_hash10, &primary_bkt10,
> +			&secondary_bkt10, h);
> +	lookup_stage1(primary_hash11, secondary_hash11, &primary_bkt11,
> +			&secondary_bkt11, h);
> +
> +	primary_bkt20 = primary_bkt10;
> +	primary_bkt21 = primary_bkt11;
> +	secondary_bkt20 = secondary_bkt10;
> +	secondary_bkt21 = secondary_bkt11;
> +	primary_hash20 = primary_hash10;
> +	primary_hash21 = primary_hash11;
> +	secondary_hash20 = secondary_hash10;
> +	secondary_hash21 = secondary_hash11;
> +	idx20 = idx10, idx21 = idx11;
> +	primary_hash10 = primary_hash00;
> +	primary_hash11 = primary_hash01;
> +	secondary_hash10 = secondary_hash00;
> +	secondary_hash11 = secondary_hash01;
> +	idx10 = idx00, idx11 = idx01;
> +
> +	lookup_stage0(&idx00, &lookup_mask, &primary_hash00,
> +			&secondary_hash00, keys, h);
> +	lookup_stage0(&idx01, &lookup_mask, &primary_hash01,
> +			&secondary_hash01, keys, h);
> +	lookup_stage1(primary_hash10, secondary_hash10, &primary_bkt10,
> +			&secondary_bkt10, h);
> +	lookup_stage1(primary_hash11, secondary_hash11, &primary_bkt11,
> +			&secondary_bkt11, h);
> +	lookup_stage2(idx20, primary_hash20, secondary_hash20, primary_bkt20,
> +		secondary_bkt20, &k_slot20, positions, &extra_hits_mask,
> +		key_store, h);
> +	lookup_stage2(idx21, primary_hash21, secondary_hash21, primary_bkt21,
> +		secondary_bkt21, &k_slot21, positions, &extra_hits_mask,
> +		key_store, h);
> +
> +	while (lookup_mask) {
> +		k_slot30 = k_slot20, k_slot31 = k_slot21;
> +		idx30 = idx20, idx31 = idx21;
> +		primary_bkt20 = primary_bkt10;
> +		primary_bkt21 = primary_bkt11;
> +		secondary_bkt20 = secondary_bkt10;
> +		secondary_bkt21 = secondary_bkt11;
> +		primary_hash20 = primary_hash10;
> +		primary_hash21 = primary_hash11;
> +		secondary_hash20 = secondary_hash10;
> +		secondary_hash21 = secondary_hash11;
> +		idx20 = idx10, idx21 = idx11;
> +		primary_hash10 = primary_hash00;
> +		primary_hash11 = primary_hash01;
> +		secondary_hash10 = secondary_hash00;
> +		secondary_hash11 = secondary_hash01;
> +		idx10 = idx00, idx11 = idx01;
> +
> +		lookup_stage0(&idx00, &lookup_mask, &primary_hash00,
> +				&secondary_hash00, keys, h);
> +		lookup_stage0(&idx01, &lookup_mask, &primary_hash01,
> +				&secondary_hash01, keys, h);
> +		lookup_stage1(primary_hash10, secondary_hash10, &primary_bkt10,
> +				&secondary_bkt10, h);
> +		lookup_stage1(primary_hash11, secondary_hash11, &primary_bkt11,
> +				&secondary_bkt11, h);
> +		lookup_stage2(idx20, primary_hash20, secondary_hash20,
> +			primary_bkt20, secondary_bkt20, &k_slot20, positions,
> +			&extra_hits_mask, key_store, h);
> +		lookup_stage2(idx21, primary_hash21, secondary_hash21,
> +			primary_bkt21, secondary_bkt21,	&k_slot21, positions,
> +			&extra_hits_mask, key_store, h);
> +		lookup_stage3(idx30, k_slot30, keys, positions, &hits, h);
> +		lookup_stage3(idx31, k_slot31, keys, positions, &hits, h);
> +	}
> +
> +	k_slot30 = k_slot20, k_slot31 = k_slot21;
> +	idx30 = idx20, idx31 = idx21;
> +	primary_bkt20 = primary_bkt10;
> +	primary_bkt21 = primary_bkt11;
> +	secondary_bkt20 = secondary_bkt10;
> +	secondary_bkt21 = secondary_bkt11;
> +	primary_hash20 = primary_hash10;
> +	primary_hash21 = primary_hash11;
> +	secondary_hash20 = secondary_hash10;
> +	secondary_hash21 = secondary_hash11;
> +	idx20 = idx10, idx21 = idx11;
> +	primary_hash10 = primary_hash00;
> +	primary_hash11 = primary_hash01;
> +	secondary_hash10 = secondary_hash00;
> +	secondary_hash11 = secondary_hash01;
> +	idx10 = idx00, idx11 = idx01;
> +	lookup_stage1(primary_hash10, secondary_hash10, &primary_bkt10,
> +		&secondary_bkt10, h);
> +	lookup_stage1(primary_hash11, secondary_hash11, &primary_bkt11,
> +		&secondary_bkt11, h);
> +	lookup_stage2(idx20, primary_hash20, secondary_hash20, primary_bkt20,
> +		secondary_bkt20, &k_slot20, positions, &extra_hits_mask,
> +		key_store, h);
> +	lookup_stage2(idx21, primary_hash21, secondary_hash21, primary_bkt21,
> +		secondary_bkt21, &k_slot21, positions, &extra_hits_mask,
> +		key_store, h);
> +	lookup_stage3(idx30, k_slot30, keys, positions, &hits, h);
> +	lookup_stage3(idx31, k_slot31, keys, positions, &hits, h);
> +
> +	k_slot30 = k_slot20, k_slot31 = k_slot21;
> +	idx30 = idx20, idx31 = idx21;
> +	primary_bkt20 = primary_bkt10;
> +	primary_bkt21 = primary_bkt11;
> +	secondary_bkt20 = secondary_bkt10;
> +	secondary_bkt21 = secondary_bkt11;
> +	primary_hash20 = primary_hash10;
> +	primary_hash21 = primary_hash11;
> +	secondary_hash20 = secondary_hash10;
> +	secondary_hash21 = secondary_hash11;
> +	idx20 = idx10, idx21 = idx11;
> +
> +	lookup_stage2(idx20, primary_hash20, secondary_hash20, primary_bkt20,
> +		secondary_bkt20, &k_slot20, positions, &extra_hits_mask,
> +		key_store, h);
> +	lookup_stage2(idx21, primary_hash21, secondary_hash21, primary_bkt21,
> +		secondary_bkt21, &k_slot21, positions, &extra_hits_mask,
> +		key_store, h);
> +	lookup_stage3(idx30, k_slot30, keys, positions, &hits, h);
> +	lookup_stage3(idx31, k_slot31, keys, positions, &hits, h);
> +
> +	k_slot30 = k_slot20, k_slot31 = k_slot21;
> +	idx30 = idx20, idx31 = idx21;
> +
> +	lookup_stage3(idx30, k_slot30, keys, positions, &hits, h);
> +	lookup_stage3(idx31, k_slot31, keys, positions, &hits, h);
> +
> +	/* handle extra_hits_mask */
> +	next_mask |= extra_hits_mask;
> +
> +	/* ignore any items we have already found */
> +	next_mask &= ~hits;
> +
> +	if (unlikely(next_mask)) {
> +		/* run a single search for each remaining item */
> +		do {
> +			idx = __builtin_ctzl(next_mask);
> +			positions[idx] = rte_hash_lookup(h, keys[idx]);
> +			next_mask &= ~(1llu << idx);
> +		} while (next_mask);
>  	}
>  
>  	return 0;
>  }
> +
> +int
> +rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
> +		      uint32_t num_keys, int32_t *positions)
> +{
> +	RETURN_IF_TRUE(((h == NULL) || (keys == NULL) || (num_keys == 0) ||
> +			(num_keys > RTE_HASH_LOOKUP_BULK_MAX) ||
> +			(positions == NULL)), -EINVAL);
> +
> +	return __rte_hash_lookup_bulk(h, keys, num_keys, positions);
> +}
> diff --git a/lib/librte_hash/rte_hash.h b/lib/librte_hash/rte_hash.h
> index 821a9d4..4088ac4 100644
> --- a/lib/librte_hash/rte_hash.h
> +++ b/lib/librte_hash/rte_hash.h
> @@ -1,7 +1,7 @@
>  /*-
>   *   BSD LICENSE
>   *
> - *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> + *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
>   *   All rights reserved.
>   *
>   *   Redistribution and use in source and binary forms, with or without
> @@ -42,27 +42,36 @@
>  
>  #include <stdint.h>
>  #include <sys/queue.h>
> +#include <rte_ring.h>
The depdirs in the makefile needs to be updated with a new dependency on
rte_ring library.
>  
>  #ifdef __cplusplus
>  extern "C" {
>  #endif
>  
>  /** Maximum size of hash table that can be created. */
> -#define RTE_HASH_ENTRIES_MAX			(1 << 26)
> +#define RTE_HASH_ENTRIES_MAX			(1 << 30)
>  
> -/** Maximum bucket size that can be created. */
> -#define RTE_HASH_BUCKET_ENTRIES_MAX		16
This is a public macro, you can't just drop it. I suggest changing it's value
to 4, and adding the new macro below alongside it. If you want, you can
also see about marking it as deprecated to remove in future, but I'm not sure
it's worthwhile doing so.
> +/** Number of items per bucket. */
> +#define RTE_HASH_BUCKET_ENTRIES			4
>  
>  /** Maximum length of key that can be used. */
>  #define RTE_HASH_KEY_LENGTH_MAX			64
>  
> -/** Max number of keys that can be searched for using rte_hash_lookup_multi. */
> -#define RTE_HASH_LOOKUP_BULK_MAX		16
> -#define RTE_HASH_LOOKUP_MULTI_MAX		RTE_HASH_LOOKUP_BULK_MAX
As above, you need to keep the lookup_multi_max macro.
> -
> -/** Max number of characters in hash name.*/
> +/** Maximum number of characters in hash name.*/
>  #define RTE_HASH_NAMESIZE			32
>  
> +/** Bits that will be right shifted when calculating a secondary hash. */
> +#define RTE_HASH_ALT_BITS_SHIFT			12
> +
> +/** Bits that will be XORed to obtained a secondary hash. */
> +#define RTE_HASH_ALT_BITS_XOR_MASK		0x5bd1e995
If these macros are for use internally in the hash implementation only, mark
them as @internal, so that people don't start depending on them in their apps.
This allows us to rename them or remove them in future if we like, without having
to give one-releases deprecation warning.
> +
> +/** Maximum number of keys that can be searched for using rte_hash_lookup_bulk. */
> +#define RTE_HASH_LOOKUP_BULK_MAX		64
> +
> +/* Stored key size is a multiple of this value */
> +#define KEY_ALIGNMENT				16
> +
>  /** Signature of key that is stored internally. */
>  typedef uint32_t hash_sig_t;
>  
> @@ -77,9 +86,9 @@ typedef uint32_t (*rte_hash_function)(const void *key, uint32_t key_len,
>  struct rte_hash_parameters {
>  	const char *name;		/**< Name of the hash. */
>  	uint32_t entries;		/**< Total hash table entries. */
> -	uint32_t bucket_entries;	/**< Bucket entries. */
> -	uint32_t key_len;		/**< Length of hash key. */
> -	rte_hash_function hash_func;	/**< Function used to calculate hash. */
> +	uint32_t bucket_entries;        /**< Bucket entries. */
> +	uint16_t key_len;		/**< Length of hash key. */
> +	rte_hash_function hash_func;	/**< Primary Hash function used to calculate hash. */
>  	uint32_t hash_func_init_val;	/**< Init value used by hash_func. */
>  	int socket_id;			/**< NUMA Socket ID for memory. */
>  };
Due to padding of the uint16_t value, I don't think this will break the ABI.
However, it might be worthwhile manually putting in an explicit pad (or comment)
to make this clear.
> @@ -88,24 +97,35 @@ struct rte_hash_parameters {
>  struct rte_hash {
As part of this patchset, it might be worthwhile updating the comment on
struct rte_hash to indicate that this is an internal data-structure that should
not be directly referenced by apps.
>  	char name[RTE_HASH_NAMESIZE];	/**< Name of the hash. */
>  	uint32_t entries;		/**< Total table entries. */
> -	uint32_t bucket_entries;	/**< Bucket entries. */
> -	uint32_t key_len;		/**< Length of hash key. */
> +	uint32_t bucket_entries;        /**< Bucket entries. */
> +	uint16_t key_len;		/**< Length of hash key. */
>  	rte_hash_function hash_func;	/**< Function used to calculate hash. */
>  	uint32_t hash_func_init_val;	/**< Init value used by hash_func. */
>  	uint32_t num_buckets;		/**< Number of buckets in table. */
>  	uint32_t bucket_bitmask;	/**< Bitmask for getting bucket index
>  							from hash signature. */
> -	hash_sig_t sig_msb;	/**< MSB is always set in valid signatures. */
> -	uint8_t *sig_tbl;	/**< Flat array of hash signature buckets. */
> -	uint32_t sig_tbl_bucket_size;	/**< Signature buckets may be padded for
> -					   alignment reasons, and this is the
> -					   bucket size used by sig_tbl. */
> -	uint8_t *key_tbl;	/**< Flat array of key value buckets. */
> -	uint32_t key_tbl_key_size;	/**< Keys may be padded for alignment
> -					   reasons, and this is the key size
> -					   used	by key_tbl. */
> +	uint16_t key_entry_size;	/**< Size of each key entry. */
> +	int socket_id;			/**< NUMA Socket ID for memory. */
> +	uint64_t sig_msb;		/**< MSB is always set in valid signatures. */
> +	uint64_t sig_secondary;		/**< Second MSB is set when hash is calculated
> +						from secondary hash function. */
> +
> +	struct rte_ring *free_slots;	/**< Ring that stores all indexes
> +						of the free slots in the key table */
> +	void *key_store;		/**< Table storing all keys and data */
> +
> +	struct rte_hash_bucket *buckets;	/**< Table with buckets storing all the
> +							hash values and key indexes
> +							to the key table*/
>  };
Since this header file does not contain any inline functions that operate on
this structure, it can be safely moved to the .c file, and replaced by a simple
"struct rte_hash" definition.
>  
> +/** Bucket structure */
> +struct rte_hash_bucket {
> +	uint64_t signatures[RTE_HASH_BUCKET_ENTRIES];
> +	uint32_t key_idx[RTE_HASH_BUCKET_ENTRIES];
> +} __rte_cache_aligned;
> +
Move to .c file as unused in this file.
> +
>  /**
>   * Create a new hash table.
>   *
> @@ -126,7 +146,6 @@ struct rte_hash {
>  struct rte_hash *
>  rte_hash_create(const struct rte_hash_parameters *params);
>  
> -
>  /**
>   * Find an existing hash table object and return a pointer to it.
>   *
> @@ -158,9 +177,9 @@ rte_hash_free(struct rte_hash *h);
>   *   Key to add to the hash table.
>   * @return
>   *   - -EINVAL if the parameters are invalid.
> + *   - -EAGAIN if key could not be added (table needs rehash)
>   *   - -ENOSPC if there is no space in the hash for this key.
> - *   - A positive value that can be used by the caller as an offset into an
> - *     array of user data. This value is unique for this key.
> + *   - 0 if key was added successfully
>   */
This is an ABI and API change. With this change you can no longer use the old-style
way of returning indexes to allow the app to store the data locally. 
>  int32_t
>  rte_hash_add_key(const struct rte_hash *h, const void *key);
> @@ -177,13 +196,12 @@ rte_hash_add_key(const struct rte_hash *h, const void *key);
>   *   Hash value to add to the hash table.
>   * @return
>   *   - -EINVAL if the parameters are invalid.
> + *   - -EAGAIN if key could not be added (table needs rehash)
>   *   - -ENOSPC if there is no space in the hash for this key.
> - *   - A positive value that can be used by the caller as an offset into an
> - *     array of user data. This value is unique for this key.
> + *   - 0 if key was added successfully
>   */
>  int32_t
> -rte_hash_add_key_with_hash(const struct rte_hash *h,
> -				const void *key, hash_sig_t sig);
> +rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, hash_sig_t sig);
>  
>  /**
>   * Remove a key from an existing hash table. This operation is not multi-thread
> @@ -196,9 +214,7 @@ rte_hash_add_key_with_hash(const struct rte_hash *h,
>   * @return
>   *   - -EINVAL if the parameters are invalid.
>   *   - -ENOENT if the key is not found.
> - *   - A positive value that can be used by the caller as an offset into an
> - *     array of user data. This value is unique for this key, and is the same
> - *     value that was returned when the key was added.
> + *   - 0 if key was deleted successfully
>   */
>  int32_t
>  rte_hash_del_key(const struct rte_hash *h, const void *key);
> @@ -216,14 +232,10 @@ rte_hash_del_key(const struct rte_hash *h, const void *key);
>   * @return
>   *   - -EINVAL if the parameters are invalid.
>   *   - -ENOENT if the key is not found.
> - *   - A positive value that can be used by the caller as an offset into an
> - *     array of user data. This value is unique for this key, and is the same
> - *     value that was returned when the key was added.
> + *   - 0 if key was deleted successfully
>   */
>  int32_t
> -rte_hash_del_key_with_hash(const struct rte_hash *h,
> -				const void *key, hash_sig_t sig);
> -
> +rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key, hash_sig_t sig);
>  
>  /**
>   * Find a key in the hash table. This operation is multi-thread safe.
> @@ -235,9 +247,7 @@ rte_hash_del_key_with_hash(const struct rte_hash *h,
>   * @return
>   *   - -EINVAL if the parameters are invalid.
>   *   - -ENOENT if the key is not found.
> - *   - A positive value that can be used by the caller as an offset into an
> - *     array of user data. This value is unique for this key, and is the same
> - *     value that was returned when the key was added.
> + *   - 0 if key was found successfully
>   */
>  int32_t
>  rte_hash_lookup(const struct rte_hash *h, const void *key);
> @@ -254,14 +264,40 @@ rte_hash_lookup(const struct rte_hash *h, const void *key);
>   * @return
>   *   - -EINVAL if the parameters are invalid.
>   *   - -ENOENT if the key is not found.
> - *   - A positive value that can be used by the caller as an offset into an
> - *     array of user data. This value is unique for this key, and is the same
> - *     value that was returned when the key was added.
> + *   - 0 if key was found successfully
>   */
>  int32_t
> -rte_hash_lookup_with_hash(const struct rte_hash *h,
> -				const void *key, hash_sig_t sig);
> +rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key, hash_sig_t sig);
>  
> +#define rte_hash_lookup_multi rte_hash_lookup_bulk
> +/**
> + * Find multiple keys in the hash table. This operation is multi-thread safe.
> + *
> + * @param h
> + *   Hash table to look in.
> + * @param keys
> + *   A pointer to a list of keys to look for.
> + * @param num_keys
> + *   How many keys are in the keys list (less than RTE_HASH_LOOKUP_BULK_MAX).
> + * @param positions
> + *   Output containing a list of values, corresponding to the list of keys that
> + *   can be used by the caller as an offset into an array of user data. These
> + *   values are unique for each key, and are the same values that were returned
> + *   when each key was added. If a key in the list was not found, then -ENOENT
> + *   will be the value.
> + * @return
> + *   -EINVAL if there's an error, otherwise number of hits.
> + */
> +int
> +rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
> +		      uint32_t num_keys, int32_t *positions);
> +
> +static inline hash_sig_t
> +hash_alt(const uint32_t primary_hash) {
> +	uint32_t tag = primary_hash >> RTE_HASH_ALT_BITS_SHIFT;
> +
> +	return (primary_hash ^ ((tag + 1) * RTE_HASH_ALT_BITS_XOR_MASK));
> +}
This needs a doxygen comment, is it public or internal?
>  
>  /**
>   * Calc a hash value by key. This operation is not multi-process safe.
> @@ -280,28 +316,23 @@ rte_hash_hash(const struct rte_hash *h, const void *key)
>  	return h->hash_func(key, h->key_len, h->hash_func_init_val);
>  }
>  
> -#define rte_hash_lookup_multi rte_hash_lookup_bulk
>  /**
> - * Find multiple keys in the hash table. This operation is multi-thread safe.
> + * Calc the secondary hash value by key. This operation is not multi-process safe.
>   *
>   * @param h
>   *   Hash table to look in.
> - * @param keys
> - *   A pointer to a list of keys to look for.
> - * @param num_keys
> - *   How many keys are in the keys list (less than RTE_HASH_LOOKUP_BULK_MAX).
> - * @param positions
> - *   Output containing a list of values, corresponding to the list of keys that
> - *   can be used by the caller as an offset into an array of user data. These
> - *   values are unique for each key, and are the same values that were returned
> - *   when each key was added. If a key in the list was not found, then -ENOENT
> - *   will be the value.
> + * @param key
> + *   Key to find.
>   * @return
> - *   -EINVAL if there's an error, otherwise 0.
> + *   - hash value
>   */
> -int
> -rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
> -		      uint32_t num_keys, int32_t *positions);
> +static inline hash_sig_t
> +rte_hash_secondary_hash(const uint32_t primary_hash)
> +{
> +	/* calc hash result by key */
> +	return hash_alt(primary_hash);
> +}
> +
>  #ifdef __cplusplus
>  }
>  #endif
> -- 
> 2.4.2
> 
    
    
More information about the dev
mailing list