[dpdk-dev] [PATCH 1/3] hash: deprecate lock ellision and read/write concurreny flags

Bruce Richardson bruce.richardson at intel.com
Thu Nov 1 10:45:31 CET 2018


On Wed, Oct 31, 2018 at 11:54:52PM -0500, Honnappa Nagarahalli wrote:
> Hash library should provide read/write concurrency by default
> as most of the use cases require read/write concurrency. Hence
> the flag RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY is deprecated.
> The library will decide if locking is required to provide
> the concurrency based on other configuration flags.
> 
> If a lock is used to provide the read/write concurrency, best
> possible locking mechanism available on the platform should
> be used. Hence, the flag RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT
> is deprecated. The library will use transactional memory
> if the platform supports it.
> 
> Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli at arm.com>
> Reviewed-by: Dharmik Thakkar <dharmik.thakkar at arm.com>
> Reviewed-by: Gavin Hu <gavin.hu at arm.com>
> ---
>  lib/librte_hash/rte_cuckoo_hash.c    | 319 ++++++++++++++++++++++++++-
>  lib/librte_hash/rte_hash.h           |  22 +-
>  lib/librte_hash/rte_hash_version.map |   7 +
>  3 files changed, 345 insertions(+), 3 deletions(-)
> 
> diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c
> index 5ddcccd87..a11de22be 100644
> --- a/lib/librte_hash/rte_cuckoo_hash.c
> +++ b/lib/librte_hash/rte_cuckoo_hash.c
> @@ -121,7 +121,7 @@ get_alt_bucket_index(const struct rte_hash *h,
>  }
>  
>  struct rte_hash *
> -rte_hash_create(const struct rte_hash_parameters *params)
> +rte_hash_create_v20(const struct rte_hash_parameters *params)
>  {
>  	struct rte_hash *h = NULL;
>  	struct rte_tailq_entry *te = NULL;
> @@ -446,6 +446,323 @@ rte_hash_create(const struct rte_hash_parameters *params)
>  	rte_free(tbl_chng_cnt);
>  	return NULL;
>  }
> +VERSION_SYMBOL(rte_hash_create, _v20, 2.0);
> +

To make reviewing this easier, can I ask if in V2 you can put the v18.11
function first, before the existing one. Hopefully that means that the "new"
function from git's point of view is the existing one, showing it as a
block add that we can pretty much skip reviewing. The benefit of this is that
the changes in the v1811 should then show as line-by-line diffs in the
patch, so we can easily review the changes made in the new case. It's hard
to spot them in the whole function below.

> +struct rte_hash *
> +rte_hash_create_v1811(const struct rte_hash_parameters *params)
> +{
> +	struct rte_hash *h = NULL;
> +	struct rte_tailq_entry *te = NULL;
> +	struct rte_hash_list *hash_list;
> +	struct rte_ring *r = NULL;
> +	struct rte_ring *r_ext = NULL;
> +	char hash_name[RTE_HASH_NAMESIZE];
> +	void *k = NULL;
> +	void *buckets = NULL;
> +	void *buckets_ext = NULL;
> +	char ring_name[RTE_RING_NAMESIZE];
> +	char ext_ring_name[RTE_RING_NAMESIZE];
> +	unsigned num_key_slots;
> +	unsigned i;
> +	unsigned int use_local_cache = 0;
> +	unsigned int ext_table_support = 0;
> +	unsigned int readwrite_concur_support = 1;
> +	unsigned int writer_takes_lock = 1;
> +	unsigned int no_free_on_del = 0;
> +	uint32_t *tbl_chng_cnt = NULL;
> +	unsigned int readwrite_concur_lf_support = 0;
> +
> +	rte_hash_function default_hash_func = (rte_hash_function)rte_jhash;
> +
> +	hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
> +
> +	if (params == NULL) {
> +		RTE_LOG(ERR, HASH, "rte_hash_create has no parameters\n");
> +		return NULL;
> +	}
> +
> +	/* Check for valid parameters */
> +	if ((params->entries > RTE_HASH_ENTRIES_MAX) ||
> +			(params->entries < RTE_HASH_BUCKET_ENTRIES) ||
> +			(params->key_len == 0)) {
> +		rte_errno = EINVAL;
> +		RTE_LOG(ERR, HASH, "rte_hash_create has invalid parameters\n");
> +		return NULL;
> +	}
> +
> +	/* Validate correct usage of extra options */
> +	if ((params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF) &&
> +	    (params->extra_flag & RTE_HASH_EXTRA_FLAGS_EXT_TABLE)) {
> +		rte_errno = EINVAL;
> +		RTE_LOG(ERR, HASH, "rte_hash_create: extendable bucket "
> +			"feature not supported with rw concurrency "
> +			"lock free\n");

Please don't split the error text across lines. If you put it on a line by
itself, hopefully checkpatch should not complain. If checkpatch does
complain about the long line, just ignore it!
[Yes, I know this is copied from original code, but since it appears as new
code in this patch, we should fix it]

> +		return NULL;
> +	}
> +
> +	/* Check extra flags field to check extra options. */
> +	if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD)
> +		use_local_cache = 1;
> +
> +	if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_EXT_TABLE)
> +		ext_table_support = 1;
> +
> +	if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_NO_FREE_ON_DEL)
> +		no_free_on_del = 1;
> +
> +	if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF) {
> +		/* Do not use lock for RW concurrency */
> +		readwrite_concur_support = 0;
> +		readwrite_concur_lf_support = 1;
> +		/* Enable not freeing internal memory/index on delete */
> +		no_free_on_del = 1;
> +	}
> +
> +	if ((params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF) &&
> +	    !(params->extra_flag & RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD))
> +		writer_takes_lock = 0;
> +
> +	/* Store all keys and leave the first entry as a dummy entry for lookup_bulk */
> +	if (use_local_cache)
> +		/*
> +		 * Increase number of slots by total number of indices
> +		 * that can be stored in the lcore caches
> +		 * except for the first cache
> +		 */
> +		num_key_slots = params->entries + (RTE_MAX_LCORE - 1) *
> +					(LCORE_CACHE_SIZE - 1) + 1;
> +	else
> +		num_key_slots = params->entries + 1;
> +
> +	snprintf(ring_name, sizeof(ring_name), "HT_%s", params->name);
> +	/* Create ring (Dummy slot index is not enqueued) */
> +	r = rte_ring_create(ring_name, rte_align32pow2(num_key_slots),
> +			params->socket_id, 0);
> +	if (r == NULL) {
> +		RTE_LOG(ERR, HASH, "memory allocation failed\n");
> +		goto err;
> +	}
> +
> +	const uint32_t num_buckets = rte_align32pow2(params->entries) /
> +						RTE_HASH_BUCKET_ENTRIES;
> +
> +	/* Create ring for extendable buckets. */
> +	if (ext_table_support) {
> +		snprintf(ext_ring_name, sizeof(ext_ring_name), "HT_EXT_%s",
> +								params->name);
> +		r_ext = rte_ring_create(ext_ring_name,
> +				rte_align32pow2(num_buckets + 1),
> +				params->socket_id, 0);
> +
> +		if (r_ext == NULL) {
> +			RTE_LOG(ERR, HASH, "ext buckets memory allocation "
> +								"failed\n");
> +			goto err;
> +		}
> +	}
> +
> +	snprintf(hash_name, sizeof(hash_name), "HT_%s", params->name);
> +
> +	rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
> +
> +	/* guarantee there's no existing: this is normally already checked
> +	 * by ring creation above */
> +	TAILQ_FOREACH(te, hash_list, next) {
> +		h = (struct rte_hash *) te->data;
> +		if (strncmp(params->name, h->name, RTE_HASH_NAMESIZE) == 0)
> +			break;
> +	}
> +	h = NULL;
> +	if (te != NULL) {
> +		rte_errno = EEXIST;
> +		te = NULL;
> +		goto err_unlock;
> +	}
> +
> +	te = rte_zmalloc("HASH_TAILQ_ENTRY", sizeof(*te), 0);
> +	if (te == NULL) {
> +		RTE_LOG(ERR, HASH, "tailq entry allocation failed\n");
> +		goto err_unlock;
> +	}
> +
> +	h = (struct rte_hash *)rte_zmalloc_socket(hash_name, sizeof(struct rte_hash),
> +					RTE_CACHE_LINE_SIZE, params->socket_id);
> +
> +	if (h == NULL) {
> +		RTE_LOG(ERR, HASH, "memory allocation failed\n");
> +		goto err_unlock;
> +	}
> +
> +	buckets = rte_zmalloc_socket(NULL,
> +				num_buckets * sizeof(struct rte_hash_bucket),
> +				RTE_CACHE_LINE_SIZE, params->socket_id);
> +
> +	if (buckets == NULL) {
> +		RTE_LOG(ERR, HASH, "buckets memory allocation failed\n");
> +		goto err_unlock;
> +	}
> +
> +	/* Allocate same number of extendable buckets */
> +	if (ext_table_support) {
> +		buckets_ext = rte_zmalloc_socket(NULL,
> +				num_buckets * sizeof(struct rte_hash_bucket),
> +				RTE_CACHE_LINE_SIZE, params->socket_id);
> +		if (buckets_ext == NULL) {
> +			RTE_LOG(ERR, HASH, "ext buckets memory allocation "
> +							"failed\n");
> +			goto err_unlock;
> +		}
> +		/* Populate ext bkt ring. We reserve 0 similar to the
> +		 * key-data slot, just in case in future we want to
> +		 * use bucket index for the linked list and 0 means NULL
> +		 * for next bucket
> +		 */
> +		for (i = 1; i <= num_buckets; i++)
> +			rte_ring_sp_enqueue(r_ext, (void *)((uintptr_t) i));
> +	}
> +
> +	const uint32_t key_entry_size =
> +		RTE_ALIGN(sizeof(struct rte_hash_key) + params->key_len,
> +			  KEY_ALIGNMENT);
> +	const uint64_t key_tbl_size = (uint64_t) key_entry_size * num_key_slots;
> +
> +	k = rte_zmalloc_socket(NULL, key_tbl_size,
> +			RTE_CACHE_LINE_SIZE, params->socket_id);
> +
> +	if (k == NULL) {
> +		RTE_LOG(ERR, HASH, "memory allocation failed\n");
> +		goto err_unlock;
> +	}
> +
> +	tbl_chng_cnt = rte_zmalloc_socket(NULL, sizeof(uint32_t),
> +			RTE_CACHE_LINE_SIZE, params->socket_id);
> +
> +	if (tbl_chng_cnt == NULL) {
> +		RTE_LOG(ERR, HASH, "memory allocation failed\n");
> +		goto err_unlock;
> +	}
> +
> +/*
> + * If x86 architecture is used, select appropriate compare function,
> + * which may use x86 intrinsics, otherwise use memcmp
> + */
> +#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
> +	/* Select function to compare keys */
> +	switch (params->key_len) {
> +	case 16:
> +		h->cmp_jump_table_idx = KEY_16_BYTES;
> +		break;
> +	case 32:
> +		h->cmp_jump_table_idx = KEY_32_BYTES;
> +		break;
> +	case 48:
> +		h->cmp_jump_table_idx = KEY_48_BYTES;
> +		break;
> +	case 64:
> +		h->cmp_jump_table_idx = KEY_64_BYTES;
> +		break;
> +	case 80:
> +		h->cmp_jump_table_idx = KEY_80_BYTES;
> +		break;
> +	case 96:
> +		h->cmp_jump_table_idx = KEY_96_BYTES;
> +		break;
> +	case 112:
> +		h->cmp_jump_table_idx = KEY_112_BYTES;
> +		break;
> +	case 128:
> +		h->cmp_jump_table_idx = KEY_128_BYTES;
> +		break;
> +	default:
> +		/* If key is not multiple of 16, use generic memcmp */
> +		h->cmp_jump_table_idx = KEY_OTHER_BYTES;
> +	}
> +#else
> +	h->cmp_jump_table_idx = KEY_OTHER_BYTES;
> +#endif
> +
> +	if (use_local_cache) {
> +		h->local_free_slots = rte_zmalloc_socket(NULL,
> +				sizeof(struct lcore_cache) * RTE_MAX_LCORE,
> +				RTE_CACHE_LINE_SIZE, params->socket_id);
> +	}
> +
> +	/* Default hash function */
> +#if defined(RTE_ARCH_X86)
> +	default_hash_func = (rte_hash_function)rte_hash_crc;
> +#elif defined(RTE_ARCH_ARM64)
> +	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_CRC32))
> +		default_hash_func = (rte_hash_function)rte_hash_crc;
> +#endif
> +	/* Setup hash context */
> +	snprintf(h->name, sizeof(h->name), "%s", params->name);
> +	h->entries = params->entries;
> +	h->key_len = params->key_len;
> +	h->key_entry_size = key_entry_size;
> +	h->hash_func_init_val = params->hash_func_init_val;
> +
> +	h->num_buckets = num_buckets;
> +	h->bucket_bitmask = h->num_buckets - 1;
> +	h->buckets = buckets;
> +	h->buckets_ext = buckets_ext;
> +	h->free_ext_bkts = r_ext;
> +	h->hash_func = (params->hash_func == NULL) ?
> +		default_hash_func : params->hash_func;
> +	h->key_store = k;
> +	h->free_slots = r;
> +	h->tbl_chng_cnt = tbl_chng_cnt;
> +	*h->tbl_chng_cnt = 0;
> +	h->hw_trans_mem_support = rte_tm_supported();
> +	h->use_local_cache = use_local_cache;
> +	h->readwrite_concur_support = readwrite_concur_support;
> +	h->ext_table_support = ext_table_support;
> +	h->writer_takes_lock = writer_takes_lock;
> +	h->no_free_on_del = no_free_on_del;
> +	h->readwrite_concur_lf_support = readwrite_concur_lf_support;
> +
> +#if defined(RTE_ARCH_X86)
> +	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE2))
> +		h->sig_cmp_fn = RTE_HASH_COMPARE_SSE;
> +	else
> +#endif
> +		h->sig_cmp_fn = RTE_HASH_COMPARE_SCALAR;
> +
> +	if (h->writer_takes_lock) {
> +		h->readwrite_lock = rte_malloc(NULL, sizeof(rte_rwlock_t),
> +						RTE_CACHE_LINE_SIZE);
> +		if (h->readwrite_lock == NULL)
> +			goto err_unlock;
> +
> +		rte_rwlock_init(h->readwrite_lock);
> +	}
> +
> +	/* Populate free slots ring. Entry zero is reserved for key misses. */
> +	for (i = 1; i < num_key_slots; i++)
> +		rte_ring_sp_enqueue(r, (void *)((uintptr_t) i));
> +
> +	te->data = (void *) h;
> +	TAILQ_INSERT_TAIL(hash_list, te, next);
> +	rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
> +
> +	return h;
> +err_unlock:
> +	rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
> +err:
> +	rte_ring_free(r);
> +	rte_ring_free(r_ext);
> +	rte_free(te);
> +	rte_free(h);
> +	rte_free(buckets);
> +	rte_free(buckets_ext);
> +	rte_free(k);
> +	rte_free(tbl_chng_cnt);
> +	return NULL;
> +}
> +BIND_DEFAULT_SYMBOL(rte_hash_create, _v1811, 18.11);
> +MAP_STATIC_SYMBOL(
> +	struct rte_hash *rte_hash_create(
> +		const struct rte_hash_parameters *params),
> +		rte_hash_create_v1811);
>  
>  void
>  rte_hash_free(struct rte_hash *h)
> diff --git a/lib/librte_hash/rte_hash.h b/lib/librte_hash/rte_hash.h
> index c93d1a137..93c7019ec 100644
> --- a/lib/librte_hash/rte_hash.h
> +++ b/lib/librte_hash/rte_hash.h
> @@ -30,13 +30,27 @@ extern "C" {
>  #define RTE_HASH_LOOKUP_BULK_MAX		64
>  #define RTE_HASH_LOOKUP_MULTI_MAX		RTE_HASH_LOOKUP_BULK_MAX
>  
> -/** Enable Hardware transactional memory support. */
> +/**
> + * @deprecated
> + * This define will be removed in the next release.
> + * If the target platform supports hardware transactional memory
> + * it will be used without user consent as it provides the best possible
> + * performance.
> + *
> + * Enable Hardware transactional memory support.
> + */
>  #define RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT	0x01
>  
>  /** Default behavior of insertion, single writer/multi writer */
>  #define RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD 0x02
>  
> -/** Flag to support reader writer concurrency */
> +/**
> + * @deprecated
> + * This define will be removed in the next release.
> + * This library should be thread-safe by default.
> + *
> + * Flag to support reader writer concurrency
> + */
>  #define RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY 0x04
>  

Do we not need/want to add some new flags to disable these features, in
case there are cases where either RW concurrency, or transaction memory is
unwanted?

>  /** Flag to indicate the extendabe bucket table feature should be used */
> @@ -105,6 +119,10 @@ struct rte_hash;
>   */
>  struct rte_hash *
>  rte_hash_create(const struct rte_hash_parameters *params);
> +struct rte_hash *
> +rte_hash_create_v20(const struct rte_hash_parameters *params);
> +struct rte_hash *
> +rte_hash_create_v1811(const struct rte_hash_parameters *params);
>  
>  /**
>   * Set a new hash compare function other than the default one.
> diff --git a/lib/librte_hash/rte_hash_version.map b/lib/librte_hash/rte_hash_version.map
> index 734ae28b0..c72469099 100644
> --- a/lib/librte_hash/rte_hash_version.map
> +++ b/lib/librte_hash/rte_hash_version.map
> @@ -54,6 +54,13 @@ DPDK_18.08 {
>  
>  } DPDK_16.07;
>  
> +DPDK_18.11 {
> +	global:
> +
> +	rte_hash_create;
> +
> +} DPDK_18.08;
> +
>  EXPERIMENTAL {
>  	global:
>  
> -- 
> 2.17.1
> 


More information about the dev mailing list