[dpdk-dev] [RFC 1/2] hash: add lock free support for extendable bucket

Honnappa Nagarahalli Honnappa.Nagarahalli at arm.com
Fri Mar 15 01:31:54 CET 2019


<snip>

> >> @@ -1072,10 +1071,23 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
> >> 	bkt_id = (uint32_t)((uintptr_t)ext_bkt_id) - 1;
> >> 	/* Use the first location of the new bucket */
> >> 	(h->buckets_ext[bkt_id]).sig_current[0] = short_sig;
> >> -	(h->buckets_ext[bkt_id]).key_idx[0] = new_idx;
> >> +	/* Key can be of arbitrary length, so it is
> >> +	 * not possible to store it atomically.
> >> +	 * Hence the new key element's memory stores
> >> +	 * (key as well as data) should be complete
> >> +	 * before it is referenced.
> >> +	 */
> >> +	__atomic_store_n(&(h->buckets_ext[bkt_id]).key_idx[0],
> >> +			 new_idx,
> >> +			 __ATOMIC_RELEASE);
> > [Wang, Yipeng] Since it has not been linked yet, and the linking later on
> > is protected by a release store, do we really need an atomic store here?
> The atomic store is used here to maintain code consistency.
Agreed, the release ordering is not required here. However, removing it does not help much, as this store is only for the first element of a new bucket.
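To illustrate the point (just a sketch reusing the identifiers from the hunk
above, not a tested change): the new bucket is still private to the writer at
this point, so a relaxed store would be sufficient; it is the release store
that links the bucket into the list which publishes all of these writes to
the readers.

	/* Bucket is not linked yet, so it is private to the writer;
	 * a relaxed store is enough for its first entry.
	 */
	(h->buckets_ext[bkt_id]).sig_current[0] = short_sig;
	__atomic_store_n(&(h->buckets_ext[bkt_id]).key_idx[0],
			 new_idx,
			 __ATOMIC_RELAXED);
	/* The release store on the link makes the fully initialized
	 * bucket visible to readers that acquire-load 'next'.
	 */
	__atomic_store_n(&last->next,
			 &h->buckets_ext[bkt_id],
			 __ATOMIC_RELEASE);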

> >
> >> 	/* Link the new bucket to sec bucket linked list */
> >> 	last = rte_hash_get_last_bkt(sec_bkt);
> >> -	last->next = &h->buckets_ext[bkt_id];
> >> +	/* New bucket's memory stores (key as well as data)
> >> +	 * should be complete before it is referenced
> >> +	 */
> >> +	__atomic_store_n(&last->next,
> >> +			 &h->buckets_ext[bkt_id],
> >> +			 __ATOMIC_RELEASE);
The corresponding load-acquire of the 'next' pointer is missing in the 'FOR_EACH_BUCKET' macro.
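For example, the traversal could acquire-load the next pointer; a minimal
sketch (the macro name and layout follow the existing FOR_EACH_BUCKET, but
this is not a tested change):

#define FOR_EACH_BUCKET(CURRENT_BKT, START_BUCKET)                        \
	for (CURRENT_BKT = START_BUCKET;                                  \
		CURRENT_BKT != NULL;                                      \
		CURRENT_BKT = __atomic_load_n(&CURRENT_BKT->next,         \
					      __ATOMIC_ACQUIRE))

The initial load of the list head (e.g. 'next_bkt = secondary_bkt[i]->next')
would need the same acquire treatment.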

> >> 	__hash_rw_writer_unlock(h);
> >> 	return new_idx - 1;
> >>
> >> @@ -1366,7 +1378,8 @@ remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i)
> >> * empty slot.
> >> */
> >> static inline void
> >> -__rte_hash_compact_ll(struct rte_hash_bucket *cur_bkt, int pos) {
> >> +__rte_hash_compact_ll(const struct rte_hash *h,
> >> +			struct rte_hash_bucket *cur_bkt, int pos) {
> >> 	int i;
> >> 	struct rte_hash_bucket *last_bkt;
> >>
> >> @@ -1377,10 +1390,27 @@ __rte_hash_compact_ll(struct rte_hash_bucket *cur_bkt, int pos) {
> >>
> >> 	for (i = RTE_HASH_BUCKET_ENTRIES - 1; i >= 0; i--) {
> >> 		if (last_bkt->key_idx[i] != EMPTY_SLOT) {
> >> -			cur_bkt->key_idx[pos] = last_bkt->key_idx[i];
> >> 			cur_bkt->sig_current[pos] = last_bkt->sig_current[i];
> >> +			__atomic_store_n(&cur_bkt->key_idx[pos],
> >> +					 last_bkt->key_idx[i],
> >> +					 __ATOMIC_RELEASE);
> >> +			if (h->readwrite_concur_lf_support) {
> >> +				/* Inform the readers that the table has changed
> >> +				 * Since there is one writer, load acquires on
> >> +				 * tbl_chng_cnt are not required.
> >> +				 */
> >> +				__atomic_store_n(h->tbl_chng_cnt,
> >> +					 *h->tbl_chng_cnt + 1,
> >> +					 __ATOMIC_RELEASE);
> >> +				/* The stores to sig_alt and sig_current should
> >> +				 * not move above the store to tbl_chng_cnt.
> >> +				 */
> >> +				__atomic_thread_fence(__ATOMIC_RELEASE);
> >> +			}
> >> 			last_bkt->sig_current[i] = NULL_SIGNATURE;
> >> -			last_bkt->key_idx[i] = EMPTY_SLOT;
> >> +			__atomic_store_n(&last_bkt->key_idx[i],
> >> +					 EMPTY_SLOT,
> >> +					 __ATOMIC_RELEASE);
> >> 			return;
> >> 		}
> >> 	}
> >> @@ -1449,7 +1479,7 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
> >> 	/* look for key in primary bucket */
> >> 	ret = search_and_remove(h, key, prim_bkt, short_sig, &pos);
> >> 	if (ret != -1) {
> >> -		__rte_hash_compact_ll(prim_bkt, pos);
> >> +		__rte_hash_compact_ll(h, prim_bkt, pos);
> >> 		last_bkt = prim_bkt->next;
> >> 		prev_bkt = prim_bkt;
> >> 		goto return_bkt;
> >> @@ -1461,7 +1491,7 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
> >> 	FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
> >> 		ret = search_and_remove(h, key, cur_bkt, short_sig, &pos);
> >> 		if (ret != -1) {
> >> -			__rte_hash_compact_ll(cur_bkt, pos);
> >> +			__rte_hash_compact_ll(h, cur_bkt, pos);
> >> 			last_bkt = sec_bkt->next;
> >> 			prev_bkt = sec_bkt;
> >> 			goto return_bkt;
> >> @@ -1488,11 +1518,21 @@ __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
> >> 	}
> >> 	/* found empty bucket and recycle */
> >> 	if (i == RTE_HASH_BUCKET_ENTRIES) {
> >> -		prev_bkt->next = last_bkt->next = NULL;
> >> +		__atomic_store_n(&prev_bkt->next,
> >> +				 NULL,
> >> +				 __ATOMIC_RELEASE);
> >> 		uint32_t index = last_bkt - h->buckets_ext + 1;
> >> -		rte_ring_sp_enqueue(h->free_ext_bkts, (void *)(uintptr_t)index);
> >> -	}
> >> +		if (!h->no_free_on_del)
> >> +			rte_ring_sp_enqueue(h->free_ext_bkts, (void *)(uintptr_t)index);
> >> +		else {
> >> +			struct rte_hash_key *key_slot =
> >> +				(struct rte_hash_key *)(
> >> +				(char *)h->key_store +
> >> +				ret * h->key_entry_size);
> >> +			key_slot->ext_bkt_to_free = index;
> > [Wang, Yipeng] Is there a chance that a key_slot may already have a
> > previous ext_bkt which now gets overwritten, so that the previous one is
> > gone forever?
> No, it is not possible, since the index is stored in a key_slot that is
> associated with a deleted key.
> >>
> >> +		}
> >> +	}
> >> 	__hash_rw_writer_unlock(h);
> >> 	return ret;
> >> }
> >> @@ -1567,6 +1607,14 @@ rte_hash_free_key_with_position(const struct rte_hash *h,
> >> 				(void *)((uintptr_t)position));
> >> 	}
> >>
> >> +	const struct rte_hash_key *key_slot = (const struct rte_hash_key *)(
> >> +						(const char *)h->key_store +
> >> +						position * h->key_entry_size);
> >> +	uint32_t index = key_slot->ext_bkt_to_free;
> >> +	if (!index)
> >> +		/* Recycle empty ext bkt to free list. */
> >> +		rte_ring_sp_enqueue(h->free_ext_bkts, (void *)(uintptr_t)index);
Suggest moving this to before the key index is freed, to avoid a race condition.
Also, key_slot->ext_bkt_to_free needs to be reset to 0 after the ext bucket is freed.
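Something along these lines (a rough sketch using the identifiers from the
hunk above, not a drop-in replacement):

	struct rte_hash_key *key_slot = (struct rte_hash_key *)(
					(char *)h->key_store +
					position * h->key_entry_size);
	uint32_t index = key_slot->ext_bkt_to_free;

	if (index) {
		/* Recycle the empty ext bkt to the free list first ... */
		rte_ring_sp_enqueue(h->free_ext_bkts,
				    (void *)(uintptr_t)index);
		/* ... and clear it so that a later reuse of this key slot
		 * does not recycle the same bucket twice.
		 */
		key_slot->ext_bkt_to_free = 0;
	}

	/* Only after this, return the key index itself to free_slots or
	 * the lcore cache, as the existing code does.
	 */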

> >> +
> >> 	return 0;
> >> }
> >>
> >> @@ -1855,6 +1903,9 @@ __rte_hash_lookup_bulk_lf(const struct rte_hash *h, const void **keys,
> >> 		rte_prefetch0(secondary_bkt[i]);
> >> 	}
> >>
> >> +	for (i = 0; i < num_keys; i++)
> >> +		positions[i] = -ENOENT;
> > [Wang, Yipeng] So is this for a performance reason?
> Resetting positions[] on each iteration of the do...while() loop would require
> 'hits' to be reset as well, which causes a performance hit.
> >> +
> >> 	do {
> >> 		/* Load the table change counter before the lookup
> >> 		 * starts. Acquire semantics will make sure that
> >> @@ -1899,7 +1950,6 @@ __rte_hash_lookup_bulk_lf(const struct rte_hash *h, const void **keys,
> >>
> >> 		/* Compare keys, first hits in primary first */
> >> 		for (i = 0; i < num_keys; i++) {
> >> -			positions[i] = -ENOENT;
> >> 			while (prim_hitmask[i]) {
> >> 				uint32_t hit_index =
> >> 						__builtin_ctzl(prim_hitmask[i])
> >> @@ -1972,6 +2022,36 @@ __rte_hash_lookup_bulk_lf(const struct rte_hash *h, const void **keys,
> >> 			continue;
> >> 		}
> >>
> >> +
> >> +		/* all found, do not need to go through ext bkt */
> >> +		if (hits == ((1ULL << num_keys) - 1)) {
> >> +			if (hit_mask != NULL)
> >> +				*hit_mask = hits;
> >
> > [Wang, Yipeng] I think you need to check the version counter before
> > returning, and how about the fence?
> If all the keys are found, there is no need to check the counter. The counter
> only guards against false misses caused by keys moving while the lookup is in
> progress; a successful full key comparison is valid regardless of any such
> movement.
> >> +			return;
> >> +		}
> >> +		/* need to check ext buckets for match */
> >> +		if (h->ext_table_support) {
> >> +			for (i = 0; i < num_keys; i++) {
> >> +				if ((hits & (1ULL << i)) != 0)
> >> +					continue;
> >> +				next_bkt = secondary_bkt[i]->next;
> >> +				FOR_EACH_BUCKET(cur_bkt, next_bkt) {
> >> +					if (data != NULL)
> >> +						ret = search_one_bucket_lf(h,
> >> +							keys[i], sig[i],
> >> +							&data[i], cur_bkt);
> >> +					else
> >> +						ret = search_one_bucket_lf(h,
> >> +								keys[i], sig[i],
> >> +								NULL, cur_bkt);
> >> +					if (ret != -1) {
> >> +						positions[i] = ret;
> >> +						hits |= 1ULL << i;
> >> +						break;
> >> +					}
> >> +				}
> >> +			}
> >> +		}
> >> 		/* The loads of sig_current in compare_signatures
> >> 		 * should not move below the load from tbl_chng_cnt.
> >> 		 */
> >> @@ -1988,34 +2068,6 @@ __rte_hash_lookup_bulk_lf(const struct rte_hash *h, const void **keys,
> >> 					__ATOMIC_ACQUIRE);
> >> 	} while (cnt_b != cnt_a);
> >>
> >> -	/* all found, do not need to go through ext bkt */
> >> -	if ((hits == ((1ULL << num_keys) - 1)) || !h->ext_table_support) {
> >> -		if (hit_mask != NULL)
> >> -			*hit_mask = hits;
> >> -		__hash_rw_reader_unlock(h);
> >> -		return;
> >> -	}
> >> -
> >> -	/* need to check ext buckets for match */
> >> -	for (i = 0; i < num_keys; i++) {
> >> -		if ((hits & (1ULL << i)) != 0)
> >> -			continue;
> >> -		next_bkt = secondary_bkt[i]->next;
> >> -		FOR_EACH_BUCKET(cur_bkt, next_bkt) {
> >> -			if (data != NULL)
> >> -				ret = search_one_bucket_lf(h, keys[i],
> >> -						sig[i], &data[i], cur_bkt);
> >> -			else
> >> -				ret = search_one_bucket_lf(h, keys[i],
> >> -						sig[i], NULL, cur_bkt);
> >> -			if (ret != -1) {
> >> -				positions[i] = ret;
> >> -				hits |= 1ULL << i;
> >> -				break;
> >> -			}
> >> -		}
> >> -	}
> >> -
> >> 	if (hit_mask != NULL)
> >> 		*hit_mask = hits;
> >> }
> >> diff --git a/lib/librte_hash/rte_cuckoo_hash.h b/lib/librte_hash/rte_cuckoo_hash.h
> >> index eacdaa8d4684..062cc2dd0296 100644
> >> --- a/lib/librte_hash/rte_cuckoo_hash.h
> >> +++ b/lib/librte_hash/rte_cuckoo_hash.h
> >> @@ -129,6 +129,14 @@ struct lcore_cache {
> >>
> >> /* Structure that stores key-value pair */
> >> struct rte_hash_key {
> >> +	/* Stores index of an empty ext bkt to be recycled on calling
> >> +	 * rte_hash_del_xxx APIs. When lock free read-wrie concurrency is
> > [Wang, Yipeng] typo
> Will update it in the next version.
> >> +	 * enabled, an empty ext bkt cannot be put into free list immediately
> >> +	 * (as readers might be using it still). Hence freeing of the ext bkt
> >> +	 * is piggy-backed to freeing of the key index.
> >> +	 */
> > [Wang, Yipeng] I am wondering if this breaks the "guarantee" provided
> > by the ext table, since a whole bucket cannot be reused if one key is not
> > freed. Is there any fundamental issue with a new API to recycle the ext
> > bucket, or do you just not want to add a new API?
> With the lock-free feature, 'delete' becomes a two-step process of 'delete'
> and 'free'. In other words, it can be viewed by the application as a
> 'prolonged delete'. I am not sure how adding a new API to recycle the ext
> bucket would help solve the issue.
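For reference, the application-side flow would look roughly like this (sketch
only; 'wait_for_readers()' stands in for whatever RCU/quiescent-state
mechanism the application uses to ensure no reader still holds a reference):

	/* Step 1: logical delete. With no_free_on_del, the key index (and,
	 * with this patch, any emptied ext bucket) is not recycled yet.
	 */
	int32_t pos = rte_hash_del_key(h, key);
	if (pos >= 0) {
		/* Wait until no reader can still be referencing the entry. */
		wait_for_readers();
		/* Step 2: free the key index; the empty ext bucket recorded
		 * in ext_bkt_to_free gets recycled here as well.
		 */
		rte_hash_free_key_with_position(h, pos);
	}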
> >> +	uint32_t ext_bkt_to_free;
> >> +
> >> 	union {
> >> 		uintptr_t idata;
> >> 		void *pdata;
> >> --
> >> 2.17.1


