[dpdk-dev] [PATCH v4 2/7] member: implement HT mode

De Lara Guarch, Pablo pablo.de.lara.guarch at intel.com
Mon Oct 2 15:30:56 CEST 2017



> -----Original Message-----
> From: Wang, Yipeng1
> Sent: Wednesday, September 27, 2017 6:40 PM
> To: dev at dpdk.org
> Cc: thomas at monjalon.net; Tai, Charlie <charlie.tai at intel.com>; Gobriel,
> Sameh <sameh.gobriel at intel.com>; De Lara Guarch, Pablo
> <pablo.de.lara.guarch at intel.com>; Mcnamara, John
> <john.mcnamara at intel.com>; Wang, Yipeng1 <yipeng1.wang at intel.com>
> Subject: [PATCH v4 2/7] member: implement HT mode
> 

...

> diff --git a/lib/librte_member/Makefile b/lib/librte_member/Makefile index
> 1a79eaa..ad26548 100644
> --- a/lib/librte_member/Makefile
> +++ b/lib/librte_member/Makefile
> @@ -42,7 +42,7 @@ EXPORT_MAP := rte_member_version.map
> LIBABIVER := 1
> 
>  # all source are stored in SRCS-y
> -SRCS-$(CONFIG_RTE_LIBRTE_MEMBER) +=  rte_member.c
> +SRCS-$(CONFIG_RTE_LIBRTE_MEMBER) +=  rte_member.c
> rte_member_ht.c
>  # install includes
>  SYMLINK-$(CONFIG_RTE_LIBRTE_MEMBER)-include := rte_member.h
> 
> diff --git a/lib/librte_member/rte_member_ht.c
> b/lib/librte_member/rte_member_ht.c
> new file mode 100644
> index 0000000..55672a4
> --- /dev/null
> +++ b/lib/librte_member/rte_member_ht.c

...

> +
> +static inline int
> +insert_overwrite_search(uint32_t bucket, member_sig_t tmp_sig,
> +		struct member_ht_bucket *buckets,
> +		member_set_t set_id)

I would call "bucket", "bucket_id", for better understanding.
This comment also applies to other parts of the code (e.g. prim_buckets).

> +{
> +	int i;
> +	for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
> +		if (buckets[bucket].sigs[i] == tmp_sig) {
> +			buckets[bucket].sets[i] = set_id;
> +			return 1;
> +		}

Is this function used to update an existing entry?
At first, I thought that this was evicting another entry, when the bucket was full,
but it looks like it is updating the set_id of an existing entry.
If this is the case, I would change the function name and add a comment explaining this.

> +	}
> +	return 0;
> +}
> +
> +static inline int
> +search_bucket_single(uint32_t bucket, member_sig_t tmp_sig,
> +		struct member_ht_bucket *buckets,
> +		member_set_t *set_id)
> +{
> +	int iter;

Iter should be "unsigned int" (or similar, maybe "uint8_t")

> +	for (iter = 0; iter < RTE_MEMBER_BUCKET_ENTRIES; iter++) {
> +		if (tmp_sig == buckets[bucket].sigs[iter] &&
> +				buckets[bucket].sets[iter] !=
> +				RTE_MEMBER_NO_MATCH) {
> +			*set_id = buckets[bucket].sets[iter];
> +			return 1;
> +		}
> +	}
> +	return 0;
> +}
> +
> +static inline void
> +search_bucket_multi(uint32_t bucket, member_sig_t tmp_sig,
> +		struct member_ht_bucket *buckets,
> +		uint32_t *counter,
> +		uint32_t match_per_key,

Better change to "matches_per_key".

> +		member_set_t *set_id)
> +{
> +	int iter;
> +	for (iter = 0; iter < RTE_MEMBER_BUCKET_ENTRIES; iter++) {
> +		if (tmp_sig == buckets[bucket].sigs[iter] &&
> +				buckets[bucket].sets[iter] !=
> +				RTE_MEMBER_NO_MATCH) {
> +			set_id[*counter] = buckets[bucket].sets[iter];
> +			(*counter)++;
> +			if (*counter >= match_per_key)
> +				return;
> +		}
> +	}
> +}
> +
> +int
> +rte_member_create_ht(struct rte_member_setsum *ss,
> +		const struct rte_member_parameters *params) {

...

> +
> +	RTE_MEMBER_LOG(DEBUG, "Hash table based filter created, "
> +			"the table has %u entries, %u buckets\n",
> +		num_buckets,
> +		num_buckets / RTE_MEMBER_BUCKET_ENTRIES);

Shouldn't this be "num_buckets * RTE_MEMBER_BUCKET_ENTRIES" and "num_buckets"?

> +	return 0;
> +}
> +
> +static inline
> +void get_buckets_index(const struct rte_member_setsum *ss, const void
> *key,
> +		uint32_t *prim_bkt, uint32_t *sec_bkt, member_sig_t
> *sig) {

"static inline void" should be in the same line.

> +	uint32_t first_hash = MEMBER_HASH_FUNC(key, ss->key_len,
> +						ss->prim_hash_seed);
> +	uint32_t sec_hash = MEMBER_HASH_FUNC(&first_hash,
> sizeof(uint32_t),
> +						ss->sec_hash_seed);
> +	*sig = first_hash;
> +	if (ss->cache) {
> +		*prim_bkt = sec_hash & ss->bucket_mask;

Is this correct? Using the secondary hash to acces the primary bucket?
Why is the cache case different from the non-cache case?
I think this function deserves some comments to explain all this calculations.

> +		*sec_bkt =  (sec_hash >> 16) & ss->bucket_mask;
> +	} else {
> +		*prim_bkt = sec_hash & ss->bucket_mask;
> +		*sec_bkt =  (*prim_bkt ^ *sig) & ss->bucket_mask;
> +	}
> +}
> +
> +int
> +rte_member_lookup_ht(const struct rte_member_setsum *ss,
> +		const void *key, member_set_t *set_id) {
> +	uint32_t prim_bucket, sec_bucket;
> +	member_sig_t tmp_sig;
> +	struct member_ht_bucket *buckets = ss->table;
> +
> +

Remove extra blank line.

> +	*set_id = RTE_MEMBER_NO_MATCH;
> +	get_buckets_index(ss, key, &prim_bucket, &sec_bucket,
> &tmp_sig);
> +
> +	if (search_bucket_single(prim_bucket, tmp_sig, buckets,
> +			set_id) ||
> +			search_bucket_single(sec_bucket, tmp_sig,
> +				buckets, set_id))
> +		return 1;
> +
> +	return 0;
> +}
> +
> +uint32_t
> +rte_member_lookup_bulk_ht(const struct rte_member_setsum *ss,
> +		const void **keys, uint32_t num_keys, member_set_t
> *set_id) {
> +	uint32_t i;
> +	uint32_t ret = 0;

Better change to something more meaningful, like "nr_matches".

> +	struct member_ht_bucket *buckets = ss->table;
> +	member_sig_t tmp_sig[RTE_MEMBER_LOOKUP_BULK_MAX];
> +	uint32_t prim_buckets[RTE_MEMBER_LOOKUP_BULK_MAX];
> +	uint32_t sec_buckets[RTE_MEMBER_LOOKUP_BULK_MAX];
> +
> +	for (i = 0; i < num_keys; i++) {
> +		get_buckets_index(ss, keys[i], &prim_buckets[i],
> +				&sec_buckets[i], &tmp_sig[i]);
> +		rte_prefetch0(&buckets[prim_buckets[i]]);
> +		rte_prefetch0(&buckets[sec_buckets[i]]);
> +	}
> +
> +	for (i = 0; i < num_keys; i++) {
> +		if (search_bucket_single(prim_buckets[i], tmp_sig[i],
> +				buckets, &set_id[i]) ||
> +				search_bucket_single(sec_buckets[i],
> +				tmp_sig[i], buckets, &set_id[i]))
> +			ret++;
> +		else
> +			set_id[i] = RTE_MEMBER_NO_MATCH;
> +	}
> +	return ret;
> +}
> +
> +uint32_t
> +rte_member_lookup_multi_ht(const struct rte_member_setsum *ss,
> +		const void *key, uint32_t match_per_key,
> +		member_set_t *set_id)
> +{
> +	uint32_t ret = 0;

Better change to something more meaningful, like "nr_matches".
This applies to the following functions.

> +	uint32_t prim_bucket, sec_bucket;
> +	member_sig_t tmp_sig;
> +	struct member_ht_bucket *buckets = ss->table;
> +
> +	get_buckets_index(ss, key, &prim_bucket, &sec_bucket,
> &tmp_sig);
> +
> +	search_bucket_multi(prim_bucket, tmp_sig, buckets, &ret,
> +			 match_per_key, set_id);
> +	if (ret < match_per_key)
> +		search_bucket_multi(sec_bucket, tmp_sig,
> +			buckets, &ret, match_per_key, set_id);
> +	return ret;
> +}
> +

...

> +static inline int
> +try_insert(struct member_ht_bucket *buckets, uint32_t prim, uint32_t
> sec,
> +		member_sig_t sig, member_set_t set_id) {
> +	int i;
> +	/* If not full then insert into one slot*/

Extra space at the end of the comment, before */
> +	for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
> +		if (buckets[prim].sets[i] == RTE_MEMBER_NO_MATCH) {
> +			buckets[prim].sigs[i] = sig;
> +			buckets[prim].sets[i] = set_id;
> +			return 0;
> +		}
> +	}
> +	/* if prim failed, we need to access second cache line */

Second bucket, instead of second cache line? Also, check that all comments
start with capital letters.

> +	for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
> +		if (buckets[sec].sets[i] == RTE_MEMBER_NO_MATCH) {
> +			buckets[sec].sigs[i] = sig;
> +			buckets[sec].sets[i] = set_id;
> +			return 0;
> +		}
> +	}
> +	return -1;
> +}
> +
> +static inline int
> +try_overwrite(struct member_ht_bucket *buckets, uint32_t prim,
> uint32_t sec,
> +		member_sig_t sig, member_set_t set_id) {
> +	if (insert_overwrite_search(prim, sig, buckets, set_id) ||
> +			insert_overwrite_search(sec, sig, buckets,
> +				set_id))
> +		return 0;
> +	return -1;
> +}
> +
> +static inline int
> +evict_from_bucket(void)
> +{
> +	/* for now, we randomly pick one entry to evict */
> +	return rte_rand() & (RTE_MEMBER_BUCKET_ENTRIES - 1); }
> +
> +/*
> + * This function is similar to the cuckoo hash make_space function in
> +hash
> + * library
> + */
> +static inline int
> +make_space_bucket(const struct rte_member_setsum *ss, uint32_t
> bkt_num)
> +{
> +	static unsigned int nr_pushes;

A patch has been sent to change this static variable to be non-static,
in the hash library. Since this is following a similar implementation,
it will need the same change:
http://dpdk.org/dev/patchwork/patch/29104/

> +	unsigned int i, j;
> +	int ret;
> +	struct member_ht_bucket *buckets = ss->table;
> +	uint32_t next_bucket_idx;
> +	struct member_ht_bucket
> *next_bkt[RTE_MEMBER_BUCKET_ENTRIES];
> +	struct member_ht_bucket *bkt = &buckets[bkt_num];
> +	member_set_t flag_mask = 1U << (sizeof(member_set_t) * 8 - 1);

Include a comment about this flag_mask
(MSB is set to indicate if an entry has been already pushed).

> +	/*
> +	 * Push existing item (search for bucket with space in
> +	 * alternative locations) to its alternative location
> +	 */
> +	for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++) {
> +		/* Search for space in alternative locations */
> +		next_bucket_idx = (bkt->sigs[i] ^ bkt_num) & ss-
> >bucket_mask;
> +		next_bkt[i] = &buckets[next_bucket_idx];
> +		for (j = 0; j < RTE_MEMBER_BUCKET_ENTRIES; j++) {
> +			if (next_bkt[i]->sets[j] ==
> RTE_MEMBER_NO_MATCH)
> +				break;
> +		}
> +
> +		if (j != RTE_MEMBER_BUCKET_ENTRIES)
> +			break;
> +	}
> +
> +	/* Alternative location has spare room (end of recursive function)
> */
> +	if (i != RTE_MEMBER_BUCKET_ENTRIES) {
> +		next_bkt[i]->sigs[j] = bkt->sigs[i];
> +		next_bkt[i]->sets[j] = bkt->sets[i];
> +		return i;

Since you want to return 1 when there is an eviction, I think you can return 1 here,
no need to return the index in the bucket.

> +	}
> +
> +	/* Pick entry that has not been pushed yet */
> +	for (i = 0; i < RTE_MEMBER_BUCKET_ENTRIES; i++)
> +		if ((bkt->sets[i] & flag_mask) == 0)
> +			break;
> +
> +	/* All entries have been pushed, so entry cannot be added */
> +	if (i == RTE_MEMBER_BUCKET_ENTRIES ||
> +			nr_pushes > RTE_MEMBER_MAX_PUSHES)
> +		return -ENOSPC;
> +
> +	next_bucket_idx = (bkt->sigs[i] ^ bkt_num) & ss->bucket_mask;
> +	/* Set flag to indicate that this entry is going to be pushed */
> +	bkt->sets[i] |= flag_mask;
> +
> +	nr_pushes++;
> +	/* Need room in alternative bucket to insert the pushed entry */
> +	ret = make_space_bucket(ss, next_bucket_idx);
> +	/*
> +	 * After recursive function.
> +	 * Clear flags and insert the pushed entry
> +	 * in its alternative location if successful,
> +	 * or return error
> +	 */
> +	bkt->sets[i] &= ~flag_mask;
> +	nr_pushes = 0;
> +	if (ret >= 0) {
> +		next_bkt[i]->sigs[ret] = bkt->sigs[i];
> +		next_bkt[i]->sets[ret] = bkt->sets[i];
> +		return i;

Same about return 1 here.

> +	} else
> +		return ret;
> +}
> +
> +int
> +rte_member_add_ht(const struct rte_member_setsum *ss,
> +		const void *key, member_set_t set_id) {
> +	int ret;
> +	uint32_t prim_bucket, sec_bucket;
> +	member_sig_t tmp_sig;
> +	struct member_ht_bucket *buckets = ss->table;
> +	member_set_t flag_mask = 1U << (sizeof(member_set_t) * 8 - 1);
> +
> +	if (set_id == RTE_MEMBER_NO_MATCH || (set_id & flag_mask) !=
> 0)
> +		return -EINVAL;
> +
> +	get_buckets_index(ss, key, &prim_bucket, &sec_bucket,
> &tmp_sig);
> +
> +	/* if it is cache based filter, we try overwriting existing entry */
> +	if (ss->cache) {
> +		ret = try_overwrite(buckets, prim_bucket, sec_bucket,
> tmp_sig,
> +					set_id);

If the comment that I made above (in the insert_overwrite_search function)
is true and this function is used to update the set_id of an entry,
can this be used also in non-cache mode?

> +		if (ret != -1)
> +			return ret;
> +	}
> +	/* If not full then insert into one slot*/

Extra space before */.

> +	ret = try_insert(buckets, prim_bucket, sec_bucket, tmp_sig, set_id);
> +	if (ret != -1)
> +		return ret;
> +
> +	/* random pick prim or sec for recursive displacement */
> +
> +	uint32_t select_bucket = (tmp_sig && 1U) ? prim_bucket :
> sec_bucket;
> +	if (ss->cache) {
> +		ret = evict_from_bucket();
> +		buckets[select_bucket].sigs[ret] = tmp_sig;
> +		buckets[select_bucket].sets[ret] = set_id;
> +		return 1;
> +	}
> +
> +	ret = make_space_bucket(ss, select_bucket);

If this only return a negative value when failure or 1 when success,
you can check for ret == 1 and not set ret = 1.

> +	if (ret >= 0) {
> +		buckets[select_bucket].sigs[ret] = tmp_sig;
> +		buckets[select_bucket].sets[ret] = set_id;
> +		ret = 1;
> +	}
> +
> +	return ret;
> +}
> +
> +void
> +rte_member_free_ht(struct rte_member_setsum *ss) {
> +	rte_free(ss->table);
> +}
> +

...

> +
> +void
> +rte_member_reset_ht(const struct rte_member_setsum *ss) {
> +	uint32_t i, j;
> +	struct member_ht_bucket *buckets = ss->table;

To keep to consistency, I would leave a blank line between the variables declarations
and the rest of the function implementation.

> +	for (i = 0; i < ss->bucket_cnt; i++) {
> +		for (j = 0; j < RTE_MEMBER_BUCKET_ENTRIES; j++)
> +			buckets[i].sets[j] = RTE_MEMBER_NO_MATCH;
> +	}
> +}



More information about the dev mailing list