[PATCH v2 3/4] hash: reduce architecture special cases
    Morten Brørup 
    mb at smartsharesystems.com
       
    Tue Aug 26 08:55:23 CEST 2025
    
    
  
> From: Stephen Hemminger [mailto:stephen at networkplumber.org]
> Sent: Friday, 22 August 2025 20.20
> 
> Make comparison of sizes compatible across platforms.
> Keep the special case code for 16 bytes for x86 and arm64 but
> also add simple xor for others.
> 
> Need to keep rte_hash_k32_cmp_eq() exposed because ip_frag
> code poaches it.
> 
> Signed-off-by: Stephen Hemminger <stephen at networkplumber.org>
> ---
>  lib/hash/rte_cmp_arm64.h   | 56 +------------------------
>  lib/hash/rte_cmp_generic.h | 35 ++++++++++++++++
>  lib/hash/rte_cmp_x86.h     | 60 ++------------------------
>  lib/hash/rte_cuckoo_hash.c | 86 +++++++++++++++++++++++++++++++++-----
>  4 files changed, 116 insertions(+), 121 deletions(-)
>  create mode 100644 lib/hash/rte_cmp_generic.h
> 
> diff --git a/lib/hash/rte_cmp_arm64.h b/lib/hash/rte_cmp_arm64.h
> index a3e85635eb..2b2a37ebd2 100644
> --- a/lib/hash/rte_cmp_arm64.h
> +++ b/lib/hash/rte_cmp_arm64.h
> @@ -2,7 +2,7 @@
>   * Copyright(c) 2015 Cavium, Inc
>   */
> 
> -/* Functions to compare multiple of 16 byte keys (up to 128 bytes) */
> +/* Functions to compare multiple of 16 byte keys */
>  static inline int
>  rte_hash_k16_cmp_eq(const void *key1, const void *key2,
>  		    size_t key_len __rte_unused)
> @@ -27,59 +27,7 @@ rte_hash_k16_cmp_eq(const void *key1, const void
> *key2,
>  static inline int
>  rte_hash_k32_cmp_eq(const void *key1, const void *key2, size_t key_len)
>  {
> -	return rte_hash_k16_cmp_eq(key1, key2, key_len) ||
> +	return rte_hash_k16_cmp_eq(key1, key2, key_len) |
>  		rte_hash_k16_cmp_eq((const char *) key1 + 16,
>  				(const char *) key2 + 16, key_len);
>  }
I agree with the change from "||" to "|", so that the comparison is done in blocks of 32 bytes instead of 16.
Another potential improvement:
Consider passing the actual length of each compared block (16) instead of passing key_len through, i.e.:
	return rte_hash_k16_cmp_eq(key1, key2, 16) |
		rte_hash_k16_cmp_eq((const char *) key1 + 16,
				(const char *) key2 + 16, 16);
[...]
> diff --git a/lib/hash/rte_cmp_x86.h b/lib/hash/rte_cmp_x86.h
> index ddfbef462f..e7a38c8fcd 100644
> --- a/lib/hash/rte_cmp_x86.h
> +++ b/lib/hash/rte_cmp_x86.h
> @@ -4,7 +4,7 @@
> 
>  #include <rte_vect.h>
> 
> -/* Functions to compare multiple of 16 byte keys (up to 128 bytes) */
> +/* Function to compare multiple of 16 byte keys */
>  static inline int
>  rte_hash_k16_cmp_eq(const void *key1, const void *key2, size_t key_len
> __rte_unused)
>  {
> @@ -18,59 +18,7 @@ rte_hash_k16_cmp_eq(const void *key1, const void
> *key2, size_t key_len __rte_unu
>  static inline int
>  rte_hash_k32_cmp_eq(const void *key1, const void *key2, size_t key_len)
>  {
> -	return rte_hash_k16_cmp_eq(key1, key2, key_len) ||
> -		rte_hash_k16_cmp_eq((const char *) key1 + 16,
> -				(const char *) key2 + 16, key_len);
> -}
> -
> -static inline int
> -rte_hash_k48_cmp_eq(const void *key1, const void *key2, size_t key_len)
> -{
> -	return rte_hash_k16_cmp_eq(key1, key2, key_len) ||
> -		rte_hash_k16_cmp_eq((const char *) key1 + 16,
> -				(const char *) key2 + 16, key_len) ||
> -		rte_hash_k16_cmp_eq((const char *) key1 + 32,
> -				(const char *) key2 + 32, key_len);
> -}
> -
> -static inline int
> -rte_hash_k64_cmp_eq(const void *key1, const void *key2, size_t key_len)
> -{
> -	return rte_hash_k32_cmp_eq(key1, key2, key_len) ||
> -		rte_hash_k32_cmp_eq((const char *) key1 + 32,
> -				(const char *) key2 + 32, key_len);
> -}
> -
> -static inline int
> -rte_hash_k80_cmp_eq(const void *key1, const void *key2, size_t key_len)
> -{
> -	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
> -		rte_hash_k16_cmp_eq((const char *) key1 + 64,
> -				(const char *) key2 + 64, key_len);
> -}
> -
> -static inline int
> -rte_hash_k96_cmp_eq(const void *key1, const void *key2, size_t key_len)
> -{
> -	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
> -		rte_hash_k32_cmp_eq((const char *) key1 + 64,
> -				(const char *) key2 + 64, key_len);
> -}
> -
> -static inline int
> -rte_hash_k112_cmp_eq(const void *key1, const void *key2, size_t
> key_len)
> -{
> -	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
> -		rte_hash_k32_cmp_eq((const char *) key1 + 64,
> -				(const char *) key2 + 64, key_len) ||
> -		rte_hash_k16_cmp_eq((const char *) key1 + 96,
> -				(const char *) key2 + 96, key_len);
> -}
> -
> -static inline int
> -rte_hash_k128_cmp_eq(const void *key1, const void *key2, size_t
> key_len)
> -{
> -	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
> -		rte_hash_k64_cmp_eq((const char *) key1 + 64,
> -				(const char *) key2 + 64, key_len);
> +	return rte_hash_k16_cmp_eq(key1, key2, key_len) |
> +		rte_hash_k16_cmp_eq((const uint8_t *) key1 + 16,
> +				(const uint8_t *) key2 + 16, key_len);
You should use "const char *" (not "const uint8_t *") here too, for consistency with rte_cmp_arm64.h.
Alternatively, add an RTE_CONST_PTR_ADD() macro to rte_common.h:
/**
 * add a byte-value offset to a const pointer
 */
#define RTE_CONST_PTR_ADD(ptr, x) ((const void*)((uintptr_t)(ptr) + (x)))
>  }
> diff --git a/lib/hash/rte_cuckoo_hash.c b/lib/hash/rte_cuckoo_hash.c
> index 619fe0c691..199cb62bf0 100644
> --- a/lib/hash/rte_cuckoo_hash.c
> +++ b/lib/hash/rte_cuckoo_hash.c
> @@ -42,13 +42,6 @@ RTE_LOG_REGISTER_DEFAULT(hash_logtype, INFO);
>  #define RETURN_IF_TRUE(cond, retval)
>  #endif
> 
> -#if defined(RTE_ARCH_X86)
> -#include "rte_cmp_x86.h"
> -#endif
> -
> -#if defined(RTE_ARCH_ARM64)
> -#include "rte_cmp_arm64.h"
> -#endif
> 
>  /*
>   * All different options to select a key compare function,
> @@ -57,7 +50,6 @@ RTE_LOG_REGISTER_DEFAULT(hash_logtype, INFO);
>   */
>  enum cmp_jump_table_case {
>  	KEY_CUSTOM = 0,
> -#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
>  	KEY_16_BYTES,
>  	KEY_32_BYTES,
>  	KEY_48_BYTES,
> @@ -66,11 +58,85 @@ enum cmp_jump_table_case {
>  	KEY_96_BYTES,
>  	KEY_112_BYTES,
>  	KEY_128_BYTES,
> -#endif
>  	KEY_OTHER_BYTES,
>  	NUM_KEY_CMP_CASES,
>  };
> 
> +/*
> + * Comparison functions for different key sizes.
> + * Each function is only called with a specific fixed key size.
> + *
> + * Return value is 0 on equality to allow direct use of memcmp.
> + * Recommend using XOR and | operator to avoid branching
> + * as long as key is smaller than cache line size.
> + *
> + * Key1 always points to key[] in rte_hash_key which is aligned.
> + * Key2 is parameter to insert which might not be.
> + *
> + * Special case for 16 and 32 bytes to allow for architecture
> + * specific optimizations.
> + */
> +
> +#if defined(RTE_ARCH_X86)
> +#include "rte_cmp_x86.h"
> +#elif defined(RTE_ARCH_ARM64)
> +#include "rte_cmp_arm64.h"
> +#else
> +#include "rte_cmp_generic.h"
> +#endif
> +
> +static int
> +rte_hash_k48_cmp_eq(const void *key1, const void *key2, size_t key_len)
> +{
> +	return rte_hash_k16_cmp_eq(key1, key2, key_len) |
> +		rte_hash_k16_cmp_eq((const uint8_t *) key1 + 16,
> +				    (const uint8_t *) key2 + 16, key_len) ||
> +		rte_hash_k16_cmp_eq((const uint8_t *) key1 + 32,
> +				    (const uint8_t *) key2 + 32, key_len);
> +}
> +
> +static int
> +rte_hash_k64_cmp_eq(const void *key1, const void *key2, size_t key_len)
> +{
> +	return rte_hash_k32_cmp_eq(key1, key2, key_len) |
Is using "|" instead of "||" here intentional, i.e. to compare in blocks of 64 bytes instead of 32?
> +		rte_hash_k32_cmp_eq((const uint8_t *) key1 + 32,
> +				    (const uint8_t *) key2 + 32, key_len);
> +}
> +
> +static int
> +rte_hash_k80_cmp_eq(const void *key1, const void *key2, size_t key_len)
> +{
> +	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
> +		rte_hash_k16_cmp_eq((const uint8_t *) key1 + 64,
> +				    (const uint8_t *) key2 + 64, key_len);
> +}
> +
> +static int
> +rte_hash_k96_cmp_eq(const void *key1, const void *key2, size_t key_len)
> +{
> +	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
> +		rte_hash_k32_cmp_eq((const uint8_t *) key1 + 64,
> +				    (const uint8_t *) key2 + 64, key_len);
> +}
> +
> +static int
> +rte_hash_k112_cmp_eq(const void *key1, const void *key2, size_t
> key_len)
> +{
> +	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
> +		rte_hash_k32_cmp_eq((const uint8_t *) key1 + 64,
> +				    (const uint8_t *) key2 + 64, key_len) ||
> +		rte_hash_k16_cmp_eq((const uint8_t *) key1 + 96,
> +				    (const uint8_t *) key2 + 96, key_len);
> +}
> +
> +static int
> +rte_hash_k128_cmp_eq(const void *key1, const void *key2, size_t
> key_len)
> +{
> +	return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
> +		rte_hash_k64_cmp_eq((const uint8_t *) key1 + 64,
> +				(const uint8_t *) key2 + 64, key_len);
> +}
> +
>  /* Enum used to select the implementation of the signature comparison
> function to use
>   * eg: a system supporting SVE might want to use a NEON or scalar
> implementation.
>   */
> @@ -160,7 +226,6 @@ void rte_hash_set_cmp_func(struct rte_hash *h,
> rte_hash_cmp_eq_t func)
>   */
>  static const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = {
>  	[KEY_CUSTOM] = NULL,
> -#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
>  	[KEY_16_BYTES] = rte_hash_k16_cmp_eq,
>  	[KEY_32_BYTES] = rte_hash_k32_cmp_eq,
>  	[KEY_48_BYTES] = rte_hash_k48_cmp_eq,
> @@ -169,7 +234,6 @@ static const rte_hash_cmp_eq_t
> cmp_jump_table[NUM_KEY_CMP_CASES] = {
>  	[KEY_96_BYTES] = rte_hash_k96_cmp_eq,
>  	[KEY_112_BYTES] = rte_hash_k112_cmp_eq,
>  	[KEY_128_BYTES] = rte_hash_k128_cmp_eq,
> -#endif
>  	[KEY_OTHER_BYTES] = memcmp,
>  };
> 
> --
> 2.47.2
With or without suggested changes...
Acked-by: Morten Brørup <mb at smartsharesystems.com>
    
    
More information about the dev
mailing list