[PATCH v2 3/4] hash: reduce architecture special cases
Morten Brørup
mb at smartsharesystems.com
Tue Aug 26 08:55:23 CEST 2025
> From: Stephen Hemminger [mailto:stephen at networkplumber.org]
> Sent: Friday, 22 August 2025 20.20
>
> Make comparison of sizes compatible across platforms.
> Keep the special case code for 16 bytes for x86 and arm64 but
> also add simple xor for others.
>
> Need to keep rte_hash_k32_cmp_eq() exposed because ip_frag
> code poaches it.
>
> Signed-off-by: Stephen Hemminger <stephen at networkplumber.org>
> ---
> lib/hash/rte_cmp_arm64.h | 56 +------------------------
> lib/hash/rte_cmp_generic.h | 35 ++++++++++++++++
> lib/hash/rte_cmp_x86.h | 60 ++------------------------
> lib/hash/rte_cuckoo_hash.c | 86 +++++++++++++++++++++++++++++++++-----
> 4 files changed, 116 insertions(+), 121 deletions(-)
> create mode 100644 lib/hash/rte_cmp_generic.h
>
> diff --git a/lib/hash/rte_cmp_arm64.h b/lib/hash/rte_cmp_arm64.h
> index a3e85635eb..2b2a37ebd2 100644
> --- a/lib/hash/rte_cmp_arm64.h
> +++ b/lib/hash/rte_cmp_arm64.h
> @@ -2,7 +2,7 @@
> * Copyright(c) 2015 Cavium, Inc
> */
>
> -/* Functions to compare multiple of 16 byte keys (up to 128 bytes) */
> +/* Functions to compare multiple of 16 byte keys */
> static inline int
> rte_hash_k16_cmp_eq(const void *key1, const void *key2,
> size_t key_len __rte_unused)
> @@ -27,59 +27,7 @@ rte_hash_k16_cmp_eq(const void *key1, const void *key2,
> static inline int
> rte_hash_k32_cmp_eq(const void *key1, const void *key2, size_t key_len)
> {
> - return rte_hash_k16_cmp_eq(key1, key2, key_len) ||
> + return rte_hash_k16_cmp_eq(key1, key2, key_len) |
> rte_hash_k16_cmp_eq((const char *) key1 + 16,
> (const char *) key2 + 16, key_len);
> }
I agree with the change from "||" to "|", so the comparison is done branch-free in one 32-byte block instead of two 16-byte blocks.
Another potential improvement:
Consider passing the actual block length (16) instead of forwarding key_len, i.e.:
return rte_hash_k16_cmp_eq(key1, key2, 16) |
rte_hash_k16_cmp_eq((const char *) key1 + 16,
(const char *) key2 + 16, 16);
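Passing the literal 16 also documents the block size at the call site, and would let a generic (non-vector) k16 implementation actually use the length argument instead of marking it __rte_unused.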
[...]
> diff --git a/lib/hash/rte_cmp_x86.h b/lib/hash/rte_cmp_x86.h
> index ddfbef462f..e7a38c8fcd 100644
> --- a/lib/hash/rte_cmp_x86.h
> +++ b/lib/hash/rte_cmp_x86.h
> @@ -4,7 +4,7 @@
>
> #include <rte_vect.h>
>
> -/* Functions to compare multiple of 16 byte keys (up to 128 bytes) */
> +/* Function to compare multiple of 16 byte keys */
> static inline int
> rte_hash_k16_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused)
> {
> @@ -18,59 +18,7 @@ rte_hash_k16_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unu
> static inline int
> rte_hash_k32_cmp_eq(const void *key1, const void *key2, size_t key_len)
> {
> - return rte_hash_k16_cmp_eq(key1, key2, key_len) ||
> - rte_hash_k16_cmp_eq((const char *) key1 + 16,
> - (const char *) key2 + 16, key_len);
> -}
> -
> -static inline int
> -rte_hash_k48_cmp_eq(const void *key1, const void *key2, size_t key_len)
> -{
> - return rte_hash_k16_cmp_eq(key1, key2, key_len) ||
> - rte_hash_k16_cmp_eq((const char *) key1 + 16,
> - (const char *) key2 + 16, key_len) ||
> - rte_hash_k16_cmp_eq((const char *) key1 + 32,
> - (const char *) key2 + 32, key_len);
> -}
> -
> -static inline int
> -rte_hash_k64_cmp_eq(const void *key1, const void *key2, size_t key_len)
> -{
> - return rte_hash_k32_cmp_eq(key1, key2, key_len) ||
> - rte_hash_k32_cmp_eq((const char *) key1 + 32,
> - (const char *) key2 + 32, key_len);
> -}
> -
> -static inline int
> -rte_hash_k80_cmp_eq(const void *key1, const void *key2, size_t key_len)
> -{
> - return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
> - rte_hash_k16_cmp_eq((const char *) key1 + 64,
> - (const char *) key2 + 64, key_len);
> -}
> -
> -static inline int
> -rte_hash_k96_cmp_eq(const void *key1, const void *key2, size_t key_len)
> -{
> - return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
> - rte_hash_k32_cmp_eq((const char *) key1 + 64,
> - (const char *) key2 + 64, key_len);
> -}
> -
> -static inline int
> -rte_hash_k112_cmp_eq(const void *key1, const void *key2, size_t key_len)
> -{
> - return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
> - rte_hash_k32_cmp_eq((const char *) key1 + 64,
> - (const char *) key2 + 64, key_len) ||
> - rte_hash_k16_cmp_eq((const char *) key1 + 96,
> - (const char *) key2 + 96, key_len);
> -}
> -
> -static inline int
> -rte_hash_k128_cmp_eq(const void *key1, const void *key2, size_t key_len)
> -{
> - return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
> - rte_hash_k64_cmp_eq((const char *) key1 + 64,
> - (const char *) key2 + 64, key_len);
> + return rte_hash_k16_cmp_eq(key1, key2, key_len) |
> + rte_hash_k16_cmp_eq((const uint8_t *) key1 + 16,
> + (const uint8_t *) key2 + 16, key_len);
You should use "const char *" (not "const uint8_t *") here too.
Or add RTE_CONST_PTR_ADD() to rte_common.h:
/**
* add a byte-value offset to a const pointer
*/
#define RTE_CONST_PTR_ADD(ptr, x) ((const void*)((uintptr_t)(ptr) + (x)))
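With that macro (and the literal length suggested above), the k32 compare could read, for example:

return rte_hash_k16_cmp_eq(key1, key2, 16) |
	rte_hash_k16_cmp_eq(RTE_CONST_PTR_ADD(key1, 16),
			RTE_CONST_PTR_ADD(key2, 16), 16);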
> }
> diff --git a/lib/hash/rte_cuckoo_hash.c b/lib/hash/rte_cuckoo_hash.c
> index 619fe0c691..199cb62bf0 100644
> --- a/lib/hash/rte_cuckoo_hash.c
> +++ b/lib/hash/rte_cuckoo_hash.c
> @@ -42,13 +42,6 @@ RTE_LOG_REGISTER_DEFAULT(hash_logtype, INFO);
> #define RETURN_IF_TRUE(cond, retval)
> #endif
>
> -#if defined(RTE_ARCH_X86)
> -#include "rte_cmp_x86.h"
> -#endif
> -
> -#if defined(RTE_ARCH_ARM64)
> -#include "rte_cmp_arm64.h"
> -#endif
>
> /*
> * All different options to select a key compare function,
> @@ -57,7 +50,6 @@ RTE_LOG_REGISTER_DEFAULT(hash_logtype, INFO);
> */
> enum cmp_jump_table_case {
> KEY_CUSTOM = 0,
> -#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
> KEY_16_BYTES,
> KEY_32_BYTES,
> KEY_48_BYTES,
> @@ -66,11 +58,85 @@ enum cmp_jump_table_case {
> KEY_96_BYTES,
> KEY_112_BYTES,
> KEY_128_BYTES,
> -#endif
> KEY_OTHER_BYTES,
> NUM_KEY_CMP_CASES,
> };
>
> +/*
> + * Comparison functions for different key sizes.
> + * Each function is only called with a specific fixed key size.
> + *
> + * Return value is 0 on equality to allow direct use of memcmp.
> + * Recommend using XOR and | operator to avoid branching
> + * as long as key is smaller than cache line size.
> + *
> + * Key1 always points to key[] in rte_hash_key which is aligned.
> + * Key2 is parameter to insert which might not be.
> + *
> + * Special case for 16 and 32 bytes to allow for architecture
> + * specific optimizations.
> + */
> +
> +#if defined(RTE_ARCH_X86)
> +#include "rte_cmp_x86.h"
> +#elif defined(RTE_ARCH_ARM64)
> +#include "rte_cmp_arm64.h"
> +#else
> +#include "rte_cmp_generic.h"
> +#endif
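As a side note for readers without the arch-specific headers at hand: a generic, branch-free k16 along the lines of the comment above could look roughly like this (just a sketch; not necessarily what rte_cmp_generic.h actually contains):

#include <stdint.h>
#include <string.h>
#include <rte_common.h>

static inline int
rte_hash_k16_cmp_eq(const void *key1, const void *key2,
		size_t key_len __rte_unused)
{
	uint64_t a[2], b[2];

	/* key2 comes from the caller and may be unaligned, so load via memcpy */
	memcpy(a, key1, sizeof(a));
	memcpy(b, key2, sizeof(b));

	/* XOR yields the differing bits; '|' combines the halves without branching.
	 * Returns 0 on equality, non-zero otherwise, matching memcmp-style usage.
	 */
	return ((a[0] ^ b[0]) | (a[1] ^ b[1])) != 0;
}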
> +
> +static int
> +rte_hash_k48_cmp_eq(const void *key1, const void *key2, size_t key_len)
> +{
> + return rte_hash_k16_cmp_eq(key1, key2, key_len) |
> + rte_hash_k16_cmp_eq((const uint8_t *) key1 + 16,
> + (const uint8_t *) key2 + 16, key_len) ||
> + rte_hash_k16_cmp_eq((const uint8_t *) key1 + 32,
> + (const uint8_t *) key2 + 32, key_len);
> +}
> +
> +static int
> +rte_hash_k64_cmp_eq(const void *key1, const void *key2, size_t key_len)
> +{
> + return rte_hash_k32_cmp_eq(key1, key2, key_len) |
Is the "|" instead of "||", to compare in blocks of 64 bytes instead of 32, intentional?
> + rte_hash_k32_cmp_eq((const uint8_t *) key1 + 32,
> + (const uint8_t *) key2 + 32, key_len);
> +}
> +
> +static int
> +rte_hash_k80_cmp_eq(const void *key1, const void *key2, size_t key_len)
> +{
> + return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
> + rte_hash_k16_cmp_eq((const uint8_t *) key1 + 64,
> + (const uint8_t *) key2 + 64, key_len);
> +}
> +
> +static int
> +rte_hash_k96_cmp_eq(const void *key1, const void *key2, size_t key_len)
> +{
> + return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
> + rte_hash_k32_cmp_eq((const uint8_t *) key1 + 64,
> + (const uint8_t *) key2 + 64, key_len);
> +}
> +
> +static int
> +rte_hash_k112_cmp_eq(const void *key1, const void *key2, size_t key_len)
> +{
> + return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
> + rte_hash_k32_cmp_eq((const uint8_t *) key1 + 64,
> + (const uint8_t *) key2 + 64, key_len) ||
> + rte_hash_k16_cmp_eq((const uint8_t *) key1 + 96,
> + (const uint8_t *) key2 + 96, key_len);
> +}
> +
> +static int
> +rte_hash_k128_cmp_eq(const void *key1, const void *key2, size_t key_len)
> +{
> + return rte_hash_k64_cmp_eq(key1, key2, key_len) ||
> + rte_hash_k64_cmp_eq((const uint8_t *) key1 + 64,
> + (const uint8_t *) key2 + 64, key_len);
> +}
> +
> /* Enum used to select the implementation of the signature comparison function to use
>  * eg: a system supporting SVE might want to use a NEON or scalar implementation.
> */
> @@ -160,7 +226,6 @@ void rte_hash_set_cmp_func(struct rte_hash *h, rte_hash_cmp_eq_t func)
> */
> static const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = {
> [KEY_CUSTOM] = NULL,
> -#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
> [KEY_16_BYTES] = rte_hash_k16_cmp_eq,
> [KEY_32_BYTES] = rte_hash_k32_cmp_eq,
> [KEY_48_BYTES] = rte_hash_k48_cmp_eq,
> @@ -169,7 +234,6 @@ static const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = {
> [KEY_96_BYTES] = rte_hash_k96_cmp_eq,
> [KEY_112_BYTES] = rte_hash_k112_cmp_eq,
> [KEY_128_BYTES] = rte_hash_k128_cmp_eq,
> -#endif
> [KEY_OTHER_BYTES] = memcmp,
> };
>
> --
> 2.47.2
With or without suggested changes...
Acked-by: Morten Brørup <mb at smartsharesystems.com>