[dpdk-dev] [RFC v2] hash: unify crc32 API header for x86 and ARM
Ruifeng Wang
Ruifeng.Wang at arm.com
Wed May 13 05:04:44 CEST 2020
> -----Original Message-----
> From: pbhagavatula at marvell.com <pbhagavatula at marvell.com>
> Sent: Wednesday, May 13, 2020 4:40 AM
> To: jerinj at marvell.com; konstantin.ananyev at intel.com;
> harry.van.haaren at intel.com; Yipeng Wang <yipeng1.wang at intel.com>;
> Sameh Gobriel <sameh.gobriel at intel.com>; Bruce Richardson
> <bruce.richardson at intel.com>; Ruifeng Wang <Ruifeng.Wang at arm.com>
> Cc: dev at dpdk.org; Pavan Nikhilesh <pbhagavatula at marvell.com>
> Subject: [dpdk-dev] [RFC v2] hash: unify crc32 API header for x86 and ARM
>
> From: Pavan Nikhilesh <pbhagavatula at marvell.com>
>
> Merge crc32 hash calculation public API headers for x86 and ARM.
> Select the best available CRC32 algorithm when an application requests an
> algorithm that is unsupported on the given CPU architecture.
>
> Previously, an application that directly included `rte_crc_arm64.h` without
> including `rte_hash_crc.h` would fail to compile.
> Although `rte_crc_arm64.h` is no longer needed, keep it as a dummy file for
> ABI purposes.
>
> Signed-off-by: Pavan Nikhilesh <pbhagavatula at marvell.com>
> ---
> v2 Changes:
> - Don't remove `rte_crc_arm64.h` for ABI purposes.
> - Revert function pointer approach for performance reasons.
> - Select the best available algorithm based on the arch when the user passes
> an unsupported crc32 algorithm.
>
Maybe split the patch? The changes that select the best available algorithm could be a separate patch.
This patch adds more ifdefs. Is it possible to have arch-specific rte_hash_crc_xx implementations,
like what was done in rte_crc_arm64.h, and include the appropriate arch-specific header in rte_hash_crc.h?
For ABI purposes, rte_crc_arm64.h can be kept, and it would only include the new arm64-specific header.
> app/test/test_hash.c | 6 ++
> lib/librte_hash/meson.build | 6 +-
> lib/librte_hash/rte_crc_arm64.h | 175 +-------------------------------
> lib/librte_hash/rte_hash_crc.h | 153 ++++++++++++++++++++--------
> 4 files changed, 122 insertions(+), 218 deletions(-)
>
> diff --git a/app/test/test_hash.c b/app/test/test_hash.c index
> afa3a1a3c..7bd457dac 100644
> --- a/app/test/test_hash.c
> +++ b/app/test/test_hash.c
> @@ -195,7 +195,13 @@ test_crc32_hash_alg_equiv(void)
> }
>
> /* Resetting to best available algorithm */
> +#if defined RTE_ARCH_X86
> rte_hash_crc_set_alg(CRC32_SSE42_x64);
> +#elif defined RTE_ARCH_ARM64
> + rte_hash_crc_set_alg(CRC32_ARM64);
> +#else
> + rte_hash_crc_set_alg(CRC32_SW);
> +#endif
>
> if (i == CRC32_ITERATIONS)
> return 0;
> diff --git a/lib/librte_hash/meson.build b/lib/librte_hash/meson.build index
> 6ab46ae9d..8a3cf2f64 100644
> --- a/lib/librte_hash/meson.build
> +++ b/lib/librte_hash/meson.build
> @@ -1,12 +1,14 @@
> # SPDX-License-Identifier: BSD-3-Clause # Copyright(c) 2017 Intel
> Corporation
>
> -headers = files('rte_crc_arm64.h',
> - 'rte_fbk_hash.h',
> +headers = files('rte_fbk_hash.h',
> 'rte_hash_crc.h',
> 'rte_hash.h',
> 'rte_jhash.h',
> 'rte_thash.h')
> +if dpdk_conf.has('RTE_ARCH_ARM64')
> + headers += files('rte_crc_arm64.h')
> +endif
>
> sources = files('rte_cuckoo_hash.c', 'rte_fbk_hash.c') deps += ['ring'] diff --
> git a/lib/librte_hash/rte_crc_arm64.h b/lib/librte_hash/rte_crc_arm64.h
> index b4628cfc0..adfcafc7d 100644
> --- a/lib/librte_hash/rte_crc_arm64.h
> +++ b/lib/librte_hash/rte_crc_arm64.h
> @@ -5,179 +5,6 @@
> #ifndef _RTE_CRC_ARM64_H_
> #define _RTE_CRC_ARM64_H_
>
> -/**
> - * @file
> - *
> - * RTE CRC arm64 Hash
> - */
> -
> -#ifdef __cplusplus
> -extern "C" {
> -#endif
> -
> -#include <stdint.h>
> -#include <rte_cpuflags.h>
> -#include <rte_branch_prediction.h>
> -#include <rte_common.h>
> -
> -static inline uint32_t
> -crc32c_arm64_u8(uint8_t data, uint32_t init_val) -{
> - __asm__ volatile(
> - "crc32cb %w[crc], %w[crc], %w[value]"
> - : [crc] "+r" (init_val)
> - : [value] "r" (data));
> - return init_val;
> -}
> -
> -static inline uint32_t
> -crc32c_arm64_u16(uint16_t data, uint32_t init_val) -{
> - __asm__ volatile(
> - "crc32ch %w[crc], %w[crc], %w[value]"
> - : [crc] "+r" (init_val)
> - : [value] "r" (data));
> - return init_val;
> -}
> -
> -static inline uint32_t
> -crc32c_arm64_u32(uint32_t data, uint32_t init_val) -{
> - __asm__ volatile(
> - "crc32cw %w[crc], %w[crc], %w[value]"
> - : [crc] "+r" (init_val)
> - : [value] "r" (data));
> - return init_val;
> -}
> -
> -static inline uint32_t
> -crc32c_arm64_u64(uint64_t data, uint32_t init_val) -{
> - __asm__ volatile(
> - "crc32cx %w[crc], %w[crc], %x[value]"
> - : [crc] "+r" (init_val)
> - : [value] "r" (data));
> - return init_val;
> -}
> -
> -/**
> - * Allow or disallow use of arm64 SIMD instrinsics for CRC32 hash
> - * calculation.
> - *
> - * @param alg
> - * An OR of following flags:
> - * - (CRC32_SW) Don't use arm64 crc intrinsics
> - * - (CRC32_ARM64) Use ARMv8 CRC intrinsic if available
> - *
> - */
> -static inline void
> -rte_hash_crc_set_alg(uint8_t alg)
> -{
> - switch (alg) {
> - case CRC32_ARM64:
> - if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_CRC32))
> - alg = CRC32_SW;
> - /* fall-through */
> - case CRC32_SW:
> - crc32_alg = alg;
> - /* fall-through */
> - default:
> - break;
> - }
> -}
> -
> -/* Setting the best available algorithm */
> -RTE_INIT(rte_hash_crc_init_alg)
> -{
> - rte_hash_crc_set_alg(CRC32_ARM64);
> -}
> -
> -/**
> - * Use single crc32 instruction to perform a hash on a 1 byte value.
> - * Fall back to software crc32 implementation in case arm64 crc intrinsics is
> - * not supported
> - *
> - * @param data
> - * Data to perform hash on.
> - * @param init_val
> - * Value to initialise hash generator.
> - * @return
> - * 32bit calculated hash value.
> - */
> -static inline uint32_t
> -rte_hash_crc_1byte(uint8_t data, uint32_t init_val) -{
> - if (likely(crc32_alg & CRC32_ARM64))
> - return crc32c_arm64_u8(data, init_val);
> -
> - return crc32c_1byte(data, init_val);
> -}
> -
> -/**
> - * Use single crc32 instruction to perform a hash on a 2 bytes value.
> - * Fall back to software crc32 implementation in case arm64 crc intrinsics is
> - * not supported
> - *
> - * @param data
> - * Data to perform hash on.
> - * @param init_val
> - * Value to initialise hash generator.
> - * @return
> - * 32bit calculated hash value.
> - */
> -static inline uint32_t
> -rte_hash_crc_2byte(uint16_t data, uint32_t init_val) -{
> - if (likely(crc32_alg & CRC32_ARM64))
> - return crc32c_arm64_u16(data, init_val);
> -
> - return crc32c_2bytes(data, init_val);
> -}
> -
> -/**
> - * Use single crc32 instruction to perform a hash on a 4 byte value.
> - * Fall back to software crc32 implementation in case arm64 crc intrinsics is
> - * not supported
> - *
> - * @param data
> - * Data to perform hash on.
> - * @param init_val
> - * Value to initialise hash generator.
> - * @return
> - * 32bit calculated hash value.
> - */
> -static inline uint32_t
> -rte_hash_crc_4byte(uint32_t data, uint32_t init_val) -{
> - if (likely(crc32_alg & CRC32_ARM64))
> - return crc32c_arm64_u32(data, init_val);
> -
> - return crc32c_1word(data, init_val);
> -}
> -
> -/**
> - * Use single crc32 instruction to perform a hash on a 8 byte value.
> - * Fall back to software crc32 implementation in case arm64 crc intrinsics is
> - * not supported
> - *
> - * @param data
> - * Data to perform hash on.
> - * @param init_val
> - * Value to initialise hash generator.
> - * @return
> - * 32bit calculated hash value.
> - */
> -static inline uint32_t
> -rte_hash_crc_8byte(uint64_t data, uint32_t init_val) -{
> - if (likely(crc32_alg == CRC32_ARM64))
> - return crc32c_arm64_u64(data, init_val);
> -
> - return crc32c_2words(data, init_val);
> -}
> -
> -#ifdef __cplusplus
> -}
> -#endif
> +#include "rte_hash_crc.h"
>
> #endif /* _RTE_CRC_ARM64_H_ */
> diff --git a/lib/librte_hash/rte_hash_crc.h b/lib/librte_hash/rte_hash_crc.h
> index cf28031b3..eaba70c12 100644
> --- a/lib/librte_hash/rte_hash_crc.h
> +++ b/lib/librte_hash/rte_hash_crc.h
> @@ -16,10 +16,12 @@ extern "C" {
> #endif
>
> #include <stdint.h>
> -#include <rte_config.h>
> -#include <rte_cpuflags.h>
> +
> #include <rte_branch_prediction.h>
> #include <rte_common.h>
> +#include <rte_config.h>
> +#include <rte_cpuflags.h>
> +#include <rte_log.h>
>
> /* Lookup tables for software implementation of CRC32C */ static const
> uint32_t crc32c_tables[8][256] = {{ @@ -322,7 +324,7 @@
> crc32c_2bytes(uint16_t data, uint32_t init_val) }
>
> static inline uint32_t
> -crc32c_1word(uint32_t data, uint32_t init_val)
> +crc32c_4bytes(uint32_t data, uint32_t init_val)
> {
> uint32_t crc, term1, term2;
> crc = init_val;
> @@ -336,7 +338,7 @@ crc32c_1word(uint32_t data, uint32_t init_val) }
>
> static inline uint32_t
> -crc32c_2words(uint64_t data, uint32_t init_val)
> +crc32c_8bytes(uint64_t data, uint32_t init_val)
> {
> uint32_t crc, term1, term2;
> union {
> @@ -358,6 +360,48 @@ crc32c_2words(uint64_t data, uint32_t init_val)
> return crc;
> }
>
> +#if defined(RTE_ARCH_ARM64) &&
> defined(RTE_MACHINE_CPUFLAG_CRC32)
> +static inline uint32_t
> +crc32c_arm64_u8(uint8_t data, uint32_t init_val) {
> + __asm__ volatile(
> + "crc32cb %w[crc], %w[crc], %w[value]"
> + : [crc] "+r" (init_val)
> + : [value] "r" (data));
> + return init_val;
> +}
> +
> +static inline uint32_t
> +crc32c_arm64_u16(uint16_t data, uint32_t init_val) {
> + __asm__ volatile(
> + "crc32ch %w[crc], %w[crc], %w[value]"
> + : [crc] "+r" (init_val)
> + : [value] "r" (data));
> + return init_val;
> +}
> +
> +static inline uint32_t
> +crc32c_arm64_u32(uint32_t data, uint32_t init_val) {
> + __asm__ volatile(
> + "crc32cw %w[crc], %w[crc], %w[value]"
> + : [crc] "+r" (init_val)
> + : [value] "r" (data));
> + return init_val;
> +}
> +
> +static inline uint32_t
> +crc32c_arm64_u64(uint64_t data, uint32_t init_val) {
> + __asm__ volatile(
> + "crc32cx %w[crc], %w[crc], %x[value]"
> + : [crc] "+r" (init_val)
> + : [value] "r" (data));
> + return init_val;
> +}
> +#endif
> +
> #if defined(RTE_ARCH_X86)
> static inline uint32_t
> crc32c_sse42_u8(uint8_t data, uint32_t init_val) @@ -424,42 +468,69 @@
> crc32c_sse42_u64(uint64_t data, uint64_t init_val)
>
> static uint8_t crc32_alg = CRC32_SW;
>
> -#if defined(RTE_ARCH_ARM64) &&
> defined(RTE_MACHINE_CPUFLAG_CRC32)
> -#include "rte_crc_arm64.h"
> -#else
> -
> /**
> - * Allow or disallow use of SSE4.2 instrinsics for CRC32 hash
> + * Allow or disallow use of SSE4.2/ARMv8 instrinsics for CRC32 hash
> * calculation.
> *
> * @param alg
> * An OR of following flags:
> - * - (CRC32_SW) Don't use SSE4.2 intrinsics
> + * - (CRC32_SW) Don't use SSE4.2 intrinsics (default non-[x86/ARMv8])
> * - (CRC32_SSE42) Use SSE4.2 intrinsics if available
> - * - (CRC32_SSE42_x64) Use 64-bit SSE4.2 intrinsic if available (default)
> - *
> + * - (CRC32_SSE42_x64) Use 64-bit SSE4.2 intrinsic if available (default x86)
> + * - (CRC32_ARM64) Use ARMv8 CRC intrinsic if available
> */
> static inline void
> rte_hash_crc_set_alg(uint8_t alg)
> {
> -#if defined(RTE_ARCH_X86)
> - if (alg == CRC32_SSE42_x64 &&
> - !rte_cpu_get_flag_enabled(RTE_CPUFLAG_EM64T))
> - alg = CRC32_SSE42;
> + switch (alg) {
> + case CRC32_SSE42_x64:
> + case CRC32_SSE42:
> +#if defined RTE_ARCH_X86
> + if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_EM64T))
> + crc32_alg = CRC32_SSE42;
> + else
> + crc32_alg = alg;
> +#endif
> +#if defined RTE_ARCH_ARM64
> + RTE_LOG(WARNING, HASH,
> + "Incorrect CRC32 algorithm requested setting best"
> + "available algorithm on the architecture\n");
> + rte_hash_crc_set_alg(CRC32_ARM64);
> +#endif
> + break;
> + case CRC32_ARM64:
> +#if defined RTE_ARCH_ARM64
> + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_CRC32))
> + crc32_alg = CRC32_ARM64;
> #endif
> - crc32_alg = alg;
> +#if defined RTE_ARCH_X86
> + RTE_LOG(WARNING, HASH,
> + "Incorrect CRC32 algorithm requested setting best"
> + "available algorithm on the architecture\n");
> + rte_hash_crc_set_alg(CRC32_SSE42_x64);
> +#endif
> + break;
> + case CRC32_SW:
> + default:
> + crc32_alg = CRC32_SW;
> + break;
> + }
> }
>
> /* Setting the best available algorithm */
> RTE_INIT(rte_hash_crc_init_alg)
> {
> +#if defined RTE_ARCH_X86
> rte_hash_crc_set_alg(CRC32_SSE42_x64);
> +#elif defined RTE_ARCH_ARM64
> + rte_hash_crc_set_alg(CRC32_ARM64);
> +#else
> + rte_hash_crc_set_alg(CRC32_SW);
> +#endif
> }
>
> /**
> - * Use single crc32 instruction to perform a hash on a byte value.
> - * Fall back to software crc32 implementation in case SSE4.2 is
> - * not supported
> + * Calculate crc32 hash value of 1bytes.
> *
> * @param data
> * Data to perform hash on.
> @@ -474,15 +545,15 @@ rte_hash_crc_1byte(uint8_t data, uint32_t init_val)
> #if defined RTE_ARCH_X86
> if (likely(crc32_alg & CRC32_SSE42))
> return crc32c_sse42_u8(data, init_val);
> +#elif defined RTE_ARCH_ARM64
> + if (likely(crc32_alg & CRC32_ARM64))
> + return crc32c_arm64_u8(data, init_val);
> #endif
> -
> return crc32c_1byte(data, init_val);
> }
>
> /**
> - * Use single crc32 instruction to perform a hash on a 2 bytes value.
> - * Fall back to software crc32 implementation in case SSE4.2 is
> - * not supported
> + * Calculate crc32 hash value of 2bytes.
> *
> * @param data
> * Data to perform hash on.
> @@ -497,15 +568,15 @@ rte_hash_crc_2byte(uint16_t data, uint32_t init_val)
> #if defined RTE_ARCH_X86
> if (likely(crc32_alg & CRC32_SSE42))
> return crc32c_sse42_u16(data, init_val);
> +#elif defined RTE_ARCH_ARM64
> + if (likely(crc32_alg & CRC32_ARM64))
> + return crc32c_arm64_u16(data, init_val);
> #endif
> -
> return crc32c_2bytes(data, init_val);
> }
>
> /**
> - * Use single crc32 instruction to perform a hash on a 4 byte value.
> - * Fall back to software crc32 implementation in case SSE4.2 is
> - * not supported
> + * Calculate crc32 hash value of 4bytes.
> *
> * @param data
> * Data to perform hash on.
> @@ -520,15 +591,15 @@ rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
> #if defined RTE_ARCH_X86
> if (likely(crc32_alg & CRC32_SSE42))
> return crc32c_sse42_u32(data, init_val);
> +#elif defined RTE_ARCH_ARM64
> + if (likely(crc32_alg & CRC32_ARM64))
> + return crc32c_arm64_u32(data, init_val);
> #endif
> -
> - return crc32c_1word(data, init_val);
> + return crc32c_4bytes(data, init_val);
> }
>
> /**
> - * Use single crc32 instruction to perform a hash on a 8 byte value.
> - * Fall back to software crc32 implementation in case SSE4.2 is
> - * not supported
> + * Calculate crc32 hash value of 8bytes.
> *
> * @param data
> * Data to perform hash on.
> @@ -540,21 +611,19 @@ rte_hash_crc_4byte(uint32_t data, uint32_t init_val)
> static inline uint32_t rte_hash_crc_8byte(uint64_t data, uint32_t init_val) { -
> #ifdef RTE_ARCH_X86_64
> - if (likely(crc32_alg == CRC32_SSE42_x64))
> +#if defined RTE_ARCH_X86_64
> + if (likely(crc32_alg & CRC32_SSE42_x64))
> return crc32c_sse42_u64(data, init_val); -#endif
> -
> -#if defined RTE_ARCH_X86
> +#elif defined RTE_ARCH_X86
> if (likely(crc32_alg & CRC32_SSE42))
> return crc32c_sse42_u64_mimic(data, init_val);
> +#elif defined RTE_ARCH_ARM64
> + if (likely(crc32_alg & CRC32_ARM64))
> + return crc32c_arm64_u64(data, init_val);
> #endif
> -
> - return crc32c_2words(data, init_val);
> + return crc32c_8bytes(data, init_val);
> }
>
> -#endif
> -
> /**
> * Calculate CRC32 hash on user-supplied byte array.
> *
> --
> 2.17.1
More information about the dev
mailing list