[dpdk-dev] [PATCH v3 17/18] net: add checks for max SIMD bitwidth

Olivier Matz olivier.matz at 6wind.com
Tue Oct 6 11:58:36 CEST 2020


Hi,

On Wed, Sep 30, 2020 at 02:04:13PM +0100, Ciara Power wrote:
> When choosing a vector path to take, an extra condition must be
> satisfied to ensure the max SIMD bitwidth allows for the CPU enabled
> path.
> 
> The vector path was initially chosen in RTE_INIT, however this is no
> longer suitable as we cannot check the max SIMD bitwidth at that time.
> The default chosen in RTE_INIT is now scalar. For best performance
> and to use vector paths, apps must explicitly call the set algorithm
> function before using other functions from this library, as this is
> where vector handlers are now chosen.
> 
> Suggested-by: Jasvinder Singh <jasvinder.singh at intel.com>
> 
> Signed-off-by: Ciara Power <ciara.power at intel.com>
> 
> ---
> v3:
>   - Moved choosing vector paths out of RTE_INIT.
>   - Moved checking max_simd_bitwidth into the set_alg function.
> ---
>  lib/librte_net/rte_net_crc.c | 26 +++++++++++++++++---------
>  lib/librte_net/rte_net_crc.h |  3 ++-
>  2 files changed, 19 insertions(+), 10 deletions(-)
> 
> diff --git a/lib/librte_net/rte_net_crc.c b/lib/librte_net/rte_net_crc.c
> index 9fd4794a9d..241eb16399 100644
> --- a/lib/librte_net/rte_net_crc.c
> +++ b/lib/librte_net/rte_net_crc.c
> @@ -9,6 +9,7 @@
>  #include <rte_cpuflags.h>
>  #include <rte_common.h>
>  #include <rte_net_crc.h>
> +#include <rte_eal.h>
>  
>  #if defined(RTE_ARCH_X86_64) && defined(RTE_MACHINE_CPUFLAG_PCLMULQDQ)
>  #define X86_64_SSE42_PCLMULQDQ     1
> @@ -60,6 +61,9 @@ static rte_net_crc_handler handlers_neon[] = {
>  };
>  #endif
>  
> +static uint16_t max_simd_bitwidth;
> +#define RTE_LOGTYPE_NET RTE_LOGTYPE_USER1

RTE_LOG_REGISTER() should be used instead of aliasing RTE_LOGTYPE_USER1.

> +
>  /**
>   * Reflect the bits about the middle
>   *
> @@ -145,18 +149,26 @@ rte_crc32_eth_handler(const uint8_t *data, uint32_t data_len)
>  void
>  rte_net_crc_set_alg(enum rte_net_crc_alg alg)
>  {
> +	if (max_simd_bitwidth == 0)
> +		max_simd_bitwidth = rte_get_max_simd_bitwidth();
> +
>  	switch (alg) {
>  #ifdef X86_64_SSE42_PCLMULQDQ
>  	case RTE_NET_CRC_SSE42:
> -		handlers = handlers_sse42;
> -		break;
> +		if (max_simd_bitwidth >= RTE_MAX_128_SIMD) {
> +			handlers = handlers_sse42;
> +			return;
> +		}
> +		RTE_LOG(INFO, NET, "Max SIMD Bitwidth too low, using scalar\n");

If max_simd_bitwidth is too low, it will keep the previous value.
I think we should avoid saying "using scalar" in the log, even if it is
correct today. For instance, when the AVX implementation is added,
the log will become wrong.


>  #elif defined ARM64_NEON_PMULL
>  		/* fall-through */
>  	case RTE_NET_CRC_NEON:
> -		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_PMULL)) {
> +		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_PMULL) &&
> +				max_simd_bitwidth >= RTE_MAX_128_SIMD) {
>  			handlers = handlers_neon;
> -			break;
> +			return;
>  		}
> +		RTE_LOG(INFO, NET, "Max SIMD Bitwidth too low or CPU flag not enabled, using scalar\n");
>  #endif
>  		/* fall-through */
>  	case RTE_NET_CRC_SCALAR:
> @@ -184,19 +196,15 @@ rte_net_crc_calc(const void *data,
>  /* Select highest available crc algorithm as default one */
>  RTE_INIT(rte_net_crc_init)
>  {
> -	enum rte_net_crc_alg alg = RTE_NET_CRC_SCALAR;
> -
>  	rte_net_crc_scalar_init();
>  
>  #ifdef X86_64_SSE42_PCLMULQDQ
> -	alg = RTE_NET_CRC_SSE42;
>  	rte_net_crc_sse42_init();
>  #elif defined ARM64_NEON_PMULL
>  	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_PMULL)) {
> -		alg = RTE_NET_CRC_NEON;
>  		rte_net_crc_neon_init();
>  	}
>  #endif
>  
> -	rte_net_crc_set_alg(alg);
> +	rte_net_crc_set_alg(RTE_NET_CRC_SCALAR);
>  }
> diff --git a/lib/librte_net/rte_net_crc.h b/lib/librte_net/rte_net_crc.h
> index 16e85ca970..7a45ebe193 100644
> --- a/lib/librte_net/rte_net_crc.h
> +++ b/lib/librte_net/rte_net_crc.h
> @@ -28,7 +28,8 @@ enum rte_net_crc_alg {
>  /**
>   * This API set the CRC computation algorithm (i.e. scalar version,
>   * x86 64-bit sse4.2 intrinsic version, etc.) and internal data
> - * structure.
> + * structure. This should be called before any other functions, to
> + * choose the algorithm for best performance.
>   *
>   * @param alg
>   *   This parameter is used to select the CRC implementation version.
> -- 
> 2.17.1
> 


More information about the dev mailing list