[PATCH v2 1/3] net: add thread-safe crc api

Ferruh Yigit ferruh.yigit at amd.com
Tue Oct 8 05:42:48 CEST 2024


On 10/1/2024 7:11 PM, Arkadiusz Kusztal wrote:
> The current net CRC API is not thread-safe, this patch
> solves this by adding another, thread-safe API functions.
> This API is also safe to use across multiple processes,
> yet with limitations on max-simd-bitwidth, which will be checked only by
> the process that created the CRC context; all other processes will use
> the same CRC function when used with the same CRC context.
> It is an undefined behavior when process binaries are compiled
> with different SIMD capabilities when the same CRC context is used.
> 
> Signed-off-by: Arkadiusz Kusztal <arkadiuszx.kusztal at intel.com>

<...>

> +static struct
> +{
> +	uint32_t (*f[RTE_NET_CRC_REQS])
> +		(const uint8_t *data, uint32_t data_len);
>

It increases readability to typedef function pointers.


> +} handlers[RTE_NET_CRC_AVX512 + 1];
>  
> -/**
> - * Reflect the bits about the middle
> - *
> - * @param val
> - *   value to be reflected
> - *
> - * @return
> - *   reflected value
> - */
> -static uint32_t
> +static inline uint32_t
>

Does changing to 'inline' required, as function is static compiler can
do the same.

>  reflect_32bits(uint32_t val)
>  {
>  	uint32_t i, res = 0;
> @@ -99,26 +43,7 @@ reflect_32bits(uint32_t val)
>  	return res;
>  }
>  
> -static void
> -crc32_eth_init_lut(uint32_t poly,
> -	uint32_t *lut)
> -{
> -	uint32_t i, j;
> -
> -	for (i = 0; i < CRC_LUT_SIZE; i++) {
> -		uint32_t crc = reflect_32bits(i);
> -
> -		for (j = 0; j < 8; j++) {
> -			if (crc & 0x80000000L)
> -				crc = (crc << 1) ^ poly;
> -			else
> -				crc <<= 1;
> -		}
> -		lut[i] = reflect_32bits(crc);
> -	}
> -}
> -
> -static __rte_always_inline uint32_t
> +static inline uint32_t
>

Why not forcing inline anymore?
Are these inline changes related to the thread-safety?

>  crc32_eth_calc_lut(const uint8_t *data,
>  	uint32_t data_len,
>  	uint32_t crc,
> @@ -130,20 +55,9 @@ crc32_eth_calc_lut(const uint8_t *data,
>  	return crc;
>  }
>  
> -static void
> -rte_net_crc_scalar_init(void)
> -{
> -	/* 32-bit crc init */
> -	crc32_eth_init_lut(CRC32_ETH_POLYNOMIAL, crc32_eth_lut);
> -
> -	/* 16-bit CRC init */
> -	crc32_eth_init_lut(CRC16_CCITT_POLYNOMIAL << 16, crc16_ccitt_lut);
> -}
> -
>  static inline uint32_t
> -rte_crc16_ccitt_handler(const uint8_t *data, uint32_t data_len)
> +crc16_ccitt(const uint8_t *data, uint32_t data_len)
>  {
> -	/* return 16-bit CRC value */
>

Why not keep comments? Are they wrong?

<...>

> +static void
> +crc_scalar_init(void)
> +{
> +	crc32_eth_init_lut(CRC32_ETH_POLYNOMIAL, crc32_eth_lut);
> +	crc32_eth_init_lut(CRC16_CCITT_POLYNOMIAL << 16, crc16_ccitt_lut);
> +
> +	handlers[RTE_NET_CRC_SCALAR].f[RTE_NET_CRC16_CCITT] = crc16_ccitt;
> +	handlers[RTE_NET_CRC_SCALAR].f[RTE_NET_CRC32_ETH] = crc32_eth;
>

+1 to remove global handlers pointer and add context,

But current handlers array content is static, it can be set when
defined, instead of functions.

<...>

> -static uint32_t
> -rte_crc32_eth_default_handler(const uint8_t *data, uint32_t data_len)
> +struct rte_net_crc rte_net_crc_set(enum rte_net_crc_alg alg,
> +	enum rte_net_crc_type type)
>  {
> -	handlers = NULL;
> -	if (max_simd_bitwidth == 0)
> -		max_simd_bitwidth = rte_vect_get_max_simd_bitwidth();
> -
> -	handlers = avx512_vpclmulqdq_get_handlers();
> -	if (handlers != NULL)
> -		return handlers[RTE_NET_CRC32_ETH](data, data_len);
> -	handlers = sse42_pclmulqdq_get_handlers();
> -	if (handlers != NULL)
> -		return handlers[RTE_NET_CRC32_ETH](data, data_len);
> -	handlers = neon_pmull_get_handlers();
> -	if (handlers != NULL)
> -		return handlers[RTE_NET_CRC32_ETH](data, data_len);
> -	handlers = handlers_scalar;
> -	return handlers[RTE_NET_CRC32_ETH](data, data_len);
> -}
> +	uint16_t max_simd_bitwidth;
>  
> -/* Public API */
> -
> -void
> -rte_net_crc_set_alg(enum rte_net_crc_alg alg)
> -{
> -	handlers = NULL;
> -	if (max_simd_bitwidth == 0)
> -		max_simd_bitwidth = rte_vect_get_max_simd_bitwidth();
> +	max_simd_bitwidth = rte_vect_get_max_simd_bitwidth();
>  
>  	switch (alg) {
>  	case RTE_NET_CRC_AVX512:
> -		handlers = avx512_vpclmulqdq_get_handlers();
> -		if (handlers != NULL)
> -			break;
> +#ifdef CC_X86_64_AVX512_VPCLMULQDQ_SUPPORT
> +		if (AVX512_VPCLMULQDQ_CPU_SUPPORTED &&
> +				max_simd_bitwidth >= RTE_VECT_SIMD_512) {
> +			return (struct rte_net_crc){ RTE_NET_CRC_AVX512, type };
> +		}
> +#endif
>  		/* fall-through */
>  	case RTE_NET_CRC_SSE42:
> -		handlers = sse42_pclmulqdq_get_handlers();
> -		break; /* for x86, always break here */
> +#ifdef CC_X86_64_SSE42_PCLMULQDQ_SUPPORT
> +		if (SSE42_PCLMULQDQ_CPU_SUPPORTED &&
> +				max_simd_bitwidth >= RTE_VECT_SIMD_128) {
> +			return (struct rte_net_crc){ RTE_NET_CRC_SSE42, type };
> +		}
> +#endif
> +		break;
>  	case RTE_NET_CRC_NEON:
> -		handlers = neon_pmull_get_handlers();
> -		/* fall-through */
> -	case RTE_NET_CRC_SCALAR:
> -		/* fall-through */
> +#ifdef CC_ARM64_NEON_PMULL_SUPPORT
> +		if (NEON_PMULL_CPU_SUPPORTED &&
> +				max_simd_bitwidth >= RTE_VECT_SIMD_128) {
> +			return (struct rte_net_crc){ RTE_NET_CRC_NEON, type };
> +		}
> +#endif
>

Is it more readable as following, up to you:

```
rte_net_crc_set(alg, type) {
  enum rte_net_crc_alg new_alg = RTE_NET_CRC_SCALAR;
  switch (alg) {
  case AVX512:
    new_alg = ..
  case NEON:
    new_alg = ..
  }
  return struct rte_net_crc){ new_alg, type };
```




> +		break;
>  	default:
>  		break;
>  	}
> -
> -	if (handlers == NULL)
> -		handlers = handlers_scalar;
> +	return (struct rte_net_crc){ RTE_NET_CRC_SCALAR, type };
>  }
>  
> -uint32_t
> -rte_net_crc_calc(const void *data,
> -	uint32_t data_len,
> -	enum rte_net_crc_type type)
> +uint32_t rte_net_crc(const struct rte_net_crc *ctx,
> +	const void *data, const uint32_t data_len)
>  {
> -	uint32_t ret;
> -	rte_net_crc_handler f_handle;
> -
> -	f_handle = handlers[type];
> -	ret = f_handle(data, data_len);
> -
> -	return ret;
> +	return handlers[ctx->alg].f[ctx->type](data, data_len);
>

'rte_net_crc()' gets input from user and "struct rte_net_crc" is not
opaque, so user can provide invalid input, ctx->alg & ctx->type.
To protect against it input values should be checked before using.

Or I think user not need to know the details of the "struct
rte_net_crc", so it can be an opaque variable for user.

<...>

> -/**
> - * CRC compute API
> - *
> - * @param data
> - *   Pointer to the packet data for CRC computation
> - * @param data_len
> - *   Data length for CRC computation
> - * @param type
> - *   CRC type (enum rte_net_crc_type)
> - *
> - * @return
> - *   CRC value
> - */
> -uint32_t
> -rte_net_crc_calc(const void *data,
> -	uint32_t data_len,
> +struct rte_net_crc rte_net_crc_set(enum rte_net_crc_alg alg,
>  	enum rte_net_crc_type type);
>  
> +uint32_t rte_net_crc(const struct rte_net_crc *ctx,
> +	const void *data, const uint32_t data_len);
> +
>

As these are APIs, can you please add doxygen comments to them?

>  #ifdef __cplusplus
>  }
>  #endif
> diff --git a/lib/net/version.map b/lib/net/version.map
> index bec4ce23ea..47daf1464a 100644
> --- a/lib/net/version.map
> +++ b/lib/net/version.map
> @@ -4,11 +4,25 @@ DPDK_25 {
>  	rte_eth_random_addr;
>  	rte_ether_format_addr;
>  	rte_ether_unformat_addr;
> -	rte_net_crc_calc;
> -	rte_net_crc_set_alg;
>  	rte_net_get_ptype;
>  	rte_net_make_rarp_packet;
>  	rte_net_skip_ip6_ext;
> +	rte_net_crc;
> +	rte_net_crc_set;
>  
>  	local: *;
>  };
> +
> +INTERNAL {
> +	global:
> +
> +	rte_net_crc_sse42_init;
> +	rte_crc16_ccitt_sse42_handler;
> +	rte_crc32_eth_sse42_handler;
> +	rte_net_crc_avx512_init;
> +	rte_crc16_ccitt_avx512_handler;
> +	rte_crc32_eth_avx512_handler;
> +	rte_net_crc_neon_init;
> +	rte_crc16_ccitt_neon_handler;
> +	rte_crc32_eth_neon_handler;
> +};
>

+1 to David's comment, these are used only within component, no need to
export.




More information about the dev mailing list