[dpdk-dev] [PATCH] eal/atomic: reimplement rte atomic APIs with atomic builtins

Honnappa Nagarahalli Honnappa.Nagarahalli at arm.com
Thu Jan 31 21:06:36 CET 2019


Hi Phil,
	The rte atomic APIs are not designed to carry memory ordering details; the memory order to use comes from the API usage, not from the definition. For example, rte_atomic32_add can use RELAXED memory ordering in a statistics-increment kind of use case, but may need RELEASE ordering if the variable is used for some kind of synchronization. Hence, I think it is not optimal to hard-code one memory order into these implementations in the hope of gaining performance across the board.

At the same time, we do not need new APIs in DPDK, as the __atomic_xxx intrinsics themselves already serve that purpose.

Instead, I think it is better to look at where these APIs are called and change that code to use the __atomic_xxx intrinsics directly with the correct memory order.
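
To make the point concrete, here is a hypothetical caller-side sketch (stats_inc and refcnt_put are made-up names, not from this patch): the same builtin takes a different memory-order argument depending on what the call site needs.

	#include <stdint.h>

	/* Statistics counter: nothing else is synchronized through this
	 * variable, so RELAXED ordering is sufficient. */
	static inline void stats_inc(uint64_t *pkts)
	{
		__atomic_fetch_add(pkts, 1, __ATOMIC_RELAXED);
	}

	/* Reference count used to decide when an object can be freed:
	 * the final decrement must synchronize with the other threads'
	 * releases, so it needs acquire-release ordering. */
	static inline int refcnt_put(uint32_t *refcnt)
	{
		return __atomic_sub_fetch(refcnt, 1, __ATOMIC_ACQ_REL) == 0;
	}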

Thanks,
Honnappa


> -----Original Message-----
> From: dev <dev-bounces at dpdk.org> On Behalf Of Phil Yang
> Sent: Thursday, January 3, 2019 3:42 AM
> To: dev at dpdk.org
> Cc: nd <nd at arm.com>
> Subject: [dpdk-dev] [PATCH] eal/atomic: reimplement rte atomic APIs with
> atomic builtins
> 
> '__sync' builtins are deprecated; enable '__atomic' builtins for generic
> atomic operations.
> 
> Signed-off-by: Phil Yang <phil.yang at arm.com>
> Reviewed-by: Gavin Hu <gavin.hu at arm.com>
> Tested-by: Phil Yang <phil.yang at arm.com>
> 
> ---
>  lib/librte_eal/common/include/generic/rte_atomic.h | 80 ++++++++++++++++++++++
>  1 file changed, 80 insertions(+)
> 
> diff --git a/lib/librte_eal/common/include/generic/rte_atomic.h b/lib/librte_eal/common/include/generic/rte_atomic.h
> index b99ba46..260cdf3 100644
> --- a/lib/librte_eal/common/include/generic/rte_atomic.h
> +++ b/lib/librte_eal/common/include/generic/rte_atomic.h
> @@ -186,7 +186,12 @@ rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src);
>  static inline int
>  rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src)
>  {
> +#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
>  	return __sync_bool_compare_and_swap(dst, exp, src);
> +#else
> +	return __atomic_compare_exchange(dst, &exp, &src, 0, __ATOMIC_ACQUIRE,
> +			__ATOMIC_ACQUIRE) ? 1 : 0;
> +#endif
>  }
>  #endif
> 
> @@ -283,7 +288,11 @@ rte_atomic16_set(rte_atomic16_t *v, int16_t new_value)
>  static inline void
>  rte_atomic16_add(rte_atomic16_t *v, int16_t inc)
>  {
> +#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
>  	__sync_fetch_and_add(&v->cnt, inc);
> +#else
> +	__atomic_fetch_add(&v->cnt, inc, __ATOMIC_ACQUIRE);
> +#endif
Could be __ATOMIC_RELAXED for a statistics use case.

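For example, a statistics-only caller could be rewritten at the call site as (a sketch, not part of this patch):

	/* Pure counter update: no synchronization implied, and RELAXED
	 * avoids the extra ordering that ACQUIRE imposes on weakly
	 * ordered architectures. */
	__atomic_fetch_add(&v->cnt, inc, __ATOMIC_RELAXED);
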
>  }
> 
>  /**
> @@ -297,7 +306,11 @@ rte_atomic16_add(rte_atomic16_t *v, int16_t inc)
>  static inline void
>  rte_atomic16_sub(rte_atomic16_t *v, int16_t dec)
>  {
> +#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
>  	__sync_fetch_and_sub(&v->cnt, dec);
> +#else
> +	__atomic_fetch_sub(&v->cnt, dec, __ATOMIC_ACQUIRE);
> +#endif
>  }
> 
>  /**
> @@ -350,7 +363,11 @@ rte_atomic16_dec(rte_atomic16_t *v)
>  static inline int16_t
>  rte_atomic16_add_return(rte_atomic16_t *v, int16_t inc)
>  {
> +#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
>  	return __sync_add_and_fetch(&v->cnt, inc);
> +#else
> +	return __atomic_add_fetch(&v->cnt, inc, __ATOMIC_ACQUIRE);
> +#endif
>  }
> 
>  /**
> @@ -370,7 +387,11 @@ rte_atomic16_add_return(rte_atomic16_t *v, int16_t inc)
>  static inline int16_t
>  rte_atomic16_sub_return(rte_atomic16_t *v, int16_t dec)
>  {
> +#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
>  	return __sync_sub_and_fetch(&v->cnt, dec);
> +#else
> +	return __atomic_sub_fetch(&v->cnt, dec, __ATOMIC_ACQUIRE);
> +#endif
>  }
> 
>  /**
> @@ -389,7 +410,11 @@ static inline int rte_atomic16_inc_and_test(rte_atomic16_t *v);
>  #ifdef RTE_FORCE_INTRINSICS
>  static inline int rte_atomic16_inc_and_test(rte_atomic16_t *v)
>  {
> +#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
>  	return __sync_add_and_fetch(&v->cnt, 1) == 0;
> +#else
> +	return __atomic_add_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE) == 0;
> +#endif
>  }
>  #endif
> 
> @@ -409,7 +434,11 @@ static inline int rte_atomic16_dec_and_test(rte_atomic16_t *v);
>  #ifdef RTE_FORCE_INTRINSICS
>  static inline int rte_atomic16_dec_and_test(rte_atomic16_t *v)
>  {
> +#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
>  	return __sync_sub_and_fetch(&v->cnt, 1) == 0;
> +#else
> +	return __atomic_sub_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE) == 0;
> +#endif
>  }
>  #endif
> 
> @@ -469,7 +498,13 @@ rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src);
>  static inline int
>  rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src)
>  {
> +#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
>  	return __sync_bool_compare_and_swap(dst, exp, src);
> +#else
> +	return __atomic_compare_exchange(dst, &exp, &src, 0, __ATOMIC_ACQUIRE,
> +			__ATOMIC_ACQUIRE) ? 1 : 0;
These memory orderings would depend on the use case.

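For example, a lock-style caller would want ACQUIRE only on the success path; a hypothetical sketch (try_lock is a made-up example, not DPDK code):

	#include <stdbool.h>
	#include <stdint.h>

	static inline bool try_lock(uint32_t *lock)
	{
		uint32_t expected = 0;

		/* ACQUIRE on success keeps the critical section from being
		 * reordered above the lock acquisition; a failed CAS
		 * synchronizes nothing, so RELAXED is enough there. */
		return __atomic_compare_exchange_n(lock, &expected, 1,
				0 /* strong */, __ATOMIC_ACQUIRE,
				__ATOMIC_RELAXED);
	}
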
> +#endif
> +
>  }
>  #endif
> 
> @@ -566,7 +601,11 @@ rte_atomic32_set(rte_atomic32_t *v, int32_t new_value)
>  static inline void
>  rte_atomic32_add(rte_atomic32_t *v, int32_t inc)
>  {
> +#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
>  	__sync_fetch_and_add(&v->cnt, inc);
> +#else
> +	__atomic_fetch_add(&v->cnt, inc, __ATOMIC_ACQUIRE);
> +#endif
>  }
> 
>  /**
> @@ -580,7 +619,11 @@ rte_atomic32_add(rte_atomic32_t *v, int32_t inc)
>  static inline void
>  rte_atomic32_sub(rte_atomic32_t *v, int32_t dec)
>  {
> +#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
>  	__sync_fetch_and_sub(&v->cnt, dec);
> +#else
> +	__atomic_fetch_sub(&v->cnt, dec, __ATOMIC_ACQUIRE);
> +#endif
>  }
> 
>  /**
> @@ -633,7 +676,11 @@ rte_atomic32_dec(rte_atomic32_t *v)
>  static inline int32_t
>  rte_atomic32_add_return(rte_atomic32_t *v, int32_t inc)
>  {
> +#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
>  	return __sync_add_and_fetch(&v->cnt, inc);
> +#else
> +	return __atomic_add_fetch(&v->cnt, inc, __ATOMIC_ACQUIRE);
> +#endif
>  }
> 
>  /**
> @@ -653,7 +700,11 @@ rte_atomic32_add_return(rte_atomic32_t *v, int32_t inc)
>  static inline int32_t
>  rte_atomic32_sub_return(rte_atomic32_t *v, int32_t dec)
>  {
> +#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
>  	return __sync_sub_and_fetch(&v->cnt, dec);
> +#else
> +	return __atomic_sub_fetch(&v->cnt, dec, __ATOMIC_ACQUIRE);
> +#endif
>  }
> 
>  /**
> @@ -672,7 +723,11 @@ static inline int rte_atomic32_inc_and_test(rte_atomic32_t *v);
>  #ifdef RTE_FORCE_INTRINSICS
>  static inline int rte_atomic32_inc_and_test(rte_atomic32_t *v)
>  {
> +#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
>  	return __sync_add_and_fetch(&v->cnt, 1) == 0;
> +#else
> +	return __atomic_add_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE) == 0;
> +#endif
>  }
>  #endif
> 
> @@ -692,7 +747,11 @@ static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v);
>  #ifdef RTE_FORCE_INTRINSICS
>  static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v)
>  {
> +#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
>  	return __sync_sub_and_fetch(&v->cnt, 1) == 0;
> +#else
> +	return __atomic_sub_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE) == 0;
> +#endif
>  }
>  #endif
> 
> @@ -751,7 +810,12 @@ rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src);
>  static inline int
>  rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src)
>  {
> +#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
>  	return __sync_bool_compare_and_swap(dst, exp, src);
> +#else
> +	return __atomic_compare_exchange(dst, &exp, &src, 0, __ATOMIC_ACQUIRE,
> +			__ATOMIC_ACQUIRE) ? 1 : 0;
> +#endif
>  }
>  #endif
> 
> @@ -902,7 +966,11 @@ rte_atomic64_add(rte_atomic64_t *v, int64_t inc);
>  static inline void
>  rte_atomic64_add(rte_atomic64_t *v, int64_t inc)
>  {
> +#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
>  	__sync_fetch_and_add(&v->cnt, inc);
> +#else
> +	__atomic_fetch_add(&v->cnt, inc, __ATOMIC_ACQUIRE);
> +#endif
>  }
>  #endif
> 
> @@ -921,7 +989,11 @@ rte_atomic64_sub(rte_atomic64_t *v, int64_t dec);
>  static inline void
>  rte_atomic64_sub(rte_atomic64_t *v, int64_t dec)
>  {
> +#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
>  	__sync_fetch_and_sub(&v->cnt, dec);
> +#else
> +	__atomic_fetch_sub(&v->cnt, dec, __ATOMIC_ACQUIRE);
> +#endif
>  }
>  #endif
> 
> @@ -979,7 +1051,11 @@ rte_atomic64_add_return(rte_atomic64_t *v, int64_t inc);
>  static inline int64_t
>  rte_atomic64_add_return(rte_atomic64_t *v, int64_t inc)
>  {
> +#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
>  	return __sync_add_and_fetch(&v->cnt, inc);
> +#else
> +	return __atomic_add_fetch(&v->cnt, inc, __ATOMIC_ACQUIRE);
> +#endif
>  }
>  #endif
> 
> @@ -1003,7 +1079,11 @@ rte_atomic64_sub_return(rte_atomic64_t *v, int64_t dec);
>  static inline int64_t
>  rte_atomic64_sub_return(rte_atomic64_t *v, int64_t dec)
>  {
> +#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
>  	return __sync_sub_and_fetch(&v->cnt, dec);
> +#else
> +	return __atomic_sub_fetch(&v->cnt, dec, __ATOMIC_ACQUIRE);
> +#endif
>  }
>  #endif
> 
> --
> 2.7.4
