[PATCH v6 01/39] eal: use C11 alignas

Konstantin Ananyev konstantin.ananyev at huawei.com
Tue Feb 27 10:34:49 CET 2024


> Subject: [PATCH v6 01/39] eal: use C11 alignas
> 
> The current location used for __rte_aligned(a) for alignment of types
> and variables is not compatible with MSVC. There is only a single
> location accepted by both toolchains.
> 
> For variables, standard C11 offers alignas(a), which is supported by
> conformant compilers, i.e. both MSVC and GCC.
> 
> For types, the standard offers no alignment facility that interoperates
> compatibly between C and C++, but the desired alignment may be achieved
> by relocating __rte_aligned(a) to the aforementioned location accepted
> by all currently supported toolchains.
> 
> To allow alignment for both compilers do the following:
> 
> * Expand __rte_aligned(a) to __declspec(align(a)) when building
>   with MSVC.
> 
> * Move __rte_aligned from the end of {struct,union} definitions to
>   be between {struct,union} and tag.
> 
>   The placement between {struct,union} and the tag allows the desired
>   alignment to be imparted on the type regardless of the toolchain being
>   used, i.e. for all of the GCC, LLVM and MSVC compilers, building both
>   C and C++.
> 
> * Replace use of __rte_aligned(a) on variables/fields with alignas(a).
> 
> Signed-off-by: Tyler Retzlaff <roretzla at linux.microsoft.com>
> Acked-by: Morten Brørup <mb at smartsharesystems.com>
> Acked-by: Bruce Richardson <bruce.richardson at intel.com>
> ---
>  lib/eal/arm/include/rte_vect.h       |  4 ++--
>  lib/eal/common/malloc_elem.h         |  4 ++--
>  lib/eal/common/malloc_heap.h         |  4 ++--
>  lib/eal/common/rte_keepalive.c       |  3 ++-
>  lib/eal/common/rte_random.c          |  4 ++--
>  lib/eal/common/rte_service.c         |  8 ++++----
>  lib/eal/include/generic/rte_atomic.h |  4 ++--
>  lib/eal/include/rte_common.h         | 23 +++++++++++++++--------
>  lib/eal/loongarch/include/rte_vect.h |  8 ++++----
>  lib/eal/ppc/include/rte_vect.h       |  4 ++--
>  lib/eal/riscv/include/rte_vect.h     |  4 ++--
>  lib/eal/x86/include/rte_vect.h       |  4 ++--
>  lib/eal/x86/rte_power_intrinsics.c   | 10 ++++++----
>  13 files changed, 47 insertions(+), 37 deletions(-)
> 
> diff --git a/lib/eal/arm/include/rte_vect.h b/lib/eal/arm/include/rte_vect.h
> index 8cfe4bd..c97d299 100644
> --- a/lib/eal/arm/include/rte_vect.h
> +++ b/lib/eal/arm/include/rte_vect.h
> @@ -24,14 +24,14 @@
>  #define	XMM_SIZE	(sizeof(xmm_t))
>  #define	XMM_MASK	(XMM_SIZE - 1)
> 
> -typedef union rte_xmm {
> +typedef union __rte_aligned(16) rte_xmm {
>  	xmm_t    x;
>  	uint8_t  u8[XMM_SIZE / sizeof(uint8_t)];
>  	uint16_t u16[XMM_SIZE / sizeof(uint16_t)];
>  	uint32_t u32[XMM_SIZE / sizeof(uint32_t)];
>  	uint64_t u64[XMM_SIZE / sizeof(uint64_t)];
>  	double   pd[XMM_SIZE / sizeof(double)];
> -} __rte_aligned(16) rte_xmm_t;
> +} rte_xmm_t;
> 
>  #if defined(RTE_ARCH_ARM) && defined(RTE_ARCH_32)
>  /* NEON intrinsic vqtbl1q_u8() is not supported in ARMv7-A(AArch32) */
> diff --git a/lib/eal/common/malloc_elem.h b/lib/eal/common/malloc_elem.h
> index 952ce73..c7ff671 100644
> --- a/lib/eal/common/malloc_elem.h
> +++ b/lib/eal/common/malloc_elem.h
> @@ -20,7 +20,7 @@ enum elem_state {
>  	ELEM_PAD  /* element is a padding-only header */
>  };
> 
> -struct malloc_elem {
> +struct __rte_cache_aligned malloc_elem {
>  	struct malloc_heap *heap;
>  	struct malloc_elem *volatile prev;
>  	/**< points to prev elem in memseg */
> @@ -48,7 +48,7 @@ struct malloc_elem {
>  	size_t user_size;
>  	uint64_t asan_cookie[2]; /* must be next to header_cookie */
>  #endif
> -} __rte_cache_aligned;
> +};
> 
>  static const unsigned int MALLOC_ELEM_HEADER_LEN = sizeof(struct malloc_elem);
> 
> diff --git a/lib/eal/common/malloc_heap.h b/lib/eal/common/malloc_heap.h
> index 8f3ab57..0c49588 100644
> --- a/lib/eal/common/malloc_heap.h
> +++ b/lib/eal/common/malloc_heap.h
> @@ -21,7 +21,7 @@
>  /**
>   * Structure to hold malloc heap
>   */
> -struct malloc_heap {
> +struct __rte_cache_aligned malloc_heap {
>  	rte_spinlock_t lock;
>  	LIST_HEAD(, malloc_elem) free_head[RTE_HEAP_NUM_FREELISTS];
>  	struct malloc_elem *volatile first;
> @@ -31,7 +31,7 @@ struct malloc_heap {
>  	unsigned int socket_id;
>  	size_t total_size;
>  	char name[RTE_HEAP_NAME_MAX_LEN];
> -} __rte_cache_aligned;
> +};
> 
>  void *
>  malloc_heap_alloc(const char *type, size_t size, int socket, unsigned int flags,
> diff --git a/lib/eal/common/rte_keepalive.c b/lib/eal/common/rte_keepalive.c
> index f6db973..391c1be 100644
> --- a/lib/eal/common/rte_keepalive.c
> +++ b/lib/eal/common/rte_keepalive.c
> @@ -2,6 +2,7 @@
>   * Copyright(c) 2015-2016 Intel Corporation
>   */
> 
> +#include <stdalign.h>
>  #include <inttypes.h>
> 
>  #include <rte_common.h>
> @@ -19,7 +20,7 @@ struct rte_keepalive {
>  		/*
>  		 * Each element must be cache aligned to prevent false sharing.
>  		 */
> -		enum rte_keepalive_state core_state __rte_cache_aligned;
> +		alignas(RTE_CACHE_LINE_SIZE) enum rte_keepalive_state core_state;
>  	} live_data[RTE_KEEPALIVE_MAXCORES];
> 
>  	/** Last-seen-alive timestamps */
> diff --git a/lib/eal/common/rte_random.c b/lib/eal/common/rte_random.c
> index 7709b8f..90e91b3 100644
> --- a/lib/eal/common/rte_random.c
> +++ b/lib/eal/common/rte_random.c
> @@ -13,14 +13,14 @@
>  #include <rte_lcore.h>
>  #include <rte_random.h>
> 
> -struct rte_rand_state {
> +struct __rte_cache_aligned rte_rand_state {
>  	uint64_t z1;
>  	uint64_t z2;
>  	uint64_t z3;
>  	uint64_t z4;
>  	uint64_t z5;
>  	RTE_CACHE_GUARD;
> -} __rte_cache_aligned;
> +};
> 
>  /* One instance each for every lcore id-equipped thread, and one
>   * additional instance to be shared by all others threads (i.e., all
> diff --git a/lib/eal/common/rte_service.c b/lib/eal/common/rte_service.c
> index d959c91..5637993 100644
> --- a/lib/eal/common/rte_service.c
> +++ b/lib/eal/common/rte_service.c
> @@ -32,7 +32,7 @@
>  #define RUNSTATE_RUNNING 1
> 
>  /* internal representation of a service */
> -struct rte_service_spec_impl {
> +struct __rte_cache_aligned rte_service_spec_impl {
>  	/* public part of the struct */
>  	struct rte_service_spec spec;
> 
> @@ -53,7 +53,7 @@ struct rte_service_spec_impl {
>  	 * on currently.
>  	 */
>  	RTE_ATOMIC(uint32_t) num_mapped_cores;
> -} __rte_cache_aligned;
> +};
> 
>  struct service_stats {
>  	RTE_ATOMIC(uint64_t) calls;
> @@ -61,7 +61,7 @@ struct service_stats {
>  };
> 
>  /* the internal values of a service core */
> -struct core_state {
> +struct __rte_cache_aligned core_state {
>  	/* map of services IDs are run on this core */
>  	uint64_t service_mask;
>  	RTE_ATOMIC(uint8_t) runstate; /* running or stopped */
> @@ -71,7 +71,7 @@ struct core_state {
>  	RTE_ATOMIC(uint64_t) loops;
>  	RTE_ATOMIC(uint64_t) cycles;
>  	struct service_stats service_stats[RTE_SERVICE_NUM_MAX];
> -} __rte_cache_aligned;
> +};
> 
>  static uint32_t rte_service_count;
>  static struct rte_service_spec_impl *rte_services;
> diff --git a/lib/eal/include/generic/rte_atomic.h b/lib/eal/include/generic/rte_atomic.h
> index 0e639da..f859707 100644
> --- a/lib/eal/include/generic/rte_atomic.h
> +++ b/lib/eal/include/generic/rte_atomic.h
> @@ -1094,7 +1094,7 @@ static inline void rte_atomic64_clear(rte_atomic64_t *v)
>  /**
>   * 128-bit integer structure.
>   */
> -typedef struct {
> +typedef struct __rte_aligned(16) {
>  	union {
>  		uint64_t val[2];
>  #ifdef RTE_ARCH_64
> @@ -1103,7 +1103,7 @@ static inline void rte_atomic64_clear(rte_atomic64_t *v)
>  #endif
>  #endif
>  	};
> -} __rte_aligned(16) rte_int128_t;
> +} rte_int128_t;
> 
>  #ifdef __DOXYGEN__
> 
> diff --git a/lib/eal/include/rte_common.h b/lib/eal/include/rte_common.h
> index 1cc1222..0908aa0 100644
> --- a/lib/eal/include/rte_common.h
> +++ b/lib/eal/include/rte_common.h
> @@ -12,6 +12,8 @@
>   * for DPDK.
>   */
> 
> +#include <stdalign.h>
> +
>  #ifdef __cplusplus
>  extern "C" {
>  #endif
> @@ -63,10 +65,19 @@
>  #endif
> 
>  /**
> - * Force alignment
> + * Force type alignment
> + *
> + * This macro should be used when alignment of a struct or union type
> + * is required. For toolchain compatibility it should appear between
> + * the {struct,union} keyword and tag. e.g.
> + *
> + *   struct __rte_aligned(8) tag { ... };
> + *
> + * If alignment of an object/variable is required then this macro should
> + * not be used, instead prefer C11 alignas(a).
>   */
>  #ifdef RTE_TOOLCHAIN_MSVC
> -#define __rte_aligned(a)
> +#define __rte_aligned(a) __declspec(align(a))
>  #else
>  #define __rte_aligned(a) __attribute__((__aligned__(a)))
>  #endif
> @@ -538,18 +549,14 @@ static void __attribute__((destructor(RTE_PRIO(prio)), used)) func(void)
>  #define RTE_CACHE_LINE_MIN_SIZE 64
> 
>  /** Force alignment to cache line. */
> -#ifdef RTE_TOOLCHAIN_MSVC
> -#define __rte_cache_aligned
> -#else
>  #define __rte_cache_aligned __rte_aligned(RTE_CACHE_LINE_SIZE)
> -#endif
> 
>  /** Force minimum cache line alignment. */
>  #define __rte_cache_min_aligned __rte_aligned(RTE_CACHE_LINE_MIN_SIZE)
> 
>  #define _RTE_CACHE_GUARD_HELPER2(unique) \
> -	char cache_guard_ ## unique[RTE_CACHE_LINE_SIZE * RTE_CACHE_GUARD_LINES] \
> -	__rte_cache_aligned
> +	alignas(RTE_CACHE_LINE_SIZE) \
> +	char cache_guard_ ## unique[RTE_CACHE_LINE_SIZE * RTE_CACHE_GUARD_LINES]
>  #define _RTE_CACHE_GUARD_HELPER1(unique) _RTE_CACHE_GUARD_HELPER2(unique)
>  /**
>   * Empty cache lines, to guard against false sharing-like effects
> diff --git a/lib/eal/loongarch/include/rte_vect.h b/lib/eal/loongarch/include/rte_vect.h
> index 1546515..aa334e8 100644
> --- a/lib/eal/loongarch/include/rte_vect.h
> +++ b/lib/eal/loongarch/include/rte_vect.h
> @@ -15,7 +15,7 @@
> 
>  #define RTE_VECT_DEFAULT_SIMD_BITWIDTH RTE_VECT_SIMD_DISABLED
> 
> -typedef union xmm {
> +typedef union __rte_aligned(16) xmm {
>  	int8_t   i8[16];
>  	int16_t  i16[8];
>  	int32_t  i32[4];
> @@ -25,19 +25,19 @@
>  	uint32_t u32[4];
>  	uint64_t u64[2];
>  	double   pd[2];
> -} __rte_aligned(16) xmm_t;
> +} xmm_t;
> 
>  #define XMM_SIZE        (sizeof(xmm_t))
>  #define XMM_MASK        (XMM_SIZE - 1)
> 
> -typedef union rte_xmm {
> +typedef union __rte_aligned(16) rte_xmm {
>  	xmm_t	 x;
>  	uint8_t	 u8[XMM_SIZE / sizeof(uint8_t)];
>  	uint16_t u16[XMM_SIZE / sizeof(uint16_t)];
>  	uint32_t u32[XMM_SIZE / sizeof(uint32_t)];
>  	uint64_t u64[XMM_SIZE / sizeof(uint64_t)];
>  	double   pd[XMM_SIZE / sizeof(double)];
> -} __rte_aligned(16) rte_xmm_t;
> +} rte_xmm_t;
> 
>  static inline xmm_t
>  vect_load_128(void *p)
> diff --git a/lib/eal/ppc/include/rte_vect.h b/lib/eal/ppc/include/rte_vect.h
> index a5f009b..c8bace2 100644
> --- a/lib/eal/ppc/include/rte_vect.h
> +++ b/lib/eal/ppc/include/rte_vect.h
> @@ -22,14 +22,14 @@
>  #define	XMM_SIZE	(sizeof(xmm_t))
>  #define	XMM_MASK	(XMM_SIZE - 1)
> 
> -typedef union rte_xmm {
> +typedef union __rte_aligned(16) rte_xmm {
>  	xmm_t    x;
>  	uint8_t  u8[XMM_SIZE / sizeof(uint8_t)];
>  	uint16_t u16[XMM_SIZE / sizeof(uint16_t)];
>  	uint32_t u32[XMM_SIZE / sizeof(uint32_t)];
>  	uint64_t u64[XMM_SIZE / sizeof(uint64_t)];
>  	double   pd[XMM_SIZE / sizeof(double)];
> -} __rte_aligned(16) rte_xmm_t;
> +} rte_xmm_t;
> 
>  #ifdef __cplusplus
>  }
> diff --git a/lib/eal/riscv/include/rte_vect.h b/lib/eal/riscv/include/rte_vect.h
> index da9092a..6df10fa 100644
> --- a/lib/eal/riscv/include/rte_vect.h
> +++ b/lib/eal/riscv/include/rte_vect.h
> @@ -22,14 +22,14 @@
>  #define XMM_SIZE	(sizeof(xmm_t))
>  #define XMM_MASK	(XMM_SIZE - 1)
> 
> -typedef union rte_xmm {
> +typedef union __rte_aligned(16) rte_xmm {
>  	xmm_t		x;
>  	uint8_t		u8[XMM_SIZE / sizeof(uint8_t)];
>  	uint16_t	u16[XMM_SIZE / sizeof(uint16_t)];
>  	uint32_t	u32[XMM_SIZE / sizeof(uint32_t)];
>  	uint64_t	u64[XMM_SIZE / sizeof(uint64_t)];
>  	double		pd[XMM_SIZE / sizeof(double)];
> -} __rte_aligned(16) rte_xmm_t;
> +} rte_xmm_t;
> 
>  static inline xmm_t
>  vect_load_128(void *p)
> diff --git a/lib/eal/x86/include/rte_vect.h b/lib/eal/x86/include/rte_vect.h
> index 560f9e4..a1a537e 100644
> --- a/lib/eal/x86/include/rte_vect.h
> +++ b/lib/eal/x86/include/rte_vect.h
> @@ -91,7 +91,7 @@
>  #define RTE_X86_ZMM_SIZE	(sizeof(__m512i))
>  #define RTE_X86_ZMM_MASK	(RTE_X86_ZMM_SIZE - 1)
> 
> -typedef union __rte_x86_zmm {
> +typedef union __rte_aligned(RTE_X86_ZMM_SIZE) __rte_x86_zmm {
>  	__m512i	 z;
>  	ymm_t    y[RTE_X86_ZMM_SIZE / sizeof(ymm_t)];
>  	xmm_t    x[RTE_X86_ZMM_SIZE / sizeof(xmm_t)];
> @@ -100,7 +100,7 @@
>  	uint32_t u32[RTE_X86_ZMM_SIZE / sizeof(uint32_t)];
>  	uint64_t u64[RTE_X86_ZMM_SIZE / sizeof(uint64_t)];
>  	double   pd[RTE_X86_ZMM_SIZE / sizeof(double)];
> -} __rte_aligned(RTE_X86_ZMM_SIZE) __rte_x86_zmm_t;
> +} __rte_x86_zmm_t;
> 
>  #endif /* __AVX512F__ */
> 
> diff --git a/lib/eal/x86/rte_power_intrinsics.c b/lib/eal/x86/rte_power_intrinsics.c
> index 532a2e6..6d9b642 100644
> --- a/lib/eal/x86/rte_power_intrinsics.c
> +++ b/lib/eal/x86/rte_power_intrinsics.c
> @@ -2,6 +2,8 @@
>   * Copyright(c) 2020 Intel Corporation
>   */
> 
> +#include <stdalign.h>
> +
>  #include <rte_common.h>
>  #include <rte_lcore.h>
>  #include <rte_rtm.h>
> @@ -12,10 +14,10 @@
>  /*
>   * Per-lcore structure holding current status of C0.2 sleeps.
>   */
> -static struct power_wait_status {
> +static alignas(RTE_CACHE_LINE_SIZE) struct power_wait_status {
>  	rte_spinlock_t lock;
>  	volatile void *monitor_addr; /**< NULL if not currently sleeping */
> -} __rte_cache_aligned wait_status[RTE_MAX_LCORE];
> +} wait_status[RTE_MAX_LCORE];
> 
>  /*
>   * This function uses UMONITOR/UMWAIT instructions and will enter C0.2 state.
> @@ -85,10 +87,10 @@ static void amd_mwaitx(const uint64_t timeout)
>  #endif
>  }
> 
> -static struct {
> +static alignas(RTE_CACHE_LINE_SIZE) struct {
>  	void (*mmonitor)(volatile void *addr);
>  	void (*mwait)(const uint64_t timeout);
> -} __rte_cache_aligned power_monitor_ops;
> +} power_monitor_ops;
> 
>  static inline void
>  __umwait_wakeup(volatile void *addr)
> --

Acked-by: Konstantin Ananyev <konstantin.ananyev at huawei.com>

> 1.8.3.1



More information about the dev mailing list