[PATCH v6 01/39] eal: use C11 alignas
Konstantin Ananyev
konstantin.ananyev at huawei.com
Tue Feb 27 10:34:49 CET 2024
> Subject: [PATCH v6 01/39] eal: use C11 alignas
>
> The current location used for __rte_aligned(a) for alignment of types
> and variables is not compatible with MSVC. There is only a single
> location accepted by both toolchains.
>
> For variables, standard C11 offers alignas(a), which is supported by
> conformant compilers, i.e. both MSVC and GCC.
>
> For types the standard offers no alignment facility that compatibly
> interoperates with C and C++, but the desired alignment may be
> achieved by relocating the placement of __rte_aligned(a) to the
> aforementioned location accepted by all currently supported toolchains.
>
> To allow alignment for both compilers do the following:
>
> * Expand __rte_aligned(a) to __declspec(align(a)) when building
> with MSVC.
>
> * Move __rte_aligned from the end of {struct,union} definitions to
> be between {struct,union} and tag.
>
> The placement between {struct,union} and the tag allows the desired
> alignment to be imparted on the type regardless of the toolchain being
> used for all of GCC, LLVM, MSVC compilers building both C and C++.
>
> * Replace use of __rte_aligned(a) on variables/fields with alignas(a).
>
> Signed-off-by: Tyler Retzlaff <roretzla at linux.microsoft.com>
> Acked-by: Morten Brørup <mb at smartsharesystems.com>
> Acked-by: Bruce Richardson <bruce.richardson at intel.com>
> ---
> lib/eal/arm/include/rte_vect.h | 4 ++--
> lib/eal/common/malloc_elem.h | 4 ++--
> lib/eal/common/malloc_heap.h | 4 ++--
> lib/eal/common/rte_keepalive.c | 3 ++-
> lib/eal/common/rte_random.c | 4 ++--
> lib/eal/common/rte_service.c | 8 ++++----
> lib/eal/include/generic/rte_atomic.h | 4 ++--
> lib/eal/include/rte_common.h | 23 +++++++++++++++--------
> lib/eal/loongarch/include/rte_vect.h | 8 ++++----
> lib/eal/ppc/include/rte_vect.h | 4 ++--
> lib/eal/riscv/include/rte_vect.h | 4 ++--
> lib/eal/x86/include/rte_vect.h | 4 ++--
> lib/eal/x86/rte_power_intrinsics.c | 10 ++++++----
> 13 files changed, 47 insertions(+), 37 deletions(-)
>
> diff --git a/lib/eal/arm/include/rte_vect.h b/lib/eal/arm/include/rte_vect.h
> index 8cfe4bd..c97d299 100644
> --- a/lib/eal/arm/include/rte_vect.h
> +++ b/lib/eal/arm/include/rte_vect.h
> @@ -24,14 +24,14 @@
> #define XMM_SIZE (sizeof(xmm_t))
> #define XMM_MASK (XMM_SIZE - 1)
>
> -typedef union rte_xmm {
> +typedef union __rte_aligned(16) rte_xmm {
> xmm_t x;
> uint8_t u8[XMM_SIZE / sizeof(uint8_t)];
> uint16_t u16[XMM_SIZE / sizeof(uint16_t)];
> uint32_t u32[XMM_SIZE / sizeof(uint32_t)];
> uint64_t u64[XMM_SIZE / sizeof(uint64_t)];
> double pd[XMM_SIZE / sizeof(double)];
> -} __rte_aligned(16) rte_xmm_t;
> +} rte_xmm_t;
>
> #if defined(RTE_ARCH_ARM) && defined(RTE_ARCH_32)
> /* NEON intrinsic vqtbl1q_u8() is not supported in ARMv7-A(AArch32) */
> diff --git a/lib/eal/common/malloc_elem.h b/lib/eal/common/malloc_elem.h
> index 952ce73..c7ff671 100644
> --- a/lib/eal/common/malloc_elem.h
> +++ b/lib/eal/common/malloc_elem.h
> @@ -20,7 +20,7 @@ enum elem_state {
> ELEM_PAD /* element is a padding-only header */
> };
>
> -struct malloc_elem {
> +struct __rte_cache_aligned malloc_elem {
> struct malloc_heap *heap;
> struct malloc_elem *volatile prev;
> /**< points to prev elem in memseg */
> @@ -48,7 +48,7 @@ struct malloc_elem {
> size_t user_size;
> uint64_t asan_cookie[2]; /* must be next to header_cookie */
> #endif
> -} __rte_cache_aligned;
> +};
>
> static const unsigned int MALLOC_ELEM_HEADER_LEN = sizeof(struct malloc_elem);
>
> diff --git a/lib/eal/common/malloc_heap.h b/lib/eal/common/malloc_heap.h
> index 8f3ab57..0c49588 100644
> --- a/lib/eal/common/malloc_heap.h
> +++ b/lib/eal/common/malloc_heap.h
> @@ -21,7 +21,7 @@
> /**
> * Structure to hold malloc heap
> */
> -struct malloc_heap {
> +struct __rte_cache_aligned malloc_heap {
> rte_spinlock_t lock;
> LIST_HEAD(, malloc_elem) free_head[RTE_HEAP_NUM_FREELISTS];
> struct malloc_elem *volatile first;
> @@ -31,7 +31,7 @@ struct malloc_heap {
> unsigned int socket_id;
> size_t total_size;
> char name[RTE_HEAP_NAME_MAX_LEN];
> -} __rte_cache_aligned;
> +};
>
> void *
> malloc_heap_alloc(const char *type, size_t size, int socket, unsigned int flags,
> diff --git a/lib/eal/common/rte_keepalive.c b/lib/eal/common/rte_keepalive.c
> index f6db973..391c1be 100644
> --- a/lib/eal/common/rte_keepalive.c
> +++ b/lib/eal/common/rte_keepalive.c
> @@ -2,6 +2,7 @@
> * Copyright(c) 2015-2016 Intel Corporation
> */
>
> +#include <stdalign.h>
> #include <inttypes.h>
>
> #include <rte_common.h>
> @@ -19,7 +20,7 @@ struct rte_keepalive {
> /*
> * Each element must be cache aligned to prevent false sharing.
> */
> - enum rte_keepalive_state core_state __rte_cache_aligned;
> + alignas(RTE_CACHE_LINE_SIZE) enum rte_keepalive_state core_state;
> } live_data[RTE_KEEPALIVE_MAXCORES];
>
> /** Last-seen-alive timestamps */
> diff --git a/lib/eal/common/rte_random.c b/lib/eal/common/rte_random.c
> index 7709b8f..90e91b3 100644
> --- a/lib/eal/common/rte_random.c
> +++ b/lib/eal/common/rte_random.c
> @@ -13,14 +13,14 @@
> #include <rte_lcore.h>
> #include <rte_random.h>
>
> -struct rte_rand_state {
> +struct __rte_cache_aligned rte_rand_state {
> uint64_t z1;
> uint64_t z2;
> uint64_t z3;
> uint64_t z4;
> uint64_t z5;
> RTE_CACHE_GUARD;
> -} __rte_cache_aligned;
> +};
>
> /* One instance each for every lcore id-equipped thread, and one
> * additional instance to be shared by all others threads (i.e., all
> diff --git a/lib/eal/common/rte_service.c b/lib/eal/common/rte_service.c
> index d959c91..5637993 100644
> --- a/lib/eal/common/rte_service.c
> +++ b/lib/eal/common/rte_service.c
> @@ -32,7 +32,7 @@
> #define RUNSTATE_RUNNING 1
>
> /* internal representation of a service */
> -struct rte_service_spec_impl {
> +struct __rte_cache_aligned rte_service_spec_impl {
> /* public part of the struct */
> struct rte_service_spec spec;
>
> @@ -53,7 +53,7 @@ struct rte_service_spec_impl {
> * on currently.
> */
> RTE_ATOMIC(uint32_t) num_mapped_cores;
> -} __rte_cache_aligned;
> +};
>
> struct service_stats {
> RTE_ATOMIC(uint64_t) calls;
> @@ -61,7 +61,7 @@ struct service_stats {
> };
>
> /* the internal values of a service core */
> -struct core_state {
> +struct __rte_cache_aligned core_state {
> /* map of services IDs are run on this core */
> uint64_t service_mask;
> RTE_ATOMIC(uint8_t) runstate; /* running or stopped */
> @@ -71,7 +71,7 @@ struct core_state {
> RTE_ATOMIC(uint64_t) loops;
> RTE_ATOMIC(uint64_t) cycles;
> struct service_stats service_stats[RTE_SERVICE_NUM_MAX];
> -} __rte_cache_aligned;
> +};
>
> static uint32_t rte_service_count;
> static struct rte_service_spec_impl *rte_services;
> diff --git a/lib/eal/include/generic/rte_atomic.h b/lib/eal/include/generic/rte_atomic.h
> index 0e639da..f859707 100644
> --- a/lib/eal/include/generic/rte_atomic.h
> +++ b/lib/eal/include/generic/rte_atomic.h
> @@ -1094,7 +1094,7 @@ static inline void rte_atomic64_clear(rte_atomic64_t *v)
> /**
> * 128-bit integer structure.
> */
> -typedef struct {
> +typedef struct __rte_aligned(16) {
> union {
> uint64_t val[2];
> #ifdef RTE_ARCH_64
> @@ -1103,7 +1103,7 @@ static inline void rte_atomic64_clear(rte_atomic64_t *v)
> #endif
> #endif
> };
> -} __rte_aligned(16) rte_int128_t;
> +} rte_int128_t;
>
> #ifdef __DOXYGEN__
>
> diff --git a/lib/eal/include/rte_common.h b/lib/eal/include/rte_common.h
> index 1cc1222..0908aa0 100644
> --- a/lib/eal/include/rte_common.h
> +++ b/lib/eal/include/rte_common.h
> @@ -12,6 +12,8 @@
> * for DPDK.
> */
>
> +#include <stdalign.h>
> +
> #ifdef __cplusplus
> extern "C" {
> #endif
> @@ -63,10 +65,19 @@
> #endif
>
> /**
> - * Force alignment
> + * Force type alignment
> + *
> + * This macro should be used when alignment of a struct or union type
> + * is required. For toolchain compatibility it should appear between
> + * the {struct,union} keyword and tag. e.g.
> + *
> + * struct __rte_aligned(8) tag { ... };
> + *
> + * If alignment of an object/variable is required then this macro should
> + * not be used, instead prefer C11 alignas(a).
> */
> #ifdef RTE_TOOLCHAIN_MSVC
> -#define __rte_aligned(a)
> +#define __rte_aligned(a) __declspec(align(a))
> #else
> #define __rte_aligned(a) __attribute__((__aligned__(a)))
> #endif
> @@ -538,18 +549,14 @@ static void __attribute__((destructor(RTE_PRIO(prio)), used)) func(void)
> #define RTE_CACHE_LINE_MIN_SIZE 64
>
> /** Force alignment to cache line. */
> -#ifdef RTE_TOOLCHAIN_MSVC
> -#define __rte_cache_aligned
> -#else
> #define __rte_cache_aligned __rte_aligned(RTE_CACHE_LINE_SIZE)
> -#endif
>
> /** Force minimum cache line alignment. */
> #define __rte_cache_min_aligned __rte_aligned(RTE_CACHE_LINE_MIN_SIZE)
>
> #define _RTE_CACHE_GUARD_HELPER2(unique) \
> - char cache_guard_ ## unique[RTE_CACHE_LINE_SIZE * RTE_CACHE_GUARD_LINES] \
> - __rte_cache_aligned
> + alignas(RTE_CACHE_LINE_SIZE) \
> + char cache_guard_ ## unique[RTE_CACHE_LINE_SIZE * RTE_CACHE_GUARD_LINES]
> #define _RTE_CACHE_GUARD_HELPER1(unique) _RTE_CACHE_GUARD_HELPER2(unique)
> /**
> * Empty cache lines, to guard against false sharing-like effects
> diff --git a/lib/eal/loongarch/include/rte_vect.h b/lib/eal/loongarch/include/rte_vect.h
> index 1546515..aa334e8 100644
> --- a/lib/eal/loongarch/include/rte_vect.h
> +++ b/lib/eal/loongarch/include/rte_vect.h
> @@ -15,7 +15,7 @@
>
> #define RTE_VECT_DEFAULT_SIMD_BITWIDTH RTE_VECT_SIMD_DISABLED
>
> -typedef union xmm {
> +typedef union __rte_aligned(16) xmm {
> int8_t i8[16];
> int16_t i16[8];
> int32_t i32[4];
> @@ -25,19 +25,19 @@
> uint32_t u32[4];
> uint64_t u64[2];
> double pd[2];
> -} __rte_aligned(16) xmm_t;
> +} xmm_t;
>
> #define XMM_SIZE (sizeof(xmm_t))
> #define XMM_MASK (XMM_SIZE - 1)
>
> -typedef union rte_xmm {
> +typedef union __rte_aligned(16) rte_xmm {
> xmm_t x;
> uint8_t u8[XMM_SIZE / sizeof(uint8_t)];
> uint16_t u16[XMM_SIZE / sizeof(uint16_t)];
> uint32_t u32[XMM_SIZE / sizeof(uint32_t)];
> uint64_t u64[XMM_SIZE / sizeof(uint64_t)];
> double pd[XMM_SIZE / sizeof(double)];
> -} __rte_aligned(16) rte_xmm_t;
> +} rte_xmm_t;
>
> static inline xmm_t
> vect_load_128(void *p)
> diff --git a/lib/eal/ppc/include/rte_vect.h b/lib/eal/ppc/include/rte_vect.h
> index a5f009b..c8bace2 100644
> --- a/lib/eal/ppc/include/rte_vect.h
> +++ b/lib/eal/ppc/include/rte_vect.h
> @@ -22,14 +22,14 @@
> #define XMM_SIZE (sizeof(xmm_t))
> #define XMM_MASK (XMM_SIZE - 1)
>
> -typedef union rte_xmm {
> +typedef union __rte_aligned(16) rte_xmm {
> xmm_t x;
> uint8_t u8[XMM_SIZE / sizeof(uint8_t)];
> uint16_t u16[XMM_SIZE / sizeof(uint16_t)];
> uint32_t u32[XMM_SIZE / sizeof(uint32_t)];
> uint64_t u64[XMM_SIZE / sizeof(uint64_t)];
> double pd[XMM_SIZE / sizeof(double)];
> -} __rte_aligned(16) rte_xmm_t;
> +} rte_xmm_t;
>
> #ifdef __cplusplus
> }
> diff --git a/lib/eal/riscv/include/rte_vect.h b/lib/eal/riscv/include/rte_vect.h
> index da9092a..6df10fa 100644
> --- a/lib/eal/riscv/include/rte_vect.h
> +++ b/lib/eal/riscv/include/rte_vect.h
> @@ -22,14 +22,14 @@
> #define XMM_SIZE (sizeof(xmm_t))
> #define XMM_MASK (XMM_SIZE - 1)
>
> -typedef union rte_xmm {
> +typedef union __rte_aligned(16) rte_xmm {
> xmm_t x;
> uint8_t u8[XMM_SIZE / sizeof(uint8_t)];
> uint16_t u16[XMM_SIZE / sizeof(uint16_t)];
> uint32_t u32[XMM_SIZE / sizeof(uint32_t)];
> uint64_t u64[XMM_SIZE / sizeof(uint64_t)];
> double pd[XMM_SIZE / sizeof(double)];
> -} __rte_aligned(16) rte_xmm_t;
> +} rte_xmm_t;
>
> static inline xmm_t
> vect_load_128(void *p)
> diff --git a/lib/eal/x86/include/rte_vect.h b/lib/eal/x86/include/rte_vect.h
> index 560f9e4..a1a537e 100644
> --- a/lib/eal/x86/include/rte_vect.h
> +++ b/lib/eal/x86/include/rte_vect.h
> @@ -91,7 +91,7 @@
> #define RTE_X86_ZMM_SIZE (sizeof(__m512i))
> #define RTE_X86_ZMM_MASK (RTE_X86_ZMM_SIZE - 1)
>
> -typedef union __rte_x86_zmm {
> +typedef union __rte_aligned(RTE_X86_ZMM_SIZE) __rte_x86_zmm {
> __m512i z;
> ymm_t y[RTE_X86_ZMM_SIZE / sizeof(ymm_t)];
> xmm_t x[RTE_X86_ZMM_SIZE / sizeof(xmm_t)];
> @@ -100,7 +100,7 @@
> uint32_t u32[RTE_X86_ZMM_SIZE / sizeof(uint32_t)];
> uint64_t u64[RTE_X86_ZMM_SIZE / sizeof(uint64_t)];
> double pd[RTE_X86_ZMM_SIZE / sizeof(double)];
> -} __rte_aligned(RTE_X86_ZMM_SIZE) __rte_x86_zmm_t;
> +} __rte_x86_zmm_t;
>
> #endif /* __AVX512F__ */
>
> diff --git a/lib/eal/x86/rte_power_intrinsics.c b/lib/eal/x86/rte_power_intrinsics.c
> index 532a2e6..6d9b642 100644
> --- a/lib/eal/x86/rte_power_intrinsics.c
> +++ b/lib/eal/x86/rte_power_intrinsics.c
> @@ -2,6 +2,8 @@
> * Copyright(c) 2020 Intel Corporation
> */
>
> +#include <stdalign.h>
> +
> #include <rte_common.h>
> #include <rte_lcore.h>
> #include <rte_rtm.h>
> @@ -12,10 +14,10 @@
> /*
> * Per-lcore structure holding current status of C0.2 sleeps.
> */
> -static struct power_wait_status {
> +static alignas(RTE_CACHE_LINE_SIZE) struct power_wait_status {
> rte_spinlock_t lock;
> volatile void *monitor_addr; /**< NULL if not currently sleeping */
> -} __rte_cache_aligned wait_status[RTE_MAX_LCORE];
> +} wait_status[RTE_MAX_LCORE];
>
> /*
> * This function uses UMONITOR/UMWAIT instructions and will enter C0.2 state.
> @@ -85,10 +87,10 @@ static void amd_mwaitx(const uint64_t timeout)
> #endif
> }
>
> -static struct {
> +static alignas(RTE_CACHE_LINE_SIZE) struct {
> void (*mmonitor)(volatile void *addr);
> void (*mwait)(const uint64_t timeout);
> -} __rte_cache_aligned power_monitor_ops;
> +} power_monitor_ops;
>
> static inline void
> __umwait_wakeup(volatile void *addr)
> --
Acked-by: Konstantin Ananyev <konstantin.ananyev at huawei.com>
> 1.8.3.1
More information about the dev
mailing list