[dpdk-dev] [RFC 1/6] eal: add power management intrinsics
Ananyev, Konstantin
konstantin.ananyev at intel.com
Thu May 28 13:39:55 CEST 2020
Hi Anatoly,
>
> Add two new power management intrinsics, and provide an implementation
> in eal/x86 based on UMONITOR/UMWAIT instructions. The instructions
> are implemented as raw byte opcodes because there is not yet widespread
> compiler support for these instructions.
>
> The power management instructions provide an architecture-specific
> function to either wait until a specified TSC timestamp is reached, or
> optionally wait until either a TSC timestamp is reached or a memory
> location is written to. The monitor function also provides an optional
> comparison, to avoid sleeping when the expected write has already
> happened, and no more writes are expected.
Recently the ARM folks introduced a new generic API
for similar (as I understand it) purposes: rte_wait_until_equal_(16|32|64).
It would probably make sense to unify both APIs into something common
and HW-transparent.
Konstantin
>
> Signed-off-by: Liang J. Ma <liang.j.ma at intel.com>
> Signed-off-by: Anatoly Burakov <anatoly.burakov at intel.com>
> ---
> .../include/generic/rte_power_intrinsics.h | 64 +++++++++
> lib/librte_eal/include/meson.build | 1 +
> lib/librte_eal/x86/include/meson.build | 1 +
> lib/librte_eal/x86/include/rte_cpuflags.h | 1 +
> .../x86/include/rte_power_intrinsics.h | 134 ++++++++++++++++++
> lib/librte_eal/x86/rte_cpuflags.c | 2 +
> 6 files changed, 203 insertions(+)
> create mode 100644 lib/librte_eal/include/generic/rte_power_intrinsics.h
> create mode 100644 lib/librte_eal/x86/include/rte_power_intrinsics.h
>
> diff --git a/lib/librte_eal/include/generic/rte_power_intrinsics.h b/lib/librte_eal/include/generic/rte_power_intrinsics.h
> new file mode 100644
> index 0000000000..8646c4ac16
> --- /dev/null
> +++ b/lib/librte_eal/include/generic/rte_power_intrinsics.h
> @@ -0,0 +1,64 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Intel Corporation
> + */
> +
> +#ifndef _RTE_POWER_INTRINSIC_H_
> +#define _RTE_POWER_INTRINSIC_H_
> +
> +#include <inttypes.h>
> +
> +/**
> + * @file
> + * Advanced power management operations.
> + *
> + * This file defines APIs for advanced power management,
> + * which are architecture-dependent.
> + */
> +
> +/**
> + * Monitor specific address for changes. This will cause the CPU to enter an
> + * architecture-defined optimized power state until either the specified
> + * memory address is written to, or a certain TSC timestamp is reached.
> + *
> + * Additionally, an `expected` 64-bit value and 64-bit mask are provided. If
> + * mask is non-zero, the current value pointed to by the `p` pointer will be
> + * checked against the expected value, and if they match, the entering of
> + * optimized power state may be aborted.
> + *
> + * @param p
> + * Address to monitor for changes. Must be aligned on an 8-byte boundary.
> + * @param expected_value
> + * Before attempting the monitoring, the `p` address may be read and compared
> + * against this value. If `value_mask` is zero, this step will be skipped.
> + * @param value_mask
> + * The 64-bit mask to use to extract current value from `p`.
> + * @param state
> + * Architecture-dependent optimized power state number
> + * @param tsc_timestamp
> + * Maximum TSC timestamp to wait for. Note that the wait behavior is
> + * architecture-dependent.
> + *
> + * @return
> + * Architecture-dependent return value.
> + */
> +static inline int rte_power_monitor(const volatile void *p,
> + const uint64_t expected_value, const uint64_t value_mask,
> + const uint32_t state, const uint64_t tsc_timestamp);
> +
> +/**
> + * Enter an architecture-defined optimized power state until a certain TSC
> + * timestamp is reached.
> + *
> + * @param state
> + * Architecture-dependent optimized power state number
> + * @param tsc_timestamp
> + * Maximum TSC timestamp to wait for. Note that the wait behavior is
> + * architecture-dependent.
> + *
> + * @return
> + * Architecture-dependent return value.
> + */
> +static inline int rte_power_pause(const uint32_t state,
> + const uint64_t tsc_timestamp);
> +
> +#endif /* _RTE_POWER_INTRINSIC_H_ */
> diff --git a/lib/librte_eal/include/meson.build b/lib/librte_eal/include/meson.build
> index bc73ec2c5c..b54a2be4f6 100644
> --- a/lib/librte_eal/include/meson.build
> +++ b/lib/librte_eal/include/meson.build
> @@ -59,6 +59,7 @@ generic_headers = files(
> 'generic/rte_memcpy.h',
> 'generic/rte_pause.h',
> 'generic/rte_prefetch.h',
> + 'generic/rte_power_intrinsics.h',
> 'generic/rte_rwlock.h',
> 'generic/rte_spinlock.h',
> 'generic/rte_ticketlock.h',
> diff --git a/lib/librte_eal/x86/include/meson.build b/lib/librte_eal/x86/include/meson.build
> index f0e998c2fe..494a8142a2 100644
> --- a/lib/librte_eal/x86/include/meson.build
> +++ b/lib/librte_eal/x86/include/meson.build
> @@ -13,6 +13,7 @@ arch_headers = files(
> 'rte_io.h',
> 'rte_memcpy.h',
> 'rte_prefetch.h',
> + 'rte_power_intrinsics.h',
> 'rte_pause.h',
> 'rte_rtm.h',
> 'rte_rwlock.h',
> diff --git a/lib/librte_eal/x86/include/rte_cpuflags.h b/lib/librte_eal/x86/include/rte_cpuflags.h
> index c1d20364d1..94d6a43763 100644
> --- a/lib/librte_eal/x86/include/rte_cpuflags.h
> +++ b/lib/librte_eal/x86/include/rte_cpuflags.h
> @@ -110,6 +110,7 @@ enum rte_cpu_flag_t {
> RTE_CPUFLAG_RDTSCP, /**< RDTSCP */
> RTE_CPUFLAG_EM64T, /**< EM64T */
>
> + RTE_CPUFLAG_WAITPKG, /**< UMONITOR/UMWAIT/TPAUSE */
> /* (EAX 80000007h) EDX features */
> RTE_CPUFLAG_INVTSC, /**< INVTSC */
>
> diff --git a/lib/librte_eal/x86/include/rte_power_intrinsics.h b/lib/librte_eal/x86/include/rte_power_intrinsics.h
> new file mode 100644
> index 0000000000..a0522400fb
> --- /dev/null
> +++ b/lib/librte_eal/x86/include/rte_power_intrinsics.h
> @@ -0,0 +1,134 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Intel Corporation
> + */
> +
> +#ifndef _RTE_POWER_INTRINSIC_X86_64_H_
> +#define _RTE_POWER_INTRINSIC_X86_64_H_
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +#include <rte_atomic.h>
> +#include <rte_common.h>
> +
> +#include "generic/rte_power_intrinsics.h"
> +
> +/**
> + * Monitor specific address for changes. This will cause the CPU to enter an
> + * architecture-defined optimized power state until either the specified
> + * memory address is written to, or a certain TSC timestamp is reached.
> + *
> + * Additionally, an `expected` 64-bit value and 64-bit mask are provided. If
> + * mask is non-zero, the current value pointed to by the `p` pointer will be
> + * checked against the expected value, and if they match, the entering of
> + * optimized power state may be aborted.
> + *
> + * This function uses UMONITOR/UMWAIT instructions. For more information about
> + * their usage, please refer to Intel(R) 64 and IA-32 Architectures Software
> + * Developer's Manual.
> + *
> + * @param p
> + * Address to monitor for changes. Must be aligned on an 8-byte boundary.
> + * @param expected_value
> + * Before attempting the monitoring, the `p` address may be read and compared
> + * against this value. If `value_mask` is zero, this step will be skipped.
> + * @param value_mask
> + * The 64-bit mask to use to extract current value from `p`.
> + * @param state
> + * Architecture-dependent optimized power state number. Can be 0 (C0.2) or
> + * 1 (C0.1).
> + * @param tsc_timestamp
> + * Maximum TSC timestamp to wait for.
> + *
> + * @return
> + * - 1 if wakeup was due to TSC timeout expiration.
> + * - 0 if wakeup was due to memory write or other reasons.
> + */
> +static inline int rte_power_monitor(const volatile void *p,
> + const uint64_t expected_value, const uint64_t value_mask,
> + const uint32_t state, const uint64_t tsc_timestamp)
> +{
> + const uint32_t tsc_l = (uint32_t)tsc_timestamp;
> + const uint32_t tsc_h = (uint32_t)(tsc_timestamp >> 32);
> + uint64_t rflags;
> +
> + /*
> + * we're using raw byte codes for now as only the newest compiler
> + * versions support this instruction natively.
> + */
> +
> + /* set address for UMONITOR */
> + asm volatile(".byte 0xf3, 0x0f, 0xae, 0xf7;"
> + :
> + : "D"(p));
> + rte_mb();
> + if (value_mask) {
> + const uint64_t cur_value = *(const volatile uint64_t *)p;
> + const uint64_t masked = cur_value & value_mask;
> + /* if the masked value is already matching, abort */
> + if (masked == expected_value)
> + return 0;
> + }
> + /* execute UMWAIT */
> + asm volatile(".byte 0xf2, 0x0f, 0xae, 0xf7;\n"
> + /*
> + * UMWAIT sets CF flag in RFLAGS, so PUSHF to push them
> + * onto the stack, then pop them back into `rflags` so that
> + * we can read it.
> + */
> + "pushf;\n"
> + "pop %0;\n"
> + : "=r"(rflags)
> + : "D"(state), "a"(tsc_l), "d"(tsc_h));
> +
> + /* we're interested in bit 0 of RFLAGS (the carry flag) */
> + return rflags & 0x1;
> +}
> +
> +/**
> + * Enter an architecture-defined optimized power state until a certain TSC
> + * timestamp is reached.
> + *
> + * This function uses TPAUSE instruction. For more information about its usage,
> + * please refer to Intel(R) 64 and IA-32 Architectures Software Developer's
> + * Manual.
> + *
> + * @param state
> + * Architecture-dependent optimized power state number. Can be 0 (C0.2) or
> + * 1 (C0.1).
> + * @param tsc_timestamp
> + * Maximum TSC timestamp to wait for.
> + *
> + * @return
> + * - 1 if wakeup was due to TSC timeout expiration.
> + * - 0 if wakeup was due to other reasons.
> + */
> +static inline int rte_power_pause(const uint32_t state,
> + const uint64_t tsc_timestamp)
> +{
> + const uint32_t tsc_l = (uint32_t)tsc_timestamp;
> + const uint32_t tsc_h = (uint32_t)(tsc_timestamp >> 32);
> + uint64_t rflags;
> +
> + /* execute TPAUSE */
> + asm volatile(".byte 0x66, 0x0f, 0xae, 0xf7;\n"
> + /*
> + * TPAUSE sets CF flag in RFLAGS, so PUSHF to push them
> + * onto the stack, then pop them back into `rflags` so that
> + * we can read it.
> + */
> + "pushf;\n"
> + "pop %0;\n"
> + : "=r"(rflags)
> + : "D"(state), "a"(tsc_l), "d"(tsc_h));
> +
> + /* we're interested in bit 0 of RFLAGS (the carry flag) */
> + return rflags & 0x1;
> +}
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif /* _RTE_POWER_INTRINSIC_X86_64_H_ */
> diff --git a/lib/librte_eal/x86/rte_cpuflags.c b/lib/librte_eal/x86/rte_cpuflags.c
> index 30439e7951..0325c4b93b 100644
> --- a/lib/librte_eal/x86/rte_cpuflags.c
> +++ b/lib/librte_eal/x86/rte_cpuflags.c
> @@ -110,6 +110,8 @@ const struct feature_entry rte_cpu_feature_table[] = {
> FEAT_DEF(AVX512F, 0x00000007, 0, RTE_REG_EBX, 16)
> FEAT_DEF(RDSEED, 0x00000007, 0, RTE_REG_EBX, 18)
>
> + FEAT_DEF(WAITPKG, 0x00000007, 0, RTE_REG_ECX, 5)
> +
> FEAT_DEF(LAHF_SAHF, 0x80000001, 0, RTE_REG_ECX, 0)
> FEAT_DEF(LZCNT, 0x80000001, 0, RTE_REG_ECX, 4)
>
> --
> 2.17.1
More information about the dev
mailing list