[dpdk-dev] [PATCH v7 02/10] eal: add power management intrinsics

Jerin Jacob jerinjacobk at gmail.com
Thu Oct 15 14:06:50 CEST 2020


On Thu, Oct 15, 2020 at 5:34 PM Anatoly Burakov
<anatoly.burakov at intel.com> wrote:
>
> From: Liang Ma <liang.j.ma at intel.com>
>
> Add two new power management intrinsics, and provide an implementation
> in eal/x86 based on UMONITOR/UMWAIT instructions. The instructions
> are implemented as raw byte opcodes because there is not yet widespread
> compiler support for these instructions.
>
> The power management instructions provide an architecture-specific
> function to either wait until a specified TSC timestamp is reached, or
> optionally wait until either a TSC timestamp is reached or a memory
> location is written to. The monitor function also provides an optional
> comparison, to avoid sleeping when the expected write has already
> happened, and no more writes are expected.
>
> For more details, please refer to Intel(R) 64 and IA-32 Architectures
> Software Developer's Manual, Volume 2.
>
> Signed-off-by: Liang Ma <liang.j.ma at intel.com>
> Signed-off-by: Anatoly Burakov <anatoly.burakov at intel.com>
> Acked-by: David Christensen <drc at linux.vnet.ibm.com>

Acked-by: Jerin Jacob <jerinj at marvell.com>



> ---
>
> Notes:
>     v7:
>     - Fix code style and other nitpicks (Konstantin)
>     v6:
>     - Add spinlock-enabled version to allow pthread-wait-like
>       constructs with umwait
>     - Clarify comments
>     - Added experimental tags to intrinsics
>     - Added endianness support
>     v5:
>     - Removed return values
>     - Simplified intrinsics and hardcoded C0.2 state
>     - Added other arch stubs
>
>  lib/librte_eal/arm/include/meson.build        |   1 +
>  .../arm/include/rte_power_intrinsics.h        |  60 ++++++++
>  .../include/generic/rte_power_intrinsics.h    | 111 ++++++++++++++
>  lib/librte_eal/include/meson.build            |   1 +
>  lib/librte_eal/ppc/include/meson.build        |   1 +
>  .../ppc/include/rte_power_intrinsics.h        |  60 ++++++++
>  lib/librte_eal/x86/include/meson.build        |   1 +
>  .../x86/include/rte_power_intrinsics.h        | 135 ++++++++++++++++++
>  8 files changed, 370 insertions(+)
>  create mode 100644 lib/librte_eal/arm/include/rte_power_intrinsics.h
>  create mode 100644 lib/librte_eal/include/generic/rte_power_intrinsics.h
>  create mode 100644 lib/librte_eal/ppc/include/rte_power_intrinsics.h
>  create mode 100644 lib/librte_eal/x86/include/rte_power_intrinsics.h
>
> diff --git a/lib/librte_eal/arm/include/meson.build b/lib/librte_eal/arm/include/meson.build
> index 73b750a18f..c6a9f70d73 100644
> --- a/lib/librte_eal/arm/include/meson.build
> +++ b/lib/librte_eal/arm/include/meson.build
> @@ -20,6 +20,7 @@ arch_headers = files(
>         'rte_pause_32.h',
>         'rte_pause_64.h',
>         'rte_pause.h',
> +       'rte_power_intrinsics.h',
>         'rte_prefetch_32.h',
>         'rte_prefetch_64.h',
>         'rte_prefetch.h',
> diff --git a/lib/librte_eal/arm/include/rte_power_intrinsics.h b/lib/librte_eal/arm/include/rte_power_intrinsics.h
> new file mode 100644
> index 0000000000..a4a1bc1159
> --- /dev/null
> +++ b/lib/librte_eal/arm/include/rte_power_intrinsics.h
> @@ -0,0 +1,60 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Intel Corporation
> + */
> +
> +#ifndef _RTE_POWER_INTRINSIC_ARM_H_
> +#define _RTE_POWER_INTRINSIC_ARM_H_
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +#include <rte_common.h>
> +
> +#include "generic/rte_power_intrinsics.h"
> +
> +/**
> + * This function is not supported on ARM.
> + */
> +static inline void
> +rte_power_monitor(const volatile void *p, const uint64_t expected_value,
> +               const uint64_t value_mask, const uint64_t tsc_timestamp,
> +               const uint8_t data_sz)
> +{
> +       RTE_SET_USED(p);
> +       RTE_SET_USED(expected_value);
> +       RTE_SET_USED(value_mask);
> +       RTE_SET_USED(tsc_timestamp);
> +       RTE_SET_USED(data_sz);
> +}
> +
> +/**
> + * This function is not supported on ARM.
> + */
> +static inline void
> +rte_power_monitor_sync(const volatile void *p, const uint64_t expected_value,
> +               const uint64_t value_mask, const uint64_t tsc_timestamp,
> +               const uint8_t data_sz, rte_spinlock_t *lck)
> +{
> +       RTE_SET_USED(p);
> +       RTE_SET_USED(expected_value);
> +       RTE_SET_USED(value_mask);
> +       RTE_SET_USED(tsc_timestamp);
> +       RTE_SET_USED(lck);
> +       RTE_SET_USED(data_sz);
> +}
> +
> +/**
> + * This function is not supported on ARM.
> + */
> +static inline void
> +rte_power_pause(const uint64_t tsc_timestamp)
> +{
> +       RTE_SET_USED(tsc_timestamp);
> +}
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif /* _RTE_POWER_INTRINSIC_ARM_H_ */
> diff --git a/lib/librte_eal/include/generic/rte_power_intrinsics.h b/lib/librte_eal/include/generic/rte_power_intrinsics.h
> new file mode 100644
> index 0000000000..fb897d9060
> --- /dev/null
> +++ b/lib/librte_eal/include/generic/rte_power_intrinsics.h
> @@ -0,0 +1,111 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Intel Corporation
> + */
> +
> +#ifndef _RTE_POWER_INTRINSIC_H_
> +#define _RTE_POWER_INTRINSIC_H_
> +
> +#include <inttypes.h>
> +
> +#include <rte_compat.h>
> +#include <rte_spinlock.h>
> +
> +/**
> + * @file
> + * Advanced power management operations.
> + *
> + * This file define APIs for advanced power management,
> + * which are architecture-dependent.
> + */
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice
> + *
> + * Monitor specific address for changes. This will cause the CPU to enter an
> + * architecture-defined optimized power state until either the specified
> + * memory address is written to, a certain TSC timestamp is reached, or other
> + * reasons cause the CPU to wake up.
> + *
> + * Additionally, an `expected` 64-bit value and 64-bit mask are provided. If
> + * mask is non-zero, the current value pointed to by the `p` pointer will be
> + * checked against the expected value, and if they match, the entering of
> + * optimized power state may be aborted.
> + *
> + * @param p
> + *   Address to monitor for changes.
> + * @param expected_value
> + *   Before attempting the monitoring, the `p` address may be read and compared
> + *   against this value. If `value_mask` is zero, this step will be skipped.
> + * @param value_mask
> + *   The 64-bit mask to use to extract current value from `p`.
> + * @param tsc_timestamp
> + *   Maximum TSC timestamp to wait for. Note that the wait behavior is
> + *   architecture-dependent.
> + * @param data_sz
> + *   Data size (in bytes) that will be used to compare expected value with the
> + *   memory address. Can be 1, 2, 4 or 8. Supplying any other value will lead
> + *   to undefined result.
> + */
> +__rte_experimental
> +static inline void rte_power_monitor(const volatile void *p,
> +               const uint64_t expected_value, const uint64_t value_mask,
> +               const uint64_t tsc_timestamp, const uint8_t data_sz);
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice
> + *
> + * Monitor specific address for changes. This will cause the CPU to enter an
> + * architecture-defined optimized power state until either the specified
> + * memory address is written to, a certain TSC timestamp is reached, or other
> + * reasons cause the CPU to wake up.
> + *
> + * Additionally, an `expected` 64-bit value and 64-bit mask are provided. If
> + * mask is non-zero, the current value pointed to by the `p` pointer will be
> + * checked against the expected value, and if they match, the entering of
> + * optimized power state may be aborted.
> + *
> + * This call will also lock a spinlock on entering sleep, and release it on
> + * waking up the CPU.
> + *
> + * @param p
> + *   Address to monitor for changes.
> + * @param expected_value
> + *   Before attempting the monitoring, the `p` address may be read and compared
> + *   against this value. If `value_mask` is zero, this step will be skipped.
> + * @param value_mask
> + *   The 64-bit mask to use to extract current value from `p`.
> + * @param tsc_timestamp
> + *   Maximum TSC timestamp to wait for. Note that the wait behavior is
> + *   architecture-dependent.
> + * @param data_sz
> + *   Data size (in bytes) that will be used to compare expected value with the
> + *   memory address. Can be 1, 2, 4 or 8. Supplying any other value will lead
> + *   to undefined result.
> + * @param lck
> + *   A spinlock that must be locked before entering the function, will be
> + *   unlocked while the CPU is sleeping, and will be locked again once the CPU
> + *   wakes up.
> + */
> +__rte_experimental
> +static inline void rte_power_monitor_sync(const volatile void *p,
> +               const uint64_t expected_value, const uint64_t value_mask,
> +               const uint64_t tsc_timestamp, const uint8_t data_sz,
> +               rte_spinlock_t *lck);
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice
> + *
> + * Enter an architecture-defined optimized power state until a certain TSC
> + * timestamp is reached.
> + *
> + * @param tsc_timestamp
> + *   Maximum TSC timestamp to wait for. Note that the wait behavior is
> + *   architecture-dependent.
> + */
> +__rte_experimental
> +static inline void rte_power_pause(const uint64_t tsc_timestamp);
> +
> +#endif /* _RTE_POWER_INTRINSIC_H_ */
> diff --git a/lib/librte_eal/include/meson.build b/lib/librte_eal/include/meson.build
> index cd09027958..3a12e87e19 100644
> --- a/lib/librte_eal/include/meson.build
> +++ b/lib/librte_eal/include/meson.build
> @@ -60,6 +60,7 @@ generic_headers = files(
>         'generic/rte_memcpy.h',
>         'generic/rte_pause.h',
>         'generic/rte_prefetch.h',
> +       'generic/rte_power_intrinsics.h',
>         'generic/rte_rwlock.h',
>         'generic/rte_spinlock.h',
>         'generic/rte_ticketlock.h',
> diff --git a/lib/librte_eal/ppc/include/meson.build b/lib/librte_eal/ppc/include/meson.build
> index ab4bd28092..0873b2aecb 100644
> --- a/lib/librte_eal/ppc/include/meson.build
> +++ b/lib/librte_eal/ppc/include/meson.build
> @@ -10,6 +10,7 @@ arch_headers = files(
>         'rte_io.h',
>         'rte_memcpy.h',
>         'rte_pause.h',
> +       'rte_power_intrinsics.h',
>         'rte_prefetch.h',
>         'rte_rwlock.h',
>         'rte_spinlock.h',
> diff --git a/lib/librte_eal/ppc/include/rte_power_intrinsics.h b/lib/librte_eal/ppc/include/rte_power_intrinsics.h
> new file mode 100644
> index 0000000000..4ed03d521f
> --- /dev/null
> +++ b/lib/librte_eal/ppc/include/rte_power_intrinsics.h
> @@ -0,0 +1,60 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Intel Corporation
> + */
> +
> +#ifndef _RTE_POWER_INTRINSIC_PPC_H_
> +#define _RTE_POWER_INTRINSIC_PPC_H_
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +#include <rte_common.h>
> +
> +#include "generic/rte_power_intrinsics.h"
> +
> +/**
> + * This function is not supported on PPC64.
> + */
> +static inline void
> +rte_power_monitor(const volatile void *p, const uint64_t expected_value,
> +               const uint64_t value_mask, const uint64_t tsc_timestamp,
> +               const uint8_t data_sz)
> +{
> +       RTE_SET_USED(p);
> +       RTE_SET_USED(expected_value);
> +       RTE_SET_USED(value_mask);
> +       RTE_SET_USED(tsc_timestamp);
> +       RTE_SET_USED(data_sz);
> +}
> +
> +/**
> + * This function is not supported on PPC64.
> + */
> +static inline void
> +rte_power_monitor_sync(const volatile void *p, const uint64_t expected_value,
> +               const uint64_t value_mask, const uint64_t tsc_timestamp,
> +               const uint8_t data_sz, rte_spinlock_t *lck)
> +{
> +       RTE_SET_USED(p);
> +       RTE_SET_USED(expected_value);
> +       RTE_SET_USED(value_mask);
> +       RTE_SET_USED(tsc_timestamp);
> +       RTE_SET_USED(lck);
> +       RTE_SET_USED(data_sz);
> +}
> +
> +/**
> + * This function is not supported on PPC64.
> + */
> +static inline void
> +rte_power_pause(const uint64_t tsc_timestamp)
> +{
> +       RTE_SET_USED(tsc_timestamp);
> +}
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif /* _RTE_POWER_INTRINSIC_PPC_H_ */
> diff --git a/lib/librte_eal/x86/include/meson.build b/lib/librte_eal/x86/include/meson.build
> index f0e998c2fe..494a8142a2 100644
> --- a/lib/librte_eal/x86/include/meson.build
> +++ b/lib/librte_eal/x86/include/meson.build
> @@ -13,6 +13,7 @@ arch_headers = files(
>         'rte_io.h',
>         'rte_memcpy.h',
>         'rte_prefetch.h',
> +       'rte_power_intrinsics.h',
>         'rte_pause.h',
>         'rte_rtm.h',
>         'rte_rwlock.h',
> diff --git a/lib/librte_eal/x86/include/rte_power_intrinsics.h b/lib/librte_eal/x86/include/rte_power_intrinsics.h
> new file mode 100644
> index 0000000000..f9b761d796
> --- /dev/null
> +++ b/lib/librte_eal/x86/include/rte_power_intrinsics.h
> @@ -0,0 +1,135 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Intel Corporation
> + */
> +
> +#ifndef _RTE_POWER_INTRINSIC_X86_H_
> +#define _RTE_POWER_INTRINSIC_X86_H_
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +#include <rte_common.h>
> +
> +#include "generic/rte_power_intrinsics.h"
> +
> +static inline uint64_t
> +__get_umwait_val(const volatile void *p, const uint8_t sz)
> +{
> +       switch (sz) {
> +       case sizeof(uint8_t):
> +               return *(const volatile uint8_t *)p;
> +       case sizeof(uint16_t):
> +               return *(const volatile uint16_t *)p;
> +       case sizeof(uint32_t):
> +               return *(const volatile uint32_t *)p;
> +       case sizeof(uint64_t):
> +               return *(const volatile uint64_t *)p;
> +       default:
> +               /* this is an intrinsic, so we can't have any error handling */
> +               RTE_ASSERT(0);
> +               return 0;
> +       }
> +}
> +
> +/**
> + * This function uses UMONITOR/UMWAIT instructions and will enter C0.2 state.
> + * For more information about usage of these instructions, please refer to
> + * Intel(R) 64 and IA-32 Architectures Software Developer's Manual.
> + */
> +static inline void
> +rte_power_monitor(const volatile void *p, const uint64_t expected_value,
> +               const uint64_t value_mask, const uint64_t tsc_timestamp,
> +               const uint8_t data_sz)
> +{
> +       const uint32_t tsc_l = (uint32_t)tsc_timestamp;
> +       const uint32_t tsc_h = (uint32_t)(tsc_timestamp >> 32);
> +       /*
> +        * we're using raw byte codes for now as only the newest compiler
> +        * versions support this instruction natively.
> +        */
> +
> +       /* set address for UMONITOR */
> +       asm volatile(".byte 0xf3, 0x0f, 0xae, 0xf7;"
> +                       :
> +                       : "D"(p));
> +
> +       if (value_mask) {
> +               const uint64_t cur_value = __get_umwait_val(p, data_sz);
> +               const uint64_t masked = cur_value & value_mask;
> +
> +               /* if the masked value is already matching, abort */
> +               if (masked == expected_value)
> +                       return;
> +       }
> +       /* execute UMWAIT */
> +       asm volatile(".byte 0xf2, 0x0f, 0xae, 0xf7;"
> +                       : /* ignore rflags */
> +                       : "D"(0), /* enter C0.2 */
> +                         "a"(tsc_l), "d"(tsc_h));
> +}
> +
> +/**
> + * This function uses UMONITOR/UMWAIT instructions and will enter C0.2 state.
> + * For more information about usage of these instructions, please refer to
> + * Intel(R) 64 and IA-32 Architectures Software Developer's Manual.
> + */
> +static inline void
> +rte_power_monitor_sync(const volatile void *p, const uint64_t expected_value,
> +               const uint64_t value_mask, const uint64_t tsc_timestamp,
> +               const uint8_t data_sz, rte_spinlock_t *lck)
> +{
> +       const uint32_t tsc_l = (uint32_t)tsc_timestamp;
> +       const uint32_t tsc_h = (uint32_t)(tsc_timestamp >> 32);
> +       /*
> +        * we're using raw byte codes for now as only the newest compiler
> +        * versions support this instruction natively.
> +        */
> +
> +       /* set address for UMONITOR */
> +       asm volatile(".byte 0xf3, 0x0f, 0xae, 0xf7;"
> +                       :
> +                       : "D"(p));
> +
> +       if (value_mask) {
> +               const uint64_t cur_value = __get_umwait_val(p, data_sz);
> +               const uint64_t masked = cur_value & value_mask;
> +
> +               /* if the masked value is already matching, abort */
> +               if (masked == expected_value)
> +                       return;
> +       }
> +       rte_spinlock_unlock(lck);
> +
> +       /* execute UMWAIT */
> +       asm volatile(".byte 0xf2, 0x0f, 0xae, 0xf7;"
> +                       : /* ignore rflags */
> +                       : "D"(0), /* enter C0.2 */
> +                         "a"(tsc_l), "d"(tsc_h));
> +
> +       rte_spinlock_lock(lck);
> +}
> +
> +/**
> + * This function uses TPAUSE instruction  and will enter C0.2 state. For more
> + * information about usage of this instruction, please refer to Intel(R) 64 and
> + * IA-32 Architectures Software Developer's Manual.
> + */
> +static inline void
> +rte_power_pause(const uint64_t tsc_timestamp)
> +{
> +       const uint32_t tsc_l = (uint32_t)tsc_timestamp;
> +       const uint32_t tsc_h = (uint32_t)(tsc_timestamp >> 32);
> +
> +       /* execute TPAUSE */
> +       asm volatile(".byte 0x66, 0x0f, 0xae, 0xf7;"
> +               : /* ignore rflags */
> +               : "D"(0), /* enter C0.2 */
> +                 "a"(tsc_l), "d"(tsc_h));
> +}
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +#endif /* _RTE_POWER_INTRINSIC_X86_H_ */
> --
> 2.17.1


More information about the dev mailing list