[PATCH v5 3/3] power: amd power monitor support
Konstantin Ananyev
konstantin.ananyev at huawei.com
Thu Aug 17 16:18:10 CEST 2023
> > Caution: This message originated from an External Source. Use proper caution
> > when opening attachments, clicking links, or responding.
> >
> >
> > On Wed, Aug 16, 2023 at 11:59:59AM -0700, Sivaprasad Tummala wrote:
> > > mwaitx allows EPYC processors to enter an implementation-dependent
> > > power/performance optimized state (C1 state) for a specific period or
> > > until a store to the monitored address range.
> > >
> > > Signed-off-by: Sivaprasad Tummala <sivaprasad.tummala at amd.com>
> > > Acked-by: Anatoly Burakov <anatoly.burakov at intel.com>
> > > ---
> > > lib/eal/x86/rte_power_intrinsics.c | 77
> > > +++++++++++++++++++++++++-----
> > > 1 file changed, 66 insertions(+), 11 deletions(-)
> > >
> > > diff --git a/lib/eal/x86/rte_power_intrinsics.c
> > > b/lib/eal/x86/rte_power_intrinsics.c
> > > index 6eb9e50807..b4754e17da 100644
> > > --- a/lib/eal/x86/rte_power_intrinsics.c
> > > +++ b/lib/eal/x86/rte_power_intrinsics.c
> > > @@ -17,6 +17,60 @@ static struct power_wait_status {
> > > volatile void *monitor_addr; /**< NULL if not currently sleeping
> > > */ } __rte_cache_aligned wait_status[RTE_MAX_LCORE];
> > >
> > > +/**
> > > + * These functions use UMONITOR/UMWAIT instructions and will enter C0.2
> > state.
> > > + * For more information about usage of these instructions, please
> > > +refer to
> > > + * Intel(R) 64 and IA-32 Architectures Software Developer's Manual.
> > > + */
> > > +static void intel_umonitor(volatile void *addr) {
> > > + /* UMONITOR */
> > > + asm volatile(".byte 0xf3, 0x0f, 0xae, 0xf7;"
> > > + :
> > > + : "D"(addr));
> > > +}
> > > +
> > > +static void intel_umwait(const uint64_t timeout) {
> > > + const uint32_t tsc_l = (uint32_t)timeout;
> > > + const uint32_t tsc_h = (uint32_t)(timeout >> 32);
> > > + /* UMWAIT */
> > > + asm volatile(".byte 0xf2, 0x0f, 0xae, 0xf7;"
> > > + : /* ignore rflags */
> > > + : "D"(0), /* enter C0.2 */
> > > + "a"(tsc_l), "d"(tsc_h)); }
> >
> > question and perhaps Anatoly Burakov can chime in with expertise.
> >
> > gcc/clang have built-in intrinsics for umonitor and umwait i believe as per our other
> > thread of discussion is there a benefit to also providing inline assembly over just
> > using the intrinsics? I understand that the intrinsics may not exist for the monitorx
> > and mwaitx below so it is probably necessary for amd.
> >
> > so the suggestion here is when they are available just use the intrinsics.
> >
> > thanks
> >
> The gcc built-in functions __builtin_ia32_monitorx()/__builtin_ia32_mwaitx are available only when -mmwaitx
> is used, which is specific to AMD platforms. On generic builds, these built-ins are not available and hence inline
> assembly is required here.
Ok... but we can probably put them into a separate .c file that will be compiled with that specific flag?
Same thing can be probably done for Intel specific instructions.
In general, I think it is much more preferable to use built-ins vs inline assembly
> (if possible, of course).
Konstantin
>
> > > +
> > > +/**
> > > + * These functions use MONITORX/MWAITX instructions and will enter C1 state.
> > > + * For more information about usage of these instructions, please
> > > +refer to
> > > + * AMD64 Architecture Programmer's Manual.
> > > + */
> > > +static void amd_monitorx(volatile void *addr) {
> > > + /* MONITORX */
> > > + asm volatile(".byte 0x0f, 0x01, 0xfa;"
> > > + :
> > > + : "a"(addr),
> > > + "c"(0), /* no extensions */
> > > + "d"(0)); /* no hints */ }
> > > +
> > > +static void amd_mwaitx(const uint64_t timeout) {
> > > + /* MWAITX */
> > > + asm volatile(".byte 0x0f, 0x01, 0xfb;"
> > > + : /* ignore rflags */
> > > + : "a"(0), /* enter C1 */
> > > + "c"(2), /* enable timer */
> > > + "b"(timeout));
> > > +}
> > > +
> > > +static struct {
> > > + void (*mmonitor)(volatile void *addr);
> > > + void (*mwait)(const uint64_t timeout); } __rte_cache_aligned
> > > +power_monitor_ops;
> > > +
> > > static inline void
> > > __umwait_wakeup(volatile void *addr)
> > > {
> > > @@ -75,8 +129,6 @@ int
> > > rte_power_monitor(const struct rte_power_monitor_cond *pmc,
> > > const uint64_t tsc_timestamp) {
> > > - const uint32_t tsc_l = (uint32_t)tsc_timestamp;
> > > - const uint32_t tsc_h = (uint32_t)(tsc_timestamp >> 32);
> > > const unsigned int lcore_id = rte_lcore_id();
> > > struct power_wait_status *s;
> > > uint64_t cur_value;
> > > @@ -109,10 +161,8 @@ rte_power_monitor(const struct
> > rte_power_monitor_cond *pmc,
> > > * versions support this instruction natively.
> > > */
> > >
> > > - /* set address for UMONITOR */
> > > - asm volatile(".byte 0xf3, 0x0f, 0xae, 0xf7;"
> > > - :
> > > - : "D"(pmc->addr));
> > > + /* set address for mmonitor */
> > > + power_monitor_ops.mmonitor(pmc->addr);
> > >
> > > /* now that we've put this address into monitor, we can unlock */
> > > rte_spinlock_unlock(&s->lock);
> > > @@ -123,11 +173,8 @@ rte_power_monitor(const struct
> > rte_power_monitor_cond *pmc,
> > > if (pmc->fn(cur_value, pmc->opaque) != 0)
> > > goto end;
> > >
> > > - /* execute UMWAIT */
> > > - asm volatile(".byte 0xf2, 0x0f, 0xae, 0xf7;"
> > > - : /* ignore rflags */
> > > - : "D"(0), /* enter C0.2 */
> > > - "a"(tsc_l), "d"(tsc_h));
> > > + /* execute mwait */
> > > + power_monitor_ops.mwait(tsc_timestamp);
> > >
> > > end:
> > > /* erase sleep address */
> > > @@ -173,6 +220,14 @@ RTE_INIT(rte_power_intrinsics_init) {
> > > wait_multi_supported = 1;
> > > if (i.power_monitor)
> > > monitor_supported = 1;
> > > +
> > > + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_MONITORX)) { /* AMD */
> > > + power_monitor_ops.mmonitor = &amd_monitorx;
> > > + power_monitor_ops.mwait = &amd_mwaitx;
> > > + } else { /* Intel */
> > > + power_monitor_ops.mmonitor = &intel_umonitor;
> > > + power_monitor_ops.mwait = &intel_umwait;
> > > + }
> > > }
> > >
> > > int
> > > --
> > > 2.34.1
More information about the dev
mailing list