[dpdk-dev] [PATCH v13 05/11] eal: add monitor wakeup function
Ananyev, Konstantin
konstantin.ananyev at intel.com
Tue Jan 12 17:02:47 CET 2021
>
> Now that we have everything in a C file, we can store the information
> about our sleep, and have a native mechanism to wake up the sleeping
> core. This mechanism would however only wake up a core that's sleeping
> while monitoring - waking up from `rte_power_pause` won't work.
>
> Signed-off-by: Anatoly Burakov <anatoly.burakov at intel.com>
> ---
>
> Notes:
> v13:
> - Add comments around wakeup code to explain what it does
> - Add lcore_id parameter checking to prevent buffer overrun
>
> .../arm/include/rte_power_intrinsics.h | 9 ++
> .../include/generic/rte_power_intrinsics.h | 16 ++++
> .../ppc/include/rte_power_intrinsics.h | 9 ++
> lib/librte_eal/version.map | 1 +
> lib/librte_eal/x86/rte_power_intrinsics.c | 85 +++++++++++++++++++
> 5 files changed, 120 insertions(+)
>
> diff --git a/lib/librte_eal/arm/include/rte_power_intrinsics.h b/lib/librte_eal/arm/include/rte_power_intrinsics.h
> index 27869251a8..39e49cc45b 100644
> --- a/lib/librte_eal/arm/include/rte_power_intrinsics.h
> +++ b/lib/librte_eal/arm/include/rte_power_intrinsics.h
> @@ -33,6 +33,15 @@ rte_power_pause(const uint64_t tsc_timestamp)
> RTE_SET_USED(tsc_timestamp);
> }
>
> +/**
> + * This function is not supported on ARM.
> + */
> +void
> +rte_power_monitor_wakeup(const unsigned int lcore_id)
> +{
> + RTE_SET_USED(lcore_id);
> +}
> +
> #ifdef __cplusplus
> }
> #endif
> diff --git a/lib/librte_eal/include/generic/rte_power_intrinsics.h b/lib/librte_eal/include/generic/rte_power_intrinsics.h
> index a6f1955996..e311d6f8ea 100644
> --- a/lib/librte_eal/include/generic/rte_power_intrinsics.h
> +++ b/lib/librte_eal/include/generic/rte_power_intrinsics.h
> @@ -57,6 +57,22 @@ __rte_experimental
> void rte_power_monitor(const struct rte_power_monitor_cond *pmc,
> const uint64_t tsc_timestamp);
>
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice
> + *
> + * Wake up a specific lcore that is in a power optimized state and is monitoring
> + * an address.
> + *
> + * @note This function will *not* wake up a core that is in a power optimized
> + * state due to calling `rte_power_pause`.
> + *
> + * @param lcore_id
> + * Lcore ID of a sleeping thread.
> + */
> +__rte_experimental
> +void rte_power_monitor_wakeup(const unsigned int lcore_id);
> +
> /**
> * @warning
> * @b EXPERIMENTAL: this API may change without prior notice
> diff --git a/lib/librte_eal/ppc/include/rte_power_intrinsics.h b/lib/librte_eal/ppc/include/rte_power_intrinsics.h
> index 248d1f4a23..2e7db0e7eb 100644
> --- a/lib/librte_eal/ppc/include/rte_power_intrinsics.h
> +++ b/lib/librte_eal/ppc/include/rte_power_intrinsics.h
> @@ -33,6 +33,15 @@ rte_power_pause(const uint64_t tsc_timestamp)
> RTE_SET_USED(tsc_timestamp);
> }
>
> +/**
> + * This function is not supported on PPC64.
> + */
> +void
> +rte_power_monitor_wakeup(const unsigned int lcore_id)
> +{
> + RTE_SET_USED(lcore_id);
> +}
> +
> #ifdef __cplusplus
> }
> #endif
> diff --git a/lib/librte_eal/version.map b/lib/librte_eal/version.map
> index 20945b1efa..ac026e289d 100644
> --- a/lib/librte_eal/version.map
> +++ b/lib/librte_eal/version.map
> @@ -406,6 +406,7 @@ EXPERIMENTAL {
>
> # added in 21.02
> rte_power_monitor;
> + rte_power_monitor_wakeup;
> rte_power_pause;
> };
>
> diff --git a/lib/librte_eal/x86/rte_power_intrinsics.c b/lib/librte_eal/x86/rte_power_intrinsics.c
> index a9cd1afe9d..46a4fb6cd5 100644
> --- a/lib/librte_eal/x86/rte_power_intrinsics.c
> +++ b/lib/librte_eal/x86/rte_power_intrinsics.c
> @@ -2,8 +2,31 @@
> * Copyright(c) 2020 Intel Corporation
> */
>
> +#include <rte_common.h>
> +#include <rte_lcore.h>
> +#include <rte_spinlock.h>
> +
> #include "rte_power_intrinsics.h"
>
> +/*
> + * Per-lcore structure holding current status of C0.2 sleeps.
> + */
> +static struct power_wait_status {
> + rte_spinlock_t lock;
> + volatile void *monitor_addr; /**< NULL if not currently sleeping */
> +} __rte_cache_aligned wait_status[RTE_MAX_LCORE];
> +
> +static inline void
> +__umwait_wakeup(volatile void *addr)
> +{
> + uint64_t val;
> +
> + /* trigger a write but don't change the value */
> + val = __atomic_load_n((volatile uint64_t *)addr, __ATOMIC_RELAXED);
> + __atomic_compare_exchange_n((volatile uint64_t *)addr, &val, val, 0,
> + __ATOMIC_RELAXED, __ATOMIC_RELAXED);
> +}
> +
> static uint8_t wait_supported;
>
> static inline uint64_t
> @@ -36,6 +59,12 @@ rte_power_monitor(const struct rte_power_monitor_cond *pmc,
> {
> const uint32_t tsc_l = (uint32_t)tsc_timestamp;
> const uint32_t tsc_h = (uint32_t)(tsc_timestamp >> 32);
> + const unsigned int lcore_id = rte_lcore_id();
> + struct power_wait_status *s;
> +
> + /* prevent non-EAL thread from using this API */
> + if (lcore_id >= RTE_MAX_LCORE)
> + return;
>
> /* prevent user from running this instruction if it's not supported */
> if (!wait_supported)
> @@ -60,11 +89,24 @@ rte_power_monitor(const struct rte_power_monitor_cond *pmc,
> if (masked == pmc->val)
> return;
> }
> +
> + s = &wait_status[lcore_id];
> +
> + /* update sleep address */
> + rte_spinlock_lock(&s->lock);
> + s->monitor_addr = pmc->addr;
> + rte_spinlock_unlock(&s->lock);
It was a while, since I looked at it last time,
but shouldn't we grab the lock before monitor()?
I.E:
lock();
monitor();
addr=...;
unlock();
umwait();
> +
> /* execute UMWAIT */
> asm volatile(".byte 0xf2, 0x0f, 0xae, 0xf7;"
> : /* ignore rflags */
> : "D"(0), /* enter C0.2 */
> "a"(tsc_l), "d"(tsc_h));
> +
> + /* erase sleep address */
> + rte_spinlock_lock(&s->lock);
> + s->monitor_addr = NULL;
> + rte_spinlock_unlock(&s->lock);
> }
>
> /**
> @@ -97,3 +139,46 @@ RTE_INIT(rte_power_intrinsics_init) {
> if (i.power_monitor && i.power_pause)
> wait_supported = 1;
> }
> +
> +void
> +rte_power_monitor_wakeup(const unsigned int lcore_id)
> +{
> + struct power_wait_status *s;
> +
> + /* prevent buffer overrun */
> + if (lcore_id >= RTE_MAX_LCORE)
> + return;
> +
> + /* prevent user from running this instruction if it's not supported */
> + if (!wait_supported)
> + return;
> +
> + s = &wait_status[lcore_id];
> +
> + /*
> + * There is a race condition between sleep, wakeup and locking, but we
> + * don't need to handle it.
> + *
> + * Possible situations:
> + *
> + * 1. T1 locks, sets address, unlocks
> + * 2. T2 locks, triggers wakeup, unlocks
> + * 3. T1 sleeps
> + *
> + * In this case, because T1 has already set the address for monitoring,
> + * we will wake up immediately even if T2 triggers wakeup before T1
> + * goes to sleep.
> + *
> + * 1. T1 locks, sets address, unlocks, goes to sleep, and wakes up
> + * 2. T2 locks, triggers wakeup, and unlocks
> + * 3. T1 locks, erases address, and unlocks
> + *
> + * In this case, since we've already woken up, the "wakeup" was
> + * unneeded, and since T1 is still waiting on T2 releasing the lock, the
> + * wakeup address is still valid so it's perfectly safe to write it.
> + */
> + rte_spinlock_lock(&s->lock);
> + if (s->monitor_addr != NULL)
> + __umwait_wakeup(s->monitor_addr);
> + rte_spinlock_unlock(&s->lock);
> +}
> --
> 2.25.1
More information about the dev
mailing list