[dpdk-dev] [PATCH v12 05/11] eal: add monitor wakeup function

Anatoly Burakov anatoly.burakov at intel.com
Thu Dec 17 15:05:35 CET 2020


Now that we have everything in a C file, we can store the information
about our sleep, and have a native mechanism to wake up the sleeping
core. This mechanism would however only wake up a core that's sleeping
while monitoring - waking up from `rte_power_pause` won't work.

Signed-off-by: Anatoly Burakov <anatoly.burakov at intel.com>
---
 .../arm/include/rte_power_intrinsics.h        |  9 +++
 .../include/generic/rte_power_intrinsics.h    | 16 +++++
 .../ppc/include/rte_power_intrinsics.h        |  9 +++
 lib/librte_eal/version.map                    |  1 +
 lib/librte_eal/x86/rte_power_intrinsics.c     | 70 +++++++++++++++++++
 5 files changed, 105 insertions(+)

diff --git a/lib/librte_eal/arm/include/rte_power_intrinsics.h b/lib/librte_eal/arm/include/rte_power_intrinsics.h
index 27869251a8..39e49cc45b 100644
--- a/lib/librte_eal/arm/include/rte_power_intrinsics.h
+++ b/lib/librte_eal/arm/include/rte_power_intrinsics.h
@@ -33,6 +33,15 @@ rte_power_pause(const uint64_t tsc_timestamp)
 	RTE_SET_USED(tsc_timestamp);
 }
 
+/**
+ * This function is not supported on ARM.
+ */
+void
+rte_power_monitor_wakeup(const unsigned int lcore_id)
+{
+	RTE_SET_USED(lcore_id);
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_eal/include/generic/rte_power_intrinsics.h b/lib/librte_eal/include/generic/rte_power_intrinsics.h
index a6f1955996..e311d6f8ea 100644
--- a/lib/librte_eal/include/generic/rte_power_intrinsics.h
+++ b/lib/librte_eal/include/generic/rte_power_intrinsics.h
@@ -57,6 +57,22 @@ __rte_experimental
 void rte_power_monitor(const struct rte_power_monitor_cond *pmc,
 		const uint64_t tsc_timestamp);
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Wake up a specific lcore that is in a power optimized state and is monitoring
+ * an address.
+ *
+ * @note This function will *not* wake up a core that is in a power optimized
+ *   state due to calling `rte_power_pause`.
+ *
+ * @param lcore_id
+ *   Lcore ID of a sleeping thread.
+ */
+__rte_experimental
+void rte_power_monitor_wakeup(const unsigned int lcore_id);
+
 /**
  * @warning
  * @b EXPERIMENTAL: this API may change without prior notice
diff --git a/lib/librte_eal/ppc/include/rte_power_intrinsics.h b/lib/librte_eal/ppc/include/rte_power_intrinsics.h
index 248d1f4a23..2e7db0e7eb 100644
--- a/lib/librte_eal/ppc/include/rte_power_intrinsics.h
+++ b/lib/librte_eal/ppc/include/rte_power_intrinsics.h
@@ -33,6 +33,15 @@ rte_power_pause(const uint64_t tsc_timestamp)
 	RTE_SET_USED(tsc_timestamp);
 }
 
+/**
+ * This function is not supported on PPC64.
+ */
+void
+rte_power_monitor_wakeup(const unsigned int lcore_id)
+{
+	RTE_SET_USED(lcore_id);
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_eal/version.map b/lib/librte_eal/version.map
index 20945b1efa..ac026e289d 100644
--- a/lib/librte_eal/version.map
+++ b/lib/librte_eal/version.map
@@ -406,6 +406,7 @@ EXPERIMENTAL {
 
 	# added in 21.02
 	rte_power_monitor;
+	rte_power_monitor_wakeup;
 	rte_power_pause;
 };
 
diff --git a/lib/librte_eal/x86/rte_power_intrinsics.c b/lib/librte_eal/x86/rte_power_intrinsics.c
index a9cd1afe9d..4149b430f6 100644
--- a/lib/librte_eal/x86/rte_power_intrinsics.c
+++ b/lib/librte_eal/x86/rte_power_intrinsics.c
@@ -2,8 +2,29 @@
  * Copyright(c) 2020 Intel Corporation
  */
 
+#include <rte_common.h>
+#include <rte_lcore.h>
+#include <rte_spinlock.h>
+
 #include "rte_power_intrinsics.h"
 
+/*
+ * Per-lcore structure holding current status of C0.2 sleeps.
+ */
+static struct power_wait_status {
+	rte_spinlock_t lock;
+	volatile void *monitor_addr; /**< NULL if not currently sleeping */
+} __rte_cache_aligned wait_status[RTE_MAX_LCORE];
+
+static inline void
+__umwait_wakeup(volatile void *addr)
+{
+	uint64_t val;
+	val = __atomic_load_n((volatile uint64_t *)addr, __ATOMIC_RELAXED);
+	__atomic_compare_exchange_n((volatile uint64_t *)addr, &val, val, 0,
+			__ATOMIC_RELAXED, __ATOMIC_RELAXED);
+}
+
 static uint8_t wait_supported;
 
 static inline uint64_t
@@ -36,6 +57,8 @@ rte_power_monitor(const struct rte_power_monitor_cond *pmc,
 {
 	const uint32_t tsc_l = (uint32_t)tsc_timestamp;
 	const uint32_t tsc_h = (uint32_t)(tsc_timestamp >> 32);
+	const unsigned int lcore_id = rte_lcore_id();
+	struct power_wait_status *s = &wait_status[lcore_id];
 
 	/* prevent user from running this instruction if it's not supported */
 	if (!wait_supported)
@@ -60,11 +83,21 @@ rte_power_monitor(const struct rte_power_monitor_cond *pmc,
 		if (masked == pmc->val)
 			return;
 	}
+	/* update sleep address */
+	rte_spinlock_lock(&s->lock);
+	s->monitor_addr = pmc->addr;
+	rte_spinlock_unlock(&s->lock);
+
 	/* execute UMWAIT */
 	asm volatile(".byte 0xf2, 0x0f, 0xae, 0xf7;"
 			: /* ignore rflags */
 			: "D"(0), /* enter C0.2 */
 			  "a"(tsc_l), "d"(tsc_h));
+
+	/* erase sleep address */
+	rte_spinlock_lock(&s->lock);
+	s->monitor_addr = NULL;
+	rte_spinlock_unlock(&s->lock);
 }
 
 /**
@@ -97,3 +130,40 @@ RTE_INIT(rte_power_intrinsics_init) {
 	if (i.power_monitor && i.power_pause)
 		wait_supported = 1;
 }
+
+void
+rte_power_monitor_wakeup(const unsigned int lcore_id)
+{
+	struct power_wait_status *s = &wait_status[lcore_id];
+
+	/* prevent user from running this instruction if it's not supported */
+	if (!wait_supported)
+		return;
+
+	/*
+	 * There is a race condition between sleep, wakeup and locking, but we
+	 * don't need to handle it.
+	 *
+	 * Possible situations:
+	 *
+	 * 1. T1 locks, sets address, unlocks
+	 * 2. T2 locks, triggers wakeup, unlocks
+	 * 3. T1 sleeps
+	 *
+	 * In this case, because T1 has already set the address for monitoring,
+	 * we will wake up immediately even if T2 triggers wakeup before T1
+	 * goes to sleep.
+	 *
+	 * 1. T1 locks, sets address, unlocks, goes to sleep, and wakes up
+	 * 2. T2 locks, triggers wakeup, and unlocks
+	 * 3. T1 locks, erases address, and unlocks
+	 *
+	 * In this case, since we've already woken up, the "wakeup" was
+	 * unneeded, and since T1 is still waiting on T2 releasing the lock, the
+	 * wakeup address is still valid so it's perfectly safe to write it.
+	 */
+	rte_spinlock_lock(&s->lock);
+	if (s->monitor_addr != NULL)
+		__umwait_wakeup(s->monitor_addr);
+	rte_spinlock_unlock(&s->lock);
+}
-- 
2.17.1


More information about the dev mailing list