[PATCH v12 3/3] examples/l3fwd-power: add PM QoS configuration
Huisong Li
lihuisong at huawei.com
Wed Oct 23 06:09:16 CEST 2024
The '--cpu-resume-latency' option can be used to control C-state selection.
Setting the CPU resume latency to 0 only allows the CPU to enter the
C0 state, which improves performance but may also increase the power
consumption of the platform.
Signed-off-by: Huisong Li <lihuisong at huawei.com>
Acked-by: Morten Brørup <mb at smartsharesystems.com>
Acked-by: Chengwen Feng <fengchengwen at huawei.com>
---
.../sample_app_ug/l3_forward_power_man.rst | 5 +-
examples/l3fwd-power/main.c | 68 +++++++++++++++++++
2 files changed, 72 insertions(+), 1 deletion(-)
diff --git a/doc/guides/sample_app_ug/l3_forward_power_man.rst b/doc/guides/sample_app_ug/l3_forward_power_man.rst
index 9c9684fea7..70fa83669a 100644
--- a/doc/guides/sample_app_ug/l3_forward_power_man.rst
+++ b/doc/guides/sample_app_ug/l3_forward_power_man.rst
@@ -67,7 +67,8 @@ based on the speculative sleep duration of the core.
In this application, we introduce a heuristic algorithm that allows packet processing cores to sleep for a short period
if there is no Rx packet received on recent polls.
In this way, CPUIdle automatically forces the corresponding cores to enter deeper C-states
-instead of always running to the C0 state waiting for packets.
+instead of always running to the C0 state waiting for packets. However, the user can set the CPU resume latency to control C-state selection.
+Setting the CPU resume latency to 0 only allows the CPU to enter the C0 state, which improves performance but may increase the power consumption of the platform.
.. note::
@@ -105,6 +106,8 @@ where,
* --config (port,queue,lcore)[,(port,queue,lcore)]: determines which queues from which ports are mapped to which cores.
+* --cpu-resume-latency LATENCY: set CPU resume latency to control C-state selection, 0 : just allow to enter C0-state.
+
* --max-pkt-len: optional, maximum packet length in decimal (64-9600)
* --no-numa: optional, disables numa awareness
diff --git a/examples/l3fwd-power/main.c b/examples/l3fwd-power/main.c
index 0ce4aa04d4..e58f4e301c 100644
--- a/examples/l3fwd-power/main.c
+++ b/examples/l3fwd-power/main.c
@@ -47,6 +47,7 @@
#include <rte_telemetry.h>
#include <rte_power_pmd_mgmt.h>
#include <rte_power_uncore.h>
+#include <rte_power_qos.h>
#include "perf_core.h"
#include "main.h"
@@ -265,6 +266,9 @@ static uint32_t pause_duration = 1;
static uint32_t scale_freq_min;
static uint32_t scale_freq_max;
+static int cpu_resume_latency;
+static bool pm_qos_en;
+
static struct rte_mempool * pktmbuf_pool[NB_SOCKETS];
@@ -1501,6 +1505,8 @@ print_usage(const char *prgname)
" -U: set min/max frequency for uncore to maximum value\n"
" -i (frequency index): set min/max frequency for uncore to specified frequency index\n"
" --config (port,queue,lcore): rx queues configuration\n"
+ " --cpu-resume-latency LATENCY: set CPU resume latency to control C-state selection,"
+ " 0 : just allow to enter C0-state\n"
" --high-perf-cores CORELIST: list of high performance cores\n"
" --perf-config: similar as config, cores specified as indices"
" for bins containing high or regular performance cores\n"
@@ -1545,6 +1551,28 @@ parse_uint32(const char *opt, uint32_t *res)
return 0;
}
+/*
+ * Parse a base-10 signed integer option string.
+ *
+ * opt: NUL-terminated option argument to parse.
+ * res: output; written only when parsing succeeds.
+ *
+ * Returns 0 on success, -1 if the string is empty, has trailing
+ * non-numeric characters, or the value does not fit in an int.
+ */
+static int
+parse_int(const char *opt, int *res)
+{
+	char *end = NULL;
+	long long val;
+
+	/*
+	 * Parse as long long rather than long: where long is no wider than
+	 * int, strtol() saturates an overflowing input to INT_MAX/INT_MIN and
+	 * the range check below could never reject it.
+	 */
+	val = strtoll(opt, &end, 10);
+	if ((opt[0] == '\0') || (end == NULL) || (*end != '\0'))
+		return -1;
+
+	if (val < INT_MIN || val > INT_MAX) {
+		RTE_LOG(ERR, L3FWD_POWER, "parameter should be range from %d to %d.\n",
+			INT_MIN, INT_MAX);
+		return -1;
+	}
+
+	*res = (int)val;
+
+	return 0;
+}
+
static int
parse_uncore_options(enum uncore_choice choice, const char *argument)
{
@@ -1734,6 +1762,7 @@ parse_pmd_mgmt_config(const char *name)
#define CMD_LINE_OPT_PAUSE_DURATION "pause-duration"
#define CMD_LINE_OPT_SCALE_FREQ_MIN "scale-freq-min"
#define CMD_LINE_OPT_SCALE_FREQ_MAX "scale-freq-max"
+#define CMD_LINE_OPT_CPU_RESUME_LATENCY "cpu-resume-latency"
/* Parse the argument given in the command line of the application */
static int
@@ -1748,6 +1777,7 @@ parse_args(int argc, char **argv)
{"perf-config", 1, 0, 0},
{"high-perf-cores", 1, 0, 0},
{"no-numa", 0, 0, 0},
+ {CMD_LINE_OPT_CPU_RESUME_LATENCY, 1, 0, 0},
{CMD_LINE_OPT_MAX_PKT_LEN, 1, 0, 0},
{CMD_LINE_OPT_PARSE_PTYPE, 0, 0, 0},
{CMD_LINE_OPT_LEGACY, 0, 0, 0},
@@ -1933,6 +1963,15 @@ parse_args(int argc, char **argv)
printf("Scaling frequency maximum configured\n");
}
+ if (!strncmp(lgopts[option_index].name,
+ CMD_LINE_OPT_CPU_RESUME_LATENCY,
+ sizeof(CMD_LINE_OPT_CPU_RESUME_LATENCY))) {
+ if (parse_int(optarg, &cpu_resume_latency) != 0)
+ return -1;
+ printf("PM QoS configured\n");
+ pm_qos_en = true;
+ }
+
break;
default:
@@ -2256,6 +2295,26 @@ init_power_library(void)
return -1;
}
}
+
+ if (pm_qos_en) {
+ RTE_LCORE_FOREACH(lcore_id) {
+ /*
+ * Set the cpu resume latency of the worker lcore based
+ * on user's request. If set strict latency (0), just
+ * allow the CPU to enter the shallowest idle state to
+ * improve performance.
+ */
+ ret = rte_power_qos_set_cpu_resume_latency(lcore_id,
+ cpu_resume_latency);
+ if (ret != 0) {
+ RTE_LOG(ERR, L3FWD_POWER,
+ "Failed to set cpu resume latency on lcore-%u.\n",
+ lcore_id);
+ return ret;
+ }
+ }
+ }
+
return ret;
}
@@ -2295,6 +2354,15 @@ deinit_power_library(void)
}
}
}
+
+ if (pm_qos_en) {
+ RTE_LCORE_FOREACH(lcore_id) {
+ /* Restore the original value in kernel. */
+ rte_power_qos_set_cpu_resume_latency(lcore_id,
+ RTE_POWER_QOS_RESUME_LATENCY_NO_CONSTRAINT);
+ }
+ }
+
return ret;
}
--
2.22.0
More information about the dev
mailing list