[PATCH v12 3/3] examples/l3fwd-power: add PM QoS configuration
Konstantin Ananyev
konstantin.ananyev at huawei.com
Thu Oct 24 18:44:55 CEST 2024
> The '--cpu-resume-latency' option can be used to control C-state selection.
> Setting the CPU resume latency to 0 can limit the CPU just to enter
> C0-state to improve performance, which also may increase the power
> consumption of platform.
>
> Signed-off-by: Huisong Li <lihuisong at huawei.com>
> Acked-by: Morten Brørup <mb at smartsharesystems.com>
> Acked-by: Chengwen Feng <fengchengwen at huawei.com>
> ---
> .../sample_app_ug/l3_forward_power_man.rst | 5 +-
> examples/l3fwd-power/main.c | 68 +++++++++++++++++++
> 2 files changed, 72 insertions(+), 1 deletion(-)
>
> diff --git a/doc/guides/sample_app_ug/l3_forward_power_man.rst b/doc/guides/sample_app_ug/l3_forward_power_man.rst
> index 9c9684fea7..70fa83669a 100644
> --- a/doc/guides/sample_app_ug/l3_forward_power_man.rst
> +++ b/doc/guides/sample_app_ug/l3_forward_power_man.rst
> @@ -67,7 +67,8 @@ based on the speculative sleep duration of the core.
> In this application, we introduce a heuristic algorithm that allows packet processing cores to sleep for a short period
> if there is no Rx packet received on recent polls.
> In this way, CPUIdle automatically forces the corresponding cores to enter deeper C-states
> -instead of always running to the C0 state waiting for packets.
> +instead of always running to the C0 state waiting for packets. But user can set the CPU resume latency to control C-state selection.
> +Setting the CPU resume latency to 0 can limit the CPU just to enter C0-state to improve performance, which may increase power
> consumption of platform.
>
> .. note::
>
> @@ -105,6 +106,8 @@ where,
>
> * --config (port,queue,lcore)[,(port,queue,lcore)]: determines which queues from which ports are mapped to which cores.
>
> +* --cpu-resume-latency LATENCY: set CPU resume latency to control C-state selection, 0 : just allow to enter C0-state.
> +
> * --max-pkt-len: optional, maximum packet length in decimal (64-9600)
>
> * --no-numa: optional, disables numa awareness
> diff --git a/examples/l3fwd-power/main.c b/examples/l3fwd-power/main.c
> index 0ce4aa04d4..e58f4e301c 100644
> --- a/examples/l3fwd-power/main.c
> +++ b/examples/l3fwd-power/main.c
> @@ -47,6 +47,7 @@
> #include <rte_telemetry.h>
> #include <rte_power_pmd_mgmt.h>
> #include <rte_power_uncore.h>
> +#include <rte_power_qos.h>
>
> #include "perf_core.h"
> #include "main.h"
> @@ -265,6 +266,9 @@ static uint32_t pause_duration = 1;
> static uint32_t scale_freq_min;
> static uint32_t scale_freq_max;
>
> +static int cpu_resume_latency;
> +static bool pm_qos_en;
> +
> static struct rte_mempool * pktmbuf_pool[NB_SOCKETS];
>
>
> @@ -1501,6 +1505,8 @@ print_usage(const char *prgname)
> " -U: set min/max frequency for uncore to maximum value\n"
> " -i (frequency index): set min/max frequency for uncore to specified frequency index\n"
> " --config (port,queue,lcore): rx queues configuration\n"
> + " --cpu-resume-latency LATENCY: set CPU resume latency to control C-state selection,"
> + " 0 : just allow to enter C0-state\n"
> " --high-perf-cores CORELIST: list of high performance cores\n"
> " --perf-config: similar as config, cores specified as indices"
> " for bins containing high or regular performance cores\n"
> @@ -1545,6 +1551,28 @@ parse_uint32(const char *opt, uint32_t *res)
> return 0;
> }
>
> +static int
> +parse_int(const char *opt, int *res)
> +{
> + char *end = NULL;
> + signed long val;
> +
> + /* parse integer string */
> + val = strtol(opt, &end, 10);
> + if ((opt[0] == '\0') || (end == NULL) || (*end != '\0'))
> + return -1;
> +
> + if (val < INT_MIN || val > INT_MAX) {
> + RTE_LOG(ERR, L3FWD_POWER, "parameter should be range from %d to %d.\n",
> + INT_MIN, INT_MAX);
> + return -1;
> + }
> +
> + *res = val;
> +
> + return 0;
> +}
> +
> static int
> parse_uncore_options(enum uncore_choice choice, const char *argument)
> {
> @@ -1734,6 +1762,7 @@ parse_pmd_mgmt_config(const char *name)
> #define CMD_LINE_OPT_PAUSE_DURATION "pause-duration"
> #define CMD_LINE_OPT_SCALE_FREQ_MIN "scale-freq-min"
> #define CMD_LINE_OPT_SCALE_FREQ_MAX "scale-freq-max"
> +#define CMD_LINE_OPT_CPU_RESUME_LATENCY "cpu-resume-latency"
>
> /* Parse the argument given in the command line of the application */
> static int
> @@ -1748,6 +1777,7 @@ parse_args(int argc, char **argv)
> {"perf-config", 1, 0, 0},
> {"high-perf-cores", 1, 0, 0},
> {"no-numa", 0, 0, 0},
> + {CMD_LINE_OPT_CPU_RESUME_LATENCY, 1, 0, 0},
> {CMD_LINE_OPT_MAX_PKT_LEN, 1, 0, 0},
> {CMD_LINE_OPT_PARSE_PTYPE, 0, 0, 0},
> {CMD_LINE_OPT_LEGACY, 0, 0, 0},
> @@ -1933,6 +1963,15 @@ parse_args(int argc, char **argv)
> printf("Scaling frequency maximum configured\n");
> }
>
> + if (!strncmp(lgopts[option_index].name,
> + CMD_LINE_OPT_CPU_RESUME_LATENCY,
> + sizeof(CMD_LINE_OPT_CPU_RESUME_LATENCY))) {
> + if (parse_int(optarg, &cpu_resume_latency) != 0)
Do you really need to support negative values for that variable?
> + return -1;
> + printf("PM QoS configured\n");
> + pm_qos_en = true;
> + }
> +
> break;
>
> default:
> @@ -2256,6 +2295,26 @@ init_power_library(void)
> return -1;
> }
> }
> +
> + if (pm_qos_en) {
> + RTE_LCORE_FOREACH(lcore_id) {
> + /*
> + * Set the cpu resume latency of the worker lcore based
> + * on user's request. If set strict latency (0), just
> + * allow the CPU to enter the shallowest idle state to
> + * improve performance.
> + */
> + ret = rte_power_qos_set_cpu_resume_latency(lcore_id,
> + cpu_resume_latency);
> + if (ret != 0) {
> + RTE_LOG(ERR, L3FWD_POWER,
> + "Failed to set cpu resume latency on lcore-%u.\n",
> + lcore_id);
> + return ret;
> + }
> + }
> + }
> +
> return ret;
> }
>
> @@ -2295,6 +2354,15 @@ deinit_power_library(void)
> }
> }
> }
> +
> + if (pm_qos_en) {
> + RTE_LCORE_FOREACH(lcore_id) {
> + /* Restore the original value in kernel. */
> + rte_power_qos_set_cpu_resume_latency(lcore_id,
> + RTE_POWER_QOS_RESUME_LATENCY_NO_CONSTRAINT);
If we are going to 'restore original', shouldn't we:
At startup: old_value = get()
At termination: set(old_value)
?
> + }
> + }
> +
> return ret;
> }
>
> --
> 2.22.0
More information about the dev
mailing list