[PATCH v12 3/3] examples/l3fwd-power: add PM QoS configuration
lihuisong (C)
lihuisong at huawei.com
Fri Oct 25 05:35:54 CEST 2024
On 2024/10/25 0:44, Konstantin Ananyev wrote:
>
>> The '--cpu-resume-latency' option can be used to control C-state selection.
>> Setting the CPU resume latency to 0 restricts the CPU to the C0 state,
>> which improves performance but may also increase the power
>> consumption of the platform.
>>
>> Signed-off-by: Huisong Li <lihuisong at huawei.com>
>> Acked-by: Morten Brørup <mb at smartsharesystems.com>
>> Acked-by: Chengwen Feng <fengchengwen at huawei.com>
>> ---
>> .../sample_app_ug/l3_forward_power_man.rst | 5 +-
>> examples/l3fwd-power/main.c | 68 +++++++++++++++++++
>> 2 files changed, 72 insertions(+), 1 deletion(-)
>>
>> diff --git a/doc/guides/sample_app_ug/l3_forward_power_man.rst b/doc/guides/sample_app_ug/l3_forward_power_man.rst
>> index 9c9684fea7..70fa83669a 100644
>> --- a/doc/guides/sample_app_ug/l3_forward_power_man.rst
>> +++ b/doc/guides/sample_app_ug/l3_forward_power_man.rst
>> @@ -67,7 +67,8 @@ based on the speculative sleep duration of the core.
>> In this application, we introduce a heuristic algorithm that allows packet processing cores to sleep for a short period
>> if there is no Rx packet received on recent polls.
>> In this way, CPUIdle automatically forces the corresponding cores to enter deeper C-states
>> -instead of always running to the C0 state waiting for packets.
>> +instead of always running to the C0 state waiting for packets. However, the user can set the CPU resume latency to control C-state selection.
>> +Setting the CPU resume latency to 0 restricts the CPU to the C0 state, which improves performance but may increase the power
>> consumption of the platform.
>>
>> .. note::
>>
>> @@ -105,6 +106,8 @@ where,
>>
>> * --config (port,queue,lcore)[,(port,queue,lcore)]: determines which queues from which ports are mapped to which cores.
>>
>> +* --cpu-resume-latency LATENCY: set the CPU resume latency to control C-state selection; 0 allows the CPU to enter only the C0 state.
>> +
>> * --max-pkt-len: optional, maximum packet length in decimal (64-9600)
>>
>> * --no-numa: optional, disables numa awareness
>> diff --git a/examples/l3fwd-power/main.c b/examples/l3fwd-power/main.c
>> index 0ce4aa04d4..e58f4e301c 100644
>> --- a/examples/l3fwd-power/main.c
>> +++ b/examples/l3fwd-power/main.c
>> @@ -47,6 +47,7 @@
>> #include <rte_telemetry.h>
>> #include <rte_power_pmd_mgmt.h>
>> #include <rte_power_uncore.h>
>> +#include <rte_power_qos.h>
>>
>> #include "perf_core.h"
>> #include "main.h"
>> @@ -265,6 +266,9 @@ static uint32_t pause_duration = 1;
>> static uint32_t scale_freq_min;
>> static uint32_t scale_freq_max;
>>
>> +static int cpu_resume_latency;
>> +static bool pm_qos_en;
>> +
>> static struct rte_mempool * pktmbuf_pool[NB_SOCKETS];
>>
>>
>> @@ -1501,6 +1505,8 @@ print_usage(const char *prgname)
>> " -U: set min/max frequency for uncore to maximum value\n"
>> " -i (frequency index): set min/max frequency for uncore to specified frequency index\n"
>> " --config (port,queue,lcore): rx queues configuration\n"
>> + " --cpu-resume-latency LATENCY: set CPU resume latency to control C-state selection,"
>> + " 0 : just allow to enter C0-state\n"
>> " --high-perf-cores CORELIST: list of high performance cores\n"
>> " --perf-config: similar as config, cores specified as indices"
>> " for bins containing high or regular performance cores\n"
>> @@ -1545,6 +1551,28 @@ parse_uint32(const char *opt, uint32_t *res)
>> return 0;
>> }
>>
>> +static int
>> +parse_int(const char *opt, int *res)
>> +{
>> + char *end = NULL;
>> + signed long val;
>> +
>> + /* parse integer string */
>> + val = strtol(opt, &end, 10);
>> + if ((opt[0] == '\0') || (end == NULL) || (*end != '\0'))
>> + return -1;
>> +
>> + if (val < INT_MIN || val > INT_MAX) {
>> + RTE_LOG(ERR, L3FWD_POWER, "parameter should be range from %d to %d.\n",
>> + INT_MIN, INT_MAX);
>> + return -1;
>> + }
>> +
>> + *res = val;
>> +
>> + return 0;
>> +}
>> +
>> static int
>> parse_uncore_options(enum uncore_choice choice, const char *argument)
>> {
>> @@ -1734,6 +1762,7 @@ parse_pmd_mgmt_config(const char *name)
>> #define CMD_LINE_OPT_PAUSE_DURATION "pause-duration"
>> #define CMD_LINE_OPT_SCALE_FREQ_MIN "scale-freq-min"
>> #define CMD_LINE_OPT_SCALE_FREQ_MAX "scale-freq-max"
>> +#define CMD_LINE_OPT_CPU_RESUME_LATENCY "cpu-resume-latency"
>>
>> /* Parse the argument given in the command line of the application */
>> static int
>> @@ -1748,6 +1777,7 @@ parse_args(int argc, char **argv)
>> {"perf-config", 1, 0, 0},
>> {"high-perf-cores", 1, 0, 0},
>> {"no-numa", 0, 0, 0},
>> + {CMD_LINE_OPT_CPU_RESUME_LATENCY, 1, 0, 0},
>> {CMD_LINE_OPT_MAX_PKT_LEN, 1, 0, 0},
>> {CMD_LINE_OPT_PARSE_PTYPE, 0, 0, 0},
>> {CMD_LINE_OPT_LEGACY, 0, 0, 0},
>> @@ -1933,6 +1963,15 @@ parse_args(int argc, char **argv)
>> printf("Scaling frequency maximum configured\n");
>> }
>>
>> + if (!strncmp(lgopts[option_index].name,
>> + CMD_LINE_OPT_CPU_RESUME_LATENCY,
>> + sizeof(CMD_LINE_OPT_CPU_RESUME_LATENCY))) {
>> + if (parse_int(optarg, &cpu_resume_latency) != 0)
> Do you really need to support negative values for that variable?
will fix it.
>
>> + return -1;
>> + printf("PM QoS configured\n");
>> + pm_qos_en = true;
>> + }
>> +
>> break;
>>
>> default:
>> @@ -2256,6 +2295,26 @@ init_power_library(void)
>> return -1;
>> }
>> }
>> +
>> + if (pm_qos_en) {
>> + RTE_LCORE_FOREACH(lcore_id) {
>> + /*
>> + * Set the cpu resume latency of the worker lcore based
>> + * on user's request. If set strict latency (0), just
>> + * allow the CPU to enter the shallowest idle state to
>> + * improve performance.
>> + */
>> + ret = rte_power_qos_set_cpu_resume_latency(lcore_id,
>> + cpu_resume_latency);
>> + if (ret != 0) {
>> + RTE_LOG(ERR, L3FWD_POWER,
>> + "Failed to set cpu resume latency on lcore-%u.\n",
>> + lcore_id);
>> + return ret;
>> + }
>> + }
>> + }
>> +
>> return ret;
>> }
>>
>> @@ -2295,6 +2354,15 @@ deinit_power_library(void)
>> }
>> }
>> }
>> +
>> + if (pm_qos_en) {
>> + RTE_LCORE_FOREACH(lcore_id) {
>> + /* Restore the original value in kernel. */
>> + rte_power_qos_set_cpu_resume_latency(lcore_id,
>> + RTE_POWER_QOS_RESUME_LATENCY_NO_CONSTRAINT);
> If we are going to 'restore the original value', shouldn't we:
> At startup: old_value = get()
> At termination: set(old_value)
> ?
Ack
>
>> + }
>> + }
>> +
>> return ret;
>> }
>>
>> --
>> 2.22.0
More information about the dev
mailing list