[dpdk-dev] [PATCH] libs/power: add p-state driver compatibility
Liang, Ma
liang.j.ma at intel.com
Thu Dec 13 11:58:48 CET 2018
On 10 Dec 16:08, Burakov, Anatoly wrote:
<snip>
> Hi Liang,
>
> General comment: please do not break up log strings onto multiple lines. It
> makes it harder to grep the codebase. Checkpatch will not warn about log
> strings going over 80 characters.
agree, I will update in next version
>
> > ---
> > lib/librte_power/Makefile | 2 +
> > lib/librte_power/meson.build | 4 +-
> > lib/librte_power/power_pstate_cpufreq.c | 778 ++++++++++++++++++++++++++++++++
> > lib/librte_power/power_pstate_cpufreq.h | 218 +++++++++
> > lib/librte_power/rte_power.c | 43 +-
> > lib/librte_power/rte_power.h | 3 +-
> > 6 files changed, 1039 insertions(+), 9 deletions(-)
> > create mode 100644 lib/librte_power/power_pstate_cpufreq.c
> > create mode 100644 lib/librte_power/power_pstate_cpufreq.h
>
> <snip>
>
> > sources = files('rte_power.c', 'power_acpi_cpufreq.c',
> > 'power_kvm_vm.c', 'guest_channel.c',
> > - 'rte_power_empty_poll.c')
> > + 'rte_power_empty_poll.c',
> > + 'power_pstate_cpufreq.c')
> > headers = files('rte_power.h','rte_power_empty_poll.h')
> > -deps += ['timer']
> > diff --git a/lib/librte_power/power_pstate_cpufreq.c b/lib/librte_power/power_pstate_cpufreq.c
> > new file mode 100644
> > index 0000000..f1dada8
> > --- /dev/null
> > +++ b/lib/librte_power/power_pstate_cpufreq.c
> > @@ -0,0 +1,778 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2010-2018 Intel Corporation
>
> I believe it should be 2018.
I didn't see anything wrong here.
>
> > + */
> > +
> > +#include <stdio.h>
> > +#include <sys/types.h>
> > +#include <sys/stat.h>
> > +#include <fcntl.h>
> > +#include <stdlib.h>
> > +#include <string.h>
> > +#include <unistd.h>
> > +#include <signal.h>
> > +#include <limits.h>
> > +#include <errno.h>
> > +
> > +#include <rte_memcpy.h>
> > +#include <rte_atomic.h>
> > +
> > +#include "power_pstate_cpufreq.h"
> > +#include "power_common.h"
> > +
> <snip>
>
> > +
> > +static struct pstate_power_info lcore_power_info[RTE_MAX_LCORE];
> > +
> > +/**
> > + * It is to read the specific MSR.
> > + */
> > +
> > +static int32_t
> > +power_rdmsr(int msr, uint64_t *val, unsigned int lcore_id)
> > +{
> > + int fd;
> > + int ret;
>
> int fd, ret?a
should be fine.
>
> > + char fullpath[PATH_MAX];
> > +
> > + snprintf(fullpath, sizeof(fullpath), POWER_MSR_PATH, lcore_id);
> > +
> > + fd = open(fullpath, O_RDONLY);
> > +
> > + if (fd < 0) {
> > +
> > + if (errno == EACCES)
> > + RTE_LOG(ERR, POWER, "No access to %s\n", fullpath);
> > +
> > + if (errno == ENXIO)
> > + RTE_LOG(ERR, POWER, "%s Not Exist!\n", fullpath);
>
> How about:
>
> RTE_LOG(ERR, POWER, "Error opening '%s': %s\n", fullpath, strerror(errno));
agree, I will update in next version
>
> ?
>
> > +
> > + return fd;
> > + }
> > +
> > + ret = pread(fd, val, sizeof(uint64_t), msr);
>
> Can pread fail?
agree, I will add error checking here.
>
> > +
> > + close(fd);
> > +
> > + POWER_DEBUG_TRACE("MSR Path %s, offset 0x%X for lcore %u\n",
> > + fullpath, msr, lcore_id);
> > +
> > + POWER_DEBUG_TRACE("Ret value %d, content is 0x%lx\n", ret, *val);
> > +
> > + return ret;
> > +}
> > +
>
> <snip>
>
> > + if (fseek(pi->f_cur_max, 0, SEEK_SET) < 0) {
> > + RTE_LOG(ERR, POWER, "Fail to set file position indicator to 0 "
> > + "for setting frequency for lcore %u\n",
> > + pi->lcore_id);
> > + return -1;
> > + }
> > +
> > + /* Turbo is available and enabled, first freq bucket is sys max freq */
> > + if (pi->turbo_available && pi->turbo_enable && (idx == 0))
> > +
> > + target_freq = pi->sys_max_freq;
> > +
> > + else
> > +
> > + target_freq = pi->freqs[idx];
>
> Unneeded whitespace?
agree, I will remove it.
> > +
> > +
> > + /* Decrease freq, the min freq should be updated first */
> > + if (idx > pi->curr_idx) {
> > +
> > + if (fprintf(pi->f_cur_min, "%u", target_freq) < 0) {
> > + RTE_LOG(ERR, POWER, "Fail to write new frequency for "
> > + "lcore %u\n", pi->lcore_id);
> > + return -1;
> > + }
> > +
> > + if (fprintf(pi->f_cur_max, "%u", target_freq) < 0) {
> > + RTE_LOG(ERR, POWER, "Fail to write new frequency for "
> > + "lcore %u\n", pi->lcore_id);
> > + return -1;
> > + }
> > +
> > + POWER_DEBUG_TRACE("Freqency[%u] to be set for lcore %u\n",
> > + target_freq, pi->lcore_id);
>
> Frequency :)
>
> Also, i believe "Frequency[%u]" is misleading in this case, because the
> value is not an index into an array, but is an actual target frequency. I
> think "Frequency '%u'" would be more descriptive.
agree I will update in next version
>
> > +
> > + fflush(pi->f_cur_min);
> > + fflush(pi->f_cur_max);
> > +
> > + }
> > +
> > + /* Increase freq, the max freq should be updated first */
> > + if (idx < pi->curr_idx) {
>
> Else if?
no need here.
>
> > +
> > + if (fprintf(pi->f_cur_max, "%u", target_freq) < 0) {
> > + RTE_LOG(ERR, POWER, "Fail to write new frequency for "
> > + "lcore %u\n", pi->lcore_id);
> > + return -1;
> > + }
> > +
> > + if (fprintf(pi->f_cur_min, "%u", target_freq) < 0) {
> > + RTE_LOG(ERR, POWER, "Fail to write new frequency for "
> > + "lcore %u\n", pi->lcore_id);
> > + return -1;
> > + }
>
> Forgot POWER_DEBUG_TRACE?
>
agree, I will add here.
> > +
> > + fflush(pi->f_cur_max);
> > + fflush(pi->f_cur_min);
> > + }
> > +
> > + pi->curr_idx = idx;
> > +
> > + return 1;
> > +}
> > +
>
> <snip>
>
> > + snprintf(fullpath, sizeof(fullpath), POWER_SYSFILE_GOVERNOR,
> > + pi->lcore_id);
> > + f = fopen(fullpath, "rw+");
> > + FOPEN_OR_ERR_RET(f, ret);
> > +
> > + s = fgets(buf, sizeof(buf), f);
> > + FOPS_OR_NULL_GOTO(s, out);
> > +
> > + /* Check if current governor is performance */
> > + if (strncmp(buf, POWER_GOVERNOR_PERF,
> > + sizeof(POWER_GOVERNOR_PERF)) == 0) {
>
> Nitpick, but probably should be strlen, not sizeof?
sizeof should be fine
>
> > + ret = 0;
> > + POWER_DEBUG_TRACE("Power management governor of lcore %u is "
> > + "already performance\n", pi->lcore_id);
> > + goto out;
> > + }
> > + /* Save the original governor */
> > + snprintf(pi->governor_ori, sizeof(pi->governor_ori), "%s", buf);
> > +
> > + /* Write 'performance' to the governor */
> > + val = fseek(f, 0, SEEK_SET);
> > + FOPS_OR_ERR_GOTO(val, out);
> > +
> > + val = fputs(POWER_GOVERNOR_PERF, f);
> > + FOPS_OR_ERR_GOTO(val, out);
> > +
> > + ret = 0;
> > + RTE_LOG(INFO, POWER, "Power management governor of lcore %u has been "
> > + "set to performance successfully\n", pi->lcore_id);
>
> Do we want this as INFO? (it's OK if we do, just asking!)
just keep it as same as acpi driver
>
> > +out:
> > + fclose(f);
> > +
> > + return ret;
> > +}
> > +
> > +/**
> > + * It is to check the governor and then set the original governor back if
> > + * needed by writing the sys file.
> > + */
> > +static int
> > +power_set_governor_original(struct pstate_power_info *pi)
> > +{
>
> <snip>
>
> > + /* Write back the original governor */
> > + val = fseek(f, 0, SEEK_SET);
> > + FOPS_OR_ERR_GOTO(val, out);
> > +
> > + val = fputs(pi->governor_ori, f);
> > + FOPS_OR_ERR_GOTO(val, out);
> > +
> > + ret = 0;
> > + RTE_LOG(INFO, POWER, "Power management governor of lcore %u "
> > + "has been set back to %s successfully\n",
> > + pi->lcore_id, pi->governor_ori);
>
> Same - do we want this as INFO?
keep it as same as acpi driver
>
> > +out:
> > + fclose(f);
> > +
> > + return ret;
> > +}
> > +
> > +/**
> > + * It is to get the available frequencies of the specific lcore by reading the
> > + * sys file.
> > + */
> > +static int
> > +power_get_available_freqs(struct pstate_power_info *pi)
> > +{
> > + FILE *f_min, *f_max;
> > + int ret = -1;
> > + char *p_min, *p_max;
> > + char buf_min[BUFSIZ];
> > + char buf_max[BUFSIZ];
> > + char fullpath_min[PATH_MAX];
> > + char fullpath_max[PATH_MAX];
> > + char *s_min, *s_max;
> > + uint32_t sys_min_freq = 0, sys_max_freq = 0, base_max_freq = 0;
> > + uint32_t i, num_freqs = 0;
> > +
> > + snprintf(fullpath_max, sizeof(fullpath_max),
> > + POWER_SYSFILE_BASE_MAX_FREQ,
> > + pi->lcore_id);
> > + snprintf(fullpath_min, sizeof(fullpath_min),
> > + POWER_SYSFILE_BASE_MIN_FREQ,
> > + pi->lcore_id);
> > +
> > + f_min = fopen(fullpath_min, "r");
> > + FOPEN_OR_ERR_RET(f_min, ret);
> > +
> > + s_min = fgets(buf_min, sizeof(buf_min), f_min);
> > + FOPS_OR_NULL_GOTO(s_min, out);
> > +
> > + f_max = fopen(fullpath_max, "r");
> > + FOPEN_OR_ERR_RET(f_max, ret);
> > +
> > + s_max = fgets(buf_max, sizeof(buf_max), f_max);
> > + FOPS_OR_NULL_GOTO(s_max, out);
> > +
> > +
> > + /* Strip the line break if there is */
> > + p_min = strchr(buf_min, '\n');
> > + if (p_min != NULL)
> > + *p_min = 0;
> > +
> > + p_max = strchr(buf_max, '\n');
> > + if (p_max != NULL)
> > + *p_max = 0;
>
> Probably '\0' would be more correct.
should be fine
>
> > +
> > + sys_min_freq = strtoul(buf_min, &p_min, POWER_CONVERT_TO_DECIMAL);
> > + sys_max_freq = strtoul(buf_max, &p_max, POWER_CONVERT_TO_DECIMAL);
> > +
> > + if (sys_max_freq < sys_min_freq)
> > + goto out;
>
> This leaks fds. See below.
agree, I will adjust the postion of label "out"
>
> > +
> > +
> > + pi->sys_max_freq = sys_max_freq;
> > +
> > + base_max_freq = pi->non_turbo_max_ratio*BUS_FREQ;
>
> spacing around multiplication :)
agree. I will remove that
>
> > +
> > + POWER_DEBUG_TRACE("sys min %u, sys max %u, base_max %u\n",
> > + sys_min_freq,
> > + sys_max_freq,
> > + base_max_freq);
> > +
> > + if (base_max_freq < sys_max_freq)
> > +
> > + pi->turbo_available = 1;
>
> Extra whitespace.
agree
>
> > + else
> > + pi->turbo_available = 0;
> > +
> > +
> > + /* If turbo is available then there is one extra freq bucket
> > + * to store the sys max freq which value is base_max +1
> > + */
> > + num_freqs = (base_max_freq - sys_min_freq)/BUS_FREQ + 1
> > + + pi->turbo_available;
>
> The '+' should be on the preceding line, also spacing.
agree
>
> > +
> > + /* Generate the freq bucket array.
> > + * If turbo is available the freq bucket[0] value is base_max +1
> > + * the bucket[1] is base_max, bucket[2] is base_max - BUS_FREQ
> > + * and so on.
> > + * If turbo is not available bucket[0] is base_max and so on
> > + */
> > + for (i = 0, pi->nb_freqs = 0; i < num_freqs; i++) {
> > +
>
> Extra whitespace.
agree
>
> > + if ((i == 0) && pi->turbo_available)
> > + pi->freqs[pi->nb_freqs++] = base_max_freq + 1;
> > + else
> > + pi->freqs[pi->nb_freqs++] =
> > + base_max_freq - (i - pi->turbo_available)*BUS_FREQ;
>
> Misleading indentation, also spacing around multiplication operator.
agree
> > + }
> > +
> > + ret = 0;
> > +
> > + POWER_DEBUG_TRACE("%d frequency(s) of lcore %u are available\n",
> > + num_freqs, pi->lcore_id);
> > +
> > + fclose(f_min);
> > + fclose(f_max);
> > +
> > +
> > +out:
>
> fclose should be after out, otherwise error path above will leak fd.
agree
> > + return ret;
> > +}
> > +
> > +int
> > +power_pstate_cpufreq_init(unsigned int lcore_id)
> > +{
> > + struct pstate_power_info *pi;
> > +
> > + if (lcore_id >= RTE_MAX_LCORE) {
> > + RTE_LOG(ERR, POWER, "Lcore id %u can not exceeds %u\n",
>
> Cannot exceed.
>
agree
> > + lcore_id, RTE_MAX_LCORE - 1U);
> > + return -1;
> > + }
> > +
> > + pi = &lcore_power_info[lcore_id];
> > + if (rte_atomic32_cmpset(&(pi->state), POWER_IDLE, POWER_ONGOING)
> > + == 0) {
> > + RTE_LOG(INFO, POWER, "Power management of lcore %u is "
> > + "in use\n", lcore_id);
> > + return -1;
> > + }
> > +
> > + pi->lcore_id = lcore_id;
>
> Can we not set this until we're sure we didn't fail? Or do any of the below
> calls rely on pi->lcore_id to be set?
>
power_set_governor_performance will sue lcore_id
> > + /* Check and set the governor */
> > + if (power_set_governor_performance(pi) < 0) {
> > + RTE_LOG(ERR, POWER, "Cannot set governor of lcore %u to "
> > + "performance\n", lcore_id);
> > + goto fail;
> > + }
> > + /* Init for setting lcore frequency */
> > + if (power_init_for_setting_freq(pi) < 0) {
> > + RTE_LOG(ERR, POWER, "Cannot init for setting frequency for "
> > + "lcore %u\n", lcore_id);
> > + goto fail;
> > + }
> > +
> > + /* Get the available frequencies */
> > + if (power_get_available_freqs(pi) < 0) {
> > + RTE_LOG(ERR, POWER, "Cannot get available frequencies of "
> > + "lcore %u\n", lcore_id);
> > + goto fail;
> > + }
> > +
> > +
> > + /* Set freq to max by default */
> > + if (power_pstate_cpufreq_freq_max(lcore_id) < 0) {
> > + RTE_LOG(ERR, POWER, "Cannot set frequency of lcore %u "
> > + "to max\n", lcore_id);
> > + goto fail;
> > + }
> > +
> > + RTE_LOG(INFO, POWER, "Initialized successfully for lcore %u "
> > + "power management\n", lcore_id);
>
> Do we want this as INFO?
>
> > + rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_USED);
> > +
> > + return 0;
> > +
> > +fail:
> > + rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_UNKNOWN);
> > +
> > + return -1;
> > +}
> > +
> > +int
> > +power_pstate_cpufreq_exit(unsigned int lcore_id)
> > +{
> > + struct pstate_power_info *pi;
> > +
>
> <snip>
>
> > + /* Set the governor back to the original */
> > + if (power_set_governor_original(pi) < 0) {
> > + RTE_LOG(ERR, POWER, "Cannot set the governor of %u back "
> > + "to the original\n", lcore_id);
> > + goto fail;
> > + }
> > +
> > + RTE_LOG(INFO, POWER, "Power management of lcore %u has exited from "
> > + "'performance' mode and been set back to the "
> > + "original\n", lcore_id);
>
> Perhaps print out the original governor name? Also, i think it's better to
> use the performance string define, rather than having it as part of
> formatted message.
original governor name is alread print out by power_set_governor_original
> > + rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_IDLE);
> > +
> > + return 0;
> > +
> > +fail:
> > + rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_UNKNOWN);
> > +
> > + return -1;
> > +}
> > +
> > +
> > +uint32_t
> > +power_pstate_cpufreq_freqs(unsigned int lcore_id, uint32_t *freqs, uint32_t num)
> > +{
> > + struct pstate_power_info *pi;
> > +
> > + if (lcore_id >= RTE_MAX_LCORE) {
> > + RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
> > + return -1;
> > + }
> > +
> > + pi = &lcore_power_info[lcore_id];
> > + if (num < pi->nb_freqs) {
> > + RTE_LOG(ERR, POWER, "Buffer size is not enough\n");
> > + return 0;
> > + }
> > + rte_memcpy(freqs, pi->freqs, pi->nb_freqs * sizeof(uint32_t));
>
> Why rte_memcpy?
the table can be large
> > +
> > + return pi->nb_freqs;
> > +}
> > +
> > +uint32_t
> > +power_pstate_cpufreq_get_freq(unsigned int lcore_id)
> > +{
> > + if (lcore_id >= RTE_MAX_LCORE) {
> > + RTE_LOG(ERR, POWER, "Invalid lcore ID\n");
> > + return RTE_POWER_INVALID_FREQ_INDEX;
> > + }
> > +
> > + return lcore_power_info[lcore_id].curr_idx;
> > +}
> > +
>
> <snip>
>
> > + caps->turbo = !!(pi->turbo_available);
> > +
> > + return 0;
> > +}
> > diff --git a/lib/librte_power/power_pstate_cpufreq.h b/lib/librte_power/power_pstate_cpufreq.h
> > new file mode 100644
> > index 0000000..0fc917a
> > --- /dev/null
> > +++ b/lib/librte_power/power_pstate_cpufreq.h
> > @@ -0,0 +1,218 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2010-2018 Intel Corporation
>
> I believe it should be just 2018.
>
> > + */
> > +
> > +#ifndef _POWER_PSTATE_CPUFREQ_H
> > +#define _POWER_PSTATE_CPUFREQ_H
> > +
> > +/**
> > + * @file
> > + * RTE Power Management via Intel Pstate driver
> > + */
> > +
> > +#include <rte_common.h>
> > +#include <rte_byteorder.h>
> > +#include <rte_log.h>
> > +#include <rte_string_fns.h>
> > +#include "rte_power.h"
> > +
> > +#ifdef __cplusplus
> > +extern "C" {
> > +#endif
>
> I don't think this is necessary. These extern declarations are only for
> public API headers, so that C++ compilers could parse them. Since this is
> not a public header, there's no need for extern C declaration.
just keep as same as other headers
>
> > +
> > +/**
> > + * Initialize power management for a specific lcore. It will check and set the
> > + * governor to performance for the lcore, get the available frequencies, and
> > + * prepare to set new lcore frequency.
> > + *
> > + * @param lcore_id
> > + * lcore id.
> > + *
> > + * @return
> > + * - 0 on success.
> > + * - Negative on error.
> > + */
>
> <snip>
>
> > + *
> > + * @return
> > + * The number of available frequencies.
> > + */
> > +uint32_t power_pstate_cpufreq_freqs(unsigned int lcore_id, uint32_t *freqs,
> > + uint32_t num);
> > +
> > +/**
> > + * Return the current index of available frequencies of a specific lcore. It
> > + * will return 'RTE_POWER_INVALID_FREQ_INDEX = (~0)' if error.
>
> This isn't a public API, so it's nitpicking, but any notes about return
> value should be in the @return section.
agree
>
> > + * It should be protected outside of this function for threadsafe.
> > + *
> > + * @param lcore_id
> > + * lcore id.
> > + *
> > + * @return
> > + * The current index of available frequencies.
> > + */
> > +uint32_t power_pstate_cpufreq_get_freq(unsigned int lcore_id);
> > +
> > +/**
>
> <snip>
>
> > #include "rte_power.h"
> > #include "power_acpi_cpufreq.h"
> > #include "power_kvm_vm.h"
> > +#include "power_pstate_cpufreq.h"
> > #include "power_common.h"
> > enum power_management_env global_default_env = PM_ENV_NOT_SET;
> > @@ -29,6 +30,8 @@ rte_power_get_capabilities_t rte_power_get_capabilities;
> > int
> > rte_power_set_env(enum power_management_env env)
> > {
> > +
> > +
>
> This is probably unintended whitespace change.
>
agree
> > if (rte_atomic32_cmpset(&global_env_cfg_status, 0, 1) == 0) {
> > return 0;
> > }
> > @@ -56,6 +59,19 @@ rte_power_set_env(enum power_management_env env)
> > rte_power_freq_enable_turbo = power_kvm_vm_enable_turbo;
> > rte_power_freq_disable_turbo = power_kvm_vm_disable_turbo;
> > rte_power_get_capabilities = power_kvm_vm_get_capabilities;
> > + } else if (env == PM_ENV_PSTATE_CPUFREQ) {
> > + rte_power_freqs = power_pstate_cpufreq_freqs;
> > + rte_power_get_freq = power_pstate_cpufreq_get_freq;
>
> <snip>
>
> > + if (global_default_env == PM_ENV_KVM_VM)
> > return power_kvm_vm_init(lcore_id);
> > - }
> > +
> > + if (global_default_env == PM_ENV_PSTATE_CPUFREQ)
> > + return power_pstate_cpufreq_init(lcore_id);
> > +
>
> switch?
should be fine for only 3 enum type
>
> > /* Auto detect Environment */
> > RTE_LOG(INFO, POWER, "Attempting to initialise ACPI cpufreq power "
> > "management...\n");
> > @@ -99,13 +118,23 @@ rte_power_init(unsigned int lcore_id)
> > goto out;
> > }
> > - RTE_LOG(INFO, POWER, "Attempting to initialise VM power management...\n");
> > + RTE_LOG(INFO, POWER, "Attempting to initialise PSTAT power "
> > + "management...\n");
> > + ret = power_pstate_cpufreq_init(lcore_id);
> > + if (ret == 0) {
> > + rte_power_set_env(PM_ENV_PSTATE_CPUFREQ);
> > + goto out;
> > + }
> > +
> > + RTE_LOG(INFO, POWER, "Attempting to initialise VM power "
> > + "management...\n");
> > ret = power_kvm_vm_init(lcore_id);
> > if (ret == 0) {
> > rte_power_set_env(PM_ENV_KVM_VM);
> > goto out;
> > }
> > - RTE_LOG(ERR, POWER, "Unable to set Power Management Environment for lcore "
> > + RTE_LOG(ERR, POWER, "Unable to set Power Management "
> > + "Environment for lcore "
> > "%u\n", lcore_id);
>
> Perhaps this could be moved to a separate function, so that the code could
> look like:
>
> switch (global_default_env) {
> case PM_ENV_ACPI:
> return acpi_init();
> case PM_ENV_KVM:
> return kvm_init();
> case PM_ENV_PSTATE:
> return pstate_init();
> default:
> if (autodetect() < 0)
> RTE_LOG("error");
> return 0;
> }
>
> Or something like that.
I can move to switch
>
> > out:
> > return ret;
> > @@ -118,6 +147,8 @@ rte_power_exit(unsigned int lcore_id)
> > return power_acpi_cpufreq_exit(lcore_id);
> > if (global_default_env == PM_ENV_KVM_VM)
> > return power_kvm_vm_exit(lcore_id);
> > + if (global_default_env == PM_ENV_PSTATE_CPUFREQ)
> > + return power_pstate_cpufreq_exit(lcore_id);
>
> switch?
>
> > RTE_LOG(ERR, POWER, "Environment has not been set, unable to exit "
> > "gracefully\n");
> > diff --git a/lib/librte_power/rte_power.h b/lib/librte_power/rte_power.h
> > index d70bc0b..c5e8f6b 100644
> > --- a/lib/librte_power/rte_power.h
> > +++ b/lib/librte_power/rte_power.h
> > @@ -20,7 +20,8 @@ extern "C" {
> > #endif
> > /* Power Management Environment State */
> > -enum power_management_env {PM_ENV_NOT_SET, PM_ENV_ACPI_CPUFREQ, PM_ENV_KVM_VM};
> > +enum power_management_env {PM_ENV_NOT_SET, PM_ENV_ACPI_CPUFREQ, PM_ENV_KVM_VM,
> > + PM_ENV_PSTATE_CPUFREQ};
>
> I don't like this approach. While it can be argued that application needs to
> be explicitly aware of whether it's using native (ACPI or pstate) vs. KVM
> power management, there's no real reason for an application to differentiate
> between ACPI and pstate modes.
>
> Changing this would of course require a deprecation notice, so for now, can
> we hide all of this behind ACPI mode, and auto-detect whether we want ACPI
> or pstate mode? IMO it would be better for the user application to use a
> somewhat misnamed ACPI option for both ACPI and pstate modes, than to
> needlessly care about whether one or the other is in use.
>
> What do you think?
acpi has significant difference with hwp. sysfs/xxxx/setspeed sysfs/xxxx/scaling_driver is different
I would like to make application aware the driver type.
>
> > /**
> > * Set the default power management implementation. If this is not called prior
> >
>
> --
> Thanks,
> Anatoly
More information about the dev
mailing list