[dpdk-dev] [PATCH v5 5/7] power: support callbacks for multiple Rx queues

Burakov, Anatoly anatoly.burakov at intel.com
Mon Jul 5 12:24:02 CEST 2021


On 01-Jul-21 10:01 AM, David Hunt wrote:
> 
> On 30/6/2021 10:52 AM, David Hunt wrote:
>> Hi Anatoly,
>>
>> On 29/6/2021 4:48 PM, Anatoly Burakov wrote:
>>> Currently, there is a hard limitation on the PMD power management
>>> support that only allows it to support a single queue per lcore. This is
>>> not ideal as most DPDK use cases will poll multiple queues per core.
>>>
>>> The PMD power management mechanism relies on ethdev Rx callbacks, so it
>>> is very difficult to implement such support because callbacks are
>>> effectively stateless and have no visibility into what the other ethdev
>>> devices are doing. This places limitations on what we can do within the
>>> framework of Rx callbacks, but the basics of this implementation are as
>>> follows:
>>>
>>> - Replace per-queue structures with per-lcore ones, so that any device
>>>    polled from the same lcore can share data
>>> - Any queue that is going to be polled from a specific lcore has to be
>>>    added to the list of queues to poll, so that the callback is aware of
>>>    other queues being polled by the same lcore
>>> - Both the empty poll counter and the actual power saving mechanism is
>>>    shared between all queues polled on a particular lcore, and is only
>>>    activated when all queues in the list were polled and were determined
>>>    to have no traffic.
>>> - The limitation on UMWAIT-based polling is not removed because UMWAIT
>>>    is incapable of monitoring more than one address.
>>>
>>> Also, while we're at it, update and improve the docs.
>>>
>>> Signed-off-by: Anatoly Burakov <anatoly.burakov at intel.com>
>>> ---
>>>
>>> Notes:
>>>      v5:
>>>      - Remove the "power save queue" API and replace it with 
>>> mechanism suggested by
>>>        Konstantin
>>>           v3:
>>>      - Move the list of supported NICs to NIC feature table
>>>           v2:
>>>      - Use a TAILQ for queues instead of a static array
>>>      - Address feedback from Konstantin
>>>      - Add additional checks for stopped queues
>>>
>>>   doc/guides/nics/features.rst           |  10 +
>>>   doc/guides/prog_guide/power_man.rst    |  65 ++--
>>>   doc/guides/rel_notes/release_21_08.rst |   3 +
>>>   lib/power/rte_power_pmd_mgmt.c         | 431 ++++++++++++++++++-------
>>>   4 files changed, 373 insertions(+), 136 deletions(-)
>>>
>>
>> --snip--
>>
>>>   int
>>>   rte_power_ethdev_pmgmt_queue_enable(unsigned int lcore_id, uint16_t 
>>> port_id,
>>>           uint16_t queue_id, enum rte_power_pmd_mgmt_type mode)
>>>   {
>>> -    struct pmd_queue_cfg *queue_cfg;
>>> +    const union queue qdata = {.portid = port_id, .qid = queue_id};
>>> +    struct pmd_core_cfg *lcore_cfg;
>>> +    struct queue_list_entry *queue_cfg;
>>>       struct rte_eth_dev_info info;
>>>       rte_rx_callback_fn clb;
>>>       int ret;
>>> @@ -202,9 +401,19 @@ rte_power_ethdev_pmgmt_queue_enable(unsigned int 
>>> lcore_id, uint16_t port_id,
>>>           goto end;
>>>       }
>>>   -    queue_cfg = &port_cfg[port_id][queue_id];
>>> +    lcore_cfg = &lcore_cfgs[lcore_id];
>>>   -    if (queue_cfg->pwr_mgmt_state != PMD_MGMT_DISABLED) {
>>> +    /* check if other queues are stopped as well */
>>> +    ret = cfg_queues_stopped(lcore_cfg);
>>> +    if (ret != 1) {
>>> +        /* error means invalid queue, 0 means queue wasn't stopped */
>>> +        ret = ret < 0 ? -EINVAL : -EBUSY;
>>> +        goto end;
>>> +    }
>>> +
>>> +    /* if callback was already enabled, check current callback type */
>>> +    if (lcore_cfg->pwr_mgmt_state != PMD_MGMT_DISABLED &&
>>> +            lcore_cfg->cb_mode != mode) {
>>>           ret = -EINVAL;
>>>           goto end;
>>>       }
>>> @@ -214,53 +423,20 @@ rte_power_ethdev_pmgmt_queue_enable(unsigned 
>>> int lcore_id, uint16_t port_id,
>>>         switch (mode) {
>>>       case RTE_POWER_MGMT_TYPE_MONITOR:
>>> -    {
>>> -        struct rte_power_monitor_cond dummy;
>>> -
>>> -        /* check if rte_power_monitor is supported */
>>> -        if (!global_data.intrinsics_support.power_monitor) {
>>> -            RTE_LOG(DEBUG, POWER, "Monitoring intrinsics are not 
>>> supported\n");
>>> -            ret = -ENOTSUP;
>>> +        /* check if we can add a new queue */
>>> +        ret = check_monitor(lcore_cfg, &qdata);
>>> +        if (ret < 0)
>>>               goto end;
>>> -        }
>>>   -        /* check if the device supports the necessary PMD API */
>>> -        if (rte_eth_get_monitor_addr(port_id, queue_id,
>>> -                &dummy) == -ENOTSUP) {
>>> -            RTE_LOG(DEBUG, POWER, "The device does not support 
>>> rte_eth_get_monitor_addr\n");
>>> -            ret = -ENOTSUP;
>>> -            goto end;
>>> -        }
>>>           clb = clb_umwait;
>>>           break;
>>> -    }
>>>       case RTE_POWER_MGMT_TYPE_SCALE:
>>> -    {
>>> -        enum power_management_env env;
>>> -        /* only PSTATE and ACPI modes are supported */
>>> -        if (!rte_power_check_env_supported(PM_ENV_ACPI_CPUFREQ) &&
>>> -                !rte_power_check_env_supported(
>>> -                    PM_ENV_PSTATE_CPUFREQ)) {
>>> -            RTE_LOG(DEBUG, POWER, "Neither ACPI nor PSTATE modes are 
>>> supported\n");
>>> -            ret = -ENOTSUP;
>>> +        /* check if we can add a new queue */
>>> +        ret = check_scale(lcore_id);
>>> +        if (ret < 0)
>>>               goto end;
>>> -        }
>>> -        /* ensure we could initialize the power library */
>>> -        if (rte_power_init(lcore_id)) {
>>> -            ret = -EINVAL;
>>> -            goto end;
>>> -        }
>>> -        /* ensure we initialized the correct env */
>>> -        env = rte_power_get_env();
>>> -        if (env != PM_ENV_ACPI_CPUFREQ &&
>>> -                env != PM_ENV_PSTATE_CPUFREQ) {
>>> -            RTE_LOG(DEBUG, POWER, "Neither ACPI nor PSTATE modes 
>>> were initialized\n");
>>> -            ret = -ENOTSUP;
>>> -            goto end;
>>> -        }
>>>           clb = clb_scale_freq;
>>>           break;
>>> -    }
>>>       case RTE_POWER_MGMT_TYPE_PAUSE:
>>>           /* figure out various time-to-tsc conversions */
>>>           if (global_data.tsc_per_us == 0)
>>> @@ -273,13 +449,23 @@ rte_power_ethdev_pmgmt_queue_enable(unsigned 
>>> int lcore_id, uint16_t port_id,
>>>           ret = -EINVAL;
>>>           goto end;
>>>       }
>>> +    /* add this queue to the list */
>>> +    ret = queue_list_add(lcore_cfg, &qdata);
>>> +    if (ret < 0) {
>>> +        RTE_LOG(DEBUG, POWER, "Failed to add queue to list: %s\n",
>>> +                strerror(-ret));
>>> +        goto end;
>>> +    }
>>> +    /* new queue is always added last */
>>> +    queue_cfg = TAILQ_LAST(&lcore_cfgs->head, queue_list_head);
>>
>>
>> Need to ensure that queue_cfg gets set here, otherwise we'll get a 
>> segfault below.
>>
> 
> Or, looking at this again, shouldn't "lcore_cfgs" be "lcore_cfg"?

Good catch, will fix!

> 
> 
>>
>>
>>>         /* initialize data before enabling the callback */
>>> -    queue_cfg->empty_poll_stats = 0;
>>> -    queue_cfg->cb_mode = mode;
>>> -    queue_cfg->pwr_mgmt_state = PMD_MGMT_ENABLED;
>>> -    queue_cfg->cur_cb = rte_eth_add_rx_callback(port_id, queue_id,
>>> -            clb, NULL);
>>> +    if (lcore_cfg->n_queues == 1) {
>>> +        lcore_cfg->cb_mode = mode;
>>> +        lcore_cfg->pwr_mgmt_state = PMD_MGMT_ENABLED;
>>> +    }
>>> +    queue_cfg->cb = rte_eth_add_rx_callback(port_id, queue_id,
>>> +            clb, queue_cfg);
>> --snip--
>>


-- 
Thanks,
Anatoly


More information about the dev mailing list