[dpdk-dev] [PATCH v5 5/7] power: support callbacks for multiple Rx queues

David Hunt david.hunt at intel.com
Thu Jul 1 11:01:17 CEST 2021


On 30/6/2021 10:52 AM, David Hunt wrote:
> Hi Anatoly,
>
> On 29/6/2021 4:48 PM, Anatoly Burakov wrote:
>> Currently, there is a hard limitation on the PMD power management
>> support that only allows it to support a single queue per lcore. This is
>> not ideal as most DPDK use cases will poll multiple queues per core.
>>
>> The PMD power management mechanism relies on ethdev Rx callbacks, so it
>> is very difficult to implement such support because callbacks are
>> effectively stateless and have no visibility into what the other ethdev
>> devices are doing. This places limitations on what we can do within the
>> framework of Rx callbacks, but the basics of this implementation are as
>> follows:
>>
>> - Replace per-queue structures with per-lcore ones, so that any device
>>    polled from the same lcore can share data
>> - Any queue that is going to be polled from a specific lcore has to be
>>    added to the list of queues to poll, so that the callback is aware of
>>    other queues being polled by the same lcore
>> - Both the empty poll counter and the actual power saving mechanism is
>>    shared between all queues polled on a particular lcore, and is only
>>    activated when all queues in the list were polled and were determined
>>    to have no traffic.
>> - The limitation on UMWAIT-based polling is not removed because UMWAIT
>>    is incapable of monitoring more than one address.
>>
>> Also, while we're at it, update and improve the docs.
>>
>> Signed-off-by: Anatoly Burakov <anatoly.burakov at intel.com>
>> ---
>>
>> Notes:
>>      v5:
>>      - Remove the "power save queue" API and replace it with 
>> mechanism suggested by
>>        Konstantin
>>           v3:
>>      - Move the list of supported NICs to NIC feature table
>>           v2:
>>      - Use a TAILQ for queues instead of a static array
>>      - Address feedback from Konstantin
>>      - Add additional checks for stopped queues
>>
>>   doc/guides/nics/features.rst           |  10 +
>>   doc/guides/prog_guide/power_man.rst    |  65 ++--
>>   doc/guides/rel_notes/release_21_08.rst |   3 +
>>   lib/power/rte_power_pmd_mgmt.c         | 431 ++++++++++++++++++-------
>>   4 files changed, 373 insertions(+), 136 deletions(-)
>>
>
> --snip--
>
>>   int
>>   rte_power_ethdev_pmgmt_queue_enable(unsigned int lcore_id, uint16_t 
>> port_id,
>>           uint16_t queue_id, enum rte_power_pmd_mgmt_type mode)
>>   {
>> -    struct pmd_queue_cfg *queue_cfg;
>> +    const union queue qdata = {.portid = port_id, .qid = queue_id};
>> +    struct pmd_core_cfg *lcore_cfg;
>> +    struct queue_list_entry *queue_cfg;
>>       struct rte_eth_dev_info info;
>>       rte_rx_callback_fn clb;
>>       int ret;
>> @@ -202,9 +401,19 @@ rte_power_ethdev_pmgmt_queue_enable(unsigned int 
>> lcore_id, uint16_t port_id,
>>           goto end;
>>       }
>>   -    queue_cfg = &port_cfg[port_id][queue_id];
>> +    lcore_cfg = &lcore_cfgs[lcore_id];
>>   -    if (queue_cfg->pwr_mgmt_state != PMD_MGMT_DISABLED) {
>> +    /* check if other queues are stopped as well */
>> +    ret = cfg_queues_stopped(lcore_cfg);
>> +    if (ret != 1) {
>> +        /* error means invalid queue, 0 means queue wasn't stopped */
>> +        ret = ret < 0 ? -EINVAL : -EBUSY;
>> +        goto end;
>> +    }
>> +
>> +    /* if callback was already enabled, check current callback type */
>> +    if (lcore_cfg->pwr_mgmt_state != PMD_MGMT_DISABLED &&
>> +            lcore_cfg->cb_mode != mode) {
>>           ret = -EINVAL;
>>           goto end;
>>       }
>> @@ -214,53 +423,20 @@ rte_power_ethdev_pmgmt_queue_enable(unsigned 
>> int lcore_id, uint16_t port_id,
>>         switch (mode) {
>>       case RTE_POWER_MGMT_TYPE_MONITOR:
>> -    {
>> -        struct rte_power_monitor_cond dummy;
>> -
>> -        /* check if rte_power_monitor is supported */
>> -        if (!global_data.intrinsics_support.power_monitor) {
>> -            RTE_LOG(DEBUG, POWER, "Monitoring intrinsics are not 
>> supported\n");
>> -            ret = -ENOTSUP;
>> +        /* check if we can add a new queue */
>> +        ret = check_monitor(lcore_cfg, &qdata);
>> +        if (ret < 0)
>>               goto end;
>> -        }
>>   -        /* check if the device supports the necessary PMD API */
>> -        if (rte_eth_get_monitor_addr(port_id, queue_id,
>> -                &dummy) == -ENOTSUP) {
>> -            RTE_LOG(DEBUG, POWER, "The device does not support 
>> rte_eth_get_monitor_addr\n");
>> -            ret = -ENOTSUP;
>> -            goto end;
>> -        }
>>           clb = clb_umwait;
>>           break;
>> -    }
>>       case RTE_POWER_MGMT_TYPE_SCALE:
>> -    {
>> -        enum power_management_env env;
>> -        /* only PSTATE and ACPI modes are supported */
>> -        if (!rte_power_check_env_supported(PM_ENV_ACPI_CPUFREQ) &&
>> -                !rte_power_check_env_supported(
>> -                    PM_ENV_PSTATE_CPUFREQ)) {
>> -            RTE_LOG(DEBUG, POWER, "Neither ACPI nor PSTATE modes are 
>> supported\n");
>> -            ret = -ENOTSUP;
>> +        /* check if we can add a new queue */
>> +        ret = check_scale(lcore_id);
>> +        if (ret < 0)
>>               goto end;
>> -        }
>> -        /* ensure we could initialize the power library */
>> -        if (rte_power_init(lcore_id)) {
>> -            ret = -EINVAL;
>> -            goto end;
>> -        }
>> -        /* ensure we initialized the correct env */
>> -        env = rte_power_get_env();
>> -        if (env != PM_ENV_ACPI_CPUFREQ &&
>> -                env != PM_ENV_PSTATE_CPUFREQ) {
>> -            RTE_LOG(DEBUG, POWER, "Neither ACPI nor PSTATE modes 
>> were initialized\n");
>> -            ret = -ENOTSUP;
>> -            goto end;
>> -        }
>>           clb = clb_scale_freq;
>>           break;
>> -    }
>>       case RTE_POWER_MGMT_TYPE_PAUSE:
>>           /* figure out various time-to-tsc conversions */
>>           if (global_data.tsc_per_us == 0)
>> @@ -273,13 +449,23 @@ rte_power_ethdev_pmgmt_queue_enable(unsigned 
>> int lcore_id, uint16_t port_id,
>>           ret = -EINVAL;
>>           goto end;
>>       }
>> +    /* add this queue to the list */
>> +    ret = queue_list_add(lcore_cfg, &qdata);
>> +    if (ret < 0) {
>> +        RTE_LOG(DEBUG, POWER, "Failed to add queue to list: %s\n",
>> +                strerror(-ret));
>> +        goto end;
>> +    }
>> +    /* new queue is always added last */
>> +    queue_cfg = TAILQ_LAST(&lcore_cfgs->head, queue_list_head);
>
>
> Need to ensure that queue_cfg gets set here, otherwise we'll get a 
> segfault below.
>

Or, looking at this again, shouldn't "lcore_cfgs" be "lcore_cfg"?


>
>
>>         /* initialize data before enabling the callback */
>> -    queue_cfg->empty_poll_stats = 0;
>> -    queue_cfg->cb_mode = mode;
>> -    queue_cfg->pwr_mgmt_state = PMD_MGMT_ENABLED;
>> -    queue_cfg->cur_cb = rte_eth_add_rx_callback(port_id, queue_id,
>> -            clb, NULL);
>> +    if (lcore_cfg->n_queues == 1) {
>> +        lcore_cfg->cb_mode = mode;
>> +        lcore_cfg->pwr_mgmt_state = PMD_MGMT_ENABLED;
>> +    }
>> +    queue_cfg->cb = rte_eth_add_rx_callback(port_id, queue_id,
>> +            clb, queue_cfg);
> --snip--
>


More information about the dev mailing list