[dpdk-dev] [PATCH v1 4/7] power: remove thread safety from PMD power API's
Ananyev, Konstantin
konstantin.ananyev at intel.com
Wed Jun 23 11:52:28 CEST 2021
>
> On 22-Jun-21 10:13 AM, Ananyev, Konstantin wrote:
> >
> >> Currently, we expect that only one callback can be active at any given
> >> moment, for a particular queue configuration, which is relatively easy
> >> to implement in a thread-safe way. However, we're about to add support
> >> for multiple queues per lcore, which will greatly increase the
> >> possibility of various race conditions.
> >>
> >> We could have used something like an RCU for this use case, but absent
> >> of a pressing need for thread safety we'll go the easy way and just
> >> mandate that the API's are to be called when all affected ports are
> >> stopped, and document this limitation. This greatly simplifies the
> >> `rte_power_monitor`-related code.
> >
> > I think you need to update RN too with that.
>
> Yep, will fix.
>
> > Another thing - do you really need the whole port stopped?
> > From what I understand - you work on queues, so it is enough for you
> > that related RX queue is stopped.
> > So, to make things a bit more robust, in pmgmt_queue_enable/disable
> > you can call rte_eth_rx_queue_info_get() and check queue state.
>
> We work on queues, but the data is per-lcore not per-queue, and it is
> potentially used by multiple queues, so checking one specific queue is
> not going to be enough. We could check all queues that were registered
> so far with the power library, maybe that'll work better?
Yep, that's what I mean: on queue_enable() check whether that queue is stopped or not.
If not, return -EBUSY/-EAGAIN or so.
Sorry if I wasn't clear the first time.
>
> >
> >> Signed-off-by: Anatoly Burakov <anatoly.burakov at intel.com>
> >> ---
> >> lib/power/meson.build | 3 +
> >> lib/power/rte_power_pmd_mgmt.c | 106 ++++++++-------------------------
> >> lib/power/rte_power_pmd_mgmt.h | 6 ++
> >> 3 files changed, 35 insertions(+), 80 deletions(-)
> >>
> >> diff --git a/lib/power/meson.build b/lib/power/meson.build
> >> index c1097d32f1..4f6a242364 100644
> >> --- a/lib/power/meson.build
> >> +++ b/lib/power/meson.build
> >> @@ -21,4 +21,7 @@ headers = files(
> >> 'rte_power_pmd_mgmt.h',
> >> 'rte_power_guest_channel.h',
> >> )
> >> +if cc.has_argument('-Wno-cast-qual')
> >> + cflags += '-Wno-cast-qual'
> >> +endif
> >> deps += ['timer', 'ethdev']
> >> diff --git a/lib/power/rte_power_pmd_mgmt.c b/lib/power/rte_power_pmd_mgmt.c
> >> index db03cbf420..0707c60a4f 100644
> >> --- a/lib/power/rte_power_pmd_mgmt.c
> >> +++ b/lib/power/rte_power_pmd_mgmt.c
> >> @@ -40,8 +40,6 @@ struct pmd_queue_cfg {
> >> /**< Callback mode for this queue */
> >> const struct rte_eth_rxtx_callback *cur_cb;
> >> /**< Callback instance */
> >> - volatile bool umwait_in_progress;
> >> - /**< are we currently sleeping? */
> >> uint64_t empty_poll_stats;
> >> /**< Number of empty polls */
> >> } __rte_cache_aligned;
> >> @@ -92,30 +90,11 @@ clb_umwait(uint16_t port_id, uint16_t qidx, struct rte_mbuf **pkts __rte_unused,
> >> struct rte_power_monitor_cond pmc;
> >> uint16_t ret;
> >>
> >> - /*
> >> - * we might get a cancellation request while being
> >> - * inside the callback, in which case the wakeup
> >> - * wouldn't work because it would've arrived too early.
> >> - *
> >> - * to get around this, we notify the other thread that
> >> - * we're sleeping, so that it can spin until we're done.
> >> - * unsolicited wakeups are perfectly safe.
> >> - */
> >> - q_conf->umwait_in_progress = true;
> >> -
> >> - rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
> >> -
> >> - /* check if we need to cancel sleep */
> >> - if (q_conf->pwr_mgmt_state == PMD_MGMT_ENABLED) {
> >> - /* use monitoring condition to sleep */
> >> - ret = rte_eth_get_monitor_addr(port_id, qidx,
> >> - &pmc);
> >> - if (ret == 0)
> >> - rte_power_monitor(&pmc, UINT64_MAX);
> >> - }
> >> - q_conf->umwait_in_progress = false;
> >> -
> >> - rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
> >> + /* use monitoring condition to sleep */
> >> + ret = rte_eth_get_monitor_addr(port_id, qidx,
> >> + &pmc);
> >> + if (ret == 0)
> >> + rte_power_monitor(&pmc, UINT64_MAX);
> >> }
> >> } else
> >> q_conf->empty_poll_stats = 0;
> >> @@ -183,6 +162,7 @@ rte_power_ethdev_pmgmt_queue_enable(unsigned int lcore_id, uint16_t port_id,
> >> {
> >> struct pmd_queue_cfg *queue_cfg;
> >> struct rte_eth_dev_info info;
> >> + rte_rx_callback_fn clb;
> >> int ret;
> >>
> >> RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
> >> @@ -232,17 +212,7 @@ rte_power_ethdev_pmgmt_queue_enable(unsigned int lcore_id, uint16_t port_id,
> >> ret = -ENOTSUP;
> >> goto end;
> >> }
> >> - /* initialize data before enabling the callback */
> >> - queue_cfg->empty_poll_stats = 0;
> >> - queue_cfg->cb_mode = mode;
> >> - queue_cfg->umwait_in_progress = false;
> >> - queue_cfg->pwr_mgmt_state = PMD_MGMT_ENABLED;
> >> -
> >> - /* ensure we update our state before callback starts */
> >> - rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
> >> -
> >> - queue_cfg->cur_cb = rte_eth_add_rx_callback(port_id, queue_id,
> >> - clb_umwait, NULL);
> >> + clb = clb_umwait;
> >> break;
> >> }
> >> case RTE_POWER_MGMT_TYPE_SCALE:
> >> @@ -269,16 +239,7 @@ rte_power_ethdev_pmgmt_queue_enable(unsigned int lcore_id, uint16_t port_id,
> >> ret = -ENOTSUP;
> >> goto end;
> >> }
> >> - /* initialize data before enabling the callback */
> >> - queue_cfg->empty_poll_stats = 0;
> >> - queue_cfg->cb_mode = mode;
> >> - queue_cfg->pwr_mgmt_state = PMD_MGMT_ENABLED;
> >> -
> >> - /* this is not necessary here, but do it anyway */
> >> - rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
> >> -
> >> - queue_cfg->cur_cb = rte_eth_add_rx_callback(port_id,
> >> - queue_id, clb_scale_freq, NULL);
> >> + clb = clb_scale_freq;
> >> break;
> >> }
> >> case RTE_POWER_MGMT_TYPE_PAUSE:
> >> @@ -286,18 +247,21 @@ rte_power_ethdev_pmgmt_queue_enable(unsigned int lcore_id, uint16_t port_id,
> >> if (global_data.tsc_per_us == 0)
> >> calc_tsc();
> >>
> >> - /* initialize data before enabling the callback */
> >> - queue_cfg->empty_poll_stats = 0;
> >> - queue_cfg->cb_mode = mode;
> >> - queue_cfg->pwr_mgmt_state = PMD_MGMT_ENABLED;
> >> -
> >> - /* this is not necessary here, but do it anyway */
> >> - rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
> >> -
> >> - queue_cfg->cur_cb = rte_eth_add_rx_callback(port_id, queue_id,
> >> - clb_pause, NULL);
> >> + clb = clb_pause;
> >> break;
> >> + default:
> >> + RTE_LOG(DEBUG, POWER, "Invalid power management type\n");
> >> + ret = -EINVAL;
> >> + goto end;
> >> }
> >> +
> >> + /* initialize data before enabling the callback */
> >> + queue_cfg->empty_poll_stats = 0;
> >> + queue_cfg->cb_mode = mode;
> >> + queue_cfg->pwr_mgmt_state = PMD_MGMT_ENABLED;
> >> + queue_cfg->cur_cb = rte_eth_add_rx_callback(port_id, queue_id,
> >> + clb, NULL);
> >> +
> >> ret = 0;
> >> end:
> >> return ret;
> >> @@ -323,27 +287,8 @@ rte_power_ethdev_pmgmt_queue_disable(unsigned int lcore_id,
> >> /* stop any callbacks from progressing */
> >> queue_cfg->pwr_mgmt_state = PMD_MGMT_DISABLED;
> >>
> >> - /* ensure we update our state before continuing */
> >> - rte_atomic_thread_fence(__ATOMIC_SEQ_CST);
> >> -
> >> switch (queue_cfg->cb_mode) {
> >> - case RTE_POWER_MGMT_TYPE_MONITOR:
> >> - {
> >> - bool exit = false;
> >> - do {
> >> - /*
> >> - * we may request cancellation while the other thread
> >> - * has just entered the callback but hasn't started
> >> - * sleeping yet, so keep waking it up until we know it's
> >> - * done sleeping.
> >> - */
> >> - if (queue_cfg->umwait_in_progress)
> >> - rte_power_monitor_wakeup(lcore_id);
> >> - else
> >> - exit = true;
> >> - } while (!exit);
> >> - }
> >> - /* fall-through */
> >> + case RTE_POWER_MGMT_TYPE_MONITOR: /* fall-through */
> >> case RTE_POWER_MGMT_TYPE_PAUSE:
> >> rte_eth_remove_rx_callback(port_id, queue_id,
> >> queue_cfg->cur_cb);
> >> @@ -356,10 +301,11 @@ rte_power_ethdev_pmgmt_queue_disable(unsigned int lcore_id,
> >> break;
> >> }
> >> /*
> >> - * we don't free the RX callback here because it is unsafe to do so
> >> - * unless we know for a fact that all data plane threads have stopped.
> >> + * the API doc mandates that the user stops all processing on affected
> >> + * ports before calling any of these API's, so we can assume that the
> >> + * callbacks can be freed. we're intentionally casting away const-ness.
> >> */
> >> - queue_cfg->cur_cb = NULL;
> >> + rte_free((void *)queue_cfg->cur_cb);
> >>
> >> return 0;
> >> }
> >> diff --git a/lib/power/rte_power_pmd_mgmt.h b/lib/power/rte_power_pmd_mgmt.h
> >> index 7a0ac24625..7557f5d7e1 100644
> >> --- a/lib/power/rte_power_pmd_mgmt.h
> >> +++ b/lib/power/rte_power_pmd_mgmt.h
> >> @@ -43,6 +43,9 @@ enum rte_power_pmd_mgmt_type {
> >> *
> >> * @note This function is not thread-safe.
> >> *
> >> + * @warning This function must be called when all affected Ethernet ports are
> >> + * stopped and no Rx/Tx is in progress!
> >> + *
> >> * @param lcore_id
> >> * The lcore the Rx queue will be polled from.
> >> * @param port_id
> >> @@ -69,6 +72,9 @@ rte_power_ethdev_pmgmt_queue_enable(unsigned int lcore_id,
> >> *
> >> * @note This function is not thread-safe.
> >> *
> >> + * @warning This function must be called when all affected Ethernet ports are
> >> + * stopped and no Rx/Tx is in progress!
> >> + *
> >> * @param lcore_id
> >> * The lcore the Rx queue is polled from.
> >> * @param port_id
> >> --
> >> 2.25.1
> >
>
>
> --
> Thanks,
> Anatoly
More information about the dev
mailing list