[PATCH v6 1/4] eal: add lcore poll busyness telemetry
    Morten Brørup 
    mb at smartsharesystems.com
       
    Tue Sep 13 15:48:39 CEST 2022
    
    
  
> From: Kevin Laatz [mailto:kevin.laatz at intel.com]
> Sent: Tuesday, 13 September 2022 15.20
> 
> From: Anatoly Burakov <anatoly.burakov at intel.com>
> 
[...]
Still a few missing renames...
> diff --git a/lib/eal/common/eal_common_lcore_telemetry.c
> b/lib/eal/common/eal_common_lcore_telemetry.c
> new file mode 100644
> index 0000000000..abef1ff86d
> --- /dev/null
> +++ b/lib/eal/common/eal_common_lcore_telemetry.c
eal_common_lcore_poll_telemetry.c
> @@ -0,0 +1,303 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2022 Intel Corporation
> + */
> +
> +#include <unistd.h>
> +#include <limits.h>
> +#include <string.h>
> +
> +#include <rte_common.h>
> +#include <rte_cycles.h>
> +#include <rte_errno.h>
> +#include <rte_lcore.h>
> +
> +#ifdef RTE_LCORE_POLL_BUSYNESS
> +#include <rte_telemetry.h>
> +#endif
> +
> +rte_atomic32_t __rte_lcore_telemetry_enabled;
__rte_lcore_poll_telemetry_enabled
> +
> +#ifdef RTE_LCORE_POLL_BUSYNESS
> +
> +struct lcore_telemetry {
This one is private, so suggestion only:
struct lcore_poll_telemetry {
> +	int poll_busyness;
> +	/**< Calculated poll busyness (gets set/returned by the API) */
> +	int raw_poll_busyness;
> +	/**< Calculated poll busyness times 100. */
> +	uint64_t interval_ts;
> +	/**< when previous telemetry interval started */
> +	uint64_t empty_cycles;
> +	/**< empty cycle count since last interval */
> +	uint64_t last_poll_ts;
> +	/**< last poll timestamp */
> +	bool last_empty;
> +	/**< if last poll was empty */
> +	unsigned int contig_poll_cnt;
> +	/**< contiguous (always empty/non empty) poll counter */
> +} __rte_cache_aligned;
> +
> +static struct lcore_telemetry *telemetry_data;
> +
> +#define LCORE_POLL_BUSYNESS_MAX 100
> +#define LCORE_POLL_BUSYNESS_NOT_SET -1
> +#define LCORE_POLL_BUSYNESS_MIN 0
> +
> +#define SMOOTH_COEFF 5
> +#define STATE_CHANGE_OPT 32
> +
> +static void lcore_config_init(void)
> +{
> +	int lcore_id;
> +
> +	RTE_LCORE_FOREACH(lcore_id) {
> +		struct lcore_telemetry *td = &telemetry_data[lcore_id];
> +
> +		td->interval_ts = 0;
> +		td->last_poll_ts = 0;
> +		td->empty_cycles = 0;
> +		td->last_empty = true;
> +		td->contig_poll_cnt = 0;
> +		td->poll_busyness = LCORE_POLL_BUSYNESS_NOT_SET;
> +		td->raw_poll_busyness = 0;
> +	}
> +}
> +
> +int rte_lcore_poll_busyness(unsigned int lcore_id)
> +{
> +	const uint64_t tsc_ms = rte_get_timer_hz() / MS_PER_S;
> +	/* if more than 1000 busyness periods have passed, this core is
> considered inactive */
> +	const uint64_t active_thresh = RTE_LCORE_POLL_BUSYNESS_PERIOD_MS
> * tsc_ms * 1000;
> +	struct lcore_telemetry *tdata;
> +
> +	if (lcore_id >= RTE_MAX_LCORE)
> +		return -EINVAL;
> +	tdata = &telemetry_data[lcore_id];
> +
> +	/* if the lcore is not active */
> +	if (tdata->interval_ts == 0)
> +		return LCORE_POLL_BUSYNESS_NOT_SET;
> +	/* if the core hasn't been active in a while */
> +	else if ((rte_rdtsc() - tdata->interval_ts) > active_thresh)
> +		return LCORE_POLL_BUSYNESS_NOT_SET;
> +
> +	/* this core is active, report its poll busyness */
> +	return telemetry_data[lcore_id].poll_busyness;
> +}
> +
> +int rte_lcore_poll_busyness_enabled(void)
> +{
> +	return rte_atomic32_read(&__rte_lcore_telemetry_enabled);
> +}
> +
> +void rte_lcore_poll_busyness_enabled_set(bool enable)
> +{
> +	int set = rte_atomic32_cmpset((volatile uint32_t
> *)&__rte_lcore_telemetry_enabled,
> +			(int)!enable, (int)enable);
> +
> +	/* Reset counters on successful disable */
> +	if (set && !enable)
> +		lcore_config_init();
> +}
> +
> +static inline int calc_raw_poll_busyness(const struct lcore_telemetry
> *tdata,
> +				    const uint64_t empty, const uint64_t total)
> +{
> +	/*
> +	 * We don't want to use floating point math here, but we want for
> our poll
> +	 * busyness to react smoothly to sudden changes, while still
> keeping the
> +	 * accuracy and making sure that over time the average follows
> poll busyness
> +	 * as measured just-in-time. Therefore, we will calculate the
> average poll
> +	 * busyness using integer math, but shift the decimal point two
> places
> +	 * to the right, so that 100.0 becomes 10000. This allows us to
> report
> +	 * integer values (0..100) while still allowing ourselves to
> follow the
> +	 * just-in-time measurements when we calculate our averages.
> +	 */
> +	const int max_raw_idle = LCORE_POLL_BUSYNESS_MAX * 100;
> +
> +	const int prev_raw_idle = max_raw_idle - tdata-
> >raw_poll_busyness;
> +
> +	/* calculate rate of idle cycles, times 100 */
> +	const int cur_raw_idle = (int)((empty * max_raw_idle) / total);
> +
> +	/* smoothen the idleness */
> +	const int smoothened_idle =
> +			(cur_raw_idle + prev_raw_idle * (SMOOTH_COEFF - 1)) /
> SMOOTH_COEFF;
> +
> +	/* convert idleness to poll busyness */
> +	return max_raw_idle - smoothened_idle;
> +}
> +
> +void __rte_lcore_poll_busyness_timestamp(uint16_t nb_rx)
> +{
> +	const unsigned int lcore_id = rte_lcore_id();
> +	uint64_t interval_ts, empty_cycles, cur_tsc, last_poll_ts;
> +	struct lcore_telemetry *tdata;
> +	const bool empty = nb_rx == 0;
> +	uint64_t diff_int, diff_last;
> +	bool last_empty;
> +
> +	/* This telemetry is not supported for unregistered non-EAL
> threads */
> +	if (lcore_id >= RTE_MAX_LCORE) {
> +		RTE_LOG(DEBUG, EAL,
> +				"Lcore telemetry not supported on unregistered
> non-EAL thread %d",
> +				lcore_id);
> +		return;
> +	}
> +
> +	tdata = &telemetry_data[lcore_id];
> +	last_empty = tdata->last_empty;
> +
> +	/* optimization: don't do anything if status hasn't changed */
> +	if (last_empty == empty && tdata->contig_poll_cnt++ <
> STATE_CHANGE_OPT)
> +		return;
> +	/* status changed or we're waiting for too long, reset counter */
> +	tdata->contig_poll_cnt = 0;
> +
> +	cur_tsc = rte_rdtsc();
> +
> +	interval_ts = tdata->interval_ts;
> +	empty_cycles = tdata->empty_cycles;
> +	last_poll_ts = tdata->last_poll_ts;
> +
> +	diff_int = cur_tsc - interval_ts;
> +	diff_last = cur_tsc - last_poll_ts;
> +
> +	/* is this the first time we're here? */
> +	if (interval_ts == 0) {
> +		tdata->poll_busyness = LCORE_POLL_BUSYNESS_MIN;
> +		tdata->raw_poll_busyness = 0;
> +		tdata->interval_ts = cur_tsc;
> +		tdata->empty_cycles = 0;
> +		tdata->contig_poll_cnt = 0;
> +		goto end;
> +	}
> +
> +	/* update the empty counter if we got an empty poll earlier */
> +	if (last_empty)
> +		empty_cycles += diff_last;
> +
> +	/* have we passed the interval? */
> +	uint64_t interval = ((rte_get_tsc_hz() / MS_PER_S) *
> RTE_LCORE_POLL_BUSYNESS_PERIOD_MS);
> +	if (diff_int > interval) {
> +		int raw_poll_busyness;
> +
> +		/* get updated poll_busyness value */
> +		raw_poll_busyness = calc_raw_poll_busyness(tdata,
> empty_cycles, diff_int);
> +
> +		/* set a new interval, reset empty counter */
> +		tdata->interval_ts = cur_tsc;
> +		tdata->empty_cycles = 0;
> +		tdata->raw_poll_busyness = raw_poll_busyness;
> +		/* bring poll busyness back to 0..100 range, biased to
> round up */
> +		tdata->poll_busyness = (raw_poll_busyness + 50) / 100;
> +	} else
> +		/* we may have updated empty counter */
> +		tdata->empty_cycles = empty_cycles;
> +
> +end:
> +	/* update status for next poll */
> +	tdata->last_poll_ts = cur_tsc;
> +	tdata->last_empty = empty;
> +}
> +
> +static int
> +lcore_poll_busyness_enable(const char *cmd __rte_unused,
> +		      const char *params __rte_unused,
> +		      struct rte_tel_data *d)
> +{
> +	rte_lcore_poll_busyness_enabled_set(true);
> +
> +	rte_tel_data_start_dict(d);
> +
> +	rte_tel_data_add_dict_int(d, "poll_busyness_enabled", 1);
> +
> +	return 0;
> +}
> +
> +static int
> +lcore_poll_busyness_disable(const char *cmd __rte_unused,
> +		       const char *params __rte_unused,
> +		       struct rte_tel_data *d)
> +{
> +	rte_lcore_poll_busyness_enabled_set(false);
> +
> +	rte_tel_data_start_dict(d);
> +
> +	rte_tel_data_add_dict_int(d, "poll_busyness_enabled", 0);
> +
> +	return 0;
> +}
> +
> +static int
> +lcore_handle_poll_busyness(const char *cmd __rte_unused,
> +		      const char *params __rte_unused, struct rte_tel_data
> *d)
> +{
> +	char corenum[64];
> +	int i;
> +
> +	rte_tel_data_start_dict(d);
> +
> +	RTE_LCORE_FOREACH(i) {
> +		if (!rte_lcore_is_enabled(i))
> +			continue;
> +		snprintf(corenum, sizeof(corenum), "%d", i);
> +		rte_tel_data_add_dict_int(d, corenum,
> rte_lcore_poll_busyness(i));
> +	}
> +
> +	return 0;
> +}
> +
> +void
> +lcore_telemetry_free(void)
Not sure, but either:
lcore_poll_telemetry_free or
rte_lcore_poll_telemetry_free
> +{
> +	if (telemetry_data != NULL) {
> +		free(telemetry_data);
> +		telemetry_data = NULL;
> +	}
> +}
> +
> +RTE_INIT(lcore_init_telemetry)
Not sure, but either:
RTE_INIT(lcore_poll_init_telemetry) or
RTE_INIT(rte_lcore_poll_init_telemetry)
> +{
> +	telemetry_data = calloc(RTE_MAX_LCORE,
> sizeof(telemetry_data[0]));
> +	if (telemetry_data == NULL)
> +		rte_panic("Could not init lcore telemetry data: Out of
> memory\n");
> +
> +	lcore_config_init();
> +
> +	rte_telemetry_register_cmd("/eal/lcore/poll_busyness",
> lcore_handle_poll_busyness,
> +				   "return percentage poll busyness of cores");
> +
> +	rte_telemetry_register_cmd("/eal/lcore/poll_busyness_enable",
> lcore_poll_busyness_enable,
> +				   "enable lcore poll busyness measurement");
> +
> +	rte_telemetry_register_cmd("/eal/lcore/poll_busyness_disable",
> lcore_poll_busyness_disable,
> +				   "disable lcore poll busyness measurement");
> +
> +	rte_atomic32_set(&__rte_lcore_telemetry_enabled, true);
> +}
> +
> +#else
> +
> +int rte_lcore_poll_busyness(unsigned int lcore_id __rte_unused)
> +{
> +	return -ENOTSUP;
> +}
> +
> +int rte_lcore_poll_busyness_enabled(void)
> +{
> +	return -ENOTSUP;
> +}
> +
> +void rte_lcore_poll_busyness_enabled_set(bool enable __rte_unused)
> +{
> +}
> +
> +void __rte_lcore_poll_busyness_timestamp(uint16_t nb_rx __rte_unused)
> +{
> +}
> +
> +void lcore_telemetry_free(void)
> +{
> +}
> +
> +#endif
    
    
More information about the dev
mailing list