[dpdk-dev] [PATCH v8 2/4] examples/l3fwd-power: simple app update for new API

Hunt, David david.hunt at intel.com
Fri Sep 28 13:19:31 CEST 2018


Hi Liang,

A few tweaks below:


On 17/9/2018 2:30 PM, Liang Ma wrote:
> Add the support for new traffic pattern aware power control
> power management API.
>
> Example:
> ./l3fwd-power -l xxx   -n 4   -w 0000:xx:00.0 -w 0000:xx:00.1 -- -p 0x3
> -P --config="(0,0,xx),(1,0,xx)" --empty-poll="0,0,0" -l 14 -m 9 -h 1
>
> Please Reference l3fwd-power document for all parameter except
> empty-poll.

The docs should probably include empty poll parameter. Suggest 
re-wording to

Please Reference l3fwd-power document for full parameter usage



> The option "l", "m", "h" are used to set the power index for
> LOW, MED, HIGH power state. only is useful after enable empty-poll
>
> --empty-poll="training_flag, med_threshold, high_threshold"
>
> The option training_flag is used to enable/disable training mode.
>
> The option med_threshold is used to indicate the empty poll threshold
> of modest state which is customized by user.
>
> The option high_threshold is used to indicate the empty poll threshold
> of busy state which is customized by user.
>
> Above three option default value is all 0.
>
> Once enable empty-poll. System will apply the default parameter.
> Training mode is disabled as default.

Suggest:

Once empty-poll is enabled, the system will apply the default parameters is no
other command line options are provided.



> If training mode is triggered, there should not has any traffic
> pass-through during training phase.

Suggest:
If training mode is enabled, the user should ensure that no traffic
is allowed to pass through the system.

> When training phase complete, system transfer to normal phase.

When training phase complete, the application transfer to normal operation



>
> System will running with modest power stat at beginning.

System will start running with the modest power mode.


> If the system busyness percentage above 70%, then system will adjust
> power state move to High power state. If the traffic become lower(eg. The
> system busyness percentage drop below 30%), system will fallback
> to the modest power state.

If the traffic goes above 70%, then system will move to High power state.
If the traffic drops below 30%, the system will fallback to the modest
power state.


> Example code use master thread to monitoring worker thread busyness.
> the default timer resolution is 10ms.
>
> ChangeLog:
> v2 fix some coding style issues
> v3 rename the API.
> v6 re-work the API.
> v7 no change.
> v8 disable training as default option.
>
> Signed-off-by: Liang Ma <liang.j.ma at intel.com>
>
> Reviewed-by: Lei Yao <lei.a.yao at intel.com>
> ---
>   examples/l3fwd-power/Makefile    |   3 +
>   examples/l3fwd-power/main.c      | 325 +++++++++++++++++++++++++++++++++++++--
>   examples/l3fwd-power/meson.build |   1 +
>   3 files changed, 312 insertions(+), 17 deletions(-)
>
> diff --git a/examples/l3fwd-power/Makefile b/examples/l3fwd-power/Makefile
> index d7e39a3..772ec7b 100644
> --- a/examples/l3fwd-power/Makefile
> +++ b/examples/l3fwd-power/Makefile
> @@ -23,6 +23,8 @@ CFLAGS += -O3 $(shell pkg-config --cflags libdpdk)
>   LDFLAGS_SHARED = $(shell pkg-config --libs libdpdk)
>   LDFLAGS_STATIC = -Wl,-Bstatic $(shell pkg-config --static --libs libdpdk)
>   
> +CFLAGS += -DALLOW_EXPERIMENTAL_API
> +
>   build/$(APP)-shared: $(SRCS-y) Makefile $(PC_FILE) | build
>   	$(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_SHARED)
>   
> @@ -54,6 +56,7 @@ please change the definition of the RTE_TARGET environment variable)
>   all:
>   else
>   
> +CFLAGS += -DALLOW_EXPERIMENTAL_API
>   CFLAGS += -O3
>   CFLAGS += $(WERROR_FLAGS)
>   
> diff --git a/examples/l3fwd-power/main.c b/examples/l3fwd-power/main.c
> index 68527d2..1465608 100644
> --- a/examples/l3fwd-power/main.c
> +++ b/examples/l3fwd-power/main.c
> @@ -43,6 +43,7 @@
>   #include <rte_timer.h>
>   #include <rte_power.h>
>   #include <rte_spinlock.h>
> +#include <rte_power_empty_poll.h>
>   
>   #include "perf_core.h"
>   #include "main.h"
> @@ -55,6 +56,8 @@
>   
>   /* 100 ms interval */
>   #define TIMER_NUMBER_PER_SECOND           10
> +/* (10ms) */
> +#define INTERVALS_PER_SECOND             100
>   /* 100000 us */
>   #define SCALING_PERIOD                    (1000000/TIMER_NUMBER_PER_SECOND)
>   #define SCALING_DOWN_TIME_RATIO_THRESHOLD 0.25
> @@ -117,6 +120,11 @@
>    */
>   #define RTE_TEST_RX_DESC_DEFAULT 1024
>   #define RTE_TEST_TX_DESC_DEFAULT 1024
> +#define EMPTY_POLL_MED_THRESHOLD 350000UL
> +#define EMPTY_POLL_HGH_THRESHOLD 580000UL

I'd suggest adding some explanation around these two numbers.
E.g.
/*
  * These two thresholds were decided on by running the training 
algorithm on
  * a 2.5GHz Xeon. These defaults can be overridden by supplying 
non-zero values
  * for the med_threshold and high_threshold parameters on the command line.
  */


> +
> +
> +
>   static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
>   static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
>   
> @@ -132,6 +140,14 @@ static uint32_t enabled_port_mask = 0;
>   static int promiscuous_on = 0;
>   /* NUMA is enabled by default. */
>   static int numa_on = 1;
> +/* emptypoll is disabled by default. */
> +static bool empty_poll_on;
> +static bool empty_poll_train;
> +volatile bool empty_poll_stop;
> +static struct  ep_params *ep_params;
> +static struct  ep_policy policy;
> +static long  ep_med_edpi, ep_hgh_edpi;
> +
>   static int parse_ptype; /**< Parse packet type using rx callback, and */
>   			/**< disabled by default */
>   
> @@ -330,6 +346,13 @@ static inline uint32_t power_idle_heuristic(uint32_t zero_rx_packet_count);
>   static inline enum freq_scale_hint_t power_freq_scaleup_heuristic( \
>   		unsigned int lcore_id, uint16_t port_id, uint16_t queue_id);
>   
> +static uint8_t  freq_tlb[] = {14, 9, 1};
> +

Maybe an explanation on where these numbers came from. E.g.
/*
  * These defaults are using the max frequency index (1), a medium index 
(9) and a
  * typical low frequency index (14). These can be adjusted to use different
  * indexes using the relevant command line parameters.
  */


> +static int is_done(void)
> +{
> +	return empty_poll_stop;
> +}
> +
>   /* exit signal handler */
>   static void
>   signal_exit_now(int sigtype)
> @@ -338,7 +361,15 @@ signal_exit_now(int sigtype)
>   	unsigned int portid;
>   	int ret;
>   
> +	RTE_SET_USED(lcore_id);
> +	RTE_SET_USED(portid);
> +	RTE_SET_USED(ret);
> +
>   	if (sigtype == SIGINT) {
> +		if (empty_poll_on)
> +			empty_poll_stop = true;
> +
> +
>   		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
>   			if (rte_lcore_is_enabled(lcore_id) == 0)
>   				continue;
> @@ -351,16 +382,19 @@ signal_exit_now(int sigtype)
>   							"core%u\n", lcore_id);
>   		}
>   
> -		RTE_ETH_FOREACH_DEV(portid) {
> -			if ((enabled_port_mask & (1 << portid)) == 0)
> -				continue;
> +		if (!empty_poll_on) {
> +			RTE_ETH_FOREACH_DEV(portid) {
> +				if ((enabled_port_mask & (1 << portid)) == 0)
> +					continue;
>   
> -			rte_eth_dev_stop(portid);
> -			rte_eth_dev_close(portid);
> +				rte_eth_dev_stop(portid);
> +				rte_eth_dev_close(portid);
> +			}
>   		}
>   	}
>   
> -	rte_exit(EXIT_SUCCESS, "User forced exit\n");
> +	if (!empty_poll_on)
> +		rte_exit(EXIT_SUCCESS, "User forced exit\n");
>   }
>   
>   /*  Freqency scale down timer callback */
> @@ -825,7 +859,107 @@ static int event_register(struct lcore_conf *qconf)
>   
>   	return 0;
>   }
> +/* main processing loop */
> +static int
> +main_empty_poll_loop(__attribute__((unused)) void *dummy)
> +{
> +	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
> +	unsigned int lcore_id;
> +	uint64_t prev_tsc, diff_tsc, cur_tsc;
> +	int i, j, nb_rx;
> +	uint8_t queueid;
> +	uint16_t portid;
> +	struct lcore_conf *qconf;
> +	struct lcore_rx_queue *rx_queue;
> +
> +	const uint64_t drain_tsc =
> +		(rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US;
> +
> +	prev_tsc = 0;
> +
> +	lcore_id = rte_lcore_id();
> +	qconf = &lcore_conf[lcore_id];
> +
> +	if (qconf->n_rx_queue == 0) {
> +		RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n", lcore_id);
> +		return 0;
> +	}
> +
> +	for (i = 0; i < qconf->n_rx_queue; i++) {
> +		portid = qconf->rx_queue_list[i].port_id;
> +		queueid = qconf->rx_queue_list[i].queue_id;
> +		RTE_LOG(INFO, L3FWD_POWER, " -- lcoreid=%u portid=%u "
> +				"rxqueueid=%hhu\n", lcore_id, portid, queueid);
> +	}
> +
> +	while (!is_done()) {
> +		stats[lcore_id].nb_iteration_looped++;
> +
> +		cur_tsc = rte_rdtsc();
> +		/*
> +		 * TX burst queue drain
> +		 */
> +		diff_tsc = cur_tsc - prev_tsc;
> +		if (unlikely(diff_tsc > drain_tsc)) {
> +			for (i = 0; i < qconf->n_tx_port; ++i) {
> +				portid = qconf->tx_port_id[i];
> +				rte_eth_tx_buffer_flush(portid,
> +						qconf->tx_queue_id[portid],
> +						qconf->tx_buffer[portid]);
> +			}
> +			prev_tsc = cur_tsc;
> +		}
> +
> +		/*
> +		 * Read packet from RX queues
> +		 */
> +		for (i = 0; i < qconf->n_rx_queue; ++i) {
> +			rx_queue = &(qconf->rx_queue_list[i]);
> +			rx_queue->idle_hint = 0;
> +			portid = rx_queue->port_id;
> +			queueid = rx_queue->queue_id;
> +
> +			nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst,
> +					MAX_PKT_BURST);
> +
> +			stats[lcore_id].nb_rx_processed += nb_rx;
> +
> +			if (nb_rx == 0) {
> +
> +				rte_power_empty_poll_stat_update(lcore_id);
> +
> +				continue;
> +			} else {
> +				rte_power_poll_stat_update(lcore_id, nb_rx);
> +			}
> +
> +
> +			/* Prefetch first packets */
> +			for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) {
> +				rte_prefetch0(rte_pktmbuf_mtod(
> +							pkts_burst[j], void *));
> +			}
> +
> +			/* Prefetch and forward already prefetched packets */
> +			for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) {
> +				rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[
> +							j + PREFETCH_OFFSET], void *));
> +				l3fwd_simple_forward(pkts_burst[j], portid,
> +						qconf);
> +			}
>   
> +			/* Forward remaining prefetched packets */
> +			for (; j < nb_rx; j++) {
> +				l3fwd_simple_forward(pkts_burst[j], portid,
> +						qconf);
> +			}
> +
> +		}
> +
> +	}
> +
> +	return 0;
> +}
>   /* main processing loop */
>   static int
>   main_loop(__attribute__((unused)) void *dummy)
> @@ -1127,7 +1261,8 @@ print_usage(const char *prgname)
>   		"  --no-numa: optional, disable numa awareness\n"
>   		"  --enable-jumbo: enable jumbo frame"
>   		" which max packet len is PKTLEN in decimal (64-9600)\n"
> -		"  --parse-ptype: parse packet type by software\n",
> +		"  --parse-ptype: parse packet type by software\n"
> +		"  --empty=poll: enable empty poll detection\n",

typo: "empty=poll" should be "empty-poll"

I really think some info on what should be supplied with the empty-poll 
parameter
should be mentioned here
e.g.

--empty=poll "training_flag, high_threshold, med_threshold"



>   		prgname);
>   }
>   
> @@ -1220,7 +1355,55 @@ parse_config(const char *q_arg)
>   
>   	return 0;
>   }
> +static int
> +parse_ep_config(const char *q_arg)
> +{
> +	char s[256];
> +	const char *p = q_arg;
> +	char *end;
> +	int  num_arg;
> +
> +	char *str_fld[3];
> +
> +	int training_flag;
> +	int med_edpi;
> +	int hgh_edpi;
> +
> +	ep_med_edpi = EMPTY_POLL_MED_THRESHOLD;
> +	ep_hgh_edpi = EMPTY_POLL_MED_THRESHOLD;
> +
> +	snprintf(s, sizeof(s), "%s", p);
> +
> +	num_arg = rte_strsplit(s, sizeof(s), str_fld, 3, ',');
> +
> +	empty_poll_train = false;
> +
> +	if (num_arg == 0)
> +		return 0;
>   
> +	if (num_arg == 3) {
> +
> +		training_flag = strtoul(str_fld[0], &end, 0);
> +		med_edpi = strtoul(str_fld[1], &end, 0);
> +		hgh_edpi = strtoul(str_fld[2], &end, 0);
> +
> +		if (training_flag == 1)
> +			empty_poll_train = true;
> +
> +		if (med_edpi > 0)
> +			ep_med_edpi = med_edpi;
> +
> +		if (med_edpi > 0)
> +			ep_hgh_edpi = hgh_edpi;
> +
> +	} else {
> +
> +		return -1;
> +	}
> +
> +	return 0;
> +
> +}
>   #define CMD_LINE_OPT_PARSE_PTYPE "parse-ptype"
>   
>   /* Parse the argument given in the command line of the application */
> @@ -1230,6 +1413,7 @@ parse_args(int argc, char **argv)
>   	int opt, ret;
>   	char **argvopt;
>   	int option_index;
> +	uint32_t limit;
>   	char *prgname = argv[0];
>   	static struct option lgopts[] = {
>   		{"config", 1, 0, 0},
> @@ -1237,13 +1421,14 @@ parse_args(int argc, char **argv)
>   		{"high-perf-cores", 1, 0, 0},
>   		{"no-numa", 0, 0, 0},
>   		{"enable-jumbo", 0, 0, 0},
> +		{"empty-poll", 1, 0, 0},
>   		{CMD_LINE_OPT_PARSE_PTYPE, 0, 0, 0},
>   		{NULL, 0, 0, 0}
>   	};
>   
>   	argvopt = argv;
>   
> -	while ((opt = getopt_long(argc, argvopt, "p:P",
> +	while ((opt = getopt_long(argc, argvopt, "p:l:m:h:P",
>   				lgopts, &option_index)) != EOF) {
>   
>   		switch (opt) {
> @@ -1260,7 +1445,18 @@ parse_args(int argc, char **argv)
>   			printf("Promiscuous mode selected\n");
>   			promiscuous_on = 1;
>   			break;
> -
> +		case 'l':
> +			limit = parse_max_pkt_len(optarg);
> +			freq_tlb[LOW] = limit;
> +			break;
> +		case 'm':
> +			limit = parse_max_pkt_len(optarg);
> +			freq_tlb[MED] = limit;
> +			break;
> +		case 'h':
> +			limit = parse_max_pkt_len(optarg);
> +			freq_tlb[HGH] = limit;
> +			break;
>   		/* long options */
>   		case 0:
>   			if (!strncmp(lgopts[option_index].name, "config", 6)) {
> @@ -1299,6 +1495,20 @@ parse_args(int argc, char **argv)
>   			}
>   
>   			if (!strncmp(lgopts[option_index].name,
> +						"empty-poll", 10)) {
> +				printf("empty-poll is enabled\n");
> +				empty_poll_on = true;
> +				ret = parse_ep_config(optarg);
> +
> +				if (ret) {
> +					printf("invalid empty poll config\n");
> +					print_usage(prgname);
> +					return -1;
> +				}
> +
> +			}
> +
> +			if (!strncmp(lgopts[option_index].name,
>   					"enable-jumbo", 12)) {
>   				struct option lenopts =
>   					{"max-pkt-len", required_argument, \
> @@ -1646,6 +1856,59 @@ init_power_library(void)
>   	}
>   	return ret;
>   }
> +static void
> +empty_poll_setup_timer(void)
> +{
> +	int lcore_id = rte_lcore_id();
> +	uint64_t hz = rte_get_timer_hz();
> +
> +	struct  ep_params *ep_ptr = ep_params;
> +
> +	ep_ptr->interval_ticks = hz / INTERVALS_PER_SECOND;
> +
> +	rte_timer_reset_sync(&ep_ptr->timer0,
> +			ep_ptr->interval_ticks,
> +			PERIODICAL,
> +			lcore_id,
> +			rte_empty_poll_detection,
> +			(void *)ep_ptr);
> +
> +}
> +static int
> +launch_timer(unsigned int lcore_id)
> +{
> +	int64_t prev_tsc = 0, cur_tsc, diff_tsc, cycles_10ms;
> +
> +	RTE_SET_USED(lcore_id);
> +
> +
> +	if (rte_get_master_lcore() != lcore_id) {
> +		rte_panic("timer on lcore:%d which is not master core:%d\n",
> +				lcore_id,
> +				rte_get_master_lcore());
> +	}
> +
> +	RTE_LOG(INFO, POWER, "Bring up the Timer\n");
> +
> +	empty_poll_setup_timer();
> +
> +	cycles_10ms = rte_get_timer_hz() / 100;
> +
> +	while (!is_done()) {
> +		cur_tsc = rte_rdtsc();
> +		diff_tsc = cur_tsc - prev_tsc;
> +		if (diff_tsc > cycles_10ms) {
> +			rte_timer_manage();
> +			prev_tsc = cur_tsc;
> +			cycles_10ms = rte_get_timer_hz() / 100;
> +		}
> +	}
> +
> +	RTE_LOG(INFO, POWER, "Timer_subsystem is done\n");
> +
> +	return 0;
> +}
> +
>   
>   int
>   main(int argc, char **argv)
> @@ -1828,13 +2091,15 @@ main(int argc, char **argv)
>   		if (rte_lcore_is_enabled(lcore_id) == 0)
>   			continue;
>   
> -		/* init timer structures for each enabled lcore */
> -		rte_timer_init(&power_timers[lcore_id]);
> -		hz = rte_get_timer_hz();
> -		rte_timer_reset(&power_timers[lcore_id],
> -			hz/TIMER_NUMBER_PER_SECOND, SINGLE, lcore_id,
> -						power_timer_cb, NULL);
> -
> +		if (empty_poll_on == false) {
> +			/* init timer structures for each enabled lcore */
> +			rte_timer_init(&power_timers[lcore_id]);
> +			hz = rte_get_timer_hz();
> +			rte_timer_reset(&power_timers[lcore_id],
> +					hz/TIMER_NUMBER_PER_SECOND,
> +					SINGLE, lcore_id,
> +					power_timer_cb, NULL);
> +		}
>   		qconf = &lcore_conf[lcore_id];
>   		printf("\nInitializing rx queues on lcore %u ... ", lcore_id );
>   		fflush(stdout);
> @@ -1905,12 +2170,38 @@ main(int argc, char **argv)
>   
>   	check_all_ports_link_status(enabled_port_mask);
>   
> +	if (empty_poll_on == true) {
> +
> +		if (empty_poll_train) {
> +			policy.state = TRAINING;
> +		} else {
> +			policy.state = MED_NORMAL;
> +			policy.med_base_edpi = ep_med_edpi;
> +			policy.hgh_base_edpi = ep_hgh_edpi;
> +		}
> +
> +		rte_power_empty_poll_stat_init(&ep_params, freq_tlb, &policy);
> +	}
> +
> +
>   	/* launch per-lcore init on every lcore */
> -	rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);
> +	if (empty_poll_on == false) {
> +		rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);
> +	} else {
> +		empty_poll_stop = false;
> +		rte_eal_mp_remote_launch(main_empty_poll_loop, NULL, SKIP_MASTER);
> +	}
> +
> +	if (empty_poll_on == true)
> +		launch_timer(rte_lcore_id());
> +
>   	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
>   		if (rte_eal_wait_lcore(lcore_id) < 0)
>   			return -1;
>   	}
>   
> +	if (empty_poll_on)
> +		rte_power_empty_poll_stat_free();
> +
>   	return 0;
>   }
> diff --git a/examples/l3fwd-power/meson.build b/examples/l3fwd-power/meson.build
> index 20c8054..a3c5c2f 100644
> --- a/examples/l3fwd-power/meson.build
> +++ b/examples/l3fwd-power/meson.build
> @@ -9,6 +9,7 @@
>   if host_machine.system() != 'linux'
>   	build = false
>   endif
> +allow_experimental_apis = true
>   deps += ['power', 'timer', 'lpm', 'hash']
>   sources = files(
>   	'main.c', 'perf_core.c'


Checkpatch throws up some warnings:


### examples/l3fwd-power: simple app update for new API

WARNING:LONG_LINE: line over 80 characters
#201: FILE: examples/l3fwd-power/main.c:876:
+               (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * 
BURST_TX_DRAIN_US;

WARNING:LONG_LINE: line over 80 characters
#209: FILE: examples/l3fwd-power/main.c:884:
+               RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to 
do\n", lcore_id);

WARNING:LONG_LINE: line over 80 characters
#271: FILE: examples/l3fwd-power/main.c:946:
+                                                       j + 
PREFETCH_OFFSET], void *));

WARNING:LONG_LINE: line over 80 characters
#529: FILE: examples/l3fwd-power/main.c:2192:
+               rte_eal_mp_remote_launch(main_empty_poll_loop, NULL, 
SKIP_MASTER);

total: 0 errors, 4 warnings, 467 lines checked


Rgds,
Dave.




More information about the dev mailing list