[dpdk-dev] [PATCH v8 2/4] examples/l3fwd-power: simple app update for new API
Hunt, David
david.hunt at intel.com
Fri Sep 28 13:19:31 CEST 2018
Hi Liang,
A few tweaks below:
On 17/9/2018 2:30 PM, Liang Ma wrote:
> Add the support for new traffic pattern aware power control
> power management API.
>
> Example:
> ./l3fwd-power -l xxx -n 4 -w 0000:xx:00.0 -w 0000:xx:00.1 -- -p 0x3
> -P --config="(0,0,xx),(1,0,xx)" --empty-poll="0,0,0" -l 14 -m 9 -h 1
>
> Please Reference l3fwd-power document for all parameter except
> empty-poll.
The docs should probably include the empty-poll parameter. Suggest
re-wording to:
Please reference the l3fwd-power document for full parameter usage.
> The option "l", "m", "h" are used to set the power index for
> LOW, MED, HIGH power state. only is useful after enable empty-poll
>
> --empty-poll="training_flag, med_threshold, high_threshold"
>
> The option training_flag is used to enable/disable training mode.
>
> The option med_threshold is used to indicate the empty poll threshold
> of modest state which is customized by user.
>
> The option high_threshold is used to indicate the empty poll threshold
> of busy state which is customized by user.
>
> Above three option default value is all 0.
>
> Once enable empty-poll. System will apply the default parameter.
> Training mode is disabled as default.
Suggest:
Once empty-poll is enabled, the system will apply the default parameters if no
other command line options are provided.
> If training mode is triggered, there should not has any traffic
> pass-through during training phase.
Suggest:
If training mode is enabled, the user should ensure that no traffic
is allowed to pass through the system.
> When training phase complete, system transfer to normal phase.
When the training phase is complete, the application transfers to normal operation.
>
> System will running with modest power stat at beginning.
The system will start running in the modest power mode.
> If the system busyness percentage above 70%, then system will adjust
> power state move to High power state. If the traffic become lower(eg. The
> system busyness percentage drop below 30%), system will fallback
> to the modest power state.
If the traffic goes above 70%, then the system will move to the High power state.
If the traffic drops below 30%, the system will fall back to the modest
power state.
> Example code use master thread to monitoring worker thread busyness.
> the default timer resolution is 10ms.
>
> ChangeLog:
> v2 fix some coding style issues
> v3 rename the API.
> v6 re-work the API.
> v7 no change.
> v8 disable training as default option.
>
> Signed-off-by: Liang Ma <liang.j.ma at intel.com>
>
> Reviewed-by: Lei Yao <lei.a.yao at intel.com>
> ---
> examples/l3fwd-power/Makefile | 3 +
> examples/l3fwd-power/main.c | 325 +++++++++++++++++++++++++++++++++++++--
> examples/l3fwd-power/meson.build | 1 +
> 3 files changed, 312 insertions(+), 17 deletions(-)
>
> diff --git a/examples/l3fwd-power/Makefile b/examples/l3fwd-power/Makefile
> index d7e39a3..772ec7b 100644
> --- a/examples/l3fwd-power/Makefile
> +++ b/examples/l3fwd-power/Makefile
> @@ -23,6 +23,8 @@ CFLAGS += -O3 $(shell pkg-config --cflags libdpdk)
> LDFLAGS_SHARED = $(shell pkg-config --libs libdpdk)
> LDFLAGS_STATIC = -Wl,-Bstatic $(shell pkg-config --static --libs libdpdk)
>
> +CFLAGS += -DALLOW_EXPERIMENTAL_API
> +
> build/$(APP)-shared: $(SRCS-y) Makefile $(PC_FILE) | build
> $(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_SHARED)
>
> @@ -54,6 +56,7 @@ please change the definition of the RTE_TARGET environment variable)
> all:
> else
>
> +CFLAGS += -DALLOW_EXPERIMENTAL_API
> CFLAGS += -O3
> CFLAGS += $(WERROR_FLAGS)
>
> diff --git a/examples/l3fwd-power/main.c b/examples/l3fwd-power/main.c
> index 68527d2..1465608 100644
> --- a/examples/l3fwd-power/main.c
> +++ b/examples/l3fwd-power/main.c
> @@ -43,6 +43,7 @@
> #include <rte_timer.h>
> #include <rte_power.h>
> #include <rte_spinlock.h>
> +#include <rte_power_empty_poll.h>
>
> #include "perf_core.h"
> #include "main.h"
> @@ -55,6 +56,8 @@
>
> /* 100 ms interval */
> #define TIMER_NUMBER_PER_SECOND 10
> +/* (10ms) */
> +#define INTERVALS_PER_SECOND 100
> /* 100000 us */
> #define SCALING_PERIOD (1000000/TIMER_NUMBER_PER_SECOND)
> #define SCALING_DOWN_TIME_RATIO_THRESHOLD 0.25
> @@ -117,6 +120,11 @@
> */
> #define RTE_TEST_RX_DESC_DEFAULT 1024
> #define RTE_TEST_TX_DESC_DEFAULT 1024
> +#define EMPTY_POLL_MED_THRESHOLD 350000UL
> +#define EMPTY_POLL_HGH_THRESHOLD 580000UL
I'd suggest adding some explanation around these two numbers.
E.g.
/*
 * These two thresholds were decided on by running the training algorithm on
 * a 2.5GHz Xeon. These defaults can be overridden by supplying non-zero values
 * for the med_threshold and high_threshold parameters on the command line.
 */
> +
> +
> +
> static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
> static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
>
> @@ -132,6 +140,14 @@ static uint32_t enabled_port_mask = 0;
> static int promiscuous_on = 0;
> /* NUMA is enabled by default. */
> static int numa_on = 1;
> +/* emptypoll is disabled by default. */
> +static bool empty_poll_on;
> +static bool empty_poll_train;
> +volatile bool empty_poll_stop;
> +static struct ep_params *ep_params;
> +static struct ep_policy policy;
> +static long ep_med_edpi, ep_hgh_edpi;
> +
> static int parse_ptype; /**< Parse packet type using rx callback, and */
> /**< disabled by default */
>
> @@ -330,6 +346,13 @@ static inline uint32_t power_idle_heuristic(uint32_t zero_rx_packet_count);
> static inline enum freq_scale_hint_t power_freq_scaleup_heuristic( \
> unsigned int lcore_id, uint16_t port_id, uint16_t queue_id);
>
> +static uint8_t freq_tlb[] = {14, 9, 1};
> +
Maybe an explanation on where these numbers came from. E.g.
/*
 * These defaults are using the max frequency index (1), a medium index (9)
 * and a typical low frequency index (14). These can be adjusted to use
 * different indexes using the relevant command line parameters.
 */
> +static int is_done(void)
> +{
> + return empty_poll_stop;
> +}
> +
> /* exit signal handler */
> static void
> signal_exit_now(int sigtype)
> @@ -338,7 +361,15 @@ signal_exit_now(int sigtype)
> unsigned int portid;
> int ret;
>
> + RTE_SET_USED(lcore_id);
> + RTE_SET_USED(portid);
> + RTE_SET_USED(ret);
> +
> if (sigtype == SIGINT) {
> + if (empty_poll_on)
> + empty_poll_stop = true;
> +
> +
> for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
> if (rte_lcore_is_enabled(lcore_id) == 0)
> continue;
> @@ -351,16 +382,19 @@ signal_exit_now(int sigtype)
> "core%u\n", lcore_id);
> }
>
> - RTE_ETH_FOREACH_DEV(portid) {
> - if ((enabled_port_mask & (1 << portid)) == 0)
> - continue;
> + if (!empty_poll_on) {
> + RTE_ETH_FOREACH_DEV(portid) {
> + if ((enabled_port_mask & (1 << portid)) == 0)
> + continue;
>
> - rte_eth_dev_stop(portid);
> - rte_eth_dev_close(portid);
> + rte_eth_dev_stop(portid);
> + rte_eth_dev_close(portid);
> + }
> }
> }
>
> - rte_exit(EXIT_SUCCESS, "User forced exit\n");
> + if (!empty_poll_on)
> + rte_exit(EXIT_SUCCESS, "User forced exit\n");
> }
>
> /* Freqency scale down timer callback */
> @@ -825,7 +859,107 @@ static int event_register(struct lcore_conf *qconf)
>
> return 0;
> }
> +/* main processing loop */
> +static int
> +main_empty_poll_loop(__attribute__((unused)) void *dummy)
> +{
> + struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
> + unsigned int lcore_id;
> + uint64_t prev_tsc, diff_tsc, cur_tsc;
> + int i, j, nb_rx;
> + uint8_t queueid;
> + uint16_t portid;
> + struct lcore_conf *qconf;
> + struct lcore_rx_queue *rx_queue;
> +
> + const uint64_t drain_tsc =
> + (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US;
> +
> + prev_tsc = 0;
> +
> + lcore_id = rte_lcore_id();
> + qconf = &lcore_conf[lcore_id];
> +
> + if (qconf->n_rx_queue == 0) {
> + RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n", lcore_id);
> + return 0;
> + }
> +
> + for (i = 0; i < qconf->n_rx_queue; i++) {
> + portid = qconf->rx_queue_list[i].port_id;
> + queueid = qconf->rx_queue_list[i].queue_id;
> + RTE_LOG(INFO, L3FWD_POWER, " -- lcoreid=%u portid=%u "
> + "rxqueueid=%hhu\n", lcore_id, portid, queueid);
> + }
> +
> + while (!is_done()) {
> + stats[lcore_id].nb_iteration_looped++;
> +
> + cur_tsc = rte_rdtsc();
> + /*
> + * TX burst queue drain
> + */
> + diff_tsc = cur_tsc - prev_tsc;
> + if (unlikely(diff_tsc > drain_tsc)) {
> + for (i = 0; i < qconf->n_tx_port; ++i) {
> + portid = qconf->tx_port_id[i];
> + rte_eth_tx_buffer_flush(portid,
> + qconf->tx_queue_id[portid],
> + qconf->tx_buffer[portid]);
> + }
> + prev_tsc = cur_tsc;
> + }
> +
> + /*
> + * Read packet from RX queues
> + */
> + for (i = 0; i < qconf->n_rx_queue; ++i) {
> + rx_queue = &(qconf->rx_queue_list[i]);
> + rx_queue->idle_hint = 0;
> + portid = rx_queue->port_id;
> + queueid = rx_queue->queue_id;
> +
> + nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst,
> + MAX_PKT_BURST);
> +
> + stats[lcore_id].nb_rx_processed += nb_rx;
> +
> + if (nb_rx == 0) {
> +
> + rte_power_empty_poll_stat_update(lcore_id);
> +
> + continue;
> + } else {
> + rte_power_poll_stat_update(lcore_id, nb_rx);
> + }
> +
> +
> + /* Prefetch first packets */
> + for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) {
> + rte_prefetch0(rte_pktmbuf_mtod(
> + pkts_burst[j], void *));
> + }
> +
> + /* Prefetch and forward already prefetched packets */
> + for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) {
> + rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[
> + j + PREFETCH_OFFSET], void *));
> + l3fwd_simple_forward(pkts_burst[j], portid,
> + qconf);
> + }
>
> + /* Forward remaining prefetched packets */
> + for (; j < nb_rx; j++) {
> + l3fwd_simple_forward(pkts_burst[j], portid,
> + qconf);
> + }
> +
> + }
> +
> + }
> +
> + return 0;
> +}
> /* main processing loop */
> static int
> main_loop(__attribute__((unused)) void *dummy)
> @@ -1127,7 +1261,8 @@ print_usage(const char *prgname)
> " --no-numa: optional, disable numa awareness\n"
> " --enable-jumbo: enable jumbo frame"
> " which max packet len is PKTLEN in decimal (64-9600)\n"
> - " --parse-ptype: parse packet type by software\n",
> + " --parse-ptype: parse packet type by software\n"
> + " --empty=poll: enable empty poll detection\n",
typo: "empty=poll" should be "empty-poll"
I really think some info on what should be supplied with the empty-poll
parameter should be mentioned here, e.g.
--empty-poll="training_flag, med_threshold, high_threshold"
> prgname);
> }
>
> @@ -1220,7 +1355,55 @@ parse_config(const char *q_arg)
>
> return 0;
> }
> +static int
> +parse_ep_config(const char *q_arg)
> +{
> + char s[256];
> + const char *p = q_arg;
> + char *end;
> + int num_arg;
> +
> + char *str_fld[3];
> +
> + int training_flag;
> + int med_edpi;
> + int hgh_edpi;
> +
> + ep_med_edpi = EMPTY_POLL_MED_THRESHOLD;
> + ep_hgh_edpi = EMPTY_POLL_MED_THRESHOLD;
> +
> + snprintf(s, sizeof(s), "%s", p);
> +
> + num_arg = rte_strsplit(s, sizeof(s), str_fld, 3, ',');
> +
> + empty_poll_train = false;
> +
> + if (num_arg == 0)
> + return 0;
>
> + if (num_arg == 3) {
> +
> + training_flag = strtoul(str_fld[0], &end, 0);
> + med_edpi = strtoul(str_fld[1], &end, 0);
> + hgh_edpi = strtoul(str_fld[2], &end, 0);
> +
> + if (training_flag == 1)
> + empty_poll_train = true;
> +
> + if (med_edpi > 0)
> + ep_med_edpi = med_edpi;
> +
> + if (med_edpi > 0)
> + ep_hgh_edpi = hgh_edpi;
> +
> + } else {
> +
> + return -1;
> + }
> +
> + return 0;
> +
> +}
> #define CMD_LINE_OPT_PARSE_PTYPE "parse-ptype"
>
> /* Parse the argument given in the command line of the application */
> @@ -1230,6 +1413,7 @@ parse_args(int argc, char **argv)
> int opt, ret;
> char **argvopt;
> int option_index;
> + uint32_t limit;
> char *prgname = argv[0];
> static struct option lgopts[] = {
> {"config", 1, 0, 0},
> @@ -1237,13 +1421,14 @@ parse_args(int argc, char **argv)
> {"high-perf-cores", 1, 0, 0},
> {"no-numa", 0, 0, 0},
> {"enable-jumbo", 0, 0, 0},
> + {"empty-poll", 1, 0, 0},
> {CMD_LINE_OPT_PARSE_PTYPE, 0, 0, 0},
> {NULL, 0, 0, 0}
> };
>
> argvopt = argv;
>
> - while ((opt = getopt_long(argc, argvopt, "p:P",
> + while ((opt = getopt_long(argc, argvopt, "p:l:m:h:P",
> lgopts, &option_index)) != EOF) {
>
> switch (opt) {
> @@ -1260,7 +1445,18 @@ parse_args(int argc, char **argv)
> printf("Promiscuous mode selected\n");
> promiscuous_on = 1;
> break;
> -
> + case 'l':
> + limit = parse_max_pkt_len(optarg);
> + freq_tlb[LOW] = limit;
> + break;
> + case 'm':
> + limit = parse_max_pkt_len(optarg);
> + freq_tlb[MED] = limit;
> + break;
> + case 'h':
> + limit = parse_max_pkt_len(optarg);
> + freq_tlb[HGH] = limit;
> + break;
> /* long options */
> case 0:
> if (!strncmp(lgopts[option_index].name, "config", 6)) {
> @@ -1299,6 +1495,20 @@ parse_args(int argc, char **argv)
> }
>
> if (!strncmp(lgopts[option_index].name,
> + "empty-poll", 10)) {
> + printf("empty-poll is enabled\n");
> + empty_poll_on = true;
> + ret = parse_ep_config(optarg);
> +
> + if (ret) {
> + printf("invalid empty poll config\n");
> + print_usage(prgname);
> + return -1;
> + }
> +
> + }
> +
> + if (!strncmp(lgopts[option_index].name,
> "enable-jumbo", 12)) {
> struct option lenopts =
> {"max-pkt-len", required_argument, \
> @@ -1646,6 +1856,59 @@ init_power_library(void)
> }
> return ret;
> }
> +static void
> +empty_poll_setup_timer(void)
> +{
> + int lcore_id = rte_lcore_id();
> + uint64_t hz = rte_get_timer_hz();
> +
> + struct ep_params *ep_ptr = ep_params;
> +
> + ep_ptr->interval_ticks = hz / INTERVALS_PER_SECOND;
> +
> + rte_timer_reset_sync(&ep_ptr->timer0,
> + ep_ptr->interval_ticks,
> + PERIODICAL,
> + lcore_id,
> + rte_empty_poll_detection,
> + (void *)ep_ptr);
> +
> +}
> +static int
> +launch_timer(unsigned int lcore_id)
> +{
> + int64_t prev_tsc = 0, cur_tsc, diff_tsc, cycles_10ms;
> +
> + RTE_SET_USED(lcore_id);
> +
> +
> + if (rte_get_master_lcore() != lcore_id) {
> + rte_panic("timer on lcore:%d which is not master core:%d\n",
> + lcore_id,
> + rte_get_master_lcore());
> + }
> +
> + RTE_LOG(INFO, POWER, "Bring up the Timer\n");
> +
> + empty_poll_setup_timer();
> +
> + cycles_10ms = rte_get_timer_hz() / 100;
> +
> + while (!is_done()) {
> + cur_tsc = rte_rdtsc();
> + diff_tsc = cur_tsc - prev_tsc;
> + if (diff_tsc > cycles_10ms) {
> + rte_timer_manage();
> + prev_tsc = cur_tsc;
> + cycles_10ms = rte_get_timer_hz() / 100;
> + }
> + }
> +
> + RTE_LOG(INFO, POWER, "Timer_subsystem is done\n");
> +
> + return 0;
> +}
> +
>
> int
> main(int argc, char **argv)
> @@ -1828,13 +2091,15 @@ main(int argc, char **argv)
> if (rte_lcore_is_enabled(lcore_id) == 0)
> continue;
>
> - /* init timer structures for each enabled lcore */
> - rte_timer_init(&power_timers[lcore_id]);
> - hz = rte_get_timer_hz();
> - rte_timer_reset(&power_timers[lcore_id],
> - hz/TIMER_NUMBER_PER_SECOND, SINGLE, lcore_id,
> - power_timer_cb, NULL);
> -
> + if (empty_poll_on == false) {
> + /* init timer structures for each enabled lcore */
> + rte_timer_init(&power_timers[lcore_id]);
> + hz = rte_get_timer_hz();
> + rte_timer_reset(&power_timers[lcore_id],
> + hz/TIMER_NUMBER_PER_SECOND,
> + SINGLE, lcore_id,
> + power_timer_cb, NULL);
> + }
> qconf = &lcore_conf[lcore_id];
> printf("\nInitializing rx queues on lcore %u ... ", lcore_id );
> fflush(stdout);
> @@ -1905,12 +2170,38 @@ main(int argc, char **argv)
>
> check_all_ports_link_status(enabled_port_mask);
>
> + if (empty_poll_on == true) {
> +
> + if (empty_poll_train) {
> + policy.state = TRAINING;
> + } else {
> + policy.state = MED_NORMAL;
> + policy.med_base_edpi = ep_med_edpi;
> + policy.hgh_base_edpi = ep_hgh_edpi;
> + }
> +
> + rte_power_empty_poll_stat_init(&ep_params, freq_tlb, &policy);
> + }
> +
> +
> /* launch per-lcore init on every lcore */
> - rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);
> + if (empty_poll_on == false) {
> + rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);
> + } else {
> + empty_poll_stop = false;
> + rte_eal_mp_remote_launch(main_empty_poll_loop, NULL, SKIP_MASTER);
> + }
> +
> + if (empty_poll_on == true)
> + launch_timer(rte_lcore_id());
> +
> RTE_LCORE_FOREACH_SLAVE(lcore_id) {
> if (rte_eal_wait_lcore(lcore_id) < 0)
> return -1;
> }
>
> + if (empty_poll_on)
> + rte_power_empty_poll_stat_free();
> +
> return 0;
> }
> diff --git a/examples/l3fwd-power/meson.build b/examples/l3fwd-power/meson.build
> index 20c8054..a3c5c2f 100644
> --- a/examples/l3fwd-power/meson.build
> +++ b/examples/l3fwd-power/meson.build
> @@ -9,6 +9,7 @@
> if host_machine.system() != 'linux'
> build = false
> endif
> +allow_experimental_apis = true
> deps += ['power', 'timer', 'lpm', 'hash']
> sources = files(
> 'main.c', 'perf_core.c'
Checkpatch throws up some warnings:
### examples/l3fwd-power: simple app update for new API
WARNING:LONG_LINE: line over 80 characters
#201: FILE: examples/l3fwd-power/main.c:876:
+ (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S *
BURST_TX_DRAIN_US;
WARNING:LONG_LINE: line over 80 characters
#209: FILE: examples/l3fwd-power/main.c:884:
+ RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to
do\n", lcore_id);
WARNING:LONG_LINE: line over 80 characters
#271: FILE: examples/l3fwd-power/main.c:946:
+ j +
PREFETCH_OFFSET], void *));
WARNING:LONG_LINE: line over 80 characters
#529: FILE: examples/l3fwd-power/main.c:2192:
+ rte_eal_mp_remote_launch(main_empty_poll_loop, NULL,
SKIP_MASTER);
total: 0 errors, 4 warnings, 467 lines checked
Rgds,
Dave.
More information about the dev
mailing list