[dpdk-dev] [PATCH v11 2/5] examples/l3fwd-power: simple app update for new API

Liang Ma liang.j.ma at intel.com
Fri Oct 19 12:23:47 CEST 2018


Add the support for new traffic pattern aware power control
power management API.

Example:
./l3fwd-power -l xxx   -n 4   -w 0000:xx:00.0 -w 0000:xx:00.1 -- -p 0x3
-P --config="(0,0,xx),(1,0,xx)" --empty-poll="0,0,0" -l 14 -m 9 -h 1

Please Reference l3fwd-power document for full parameter usage

The option "l", "m", "h" are used to set the power index for
LOW, MED, HIGH power state. Only is useful after enable empty-poll

--empty-poll="training_flag, med_threshold, high_threshold"

The option training_flag is used to enable/disable training mode.

The option med_threshold is used to indicate the empty poll threshold
of modest state which is customized by user.

The option high_threshold is used to indicate the empty poll threshold
of busy state which is customized by user.

Above three option default value is all 0.

Once enable empty-poll. System will apply the default parameter if no
other command line options are provided.

If training mode is enabled, the user should ensure that no traffic
is allowed to pass through the system. When training phase complete,
the application transfer to normal operation

System will start running with the modest power mode.
If the traffic goes above 70%, then system will move to High power state.
If the traffic drops below 30%, the system will fallback to the modest
power state.

Example code use master thread to monitoring worker thread busyness.
The default timer resolution is 10ms.

ChangeLog:
v2 fix some coding style issues
v3 rename the API.
v6 re-work the API.
v7 no change.
v8 disable training as default option.
v10 update due to review comments.
v11 add checking for empty poll init function return value.

Signed-off-by: Liang Ma <liang.j.ma at intel.com>

Reviewed-by: Lei Yao <lei.a.yao at intel.com>

Acked-by: David Hunt <david.hunt at intel.com>
---
 examples/l3fwd-power/Makefile    |   3 +
 examples/l3fwd-power/main.c      | 346 +++++++++++++++++++++++++++++++++++++--
 examples/l3fwd-power/meson.build |   1 +
 3 files changed, 333 insertions(+), 17 deletions(-)

diff --git a/examples/l3fwd-power/Makefile b/examples/l3fwd-power/Makefile
index d7e39a3..772ec7b 100644
--- a/examples/l3fwd-power/Makefile
+++ b/examples/l3fwd-power/Makefile
@@ -23,6 +23,8 @@ CFLAGS += -O3 $(shell pkg-config --cflags libdpdk)
 LDFLAGS_SHARED = $(shell pkg-config --libs libdpdk)
 LDFLAGS_STATIC = -Wl,-Bstatic $(shell pkg-config --static --libs libdpdk)
 
+CFLAGS += -DALLOW_EXPERIMENTAL_API
+
 build/$(APP)-shared: $(SRCS-y) Makefile $(PC_FILE) | build
 	$(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_SHARED)
 
@@ -54,6 +56,7 @@ please change the definition of the RTE_TARGET environment variable)
 all:
 else
 
+CFLAGS += -DALLOW_EXPERIMENTAL_API
 CFLAGS += -O3
 CFLAGS += $(WERROR_FLAGS)
 
diff --git a/examples/l3fwd-power/main.c b/examples/l3fwd-power/main.c
index 68527d2..c07eeff 100644
--- a/examples/l3fwd-power/main.c
+++ b/examples/l3fwd-power/main.c
@@ -43,6 +43,7 @@
 #include <rte_timer.h>
 #include <rte_power.h>
 #include <rte_spinlock.h>
+#include <rte_power_empty_poll.h>
 
 #include "perf_core.h"
 #include "main.h"
@@ -55,6 +56,8 @@
 
 /* 100 ms interval */
 #define TIMER_NUMBER_PER_SECOND           10
+/* (10ms) */
+#define INTERVALS_PER_SECOND             100
 /* 100000 us */
 #define SCALING_PERIOD                    (1000000/TIMER_NUMBER_PER_SECOND)
 #define SCALING_DOWN_TIME_RATIO_THRESHOLD 0.25
@@ -117,6 +120,17 @@
  */
 #define RTE_TEST_RX_DESC_DEFAULT 1024
 #define RTE_TEST_TX_DESC_DEFAULT 1024
+
+/*
+ * These two thresholds were decided on by running the training algorithm on
+ * a 2.5GHz Xeon. These defaults can be overridden by supplying non-zero values
+ * for the med_threshold and high_threshold parameters on the command line.
+ */
+#define EMPTY_POLL_MED_THRESHOLD 350000UL
+#define EMPTY_POLL_HGH_THRESHOLD 580000UL
+
+
+
 static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
 static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
 
@@ -132,6 +146,14 @@ static uint32_t enabled_port_mask = 0;
 static int promiscuous_on = 0;
 /* NUMA is enabled by default. */
 static int numa_on = 1;
+/* emptypoll is disabled by default. */
+static bool empty_poll_on;
+static bool empty_poll_train;
+volatile bool empty_poll_stop;
+static struct  ep_params *ep_params;
+static struct  ep_policy policy;
+static long  ep_med_edpi, ep_hgh_edpi;
+
 static int parse_ptype; /**< Parse packet type using rx callback, and */
 			/**< disabled by default */
 
@@ -330,6 +352,19 @@ static inline uint32_t power_idle_heuristic(uint32_t zero_rx_packet_count);
 static inline enum freq_scale_hint_t power_freq_scaleup_heuristic( \
 		unsigned int lcore_id, uint16_t port_id, uint16_t queue_id);
 
+
+/*
+ * These defaults are using the max frequency index (1), a medium index (9)
+ * and a typical low frequency index (14). These can be adjusted to use
+ * different indexes using the relevant command line parameters.
+ */
+static uint8_t  freq_tlb[] = {14, 9, 1};
+
+static int is_done(void)
+{
+	return empty_poll_stop;
+}
+
 /* exit signal handler */
 static void
 signal_exit_now(int sigtype)
@@ -338,7 +373,15 @@ signal_exit_now(int sigtype)
 	unsigned int portid;
 	int ret;
 
+	RTE_SET_USED(lcore_id);
+	RTE_SET_USED(portid);
+	RTE_SET_USED(ret);
+
 	if (sigtype == SIGINT) {
+		if (empty_poll_on)
+			empty_poll_stop = true;
+
+
 		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
 			if (rte_lcore_is_enabled(lcore_id) == 0)
 				continue;
@@ -351,16 +394,19 @@ signal_exit_now(int sigtype)
 							"core%u\n", lcore_id);
 		}
 
-		RTE_ETH_FOREACH_DEV(portid) {
-			if ((enabled_port_mask & (1 << portid)) == 0)
-				continue;
+		if (!empty_poll_on) {
+			RTE_ETH_FOREACH_DEV(portid) {
+				if ((enabled_port_mask & (1 << portid)) == 0)
+					continue;
 
-			rte_eth_dev_stop(portid);
-			rte_eth_dev_close(portid);
+				rte_eth_dev_stop(portid);
+				rte_eth_dev_close(portid);
+			}
 		}
 	}
 
-	rte_exit(EXIT_SUCCESS, "User forced exit\n");
+	if (!empty_poll_on)
+		rte_exit(EXIT_SUCCESS, "User forced exit\n");
 }
 
 /*  Freqency scale down timer callback */
@@ -825,7 +871,110 @@ static int event_register(struct lcore_conf *qconf)
 
 	return 0;
 }
+/* main processing loop */
+static int
+main_empty_poll_loop(__attribute__((unused)) void *dummy)
+{
+	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
+	unsigned int lcore_id;
+	uint64_t prev_tsc, diff_tsc, cur_tsc;
+	int i, j, nb_rx;
+	uint8_t queueid;
+	uint16_t portid;
+	struct lcore_conf *qconf;
+	struct lcore_rx_queue *rx_queue;
+
+	const uint64_t drain_tsc =
+		(rte_get_tsc_hz() + US_PER_S - 1) /
+		US_PER_S * BURST_TX_DRAIN_US;
+
+	prev_tsc = 0;
+
+	lcore_id = rte_lcore_id();
+	qconf = &lcore_conf[lcore_id];
+
+	if (qconf->n_rx_queue == 0) {
+		RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n",
+			lcore_id);
+		return 0;
+	}
+
+	for (i = 0; i < qconf->n_rx_queue; i++) {
+		portid = qconf->rx_queue_list[i].port_id;
+		queueid = qconf->rx_queue_list[i].queue_id;
+		RTE_LOG(INFO, L3FWD_POWER, " -- lcoreid=%u portid=%u "
+				"rxqueueid=%hhu\n", lcore_id, portid, queueid);
+	}
+
+	while (!is_done()) {
+		stats[lcore_id].nb_iteration_looped++;
+
+		cur_tsc = rte_rdtsc();
+		/*
+		 * TX burst queue drain
+		 */
+		diff_tsc = cur_tsc - prev_tsc;
+		if (unlikely(diff_tsc > drain_tsc)) {
+			for (i = 0; i < qconf->n_tx_port; ++i) {
+				portid = qconf->tx_port_id[i];
+				rte_eth_tx_buffer_flush(portid,
+						qconf->tx_queue_id[portid],
+						qconf->tx_buffer[portid]);
+			}
+			prev_tsc = cur_tsc;
+		}
+
+		/*
+		 * Read packet from RX queues
+		 */
+		for (i = 0; i < qconf->n_rx_queue; ++i) {
+			rx_queue = &(qconf->rx_queue_list[i]);
+			rx_queue->idle_hint = 0;
+			portid = rx_queue->port_id;
+			queueid = rx_queue->queue_id;
+
+			nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst,
+					MAX_PKT_BURST);
+
+			stats[lcore_id].nb_rx_processed += nb_rx;
+
+			if (nb_rx == 0) {
+
+				rte_power_empty_poll_stat_update(lcore_id);
+
+				continue;
+			} else {
+				rte_power_poll_stat_update(lcore_id, nb_rx);
+			}
+
+
+			/* Prefetch first packets */
+			for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) {
+				rte_prefetch0(rte_pktmbuf_mtod(
+							pkts_burst[j], void *));
+			}
+
+			/* Prefetch and forward already prefetched packets */
+			for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) {
+				rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[
+							j + PREFETCH_OFFSET],
+							void *));
+				l3fwd_simple_forward(pkts_burst[j], portid,
+						qconf);
+			}
+
+			/* Forward remaining prefetched packets */
+			for (; j < nb_rx; j++) {
+				l3fwd_simple_forward(pkts_burst[j], portid,
+						qconf);
+			}
+
+		}
 
+	}
+
+	return 0;
+}
 /* main processing loop */
 static int
 main_loop(__attribute__((unused)) void *dummy)
@@ -1127,7 +1276,9 @@ print_usage(const char *prgname)
 		"  --no-numa: optional, disable numa awareness\n"
 		"  --enable-jumbo: enable jumbo frame"
 		" which max packet len is PKTLEN in decimal (64-9600)\n"
-		"  --parse-ptype: parse packet type by software\n",
+		"  --parse-ptype: parse packet type by software\n"
+		"  --empty-poll: enable empty poll detection"
+		" follow (training_flag, high_threshold, med_threshold)\n",
 		prgname);
 }
 
@@ -1220,7 +1371,55 @@ parse_config(const char *q_arg)
 
 	return 0;
 }
+static int
+parse_ep_config(const char *q_arg)
+{
+	char s[256];
+	const char *p = q_arg;
+	char *end;
+	int  num_arg;
+
+	char *str_fld[3];
+
+	int training_flag;
+	int med_edpi;
+	int hgh_edpi;
+
+	ep_med_edpi = EMPTY_POLL_MED_THRESHOLD;
+	ep_hgh_edpi = EMPTY_POLL_MED_THRESHOLD;
+
+	snprintf(s, sizeof(s), "%s", p);
+
+	num_arg = rte_strsplit(s, sizeof(s), str_fld, 3, ',');
+
+	empty_poll_train = false;
 
+	if (num_arg == 0)
+		return 0;
+
+	if (num_arg == 3) {
+
+		training_flag = strtoul(str_fld[0], &end, 0);
+		med_edpi = strtoul(str_fld[1], &end, 0);
+		hgh_edpi = strtoul(str_fld[2], &end, 0);
+
+		if (training_flag == 1)
+			empty_poll_train = true;
+
+		if (med_edpi > 0)
+			ep_med_edpi = med_edpi;
+
+		if (med_edpi > 0)
+			ep_hgh_edpi = hgh_edpi;
+
+	} else {
+
+		return -1;
+	}
+
+	return 0;
+
+}
 #define CMD_LINE_OPT_PARSE_PTYPE "parse-ptype"
 
 /* Parse the argument given in the command line of the application */
@@ -1230,6 +1429,7 @@ parse_args(int argc, char **argv)
 	int opt, ret;
 	char **argvopt;
 	int option_index;
+	uint32_t limit;
 	char *prgname = argv[0];
 	static struct option lgopts[] = {
 		{"config", 1, 0, 0},
@@ -1237,13 +1437,14 @@ parse_args(int argc, char **argv)
 		{"high-perf-cores", 1, 0, 0},
 		{"no-numa", 0, 0, 0},
 		{"enable-jumbo", 0, 0, 0},
+		{"empty-poll", 1, 0, 0},
 		{CMD_LINE_OPT_PARSE_PTYPE, 0, 0, 0},
 		{NULL, 0, 0, 0}
 	};
 
 	argvopt = argv;
 
-	while ((opt = getopt_long(argc, argvopt, "p:P",
+	while ((opt = getopt_long(argc, argvopt, "p:l:m:h:P",
 				lgopts, &option_index)) != EOF) {
 
 		switch (opt) {
@@ -1260,7 +1461,18 @@ parse_args(int argc, char **argv)
 			printf("Promiscuous mode selected\n");
 			promiscuous_on = 1;
 			break;
-
+		case 'l':
+			limit = parse_max_pkt_len(optarg);
+			freq_tlb[LOW] = limit;
+			break;
+		case 'm':
+			limit = parse_max_pkt_len(optarg);
+			freq_tlb[MED] = limit;
+			break;
+		case 'h':
+			limit = parse_max_pkt_len(optarg);
+			freq_tlb[HGH] = limit;
+			break;
 		/* long options */
 		case 0:
 			if (!strncmp(lgopts[option_index].name, "config", 6)) {
@@ -1299,6 +1511,20 @@ parse_args(int argc, char **argv)
 			}
 
 			if (!strncmp(lgopts[option_index].name,
+						"empty-poll", 10)) {
+				printf("empty-poll is enabled\n");
+				empty_poll_on = true;
+				ret = parse_ep_config(optarg);
+
+				if (ret) {
+					printf("invalid empty poll config\n");
+					print_usage(prgname);
+					return -1;
+				}
+
+			}
+
+			if (!strncmp(lgopts[option_index].name,
 					"enable-jumbo", 12)) {
 				struct option lenopts =
 					{"max-pkt-len", required_argument, \
@@ -1646,6 +1872,59 @@ init_power_library(void)
 	}
 	return ret;
 }
+static void
+empty_poll_setup_timer(void)
+{
+	int lcore_id = rte_lcore_id();
+	uint64_t hz = rte_get_timer_hz();
+
+	struct  ep_params *ep_ptr = ep_params;
+
+	ep_ptr->interval_ticks = hz / INTERVALS_PER_SECOND;
+
+	rte_timer_reset_sync(&ep_ptr->timer0,
+			ep_ptr->interval_ticks,
+			PERIODICAL,
+			lcore_id,
+			rte_empty_poll_detection,
+			(void *)ep_ptr);
+
+}
+static int
+launch_timer(unsigned int lcore_id)
+{
+	int64_t prev_tsc = 0, cur_tsc, diff_tsc, cycles_10ms;
+
+	RTE_SET_USED(lcore_id);
+
+
+	if (rte_get_master_lcore() != lcore_id) {
+		rte_panic("timer on lcore:%d which is not master core:%d\n",
+				lcore_id,
+				rte_get_master_lcore());
+	}
+
+	RTE_LOG(INFO, POWER, "Bring up the Timer\n");
+
+	empty_poll_setup_timer();
+
+	cycles_10ms = rte_get_timer_hz() / 100;
+
+	while (!is_done()) {
+		cur_tsc = rte_rdtsc();
+		diff_tsc = cur_tsc - prev_tsc;
+		if (diff_tsc > cycles_10ms) {
+			rte_timer_manage();
+			prev_tsc = cur_tsc;
+			cycles_10ms = rte_get_timer_hz() / 100;
+		}
+	}
+
+	RTE_LOG(INFO, POWER, "Timer_subsystem is done\n");
+
+	return 0;
+}
+
 
 int
 main(int argc, char **argv)
@@ -1828,13 +2107,15 @@ main(int argc, char **argv)
 		if (rte_lcore_is_enabled(lcore_id) == 0)
 			continue;
 
-		/* init timer structures for each enabled lcore */
-		rte_timer_init(&power_timers[lcore_id]);
-		hz = rte_get_timer_hz();
-		rte_timer_reset(&power_timers[lcore_id],
-			hz/TIMER_NUMBER_PER_SECOND, SINGLE, lcore_id,
-						power_timer_cb, NULL);
-
+		if (empty_poll_on == false) {
+			/* init timer structures for each enabled lcore */
+			rte_timer_init(&power_timers[lcore_id]);
+			hz = rte_get_timer_hz();
+			rte_timer_reset(&power_timers[lcore_id],
+					hz/TIMER_NUMBER_PER_SECOND,
+					SINGLE, lcore_id,
+					power_timer_cb, NULL);
+		}
 		qconf = &lcore_conf[lcore_id];
 		printf("\nInitializing rx queues on lcore %u ... ", lcore_id );
 		fflush(stdout);
@@ -1905,12 +2186,43 @@ main(int argc, char **argv)
 
 	check_all_ports_link_status(enabled_port_mask);
 
+	if (empty_poll_on == true) {
+
+		if (empty_poll_train) {
+			policy.state = TRAINING;
+		} else {
+			policy.state = MED_NORMAL;
+			policy.med_base_edpi = ep_med_edpi;
+			policy.hgh_base_edpi = ep_hgh_edpi;
+		}
+
+		ret = rte_power_empty_poll_stat_init(&ep_params,
+				freq_tlb,
+				&policy);
+		if (ret < 0)
+			rte_exit(EXIT_FAILURE, "empty poll init failed");
+	}
+
+
 	/* launch per-lcore init on every lcore */
-	rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);
+	if (empty_poll_on == false) {
+		rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);
+	} else {
+		empty_poll_stop = false;
+		rte_eal_mp_remote_launch(main_empty_poll_loop, NULL,
+				SKIP_MASTER);
+	}
+
+	if (empty_poll_on == true)
+		launch_timer(rte_lcore_id());
+
 	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
 		if (rte_eal_wait_lcore(lcore_id) < 0)
 			return -1;
 	}
 
+	if (empty_poll_on)
+		rte_power_empty_poll_stat_free();
+
 	return 0;
 }
diff --git a/examples/l3fwd-power/meson.build b/examples/l3fwd-power/meson.build
index 20c8054..a3c5c2f 100644
--- a/examples/l3fwd-power/meson.build
+++ b/examples/l3fwd-power/meson.build
@@ -9,6 +9,7 @@
 if host_machine.system() != 'linux'
 	build = false
 endif
+allow_experimental_apis = true
 deps += ['power', 'timer', 'lpm', 'hash']
 sources = files(
 	'main.c', 'perf_core.c'
-- 
2.7.5



More information about the dev mailing list