[dpdk-dev] [PATCH 3/4] app/eventdev: add perf pipeline test

Pavan Nikhilesh pbhagavatula at caviumnetworks.com
Thu Nov 30 08:24:05 CET 2017


This is a performance test case that aims at testing the following:
1. Measure the end-to-end performance of an event dev with an ethernet dev.
2. Maintain packet ordering from Rx to Tx.

The perf pipeline test configures the eventdev with Q queues and P ports,
where Q is nb_ethdev * nb_stages (plus one extra queue per ethdev when the
event device does not support all-types queues) and P is nb_workers.
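
For reference, a condensed sketch of how these counts are derived
(mirroring perf_pipeline_nb_event_queues() and the port setup in the patch
below; nb_ethdev, nb_queues and nb_ports are illustrative names):

    uint16_t nb_ethdev = rte_eth_dev_count();
    bool atq = evt_has_all_types_queue(opt->dev_id);

    int nb_queues = (nb_ethdev * opt->nb_stages) + (atq ? 0 : nb_ethdev);
    int nb_ports = evt_nr_active_lcores(opt->wlcores); /* one port per worker */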

The user can choose the number of workers and the number of stages through
the --wlcores and --stlist command line arguments respectively.
The probed ethernet devices act as producer(s) for this application.

The ethdevs are configured as event Rx adapters, enabling them to inject
events into the eventdev with the first stage schedule type requested by
the user through the --stlist command line argument.
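
For context, a rough sketch of the per-ethdev Rx adapter setup this relies
on (the actual calls live in perf_event_rx_adapter_setup(); adapter_id,
eth_dev_id, first_stage_queue and port_conf are illustrative names):

    struct rte_event_eth_rx_adapter_queue_conf queue_conf;
    int ret;

    memset(&queue_conf, 0, sizeof(queue_conf));
    queue_conf.ev.queue_id = first_stage_queue;
    queue_conf.ev.sched_type = opt->sched_type_list[0]; /* first --stlist entry */

    ret = rte_event_eth_rx_adapter_create(adapter_id, opt->dev_id, &port_conf);
    if (ret == 0)
        ret = rte_event_eth_rx_adapter_queue_add(adapter_id, eth_dev_id,
                -1 /* all Rx queues */, &queue_conf);
    if (ret == 0)
        ret = rte_event_eth_rx_adapter_start(adapter_id);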

Based on the number of stages to process (selected through --stlist), the
application forwards the event to the next stage queue. When the event
reaches the last stage of the pipeline, it is enqueued onto the ethdev Tx
queue if its schedule type is ATOMIC; otherwise, to maintain ordering, the
schedule type is set to ATOMIC and the event is enqueued onto the last
stage queue.
On packet Tx, the application increments the number of events processed
and prints it once per second.
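
Condensed, the per-event decision made by each worker (see the worker
functions in the patch below; names are shortened for illustration) looks
roughly like:

    uint8_t stage = ev.queue_id % nb_stages;

    if (stage < last_stage) {
        ev.queue_id++;                          /* forward to the next stage */
        ev.sched_type = sched_type_list[stage];
    } else if (ev.sched_type == RTE_SCHED_TYPE_ATOMIC) {
        tx_pkt(ev.mbuf);                        /* ordering already guaranteed */
        w->processed_pkts++;
    } else {
        ev.sched_type = RTE_SCHED_TYPE_ATOMIC;  /* restore ordering */
        if (!atq)
            ev.queue_id++;                      /* extra atomic Tx stage queue */
    }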

Note: The --prod_type_ethdev option is mandatory for running the application.

Example command to run perf pipeline test:
sudo build/app/dpdk-test-eventdev -c 0xf -s 0x8 --vdev=event_sw0 -- \
--test=perf_pipeline --wlcores=1 --prod_type_ethdev --stlist=ao

Signed-off-by: Pavan Nikhilesh <pbhagavatula at caviumnetworks.com>
---
 app/test-eventdev/Makefile             |   1 +
 app/test-eventdev/test_perf_pipeline.c | 548 +++++++++++++++++++++++++++++++++
 2 files changed, 549 insertions(+)
 create mode 100644 app/test-eventdev/test_perf_pipeline.c

diff --git a/app/test-eventdev/Makefile b/app/test-eventdev/Makefile
index dcb2ac4..9bd8ecd 100644
--- a/app/test-eventdev/Makefile
+++ b/app/test-eventdev/Makefile
@@ -50,5 +50,6 @@ SRCS-y += test_order_atq.c
 SRCS-y += test_perf_common.c
 SRCS-y += test_perf_queue.c
 SRCS-y += test_perf_atq.c
+SRCS-y += test_perf_pipeline.c
 
 include $(RTE_SDK)/mk/rte.app.mk
diff --git a/app/test-eventdev/test_perf_pipeline.c b/app/test-eventdev/test_perf_pipeline.c
new file mode 100644
index 0000000..a4a13f8
--- /dev/null
+++ b/app/test-eventdev/test_perf_pipeline.c
@@ -0,0 +1,548 @@
+/*
+ *   BSD LICENSE
+ *
+ *   Copyright (C) Cavium, Inc 2017.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Cavium, Inc nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "test_perf_common.h"
+
+/* See http://dpdk.org/doc/guides/tools/testeventdev.html for test details */
+
+static inline int
+perf_pipeline_nb_event_queues(struct evt_options *opt)
+{
+	uint16_t eth_count = rte_eth_dev_count();
+
+	/* One queue per (ethdev, stage) pair; devices without all-types queue
+	 * support need an extra atomic Tx stage queue per ethdev.
+	 */
+	return (eth_count * opt->nb_stages) +
+		(evt_has_all_types_queue(opt->dev_id) ? 0 : eth_count);
+}
+
+static __rte_always_inline void
+perf_pipeline_tx_pkt_safe(struct rte_mbuf *mbuf)
+{
+	while (rte_eth_tx_burst(mbuf->port, 0, &mbuf, 1) != 1)
+		rte_pause();
+}
+
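+/*
+ * "Unsafe" Tx helpers: serialize access to the ethdev Tx queue with a
+ * per-port spinlock when it cannot be shared safely by multiple worker
+ * lcores (t->mt_unsafe).
+ */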
+static __rte_always_inline void
+perf_pipeline_tx_pkt_unsafe(struct rte_mbuf *mbuf, struct test_perf *t)
+{
+	rte_spinlock_t *lk = &t->tx_lk[mbuf->port];
+
+	rte_spinlock_lock(lk);
+	perf_pipeline_tx_pkt_safe(mbuf);
+	rte_spinlock_unlock(lk);
+}
+
+static __rte_always_inline void
+perf_pipeline_tx_unsafe_burst(struct rte_mbuf *mbuf, struct test_perf *t)
+{
+	uint16_t port = mbuf->port;
+	rte_spinlock_t *lk = &t->tx_lk[port];
+
+	rte_spinlock_lock(lk);
+	rte_eth_tx_buffer(port, 0, t->tx_buf[port], mbuf);
+	rte_spinlock_unlock(lk);
+}
+
+static __rte_always_inline void
+perf_pipeline_tx_flush(struct test_perf *t, const uint8_t nb_ports)
+{
+	int i;
+	rte_spinlock_t *lk;
+
+	for (i = 0; i < nb_ports; i++) {
+		lk = &t->tx_lk[i];
+
+		rte_spinlock_lock(lk);
+		rte_eth_tx_buffer_flush(i, 0, t->tx_buf[i]);
+		rte_spinlock_unlock(lk);
+	}
+}
+
+static int
+perf_pipeline_worker_single_stage(void *arg)
+{
+	struct worker_data *w  = arg;
+	struct test_perf *t = w->t;
+	const uint8_t dev = w->dev_id;
+	const uint8_t port = w->port_id;
+	const bool mt_safe = !t->mt_unsafe;
+	const bool atq = evt_has_all_types_queue(dev);
+	struct rte_event ev;
+
+	while (t->done == false) {
+		uint16_t event = rte_event_dequeue_burst(dev, port, &ev, 1, 0);
+
+		if (!event) {
+			rte_pause();
+			continue;
+		}
+
+		if (ev.sched_type == RTE_SCHED_TYPE_ATOMIC) {
+			if (mt_safe)
+				perf_pipeline_tx_pkt_safe(ev.mbuf);
+			else
+				perf_pipeline_tx_pkt_unsafe(ev.mbuf, t);
+			w->processed_pkts++;
+		} else {
+			ev.event_type = RTE_EVENT_TYPE_CPU;
+			ev.op = RTE_EVENT_OP_FORWARD;
+			ev.sched_type = RTE_SCHED_TYPE_ATOMIC;
+			ev.priority = RTE_EVENT_DEV_PRIORITY_HIGHEST;
+			if (!atq)
+				ev.queue_id++;
+			while (rte_event_enqueue_burst(dev, port, &ev, 1) != 1)
+				rte_pause();
+		}
+	}
+
+	return 0;
+}
+
+static int
+perf_pipeline_worker_single_stage_burst(void *arg)
+{
+	int i;
+	struct worker_data *w  = arg;
+	struct test_perf *t = w->t;
+	const uint8_t dev = w->dev_id;
+	const uint8_t port = w->port_id;
+	const bool mt_safe = !t->mt_unsafe;
+	const bool atq = evt_has_all_types_queue(dev);
+	/* extra slot keeps the ev[i + 1] prefetch below within bounds */
+	struct rte_event ev[BURST_SIZE + 1];
+	const uint16_t nb_ports = rte_eth_dev_count();
+
+	while (t->done == false) {
+		uint16_t nb_rx = rte_event_dequeue_burst(dev, port, ev,
+				BURST_SIZE, 0);
+
+		if (!nb_rx) {
+			if (!mt_safe)
+				perf_pipeline_tx_flush(t, nb_ports);
+
+			rte_pause();
+			continue;
+		}
+
+		for (i = 0; i < nb_rx; i++) {
+			rte_prefetch0(ev[i + 1].mbuf);
+			if (ev[i].sched_type == RTE_SCHED_TYPE_ATOMIC) {
+
+				if (mt_safe)
+					perf_pipeline_tx_pkt_safe(ev[i].mbuf);
+				else
+					perf_pipeline_tx_unsafe_burst(
+							ev[i].mbuf, t);
+				ev[i].op = RTE_EVENT_OP_RELEASE;
+				w->processed_pkts++;
+			} else {
+				ev[i].event_type = RTE_EVENT_TYPE_CPU;
+				ev[i].op = RTE_EVENT_OP_FORWARD;
+				ev[i].sched_type = RTE_SCHED_TYPE_ATOMIC;
+				ev[i].priority = RTE_EVENT_DEV_PRIORITY_HIGHEST;
+				if (!atq)
+					ev[i].queue_id++;
+			}
+		}
+
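+		/* retry until the whole burst is enqueued */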
+		uint16_t enq;
+
+		enq = rte_event_enqueue_burst(dev, port, ev, nb_rx);
+		while (enq < nb_rx) {
+			enq += rte_event_enqueue_burst(dev, port,
+							ev + enq, nb_rx - enq);
+		}
+	}
+
+	return 0;
+}
+
+static int
+perf_pipeline_worker_multi_stage(void *arg)
+{
+	struct worker_data *w  = arg;
+	struct test_perf *t = w->t;
+	const uint8_t dev = w->dev_id;
+	const uint8_t port = w->port_id;
+	const bool mt_safe = !t->mt_unsafe;
+	const bool atq = evt_has_all_types_queue(dev);
+	const uint8_t last_queue = t->opt->nb_stages - 1;
+	const uint8_t nb_stages = atq ? t->opt->nb_stages :
+		t->opt->nb_stages + 1;
+	uint8_t *const sched_type_list = &t->sched_type_list[0];
+	uint8_t cq_id;
+	struct rte_event ev;
+
+
+	while (t->done == false) {
+		uint16_t event = rte_event_dequeue_burst(dev, port, &ev, 1, 0);
+
+		if (!event) {
+			rte_pause();
+			continue;
+		}
+
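+		/*
+		 * Queues are grouped per ethdev in blocks of nb_stages
+		 * (already including the extra Tx stage queue when all-types
+		 * queues are unsupported); the block offset is the current
+		 * pipeline stage.
+		 */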
+		cq_id = ev.queue_id % nb_stages;
+
+		if (cq_id >= last_queue) {
+			if (ev.sched_type == RTE_SCHED_TYPE_ATOMIC) {
+
+				if (mt_safe)
+					perf_pipeline_tx_pkt_safe(ev.mbuf);
+				else
+					perf_pipeline_tx_pkt_unsafe(ev.mbuf, t);
+				w->processed_pkts++;
+				continue;
+			}
+			ev.sched_type = RTE_SCHED_TYPE_ATOMIC;
+			ev.priority = RTE_EVENT_DEV_PRIORITY_HIGHEST;
+			if (!atq && cq_id == last_queue)
+				ev.queue_id++;
+		} else {
+			ev.queue_id++;
+			ev.sched_type = sched_type_list[cq_id];
+		}
+
+		ev.event_type = RTE_EVENT_TYPE_CPU;
+		ev.op = RTE_EVENT_OP_FORWARD;
+		while (rte_event_enqueue_burst(dev, port, &ev, 1) != 1)
+			rte_pause();
+	}
+	return 0;
+}
+
+static int
+perf_pipeline_worker_multi_stage_burst(void *arg)
+{
+	int i;
+	struct worker_data *w  = arg;
+	struct test_perf *t = w->t;
+	const uint8_t dev = w->dev_id;
+	const uint8_t port = w->port_id;
+	uint8_t *const sched_type_list = &t->sched_type_list[0];
+	const bool mt_safe = !t->mt_unsafe;
+	const bool atq = evt_has_all_types_queue(dev);
+	const uint8_t last_queue = t->opt->nb_stages - 1;
+	const uint8_t nb_stages = atq ? t->opt->nb_stages :
+		t->opt->nb_stages + 1;
+	uint8_t cq_id;
+	struct rte_event ev[BURST_SIZE + 1];
+	const uint16_t nb_ports = rte_eth_dev_count();
+
+	while (t->done == false) {
+		uint16_t nb_rx = rte_event_dequeue_burst(dev, port, ev,
+				BURST_SIZE, 0);
+
+		if (!nb_rx) {
+			if (!mt_safe)
+				perf_pipeline_tx_flush(t, nb_ports);
+			rte_pause();
+			continue;
+		}
+
+		for (i = 0; i < nb_rx; i++) {
+			rte_prefetch0(ev[i + 1].mbuf);
+			cq_id = ev[i].queue_id % nb_stages;
+
+			if (cq_id >= last_queue) {
+				if (ev[i].sched_type == RTE_SCHED_TYPE_ATOMIC) {
+
+					if (mt_safe)
+						perf_pipeline_tx_pkt_safe(
+								ev[i].mbuf);
+					else
+						perf_pipeline_tx_unsafe_burst(
+								ev[i].mbuf, t);
+					ev[i].op = RTE_EVENT_OP_RELEASE;
+					w->processed_pkts++;
+					continue;
+				}
+
+				ev[i].sched_type = RTE_SCHED_TYPE_ATOMIC;
+				ev[i].priority = RTE_EVENT_DEV_PRIORITY_HIGHEST;
+				if (!atq && cq_id == last_queue)
+					ev[i].queue_id++;
+			} else {
+				ev[i].queue_id++;
+				ev[i].sched_type = sched_type_list[cq_id];
+			}
+
+			ev[i].event_type = RTE_EVENT_TYPE_CPU;
+			ev[i].op = RTE_EVENT_OP_FORWARD;
+		}
+
+		uint16_t enq;
+
+		enq = rte_event_enqueue_burst(dev, port, ev, nb_rx);
+		while (enq < nb_rx) {
+			enq += rte_event_enqueue_burst(dev, port,
+							ev + enq, nb_rx - enq);
+		}
+	}
+	return 0;
+}
+
+static int
+worker_wrapper(void *arg)
+{
+	struct worker_data *w  = arg;
+	struct evt_options *opt = w->t->opt;
+	const bool burst = evt_has_burst_mode(w->dev_id);
+	const uint8_t nb_stages = opt->nb_stages;
+	RTE_SET_USED(opt);
+
+	/* allow compiler to optimize */
+	if (nb_stages == 1) {
+		if (!burst)
+			return perf_pipeline_worker_single_stage(arg);
+		else
+			return perf_pipeline_worker_single_stage_burst(arg);
+	} else {
+		if (!burst)
+			return perf_pipeline_worker_multi_stage(arg);
+		else
+			return perf_pipeline_worker_multi_stage_burst(arg);
+	}
+	rte_panic("invalid worker\n");
+}
+
+static int
+perf_pipeline_launch_lcores(struct evt_test *test, struct evt_options *opt)
+{
+	return perf_launch_lcores(test, opt, worker_wrapper);
+}
+
+static int
+perf_pipeline_eventdev_setup(struct evt_test *test, struct evt_options *opt)
+{
+	int ret;
+	int nb_ports;
+	int nb_queues;
+	int nb_stages = opt->nb_stages;
+	uint8_t queue;
+	uint8_t port;
+	uint8_t atq = evt_has_all_types_queue(opt->dev_id);
+	struct test_perf *t = evt_test_priv(test);
+
+	nb_ports = evt_nr_active_lcores(opt->wlcores);
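+	/* one queue per (ethdev, stage); devices without all-types queues need
+	 * an extra atomic Tx stage queue per ethdev (see
+	 * perf_pipeline_nb_event_queues()).
+	 */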
+	nb_queues = rte_eth_dev_count() * (nb_stages);
+	nb_queues += atq ? 0 : rte_eth_dev_count();
+
+	const struct rte_event_dev_config config = {
+			.nb_event_queues = nb_queues,
+			.nb_event_ports = nb_ports,
+			.nb_events_limit  = 4096,
+			.nb_event_queue_flows = opt->nb_flows,
+			.nb_event_port_dequeue_depth = 128,
+			.nb_event_port_enqueue_depth = 128,
+	};
+
+	ret = rte_event_dev_configure(opt->dev_id, &config);
+	if (ret) {
+		evt_err("failed to configure eventdev %d", opt->dev_id);
+		return ret;
+	}
+
+	struct rte_event_queue_conf q_conf = {
+			.priority = RTE_EVENT_DEV_PRIORITY_NORMAL,
+			.nb_atomic_flows = opt->nb_flows,
+			.nb_atomic_order_sequences = opt->nb_flows,
+	};
+	/* queue configurations */
+	for (queue = 0; queue < nb_queues; queue++) {
+		if (atq) {
+			q_conf.event_queue_cfg = RTE_EVENT_QUEUE_CFG_ALL_TYPES;
+		} else {
+			uint8_t slot;
+
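+			/* Queues are grouped per ethdev in blocks of
+			 * nb_stages + 1; the last slot in each block is the
+			 * atomic Tx stage queue, the rest follow --stlist.
+			 */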
+			slot = queue % (nb_stages + 1);
+			q_conf.schedule_type = slot == nb_stages ?
+				RTE_SCHED_TYPE_ATOMIC :
+				opt->sched_type_list[slot];
+		}
+
+		ret = rte_event_queue_setup(opt->dev_id, queue, &q_conf);
+		if (ret) {
+			evt_err("failed to setup queue=%d", queue);
+			return ret;
+		}
+	}
+
+	/* port configuration */
+	const struct rte_event_port_conf p_conf = {
+			.dequeue_depth = opt->wkr_deq_dep,
+			.enqueue_depth = 64,
+			.new_event_threshold = 4096,
+	};
+
+	/* setup one port per worker, linking to all queues */
+	for (port = 0; port < evt_nr_active_lcores(opt->wlcores); port++) {
+		struct worker_data *w = &t->worker[port];
+
+		w->dev_id = opt->dev_id;
+		w->port_id = port;
+		w->t = t;
+		w->processed_pkts = 0;
+		w->latency = 0;
+
+		ret = rte_event_port_setup(opt->dev_id, port, &p_conf);
+		if (ret) {
+			evt_err("failed to setup port %d", port);
+			return ret;
+		}
+
+		ret = rte_event_port_link(opt->dev_id, port, NULL, NULL, 0);
+		if (ret != nb_queues) {
+			evt_err("failed to link all queues to port %d", port);
+			return -EINVAL;
+		}
+	}
+
+	ret = perf_event_rx_adapter_setup(opt, atq ? nb_stages : nb_stages + 1,
+			p_conf);
+	if (ret)
+		return ret;
+
+	if (!evt_has_distributed_sched(opt->dev_id)) {
+		uint32_t service_id;
+		rte_event_dev_service_id_get(opt->dev_id, &service_id);
+		ret = evt_service_setup(service_id);
+		if (ret) {
+			evt_err("No service lcore found to run event dev.");
+			return ret;
+		}
+	}
+
+	ret = rte_event_dev_start(opt->dev_id);
+	if (ret) {
+		evt_err("failed to start eventdev %d", opt->dev_id);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void
+perf_pipeline_opt_dump(struct evt_options *opt)
+{
+	evt_dump_fwd_latency(opt);
+	perf_opt_dump(opt, perf_pipeline_nb_event_queues(opt));
+}
+
+static int
+perf_pipeline_opt_check(struct evt_options *opt)
+{
+	unsigned int lcores;
+	/*
+	 * N worker + 1 master
+	 */
+	lcores = 2;
+
+	if (opt->prod_type == EVT_PROD_TYPE_SYNT) {
+		evt_err("test doesn't support synthetic producers");
+		return -1;
+	}
+
+	if (!rte_eth_dev_count()) {
+		evt_err("test needs minimum 1 ethernet dev");
+		return -1;
+	}
+
+	if (rte_lcore_count() < lcores) {
+		evt_err("test needs minimum %d lcores", lcores);
+		return -1;
+	}
+
+	/* Validate worker lcores */
+	if (evt_lcores_has_overlap(opt->wlcores, rte_get_master_lcore())) {
+		evt_err("worker lcores overlap with the master lcore");
+		return -1;
+	}
+	if (evt_has_disabled_lcore(opt->wlcores)) {
+		evt_err("one or more worker lcores are not enabled");
+		return -1;
+	}
+	if (!evt_has_active_lcore(opt->wlcores)) {
+		evt_err("minimum one worker is required");
+		return -1;
+	}
+
+	if (perf_pipeline_nb_event_queues(opt) > EVT_MAX_QUEUES) {
+		evt_err("number of queues exceeds %d", EVT_MAX_QUEUES);
+		return -1;
+	}
+	if (perf_nb_event_ports(opt) > EVT_MAX_PORTS) {
+		evt_err("number of ports exceeds %d", EVT_MAX_PORTS);
+		return -1;
+	}
+
+	if (evt_has_invalid_stage(opt))
+		return -1;
+
+	if (evt_has_invalid_sched_type(opt))
+		return -1;
+
+	return 0;
+}
+
+static bool
+perf_pipeline_capability_check(struct evt_options *opt)
+{
+	struct rte_event_dev_info dev_info;
+
+	rte_event_dev_info_get(opt->dev_id, &dev_info);
+	if (dev_info.max_event_queues < perf_pipeline_nb_event_queues(opt) ||
+			dev_info.max_event_ports <
+			evt_nr_active_lcores(opt->wlcores)) {
+		evt_err("not enough eventdev queues=%d/%d or ports=%d/%d",
+			perf_pipeline_nb_event_queues(opt),
+			dev_info.max_event_queues,
+			evt_nr_active_lcores(opt->wlcores),
+			dev_info.max_event_ports);
+		return false;
+	}
+
+	return true;
+}
+
+static const struct evt_test_ops perf_pipeline =  {
+	.cap_check          = perf_pipeline_capability_check,
+	.opt_check          = perf_pipeline_opt_check,
+	.opt_dump           = perf_pipeline_opt_dump,
+	.test_setup         = perf_test_setup,
+	.mempool_setup      = perf_mempool_setup,
+	.ethdev_setup	    = perf_ethdev_setup,
+	.eventdev_setup     = perf_pipeline_eventdev_setup,
+	.launch_lcores      = perf_pipeline_launch_lcores,
+	.eventdev_destroy   = perf_eventdev_destroy,
+	.mempool_destroy    = perf_mempool_destroy,
+	.ethdev_destroy	    = perf_ethdev_destroy,
+	.test_result        = perf_test_result,
+	.test_destroy       = perf_test_destroy,
+};
+
+EVT_TEST_REGISTER(perf_pipeline);
-- 
2.7.4


