[dpdk-dev] [PATCH v3 1/9] test/ring: add contention stress test

Ananyev, Konstantin konstantin.ananyev at intel.com
Thu Apr 9 14:36:35 CEST 2020


> <snip>
> 
> > Subject: [PATCH v3 1/9] test/ring: add contention stress test
> Minor, would 'add stress test for overcommitted use case' sound better?

I'd like to point out that this test case can be used as a contention stress test
(many threads do enqueue/dequeue to/from the same ring) for both
over-committed and non-over-committed scenarios...
Will probably try to add a few extra explanations in v4.  
 
> >
> > Introduce new test-case to measure ring performance under contention
> Minor, 'over committed' seems to be the word commonly used in the references you provided. Does it make sense to use that?
> 
> > (miltiple producers/consumers).
>     ^^^^^^^ multiple

ack.

> 
> > Starts dequeue/enqueue loop on all available slave lcores.
> >
> > Signed-off-by: Konstantin Ananyev <konstantin.ananyev at intel.com>
> > ---
> >  app/test/Makefile                |   2 +
> >  app/test/meson.build             |   2 +
> >  app/test/test_ring_mpmc_stress.c |  31 +++
> >  app/test/test_ring_stress.c      |  48 ++++
> >  app/test/test_ring_stress.h      |  35 +++
> >  app/test/test_ring_stress_impl.h | 444 +++++++++++++++++++++++++++++++
> Would be good to change the file names to indicate that these tests are for over-committed usecase/configuration.
> These are performance tests, better to have 'perf' or 'performance' in their names.
> 
> >  6 files changed, 562 insertions(+)
> >  create mode 100644 app/test/test_ring_mpmc_stress.c  create mode 100644
> > app/test/test_ring_stress.c  create mode 100644 app/test/test_ring_stress.h
> > create mode 100644 app/test/test_ring_stress_impl.h
> >
> > diff --git a/app/test/Makefile b/app/test/Makefile index
> > 1f080d162..4eefaa887 100644
> > --- a/app/test/Makefile
> > +++ b/app/test/Makefile
> > @@ -77,7 +77,9 @@ SRCS-y += test_external_mem.c  SRCS-y +=
> > test_rand_perf.c
> >
> >  SRCS-y += test_ring.c
> > +SRCS-y += test_ring_mpmc_stress.c
> >  SRCS-y += test_ring_perf.c
> > +SRCS-y += test_ring_stress.c
> >  SRCS-y += test_pmd_perf.c
> >
> >  ifeq ($(CONFIG_RTE_LIBRTE_TABLE),y)
> > diff --git a/app/test/meson.build b/app/test/meson.build index
> > 351d29cb6..827b04886 100644
> > --- a/app/test/meson.build
> > +++ b/app/test/meson.build
> > @@ -100,7 +100,9 @@ test_sources = files('commands.c',
> >  	'test_rib.c',
> >  	'test_rib6.c',
> >  	'test_ring.c',
> > +	'test_ring_mpmc_stress.c',
> >  	'test_ring_perf.c',
> > +	'test_ring_stress.c',
> >  	'test_rwlock.c',
> >  	'test_sched.c',
> >  	'test_service_cores.c',
> > diff --git a/app/test/test_ring_mpmc_stress.c
> > b/app/test/test_ring_mpmc_stress.c
> > new file mode 100644
> > index 000000000..1524b0248
> > --- /dev/null
> > +++ b/app/test/test_ring_mpmc_stress.c
> > @@ -0,0 +1,31 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2020 Intel Corporation
> > + */
> > +
> > +#include "test_ring_stress_impl.h"
> > +
> > +static inline uint32_t
> > +_st_ring_dequeue_bulk(struct rte_ring *r, void **obj, uint32_t n,
> > +	uint32_t *avail)
> > +{
> > +	return rte_ring_mc_dequeue_bulk(r, obj, n, avail); }
> > +
> > +static inline uint32_t
> > +_st_ring_enqueue_bulk(struct rte_ring *r, void * const *obj, uint32_t n,
> > +	uint32_t *free)
> > +{
> > +	return rte_ring_mp_enqueue_bulk(r, obj, n, free); }
> > +
> > +static int
> > +_st_ring_init(struct rte_ring *r, const char *name, uint32_t num) {
> > +	return rte_ring_init(r, name, num, 0); }
> > +
> > +const struct test test_ring_mpmc_stress = {
> > +	.name = "MP/MC",
> > +	.nb_case = RTE_DIM(tests),
> > +	.cases = tests,
> > +};
> > diff --git a/app/test/test_ring_stress.c b/app/test/test_ring_stress.c new file
> > mode 100644 index 000000000..60706f799
> > --- /dev/null
> > +++ b/app/test/test_ring_stress.c
> > @@ -0,0 +1,48 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2020 Intel Corporation
> > + */
> > +
> > +#include "test_ring_stress.h"
> > +
> > +static int
> > +run_test(const struct test *test)
> > +{
> > +	int32_t rc;
> > +	uint32_t i, k;
> > +
> > +	for (i = 0, k = 0; i != test->nb_case; i++) {
> > +
> > +		printf("TEST-CASE %s %s START\n",
> > +			test->name, test->cases[i].name);
> > +
> > +		rc = test->cases[i].func(test->cases[i].wfunc);
> > +		k += (rc == 0);
> > +
> > +		if (rc != 0)
> > +			printf("TEST-CASE %s %s FAILED\n",
> > +				test->name, test->cases[i].name);
> > +		else
> > +			printf("TEST-CASE %s %s OK\n",
> > +				test->name, test->cases[i].name);
> > +	}
> > +
> > +	return k;
> > +}
> > +
> > +static int
> > +test_ring_stress(void)
> > +{
> > +	uint32_t n, k;
> > +
> > +	n = 0;
> > +	k = 0;
> > +
> > +	n += test_ring_mpmc_stress.nb_case;
> > +	k += run_test(&test_ring_mpmc_stress);
> > +
> > +	printf("Number of tests:\t%u\nSuccess:\t%u\nFailed:\t%u\n",
> > +		n, k, n - k);
> > +	return (k != n);
> > +}
> > +
> > +REGISTER_TEST_COMMAND(ring_stress_autotest, test_ring_stress);
> > diff --git a/app/test/test_ring_stress.h b/app/test/test_ring_stress.h new file
> > mode 100644 index 000000000..60eac6216
> > --- /dev/null
> > +++ b/app/test/test_ring_stress.h
> > @@ -0,0 +1,35 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2020 Intel Corporation
> > + */
> > +
> > +
> > +#include <inttypes.h>
> > +#include <stddef.h>
> > +#include <stdalign.h>
> > +#include <string.h>
> > +#include <stdio.h>
> > +#include <unistd.h>
> > +
> > +#include <rte_ring.h>
> > +#include <rte_cycles.h>
> > +#include <rte_launch.h>
> > +#include <rte_pause.h>
> > +#include <rte_random.h>
> > +#include <rte_malloc.h>
> > +#include <rte_spinlock.h>
> > +
> > +#include "test.h"
> > +
> > +struct test_case {
> > +	const char *name;
> > +	int (*func)(int (*)(void *));
> > +	int (*wfunc)(void *arg);
> > +};
> > +
> > +struct test {
> > +	const char *name;
> > +	uint32_t nb_case;
> > +	const struct test_case *cases;
> > +};
> > +
> > +extern const struct test test_ring_mpmc_stress;
> > diff --git a/app/test/test_ring_stress_impl.h
> > b/app/test/test_ring_stress_impl.h
> > new file mode 100644
> > index 000000000..11476d28c
> > --- /dev/null
> > +++ b/app/test/test_ring_stress_impl.h
> > @@ -0,0 +1,444 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2020 Intel Corporation
> > + */
> > +
> > +#include "test_ring_stress.h"
> > +
> > +/*
> > + * Measures performance of ring enqueue/dequeue under high contention
> > +*/
> > +
> > +#define RING_NAME	"RING_STRESS"
> > +#define BULK_NUM	32
> > +#define RING_SIZE	(2 * BULK_NUM * RTE_MAX_LCORE)
> > +
> > +enum {
> > +	WRK_CMD_STOP,
> > +	WRK_CMD_RUN,
> > +};
> > +
> > +static volatile uint32_t wrk_cmd __rte_cache_aligned;
> > +
> > +/* test run-time in seconds */
> > +static const uint32_t run_time = 60;
> > +static const uint32_t verbose;
> > +
> > +struct lcore_stat {
> > +	uint64_t nb_cycle;
> > +	struct {
> > +		uint64_t nb_call;
> > +		uint64_t nb_obj;
> > +		uint64_t nb_cycle;
> > +		uint64_t max_cycle;
> > +		uint64_t min_cycle;
> > +	} op;
> > +};
> > +
> > +struct lcore_arg {
> > +	struct rte_ring *rng;
> > +	struct lcore_stat stats;
> > +} __rte_cache_aligned;
> > +
> > +struct ring_elem {
> > +	uint32_t cnt[RTE_CACHE_LINE_SIZE / sizeof(uint32_t)]; }
> > +__rte_cache_aligned;
> > +
> > +/*
> > + * redefinable functions
> > + */
> > +static uint32_t
> > +_st_ring_dequeue_bulk(struct rte_ring *r, void **obj, uint32_t n,
> > +	uint32_t *avail);
> > +
> > +static uint32_t
> > +_st_ring_enqueue_bulk(struct rte_ring *r, void * const *obj, uint32_t n,
> > +	uint32_t *free);
> > +
> > +static int
> > +_st_ring_init(struct rte_ring *r, const char *name, uint32_t num);
> > +
> > +
> > +static void
> > +lcore_stat_update(struct lcore_stat *ls, uint64_t call, uint64_t obj,
> > +	uint64_t tm, int32_t prcs)
> > +{
> > +	ls->op.nb_call += call;
> > +	ls->op.nb_obj += obj;
> > +	ls->op.nb_cycle += tm;
> > +	if (prcs) {
> > +		ls->op.max_cycle = RTE_MAX(ls->op.max_cycle, tm);
> > +		ls->op.min_cycle = RTE_MIN(ls->op.min_cycle, tm);
> > +	}
> > +}
> > +
> > +static void
> > +lcore_op_stat_aggr(struct lcore_stat *ms, const struct lcore_stat *ls)
> > +{
> > +
> > +	ms->op.nb_call += ls->op.nb_call;
> > +	ms->op.nb_obj += ls->op.nb_obj;
> > +	ms->op.nb_cycle += ls->op.nb_cycle;
> > +	ms->op.max_cycle = RTE_MAX(ms->op.max_cycle, ls->op.max_cycle);
> > +	ms->op.min_cycle = RTE_MIN(ms->op.min_cycle, ls->op.min_cycle); }
> > +
> > +static void
> > +lcore_stat_aggr(struct lcore_stat *ms, const struct lcore_stat *ls) {
> > +	ms->nb_cycle = RTE_MAX(ms->nb_cycle, ls->nb_cycle);
> > +	lcore_op_stat_aggr(ms, ls);
> > +}
> > +
> > +static void
> > +lcore_stat_dump(FILE *f, uint32_t lc, const struct lcore_stat *ls) {
> > +	long double st;
> > +
> > +	st = (long double)rte_get_timer_hz() / US_PER_S;
> > +
> > +	if (lc == UINT32_MAX)
> > +		fprintf(f, "%s(AGGREGATE)={\n", __func__);
> > +	else
> > +		fprintf(f, "%s(lc=%u)={\n", __func__, lc);
> > +
> > +	fprintf(f, "\tnb_cycle=%" PRIu64 "(%.2Lf usec),\n",
> > +		ls->nb_cycle, (long double)ls->nb_cycle / st);
> > +
> > +	fprintf(f, "\tDEQ+ENQ={\n");
> > +
> > +	fprintf(f, "\t\tnb_call=%" PRIu64 ",\n", ls->op.nb_call);
> > +	fprintf(f, "\t\tnb_obj=%" PRIu64 ",\n", ls->op.nb_obj);
> > +	fprintf(f, "\t\tnb_cycle=%" PRIu64 ",\n", ls->op.nb_cycle);
> > +	fprintf(f, "\t\tobj/call(avg): %.2Lf\n",
> > +		(long double)ls->op.nb_obj / ls->op.nb_call);
> > +	fprintf(f, "\t\tcycles/obj(avg): %.2Lf\n",
> > +		(long double)ls->op.nb_cycle / ls->op.nb_obj);
> > +	fprintf(f, "\t\tcycles/call(avg): %.2Lf\n",
> > +		(long double)ls->op.nb_cycle / ls->op.nb_call);
> > +
> > +	/* if min/max cycles per call stats was collected */
> > +	if (ls->op.min_cycle != UINT64_MAX) {
> > +		fprintf(f, "\t\tmax cycles/call=%" PRIu64 "(%.2Lf usec),\n",
> > +			ls->op.max_cycle,
> > +			(long double)ls->op.max_cycle / st);
> > +		fprintf(f, "\t\tmin cycles/call=%" PRIu64 "(%.2Lf usec),\n",
> > +			ls->op.min_cycle,
> > +			(long double)ls->op.min_cycle / st);
> > +	}
> > +
> > +	fprintf(f, "\t},\n");
> > +	fprintf(f, "};\n");
> > +}
> > +
> > +static void
> > +fill_ring_elm(struct ring_elem *elm, uint32_t fill) {
> > +	uint32_t i;
> > +
> > +	for (i = 0; i != RTE_DIM(elm->cnt); i++)
> > +		elm->cnt[i] = fill;
> > +}
> > +
> > +static int32_t
> > +check_updt_elem(struct ring_elem *elm[], uint32_t num,
> > +	const struct ring_elem *check, const struct ring_elem *fill) {
> > +	uint32_t i;
> > +
> > +	static rte_spinlock_t dump_lock;
> > +
> > +	for (i = 0; i != num; i++) {
> > +		if (memcmp(check, elm[i], sizeof(*check)) != 0) {
> > +			rte_spinlock_lock(&dump_lock);
> > +			printf("%s(lc=%u, num=%u) failed at %u-th iter, "
> > +				"offending object: %p\n",
> > +				__func__, rte_lcore_id(), num, i, elm[i]);
> > +			rte_memdump(stdout, "expected", check,
> > sizeof(*check));
> > +			rte_memdump(stdout, "result", elm[i], sizeof(elm[i]));
> > +			rte_spinlock_unlock(&dump_lock);
> > +			return -EINVAL;
> > +		}
> > +		memcpy(elm[i], fill, sizeof(*elm[i]));
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static int
> > +check_ring_op(uint32_t exp, uint32_t res, uint32_t lc,
> minor, lcore instead of lc would be better
> 
> > +	const char *fname, const char *opname) {
> > +	if (exp != res) {
> > +		printf("%s(lc=%u) failure: %s expected: %u, returned %u\n",
> Suggest using lcore in the printf
> 
> > +			fname, lc, opname, exp, res);
> > +		return -ENOSPC;
> > +	}
> > +	return 0;
> > +}
> > +
> > +static int
> > +test_worker_prcs(void *arg)
> > +{
> > +	int32_t rc;
> > +	uint32_t lc, n, num;
> minor, lcore instead of lc would be better
> 
> > +	uint64_t cl, tm0, tm1;
> > +	struct lcore_arg *la;
> > +	struct ring_elem def_elm, loc_elm;
> > +	struct ring_elem *obj[2 * BULK_NUM];
> > +
> > +	la = arg;
> > +	lc = rte_lcore_id();
> > +
> > +	fill_ring_elm(&def_elm, UINT32_MAX);
> > +	fill_ring_elm(&loc_elm, lc);
> > +
> > +	while (wrk_cmd != WRK_CMD_RUN) {
> > +		rte_smp_rmb();
> > +		rte_pause();
> > +	}
> > +
> > +	cl = rte_rdtsc_precise();
> > +
> > +	do {
> > +		/* num in interval [7/8, 11/8] of BULK_NUM */
> > +		num = 7 * BULK_NUM / 8 + rte_rand() % (BULK_NUM / 2);
> > +
> > +		/* reset all pointer values */
> > +		memset(obj, 0, sizeof(obj));
> > +
> > +		/* dequeue num elems */
> > +		tm0 = rte_rdtsc_precise();
> > +		n = _st_ring_dequeue_bulk(la->rng, (void **)obj, num, NULL);
> > +		tm0 = rte_rdtsc_precise() - tm0;
> > +
> > +		/* check return value and objects */
> > +		rc = check_ring_op(num, n, lc, __func__,
> > +			RTE_STR(_st_ring_dequeue_bulk));
> > +		if (rc == 0)
> > +			rc = check_updt_elem(obj, num, &def_elm,
> > &loc_elm);
> > +		if (rc != 0)
> > +			break;
> Since this seems like a performance test, should we skip validating the objects?
> Did these tests run on Travis CI? I believe Travis CI has trouble running stress/performance tests if they take too much time.
> The RTS and HTS tests should be added to functional tests.
> 
> > +
> > +		/* enqueue num elems */
> > +		rte_compiler_barrier();
> > +		rc = check_updt_elem(obj, num, &loc_elm, &def_elm);
> > +		if (rc != 0)
> > +			break;
> > +
> > +		tm1 = rte_rdtsc_precise();
> > +		n = _st_ring_enqueue_bulk(la->rng, (void **)obj, num, NULL);
> > +		tm1 = rte_rdtsc_precise() - tm1;
> > +
> > +		/* check return value */
> > +		rc = check_ring_op(num, n, lc, __func__,
> > +			RTE_STR(_st_ring_enqueue_bulk));
> > +		if (rc != 0)
> > +			break;
> > +
> > +		lcore_stat_update(&la->stats, 1, num, tm0 + tm1, 1);
> > +
> > +	} while (wrk_cmd == WRK_CMD_RUN);
> > +
> > +	la->stats.nb_cycle = rte_rdtsc_precise() - cl;
> > +	return rc;
> > +}
> > +
> > +static int
> > +test_worker_avg(void *arg)
> > +{
> > +	int32_t rc;
> > +	uint32_t lc, n, num;
> > +	uint64_t cl;
> > +	struct lcore_arg *la;
> > +	struct ring_elem def_elm, loc_elm;
> > +	struct ring_elem *obj[2 * BULK_NUM];
> > +
> > +	la = arg;
> > +	lc = rte_lcore_id();
> > +
> > +	fill_ring_elm(&def_elm, UINT32_MAX);
> > +	fill_ring_elm(&loc_elm, lc);
> > +
> > +	while (wrk_cmd != WRK_CMD_RUN) {
> > +		rte_smp_rmb();
> > +		rte_pause();
> > +	}
> > +
> > +	cl = rte_rdtsc_precise();
> > +
> > +	do {
> > +		/* num in interval [7/8, 11/8] of BULK_NUM */
> > +		num = 7 * BULK_NUM / 8 + rte_rand() % (BULK_NUM / 2);
> > +
> > +		/* reset all pointer values */
> > +		memset(obj, 0, sizeof(obj));
> > +
> > +		/* dequeue num elems */
> > +		n = _st_ring_dequeue_bulk(la->rng, (void **)obj, num, NULL);
> > +
> > +		/* check return value and objects */
> > +		rc = check_ring_op(num, n, lc, __func__,
> > +			RTE_STR(_st_ring_dequeue_bulk));
> > +		if (rc == 0)
> > +			rc = check_updt_elem(obj, num, &def_elm,
> > &loc_elm);
> > +		if (rc != 0)
> > +			break;
> > +
> > +		/* enqueue num elems */
> > +		rte_compiler_barrier();
> > +		rc = check_updt_elem(obj, num, &loc_elm, &def_elm);
> > +		if (rc != 0)
> > +			break;
> > +
> > +		n = _st_ring_enqueue_bulk(la->rng, (void **)obj, num, NULL);
> > +
> > +		/* check return value */
> > +		rc = check_ring_op(num, n, lc, __func__,
> > +			RTE_STR(_st_ring_enqueue_bulk));
> > +		if (rc != 0)
> > +			break;
> > +
> > +		lcore_stat_update(&la->stats, 1, num, 0, 0);
> > +
> > +	} while (wrk_cmd == WRK_CMD_RUN);
> > +
> > +	/* final stats update */
> > +	cl = rte_rdtsc_precise() - cl;
> > +	lcore_stat_update(&la->stats, 0, 0, cl, 0);
> > +	la->stats.nb_cycle = cl;
> > +
> > +	return rc;
> > +}
> Just wondering about the need of 2 tests which run the same functionality. The difference is the way in which numbers are collected.
> Does 'test_worker_avg' add any value? IMO, we can remove 'test_worker_avg'.

Yeah, they are quite similar.
I added the _average_ version for two reasons:
1. In the _precise_ version I call rte_rdtsc_precise() straight before/after
    each enqueue/dequeue op.
    At least on IA, rte_rdtsc_precise() implies an mb().
    This extra sync point might hide some sync problems in the ring
    enqueue/dequeue itself.
    So having a separate test without such extra sync points
    gives extra confidence that these tests would catch ring sync problems, if any.  
2. People usually don't do enqueue/dequeue on their own.
    One common pattern is: dequeue, read/write data from the dequeued objects, enqueue.
    So this test measures cycles for dequeue/enqueue plus some reads/writes
    to the objects from the ring.
 
> > +
> > +static void
> > +mt1_fini(struct rte_ring *rng, void *data) {
> > +	rte_free(rng);
> > +	rte_free(data);
> > +}
> > +
> > +static int
> > +mt1_init(struct rte_ring **rng, void **data, uint32_t num) {
> > +	int32_t rc;
> > +	size_t sz;
> > +	uint32_t i, nr;
> > +	struct rte_ring *r;
> > +	struct ring_elem *elm;
> > +	void *p;
> > +
> > +	*rng = NULL;
> > +	*data = NULL;
> > +
> > +	sz = num * sizeof(*elm);
> > +	elm = rte_zmalloc(NULL, sz, __alignof__(*elm));
> > +	if (elm == NULL) {
> > +		printf("%s: alloc(%zu) for %u elems data failed",
> > +			__func__, sz, num);
> > +		return -ENOMEM;
> > +	}
> > +
> > +	*data = elm;
> > +
> > +	/* alloc ring */
> > +	nr = 2 * num;
> > +	sz = rte_ring_get_memsize(nr);
> > +	r = rte_zmalloc(NULL, sz, __alignof__(*r));
> > +	if (r == NULL) {
> > +		printf("%s: alloc(%zu) for FIFO with %u elems failed",
> > +			__func__, sz, nr);
> > +		return -ENOMEM;
> > +	}
> > +
> > +	*rng = r;
> > +
> > +	rc = _st_ring_init(r, RING_NAME, nr);
> > +	if (rc != 0) {
> > +		printf("%s: _st_ring_init(%p, %u) failed, error: %d(%s)\n",
> > +			__func__, r, nr, rc, strerror(-rc));
> > +		return rc;
> > +	}
> > +
> > +	for (i = 0; i != num; i++) {
> > +		fill_ring_elm(elm + i, UINT32_MAX);
> > +		p = elm + i;
> > +		if (_st_ring_enqueue_bulk(r, &p, 1, NULL) != 1)
> > +			break;
> > +	}
> > +
> > +	if (i != num) {
> > +		printf("%s: _st_ring_enqueue(%p, %u) returned %u\n",
> > +			__func__, r, num, i);
> > +		return -ENOSPC;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static int
> > +test_mt1(int (*test)(void *))
> > +{
> > +	int32_t rc;
> > +	uint32_t lc, mc;
> > +	struct rte_ring *r;
> > +	void *data;
> > +	struct lcore_arg arg[RTE_MAX_LCORE];
> > +
> > +	static const struct lcore_stat init_stat = {
> > +		.op.min_cycle = UINT64_MAX,
> > +	};
> > +
> > +	rc = mt1_init(&r, &data, RING_SIZE);
> > +	if (rc != 0) {
> > +		mt1_fini(r, data);
> > +		return rc;
> > +	}
> > +
> > +	memset(arg, 0, sizeof(arg));
> > +
> > +	/* launch on all slaves */
> > +	RTE_LCORE_FOREACH_SLAVE(lc) {
> > +		arg[lc].rng = r;
> > +		arg[lc].stats = init_stat;
> > +		rte_eal_remote_launch(test, &arg[lc], lc);
> > +	}
> > +
> > +	/* signal worker to start test */
> > +	wrk_cmd = WRK_CMD_RUN;
> > +	rte_smp_wmb();
> > +
> > +	usleep(run_time * US_PER_S);
> > +
> > +	/* signal worker to start test */
> > +	wrk_cmd = WRK_CMD_STOP;
> > +	rte_smp_wmb();
> > +
> > +	/* wait for slaves and collect stats. */
> > +	mc = rte_lcore_id();
> > +	arg[mc].stats = init_stat;
> > +
> > +	rc = 0;
> > +	RTE_LCORE_FOREACH_SLAVE(lc) {
> > +		rc |= rte_eal_wait_lcore(lc);
> > +		lcore_stat_aggr(&arg[mc].stats, &arg[lc].stats);
> > +		if (verbose != 0)
> > +			lcore_stat_dump(stdout, lc, &arg[lc].stats);
> > +	}
> > +
> > +	lcore_stat_dump(stdout, UINT32_MAX, &arg[mc].stats);
> > +	mt1_fini(r, data);
> > +	return rc;
> > +}
> > +
> > +static const struct test_case tests[] = {
> > +	{
> > +		.name = "MT-WRK_ENQ_DEQ-MST_NONE-PRCS",
> > +		.func = test_mt1,
> > +		.wfunc = test_worker_prcs,
> > +	},
> > +	{
> > +		.name = "MT-WRK_ENQ_DEQ-MST_NONE-AVG",
> > +		.func = test_mt1,
> > +		.wfunc = test_worker_avg,
> > +	},
> > +};
> > --
> > 2.17.1



More information about the dev mailing list