[PATCH v15 4/4] eal: add PMU support to tracing library
    Konstantin Ananyev 
    konstantin.ananyev at huawei.com
       
    Tue Nov  5 12:04:17 CET 2024
    
    
  
> In order to profile app one needs to store significant amount of samples
> somewhere for an analysis later on. Since trace library supports
> storing data in a CTF format lets take advantage of that and add a
> dedicated PMU tracepoint.
> 
> Signed-off-by: Tomasz Duszynski <tduszynski at marvell.com>
> ---
>  app/test/test_trace_perf.c               | 10 ++++
>  doc/guides/prog_guide/profile_app.rst    |  5 ++
>  doc/guides/prog_guide/trace_lib.rst      | 32 +++++++++++
>  lib/eal/common/eal_common_trace.c        |  5 +-
>  lib/eal/common/eal_common_trace_pmu.c    | 38 ++++++++++++++
>  lib/eal/common/eal_common_trace_points.c |  5 ++
>  lib/eal/common/eal_trace.h               |  4 ++
>  lib/eal/common/meson.build               |  1 +
>  lib/eal/include/rte_eal_trace.h          | 11 ++++
>  lib/eal/version.map                      |  1 +
>  lib/pmu/rte_pmu.c                        | 67 +++++++++++++++++++++++-
>  lib/pmu/rte_pmu.h                        | 24 +++++++--
>  lib/pmu/version.map                      |  1 +
>  13 files changed, 198 insertions(+), 6 deletions(-)
>  create mode 100644 lib/eal/common/eal_common_trace_pmu.c
> 
> diff --git a/app/test/test_trace_perf.c b/app/test/test_trace_perf.c
> index 8257cc02be..8a0730943e 100644
> --- a/app/test/test_trace_perf.c
> +++ b/app/test/test_trace_perf.c
> @@ -114,6 +114,10 @@ worker_fn_##func(void *arg) \
>  #define GENERIC_DOUBLE rte_eal_trace_generic_double(3.66666)
>  #define GENERIC_STR rte_eal_trace_generic_str("hello world")
>  #define VOID_FP app_dpdk_test_fp()
> +#ifdef RTE_LIB_PMU
> +/* 0 corresponds first event passed via --trace= */
> +#define READ_PMU rte_eal_trace_pmu_read(0)
> +#endif
> 
>  WORKER_DEFINE(GENERIC_VOID)
>  WORKER_DEFINE(GENERIC_U64)
> @@ -122,6 +126,9 @@ WORKER_DEFINE(GENERIC_FLOAT)
>  WORKER_DEFINE(GENERIC_DOUBLE)
>  WORKER_DEFINE(GENERIC_STR)
>  WORKER_DEFINE(VOID_FP)
> +#ifdef RTE_LIB_PMU
> +WORKER_DEFINE(READ_PMU)
> +#endif
> 
>  static void
>  run_test(const char *str, lcore_function_t f, struct test_data *data, size_t sz)
> @@ -174,6 +181,9 @@ test_trace_perf(void)
>  	run_test("double", worker_fn_GENERIC_DOUBLE, data, sz);
>  	run_test("string", worker_fn_GENERIC_STR, data, sz);
>  	run_test("void_fp", worker_fn_VOID_FP, data, sz);
> +#ifdef RTE_LIB_PMU
> +	run_test("read_pmu", worker_fn_READ_PMU, data, sz);
> +#endif
> 
>  	rte_free(data);
>  	return TEST_SUCCESS;
> diff --git a/doc/guides/prog_guide/profile_app.rst b/doc/guides/prog_guide/profile_app.rst
> index 854c515a61..1ab6fb9eaa 100644
> --- a/doc/guides/prog_guide/profile_app.rst
> +++ b/doc/guides/prog_guide/profile_app.rst
> @@ -36,6 +36,11 @@ As of now implementation imposes certain limitations:
> 
>  * Each EAL lcore measures same group of events
> 
> +Alternatively tracing library can be used which offers dedicated tracepoint
> +``rte_eal_trace_pmu_event()``.
> +
> +Refer to :doc:`../prog_guide/trace_lib` for more details.
> +
> 
>  Profiling on x86
>  ----------------
> diff --git a/doc/guides/prog_guide/trace_lib.rst b/doc/guides/prog_guide/trace_lib.rst
> index d9b17abe90..378abccd72 100644
> --- a/doc/guides/prog_guide/trace_lib.rst
> +++ b/doc/guides/prog_guide/trace_lib.rst
> @@ -46,6 +46,7 @@ DPDK tracing library features
>    trace format and is compatible with ``LTTng``.
>    For detailed information, refer to
>    `Common Trace Format <https://diamon.org/ctf/>`_.
> +- Support reading PMU events on ARM64 and x86-64 (Intel)
> 
>  How to add a tracepoint?
>  ------------------------
> @@ -139,6 +140,37 @@ the user must use ``RTE_TRACE_POINT_FP`` instead of ``RTE_TRACE_POINT``.
>  ``RTE_TRACE_POINT_FP`` is compiled out by default and it can be enabled using
>  the ``enable_trace_fp`` option for meson build.
> 
> +PMU tracepoint
> +--------------
> +
> +Performance monitoring unit (PMU) event values can be read from hardware
> +registers using predefined ``rte_pmu_read`` tracepoint.
> +
> +Tracing is enabled via ``--trace`` EAL option by passing both expression
> +matching PMU tracepoint name i.e ``lib.eal.pmu.read`` and expression
> +``e=ev1[,ev2,...]`` matching particular events::
> +
> +    --trace='.*pmu.read\|e=cpu_cycles,l1d_cache'
> +
> +Event names are available under ``/sys/bus/event_source/devices/PMU/events``
> +directory, where ``PMU`` is a placeholder for either a ``cpu`` or a directory
> +containing ``cpus``.
> +
> +In contrary to other tracepoints this does not need any extra variables
> +added to source files. Instead, caller passes index which follows the order of
> +events specified via ``--trace`` parameter. In the following example index ``0``
> +corresponds to ``cpu_cyclces`` while index ``1`` corresponds to ``l1d_cache``.
> +
> +.. code-block:: c
> +
> + ...
> + rte_eal_trace_pmu_read(0);
> + rte_eal_trace_pmu_read(1);
> + ...
> +
> +PMU tracing support must be explicitly enabled using the ``enable_trace_fp``
> +option for meson build.
> +
>  Event record mode
>  -----------------
> 
> diff --git a/lib/eal/common/eal_common_trace.c b/lib/eal/common/eal_common_trace.c
> index 918f49bf4f..9be8724ec4 100644
> --- a/lib/eal/common/eal_common_trace.c
> +++ b/lib/eal/common/eal_common_trace.c
> @@ -72,8 +72,10 @@ eal_trace_init(void)
>  		goto free_meta;
> 
>  	/* Apply global configurations */
> -	STAILQ_FOREACH(arg, &trace.args, next)
> +	STAILQ_FOREACH(arg, &trace.args, next) {
>  		trace_args_apply(arg->val);
> +		trace_pmu_args_apply(arg->val);
> +	}
> 
>  	rte_trace_mode_set(trace.mode);
> 
> @@ -89,6 +91,7 @@ eal_trace_init(void)
>  void
>  eal_trace_fini(void)
>  {
> +	trace_pmu_args_free();
>  	trace_mem_free();
>  	trace_metadata_destroy();
>  	eal_trace_args_free();
> diff --git a/lib/eal/common/eal_common_trace_pmu.c b/lib/eal/common/eal_common_trace_pmu.c
> new file mode 100644
> index 0000000000..b3ab41e8a2
> --- /dev/null
> +++ b/lib/eal/common/eal_common_trace_pmu.c
> @@ -0,0 +1,38 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(C) 2024 Marvell International Ltd.
> + */
> +
> +#include <rte_common.h>
> +
> +#include "eal_trace.h"
> +
> +#ifdef RTE_LIB_PMU
> +
> +#include <rte_pmu.h>
> +
> +void
> +trace_pmu_args_apply(const char *arg)
> +{
> +	static bool once;
> +
> +	if (!once) {
> +		if (rte_pmu_init())
> +			return;
> +		once = true;
> +	}
> +
> +	rte_pmu_add_events_by_pattern(arg);
> +}
> +
> +void
> +trace_pmu_args_free(void)
> +{
> +	rte_pmu_fini();
> +}
> +
> +#else /* !RTE_LIB_PMU */
> +
> +void trace_pmu_args_apply(const char *arg __rte_unused) { return; }
> +void trace_pmu_args_free(void) { return; }
> +
> +#endif /* RTE_LIB_PMU */
> diff --git a/lib/eal/common/eal_common_trace_points.c b/lib/eal/common/eal_common_trace_points.c
> index 0f1240ea3a..c99eab92f4 100644
> --- a/lib/eal/common/eal_common_trace_points.c
> +++ b/lib/eal/common/eal_common_trace_points.c
> @@ -100,3 +100,8 @@ RTE_TRACE_POINT_REGISTER(rte_eal_trace_intr_enable,
>  	lib.eal.intr.enable)
>  RTE_TRACE_POINT_REGISTER(rte_eal_trace_intr_disable,
>  	lib.eal.intr.disable)
> +
> +#ifdef RTE_LIB_PMU
> +RTE_TRACE_POINT_REGISTER(rte_eal_trace_pmu_read,
> +	lib.eal.pmu.read)
> +#endif
> diff --git a/lib/eal/common/eal_trace.h b/lib/eal/common/eal_trace.h
> index 55262677e0..58fa43472a 100644
> --- a/lib/eal/common/eal_trace.h
> +++ b/lib/eal/common/eal_trace.h
> @@ -104,6 +104,10 @@ int trace_epoch_time_save(void);
>  void trace_mem_free(void);
>  void trace_mem_per_thread_free(void);
> 
> +/* PMU wrappers */
> +void trace_pmu_args_apply(const char *arg);
> +void trace_pmu_args_free(void);
> +
>  /* EAL interface */
>  int eal_trace_init(void);
>  void eal_trace_fini(void);
> diff --git a/lib/eal/common/meson.build b/lib/eal/common/meson.build
> index c1bbf26654..59d5b15708 100644
> --- a/lib/eal/common/meson.build
> +++ b/lib/eal/common/meson.build
> @@ -27,6 +27,7 @@ sources += files(
>          'eal_common_tailqs.c',
>          'eal_common_thread.c',
>          'eal_common_timer.c',
> +        'eal_common_trace_pmu.c',
>          'eal_common_trace_points.c',
>          'eal_common_uuid.c',
>          'malloc_elem.c',
> diff --git a/lib/eal/include/rte_eal_trace.h b/lib/eal/include/rte_eal_trace.h
> index 9ad2112801..9c78f63ff5 100644
> --- a/lib/eal/include/rte_eal_trace.h
> +++ b/lib/eal/include/rte_eal_trace.h
> @@ -127,6 +127,17 @@ RTE_TRACE_POINT(
> 
>  #define RTE_EAL_TRACE_GENERIC_FUNC rte_eal_trace_generic_func(__func__)
> 
> +#ifdef RTE_LIB_PMU
> +#include <rte_pmu.h>
> +
> +RTE_TRACE_POINT_FP(
> +	rte_eal_trace_pmu_read,
> +	RTE_TRACE_POINT_ARGS(unsigned int index),
> +	uint64_t val = rte_pmu_read(index);
> +	rte_trace_point_emit_u64(val);
> +)
> +#endif
> +
>  #ifdef __cplusplus
>  }
>  #endif
> diff --git a/lib/eal/version.map b/lib/eal/version.map
> index f493cd1ca7..3dc147f848 100644
> --- a/lib/eal/version.map
> +++ b/lib/eal/version.map
> @@ -399,6 +399,7 @@ EXPERIMENTAL {
> 
>  	# added in 24.11
>  	rte_bitset_to_str;
> +	__rte_eal_trace_pmu_read; # WINDOWS_NO_EXPORT
>  };
> 
>  INTERNAL {
> diff --git a/lib/pmu/rte_pmu.c b/lib/pmu/rte_pmu.c
> index dd57961627..b65f08c75b 100644
> --- a/lib/pmu/rte_pmu.c
> +++ b/lib/pmu/rte_pmu.c
> @@ -412,12 +412,75 @@ rte_pmu_add_event(const char *name)
>  	return event->index;
>  }
> 
> +static int
> +add_events(const char *pattern)
> +{
> +	char *token, *copy;
> +	int ret = 0;
> +
> +	copy = strdup(pattern);
> +	if (copy == NULL)
> +		return -ENOMEM;
> +
> +	token = strtok(copy, ",");
> +	while (token) {
> +		ret = rte_pmu_add_event(token);
> +		if (ret < 0)
> +			break;
> +
> +		token = strtok(NULL, ",");
> +	}
> +
> +	free(copy);
> +
> +	return ret >= 0 ? 0 : ret;
> +}
> +
> +int
> +rte_pmu_add_events_by_pattern(const char *pattern)
> +{
> +	regmatch_t rmatch;
> +	char buf[BUFSIZ];
> +	unsigned int num;
> +	regex_t reg;
> +	int ret;
> +
> +	/* events are matched against occurrences of e=ev1[,ev2,..] pattern */
> +	ret = regcomp(®, "e=([_[:alnum:]-],?)+", REG_EXTENDED);
> +	if (ret) {
> +		PMU_LOG(ERR, "Failed to compile event matching regexp");
> +		return -EINVAL;
> +	}
> +
> +	for (;;) {
> +		if (regexec(®, pattern, 1, &rmatch, 0))
> +			break;
> +
> +		num = rmatch.rm_eo - rmatch.rm_so;
> +		if (num > sizeof(buf))
> +			num = sizeof(buf);
> +
> +		/* skip e= pattern prefix */
> +		memcpy(buf, pattern + rmatch.rm_so + 2, num - 2);
> +		buf[num - 2] = '\0';
> +		ret = add_events(buf);
> +		if (ret)
> +			break;
> +
> +		pattern += rmatch.rm_eo;
> +	}
> +
> +	regfree(®);
> +
> +	return ret;
> +}
> +
>  int
>  rte_pmu_init(void)
>  {
>  	int ret;
> 
> -	if (rte_pmu.initialized)
> +	if (rte_pmu.initialized && ++rte_pmu.initialized)
Stupid q, why not:
if (rte_pmu.initialized++ > 0) return 0?
Also increment/decrement counter implies that init()/fini()
can be called multiple times, correct?
If so, there is still an implicit assumption that it always will
happen in a sequential manner? 
>  		return 0;
> 
>  	ret = scan_pmus();
> @@ -450,7 +513,7 @@ rte_pmu_fini(void)
>  	struct rte_pmu_event_group *group;
>  	unsigned int i;
> 
> -	if (!rte_pmu.initialized)
> +	if (!rte_pmu.initialized || --rte_pmu.initialized)
>  		return;
> 
>  	RTE_TAILQ_FOREACH_SAFE(event, &rte_pmu.event_list, next, tmp_event) {
> diff --git a/lib/pmu/rte_pmu.h b/lib/pmu/rte_pmu.h
> index 85f9127911..df571f0a2f 100644
> --- a/lib/pmu/rte_pmu.h
> +++ b/lib/pmu/rte_pmu.h
> @@ -135,7 +135,7 @@ __rte_pmu_enable_group(struct rte_pmu_event_group *group);
>   * @warning
>   * @b EXPERIMENTAL: this API may change without prior notice
>   *
> - * Initialize PMU library.
> + * Initialize PMU library. It's safe to call it multiple times.
>   *
>   * @return
>   *   0 in case of success, negative value otherwise.
> @@ -148,7 +148,9 @@ rte_pmu_init(void);
>   * @warning
>   * @b EXPERIMENTAL: this API may change without prior notice
>   *
> - * Finalize PMU library.
> + * Finalize PMU library. Number of calls must match number
> + * of times rte_pmu_init() was called. Otherwise memory
> + * won't be freed properly.
>   */
>  __rte_experimental
>  void
> @@ -173,7 +175,23 @@ rte_pmu_add_event(const char *name);
>   * @warning
>   * @b EXPERIMENTAL: this API may change without prior notice
>   *
> - * Read hardware counter configured to count occurrences of an event.
> + * Add events matching pattern to the group of enabled events.
> + *
> + * @param pattern
> + *   Pattern e=ev1[,ev2,...] matching events, where evX is a placeholder for an event listed under
> + *   /sys/bus/event_source/devices/<pmu>/events.
> + */
> +__rte_experimental
> +int
> +rte_pmu_add_events_by_pattern(const char *pattern);
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice
> + *
> + * Read hardware counter configured to count occurrences of an event. This is called by an lcore
> + * binded exclusively to particular cpu and may not work as expected if gets migrated elsewhere.
> + * Reason being event group is pinned hence not supposed to be multiplexed with any other events.
>   *
>   * @param index
>   *   Index of an event to be read.
> diff --git a/lib/pmu/version.map b/lib/pmu/version.map
> index d0f907d13d..f14d498b54 100644
> --- a/lib/pmu/version.map
> +++ b/lib/pmu/version.map
> @@ -5,6 +5,7 @@ EXPERIMENTAL {
>  	__rte_pmu_enable_group;
>  	rte_pmu;
>  	rte_pmu_add_event;
> +	rte_pmu_add_events_by_pattern;
>  	rte_pmu_fini;
>  	rte_pmu_init;
>  	rte_pmu_read;
> --
> 2.34.1
    
    
More information about the dev
mailing list