[dpdk-dev] [PATCH] eal: generic counter based loop for CPU freq calculation

Jerin Jacob jerinjacobk at gmail.com
Wed Jun 24 14:50:54 CEST 2020


On Tue, Jun 9, 2020 at 3:04 AM Honnappa Nagarahalli
<honnappa.nagarahalli at arm.com> wrote:
>
> get_tsc_freq uses 'nanosleep' system call to calculate the CPU
> frequency. However, 'nanosleep' results in the process getting
> un-scheduled. The kernel saves and restores the PMU state. This
> ensures that the PMU cycles are not counted towards a sleeping
> process. When RTE_ARM_EAL_RDTSC_USE_PMU is defined, this results
> in incorrect CPU frequency calculation. This logic is replaced
> with generic counter based loop.
>
> Bugzilla ID: 450
> Fixes: af75078fece3 ("first public release")

The fix looks good to me.

The Fixes: tag is not correct. It should reference the commit that
introduced RTE_ARM_EAL_RDTSC_USE_PMU.


> Cc: stable at dpdk.org
>
> Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli at arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang at arm.com>
> Reviewed-by: Dharmik Thakkar <dharmik.thakkar at arm.com>
> Reviewed-by: Phil Yang <phil.yang at arm.com>
>
> ---
>  lib/librte_eal/arm/include/rte_cycles_64.h | 45 +++++++++++++++++++---
>  lib/librte_eal/arm/rte_cycles.c            | 24 ++++++++++--
>  2 files changed, 61 insertions(+), 8 deletions(-)
>
> diff --git a/lib/librte_eal/arm/include/rte_cycles_64.h b/lib/librte_eal/arm/include/rte_cycles_64.h
> index da557b6a1..6fc352036 100644
> --- a/lib/librte_eal/arm/include/rte_cycles_64.h
> +++ b/lib/librte_eal/arm/include/rte_cycles_64.h
> @@ -11,6 +11,36 @@ extern "C" {
>
>  #include "generic/rte_cycles.h"
>
> +/** Read generic counter frequency */
> +static inline uint64_t

I would prefer to use __rte_always_inline here.

> +__rte_rd_generic_cntr_freq(void)

I think the "generic counter" naming is confusing. Since the symbol is
exposed by being placed in a header file, it would be better to rename
it to __rte_arm64_cntfrq().

> +{
> +       uint64_t freq;
> +
> +       asm volatile("mrs %0, cntfrq_el0" : "=r" (freq));
> +       return freq;
> +}
> +
> +/** Read generic counter */
> +static inline uint64_t

Likewise, __rte_arm64_cntvct()


> +__rte_rd_generic_cntr(void)
> +{
> +       uint64_t tsc;
> +
> +       asm volatile("mrs %0, cntvct_el0" : "=r" (tsc));
> +       return tsc;
> +}
> +
> +static inline uint64_t
> +__rte_rd_generic_cntr_precise(void)

__rte_arm64_cntvct_precise()

> +{
> +       uint64_t tsc;
> +
> +       asm volatile("isb" : : : "memory");
> +       asm volatile("mrs %0, cntvct_el0" : "=r" (tsc));
> +       return tsc;
> +}
> +
>  /**
>   * Read the time base register.
>   *
> @@ -25,10 +55,7 @@ extern "C" {
>  static inline uint64_t
>  rte_rdtsc(void)
>  {
> -       uint64_t tsc;
> -
> -       asm volatile("mrs %0, cntvct_el0" : "=r" (tsc));
> -       return tsc;
> +       return __rte_rd_generic_cntr();
>  }
>  #else
>  /**
> @@ -49,14 +76,22 @@ rte_rdtsc(void)
>   * asm volatile("msr pmcr_el0, %0" : : "r" (val));
>   *
>   */
> +
> +/** Read PMU cycle counter */
>  static inline uint64_t
> -rte_rdtsc(void)
> +__rte_rd_pmu_cycle_cntr(void)
>  {
>         uint64_t tsc;
>
>         asm volatile("mrs %0, pmccntr_el0" : "=r"(tsc));
>         return tsc;
>  }
> +
> +static inline uint64_t
> +rte_rdtsc(void)
> +{
> +       return __rte_rd_pmu_cycle_cntr();
> +}
>  #endif
>
>  static inline uint64_t
> diff --git a/lib/librte_eal/arm/rte_cycles.c b/lib/librte_eal/arm/rte_cycles.c
> index 3500d523e..92c87a8a4 100644
> --- a/lib/librte_eal/arm/rte_cycles.c
> +++ b/lib/librte_eal/arm/rte_cycles.c
> @@ -3,14 +3,32 @@
>   */
>
>  #include "eal_private.h"
> +#include "rte_cycles.h"
>
>  uint64_t
>  get_tsc_freq_arch(void)
>  {
>  #if defined RTE_ARCH_ARM64 && !defined RTE_ARM_EAL_RDTSC_USE_PMU
> -       uint64_t freq;
> -       asm volatile("mrs %0, cntfrq_el0" : "=r" (freq));
> -       return freq;
> +       return __rte_rd_generic_cntr_freq();
> +#elif defined RTE_ARCH_ARM64 && defined RTE_ARM_EAL_RDTSC_USE_PMU
> +       /* Use the generic counter ticks to calculate the PMU
> +        * cycle frequency.
> +        */
> +       uint64_t gcnt_ticks;
> +       uint64_t start_ticks, cur_ticks;
> +       uint64_t start_pmu_cycles, end_pmu_cycles;
> +
> +       /* Number of ticks for 1/10 second */
> +       gcnt_ticks = __rte_rd_generic_cntr_freq() / 10;
> +
> +       start_ticks = __rte_rd_generic_cntr_precise();
> +       start_pmu_cycles = rte_rdtsc_precise();
> +       do {
> +               cur_ticks = __rte_rd_generic_cntr();
> +       } while ((cur_ticks - start_ticks) < gcnt_ticks);
> +       end_pmu_cycles = rte_rdtsc_precise();
> +
> +       return ((end_pmu_cycles - start_pmu_cycles) * 10);

Good thought. On the plus side, it will reduce the boot time by 0.9 sec.

>  #else
>         return 0;

With the above changes:

Acked-by: Jerin Jacob <jerinj at marvell.com>



>  #endif
> --
> 2.17.1
>


More information about the dev mailing list