[PATCH grout] add high-performance clock
Robin Jarry
rjarry at redhat.com
Wed Jun 10 17:16:37 CEST 2026
Hi Morten,
Morten Brørup, Jun 09, 2026 at 21:06:
> The current clock is based on clock_gettime(CLOCK_MONOTONIC_RAW), which is
> significantly slower than rte_rdtsc(), even though the kernel exposes it
> as a vDSO.
>
> CLOCK_MONOTONIC_RAW is typically based on and in sync with the TSC, so use
> the faster rte_rdtsc() to read the clock when this is the case.
>
> Also, introduce a per-thread snapshot of the clock for use in the
> dataplane, where reading the snapshot is sufficiently accurate, and much
> faster than reading the clock.
>
> Signed-off-by: Morten Brørup <mb at smartsharesystems.com>
> ---
> Note: Optimizations relying on the clock snapshot will be submitted later.
> ---
> api/gr_clock.h | 38 ++++++++++-
> main/clock.c | 103 +++++++++++++++++++++++++++++
> main/clock.h | 58 ++++++++++++++++
> main/meson.build | 1 +
> modules/infra/datapath/main_loop.c | 3 +
> 5 files changed, 201 insertions(+), 2 deletions(-)
> create mode 100644 main/clock.c
> create mode 100644 main/clock.h
>
> diff --git a/api/gr_clock.h b/api/gr_clock.h
> index d2d98fba..c70a03a1 100644
> --- a/api/gr_clock.h
> +++ b/api/gr_clock.h
> @@ -14,20 +14,54 @@
> // in calculations where race conditions may cause negative differences.
> typedef int64_t gr_clock_ns_t;
>
> +#define GR_NS_PER_S (gr_clock_ns_t)INT64_C(1000000000)
> +
> +#ifdef __GROUT_MAIN__
> +#include <rte_cycles.h>
> +
> +// Ref: main/clock.h
> +extern uint64_t clock_tsc_hz;
> +#endif
> +
> // Get powered-on (non-suspended, non-hibernated) time since last boot,
> // using a common clock across all processes.
> static inline struct timespec gr_clock_raw(void) {
> struct timespec tp = {0};
> +#ifdef __GROUT_MAIN__
> + if (clock_tsc_hz != 0) {
> + const uint64_t tsc = rte_rdtsc();
> + tp.tv_sec = (tsc / clock_tsc_hz);
> + tp.tv_nsec = (tsc % clock_tsc_hz) * GR_NS_PER_S / clock_tsc_hz;
> + } else {
> + clock_gettime(CLOCK_MONOTONIC_RAW, &tp);
> + }
> + __rte_assume(tp.tv_sec >= 0);
> + __rte_assume(tp.tv_nsec >= 0);
> + return tp;
> +#else
> clock_gettime(CLOCK_MONOTONIC_RAW, &tp);
> return tp;
> +#endif
> }
>
> -#define GR_NS_PER_S (gr_clock_ns_t)1000000000LL
> -
> // Get powered-on (non-suspended, non-hibernated) time since last boot [nanoseconds],
> // using a common clock across all processes.
> // Does not return negative values.
> static inline gr_clock_ns_t gr_clock_ns(void) {
> +#ifdef __GROUT_MAIN__
> + gr_clock_ns_t ret;
> + if (clock_tsc_hz != 0) {
> + const uint64_t tsc = rte_rdtsc();
> + ret = (gr_clock_ns_t)((tsc / clock_tsc_hz) * GR_NS_PER_S
> + + (tsc % clock_tsc_hz) * GR_NS_PER_S / clock_tsc_hz);
> + } else {
> + struct timespec tp = gr_clock_raw();
> + ret = (gr_clock_ns_t)(tp.tv_sec * GR_NS_PER_S + tp.tv_nsec);
> + }
> + __rte_assume(ret >= 0);
> + return ret;
> +#else
> struct timespec tp = gr_clock_raw();
> return tp.tv_sec * GR_NS_PER_S + tp.tv_nsec;
> +#endif
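This means gr_clock_ns() values returned in API messages by the grout
daemon (derived from the TSC when clock_tsc_hz is non-zero) cannot be
compared with values obtained in grout clients (e.g. grcli), which still
read clock_gettime(CLOCK_MONOTONIC_RAW).
It will break commands that display time differences, such as: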
$ grcli fdb show
https://github.com/DPDK/grout/blob/v0.16.0/modules/l2/cli/fdb.c#L141
$ grcli conntrack show
https://github.com/DPDK/grout/blob/v0.16.0/modules/policy/cli/conntrack.c#L58
> diff --git a/main/clock.h b/main/clock.h
> new file mode 100644
> index 00000000..c268f5e6
> --- /dev/null
> +++ b/main/clock.h
> @@ -0,0 +1,58 @@
> +// SPDX-License-Identifier: BSD-3-Clause
> +// Copyright (c) 2026 SmartShare Systems
> +
> +#pragma once
> +
> +#include <gr_clock.h>
> +
> +#include <rte_common.h>
> +#include <rte_cycles.h>
> +#include <rte_per_lcore.h>
> +
> +// TSC frequency in Hz.
> +//
> +// If non-zero, the TSC is in sync with the common clock.
> +// If zero, the TSC is out of sync with the common clock.
> +extern uint64_t clock_tsc_hz;
> +
> +// Get common (monotonically increasing) clock from snapshot [nanoseconds].
> +//
> +// Resembles CLOCK_MONOTONIC_RAW:
> +// - Pauses (does not increase) while the system is suspended or hibernated.
> +// - Accurate for short intervals, where NTP adjustments would distort the measurement.
> +// - Not accurate for long intervals. It drifts with hardware.
> +// - - Drifts up to 4.3 seconds/day = 26 minutes/year. (Typical PC XTAL with 50 PPM accuracy.)
> +//
> +// Wraps around after hundreds of years.
> +// Does not return negative values.
> +//
> +// Call clock_update() to update the clock snapshots for the current thread.
> +static __rte_always_inline gr_clock_ns_t clock_ns(void) {
> + RTE_DECLARE_PER_LCORE(uint64_t, clock_ns);
It is weird to have an RTE_DECLARE_PER_LCORE inside an inline function.
It should probably be moved out of the function body and declared
with the proper type directly to avoid casting:
RTE_DECLARE_PER_LCORE(gr_clock_ns_t, clock_ns);
Also, I would advocate using this value in gr_clock_ns().
> +
> + const gr_clock_ns_t ret = (gr_clock_ns_t)RTE_PER_LCORE(clock_ns);
> + __rte_assume(ret >= 0);
> + return ret;
> +}
> +
> +// Get common (monotonically increasing) clock from snapshot [seconds].
> +//
> +// Resembles CLOCK_MONOTONIC_RAW:
> +// - Pauses (does not increase) while the system is suspended or hibernated.
> +// - Not accurate for long intervals. It drifts with hardware.
> +// - - Drifts up to 4.3 seconds/day = 26 minutes/year. (Typical PC XTAL with 50 PPM accuracy.)
> +//
> +// Wraps around after hundreds of years.
> +// Does not return negative values.
> +//
> +// Call clock_update() to update the clock snapshots for the current thread.
> +static __rte_always_inline int32_t clock_s(void) {
> + RTE_DECLARE_PER_LCORE(uint32_t, clock_s);
> +
> + const int32_t ret = (int32_t)RTE_PER_LCORE(clock_s);
> + __rte_assume(ret >= 0);
> + return ret;
> +}
> +
> +// Update the clock snapshots for the current thread.
> +void clock_update(void);
> diff --git a/main/meson.build b/main/meson.build
> index a57d8600..f0823ff3 100644
> --- a/main/meson.build
> +++ b/main/meson.build
> @@ -3,6 +3,7 @@
>
> src += files(
> 'api.c',
> + 'clock.c',
> 'control_queue.c',
> 'dpdk.c',
> 'event.c',
> diff --git a/modules/infra/datapath/main_loop.c b/modules/infra/datapath/main_loop.c
> index f462cfbd..4127631d 100644
> --- a/modules/infra/datapath/main_loop.c
> +++ b/modules/infra/datapath/main_loop.c
> @@ -1,6 +1,8 @@
> // SPDX-License-Identifier: BSD-3-Clause
> // Copyright (c) 2023 Robin Jarry
> +// Copyright (c) 2026 SmartShare Systems
>
> +#include "clock.h"
> #include "config.h"
> #include "datapath.h"
> #include "log.h"
> @@ -258,6 +260,7 @@ reconfig:
> sleep = 0;
> timestamp = rte_rdtsc();
> for (;;) {
> + clock_update();
Could you move clock_update() inside the housekeeping block? I don't
think we need more precision than one update every 256 rounds of graph walk.
> rte_graph_walk(graph);
>
> if (++loop == HOUSEKEEPING_INTERVAL) {
--
Robin
> At participating locations only.
More information about the grout
mailing list