[PATCH grout] add high-performance clock

Robin Jarry rjarry at redhat.com
Wed Jun 10 17:16:37 CEST 2026


Hi Morten,

Morten Brørup, Jun 09, 2026 at 21:06:
> The current clock is based on clock_gettime(CLOCK_MONOTONIC_RAW), which is
> significantly slower than rte_rdtsc(), even though the kernel exposes it
> as a vDSO.
>
> CLOCK_MONOTONIC_RAW is typically based on and in sync with the TSC, so use
> the faster rte_rdtsc() to read the clock when this is the case.
>
> Also, introduce a per-thread snapshot of the clock for use in the
> dataplane, where reading the snapshot is sufficiently accurate, and much
> faster than reading the clock.
>
> Signed-off-by: Morten Brørup <mb at smartsharesystems.com>
> ---
> Note: Optimizations relying on the clock snapshot will be submitted later.
> ---
>  api/gr_clock.h                     |  38 ++++++++++-
>  main/clock.c                       | 103 +++++++++++++++++++++++++++++
>  main/clock.h                       |  58 ++++++++++++++++
>  main/meson.build                   |   1 +
>  modules/infra/datapath/main_loop.c |   3 +
>  5 files changed, 201 insertions(+), 2 deletions(-)
>  create mode 100644 main/clock.c
>  create mode 100644 main/clock.h
>
> diff --git a/api/gr_clock.h b/api/gr_clock.h
> index d2d98fba..c70a03a1 100644
> --- a/api/gr_clock.h
> +++ b/api/gr_clock.h
> @@ -14,20 +14,54 @@
>  // in calculations where race conditions may cause negative differences.
>  typedef int64_t gr_clock_ns_t;
>  
> +#define GR_NS_PER_S (gr_clock_ns_t)INT64_C(1000000000)
> +
> +#ifdef __GROUT_MAIN__
> +#include <rte_cycles.h>
> +
> +// Ref: main/clock.h
> +extern uint64_t clock_tsc_hz;
> +#endif
> +
>  // Get powered-on (non-suspended, non-hibernated) time since last boot,
>  // using a common clock across all processes.
>  static inline struct timespec gr_clock_raw(void) {
>  	struct timespec tp = {0};
> +#ifdef __GROUT_MAIN__
> +	if (clock_tsc_hz != 0) {
> +		const uint64_t tsc = rte_rdtsc();
> +		tp.tv_sec = (tsc / clock_tsc_hz);
> +		tp.tv_nsec = (tsc % clock_tsc_hz) * GR_NS_PER_S / clock_tsc_hz;
> +	} else {
> +		clock_gettime(CLOCK_MONOTONIC_RAW, &tp);
> +	}
> +	__rte_assume(tp.tv_sec >= 0);
> +	__rte_assume(tp.tv_nsec >= 0);
> +	return tp;
> +#else
>  	clock_gettime(CLOCK_MONOTONIC_RAW, &tp);
>  	return tp;
> +#endif
>  }
>  
> -#define GR_NS_PER_S (gr_clock_ns_t)1000000000LL
> -
>  // Get powered-on (non-suspended, non-hibernated) time since last boot [nanoseconds],
>  // using a common clock across all processes.
>  // Does not return negative values.
>  static inline gr_clock_ns_t gr_clock_ns(void) {
> +#ifdef __GROUT_MAIN__
> +	gr_clock_ns_t ret;
> +	if (clock_tsc_hz != 0) {
> +		const uint64_t tsc = rte_rdtsc();
> +		ret = (gr_clock_ns_t)((tsc / clock_tsc_hz) * GR_NS_PER_S
> +		      + (tsc % clock_tsc_hz) * GR_NS_PER_S / clock_tsc_hz);
> +	} else {
> +		struct timespec tp = gr_clock_raw();
> +		ret = (gr_clock_ns_t)(tp.tv_sec * GR_NS_PER_S + tp.tv_nsec);
> +	}
> +	__rte_assume(ret >= 0);
> +	return ret;
> +#else
>  	struct timespec tp = gr_clock_raw();
>  	return tp.tv_sec * GR_NS_PER_S + tp.tv_nsec;
> +#endif

This means gr_clock_ns() values returned in API messages by the grout
daemon cannot be compared with values returned in grout clients (e.g.
grcli).

It will break commands that display time differences such as:

$ grcli fdb show

https://github.com/DPDK/grout/blob/v0.16.0/modules/l2/cli/fdb.c#L141

$ grcli conntrack show

https://github.com/DPDK/grout/blob/v0.16.0/modules/policy/cli/conntrack.c#L58

> diff --git a/main/clock.h b/main/clock.h
> new file mode 100644
> index 00000000..c268f5e6
> --- /dev/null
> +++ b/main/clock.h
> @@ -0,0 +1,58 @@
> +// SPDX-License-Identifier: BSD-3-Clause
> +// Copyright (c) 2026 SmartShare Systems
> +
> +#pragma once
> +
> +#include <gr_clock.h>
> +
> +#include <rte_common.h>
> +#include <rte_cycles.h>
> +#include <rte_per_lcore.h>
> +
> +// TSC frequency in Hz.
> +//
> +// If non-zero, the TSC is in sync with the common clock.
> +// If zero, the TSC is out of sync with the common clock.
> +extern uint64_t clock_tsc_hz;
> +
> +// Get common (monotonically increasing) clock from snapshot [nanoseconds].
> +//
> +// Resembles CLOCK_MONOTONIC_RAW:
> +// - Pauses (does not increase) while the system is suspended or hibernated.
> +// - Accurate for short intervals, where NTP adjustments would distort the measurement.
> +// - Not accurate for long intervals. It drifts with hardware.
> +// - - Drifts up to 4.3 seconds/day = 26 minutes/year. (Typical PC XTAL with 50 PPM accuracy.)
> +//
> +// Wraps around after hundreds of years.
> +// Does not return negative values.
> +//
> +// Call clock_update() to update the clock snapshots for the current thread.
> +static __rte_always_inline gr_clock_ns_t clock_ns(void) {
> +	RTE_DECLARE_PER_LCORE(uint64_t, clock_ns);

It is weird to have a RTE_DECLARE_PER_LCORE inside an inline function.
It should probably be moved out of the function block and declared
with the proper type directly to avoid casting:

RTE_DECLARE_PER_LCORE(gr_clock_ns_t, clock_ns);

Also, I would advocate using this value in gr_clock_ns().

> +
> +	const gr_clock_ns_t ret = (gr_clock_ns_t)RTE_PER_LCORE(clock_ns);
> +	__rte_assume(ret >= 0);
> +	return ret;
> +}
> +
> +// Get common (monotonically increasing) clock from snapshot [seconds].
> +//
> +// Resembles CLOCK_MONOTONIC_RAW:
> +// - Pauses (does not increase) while the system is suspended or hibernated.
> +// - Not accurate for long intervals. It drifts with hardware.
> +// - - Drifts up to 4.3 seconds/day = 26 minutes/year. (Typical PC XTAL with 50 PPM accuracy.)
> +//
> +// Wraps around after hundreds of years.
> +// Does not return negative values.
> +//
> +// Call clock_update() to update the clock snapshots for the current thread.
> +static __rte_always_inline int32_t clock_s(void) {
> +	RTE_DECLARE_PER_LCORE(uint32_t, clock_s);
> +
> +	const int32_t ret = (int32_t)RTE_PER_LCORE(clock_s);
> +	__rte_assume(ret >= 0);
> +	return ret;
> +}
> +
> +// Update the clock snapshots for the current thread.
> +void clock_update(void);
> diff --git a/main/meson.build b/main/meson.build
> index a57d8600..f0823ff3 100644
> --- a/main/meson.build
> +++ b/main/meson.build
> @@ -3,6 +3,7 @@
>  
>  src += files(
>    'api.c',
> +  'clock.c',
>    'control_queue.c',
>    'dpdk.c',
>    'event.c',
> diff --git a/modules/infra/datapath/main_loop.c b/modules/infra/datapath/main_loop.c
> index f462cfbd..4127631d 100644
> --- a/modules/infra/datapath/main_loop.c
> +++ b/modules/infra/datapath/main_loop.c
> @@ -1,6 +1,8 @@
>  // SPDX-License-Identifier: BSD-3-Clause
>  // Copyright (c) 2023 Robin Jarry
> +// Copyright (c) 2026 SmartShare Systems
>  
> +#include "clock.h"
>  #include "config.h"
>  #include "datapath.h"
>  #include "log.h"
> @@ -258,6 +260,7 @@ reconfig:
>  	sleep = 0;
>  	timestamp = rte_rdtsc();
>  	for (;;) {
> +		clock_update();

Could you move clock_update() inside the housekeeping block? I don't
think we need more precision than once every 256 rounds of graph walk.

>  		rte_graph_walk(graph);
>  
>  		if (++loop == HOUSEKEEPING_INTERVAL) {


-- 
Robin

> At participating locations only.



More information about the grout mailing list