[PATCH v7 4/7] pcapng: improve performance of timestamping

Stephen Hemminger stephen at networkplumber.org
Fri Feb 13 20:18:21 CET 2026


Avoid doing expensive divide operations when converting
timestamps from cycles (TSC) to nanoseconds for pcapng.

Precompute a rte_reciprocal_u64 inverse of the TSC frequency
and a right-shift count chosen so that the intermediate
product (delta >> shift) * NSEC_PER_SEC cannot overflow
uint64_t. The per-packet conversion then requires only a
shift, a multiply, and a reciprocal divide—no division.

For TSC frequencies less than 18.4 GHz the shift value will
be zero but code is defensive to be future proof.

Signed-off-by: Stephen Hemminger <stephen at networkplumber.org>
---
 lib/pcapng/rte_pcapng.c | 97 +++++++++++++++++++++++++++++++----------
 1 file changed, 73 insertions(+), 24 deletions(-)

diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c
index a2254ba807..7eedbaf298 100644
--- a/lib/pcapng/rte_pcapng.c
+++ b/lib/pcapng/rte_pcapng.c
@@ -37,12 +37,23 @@
 /* upper bound for strings in pcapng option data */
 #define PCAPNG_STR_MAX	UINT16_MAX
 
+/*
+ * Converter from TSC values to nanoseconds since Unix epoch.
+ * Uses reciprocal multiply to avoid runtime division.
+ */
+struct tsc_clock {
+	uint64_t tsc_base;          /* TSC value at initialization. */
+	uint64_t ns_base;           /* Nanoseconds since epoch at init. */
+	struct rte_reciprocal_u64 tsc_hz_inv; /* Reciprocal of TSC frequency. */
+	uint32_t shift;             /* Pre-shift to avoid overflow. */
+};
+
 /* Format of the capture file handle */
 struct rte_pcapng {
 	int  outfd;		/* output file */
 	unsigned int ports;	/* number of interfaces added */
-	uint64_t offset_ns;	/* ns since 1/1/1970 when initialized */
-	uint64_t tsc_base;	/* TSC when started */
+
+	struct tsc_clock clock;
 
 	/* DPDK port id to interface index in file */
 	uint32_t port_index[RTE_MAX_ETHPORTS];
@@ -98,21 +109,59 @@ static ssize_t writev(int fd, const struct iovec *iov, int iovcnt)
 #define if_indextoname(ifindex, ifname) NULL
 #endif
 
-/* Convert from TSC (CPU cycles) to nanoseconds */
-static uint64_t
-pcapng_timestamp(const rte_pcapng_t *self, uint64_t cycles)
+/*
+ * Initialize TSC-to-epoch-ns converter.
+ *
+ * Captures current TSC and system clock as a reference point.
+ */
+static int
+tsc_clock_init(struct tsc_clock *clk)
 {
-	uint64_t delta, rem, secs, ns;
-	const uint64_t hz = rte_get_tsc_hz();
+	struct timespec ts;
+	uint64_t cycles, tsc_hz, divisor;
+	uint32_t shift;
+
+	memset(clk, 0, sizeof(*clk));
+
+	/* If Hz is zero, something is seriously broken. */
+	tsc_hz = rte_get_tsc_hz();
+	if (tsc_hz == 0)
+		return -1;
+
+	/*
+	 * Choose shift so (delta >> shift) * NSEC_PER_SEC fits in uint64_t.
+	 * For typical GHz-range TSC and ~1s deltas this is 0.
+	 */
+	shift = 0;
+	divisor = tsc_hz;
+	while (divisor > UINT64_MAX / NSEC_PER_SEC) {
+		divisor >>= 1;
+		shift++;
+	}
+
+	clk->shift = shift;
+	clk->tsc_hz_inv = rte_reciprocal_value_u64(divisor);
+
+	/* Sample TSC and system clock as close together as possible. */
+	cycles = rte_get_tsc_cycles();
+	clock_gettime(CLOCK_REALTIME, &ts);
+	clk->tsc_base = (cycles + rte_get_tsc_cycles()) / 2;
+	clk->ns_base = (uint64_t)ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
+
+	return 0;
+}
 
-	delta = cycles - self->tsc_base;
+/* Convert a TSC value to nanoseconds since Unix epoch. */
+static inline uint64_t
+tsc_to_ns_epoch(const struct tsc_clock *clk, uint64_t tsc)
+{
+	uint64_t delta, ns;
 
-	/* Avoid numeric wraparound by computing seconds first */
-	secs = delta / hz;
-	rem = delta % hz;
-	ns = (rem * NS_PER_S) / hz;
+	delta = tsc - clk->tsc_base;
+	ns = (delta >> clk->shift) * NSEC_PER_SEC;
+	ns = rte_reciprocal_divide_u64(ns, &clk->tsc_hz_inv);
 
-	return secs * NS_PER_S + ns + self->offset_ns;
+	return clk->ns_base + ns;
 }
 
 /* length of option including padding */
@@ -344,7 +393,7 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id,
 {
 	struct pcapng_statistics *hdr;
 	struct pcapng_option *opt;
-	uint64_t start_time = self->offset_ns;
+	uint64_t start_time = self->clock.ns_base;
 	uint64_t sample_time;
 	uint32_t optlen, len;
 	uint32_t *buf;
@@ -397,7 +446,7 @@ rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id,
 	hdr->block_length = len;
 	hdr->interface_id = self->port_index[port_id];
 
-	sample_time = pcapng_timestamp(self, rte_get_tsc_cycles());
+	sample_time = tsc_to_ns_epoch(&self->clock, rte_get_tsc_cycles());
 	hdr->timestamp_hi = sample_time >> 32;
 	hdr->timestamp_lo = (uint32_t)sample_time;
 
@@ -684,10 +733,13 @@ rte_pcapng_write_packets(rte_pcapng_t *self,
 			return -1;
 		}
 
-		/* adjust timestamp recorded in packet */
+		/*
+		 * When data is captured pcapng_copy the current TSC is stored.
+		 * Adjust the value recorded in file to PCAP epoch units.
+		 */
 		cycles = (uint64_t)epb->timestamp_hi << 32;
 		cycles += epb->timestamp_lo;
-		timestamp = pcapng_timestamp(self, cycles);
+		timestamp = tsc_to_ns_epoch(&self->clock, cycles);
 		epb->timestamp_hi = timestamp >> 32;
 		epb->timestamp_lo = (uint32_t)timestamp;
 
@@ -733,8 +785,6 @@ rte_pcapng_fdopen(int fd,
 {
 	unsigned int i;
 	rte_pcapng_t *self;
-	struct timespec ts;
-	uint64_t cycles;
 	int ret;
 
 	if ((osname && strlen(osname) > PCAPNG_STR_MAX) ||
@@ -754,11 +804,10 @@ rte_pcapng_fdopen(int fd,
 	self->outfd = fd;
 	self->ports = 0;
 
-	/* record start time in ns since 1/1/1970 */
-	cycles = rte_get_tsc_cycles();
-	clock_gettime(CLOCK_REALTIME, &ts);
-	self->tsc_base = (cycles + rte_get_tsc_cycles()) / 2;
-	self->offset_ns = rte_timespec_to_ns(&ts);
+	if (tsc_clock_init(&self->clock) < 0) {
+		rte_errno = ENODEV;
+		goto fail;
+	}
 
 	for (i = 0; i < RTE_MAX_ETHPORTS; i++)
 		self->port_index[i] = UINT32_MAX;
-- 
2.51.0



More information about the dev mailing list