[PATCH grout] l2: optimize bridge FDB source MAC learning
Robin Jarry
rjarry at redhat.com
Thu May 21 22:12:19 CEST 2026
Hi Morten,
Morten Brørup, May 21, 2026 at 19:59:
> In the bridge_input node, read the clock once outside the loop, cache the
> value, and pass the cached value as a parameter to fdb_learn() inside the
> loop.
> This reduces the number of times the node reads the clock, which is a
> costly operation, from once per packet to once per burst of packets.
I wonder if it would make sense to have a per-lcore timestamp variable,
accessible via a static inline function, that would be refreshed at every
housekeeping turn of the main loop, e.g.:
diff --git a/modules/infra/datapath/datapath.h b/modules/infra/datapath/datapath.h
index 2c1e58e5f8ec..e0784daa3949 100644
--- a/modules/infra/datapath/datapath.h
+++ b/modules/infra/datapath/datapath.h
@@ -3,4 +3,14 @@
#pragma once
+#include <rte_per_lcore.h>
+
+#include <time.h>
+
void *gr_datapath_loop(void *priv);
+
+RTE_DECLARE_PER_LCORE(clock_t, datapath_timestamp);
+
+static inline clock_t datapath_timestamp(void) {
+ return RTE_PER_LCORE(datapath_timestamp);
+}
diff --git a/modules/infra/datapath/main_loop.c b/modules/infra/datapath/main_loop.c
index f462cfbd42dd..2be0885319ba 100644
--- a/modules/infra/datapath/main_loop.c
+++ b/modules/infra/datapath/main_loop.c
@@ -10,6 +10,8 @@
#include "vec.h"
#include "worker.h"
+#include <gr_clock.h>
+
#include <rte_common.h>
#include <rte_eal.h>
#include <rte_errno.h>
@@ -182,6 +184,8 @@ err:
static struct rte_rcu_qsbr *rcu;
+RTE_DEFINE_PER_LCORE(clock_t, datapath_timestamp);
+
void *gr_datapath_loop(void *priv) {
struct stats_context ctx = {
.stats = NULL,
@@ -257,6 +261,7 @@ reconfig:
loop = 0;
sleep = 0;
timestamp = rte_rdtsc();
+ RTE_PER_LCORE(datapath_timestamp) = gr_clock_us();
for (;;) {
rte_graph_walk(graph);
@@ -292,6 +297,7 @@ reconfig:
ctx.w_stats->total_cycles += cycles;
ctx.w_stats->loop_cycles += cycles;
ctx.w_stats->n_loops += HOUSEKEEPING_INTERVAL;
+ RTE_PER_LCORE(datapath_timestamp) = gr_clock_us();
}
}
What do you think?
> Only update the bridge FDB entry's last_seen timestamp if at least 1/4
> second has passed since its last update.
> This reduces the pressure on the CPU's store unit.
>
> Signed-off-by: Morten Brørup <mb at smartsharesystems.com>
> ---
> modules/l2/control/fdb.c | 19 +++++++++++++------
> modules/l2/control/l2.h | 3 ++-
> modules/l2/datapath/bridge_input.c | 6 +++++-
> 3 files changed, 20 insertions(+), 8 deletions(-)
PS (off topic): Since we don't have many email contributions, I didn't
bother setting up automated CI. That reminds me of the discussions we
had at the summit about synchronizing the mailing lists and GitHub pull
requests. I will see if I can push my ideas [1] a little further.
[1] https://lists.ozlabs.org/pipermail/patchwork/2025-March/007489.html
>
> diff --git a/modules/l2/control/fdb.c b/modules/l2/control/fdb.c
> index 3982cce1..2f706dec 100644
> --- a/modules/l2/control/fdb.c
> +++ b/modules/l2/control/fdb.c
> @@ -109,13 +109,14 @@ void fdb_learn(
> uint16_t iface_id,
> const struct rte_ether_addr *mac,
> uint16_t vlan_id,
> - const struct l3_addr *vtep
> + const struct l3_addr *vtep,
> + const clock_t now
> ) {
> const struct fdb_key key = {bridge_id, vlan_id, *mac};
> struct gr_fdb_entry *fdb;
> void *data;
>
> - if (rte_hash_lookup_data(fdb_hash, &key, &data) < 0) {
> + if (unlikely(rte_hash_lookup_data(fdb_hash, &key, &data) < 0)) {
> if (rte_mempool_get(fdb_pool, &data) < 0)
> return; // pool exhausted
>
> @@ -126,6 +127,7 @@ void fdb_learn(
> fdb->flags = GR_FDB_F_LEARN;
> fdb->iface_id = iface_id;
> fdb->vtep = *vtep;
> + fdb->last_seen = now;
>
> if (rte_hash_add_key_data(fdb_hash, &key, fdb) < 0) {
> // no space left in hash
> @@ -134,19 +136,24 @@ void fdb_learn(
> }
>
> event_push(GR_EVENT_FDB_ADD, fdb);
> - } else {
> - fdb = data;
> + return;
> }
>
> - fdb->last_seen = gr_clock_us();
> + fdb = data;
>
> if ((fdb->flags & GR_FDB_F_LEARN)
> - && (fdb->iface_id != iface_id || !l3_addr_eq(&fdb->vtep, vtep))) {
> + && unlikely(fdb->iface_id != iface_id || !l3_addr_eq(&fdb->vtep, vtep))) {
> // update in case the mac address has moved
> fdb->iface_id = iface_id;
> fdb->vtep = *vtep;
> + fdb->last_seen = now;
> event_push(GR_EVENT_FDB_UPDATE, fdb);
> + return;
> }
> +
> + // update timestamp if at least 1/4 second since last update
> + if (unlikely(now >= fdb->last_seen + CLOCKS_PER_SEC / 4))
> + fdb->last_seen = now;
> }
>
> void fdb_purge_iface(uint16_t iface_id) {
> diff --git a/modules/l2/control/l2.h b/modules/l2/control/l2.h
> index 5cbb47cd..a58cda62 100644
> --- a/modules/l2/control/l2.h
> +++ b/modules/l2/control/l2.h
> @@ -33,7 +33,8 @@ void fdb_learn(
> uint16_t iface_id,
> const struct rte_ether_addr *,
> uint16_t vlan_id,
> - const struct l3_addr *vtep
> + const struct l3_addr *vtep,
> + const clock_t now
> );
>
> // Delete all FDB entries referencing the provided interface.
> diff --git a/modules/l2/datapath/bridge_input.c b/modules/l2/datapath/bridge_input.c
> index e9ea266f..8fb9ea79 100644
> --- a/modules/l2/datapath/bridge_input.c
> +++ b/modules/l2/datapath/bridge_input.c
> @@ -7,6 +7,8 @@
> #include "mbuf.h"
> #include "rxtx.h"
>
> +#include <gr_clock.h>
> +
> #include <rte_ether.h>
>
> enum edges {
> @@ -39,6 +41,8 @@ static uint16_t bridge_input_process(
> struct rte_mbuf *m;
> rte_edge_t edge;
>
> + const clock_t now = gr_clock_us();
> +
> for (uint16_t i = 0; i < nb_objs; i++) {
> m = objs[i];
> d = iface_mbuf_data(m);
> @@ -63,7 +67,7 @@ static uint16_t bridge_input_process(
> struct l3_addr vtep = {0};
> if (d->iface->type == GR_IFACE_TYPE_VXLAN)
> vtep = d->vtep;
> - fdb_learn(bridge->id, d->iface->id, &eth->src_addr, d->vlan_id, &vtep);
> + fdb_learn(bridge->id, d->iface->id, &eth->src_addr, d->vlan_id, &vtep, now);
> }
>
> if (rte_is_unicast_ether_addr(&eth->dst_addr)) {
--
Robin
> Motorized vehicles only.
More information about the grout
mailing list