[PATCH v8 4/5] app/testpmd: report lcore usage
David Marchand
david.marchand at redhat.com
Mon Feb 6 09:58:28 CET 2023
Hi Robin,
On Thu, Feb 2, 2023 at 2:44 PM Robin Jarry <rjarry at redhat.com> wrote:
>
> Reuse the --record-core-cycles option to account for busy cycles. One
> turn of packet_fwd_t is considered "busy" if there was at least one
> received or transmitted packet.
>
> Add a new busy_cycles field in struct fwd_stream. Update get_end_cycles
> to accept an additional argument for the number of processed packets.
> Update fwd_stream.busy_cycles when the number of packets is greater than
> zero.
>
> When --record-core-cycles is specified, register a callback with
> rte_lcore_register_usage_cb(). In the callback, use the new lcore_id
> field in struct fwd_lcore to identify the correct index in fwd_lcores
> and return the sum of busy/total cycles of all fwd_streams.
>
> This makes the cycles counters available in rte_lcore_dump() and the
> lcore telemetry API:
>
> testpmd> dump_lcores
> lcore 3, socket 0, role RTE, cpuset 3
> lcore 4, socket 0, role RTE, cpuset 4, busy cycles 1228584096/9239923140
> lcore 5, socket 0, role RTE, cpuset 5, busy cycles 1255661768/9218141538
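If I read the description above correctly, the change boils down to something
like this (a rough sketch reconstructed from the commit message, not the exact
patch; apart from get_end_cycles, rte_lcore_register_usage_cb and the
fwd_stream fields, the details are only illustrative):

	/* Sketch: accumulate total vs. busy cycles per forwarding stream. */
	static inline void
	get_end_cycles(struct fwd_stream *fs, uint64_t start_tsc, uint64_t nb_packets)
	{
		if (record_core_cycles) {
			uint64_t cycles = rte_rdtsc() - start_tsc;

			fs->core_cycles += cycles;  /* total cycles spent in this turn */
			if (nb_packets > 0)
				fs->busy_cycles += cycles;  /* busy only if packets were processed */
		}
	}

	/* Sketch: expose the counters through the new lcore usage API. */
	if (record_core_cycles)
		rte_lcore_register_usage_cb(lcore_usage_callback);
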
I have been playing a bit with this series using two lcores, each one
polling a net/null port.
At first it looked good, but then I made one lcore idle by asking net/null
not to receive anything.
$ build-clang/app/dpdk-testpmd -c 7 --no-huge -m 40 -a 0:0.0 \
    --vdev net_null1,no-rx=1 --vdev net_null2 -- \
    --no-mlockall --total-num-mbufs=2048 -ia --record-core-cycles --nb-cores=2
One thing that struck me is that an idle lcore always showed a lower
"total_cycles" than a busy one.
The longer testpmd ran, the bigger the divergence between lcores became.
Re-reading the API, its expected semantics are unclear to me (which is the
reason for my comments on patch 2).
Let's first sort out my comments on patch 2, and we can revisit this patch 4
implementation afterwards (as I think some main-loop cycles are not accounted
for with the current implementation).
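To illustrate that last point: cycles only get recorded inside each pkt_fwd
call, between get_start_cycles() and get_end_cycles(). Everything the main
loop does around those calls, and any engine path that returns before reaching
get_end_cycles() (e.g. on an empty poll), never lands in fs->core_cycles.
Roughly (a sketch of the forwarding main loop, not the exact testpmd code):

	do {
		for (sm_id = 0; sm_id < nb_fs; sm_id++) {
			if (fsm[sm_id]->disabled)
				continue;
			/* Only the window measured inside pkt_fwd, from
			 * get_start_cycles() to get_end_cycles(), is added to
			 * fs->core_cycles; if an engine bails out early on an
			 * empty poll, even that window is lost. */
			(*pkt_fwd)(fsm[sm_id]);
		}
		/* The loop overhead here is never accounted, so an lcore that
		 * mostly polls empty queues accumulates fewer total cycles. */
	} while (! fc->stopped);
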
For now, I have some comments on the existing data structures, see below.
[snip]
> diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
> index e366f81a0f46..105f75ad5f35 100644
> --- a/app/test-pmd/testpmd.c
> +++ b/app/test-pmd/testpmd.c
> @@ -2053,7 +2053,7 @@ fwd_stats_display(void)
>  				fs->rx_bad_outer_ip_csum;
>
>  		if (record_core_cycles)
> -			fwd_cycles += fs->core_cycles;
> +			fwd_cycles += fs->busy_cycles;
>  	}
>  	for (i = 0; i < cur_fwd_config.nb_fwd_ports; i++) {
>  		pt_id = fwd_ports_ids[i];
> @@ -2184,6 +2184,7 @@ fwd_stats_reset(void)
>
>  		memset(&fs->rx_burst_stats, 0, sizeof(fs->rx_burst_stats));
>  		memset(&fs->tx_burst_stats, 0, sizeof(fs->tx_burst_stats));
> +		fs->busy_cycles = 0;
>  		fs->core_cycles = 0;
>  	}
>  }
> @@ -2260,6 +2261,7 @@ run_pkt_fwd_on_lcore(struct fwd_lcore *fc, packet_fwd_t pkt_fwd)
>  	tics_datum = rte_rdtsc();
>  	tics_per_1sec = rte_get_timer_hz();
>  #endif
> +	fc->lcore_id = rte_lcore_id();
A fwd_lcore object is bound to a single lcore, so this lcore_id is unneeded.
>  	fsm = &fwd_streams[fc->stream_idx];
>  	nb_fs = fc->stream_nb;
>  	do {
> @@ -2288,6 +2290,38 @@ run_pkt_fwd_on_lcore(struct fwd_lcore *fc, packet_fwd_t pkt_fwd)
>  	} while (! fc->stopped);
>  }
>
> +static int
> +lcore_usage_callback(unsigned int lcore_id, struct rte_lcore_usage *usage)
> +{
> +	struct fwd_stream **fsm;
> +	struct fwd_lcore *fc;
> +	streamid_t nb_fs;
> +	streamid_t sm_id;
> +	int c;
> +
> +	for (c = 0; c < nb_lcores; c++) {
> +		fc = fwd_lcores[c];
> +		if (fc->lcore_id != lcore_id)
> +			continue;
You can find which fwd_lcore is mapped to a lcore using existing structures.
This requires updating some helper, something like:
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index 7d24d25970..e5297ee7fb 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -785,25 +785,31 @@ is_proc_primary(void)
 	return rte_eal_process_type() == RTE_PROC_PRIMARY;
 }
 
-static inline unsigned int
-lcore_num(void)
+void
+parse_fwd_portlist(const char *port);
+
+static inline struct fwd_lcore *
+lcore_to_fwd_lcore(uint16_t lcore_id)
 {
 	unsigned int i;
 
-	for (i = 0; i < RTE_MAX_LCORE; ++i)
-		if (fwd_lcores_cpuids[i] == rte_lcore_id())
-			return i;
+	for (i = 0; i < cur_fwd_config.nb_fwd_lcores; ++i) {
+		if (fwd_lcores_cpuids[i] == lcore_id)
+			return fwd_lcores[i];
+	}
 
-	rte_panic("lcore_id of current thread not found in fwd_lcores_cpuids\n");
+	return NULL;
 }
 
-void
-parse_fwd_portlist(const char *port);
-
 static inline struct fwd_lcore *
 current_fwd_lcore(void)
 {
-	return fwd_lcores[lcore_num()];
+	struct fwd_lcore *fc = lcore_to_fwd_lcore(rte_lcore_id());
+
+	if (fc == NULL)
+		rte_panic("lcore_id of current thread not found in fwd_lcores_cpuids\n");
+
+	return fc;
 }
 
 /* Mbuf Pools */
And then by using this new helper, lcore_usage_callback becomes simpler:
+static int
+lcore_usage_callback(unsigned int lcore_id, struct rte_lcore_usage *usage)
+{
+	struct fwd_stream **fsm;
+	struct fwd_lcore *fc;
+	streamid_t nb_fs;
+	streamid_t sm_id;
+
+	fc = lcore_to_fwd_lcore(lcore_id);
+	if (fc == NULL)
+		return -1;
+
+	fsm = &fwd_streams[fc->stream_idx];
+	nb_fs = fc->stream_nb;
+	usage->busy_cycles = 0;
+	usage->total_cycles = 0;
+
+	for (sm_id = 0; sm_id < nb_fs; sm_id++) {
+		if (fsm[sm_id]->disabled)
+			continue;
+
+		usage->busy_cycles += fsm[sm_id]->busy_cycles;
+		usage->total_cycles += fsm[sm_id]->core_cycles;
+	}
+
+	return 0;
+}
+
+
--
David Marchand