[RFC] random: use per lcore state
Morten Brørup
mb at smartsharesystems.com
Wed Sep 6 20:16:26 CEST 2023
> From: Stephen Hemminger [mailto:stephen at networkplumber.org]
> Sent: Wednesday, 6 September 2023 19.20
>
> Move the random number state into thread local storage.
> This has several benefits:
> - no false sharing of cache lines caused by CPU prefetching
> - fixes initialization of the random state for non-DPDK threads
> - fixes unsafe use of the random state by non-DPDK threads
>
> The random number state is now initialized lazily by each
> lcore on first use.
>
> Signed-off-by: Stephen Hemminger <stephen at networkplumber.org>
> ---
> lib/eal/common/rte_random.c | 38 +++++++++++++++++++------------------
> 1 file changed, 20 insertions(+), 18 deletions(-)
>
> diff --git a/lib/eal/common/rte_random.c b/lib/eal/common/rte_random.c
> index 53636331a27b..9657adf6ad3b 100644
> --- a/lib/eal/common/rte_random.c
> +++ b/lib/eal/common/rte_random.c
> @@ -19,13 +19,14 @@ struct rte_rand_state {
> uint64_t z3;
> uint64_t z4;
> uint64_t z5;
> -} __rte_cache_aligned;
> + uint64_t seed;
> +};
>
> -/* One instance each for every lcore id-equipped thread, and one
> - * additional instance to be shared by all others threads (i.e., all
> - * unregistered non-EAL threads).
> - */
> -static struct rte_rand_state rand_states[RTE_MAX_LCORE + 1];
> +/* Global random seed */
> +static uint64_t rte_rand_seed;
> +
> +/* Per lcore random state. */
> +static RTE_DEFINE_PER_LCORE(struct rte_rand_state, rte_rand_state);
>
> static uint32_t
> __rte_rand_lcg32(uint32_t *seed)
> @@ -81,11 +82,7 @@ __rte_srand_lfsr258(uint64_t seed, struct rte_rand_state *state)
> void
> rte_srand(uint64_t seed)
> {
> - unsigned int lcore_id;
> -
> - /* add lcore_id to seed to avoid having the same sequence */
> - for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
> - __rte_srand_lfsr258(seed + lcore_id, &rand_states[lcore_id]);
> + __atomic_store_n(&rte_rand_seed, seed, __ATOMIC_RELAXED);
> }
>
> static __rte_always_inline uint64_t
> @@ -119,15 +116,18 @@ __rte_rand_lfsr258(struct rte_rand_state *state)
> static __rte_always_inline
> struct rte_rand_state *__rte_rand_get_state(void)
> {
> - unsigned int idx;
> + struct rte_rand_state *rand_state = &RTE_PER_LCORE(rte_rand_state);
> + uint64_t seed;
>
> - idx = rte_lcore_id();
> + seed = __atomic_load_n(&rte_rand_seed, __ATOMIC_RELAXED);
> + if (unlikely(seed != rand_state->seed)) {
Please note that rte_rand_seed lives in a completely different cache line than RTE_PER_LCORE(rte_rand_state). The comparison with rte_rand_seed therefore requires reading one more cache line than the original implementation, which only uses the cache line holding rand_states[idx].
This is in the hot path.
If we could register a per-thread INIT function, the lazy initialization could be avoided entirely, and only one cache line would be accessed.
Or, simply replace "uint64_t seed" with "bool initialized" in the rte_rand_state structure, so the lazy init only needs to read rte_rand_seed if rand_state->initialized is false; see the sketch below.
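A minimal sketch of that second alternative (illustrative only, untested; it reuses the names from the patch above and additionally needs <stdbool.h>):

struct rte_rand_state {
	uint64_t z1;
	uint64_t z2;
	uint64_t z3;
	uint64_t z4;
	uint64_t z5;
	bool initialized; /* instead of uint64_t seed */
};

static __rte_always_inline
struct rte_rand_state *__rte_rand_get_state(void)
{
	struct rte_rand_state *rand_state = &RTE_PER_LCORE(rte_rand_state);

	if (unlikely(!rand_state->initialized)) {
		/* Only the first call in each thread touches the cache
		 * line holding rte_rand_seed; afterwards the hot path
		 * stays within the thread-local state. */
		uint64_t seed = __atomic_load_n(&rte_rand_seed,
				__ATOMIC_RELAXED);

		seed += rte_thread_self().opaque_id;
		__rte_srand_lfsr258(seed, rand_state);
		rand_state->initialized = true;
	}

	return rand_state;
}

The trade-off: with this variant a later rte_srand() does not reach threads that have already initialized their state, whereas the seed comparison in the patch picks up reseeding at the cost of the extra cache line.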
> + rand_state->seed = seed;
>
> - /* last instance reserved for unregistered non-EAL threads */
> - if (unlikely(idx == LCORE_ID_ANY))
> - idx = RTE_MAX_LCORE;
> + seed += rte_thread_self().opaque_id;
> + __rte_srand_lfsr258(seed, rand_state);
> + }
>
> - return &rand_states[idx];
> + return rand_state;
> }
>
> uint64_t
> @@ -227,7 +227,9 @@ RTE_INIT(rte_rand_init)
> {
> uint64_t seed;
>
> - seed = __rte_random_initial_seed();
> + do
> + seed = __rte_random_initial_seed();
> + while (seed == 0);
>
> rte_srand(seed);
> }
> --
> 2.39.2
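For context, a minimal sketch of the usage pattern the patch makes safe (assuming an initialized EAL; error handling mostly elided). Before the patch, all unregistered non-EAL threads shared the single rand_states[RTE_MAX_LCORE] instance and could race on it; with per-thread TLS state, each such thread gets its own lazily seeded generator:

#include <inttypes.h>
#include <pthread.h>
#include <stdio.h>

#include <rte_eal.h>
#include <rte_random.h>

static void *
worker(void *arg)
{
	(void)arg;

	/* Unregistered non-EAL thread: the state is now per-thread TLS,
	 * lazily seeded on this first call. */
	printf("non-EAL thread drew %" PRIu64 "\n", rte_rand());
	return NULL;
}

int
main(int argc, char **argv)
{
	pthread_t tid;

	if (rte_eal_init(argc, argv) < 0)
		return 1;

	pthread_create(&tid, NULL, worker, NULL);
	pthread_join(tid, NULL);

	return 0;
}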