[PATCH] eal: add cache guard to per-lcore PRNG state

Mattias Rönnblom hofors at lysator.liu.se
Wed Oct 11 22:41:47 CEST 2023


On 2023-10-11 18:07, Thomas Monjalon wrote:
> TLS is an alternative solution proposed by Stephen.
> What do you think?
> 

I've expressed my views on this topic in two threads already.

I'm happy to continue that discussion, but I would suggest it would be 
under the banner of "what should the standard pattern for maintaining 
per-lcore (and maybe also per-unregistered thread state) be in DPDK".

A related issue is the ambition level for having unregistered threads 
calling into DPDK APIs in general. MT safety issues, performance issues, 
and concerns around preemption safety.

> 
> 06/09/2023 18:25, Stephen Hemminger:
>> On Mon, 4 Sep 2023 13:57:19 +0200
>> Mattias Rönnblom <hofors at lysator.liu.se> wrote:
>>
>>> On 2023-09-04 11:26, Morten Brørup wrote:
>>>> The per-lcore random state is frequently updated by their individual
>>>> lcores, so add a cache guard to prevent CPU cache thrashing.
>>>>    
>>>
>>> "to prevent false sharing in case the CPU employs a next-N-lines (or
>>> similar) hardware prefetcher"
>>>
>>> In my world, cache trashing and cache line contention are two different
>>> things.
>>>
>>> Other than that,
>>> Acked-by: Mattias Rönnblom <mattias.ronnblom at ericsson.com>
>>
>> Could the per-lcore state be thread local?
>>
>> Something like this:
>>
>>  From 3df5e28a7e5589d05e1eade62a0979e84697853d Mon Sep 17 00:00:00 2001
>> From: Stephen Hemminger <stephen at networkplumber.org>
>> Date: Wed, 6 Sep 2023 09:22:42 -0700
>> Subject: [PATCH] random: use per lcore state
>>
>> Move the random number state into thread local storage.
>> This has a several benefits.
>>   - no false cache sharing from cpu prefetching
>>   - fixes initialization of random state for non-DPDK threads
>>   - fixes unsafe usage of random state by non-DPDK threads
>>
>> The initialization of random number state is done by the
>> lcore (lazy initialization).
>>
>> Signed-off-by: Stephen Hemminger <stephen at networkplumber.org>
>> ---
>>   lib/eal/common/rte_random.c | 35 +++++++++++++++++------------------
>>   1 file changed, 17 insertions(+), 18 deletions(-)
>>
>> diff --git a/lib/eal/common/rte_random.c b/lib/eal/common/rte_random.c
>> index 53636331a27b..62f36038ac52 100644
>> --- a/lib/eal/common/rte_random.c
>> +++ b/lib/eal/common/rte_random.c
>> @@ -19,13 +19,14 @@ struct rte_rand_state {
>>   	uint64_t z3;
>>   	uint64_t z4;
>>   	uint64_t z5;
>> -} __rte_cache_aligned;
>> +	uint64_t seed;
>> +};
>>   
>> -/* One instance each for every lcore id-equipped thread, and one
>> - * additional instance to be shared by all others threads (i.e., all
>> - * unregistered non-EAL threads).
>> - */
>> -static struct rte_rand_state rand_states[RTE_MAX_LCORE + 1];
>> +/* Global random seed */
>> +static uint64_t rte_rand_seed;
>> +
>> +/* Per lcore random state. */
>> +static RTE_DEFINE_PER_LCORE(struct rte_rand_state, rte_rand_state);
>>   
>>   static uint32_t
>>   __rte_rand_lcg32(uint32_t *seed)
>> @@ -76,16 +77,14 @@ __rte_srand_lfsr258(uint64_t seed, struct rte_rand_state *state)
>>   	state->z3 = __rte_rand_lfsr258_gen_seed(&lcg_seed, 4096UL);
>>   	state->z4 = __rte_rand_lfsr258_gen_seed(&lcg_seed, 131072UL);
>>   	state->z5 = __rte_rand_lfsr258_gen_seed(&lcg_seed, 8388608UL);
>> +
>> +	state->seed = seed;
>>   }
>>   
>>   void
>>   rte_srand(uint64_t seed)
>>   {
>> -	unsigned int lcore_id;
>> -
>> -	/* add lcore_id to seed to avoid having the same sequence */
>> -	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
>> -		__rte_srand_lfsr258(seed + lcore_id, &rand_states[lcore_id]);
>> +	__atomic_store_n(&rte_rand_seed, seed, __ATOMIC_RELAXED);
>>   }
>>   
>>   static __rte_always_inline uint64_t
>> @@ -119,15 +118,15 @@ __rte_rand_lfsr258(struct rte_rand_state *state)
>>   static __rte_always_inline
>>   struct rte_rand_state *__rte_rand_get_state(void)
>>   {
>> -	unsigned int idx;
>> -
>> -	idx = rte_lcore_id();
>> +	struct rte_rand_state *rand_state = &RTE_PER_LCORE(rte_rand_state);
>> +	uint64_t seed;
>>   
>> -	/* last instance reserved for unregistered non-EAL threads */
>> -	if (unlikely(idx == LCORE_ID_ANY))
>> -		idx = RTE_MAX_LCORE;
>> +	/* did seed change */
>> +	seed = __atomic_load_n(&rte_rand_seed, __ATOMIC_RELAXED);
>> +	if (unlikely(seed != rand_state->seed))
>> +		__rte_srand_lfsr258(seed, rand_state);
>>   
>> -	return &rand_states[idx];
>> +	return rand_state;
>>   }
>>   
>>   uint64_t
>>
> 
> 
> 
> 
> 


More information about the dev mailing list