[dpdk-dev] [RFC PATCH] eal: rte_rand yields only 62 random bits

Sanford, Robert rsanford at akamai.com
Fri Mar 27 17:38:01 CET 2015


Never mind ... I cancel the previous suggestion. After further reading on
RNGs, I believe we should abandon the use of lrand48() and implement our
own RNG based on the so-called KISS family of RNGs originally proposed by
the late George Marsaglia. In his excellent paper, "Good Practice in
(Pseudo) Random Number Generation for Bioinformatics Applications", David
Jones (UCL Bioinformatics Group) describes a few variants of KISS
generators. This paper, and Robert G. Brown's (Duke Univ.) comprehensive
"Dieharder" random number test suite work show that KISS RNGs are simple
and fast, yet high quality.

Something like JLKISS64(), with state kept in TLS, would be ideal for DPDK
use. In limited experiments, I found JLKISS64() (not inlined, compiles to
~40 instructions) to be ~4 times faster than rte_rand(). This is probably
because JLKISS64() achieves integer-instruction parallelism, while
rte_rand(), with its two calls to lrand48(), nrand48_r(), and
__drand48_iterate(), does not (and all those calls!).

Here is the JLKISS64() function, as it appears in Jones's GoodPracticeRNG
paper:

---

	
		
		
	
	
		
			
				
					/* Public domain code for JLKISS64 RNG - long period KISS RNG
producing 64-bit results */

					unsigned long long x = 123456789123ULL,y = 987654321987ULL; /* Seed
variables */
unsigned int z1 = 43219876, c1 = 6543217, z2 = 21987643, c2 = 1732654; /*
Seed variables */

					unsigned long long JLKISS64()
{

					    unsigned long long t;

					x = 1490024343005336237ULL * x + 123456789;
y ^= y << 21; y ^= y >> 17; y ^= y << 30; /* Do not set y=0! */
t = 4294584393ULL * z1 + c1; c1 = t >> 32; z1 = t;
t = 4246477509ULL * z2 + c2; c2 = t >> 32; z2 = t;

					return x + y + z1 + ((unsigned long long)z2 << 32); /* Return 64-bit
result */
}

				
			
		
	



--
Regards,
Robert



>The implementation of rte_rand() returns only 62 bits of
>pseudo-randomness, because the underlying calls to lrand48()
>"return non-negative long integers uniformly distributed
>between 0 and 2^31."
>
>We have written a potential fix, but before we spend more
>time testing and refining it, I wanted to check with you
>guys.
>
>We switched to using the reentrant versions of [ls]rand48,
>and maintain per-lcore state. We need ~2.06 calls to
>lrand48_r(), per call to rte_rand().
>
>Do you agree with the approach we've taken in this patch?
>
>
>Thanks,
>Robert
>
>---
> lib/librte_eal/common/include/rte_random.h |   33
>+++++++++++++++++++++++++--
> lib/librte_eal/linuxapp/eal/eal_thread.c   |    7 ++++++
> 2 files changed, 37 insertions(+), 3 deletions(-)
>
>diff --git a/lib/librte_eal/common/include/rte_random.h
>b/lib/librte_eal/common/include/rte_random.h
>index 24ae836..b9248cd 100644
>--- a/lib/librte_eal/common/include/rte_random.h
>+++ b/lib/librte_eal/common/include/rte_random.h
>@@ -46,6 +46,17 @@ extern "C" {
> 
> #include <stdint.h>
> #include <stdlib.h>
>+#include <rte_per_lcore.h>
>+#include <rte_branch_prediction.h>
>+
>+struct rte_rand_data {
>+	struct drand48_data _dr48;
>+	uint32_t _hi_bits;
>+	uint8_t _bits_left;
>+};
>+
>+RTE_DECLARE_PER_LCORE(struct rte_rand_data, _rand_data);
>+
> 
> /**
>  * Seed the pseudo-random generator.
>@@ -60,7 +71,7 @@ extern "C" {
> static inline void
> rte_srand(uint64_t seedval)
> {
>-	srand48((long unsigned int)seedval);
>+	srand48_r((long unsigned int)seedval, &RTE_PER_LCORE(_rand_data)._dr48);
> }
> 
> /**
>@@ -76,10 +87,26 @@ rte_srand(uint64_t seedval)
> static inline uint64_t
> rte_rand(void)
> {
>+	struct rte_rand_data *rd = &RTE_PER_LCORE(_rand_data);
> 	uint64_t val;
>-	val = lrand48();
>+	uint32_t hi_bits;
>+	long int result;
>+
>+	if (unlikely(rd->_bits_left < 2)) {
>+		lrand48_r(&rd->_dr48, &result);
>+		rd->_hi_bits |= (uint32_t)result << (1 - rd->_bits_left);
>+		rd->_bits_left += 31;
>+	}
>+
>+	hi_bits = rd->_hi_bits;
>+	lrand48_r(&rd->_dr48, &result);
>+	val = (uint32_t)result | (hi_bits & 0x8000000);
> 	val <<= 32;
>-	val += lrand48();
>+	hi_bits <<= 1;
>+	lrand48_r(&rd->_dr48, &result);
>+	val |= (uint32_t)result | (hi_bits & 0x8000000);
>+	rd->_hi_bits = hi_bits << 1;
>+	rd->_bits_left -= 2;
> 	return val;
> }
> 
>diff --git a/lib/librte_eal/linuxapp/eal/eal_thread.c
>b/lib/librte_eal/linuxapp/eal/eal_thread.c
>index 5635c7d..08e7f72 100644
>--- a/lib/librte_eal/linuxapp/eal/eal_thread.c
>+++ b/lib/librte_eal/linuxapp/eal/eal_thread.c
>@@ -52,6 +52,8 @@
> #include <rte_eal.h>
> #include <rte_per_lcore.h>
> #include <rte_lcore.h>
>+#include <rte_cycles.h>
>+#include <rte_random.h>
> 
> #include "eal_private.h"
> #include "eal_thread.h"
>@@ -59,6 +61,8 @@
> RTE_DEFINE_PER_LCORE(unsigned, _lcore_id) = LCORE_ID_ANY;
> RTE_DEFINE_PER_LCORE(unsigned, _socket_id) = (unsigned)SOCKET_ID_ANY;
> RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset);
>+RTE_DEFINE_PER_LCORE(struct rte_rand_data, _rand_data);
>+
> 
> /*
>  * Send a message to a slave lcore identified by slave_id to call a
>@@ -147,6 +151,9 @@ eal_thread_loop(__attribute__((unused)) void *arg)
> 	/* set the lcore ID in per-lcore memory area */
> 	RTE_PER_LCORE(_lcore_id) = lcore_id;
> 
>+	/* seed per-lcore PRNG */
>+	rte_srand(rte_rdtsc());
>+
> 	/* set CPU affinity */
> 	if (eal_thread_set_affinity() < 0)
> 		rte_panic("cannot set affinity\n");
>-- 
>1.7.1
>



More information about the dev mailing list