[dpdk-dev] [PATCH v3 2/3] eal: add u64 bit variant for reciprocal

Pavan Nikhilesh Bhagavatula pbhagavatula at caviumnetworks.com
Mon Sep 4 16:55:17 CEST 2017


On Mon, Sep 04, 2017 at 02:34:49PM +0000, Burakov, Anatoly wrote:
> > From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Pavan Nikhilesh
> > Sent: Sunday, September 3, 2017 1:36 PM
> > To: dev at dpdk.org
> > Cc: Dumitrescu, Cristian <cristian.dumitrescu at intel.com>;
> > stephen at networkplumber.org; Pavan Nikhilesh
> > <pbhagavatula at caviumnetworks.com>
> > Subject: [dpdk-dev] [PATCH v3 2/3] eal: add u64 bit variant for reciprocal
> >
> > Currently, rte_reciprocal only supports unsigned 32bit divisors. This commit
> > adds support for unsigned 64bit divisors.
> >
> > Rename unsigned 32bit specific functions appropriately and update
> > librte_sched accordingly.
> >
> > Signed-off-by: Pavan Nikhilesh <pbhagavatula at caviumnetworks.com>
> > ---
> >  lib/librte_eal/bsdapp/eal/rte_eal_version.map   |   3 +-
> >  lib/librte_eal/common/include/rte_reciprocal.h  | 111
> > ++++++++++++++++++++--
> >  lib/librte_eal/common/rte_reciprocal.c          | 120
> > +++++++++++++++++++++---
> >  lib/librte_eal/linuxapp/eal/rte_eal_version.map |   3 +-
> >  lib/librte_sched/Makefile                       |   4 +-
> >  lib/librte_sched/rte_sched.c                    |   9 +-
> >  6 files changed, 222 insertions(+), 28 deletions(-)
> >
> > diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map
> > b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
> > index d0bda66..5fd6101 100644
> > --- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map
> > +++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
> > @@ -242,6 +242,7 @@ EXPERIMENTAL {
> >  DPDK_17.11 {
> >  	global:
> >
> > -	rte_reciprocal_value;
> > +	rte_reciprocal_value_u32;
> > +	rte_reciprocal_value_u64;
> >
> >  } DPDK_17.08;
> > diff --git a/lib/librte_eal/common/include/rte_reciprocal.h
> > b/lib/librte_eal/common/include/rte_reciprocal.h
> > index b6d752f..801d1c8 100644
> > --- a/lib/librte_eal/common/include/rte_reciprocal.h
> > +++ b/lib/librte_eal/common/include/rte_reciprocal.h
> > @@ -22,22 +22,117 @@
> >  #ifndef _RTE_RECIPROCAL_H_
> >  #define _RTE_RECIPROCAL_H_
> >
> > -#include <stdint.h>
> > +#include <rte_memory.h>
> >
> > -struct rte_reciprocal {
> > +/**
> > + * Unsigned 32-bit divisor structure.
> > + */
> > +struct rte_reciprocal_u32 {
> >  	uint32_t m;
> >  	uint8_t sh1, sh2;
> > -};
> > +} __rte_cache_aligned;
> > +
> > +/**
> > + * Unsigned 64-bit divisor structure.
> > + */
> > +struct rte_reciprocal_u64 {
> > +	uint64_t m;
> > +	uint8_t sh1;
> > +} __rte_cache_aligned;
> >
> > +/**
> > + * Divide given unsigned 32-bit integer with pre calculated divisor.
> > + *
> > + * @param a
> > + *   The 32-bit dividend.
> > + * @param R
> > + *   The pointer to pre calculated divisor reciprocal structure.
> > + *
> > + * @return
> > + *   The result of the division
> > + */
> >  static inline uint32_t
> > -rte_reciprocal_divide(uint32_t a, struct rte_reciprocal R)
> > +rte_reciprocal_divide_u32(uint32_t a, struct rte_reciprocal_u32 *R) {
> > +	uint32_t t = (((uint64_t)a * R->m) >> 32);
> > +
> > +	return (t + ((a - t) >> R->sh1)) >> R->sh2; }
> > +
> > +static inline uint64_t
> > +mullhi_u64(uint64_t x, uint64_t y)
> > +{
> > +#ifdef __SIZEOF_INT128__
> > +	__uint128_t xl = x;
> > +	__uint128_t rl = xl * y;
> > +
> > +	return (rl >> 64);
> > +#else
> > +	uint64_t u0, u1, v0, v1, k, t;
> > +	uint64_t w1, w2;
> > +	uint64_t whi;
> > +
> > +	u1 = x >> 32; u0 = x & 0xFFFFFFFF;
> > +	v1 = y >> 32; v0 = y & 0xFFFFFFFF;
> > +
> > +	t = u0*v0;
> > +	k = t >> 32;
> > +
> > +	t = u1*v0 + k;
> > +	w1 = t & 0xFFFFFFFF;
> > +	w2 = t >> 32;
> > +
> > +	t = u0*v1 + w1;
> > +	k = t >> 32;
> > +
> > +	whi = u1*v1 + w2 + k;
> > +
> > +	return whi;
> > +#endif
> > +}
> > +
> > +/**
> > + * Divide given unsigned 64-bit integer with pre calculated divisor.
> > + *
> > + * @param a
> > + *   The 64-bit dividend.
> > + * @param R
> > + *   The pointer to pre calculated divisor reciprocal structure.
> > + *
> > + * @return
> > + *   The result of the division
> > + */
> > +static inline uint64_t
> > +rte_reciprocal_divide_u64(uint64_t a, struct rte_reciprocal_u64 *R)
> >  {
> > -	uint32_t t = (uint32_t)(((uint64_t)a * R.m) >> 32);
> > +	uint64_t q = mullhi_u64(R->m, a);
> > +	uint64_t t = ((a - q) >> 1) + q;
> >
> > -	return (t + ((a - t) >> R.sh1)) >> R.sh2;
> > +	return t >> R->sh1;
> >  }
> >
> > -struct rte_reciprocal
> > -rte_reciprocal_value(uint32_t d);
> > +/**
> > + * Generate pre calculated divisor structure.
> > + *
> > + * @param d
> > + *   The unsigned 32-bit divisor.
> > + *
> > + * @return
> > + *   Divisor structure.
> > + */
> > +struct rte_reciprocal_u32
> > +rte_reciprocal_value_u32(uint32_t d);
> > +
> > +/**
> > + * Generate pre calculated divisor structure.
> > + *
> > + * @param d
> > + *   The unsigned 64-bit divisor.
> > + *
> > + * @return
> > + *   Divisor structure.
> > + */
> > +struct rte_reciprocal_u64
> > +rte_reciprocal_value_u64(uint64_t d);
> >
> >  #endif /* _RTE_RECIPROCAL_H_ */
> > diff --git a/lib/librte_eal/common/rte_reciprocal.c
> > b/lib/librte_eal/common/rte_reciprocal.c
> > index 7ab99b4..5d7e367 100644
> > --- a/lib/librte_eal/common/rte_reciprocal.c
> > +++ b/lib/librte_eal/common/rte_reciprocal.c
> > @@ -31,18 +31,13 @@
> >   *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> > DAMAGE.
> >   */
> >
> > -#include <stdio.h>
> > -#include <stdint.h>
> > -
> > -#include <rte_common.h>
> > -
> > -#include "rte_reciprocal.h"
> > +#include <rte_reciprocal.h>
> >
> >  /* find largest set bit.
> >   * portable and slow but does not matter for this usage.
> >   */
> >  static inline int
> > -fls(uint32_t x)
> > +fls_u32(uint32_t x)
> >  {
> >  	int b;
> >
> > @@ -54,21 +49,120 @@ fls(uint32_t x)
> >  	return 0;
> >  }
> >
> > -struct rte_reciprocal
> > -rte_reciprocal_value(uint32_t d)
> > +struct rte_reciprocal_u32
> > +rte_reciprocal_value_u32(uint32_t d)
> >  {
> > -	struct rte_reciprocal R;
> > +	struct rte_reciprocal_u32 R;
> >  	uint64_t m;
> >  	int l;
> >
> > -	l = fls(d - 1);
> > +	l = fls_u32(d - 1);
> >  	m = ((1ULL << 32) * ((1ULL << l) - d));
> >  	m /= d;
> >
> >  	++m;
> >  	R.m = m;
> > -	R.sh1 = RTE_MIN(l, 1);
> > -	R.sh2 = RTE_MAX(l - 1, 0);
> > +	R.sh1 = l > 1 ? 1 : l;
> > +	R.sh2 = (l - 1 > 0) ? l - 1 : 0;
>
> Is there a specific reason for changing from RTE_MIN to what you have? R.sh2 seems identical to the previous version (so reason for change is unclear), and R.sh1 changes behavior (R.sh1 can now potentially be zero, even if in practice it won't) without any explanation given.
>

This was a oversight from my end (Testing with standalone app). Thanks for the
catch, will undo in v4.

-Pavan

> Thanks,
> Anatoly
>
> > +
> > +	return R;
> > +}
> > +
> > +/* Code taken from Hacker's Delight:
> > + * http://www.hackersdelight.org/HDcode/divlu.c.
> > + * License permits inclusion here per:
> > + * http://www.hackersdelight.org/permissions.htm
> > + */
> > +static inline uint64_t
> > +divide_128_div_64_to_64(uint64_t u1, uint64_t u0, uint64_t v, uint64_t
> > +*r) {
> > +	const uint64_t b = (1ULL << 32); /* Number base (16 bits). */
> > +	uint64_t un1, un0,           /* Norm. dividend LSD's. */
> > +			 vn1, vn0,           /* Norm. divisor digits. */
> > +			 q1, q0,             /* Quotient digits. */
> > +			 un64, un21, un10,   /* Dividend digit pairs. */
> > +			 rhat;               /* A remainder. */
> > +	int s;                       /* Shift amount for norm. */
> > +
> > +    /* If overflow, set rem. to an impossible value. */
> > +	if (u1 >= v) {
> > +		if (r != NULL)
> > +			*r = (uint64_t) -1;
> > +		return (uint64_t) -1;
> > +	}
> > +
> > +	/* Count leading zeros. */
> > +	s = __builtin_clzll(v);
> > +	if (s > 0) {
> > +		v = v << s;
> > +		un64 = (u1 << s) | ((u0 >> (64 - s)) & (-s >> 31));
> > +		un10 = u0 << s;
> > +	} else {
> > +
> > +		un64 = u1 | u0;
> > +		un10 = u0;
> > +	}
> > +
> > +	vn1 = v >> 32;
> > +	vn0 = v & 0xFFFFFFFF;
> > +
> > +	un1 = un10 >> 32;
> > +	un0 = un10 & 0xFFFFFFFF;
> > +
> > +	q1 = un64/vn1;
> > +	rhat = un64 - q1*vn1;
> > +again1:
> > +	if (q1 >= b || q1*vn0 > b*rhat + un1) {
> > +		q1 = q1 - 1;
> > +		rhat = rhat + vn1;
> > +		if (rhat < b)
> > +			goto again1;
> > +	}
> > +
> > +	un21 = un64*b + un1 - q1*v;
> > +
> > +	q0 = un21/vn1;
> > +	rhat = un21 - q0*vn1;
> > +again2:
> > +	if (q0 >= b || q0*vn0 > b*rhat + un0) {
> > +		q0 = q0 - 1;
> > +		rhat = rhat + vn1;
> > +		if (rhat < b)
> > +			goto again2;
> > +	}
> > +
> > +	if (r != NULL)
> > +		*r = (un21*b + un0 - q0*v) >> s;
> > +	return q1*b + q0;
> > +}
> > +
> > +struct rte_reciprocal_u64
> > +rte_reciprocal_value_u64(uint64_t d)
> > +{
> > +	struct rte_reciprocal_u64 R;
> > +
> > +	const uint32_t fld = 63 - __builtin_clzll(d);
> > +
> > +	if ((d & (d - 1)) == 0) {
> > +		R.m = 0;
> > +		R.sh1 = (fld - 1) | 0x40;
> > +	} else {
> > +		uint64_t rem;
> > +		uint64_t multiplier;
> > +		uint8_t more;
> > +
> > +		multiplier = divide_128_div_64_to_64(1ULL << fld, 0, d,
> > &rem);
> > +		multiplier += multiplier;
> > +
> > +		const uint64_t twice_rem = rem + rem;
> > +		if (twice_rem >= d || twice_rem < rem)
> > +			multiplier += 1;
> > +		more = fld;
> > +		R.m = 1 + multiplier;
> > +		R.sh1 = more | 0x40;
> > +	}
> > +
> > +	R.sh1 &= 0x3F;
> >
> >  	return R;
> >  }
> > diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> > b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> > index 65117cb..63ff2b8 100644
> > --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> > +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> > @@ -247,6 +247,7 @@ EXPERIMENTAL {
> >  DPDK_17.11 {
> >  	global:
> >
> > -	rte_reciprocal_value;
> > +	rte_reciprocal_value_u32;
> > +	rte_reciprocal_value_u64;
> >
> >  } DPDK_17.08;
> > diff --git a/lib/librte_sched/Makefile b/lib/librte_sched/Makefile index
> > 569656b..a2fd6f3 100644
> > --- a/lib/librte_sched/Makefile
> > +++ b/lib/librte_sched/Makefile
> > @@ -54,6 +54,8 @@ LIBABIVER := 1
> >  SRCS-$(CONFIG_RTE_LIBRTE_SCHED) += rte_sched.c rte_red.c rte_approx.c
> >
> >  # install includes
> > -SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h
> > rte_bitmap.h rte_sched_common.h rte_red.h rte_approx.h
> > +SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h
> > rte_bitmap.h
> > +SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include += rte_sched_common.h
> > +rte_red.h SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include +=
> > rte_approx.h
> >
> >  include $(RTE_SDK)/mk/rte.lib.mk
> > diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c index
> > 3b8ccaa..7bb6d51 100644
> > --- a/lib/librte_sched/rte_sched.c
> > +++ b/lib/librte_sched/rte_sched.c
> > @@ -228,7 +228,7 @@ struct rte_sched_port {
> >  	uint64_t time_cpu_cycles;     /* Current CPU time measured in CPU
> > cyles */
> >  	uint64_t time_cpu_bytes;      /* Current CPU time measured in bytes
> > */
> >  	uint64_t time;                /* Current NIC TX time measured in bytes */
> > -	struct rte_reciprocal inv_cycles_per_byte; /* CPU cycles per byte */
> > +	struct rte_reciprocal_u32 inv_cycles_per_byte; /* CPU cycles per
> > byte
> > +*/
> >
> >  	/* Scheduling loop detection */
> >  	uint32_t pipe_loop;
> > @@ -677,7 +677,7 @@ rte_sched_port_config(struct
> > rte_sched_port_params *params)
> >
> >  	cycles_per_byte = (rte_get_tsc_hz() << RTE_SCHED_TIME_SHIFT)
> >  		/ params->rate;
> > -	port->inv_cycles_per_byte = rte_reciprocal_value(cycles_per_byte);
> > +	port->inv_cycles_per_byte =
> > rte_reciprocal_value_u32(cycles_per_byte);
> >
> >  	/* Scheduling loop detection */
> >  	port->pipe_loop = RTE_SCHED_PIPE_INVALID; @@ -2147,8 +2147,9
> > @@ rte_sched_port_time_resync(struct rte_sched_port *port)
> >  	uint64_t bytes_diff;
> >
> >  	/* Compute elapsed time in bytes */
> > -	bytes_diff = rte_reciprocal_divide(cycles_diff <<
> > RTE_SCHED_TIME_SHIFT,
> > -					   port->inv_cycles_per_byte);
> > +	bytes_diff = rte_reciprocal_divide_u32(
> > +			cycles_diff << RTE_SCHED_TIME_SHIFT,
> > +			&port->inv_cycles_per_byte);
> >
> >  	/* Advance port time */
> >  	port->time_cpu_cycles = cycles;
> > --
> > 2.7.4
>


More information about the dev mailing list