[dpdk-dev] [PATCH v6 2/3] eal: add u64 bit variant for reciprocal
Dumitrescu, Cristian
cristian.dumitrescu at intel.com
Wed Sep 20 15:15:29 CEST 2017
Hi Pavan,
Same ask as for the first patch:
- Do not change existing code in rte_reciprocal.[hc]: no _u32 suffix, please.
- Do not add lots of CR+LF to existing code.
> -----Original Message-----
> From: Pavan Nikhilesh [mailto:pbhagavatula at caviumnetworks.com]
> Sent: Wednesday, September 6, 2017 11:22 AM
> To: Dumitrescu, Cristian <cristian.dumitrescu at intel.com>;
> stephen at networkplumber.org
> Cc: dev at dpdk.org; Pavan Bhagavatula
> <pbhagavatula at caviumnetworks.com>
> Subject: [dpdk-dev] [PATCH v6 2/3] eal: add u64 bit variant for reciprocal
>
> From: Pavan Bhagavatula <pbhagavatula at caviumnetworks.com>
>
> Currently, rte_reciprocal only supports unsigned 32bit divisors. This
> commit adds support for unsigned 64bit divisors.
>
> Rename unsigned 32bit specific functions appropriately and update
> librte_sched accordingly.
>
> Signed-off-by: Pavan Nikhilesh <pbhagavatula at caviumnetworks.com>
> ---
> lib/librte_eal/bsdapp/eal/rte_eal_version.map | 3 +-
> lib/librte_eal/common/include/rte_reciprocal.h | 109
> ++++++++++++++++++++--
> lib/librte_eal/common/rte_reciprocal.c | 116
> +++++++++++++++++++++---
> lib/librte_eal/linuxapp/eal/rte_eal_version.map | 3 +-
> lib/librte_sched/Makefile | 4 +-
> lib/librte_sched/rte_sched.c | 9 +-
> 6 files changed, 219 insertions(+), 25 deletions(-)
>
> diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map
> b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
> index 90d7258..59a85bb 100644
> --- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map
> +++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
> @@ -241,6 +241,7 @@ EXPERIMENTAL {
> DPDK_17.11 {
> global:
>
> - rte_reciprocal_value;
> + rte_reciprocal_value_u32;
> + rte_reciprocal_value_u64;
>
> } DPDK_17.08;
> diff --git a/lib/librte_eal/common/include/rte_reciprocal.h
> b/lib/librte_eal/common/include/rte_reciprocal.h
> index b6d752f..85599e6 100644
> --- a/lib/librte_eal/common/include/rte_reciprocal.h
> +++ b/lib/librte_eal/common/include/rte_reciprocal.h
> @@ -22,22 +22,117 @@
> #ifndef _RTE_RECIPROCAL_H_
> #define _RTE_RECIPROCAL_H_
>
> -#include <stdint.h>
> +#include <rte_memory.h>
>
> -struct rte_reciprocal {
> +/**
> + * Unsigned 32-bit divisor structure.
> + */
> +struct rte_reciprocal_u32 {
> uint32_t m;
> uint8_t sh1, sh2;
> };
>
Do not add the _u32 suffix to the existing structure; no name change, please.
> +/**
> + * Unsigned 64-bit divisor structure.
> + */
> +struct rte_reciprocal_u64 {
> + uint64_t m;
> + uint8_t sh1;
> +};
> +
I am OK with adding the _u64 suffix for the new API.
> +/**
> + * Divide given unsigned 32-bit integer with pre calculated divisor.
> + *
> + * @param a
> + * The 32-bit dividend.
> + * @param R
> + * The pointer to pre calculated divisor reciprocal structure.
> + *
> + * @return
> + * The result of the division
> + */
> static inline uint32_t
> -rte_reciprocal_divide(uint32_t a, struct rte_reciprocal R)
> +rte_reciprocal_divide_u32(uint32_t a, struct rte_reciprocal_u32 *R)
> {
> - uint32_t t = (uint32_t)(((uint64_t)a * R.m) >> 32);
> + uint32_t t = (((uint64_t)a * R->m) >> 32);
>
> - return (t + ((a - t) >> R.sh1)) >> R.sh2;
> + return (t + ((a - t) >> R->sh1)) >> R->sh2;
> }
Do not add _u32 suffix for existing API.
>
> -struct rte_reciprocal
> -rte_reciprocal_value(uint32_t d);
> +static inline uint64_t
> +mullhi_u64(uint64_t x, uint64_t y)
> +{
> +#ifdef __SIZEOF_INT128__
> + __uint128_t xl = x;
> + __uint128_t rl = xl * y;
> +
> + return (rl >> 64);
> +#else
> + uint64_t u0, u1, v0, v1, k, t;
> + uint64_t w1, w2;
> + uint64_t whi;
> +
> + u1 = x >> 32; u0 = x & 0xFFFFFFFF;
> + v1 = y >> 32; v0 = y & 0xFFFFFFFF;
> +
> + t = u0*v0;
> + k = t >> 32;
> +
> + t = u1*v0 + k;
> + w1 = t & 0xFFFFFFFF;
> + w2 = t >> 32;
> +
> + t = u0*v1 + w1;
> + k = t >> 32;
> +
> + whi = u1*v1 + w2 + k;
> +
> + return whi;
> +#endif
> +}
> +
> +/**
> + * Divide given unsigned 64-bit integer with pre calculated divisor.
> + *
> + * @param a
> + * The 64-bit dividend.
> + * @param R
> + * The pointer to pre calculated divisor reciprocal structure.
> + *
> + * @return
> + * The result of the division
> + */
> +static inline uint64_t
> +rte_reciprocal_divide_u64(uint64_t a, struct rte_reciprocal_u64 *R)
> +{
> + uint64_t q = mullhi_u64(R->m, a);
> + uint64_t t = ((a - q) >> 1) + q;
> +
> + return t >> R->sh1;
> +}
> +
> +/**
> + * Generate pre calculated divisor structure.
> + *
> + * @param d
> + * The unsigned 32-bit divisor.
> + *
> + * @return
> + * Divisor structure.
> + */
> +struct rte_reciprocal_u32
> +rte_reciprocal_value_u32(uint32_t d);
> +
Do not add _u32 suffix for existing API.
> +/**
> + * Generate pre calculated divisor structure.
> + *
> + * @param d
> + * The unsigned 64-bit divisor.
> + *
> + * @return
> + * Divisor structure.
> + */
> +struct rte_reciprocal_u64
> +rte_reciprocal_value_u64(uint64_t d);
>
> #endif /* _RTE_RECIPROCAL_H_ */
> diff --git a/lib/librte_eal/common/rte_reciprocal.c
> b/lib/librte_eal/common/rte_reciprocal.c
> index 7ab99b4..2024e62 100644
> --- a/lib/librte_eal/common/rte_reciprocal.c
> +++ b/lib/librte_eal/common/rte_reciprocal.c
> @@ -31,18 +31,13 @@
> * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> DAMAGE.
> */
>
> -#include <stdio.h>
> -#include <stdint.h>
> -
> -#include <rte_common.h>
> -
> -#include "rte_reciprocal.h"
> +#include <rte_reciprocal.h>
>
> /* find largest set bit.
> * portable and slow but does not matter for this usage.
> */
> static inline int
> -fls(uint32_t x)
> +fls_u32(uint32_t x)
> {
> int b;
>
> @@ -54,14 +49,14 @@ fls(uint32_t x)
> return 0;
> }
>
Do not add _u32 suffix for existing API.
> -struct rte_reciprocal
> -rte_reciprocal_value(uint32_t d)
> +struct rte_reciprocal_u32
> +rte_reciprocal_value_u32(uint32_t d)
Do not add _u32 suffix for existing API.
> {
> - struct rte_reciprocal R;
> + struct rte_reciprocal_u32 R;
> uint64_t m;
> int l;
>
> - l = fls(d - 1);
> + l = fls_u32(d - 1);
> m = ((1ULL << 32) * ((1ULL << l) - d));
> m /= d;
>
> @@ -72,3 +67,102 @@ rte_reciprocal_value(uint32_t d)
>
> return R;
> }
> +
> +/* Code taken from Hacker's Delight:
> + * http://www.hackersdelight.org/HDcode/divlu.c.
> + * License permits inclusion here per:
> + * http://www.hackersdelight.org/permissions.htm
> + */
> +static inline uint64_t
> +divide_128_div_64_to_64(uint64_t u1, uint64_t u0, uint64_t v, uint64_t *r)
> +{
> + const uint64_t b = (1ULL << 32); /* Number base (16 bits). */
> + uint64_t un1, un0, /* Norm. dividend LSD's. */
> + vn1, vn0, /* Norm. divisor digits. */
> + q1, q0, /* Quotient digits. */
> + un64, un21, un10, /* Dividend digit pairs. */
> + rhat; /* A remainder. */
> + int s; /* Shift amount for norm. */
> +
> + /* If overflow, set rem. to an impossible value. */
> + if (u1 >= v) {
> + if (r != NULL)
> + *r = (uint64_t) -1;
> + return (uint64_t) -1;
> + }
> +
> + /* Count leading zeros. */
> + s = __builtin_clzll(v);
> + if (s > 0) {
> + v = v << s;
> + un64 = (u1 << s) | ((u0 >> (64 - s)) & (-s >> 31));
> + un10 = u0 << s;
> + } else {
> +
> + un64 = u1 | u0;
> + un10 = u0;
> + }
> +
> + vn1 = v >> 32;
> + vn0 = v & 0xFFFFFFFF;
> +
> + un1 = un10 >> 32;
> + un0 = un10 & 0xFFFFFFFF;
> +
> + q1 = un64/vn1;
> + rhat = un64 - q1*vn1;
> +again1:
> + if (q1 >= b || q1*vn0 > b*rhat + un1) {
> + q1 = q1 - 1;
> + rhat = rhat + vn1;
> + if (rhat < b)
> + goto again1;
> + }
> +
> + un21 = un64*b + un1 - q1*v;
> +
> + q0 = un21/vn1;
> + rhat = un21 - q0*vn1;
> +again2:
> + if (q0 >= b || q0*vn0 > b*rhat + un0) {
> + q0 = q0 - 1;
> + rhat = rhat + vn1;
> + if (rhat < b)
> + goto again2;
> + }
> +
> + if (r != NULL)
> + *r = (un21*b + un0 - q0*v) >> s;
> + return q1*b + q0;
> +}
> +
> +struct rte_reciprocal_u64
> +rte_reciprocal_value_u64(uint64_t d)
> +{
> + struct rte_reciprocal_u64 R;
> +
> + const uint32_t fld = 63 - __builtin_clzll(d);
> +
> + if ((d & (d - 1)) == 0) {
> + R.m = 0;
> + R.sh1 = (fld - 1) | 0x40;
> + } else {
> + uint64_t rem;
> + uint64_t multiplier;
> + uint8_t more;
> +
> + multiplier = divide_128_div_64_to_64(1ULL << fld, 0, d,
> &rem);
> + multiplier += multiplier;
> +
> + const uint64_t twice_rem = rem + rem;
> + if (twice_rem >= d || twice_rem < rem)
> + multiplier += 1;
> + more = fld;
> + R.m = 1 + multiplier;
> + R.sh1 = more | 0x40;
> + }
> +
> + R.sh1 &= 0x3F;
> +
> + return R;
> +}
> diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> index 2070cba..2671627 100644
> --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> @@ -246,6 +246,7 @@ EXPERIMENTAL {
> DPDK_17.11 {
> global:
>
> - rte_reciprocal_value;
> + rte_reciprocal_value_u32;
Do not add _u32 suffix for existing API.
> + rte_reciprocal_value_u64;
>
> } DPDK_17.08;
> diff --git a/lib/librte_sched/Makefile b/lib/librte_sched/Makefile
> index 569656b..a2fd6f3 100644
> --- a/lib/librte_sched/Makefile
> +++ b/lib/librte_sched/Makefile
> @@ -54,6 +54,8 @@ LIBABIVER := 1
> SRCS-$(CONFIG_RTE_LIBRTE_SCHED) += rte_sched.c rte_red.c rte_approx.c
>
> # install includes
> -SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h
> rte_bitmap.h rte_sched_common.h rte_red.h rte_approx.h
> +SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h
> rte_bitmap.h
> +SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include += rte_sched_common.h
> rte_red.h
> +SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include += rte_approx.h
>
> include $(RTE_SDK)/mk/rte.lib.mk
> diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c
> index 3b8ccaa..7bb6d51 100644
> --- a/lib/librte_sched/rte_sched.c
> +++ b/lib/librte_sched/rte_sched.c
> @@ -228,7 +228,7 @@ struct rte_sched_port {
> uint64_t time_cpu_cycles; /* Current CPU time measured in CPU
> cyles */
> uint64_t time_cpu_bytes; /* Current CPU time measured in bytes
> */
> uint64_t time; /* Current NIC TX time measured in bytes */
> - struct rte_reciprocal inv_cycles_per_byte; /* CPU cycles per byte */
> + struct rte_reciprocal_u32 inv_cycles_per_byte; /* CPU cycles per
> byte */
>
> /* Scheduling loop detection */
> uint32_t pipe_loop;
> @@ -677,7 +677,7 @@ rte_sched_port_config(struct
> rte_sched_port_params *params)
>
> cycles_per_byte = (rte_get_tsc_hz() << RTE_SCHED_TIME_SHIFT)
> / params->rate;
> - port->inv_cycles_per_byte = rte_reciprocal_value(cycles_per_byte);
> + port->inv_cycles_per_byte =
> rte_reciprocal_value_u32(cycles_per_byte);
>
Do not add _u32 suffix for existing API.
> /* Scheduling loop detection */
> port->pipe_loop = RTE_SCHED_PIPE_INVALID;
> @@ -2147,8 +2147,9 @@ rte_sched_port_time_resync(struct
> rte_sched_port *port)
> uint64_t bytes_diff;
>
> /* Compute elapsed time in bytes */
> - bytes_diff = rte_reciprocal_divide(cycles_diff <<
> RTE_SCHED_TIME_SHIFT,
> - port->inv_cycles_per_byte);
> + bytes_diff = rte_reciprocal_divide_u32(
Do not add _u32 suffix for existing API.
> + cycles_diff << RTE_SCHED_TIME_SHIFT,
> + &port->inv_cycles_per_byte);
>
> /* Advance port time */
> port->time_cpu_cycles = cycles;
> --
> 2.7.4
More information about the dev
mailing list