[dpdk-dev] [RFC 4/4] net/ether: use bitops to speedup comparison
Mattias Rönnblom
mattias.ronnblom at ericsson.com
Thu May 16 11:03:10 CEST 2019
On 2019-05-16 00:19, Stephen Hemminger wrote:
> Using bit operations like or and xor is faster than a loop
> on all architectures. Really just explicit unrolling.
>
> Similar cast to uint16 unaligned is already done in
> other functions here.
>
> Signed-off-by: Stephen Hemminger <stephen at networkplumber.org>
> ---
> lib/librte_net/rte_ether.h | 17 +++++++----------
> 1 file changed, 7 insertions(+), 10 deletions(-)
>
> diff --git a/lib/librte_net/rte_ether.h b/lib/librte_net/rte_ether.h
> index b94e64b2195e..5d9242cda230 100644
> --- a/lib/librte_net/rte_ether.h
> +++ b/lib/librte_net/rte_ether.h
> @@ -78,11 +78,10 @@ struct ether_addr {
> static inline int is_same_ether_addr(const struct ether_addr *ea1,
> const struct ether_addr *ea2)
> {
> - int i;
> - for (i = 0; i < ETHER_ADDR_LEN; i++)
> - if (ea1->addr_bytes[i] != ea2->addr_bytes[i])
> - return 0;
> - return 1;
> + const unaligned_uint16_t *w1 = (const uint16_t *)ea1;
> + const unaligned_uint16_t *w2 = (const uint16_t *)ea2;
> +
> + return ((w1[0] ^ w2[0]) | (w1[1] ^ w2[1]) | (w1[2] ^ w2[2])) == 0;
> }
>
If you want to shave off a couple of instructions, you can switch the
three 16-bit loads to one 32-bit and one 16-bit load.
Something like:
const uint8_t *ea1_b = (const uint8_t *)ea1;
const uint8_t *ea2_b = (const uint8_t *)ea2;
uint32_t ea1_h;
uint32_t ea2_h;
uint16_t ea1_l;
uint16_t ea2_l;
memcpy(&ea1_h, &ea1_b[0], sizeof(ea1_h));
memcpy(&ea1_l, &ea1_b[sizeof(ea1_h)], sizeof(ea1_l));
memcpy(&ea2_h, &ea2_b[0], sizeof(ea2_h));
memcpy(&ea2_l, &ea2_b[sizeof(ea2_h)], sizeof(ea2_l));
return ((ea1_l ^ ea2_l) | (ea1_h ^ ea2_h)) == 0;
Code is not as clean as your solution though.
> /**
> @@ -97,11 +96,9 @@ static inline int is_same_ether_addr(const struct ether_addr *ea1,
> */
> static inline int is_zero_ether_addr(const struct ether_addr *ea)
> {
> - int i;
> - for (i = 0; i < ETHER_ADDR_LEN; i++)
> - if (ea->addr_bytes[i] != 0x00)
> - return 0;
> - return 1;
> + const unaligned_uint16_t *w = (const uint16_t *)ea;
> +
> + return (w[0] | w[1] | w[2]) == 0;
> }
>
> /**
>
More information about the dev
mailing list