[dpdk-dev] [PATCH v7] eal: add cache-line demote support

Maslekar, Omkar omkar.maslekar at intel.com
Thu Oct 15 16:41:07 CEST 2020


Hi David,

 >-----Original Message-----
 >From: David Marchand <david.marchand at redhat.com>
 >Sent: Thursday, October 15, 2020 1:01 AM
 >To: Maslekar, Omkar <omkar.maslekar at intel.com>
 >Cc: dev <dev at dpdk.org>; Richardson, Bruce <bruce.richardson at intel.com>;
 >Loftus, Ciara <ciara.loftus at intel.com>; David Christensen
 ><drc at linux.vnet.ibm.com>; Jerin Jacob Kollanukkaran <jerinj at marvell.com>;
 >Ruifeng Wang (Arm Technology China) <ruifeng.wang at arm.com>;
 >Honnappa Nagarahalli <honnappa.nagarahalli at arm.com>
 >Subject: Re: [dpdk-dev] [PATCH v7] eal: add cache-line demote support
 >
 >Repeating my questions:
 >- would there be a point in hinting at where the "demoted" line goes?
Yes, it is worth mentioning that the demoted line goes to the last shared level of the cache hierarchy. Demotion to a specific, desired cache level is not possible.
 >- is this instruction available on all x86 CPUs?
Yes, this instruction is available on all x86 CPUs: it takes effect on the latest CPUs and is executed as a NOP on older generations.
 >
 >
 >See comments:
 >
 >On Tue, Oct 13, 2020 at 6:47 PM Omkar Maslekar
 ><omkar.maslekar at intel.com> wrote:
 >> diff --git a/app/test/test_prefetch.c b/app/test/test_prefetch.c index
 >> 41f219a..5c58d0c 100644
 >> --- a/app/test/test_prefetch.c
 >> +++ b/app/test/test_prefetch.c
 >> @@ -26,7 +26,11 @@
 >>         rte_prefetch1(&a);
 >>         rte_prefetch2(&a);
 >>
 >> +/* test for marking a line as shared to test cldemote functionality
 >> +*/
 >
 >Non indented comment that gives no more info than the call itself.
 >Please remove.
I will fix it
 >
 >> +       rte_cldemote(&a);
 >> +
 >>         return 0;
 >>  }
 >>
 >> +
 >
 >Please remove this empty line.
 I will fix it
 >
 >>  REGISTER_TEST_COMMAND(prefetch_autotest, test_prefetch); diff --git
 >> a/doc/guides/rel_notes/release_20_11.rst
 >> b/doc/guides/rel_notes/release_20_11.rst
 >> index b7881f2..8a1ed01 100644
 >> --- a/doc/guides/rel_notes/release_20_11.rst
 >> +++ b/doc/guides/rel_notes/release_20_11.rst
 >> @@ -171,6 +171,13 @@ New Features
 >>    * Extern objects and functions can be plugged into the pipeline.
 >>    * Transaction-oriented table updates.
 >>
 >> +* **Added new function rte_cldemote in rte_prefetch.h.**
 >> +
 >> +  Added a hardware hint CLDEMOTE, which is similar to prefetch in
 >reverse.
 >
 >This should come at the top of the features list (but after "write combining
 >store" entry that got in first).
 >
 >Please add a mention that it only concerns x86.
I will reorder the entry in the release notes.
 >
 >
 >> +  CLDEMOTE moves the cache line to the more remote cache, where it
 >> + expects  sharing to be efficient. Moving the cache line to a level
 >> + more distant from  the processor helps to accelerate core-to-core
 >communication.
 >> +
 >>
 >>  Removed Items
 >>  -------------
 >> diff --git a/lib/librte_eal/arm/include/rte_prefetch_32.h
 >> b/lib/librte_eal/arm/include/rte_prefetch_32.h
 >> index e53420a..28b3d48 100644
 >> --- a/lib/librte_eal/arm/include/rte_prefetch_32.h
 >> +++ b/lib/librte_eal/arm/include/rte_prefetch_32.h
 >> @@ -10,6 +10,7 @@
 >>  #endif
 >>
 >>  #include <rte_common.h>
 >> +#include <rte_compat.h>
 >
 >Move rte_compat.h inclusion from the arch headers to the
 >generic/rte_prefetch.h header only.
I got the build error below when I moved the rte_compat.h inclusion from the arch headers to the generic/rte_prefetch.h header only. I will remove it and send out a new patch, v8.
In file included from ../lib/librte_eal/x86/include/rte_prefetch.h:14:0,
                 from ../lib/librte_table/rte_swx_table_em.c:10:
../lib/librte_eal/include/generic/rte_prefetch.h:67:1: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘static’
 static inline void rte_cldemote(const volatile void *p);
 >
 >
 >>  #include "generic/rte_prefetch.h"
 >>
 >>  static inline void rte_prefetch0(const volatile void *p) @@ -33,6
 >> +34,12 @@ static inline void rte_prefetch_non_temporal(const volatile
 >void *p)
 >>         rte_prefetch0(p);
 >>  }
 >>
 >> +__rte_experimental
 >> +static inline void rte_cldemote(const volatile void *p) {
 >> +       RTE_SET_USED(p);
 >> +}
 >> +
 >>  #ifdef __cplusplus
 >>  }
 >>  #endif
 >> diff --git a/lib/librte_eal/arm/include/rte_prefetch_64.h
 >> b/lib/librte_eal/arm/include/rte_prefetch_64.h
 >> index fc2b391..1c722eb 100644
 >> --- a/lib/librte_eal/arm/include/rte_prefetch_64.h
 >> +++ b/lib/librte_eal/arm/include/rte_prefetch_64.h
 >> @@ -10,6 +10,7 @@
 >>  #endif
 >>
 >>  #include <rte_common.h>
 >> +#include <rte_compat.h>
 >>  #include "generic/rte_prefetch.h"
 >>
 >>  static inline void rte_prefetch0(const volatile void *p) @@ -32,6
 >> +33,12 @@ static inline void rte_prefetch_non_temporal(const volatile
 >void *p)
 >>         asm volatile ("PRFM PLDL1STRM, [%0]" : : "r" (p));  }
 >>
 >> +__rte_experimental
 >> +static inline void rte_cldemote(const volatile void *p) {
 >> +       RTE_SET_USED(p);
 >> +}
 >> +
 >>  #ifdef __cplusplus
 >>  }
 >>  #endif
 >> diff --git a/lib/librte_eal/include/generic/rte_prefetch.h
 >> b/lib/librte_eal/include/generic/rte_prefetch.h
 >> index 6e47bdf..ad9844c 100644
 >> --- a/lib/librte_eal/include/generic/rte_prefetch.h
 >> +++ b/lib/librte_eal/include/generic/rte_prefetch.h
 >> @@ -51,4 +51,19 @@
 >>   */
 >>  static inline void rte_prefetch_non_temporal(const volatile void *p);
 >>
 >> +/**
 >> + * Demote a cache line to a more distant level of cache from the
 >processor.
 >> + *
 >> + * CLDEMOTE hints to hardware to move (demote) a cache line from the
 >> +closest to
 >> + * the processor to a level more distant from the processor. It is a
 >> +hint and
 >> + * not guarantee. rte_cldemote is intended to move the cache line to
 >> +the more
 >
 >guaranteed*
I will fix this
 >
 >
 >> + * remote cache, where it expects sharing to be efficient and to
 >> +indicate that a
 >> + * line may be accessed by a different core in the future.
 >> + *
 >> + * @param p
 >> + *   Address to demote
 >> + */
 >> +__rte_experimental
 >> +static inline void rte_cldemote(const volatile void *p);
 >> +
 >>  #endif /* _RTE_PREFETCH_H_ */
 >> diff --git a/lib/librte_eal/ppc/include/rte_prefetch.h
 >> b/lib/librte_eal/ppc/include/rte_prefetch.h
 >> index 9ba07c8..b55cac4 100644
 >> --- a/lib/librte_eal/ppc/include/rte_prefetch.h
 >> +++ b/lib/librte_eal/ppc/include/rte_prefetch.h
 >> @@ -11,6 +11,7 @@
 >>  #endif
 >>
 >>  #include <rte_common.h>
 >> +#include <rte_compat.h>
 >>  #include "generic/rte_prefetch.h"
 >>
 >>  static inline void rte_prefetch0(const volatile void *p) @@ -34,6
 >> +35,12 @@ static inline void rte_prefetch_non_temporal(const volatile
 >void *p)
 >>         rte_prefetch0(p);
 >>  }
 >>
 >> +__rte_experimental
 >> +static inline void rte_cldemote(const volatile void *p) {
 >> +       RTE_SET_USED(p);
 >> +}
 >> +
 >>  #ifdef __cplusplus
 >>  }
 >>  #endif
 >> diff --git a/lib/librte_eal/x86/include/rte_prefetch.h
 >> b/lib/librte_eal/x86/include/rte_prefetch.h
 >> index 384c6b3..92ba05a 100644
 >> --- a/lib/librte_eal/x86/include/rte_prefetch.h
 >> +++ b/lib/librte_eal/x86/include/rte_prefetch.h
 >> @@ -10,6 +10,7 @@
 >>  #endif
 >>
 >>  #include <rte_common.h>
 >> +#include <rte_compat.h>
 >>  #include "generic/rte_prefetch.h"
 >>
 >>  static inline void rte_prefetch0(const volatile void *p) @@ -32,6
 >> +33,16 @@ static inline void rte_prefetch_non_temporal(const volatile
 >void *p)
 >>         asm volatile ("prefetchnta %[p]" : : [p] "m" (*(const volatile
 >> char *)p));  }
 >>
 >> +/*
 >> + * we're using raw byte codes for now as only the newest compiler
 >
 >We use
I will fix this
 >
 >> + * versions support this instruction natively.
 >> + */
 >> +__rte_experimental
 >> +static inline void rte_cldemote(const volatile void *p) {
 >> +       asm volatile(".byte 0x0f, 0x1c, 0x06" :: "S" (p)); }
 >> +
 >>  #ifdef __cplusplus
 >>  }
 >>  #endif
 >> --
 >> 1.8.3.1
 >>
 >
 >
 >--
 >David Marchand



More information about the dev mailing list