[dpdk-dev] [PATCH v7] eal: add cache-line demote support
Ruifeng Wang
Ruifeng.Wang at arm.com
Wed Oct 14 09:24:39 CEST 2020
> -----Original Message-----
> From: Omkar Maslekar <omkar.maslekar at intel.com>
> Sent: Tuesday, October 13, 2020 5:43 PM
> To: dev at dpdk.org
> Cc: bruce.richardson at intel.com; ciara.loftus at intel.com;
> omkar.maslekar at intel.com; drc at linux.vnet.ibm.com; jerinj at marvell.com;
> Ruifeng Wang <Ruifeng.Wang at arm.com>; Honnappa Nagarahalli
> <Honnappa.Nagarahalli at arm.com>
> Subject: [PATCH v7] eal: add cache-line demote support
>
> rte_cldemote is similar to a prefetch hint - in reverse. cldemote(addr)
> enables software to hint to hardware that a cache line is likely to be
> shared. This is useful in core-to-core communication, where a cache line
> is likely to be shared. The ARM and PPC implementations are provided as
> NOPs; real implementations can be added if any equivalent instructions
> can be used on those architectures.
>
> Signed-off-by: Omkar Maslekar <omkar.maslekar at intel.com>
> Acked-by: Bruce Richardson <bruce.richardson at intel.com>
> Acked-by: David Christensen <drc at linux.vnet.ibm.com>
> Acked-by: Jerin Jacob <jerinj at marvell.com>
>
> ---
> v7: fixed experimental tag
>
> v6: marked rte_cldemote as experimental
> added rte_cldemote call in existing app/test_prefetch.c
>
> v5: documentation updated
> fixed formatting issue in release notes
> added Acked-by: Bruce Richardson <bruce.richardson at intel.com>
> *
> v4: updated bold text for title and fixed margin in release notes
> *
> v3: fixed warning regarding whitespace
> *
> v2: documentation updated
> ---
> ---
> app/test/test_prefetch.c | 4 ++++
> doc/guides/rel_notes/release_20_11.rst | 7 +++++++
> lib/librte_eal/arm/include/rte_prefetch_32.h | 7 +++++++
> lib/librte_eal/arm/include/rte_prefetch_64.h | 7 +++++++
> lib/librte_eal/include/generic/rte_prefetch.h | 15 +++++++++++++++
> lib/librte_eal/ppc/include/rte_prefetch.h | 7 +++++++
> lib/librte_eal/x86/include/rte_prefetch.h | 11 +++++++++++
> 7 files changed, 58 insertions(+)
>
> diff --git a/app/test/test_prefetch.c b/app/test/test_prefetch.c
> index 41f219a..5c58d0c 100644
> --- a/app/test/test_prefetch.c
> +++ b/app/test/test_prefetch.c
> @@ -26,7 +26,11 @@
> rte_prefetch1(&a);
> rte_prefetch2(&a);
>
> +/* test for marking a line as shared to test cldemote functionality */
> + rte_cldemote(&a);
> +
> return 0;
> }
>
> +
> REGISTER_TEST_COMMAND(prefetch_autotest, test_prefetch);
> diff --git a/doc/guides/rel_notes/release_20_11.rst b/doc/guides/rel_notes/release_20_11.rst
> index b7881f2..8a1ed01 100644
> --- a/doc/guides/rel_notes/release_20_11.rst
> +++ b/doc/guides/rel_notes/release_20_11.rst
> @@ -171,6 +171,13 @@ New Features
> * Extern objects and functions can be plugged into the pipeline.
> * Transaction-oriented table updates.
>
> +* **Added new function rte_cldemote in rte_prefetch.h.**
> +
> + Added a hardware hint CLDEMOTE, which is similar to prefetch in reverse.
> + CLDEMOTE moves the cache line to the more remote cache, where it
> + expects sharing to be efficient. Moving the cache line to a level
> + more distant from the processor helps to accelerate core-to-core communication.
> +
>
> Removed Items
> -------------
> diff --git a/lib/librte_eal/arm/include/rte_prefetch_32.h
> b/lib/librte_eal/arm/include/rte_prefetch_32.h
> index e53420a..28b3d48 100644
> --- a/lib/librte_eal/arm/include/rte_prefetch_32.h
> +++ b/lib/librte_eal/arm/include/rte_prefetch_32.h
> @@ -10,6 +10,7 @@
> #endif
>
> #include <rte_common.h>
> +#include <rte_compat.h>
> #include "generic/rte_prefetch.h"
>
> static inline void rte_prefetch0(const volatile void *p) @@ -33,6 +34,12 @@
> static inline void rte_prefetch_non_temporal(const volatile void *p)
> rte_prefetch0(p);
> }
>
> +__rte_experimental
> +static inline void rte_cldemote(const volatile void *p) {
> + RTE_SET_USED(p);
> +}
> +
> #ifdef __cplusplus
> }
> #endif
> diff --git a/lib/librte_eal/arm/include/rte_prefetch_64.h
> b/lib/librte_eal/arm/include/rte_prefetch_64.h
> index fc2b391..1c722eb 100644
> --- a/lib/librte_eal/arm/include/rte_prefetch_64.h
> +++ b/lib/librte_eal/arm/include/rte_prefetch_64.h
> @@ -10,6 +10,7 @@
> #endif
>
> #include <rte_common.h>
> +#include <rte_compat.h>
> #include "generic/rte_prefetch.h"
>
> static inline void rte_prefetch0(const volatile void *p) @@ -32,6 +33,12 @@
> static inline void rte_prefetch_non_temporal(const volatile void *p)
> asm volatile ("PRFM PLDL1STRM, [%0]" : : "r" (p)); }
>
> +__rte_experimental
> +static inline void rte_cldemote(const volatile void *p) {
> + RTE_SET_USED(p);
> +}
> +
> #ifdef __cplusplus
> }
> #endif
> diff --git a/lib/librte_eal/include/generic/rte_prefetch.h
> b/lib/librte_eal/include/generic/rte_prefetch.h
> index 6e47bdf..ad9844c 100644
> --- a/lib/librte_eal/include/generic/rte_prefetch.h
> +++ b/lib/librte_eal/include/generic/rte_prefetch.h
> @@ -51,4 +51,19 @@
> */
> static inline void rte_prefetch_non_temporal(const volatile void *p);
>
> +/**
> + * Demote a cache line to a more distant level of cache from the processor.
> + *
> + * CLDEMOTE hints to hardware to move (demote) a cache line from the closest to
> + * the processor to a level more distant from the processor. It is a hint and
> + * not a guarantee. rte_cldemote is intended to move the cache line to the more
> + * remote cache, where it expects sharing to be efficient and to indicate that a
> + * line may be accessed by a different core in the future.
> + *
> + * @param p
> + * Address to demote
> + */
> +__rte_experimental
> +static inline void rte_cldemote(const volatile void *p);
> +
> #endif /* _RTE_PREFETCH_H_ */
> diff --git a/lib/librte_eal/ppc/include/rte_prefetch.h
> b/lib/librte_eal/ppc/include/rte_prefetch.h
> index 9ba07c8..b55cac4 100644
> --- a/lib/librte_eal/ppc/include/rte_prefetch.h
> +++ b/lib/librte_eal/ppc/include/rte_prefetch.h
> @@ -11,6 +11,7 @@
> #endif
>
> #include <rte_common.h>
> +#include <rte_compat.h>
> #include "generic/rte_prefetch.h"
>
> static inline void rte_prefetch0(const volatile void *p) @@ -34,6 +35,12 @@
> static inline void rte_prefetch_non_temporal(const volatile void *p)
> rte_prefetch0(p);
> }
>
> +__rte_experimental
> +static inline void rte_cldemote(const volatile void *p) {
> + RTE_SET_USED(p);
> +}
> +
> #ifdef __cplusplus
> }
> #endif
> diff --git a/lib/librte_eal/x86/include/rte_prefetch.h
> b/lib/librte_eal/x86/include/rte_prefetch.h
> index 384c6b3..92ba05a 100644
> --- a/lib/librte_eal/x86/include/rte_prefetch.h
> +++ b/lib/librte_eal/x86/include/rte_prefetch.h
> @@ -10,6 +10,7 @@
> #endif
>
> #include <rte_common.h>
> +#include <rte_compat.h>
> #include "generic/rte_prefetch.h"
>
> static inline void rte_prefetch0(const volatile void *p) @@ -32,6 +33,16 @@
> static inline void rte_prefetch_non_temporal(const volatile void *p)
> asm volatile ("prefetchnta %[p]" : : [p] "m" (*(const volatile char
> *)p)); }
>
> +/*
> + * we're using raw byte codes for now as only the newest compiler
> + * versions support this instruction natively.
> + */
> +__rte_experimental
> +static inline void rte_cldemote(const volatile void *p) {
> + asm volatile(".byte 0x0f, 0x1c, 0x06" :: "S" (p)); }
> +
> #ifdef __cplusplus
> }
> #endif
> --
> 1.8.3.1
Reviewed-by: Ruifeng Wang <ruifeng.wang at arm.com>
More information about the dev
mailing list