[dpdk-dev] [PATCH v8 1/4] eal: add WC store functions
Nicolau, Radu
radu.nicolau at intel.com
Mon Jul 20 10:52:44 CEST 2020
On 7/20/2020 7:42 AM, Ruifeng Wang wrote:
>> -----Original Message-----
>> From: Radu Nicolau <radu.nicolau at intel.com>
>> Sent: Friday, July 17, 2020 6:50 PM
>> To: dev at dpdk.org
>> Cc: beilei.xing at intel.com; jia.guo at intel.com; bruce.richardson at intel.com;
>> konstantin.ananyev at intel.com; jerinjacobk at gmail.com;
>> david.marchand at redhat.com; fiona.trahe at intel.com; wei.zhao1 at intel.com;
>> Ruifeng Wang <Ruifeng.Wang at arm.com>; Radu Nicolau
>> <radu.nicolau at intel.com>
>> Subject: [PATCH v8 1/4] eal: add WC store functions
>>
>> Add rte_write32_wc and rte_write32_wc_relaxed functions that implement
>> 32bit stores using write combining memory protocol.
>> Provided generic stubs and x86 implementation.
>>
>> Signed-off-by: Radu Nicolau <radu.nicolau at intel.com>
>> Acked-by: Bruce Richardson <bruce.richardson at intel.com>
>> ---
>> lib/librte_eal/arm/include/rte_io_64.h | 12 +++++++
>> lib/librte_eal/include/generic/rte_io.h | 48
>> ++++++++++++++++++++++++++++
>> lib/librte_eal/x86/include/rte_io.h | 56
>> +++++++++++++++++++++++++++++++++
>> 3 files changed, 116 insertions(+)
>>
>> diff --git a/lib/librte_eal/arm/include/rte_io_64.h
>> b/lib/librte_eal/arm/include/rte_io_64.h
>> index e534624..d07d9cb 100644
>> --- a/lib/librte_eal/arm/include/rte_io_64.h
>> +++ b/lib/librte_eal/arm/include/rte_io_64.h
>> @@ -164,6 +164,18 @@ rte_write64(uint64_t value, volatile void *addr)
>> rte_write64_relaxed(value, addr);
>> }
>>
>> +static __rte_always_inline void
>> +rte_write32_wc(uint32_t value, volatile void *addr) {
>> + rte_write32(value, addr);
>> +}
>> +
>> +static __rte_always_inline void
>> +rte_write32_wc_relaxed(uint32_t value, volatile void *addr) {
>> + rte_write32_relaxed(value, addr);
>> +}
>> +
>> #ifdef __cplusplus
>> }
>> #endif
>> diff --git a/lib/librte_eal/include/generic/rte_io.h
>> b/lib/librte_eal/include/generic/rte_io.h
>> index da457f7..0669baa 100644
>> --- a/lib/librte_eal/include/generic/rte_io.h
>> +++ b/lib/librte_eal/include/generic/rte_io.h
>> @@ -229,6 +229,40 @@ rte_write32(uint32_t value, volatile void *addr);
>> static inline void rte_write64(uint64_t value, volatile void *addr);
>>
>> +/**
>> + * Write a 32-bit value to I/O device memory address addr using write
>> + * combining memory write protocol. Depending on the platform write
>> +combining
>> + * may not be available and/or may be treated as a hint and the
>> +behavior may
>> + * fallback to a regular store.
> I'm trying to understand write combining use cases here.
> Is it applicable for all MMIO writes?
It depends on the architecture and the specific use case, but generally
updating the tail registers is a good use case. It has some
particularities that prevent it from being a replacement for MMIO writes:
it is weakly ordered and it will bypass the cache hierarchy.
> How to identify where to use rte_write32_wc(_relaxed)?
The relaxed version can be used in sections of the code that already
have the proper fencing, so as to avoid a redundant memory fence, or
when there is no need for a memory fence at all.
>
> Thanks.
> /Ruifeng
>> + *
>> + * @param value
>> + * Value to write
>> + * @param addr
>> + * I/O memory address to write the value to */ __rte_experimental
>> +static inline void rte_write32_wc(uint32_t value, volatile void *addr);
>> +
>> +/**
>> + * Write a 32-bit value to I/O device memory address addr using write
>> + * combining memory write protocol. Depending on the platform write
>> +combining
>> + * may not be available and/or may be treated as a hint and the
>> +behavior may
>> + * fallback to a regular store.
>> + *
>> + * The relaxed version does not have additional I/O memory barrier,
>> +useful in
>> + * accessing the device registers of integrated controllers which
>> +implicitly
>> + * strongly ordered with respect to memory access.
>> + *
>> + * @param value
>> + * Value to write
>> + * @param addr
>> + * I/O memory address to write the value to */ __rte_experimental
>> +static inline void rte_write32_wc_relaxed(uint32_t value, volatile void
>> +*addr);
>> +
>> #endif /* __DOXYGEN__ */
>>
>> #ifndef RTE_OVERRIDE_IO_H
>> @@ -345,6 +379,20 @@ rte_write64(uint64_t value, volatile void *addr)
>> rte_write64_relaxed(value, addr);
>> }
>>
>> +#ifndef RTE_NATIVE_WRITE32_WC
>> +static __rte_always_inline void
>> +rte_write32_wc(uint32_t value, volatile void *addr) {
>> + rte_write32(value, addr);
>> +}
>> +
>> +static __rte_always_inline void
>> +rte_write32_wc_relaxed(uint32_t value, volatile void *addr) {
>> + rte_write32_relaxed(value, addr);
>> +}
>> +#endif /* RTE_NATIVE_WRITE32_WC */
>> +
>> #endif /* RTE_OVERRIDE_IO_H */
>>
>> #endif /* _RTE_IO_H_ */
>> diff --git a/lib/librte_eal/x86/include/rte_io.h
>> b/lib/librte_eal/x86/include/rte_io.h
>> index 2db71b1..c95ed67 100644
>> --- a/lib/librte_eal/x86/include/rte_io.h
>> +++ b/lib/librte_eal/x86/include/rte_io.h
>> @@ -9,8 +9,64 @@
>> extern "C" {
>> #endif
>>
>> +#include "rte_cpuflags.h"
>> +
>> +#define RTE_NATIVE_WRITE32_WC
>> #include "generic/rte_io.h"
>>
>> +/**
>> + * @internal
>> + * MOVDIRI wrapper.
>> + */
>> +static __rte_always_inline void
>> +_rte_x86_movdiri(uint32_t value, volatile void *addr) {
>> + asm volatile(
>> + /* MOVDIRI */
>> + ".byte 0x40, 0x0f, 0x38, 0xf9, 0x02"
>> + :
>> + : "a" (value), "d" (addr));
>> +}
>> +
>> +static __rte_always_inline void
>> +rte_write32_wc(uint32_t value, volatile void *addr) {
>> + static int _x86_movdiri_flag = -1;
>> + if (_x86_movdiri_flag == 1) {
>> + rte_wmb();
>> + _rte_x86_movdiri(value, addr);
>> + } else if (_x86_movdiri_flag == 0) {
>> + rte_write32(value, addr);
>> + } else {
>> + _x86_movdiri_flag =
>> +
>> (rte_cpu_get_flag_enabled(RTE_CPUFLAG_MOVDIRI) > 0);
>> + if (_x86_movdiri_flag == 1) {
>> + rte_wmb();
>> + _rte_x86_movdiri(value, addr);
>> + } else {
>> + rte_write32(value, addr);
>> + }
>> + }
>> +}
>> +
>> +static __rte_always_inline void
>> +rte_write32_wc_relaxed(uint32_t value, volatile void *addr) {
>> + static int _x86_movdiri_flag = -1;
>> + if (_x86_movdiri_flag == 1) {
>> + _rte_x86_movdiri(value, addr);
>> + } else if (_x86_movdiri_flag == 0) {
>> + rte_write32_relaxed(value, addr);
>> + } else {
>> + _x86_movdiri_flag =
>> +
>> (rte_cpu_get_flag_enabled(RTE_CPUFLAG_MOVDIRI) > 0);
>> + if (_x86_movdiri_flag == 1)
>> + _rte_x86_movdiri(value, addr);
>> + else
>> + rte_write32_relaxed(value, addr);
>> + }
>> +}
>> +
>> #ifdef __cplusplus
>> }
>> #endif
>> --
>> 2.7.4
More information about the dev
mailing list