[dpdk-dev] [PATCH v2 4/5] common/octeontx2: fix build with sve enabled

Ruifeng Wang ruifeng.wang at arm.com
Fri Jan 8 09:25:22 CET 2021


Building with GCC 10.2 with the SVE extension enabled fails with the following errors:

{standard input}: Assembler messages:
{standard input}:4002: Error: selected processor does not support `mov z3.b,#0'
{standard input}:4003: Error: selected processor does not support `whilelo p1.b,xzr,x7'
{standard input}:4005: Error: selected processor does not support `ld1b z0.b,p1/z,[x8]'
{standard input}:4006: Error: selected processor does not support `whilelo p4.s,wzr,w7'

This is because the inline assembly code explicitly resets the CPU model
(".cpu generic+lse") to one without SVE support. As a result, the SVE
instructions generated by the compiler's auto-vectorization are rejected
by the assembler.

Fix the issue by replacing the inline assembly with the equivalent atomic
built-ins. The compiler will generate LSE instructions for CPUs that have
the extension.

Fixes: 8a4f835971f5 ("common/octeontx2: add IO handling APIs")
Cc: jerinj at marvell.com
Cc: stable at dpdk.org

Signed-off-by: Ruifeng Wang <ruifeng.wang at arm.com>
---
 drivers/common/octeontx2/otx2_io_arm64.h | 37 +++---------------------
 1 file changed, 4 insertions(+), 33 deletions(-)

diff --git a/drivers/common/octeontx2/otx2_io_arm64.h b/drivers/common/octeontx2/otx2_io_arm64.h
index b5c85d9a6..8843a79b5 100644
--- a/drivers/common/octeontx2/otx2_io_arm64.h
+++ b/drivers/common/octeontx2/otx2_io_arm64.h
@@ -24,55 +24,26 @@
 static __rte_always_inline uint64_t
 otx2_atomic64_add_nosync(int64_t incr, int64_t *ptr)
 {
-	uint64_t result;
-
 	/* Atomic add with no ordering */
-	asm volatile (
-		".cpu  generic+lse\n"
-		"ldadd %x[i], %x[r], [%[b]]"
-		: [r] "=r" (result), "+m" (*ptr)
-		: [i] "r" (incr), [b] "r" (ptr)
-		: "memory");
-	return result;
+	return (uint64_t)__atomic_fetch_add(ptr, incr, __ATOMIC_RELAXED);
 }
 
 static __rte_always_inline uint64_t
 otx2_atomic64_add_sync(int64_t incr, int64_t *ptr)
 {
-	uint64_t result;
-
-	/* Atomic add with ordering */
-	asm volatile (
-		".cpu  generic+lse\n"
-		"ldadda %x[i], %x[r], [%[b]]"
-		: [r] "=r" (result), "+m" (*ptr)
-		: [i] "r" (incr), [b] "r" (ptr)
-		: "memory");
-	return result;
+	return (uint64_t)__atomic_fetch_add(ptr, incr, __ATOMIC_ACQUIRE);
 }
 
 static __rte_always_inline uint64_t
 otx2_lmt_submit(rte_iova_t io_address)
 {
-	uint64_t result;
-
-	asm volatile (
-		".cpu  generic+lse\n"
-		"ldeor xzr,%x[rf],[%[rs]]" :
-		 [rf] "=r"(result): [rs] "r"(io_address));
-	return result;
+	return __atomic_fetch_xor((uint64_t *)io_address, 0, __ATOMIC_RELAXED);
 }
 
 static __rte_always_inline uint64_t
 otx2_lmt_submit_release(rte_iova_t io_address)
 {
-	uint64_t result;
-
-	asm volatile (
-		".cpu  generic+lse\n"
-		"ldeorl xzr,%x[rf],[%[rs]]" :
-		 [rf] "=r"(result) : [rs] "r"(io_address));
-	return result;
+	return __atomic_fetch_xor((uint64_t *)io_address, 0, __ATOMIC_RELEASE);
 }
 
 static __rte_always_inline void
-- 
2.25.1



More information about the dev mailing list