[dpdk-dev] [PATCH 1/3] acl: fix arm argument types

Aaron Conole aconole at redhat.com
Mon Apr 8 20:24:18 CEST 2019


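The compiler complains about argument type mismatches when calling the NEON
intrinsics, so add explicit casts between the vector types involved. Also
zero the input vectors in search_neon_8() and search_neon_4() before the
main loop, since vsetq_lane_s32() takes the existing vector as an operand.
The error looks like: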

   ../lib/librte_acl/acl_run_neon.h: In function ‘transition4’:
   ../lib/librte_acl/acl_run_neon.h:115:2: note: use -flax-vector-conversions
      to permit conversions between vectors with differing element types
      or numbers of subparts
     node_type = vbicq_s32(tr_hi_lo.val[0], index_msk);
     ^
   ../lib/librte_acl/acl_run_neon.h:115:41: error: incompatible type for
      argument 2 of ‘vbicq_s32’

Signed-off-by: Aaron Conole <aconole at redhat.com>
---
 lib/librte_acl/acl_run_neon.h | 46 ++++++++++++++++++++---------------
 1 file changed, 27 insertions(+), 19 deletions(-)

diff --git a/lib/librte_acl/acl_run_neon.h b/lib/librte_acl/acl_run_neon.h
index 01b9766d8..4a8e4b681 100644
--- a/lib/librte_acl/acl_run_neon.h
+++ b/lib/librte_acl/acl_run_neon.h
@@ -112,37 +112,41 @@ transition4(int32x4_t next_input, const uint64_t *trans, uint64_t transitions[])
 	index_msk = vld1q_u32((const uint32_t *)&neon_acl_const.xmm_index_mask);
 
 	/* Calc node type and node addr */
-	node_type = vbicq_s32(tr_hi_lo.val[0], index_msk);
-	addr = vandq_s32(tr_hi_lo.val[0], index_msk);
+	node_type = (uint32x4_t) vbicq_s32(tr_hi_lo.val[0],
+				       (int32x4_t)index_msk);
+	addr = (uint32x4_t) vandq_s32(tr_hi_lo.val[0], (int32x4_t) index_msk);
 
 	/* t = 0 */
-	t = veorq_s32(node_type, node_type);
+	t = veorq_s32((int32x4_t)node_type, (int32x4_t)node_type);
 
 	/* mask for DFA type(0) nodes */
-	dfa_msk = vceqq_u32(node_type, t);
+	dfa_msk = vceqq_u32(node_type, (uint32x4_t)t);
 
-	mask = vld1q_s32((const int32_t *)&neon_acl_const.xmm_shuffle_input);
-	in = vqtbl1q_u8((uint8x16_t)next_input, (uint8x16_t)mask);
+	mask = (uint32x4_t)
+	       vld1q_s32((const int32_t *)&neon_acl_const.xmm_shuffle_input);
+	in = (int32x4_t) vqtbl1q_u8((uint8x16_t)next_input, (uint8x16_t)mask);
 
 	/* DFA calculations. */
-	r = vshrq_n_u32(in, 30); /* div by 64 */
-	mask = vld1q_s32((const int32_t *)&neon_acl_const.range_base);
-	r = vaddq_u8(r, mask);
-	t = vshrq_n_u32(in, 24);
-	r = vqtbl1q_u8((uint8x16_t)tr_hi_lo.val[1], (uint8x16_t)r);
-	dfa_ofs = vsubq_s32(t, r);
+	r = (int32x4_t) vshrq_n_u32((uint32x4_t) in, 30); /* div by 64 */
+	mask = (uint32x4_t)
+	       vld1q_s32((const int32_t *)&neon_acl_const.range_base);
+	r = (int32x4_t) vaddq_u8((uint8x16_t)r, (uint8x16_t)mask);
+	t = (int32x4_t) vshrq_n_u32((uint32x4_t)in, 24);
+	r = (int32x4_t) vqtbl1q_u8((uint8x16_t)tr_hi_lo.val[1], (uint8x16_t)r);
+	dfa_ofs = (uint32x4_t) vsubq_s32(t, r);
 
 	/* QUAD/SINGLE calculations. */
-	t = vcgtq_s8(in, tr_hi_lo.val[1]);
-	t = vabsq_s8(t);
-	t = vpaddlq_u8(t);
-	quad_ofs = vpaddlq_u16(t);
+	t = (int32x4_t) vcgtq_s8((int8x16_t)in, (int8x16_t)tr_hi_lo.val[1]);
+	t = (int32x4_t) vabsq_s8((int8x16_t)t);
+	t = (int32x4_t) vpaddlq_u8((uint8x16_t)t);
+	quad_ofs = vpaddlq_u16((uint16x8_t)t);
 
 	/* blend DFA and QUAD/SINGLE. */
-	t = vbslq_u8(dfa_msk, dfa_ofs, quad_ofs);
+	t = (int32x4_t) vbslq_u8((uint8x16_t)dfa_msk, (uint8x16_t)dfa_ofs,
+				 (uint8x16_t)quad_ofs);
 
 	/* calculate address for next transitions */
-	addr = vaddq_u32(addr, t);
+	addr = vaddq_u32(addr, (uint32x4_t)t);
 
 	/* Fill next transitions */
 	transitions[0] = trans[vgetq_lane_u32(addr, 0)];
@@ -150,7 +154,7 @@ transition4(int32x4_t next_input, const uint64_t *trans, uint64_t transitions[])
 	transitions[2] = trans[vgetq_lane_u32(addr, 2)];
 	transitions[3] = trans[vgetq_lane_u32(addr, 3)];
 
-	return vshrq_n_u32(next_input, CHAR_BIT);
+	return (int32x4_t) vshrq_n_u32((uint32x4_t)next_input, CHAR_BIT);
 }
 
 /*
@@ -179,6 +183,9 @@ search_neon_8(const struct rte_acl_ctx *ctx, const uint8_t **data,
 	acl_match_check_x4(0, ctx, parms, &flows, &index_array[0]);
 	acl_match_check_x4(4, ctx, parms, &flows, &index_array[4]);
 
+	memset(&input0, 0, sizeof(input0));
+	memset(&input1, 0, sizeof(input1));
+
 	while (flows.started > 0) {
 		/* Gather 4 bytes of input data for each stream. */
 		input0 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 0), input0, 0);
@@ -240,6 +247,7 @@ search_neon_4(const struct rte_acl_ctx *ctx, const uint8_t **data,
 	/* Check for any matches. */
 	acl_match_check_x4(0, ctx, parms, &flows, index_array);
 
+	memset(&input, 0, sizeof(input));
 	while (flows.started > 0) {
 		/* Gather 4 bytes of input data for each stream. */
 		input = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 0), input, 0);
-- 
2.19.1



More information about the dev mailing list