[PATCH 2/4] net/ice: fix AVX-512 pointer copy on 32-bit
Bruce Richardson
bruce.richardson at intel.com
Fri Sep 6 16:11:25 CEST 2024
The size of a pointer on 32-bit is only 4 rather than 8 bytes, so
copying 32 pointers only requires half the number of AVX-512 load store
operations.
Fixes: a4e480de268e ("net/ice: optimize Tx by using AVX512")
Cc: stable at dpdk.org
Signed-off-by: Bruce Richardson <bruce.richardson at intel.com>
---
drivers/net/ice/ice_rxtx_vec_avx512.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/drivers/net/ice/ice_rxtx_vec_avx512.c b/drivers/net/ice/ice_rxtx_vec_avx512.c
index 04148e8ea2..add095ef06 100644
--- a/drivers/net/ice/ice_rxtx_vec_avx512.c
+++ b/drivers/net/ice/ice_rxtx_vec_avx512.c
@@ -907,6 +907,7 @@ ice_tx_free_bufs_avx512(struct ice_tx_queue *txq)
uint32_t copied = 0;
/* n is multiple of 32 */
while (copied < n) {
+#ifdef RTE_ARCH_64
const __m512i a = _mm512_loadu_si512(&txep[copied]);
const __m512i b = _mm512_loadu_si512(&txep[copied + 8]);
const __m512i c = _mm512_loadu_si512(&txep[copied + 16]);
@@ -916,6 +917,12 @@ ice_tx_free_bufs_avx512(struct ice_tx_queue *txq)
_mm512_storeu_si512(&cache_objs[copied + 8], b);
_mm512_storeu_si512(&cache_objs[copied + 16], c);
_mm512_storeu_si512(&cache_objs[copied + 24], d);
+#else
+ const __m512i a = _mm512_loadu_si512(&txep[copied]);
+ const __m512i b = _mm512_loadu_si512(&txep[copied + 16]);
+ _mm512_storeu_si512(&cache_objs[copied], a);
+ _mm512_storeu_si512(&cache_objs[copied + 16], b);
+#endif
copied += 32;
}
cache->len += n;
--
2.43.0
More information about the dev
mailing list