[PATCH 2/4] net/ice: fix AVX-512 pointer copy on 32-bit
Stokes, Ian
ian.stokes at intel.com
Mon Sep 30 15:29:09 CEST 2024
> On 32-bit builds a pointer is only 4 bytes rather than 8, so copying
> 32 pointers requires only half as many AVX-512 load/store operations.
>
> Fixes: a4e480de268e ("net/ice: optimize Tx by using AVX512")
> Cc: stable at dpdk.org
>
> Signed-off-by: Bruce Richardson <bruce.richardson at intel.com>
> ---
> drivers/net/ice/ice_rxtx_vec_avx512.c | 7 +++++++
> 1 file changed, 7 insertions(+)
>
> diff --git a/drivers/net/ice/ice_rxtx_vec_avx512.c b/drivers/net/ice/ice_rxtx_vec_avx512.c
> index 04148e8ea2..add095ef06 100644
> --- a/drivers/net/ice/ice_rxtx_vec_avx512.c
> +++ b/drivers/net/ice/ice_rxtx_vec_avx512.c
> @@ -907,6 +907,7 @@ ice_tx_free_bufs_avx512(struct ice_tx_queue *txq)
> uint32_t copied = 0;
> /* n is multiple of 32 */
> while (copied < n) {
> +#ifdef RTE_ARCH_64
> const __m512i a = _mm512_loadu_si512(&txep[copied]);
> const __m512i b = _mm512_loadu_si512(&txep[copied + 8]);
> const __m512i c = _mm512_loadu_si512(&txep[copied + 16]);
> @@ -916,6 +917,12 @@ ice_tx_free_bufs_avx512(struct ice_tx_queue *txq)
> _mm512_storeu_si512(&cache_objs[copied + 8], b);
> _mm512_storeu_si512(&cache_objs[copied + 16], c);
> _mm512_storeu_si512(&cache_objs[copied + 24], d);
> +#else
> + const __m512i a = _mm512_loadu_si512(&txep[copied]);
> + const __m512i b = _mm512_loadu_si512(&txep[copied + 16]);
> + _mm512_storeu_si512(&cache_objs[copied], a);
> + _mm512_storeu_si512(&cache_objs[copied + 16], b);
> +#endif
> copied += 32;
> }
> cache->len += n;
> --
LGTM, Acked.
Thanks
Ian
More information about the dev mailing list