[dpdk-dev] [PATCH] Clean up rte_memcpy.h file

Pawel Wodkowski pawelx.wodkowski at intel.com
Wed Apr 15 08:32:27 CEST 2015


On 2015-04-14 23:31, Ravi Kerur wrote:
> +
> +		for (i = 0; i < 8; i++) {
> +			ymm = _mm256_loadu_si256((const __m256i *)(src + i * 32));
> +			_mm256_storeu_si256((__m256i *)(dst + i * 32), ymm);
> +		}
> +
>   		n -= 256;
> -		ymm1 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 1 * 32));
> -		ymm2 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 2 * 32));
> -		ymm3 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 3 * 32));
> -		ymm4 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 4 * 32));
> -		ymm5 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 5 * 32));
> -		ymm6 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 6 * 32));
> -		ymm7 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 7 * 32));
> -		src = (const uint8_t *)src + 256;
> -		_mm256_storeu_si256((__m256i *)((uint8_t *)dst + 0 * 32), ymm0);
> -		_mm256_storeu_si256((__m256i *)((uint8_t *)dst + 1 * 32), ymm1);
> -		_mm256_storeu_si256((__m256i *)((uint8_t *)dst + 2 * 32), ymm2);
> -		_mm256_storeu_si256((__m256i *)((uint8_t *)dst + 3 * 32), ymm3);
> -		_mm256_storeu_si256((__m256i *)((uint8_t *)dst + 4 * 32), ymm4);
> -		_mm256_storeu_si256((__m256i *)((uint8_t *)dst + 5 * 32), ymm5);
> -		_mm256_storeu_si256((__m256i *)((uint8_t *)dst + 6 * 32), ymm6);
> -		_mm256_storeu_si256((__m256i *)((uint8_t *)dst + 7 * 32), ymm7);
> -		dst = (uint8_t *)dst + 256;
> +		src = src + 256;
> +		dst = dst + 256;
>   	}

Did you run a performance test on this part, i.e. on replacing the hand-unrolled loads/stores with a loop?

-- 
Pawel


More information about the dev mailing list