[dpdk-dev] [PATCH] Clean up rte_memcpy.h file
Pawel Wodkowski
pawelx.wodkowski at intel.com
Wed Apr 15 08:32:27 CEST 2015
On 2015-04-14 23:31, Ravi Kerur wrote:
> +
> + for (i = 0; i < 8; i++) {
> + ymm = _mm256_loadu_si256((const __m256i *)(src + i * 32));
> + _mm256_storeu_si256((__m256i *)(dst + i * 32), ymm);
> + }
> +
> n -= 256;
> - ymm1 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 1 * 32));
> - ymm2 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 2 * 32));
> - ymm3 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 3 * 32));
> - ymm4 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 4 * 32));
> - ymm5 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 5 * 32));
> - ymm6 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 6 * 32));
> - ymm7 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 7 * 32));
> - src = (const uint8_t *)src + 256;
> - _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 0 * 32), ymm0);
> - _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 1 * 32), ymm1);
> - _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 2 * 32), ymm2);
> - _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 3 * 32), ymm3);
> - _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 4 * 32), ymm4);
> - _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 5 * 32), ymm5);
> - _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 6 * 32), ymm6);
> - _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 7 * 32), ymm7);
> - dst = (uint8_t *)dst + 256;
> + src = src + 256;
> + dst = dst + 256;
> }
Did you perform a performance test on that part?
--
Pawel
More information about the dev mailing list