[dpdk-dev] [PATCH v5 5/5] test/thash: add performance tests for the Toeplitz hash

Medvedkin, Vladimir vladimir.medvedkin at intel.com
Wed Oct 27 17:48:49 CEST 2021


Hi Thomas,

On 27/10/2021 10:29, Thomas Monjalon wrote:
> 26/10/2021 22:29, Medvedkin, Vladimir:
>> Hi Thomas,
>>
>> Thanks for the review, I'll address your comments in v6.
>> Please find my comment below.
>>
>> On 25/10/2021 19:02, Thomas Monjalon wrote:
>>> 21/10/2021 20:54, Vladimir Medvedkin:
>>>> This patch adds performance tests for different implementations
>>>> of the Toeplitz hash function.
>>>
>>> Please name them.
>>>
>>>> Signed-off-by: Vladimir Medvedkin <vladimir.medvedkin at intel.com>
>>>
>>> There is some garbage,
>>>
>>>> @@ -320,6 +321,7 @@ perf_test_names = [
>>>>            'hash_readwrite_lf_perf_autotest',
>>>>            'trace_perf_autotest',
>>>>            'ipsec_perf_autotest',
>>>> +	'thash_perf_autotest',
>>>
>>> here (tabs instead of spaces)
>>>
>>>>    driver_test_names = [
>>>> diff --git a/app/test/test_thash_perf.c b/app/test/test_thash_perf.c
>>>> new file mode 100644
>>>> index 0000000..fb66e20
>>>> --- /dev/null
>>>> +++ b/app/test/test_thash_perf.c
>>>> @@ -0,0 +1,120 @@
>>>> +/* SPDX-License-Identifier: BSD-3-Clause
>>>> + * Copyright(c) 2021 Intel Corporation
>>>> + */
>>>> +
>>>> +#include <stdio.h>
>>>> +#include <stdint.h>
>>>> +#include <stdlib.h>
>>>> +#include <math.h>
>>>> +
>>>> +#include <rte_cycles.h>
>>>> +#include <rte_malloc.h>
>>>> +#include <rte_random.h>
>>>> +#include <rte_thash.h>
>>>> +
>>>> +#include "test.h"
>>>> +
>>>> +#define ITERATIONS	(1 << 15)
>>>> +#define	BATCH_SZ	(1 << 10)
>>>> +
>>>> +#define IPV4_2_TUPLE_LEN	(8)
>>>> +#define IPV4_4_TUPLE_LEN	(12)
>>>> +#define IPV6_2_TUPLE_LEN	(32)
>>>> +#define IPV6_4_TUPLE_LEN	(36)
>>>> +
>>>> +
>>>> +static uint8_t default_rss_key[] = {
>>>> +	0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
>>>> +	0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
>>>> +	0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
>>>> +	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
>>>> +	0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa,
>>>> +};
>>>> +
>>>> +static void
>>>> +run_thash_test(unsigned int tuple_len)
>>>> +{
>>>> +	uint32_t *tuples[BATCH_SZ];
>>>> +	unsigned int i, j;
>>>> +	uint64_t start_tsc, end_tsc;
>>>> +	uint32_t len = RTE_ALIGN_CEIL(tuple_len, sizeof(uint32_t));
>>>> +	volatile uint32_t hash = 0;
>>>> +	uint32_t bulk_hash[BATCH_SZ] = { 0 };
>>>> +
>>>> +	for (i = 0; i < BATCH_SZ; i++) {
>>>> +		tuples[i] = rte_zmalloc(NULL, len, 0);
>>>> +		for (j = 0; j < len / sizeof(uint32_t); j++)
>>>> +			tuples[i][j] = rte_rand();
>>>> +	}
>>>> +
>>>> +	start_tsc = rte_rdtsc_precise();
>>>> +	for (i = 0; i < ITERATIONS; i++) {
>>>> +		for (j = 0; j < BATCH_SZ; j++) {
>>>> +			hash ^= rte_softrss(tuples[j], len / sizeof(uint32_t),
>>>> +				default_rss_key);
>>>> +		}
>>>> +	}
>>>> +	end_tsc = rte_rdtsc_precise();
>>>> +
>>>> +	printf("Average rte_softrss() takes \t\t%.1f cycles for key len %d\n",
>>>> +		(double)(end_tsc - start_tsc) / (double)(ITERATIONS *
>>>> +		BATCH_SZ), len);
>>>> +
>>>> +	start_tsc = rte_rdtsc_precise();
>>>> +	for (i = 0; i < ITERATIONS; i++) {
>>>> +		for (j = 0; j < BATCH_SZ; j++) {
>>>> +			hash ^= rte_softrss_be(tuples[j], len /
>>>> +				sizeof(uint32_t), default_rss_key);
>>>> +		}
>>>> +	}
>>>> +	end_tsc = rte_rdtsc_precise();
>>>> +
>>>> +	printf("Average rte_softrss_be() takes \t\t%.1f cycles for key len %d\n",
>>>> +		(double)(end_tsc - start_tsc) / (double)(ITERATIONS *
>>>> +		BATCH_SZ), len);
>>>
>>> The function could stop here (one function per type of implementation).
>>>
>>
>> Could you please clarify what you mean?
>> The function stops here if the machine does not support GFNI, and this is
>> done intentionally. On a machine without GFNI, it tests only the scalar
>> implementations for every given length.
> 
> No, I mean you can split it into smaller functions.
> 

Aha, I see, I'll send v7.

>>>> +
>>>> +	if (!rte_thash_gfni_supported())
>>>> +		return;
>>>> +
>>>> +	uint64_t rss_key_matrixes[RTE_DIM(default_rss_key)];
>>>> +
>>>> +	rte_thash_complete_matrix(rss_key_matrixes, default_rss_key,
>>>> +		RTE_DIM(default_rss_key));
>>>> +
>>>> +	start_tsc = rte_rdtsc_precise();
>>>> +	for (i = 0; i < ITERATIONS; i++) {
>>>> +		for (j = 0; j < BATCH_SZ; j++)
>>>> +			hash ^= rte_thash_gfni(rss_key_matrixes,
>>>> +				(uint8_t *)tuples[j], len);
>>>> +	}
>>>> +	end_tsc = rte_rdtsc_precise();
>>>> +
>>>> +	printf("Average rte_thash_gfni takes \t\t%.1f cycles for key len %d\n",
>>>> +		(double)(end_tsc - start_tsc) / (double)(ITERATIONS *
>>>> +		BATCH_SZ), len);
>>>> +
>>>> +	start_tsc = rte_rdtsc_precise();
>>>> +	for (i = 0; i < ITERATIONS; i++)
>>>> +		rte_thash_gfni_bulk(rss_key_matrixes, len, (uint8_t **)tuples,
>>>> +			bulk_hash, BATCH_SZ);
>>>> +
>>>> +	end_tsc = rte_rdtsc_precise();
>>>> +
>>>> +	printf("Average rte_thash_gfni_x2 takes \t%.1f cycles for key len %d\n",
>>>
>>> and here, the function name is not updated.
>>>
>>>> +		(double)(end_tsc - start_tsc) / (double)(ITERATIONS *
>>>> +		BATCH_SZ), len);
>>>> +
>>>
>>> useless blank line
>>>
>>>> +}
> 
> 
> 

-- 
Regards,
Vladimir


More information about the dev mailing list