[dpdk-dev] [PATCH v5 2/5] app/flow-perf: add insertion rate calculation

Andrew Rybchenko arybchenko at solarflare.com
Wed May 6 17:23:58 CEST 2020


My biggest concern with the patch is the usage of a huge number
of global variables, which makes the code hard to read,
understand and maintain. See my notes below.
Please, share your thoughts.

On 5/6/20 3:36 PM, Wisam Jaddo wrote:
> Add insertion rate calculation feature into flow
> performance application.
> 
> The application now provide the ability to test
> insertion rate of specific rte_flow rule, by
> stressing it to the NIC, and calculate the
> insertion rate.
> 
> The application offers some options in the command
> line, to configure which rule to apply.
> 
> After that the application will start producing
> rules with same pattern but increasing the outer IP
> source address by 1 each time, thus it will give
> different flow each time, and all other items will
> have open masks.
> 
> The current design have single core insertion rate.
> In the future we may have a multi core insertion
> rate measurement support in the app.
> 
> Signed-off-by: Wisam Jaddo <wisamm at mellanox.com>
> ---

[snip]

> diff --git a/app/test-flow-perf/actions_gen.c b/app/test-flow-perf/actions_gen.c
> new file mode 100644
> index 000000000..fa60084cf
> --- /dev/null
> +++ b/app/test-flow-perf/actions_gen.c
> @@ -0,0 +1,88 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright 2020 Mellanox Technologies, Ltd
> + *
> + * The file contains the implementations of actions generators.
> + * Each generator is responsible for preparing it's action instance
> + * and initializing it with needed data.
> + **/
> +
> +#include <sys/types.h>
> +#include <rte_malloc.h>
> +#include <rte_flow.h>
> +#include <rte_ethdev.h>
> +
> +#define ALLOCATE_ACTION_VARS
> +#include "actions_gen.h"
> +#include "config.h"
> +
> +void
> +gen_mark(void)
> +{
> +	mark_action.id = MARK_ID;
> +}
> +
> +void
> +gen_queue(uint16_t queue)
> +{
> +	queue_action.index = queue;
> +}
> +
> +void
> +gen_jump(uint16_t next_table)
> +{
> +	jump_action.group = next_table;
> +}
> +
> +void
> +gen_rss(uint16_t *queues, uint16_t queues_number)
> +{
> +	uint16_t queue;
> +	struct action_rss_data *rss_data;
> +
> +	rss_data = rte_malloc("rss_data",
> +		sizeof(struct action_rss_data), 0);
> +
> +	if (rss_data == NULL)
> +		rte_exit(EXIT_FAILURE, "No Memory available!");
> +
> +	*rss_data = (struct action_rss_data){
> +		.conf = (struct rte_flow_action_rss){
> +			.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
> +			.level = 0,
> +			.types = ETH_RSS_IP |
> +				ETH_RSS_TCP,

Duplicating it in two places suggests adding a macro
with the value and using it in both places.

> +			.key_len = sizeof(rss_data->key),
> +			.queue_num = queues_number,
> +			.key = rss_data->key,
> +			.queue = rss_data->queue,
> +		},
> +		.key = { 1 },
> +		.queue = { 0 },
> +	};
> +
> +	for (queue = 0; queue < queues_number; queue++)
> +		rss_data->queue[queue] = queues[queue];
> +
> +	rss_action = &rss_data->conf;
> +}
> +
> +void
> +gen_set_meta(void)
> +{
> +	meta_action.data = RTE_BE32(META_DATA);
> +	meta_action.mask = RTE_BE32(0xffffffff);
> +}
> +
> +void
> +gen_set_tag(void)
> +{
> +	tag_action.data = RTE_BE32(META_DATA);
> +	tag_action.mask = RTE_BE32(0xffffffff);
> +	tag_action.index = TAG_INDEX;
> +}
> +
> +void
> +gen_port_id(void)
> +{
> +	port_id.id = PORT_ID_DST;
> +}
> 

[snip]

> diff --git a/app/test-flow-perf/flow_gen.c b/app/test-flow-perf/flow_gen.c
> new file mode 100644
> index 000000000..cf5453586
> --- /dev/null
> +++ b/app/test-flow-perf/flow_gen.c
> @@ -0,0 +1,179 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright 2020 Mellanox Technologies, Ltd
> + *
> + * The file contains the implementations of the method to
> + * fill items, actions & attributes in their corresponding
> + * arrays, and then generate rte_flow rule.
> + *
> + * After the generation. The rule goes to validation then
> + * creation state and then return the results.
> + */
> +
> +#include <stdint.h>
> +
> +#include "flow_gen.h"
> +#include "items_gen.h"
> +#include "actions_gen.h"
> +#include "config.h"
> +
> +static void
> +fill_attributes(struct rte_flow_attr *attr,
> +	uint8_t flow_attrs, uint16_t group)
> +{
> +	if (flow_attrs & INGRESS)
> +		attr->ingress = 1;
> +	if (flow_attrs & EGRESS)
> +		attr->egress = 1;
> +	if (flow_attrs & TRANSFER)
> +		attr->transfer = 1;
> +	attr->group = group;
> +}
> +
> +static void
> +fill_items(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint16_t flow_items, uint32_t outer_ip_src)
> +{
> +	uint8_t items_counter = 0;
> +
> +	if (flow_items & META_ITEM)
> +		add_meta_data(items, items_counter++);
> +	if (flow_items & TAG_ITEM)
> +		add_meta_tag(items, items_counter++);
> +	if (flow_items & ETH_ITEM)
> +		add_ether(items, items_counter++);
> +	if (flow_items & VLAN_ITEM)
> +		add_vlan(items, items_counter++);
> +	if (flow_items & IPV4_ITEM)
> +		add_ipv4(items, items_counter++, outer_ip_src);
> +	if (flow_items & IPV6_ITEM)
> +		add_ipv6(items, items_counter++, outer_ip_src);
> +	if (flow_items & TCP_ITEM)
> +		add_tcp(items, items_counter++);
> +	if (flow_items & UDP_ITEM)
> +		add_udp(items, items_counter++);
> +	if (flow_items & VXLAN_ITEM)
> +		add_vxlan(items, items_counter++);
> +	if (flow_items & VXLAN_GPE_ITEM)
> +		add_vxlan_gpe(items, items_counter++);
> +	if (flow_items & GRE_ITEM)
> +		add_gre(items, items_counter++);
> +	if (flow_items & GENEVE_ITEM)
> +		add_geneve(items, items_counter++);
> +	if (flow_items & GTP_ITEM)
> +		add_gtp(items, items_counter++);
> +
> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_END;
> +}
> +
> +static void
> +fill_actions(struct rte_flow_action actions[MAX_ACTIONS_NUM],
> +	uint16_t flow_actions, uint32_t counter, uint16_t next_table,
> +	uint16_t hairpinq)
> +{
> +	struct rte_flow_action_count count_action;
> +	uint8_t actions_counter = 0;
> +	uint16_t queues[RXQ_NUM];
> +	uint16_t i;
> +
> +	/* None-fate actions */
> +	if (flow_actions & MARK_ACTION) {
> +		if (!counter)

DPDK coding style says to compare explicitly against 0 [1]. Many
similar comparisons below.

[1] https://doc.dpdk.org/guides/contributing/coding_style.html#null-pointers

> +			gen_mark();
> +		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_MARK;
> +		actions[actions_counter++].conf = &mark_action;
> +	}
> +	if (flow_actions & COUNT_ACTION) {
> +		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_COUNT;
> +		actions[actions_counter++].conf = &count_action;
> +	}
> +	if (flow_actions & META_ACTION) {
> +		if (!counter)
> +			gen_set_meta();
> +		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_SET_META;
> +		actions[actions_counter++].conf = &meta_action;
> +	}
> +	if (flow_actions & TAG_ACTION) {
> +		if (!counter)
> +			gen_set_tag();
> +		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_SET_TAG;
> +		actions[actions_counter++].conf = &tag_action;
> +	}
> +
> +	/* Fate actions */
> +	if (flow_actions & QUEUE_ACTION) {
> +		gen_queue(counter % RXQ_NUM);
> +		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_QUEUE;
> +		actions[actions_counter++].conf = &queue_action;
> +	}
> +	if (flow_actions & RSS_ACTION) {
> +		if (!counter) {
> +			for (i = 0; i < RXQ_NUM; i++)
> +				queues[i] = i;
> +			gen_rss(queues, RXQ_NUM);
> +		}
> +		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_RSS;
> +		actions[actions_counter++].conf = rss_action;
> +	}
> +	if (flow_actions & JUMP_ACTION) {
> +		if (!counter)
> +			gen_jump(next_table);
> +		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_JUMP;
> +		actions[actions_counter++].conf = &jump_action;
> +	}
> +	if (flow_actions & PORT_ID_ACTION) {
> +		if (!counter)
> +			gen_port_id();
> +		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_PORT_ID;
> +		actions[actions_counter++].conf = &port_id;
> +	}
> +	if (flow_actions & DROP_ACTION)
> +		actions[actions_counter++].type = RTE_FLOW_ACTION_TYPE_DROP;
> +	if (flow_actions & HAIRPIN_QUEUE_ACTION) {
> +		gen_queue((counter % hairpinq) + RXQ_NUM);
> +		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_QUEUE;
> +		actions[actions_counter++].conf = &queue_action;
> +	}
> +	if (flow_actions & HAIRPIN_RSS_ACTION) {
> +		if (!counter) {
> +			uint16_t hairpin_queues[hairpinq];
> +			for (i = 0; i < hairpinq; i++)
> +				hairpin_queues[i] = i + RXQ_NUM;
> +			gen_rss(hairpin_queues, hairpinq);
> +		}
> +		actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_RSS;
> +		actions[actions_counter++].conf = rss_action;
> +	}
> +
> +	actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_END;
> +}
> +
> +struct rte_flow *
> +generate_flow(uint16_t port_id,
> +	uint16_t group,
> +	uint8_t flow_attrs,
> +	uint16_t flow_items,
> +	uint16_t flow_actions,
> +	uint16_t next_table,
> +	uint32_t outer_ip_src,
> +	uint16_t hairpinq,
> +	struct rte_flow_error *error)
> +{
> +	struct rte_flow_attr attr;
> +	struct rte_flow_item items[MAX_ITEMS_NUM];
> +	struct rte_flow_action actions[MAX_ACTIONS_NUM];
> +	struct rte_flow *flow = NULL;
> +
> +	memset(items, 0, sizeof(items));
> +	memset(actions, 0, sizeof(actions));
> +	memset(&attr, 0, sizeof(struct rte_flow_attr));
> +
> +	fill_attributes(&attr, flow_attrs, group);
> +
> +	fill_actions(actions, flow_actions,
> +			outer_ip_src, next_table, hairpinq);
> +
> +	fill_items(items, flow_items, outer_ip_src);
> +
> +	flow = rte_flow_create(port_id, &attr, items, actions, error);
> +	return flow;
> +}
> diff --git a/app/test-flow-perf/flow_gen.h b/app/test-flow-perf/flow_gen.h
> new file mode 100644
> index 000000000..43d9e7cfe
> --- /dev/null
> +++ b/app/test-flow-perf/flow_gen.h
> @@ -0,0 +1,63 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright 2020 Mellanox Technologies, Ltd
> + *
> + * This file contains the items, actions and attributes
> + * definition. And the methods to prepare and fill items,
> + * actions and attributes to generate rte_flow rule.
> + */
> +
> +#ifndef FLOW_PERF_FLOW_GEN
> +#define FLOW_PERF_FLOW_GEN
> +
> +#include <stdint.h>
> +#include <rte_flow.h>
> +
> +#include "config.h"
> +
> +#define FLOW_ITEM_MASK(_x) (UINT64_C(1) << _x)
> +
> +/* Items */
> +#define ETH_ITEM             FLOW_ITEM_MASK(0)

I don't understand why you don't use enum rte_flow_item_type
members as offsets. In this case you don't need these defines
at all, just use FLOW_ITEM_MASK(ETH). It will make it
easier to support new items.

> +#define IPV4_ITEM            FLOW_ITEM_MASK(1)
> +#define IPV6_ITEM            FLOW_ITEM_MASK(2)
> +#define VLAN_ITEM            FLOW_ITEM_MASK(3)
> +#define TCP_ITEM             FLOW_ITEM_MASK(4)
> +#define UDP_ITEM             FLOW_ITEM_MASK(5)
> +#define VXLAN_ITEM           FLOW_ITEM_MASK(6)
> +#define VXLAN_GPE_ITEM       FLOW_ITEM_MASK(7)
> +#define GRE_ITEM             FLOW_ITEM_MASK(8)
> +#define GENEVE_ITEM          FLOW_ITEM_MASK(9)
> +#define GTP_ITEM             FLOW_ITEM_MASK(10)
> +#define META_ITEM            FLOW_ITEM_MASK(11)
> +#define TAG_ITEM             FLOW_ITEM_MASK(12)
> +
> +/* Actions */
> +#define QUEUE_ACTION         FLOW_ITEM_MASK(0)

I don't understand why you don't use enum rte_flow_action_type
members as offsets.

> +#define MARK_ACTION          FLOW_ITEM_MASK(1)
> +#define JUMP_ACTION          FLOW_ITEM_MASK(2)
> +#define RSS_ACTION           FLOW_ITEM_MASK(3)
> +#define COUNT_ACTION         FLOW_ITEM_MASK(4)
> +#define META_ACTION          FLOW_ITEM_MASK(5)
> +#define TAG_ACTION           FLOW_ITEM_MASK(6)
> +#define DROP_ACTION          FLOW_ITEM_MASK(7)
> +#define PORT_ID_ACTION       FLOW_ITEM_MASK(8)
> +#define HAIRPIN_QUEUE_ACTION FLOW_ITEM_MASK(9)
> +#define HAIRPIN_RSS_ACTION   FLOW_ITEM_MASK(10)
> +
> +/* Attributes */
> +#define INGRESS              FLOW_ITEM_MASK(0)
> +#define EGRESS               FLOW_ITEM_MASK(1)
> +#define TRANSFER             FLOW_ITEM_MASK(2)
> +
> +struct rte_flow *
> +generate_flow(uint16_t port_id,
> +	uint16_t group,
> +	uint8_t flow_attrs,
> +	uint16_t flow_items,
> +	uint16_t flow_actions,
> +	uint16_t next_table,
> +	uint32_t outer_ip_src,
> +	uint16_t hairpinq,
> +	struct rte_flow_error *error);
> +
> +#endif /* FLOW_PERF_FLOW_GEN */
> diff --git a/app/test-flow-perf/items_gen.c b/app/test-flow-perf/items_gen.c
> new file mode 100644
> index 000000000..1e9479fb8
> --- /dev/null
> +++ b/app/test-flow-perf/items_gen.c
> @@ -0,0 +1,265 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright 2020 Mellanox Technologies, Ltd
> + *
> + * This file contain the implementations of the items
> + * related methods. Each Item have a method to prepare
> + * the item and add it into items array in given index.
> + */
> +
> +#include <stdint.h>
> +#include <rte_flow.h>
> +
> +#include "items_gen.h"
> +#include "config.h"
> +
> +/* Current design is single threaded. */
> +static struct rte_flow_item_eth eth_spec;
> +static struct rte_flow_item_eth eth_mask;

It looks like the design has problems with patterns such as:
eth / ipv4 / udp / vxlan / eth / end
where the same item type appears more than once.

> +static struct rte_flow_item_vlan vlan_spec;
> +static struct rte_flow_item_vlan vlan_mask;
> +static struct rte_flow_item_ipv4 ipv4_spec;
> +static struct rte_flow_item_ipv4 ipv4_mask;
> +static struct rte_flow_item_ipv6 ipv6_spec;
> +static struct rte_flow_item_ipv6 ipv6_mask;
> +static struct rte_flow_item_udp udp_spec;
> +static struct rte_flow_item_udp udp_mask;
> +static struct rte_flow_item_tcp tcp_spec;
> +static struct rte_flow_item_tcp tcp_mask;
> +static struct rte_flow_item_vxlan vxlan_spec;
> +static struct rte_flow_item_vxlan vxlan_mask;
> +static struct rte_flow_item_vxlan_gpe vxlan_gpe_spec;
> +static struct rte_flow_item_vxlan_gpe vxlan_gpe_mask;
> +static struct rte_flow_item_gre gre_spec;
> +static struct rte_flow_item_gre gre_mask;
> +static struct rte_flow_item_geneve geneve_spec;
> +static struct rte_flow_item_geneve geneve_mask;
> +static struct rte_flow_item_gtp gtp_spec;
> +static struct rte_flow_item_gtp gtp_mask;
> +static struct rte_flow_item_meta meta_spec;
> +static struct rte_flow_item_meta meta_mask;
> +static struct rte_flow_item_tag tag_spec;
> +static struct rte_flow_item_tag tag_mask;

I think that such a large number of global variables makes the
code hard to read and maintain. If that's only me, no problem.

> +
> +
> +void
> +add_ether(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter)
> +{
> +	memset(&eth_spec, 0, sizeof(struct rte_flow_item_eth));
> +	memset(&eth_mask, 0, sizeof(struct rte_flow_item_eth));
> +	eth_spec.type = 0;
> +	eth_mask.type = 0;

What's the point of setting type to 0 if you have just memset
the entire structure?

> +
> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_ETH;
> +	items[items_counter].spec = &eth_spec;
> +	items[items_counter].mask = &eth_mask;
> +}
> +
> +void
> +add_vlan(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter)
> +{
> +	uint16_t vlan_value = VLAN_VALUE;
> +	memset(&vlan_spec, 0, sizeof(struct rte_flow_item_vlan));
> +	memset(&vlan_mask, 0, sizeof(struct rte_flow_item_vlan));
> +
> +	vlan_spec.tci = RTE_BE16(vlan_value);
> +	vlan_mask.tci = RTE_BE16(0xffff);
> +
> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_VLAN;
> +	items[items_counter].spec = &vlan_spec;
> +	items[items_counter].mask = &vlan_mask;
> +}
> +
> +void
> +add_ipv4(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter, uint32_t src_ipv4)

Shouldn't src_ipv4 be rte_be32_t?

> +{
> +	memset(&ipv4_spec, 0, sizeof(struct rte_flow_item_ipv4));
> +	memset(&ipv4_mask, 0, sizeof(struct rte_flow_item_ipv4));
> +
> +	ipv4_spec.hdr.src_addr = src_ipv4;
> +	ipv4_mask.hdr.src_addr = 0xffffffff;

RTE_BE32() is missing above (at least to be consistent
with RTE_BE16() a few lines above).

> +
> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_IPV4;
> +	items[items_counter].spec = &ipv4_spec;
> +	items[items_counter].mask = &ipv4_mask;
> +}
> +
> +
> +void
> +add_ipv6(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter, int src_ipv6)

I think such a specification of the source IPv6 address is
very confusing. If you really need it, it would be nice
to explain why in comments.

> +{
> +	memset(&ipv6_spec, 0, sizeof(struct rte_flow_item_ipv6));
> +	memset(&ipv6_mask, 0, sizeof(struct rte_flow_item_ipv6));
> +
> +	/** Set ipv6 src **/
> +	memset(&ipv6_spec.hdr.src_addr, src_ipv6,
> +		sizeof(ipv6_spec.hdr.src_addr) / 2);
> +
> +	/** Full mask **/
> +	memset(&ipv6_mask.hdr.src_addr, 1,
> +		sizeof(ipv6_spec.hdr.src_addr));

Are you sure that 1 is what you really want here? Maybe 0xff?

> +
> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_IPV6;
> +	items[items_counter].spec = &ipv6_spec;
> +	items[items_counter].mask = &ipv6_mask;
> +}
> +
> +void
> +add_tcp(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter)
> +{
> +	memset(&tcp_spec, 0, sizeof(struct rte_flow_item_tcp));
> +	memset(&tcp_mask, 0, sizeof(struct rte_flow_item_tcp));
> +
> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_TCP;
> +	items[items_counter].spec = &tcp_spec;
> +	items[items_counter].mask = &tcp_mask;
> +}
> +
> +void
> +add_udp(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter)
> +{
> +	memset(&udp_spec, 0, sizeof(struct rte_flow_item_udp));
> +	memset(&udp_mask, 0, sizeof(struct rte_flow_item_udp));
> +
> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_UDP;
> +	items[items_counter].spec = &udp_spec;
> +	items[items_counter].mask = &udp_mask;
> +}
> +
> +void
> +add_vxlan(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter)
> +{
> +	uint32_t vni_value = VNI_VALUE;
> +	uint8_t i;
> +	memset(&vxlan_spec, 0, sizeof(struct rte_flow_item_vxlan));
> +	memset(&vxlan_mask, 0, sizeof(struct rte_flow_item_vxlan));
> +
> +	/* Set standard vxlan vni */
> +	for (i = 0; i < 3; i++) {
> +		vxlan_spec.vni[2 - i] = vni_value >> (i * 8);
> +		vxlan_mask.vni[2 - i] = 0xff;
> +	}
> +
> +	/* Standard vxlan flags */
> +	vxlan_spec.flags = 0x8;
> +
> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_VXLAN;
> +	items[items_counter].spec = &vxlan_spec;
> +	items[items_counter].mask = &vxlan_mask;
> +}
> +
> +void
> +add_vxlan_gpe(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter)
> +{
> +	uint32_t vni_value = VNI_VALUE;
> +	uint8_t i;
> +	memset(&vxlan_gpe_spec, 0, sizeof(struct rte_flow_item_vxlan_gpe));
> +	memset(&vxlan_gpe_mask, 0, sizeof(struct rte_flow_item_vxlan_gpe));
> +
> +	/* Set vxlan-gpe vni */
> +	for (i = 0; i < 3; i++) {
> +		vxlan_gpe_spec.vni[2 - i] = vni_value >> (i * 8);
> +		vxlan_gpe_mask.vni[2 - i] = 0xff;
> +	}
> +
> +	/* vxlan-gpe flags */
> +	vxlan_gpe_spec.flags = 0x0c;
> +
> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE;
> +	items[items_counter].spec = &vxlan_gpe_spec;
> +	items[items_counter].mask = &vxlan_gpe_mask;
> +}
> +
> +void
> +add_gre(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter)
> +{
> +	uint16_t proto = GRE_PROTO;
> +	memset(&gre_spec, 0, sizeof(struct rte_flow_item_gre));
> +	memset(&gre_mask, 0, sizeof(struct rte_flow_item_gre));
> +
> +	gre_spec.protocol = RTE_BE16(proto);
> +	gre_mask.protocol = 0xffff;

RTE_BE16(0xffff) to be consistent

> +
> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_GRE;
> +	items[items_counter].spec = &gre_spec;
> +	items[items_counter].mask = &gre_mask;
> +}
> +
> +void
> +add_geneve(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter)
> +{
> +	uint32_t vni_value = VNI_VALUE;
> +	uint8_t i;
> +	memset(&geneve_spec, 0, sizeof(struct rte_flow_item_geneve));
> +	memset(&geneve_mask, 0, sizeof(struct rte_flow_item_geneve));
> +
> +	for (i = 0; i < 3; i++) {
> +		geneve_spec.vni[2 - i] = vni_value >> (i * 8);
> +		geneve_mask.vni[2 - i] = 0xff;
> +	}
> +
> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_GENEVE;
> +	items[items_counter].spec = &geneve_spec;
> +	items[items_counter].mask = &geneve_mask;
> +}
> +
> +void
> +add_gtp(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter)
> +{
> +	uint32_t teid_value = TEID_VALUE;
> +	memset(&gtp_spec, 0, sizeof(struct rte_flow_item_gtp));
> +	memset(&gtp_mask, 0, sizeof(struct rte_flow_item_gtp));
> +
> +	gtp_spec.teid = RTE_BE32(teid_value);
> +	gtp_mask.teid = RTE_BE32(0xffffffff);
> +
> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_GTP;
> +	items[items_counter].spec = &gtp_spec;
> +	items[items_counter].mask = &gtp_mask;
> +}
> +
> +void
> +add_meta_data(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter)
> +{
> +	uint32_t data = META_DATA;
> +	memset(&meta_spec, 0, sizeof(struct rte_flow_item_meta));
> +	memset(&meta_mask, 0, sizeof(struct rte_flow_item_meta));
> +
> +	meta_spec.data = RTE_BE32(data);
> +	meta_mask.data = RTE_BE32(0xffffffff);
> +
> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_META;
> +	items[items_counter].spec = &meta_spec;
> +	items[items_counter].mask = &meta_mask;
> +}
> +
> +
> +void
> +add_meta_tag(struct rte_flow_item items[MAX_ITEMS_NUM],
> +	uint8_t items_counter)
> +{
> +	uint32_t data = META_DATA;
> +	uint8_t index = TAG_INDEX;
> +	memset(&tag_spec, 0, sizeof(struct rte_flow_item_tag));
> +	memset(&tag_mask, 0, sizeof(struct rte_flow_item_tag));
> +
> +	tag_spec.data = RTE_BE32(data);
> +	tag_mask.data = RTE_BE32(0xffffffff);
> +	tag_spec.index = index;
> +	tag_mask.index = 0xff;
> +
> +	items[items_counter].type = RTE_FLOW_ITEM_TYPE_TAG;
> +	items[items_counter].spec = &tag_spec;
> +	items[items_counter].mask = &tag_mask;
> +}

[snip]

> diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c
> index 7a924cdb7..463e4a782 100644
> --- a/app/test-flow-perf/main.c
> +++ b/app/test-flow-perf/main.c
> @@ -35,29 +35,156 @@
>  #include <rte_flow.h>
>  
>  #include "config.h"
> +#include "flow_gen.h"
>  
> -static uint32_t nb_lcores;
> +#define MAX_ITERATIONS             100
> +#define DEFAULT_RULES_COUNT    4000000
> +#define DEFAULT_ITERATION       100000
> +
> +struct rte_flow *flow;
> +static uint8_t flow_group;
> +
> +static uint16_t flow_items;
> +static uint16_t flow_actions;
> +static uint8_t flow_attrs;
> +static volatile bool force_quit;
> +static bool dump_iterations;
>  static struct rte_mempool *mbuf_mp;
> +static uint32_t nb_lcores;
> +static uint32_t flows_count;
> +static uint32_t iterations_number;
> +static uint32_t hairpinq;

Global variables again.

>  static void
>  usage(char *progname)
>  {
>  	printf("\nusage: %s\n", progname);
> +	printf("\nControl configurations:\n");
> +	printf("  --flows-count=N: to set the number of needed"
> +		" flows to insert, default is 4,000,000\n");
> +	printf("  --dump-iterations: To print rates for each"
> +		" iteration\n");
> +
> +	printf("To set flow attributes:\n");
> +	printf("  --ingress: set ingress attribute in flows\n");
> +	printf("  --egress: set egress attribute in flows\n");
> +	printf("  --transfer: set transfer attribute in flows\n");
> +	printf("  --group=N: set group for all flows,"
> +		" default is 0\n");
> +
> +	printf("To set flow items:\n");
> +	printf("  --ether: add ether layer in flow items\n");
> +	printf("  --vlan: add vlan layer in flow items\n");
> +	printf("  --ipv4: add ipv4 layer in flow items\n");
> +	printf("  --ipv6: add ipv6 layer in flow items\n");
> +	printf("  --tcp: add tcp layer in flow items\n");
> +	printf("  --udp: add udp layer in flow items\n");
> +	printf("  --vxlan: add vxlan layer in flow items\n");
> +	printf("  --vxlan-gpe: add vxlan-gpe layer in flow items\n");
> +	printf("  --gre: add gre layer in flow items\n");
> +	printf("  --geneve: add geneve layer in flow items\n");
> +	printf("  --gtp: add gtp layer in flow items\n");
> +	printf("  --meta: add meta layer in flow items\n");
> +	printf("  --tag: add tag layer in flow items\n");
> +
> +	printf("To set flow actions:\n");
> +	printf("  --port-id: add port-id action in flow actions\n");
> +	printf("  --rss: add rss action in flow actions\n");
> +	printf("  --queue: add queue action in flow actions\n");
> +	printf("  --jump: add jump action in flow actions\n");
> +	printf("  --mark: add mark action in flow actions\n");
> +	printf("  --count: add count action in flow actions\n");
> +	printf("  --set-meta: add set meta action in flow actions\n");
> +	printf("  --set-tag: add set tag action in flow actions\n");
> +	printf("  --drop: add drop action in flow actions\n");
> +	printf("  --hairpin-queue=N: add hairpin-queue action in flow actions\n");
> +	printf("  --hairpin-rss=N: add hairping-rss action in flow actions\n");
>  }
>  
>  static void
>  args_parse(int argc, char **argv)
>  {
>  	char **argvopt;
> -	int opt;
> +	int n, opt, i;
>  	int opt_idx;
> +	static const char * const items_str[] = {
> +		"ether", "vlan", "ipv4", "ipv6",

Typically the Ethernet pattern item is named "eth" (e.g.
in testpmd if I'm not mistaken). Why does it differ here?

> +		"tcp", "udp", "vxlan", "vxlan-gpe",
> +		"gre", "geneve", "gtp", "meta",
> +		"tag"
> +	};
> +	uint32_t items[] = {
> +		ETH_ITEM, VLAN_ITEM, IPV4_ITEM,
> +		IPV6_ITEM, TCP_ITEM, UDP_ITEM,
> +		VXLAN_ITEM, VXLAN_GPE_ITEM, GRE_ITEM,
> +		GENEVE_ITEM, GTP_ITEM, META_ITEM,
> +		TAG_ITEM
> +	};

I guess the above two arrays should both be const and must
be kept in sync. If so, it would be better to create one
array of structures with name and mask members. It is more
robust.

> +	static const char * const attributes_str[] = {
> +		"ingress", "egress", "transfer"
> +	};
> +	uint32_t attributes[] = {
> +		INGRESS, EGRESS, TRANSFER
> +	};

Same as above.

> +	static const char * const actions_str[] = {
> +		"port-id", "rss", "queue", "jump",
> +		"mark", "count", "set-meta", "set-tag",
> +		"drop",
> +	};
> +	uint32_t actions[] = {
> +		PORT_ID_ACTION, RSS_ACTION, QUEUE_ACTION,
> +		JUMP_ACTION, MARK_ACTION, COUNT_ACTION,
> +		META_ACTION, TAG_ACTION, DROP_ACTION
> +	};

Same as above.

> +	int items_size = RTE_DIM(items);
> +	int attributes_size = RTE_DIM(attributes);
> +	int actions_size = RTE_DIM(actions);
> +
>  	static struct option lgopts[] = {
>  		/* Control */
>  		{ "help",                       0, 0, 0 },
> +		{ "flows-count",                1, 0, 0 },
> +		{ "dump-iterations",            0, 0, 0 },
> +		/* Attributes */
> +		{ "ingress",                    0, 0, 0 },
> +		{ "egress",                     0, 0, 0 },
> +		{ "transfer",                   0, 0, 0 },

I think it should be possible to add these items in
a loop by attributes_str-like array.

> +		{ "group",                      1, 0, 0 },
> +		/* Items */
> +		{ "ether",                      0, 0, 0 },
> +		{ "vlan",                       0, 0, 0 },
> +		{ "ipv4",                       0, 0, 0 },
> +		{ "ipv6",                       0, 0, 0 },
> +		{ "tcp",                        0, 0, 0 },
> +		{ "udp",                        0, 0, 0 },
> +		{ "vxlan",                      0, 0, 0 },
> +		{ "vxlan-gpe",                  0, 0, 0 },
> +		{ "gre",                        0, 0, 0 },
> +		{ "geneve",                     0, 0, 0 },
> +		{ "gtp",                        0, 0, 0 },
> +		{ "meta",                       0, 0, 0 },
> +		{ "tag",                        0, 0, 0 },

I think it should be possible to add these items in
a loop over an items_str-like array.  It would
avoid copy-paste and simplify the addition of
new items.

> +		/* Actions */
> +		{ "port-id",                    0, 0, 0 },
> +		{ "rss",                        0, 0, 0 },
> +		{ "queue",                      0, 0, 0 },
> +		{ "jump",                       0, 0, 0 },
> +		{ "mark",                       0, 0, 0 },
> +		{ "count",                      0, 0, 0 },
> +		{ "set-meta",                   0, 0, 0 },
> +		{ "set-tag",                    0, 0, 0 },
> +		{ "drop",                       0, 0, 0 },

I think it should be possible to add these items in
a loop over an actions_str-like array.  It would avoid
copy-paste and simplify the addition of new actions.


> +		{ "hairpin-queue",              1, 0, 0 },
> +		{ "hairpin-rss",                1, 0, 0 },
>  	};
>  
> +	flow_items = 0;
> +	flow_actions = 0;
> +	flow_attrs = 0;
> +	hairpinq = 0;
>  	argvopt = argv;
>  
> +	printf(":: Flow -> ");
>  	while ((opt = getopt_long(argc, argvopt, "",
>  				lgopts, &opt_idx)) != EOF) {
>  		switch (opt) {
> @@ -66,6 +193,73 @@ args_parse(int argc, char **argv)
>  				usage(argv[0]);
>  				rte_exit(EXIT_SUCCESS, "Displayed help\n");
>  			}
> +
> +			/* Attributes */
> +			for (i = 0; i < attributes_size; i++)
> +				if (!strcmp(lgopts[opt_idx].name,
> +						attributes_str[i])) {

Comparison vs 0. Many cases below.

> +					flow_attrs |= attributes[i];
> +					printf("%s / ", attributes_str[i]);
> +				}
> +			if (!strcmp(lgopts[opt_idx].name, "group")) {
> +				n = atoi(optarg);
> +				if (n >= 0)
> +					flow_group = n;
> +				else
> +					rte_exit(EXIT_SUCCESS,
> +						"flow group should be >= 0");
> +				printf("group %d ", flow_group);
> +			}
> +
> +			/* Items */
> +			for (i = 0; i < items_size; i++)
> +				if (!strcmp(lgopts[opt_idx].name,
> +						items_str[i])) {
> +					flow_items |= items[i];
> +					printf("%s / ", items_str[i]);
> +				}
> +
> +			/* Actions */
> +			for (i = 0; i < actions_size; i++)
> +				if (!strcmp(lgopts[opt_idx].name,
> +						actions_str[i])) {
> +					flow_actions |= actions[i];
> +					printf("%s / ", actions_str[i]);
> +				}
> +			if (!strcmp(lgopts[opt_idx].name, "hairpin-rss")) {
> +				n = atoi(optarg);
> +				if (n > 0)
> +					hairpinq = n;
> +				else
> +					rte_exit(EXIT_SUCCESS, "Hairpin queues should be > 0 ");
> +
> +				flow_actions |= HAIRPIN_RSS_ACTION;
> +				printf("hairpin-rss / ");
> +			}
> +			if (!strcmp(lgopts[opt_idx].name, "hairpin-queue")) {
> +				n = atoi(optarg);
> +				if (n > 0)
> +					hairpinq = n;
> +				else
> +					rte_exit(EXIT_SUCCESS, "Hairpin queues should be > 0 ");
> +
> +				flow_actions |= HAIRPIN_QUEUE_ACTION;
> +				printf("hairpin-queue / ");
> +			}
> +
> +			/* Control */
> +			if (!strcmp(lgopts[opt_idx].name, "flows-count")) {
> +				n = atoi(optarg);
> +				if (n > (int) iterations_number)
> +					flows_count = n;
> +				else {
> +					printf("\n\nflows_count should be > %d",
> +						iterations_number);
> +					rte_exit(EXIT_SUCCESS, " ");
> +				}
> +			}
> +			if (!strcmp(lgopts[opt_idx].name, "dump-iterations"))
> +				dump_iterations = true;
>  			break;
>  		default:
>  			printf("Invalid option: %s\n", argv[optind]);
> @@ -74,15 +268,141 @@ args_parse(int argc, char **argv)
>  			break;
>  		}
>  	}
> +	printf("end_flow\n");
> +}
> +
> +static void
> +print_flow_error(struct rte_flow_error error)
> +{
> +	printf("Flow can't be created %d message: %s\n",
> +		error.type,
> +		error.message ? error.message : "(no stated reason)");
> +}
> +
> +static inline void
> +flows_handler(void)
> +{
> +	struct rte_flow_error error;
> +	clock_t start_iter, end_iter;
> +	double cpu_time_used;
> +	double flows_rate;
> +	double cpu_time_per_iter[MAX_ITERATIONS];
> +	double delta;
> +	uint16_t nr_ports;
> +	uint32_t i;
> +	int port_id;
> +	int iter_id;
> +	uint32_t eagain_counter = 0;
> +
> +	nr_ports = rte_eth_dev_count_avail();
> +
> +	for (i = 0; i < MAX_ITERATIONS; i++)
> +		cpu_time_per_iter[i] = -1;
> +
> +	if (iterations_number > flows_count)
> +		iterations_number = flows_count;
> +
> +	printf(":: Flows Count per port: %d\n", flows_count);
> +
> +	for (port_id = 0; port_id < nr_ports; port_id++) {
> +		cpu_time_used = 0;
> +		if (flow_group > 0) {
> +			/*
> +			 * Create global rule to jump into flow_group,
> +			 * this way the app will avoid the default rules.
> +			 *
> +			 * Golbal rule:
> +			 * group 0 eth / end actions jump group <flow_group>
> +			 *
> +			 */
> +			flow = generate_flow(port_id, 0, flow_attrs, ETH_ITEM,
> +				JUMP_ACTION, flow_group, 0, 0, &error);
> +
> +			if (!flow) {

Comparison vs NULL

> +				print_flow_error(error);
> +				rte_exit(EXIT_FAILURE, "error in creating flow");
> +			}
> +		}
> +
> +		/* Insertion Rate */
> +		printf("Flows insertion on port = %d\n", port_id);
> +		start_iter = clock();
> +		for (i = 0; i < flows_count; i++) {
> +			do {
> +				rte_errno = 0;
> +				flow = generate_flow(port_id, flow_group,
> +					flow_attrs, flow_items, flow_actions,
> +					JUMP_ACTION_TABLE, i, hairpinq, &error);
> +				if (!flow)
> +					eagain_counter++;
> +			} while (rte_errno == EAGAIN);
> +
> +			if (force_quit)
> +				i = flows_count;
> +
> +			if (!flow) {
> +				print_flow_error(error);
> +				rte_exit(EXIT_FAILURE, "error in creating flow");
> +			}
> +
> +			if (i && !((i + 1) % iterations_number)) {
> +				/* Save the insertion rate of each iter */
> +				end_iter = clock();
> +				delta = (double) (end_iter - start_iter);
> +				iter_id = ((i + 1) / iterations_number) - 1;
> +				cpu_time_per_iter[iter_id] =
> +					delta / CLOCKS_PER_SEC;
> +				cpu_time_used += cpu_time_per_iter[iter_id];
> +				start_iter = clock();
> +			}
> +		}
> +
> +		/* Iteration rate per iteration */
> +		if (dump_iterations)
> +			for (i = 0; i < MAX_ITERATIONS; i++) {
> +				if (cpu_time_per_iter[i] == -1)
> +					continue;
> +				delta = (double)(iterations_number /
> +					cpu_time_per_iter[i]);
> +				flows_rate = delta / 1000;
> +				printf(":: Iteration #%d: %d flows "
> +					"in %f sec[ Rate = %f K/Sec ]\n",
> +					i, iterations_number,
> +					cpu_time_per_iter[i], flows_rate);
> +			}
> +
> +		/* Insertion rate for all flows */
> +		flows_rate = ((double) (flows_count / cpu_time_used) / 1000);
> +		printf("\n:: Total flow insertion rate -> %f K/Sec\n",
> +						flows_rate);
> +		printf(":: The time for creating %d in flows %f seconds\n",
> +						flows_count, cpu_time_used);
> +		printf(":: EAGAIN counter = %d\n", eagain_counter);
> +	}
> +}
> +
> +static void
> +signal_handler(int signum)
> +{
> +	if (signum == SIGINT || signum == SIGTERM) {
> +		printf("\n\nSignal %d received, preparing to exit...\n",
> +					signum);
> +		printf("Error: Stats are wrong due to sudden signal!\n\n");
> +		force_quit = true;
> +	}

This is the patch that starts to use sigint.h, so the include should be
added in this patch, not in the previous one.

>  }
>  
>  static void
>  init_port(void)
>  {
>  	int ret;
> -	uint16_t i;
> +	uint16_t i, j;
>  	uint16_t port_id;
>  	uint16_t nr_ports;
> +	uint16_t nr_queues;
> +	struct rte_eth_hairpin_conf hairpin_conf = {
> +		.peer_count = 1,
> +	};
>  	struct rte_eth_conf port_conf = {
>  		.rx_adv_conf = {
>  			.rss_conf.rss_hf =
> @@ -94,6 +414,10 @@ init_port(void)
>  	struct rte_eth_rxconf rxq_conf;
>  	struct rte_eth_dev_info dev_info;
>  
> +	nr_queues = RXQ_NUM;
> +	if (hairpinq)

Please compare against 0 explicitly here (i.e. hairpinq != 0).

> +		nr_queues = RXQ_NUM + hairpinq;
> +
>  	nr_ports = rte_eth_dev_count_avail();
>  	if (nr_ports == 0)
>  		rte_exit(EXIT_FAILURE, "Error: no port detected\n");
> @@ -118,8 +442,8 @@ init_port(void)
>  
>  		printf(":: initializing port: %d\n", port_id);
>  
> -		ret = rte_eth_dev_configure(port_id, RXQ_NUM,
> -				TXQ_NUM, &port_conf);
> +		ret = rte_eth_dev_configure(port_id, nr_queues,
> +				nr_queues, &port_conf);
>  		if (ret < 0)
>  			rte_exit(EXIT_FAILURE,
>  				":: cannot configure device: err=%d, port=%u\n",
> @@ -159,6 +483,30 @@ init_port(void)
>  				":: promiscuous mode enable failed: err=%s, port=%u\n",
>  				rte_strerror(-ret), port_id);
>  
> +		if (hairpinq) {
> +			for (i = RXQ_NUM, j = 0; i < nr_queues; i++, j++) {
> +				hairpin_conf.peers[0].port = port_id;
> +				hairpin_conf.peers[0].queue = j + TXQ_NUM;
> +				ret = rte_eth_rx_hairpin_queue_setup(port_id, i,
> +					NR_RXD, &hairpin_conf);
> +				if (ret != 0)
> +					rte_exit(EXIT_FAILURE,
> +						":: Hairpin rx queue setup failed: err=%d, port=%u\n",
> +						ret, port_id);
> +			}
> +
> +			for (i = TXQ_NUM, j = 0; i < nr_queues; i++, j++) {
> +				hairpin_conf.peers[0].port = port_id;
> +				hairpin_conf.peers[0].queue = j + RXQ_NUM;
> +				ret = rte_eth_tx_hairpin_queue_setup(port_id, i,
> +					NR_TXD, &hairpin_conf);
> +				if (ret != 0)
> +					rte_exit(EXIT_FAILURE,
> +						":: Hairpin tx queue setup failed: err=%d, port=%u\n",
> +						ret, port_id);
> +			}
> +		}
> +
>  		ret = rte_eth_dev_start(port_id);
>  		if (ret < 0)
>  			rte_exit(EXIT_FAILURE,
> @@ -180,6 +528,15 @@ main(int argc, char **argv)
>  	if (ret < 0)
>  		rte_exit(EXIT_FAILURE, "EAL init failed\n");
>  
> +	force_quit = false;
> +	dump_iterations = false;
> +	flows_count = DEFAULT_RULES_COUNT;
> +	iterations_number = DEFAULT_ITERATION;
> +	flow_group = 0;
> +
> +	signal(SIGINT, signal_handler);
> +	signal(SIGTERM, signal_handler);
> +
>  	argc -= ret;
>  	argv += ret;
>  	if (argc > 1)
> @@ -191,6 +548,8 @@ main(int argc, char **argv)
>  	if (nb_lcores <= 1)
>  		rte_exit(EXIT_FAILURE, "This app needs at least two cores\n");
>  
> +	flows_handler();
> +
>  	RTE_ETH_FOREACH_DEV(port) {
>  		rte_flow_flush(port, &error);
>  		rte_eth_dev_stop(port);

[snip]

> diff --git a/doc/guides/tools/flow-perf.rst b/doc/guides/tools/flow-perf.rst
> index 49eb450ae..b45fccd69 100644
> --- a/doc/guides/tools/flow-perf.rst
> +++ b/doc/guides/tools/flow-perf.rst
> @@ -4,7 +4,18 @@
>  Flow performance tool
>  =====================
>  
> -Application for rte_flow performance testing.
> +Application for rte_flow performance testing. The application provide the

As far as I remember, the documentation guidelines recommend
starting each new sentence on a new line.

> +ability to test insertion rate of specific rte_flow rule, by stressing it
> +to the NIC, and calculate the insertion rate.
> +
> +The application offers some options in the command line, to configure
> +which rule to apply.
> +
> +After that the application will start producing rules with same pattern
> +but increasing the outer IP source address by 1 each time, thus it will
> +give different flow each time, and all other items will have open masks.
> +
> +The current design have single core insertion rate.
>  
>  
>  Compiling the Application

[snip]



More information about the dev mailing list