[dpdk-dev] [PATCH] ethdev: Additions to rte_flows to support vTEP encap/decap offload

Thomas Monjalon thomas at monjalon.net
Thu Apr 5 11:53:44 CEST 2018


+Cc Adrien, please review

10/03/2018 01:25, Declan Doherty:
> This V1 patchset contains the revised proposal to manage virtual
> tunnel endpoints (vTEP) hardware acceleration based on community
> feedback on RFC
> (http://dpdk.org/ml/archives/dev/2017-December/084676.html). This
> proposal is purely enabled through rte_flow APIs with the
> additions of some new features which were previously implemented
> by the proposed rte_tep APIs which were proposed in the original
> RFC. This patchset ultimately aims to enable the configuration
> of inline data path encapsulation and decapsulation of tunnel
> endpoint network overlays on accelerated IO devices.
> 
> The summary of the additions to the rte_flow are as follows:
> 
> - Add new flow actions RTE_FLOW_ACTION_TYPE_VTEP_ENCAP and
> RTE_FLOW_ACTION_TYPE_VTEP_DECAP to rte_flow to support specification
> of encapsulation and decapsulation of virtual Tunnel Endpoint on
> hardware.
> 
> - Updates the matching pattern item definition
> description to specify that all actions which modify a packet
> must be specified in the explicit order they are to be executed.
> 
> - Introduces support for the use of pipeline metadata in
> the flow pattern definition and the population of metadata fields
> from flow actions.
> 
> - Adds group counters to enable statistics to be kept on groups of
> flows such as all ingress/egress flows of a vTEP
> 
> - Adds group_action to allow a flow's termination to be a group/table
> within the device.
> 
> A high level summary of the proposed usage model is as follows:
> 
> 1. Decapsulation
> 
> 1.1. Decapsulation of vTEP outer headers and forward all traffic
>      to the same queue/s or port, would have the following flow
>      parameters; pseudo code is used here.
> 
> struct rte_flow_attr attr = { .ingress = 1 };
> 
> struct rte_flow_item pattern[] = {
> 	{ .type = RTE_FLOW_ITEM_TYPE_ETH,  .spec = &eth_item },
> 	{ .type = RTE_FLOW_ITEM_TYPE_IPV4, .spec = &ipv4_item },
> 	{ .type = RTE_FLOW_ITEM_TYPE_UDP, .spec = &udp_item },
> 	{ .type = RTE_FLOW_ITEM_TYPE_VxLAN, .spec = &vxlan_item },
> 	{ .type = RTE_FLOW_ITEM_TYPE_END }
> };
> 
> struct rte_flow_action actions[] = {
> 	{ .type = RTE_FLOW_ACTION_TYPE_VTEP_DECAP, .conf = VxLAN },
> 	{ .type = RTE_FLOW_ACTION_TYPE_VF, .conf = &vf_action  },
> 	{ .type = RTE_FLOW_ACTION_TYPE_END }
> 
> }
> 
> 1.2.
> Decapsulation of vTEP outer headers and matching on inner
> headers, and forwarding to the same queue/s or port.
> 
> 1.2.1.
> The same scenario as above but either the application
> or hardware requires configuration as 2 logically independent
> operations (viewing it as 2 logical tables). The first stage
> being the flow rule to define the pattern to match the vTEP
> and the action to decapsulate the packet, and the second stage
> table matches the inner header and defines the actions,
> forward to port etc.
> 
> flow rule for outer header on table 0
> 
> struct rte_flow_attr attr = { .ingress = 1, .table = 0 };
> 
> struct rte_flow_item pattern[] = {
> 	{ .type = RTE_FLOW_ITEM_TYPE_ETH,  .spec = &eth_item },
> 	{ .type = RTE_FLOW_ITEM_TYPE_IPV4, .spec = &ipv4_item },
> 	{ .type = RTE_FLOW_ITEM_TYPE_UDP, .spec = &udp_item },
> 	{ .type = RTE_FLOW_ITEM_TYPE_VxLAN, .spec = &vxlan_item },
> 	{ .type = RTE_FLOW_ITEM_TYPE_END }
> };
> 
> struct rte_flow_action actions[] = {
> 	{ .type = RTE_FLOW_ACTION_TYPE_GROUP_COUNT, .conf = &vtep_counter },
> 	{ .type = RTE_FLOW_ACTION_TYPE_METADATA, .conf = &metadata_action },
> 	{ .type = RTE_FLOW_ACTION_TYPE_VTEP_DECAP, .conf = VxLAN },
> 	{ .type = RTE_FLOW_ACTION_TYPE_GROUP, .conf = &group_action = { .id = 1 } },
> 	{ .type = RTE_FLOW_ACTION_TYPE_END }
> }
> 
> flow rule for inner header on table 1
> 
> struct rte_flow_attr attr = { .ingress = 1, .table = 1 };
> 
> struct rte_flow_item pattern[] = {
> 	{ .type = RTE_FLOW_ITEM_TYPE_METADATA,  .spec = &metadata_item },
> 	{ .type = RTE_FLOW_ITEM_TYPE_ETH,  .spec = &eth_item },
> 	{ .type = RTE_FLOW_ITEM_TYPE_IPV4, .spec = &ipv4_item },
> 	{ .type = RTE_FLOW_ITEM_TYPE_TCP, .spec = &tcp_item },
> 	{ .type = RTE_FLOW_ITEM_TYPE_END }
> };
> 
> struct rte_flow_action actions[] = {
> 	{ .type = RTE_FLOW_ACTION_TYPE_PORT, .conf = &port_action = { port_id } },
> 	{ .type = RTE_FLOW_ACTION_TYPE_END }
> }
> 
> Note that the metadata action in the flow rule in table 0 is generating
> the metadata in the pipeline which is then used as part of the flow
> pattern in table 1 to specify the exact flow to match against. In the
> case where exact match rules are being provided by the application
> then this metadata could be provided by the application in both rules.
> If there was wildcard matching happening at the first table then this
> metadata could be generated by hw, but this would require extension to
> currently proposed API to allow specification of how the metadata should
> be generated.
> 
> 2. Encapsulation
> 
> Encapsulation of all traffic matching a specific flow pattern to a
> specified vTEP and egressing to a particular port.
> 
> struct rte_flow_attr attr = { .egress = 1 };
> 
> struct rte_flow_item pattern[] = {
> 	{ .type = RTE_FLOW_ITEM_TYPE_ETH, .spec = &eth_item },
> 	{ .type = RTE_FLOW_ITEM_TYPE_IPV4, .spec = &ipv4_item },
> 	{ .type = RTE_FLOW_ITEM_TYPE_TCP, .spec = &tcp_item },
> 	{ .type = RTE_FLOW_ITEM_TYPE_END }
> };
> 
> struct rte_flow_action_vtep_encap encap_action = {
> 	.patterns = {
> 		{ .type=eth, .item = {} },
> 		{ .type=ipv4, .item = {} },
> 		{ .type=udp, .item = {} },
> 		{ .type=vxlan, .item = {} } }
> };
> 
> struct rte_flow_action actions[] = {
> 	{ .type = RTE_FLOW_ACTION_TYPE_GROUP_COUNT, .conf = &group_count } },
> 	{ .type = RTE_FLOW_ACTION_TYPE_VTEP_ENCAP, .conf = &encap_action } },
> 	{ .type = RTE_FLOW_ACTION_TYPE_PORT, .conf = &port_action = { port_id } },
> 	{ .type = RTE_FLOW_ACTION_TYPE_END }
> }
> 
> Signed-off-by: Declan Doherty <declan.doherty at intel.com>
> ---
>  lib/librte_ether/rte_flow.h | 166 +++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 163 insertions(+), 3 deletions(-)
> 
> diff --git a/lib/librte_ether/rte_flow.h b/lib/librte_ether/rte_flow.h
> index 13e420218..8eb838c99 100644
> --- a/lib/librte_ether/rte_flow.h
> +++ b/lib/librte_ether/rte_flow.h
> @@ -323,6 +323,13 @@ enum rte_flow_item_type {
>  	 * See struct rte_flow_item_geneve.
>  	 */
>  	RTE_FLOW_ITEM_TYPE_GENEVE,
> +
> +	/**
> +	 * Matches specified pipeline metadata field.
> +	 *
> +	 * See struct rte_flow_item_metadata.
> +	 */
> +	RTE_FLOW_ITEM_TYPE_METADATA
>  };
>  
>  /**
> @@ -814,6 +821,17 @@ static const struct rte_flow_item_geneve rte_flow_item_geneve_mask = {
>  };
>  #endif
>  
> +/**
> + * RTE_FLOW_ITEM_TYPE_METADATA
> + *
> + * Allow arbitrary pipeline metadata to be used in specification flow pattern
> + */
> +struct rte_flow_item_metadata {
> +	uint32_t id;		/**< field identifier */
> +	uint32_t size;		/**< field size */
> +	uint8_t bytes[];	/**< field value */
> +};
> +
>  /**
>   * Matching pattern item definition.
>   *
> @@ -859,9 +877,11 @@ struct rte_flow_item {
>   *
>   * Each possible action is represented by a type. Some have associated
>   * configuration structures. Several actions combined in a list can be
> - * affected to a flow rule. That list is not ordered.
> + * affected to a flow rule. That list is not ordered, with the exception of
> + * actions which modify the packet itself, these packet modification actions
> + * must be specified in the explicit order in which they are to be executed.
>   *
> - * They fall in three categories:
> + * They fall in four categories:
>   *
>   * - Terminating actions (such as QUEUE, DROP, RSS, PF, VF) that prevent
>   *   processing matched packets by subsequent flow rules, unless overridden
> @@ -870,6 +890,10 @@ struct rte_flow_item {
>   * - Non terminating actions (PASSTHRU, DUP) that leave matched packets up
>   *   for additional processing by subsequent flow rules.
>   *
> + * - Non terminating meta actions that do not affect the fate of
> + *   packets but result in modification of the packet itself (SECURITY,
> + *   VTEP_ENCAP, VTEP_DECAP).
> + *
>   * - Other non terminating meta actions that do not affect the fate of
>   *   packets (END, VOID, MARK, FLAG, COUNT).
>   *
> @@ -879,6 +903,11 @@ struct rte_flow_item {
>   * Only the last action of a given type is taken into account. PMDs still
>   * perform error checking on the entire list.
>   *
> + * Note that it may be possible for some packet modification actions to be
> + * specified more than once on a single flow rule. For example an action which
> + * modifies an IP header could be specified for both inner and outer IP headers
> + * on a tunneled packet.
> + *
>   * Note that PASSTHRU is the only action able to override a terminating
>   * rule.
>   */
> @@ -1010,7 +1039,50 @@ enum rte_flow_action_type {
>  	 *
>  	 * See struct rte_flow_action_security.
>  	 */
> -	RTE_FLOW_ACTION_TYPE_SECURITY
> +	RTE_FLOW_ACTION_TYPE_SECURITY,
> +
> +	/**
> +	 * Enable flow group counter for flow.
> +	 *
> +	 * Group counters can be retrieved and reset through
> +	 * rte_flow_group_count_query()
> +	 *
> +	 * See struct rte_flow_action_group_count.
> +	 */
> +	RTE_FLOW_ACTION_TYPE_GROUP_COUNT,
> +
> +	/**
> +	 *
> +	 * See struct rte_flow_action_vtep_encap.
> +	 */
> +	RTE_FLOW_ACTION_TYPE_VTEP_ENCAP,
> +
> +	/**
> +	 * Decapsulate all the headers of the vTEP
> +	 *
> +	 * See struct rte_flow_action_vtep_decap.
> +	 */
> +	RTE_FLOW_ACTION_TYPE_VTEP_DECAP,
> +
> +	/**
> +	 * [META]
> +	 *
> +	 * Set specific metadata field associated with packet which is then
> +	 * available to further pipeline stages.
> +	 *
> +	 * See struct rte_flow_action_metadata.
> +	 */
> +	RTE_FLOW_ACTION_TYPE_METADATA,
> +	
> +	/**
> +	 * Send packet to a group. In a logical hierarchy of groups (flow
> +	 * tables) this allows the terminating action to be a next stage table,
> +	 * which can match on further elements of the packet or on metadata
> +	 * generated from previous group stages.
> +	 * 
> +	 * See struct rte_flow_action_group.
> +	 */
> +	RTE_FLOW_ACTION_TYPE_GROUP
>  };
>  
>  /**
> @@ -1103,6 +1175,26 @@ struct rte_flow_action_vf {
>  	uint32_t id; /**< VF ID to redirect packets to. */
>  };
>  
> +/**
> + * RTE_FLOW_ACTION_TYPE_GROUP
> + * 
> + * Redirects packets to the group of flows on the current device.
> + *
> + * This action may be used to enable a number of functions. On hw
> + * devices which support multiple table chained together, this
> + * action allows the output of one table to be the input of
> + * another. For applications which need to program flow rules for 
> + * operations on a tunnel packet on the inner and outer header 
> + * separately, the groups can be used to enable a PMD to support
> + * two levels of virtual tables which it can then flatten into
> + * a single rule before programming to hardware.
> + * 
> + * Terminating by default.
> + */
> +struct rte_flow_action_group {
> +	uint32_t id;
> +};
> +
>  /**
>   * RTE_FLOW_ACTION_TYPE_METER
>   *
> @@ -1148,6 +1240,54 @@ struct rte_flow_action_security {
>  	void *security_session; /**< Pointer to security session structure. */
>  };
>  
> +/**
> + * RTE_FLOW_ACTION_TYPE_GROUP_COUNT
> + *
> + * A packet/byte counter which can be shared across a group of flows programmed
> + * on the same port/switch domain.
> + *
> + * Non-terminating by default.
> + */
> +struct rte_flow_action_group_count {
> +	uint32_t id;
> +};
> +
> +/**
> + * RTE_FLOW_ACTION_TYPE_METADATA
> + *
> + * Set a specified metadata field in hw pipeline with value for consumption
> + * on further pipeline stages or on host interface.
> + *
> + * Non-terminating by default.
> + */
> +struct rte_flow_action_metadata {
> +	uint32_t id;		/**< field identifier */
> +	uint32_t size;		/**< field size */
> +	uint8_t bytes[];	/**< field value */
> +};
> +
> +/*
> + * RTE_FLOW_ACTION_TYPE_VTEP_ENCAP
> + *
> + * Non-terminating by default.
> + */
> +struct rte_flow_action_vtep_encap {
> +	struct rte_flow_action_item {
> +		enum rte_flow_item_type type;	/**< Item type. */
> +		const void *item; 		/**< Item definition. */
> +	} *pattern;
> +	/**< vTEP definition */
> +};
> +
> +/**
> + * RTE_FLOW_ACTION_TYPE_VTEP_DECAP
> + *
> + * Non-terminating by default.
> + */
> +struct rte_flow_action_vtep_decap {
> +	enum rte_flow_item_type type; /**< Item type. */
> +};
> +
>  /**
>   * Definition of a single action.
>   *
> @@ -1476,6 +1616,26 @@ rte_flow_copy(struct rte_flow_desc *fd, size_t len,
>  	      const struct rte_flow_item *items,
>  	      const struct rte_flow_action *actions);
>  
> +/**
> + * A group counter is a counter which can be shared among multiple
> + * Get packet/bytes count for group counter.
> + *
> + * @param[in]	port_id
> + *
> + * @param[in]	group_count_id
> + * @param[out]	packets
> + * @param[out]	bytes
> + * @param[out]	error
> + *
> + * @return
> + */
> +int
> +rte_flow_query_group_count(uint16_t port_id,
> +		uint32_t group_count_id,
> +		uint64_t *packets, uint64_t *bytes,
> +		struct rte_flow_error *error);
> +
> +
>  #ifdef __cplusplus
>  }
>  #endif
> 







More information about the dev mailing list