[dpdk-dev] [PATCH v6 2/6] net/mlx5: add VXLAN encap action to Direct Verbs

Shahaf Shuler shahafs at mellanox.com
Mon Oct 29 11:03:14 CET 2018


Hi Dekel,

Thursday, October 25, 2018 11:08 PM, Dekel Peled:
> Subject: [dpdk-dev] [PATCH v6 2/6] net/mlx5: add VXLAN encap action to
> Direct Verbs
> 
> This patch implements the VXLAN encap action in DV flow for MLX5 PMD.
> 
> Signed-off-by: Dekel Peled <dekelp at mellanox.com>
> ---
>  drivers/net/mlx5/mlx5_flow.h    |   2 +
>  drivers/net/mlx5/mlx5_flow_dv.c | 351
> +++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 348 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
> index 61299d6..6e92afe 100644
> --- a/drivers/net/mlx5/mlx5_flow.h
> +++ b/drivers/net/mlx5/mlx5_flow.h
> @@ -92,6 +92,7 @@
>  #define MLX5_FLOW_ACTION_DEC_TTL (1u << 19)  #define
> MLX5_FLOW_ACTION_SET_MAC_SRC (1u << 20)  #define
> MLX5_FLOW_ACTION_SET_MAC_DST (1u << 21)
> +#define MLX5_FLOW_ACTION_VXLAN_ENCAP (1u << 22)
> 
>  #define MLX5_FLOW_FATE_ACTIONS \
>  	(MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_QUEUE |
> MLX5_FLOW_ACTION_RSS) @@ -181,6 +182,7 @@ struct mlx5_flow_dv {
> #ifdef HAVE_IBV_FLOW_DV_SUPPORT
>  	struct mlx5dv_flow_action_attr
> actions[MLX5_DV_MAX_NUMBER_OF_ACTIONS];
>  	/**< Action list. */
> +	struct ibv_flow_action *verbs_action; /**< Verbs encap/decap

The ibv_flow_action is already part of a union inside mlx5dv_flow_action_attr, why you need it separately? 
I see also in the below code that you copy it from the action list to this specific field. Can you elaborate why? 

> object.
> +*/
>  #endif
>  	int actions_n; /**< number of actions. */  }; diff --git
> a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
> index 8f729f4..14110c5 100644
> --- a/drivers/net/mlx5/mlx5_flow_dv.c
> +++ b/drivers/net/mlx5/mlx5_flow_dv.c
> @@ -34,6 +34,12 @@
> 
>  #ifdef HAVE_IBV_FLOW_DV_SUPPORT
> 
> +/*
> + * Encap buf length, max:
> + *   Eth:14/VLAN:8/IPv6:40/TCP:36/TUNNEL:20/Eth:14

VLAN is 4B not 8B. which tunnel is for 20B? 

> + */
> +#define MLX5_ENCAP_MAX_LEN 132
> +
>  /**
>   * Validate META item.
>   *
> @@ -96,6 +102,300 @@
>  }
> 
>  /**
> + * Validate the L2 encap action.
> + * Used for VXLAN encap action.

No need for that. Later on you put more supported protocols. This is a generic function for L2 encap validation. 

> + *
> + * @param[in] action_flags
> + *   Holds the actions detected until now.
> + * @param[in] action
> + *   Pointer to the encap action.
> + * @param[in] attr
> + *   Pointer to flow attributes
> + * @param[out] error
> + *   Pointer to error structure.
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_errno is set.
> + */
> +static int
> +flow_dv_validate_action_l2_encap(uint64_t action_flags,
> +				 const struct rte_flow_action *action,
> +				 const struct rte_flow_attr *attr,
> +				 struct rte_flow_error *error)
> +{
> +	if (!(action->conf))
> +		return rte_flow_error_set(error, EINVAL,
> +					  RTE_FLOW_ERROR_TYPE_ACTION,
> action,
> +					  "configuration cannot be null");
> +	if (action_flags & MLX5_FLOW_ACTION_DROP)
> +		return rte_flow_error_set(error, EINVAL,
> +					  RTE_FLOW_ERROR_TYPE_ACTION,
> NULL,
> +					  "can't drop and encap in same
> flow");
> +	if (action_flags & MLX5_FLOW_ACTION_VXLAN_ENCAP)
> +		return rte_flow_error_set(error, EINVAL,
> +					  RTE_FLOW_ERROR_TYPE_ACTION,
> NULL,
> +					  "can only have a single encap"
> +					  " action in a flow");
> +	if (attr->ingress)
> +		return rte_flow_error_set(error, ENOTSUP,
> +
> RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
> +					  NULL,
> +					  "encap action not supported for "
> +					  "ingress");
> +	return 0;
> +}
> +
> +/**
> + * Get the size of specific rte_flow_item_type
> + *
> + * @param[in] item_type
> + *   Tested rte_flow_item_type.
> + *
> + * @return
> + *   sizeof struct item_type, 0 if void or irrelevant.
> + */
> +static size_t
> +flow_dv_get_item_len(const enum rte_flow_item_type item_type) {

Can we have this function as a macro?

#define flow_dv_get_item_len(t) (strncpm(t, VOID, 4) ? 0 : sizeof(struct rte_flow_item_##t)

Usage: flow_dv_get_item_len(ETH)


> +	size_t retval;
> +
> +	switch (item_type) {
> +	case RTE_FLOW_ITEM_TYPE_ETH:
> +		retval = sizeof(struct rte_flow_item_eth);
> +		break;
> +	case RTE_FLOW_ITEM_TYPE_VLAN:
> +		retval = sizeof(struct rte_flow_item_vlan);
> +		break;
> +	case RTE_FLOW_ITEM_TYPE_IPV4:
> +		retval = sizeof(struct rte_flow_item_ipv4);
> +		break;
> +	case RTE_FLOW_ITEM_TYPE_IPV6:
> +		retval = sizeof(struct rte_flow_item_ipv6);
> +		break;
> +	case RTE_FLOW_ITEM_TYPE_UDP:
> +		retval = sizeof(struct rte_flow_item_udp);
> +		break;
> +	case RTE_FLOW_ITEM_TYPE_TCP:
> +		retval = sizeof(struct rte_flow_item_tcp);
> +		break;
> +	case RTE_FLOW_ITEM_TYPE_VXLAN:
> +		retval = sizeof(struct rte_flow_item_vxlan);
> +		break;
> +	case RTE_FLOW_ITEM_TYPE_GRE:
> +		retval = sizeof(struct rte_flow_item_gre);
> +		break;
> +	case RTE_FLOW_ITEM_TYPE_NVGRE:
> +		retval = sizeof(struct rte_flow_item_nvgre);
> +		break;
> +	case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
> +		retval = sizeof(struct rte_flow_item_vxlan_gpe);
> +		break;
> +	case RTE_FLOW_ITEM_TYPE_MPLS:
> +		retval = sizeof(struct rte_flow_item_mpls);
> +		break;
> +	case RTE_FLOW_ITEM_TYPE_VOID: /* Fall through. */
> +	default:
> +		retval = 0;
> +		break;
> +	}
> +	return retval;
> +};
> +
> +/**
> + * Convert the encap action data from rte_flow_item to raw buffer
> + *
> + * @param[in] item
> + *   Pointer to rte_flow_item object.

Since it is an item list, "items" is preferable. 

> + * @param[out] buf
> + *   Pointer to the output buffer.
> + * @param[out] size
> + *   Pointer to the output buffer size.
> + * @param[out] error
> + *   Pointer to the error structure.
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_errno is set.
> + */
> +static int
> +flow_dv_convert_encap_data(const struct rte_flow_item *item, uint8_t
> *buf,
> +			   size_t *size, struct rte_flow_error *error) {
> +	struct ether_hdr *eth = NULL;
> +	struct vlan_hdr *vlan = NULL;
> +	struct ipv4_hdr *ipv4 = NULL;
> +	struct ipv6_hdr *ipv6 = NULL;
> +	struct udp_hdr *udp = NULL;
> +	struct vxlan_hdr *vxlan = NULL;
> +	size_t len;
> +	size_t temp_size = 0;
> +
> +	if (!item)
> +		return rte_flow_error_set(error, EINVAL,
> +					  RTE_FLOW_ERROR_TYPE_ACTION,
> +					  NULL, "invalid empty data");
> +	for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
> +		len = flow_dv_get_item_len(item->type);
> +		if (len + temp_size > MLX5_ENCAP_MAX_LEN)
> +			return rte_flow_error_set(error, EINVAL,
> +
> RTE_FLOW_ERROR_TYPE_ACTION,
> +						  (void *)item->type,
> +						  "invalid item length");

It is not invalid item length, it is items total size is too big for encap. 

> +		rte_memcpy((void *)&buf[temp_size], item->spec, len);
> +		switch (item->type) {
> +		case RTE_FLOW_ITEM_TYPE_ETH:
> +			eth = (struct ether_hdr *)&buf[temp_size];
> +			break;
> +		case RTE_FLOW_ITEM_TYPE_VLAN:
> +			vlan = (struct vlan_hdr *)&buf[temp_size];
> +			if (!eth)
> +				return rte_flow_error_set(error, EINVAL,
> +
> 	RTE_FLOW_ERROR_TYPE_ACTION,
> +						(void *)item->type,
> +						"eth header not found");
> +			if (!eth->ether_type)
> +				eth->ether_type =
> RTE_BE16(ETHER_TYPE_VLAN);
> +			break;
> +		case RTE_FLOW_ITEM_TYPE_IPV4:
> +			ipv4 = (struct ipv4_hdr *)&buf[temp_size];
> +			if (!vlan && !eth)
> +				return rte_flow_error_set(error, EINVAL,
> +
> 	RTE_FLOW_ERROR_TYPE_ACTION,
> +						(void *)item->type,
> +						"neither eth nor vlan"
> +						" header found");
> +			if (vlan && !vlan->eth_proto)
> +				vlan->eth_proto =
> RTE_BE16(ETHER_TYPE_IPv4);
> +			else if (eth && !eth->ether_type)
> +				eth->ether_type =
> RTE_BE16(ETHER_TYPE_IPv4);
> +			if (!ipv4->version_ihl)
> +				ipv4->version_ihl = 0x45;
> +			if (!ipv4->time_to_live)
> +				ipv4->time_to_live = 0x40;

If no existing macro have those two above defined as ones. 
There are few more places on this function related to this comment. 

> +			break;
> +		case RTE_FLOW_ITEM_TYPE_IPV6:
> +			ipv6 = (struct ipv6_hdr *)&buf[temp_size];
> +			if (!vlan && !eth)
> +				return rte_flow_error_set(error, EINVAL,
> +
> 	RTE_FLOW_ERROR_TYPE_ACTION,
> +						(void *)item->type,
> +						"neither eth nor vlan"
> +						" header found");
> +			if (vlan && !vlan->eth_proto)
> +				vlan->eth_proto =
> RTE_BE16(ETHER_TYPE_IPv6);
> +			else if (eth && !eth->ether_type)
> +				eth->ether_type =
> RTE_BE16(ETHER_TYPE_IPv6);
> +			if (!ipv6->vtc_flow)
> +				ipv6->vtc_flow = RTE_BE32(0x60000000);
> +			if (!ipv6->hop_limits)
> +				ipv6->hop_limits = 0xff;
> +			break;
> +		case RTE_FLOW_ITEM_TYPE_UDP:
> +			udp = (struct udp_hdr *)&buf[temp_size];
> +			if (!ipv4 && !ipv6)
> +				return rte_flow_error_set(error, EINVAL,
> +
> 	RTE_FLOW_ERROR_TYPE_ACTION,
> +						(void *)item->type,
> +						"ip header not found");
> +			if (ipv4 && !ipv4->next_proto_id)
> +				ipv4->next_proto_id = IPPROTO_UDP;
> +			else if (ipv6 && !ipv6->proto)
> +				ipv6->proto = IPPROTO_UDP;
> +			break;
> +		case RTE_FLOW_ITEM_TYPE_VXLAN:
> +			vxlan = (struct vxlan_hdr *)&buf[temp_size];
> +			if (!udp)
> +				return rte_flow_error_set(error, EINVAL,
> +
> 	RTE_FLOW_ERROR_TYPE_ACTION,
> +						(void *)item->type,
> +						"udp header not found");
> +			if (!udp->dst_port)
> +				udp->dst_port =
> RTE_BE16(MLX5_UDP_PORT_VXLAN);
> +			if (!vxlan->vx_flags)
> +				vxlan->vx_flags = RTE_BE32(0x08000000);
> +			break;
> +		case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
> +			vxlan = (struct vxlan_hdr *)&buf[temp_size];
> +			if (!udp)
> +				return rte_flow_error_set(error, EINVAL,
> +
> 	RTE_FLOW_ERROR_TYPE_ACTION,
> +						(void *)item->type,
> +						"udp header not found");
> +			if (!udp->dst_port)
> +				udp->dst_port =
> +
> 	RTE_BE16(MLX5_UDP_PORT_VXLAN_GPE);
> +			if (!vxlan->vx_flags)
> +				vxlan->vx_flags = RTE_BE32(0x0c000003);

I would say you cannot set internally the next protocol. Only the user know what it is. 
I think in case VXLAN_GPE is set on the item list, the next_proto field is a must, otherwise the rule should be rejected. 

> +			break;
> +		case RTE_FLOW_ITEM_TYPE_GRE:
> +		case RTE_FLOW_ITEM_TYPE_NVGRE:
> +			if (!ipv4 && !ipv6)
> +				return rte_flow_error_set(error, EINVAL,
> +
> 	RTE_FLOW_ERROR_TYPE_ACTION,
> +						(void *)item->type,
> +						"ip header not found");
> +			if (ipv4 && !ipv4->next_proto_id)
> +				ipv4->next_proto_id = IPPROTO_GRE;
> +			else if (ipv6 && !ipv6->proto)
> +				ipv6->proto = IPPROTO_GRE;

This patch is for VXLAN, yet you add a GRE/NVGRE code block. It is better to add it on the subsequent patches adding this feature. 

Also you need to check if the user put the protocol type on the GRE header. Same as the VXLAN-GPE case, this is a must. 

> +			break;
> +		case RTE_FLOW_ITEM_TYPE_VOID:
> +			break;
> +		default:
> +			return rte_flow_error_set(error, EINVAL,
> +
> RTE_FLOW_ERROR_TYPE_ACTION,
> +						  (void *)item->type,
> +						  "unsupported item type");
> +			break;
> +		}
> +		temp_size += len;
> +	}
> +	*size = temp_size;
> +	return 0;
> +}
> +
> +/**
> + * Convert L2 encap action to DV specification.
> + * Used for VXLAN encap action.

Same - no need to specific which exact protocols. 

> + *
> + * @param[in] dev
> + *   Pointer to rte_eth_dev structure.
> + * @param[in] action
> + *   Pointer to action structure.
> + * @param[out] error
> + *   Pointer to the error structure.
> + *
> + * @return
> + *   Pointer to action on success, NULL otherwise and rte_errno is set.
> + */
> +static struct ibv_flow_action *
> +flow_dv_create_action_l2_encap(struct rte_eth_dev *dev,
> +			       const struct rte_flow_action *action,
> +			       struct rte_flow_error *error) {
> +	struct ibv_flow_action *verbs_action = NULL;
> +	const struct rte_flow_item *encap_data;
> +	struct priv *priv = dev->data->dev_private;
> +	uint8_t buf[MLX5_ENCAP_MAX_LEN];
> +	size_t size = 0;
> +	int convert_result = 0;
> +
> +	encap_data = ((const struct rte_flow_action_vxlan_encap *)
> +						action->conf)->definition;
> +	convert_result = flow_dv_convert_encap_data(encap_data, buf,
> +						    &size, error);
> +	if (convert_result)
> +		return NULL;
> +	verbs_action = mlx5_glue-
> >dv_create_flow_action_packet_reformat
> +		(priv->ctx, size, (size ? buf : NULL),

How can size be 0 and yet the encap action is valid? 

> +
> MLX5DV_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL,
> +		 MLX5DV_FLOW_TABLE_TYPE_NIC_TX);
> +	if (!verbs_action)
> +		rte_flow_error_set(error, EINVAL,
> RTE_FLOW_ERROR_TYPE_ACTION,
> +				   NULL, "cannot create L2 encap action");
> +	return verbs_action;
> +}
> +
> +/**
>   * Verify the @p attributes will be correctly understood by the NIC and store
>   * them in the @p flow if everything is correct.
>   *
> @@ -339,6 +639,16 @@
>  			action_flags |= MLX5_FLOW_ACTION_COUNT;
>  			++actions_n;
>  			break;
> +		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
> +			ret =
> flow_dv_validate_action_l2_encap(action_flags,
> +							       actions, attr,
> +							       error);
> +			if (ret < 0)
> +				return ret;
> +			action_flags |=
> MLX5_FLOW_ACTION_VXLAN_ENCAP;
> +			++actions_n;
> +			break;
> +
>  		default:
>  			return rte_flow_error_set(error, ENOTSUP,
> 
> RTE_FLOW_ERROR_TYPE_ACTION,
> @@ -1045,14 +1355,26 @@
>  /**
>   * Store the requested actions in an array.
>   *
> + * @param[in] dev
> + *   Pointer to rte_eth_dev structure.
>   * @param[in] action
>   *   Flow action to translate.
>   * @param[in, out] dev_flow
>   *   Pointer to the mlx5_flow.
> + * @param[in] attr
> + *   Pointer to the flow attributes.
> + * @param[out] error
> + *   Pointer to the error structure.
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_errno is set.
>   */
> -static void
> -flow_dv_create_action(const struct rte_flow_action *action,
> -		      struct mlx5_flow *dev_flow)
> +static int
> +flow_dv_create_action(struct rte_eth_dev *dev,
> +		      const struct rte_flow_action *action,
> +		      struct mlx5_flow *dev_flow,
> +		      const struct rte_flow_attr *attr __rte_unused,

It was better to add this when you add the RAW encap feature. Now they marked as unused, and you didn't changed that when you actually started to use them.
Need to choose one of the paths:
1. add them on the RAW encap/decap support (better)
2. remove the rte_unused on the RAW encap/decap support


> +		      struct rte_flow_error *error)
>  {
>  	const struct rte_flow_action_queue *queue;
>  	const struct rte_flow_action_rss *rss; @@ -1100,10 +1422,24 @@
>  		/* Added to array only in apply since we need the QP */
>  		flow->actions |= MLX5_FLOW_ACTION_RSS;
>  		break;
> +	case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
> +		dev_flow->dv.actions[actions_n].type =
> +			MLX5DV_FLOW_ACTION_IBV_FLOW_ACTION;
> +		dev_flow->dv.actions[actions_n].action =
> +				flow_dv_create_action_l2_encap(dev,
> action,
> +							       error);
> +		if (!(dev_flow->dv.actions[actions_n].action))
> +			return -rte_errno;
> +		dev_flow->dv.verbs_action =
> +			dev_flow->dv.actions[actions_n].action;
> +		flow->actions |= MLX5_FLOW_ACTION_VXLAN_ENCAP;
> +		actions_n++;
> +		break;
>  	default:
>  		break;
>  	}
>  	dev_flow->dv.actions_n = actions_n;
> +	return 0;
>  }
> 
>  static uint32_t matcher_zero[MLX5_ST_SZ_DW(fte_match_param)] = { 0 };
> @@ -1217,7 +1553,6 @@
>  	return 0;
>  }
> 
> -
>  /**
>   * Fill the flow with DV spec.
>   *
> @@ -1272,7 +1607,8 @@
>  	if (flow_dv_matcher_register(dev, &matcher, dev_flow, error))
>  		return -rte_errno;
>  	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++)
> -		flow_dv_create_action(actions, dev_flow);
> +		if (flow_dv_create_action(dev, actions, dev_flow, attr,
> error))
> +			return -rte_errno;
>  	return 0;
>  }
> 
> @@ -1457,6 +1793,11 @@
>  		LIST_REMOVE(dev_flow, next);
>  		if (dev_flow->dv.matcher)
>  			flow_dv_matcher_release(dev, dev_flow);
> +		if (dev_flow->dv.verbs_action) {

Like I said in the beginning, I don't understand why this field is separate from the mlx5dv action list. 

> +			claim_zero(mlx5_glue->destroy_flow_action
> +						(dev_flow-
> >dv.verbs_action));
> +			dev_flow->dv.verbs_action = NULL;
> +		}
>  		rte_free(dev_flow);
>  	}
>  }
> --
> 1.8.3.1



More information about the dev mailing list