[dpdk-dev] [PATCH v6 2/6] net/mlx5: add VXLAN encap action to Direct Verbs
Dekel Peled
dekelp at mellanox.com
Mon Oct 29 17:44:37 CET 2018
Thanks, PSB.
> -----Original Message-----
> From: Shahaf Shuler
> Sent: Monday, October 29, 2018 12:03 PM
> To: Dekel Peled <dekelp at mellanox.com>; Yongseok Koh
> <yskoh at mellanox.com>
> Cc: dev at dpdk.org; Ori Kam <orika at mellanox.com>
> Subject: RE: [dpdk-dev] [PATCH v6 2/6] net/mlx5: add VXLAN encap action to
> Direct Verbs
>
> Hi Dekel,
>
> Thursday, October 25, 2018 11:08 PM, Dekel Peled:
> > Subject: [dpdk-dev] [PATCH v6 2/6] net/mlx5: add VXLAN encap action to
> > Direct Verbs
> >
> > This patch implements the VXLAN encap action in DV flow for MLX5 PMD.
> >
> > Signed-off-by: Dekel Peled <dekelp at mellanox.com>
> > ---
> > drivers/net/mlx5/mlx5_flow.h | 2 +
> > drivers/net/mlx5/mlx5_flow_dv.c | 351
> > +++++++++++++++++++++++++++++++++++++++-
> > 2 files changed, 348 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/net/mlx5/mlx5_flow.h
> > b/drivers/net/mlx5/mlx5_flow.h index 61299d6..6e92afe 100644
> > --- a/drivers/net/mlx5/mlx5_flow.h
> > +++ b/drivers/net/mlx5/mlx5_flow.h
> > @@ -92,6 +92,7 @@
> > #define MLX5_FLOW_ACTION_DEC_TTL (1u << 19) #define
> > MLX5_FLOW_ACTION_SET_MAC_SRC (1u << 20) #define
> > MLX5_FLOW_ACTION_SET_MAC_DST (1u << 21)
> > +#define MLX5_FLOW_ACTION_VXLAN_ENCAP (1u << 22)
> >
> > #define MLX5_FLOW_FATE_ACTIONS \
> > (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_QUEUE |
> > MLX5_FLOW_ACTION_RSS) @@ -181,6 +182,7 @@ struct mlx5_flow_dv {
> #ifdef
> > HAVE_IBV_FLOW_DV_SUPPORT
> > struct mlx5dv_flow_action_attr
> > actions[MLX5_DV_MAX_NUMBER_OF_ACTIONS];
> > /**< Action list. */
> > + struct ibv_flow_action *verbs_action; /**< Verbs encap/decap
>
> The ibv_flow_action is already part of a union inside
> mlx5dv_flow_action_attr, why you need it separately?
> I see also in the below code that you copy it from the action list to this
> specific field. Can you elaborate why?
>
I added it to use when flow rule is removed, for easy access to the action to destroy.
I am now changing the code per 17.11 PR 876, adding cache of encap/decap actions, so this member is no longer needed, and will be removed.
> > object.
> > +*/
> > #endif
> > int actions_n; /**< number of actions. */ }; diff --git
> > a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
> > index 8f729f4..14110c5 100644
> > --- a/drivers/net/mlx5/mlx5_flow_dv.c
> > +++ b/drivers/net/mlx5/mlx5_flow_dv.c
> > @@ -34,6 +34,12 @@
> >
> > #ifdef HAVE_IBV_FLOW_DV_SUPPORT
> >
> > +/*
> > + * Encap buf length, max:
> > + * Eth:14/VLAN:8/IPv6:40/TCP:36/TUNNEL:20/Eth:14
>
> VLAN is 4B not 8B. which tunnel is for 20B?
This is code I reused from 17.11, I will verify it.
>
> > + */
> > +#define MLX5_ENCAP_MAX_LEN 132
> > +
> > /**
> > * Validate META item.
> > *
> > @@ -96,6 +102,300 @@
> > }
> >
> > /**
> > + * Validate the L2 encap action.
> > + * Used for VXLAN encap action.
>
> No need for that. Later on you put more supported protocols. This is a
> generic function for L2 encap validation.
I will remove it.
>
> > + *
> > + * @param[in] action_flags
> > + * Holds the actions detected until now.
> > + * @param[in] action
> > + * Pointer to the encap action.
> > + * @param[in] attr
> > + * Pointer to flow attributes
> > + * @param[out] error
> > + * Pointer to error structure.
> > + *
> > + * @return
> > + * 0 on success, a negative errno value otherwise and rte_errno is set.
> > + */
> > +static int
> > +flow_dv_validate_action_l2_encap(uint64_t action_flags,
> > + const struct rte_flow_action *action,
> > + const struct rte_flow_attr *attr,
> > + struct rte_flow_error *error)
> > +{
> > + if (!(action->conf))
> > + return rte_flow_error_set(error, EINVAL,
> > + RTE_FLOW_ERROR_TYPE_ACTION,
> > action,
> > + "configuration cannot be null");
> > + if (action_flags & MLX5_FLOW_ACTION_DROP)
> > + return rte_flow_error_set(error, EINVAL,
> > + RTE_FLOW_ERROR_TYPE_ACTION,
> > NULL,
> > + "can't drop and encap in same
> > flow");
> > + if (action_flags & MLX5_FLOW_ACTION_VXLAN_ENCAP)
> > + return rte_flow_error_set(error, EINVAL,
> > + RTE_FLOW_ERROR_TYPE_ACTION,
> > NULL,
> > + "can only have a single encap"
> > + " action in a flow");
> > + if (attr->ingress)
> > + return rte_flow_error_set(error, ENOTSUP,
> > +
> > RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
> > + NULL,
> > + "encap action not supported for "
> > + "ingress");
> > + return 0;
> > +}
> > +
> > +/**
> > + * Get the size of specific rte_flow_item_type
> > + *
> > + * @param[in] item_type
> > + * Tested rte_flow_item_type.
> > + *
> > + * @return
> > + * sizeof struct item_type, 0 if void or irrelevant.
> > + */
> > +static size_t
> > +flow_dv_get_item_len(const enum rte_flow_item_type item_type) {
>
> Can we have this function as a macro?
>
> #define flow_dv_get_item_len(t) (strncpm(t, VOID, 4) ? 0 : sizeof(struct
> rte_flow_item_##t)
>
> Usage: flow_dv_get_item_len(ETH)
>
I will change it.
>
> > + size_t retval;
> > +
> > + switch (item_type) {
> > + case RTE_FLOW_ITEM_TYPE_ETH:
> > + retval = sizeof(struct rte_flow_item_eth);
> > + break;
> > + case RTE_FLOW_ITEM_TYPE_VLAN:
> > + retval = sizeof(struct rte_flow_item_vlan);
> > + break;
> > + case RTE_FLOW_ITEM_TYPE_IPV4:
> > + retval = sizeof(struct rte_flow_item_ipv4);
> > + break;
> > + case RTE_FLOW_ITEM_TYPE_IPV6:
> > + retval = sizeof(struct rte_flow_item_ipv6);
> > + break;
> > + case RTE_FLOW_ITEM_TYPE_UDP:
> > + retval = sizeof(struct rte_flow_item_udp);
> > + break;
> > + case RTE_FLOW_ITEM_TYPE_TCP:
> > + retval = sizeof(struct rte_flow_item_tcp);
> > + break;
> > + case RTE_FLOW_ITEM_TYPE_VXLAN:
> > + retval = sizeof(struct rte_flow_item_vxlan);
> > + break;
> > + case RTE_FLOW_ITEM_TYPE_GRE:
> > + retval = sizeof(struct rte_flow_item_gre);
> > + break;
> > + case RTE_FLOW_ITEM_TYPE_NVGRE:
> > + retval = sizeof(struct rte_flow_item_nvgre);
> > + break;
> > + case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
> > + retval = sizeof(struct rte_flow_item_vxlan_gpe);
> > + break;
> > + case RTE_FLOW_ITEM_TYPE_MPLS:
> > + retval = sizeof(struct rte_flow_item_mpls);
> > + break;
> > + case RTE_FLOW_ITEM_TYPE_VOID: /* Fall through. */
> > + default:
> > + retval = 0;
> > + break;
> > + }
> > + return retval;
> > +};
> > +
> > +/**
> > + * Convert the encap action data from rte_flow_item to raw buffer
> > + *
> > + * @param[in] item
> > + * Pointer to rte_flow_item object.
>
> Since it is an item list, "items" is preferable.
I will rename it.
>
> > + * @param[out] buf
> > + * Pointer to the output buffer.
> > + * @param[out] size
> > + * Pointer to the output buffer size.
> > + * @param[out] error
> > + * Pointer to the error structure.
> > + *
> > + * @return
> > + * 0 on success, a negative errno value otherwise and rte_errno is set.
> > + */
> > +static int
> > +flow_dv_convert_encap_data(const struct rte_flow_item *item, uint8_t
> > *buf,
> > + size_t *size, struct rte_flow_error *error) {
> > + struct ether_hdr *eth = NULL;
> > + struct vlan_hdr *vlan = NULL;
> > + struct ipv4_hdr *ipv4 = NULL;
> > + struct ipv6_hdr *ipv6 = NULL;
> > + struct udp_hdr *udp = NULL;
> > + struct vxlan_hdr *vxlan = NULL;
> > + size_t len;
> > + size_t temp_size = 0;
> > +
> > + if (!item)
> > + return rte_flow_error_set(error, EINVAL,
> > + RTE_FLOW_ERROR_TYPE_ACTION,
> > + NULL, "invalid empty data");
> > + for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
> > + len = flow_dv_get_item_len(item->type);
> > + if (len + temp_size > MLX5_ENCAP_MAX_LEN)
> > + return rte_flow_error_set(error, EINVAL,
> > +
> > RTE_FLOW_ERROR_TYPE_ACTION,
> > + (void *)item->type,
> > + "invalid item length");
>
> It is not invalid item length, it is items total size is too big for encap.
I will change it.
>
> > + rte_memcpy((void *)&buf[temp_size], item->spec, len);
> > + switch (item->type) {
> > + case RTE_FLOW_ITEM_TYPE_ETH:
> > + eth = (struct ether_hdr *)&buf[temp_size];
> > + break;
> > + case RTE_FLOW_ITEM_TYPE_VLAN:
> > + vlan = (struct vlan_hdr *)&buf[temp_size];
> > + if (!eth)
> > + return rte_flow_error_set(error, EINVAL,
> > +
> > RTE_FLOW_ERROR_TYPE_ACTION,
> > + (void *)item->type,
> > + "eth header not found");
> > + if (!eth->ether_type)
> > + eth->ether_type =
> > RTE_BE16(ETHER_TYPE_VLAN);
> > + break;
> > + case RTE_FLOW_ITEM_TYPE_IPV4:
> > + ipv4 = (struct ipv4_hdr *)&buf[temp_size];
> > + if (!vlan && !eth)
> > + return rte_flow_error_set(error, EINVAL,
> > +
> > RTE_FLOW_ERROR_TYPE_ACTION,
> > + (void *)item->type,
> > + "neither eth nor vlan"
> > + " header found");
> > + if (vlan && !vlan->eth_proto)
> > + vlan->eth_proto =
> > RTE_BE16(ETHER_TYPE_IPv4);
> > + else if (eth && !eth->ether_type)
> > + eth->ether_type =
> > RTE_BE16(ETHER_TYPE_IPv4);
> > + if (!ipv4->version_ihl)
> > + ipv4->version_ihl = 0x45;
> > + if (!ipv4->time_to_live)
> > + ipv4->time_to_live = 0x40;
>
> If no existing macro have those two above defined as ones.
> There are few more places on this function related to this comment.
I will change it.
>
> > + break;
> > + case RTE_FLOW_ITEM_TYPE_IPV6:
> > + ipv6 = (struct ipv6_hdr *)&buf[temp_size];
> > + if (!vlan && !eth)
> > + return rte_flow_error_set(error, EINVAL,
> > +
> > RTE_FLOW_ERROR_TYPE_ACTION,
> > + (void *)item->type,
> > + "neither eth nor vlan"
> > + " header found");
> > + if (vlan && !vlan->eth_proto)
> > + vlan->eth_proto =
> > RTE_BE16(ETHER_TYPE_IPv6);
> > + else if (eth && !eth->ether_type)
> > + eth->ether_type =
> > RTE_BE16(ETHER_TYPE_IPv6);
> > + if (!ipv6->vtc_flow)
> > + ipv6->vtc_flow = RTE_BE32(0x60000000);
> > + if (!ipv6->hop_limits)
> > + ipv6->hop_limits = 0xff;
> > + break;
> > + case RTE_FLOW_ITEM_TYPE_UDP:
> > + udp = (struct udp_hdr *)&buf[temp_size];
> > + if (!ipv4 && !ipv6)
> > + return rte_flow_error_set(error, EINVAL,
> > +
> > RTE_FLOW_ERROR_TYPE_ACTION,
> > + (void *)item->type,
> > + "ip header not found");
> > + if (ipv4 && !ipv4->next_proto_id)
> > + ipv4->next_proto_id = IPPROTO_UDP;
> > + else if (ipv6 && !ipv6->proto)
> > + ipv6->proto = IPPROTO_UDP;
> > + break;
> > + case RTE_FLOW_ITEM_TYPE_VXLAN:
> > + vxlan = (struct vxlan_hdr *)&buf[temp_size];
> > + if (!udp)
> > + return rte_flow_error_set(error, EINVAL,
> > +
> > RTE_FLOW_ERROR_TYPE_ACTION,
> > + (void *)item->type,
> > + "udp header not found");
> > + if (!udp->dst_port)
> > + udp->dst_port =
> > RTE_BE16(MLX5_UDP_PORT_VXLAN);
> > + if (!vxlan->vx_flags)
> > + vxlan->vx_flags = RTE_BE32(0x08000000);
> > + break;
> > + case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
> > + vxlan = (struct vxlan_hdr *)&buf[temp_size];
> > + if (!udp)
> > + return rte_flow_error_set(error, EINVAL,
> > +
> > RTE_FLOW_ERROR_TYPE_ACTION,
> > + (void *)item->type,
> > + "udp header not found");
> > + if (!udp->dst_port)
> > + udp->dst_port =
> > +
> > RTE_BE16(MLX5_UDP_PORT_VXLAN_GPE);
> > + if (!vxlan->vx_flags)
> > + vxlan->vx_flags = RTE_BE32(0x0c000003);
>
> I would say you cannot set internally the next protocol. Only the user know
> what it is.
> I think in case VXLAN_GPE is set on the item list, the next_proto field is a
> must, otherwise the rule should be rejected.
I will verify it.
>
> > + break;
> > + case RTE_FLOW_ITEM_TYPE_GRE:
> > + case RTE_FLOW_ITEM_TYPE_NVGRE:
> > + if (!ipv4 && !ipv6)
> > + return rte_flow_error_set(error, EINVAL,
> > +
> > RTE_FLOW_ERROR_TYPE_ACTION,
> > + (void *)item->type,
> > + "ip header not found");
> > + if (ipv4 && !ipv4->next_proto_id)
> > + ipv4->next_proto_id = IPPROTO_GRE;
> > + else if (ipv6 && !ipv6->proto)
> > + ipv6->proto = IPPROTO_GRE;
>
> This patch is for VXLAN, yet you add a GRE/NVGRE code block. It is better to
> add it on the subsequent patches adding this feature.
>
I will move it to next patch.
> Also you need to check if the user put the protocol type on the GRE header.
> Same as the VXLAN-GPE case, this is a must.
>
I will add it.
> > + break;
> > + case RTE_FLOW_ITEM_TYPE_VOID:
> > + break;
> > + default:
> > + return rte_flow_error_set(error, EINVAL,
> > +
> > RTE_FLOW_ERROR_TYPE_ACTION,
> > + (void *)item->type,
> > + "unsupported item type");
> > + break;
> > + }
> > + temp_size += len;
> > + }
> > + *size = temp_size;
> > + return 0;
> > +}
> > +
> > +/**
> > + * Convert L2 encap action to DV specification.
> > + * Used for VXLAN encap action.
>
> Same - no need to specific which exact protocols.
I will remove it.
>
> > + *
> > + * @param[in] dev
> > + * Pointer to rte_eth_dev structure.
> > + * @param[in] action
> > + * Pointer to action structure.
> > + * @param[out] error
> > + * Pointer to the error structure.
> > + *
> > + * @return
> > + * Pointer to action on success, NULL otherwise and rte_errno is set.
> > + */
> > +static struct ibv_flow_action *
> > +flow_dv_create_action_l2_encap(struct rte_eth_dev *dev,
> > + const struct rte_flow_action *action,
> > + struct rte_flow_error *error) {
> > + struct ibv_flow_action *verbs_action = NULL;
> > + const struct rte_flow_item *encap_data;
> > + struct priv *priv = dev->data->dev_private;
> > + uint8_t buf[MLX5_ENCAP_MAX_LEN];
> > + size_t size = 0;
> > + int convert_result = 0;
> > +
> > + encap_data = ((const struct rte_flow_action_vxlan_encap *)
> > + action->conf)->definition;
> > + convert_result = flow_dv_convert_encap_data(encap_data, buf,
> > + &size, error);
> > + if (convert_result)
> > + return NULL;
> > + verbs_action = mlx5_glue-
> > >dv_create_flow_action_packet_reformat
> > + (priv->ctx, size, (size ? buf : NULL),
>
> How can size be 0 and yet the encap action is valid?
It can't, this is just for safety.
I will remove it.
>
> > +
> > MLX5DV_FLOW_ACTION_PACKET_REFORMAT_TYPE_L2_TO_L2_TUNNEL,
> > + MLX5DV_FLOW_TABLE_TYPE_NIC_TX);
> > + if (!verbs_action)
> > + rte_flow_error_set(error, EINVAL,
> > RTE_FLOW_ERROR_TYPE_ACTION,
> > + NULL, "cannot create L2 encap action");
> > + return verbs_action;
> > +}
> > +
> > +/**
> > * Verify the @p attributes will be correctly understood by the NIC and
> store
> > * them in the @p flow if everything is correct.
> > *
> > @@ -339,6 +639,16 @@
> > action_flags |= MLX5_FLOW_ACTION_COUNT;
> > ++actions_n;
> > break;
> > + case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
> > + ret =
> > flow_dv_validate_action_l2_encap(action_flags,
> > + actions, attr,
> > + error);
> > + if (ret < 0)
> > + return ret;
> > + action_flags |=
> > MLX5_FLOW_ACTION_VXLAN_ENCAP;
> > + ++actions_n;
> > + break;
> > +
> > default:
> > return rte_flow_error_set(error, ENOTSUP,
> >
> > RTE_FLOW_ERROR_TYPE_ACTION,
> > @@ -1045,14 +1355,26 @@
> > /**
> > * Store the requested actions in an array.
> > *
> > + * @param[in] dev
> > + * Pointer to rte_eth_dev structure.
> > * @param[in] action
> > * Flow action to translate.
> > * @param[in, out] dev_flow
> > * Pointer to the mlx5_flow.
> > + * @param[in] attr
> > + * Pointer to the flow attributes.
> > + * @param[out] error
> > + * Pointer to the error structure.
> > + *
> > + * @return
> > + * 0 on success, a negative errno value otherwise and rte_errno is set.
> > */
> > -static void
> > -flow_dv_create_action(const struct rte_flow_action *action,
> > - struct mlx5_flow *dev_flow)
> > +static int
> > +flow_dv_create_action(struct rte_eth_dev *dev,
> > + const struct rte_flow_action *action,
> > + struct mlx5_flow *dev_flow,
> > + const struct rte_flow_attr *attr __rte_unused,
>
> It was better to add this when you add the RAW encap feature. Now they
> marked as unused, and you didn't changed that when you actually started to
> use them.
> Need to choose one of the paths:
> 1. add them on the RAW encap/decap support (better) 2. remove the
> rte_unused on the RAW encap/decap support
>
I will move it to later patch.
>
> > + struct rte_flow_error *error)
> > {
> > const struct rte_flow_action_queue *queue;
> > const struct rte_flow_action_rss *rss; @@ -1100,10 +1422,24 @@
> > /* Added to array only in apply since we need the QP */
> > flow->actions |= MLX5_FLOW_ACTION_RSS;
> > break;
> > + case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
> > + dev_flow->dv.actions[actions_n].type =
> > + MLX5DV_FLOW_ACTION_IBV_FLOW_ACTION;
> > + dev_flow->dv.actions[actions_n].action =
> > + flow_dv_create_action_l2_encap(dev,
> > action,
> > + error);
> > + if (!(dev_flow->dv.actions[actions_n].action))
> > + return -rte_errno;
> > + dev_flow->dv.verbs_action =
> > + dev_flow->dv.actions[actions_n].action;
> > + flow->actions |= MLX5_FLOW_ACTION_VXLAN_ENCAP;
> > + actions_n++;
> > + break;
> > default:
> > break;
> > }
> > dev_flow->dv.actions_n = actions_n;
> > + return 0;
> > }
> >
> > static uint32_t matcher_zero[MLX5_ST_SZ_DW(fte_match_param)] = { 0 };
> > @@ -1217,7 +1553,6 @@
> > return 0;
> > }
> >
> > -
> > /**
> > * Fill the flow with DV spec.
> > *
> > @@ -1272,7 +1607,8 @@
> > if (flow_dv_matcher_register(dev, &matcher, dev_flow, error))
> > return -rte_errno;
> > for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++)
> > - flow_dv_create_action(actions, dev_flow);
> > + if (flow_dv_create_action(dev, actions, dev_flow, attr,
> > error))
> > + return -rte_errno;
> > return 0;
> > }
> >
> > @@ -1457,6 +1793,11 @@
> > LIST_REMOVE(dev_flow, next);
> > if (dev_flow->dv.matcher)
> > flow_dv_matcher_release(dev, dev_flow);
> > + if (dev_flow->dv.verbs_action) {
>
> Like I said in the beginning, I don't understand why this field is separate from
> the mlx5dv action list.
>
> > + claim_zero(mlx5_glue->destroy_flow_action
> > + (dev_flow-
> > >dv.verbs_action));
> > + dev_flow->dv.verbs_action = NULL;
> > + }
> > rte_free(dev_flow);
> > }
> > }
> > --
> > 1.8.3.1
More information about the dev
mailing list