[dpdk-dev] [PATCH v2 1/7] net/mlx5: e-switch VXLAN configuration and definitions

Yongseok Koh yskoh at mellanox.com
Tue Oct 23 12:01:31 CEST 2018


On Mon, Oct 15, 2018 at 02:13:29PM +0000, Viacheslav Ovsiienko wrote:
> This part of patchset adds configuration changes in makefile and
> meson.build for Mellanox MLX5 PMD. Also necessary defenitions
> for VXLAN support are made and appropriate data structures
> are presented.
> 
> Suggested-by: Adrien Mazarguil <adrien.mazarguil at 6wind.com>
> Signed-off-by: Viacheslav Ovsiienko <viacheslavo at mellanox.com>
> ---
>  drivers/net/mlx5/Makefile        |  80 ++++++++++++++++++
>  drivers/net/mlx5/meson.build     |  32 +++++++
>  drivers/net/mlx5/mlx5_flow.h     |  11 +++
>  drivers/net/mlx5/mlx5_flow_tcf.c | 175 +++++++++++++++++++++++++++++++++++++++
>  4 files changed, 298 insertions(+)
> 
> diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
> index 1e9c0b4..fec7779 100644
> --- a/drivers/net/mlx5/Makefile
> +++ b/drivers/net/mlx5/Makefile
> @@ -207,6 +207,11 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
>  		enum IFLA_PHYS_PORT_NAME \
>  		$(AUTOCONF_OUTPUT)
>  	$Q sh -- '$<' '$@' \
> +		HAVE_IFLA_VXLAN_COLLECT_METADATA \
> +		linux/if_link.h \
> +		enum IFLA_VXLAN_COLLECT_METADATA \
> +		$(AUTOCONF_OUTPUT)
> +	$Q sh -- '$<' '$@' \
>  		HAVE_TCA_CHAIN \
>  		linux/rtnetlink.h \
>  		enum TCA_CHAIN \
> @@ -367,6 +372,81 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
>  		enum TCA_VLAN_PUSH_VLAN_PRIORITY \
>  		$(AUTOCONF_OUTPUT)
>  	$Q sh -- '$<' '$@' \
> +		HAVE_TCA_FLOWER_KEY_ENC_KEY_ID \
> +		linux/pkt_cls.h \
> +		enum TCA_FLOWER_KEY_ENC_KEY_ID \
> +		$(AUTOCONF_OUTPUT)
> +	$Q sh -- '$<' '$@' \
> +		HAVE_TCA_FLOWER_KEY_ENC_IPV4_SRC \
> +		linux/pkt_cls.h \
> +		enum TCA_FLOWER_KEY_ENC_IPV4_SRC \
> +		$(AUTOCONF_OUTPUT)
> +	$Q sh -- '$<' '$@' \
> +		HAVE_TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK \
> +		linux/pkt_cls.h \
> +		enum TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK \
> +		$(AUTOCONF_OUTPUT)
> +	$Q sh -- '$<' '$@' \
> +		HAVE_TCA_FLOWER_KEY_ENC_IPV4_DST \
> +		linux/pkt_cls.h \
> +		enum TCA_FLOWER_KEY_ENC_IPV4_DST \
> +		$(AUTOCONF_OUTPUT)
> +	$Q sh -- '$<' '$@' \
> +		HAVE_TCA_FLOWER_KEY_ENC_IPV4_DST_MASK \
> +		linux/pkt_cls.h \
> +		enum TCA_FLOWER_KEY_ENC_IPV4_DST_MASK \
> +		$(AUTOCONF_OUTPUT)
> +	$Q sh -- '$<' '$@' \
> +		HAVE_TCA_FLOWER_KEY_ENC_IPV6_SRC \
> +		linux/pkt_cls.h \
> +		enum TCA_FLOWER_KEY_ENC_IPV6_SRC \
> +		$(AUTOCONF_OUTPUT)
> +	$Q sh -- '$<' '$@' \
> +		HAVE_TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK \
> +		linux/pkt_cls.h \
> +		enum TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK \
> +		$(AUTOCONF_OUTPUT)
> +	$Q sh -- '$<' '$@' \
> +		HAVE_TCA_FLOWER_KEY_ENC_IPV6_DST \
> +		linux/pkt_cls.h \
> +		enum TCA_FLOWER_KEY_ENC_IPV6_DST \
> +		$(AUTOCONF_OUTPUT)
> +	$Q sh -- '$<' '$@' \
> +		HAVE_TCA_FLOWER_KEY_ENC_IPV6_DST_MASK \
> +		linux/pkt_cls.h \
> +		enum TCA_FLOWER_KEY_ENC_IPV6_DST_MASK \
> +		$(AUTOCONF_OUTPUT)
> +	$Q sh -- '$<' '$@' \
> +		HAVE_TCA_FLOWER_KEY_ENC_UDP_SRC_PORT \
> +		linux/pkt_cls.h \
> +		enum TCA_FLOWER_KEY_ENC_UDP_SRC_PORT \
> +		$(AUTOCONF_OUTPUT)
> +	$Q sh -- '$<' '$@' \
> +		HAVE_TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK \
> +		linux/pkt_cls.h \
> +		enum TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK \
> +		$(AUTOCONF_OUTPUT)
> +	$Q sh -- '$<' '$@' \
> +		HAVE_TCA_FLOWER_KEY_ENC_UDP_DST_PORT \
> +		linux/pkt_cls.h \
> +		enum TCA_FLOWER_KEY_ENC_UDP_DST_PORT \
> +		$(AUTOCONF_OUTPUT)
> +	$Q sh -- '$<' '$@' \
> +		HAVE_TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK \
> +		linux/pkt_cls.h \
> +		enum TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK \
> +		$(AUTOCONF_OUTPUT)
> +	$Q sh -- '$<' '$@' \
> +		HAVE_TC_ACT_TUNNEL_KEY \
> +		linux/tc_act/tc_tunnel_key.h \
> +		define TCA_ACT_TUNNEL_KEY \
> +		$(AUTOCONF_OUTPUT)
> +	$Q sh -- '$<' '$@' \
> +		HAVE_TCA_TUNNEL_KEY_ENC_DST_PORT \
> +		linux/tc_act/tc_tunnel_key.h \
> +		enum TCA_TUNNEL_KEY_ENC_DST_PORT \
> +		$(AUTOCONF_OUTPUT)
> +	$Q sh -- '$<' '$@' \
>  		HAVE_TC_ACT_PEDIT \
>  		linux/tc_act/tc_pedit.h \
>  		enum TCA_PEDIT_KEY_EX_HDR_TYPE_UDP \
> diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build
> index c192d44..43aabf2 100644
> --- a/drivers/net/mlx5/meson.build
> +++ b/drivers/net/mlx5/meson.build
> @@ -126,6 +126,8 @@ if build
>  		'IFLA_PHYS_SWITCH_ID' ],
>  		[ 'HAVE_IFLA_PHYS_PORT_NAME', 'linux/if_link.h',
>  		'IFLA_PHYS_PORT_NAME' ],
> +		[ 'HAVE_IFLA_VXLAN_COLLECT_METADATA', 'linux/if_link.h',
> +		'IFLA_VXLAN_COLLECT_METADATA' ],
>  		[ 'HAVE_TCA_CHAIN', 'linux/rtnetlink.h',
>  		'TCA_CHAIN' ],
>  		[ 'HAVE_TCA_FLOWER_ACT', 'linux/pkt_cls.h',
> @@ -190,6 +192,36 @@ if build
>  		'TC_ACT_GOTO_CHAIN' ],
>  		[ 'HAVE_TC_ACT_VLAN', 'linux/tc_act/tc_vlan.h',
>  		'TCA_VLAN_PUSH_VLAN_PRIORITY' ],
> +		[ 'HAVE_TCA_FLOWER_KEY_ENC_KEY_ID', 'linux/pkt_cls.h',
> +		'TCA_FLOWER_KEY_ENC_KEY_ID' ],
> +		[ 'HAVE_TCA_FLOWER_KEY_ENC_IPV4_SRC', 'linux/pkt_cls.h',
> +		'TCA_FLOWER_KEY_ENC_IPV4_SRC' ],
> +		[ 'HAVE_TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK', 'linux/pkt_cls.h',
> +		'TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK' ],
> +		[ 'HAVE_TCA_FLOWER_KEY_ENC_IPV4_DST', 'linux/pkt_cls.h',
> +		'TCA_FLOWER_KEY_ENC_IPV4_DST' ],
> +		[ 'HAVE_TCA_FLOWER_KEY_ENC_IPV4_DST_MASK', 'linux/pkt_cls.h',
> +		'TCA_FLOWER_KEY_ENC_IPV4_DST_MASK' ],
> +		[ 'HAVE_TCA_FLOWER_KEY_ENC_IPV6_SRC', 'linux/pkt_cls.h',
> +		'TCA_FLOWER_KEY_ENC_IPV6_SRC' ],
> +		[ 'HAVE_TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK', 'linux/pkt_cls.h',
> +		'TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK' ],
> +		[ 'HAVE_TCA_FLOWER_KEY_ENC_IPV6_DST', 'linux/pkt_cls.h',
> +		'TCA_FLOWER_KEY_ENC_IPV6_DST' ],
> +		[ 'HAVE_TCA_FLOWER_KEY_ENC_IPV6_DST_MASK', 'linux/pkt_cls.h',
> +		'TCA_FLOWER_KEY_ENC_IPV6_DST_MASK' ],
> +		[ 'HAVE_TCA_FLOWER_KEY_ENC_UDP_SRC_PORT', 'linux/pkt_cls.h',
> +		'TCA_FLOWER_KEY_ENC_UDP_SRC_PORT' ],
> +		[ 'HAVE_TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK', 'linux/pkt_cls.h',
> +		'TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK' ],
> +		[ 'HAVE_TCA_FLOWER_KEY_ENC_UDP_DST_PORT', 'linux/pkt_cls.h',
> +		'TCA_FLOWER_KEY_ENC_UDP_DST_PORT' ],
> +		[ 'HAVE_TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK', 'linux/pkt_cls.h',
> +		'TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK' ],
> +		[ 'HAVE_TC_ACT_TUNNEL_KEY', 'linux/tc_act/tc_tunnel_key.h',
> +		'TCA_ACT_TUNNEL_KEY' ],
> +		[ 'HAVE_TCA_TUNNEL_KEY_ENC_DST_PORT', 'linux/tc_act/tc_tunnel_key.h',
> +		'TCA_TUNNEL_KEY_ENC_DST_PORT' ],
>  		[ 'HAVE_TC_ACT_PEDIT', 'linux/tc_act/tc_pedit.h',
>  		'TCA_PEDIT_KEY_EX_HDR_TYPE_UDP' ],
>  		[ 'HAVE_RDMA_NL_NLDEV', 'rdma/rdma_netlink.h',
> diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
> index 840d645..b838ab0 100644
> --- a/drivers/net/mlx5/mlx5_flow.h
> +++ b/drivers/net/mlx5/mlx5_flow.h
> @@ -85,6 +85,8 @@
>  #define MLX5_FLOW_ACTION_SET_TP_SRC (1u << 15)
>  #define MLX5_FLOW_ACTION_SET_TP_DST (1u << 16)
>  #define MLX5_FLOW_ACTION_JUMP (1u << 17)
> +#define MLX5_ACTION_VXLAN_ENCAP (1u << 11)
> +#define MLX5_ACTION_VXLAN_DECAP (1u << 12)

MLX5_ACTION_* has been changed to MLX5_FLOW_ACTION_*, as you can see above.
Also, please put these in alphabetical order (decap first, then encap), or at
least make the order consistent: the order of the case clauses is different
among validate, prepare and translate.

>  #define MLX5_FLOW_FATE_ACTIONS \
>  	(MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_QUEUE | MLX5_FLOW_ACTION_RSS)
> @@ -182,8 +184,17 @@ struct mlx5_flow_dv {
>  struct mlx5_flow_tcf {
>  	struct nlmsghdr *nlh;
>  	struct tcmsg *tcm;
> +	uint32_t nlsize; /**< Size of NL message buffer. */

It is used only for assert(), but if prepare() is trusted, why do we need to
keep it? I don't think it is needed.

> +	uint32_t applied:1; /**< Whether rule is currently applied. */
> +	uint64_t item_flags; /**< Item flags. */

This isn't used at all.

> +	uint64_t action_flags; /**< Action flags. */

I checked the following patches and it doesn't seem necessary. Please refer to
the comment on the translation func. But if you think it is really needed, you
could've used the actions field of struct rte_flow and the layers field of
struct mlx5_flow in mlx5_flow.h.

>  	uint64_t hits;
>  	uint64_t bytes;
> +	union { /**< Tunnel encap/decap descriptor. */
> +		struct mlx5_flow_tcf_tunnel_hdr *tunnel;
> +		struct mlx5_flow_tcf_vxlan_decap *vxlan_decap;
> +		struct mlx5_flow_tcf_vxlan_encap *vxlan_encap;
> +	};

What is the reason for keeping a pointer even though the actual structure
follows after mlx5_flow_tcf? Maybe you don't want to waste memory, as the sizes
of the encap/decap structs differ a lot?

>  };
>  
>  /* Verbs specification header. */
> diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
> index 5c46f35..8f9c78a 100644
> --- a/drivers/net/mlx5/mlx5_flow_tcf.c
> +++ b/drivers/net/mlx5/mlx5_flow_tcf.c
> @@ -54,6 +54,37 @@ struct tc_vlan {
>  
>  #endif /* HAVE_TC_ACT_VLAN */
>  
> +#ifdef HAVE_TC_ACT_TUNNEL_KEY
> +
> +#include <linux/tc_act/tc_tunnel_key.h>
> +
> +#ifndef HAVE_TCA_TUNNEL_KEY_ENC_DST_PORT
> +#define TCA_TUNNEL_KEY_ENC_DST_PORT 9
> +#endif
> +
> +#else /* HAVE_TC_ACT_TUNNEL_KEY */
> +
> +#define TCA_ACT_TUNNEL_KEY 17
> +#define TCA_TUNNEL_KEY_ACT_SET 1
> +#define TCA_TUNNEL_KEY_ACT_RELEASE 2
> +#define TCA_TUNNEL_KEY_PARMS 2
> +#define TCA_TUNNEL_KEY_ENC_IPV4_SRC 3
> +#define TCA_TUNNEL_KEY_ENC_IPV4_DST 4
> +#define TCA_TUNNEL_KEY_ENC_IPV6_SRC 5
> +#define TCA_TUNNEL_KEY_ENC_IPV6_DST 6
> +#define TCA_TUNNEL_KEY_ENC_KEY_ID 7
> +#define TCA_TUNNEL_KEY_ENC_DST_PORT 9
> +#define TCA_TUNNEL_KEY_NO_CSUM 10
> +
> +struct tc_tunnel_key {
> +	tc_gen;
> +	int t_action;
> +};
> +
> +#endif /* HAVE_TC_ACT_TUNNEL_KEY */
> +
> +
> +
>  #ifdef HAVE_TC_ACT_PEDIT
>  
>  #include <linux/tc_act/tc_pedit.h>
> @@ -210,6 +241,45 @@ struct tc_pedit_sel {
>  #ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE
>  #define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
>  #endif
> +#ifndef HAVE_TCA_FLOWER_KEY_ENC_KEY_ID
> +#define TCA_FLOWER_KEY_ENC_KEY_ID 26
> +#endif
> +#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_SRC
> +#define TCA_FLOWER_KEY_ENC_IPV4_SRC 27
> +#endif
> +#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK
> +#define TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK 28
> +#endif
> +#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_DST
> +#define TCA_FLOWER_KEY_ENC_IPV4_DST 29
> +#endif
> +#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV4_DST_MASK
> +#define TCA_FLOWER_KEY_ENC_IPV4_DST_MASK 30
> +#endif
> +#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_SRC
> +#define TCA_FLOWER_KEY_ENC_IPV6_SRC 31
> +#endif
> +#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK
> +#define TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK 32
> +#endif
> +#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_DST
> +#define TCA_FLOWER_KEY_ENC_IPV6_DST 33
> +#endif
> +#ifndef HAVE_TCA_FLOWER_KEY_ENC_IPV6_DST_MASK
> +#define TCA_FLOWER_KEY_ENC_IPV6_DST_MASK 34
> +#endif
> +#ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_SRC_PORT
> +#define TCA_FLOWER_KEY_ENC_UDP_SRC_PORT 43
> +#endif
> +#ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK
> +#define TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK 44
> +#endif
> +#ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_DST_PORT
> +#define TCA_FLOWER_KEY_ENC_UDP_DST_PORT 45
> +#endif
> +#ifndef HAVE_TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK
> +#define TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK 46
> +#endif
>  #ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS
>  #define TCA_FLOWER_KEY_TCP_FLAGS 71
>  #endif
> @@ -232,6 +302,111 @@ struct tc_pedit_sel {
>  #define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
>  #endif
>  
> +#define MLX5_VXLAN_PORT_RANGE_MIN 30000
> +#define MLX5_VXLAN_PORT_RANGE_MAX 60000
> +#define MLX5_VXLAN_DEVICE_PFX "vmlx_"
> +
> +/** Tunnel action type, used for @p type in header structure. */
> +enum mlx5_flow_tcf_tunact_type {
> +	MLX5_FLOW_TCF_TUNACT_VXLAN_ENCAP,
> +	MLX5_FLOW_TCF_TUNACT_VXLAN_DECAP,
> +};
> +
> +/** Flags used for @p mask in tunnel action encap descriptors. */
> +#define	MLX5_FLOW_TCF_ENCAP_ETH_SRC (1u << 0)
> +#define	MLX5_FLOW_TCF_ENCAP_ETH_DST (1u << 1)
> +#define	MLX5_FLOW_TCF_ENCAP_IPV4_SRC (1u << 2)
> +#define	MLX5_FLOW_TCF_ENCAP_IPV4_DST (1u << 3)
> +#define	MLX5_FLOW_TCF_ENCAP_IPV6_SRC (1u << 4)
> +#define	MLX5_FLOW_TCF_ENCAP_IPV6_DST (1u << 5)
> +#define	MLX5_FLOW_TCF_ENCAP_UDP_SRC (1u << 6)
> +#define	MLX5_FLOW_TCF_ENCAP_UDP_DST (1u << 7)
> +#define	MLX5_FLOW_TCF_ENCAP_VXLAN_VNI (1u << 8)
> +
> +/** Neigh rule structure */
> +struct tcf_neigh_rule {
> +	LIST_ENTRY(tcf_neigh_rule) next;
> +	uint32_t refcnt;
> +	struct ether_addr eth;
> +	uint16_t mask;
> +	union {
> +		struct {
> +			rte_be32_t dst;
> +		} ipv4;
> +		struct {
> +			uint8_t dst[16];
> +		} ipv6;
> +	};
> +};
> +
> +/** Local rule structure */
> +struct tcf_local_rule {
> +	LIST_ENTRY(tcf_neigh_rule) next;
> +	uint32_t refcnt;
> +	uint16_t mask;
> +	union {
> +		struct {
> +			rte_be32_t dst;
> +			rte_be32_t src;
> +		} ipv4;
> +		struct {
> +			uint8_t dst[16];
> +			uint8_t src[16];
> +		} ipv6;
> +	};
> +};
> +
> +/** VXLAN virtual netdev. */
> +struct mlx5_flow_tcf_vtep {
> +	LIST_ENTRY(mlx5_flow_tcf_vtep) next;
> +	LIST_HEAD(, tcf_neigh_rule) neigh;
> +	LIST_HEAD(, tcf_local_rule) local;
> +	uint32_t refcnt;
> +	unsigned int ifindex; /**< Own interface index. */
> +	unsigned int ifouter; /**< Index of device attached to. */
> +	uint16_t port;
> +	uint8_t created;
> +};
> +
> +/** Tunnel descriptor header, common for all tunnel types. */
> +struct mlx5_flow_tcf_tunnel_hdr {
> +	uint32_t type; /**< Tunnel action type. */
> +	unsigned int ifindex_tun; /**< Tunnel endpoint interface. */
> +	unsigned int ifindex_org; /**< Original dst/src interface */
> +	unsigned int *ifindex_ptr; /**< Interface ptr in message. */
> +};
> +
> +struct mlx5_flow_tcf_vxlan_decap {
> +	struct mlx5_flow_tcf_tunnel_hdr hdr;
> +	uint16_t udp_port;
> +};
> +
> +struct mlx5_flow_tcf_vxlan_encap {
> +	struct mlx5_flow_tcf_tunnel_hdr hdr;
> +	uint32_t mask;
> +	struct {
> +		struct ether_addr dst;
> +		struct ether_addr src;
> +	} eth;
> +	union {
> +		struct {
> +			rte_be32_t dst;
> +			rte_be32_t src;
> +		} ipv4;
> +		struct {
> +			uint8_t dst[16];
> +			uint8_t src[16];
> +		} ipv6;
> +	};
> +struct {
> +		rte_be16_t src;
> +		rte_be16_t dst;
> +	} udp;
> +	struct {
> +		uint8_t vni[3];
> +	} vxlan;
> +};
> +
>  /**
>   * Structure for holding netlink context.
>   * Note the size of the message buffer which is MNL_SOCKET_BUFFER_SIZE.
> 


More information about the dev mailing list