[dpdk-dev] [PATCH v3 07/14] net/mlx5: support tunnel RSS level

Nélio Laranjeiro nelio.laranjeiro at 6wind.com
Fri Apr 13 15:27:01 CEST 2018


It seems you did not read my comments on this patch; I have exactly the
same ones as before.

On Fri, Apr 13, 2018 at 07:20:16PM +0800, Xueming Li wrote:
> Tunnel RSS level of flow RSS action offers user a choice to do RSS hash
> calculation on inner or outer RSS fields. Testpmd flow command examples:
> 
> GRE flow inner RSS:
>   flow create 0 ingress pattern eth / ipv4 proto is 47 / gre / end
> actions rss queues 1 2 end level 1 / end
> 
> GRE tunnel flow outer RSS:
>   flow create 0 ingress pattern eth  / ipv4 proto is 47 / gre / end
> actions rss queues 1 2 end level 0 / end
> 
> Signed-off-by: Xueming Li <xuemingl at mellanox.com>
> ---
>  drivers/net/mlx5/Makefile    |   2 +-
>  drivers/net/mlx5/mlx5_flow.c | 247 +++++++++++++++++++++++++++++--------------
>  drivers/net/mlx5/mlx5_glue.c |  16 +++
>  drivers/net/mlx5/mlx5_glue.h |   8 ++
>  drivers/net/mlx5/mlx5_rxq.c  |  56 +++++++++-
>  drivers/net/mlx5/mlx5_rxtx.h |   5 +-
>  6 files changed, 248 insertions(+), 86 deletions(-)
> 
> diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
> index ae118ad33..f9a6c460b 100644
> --- a/drivers/net/mlx5/Makefile
> +++ b/drivers/net/mlx5/Makefile
> @@ -35,7 +35,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
>  LIB = librte_pmd_mlx5.a
>  LIB_GLUE = $(LIB_GLUE_BASE).$(LIB_GLUE_VERSION)
>  LIB_GLUE_BASE = librte_pmd_mlx5_glue.so
> -LIB_GLUE_VERSION = 18.02.0
> +LIB_GLUE_VERSION = 18.05.0
>  
>  # Sources.
>  SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5.c
> diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
> index dd099f328..a22554706 100644
> --- a/drivers/net/mlx5/mlx5_flow.c
> +++ b/drivers/net/mlx5/mlx5_flow.c
> @@ -116,6 +116,7 @@ enum hash_rxq_type {
>  	HASH_RXQ_UDPV6,
>  	HASH_RXQ_IPV6,
>  	HASH_RXQ_ETH,
> +	HASH_RXQ_TUNNEL,
>  };
>  
>  /* Initialization data for hash RX queue. */
> @@ -454,6 +455,7 @@ struct mlx5_flow_parse {
>  	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
>  	uint8_t rss_key[40]; /**< copy of the RSS key. */
>  	enum hash_rxq_type layer; /**< Last pattern layer detected. */
> +	enum hash_rxq_type out_layer; /**< Last outer pattern layer detected. */
>  	uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */
>  	struct ibv_counter_set *cs; /**< Holds the counter set for the rule */
>  	struct {
> @@ -461,6 +463,7 @@ struct mlx5_flow_parse {
>  		/**< Pointer to Verbs attributes. */
>  		unsigned int offset;
>  		/**< Current position or total size of the attribute. */
> +		uint64_t hash_fields; /**< Verbs hash fields. */
>  	} queue[RTE_DIM(hash_rxq_init)];
>  };
>  
> @@ -696,7 +699,8 @@ mlx5_flow_convert_actions(struct rte_eth_dev *dev,
>  						   " function is Toeplitz");
>  				return -rte_errno;
>  			}
> -			if (rss->level) {
> +#ifndef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
> +			if (parser->rss_conf.level > 0) {
>  				rte_flow_error_set(error, EINVAL,
>  						   RTE_FLOW_ERROR_TYPE_ACTION,
>  						   actions,
> @@ -704,6 +708,15 @@ mlx5_flow_convert_actions(struct rte_eth_dev *dev,
>  						   " level is not supported");
>  				return -rte_errno;
>  			}
> +#endif
> +			if (parser->rss_conf.level > 1) {
> +				rte_flow_error_set(error, EINVAL,
> +						   RTE_FLOW_ERROR_TYPE_ACTION,
> +						   actions,
> +						   "RSS encapsulation level"
> +						   " > 1 is not supported");
> +				return -rte_errno;
> +			}
>  			if (rss->types & MLX5_RSS_HF_MASK) {
>  				rte_flow_error_set(error, EINVAL,
>  						   RTE_FLOW_ERROR_TYPE_ACTION,

Same comment as in the previous review.
The levels do not match the proposed API:
level 0 = unspecified, 1 = outermost, 2 = next outermost, 3 = next next,
...

> @@ -754,7 +767,7 @@ mlx5_flow_convert_actions(struct rte_eth_dev *dev,
>  			}
>  			parser->rss_conf = (struct rte_flow_action_rss){
>  				.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
> -				.level = 0,
> +				.level = rss->level,
>  				.types = rss->types,
>  				.key_len = rss_key_len,
>  				.queue_num = rss->queue_num,
> @@ -838,10 +851,12 @@ mlx5_flow_convert_actions(struct rte_eth_dev *dev,
>   *   0 on success, a negative errno value otherwise and rte_errno is set.
>   */
>  static int
> -mlx5_flow_convert_items_validate(const struct rte_flow_item items[],
> +mlx5_flow_convert_items_validate(struct rte_eth_dev *dev,
> +				 const struct rte_flow_item items[],
>  				 struct rte_flow_error *error,
>  				 struct mlx5_flow_parse *parser)
>  {
> +	struct priv *priv = dev->data->dev_private;
>  	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
>  	unsigned int i;
>  	int ret = 0;
> @@ -881,6 +896,14 @@ mlx5_flow_convert_items_validate(const struct rte_flow_item items[],
>  						   " tunnel encapsulations.");
>  				return -rte_errno;
>  			}
> +			if (!priv->config.tunnel_en &&
> +			    parser->rss_conf.level) {
> +				rte_flow_error_set(error, ENOTSUP,
> +					RTE_FLOW_ERROR_TYPE_ITEM,
> +					items,
> +					"Tunnel offloading not enabled");
> +				return -rte_errno;
> +			}
>  			parser->inner = IBV_FLOW_SPEC_INNER;
>  			parser->tunnel = flow_ptype[items->type];
>  		}
> @@ -1000,7 +1023,11 @@ static void
>  mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
>  {
>  	unsigned int i;
> +	uint32_t inner = parser->inner;
>  
> +	/* Don't create extra flows for outer RSS. */
> +	if (parser->tunnel && !parser->rss_conf.level)
> +		return;
>  	/*
>  	 * Fill missing layers in verbs specifications, or compute the correct
>  	 * offset to allocate the memory space for the attributes and
> @@ -1011,23 +1038,25 @@ mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
>  			struct ibv_flow_spec_ipv4_ext ipv4;
>  			struct ibv_flow_spec_ipv6 ipv6;
>  			struct ibv_flow_spec_tcp_udp udp_tcp;
> +			struct ibv_flow_spec_eth eth;
>  		} specs;
>  		void *dst;
>  		uint16_t size;
>  
>  		if (i == parser->layer)
>  			continue;
> -		if (parser->layer == HASH_RXQ_ETH) {
> +		if (parser->layer == HASH_RXQ_ETH ||
> +		    parser->layer == HASH_RXQ_TUNNEL) {
>  			if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
>  				size = sizeof(struct ibv_flow_spec_ipv4_ext);
>  				specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
> -					.type = IBV_FLOW_SPEC_IPV4_EXT,
> +					.type = inner | IBV_FLOW_SPEC_IPV4_EXT,
>  					.size = size,
>  				};
>  			} else {
>  				size = sizeof(struct ibv_flow_spec_ipv6);
>  				specs.ipv6 = (struct ibv_flow_spec_ipv6){
> -					.type = IBV_FLOW_SPEC_IPV6,
> +					.type = inner | IBV_FLOW_SPEC_IPV6,
>  					.size = size,
>  				};
>  			}
> @@ -1044,7 +1073,7 @@ mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
>  		    (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
>  			size = sizeof(struct ibv_flow_spec_tcp_udp);
>  			specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
> -				.type = ((i == HASH_RXQ_UDPV4 ||
> +				.type = inner | ((i == HASH_RXQ_UDPV4 ||
>  					  i == HASH_RXQ_UDPV6) ?
>  					 IBV_FLOW_SPEC_UDP :
>  					 IBV_FLOW_SPEC_TCP),
> @@ -1065,6 +1094,8 @@ mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
>  /**
>   * Update flows according to pattern and RSS hash fields.
>   *
> + * @param dev
> + *   Pointer to Ethernet device.
>   * @param[in, out] parser
>   *   Internal parser structure.
>   *
> @@ -1072,16 +1103,17 @@ mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
>   *   0 on success, a negative errno value otherwise and rte_errno is set.
>   */
>  static int
> -mlx5_flow_convert_rss(struct mlx5_flow_parse *parser)
> +mlx5_flow_convert_rss(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser)
>  {
> -	const unsigned int ipv4 =
> +	unsigned int ipv4 =
>  		hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
>  	const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
>  	const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
>  	const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
>  	const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
> -	const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
> +	enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
>  	unsigned int i;
> +	int found = 0;
>  
>  	/* Remove any other flow not matching the pattern. */
>  	if (parser->rss_conf.queue_num == 1 && !parser->rss_conf.types) {
> @@ -1093,9 +1125,51 @@ mlx5_flow_convert_rss(struct mlx5_flow_parse *parser)
>  		}
>  		return 0;
>  	}
> -	if (parser->layer == HASH_RXQ_ETH)
> +	/*
> +	 * Outer RSS.
> +	 * HASH_RXQ_ETH is the only rule since tunnel packet match this
> +	 * rule must match outer pattern.
> +	 */
> +	if (parser->tunnel && !parser->rss_conf.level) {
> +		/* Remove flows other than default. */
> +		for (i = 0; i != hash_rxq_init_n - 1; ++i) {
> +			rte_free(parser->queue[i].ibv_attr);
> +			parser->queue[i].ibv_attr = NULL;
> +		}
> +		ipv4 = hash_rxq_init[parser->out_layer].ip_version == MLX5_IPV4;
> +		ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
> +		if (hash_rxq_init[parser->out_layer].dpdk_rss_hf &
> +		    parser->rss_conf.types) {
> +			parser->queue[HASH_RXQ_ETH].hash_fields =
> +				hash_rxq_init[parser->out_layer].hash_fields;
> +		} else if (ip && (hash_rxq_init[ip].dpdk_rss_hf &
> +		    parser->rss_conf.types)) {
> +			parser->queue[HASH_RXQ_ETH].hash_fields =
> +				hash_rxq_init[ip].hash_fields;
> +		} else if (parser->rss_conf.types) {
> +			DRV_LOG(WARNING,
> +				"port %u rss outer hash function doesn't match"
> +				" pattern", dev->data->port_id);
> +		}
> +		return 0;
> +	}
> +	if (parser->layer == HASH_RXQ_ETH || parser->layer == HASH_RXQ_TUNNEL) {
> +		/* Remove unused flows according to hash function. */
> +		for (i = 0; i != hash_rxq_init_n - 1; ++i) {
> +			if (!parser->queue[i].ibv_attr)
> +				continue;
> +			if (hash_rxq_init[i].dpdk_rss_hf &
> +			    parser->rss_conf.types) {
> +				parser->queue[i].hash_fields =
> +					hash_rxq_init[i].hash_fields;
> +				continue;
> +			}
> +			rte_free(parser->queue[i].ibv_attr);
> +			parser->queue[i].ibv_attr = NULL;
> +		}
>  		return 0;
> -	/* This layer becomes useless as the pattern define under layers. */
> +	}
> +	/* Remove ETH layer flow. */
>  	rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
>  	parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
>  	/* Remove opposite kind of layer e.g. IPv6 if the pattern is IPv4. */
> @@ -1105,20 +1179,50 @@ mlx5_flow_convert_rss(struct mlx5_flow_parse *parser)
>  		rte_free(parser->queue[i].ibv_attr);
>  		parser->queue[i].ibv_attr = NULL;
>  	}
> -	/* Remove impossible flow according to the RSS configuration. */
> -	if (hash_rxq_init[parser->layer].dpdk_rss_hf &
> -	    parser->rss_conf.types) {
> -		/* Remove any other flow. */
> +	/*
> +	 * Keep L4 flows as IP pattern has to support L4 RSS.
> +	 * Otherwise, only keep the flow that match the pattern.
> +	 */
> +	if (parser->layer != ip) {
> +		/* Only keep the flow that match the pattern. */
>  		for (i = hmin; i != (hmax + 1); ++i) {
> -			if (i == parser->layer || !parser->queue[i].ibv_attr)
> +			if (i == parser->layer)
>  				continue;
>  			rte_free(parser->queue[i].ibv_attr);
>  			parser->queue[i].ibv_attr = NULL;
>  		}
> -	} else if (!parser->queue[ip].ibv_attr) {
> -		/* no RSS possible with the current configuration. */
> -		parser->rss_conf.queue_num = 1;
>  	}
> +	/* Remove impossible flow according to the RSS configuration. */
> +	for (i = hmin; i != (hmax + 1); ++i) {
> +		if (!parser->queue[i].ibv_attr)
> +			continue;
> +		if (parser->rss_conf.types &
> +		    hash_rxq_init[i].dpdk_rss_hf) {
> +			parser->queue[i].hash_fields =
> +				hash_rxq_init[i].hash_fields;
> +			found = 1;
> +			continue;
> +		}
> +		/* L4 flow could be used for L3 RSS. */
> +		if (i == parser->layer && i < ip &&
> +		    (hash_rxq_init[ip].dpdk_rss_hf &
> +		     parser->rss_conf.types)) {
> +			parser->queue[i].hash_fields =
> +				hash_rxq_init[ip].hash_fields;
> +			found = 1;
> +			continue;
> +		}
> +		/* L3 flow and L4 hash: non-rss L3 flow. */
> +		if (i == parser->layer && i == ip && found)
> +			/* IP pattern and L4 HF. */
> +			continue;
> +		rte_free(parser->queue[i].ibv_attr);
> +		parser->queue[i].ibv_attr = NULL;
> +	}
> +	if (!found)
> +		DRV_LOG(WARNING,
> +			"port %u rss hash function doesn't match "
> +			"pattern", dev->data->port_id);

The hash function (Toeplitz, XOR) is not applied on the pattern; it is
used to compute a hash result from some information in the packet.
This comment is totally wrong.

Another point: such a log message will trigger for an application using
the MLX5 PMD but not for one using the MLX4 PMD, and this is
specifically because of how the NICs behind the MLX5 PMD are made
internally (MLX4 can use a single hash Rx queue whereas MLX5 needs one
hash Rx queue per kind of protocol).
Since the resulting behavior is exactly the same either way, I
*strongly* suggest removing such an annoying warning.

>  	return 0;
>  }
>  
> @@ -1165,7 +1269,7 @@ mlx5_flow_convert(struct rte_eth_dev *dev,
>  	ret = mlx5_flow_convert_actions(dev, actions, error, parser);
>  	if (ret)
>  		return ret;
> -	ret = mlx5_flow_convert_items_validate(items, error, parser);
> +	ret = mlx5_flow_convert_items_validate(dev, items, error, parser);
>  	if (ret)
>  		return ret;
>  	mlx5_flow_convert_finalise(parser);
> @@ -1186,10 +1290,6 @@ mlx5_flow_convert(struct rte_eth_dev *dev,
>  		for (i = 0; i != hash_rxq_init_n; ++i) {
>  			unsigned int offset;
>  
> -			if (!(parser->rss_conf.types &
> -			      hash_rxq_init[i].dpdk_rss_hf) &&
> -			    (i != HASH_RXQ_ETH))
> -				continue;
>  			offset = parser->queue[i].offset;
>  			parser->queue[i].ibv_attr =
>  				mlx5_flow_convert_allocate(offset, error);
> @@ -1201,6 +1301,7 @@ mlx5_flow_convert(struct rte_eth_dev *dev,
>  	/* Third step. Conversion parse, fill the specifications. */
>  	parser->inner = 0;
>  	parser->tunnel = 0;
> +	parser->layer = HASH_RXQ_ETH;
>  	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
>  		struct mlx5_flow_data data = {
>  			.parser = parser,
> @@ -1218,23 +1319,23 @@ mlx5_flow_convert(struct rte_eth_dev *dev,
>  		if (ret)
>  			goto exit_free;
>  	}
> -	if (parser->mark)
> -		mlx5_flow_create_flag_mark(parser, parser->mark_id);
> -	if (parser->count && parser->create) {
> -		mlx5_flow_create_count(dev, parser);
> -		if (!parser->cs)
> -			goto exit_count_error;
> -	}
>  	/*
>  	 * Last step. Complete missing specification to reach the RSS
>  	 * configuration.
>  	 */
>  	if (!parser->drop)
> -		ret = mlx5_flow_convert_rss(parser);
> +		ret = mlx5_flow_convert_rss(dev, parser);
>  		if (ret)
>  			goto exit_free;
>  		mlx5_flow_convert_finalise(parser);
>  	mlx5_flow_update_priority(dev, parser, attr);
> +	if (parser->mark)
> +		mlx5_flow_create_flag_mark(parser, parser->mark_id);
> +	if (parser->count && parser->create) {
> +		mlx5_flow_create_count(dev, parser);
> +		if (!parser->cs)
> +			goto exit_count_error;
> +	}
>  exit_free:
>  	/* Only verification is expected, all resources should be released. */
>  	if (!parser->create) {
> @@ -1282,17 +1383,11 @@ mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
>  	for (i = 0; i != hash_rxq_init_n; ++i) {
>  		if (!parser->queue[i].ibv_attr)
>  			continue;
> -		/* Specification must be the same l3 type or none. */
> -		if (parser->layer == HASH_RXQ_ETH ||
> -		    (hash_rxq_init[parser->layer].ip_version ==
> -		     hash_rxq_init[i].ip_version) ||
> -		    (hash_rxq_init[i].ip_version == 0)) {
> -			dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
> -					parser->queue[i].offset);
> -			memcpy(dst, src, size);
> -			++parser->queue[i].ibv_attr->num_of_specs;
> -			parser->queue[i].offset += size;
> -		}
> +		dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
> +				parser->queue[i].offset);
> +		memcpy(dst, src, size);
> +		++parser->queue[i].ibv_attr->num_of_specs;
> +		parser->queue[i].offset += size;
>  	}
>  }
>  
> @@ -1323,9 +1418,7 @@ mlx5_flow_create_eth(const struct rte_flow_item *item,
>  		.size = eth_size,
>  	};
>  
> -	/* Don't update layer for the inner pattern. */
> -	if (!parser->inner)
> -		parser->layer = HASH_RXQ_ETH;
> +	parser->layer = HASH_RXQ_ETH;
>  	if (spec) {
>  		unsigned int i;
>  
> @@ -1438,9 +1531,7 @@ mlx5_flow_create_ipv4(const struct rte_flow_item *item,
>  		.size = ipv4_size,
>  	};
>  
> -	/* Don't update layer for the inner pattern. */
> -	if (!parser->inner)
> -		parser->layer = HASH_RXQ_IPV4;
> +	parser->layer = HASH_RXQ_IPV4;
>  	if (spec) {
>  		if (!mask)
>  			mask = default_mask;
> @@ -1493,9 +1584,7 @@ mlx5_flow_create_ipv6(const struct rte_flow_item *item,
>  		.size = ipv6_size,
>  	};
>  
> -	/* Don't update layer for the inner pattern. */
> -	if (!parser->inner)
> -		parser->layer = HASH_RXQ_IPV6;
> +	parser->layer = HASH_RXQ_IPV6;
>  	if (spec) {
>  		unsigned int i;
>  		uint32_t vtc_flow_val;
> @@ -1568,13 +1657,10 @@ mlx5_flow_create_udp(const struct rte_flow_item *item,
>  		.size = udp_size,
>  	};
>  
> -	/* Don't update layer for the inner pattern. */
> -	if (!parser->inner) {
> -		if (parser->layer == HASH_RXQ_IPV4)
> -			parser->layer = HASH_RXQ_UDPV4;
> -		else
> -			parser->layer = HASH_RXQ_UDPV6;
> -	}
> +	if (parser->layer == HASH_RXQ_IPV4)
> +		parser->layer = HASH_RXQ_UDPV4;
> +	else
> +		parser->layer = HASH_RXQ_UDPV6;
>  	if (spec) {
>  		if (!mask)
>  			mask = default_mask;
> @@ -1617,13 +1703,10 @@ mlx5_flow_create_tcp(const struct rte_flow_item *item,
>  		.size = tcp_size,
>  	};
>  
> -	/* Don't update layer for the inner pattern. */
> -	if (!parser->inner) {
> -		if (parser->layer == HASH_RXQ_IPV4)
> -			parser->layer = HASH_RXQ_TCPV4;
> -		else
> -			parser->layer = HASH_RXQ_TCPV6;
> -	}
> +	if (parser->layer == HASH_RXQ_IPV4)
> +		parser->layer = HASH_RXQ_TCPV4;
> +	else
> +		parser->layer = HASH_RXQ_TCPV6;
>  	if (spec) {
>  		if (!mask)
>  			mask = default_mask;
> @@ -1673,6 +1756,8 @@ mlx5_flow_create_vxlan(const struct rte_flow_item *item,
>  	id.vni[0] = 0;
>  	parser->inner = IBV_FLOW_SPEC_INNER;
>  	parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)];
> +	parser->out_layer = parser->layer;
> +	parser->layer = HASH_RXQ_TUNNEL;
>  	if (spec) {
>  		if (!mask)
>  			mask = default_mask;
> @@ -1727,6 +1812,8 @@ mlx5_flow_create_gre(const struct rte_flow_item *item __rte_unused,
>  
>  	parser->inner = IBV_FLOW_SPEC_INNER;
>  	parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)];
> +	parser->out_layer = parser->layer;
> +	parser->layer = HASH_RXQ_TUNNEL;
>  	mlx5_flow_create_copy(parser, &tunnel, size);
>  	return 0;
>  }
> @@ -1890,33 +1977,33 @@ mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev,
>  	unsigned int i;
>  
>  	for (i = 0; i != hash_rxq_init_n; ++i) {
> -		uint64_t hash_fields;
> -
>  		if (!parser->queue[i].ibv_attr)
>  			continue;
>  		flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
>  		parser->queue[i].ibv_attr = NULL;
> -		hash_fields = hash_rxq_init[i].hash_fields;
> +		flow->frxq[i].hash_fields = parser->queue[i].hash_fields;
>  		if (!priv->dev->data->dev_started)
>  			continue;
>  		flow->frxq[i].hrxq =
>  			mlx5_hrxq_get(dev,
>  				      parser->rss_conf.key,
>  				      parser->rss_conf.key_len,
> -				      hash_fields,
> +				      flow->frxq[i].hash_fields,
>  				      parser->rss_conf.queue,
>  				      parser->rss_conf.queue_num,
> -				      parser->tunnel);
> +				      parser->tunnel,
> +				      parser->rss_conf.level);
>  		if (flow->frxq[i].hrxq)
>  			continue;
>  		flow->frxq[i].hrxq =
>  			mlx5_hrxq_new(dev,
>  				      parser->rss_conf.key,
>  				      parser->rss_conf.key_len,
> -				      hash_fields,
> +				      flow->frxq[i].hash_fields,
>  				      parser->rss_conf.queue,
>  				      parser->rss_conf.queue_num,
> -				      parser->tunnel);
> +				      parser->tunnel,
> +				      parser->rss_conf.level);
>  		if (!flow->frxq[i].hrxq) {
>  			return rte_flow_error_set(error, ENOMEM,
>  						  RTE_FLOW_ERROR_TYPE_HANDLE,
> @@ -2013,7 +2100,7 @@ mlx5_flow_create_action_queue(struct rte_eth_dev *dev,
>  		DRV_LOG(DEBUG, "port %u %p type %d QP %p ibv_flow %p",
>  			dev->data->port_id,
>  			(void *)flow, i,
> -			(void *)flow->frxq[i].hrxq,
> +			(void *)flow->frxq[i].hrxq->qp,
>  			(void *)flow->frxq[i].ibv_flow);
>  	}
>  	if (!flows_n) {
> @@ -2541,19 +2628,21 @@ mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
>  			flow->frxq[i].hrxq =
>  				mlx5_hrxq_get(dev, flow->rss_conf.key,
>  					      flow->rss_conf.key_len,
> -					      hash_rxq_init[i].hash_fields,
> +					      flow->frxq[i].hash_fields,
>  					      flow->rss_conf.queue,
>  					      flow->rss_conf.queue_num,
> -					      flow->tunnel);
> +					      flow->tunnel,
> +					      flow->rss_conf.level);
>  			if (flow->frxq[i].hrxq)
>  				goto flow_create;
>  			flow->frxq[i].hrxq =
>  				mlx5_hrxq_new(dev, flow->rss_conf.key,
>  					      flow->rss_conf.key_len,
> -					      hash_rxq_init[i].hash_fields,
> +					      flow->frxq[i].hash_fields,
>  					      flow->rss_conf.queue,
>  					      flow->rss_conf.queue_num,
> -					      flow->tunnel);
> +					      flow->tunnel,
> +					      flow->rss_conf.level);
>  			if (!flow->frxq[i].hrxq) {
>  				DRV_LOG(DEBUG,
>  					"port %u flow %p cannot be applied",
> diff --git a/drivers/net/mlx5/mlx5_glue.c b/drivers/net/mlx5/mlx5_glue.c
> index be684d378..6874aa32a 100644
> --- a/drivers/net/mlx5/mlx5_glue.c
> +++ b/drivers/net/mlx5/mlx5_glue.c
> @@ -313,6 +313,21 @@ mlx5_glue_dv_init_obj(struct mlx5dv_obj *obj, uint64_t obj_type)
>  	return mlx5dv_init_obj(obj, obj_type);
>  }
>  
> +static struct ibv_qp *
> +mlx5_glue_dv_create_qp(struct ibv_context *context,
> +		       struct ibv_qp_init_attr_ex *qp_init_attr_ex,
> +		       struct mlx5dv_qp_init_attr *dv_qp_init_attr)
> +{
> +#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
> +	return mlx5dv_create_qp(context, qp_init_attr_ex, dv_qp_init_attr);
> +#else
> +	(void)context;
> +	(void)qp_init_attr_ex;
> +	(void)dv_qp_init_attr;
> +	return NULL;
> +#endif
> +}
> +
>  const struct mlx5_glue *mlx5_glue = &(const struct mlx5_glue){
>  	.version = MLX5_GLUE_VERSION,
>  	.fork_init = mlx5_glue_fork_init,
> @@ -356,4 +371,5 @@ const struct mlx5_glue *mlx5_glue = &(const struct mlx5_glue){
>  	.dv_query_device = mlx5_glue_dv_query_device,
>  	.dv_set_context_attr = mlx5_glue_dv_set_context_attr,
>  	.dv_init_obj = mlx5_glue_dv_init_obj,
> +	.dv_create_qp = mlx5_glue_dv_create_qp,
>  };
> diff --git a/drivers/net/mlx5/mlx5_glue.h b/drivers/net/mlx5/mlx5_glue.h
> index b5efee3b6..841363872 100644
> --- a/drivers/net/mlx5/mlx5_glue.h
> +++ b/drivers/net/mlx5/mlx5_glue.h
> @@ -31,6 +31,10 @@ struct ibv_counter_set_init_attr;
>  struct ibv_query_counter_set_attr;
>  #endif
>  
> +#ifndef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
> +struct mlx5dv_qp_init_attr;
> +#endif
> +
>  /* LIB_GLUE_VERSION must be updated every time this structure is modified. */
>  struct mlx5_glue {
>  	const char *version;
> @@ -106,6 +110,10 @@ struct mlx5_glue {
>  				   enum mlx5dv_set_ctx_attr_type type,
>  				   void *attr);
>  	int (*dv_init_obj)(struct mlx5dv_obj *obj, uint64_t obj_type);
> +	struct ibv_qp *(*dv_create_qp)
> +		(struct ibv_context *context,
> +		 struct ibv_qp_init_attr_ex *qp_init_attr_ex,
> +		 struct mlx5dv_qp_init_attr *dv_qp_init_attr);
>  };
>  
>  const struct mlx5_glue *mlx5_glue;
> diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
> index 073732e16..1997609ec 100644
> --- a/drivers/net/mlx5/mlx5_rxq.c
> +++ b/drivers/net/mlx5/mlx5_rxq.c
> @@ -1386,6 +1386,8 @@ mlx5_ind_table_ibv_verify(struct rte_eth_dev *dev)
>   *   Number of queues.
>   * @param tunnel
>   *   Tunnel type.
> + * @param rss_level
> + *   RSS hash on tunnel level.
>   *
>   * @return
>   *   The Verbs object initialised, NULL otherwise and rte_errno is set.
> @@ -1394,13 +1396,17 @@ struct mlx5_hrxq *
>  mlx5_hrxq_new(struct rte_eth_dev *dev,
>  	      const uint8_t *rss_key, uint32_t rss_key_len,
>  	      uint64_t hash_fields,
> -	      const uint16_t *queues, uint32_t queues_n, uint32_t tunnel)
> +	      const uint16_t *queues, uint32_t queues_n,
> +	      uint32_t tunnel, uint32_t rss_level)
>  {
>  	struct priv *priv = dev->data->dev_private;
>  	struct mlx5_hrxq *hrxq;
>  	struct mlx5_ind_table_ibv *ind_tbl;
>  	struct ibv_qp *qp;
>  	int err;
> +#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
> +	struct mlx5dv_qp_init_attr qp_init_attr = {0};
> +#endif
>  
>  	queues_n = hash_fields ? queues_n : 1;
>  	ind_tbl = mlx5_ind_table_ibv_get(dev, queues, queues_n);
> @@ -1410,6 +1416,36 @@ mlx5_hrxq_new(struct rte_eth_dev *dev,
>  		rte_errno = ENOMEM;
>  		return NULL;
>  	}
> +#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
> +	if (tunnel) {
> +		qp_init_attr.comp_mask =
> +				MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS;
> +		qp_init_attr.create_flags = MLX5DV_QP_CREATE_TUNNEL_OFFLOADS;
> +	}
> +	qp = mlx5_glue->dv_create_qp(
> +		priv->ctx,
> +		&(struct ibv_qp_init_attr_ex){
> +			.qp_type = IBV_QPT_RAW_PACKET,
> +			.comp_mask =
> +				IBV_QP_INIT_ATTR_PD |
> +				IBV_QP_INIT_ATTR_IND_TABLE |
> +				IBV_QP_INIT_ATTR_RX_HASH,
> +			.rx_hash_conf = (struct ibv_rx_hash_conf){
> +				.rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
> +				.rx_hash_key_len = rss_key_len ? rss_key_len :
> +						   rss_hash_default_key_len,
> +				.rx_hash_key = rss_key ?
> +					       (void *)(uintptr_t)rss_key :
> +					       rss_hash_default_key,
> +				.rx_hash_fields_mask = hash_fields |
> +					(tunnel && rss_level ?
> +					(uint32_t)IBV_RX_HASH_INNER : 0),
> +			},
> +			.rwq_ind_tbl = ind_tbl->ind_table,
> +			.pd = priv->pd,
> +		},
> +		&qp_init_attr);
> +#else
>  	qp = mlx5_glue->create_qp_ex
>  		(priv->ctx,
>  		 &(struct ibv_qp_init_attr_ex){
> @@ -1420,13 +1456,17 @@ mlx5_hrxq_new(struct rte_eth_dev *dev,
>  				IBV_QP_INIT_ATTR_RX_HASH,
>  			.rx_hash_conf = (struct ibv_rx_hash_conf){
>  				.rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
> -				.rx_hash_key_len = rss_key_len,
> -				.rx_hash_key = (void *)(uintptr_t)rss_key,
> +				.rx_hash_key_len = rss_key_len ? rss_key_len :
> +						   rss_hash_default_key_len,
> +				.rx_hash_key = rss_key ?
> +					       (void *)(uintptr_t)rss_key :
> +					       rss_hash_default_key,
>  				.rx_hash_fields_mask = hash_fields,
>  			},
>  			.rwq_ind_tbl = ind_tbl->ind_table,
>  			.pd = priv->pd,
>  		 });
> +#endif
>  	if (!qp) {
>  		rte_errno = errno;
>  		goto error;
> @@ -1439,6 +1479,7 @@ mlx5_hrxq_new(struct rte_eth_dev *dev,
>  	hrxq->rss_key_len = rss_key_len;
>  	hrxq->hash_fields = hash_fields;
>  	hrxq->tunnel = tunnel;
> +	hrxq->rss_level = rss_level;
>  	memcpy(hrxq->rss_key, rss_key, rss_key_len);
>  	rte_atomic32_inc(&hrxq->refcnt);
>  	LIST_INSERT_HEAD(&priv->hrxqs, hrxq, next);
> @@ -1448,6 +1489,8 @@ mlx5_hrxq_new(struct rte_eth_dev *dev,
>  	return hrxq;
>  error:
>  	err = rte_errno; /* Save rte_errno before cleanup. */
> +	DRV_LOG(ERR, "port %u: Error creating Hash Rx queue",
> +		dev->data->port_id);

Internal developer logs should not remain in the code.  The user will
already see a flow creation failure; there is no need to annoy them with
messages they cannot understand.

>  	mlx5_ind_table_ibv_release(dev, ind_tbl);
>  	if (qp)
>  		claim_zero(mlx5_glue->destroy_qp(qp));
> @@ -1469,6 +1512,8 @@ mlx5_hrxq_new(struct rte_eth_dev *dev,
>   *   Number of queues.
>   * @param tunnel
>   *   Tunnel type.
> + * @param rss_level
> + *   RSS hash on tunnel level
>   *
>   * @return
>   *   An hash Rx queue on success.
> @@ -1477,7 +1522,8 @@ struct mlx5_hrxq *
>  mlx5_hrxq_get(struct rte_eth_dev *dev,
>  	      const uint8_t *rss_key, uint32_t rss_key_len,
>  	      uint64_t hash_fields,
> -	      const uint16_t *queues, uint32_t queues_n, uint32_t tunnel)
> +	      const uint16_t *queues, uint32_t queues_n,
> +	      uint32_t tunnel, uint32_t rss_level)

rss_level > 1 implies a tunnel; there is no need to carry redundant
information.

>  {
>  	struct priv *priv = dev->data->dev_private;
>  	struct mlx5_hrxq *hrxq;
> @@ -1494,6 +1540,8 @@ mlx5_hrxq_get(struct rte_eth_dev *dev,
>  			continue;
>  		if (hrxq->tunnel != tunnel)
>  			continue;
> +		if (hrxq->rss_level != rss_level)
> +			continue;
>  		ind_tbl = mlx5_ind_table_ibv_get(dev, queues, queues_n);
>  		if (!ind_tbl)
>  			continue;
> diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
> index d35605b55..62cf55109 100644
> --- a/drivers/net/mlx5/mlx5_rxtx.h
> +++ b/drivers/net/mlx5/mlx5_rxtx.h
> @@ -147,6 +147,7 @@ struct mlx5_hrxq {
>  	struct ibv_qp *qp; /* Verbs queue pair. */
>  	uint64_t hash_fields; /* Verbs Hash fields. */
>  	uint32_t tunnel; /* Tunnel type. */
> +	uint32_t rss_level; /* RSS on tunnel level. */
>  	uint32_t rss_key_len; /* Hash key length in bytes. */
>  	uint8_t rss_key[]; /* Hash key. */
>  };
> @@ -251,12 +252,12 @@ struct mlx5_hrxq *mlx5_hrxq_new(struct rte_eth_dev *dev,
>  				const uint8_t *rss_key, uint32_t rss_key_len,
>  				uint64_t hash_fields,
>  				const uint16_t *queues, uint32_t queues_n,
> -				uint32_t tunnel);
> +				uint32_t tunnel, uint32_t rss_level);
>  struct mlx5_hrxq *mlx5_hrxq_get(struct rte_eth_dev *dev,
>  				const uint8_t *rss_key, uint32_t rss_key_len,
>  				uint64_t hash_fields,
>  				const uint16_t *queues, uint32_t queues_n,
> -				uint32_t tunnel);
> +				uint32_t tunnel, uint32_t rss_level);
>  int mlx5_hrxq_release(struct rte_eth_dev *dev, struct mlx5_hrxq *hxrq);
>  int mlx5_hrxq_ibv_verify(struct rte_eth_dev *dev);
>  uint64_t mlx5_get_rx_port_offloads(void);
> -- 
> 2.13.3
> 

Thanks,
-- 
Nélio Laranjeiro
6WIND


More information about the dev mailing list