[dpdk-dev] [PATCH v3 07/14] net/mlx5: support tunnel RSS level

Xueming(Steven) Li xuemingl at mellanox.com
Sat Apr 14 12:12:58 CEST 2018


Hi Nelio,

> -----Original Message-----
> From: Nélio Laranjeiro <nelio.laranjeiro at 6wind.com>
> Sent: Friday, April 13, 2018 9:27 PM
> To: Xueming(Steven) Li <xuemingl at mellanox.com>
> Cc: Shahaf Shuler <shahafs at mellanox.com>; dev at dpdk.org
> Subject: Re: [PATCH v3 07/14] net/mlx5: support tunnel RSS level
> 
> Seems you did not read my comments on this patch, I have exactly the same
> ones.

I finally found your previous comments in the trash box; not sure why Outlook
always delivers important mail to the trash box. Anyway, I updated the rules.

> 
> On Fri, Apr 13, 2018 at 07:20:16PM +0800, Xueming Li wrote:
> > Tunnel RSS level of flow RSS action offers user a choice to do RSS
> > hash calculation on inner or outer RSS fields. Testpmd flow command
> examples:
> >
> > GRE flow inner RSS:
> >   flow create 0 ingress pattern eth / ipv4 proto is 47 / gre / end
> > actions rss queues 1 2 end level 1 / end
> >
> > GRE tunnel flow outer RSS:
> >   flow create 0 ingress pattern eth  / ipv4 proto is 47 / gre / end
> > actions rss queues 1 2 end level 0 / end
> >
> > Signed-off-by: Xueming Li <xuemingl at mellanox.com>
> > ---
> >  drivers/net/mlx5/Makefile    |   2 +-
> >  drivers/net/mlx5/mlx5_flow.c | 247
> > +++++++++++++++++++++++++++++--------------
> >  drivers/net/mlx5/mlx5_glue.c |  16 +++
> >  drivers/net/mlx5/mlx5_glue.h |   8 ++
> >  drivers/net/mlx5/mlx5_rxq.c  |  56 +++++++++-
> >  drivers/net/mlx5/mlx5_rxtx.h |   5 +-
> >  6 files changed, 248 insertions(+), 86 deletions(-)
> >
> > diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
> > index ae118ad33..f9a6c460b 100644
> > --- a/drivers/net/mlx5/Makefile
> > +++ b/drivers/net/mlx5/Makefile
> > @@ -35,7 +35,7 @@ include $(RTE_SDK)/mk/rte.vars.mk  LIB =
> > librte_pmd_mlx5.a  LIB_GLUE = $(LIB_GLUE_BASE).$(LIB_GLUE_VERSION)
> >  LIB_GLUE_BASE = librte_pmd_mlx5_glue.so -LIB_GLUE_VERSION = 18.02.0
> > +LIB_GLUE_VERSION = 18.05.0
> >
> >  # Sources.
> >  SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5.c diff --git
> > a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c index
> > dd099f328..a22554706 100644
> > --- a/drivers/net/mlx5/mlx5_flow.c
> > +++ b/drivers/net/mlx5/mlx5_flow.c
> > @@ -116,6 +116,7 @@ enum hash_rxq_type {
> >  	HASH_RXQ_UDPV6,
> >  	HASH_RXQ_IPV6,
> >  	HASH_RXQ_ETH,
> > +	HASH_RXQ_TUNNEL,
> >  };
> >
> >  /* Initialization data for hash RX queue. */ @@ -454,6 +455,7 @@
> > struct mlx5_flow_parse {
> >  	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use.
> */
> >  	uint8_t rss_key[40]; /**< copy of the RSS key. */
> >  	enum hash_rxq_type layer; /**< Last pattern layer detected. */
> > +	enum hash_rxq_type out_layer; /**< Last outer pattern layer
> > +detected. */
> >  	uint32_t tunnel; /**< Tunnel type of RTE_PTYPE_TUNNEL_XXX. */
> >  	struct ibv_counter_set *cs; /**< Holds the counter set for the rule
> */
> >  	struct {
> > @@ -461,6 +463,7 @@ struct mlx5_flow_parse {
> >  		/**< Pointer to Verbs attributes. */
> >  		unsigned int offset;
> >  		/**< Current position or total size of the attribute. */
> > +		uint64_t hash_fields; /**< Verbs hash fields. */
> >  	} queue[RTE_DIM(hash_rxq_init)];
> >  };
> >
> > @@ -696,7 +699,8 @@ mlx5_flow_convert_actions(struct rte_eth_dev *dev,
> >  						   " function is Toeplitz");
> >  				return -rte_errno;
> >  			}
> > -			if (rss->level) {
> > +#ifndef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
> > +			if (parser->rss_conf.level > 0) {
> >  				rte_flow_error_set(error, EINVAL,
> >  						   RTE_FLOW_ERROR_TYPE_ACTION,
> >  						   actions,
> > @@ -704,6 +708,15 @@ mlx5_flow_convert_actions(struct rte_eth_dev *dev,
> >  						   " level is not supported");
> >  				return -rte_errno;
> >  			}
> > +#endif
> > +			if (parser->rss_conf.level > 1) {
> > +				rte_flow_error_set(error, EINVAL,
> > +						   RTE_FLOW_ERROR_TYPE_ACTION,
> > +						   actions,
> > +						   "RSS encapsulation level"
> > +						   " > 1 is not supported");
> > +				return -rte_errno;
> > +			}
> >  			if (rss->types & MLX5_RSS_HF_MASK) {
> >  				rte_flow_error_set(error, EINVAL,
> >  						   RTE_FLOW_ERROR_TYPE_ACTION,
> 
> Same comment as in previous review.
> The levels are not matching the proposed API.
> Level  0 = unspecified, 1 = outermost, 2 = next outermost, 3 = next
> next ...

Thanks, this was a big omission in the rebase.

> 
> > @@ -754,7 +767,7 @@ mlx5_flow_convert_actions(struct rte_eth_dev *dev,
> >  			}
> >  			parser->rss_conf = (struct rte_flow_action_rss){
> >  				.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
> > -				.level = 0,
> > +				.level = rss->level,
> >  				.types = rss->types,
> >  				.key_len = rss_key_len,
> >  				.queue_num = rss->queue_num,
> > @@ -838,10 +851,12 @@ mlx5_flow_convert_actions(struct rte_eth_dev *dev,
> >   *   0 on success, a negative errno value otherwise and rte_errno is
> set.
> >   */
> >  static int
> > -mlx5_flow_convert_items_validate(const struct rte_flow_item items[],
> > +mlx5_flow_convert_items_validate(struct rte_eth_dev *dev,
> > +				 const struct rte_flow_item items[],
> >  				 struct rte_flow_error *error,
> >  				 struct mlx5_flow_parse *parser)
> >  {
> > +	struct priv *priv = dev->data->dev_private;
> >  	const struct mlx5_flow_items *cur_item = mlx5_flow_items;
> >  	unsigned int i;
> >  	int ret = 0;
> > @@ -881,6 +896,14 @@ mlx5_flow_convert_items_validate(const struct
> rte_flow_item items[],
> >  						   " tunnel encapsulations.");
> >  				return -rte_errno;
> >  			}
> > +			if (!priv->config.tunnel_en &&
> > +			    parser->rss_conf.level) {
> > +				rte_flow_error_set(error, ENOTSUP,
> > +					RTE_FLOW_ERROR_TYPE_ITEM,
> > +					items,
> > +					"Tunnel offloading not enabled");
> > +				return -rte_errno;
> > +			}
> >  			parser->inner = IBV_FLOW_SPEC_INNER;
> >  			parser->tunnel = flow_ptype[items->type];
> >  		}
> > @@ -1000,7 +1023,11 @@ static void
> >  mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)  {
> >  	unsigned int i;
> > +	uint32_t inner = parser->inner;
> >
> > +	/* Don't create extra flows for outer RSS. */
> > +	if (parser->tunnel && !parser->rss_conf.level)
> > +		return;
> >  	/*
> >  	 * Fill missing layers in verbs specifications, or compute the
> correct
> >  	 * offset to allocate the memory space for the attributes and @@
> > -1011,23 +1038,25 @@ mlx5_flow_convert_finalise(struct mlx5_flow_parse
> *parser)
> >  			struct ibv_flow_spec_ipv4_ext ipv4;
> >  			struct ibv_flow_spec_ipv6 ipv6;
> >  			struct ibv_flow_spec_tcp_udp udp_tcp;
> > +			struct ibv_flow_spec_eth eth;
> >  		} specs;
> >  		void *dst;
> >  		uint16_t size;
> >
> >  		if (i == parser->layer)
> >  			continue;
> > -		if (parser->layer == HASH_RXQ_ETH) {
> > +		if (parser->layer == HASH_RXQ_ETH ||
> > +		    parser->layer == HASH_RXQ_TUNNEL) {
> >  			if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
> >  				size = sizeof(struct ibv_flow_spec_ipv4_ext);
> >  				specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
> > -					.type = IBV_FLOW_SPEC_IPV4_EXT,
> > +					.type = inner | IBV_FLOW_SPEC_IPV4_EXT,
> >  					.size = size,
> >  				};
> >  			} else {
> >  				size = sizeof(struct ibv_flow_spec_ipv6);
> >  				specs.ipv6 = (struct ibv_flow_spec_ipv6){
> > -					.type = IBV_FLOW_SPEC_IPV6,
> > +					.type = inner | IBV_FLOW_SPEC_IPV6,
> >  					.size = size,
> >  				};
> >  			}
> > @@ -1044,7 +1073,7 @@ mlx5_flow_convert_finalise(struct mlx5_flow_parse
> *parser)
> >  		    (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
> >  			size = sizeof(struct ibv_flow_spec_tcp_udp);
> >  			specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
> > -				.type = ((i == HASH_RXQ_UDPV4 ||
> > +				.type = inner | ((i == HASH_RXQ_UDPV4 ||
> >  					  i == HASH_RXQ_UDPV6) ?
> >  					 IBV_FLOW_SPEC_UDP :
> >  					 IBV_FLOW_SPEC_TCP),
> > @@ -1065,6 +1094,8 @@ mlx5_flow_convert_finalise(struct
> > mlx5_flow_parse *parser)
> >  /**
> >   * Update flows according to pattern and RSS hash fields.
> >   *
> > + * @param dev
> > + *   Pointer to Ethernet device.
> >   * @param[in, out] parser
> >   *   Internal parser structure.
> >   *
> > @@ -1072,16 +1103,17 @@ mlx5_flow_convert_finalise(struct
> mlx5_flow_parse *parser)
> >   *   0 on success, a negative errno value otherwise and rte_errno is
> set.
> >   */
> >  static int
> > -mlx5_flow_convert_rss(struct mlx5_flow_parse *parser)
> > +mlx5_flow_convert_rss(struct rte_eth_dev *dev, struct mlx5_flow_parse
> > +*parser)
> >  {
> > -	const unsigned int ipv4 =
> > +	unsigned int ipv4 =
> >  		hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
> >  	const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 :
> HASH_RXQ_TCPV6;
> >  	const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
> >  	const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 :
> HASH_RXQ_TCPV4;
> >  	const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 :
> HASH_RXQ_IPV4;
> > -	const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
> > +	enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
> >  	unsigned int i;
> > +	int found = 0;
> >
> >  	/* Remove any other flow not matching the pattern. */
> >  	if (parser->rss_conf.queue_num == 1 && !parser->rss_conf.types) { @@
> > -1093,9 +1125,51 @@ mlx5_flow_convert_rss(struct mlx5_flow_parse *parser)
> >  		}
> >  		return 0;
> >  	}
> > -	if (parser->layer == HASH_RXQ_ETH)
> > +	/*
> > +	 * Outer RSS.
> > +	 * HASH_RXQ_ETH is the only rule since tunnel packet match this
> > +	 * rule must match outer pattern.
> > +	 */
> > +	if (parser->tunnel && !parser->rss_conf.level) {
> > +		/* Remove flows other than default. */
> > +		for (i = 0; i != hash_rxq_init_n - 1; ++i) {
> > +			rte_free(parser->queue[i].ibv_attr);
> > +			parser->queue[i].ibv_attr = NULL;
> > +		}
> > +		ipv4 = hash_rxq_init[parser->out_layer].ip_version ==
> MLX5_IPV4;
> > +		ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
> > +		if (hash_rxq_init[parser->out_layer].dpdk_rss_hf &
> > +		    parser->rss_conf.types) {
> > +			parser->queue[HASH_RXQ_ETH].hash_fields =
> > +				hash_rxq_init[parser->out_layer].hash_fields;
> > +		} else if (ip && (hash_rxq_init[ip].dpdk_rss_hf &
> > +		    parser->rss_conf.types)) {
> > +			parser->queue[HASH_RXQ_ETH].hash_fields =
> > +				hash_rxq_init[ip].hash_fields;
> > +		} else if (parser->rss_conf.types) {
> > +			DRV_LOG(WARNING,
> > +				"port %u rss outer hash function doesn't match"
> > +				" pattern", dev->data->port_id);
> > +		}
> > +		return 0;
> > +	}
> > +	if (parser->layer == HASH_RXQ_ETH || parser->layer ==
> HASH_RXQ_TUNNEL) {
> > +		/* Remove unused flows according to hash function. */
> > +		for (i = 0; i != hash_rxq_init_n - 1; ++i) {
> > +			if (!parser->queue[i].ibv_attr)
> > +				continue;
> > +			if (hash_rxq_init[i].dpdk_rss_hf &
> > +			    parser->rss_conf.types) {
> > +				parser->queue[i].hash_fields =
> > +					hash_rxq_init[i].hash_fields;
> > +				continue;
> > +			}
> > +			rte_free(parser->queue[i].ibv_attr);
> > +			parser->queue[i].ibv_attr = NULL;
> > +		}
> >  		return 0;
> > -	/* This layer becomes useless as the pattern define under layers. */
> > +	}
> > +	/* Remove ETH layer flow. */
> >  	rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
> >  	parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
> >  	/* Remove opposite kind of layer e.g. IPv6 if the pattern is IPv4.
> > */ @@ -1105,20 +1179,50 @@ mlx5_flow_convert_rss(struct mlx5_flow_parse
> *parser)
> >  		rte_free(parser->queue[i].ibv_attr);
> >  		parser->queue[i].ibv_attr = NULL;
> >  	}
> > -	/* Remove impossible flow according to the RSS configuration. */
> > -	if (hash_rxq_init[parser->layer].dpdk_rss_hf &
> > -	    parser->rss_conf.types) {
> > -		/* Remove any other flow. */
> > +	/*
> > +	 * Keep L4 flows as IP pattern has to support L4 RSS.
> > +	 * Otherwise, only keep the flow that match the pattern.
> > +	 */
> > +	if (parser->layer != ip) {
> > +		/* Only keep the flow that match the pattern. */
> >  		for (i = hmin; i != (hmax + 1); ++i) {
> > -			if (i == parser->layer || !parser->queue[i].ibv_attr)
> > +			if (i == parser->layer)
> >  				continue;
> >  			rte_free(parser->queue[i].ibv_attr);
> >  			parser->queue[i].ibv_attr = NULL;
> >  		}
> > -	} else if (!parser->queue[ip].ibv_attr) {
> > -		/* no RSS possible with the current configuration. */
> > -		parser->rss_conf.queue_num = 1;
> >  	}
> > +	/* Remove impossible flow according to the RSS configuration. */
> > +	for (i = hmin; i != (hmax + 1); ++i) {
> > +		if (!parser->queue[i].ibv_attr)
> > +			continue;
> > +		if (parser->rss_conf.types &
> > +		    hash_rxq_init[i].dpdk_rss_hf) {
> > +			parser->queue[i].hash_fields =
> > +				hash_rxq_init[i].hash_fields;
> > +			found = 1;
> > +			continue;
> > +		}
> > +		/* L4 flow could be used for L3 RSS. */
> > +		if (i == parser->layer && i < ip &&
> > +		    (hash_rxq_init[ip].dpdk_rss_hf &
> > +		     parser->rss_conf.types)) {
> > +			parser->queue[i].hash_fields =
> > +				hash_rxq_init[ip].hash_fields;
> > +			found = 1;
> > +			continue;
> > +		}
> > +		/* L3 flow and L4 hash: non-rss L3 flow. */
> > +		if (i == parser->layer && i == ip && found)
> > +			/* IP pattern and L4 HF. */
> > +			continue;
> > +		rte_free(parser->queue[i].ibv_attr);
> > +		parser->queue[i].ibv_attr = NULL;
> > +	}
> > +	if (!found)
> > +		DRV_LOG(WARNING,
> > +			"port %u rss hash function doesn't match "
> > +			"pattern", dev->data->port_id);
> 
> The hash function is toeplitz, xor, it is not applied on the pattern but
> used to compute an hash result using some information from the packet.
> This comment is totally wrong.

Thanks, I'll replace "hash function" with "hash fields".

> 
> Another point, such log will trigger on an application using MLX5 PMD but
> not on MLX4 PMD and this specifically because on how the NIC using the
> MLX5 PMD are made internally (MLX4 can use a single Hash RX queue whereas
> MLX5 needs an Hash Rx queue per kind of protocol).
> The fact being it will have the exact same behavior I'll *strongly*
> suggest to remove such annoying warning.

After some tests on the current mlx5 code, the behavior of the previous code
doesn't seem to be consistent; I'm not sure whether it is the same in the mlx4 PMD:
- Pattern: eth/ipv4/tcp, RSS: UDP — creation succeeds.
- Pattern: eth/ipv4, RSS: IPv6 — creation fails.

This patch supports the 2nd case w/o hash, and warns upon the first case.
Taking the first case as an example, a packet that matches the pattern must be
TCP, so there is no reason to hash it as UDP; the same applies to the 2nd case.
They are totally wrong configurations, but to be robust, a warning is used
here, and users have to learn from this warning message that there is NO hash
result because of the hash-fields configuration mismatch.

Please note that the below cases are valid and produce no warning:
- Pattern: eth/ipv4, RSS: UDP
- Pattern: eth/ipv4/udp, RSS: IPv4

> 
> >  	return 0;
> >  }
> >
> > @@ -1165,7 +1269,7 @@ mlx5_flow_convert(struct rte_eth_dev *dev,
> >  	ret = mlx5_flow_convert_actions(dev, actions, error, parser);
> >  	if (ret)
> >  		return ret;
> > -	ret = mlx5_flow_convert_items_validate(items, error, parser);
> > +	ret = mlx5_flow_convert_items_validate(dev, items, error, parser);
> >  	if (ret)
> >  		return ret;
> >  	mlx5_flow_convert_finalise(parser);
> > @@ -1186,10 +1290,6 @@ mlx5_flow_convert(struct rte_eth_dev *dev,
> >  		for (i = 0; i != hash_rxq_init_n; ++i) {
> >  			unsigned int offset;
> >
> > -			if (!(parser->rss_conf.types &
> > -			      hash_rxq_init[i].dpdk_rss_hf) &&
> > -			    (i != HASH_RXQ_ETH))
> > -				continue;
> >  			offset = parser->queue[i].offset;
> >  			parser->queue[i].ibv_attr =
> >  				mlx5_flow_convert_allocate(offset, error); @@ -
> 1201,6 +1301,7 @@
> > mlx5_flow_convert(struct rte_eth_dev *dev,
> >  	/* Third step. Conversion parse, fill the specifications. */
> >  	parser->inner = 0;
> >  	parser->tunnel = 0;
> > +	parser->layer = HASH_RXQ_ETH;
> >  	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
> >  		struct mlx5_flow_data data = {
> >  			.parser = parser,
> > @@ -1218,23 +1319,23 @@ mlx5_flow_convert(struct rte_eth_dev *dev,
> >  		if (ret)
> >  			goto exit_free;
> >  	}
> > -	if (parser->mark)
> > -		mlx5_flow_create_flag_mark(parser, parser->mark_id);
> > -	if (parser->count && parser->create) {
> > -		mlx5_flow_create_count(dev, parser);
> > -		if (!parser->cs)
> > -			goto exit_count_error;
> > -	}
> >  	/*
> >  	 * Last step. Complete missing specification to reach the RSS
> >  	 * configuration.
> >  	 */
> >  	if (!parser->drop)
> > -		ret = mlx5_flow_convert_rss(parser);
> > +		ret = mlx5_flow_convert_rss(dev, parser);
> >  		if (ret)
> >  			goto exit_free;
> >  		mlx5_flow_convert_finalise(parser);
> >  	mlx5_flow_update_priority(dev, parser, attr);
> > +	if (parser->mark)
> > +		mlx5_flow_create_flag_mark(parser, parser->mark_id);
> > +	if (parser->count && parser->create) {
> > +		mlx5_flow_create_count(dev, parser);
> > +		if (!parser->cs)
> > +			goto exit_count_error;
> > +	}
> >  exit_free:
> >  	/* Only verification is expected, all resources should be released.
> */
> >  	if (!parser->create) {
> > @@ -1282,17 +1383,11 @@ mlx5_flow_create_copy(struct mlx5_flow_parse
> *parser, void *src,
> >  	for (i = 0; i != hash_rxq_init_n; ++i) {
> >  		if (!parser->queue[i].ibv_attr)
> >  			continue;
> > -		/* Specification must be the same l3 type or none. */
> > -		if (parser->layer == HASH_RXQ_ETH ||
> > -		    (hash_rxq_init[parser->layer].ip_version ==
> > -		     hash_rxq_init[i].ip_version) ||
> > -		    (hash_rxq_init[i].ip_version == 0)) {
> > -			dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
> > -					parser->queue[i].offset);
> > -			memcpy(dst, src, size);
> > -			++parser->queue[i].ibv_attr->num_of_specs;
> > -			parser->queue[i].offset += size;
> > -		}
> > +		dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
> > +				parser->queue[i].offset);
> > +		memcpy(dst, src, size);
> > +		++parser->queue[i].ibv_attr->num_of_specs;
> > +		parser->queue[i].offset += size;
> >  	}
> >  }
> >
> > @@ -1323,9 +1418,7 @@ mlx5_flow_create_eth(const struct rte_flow_item
> *item,
> >  		.size = eth_size,
> >  	};
> >
> > -	/* Don't update layer for the inner pattern. */
> > -	if (!parser->inner)
> > -		parser->layer = HASH_RXQ_ETH;
> > +	parser->layer = HASH_RXQ_ETH;
> >  	if (spec) {
> >  		unsigned int i;
> >
> > @@ -1438,9 +1531,7 @@ mlx5_flow_create_ipv4(const struct rte_flow_item
> *item,
> >  		.size = ipv4_size,
> >  	};
> >
> > -	/* Don't update layer for the inner pattern. */
> > -	if (!parser->inner)
> > -		parser->layer = HASH_RXQ_IPV4;
> > +	parser->layer = HASH_RXQ_IPV4;
> >  	if (spec) {
> >  		if (!mask)
> >  			mask = default_mask;
> > @@ -1493,9 +1584,7 @@ mlx5_flow_create_ipv6(const struct rte_flow_item
> *item,
> >  		.size = ipv6_size,
> >  	};
> >
> > -	/* Don't update layer for the inner pattern. */
> > -	if (!parser->inner)
> > -		parser->layer = HASH_RXQ_IPV6;
> > +	parser->layer = HASH_RXQ_IPV6;
> >  	if (spec) {
> >  		unsigned int i;
> >  		uint32_t vtc_flow_val;
> > @@ -1568,13 +1657,10 @@ mlx5_flow_create_udp(const struct rte_flow_item
> *item,
> >  		.size = udp_size,
> >  	};
> >
> > -	/* Don't update layer for the inner pattern. */
> > -	if (!parser->inner) {
> > -		if (parser->layer == HASH_RXQ_IPV4)
> > -			parser->layer = HASH_RXQ_UDPV4;
> > -		else
> > -			parser->layer = HASH_RXQ_UDPV6;
> > -	}
> > +	if (parser->layer == HASH_RXQ_IPV4)
> > +		parser->layer = HASH_RXQ_UDPV4;
> > +	else
> > +		parser->layer = HASH_RXQ_UDPV6;
> >  	if (spec) {
> >  		if (!mask)
> >  			mask = default_mask;
> > @@ -1617,13 +1703,10 @@ mlx5_flow_create_tcp(const struct rte_flow_item
> *item,
> >  		.size = tcp_size,
> >  	};
> >
> > -	/* Don't update layer for the inner pattern. */
> > -	if (!parser->inner) {
> > -		if (parser->layer == HASH_RXQ_IPV4)
> > -			parser->layer = HASH_RXQ_TCPV4;
> > -		else
> > -			parser->layer = HASH_RXQ_TCPV6;
> > -	}
> > +	if (parser->layer == HASH_RXQ_IPV4)
> > +		parser->layer = HASH_RXQ_TCPV4;
> > +	else
> > +		parser->layer = HASH_RXQ_TCPV6;
> >  	if (spec) {
> >  		if (!mask)
> >  			mask = default_mask;
> > @@ -1673,6 +1756,8 @@ mlx5_flow_create_vxlan(const struct rte_flow_item
> *item,
> >  	id.vni[0] = 0;
> >  	parser->inner = IBV_FLOW_SPEC_INNER;
> >  	parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_VXLAN)];
> > +	parser->out_layer = parser->layer;
> > +	parser->layer = HASH_RXQ_TUNNEL;
> >  	if (spec) {
> >  		if (!mask)
> >  			mask = default_mask;
> > @@ -1727,6 +1812,8 @@ mlx5_flow_create_gre(const struct rte_flow_item
> > *item __rte_unused,
> >
> >  	parser->inner = IBV_FLOW_SPEC_INNER;
> >  	parser->tunnel = ptype_ext[PTYPE_IDX(RTE_PTYPE_TUNNEL_GRE)];
> > +	parser->out_layer = parser->layer;
> > +	parser->layer = HASH_RXQ_TUNNEL;
> >  	mlx5_flow_create_copy(parser, &tunnel, size);
> >  	return 0;
> >  }
> > @@ -1890,33 +1977,33 @@ mlx5_flow_create_action_queue_rss(struct
> rte_eth_dev *dev,
> >  	unsigned int i;
> >
> >  	for (i = 0; i != hash_rxq_init_n; ++i) {
> > -		uint64_t hash_fields;
> > -
> >  		if (!parser->queue[i].ibv_attr)
> >  			continue;
> >  		flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
> >  		parser->queue[i].ibv_attr = NULL;
> > -		hash_fields = hash_rxq_init[i].hash_fields;
> > +		flow->frxq[i].hash_fields = parser->queue[i].hash_fields;
> >  		if (!priv->dev->data->dev_started)
> >  			continue;
> >  		flow->frxq[i].hrxq =
> >  			mlx5_hrxq_get(dev,
> >  				      parser->rss_conf.key,
> >  				      parser->rss_conf.key_len,
> > -				      hash_fields,
> > +				      flow->frxq[i].hash_fields,
> >  				      parser->rss_conf.queue,
> >  				      parser->rss_conf.queue_num,
> > -				      parser->tunnel);
> > +				      parser->tunnel,
> > +				      parser->rss_conf.level);
> >  		if (flow->frxq[i].hrxq)
> >  			continue;
> >  		flow->frxq[i].hrxq =
> >  			mlx5_hrxq_new(dev,
> >  				      parser->rss_conf.key,
> >  				      parser->rss_conf.key_len,
> > -				      hash_fields,
> > +				      flow->frxq[i].hash_fields,
> >  				      parser->rss_conf.queue,
> >  				      parser->rss_conf.queue_num,
> > -				      parser->tunnel);
> > +				      parser->tunnel,
> > +				      parser->rss_conf.level);
> >  		if (!flow->frxq[i].hrxq) {
> >  			return rte_flow_error_set(error, ENOMEM,
> >  						  RTE_FLOW_ERROR_TYPE_HANDLE,
> > @@ -2013,7 +2100,7 @@ mlx5_flow_create_action_queue(struct rte_eth_dev
> *dev,
> >  		DRV_LOG(DEBUG, "port %u %p type %d QP %p ibv_flow %p",
> >  			dev->data->port_id,
> >  			(void *)flow, i,
> > -			(void *)flow->frxq[i].hrxq,
> > +			(void *)flow->frxq[i].hrxq->qp,
> >  			(void *)flow->frxq[i].ibv_flow);
> >  	}
> >  	if (!flows_n) {
> > @@ -2541,19 +2628,21 @@ mlx5_flow_start(struct rte_eth_dev *dev, struct
> mlx5_flows *list)
> >  			flow->frxq[i].hrxq =
> >  				mlx5_hrxq_get(dev, flow->rss_conf.key,
> >  					      flow->rss_conf.key_len,
> > -					      hash_rxq_init[i].hash_fields,
> > +					      flow->frxq[i].hash_fields,
> >  					      flow->rss_conf.queue,
> >  					      flow->rss_conf.queue_num,
> > -					      flow->tunnel);
> > +					      flow->tunnel,
> > +					      flow->rss_conf.level);
> >  			if (flow->frxq[i].hrxq)
> >  				goto flow_create;
> >  			flow->frxq[i].hrxq =
> >  				mlx5_hrxq_new(dev, flow->rss_conf.key,
> >  					      flow->rss_conf.key_len,
> > -					      hash_rxq_init[i].hash_fields,
> > +					      flow->frxq[i].hash_fields,
> >  					      flow->rss_conf.queue,
> >  					      flow->rss_conf.queue_num,
> > -					      flow->tunnel);
> > +					      flow->tunnel,
> > +					      flow->rss_conf.level);
> >  			if (!flow->frxq[i].hrxq) {
> >  				DRV_LOG(DEBUG,
> >  					"port %u flow %p cannot be applied", diff --
> git
> > a/drivers/net/mlx5/mlx5_glue.c b/drivers/net/mlx5/mlx5_glue.c index
> > be684d378..6874aa32a 100644
> > --- a/drivers/net/mlx5/mlx5_glue.c
> > +++ b/drivers/net/mlx5/mlx5_glue.c
> > @@ -313,6 +313,21 @@ mlx5_glue_dv_init_obj(struct mlx5dv_obj *obj,
> uint64_t obj_type)
> >  	return mlx5dv_init_obj(obj, obj_type);  }
> >
> > +static struct ibv_qp *
> > +mlx5_glue_dv_create_qp(struct ibv_context *context,
> > +		       struct ibv_qp_init_attr_ex *qp_init_attr_ex,
> > +		       struct mlx5dv_qp_init_attr *dv_qp_init_attr) { #ifdef
> > +HAVE_IBV_DEVICE_TUNNEL_SUPPORT
> > +	return mlx5dv_create_qp(context, qp_init_attr_ex, dv_qp_init_attr);
> > +#else
> > +	(void)context;
> > +	(void)qp_init_attr_ex;
> > +	(void)dv_qp_init_attr;
> > +	return NULL;
> > +#endif
> > +}
> > +
> >  const struct mlx5_glue *mlx5_glue = &(const struct mlx5_glue){
> >  	.version = MLX5_GLUE_VERSION,
> >  	.fork_init = mlx5_glue_fork_init,
> > @@ -356,4 +371,5 @@ const struct mlx5_glue *mlx5_glue = &(const struct
> mlx5_glue){
> >  	.dv_query_device = mlx5_glue_dv_query_device,
> >  	.dv_set_context_attr = mlx5_glue_dv_set_context_attr,
> >  	.dv_init_obj = mlx5_glue_dv_init_obj,
> > +	.dv_create_qp = mlx5_glue_dv_create_qp,
> >  };
> > diff --git a/drivers/net/mlx5/mlx5_glue.h
> > b/drivers/net/mlx5/mlx5_glue.h index b5efee3b6..841363872 100644
> > --- a/drivers/net/mlx5/mlx5_glue.h
> > +++ b/drivers/net/mlx5/mlx5_glue.h
> > @@ -31,6 +31,10 @@ struct ibv_counter_set_init_attr;  struct
> > ibv_query_counter_set_attr;  #endif
> >
> > +#ifndef HAVE_IBV_DEVICE_TUNNEL_SUPPORT struct mlx5dv_qp_init_attr;
> > +#endif
> > +
> >  /* LIB_GLUE_VERSION must be updated every time this structure is
> > modified. */  struct mlx5_glue {
> >  	const char *version;
> > @@ -106,6 +110,10 @@ struct mlx5_glue {
> >  				   enum mlx5dv_set_ctx_attr_type type,
> >  				   void *attr);
> >  	int (*dv_init_obj)(struct mlx5dv_obj *obj, uint64_t obj_type);
> > +	struct ibv_qp *(*dv_create_qp)
> > +		(struct ibv_context *context,
> > +		 struct ibv_qp_init_attr_ex *qp_init_attr_ex,
> > +		 struct mlx5dv_qp_init_attr *dv_qp_init_attr);
> >  };
> >
> >  const struct mlx5_glue *mlx5_glue;
> > diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
> > index 073732e16..1997609ec 100644
> > --- a/drivers/net/mlx5/mlx5_rxq.c
> > +++ b/drivers/net/mlx5/mlx5_rxq.c
> > @@ -1386,6 +1386,8 @@ mlx5_ind_table_ibv_verify(struct rte_eth_dev *dev)
> >   *   Number of queues.
> >   * @param tunnel
> >   *   Tunnel type.
> > + * @param rss_level
> > + *   RSS hash on tunnel level.
> >   *
> >   * @return
> >   *   The Verbs object initialised, NULL otherwise and rte_errno is set.
> > @@ -1394,13 +1396,17 @@ struct mlx5_hrxq *  mlx5_hrxq_new(struct
> > rte_eth_dev *dev,
> >  	      const uint8_t *rss_key, uint32_t rss_key_len,
> >  	      uint64_t hash_fields,
> > -	      const uint16_t *queues, uint32_t queues_n, uint32_t tunnel)
> > +	      const uint16_t *queues, uint32_t queues_n,
> > +	      uint32_t tunnel, uint32_t rss_level)
> >  {
> >  	struct priv *priv = dev->data->dev_private;
> >  	struct mlx5_hrxq *hrxq;
> >  	struct mlx5_ind_table_ibv *ind_tbl;
> >  	struct ibv_qp *qp;
> >  	int err;
> > +#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
> > +	struct mlx5dv_qp_init_attr qp_init_attr = {0}; #endif
> >
> >  	queues_n = hash_fields ? queues_n : 1;
> >  	ind_tbl = mlx5_ind_table_ibv_get(dev, queues, queues_n); @@ -1410,6
> > +1416,36 @@ mlx5_hrxq_new(struct rte_eth_dev *dev,
> >  		rte_errno = ENOMEM;
> >  		return NULL;
> >  	}
> > +#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
> > +	if (tunnel) {
> > +		qp_init_attr.comp_mask =
> > +				MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS;
> > +		qp_init_attr.create_flags = MLX5DV_QP_CREATE_TUNNEL_OFFLOADS;
> > +	}
> > +	qp = mlx5_glue->dv_create_qp(
> > +		priv->ctx,
> > +		&(struct ibv_qp_init_attr_ex){
> > +			.qp_type = IBV_QPT_RAW_PACKET,
> > +			.comp_mask =
> > +				IBV_QP_INIT_ATTR_PD |
> > +				IBV_QP_INIT_ATTR_IND_TABLE |
> > +				IBV_QP_INIT_ATTR_RX_HASH,
> > +			.rx_hash_conf = (struct ibv_rx_hash_conf){
> > +				.rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
> > +				.rx_hash_key_len = rss_key_len ? rss_key_len :
> > +						   rss_hash_default_key_len,
> > +				.rx_hash_key = rss_key ?
> > +					       (void *)(uintptr_t)rss_key :
> > +					       rss_hash_default_key,
> > +				.rx_hash_fields_mask = hash_fields |
> > +					(tunnel && rss_level ?
> > +					(uint32_t)IBV_RX_HASH_INNER : 0),
> > +			},
> > +			.rwq_ind_tbl = ind_tbl->ind_table,
> > +			.pd = priv->pd,
> > +		},
> > +		&qp_init_attr);
> > +#else
> >  	qp = mlx5_glue->create_qp_ex
> >  		(priv->ctx,
> >  		 &(struct ibv_qp_init_attr_ex){
> > @@ -1420,13 +1456,17 @@ mlx5_hrxq_new(struct rte_eth_dev *dev,
> >  				IBV_QP_INIT_ATTR_RX_HASH,
> >  			.rx_hash_conf = (struct ibv_rx_hash_conf){
> >  				.rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
> > -				.rx_hash_key_len = rss_key_len,
> > -				.rx_hash_key = (void *)(uintptr_t)rss_key,
> > +				.rx_hash_key_len = rss_key_len ? rss_key_len :
> > +						   rss_hash_default_key_len,
> > +				.rx_hash_key = rss_key ?
> > +					       (void *)(uintptr_t)rss_key :
> > +					       rss_hash_default_key,
> >  				.rx_hash_fields_mask = hash_fields,
> >  			},
> >  			.rwq_ind_tbl = ind_tbl->ind_table,
> >  			.pd = priv->pd,
> >  		 });
> > +#endif
> >  	if (!qp) {
> >  		rte_errno = errno;
> >  		goto error;
> > @@ -1439,6 +1479,7 @@ mlx5_hrxq_new(struct rte_eth_dev *dev,
> >  	hrxq->rss_key_len = rss_key_len;
> >  	hrxq->hash_fields = hash_fields;
> >  	hrxq->tunnel = tunnel;
> > +	hrxq->rss_level = rss_level;
> >  	memcpy(hrxq->rss_key, rss_key, rss_key_len);
> >  	rte_atomic32_inc(&hrxq->refcnt);
> >  	LIST_INSERT_HEAD(&priv->hrxqs, hrxq, next); @@ -1448,6 +1489,8 @@
> > mlx5_hrxq_new(struct rte_eth_dev *dev,
> >  	return hrxq;
> >  error:
> >  	err = rte_errno; /* Save rte_errno before cleanup. */
> > +	DRV_LOG(ERR, "port %u: Error creating Hash Rx queue",
> > +		dev->data->port_id);
> 
> Internal developer log should not remain in the code.  The user will
> already have a flow creation failure, there is no need to annoy him with
> messages he cannot understand.
> 
> >  	mlx5_ind_table_ibv_release(dev, ind_tbl);
> >  	if (qp)
> >  		claim_zero(mlx5_glue->destroy_qp(qp));
> > @@ -1469,6 +1512,8 @@ mlx5_hrxq_new(struct rte_eth_dev *dev,
> >   *   Number of queues.
> >   * @param tunnel
> >   *   Tunnel type.
> > + * @param rss_level
> > + *   RSS hash on tunnel level
> >   *
> >   * @return
> >   *   An hash Rx queue on success.
> > @@ -1477,7 +1522,8 @@ struct mlx5_hrxq *  mlx5_hrxq_get(struct
> > rte_eth_dev *dev,
> >  	      const uint8_t *rss_key, uint32_t rss_key_len,
> >  	      uint64_t hash_fields,
> > -	      const uint16_t *queues, uint32_t queues_n, uint32_t tunnel)
> > +	      const uint16_t *queues, uint32_t queues_n,
> > +	      uint32_t tunnel, uint32_t rss_level)
> 
> rss_level > 1 means tunnel, there is no need to have a redundant
> information.
> 
> >  {
> >  	struct priv *priv = dev->data->dev_private;
> >  	struct mlx5_hrxq *hrxq;
> > @@ -1494,6 +1540,8 @@ mlx5_hrxq_get(struct rte_eth_dev *dev,
> >  			continue;
> >  		if (hrxq->tunnel != tunnel)
> >  			continue;
> > +		if (hrxq->rss_level != rss_level)
> > +			continue;
> >  		ind_tbl = mlx5_ind_table_ibv_get(dev, queues, queues_n);
> >  		if (!ind_tbl)
> >  			continue;
> > diff --git a/drivers/net/mlx5/mlx5_rxtx.h
> > b/drivers/net/mlx5/mlx5_rxtx.h index d35605b55..62cf55109 100644
> > --- a/drivers/net/mlx5/mlx5_rxtx.h
> > +++ b/drivers/net/mlx5/mlx5_rxtx.h
> > @@ -147,6 +147,7 @@ struct mlx5_hrxq {
> >  	struct ibv_qp *qp; /* Verbs queue pair. */
> >  	uint64_t hash_fields; /* Verbs Hash fields. */
> >  	uint32_t tunnel; /* Tunnel type. */
> > +	uint32_t rss_level; /* RSS on tunnel level. */
> >  	uint32_t rss_key_len; /* Hash key length in bytes. */
> >  	uint8_t rss_key[]; /* Hash key. */
> >  };
> > @@ -251,12 +252,12 @@ struct mlx5_hrxq *mlx5_hrxq_new(struct rte_eth_dev
> *dev,
> >  				const uint8_t *rss_key, uint32_t rss_key_len,
> >  				uint64_t hash_fields,
> >  				const uint16_t *queues, uint32_t queues_n,
> > -				uint32_t tunnel);
> > +				uint32_t tunnel, uint32_t rss_level);
> >  struct mlx5_hrxq *mlx5_hrxq_get(struct rte_eth_dev *dev,
> >  				const uint8_t *rss_key, uint32_t rss_key_len,
> >  				uint64_t hash_fields,
> >  				const uint16_t *queues, uint32_t queues_n,
> > -				uint32_t tunnel);
> > +				uint32_t tunnel, uint32_t rss_level);
> >  int mlx5_hrxq_release(struct rte_eth_dev *dev, struct mlx5_hrxq
> > *hxrq);  int mlx5_hrxq_ibv_verify(struct rte_eth_dev *dev);  uint64_t
> > mlx5_get_rx_port_offloads(void);
> > --
> > 2.13.3
> >
> 
> Thanks,
> --
> Nélio Laranjeiro
> 6WIND


More information about the dev mailing list