[dpdk-dev] [PATCH] net/mlx5: skip table zero to improve insertion rate

Dekel Peled dekelp at mellanox.com
Mon Sep 9 15:16:35 CEST 2019


E-switch tables one and above provide a higher insertion rate
than table zero, as well as enhanced functionality.

This patch adds a mechanism to utilize these advantages, by creating
a default rule on port start, which directs all packets from e-switch
table zero to table one.
Other transfer flow rules, requested by the application for group n,
will be created in e-switch table n+1.
A jump action targeting e-switch group n will likewise be translated
to a jump to table n+1.

The utility function mlx5_flow_group_to_table() is added to translate
the rte_flow group value to the HW table value, and is called by the
PMD flow engine on flow rule validation and creation.

Signed-off-by: Dekel Peled <dekelp at mellanox.com>
Acked-by: Matan Azrad <matan at mellanox.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo at mellanox.com>
---
 drivers/net/mlx5/mlx5.h            |   5 +-
 drivers/net/mlx5/mlx5_flow.c       | 108 ++++++++++++++++++++++++++++++++++---
 drivers/net/mlx5/mlx5_flow.h       |   5 ++
 drivers/net/mlx5/mlx5_flow_dv.c    |  87 ++++++++++++++++++++++--------
 drivers/net/mlx5/mlx5_flow_verbs.c |   3 ++
 drivers/net/mlx5/mlx5_trigger.c    |   3 ++
 6 files changed, 179 insertions(+), 32 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 86a272e..5186b7d 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -513,8 +513,8 @@ struct mlx5_flow_tbl_resource {
 	rte_atomic32_t refcnt; /**< Reference counter. */
 };
 
-#define MLX5_MAX_TABLES 0xffff
-#define MLX5_MAX_TABLES_FDB 0xffff
+#define MLX5_MAX_TABLES UINT16_MAX
+#define MLX5_MAX_TABLES_FDB UINT16_MAX
 
 #define MLX5_DBR_PAGE_SIZE 4096 /* Must be >= 512. */
 #define MLX5_DBR_SIZE 8
@@ -825,6 +825,7 @@ int mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
 int mlx5_ctrl_flow(struct rte_eth_dev *dev,
 		   struct rte_flow_item_eth *eth_spec,
 		   struct rte_flow_item_eth *eth_mask);
+struct rte_flow *mlx5_flow_create_esw_table_zero_flow(struct rte_eth_dev *dev);
 int mlx5_flow_create_drop_queue(struct rte_eth_dev *dev);
 void mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev);
 void mlx5_flow_async_pool_query_handle(struct mlx5_ibv_shared *sh,
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 78cc06f..ab6a444 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -1990,6 +1990,7 @@ uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
 		   const struct rte_flow_attr *attr __rte_unused,
 		   const struct rte_flow_item items[] __rte_unused,
 		   const struct rte_flow_action actions[] __rte_unused,
+		   bool external __rte_unused,
 		   struct rte_flow_error *error)
 {
 	return rte_flow_error_set(error, ENOTSUP,
@@ -2102,6 +2103,8 @@ uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
  *   Pointer to the list of items.
  * @param[in] actions
  *   Pointer to the list of actions.
+ * @param[in] external
+ *   This flow rule is created by request external to PMD.
  * @param[out] error
  *   Pointer to the error structure.
  *
@@ -2113,13 +2116,13 @@ uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
 		  const struct rte_flow_attr *attr,
 		  const struct rte_flow_item items[],
 		  const struct rte_flow_action actions[],
-		  struct rte_flow_error *error)
+		  bool external, struct rte_flow_error *error)
 {
 	const struct mlx5_flow_driver_ops *fops;
 	enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr);
 
 	fops = flow_get_drv_ops(type);
-	return fops->validate(dev, attr, items, actions, error);
+	return fops->validate(dev, attr, items, actions, external, error);
 }
 
 /**
@@ -2291,7 +2294,7 @@ uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
 {
 	int ret;
 
-	ret = flow_drv_validate(dev, attr, items, actions, error);
+	ret = flow_drv_validate(dev, attr, items, actions, true, error);
 	if (ret < 0)
 		return ret;
 	return 0;
@@ -2353,6 +2356,8 @@ uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
  *   Pattern specification (list terminated by the END pattern item).
  * @param[in] actions
  *   Associated actions (list terminated by the END action).
+ * @param[in] external
+ *   This flow rule is created by request external to PMD.
  * @param[out] error
  *   Perform verbose error reporting if not NULL.
  *
@@ -2364,7 +2369,7 @@ uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
 		 const struct rte_flow_attr *attr,
 		 const struct rte_flow_item items[],
 		 const struct rte_flow_action actions[],
-		 struct rte_flow_error *error)
+		 bool external, struct rte_flow_error *error)
 {
 	struct rte_flow *flow = NULL;
 	struct mlx5_flow *dev_flow;
@@ -2378,7 +2383,7 @@ uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
 	uint32_t i;
 	uint32_t flow_size;
 
-	ret = flow_drv_validate(dev, attr, items, actions, error);
+	ret = flow_drv_validate(dev, attr, items, actions, external, error);
 	if (ret < 0)
 		return NULL;
 	flow_size = sizeof(struct rte_flow);
@@ -2420,6 +2425,7 @@ uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
 		if (!dev_flow)
 			goto error;
 		dev_flow->flow = flow;
+		dev_flow->external = external;
 		LIST_INSERT_HEAD(&flow->dev_flows, dev_flow, next);
 		ret = flow_drv_translate(dev, dev_flow, attr,
 					 buf->entry[i].pattern,
@@ -2445,6 +2451,55 @@ uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
 }
 
 /**
+ * Create a dedicated flow rule on e-switch table 0 (root table), to direct all
+ * incoming packets to table 1.
+ *
+ * Other flow rules, requested for group n, will be created in
+ * e-switch table n+1.
+ * A jump action to e-switch group n is translated to a jump to table n+1.
+ *
+ * Used when working in switchdev mode, to utilise advantages of table 1
+ * and above.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ *
+ * @return
+ *   Pointer to flow on success, NULL otherwise and rte_errno is set.
+ */
+struct rte_flow *
+mlx5_flow_create_esw_table_zero_flow(struct rte_eth_dev *dev)
+{
+	const struct rte_flow_attr attr = {
+		.group = 0,
+		.priority = 0,
+		.ingress = 1,
+		.egress = 0,
+		.transfer = 1,
+	};
+	const struct rte_flow_item pattern = {
+		.type = RTE_FLOW_ITEM_TYPE_END,
+	};
+	struct rte_flow_action_jump jump = {
+		.group = 1,
+	};
+	const struct rte_flow_action actions[] = {
+		{
+			.type = RTE_FLOW_ACTION_TYPE_JUMP,
+			.conf = &jump,
+		},
+		{
+			.type = RTE_FLOW_ACTION_TYPE_END,
+		},
+	};
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct rte_flow_error error;
+
+	return flow_list_create(dev, &priv->ctrl_flows, &attr, &pattern,
+				actions, false, &error);
+}
+
+/**
  * Create a flow.
  *
  * @see rte_flow_create()
@@ -2460,7 +2515,7 @@ struct rte_flow *
 	struct mlx5_priv *priv = dev->data->dev_private;
 
 	return flow_list_create(dev, &priv->flows,
-				attr, items, actions, error);
+				attr, items, actions, true, error);
 }
 
 /**
@@ -2657,7 +2712,7 @@ struct rte_flow *
 	for (i = 0; i != priv->reta_idx_n; ++i)
 		queue[i] = (*priv->reta_idx)[i];
 	flow = flow_list_create(dev, &priv->ctrl_flows,
-				&attr, items, actions, &error);
+				&attr, items, actions, false, &error);
 	if (!flow)
 		return -rte_errno;
 	return 0;
@@ -3071,7 +3126,8 @@ struct rte_flow *
 		goto error;
 	}
 	flow = flow_list_create(dev, &priv->flows, &fdir_flow->attr,
-				fdir_flow->items, fdir_flow->actions, NULL);
+				fdir_flow->items, fdir_flow->actions, true,
+				NULL);
 	if (!flow)
 		goto error;
 	assert(!flow->fdir);
@@ -3419,3 +3475,39 @@ struct rte_flow *
 	pool->raw_hw = NULL;
 	sh->cmng.pending_queries--;
 }
+
+/**
+ * Translate the rte_flow group index to HW table value.
+ *
+ * @param[in] attributes
+ *   Pointer to flow attributes
+ * @param[in] external
+ *   Value is part of flow rule created by request external to PMD.
+ * @param[in] group
+ *   rte_flow group index value.
+ * @param[out] table
+ *   HW table value.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_flow_group_to_table(const struct rte_flow_attr *attributes, bool external,
+			 uint32_t group, uint32_t *table,
+			 struct rte_flow_error *error)
+{
+	if (attributes->transfer && external) {
+		if (group == UINT32_MAX)
+			return rte_flow_error_set
+						(error, EINVAL,
+						 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
+						 NULL,
+						 "group index not supported");
+		*table = group + 1;
+	} else {
+		*table = group;
+	}
+	return 0;
+}
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 822ff36..c8bb414 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -373,6 +373,7 @@ struct mlx5_flow {
 #endif
 		struct mlx5_flow_verbs verbs;
 	};
+	bool external; /**< true if the flow is created external to PMD. */
 };
 
 /* Flow structure. */
@@ -399,6 +400,7 @@ typedef int (*mlx5_flow_validate_t)(struct rte_eth_dev *dev,
 				    const struct rte_flow_attr *attr,
 				    const struct rte_flow_item items[],
 				    const struct rte_flow_action actions[],
+				    bool external,
 				    struct rte_flow_error *error);
 typedef struct mlx5_flow *(*mlx5_flow_prepare_t)
 	(const struct rte_flow_attr *attr, const struct rte_flow_item items[],
@@ -437,6 +439,9 @@ struct mlx5_flow_driver_ops {
 
 /* mlx5_flow.c */
 
+int mlx5_flow_group_to_table(const struct rte_flow_attr *attributes,
+			     bool external, uint32_t group, uint32_t *table,
+			     struct rte_flow_error *error);
 uint64_t mlx5_flow_hashfields_adjust(struct mlx5_flow *dev_flow, int tunnel,
 				     uint64_t layer_types,
 				     uint64_t hash_fields);
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 7b2ba07..5253cfb 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -1960,8 +1960,10 @@ struct field_modify_info modify_tcp[] = {
  *   Pointer to the jump action.
  * @param[in] action_flags
  *   Holds the actions detected until now.
- * @param[in] group
- *   The group of the current flow.
+ * @param[in] attributes
+ *   Pointer to flow attributes
+ * @param[in] external
+ *   Action belongs to flow rule created by request external to PMD.
  * @param[out] error
  *   Pointer to error structure.
  *
@@ -1971,9 +1973,14 @@ struct field_modify_info modify_tcp[] = {
 static int
 flow_dv_validate_action_jump(const struct rte_flow_action *action,
 			     uint64_t action_flags,
-			     uint32_t group,
-			     struct rte_flow_error *error)
+			     const struct rte_flow_attr *attributes,
+			     bool external, struct rte_flow_error *error)
 {
+	uint32_t max_group = attributes->transfer ? MLX5_MAX_TABLES_FDB :
+						    MLX5_MAX_TABLES;
+	uint32_t target_group, table;
+	int ret = 0;
+
 	if (action_flags & (MLX5_FLOW_FATE_ACTIONS |
 			    MLX5_FLOW_FATE_ESWITCH_ACTIONS))
 		return rte_flow_error_set(error, EINVAL,
@@ -1984,10 +1991,20 @@ struct field_modify_info modify_tcp[] = {
 		return rte_flow_error_set(error, EINVAL,
 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
 					  NULL, "action configuration not set");
-	if (group >= ((const struct rte_flow_action_jump *)action->conf)->group)
+	target_group =
+		((const struct rte_flow_action_jump *)action->conf)->group;
+	ret = mlx5_flow_group_to_table(attributes, external, target_group,
+				       &table, error);
+	if (ret)
+		return ret;
+	if (table >= max_group)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP, NULL,
+					  "target group index out of range");
+	if (attributes->group >= target_group)
 		return rte_flow_error_set(error, EINVAL,
 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
-					  "target group must be higher then"
+					  "target group must be higher than"
 					  " the current flow group");
 	return 0;
 }
@@ -2768,6 +2785,8 @@ struct field_modify_info modify_tcp[] = {
  *   Pointer to dev struct.
  * @param[in] attributes
  *   Pointer to flow attributes
+ * @param[in] external
+ *   This flow rule is created by request external to PMD.
  * @param[out] error
  *   Pointer to error structure.
  *
@@ -2777,6 +2796,7 @@ struct field_modify_info modify_tcp[] = {
 static int
 flow_dv_validate_attributes(struct rte_eth_dev *dev,
 			    const struct rte_flow_attr *attributes,
+			    bool external __rte_unused,
 			    struct rte_flow_error *error)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
@@ -2787,7 +2807,22 @@ struct field_modify_info modify_tcp[] = {
 		return rte_flow_error_set(error, ENOTSUP,
 					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
 					  NULL,
-					  "groups is not supported");
+					  "groups are not supported");
+#else
+	uint32_t max_group = attributes->transfer ? MLX5_MAX_TABLES_FDB :
+						    MLX5_MAX_TABLES;
+	uint32_t table;
+	int ret;
+
+	ret = mlx5_flow_group_to_table(attributes, external,
+				       attributes->group,
+				       &table, error);
+	if (ret)
+		return ret;
+	if (table >= max_group)
+		return rte_flow_error_set(error, EINVAL,
+					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP, NULL,
+					  "group index out of range");
 #endif
 	if (attributes->priority != MLX5_FLOW_PRIO_RSVD &&
 	    attributes->priority >= priority_max)
@@ -2811,12 +2846,6 @@ struct field_modify_info modify_tcp[] = {
 				(error, ENOTSUP,
 				 RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, attributes,
 				 "egress is not supported");
-		if (attributes->group >= MLX5_MAX_TABLES_FDB)
-			return rte_flow_error_set
-				(error, EINVAL,
-				 RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
-				 NULL, "group must be smaller than "
-				 RTE_STR(MLX5_MAX_TABLES_FDB));
 	}
 	if (!(attributes->egress ^ attributes->ingress))
 		return rte_flow_error_set(error, ENOTSUP,
@@ -2837,6 +2866,8 @@ struct field_modify_info modify_tcp[] = {
  *   Pointer to the list of items.
  * @param[in] actions
  *   Pointer to the list of actions.
+ * @param[in] external
+ *   This flow rule is created by request external to PMD.
  * @param[out] error
  *   Pointer to the error structure.
  *
@@ -2847,7 +2878,7 @@ struct field_modify_info modify_tcp[] = {
 flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr,
 		 const struct rte_flow_item items[],
 		 const struct rte_flow_action actions[],
-		 struct rte_flow_error *error)
+		 bool external, struct rte_flow_error *error)
 {
 	int ret;
 	uint64_t action_flags = 0;
@@ -2866,7 +2897,7 @@ struct field_modify_info modify_tcp[] = {
 
 	if (items == NULL)
 		return -1;
-	ret = flow_dv_validate_attributes(dev, attr, error);
+	ret = flow_dv_validate_attributes(dev, attr, external, error);
 	if (ret < 0)
 		return ret;
 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
@@ -3236,7 +3267,8 @@ struct field_modify_info modify_tcp[] = {
 		case RTE_FLOW_ACTION_TYPE_JUMP:
 			ret = flow_dv_validate_action_jump(actions,
 							   action_flags,
-							   attr->group, error);
+							   attr, external,
+							   error);
 			if (ret)
 				return ret;
 			++actions_n;
@@ -4693,8 +4725,14 @@ struct field_modify_info modify_tcp[] = {
 	void *match_mask = matcher.mask.buf;
 	void *match_value = dev_flow->dv.value.buf;
 	uint8_t next_protocol = 0xff;
+	uint32_t table;
+	int ret = 0;
 
-	flow->group = attr->group;
+	ret = mlx5_flow_group_to_table(attr, dev_flow->external, attr->group,
+				       &table, error);
+	if (ret)
+		return ret;
+	flow->group = table;
 	if (attr->transfer)
 		res.ft_type = MLX5DV_FLOW_TABLE_TYPE_FDB;
 	if (priority == MLX5_FLOW_PRIO_RSVD)
@@ -4780,7 +4818,7 @@ struct field_modify_info modify_tcp[] = {
 			flow->counter = flow_dv_counter_alloc(dev,
 							      count->shared,
 							      count->id,
-							      attr->group);
+							      flow->group);
 			if (flow->counter == NULL)
 				goto cnt_err;
 			dev_flow->dv.actions[actions_n++] =
@@ -4866,7 +4904,12 @@ struct field_modify_info modify_tcp[] = {
 			break;
 		case RTE_FLOW_ACTION_TYPE_JUMP:
 			jump_data = action->conf;
-			tbl = flow_dv_tbl_resource_get(dev, jump_data->group,
+			ret = mlx5_flow_group_to_table(attr, dev_flow->external,
+						       jump_data->group, &table,
+						       error);
+			if (ret)
+				return ret;
+			tbl = flow_dv_tbl_resource_get(dev, table,
 						       attr->egress,
 						       attr->transfer, error);
 			if (!tbl)
@@ -5017,7 +5060,7 @@ struct field_modify_info modify_tcp[] = {
 			mlx5_flow_tunnel_ip_check(items, next_protocol,
 						  &item_flags, &tunnel);
 			flow_dv_translate_item_ipv4(match_mask, match_value,
-						    items, tunnel, attr->group);
+						    items, tunnel, flow->group);
 			matcher.priority = MLX5_PRIORITY_MAP_L3;
 			dev_flow->dv.hash_fields |=
 				mlx5_flow_hashfields_adjust
@@ -5044,7 +5087,7 @@ struct field_modify_info modify_tcp[] = {
 			mlx5_flow_tunnel_ip_check(items, next_protocol,
 						  &item_flags, &tunnel);
 			flow_dv_translate_item_ipv6(match_mask, match_value,
-						    items, tunnel, attr->group);
+						    items, tunnel, flow->group);
 			matcher.priority = MLX5_PRIORITY_MAP_L3;
 			dev_flow->dv.hash_fields |=
 				mlx5_flow_hashfields_adjust
@@ -5163,7 +5206,7 @@ struct field_modify_info modify_tcp[] = {
 	matcher.priority = mlx5_flow_adjust_priority(dev, priority,
 						     matcher.priority);
 	matcher.egress = attr->egress;
-	matcher.group = attr->group;
+	matcher.group = flow->group;
 	matcher.transfer = attr->transfer;
 	if (flow_dv_matcher_register(dev, &matcher, dev_flow, error))
 		return -rte_errno;
diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c
index c5b28e3..23110f2 100644
--- a/drivers/net/mlx5/mlx5_flow_verbs.c
+++ b/drivers/net/mlx5/mlx5_flow_verbs.c
@@ -1016,6 +1016,8 @@
  *   Pointer to the list of items.
  * @param[in] actions
  *   Pointer to the list of actions.
+ * @param[in] external
+ *   This flow rule is created by request external to PMD.
  * @param[out] error
  *   Pointer to the error structure.
  *
@@ -1027,6 +1029,7 @@
 		    const struct rte_flow_attr *attr,
 		    const struct rte_flow_item items[],
 		    const struct rte_flow_action actions[],
+		    bool external __rte_unused,
 		    struct rte_flow_error *error)
 {
 	int ret;
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index aa323ad..122f31c 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -286,6 +286,9 @@
 	unsigned int j;
 	int ret;
 
+	if (priv->config.dv_esw_en && !priv->config.vf)
+		if (!mlx5_flow_create_esw_table_zero_flow(dev))
+			goto error;
 	if (priv->isolated)
 		return 0;
 	if (dev->data->promiscuous) {
-- 
1.8.3.1



More information about the dev mailing list