[dpdk-dev] [RFC PATCH] net/bonding: add rte flow support

Matan Azrad matan at mellanox.com
Tue Mar 6 16:34:27 CET 2018


Ethernet devices grouped by the bonding PMD, aka slaves, share the same
queue and RSS configurations, and their Rx burst functions must be
managed by the bonding PMD according to the bonding architecture.

So, it makes sense to configure the same flow rules for all the bond
slaves to keep packet flow management consistent.

Add rte flow support to the bonding PMD.

Signed-off-by: Matan Azrad <matan at mellanox.com>
---


Implementation details:

Support the following rte flow operations: validate, create, destroy, flush, query and isolate.

Validate:
Validation passes only if it passes for all the existing slaves.

Create:
Create the flow on all the slaves.
Save the created slave flow objects in the bonding internal flow structure.
Save each flow description so it can be configured on every new slave.
A flow creation failure on an existing slave rejects the flow.
A flow creation failure on a new slave at slave-add time rejects the slave.
Return the bonding flow structure pointer to the application.

Destroy:
Destroy the flow on all the slaves and release the internal flow memory.

Flush:
Destroy all the bonding PMD flows on all the slaves (calling flush on the slaves instead would destroy all of their flows, which may include other flows from the application or the bond internal LACP flow).

Query:
Return the query result of the bonding primary slave (alternatively, we could sum the query data of all the slaves for the COUNT action and return -ENOTSUP for other queries).
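
As a rough sketch of that alternative (not part of this patch), COUNT
aggregation could look roughly like the following, assuming a hypothetical
helper named bond_flow_query_count() and per-slave flow objects stored as
in this patch:

static int
bond_flow_query_count(struct rte_eth_dev *dev, struct rte_flow *flow,
		      struct rte_flow_query_count *count,
		      struct rte_flow_error *error)
{
	struct bond_dev_private *internals = dev->data->dev_private;
	struct rte_flow_query_count slave_count;
	int i;
	int ret;

	count->hits = 0;
	count->bytes = 0;
	for (i = 0; i < internals->slave_count; i++) {
		memset(&slave_count, 0, sizeof(slave_count));
		/* Query each slave and accumulate its counters. */
		ret = rte_flow_query(internals->slaves[i].port_id,
				     flow->flows[i],
				     RTE_FLOW_ACTION_TYPE_COUNT,
				     &slave_count, error);
		if (ret != 0)
			return ret;
		count->hits += slave_count.hits;
		count->bytes += slave_count.bytes;
	}
	count->hits_set = 1;
	count->bytes_set = 1;
	return 0;
}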

Isolate:
Call flow isolate for all the slaves.
Isolate mode is configured for new slaves too (the slave is rejected on failure).


* This implementation still allows the application to configure flows directly on the slaves and to manage another set of rte flows there.
* The recommendation is to use rte flow through the bonding PMD and not directly through the slave PMDs (for example, calling flow flush on a slave directly may break the LACP mechanism); see the short application-side sketch below.
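
For illustration, a minimal application-side sketch (bond_port_id is
hypothetical, e.g. obtained from rte_eth_bond_create(), and the queue index
is arbitrary) of configuring a rule through the bond port so the bonding PMD
replicates it to every slave:

struct rte_flow_error error;
struct rte_flow_attr attr = { .ingress = 1 };
struct rte_flow_item pattern[] = {
	{ .type = RTE_FLOW_ITEM_TYPE_ETH },
	{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
	{ .type = RTE_FLOW_ITEM_TYPE_END },
};
struct rte_flow_action_queue queue = { .index = 0 };
struct rte_flow_action actions[] = {
	{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
	{ .type = RTE_FLOW_ACTION_TYPE_END },
};
struct rte_flow *flow = NULL;

/* Validate on the bond port; the bonding PMD validates on each slave. */
if (rte_flow_validate(bond_port_id, &attr, pattern, actions, &error) == 0)
	/* Create on the bond port; the rule is replicated to every slave. */
	flow = rte_flow_create(bond_port_id, &attr, pattern, actions, &error);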

You can look at the code below for more details.

Thoughts?

 drivers/net/bonding/Makefile               |   1 +
 drivers/net/bonding/rte_eth_bond_api.c     |  61 +++++++++
 drivers/net/bonding/rte_eth_bond_flow.c    | 206 +++++++++++++++++++++++++++++
 drivers/net/bonding/rte_eth_bond_pmd.c     |  28 +++-
 drivers/net/bonding/rte_eth_bond_private.h |  19 +++
 5 files changed, 312 insertions(+), 3 deletions(-)
 create mode 100644 drivers/net/bonding/rte_eth_bond_flow.c

diff --git a/drivers/net/bonding/Makefile b/drivers/net/bonding/Makefile
index 4a6633e..acad16a 100644
--- a/drivers/net/bonding/Makefile
+++ b/drivers/net/bonding/Makefile
@@ -27,6 +27,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_pmd.c
 SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_args.c
 SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_8023ad.c
 SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_alb.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += rte_eth_bond_flow.c
 
 #
 # Export include files
diff --git a/drivers/net/bonding/rte_eth_bond_api.c b/drivers/net/bonding/rte_eth_bond_api.c
index f854b73..350b46e 100644
--- a/drivers/net/bonding/rte_eth_bond_api.c
+++ b/drivers/net/bonding/rte_eth_bond_api.c
@@ -223,6 +223,47 @@
 }
 
 static int
+slave_rte_flow_prepare(uint16_t slave_id, struct bond_dev_private *internals)
+{
+	struct rte_flow *flow;
+	struct rte_flow_error ferror;
+	uint16_t slave_port_id = internals->slaves[slave_id].port_id;
+
+	if (internals->flow_isolated_valid != 0) {
+		if (rte_flow_isolate(slave_port_id, internals->flow_isolated,
+		    &ferror)) {
+			RTE_BOND_LOG(ERR, "rte_flow_isolate failed for slave"
+				     " %d: %s", slave_id, ferror.message ?
+				     ferror.message : "(no stated reason)");
+			return -1;
+		}
+	}
+	TAILQ_FOREACH(flow, &internals->flow_list, next) {
+		flow->flows[slave_id] = rte_flow_create(slave_port_id,
+							&flow->fd->attr,
+							flow->fd->items,
+							flow->fd->actions,
+							&ferror);
+		if (flow->flows[slave_id] == NULL) {
+			RTE_BOND_LOG(ERR, "Cannot create flow for slave"
+				     " %d: %s", slave_id,
+				     ferror.message ? ferror.message :
+				     "(no stated reason)");
+			/* Destroy successful bond flows from the slave */
+			TAILQ_FOREACH(flow, &internals->flow_list, next) {
+				if (flow->flows[slave_id] != NULL) {
+					rte_flow_destroy(slave_port_id,
+						flow->flows[slave_id], &ferror);
+					flow->flows[slave_id] = NULL;
+				}
+			}
+			return -1;
+		}
+	}
+	return 0;
+}
+
+static int
 __eth_bond_slave_add_lock_free(uint16_t bonded_port_id, uint16_t slave_port_id)
 {
 	struct rte_eth_dev *bonded_eth_dev, *slave_eth_dev;
@@ -316,6 +357,12 @@
 	bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf &=
 			internals->flow_type_rss_offloads;
 
+	if (slave_rte_flow_prepare(internals->slave_count, internals) != 0) {
+		RTE_BOND_LOG(ERR, "Failed to prepare new slave flows: port=%d",
+			     slave_port_id);
+		return -1;
+	}
+
 	internals->slave_count++;
 
 	if (bonded_eth_dev->data->dev_started) {
@@ -393,6 +440,8 @@
 	struct rte_eth_dev *bonded_eth_dev;
 	struct bond_dev_private *internals;
 	struct rte_eth_dev *slave_eth_dev;
+	struct rte_flow_error flow_error;
+	struct rte_flow *flow;
 	int i, slave_idx;
 
 	bonded_eth_dev = &rte_eth_devices[bonded_port_id];
@@ -432,6 +481,18 @@
 	rte_eth_dev_default_mac_addr_set(slave_port_id,
 			&(internals->slaves[slave_idx].persisted_mac_addr));
 
+	/*
+	 * Remove bond device flows from slave device.
+	 * Note: don't restore flow isolate mode.
+	 */
+	TAILQ_FOREACH(flow, &internals->flow_list, next) {
+		if (flow->flows[slave_idx] != NULL) {
+			rte_flow_destroy(slave_port_id, flow->flows[slave_idx],
+					 &flow_error);
+			flow->flows[slave_idx] = NULL;
+		}
+	}
+
 	slave_eth_dev = &rte_eth_devices[slave_port_id];
 	slave_remove(internals, slave_eth_dev);
 	slave_eth_dev->data->dev_flags &= (~RTE_ETH_DEV_BONDED_SLAVE);
diff --git a/drivers/net/bonding/rte_eth_bond_flow.c b/drivers/net/bonding/rte_eth_bond_flow.c
new file mode 100644
index 0000000..1373393
--- /dev/null
+++ b/drivers/net/bonding/rte_eth_bond_flow.c
@@ -0,0 +1,206 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 Mellanox.
+ */
+
+#include <sys/queue.h>
+
+#include <rte_malloc.h>
+#include <rte_tailq.h>
+#include <rte_flow.h>
+#include <rte_flow_driver.h>
+
+#include "rte_eth_bond_private.h"
+
+static struct rte_flow *
+bond_flow_alloc(int numa_node, const struct rte_flow_attr *attr,
+		   const struct rte_flow_item *items,
+		   const struct rte_flow_action *actions)
+{
+	struct rte_flow *flow;
+	size_t fdsz;
+
+	fdsz = rte_flow_copy(NULL, 0, attr, items, actions);
+	flow = rte_zmalloc_socket(NULL, sizeof(struct rte_flow) + fdsz,
+				  RTE_CACHE_LINE_SIZE, numa_node);
+	if (unlikely(flow == NULL)) {
+		RTE_BOND_LOG(ERR, "Could not allocate new flow");
+		return NULL;
+	}
+	flow->fd = (void *)((uintptr_t)flow + sizeof(*flow));
+	if (unlikely(rte_flow_copy(flow->fd, fdsz, attr, items, actions) !=
+		     fdsz)) {
+		RTE_BOND_LOG(ERR, "Failed to copy flow description");
+		rte_free(flow);
+		return NULL;
+	}
+	return flow;
+}
+
+static void
+bond_flow_release(struct rte_flow **flow)
+{
+	rte_free(*flow);
+	*flow = NULL;
+}
+
+static int
+bond_flow_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr,
+		   const struct rte_flow_item patterns[],
+		   const struct rte_flow_action actions[],
+		   struct rte_flow_error *error)
+{
+	struct bond_dev_private *internals = dev->data->dev_private;
+	int i;
+	int ret;
+
+	for (i = 0; i < internals->slave_count; i++) {
+		ret = rte_flow_validate(internals->slaves[i].port_id, attr,
+					patterns, actions, error);
+		if (ret) {
+			RTE_BOND_LOG(ERR, "Operation rte_flow_validate failed"
+				     " for slave %d with error %d", i, ret);
+			return ret;
+		}
+	}
+	return 0;
+}
+
+static struct rte_flow *
+bond_flow_create(struct rte_eth_dev *dev, const struct rte_flow_attr *attr,
+		 const struct rte_flow_item patterns[],
+		 const struct rte_flow_action actions[],
+		 struct rte_flow_error *error)
+{
+	struct bond_dev_private *internals = dev->data->dev_private;
+	struct rte_flow *flow;
+	int i;
+
+	flow = bond_flow_alloc(dev->data->numa_node, attr, patterns, actions);
+	if (unlikely(flow == NULL)) {
+		rte_flow_error_set(error, ENOMEM,
+				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+				   rte_strerror(ENOMEM));
+		return NULL;
+	}
+	for (i = 0; i < internals->slave_count; i++) {
+		flow->flows[i] = rte_flow_create(internals->slaves[i].port_id,
+						 attr, patterns, actions, error);
+		if (unlikely(flow->flows[i] == NULL)) {
+			RTE_BOND_LOG(ERR, "Failed to create flow on slave %d",
+				     i);
+			goto err;
+		}
+	}
+	TAILQ_INSERT_TAIL(&internals->flow_list, flow, next);
+	return flow;
+err:
+	/* Destroy all slaves flows. */
+	for (i = 0; i < internals->slave_count; i++) {
+		if (flow->flows[i] != NULL)
+			rte_flow_destroy(internals->slaves[i].port_id,
+					 flow->flows[i], error);
+	}
+	bond_flow_release(&flow);
+	return NULL;
+}
+
+static int
+bond_flow_destroy(struct rte_eth_dev *dev, struct rte_flow *flow,
+		  struct rte_flow_error *error)
+{
+	struct bond_dev_private *internals = dev->data->dev_private;
+	int i;
+	int ret = 0;
+
+	for (i = 0; i < internals->slave_count; i++) {
+		int lret;
+
+		if (unlikely(flow->flows[i] == NULL))
+			continue;
+		lret = rte_flow_destroy(internals->slaves[i].port_id,
+					flow->flows[i], error);
+		if (unlikely(lret != 0)) {
+			RTE_BOND_LOG(ERR, "Failed to destroy flow on slave %d:"
+				     " %d", i, lret);
+			ret = lret;
+		}
+	}
+	TAILQ_REMOVE(&internals->flow_list, flow, next);
+	bond_flow_release(&flow);
+	return ret;
+}
+
+static int
+bond_flow_flush(struct rte_eth_dev *dev, struct rte_flow_error *error)
+{
+	struct bond_dev_private *internals = dev->data->dev_private;
+	struct rte_flow *flow;
+	void *tmp;
+	int ret = 0;
+	int lret;
+
+	/*
+	 * Destroy all bond flows from its slaves instead of flushing them to
+	 * keep the LACP flow or any other external flows.
+	 */
+	TAILQ_FOREACH_SAFE(flow, &internals->flow_list, next, tmp) {
+		lret = bond_flow_destroy(dev, flow, error);
+		if (unlikely(lret != 0))
+			ret = lret;
+	}
+	if (unlikely(ret != 0))
+		RTE_BOND_LOG(ERR, "Failed to flush flow in all slaves");
+	return ret;
+}
+
+static int
+bond_flow_query(struct rte_eth_dev *dev, struct rte_flow *flow,
+		enum rte_flow_action_type type, void *arg,
+		struct rte_flow_error *error)
+{
+	struct bond_dev_private *internals = dev->data->dev_private;
+	int slave_id;
+
+	/* Query the primary slave. */
+	if (unlikely(internals->slave_count < 1)) {
+		RTE_BOND_LOG(ERR, "No slaves to query flow");
+		return -ENODEV;
+	}
+	for (slave_id = 0; slave_id < internals->slave_count; slave_id++)
+		if (internals->current_primary_port ==
+		    internals->slaves[slave_id].port_id)
+			break;
+	return rte_flow_query(internals->current_primary_port,
+			      flow->flows[slave_id], type, arg, error);
+}
+
+static int
+bond_flow_isolate(struct rte_eth_dev *dev, int set,
+		  struct rte_flow_error *error)
+{
+	struct bond_dev_private *internals = dev->data->dev_private;
+	int i;
+	int ret;
+
+	for (i = 0; i < internals->slave_count; i++) {
+		ret = rte_flow_isolate(internals->slaves[i].port_id, set, error);
+		if (unlikely(ret != 0)) {
+			RTE_BOND_LOG(ERR, "Operation rte_flow_isolate failed"
+				     " for slave %d with error %d", i, ret);
+			internals->flow_isolated_valid = 0;
+			return ret;
+		}
+	}
+	internals->flow_isolated = set;
+	internals->flow_isolated_valid = 1;
+	return 0;
+}
+
+const struct rte_flow_ops bond_flow_ops = {
+	.validate = bond_flow_validate,
+	.create = bond_flow_create,
+	.destroy = bond_flow_destroy,
+	.flush = bond_flow_flush,
+	.query = bond_flow_query,
+	.isolate = bond_flow_isolate,
+};
diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c
index c34c325..6d90f9c 100644
--- a/drivers/net/bonding/rte_eth_bond_pmd.c
+++ b/drivers/net/bonding/rte_eth_bond_pmd.c
@@ -1910,7 +1910,6 @@ struct bwg_slave {
 		struct bond_dev_private *internals;
 
 		internals = bonded_eth_dev->data->dev_private;
-
 		for (i = 0; i < internals->slave_count; i++) {
 			if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
 				errval = rte_eth_dev_rss_reta_update(
@@ -1950,10 +1949,18 @@ struct bwg_slave {
 				slave_eth_dev->data->port_id)
 			break;
 
-	if (i < (internals->slave_count - 1))
+	if (i < (internals->slave_count - 1)) {
+		struct rte_flow *flow;
+
 		memmove(&internals->slaves[i], &internals->slaves[i + 1],
 				sizeof(internals->slaves[0]) *
 				(internals->slave_count - i - 1));
+		TAILQ_FOREACH(flow, &internals->flow_list, next) {
+			memmove(&flow->flows[i], &flow->flows[i + 1],
+				sizeof(flow->flows[0]) *
+				(internals->slave_count - i - 1));
+		}
+	}
 
 	internals->slave_count--;
 
@@ -2858,6 +2865,17 @@ struct bwg_slave {
 		RTE_BOND_LOG(ERR, "Failed to update MAC address");
 }
 
+static int
+bond_filter_ctrl(struct rte_eth_dev *dev __rte_unused,
+		 enum rte_filter_type type, enum rte_filter_op op, void *arg)
+{
+	if (type == RTE_ETH_FILTER_GENERIC && op == RTE_ETH_FILTER_GET) {
+		*(const void **)arg = &bond_flow_ops;
+		return 0;
+	}
+	return -ENOTSUP;
+}
+
 const struct eth_dev_ops default_dev_ops = {
 	.dev_start            = bond_ethdev_start,
 	.dev_stop             = bond_ethdev_stop,
@@ -2879,7 +2897,8 @@ struct bwg_slave {
 	.rss_hash_update      = bond_ethdev_rss_hash_update,
 	.rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get,
 	.mtu_set              = bond_ethdev_mtu_set,
-	.mac_addr_set         = bond_ethdev_mac_address_set
+	.mac_addr_set         = bond_ethdev_mac_address_set,
+	.filter_ctrl          = bond_filter_ctrl
 };
 
 static int
@@ -2945,6 +2964,9 @@ struct bwg_slave {
 	memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
 	memset(internals->slaves, 0, sizeof(internals->slaves));
 
+	TAILQ_INIT(&internals->flow_list);
+	internals->flow_isolated_valid = 0;
+
 	/* Set mode 4 default configuration */
 	bond_mode_8023ad_setup(eth_dev, NULL);
 	if (bond_ethdev_mode_set(eth_dev, mode)) {
diff --git a/drivers/net/bonding/rte_eth_bond_private.h b/drivers/net/bonding/rte_eth_bond_private.h
index 92e15f8..63ce9e4 100644
--- a/drivers/net/bonding/rte_eth_bond_private.h
+++ b/drivers/net/bonding/rte_eth_bond_private.h
@@ -5,6 +5,8 @@
 #ifndef _RTE_ETH_BOND_PRIVATE_H_
 #define _RTE_ETH_BOND_PRIVATE_H_
 
+#include <sys/queue.h>
+
 #include <rte_ethdev_driver.h>
 #include <rte_spinlock.h>
 #include <rte_bitmap.h>
@@ -37,6 +39,8 @@
 
 extern struct rte_vdev_driver pmd_bond_drv;
 
+extern const struct rte_flow_ops bond_flow_ops;
+
 /** Port Queue Mapping Structure */
 struct bond_rx_queue {
 	uint16_t queue_id;
@@ -80,6 +84,14 @@ struct bond_slave_details {
 	uint16_t reta_size;
 };
 
+struct rte_flow {
+	TAILQ_ENTRY(rte_flow) next;
+	/* Slaves flows */
+	struct rte_flow *flows[RTE_MAX_ETHPORTS];
+	/* Flow description for synchronization */
+	struct rte_flow_desc *fd;
+};
+
 typedef void (*burst_xmit_hash_t)(struct rte_mbuf **buf, uint16_t nb_pkts,
 		uint8_t slave_count, uint16_t *slaves);
 
@@ -131,6 +143,13 @@ struct bond_dev_private {
 	uint32_t rx_offload_capa;            /** Rx offload capability */
 	uint32_t tx_offload_capa;            /** Tx offload capability */
 
+	/** List of the configured flows */
+	TAILQ_HEAD(sub_flows, rte_flow) flow_list;
+
+	/** Flow isolation state */
+	int flow_isolated;
+	int flow_isolated_valid;
+
 	/** Bit mask of RSS offloads, the bit offset also means flow type */
 	uint64_t flow_type_rss_offloads;
 
-- 
1.9.5


