[dpdk-dev] [PATCH v2 13/29] net/mlx4: refactor internal flow rules

Adrien Mazarguil adrien.mazarguil at 6wind.com
Thu Oct 12 14:19:27 CEST 2017


When not in isolated mode, a flow rule is automatically configured by the
PMD to receive traffic addressed to the MAC address of the device. This
somewhat duplicates flow API functionality.

Remove legacy support for internal flow rules to instead handle them
through the flow API implementation.

Signed-off-by: Adrien Mazarguil <adrien.mazarguil at 6wind.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro at 6wind.com>
---
 drivers/net/mlx4/mlx4.c      |  20 ++---
 drivers/net/mlx4/mlx4.h      |   1 -
 drivers/net/mlx4/mlx4_flow.c | 155 +++++++++++++++++++++++++++++++++++---
 drivers/net/mlx4/mlx4_flow.h |   6 ++
 drivers/net/mlx4/mlx4_rxq.c  | 117 +++-------------------------
 drivers/net/mlx4/mlx4_rxtx.h |   2 -
 6 files changed, 172 insertions(+), 129 deletions(-)

diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index b084903..40c0ee2 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -96,8 +96,15 @@ const char *pmd_mlx4_init_params[] = {
 static int
 mlx4_dev_configure(struct rte_eth_dev *dev)
 {
-	(void)dev;
-	return 0;
+	struct priv *priv = dev->data->dev_private;
+	int ret;
+
+	/* Prepare internal flow rules. */
+	ret = mlx4_flow_sync(priv);
+	if (ret)
+		ERROR("cannot set up internal flow rules: %s",
+		      strerror(-ret));
+	return ret;
 }
 
 /**
@@ -121,9 +128,6 @@ mlx4_dev_start(struct rte_eth_dev *dev)
 		return 0;
 	DEBUG("%p: attaching configured flows to all RX queues", (void *)dev);
 	priv->started = 1;
-	ret = mlx4_mac_addr_add(priv);
-	if (ret)
-		goto err;
 	ret = mlx4_intr_install(priv);
 	if (ret) {
 		ERROR("%p: interrupt handler installation failed",
@@ -139,7 +143,6 @@ mlx4_dev_start(struct rte_eth_dev *dev)
 	return 0;
 err:
 	/* Rollback. */
-	mlx4_mac_addr_del(priv);
 	priv->started = 0;
 	return ret;
 }
@@ -163,7 +166,6 @@ mlx4_dev_stop(struct rte_eth_dev *dev)
 	priv->started = 0;
 	mlx4_flow_stop(priv);
 	mlx4_intr_uninstall(priv);
-	mlx4_mac_addr_del(priv);
 }
 
 /**
@@ -185,7 +187,7 @@ mlx4_dev_close(struct rte_eth_dev *dev)
 	DEBUG("%p: closing device \"%s\"",
 	      (void *)dev,
 	      ((priv->ctx != NULL) ? priv->ctx->device->name : ""));
-	mlx4_mac_addr_del(priv);
+	mlx4_flow_clean(priv);
 	dev->rx_pkt_burst = mlx4_rx_burst_removed;
 	dev->tx_pkt_burst = mlx4_tx_burst_removed;
 	for (i = 0; i != dev->data->nb_rx_queues; ++i)
@@ -542,8 +544,6 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 		     mac.addr_bytes[4], mac.addr_bytes[5]);
 		/* Register MAC address. */
 		priv->mac = mac;
-		if (mlx4_mac_addr_add(priv))
-			goto port_error;
 #ifndef NDEBUG
 		{
 			char ifname[IF_NAMESIZE];
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index f71679b..fb4708d 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -100,7 +100,6 @@ struct priv {
 	struct ibv_device_attr device_attr; /**< Device properties. */
 	struct ibv_pd *pd; /**< Protection Domain. */
 	struct ether_addr mac; /**< MAC address. */
-	struct ibv_flow *mac_flow; /**< Flow associated with MAC address. */
 	/* Device properties. */
 	uint16_t mtu; /**< Configured MTU. */
 	uint8_t port; /**< Physical port number. */
diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c
index 669eba2..fb38179 100644
--- a/drivers/net/mlx4/mlx4_flow.c
+++ b/drivers/net/mlx4/mlx4_flow.c
@@ -617,6 +617,10 @@ mlx4_flow_prepare(struct priv *priv,
 
 		if (item->type == RTE_FLOW_ITEM_TYPE_VOID)
 			continue;
+		if (item->type == MLX4_FLOW_ITEM_TYPE_INTERNAL) {
+			flow->internal = 1;
+			continue;
+		}
 		/*
 		 * The nic can support patterns with NULL eth spec only
 		 * if eth is a single item in a rule.
@@ -916,7 +920,17 @@ mlx4_flow_create(struct rte_eth_dev *dev,
 		return NULL;
 	err = mlx4_flow_toggle(priv, flow, priv->started, error);
 	if (!err) {
-		LIST_INSERT_HEAD(&priv->flows, flow, next);
+		struct rte_flow *curr = LIST_FIRST(&priv->flows);
+
+		/* New rules are inserted after internal ones. */
+		if (!curr || !curr->internal) {
+			LIST_INSERT_HEAD(&priv->flows, flow, next);
+		} else {
+			while (LIST_NEXT(curr, next) &&
+			       LIST_NEXT(curr, next)->internal)
+				curr = LIST_NEXT(curr, next);
+			LIST_INSERT_AFTER(curr, flow, next);
+		}
 		return flow;
 	}
 	rte_flow_error_set(error, -err, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
@@ -941,13 +955,14 @@ mlx4_flow_isolate(struct rte_eth_dev *dev,
 	if (!!enable == !!priv->isolated)
 		return 0;
 	priv->isolated = !!enable;
-	if (enable) {
-		mlx4_mac_addr_del(priv);
-	} else if (mlx4_mac_addr_add(priv) < 0) {
-		priv->isolated = 1;
+	if (mlx4_flow_sync(priv)) {
+		priv->isolated = !enable;
 		return rte_flow_error_set(error, rte_errno,
 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-					  NULL, "cannot leave isolated mode");
+					  NULL,
+					  enable ?
+					  "cannot enter isolated mode" :
+					  "cannot leave isolated mode");
 	}
 	return 0;
 }
@@ -974,7 +989,9 @@ mlx4_flow_destroy(struct rte_eth_dev *dev,
 }
 
 /**
- * Destroy all flow rules.
+ * Destroy user-configured flow rules.
+ *
+ * This function skips internal flows rules.
  *
  * @see rte_flow_flush()
  * @see rte_flow_ops
@@ -984,17 +1001,133 @@ mlx4_flow_flush(struct rte_eth_dev *dev,
 		struct rte_flow_error *error)
 {
 	struct priv *priv = dev->data->dev_private;
+	struct rte_flow *flow = LIST_FIRST(&priv->flows);
 
-	while (!LIST_EMPTY(&priv->flows)) {
-		struct rte_flow *flow;
+	while (flow) {
+		struct rte_flow *next = LIST_NEXT(flow, next);
 
-		flow = LIST_FIRST(&priv->flows);
-		mlx4_flow_destroy(dev, flow, error);
+		if (!flow->internal)
+			mlx4_flow_destroy(dev, flow, error);
+		flow = next;
 	}
 	return 0;
 }
 
 /**
+ * Generate internal flow rules.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx4_flow_internal(struct priv *priv, struct rte_flow_error *error)
+{
+	struct rte_flow_attr attr = {
+		.ingress = 1,
+	};
+	struct rte_flow_item pattern[] = {
+		{
+			.type = MLX4_FLOW_ITEM_TYPE_INTERNAL,
+		},
+		{
+			.type = RTE_FLOW_ITEM_TYPE_ETH,
+			.spec = &(struct rte_flow_item_eth){
+				.dst = priv->mac,
+			},
+			.mask = &(struct rte_flow_item_eth){
+				.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+			},
+		},
+		{
+			.type = RTE_FLOW_ITEM_TYPE_END,
+		},
+	};
+	struct rte_flow_action actions[] = {
+		{
+			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
+			.conf = &(struct rte_flow_action_queue){
+				.index = 0,
+			},
+		},
+		{
+			.type = RTE_FLOW_ACTION_TYPE_END,
+		},
+	};
+
+	if (!mlx4_flow_create(priv->dev, &attr, pattern, actions, error))
+		return -rte_errno;
+	return 0;
+}
+
+/**
+ * Synchronize flow rules.
+ *
+ * This function synchronizes flow rules with the state of the device by
+ * taking into account isolated mode and whether target queues are
+ * configured.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx4_flow_sync(struct priv *priv)
+{
+	struct rte_flow *flow;
+	int ret;
+
+	/* Internal flow rules are guaranteed to come first in the list. */
+	if (priv->isolated) {
+		/*
+		 * Get rid of them in isolated mode, stop at the first
+		 * non-internal rule found.
+		 */
+		for (flow = LIST_FIRST(&priv->flows);
+		     flow && flow->internal;
+		     flow = LIST_FIRST(&priv->flows))
+			claim_zero(mlx4_flow_destroy(priv->dev, flow, NULL));
+	} else if (!LIST_FIRST(&priv->flows) ||
+		   !LIST_FIRST(&priv->flows)->internal) {
+		/*
+		 * If the first rule is not internal outside isolated mode,
+		 * they must be added back.
+		 */
+		ret = mlx4_flow_internal(priv, NULL);
+		if (ret)
+			return ret;
+	}
+	if (priv->started)
+		return mlx4_flow_start(priv);
+	mlx4_flow_stop(priv);
+	return 0;
+}
+
+/**
+ * Clean up all flow rules.
+ *
+ * Unlike mlx4_flow_flush(), this function takes care of all remaining flow
+ * rules regardless of whether they are internal or user-configured.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ */
+void
+mlx4_flow_clean(struct priv *priv)
+{
+	struct rte_flow *flow;
+
+	while ((flow = LIST_FIRST(&priv->flows)))
+		mlx4_flow_destroy(priv->dev, flow, NULL);
+}
+
+/**
  * Disable flow rules.
  *
  * @param priv
diff --git a/drivers/net/mlx4/mlx4_flow.h b/drivers/net/mlx4/mlx4_flow.h
index 68ffb33..c2ffa8d 100644
--- a/drivers/net/mlx4/mlx4_flow.h
+++ b/drivers/net/mlx4/mlx4_flow.h
@@ -55,12 +55,16 @@
 /** Last and lowest priority level for a flow rule. */
 #define MLX4_FLOW_PRIORITY_LAST UINT32_C(0xfff)
 
+/** Meta pattern item used to distinguish internal rules. */
+#define MLX4_FLOW_ITEM_TYPE_INTERNAL ((enum rte_flow_item_type)-1)
+
 /** PMD-specific (mlx4) definition of a flow rule handle. */
 struct rte_flow {
 	LIST_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
 	struct ibv_flow *ibv_flow; /**< Verbs flow. */
 	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
 	uint32_t ibv_attr_size; /**< Size of Verbs attributes. */
+	uint32_t internal:1; /**< Internal flow rule outside isolated mode. */
 	uint32_t drop:1; /**< This rule drops packets. */
 	uint32_t queue:1; /**< Target is a receive queue. */
 	uint16_t queue_id; /**< Target queue. */
@@ -68,6 +72,8 @@ struct rte_flow {
 
 /* mlx4_flow.c */
 
+int mlx4_flow_sync(struct priv *priv);
+void mlx4_flow_clean(struct priv *priv);
 int mlx4_flow_start(struct priv *priv);
 void mlx4_flow_stop(struct priv *priv);
 int mlx4_filter_ctrl(struct rte_eth_dev *dev,
diff --git a/drivers/net/mlx4/mlx4_rxq.c b/drivers/net/mlx4/mlx4_rxq.c
index 2d54ab0..7bb2f9e 100644
--- a/drivers/net/mlx4/mlx4_rxq.c
+++ b/drivers/net/mlx4/mlx4_rxq.c
@@ -59,6 +59,7 @@
 #include <rte_mempool.h>
 
 #include "mlx4.h"
+#include "mlx4_flow.h"
 #include "mlx4_rxtx.h"
 #include "mlx4_utils.h"
 
@@ -399,8 +400,8 @@ mlx4_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 			return -rte_errno;
 		}
 		dev->data->rx_queues[idx] = NULL;
-		if (idx == 0)
-			mlx4_mac_addr_del(priv);
+		/* Disable associated flows. */
+		mlx4_flow_sync(priv);
 		mlx4_rxq_cleanup(rxq);
 	} else {
 		rxq = rte_calloc_socket("RXQ", 1, sizeof(*rxq), 0, socket);
@@ -419,6 +420,14 @@ mlx4_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		DEBUG("%p: adding Rx queue %p to list",
 		      (void *)dev, (void *)rxq);
 		dev->data->rx_queues[idx] = rxq;
+		/* Re-enable associated flows. */
+		ret = mlx4_flow_sync(priv);
+		if (ret) {
+			dev->data->rx_queues[idx] = NULL;
+			mlx4_rxq_cleanup(rxq);
+			rte_free(rxq);
+			return ret;
+		}
 		/* Update receive callback. */
 		dev->rx_pkt_burst = mlx4_rx_burst;
 	}
@@ -446,111 +455,9 @@ mlx4_rx_queue_release(void *dpdk_rxq)
 			DEBUG("%p: removing Rx queue %p from list",
 			      (void *)priv->dev, (void *)rxq);
 			priv->dev->data->rx_queues[i] = NULL;
-			if (i == 0)
-				mlx4_mac_addr_del(priv);
 			break;
 		}
+	mlx4_flow_sync(priv);
 	mlx4_rxq_cleanup(rxq);
 	rte_free(rxq);
 }
-
-/**
- * Unregister a MAC address.
- *
- * @param priv
- *   Pointer to private structure.
- */
-void
-mlx4_mac_addr_del(struct priv *priv)
-{
-#ifndef NDEBUG
-	uint8_t (*mac)[ETHER_ADDR_LEN] = &priv->mac.addr_bytes;
-#endif
-
-	if (!priv->mac_flow)
-		return;
-	DEBUG("%p: removing MAC address %02x:%02x:%02x:%02x:%02x:%02x",
-	      (void *)priv,
-	      (*mac)[0], (*mac)[1], (*mac)[2], (*mac)[3], (*mac)[4], (*mac)[5]);
-	claim_zero(ibv_destroy_flow(priv->mac_flow));
-	priv->mac_flow = NULL;
-}
-
-/**
- * Register a MAC address.
- *
- * The MAC address is registered in queue 0.
- *
- * @param priv
- *   Pointer to private structure.
- *
- * @return
- *   0 on success, negative errno value otherwise and rte_errno is set.
- */
-int
-mlx4_mac_addr_add(struct priv *priv)
-{
-	uint8_t (*mac)[ETHER_ADDR_LEN] = &priv->mac.addr_bytes;
-	struct rxq *rxq;
-	struct ibv_flow *flow;
-
-	/* If device isn't started, this is all we need to do. */
-	if (!priv->started)
-		return 0;
-	if (priv->isolated)
-		return 0;
-	if (priv->dev->data->rx_queues && priv->dev->data->rx_queues[0])
-		rxq = priv->dev->data->rx_queues[0];
-	else
-		return 0;
-
-	/* Allocate flow specification on the stack. */
-	struct __attribute__((packed)) {
-		struct ibv_flow_attr attr;
-		struct ibv_flow_spec_eth spec;
-	} data;
-	struct ibv_flow_attr *attr = &data.attr;
-	struct ibv_flow_spec_eth *spec = &data.spec;
-
-	if (priv->mac_flow)
-		mlx4_mac_addr_del(priv);
-	/*
-	 * No padding must be inserted by the compiler between attr and spec.
-	 * This layout is expected by libibverbs.
-	 */
-	assert(((uint8_t *)attr + sizeof(*attr)) == (uint8_t *)spec);
-	*attr = (struct ibv_flow_attr){
-		.type = IBV_FLOW_ATTR_NORMAL,
-		.priority = 3,
-		.num_of_specs = 1,
-		.port = priv->port,
-		.flags = 0
-	};
-	*spec = (struct ibv_flow_spec_eth){
-		.type = IBV_FLOW_SPEC_ETH,
-		.size = sizeof(*spec),
-		.val = {
-			.dst_mac = {
-				(*mac)[0], (*mac)[1], (*mac)[2],
-				(*mac)[3], (*mac)[4], (*mac)[5]
-			},
-		},
-		.mask = {
-			.dst_mac = "\xff\xff\xff\xff\xff\xff",
-		}
-	};
-	DEBUG("%p: adding MAC address %02x:%02x:%02x:%02x:%02x:%02x",
-	      (void *)priv,
-	      (*mac)[0], (*mac)[1], (*mac)[2], (*mac)[3], (*mac)[4], (*mac)[5]);
-	/* Create related flow. */
-	flow = ibv_create_flow(rxq->qp, attr);
-	if (flow == NULL) {
-		rte_errno = errno ? errno : EINVAL;
-		ERROR("%p: flow configuration failed, errno=%d: %s",
-		      (void *)rxq, rte_errno, strerror(errno));
-		return -rte_errno;
-	}
-	assert(priv->mac_flow == NULL);
-	priv->mac_flow = flow;
-	return 0;
-}
diff --git a/drivers/net/mlx4/mlx4_rxtx.h b/drivers/net/mlx4/mlx4_rxtx.h
index 365b585..7a2c982 100644
--- a/drivers/net/mlx4/mlx4_rxtx.h
+++ b/drivers/net/mlx4/mlx4_rxtx.h
@@ -128,8 +128,6 @@ int mlx4_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx,
 			const struct rte_eth_rxconf *conf,
 			struct rte_mempool *mp);
 void mlx4_rx_queue_release(void *dpdk_rxq);
-void mlx4_mac_addr_del(struct priv *priv);
-int mlx4_mac_addr_add(struct priv *priv);
 
 /* mlx4_rxtx.c */
 
-- 
2.1.4



More information about the dev mailing list