[dpdk-dev] [PATCH v7 3/3] net/failsafe: fix hotplug races

Matan Azrad matan at mellanox.com
Mon Feb 12 21:51:42 CET 2018


Fail-safe uses a periodic alarm mechanism, running from the host
thread, to manage the hot-plug events of its sub-devices. This
management requires a lot of sub-devices PMDs operations
(stop, close, start, configure, etc.).

While the hot-plug alarm runs in the host thread, the application may
call fail-safe operations, which directly trigger the sub-devices PMDs
operations as well. This call may occur from any thread decided by the
application (probably the master thread).

Thus, more than one operation can be executed to a sub-device at the
same time. This can initiate a lot of races in the sub-PMDs.

Moreover, some control operations update the fail-safe internal
databases, which can be used by the alarm mechanism at the same time.
This can also initiate races and crashes.

Fail-safe is the owner of its sub-devices and must synchronize their
use according to the ETHDEV ownership rules.

Synchronize hot-plug management by a new lock mechanism uses a mutex to
atomically defend each critical section in the fail-safe hot-plug
mechanism and control operations to prevent any races between them.

Fixes: a46f8d5 ("net/failsafe: add fail-safe PMD")
Cc: stable at dpdk.org

Signed-off-by: Matan Azrad <matan at mellanox.com>
---
 drivers/net/failsafe/Makefile           |   1 +
 drivers/net/failsafe/failsafe.c         |  35 ++++++++
 drivers/net/failsafe/failsafe_ether.c   |   5 ++
 drivers/net/failsafe/failsafe_flow.c    |  20 ++++-
 drivers/net/failsafe/failsafe_ops.c     | 148 ++++++++++++++++++++++++++------
 drivers/net/failsafe/failsafe_private.h |  56 ++++++++++++
 6 files changed, 239 insertions(+), 26 deletions(-)

diff --git a/drivers/net/failsafe/Makefile b/drivers/net/failsafe/Makefile
index d1ae899..bd2f019 100644
--- a/drivers/net/failsafe/Makefile
+++ b/drivers/net/failsafe/Makefile
@@ -68,5 +68,6 @@ CFLAGS += -pedantic
 LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
 LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
 LDLIBS += -lrte_bus_vdev
+LDLIBS += -lpthread
 
 include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/failsafe/failsafe.c b/drivers/net/failsafe/failsafe.c
index 7b2cdbb..c499bfb 100644
--- a/drivers/net/failsafe/failsafe.c
+++ b/drivers/net/failsafe/failsafe.c
@@ -113,17 +113,46 @@ fs_hotplug_alarm(void *arg)
 			break;
 	/* if we have non-probed device */
 	if (i != PRIV(dev)->subs_tail) {
+		if (fs_lock(dev, 1) != 0)
+			goto reinstall;
 		ret = failsafe_eth_dev_state_sync(dev);
+		fs_unlock(dev, 1);
 		if (ret)
 			ERROR("Unable to synchronize sub_device state");
 	}
 	failsafe_dev_remove(dev);
+reinstall:
 	ret = failsafe_hotplug_alarm_install(dev);
 	if (ret)
 		ERROR("Unable to set up next alarm");
 }
 
 static int
+fs_mutex_init(struct fs_priv *priv)
+{
+	int ret;
+	pthread_mutexattr_t attr;
+
+	ret = pthread_mutexattr_init(&attr);
+	if (ret) {
+		ERROR("Cannot initiate mutex attributes - %s", strerror(ret));
+		return ret;
+	}
+	/* Allow mutex relocks for the thread holding the mutex. */
+	ret = pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
+	if (ret) {
+		ERROR("Cannot set mutex type - %s", strerror(ret));
+		return ret;
+	}
+	ret = pthread_mutex_init(&priv->hotplug_mutex, &attr);
+	if (ret) {
+		ERROR("Cannot initiate mutex - %s", strerror(ret));
+		return ret;
+	}
+	return 0;
+}
+
+static int
 fs_eth_dev_create(struct rte_vdev_device *vdev)
 {
 	struct rte_eth_dev *dev;
@@ -176,6 +205,9 @@ fs_eth_dev_create(struct rte_vdev_device *vdev)
 	ret = failsafe_eal_init(dev);
 	if (ret)
 		goto free_args;
+	ret = fs_mutex_init(priv);
+	if (ret)
+		goto free_args;
 	ret = failsafe_hotplug_alarm_install(dev);
 	if (ret) {
 		ERROR("Could not set up plug-in event detection");
@@ -250,6 +282,9 @@ fs_rte_eth_free(const char *name)
 		ERROR("Error while uninitializing sub-EAL");
 	failsafe_args_free(dev);
 	fs_sub_device_free(dev);
+	ret = pthread_mutex_destroy(&PRIV(dev)->hotplug_mutex);
+	if (ret)
+		ERROR("Error while destroying hotplug mutex");
 	rte_free(PRIV(dev));
 	rte_eth_dev_release_port(dev);
 	return ret;
diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
index d820faf..2c0bf93 100644
--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -328,8 +328,11 @@ failsafe_dev_remove(struct rte_eth_dev *dev)
 
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
 		if (sdev->remove && fs_rxtx_clean(sdev)) {
+			if (fs_lock(dev, 1) != 0)
+				return;
 			fs_dev_stats_save(sdev);
 			fs_dev_remove(sdev);
+			fs_unlock(dev, 1);
 		}
 }
 
@@ -429,6 +432,7 @@ failsafe_eth_rmv_event_callback(uint16_t port_id __rte_unused,
 {
 	struct sub_device *sdev = cb_arg;
 
+	fs_lock(sdev->fs_dev, 0);
 	/* Switch as soon as possible tx_dev. */
 	fs_switch_dev(sdev->fs_dev, sdev);
 	/* Use safe bursts in any case. */
@@ -438,6 +442,7 @@ failsafe_eth_rmv_event_callback(uint16_t port_id __rte_unused,
 	 * the callback at the source of the current thread context.
 	 */
 	sdev->remove = 1;
+	fs_unlock(sdev->fs_dev, 0);
 	return 0;
 }
 
diff --git a/drivers/net/failsafe/failsafe_flow.c b/drivers/net/failsafe/failsafe_flow.c
index 4d18e8e..ec8c909 100644
--- a/drivers/net/failsafe/failsafe_flow.c
+++ b/drivers/net/failsafe/failsafe_flow.c
@@ -55,6 +55,7 @@ fs_flow_validate(struct rte_eth_dev *dev,
 	uint8_t i;
 	int ret;
 
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling rte_flow_validate on sub_device %d", i);
 		ret = rte_flow_validate(PORT_ID(sdev),
@@ -62,9 +63,11 @@ fs_flow_validate(struct rte_eth_dev *dev,
 		if ((ret = fs_err(sdev, ret))) {
 			ERROR("Operation rte_flow_validate failed for sub_device %d"
 			      " with error %d", i, ret);
+			fs_unlock(dev, 0);
 			return ret;
 		}
 	}
+	fs_unlock(dev, 0);
 	return 0;
 }
 
@@ -79,6 +82,7 @@ fs_flow_create(struct rte_eth_dev *dev,
 	struct rte_flow *flow;
 	uint8_t i;
 
+	fs_lock(dev, 0);
 	flow = fs_flow_allocate(attr, patterns, actions);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		flow->flows[i] = rte_flow_create(PORT_ID(sdev),
@@ -90,6 +94,7 @@ fs_flow_create(struct rte_eth_dev *dev,
 		}
 	}
 	TAILQ_INSERT_TAIL(&PRIV(dev)->flow_list, flow, next);
+	fs_unlock(dev, 0);
 	return flow;
 err:
 	FOREACH_SUBDEV(sdev, i, dev) {
@@ -98,6 +103,7 @@ err:
 				flow->flows[i], error);
 	}
 	fs_flow_release(&flow);
+	fs_unlock(dev, 0);
 	return NULL;
 }
 
@@ -115,6 +121,7 @@ fs_flow_destroy(struct rte_eth_dev *dev,
 		return -EINVAL;
 	}
 	ret = 0;
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		int local_ret;
 
@@ -131,6 +138,7 @@ fs_flow_destroy(struct rte_eth_dev *dev,
 	}
 	TAILQ_REMOVE(&PRIV(dev)->flow_list, flow, next);
 	fs_flow_release(&flow);
+	fs_unlock(dev, 0);
 	return ret;
 }
 
@@ -144,12 +152,14 @@ fs_flow_flush(struct rte_eth_dev *dev,
 	uint8_t i;
 	int ret;
 
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling rte_flow_flush on sub_device %d", i);
 		ret = rte_flow_flush(PORT_ID(sdev), error);
 		if ((ret = fs_err(sdev, ret))) {
 			ERROR("Operation rte_flow_flush failed for sub_device %d"
 			      " with error %d", i, ret);
+			fs_unlock(dev, 0);
 			return ret;
 		}
 	}
@@ -157,6 +167,7 @@ fs_flow_flush(struct rte_eth_dev *dev,
 		TAILQ_REMOVE(&PRIV(dev)->flow_list, flow, next);
 		fs_flow_release(&flow);
 	}
+	fs_unlock(dev, 0);
 	return 0;
 }
 
@@ -169,15 +180,19 @@ fs_flow_query(struct rte_eth_dev *dev,
 {
 	struct sub_device *sdev;
 
+	fs_lock(dev, 0);
 	sdev = TX_SUBDEV(dev);
 	if (sdev != NULL) {
 		int ret = rte_flow_query(PORT_ID(sdev),
 					 flow->flows[SUB_ID(sdev)],
 					 type, arg, error);
 
-		if ((ret = fs_err(sdev, ret)))
+		if ((ret = fs_err(sdev, ret))) {
+			fs_unlock(dev, 0);
 			return ret;
+		}
 	}
+	fs_unlock(dev, 0);
 	WARN("No active sub_device to query about its flow");
 	return -1;
 }
@@ -191,6 +206,7 @@ fs_flow_isolate(struct rte_eth_dev *dev,
 	uint8_t i;
 	int ret;
 
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV(sdev, i, dev) {
 		if (sdev->state < DEV_PROBED)
 			continue;
@@ -202,11 +218,13 @@ fs_flow_isolate(struct rte_eth_dev *dev,
 		if ((ret = fs_err(sdev, ret))) {
 			ERROR("Operation rte_flow_isolate failed for sub_device %d"
 			      " with error %d", i, ret);
+			fs_unlock(dev, 0);
 			return ret;
 		}
 		sdev->flow_isolated = set;
 	}
 	PRIV(dev)->flow_isolated = set;
+	fs_unlock(dev, 0);
 	return 0;
 }
 
diff --git a/drivers/net/failsafe/failsafe_ops.c b/drivers/net/failsafe/failsafe_ops.c
index f0e48c1..fe64c68 100644
--- a/drivers/net/failsafe/failsafe_ops.c
+++ b/drivers/net/failsafe/failsafe_ops.c
@@ -94,6 +94,7 @@ fs_dev_configure(struct rte_eth_dev *dev)
 	uint8_t i;
 	int ret;
 
+	fs_lock(dev, 0);
 	supp_tx_offloads = PRIV(dev)->infos.tx_offload_capa;
 	tx_offloads = dev->data->dev_conf.txmode.offloads;
 	if ((tx_offloads & supp_tx_offloads) != tx_offloads) {
@@ -101,6 +102,7 @@ fs_dev_configure(struct rte_eth_dev *dev)
 		ERROR("Some Tx offloads are not supported, "
 		      "requested 0x%" PRIx64 " supported 0x%" PRIx64,
 		      tx_offloads, supp_tx_offloads);
+		fs_unlock(dev, 0);
 		return -rte_errno;
 	}
 	FOREACH_SUBDEV(sdev, i, dev) {
@@ -139,6 +141,7 @@ fs_dev_configure(struct rte_eth_dev *dev)
 			if (!fs_err(sdev, ret))
 				continue;
 			ERROR("Could not configure sub_device %d", i);
+			fs_unlock(dev, 0);
 			return ret;
 		}
 		if (rmv_interrupt) {
@@ -165,6 +168,7 @@ fs_dev_configure(struct rte_eth_dev *dev)
 	}
 	if (PRIV(dev)->state < DEV_ACTIVE)
 		PRIV(dev)->state = DEV_ACTIVE;
+	fs_unlock(dev, 0);
 	return 0;
 }
 
@@ -175,9 +179,12 @@ fs_dev_start(struct rte_eth_dev *dev)
 	uint8_t i;
 	int ret;
 
+	fs_lock(dev, 0);
 	ret = failsafe_rx_intr_install(dev);
-	if (ret)
+	if (ret) {
+		fs_unlock(dev, 0);
 		return ret;
+	}
 	FOREACH_SUBDEV(sdev, i, dev) {
 		if (sdev->state != DEV_ACTIVE)
 			continue;
@@ -186,6 +193,7 @@ fs_dev_start(struct rte_eth_dev *dev)
 		if (ret) {
 			if (!fs_err(sdev, ret))
 				continue;
+			fs_unlock(dev, 0);
 			return ret;
 		}
 		ret = failsafe_rx_intr_install_subdevice(sdev);
@@ -193,6 +201,7 @@ fs_dev_start(struct rte_eth_dev *dev)
 			if (!fs_err(sdev, ret))
 				continue;
 			rte_eth_dev_stop(PORT_ID(sdev));
+			fs_unlock(dev, 0);
 			return ret;
 		}
 		sdev->state = DEV_STARTED;
@@ -200,6 +209,7 @@ fs_dev_start(struct rte_eth_dev *dev)
 	if (PRIV(dev)->state < DEV_STARTED)
 		PRIV(dev)->state = DEV_STARTED;
 	fs_switch_dev(dev, NULL);
+	fs_unlock(dev, 0);
 	return 0;
 }
 
@@ -209,6 +219,7 @@ fs_dev_stop(struct rte_eth_dev *dev)
 	struct sub_device *sdev;
 	uint8_t i;
 
+	fs_lock(dev, 0);
 	PRIV(dev)->state = DEV_STARTED - 1;
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_STARTED) {
 		rte_eth_dev_stop(PORT_ID(sdev));
@@ -216,6 +227,7 @@ fs_dev_stop(struct rte_eth_dev *dev)
 		sdev->state = DEV_STARTED - 1;
 	}
 	failsafe_rx_intr_uninstall(dev);
+	fs_unlock(dev, 0);
 }
 
 static int
@@ -225,15 +237,18 @@ fs_dev_set_link_up(struct rte_eth_dev *dev)
 	uint8_t i;
 	int ret;
 
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling rte_eth_dev_set_link_up on sub_device %d", i);
 		ret = rte_eth_dev_set_link_up(PORT_ID(sdev));
 		if ((ret = fs_err(sdev, ret))) {
 			ERROR("Operation rte_eth_dev_set_link_up failed for sub_device %d"
 			      " with error %d", i, ret);
+			fs_unlock(dev, 0);
 			return ret;
 		}
 	}
+	fs_unlock(dev, 0);
 	return 0;
 }
 
@@ -244,15 +259,18 @@ fs_dev_set_link_down(struct rte_eth_dev *dev)
 	uint8_t i;
 	int ret;
 
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling rte_eth_dev_set_link_down on sub_device %d", i);
 		ret = rte_eth_dev_set_link_down(PORT_ID(sdev));
 		if ((ret = fs_err(sdev, ret))) {
 			ERROR("Operation rte_eth_dev_set_link_down failed for sub_device %d"
 			      " with error %d", i, ret);
+			fs_unlock(dev, 0);
 			return ret;
 		}
 	}
+	fs_unlock(dev, 0);
 	return 0;
 }
 
@@ -263,6 +281,7 @@ fs_dev_close(struct rte_eth_dev *dev)
 	struct sub_device *sdev;
 	uint8_t i;
 
+	fs_lock(dev, 0);
 	failsafe_hotplug_alarm_cancel(dev);
 	if (PRIV(dev)->state == DEV_STARTED)
 		dev->dev_ops->dev_stop(dev);
@@ -273,6 +292,7 @@ fs_dev_close(struct rte_eth_dev *dev)
 		sdev->state = DEV_ACTIVE - 1;
 	}
 	fs_dev_free_queues(dev);
+	fs_unlock(dev, 0);
 }
 
 static bool
@@ -305,14 +325,16 @@ fs_rx_queue_release(void *queue)
 	if (queue == NULL)
 		return;
 	rxq = queue;
+	dev = rxq->priv->dev;
+	fs_lock(dev, 0);
 	if (rxq->event_fd > 0)
 		close(rxq->event_fd);
-	dev = rxq->priv->dev;
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
 		SUBOPS(sdev, rx_queue_release)
 			(ETH(sdev)->data->rx_queues[rxq->qid]);
 	dev->data->rx_queues[rxq->qid] = NULL;
 	rte_free(rxq);
+	fs_unlock(dev, 0);
 }
 
 static int
@@ -338,6 +360,7 @@ fs_rx_queue_setup(struct rte_eth_dev *dev,
 	uint8_t i;
 	int ret;
 
+	fs_lock(dev, 0);
 	rxq = dev->data->rx_queues[rx_queue_id];
 	if (rxq != NULL) {
 		fs_rx_queue_release(rxq);
@@ -353,14 +376,17 @@ fs_rx_queue_setup(struct rte_eth_dev *dev,
 		      dev->data->dev_conf.rxmode.offloads,
 		      PRIV(dev)->infos.rx_offload_capa |
 		      PRIV(dev)->infos.rx_queue_offload_capa);
+		fs_unlock(dev, 0);
 		return -rte_errno;
 	}
 	rxq = rte_zmalloc(NULL,
 			  sizeof(*rxq) +
 			  sizeof(rte_atomic64_t) * PRIV(dev)->subs_tail,
 			  RTE_CACHE_LINE_SIZE);
-	if (rxq == NULL)
+	if (rxq == NULL) {
+		fs_unlock(dev, 0);
 		return -ENOMEM;
+	}
 	FOREACH_SUBDEV(sdev, i, dev)
 		rte_atomic64_init(&rxq->refcnt[i]);
 	rxq->qid = rx_queue_id;
@@ -371,8 +397,10 @@ fs_rx_queue_setup(struct rte_eth_dev *dev,
 	rxq->priv = PRIV(dev);
 	rxq->sdev = PRIV(dev)->subs;
 	ret = rte_intr_efd_enable(&intr_handle, 1);
-	if (ret < 0)
+	if (ret < 0) {
+		fs_unlock(dev, 0);
 		return ret;
+	}
 	rxq->event_fd = intr_handle.efds[0];
 	dev->data->rx_queues[rx_queue_id] = rxq;
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
@@ -385,9 +413,11 @@ fs_rx_queue_setup(struct rte_eth_dev *dev,
 			goto free_rxq;
 		}
 	}
+	fs_unlock(dev, 0);
 	return 0;
 free_rxq:
 	fs_rx_queue_release(rxq);
+	fs_unlock(dev, 0);
 	return ret;
 }
 
@@ -400,20 +430,21 @@ fs_rx_intr_enable(struct rte_eth_dev *dev, uint16_t idx)
 	int ret;
 	int rc = 0;
 
+	fs_lock(dev, 0);
 	if (idx >= dev->data->nb_rx_queues) {
-		rte_errno = EINVAL;
-		return -rte_errno;
+		rc = -EINVAL;
+		goto unlock;
 	}
 	rxq = dev->data->rx_queues[idx];
 	if (rxq == NULL || rxq->event_fd <= 0) {
-		rte_errno = EINVAL;
-		return -rte_errno;
+		rc = -EINVAL;
+		goto unlock;
 	}
 	/* Fail if proxy service is nor running. */
 	if (PRIV(dev)->rxp.sstate != SS_RUNNING) {
 		ERROR("failsafe interrupt services are not running");
-		rte_errno = EAGAIN;
-		return -rte_errno;
+		rc = -EAGAIN;
+		goto unlock;
 	}
 	rxq->enable_events = 1;
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
@@ -422,6 +453,8 @@ fs_rx_intr_enable(struct rte_eth_dev *dev, uint16_t idx)
 		if (ret)
 			rc = ret;
 	}
+unlock:
+	fs_unlock(dev, 0);
 	if (rc)
 		rte_errno = -rc;
 	return rc;
@@ -437,14 +470,15 @@ fs_rx_intr_disable(struct rte_eth_dev *dev, uint16_t idx)
 	int rc = 0;
 	int ret;
 
+	fs_lock(dev, 0);
 	if (idx >= dev->data->nb_rx_queues) {
-		rte_errno = EINVAL;
-		return -rte_errno;
+		rc = -EINVAL;
+		goto unlock;
 	}
 	rxq = dev->data->rx_queues[idx];
 	if (rxq == NULL || rxq->event_fd <= 0) {
-		rte_errno = EINVAL;
-		return -rte_errno;
+		rc = -EINVAL;
+		goto unlock;
 	}
 	rxq->enable_events = 0;
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
@@ -456,6 +490,8 @@ fs_rx_intr_disable(struct rte_eth_dev *dev, uint16_t idx)
 	/* Clear pending events */
 	while (read(rxq->event_fd, &u64, sizeof(uint64_t)) >  0)
 		;
+unlock:
+	fs_unlock(dev, 0);
 	if (rc)
 		rte_errno = -rc;
 	return rc;
@@ -492,11 +528,13 @@ fs_tx_queue_release(void *queue)
 		return;
 	txq = queue;
 	dev = txq->priv->dev;
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
 		SUBOPS(sdev, tx_queue_release)
 			(ETH(sdev)->data->tx_queues[txq->qid]);
 	dev->data->tx_queues[txq->qid] = NULL;
 	rte_free(txq);
+	fs_unlock(dev, 0);
 }
 
 static int
@@ -511,6 +549,7 @@ fs_tx_queue_setup(struct rte_eth_dev *dev,
 	uint8_t i;
 	int ret;
 
+	fs_lock(dev, 0);
 	txq = dev->data->tx_queues[tx_queue_id];
 	if (txq != NULL) {
 		fs_tx_queue_release(txq);
@@ -531,14 +570,17 @@ fs_tx_queue_setup(struct rte_eth_dev *dev,
 		      dev->data->dev_conf.txmode.offloads,
 		      PRIV(dev)->infos.tx_offload_capa |
 		      PRIV(dev)->infos.tx_queue_offload_capa);
+		fs_unlock(dev, 0);
 		return -rte_errno;
 	}
 	txq = rte_zmalloc("ethdev TX queue",
 			  sizeof(*txq) +
 			  sizeof(rte_atomic64_t) * PRIV(dev)->subs_tail,
 			  RTE_CACHE_LINE_SIZE);
-	if (txq == NULL)
+	if (txq == NULL) {
+		fs_unlock(dev, 0);
 		return -ENOMEM;
+	}
 	FOREACH_SUBDEV(sdev, i, dev)
 		rte_atomic64_init(&txq->refcnt[i]);
 	txq->qid = tx_queue_id;
@@ -557,9 +599,11 @@ fs_tx_queue_setup(struct rte_eth_dev *dev,
 			goto free_txq;
 		}
 	}
+	fs_unlock(dev, 0);
 	return 0;
 free_txq:
 	fs_tx_queue_release(txq);
+	fs_unlock(dev, 0);
 	return ret;
 }
 
@@ -586,8 +630,10 @@ fs_promiscuous_enable(struct rte_eth_dev *dev)
 	struct sub_device *sdev;
 	uint8_t i;
 
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
 		rte_eth_promiscuous_enable(PORT_ID(sdev));
+	fs_unlock(dev, 0);
 }
 
 static void
@@ -596,8 +642,10 @@ fs_promiscuous_disable(struct rte_eth_dev *dev)
 	struct sub_device *sdev;
 	uint8_t i;
 
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
 		rte_eth_promiscuous_disable(PORT_ID(sdev));
+	fs_unlock(dev, 0);
 }
 
 static void
@@ -606,8 +654,10 @@ fs_allmulticast_enable(struct rte_eth_dev *dev)
 	struct sub_device *sdev;
 	uint8_t i;
 
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
 		rte_eth_allmulticast_enable(PORT_ID(sdev));
+	fs_unlock(dev, 0);
 }
 
 static void
@@ -616,8 +666,10 @@ fs_allmulticast_disable(struct rte_eth_dev *dev)
 	struct sub_device *sdev;
 	uint8_t i;
 
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
 		rte_eth_allmulticast_disable(PORT_ID(sdev));
+	fs_unlock(dev, 0);
 }
 
 static int
@@ -628,6 +680,7 @@ fs_link_update(struct rte_eth_dev *dev,
 	uint8_t i;
 	int ret;
 
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling link_update on sub_device %d", i);
 		ret = (SUBOPS(sdev, link_update))(ETH(sdev), wait_to_complete);
@@ -635,6 +688,7 @@ fs_link_update(struct rte_eth_dev *dev,
 		    rte_eth_dev_is_removed(PORT_ID(sdev)) == 0) {
 			ERROR("Link update failed for sub_device %d with error %d",
 			      i, ret);
+			fs_unlock(dev, 0);
 			return ret;
 		}
 	}
@@ -646,9 +700,11 @@ fs_link_update(struct rte_eth_dev *dev,
 		l2 = &ETH(TX_SUBDEV(dev))->data->dev_link;
 		if (memcmp(l1, l2, sizeof(*l1))) {
 			*l1 = *l2;
+			fs_unlock(dev, 0);
 			return 0;
 		}
 	}
+	fs_unlock(dev, 0);
 	return -1;
 }
 
@@ -661,6 +717,7 @@ fs_stats_get(struct rte_eth_dev *dev,
 	uint8_t i;
 	int ret;
 
+	fs_lock(dev, 0);
 	rte_memcpy(stats, &PRIV(dev)->stats_accumulator, sizeof(*stats));
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		struct rte_eth_stats *snapshot = &sdev->stats_snapshot.stats;
@@ -676,12 +733,14 @@ fs_stats_get(struct rte_eth_dev *dev,
 			ERROR("Operation rte_eth_stats_get failed for sub_device %d with error %d",
 				  i, ret);
 			*timestamp = 0;
+			fs_unlock(dev, 0);
 			return ret;
 		}
 		*timestamp = rte_rdtsc();
 inc:
 		failsafe_stats_increment(stats, snapshot);
 	}
+	fs_unlock(dev, 0);
 	return 0;
 }
 
@@ -691,11 +750,13 @@ fs_stats_reset(struct rte_eth_dev *dev)
 	struct sub_device *sdev;
 	uint8_t i;
 
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		rte_eth_stats_reset(PORT_ID(sdev));
 		memset(&sdev->stats_snapshot, 0, sizeof(struct rte_eth_stats));
 	}
 	memset(&PRIV(dev)->stats_accumulator, 0, sizeof(struct rte_eth_stats));
+	fs_unlock(dev, 0);
 }
 
 /**
@@ -771,14 +832,20 @@ fs_dev_supported_ptypes_get(struct rte_eth_dev *dev)
 {
 	struct sub_device *sdev;
 	struct rte_eth_dev *edev;
+	const uint32_t *ret;
 
+	fs_lock(dev, 0);
 	sdev = TX_SUBDEV(dev);
-	if (sdev == NULL)
-		return NULL;
+	if (sdev == NULL) {
+		ret = NULL;
+		goto unlock;
+	}
 	edev = ETH(sdev);
 	/* ENOTSUP: counts as no supported ptypes */
-	if (SUBOPS(sdev, dev_supported_ptypes_get) == NULL)
-		return NULL;
+	if (SUBOPS(sdev, dev_supported_ptypes_get) == NULL) {
+		ret = NULL;
+		goto unlock;
+	}
 	/*
 	 * The API does not permit to do a clean AND of all ptypes,
 	 * It is also incomplete by design and we do not really care
@@ -786,7 +853,10 @@ fs_dev_supported_ptypes_get(struct rte_eth_dev *dev)
 	 * We just return the ptypes of the device of highest
 	 * priority, usually the PREFERRED device.
 	 */
-	return SUBOPS(sdev, dev_supported_ptypes_get)(edev);
+	ret = SUBOPS(sdev, dev_supported_ptypes_get)(edev);
+unlock:
+	fs_unlock(dev, 0);
+	return ret;
 }
 
 static int
@@ -796,15 +866,18 @@ fs_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
 	uint8_t i;
 	int ret;
 
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling rte_eth_dev_set_mtu on sub_device %d", i);
 		ret = rte_eth_dev_set_mtu(PORT_ID(sdev), mtu);
 		if ((ret = fs_err(sdev, ret))) {
 			ERROR("Operation rte_eth_dev_set_mtu failed for sub_device %d with error %d",
 			      i, ret);
+			fs_unlock(dev, 0);
 			return ret;
 		}
 	}
+	fs_unlock(dev, 0);
 	return 0;
 }
 
@@ -815,15 +888,18 @@ fs_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
 	uint8_t i;
 	int ret;
 
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling rte_eth_dev_vlan_filter on sub_device %d", i);
 		ret = rte_eth_dev_vlan_filter(PORT_ID(sdev), vlan_id, on);
 		if ((ret = fs_err(sdev, ret))) {
 			ERROR("Operation rte_eth_dev_vlan_filter failed for sub_device %d"
 			      " with error %d", i, ret);
+			fs_unlock(dev, 0);
 			return ret;
 		}
 	}
+	fs_unlock(dev, 0);
 	return 0;
 }
 
@@ -832,13 +908,22 @@ fs_flow_ctrl_get(struct rte_eth_dev *dev,
 		struct rte_eth_fc_conf *fc_conf)
 {
 	struct sub_device *sdev;
+	int ret;
 
+	fs_lock(dev, 0);
 	sdev = TX_SUBDEV(dev);
-	if (sdev == NULL)
-		return 0;
-	if (SUBOPS(sdev, flow_ctrl_get) == NULL)
-		return -ENOTSUP;
-	return SUBOPS(sdev, flow_ctrl_get)(ETH(sdev), fc_conf);
+	if (sdev == NULL) {
+		ret = 0;
+		goto unlock;
+	}
+	if (SUBOPS(sdev, flow_ctrl_get) == NULL) {
+		ret = -ENOTSUP;
+		goto unlock;
+	}
+	ret = SUBOPS(sdev, flow_ctrl_get)(ETH(sdev), fc_conf);
+unlock:
+	fs_unlock(dev, 0);
+	return ret;
 }
 
 static int
@@ -849,15 +934,18 @@ fs_flow_ctrl_set(struct rte_eth_dev *dev,
 	uint8_t i;
 	int ret;
 
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling rte_eth_dev_flow_ctrl_set on sub_device %d", i);
 		ret = rte_eth_dev_flow_ctrl_set(PORT_ID(sdev), fc_conf);
 		if ((ret = fs_err(sdev, ret))) {
 			ERROR("Operation rte_eth_dev_flow_ctrl_set failed for sub_device %d"
 			      " with error %d", i, ret);
+			fs_unlock(dev, 0);
 			return ret;
 		}
 	}
+	fs_unlock(dev, 0);
 	return 0;
 }
 
@@ -867,6 +955,7 @@ fs_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
 	struct sub_device *sdev;
 	uint8_t i;
 
+	fs_lock(dev, 0);
 	/* No check: already done within the rte_eth_dev_mac_addr_remove
 	 * call for the fail-safe device.
 	 */
@@ -874,6 +963,7 @@ fs_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
 		rte_eth_dev_mac_addr_remove(PORT_ID(sdev),
 				&dev->data->mac_addrs[index]);
 	PRIV(dev)->mac_addr_pool[index] = 0;
+	fs_unlock(dev, 0);
 }
 
 static int
@@ -887,11 +977,13 @@ fs_mac_addr_add(struct rte_eth_dev *dev,
 	uint8_t i;
 
 	RTE_ASSERT(index < FAILSAFE_MAX_ETHADDR);
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		ret = rte_eth_dev_mac_addr_add(PORT_ID(sdev), mac_addr, vmdq);
 		if ((ret = fs_err(sdev, ret))) {
 			ERROR("Operation rte_eth_dev_mac_addr_add failed for sub_device %"
 			      PRIu8 " with error %d", i, ret);
+			fs_unlock(dev, 0);
 			return ret;
 		}
 	}
@@ -900,6 +992,7 @@ fs_mac_addr_add(struct rte_eth_dev *dev,
 		PRIV(dev)->nb_mac_addr = index;
 	}
 	PRIV(dev)->mac_addr_pool[index] = vmdq;
+	fs_unlock(dev, 0);
 	return 0;
 }
 
@@ -909,8 +1002,10 @@ fs_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
 	struct sub_device *sdev;
 	uint8_t i;
 
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
 		rte_eth_dev_default_mac_addr_set(PORT_ID(sdev), mac_addr);
+	fs_unlock(dev, 0);
 }
 
 static int
@@ -928,15 +1023,18 @@ fs_filter_ctrl(struct rte_eth_dev *dev,
 		*(const void **)arg = &fs_flow_ops;
 		return 0;
 	}
+	fs_lock(dev, 0);
 	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
 		DEBUG("Calling rte_eth_dev_filter_ctrl on sub_device %d", i);
 		ret = rte_eth_dev_filter_ctrl(PORT_ID(sdev), type, op, arg);
 		if ((ret = fs_err(sdev, ret))) {
 			ERROR("Operation rte_eth_dev_filter_ctrl failed for sub_device %d"
 			      " with error %d", i, ret);
+			fs_unlock(dev, 0);
 			return ret;
 		}
 	}
+	fs_unlock(dev, 0);
 	return 0;
 }
 
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index f3be152..5b84db9 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -7,6 +7,7 @@
 #define _RTE_ETH_FAILSAFE_PRIVATE_H_
 
 #include <sys/queue.h>
+#include <pthread.h>
 
 #include <rte_atomic.h>
 #include <rte_dev.h>
@@ -161,6 +162,9 @@ struct fs_priv {
 	 * appropriate failsafe Rx queue.
 	 */
 	struct rx_proxy rxp;
+	pthread_mutex_t hotplug_mutex;
+	/* Hot-plug mutex is locked by the alarm mechanism. */
+	volatile unsigned int alarm_lock:1;
 	unsigned int pending_alarm:1; /* An alarm is pending */
 	/* flow isolation state */
 	int flow_isolated:1;
@@ -347,6 +351,58 @@ fs_find_next(struct rte_eth_dev *dev,
 }
 
 /*
+ * Lock hot-plug mutex.
+ * is_alarm means that the caller is, for sure, the hot-plug alarm mechanism.
+ */
+static inline int
+fs_lock(struct rte_eth_dev *dev, unsigned int is_alarm)
+{
+	int ret;
+
+	if (is_alarm) {
+		ret = pthread_mutex_trylock(&PRIV(dev)->hotplug_mutex);
+		if (ret) {
+			DEBUG("Hot-plug mutex lock trying failed(%s), will try"
+			      " again later...", strerror(ret));
+			return ret;
+		}
+		PRIV(dev)->alarm_lock = 1;
+	} else {
+		ret = pthread_mutex_lock(&PRIV(dev)->hotplug_mutex);
+		if (ret) {
+			ERROR("Cannot lock mutex(%s)", strerror(ret));
+			return ret;
+		}
+	}
+	DEBUG("Hot-plug mutex was locked by thread %lu%s", pthread_self(),
+	      PRIV(dev)->alarm_lock ? " by the hot-plug alarm" : "");
+	return ret;
+}
+
+/*
+ * Unlock hot-plug mutex.
+ * is_alarm means that the caller is, for sure, the hot-plug alarm mechanism.
+ */
+static inline void
+fs_unlock(struct rte_eth_dev *dev, unsigned int is_alarm)
+{
+	int ret;
+	unsigned int prev_alarm_lock = PRIV(dev)->alarm_lock;
+
+	if (is_alarm) {
+		RTE_ASSERT(PRIV(dev)->alarm_lock == 1);
+		PRIV(dev)->alarm_lock = 0;
+	}
+	ret = pthread_mutex_unlock(&PRIV(dev)->hotplug_mutex);
+	if (ret)
+		ERROR("Cannot unlock hot-plug mutex(%s)", strerror(ret));
+	else
+		DEBUG("Hot-plug mutex was unlocked by thread %lu%s",
+		      pthread_self(),
+		      prev_alarm_lock ? " by the hot-plug alarm" : "");
+}
+
+/*
  * Switch emitting device.
  * If banned is set, banned must not be considered for
  * the role of emitting device.
-- 
1.9.5



More information about the dev mailing list