[dpdk-dev] [PATCH v10 05/19] eal: enable hotplug on multi-process

Qi Zhang qi.z.zhang at intel.com
Mon Jul 9 05:36:52 CEST 2018


We are going to introduce a solution to handle hotplug in
multi-process mode. It covers the following scenarios:

1. Attach a device from the primary
2. Detach a device from the primary
3. Attach a device from a secondary
4. Detach a device from a secondary

In the primary-secondary process model, we assume devices are shared
by default. That means attaching or detaching a device on any process
will be broadcast to all other processes through the mp channel, so
that device information is synchronized across all processes.

Any failure during the attach/detach process will cause inconsistent
status between processes, so proper rollback actions should be considered.

This patch covers the implementation of cases 1 and 2.
Cases 3 and 4 will be implemented in a separate patch.

IPC scenario for Case 1, 2:

attach a device
a) primary attach the new device if failed goto h).
b) primary send attach sync request to all secondary.
c) secondary receive request and attach the device and send a reply.
d) primary check the reply if all success goes to i).
e) primary send attach rollback sync request to all secondary.
f) secondary receive the request and detach the device and send a reply.
g) primary receive the reply and detach device as rollback action.
h) attach fail
i) attach success

detach a device
a) primary send detach sync request to all secondary
b) secondary detach the device and send reply
c) primary check the reply if all success goes to f).
d) primary send detach rollback sync request to all secondary.
e) secondary receive the request and attach back device. goto g)
f) primary detach the device if success goto h), else goto d)
g) detach fail.
h) detach success.

Signed-off-by: Qi Zhang <qi.z.zhang at intel.com>
---
 lib/librte_eal/bsdapp/eal/Makefile      |   1 +
 lib/librte_eal/common/eal_common_dev.c  | 140 +++++++++++++++++++++++-
 lib/librte_eal/common/eal_private.h     |  37 +++++++
 lib/librte_eal/common/hotplug_mp.c      | 181 ++++++++++++++++++++++++++++++++
 lib/librte_eal/common/hotplug_mp.h      |  44 ++++++++
 lib/librte_eal/common/include/rte_bus.h |   3 +
 lib/librte_eal/common/include/rte_dev.h |   9 ++
 lib/librte_eal/common/meson.build       |   1 +
 lib/librte_eal/linuxapp/eal/Makefile    |   1 +
 lib/librte_eal/linuxapp/eal/eal.c       |   6 ++
 10 files changed, 418 insertions(+), 5 deletions(-)
 create mode 100644 lib/librte_eal/common/hotplug_mp.c
 create mode 100644 lib/librte_eal/common/hotplug_mp.h

diff --git a/lib/librte_eal/bsdapp/eal/Makefile b/lib/librte_eal/bsdapp/eal/Makefile
index 3fd33f1e4..4ecc73b42 100644
--- a/lib/librte_eal/bsdapp/eal/Makefile
+++ b/lib/librte_eal/bsdapp/eal/Makefile
@@ -59,6 +59,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_thread.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_proc.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_fbarray.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_malloc.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += hotplug_mp.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_elem.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_heap.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_mp.c
diff --git a/lib/librte_eal/common/eal_common_dev.c b/lib/librte_eal/common/eal_common_dev.c
index 14c5f05fa..fb1a122ae 100644
--- a/lib/librte_eal/common/eal_common_dev.c
+++ b/lib/librte_eal/common/eal_common_dev.c
@@ -16,8 +16,10 @@
 #include <rte_log.h>
 #include <rte_spinlock.h>
 #include <rte_malloc.h>
+#include <rte_string_fns.h>
 
 #include "eal_private.h"
+#include "hotplug_mp.h"
 
 /**
  * The device event callback description.
@@ -102,8 +104,9 @@ int rte_eal_dev_detach(struct rte_device *dev)
 	return ret;
 }
 
-int __rte_experimental rte_eal_hotplug_add(const char *busname, const char *devname,
-			const char *devargs)
+int
+do_dev_hotplug_add(const char *busname, const char *devname,
+		const char *devargs)
 {
 	struct rte_bus *bus;
 	struct rte_device *dev;
@@ -168,8 +171,7 @@ int __rte_experimental rte_eal_hotplug_add(const char *busname, const char *devn
 	return ret;
 }
 
-int __rte_experimental
-rte_eal_hotplug_remove(const char *busname, const char *devname)
+int do_dev_hotplug_remove(const char *busname, const char *devname)
 {
 	struct rte_bus *bus;
 	struct rte_device *dev;
@@ -197,11 +199,139 @@ rte_eal_hotplug_remove(const char *busname, const char *devname)
 	if (ret)
 		RTE_LOG(ERR, EAL, "Driver cannot detach the device (%s)\n",
 			dev->name);
-	rte_devargs_remove(busname, devname);
+	else
+		rte_devargs_remove(busname, devname);
+
 	return ret;
 }
 
 int __rte_experimental
+rte_eal_hotplug_add(const char *busname, const char *devname,
+		const char *devargs)
+{
+	struct eal_dev_mp_req req;
+	int ret;
+
+	memset(&req, 0, sizeof(req));
+	req.t = EAL_DEV_REQ_TYPE_ATTACH;
+	strlcpy(req.busname, busname, RTE_BUS_NAME_MAX_LEN);
+	strlcpy(req.devname, devname, RTE_DEV_NAME_MAX_LEN);
+	strlcpy(req.devargs, devargs, RTE_DEV_ARGS_MAX_LEN);
+
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+		return -ENOTSUP;
+
+	/**
+	 * attach a device from primary start from here:
+	 *
+	 * a) primary attach the new device if failed goto h).
+	 * b) primary send attach sync request to all secondary.
+	 * c) secondary receive request and attach the device and send a reply.
+	 * d) primary check the reply if all success goes to i).
+	 * e) primary send attach rollback sync request to all secondary.
+	 * f) secondary receive the request and detach the device and send a
+	 *    reply.
+	 * g) primary receive the reply and detach device as rollback action.
+	 * h) attach fail
+	 * i) attach success
+	 */
+
+	/* step a) */
+	ret = do_dev_hotplug_add(busname, devname, devargs);
+	if (ret)
+		/* step h) */
+		return ret;
+
+	/* step b), c) */
+	ret = eal_dev_hotplug_request_to_secondary(&req);
+
+	/* step d) */
+	if (ret) {
+		RTE_LOG(ERR, EAL,
+			"Failed to send hotplug add request to secondary\n");
+		goto rollback;
+	}
+
+	if (req.result)
+		goto rollback;
+
+	/* step i */
+	return 0;
+
+rollback:
+	req.t = EAL_DEV_REQ_TYPE_ATTACH_ROLLBACK;
+	/* step e), f) */
+	eal_dev_hotplug_request_to_secondary(&req);
+	/* step g) */
+	do_dev_hotplug_remove(busname, devname);
+	/* step h */
+	return -ENODEV;
+}
+
+int __rte_experimental
+rte_eal_hotplug_remove(const char *busname, const char *devname)
+{
+	struct eal_dev_mp_req req;
+	int ret;
+
+	memset(&req, 0, sizeof(req));
+	req.t = EAL_DEV_REQ_TYPE_DETACH;
+	strlcpy(req.busname, busname, RTE_BUS_NAME_MAX_LEN);
+	strlcpy(req.devname, devname, RTE_DEV_NAME_MAX_LEN);
+
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+		return -ENOTSUP;
+
+	/**
+	 * detach a device from primary start from here:
+	 *
+	 * a) primary send detach sync request to all secondary
+	 * b) secondary detach the device and send reply
+	 * c) primary check the reply if all success goes to f).
+	 * d) primary send detach rollback sync request to all secondary.
+	 * e) secondary receive the request and attach back device. goto g)
+	 * f) primary detach the device if success goto h), else goto d)
+	 * g) detach fail.
+	 * h) detach success.
+	 */
+
+	/* step a), b) */
+	ret = eal_dev_hotplug_request_to_secondary(&req);
+	/* step c) */
+	if (ret) {
+		RTE_LOG(ERR, EAL,
+			"Failed to send device detach request to secondary\n");
+		return ret;
+	}
+
+	if (req.result) {
+		RTE_LOG(ERR, EAL,
+			"Failed to detach device on secondary process\n");
+		goto rollback;
+	}
+
+	/* step f) */
+	ret = do_dev_hotplug_remove(busname, devname);
+	if (ret) {
+		RTE_LOG(ERR, EAL,
+			"Failed to detach device on primary process\n");
+		goto rollback;
+	}
+	/* step h */
+	return 0;
+
+rollback:
+	req.t = EAL_DEV_REQ_TYPE_DETACH_ROLLBACK;
+	/* step d), e) */
+	if (eal_dev_hotplug_request_to_secondary(&req))
+		RTE_LOG(ERR, EAL,
+			"Failed to attach back device on secondary."
+			"Devices in secondary may not sync with primary\n");
+	/* step g) */
+	return -ENODEV;
+}
+
+int __rte_experimental
 rte_dev_event_callback_register(const char *device_name,
 				rte_dev_event_cb_fn cb_fn,
 				void *cb_arg)
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index bdadc4d50..1883b05d1 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -258,4 +258,41 @@ int rte_mp_channel_init(void);
  */
 void dev_callback_process(char *device_name, enum rte_dev_event_type event);
 
+/**
+ * Hotplug add a given device to a specific bus.
+ *
+ * @param busname
+ *   The bus name the device is added to.
+ * @param devname
+ *   The device name. Based on this device name, eal will identify a driver
+ *   capable of handling it and pass it to the driver probing function.
+ * @param devargs
+ *   Device arguments to be passed to the driver.
+ * @return
+ *   0 on success, negative on error.
+ */
+int do_dev_hotplug_add(const char *busname, const char *devname,
+			const char *devargs);
+
+/**
+ * Hotplug remove a given device from a specific bus.
+ *
+ * @param busname
+ *   The bus name the device is removed from.
+ * @param devname
+ *   The device name being removed.
+ * @return
+ *   0 on success, negative on error.
+ */
+int do_dev_hotplug_remove(const char *busname,
+			const char *devname);
+
+/**
+ * Register all mp action callbacks for hotplug.
+ *
+ * @return
+ *   0 on success, negative on error.
+ */
+int rte_dev_hotplug_mp_init(void);
+
 #endif /* _EAL_PRIVATE_H_ */
diff --git a/lib/librte_eal/common/hotplug_mp.c b/lib/librte_eal/common/hotplug_mp.c
new file mode 100644
index 000000000..261d17fe6
--- /dev/null
+++ b/lib/librte_eal/common/hotplug_mp.c
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+#include <string.h>
+
+#include <rte_eal.h>
+#include <rte_alarm.h>
+#include <rte_string_fns.h>
+
+#include "hotplug_mp.h"
+#include "eal_private.h"
+
+#define MP_TIMEOUT_S 5 /**< 5 seconds timeouts */
+
+struct mp_reply_bundle {
+	struct rte_mp_msg msg;
+	void *peer;
+};
+
+static int
+handle_secondary_request(const struct rte_mp_msg *msg, const void *peer)
+{
+	RTE_SET_USED(msg);
+	RTE_SET_USED(peer);
+	return -ENOTSUP;
+}
+
+static void __handle_primary_request(void *param)
+{
+	struct mp_reply_bundle *bundle = param;
+	struct rte_mp_msg *msg = &bundle->msg;
+	const struct eal_dev_mp_req *req =
+		(const struct eal_dev_mp_req *)msg->param;
+	struct rte_mp_msg mp_resp;
+	struct eal_dev_mp_req *resp =
+		(struct eal_dev_mp_req *)mp_resp.param;
+	int ret = 0;
+
+	memset(&mp_resp, 0, sizeof(mp_resp));
+
+	switch (req->t) {
+	case EAL_DEV_REQ_TYPE_ATTACH:
+	case EAL_DEV_REQ_TYPE_DETACH_ROLLBACK:
+		ret = do_dev_hotplug_add(req->busname, req->devname, "");
+		break;
+	case EAL_DEV_REQ_TYPE_DETACH:
+	case EAL_DEV_REQ_TYPE_ATTACH_ROLLBACK:
+		ret = do_dev_hotplug_remove(req->busname, req->devname);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	strlcpy(mp_resp.name, EAL_DEV_MP_ACTION_REQUEST, sizeof(mp_resp.name));
+	mp_resp.len_param = sizeof(*req);
+	memcpy(resp, req, sizeof(*resp));
+	resp->result = ret;
+	if (rte_mp_reply(&mp_resp, bundle->peer) < 0)
+		RTE_LOG(ERR, EAL, "failed to send reply to primary request\n");
+
+	free(bundle->peer);
+	free(bundle);
+}
+
+static int
+handle_primary_request(const struct rte_mp_msg *msg, const void *peer)
+{
+	struct rte_mp_msg mp_resp;
+	const struct eal_dev_mp_req *req =
+		(const struct eal_dev_mp_req *)msg->param;
+	struct eal_dev_mp_req *resp =
+		(struct eal_dev_mp_req *)mp_resp.param;
+	struct mp_reply_bundle *bundle;
+	int ret = 0;
+
+	memset(&mp_resp, 0, sizeof(mp_resp));
+	strlcpy(mp_resp.name, EAL_DEV_MP_ACTION_REQUEST, sizeof(mp_resp.name));
+	mp_resp.len_param = sizeof(*req);
+	memcpy(resp, req, sizeof(*resp));
+
+	bundle = calloc(1, sizeof(*bundle));
+	if (bundle == NULL) {
+		resp->result = -ENOMEM;
+		ret = rte_mp_reply(&mp_resp, peer);
+		if (ret) {
+			RTE_LOG(ERR, EAL, "failed to send reply to primary request\n");
+			return ret;
+		}
+	}
+
+	bundle->msg = *msg;
+	/**
+	 * We need to send the reply on the interrupt thread, but peer can't
+	 * be parsed directly, so this is a temporary hack that needs to be
+	 * fixed when it is ready.
+	 */
+	bundle->peer = (void *)strdup(peer);
+
+	/**
+	 * We are at IPC callback thread, sync IPC is not allowed due to
+	 * dead lock, so we delegate the task to interrupt thread.
+	 */
+	ret = rte_eal_alarm_set(1, __handle_primary_request, bundle);
+	if (ret) {
+		resp->result = ret;
+		ret = rte_mp_reply(&mp_resp, peer);
+		if (ret) {
+			RTE_LOG(ERR, EAL, "failed to send reply to primary request\n");
+			return ret;
+		}
+	}
+	return 0;
+}
+
+int eal_dev_hotplug_request_to_primary(struct eal_dev_mp_req *req)
+{
+	RTE_SET_USED(req);
+	return -ENOTSUP;
+}
+
+int eal_dev_hotplug_request_to_secondary(struct eal_dev_mp_req *req)
+{
+	struct rte_mp_msg mp_req;
+	struct rte_mp_reply mp_reply;
+	struct timespec ts = {.tv_sec = MP_TIMEOUT_S, .tv_nsec = 0};
+	int ret;
+	int i;
+
+	memset(&mp_req, 0, sizeof(mp_req));
+	memcpy(mp_req.param, req, sizeof(*req));
+	mp_req.len_param = sizeof(*req);
+	strlcpy(mp_req.name, EAL_DEV_MP_ACTION_REQUEST, sizeof(mp_req.name));
+
+	ret = rte_mp_request_sync(&mp_req, &mp_reply, &ts);
+	if (ret) {
+		RTE_LOG(ERR, EAL, "rte_mp_request_sync failed\n");
+		return ret;
+	}
+
+	if (mp_reply.nb_sent != mp_reply.nb_received) {
+		RTE_LOG(ERR, EAL, "not all secondary reply\n");
+		return -1;
+	}
+
+	req->result = 0;
+	for (i = 0; i < mp_reply.nb_received; i++) {
+		struct eal_dev_mp_req *resp =
+			(struct eal_dev_mp_req *)mp_reply.msgs[i].param;
+		if (resp->result) {
+			req->result = resp->result;
+			break;
+		}
+	}
+
+	return 0;
+}
+
+int rte_dev_hotplug_mp_init(void)
+{
+	int ret;
+
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+		ret = rte_mp_action_register(EAL_DEV_MP_ACTION_REQUEST,
+					handle_secondary_request);
+		if (ret) {
+			RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
+				EAL_DEV_MP_ACTION_REQUEST);
+			return ret;
+		}
+	} else {
+		ret = rte_mp_action_register(EAL_DEV_MP_ACTION_REQUEST,
+		handle_primary_request);
+		if (ret) {
+			RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
+				EAL_DEV_MP_ACTION_REQUEST);
+			return ret;
+		}
+	}
+
+	return 0;
+}
diff --git a/lib/librte_eal/common/hotplug_mp.h b/lib/librte_eal/common/hotplug_mp.h
new file mode 100644
index 000000000..f24ba107b
--- /dev/null
+++ b/lib/librte_eal/common/hotplug_mp.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _HOTPLUG_MP_H_
+#define _HOTPLUG_MP_H_
+
+#include <rte_dev.h>
+#include <rte_bus.h>
+
+#define EAL_DEV_MP_ACTION_REQUEST      "eal_dev_mp_request"
+#define EAL_DEV_MP_ACTION_RESPONSE     "eal_dev_mp_response"
+
+enum eal_dev_req_type {
+	EAL_DEV_REQ_TYPE_ATTACH,
+	EAL_DEV_REQ_TYPE_DETACH,
+	EAL_DEV_REQ_TYPE_ATTACH_ROLLBACK,
+	EAL_DEV_REQ_TYPE_DETACH_ROLLBACK,
+};
+
+struct eal_dev_mp_req {
+	enum eal_dev_req_type t;
+	char busname[RTE_BUS_NAME_MAX_LEN];
+	char devname[RTE_DEV_NAME_MAX_LEN];
+	char devargs[RTE_DEV_ARGS_MAX_LEN];
+	int result;
+};
+
+/**
+ * This is a synchronous wrapper for a secondary process to send a
+ * request to the primary process. It is invoked when an attach
+ * or detach request is issued from a secondary process.
+ */
+int eal_dev_hotplug_request_to_primary(struct eal_dev_mp_req *req);
+
+/**
+ * This is a synchronous wrapper for the primary process to send a
+ * request to all secondary processes. It is invoked when an attach
+ * or detach request is issued from the primary process.
+ */
+int eal_dev_hotplug_request_to_secondary(struct eal_dev_mp_req *req);
+
+
+#endif /* _HOTPLUG_MP_H_ */
diff --git a/lib/librte_eal/common/include/rte_bus.h b/lib/librte_eal/common/include/rte_bus.h
index eb9eded4e..720f7c3c8 100644
--- a/lib/librte_eal/common/include/rte_bus.h
+++ b/lib/librte_eal/common/include/rte_bus.h
@@ -197,6 +197,9 @@ struct rte_bus_conf {
 typedef enum rte_iova_mode (*rte_bus_get_iommu_class_t)(void);
 
 
+/* Max length for a bus name */
+#define RTE_BUS_NAME_MAX_LEN 32
+
 /**
  * A structure describing a generic bus.
  */
diff --git a/lib/librte_eal/common/include/rte_dev.h b/lib/librte_eal/common/include/rte_dev.h
index 3879ff3ca..667df20f0 100644
--- a/lib/librte_eal/common/include/rte_dev.h
+++ b/lib/librte_eal/common/include/rte_dev.h
@@ -152,6 +152,9 @@ struct rte_driver {
  */
 #define RTE_DEV_NAME_MAX_LEN 64
 
+/* Max length allowed for device arguments */
+#define RTE_DEV_ARGS_MAX_LEN 128
+
 /**
  * A structure describing a generic device.
  */
@@ -193,6 +196,9 @@ int rte_eal_dev_detach(struct rte_device *dev);
  * @b EXPERIMENTAL: this API may change without prior notice
  *
  * Hotplug add a given device to a specific bus.
+ * In multi-process, this function will inform all other processes
+ * to hotplug add the same device. Any failure on other process
+ * will rollback the action.
  *
  * @param busname
  *   The bus name the device is added to.
@@ -212,6 +218,9 @@ int __rte_experimental rte_eal_hotplug_add(const char *busname, const char *devn
  * @b EXPERIMENTAL: this API may change without prior notice
  *
  * Hotplug remove a given device from a specific bus.
+ * In multi-process, this function will inform all other processes
+ * to hotplug remove the same device. Any failure on other process
+ * will rollback the action.
  *
  * @param busname
  *   The bus name the device is removed from.
diff --git a/lib/librte_eal/common/meson.build b/lib/librte_eal/common/meson.build
index 8a3dcfee0..0f2cf9a1a 100644
--- a/lib/librte_eal/common/meson.build
+++ b/lib/librte_eal/common/meson.build
@@ -25,6 +25,7 @@ common_sources = files(
 	'eal_common_tailqs.c',
 	'eal_common_thread.c',
 	'eal_common_timer.c',
+	'hotplug_mp.c',
 	'malloc_elem.c',
 	'malloc_heap.c',
 	'malloc_mp.c',
diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
index 3719ec9d7..6c225b54a 100644
--- a/lib/librte_eal/linuxapp/eal/Makefile
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -67,6 +67,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_thread.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_proc.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_fbarray.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_malloc.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += hotplug_mp.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += malloc_elem.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += malloc_heap.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += malloc_mp.c
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index 8655b8691..0c6984990 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -850,6 +850,12 @@ rte_eal_init(int argc, char **argv)
 		}
 	}
 
+	/* register mp action callbacks for hotplug */
+	if (rte_dev_hotplug_mp_init() < 0) {
+		rte_eal_init_alert("failed to register mp callback for hotplug\n");
+		return -1;
+	}
+
 	if (rte_bus_scan()) {
 		rte_eal_init_alert("Cannot scan the buses for devices\n");
 		rte_errno = ENODEV;
-- 
2.13.6



More information about the dev mailing list