[dpdk-dev] [RFC 2/4] eal: enable experimental dlopen()-based secondary process support

Anatoly Burakov anatoly.burakov at intel.com
Fri May 19 18:39:44 CEST 2017


The primary process forks itself into a new process that is used as the
basis for forking secondary processes. A secondary process then connects
to this forked process over a socket and triggers a fork.

This new forked secondary dlopen()'s the original secondary process
binary and runs main() again. In the meantime, the original secondary
process waits until this new forked secondary dies, and exits.

"Waiting until secondary dies" is achieved through a blocking flock()
call - once it succeeds, secondary is dead as all locks are released at
process exit.

Signed-off-by: Anatoly Burakov <anatoly.burakov at intel.com>
---
 lib/librte_eal/linuxapp/eal/Makefile           |   2 +
 lib/librte_eal/linuxapp/eal/eal.c              | 105 +++++-
 lib/librte_eal/linuxapp/eal/eal_mp.h           |  54 +++
 lib/librte_eal/linuxapp/eal/eal_mp_primary.c   | 477 +++++++++++++++++++++++++
 lib/librte_eal/linuxapp/eal/eal_mp_secondary.c | 301 ++++++++++++++++
 5 files changed, 933 insertions(+), 6 deletions(-)
 create mode 100755 lib/librte_eal/linuxapp/eal/eal_mp.h
 create mode 100755 lib/librte_eal/linuxapp/eal/eal_mp_primary.c
 create mode 100755 lib/librte_eal/linuxapp/eal/eal_mp_secondary.c

diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
index 24aab8d..f0ec382 100644
--- a/lib/librte_eal/linuxapp/eal/Makefile
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -63,6 +63,8 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_log.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_mp_socket.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_vfio.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_vfio_mp_sync.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_mp_secondary.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_mp_primary.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_pci.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_pci_uio.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_pci_vfio.c
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index 7c78f2d..3d646b9 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -80,6 +80,7 @@
 #include <malloc_heap.h>
 
 #include "eal_private.h"
+#include "eal_mp.h"
 #include "eal_thread.h"
 #include "eal_internal_cfg.h"
 #include "eal_filesystem.h"
@@ -480,7 +481,7 @@ eal_parse_vfio_intr(const char *mode)
 
 /* Parse the arguments for --log-level only */
 static void
-eal_log_level_parse(int argc, char **argv)
+eal_early_parse(int argc, char **argv)
 {
 	int opt;
 	char **argvopt;
@@ -504,6 +505,9 @@ eal_log_level_parse(int argc, char **argv)
 		ret = (opt == OPT_LOG_LEVEL_NUM) ?
 			eal_parse_common_option(opt, optarg, &internal_config) : 0;
 
+		ret = (opt == OPT_PROC_TYPE_NUM) ?
+		    eal_parse_common_option(opt, optarg, &internal_config) : 0;
+
 		/* common parser is not happy */
 		if (ret < 0)
 			break;
@@ -745,6 +749,68 @@ static void rte_eal_init_alert(const char *msg)
 	RTE_LOG(ERR, EAL, "%s\n", msg);
 }
 
+/*
+ * First pass of a secondary process: seed the RNG from the TSC and hand
+ * argc/argv to eal_secondary_init() so they can be passed on to the app.
+ * Panics if that fails; otherwise always returns 0.
+ */
+static int
+secondary_first_run(int argc, char **argv)
+{
+	int rc;
+
+	rte_srand(rte_rdtsc());
+
+	rc = eal_secondary_init(argc, argv);
+	if (rc < 0)
+		rte_panic("Cannot init secondary\n");
+
+	RTE_LOG(ERR, EAL, "Secondary preliminary init\n");
+	return 0;
+}
+
+/*
+ * Second pass of a secondary process: initialise the master lcore thread,
+ * then create the master<->slave pipes and one thread per slave lcore, and
+ * wait until every slave has run a dummy function.
+ * Panics if a pipe or thread cannot be created; otherwise returns 0.
+ */
+static int
+secondary_second_run(void) {
+	unsigned i;
+	int ret;
+	char thread_name[RTE_MAX_THREAD_NAME_LEN];
+
+	eal_thread_init_master(rte_config.master_lcore);
+
+	RTE_LCORE_FOREACH_SLAVE(i) {
+
+		/*
+		 * create communication pipes between master thread
+		 * and children
+		 */
+		if (pipe(lcore_config[i].pipe_master2slave) < 0)
+			rte_panic("Cannot create pipe\n");
+		if (pipe(lcore_config[i].pipe_slave2master) < 0)
+			rte_panic("Cannot create pipe\n");
+
+		lcore_config[i].state = WAIT;
+
+		/* create a thread for each lcore */
+		ret = pthread_create(&lcore_config[i].thread_id, NULL,
+		             eal_thread_loop, NULL);
+		if (ret != 0)
+			rte_panic("Cannot create thread\n");
+
+		/*
+		 * Set thread_name for aid in debugging. The lcore id is
+		 * unsigned, so it is formatted with %u.
+		 */
+		snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN,
+		    "lcore-slave-%u", i);
+		ret = rte_thread_setname(lcore_config[i].thread_id,
+		                thread_name);
+		if (ret != 0)
+			RTE_LOG(DEBUG, EAL,
+			    "Cannot set name for lcore thread\n");
+	}
+
+	/*
+	 * Launch a dummy function on all slave lcores, so that master lcore
+	 * knows they are all ready when this function returns.
+	 */
+	rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER);
+	rte_eal_mp_wait_lcore();
+
+	/* NOTE(review): informational message logged at ERR level - confirm */
+	RTE_LOG(ERR, EAL, "Secondary finished init\n");
+
+	return 0;
+}
+
 /* Launch threads, called at application init(). */
 int
 rte_eal_init(int argc, char **argv)
@@ -752,6 +818,7 @@ rte_eal_init(int argc, char **argv)
 	int i, fctret, ret;
 	pthread_t thread_id;
 	static rte_atomic32_t run_once = RTE_ATOMIC32_INIT(0);
+	static rte_atomic32_t run_once_secondary = RTE_ATOMIC32_INIT(0);
 	const char *logid;
 	char cpuset[RTE_CPU_AFFINITY_STR_LEN];
 	char thread_name[RTE_MAX_THREAD_NAME_LEN];
@@ -763,10 +830,28 @@ rte_eal_init(int argc, char **argv)
 		return -1;
 	}
 
+	/* short-circuit running secondary processes */
 	if (!rte_atomic32_test_and_set(&run_once)) {
-		rte_eal_init_alert("already called initialization.");
-		rte_errno = EALREADY;
-		return -1;
+		if (internal_config.process_type == RTE_PROC_SECONDARY) {
+			if (!rte_atomic32_test_and_set(&run_once_secondary)) {
+				RTE_LOG(ERR, EAL, "Can't run secondary init twice!\n");
+				rte_errno = EALREADY;
+				return -1;
+			} else {
+
+				/* parse EAL arguments before running secondary process */
+				fctret = eal_parse_args(argc, argv);
+				if (fctret < 0)
+					exit(1);
+
+				secondary_second_run();
+				return fctret;
+			}
+		} else {
+			rte_eal_init_alert("already called initialization.");
+			rte_errno = EALREADY;
+			return -1;
+		}
 	}
 
 	logid = strrchr(argv[0], '/');
@@ -776,8 +861,8 @@ rte_eal_init(int argc, char **argv)
 
 	eal_reset_internal_config(&internal_config);
 
-	/* set log level as early as possible */
-	eal_log_level_parse(argc, argv);
+	/* set log level and process type as early as possible */
+	eal_early_parse(argc, argv);
 
 	if (rte_eal_cpu_init() < 0) {
 		rte_eal_init_alert("Cannot detect lcores.");
@@ -785,6 +870,11 @@ rte_eal_init(int argc, char **argv)
 		return -1;
 	}
 
+	if (internal_config.process_type == RTE_PROC_SECONDARY) {
+		secondary_first_run(argc, argv);
+		rte_exit(EXIT_SUCCESS, "Done");
+	}
+
 	fctret = eal_parse_args(argc, argv);
 	if (fctret < 0) {
 		rte_eal_init_alert("Invalid 'command line' arguments.");
@@ -939,6 +1029,9 @@ rte_eal_init(int argc, char **argv)
 		return -1;
 	}
 
+	if (eal_secondary_mp_sync_setup() < 0)
+		RTE_LOG(WARNING, EAL, "Couldn't start multiprocess socket!\n");
+
 	rte_eal_mcfg_complete();
 
 	return fctret;
diff --git a/lib/librte_eal/linuxapp/eal/eal_mp.h b/lib/librte_eal/linuxapp/eal/eal_mp.h
new file mode 100755
index 0000000..43ff9df
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_mp.h
@@ -0,0 +1,54 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef EAL_MP_H
+#define EAL_MP_H
+
+#include "eal_mp_socket.h"
+
+/*
+ * Request codes of the secondary-process launch protocol, numbered from
+ * SOCKET_REQ_USER. Parenthesized so they expand safely inside expressions.
+ */
+#define SOCKET_REQ_FORK (SOCKET_REQ_USER + 0)
+#define SOCKET_REQ_STDIN (SOCKET_REQ_USER + 1)
+#define SOCKET_REQ_STDOUT (SOCKET_REQ_USER + 2)
+#define SOCKET_REQ_STDERR (SOCKET_REQ_USER + 3)
+#define SOCKET_REQ_LOGFILE (SOCKET_REQ_USER + 4)
+#define SOCKET_REQ_PATH (SOCKET_REQ_USER + 5)
+#define SOCKET_REQ_ARGC (SOCKET_REQ_USER + 6)
+#define SOCKET_REQ_ARGV (SOCKET_REQ_USER + 7)
+
+/* fork the process that listens for secondary-process launch requests */
+int eal_secondary_mp_sync_setup(void);
+/* connect to the socket at the secondary-process socket path */
+int eal_secondary_mp_sync_connect_to_primary(void);
+/* write the secondary-process socket path into buffer (bufsz bytes) */
+void eal_secondary_mp_sync_get_socket_path(char *buffer, int bufsz);
+
+/* ask the listener to fork and run this binary; blocks until it exits */
+int eal_secondary_init(int argc, char **argv);
+
+#endif /* EAL_MP_H */
diff --git a/lib/librte_eal/linuxapp/eal/eal_mp_primary.c b/lib/librte_eal/linuxapp/eal/eal_mp_primary.c
new file mode 100755
index 0000000..32fce69
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_mp_primary.c
@@ -0,0 +1,477 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <limits.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <dlfcn.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <time.h>
+#include <sys/file.h>
+#include <linux/version.h>
+#include <signal.h>
+#include <sys/prctl.h>
+
+/* sys/un.h with __USE_MISC uses strlen, which is unsafe */
+#ifdef __USE_MISC
+#define REMOVED_USE_MISC
+#undef __USE_MISC
+#endif
+#include <sys/un.h>
+/* make sure we redefine __USE_MISC only if it was previously undefined */
+#ifdef REMOVED_USE_MISC
+#define __USE_MISC
+#undef REMOVED_USE_MISC
+#endif
+
+#include <rte_log.h>
+#include <rte_pci.h>
+#include <rte_eal_memconfig.h>
+#include <rte_malloc.h>
+#include <rte_cycles.h>
+
+#include "eal_filesystem.h"
+#include "eal_pci_init.h"
+#include "eal_thread.h"
+#include "eal_mp.h"
+
+#define SOCKET_PATH_FMT "%s/.%s_mp_secondary_socket"
+#define LOCKFILE_PATH_FMT "%s/.%s_secondary_lock_%s"
+
+/*
+ * Directory used for runtime files: $HOME for a non-root user that has
+ * HOME set, "/var/run" otherwise.
+ */
+static const char *
+get_run_dir(void)
+{
+	const char *home = getenv("HOME");
+
+	if (getuid() == 0 || home == NULL)
+		return "/var/run";
+	return home;
+}
+
+/*
+ * Fill str with sz - 1 pseudo-random alphanumeric characters followed by a
+ * terminating NUL. Does nothing when sz is not positive.
+ */
+static
+void get_rand_str(char *str, int sz) {
+	static const char charset[] =
+		"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
+	/* sizeof counts the terminating NUL, which must never be picked */
+	const int nchars = (int) sizeof(charset) - 1;
+
+	if (sz <= 0)
+		return;
+	for (int i = 0; i < sz - 1; i++) {
+		/* this does not give us *true* randomness but it's good enough */
+		str[i] = charset[rand() % nchars];
+	}
+	str[sz - 1] = '\0';
+}
+
+/*
+ * Build a lock file path with a random suffix from get_rand_str() into str
+ * (at most sz bytes). Returns snprintf()'s result, i.e. the path length
+ * without the terminating NUL, because the caller needs to know it.
+ */
+static int
+get_lock_file_path(char *str, int sz)
+{
+	char suffix[16];
+
+	get_rand_str(suffix, sizeof(suffix));
+
+	return snprintf(str, sz, LOCKFILE_PATH_FMT, get_run_dir(),
+	                internal_config.hugefile_prefix, suffix);
+}
+
+static int secondary_mp_socket_fd;
+
+/*
+ * Write the secondary-process socket path into buffer (bufsz bytes).
+ * The directory comes from get_run_dir() (/var/run if root, $HOME
+ * otherwise) and the file name embeds the current hugefile prefix.
+ */
+void
+eal_secondary_mp_sync_get_socket_path(char *buffer, int bufsz)
+{
+	const char *run_dir = get_run_dir();
+	const char *prefix = internal_config.hugefile_prefix;
+
+	snprintf(buffer, bufsz, SOCKET_PATH_FMT, run_dir, prefix);
+}
+
+/*
+ * Thread body: wait for one forked secondary process to terminate.
+ *
+ * arg points to the child's pid_t. It is copied immediately and never
+ * freed or written here. Always returns NULL.
+ */
+static void *
+secondary_wait_thread(void * arg)
+{
+	int status;
+	pid_t pid = *(pid_t*) arg;
+	pid_t ret;
+
+	/* nobody joins this thread, so let it release its resources on exit */
+	pthread_detach(pthread_self());
+
+	RTE_LOG(INFO, EAL, "Secondary process %i started\n", pid);
+
+	/* a signal may interrupt the wait before the child has exited */
+	do {
+		ret = waitpid(pid, &status, 0);
+	} while (ret < 0 && errno == EINTR);
+
+	if (ret < 0)
+		RTE_LOG(ERR, EAL, "Failed to wait for secondary process %i: %s\n",
+		        pid, strerror(errno));
+	else
+		RTE_LOG(INFO, EAL, "Secondary process %i died\n", pid);
+
+	/* TODO: notify others this one has died? */
+
+	return NULL;
+}
+
+/*
+ * Handle parent exit: terminate this process as well.
+ *
+ * This runs as a signal handler, so it uses _exit(): exit() runs atexit
+ * handlers and flushes stdio, neither of which is async-signal-safe.
+ */
+static void
+parent_exit(int __rte_unused sig)
+{
+	_exit(0);
+}
+
+/*
+ * data flow for socket comm protocol:
+ *
+ * in case of any error, socket is closed.
+ */
+
+/*
+ * Set up the socket at the secondary-process socket path through
+ * eal_mp_sync_socket_setup() and remember its descriptor in
+ * secondary_mp_socket_fd. Returns 0 on success, -1 on failure.
+ */
+static int
+secondary_mp_sync_socket_setup(void)
+{
+	char sock_path[PATH_MAX];
+	int fd;
+
+	/* the path is built from the run directory and the hugefile prefix */
+	eal_secondary_mp_sync_get_socket_path(sock_path, sizeof(sock_path));
+
+	fd = eal_mp_sync_socket_setup(sock_path);
+	if (fd >= 0) {
+		/* keep the descriptor for the accept() loop */
+		secondary_mp_socket_fd = fd;
+		return 0;
+	}
+
+	RTE_LOG(ERR, EAL, "Failed to create socket!\n");
+	return -1;
+}
+
+/*
+ * Connect to the socket at the secondary-process socket path; returns
+ * whatever eal_mp_sync_connect_to_primary() returns for that path.
+ */
+int
+eal_secondary_mp_sync_connect_to_primary(void)
+{
+	char sock_path[PATH_MAX];
+	int ret;
+
+	eal_secondary_mp_sync_get_socket_path(sock_path, sizeof(sock_path));
+	ret = eal_mp_sync_connect_to_primary(sock_path);
+
+	return ret;
+}
+
+/*
+ * Receive one file descriptor from the peer on conn_sock.
+ *
+ * Acknowledges the request with SOCKET_OK, receives the descriptor, then
+ * replies SOCKET_OK (storing it in *dst) or SOCKET_ERR. A descriptor
+ * received earlier for the same slot (anything other than def_fd) is closed
+ * before it is replaced.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int
+secondary_mp_sync_recv_fd(int conn_sock, int *dst, int def_fd)
+{
+	int fd;
+
+	eal_mp_sync_send_request(conn_sock, SOCKET_OK);
+
+	fd = eal_mp_sync_receive_fd(conn_sock);
+	if (fd < 0) {
+		eal_mp_sync_send_request(conn_sock, SOCKET_ERR);
+		return -1;
+	}
+	eal_mp_sync_send_request(conn_sock, SOCKET_OK);
+
+	if (*dst != def_fd)
+		close(*dst);
+	*dst = fd;
+	return 0;
+}
+
+/*
+ * Accept connections from starting secondary processes and serve them.
+ *
+ * Each connection is a sequence of requests: optional replacement
+ * descriptors for stdin/stdout/stderr/log, the path of the binary, argc,
+ * the argv strings and finally SOCKET_REQ_FORK. On the fork request this
+ * process forks; the child locks a fresh lock file, sends its path back,
+ * dlopen()s the binary and calls its main(), while the parent starts a
+ * thread that waits for the child and goes back to accept().
+ *
+ * In case of any error, the connection is closed. All per-connection state
+ * (arguments, received descriptors) is released before the next accept().
+ *
+ * Returns only if the socket cannot be set up, or in the forked child after
+ * the loaded main() has returned.
+ */
+static void
+secondary_mp_sync_listener(void)
+{
+	/* one argv slot stays unused so that argv[argc] is always NULL */
+	enum { MAX_ARGS = 4096, MAX_ARG_LEN = 4096 };
+	char *argv[MAX_ARGS] = {0};
+	char *str;
+	int ret;
+
+	/* get seed from tsc */
+	srand((unsigned) rte_rdtsc());
+
+	if (secondary_mp_sync_socket_setup() < 0) {
+		RTE_LOG(ERR, EAL, "Failed to set up local socket!\n");
+		return;
+	}
+
+	/* wait for requests on the socket and the IPC */
+	for (;;) {
+		int conn_sock;
+		struct sockaddr_un addr;
+		socklen_t sockaddr_len = sizeof(addr);
+
+		/* this is a blocking call */
+		conn_sock = accept(secondary_mp_socket_fd, (struct sockaddr *) &addr,
+		        &sockaddr_len);
+
+		/* just restart on error */
+		if (conn_sock == -1)
+			continue;
+
+		/* set socket to linger after close */
+		struct linger l;
+		l.l_onoff = 1;
+		l.l_linger = 60;
+
+		if (setsockopt(conn_sock, SOL_SOCKET, SO_LINGER, &l, sizeof(l)) < 0)
+			RTE_LOG(WARNING, EAL, "Cannot set SO_LINGER option "
+			        "on listen socket (%s)\n", strerror(errno));
+
+		bool done = false;
+		bool is_fork = false;
+
+		/* arguments belong to one connection only, so start from zero */
+		int cur_argv = 0, argc = 0;
+
+		/* forked process data; path has a spare byte for the NUL */
+		char path[PATH_MAX + 1] = "";
+		char lockfile[PATH_MAX] = "";
+		const int def_log = rte_logs.file == NULL ?
+		        STDERR_FILENO : fileno(rte_logs.file);
+		int sp_stdin = STDIN_FILENO;
+		int sp_stdout = STDOUT_FILENO;
+		int sp_stderr = STDERR_FILENO;
+		int sp_log = def_log;
+
+		while (!done) {
+			ret = eal_mp_sync_receive_request(conn_sock);
+
+			switch (ret) {
+			case SOCKET_REQ_STDIN:
+				if (secondary_mp_sync_recv_fd(conn_sock, &sp_stdin,
+				        STDIN_FILENO) < 0)
+					done = true;
+				break;
+			case SOCKET_REQ_STDOUT:
+				if (secondary_mp_sync_recv_fd(conn_sock, &sp_stdout,
+				        STDOUT_FILENO) < 0)
+					done = true;
+				break;
+			case SOCKET_REQ_STDERR:
+				if (secondary_mp_sync_recv_fd(conn_sock, &sp_stderr,
+				        STDERR_FILENO) < 0)
+					done = true;
+				break;
+			case SOCKET_REQ_LOGFILE:
+				if (secondary_mp_sync_recv_fd(conn_sock, &sp_log,
+				        def_log) < 0)
+					done = true;
+				break;
+			case SOCKET_REQ_PATH:
+				eal_mp_sync_send_request(conn_sock, SOCKET_OK);
+
+				/* drop any earlier path so no stale tail remains */
+				memset(path, 0, sizeof(path));
+
+				/* receive path */
+				if (eal_mp_sync_receive_data(conn_sock, path, PATH_MAX)) {
+					eal_mp_sync_send_request(conn_sock, SOCKET_ERR);
+					done = true;
+					break;
+				}
+				eal_mp_sync_send_request(conn_sock, SOCKET_OK);
+				break;
+			case SOCKET_REQ_ARGC:
+				eal_mp_sync_send_request(conn_sock, SOCKET_OK);
+
+				/* receive argc */
+				if (eal_mp_sync_receive_data(conn_sock, &argc, sizeof(argc))) {
+					eal_mp_sync_send_request(conn_sock, SOCKET_ERR);
+					done = true;
+					break;
+				}
+				eal_mp_sync_send_request(conn_sock, SOCKET_OK);
+				break;
+			case SOCKET_REQ_ARGV:
+				/* refuse more arguments than argv[] can hold */
+				if (cur_argv >= MAX_ARGS - 1) {
+					RTE_LOG(ERR, EAL, "Too many arguments\n");
+					eal_mp_sync_send_request(conn_sock, SOCKET_ERR);
+					done = true;
+					break;
+				}
+				eal_mp_sync_send_request(conn_sock, SOCKET_OK);
+
+				/* one spare zeroed byte keeps the string terminated */
+				str = calloc(1, MAX_ARG_LEN + 1);
+
+				/* receive argv */
+				if (str == NULL ||
+				    eal_mp_sync_receive_data(conn_sock, str, MAX_ARG_LEN)) {
+					free(str);
+					eal_mp_sync_send_request(conn_sock, SOCKET_ERR);
+					done = true;
+					break;
+				}
+				argv[cur_argv++] = str;
+
+				eal_mp_sync_send_request(conn_sock, SOCKET_OK);
+				break;
+			case SOCKET_REQ_FORK:
+				/*
+				 * before we can fork, we need to make sure that argc matches
+				 * cur_argv
+				 */
+				if (argc != cur_argv) {
+					RTE_LOG(ERR, EAL, "Argument number mismatch\n");
+					eal_mp_sync_send_request(conn_sock, SOCKET_ERR);
+					done = true;
+					break;
+				}
+
+				/* get_lock_file_path returns length, not total bytes */
+				int len = get_lock_file_path(lockfile, sizeof(lockfile)) + 1;
+
+				/* snprintf() failure or truncation: nothing usable to send */
+				if (len <= 0 || len > (int) sizeof(lockfile)) {
+					RTE_LOG(ERR, EAL, "Lock file path too long\n");
+					eal_mp_sync_send_request(conn_sock, SOCKET_ERR);
+					done = true;
+					break;
+				}
+				eal_mp_sync_send_request(conn_sock, SOCKET_OK);
+
+				pid_t id = fork();
+
+				if (id < 0) {
+					RTE_LOG(ERR, EAL, "Failed to fork\n");
+					eal_mp_sync_send_request(conn_sock, SOCKET_ERR);
+					done = true;
+					break;
+				}
+
+				/* we're going to be forked, so stop the loop */
+				done = true;
+				if (id == 0) {
+					/* pointer to exported function */
+					void (*exported)(int argc, char** argv);
+					is_fork = true;
+
+					/*
+					 * Create and lock the lock file. open() is used
+					 * because creat() takes a permission mode, not
+					 * open flags; the file must be readable by the
+					 * process that waits on it.
+					 */
+					int fd = open(lockfile, O_CREAT | O_EXCL | O_RDWR, 0600);
+					if (fd < 0 || flock(fd, LOCK_EX) < 0) {
+						RTE_LOG(ERR, EAL, "Couldn't lock %s: %s\n",
+						        lockfile, strerror(errno));
+						exit(1);
+					}
+
+					/* set up file descriptors */
+					dup2(sp_stdin, STDIN_FILENO);
+					dup2(sp_stdout, STDOUT_FILENO);
+					dup2(sp_stderr, STDERR_FILENO);
+					rte_openlog_stream(fdopen(sp_log, "w+"));
+
+					/* send lockfile path */
+					eal_mp_sync_send_data(conn_sock, lockfile, len);
+
+					/* close the sockets */
+					close(secondary_mp_socket_fd);
+					close(conn_sock);
+
+					/* let the magic happen! */
+					void *h = dlopen(path, RTLD_NOW | RTLD_GLOBAL);
+					if (!h) {
+						RTE_LOG(ERR, EAL, "Couldn't dlopen: %s\n", dlerror());
+						exit(1);
+					}
+					dlerror();
+					*(void **) (&exported) = dlsym(h, "main");
+
+					char *err = dlerror();
+					if (err) {
+						RTE_LOG(ERR, EAL, "Couldn't dlsym: %s\n", err);
+						exit(1);
+					}
+					/* prepare to run EAL second time */
+					internal_config.process_type = RTE_PROC_SECONDARY;
+					rte_eal_get_configuration()->process_type = RTE_PROC_SECONDARY;
+
+					(*exported)(argc, argv);
+
+					dlclose(h);
+				} else {
+					char thread_name[RTE_MAX_THREAD_NAME_LEN];
+					pthread_t thread;
+
+					/*
+					 * The wait thread reads the pid through its
+					 * argument after this block has gone out of scope,
+					 * so give it a heap copy instead of &id.
+					 * NOTE(review): secondary_wait_thread() does not
+					 * free it, so sizeof(pid_t) leaks per child.
+					 */
+					pid_t *child = malloc(sizeof(*child));
+					if (child == NULL) {
+						RTE_LOG(ERR, EAL,
+						    "Failed to create thread for communication with secondary processes!\n");
+						break;
+					}
+					*child = id;
+
+					/* run a new thread waiting for child's termination */
+					ret = pthread_create(&thread, NULL,
+					        secondary_wait_thread, child);
+					if (ret) {
+						RTE_LOG(ERR, EAL,
+						    "Failed to create thread for communication with secondary processes!\n");
+						free(child);
+						/* no thread exists, so there is nothing to name */
+						break;
+					}
+
+					/* Set thread_name for aid in debugging. */
+					snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN,
+					    "secondary_%d", (int) id);
+					RTE_LOG(DEBUG, EAL, "Attempting to create thread %s\n", thread_name);
+					ret = rte_thread_setname(thread, thread_name);
+					if (ret)
+						RTE_LOG(DEBUG, EAL,
+						    "Failed to set thread name for secondary processes!\n");
+
+				}
+				break;
+			default:
+				eal_mp_sync_send_request(conn_sock, SOCKET_ERR);
+				done = true;
+				break;
+			}
+		}
+		/* forked process probably closed this already but we don't care */
+		close(conn_sock);
+		if (is_fork) {
+			/* fork executable doesn't need to listen on socket */
+			return;
+		}
+
+		/*
+		 * clean up after ourselves: close only descriptors that were
+		 * received on this connection, never our own stdio or log file
+		 */
+		if (sp_stdin != STDIN_FILENO)
+			close(sp_stdin);
+		if (sp_stdout != STDOUT_FILENO)
+			close(sp_stdout);
+		if (sp_stderr != STDERR_FILENO)
+			close(sp_stderr);
+		if (sp_log != def_log)
+			close(sp_log);
+		for (int i = 0; i < cur_argv; i++) {
+			free(argv[i]);
+			argv[i] = NULL;
+		}
+	}
+}
+
+/*
+ * Fork a helper process that runs secondary_mp_sync_listener().
+ *
+ * The helper asks the kernel for SIGUSR1 when its parent dies and installs
+ * parent_exit() as the handler. In the parent, returns 0 after a successful
+ * fork and -1 if fork() fails. The helper leaves through rte_exit() once
+ * the listener returns.
+ */
+int
+eal_secondary_mp_sync_setup(void)
+{
+	struct sigaction act;
+	pid_t id;
+
+	/* pre-fork instead of creating a listening thread */
+	id = fork();
+	if (id < 0) {
+		RTE_LOG(ERR, EAL, "Failed to fork!\n");
+		return -1;
+	}
+	if (id > 0) {
+		/* what if socket setup fails? do we care? */
+		RTE_LOG(INFO, EAL, "Fork successful\n");
+		return 0;
+	}
+
+	/* child process */
+	if (prctl(PR_SET_PDEATHSIG, SIGUSR1, 0, 0, 0, 0) != 0) {
+		RTE_LOG(ERR, EAL, "Can't register parent exit handler\n");
+	} else {
+		memset(&act, 0 , sizeof(act));
+		act.sa_handler = parent_exit;
+		if (sigaction(SIGUSR1, &act, NULL) != 0)
+			RTE_LOG(ERR, EAL, "Can't register parent exit signal callback\n");
+	}
+
+	secondary_mp_sync_listener();
+	rte_exit(EXIT_SUCCESS, "Secondary process finished\n");
+
+	return 0;
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_mp_secondary.c b/lib/librte_eal/linuxapp/eal/eal_mp_secondary.c
new file mode 100755
index 0000000..5ebfbc9
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_mp_secondary.c
@@ -0,0 +1,301 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <dlfcn.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <time.h>
+#include <sys/file.h>
+#include <linux/version.h>
+
+/* sys/un.h with __USE_MISC uses strlen, which is unsafe */
+#ifdef __USE_MISC
+#define REMOVED_USE_MISC
+#undef __USE_MISC
+#endif
+#include <sys/un.h>
+/* make sure we redefine __USE_MISC only if it was previously undefined */
+#ifdef REMOVED_USE_MISC
+#define __USE_MISC
+#undef REMOVED_USE_MISC
+#endif
+
+#include <rte_log.h>
+#include <rte_pci.h>
+#include <rte_eal_memconfig.h>
+#include <rte_malloc.h>
+#include <rte_common.h>
+
+#include "eal_filesystem.h"
+#include "eal_pci_init.h"
+#include "eal_thread.h"
+#include "eal_mp.h"
+
+#define EXPORT __attribute__((visibility("default")))
+
+#define SELF_PATH "/proc/self/exe"
+
+enum fd_type {
+	STDIN,
+	STDOUT,
+	STDERR,
+	LOGFILE
+};
+
+/*
+ * Open an AF_UNIX/SOCK_SEQPACKET socket and connect it to the
+ * secondary-process socket path. Returns the connected descriptor, or -1
+ * if the socket cannot be created or connected.
+ */
+static int
+connect_to_primary(void)
+{
+	struct sockaddr_un addr;
+	int fd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
+
+	if (fd < 0) {
+		RTE_LOG(INFO, EAL, "Failed to create socket!\n");
+		return -1;
+	}
+
+	addr.sun_family = AF_UNIX;
+	eal_secondary_mp_sync_get_socket_path(addr.sun_path, sizeof(addr.sun_path));
+
+	if (connect(fd, (struct sockaddr *) &addr,
+	        sizeof(struct sockaddr_un)) != 0) {
+		/* if connect failed */
+		close(fd);
+		return -1;
+	}
+
+	return fd;
+}
+
+/*
+ * Send the path of the running executable (the target of /proc/self/exe)
+ * over socket, including its terminating NUL.
+ * Returns 0 on success, -1 on any failure.
+ */
+static int
+sendpath(int socket) {
+	char path[PATH_MAX] = "";
+	/*
+	 * readlink() does not NUL-terminate; the zero-filled buffer and the
+	 * PATH_MAX - 1 limit guarantee a terminator. Check the result before
+	 * adding 1, otherwise the -1 error return is turned into 0 and missed.
+	 */
+	ssize_t n = readlink(SELF_PATH, path, PATH_MAX - 1);
+	if (n < 0) {
+		RTE_LOG(INFO, EAL, "Failed to get current path\n");
+		return -1;
+	}
+	int len = (int) n + 1;
+
+	if (eal_mp_sync_send_request(socket, SOCKET_REQ_PATH)) {
+		RTE_LOG(INFO, EAL, "Couldn't send path request\n");
+		return -1;
+	}
+	if (eal_mp_sync_receive_request(socket) != SOCKET_OK) {
+		RTE_LOG(INFO, EAL, "Didn't get path ack\n");
+		return -1;
+	}
+	if (eal_mp_sync_send_data(socket, path, len)) {
+		RTE_LOG(INFO, EAL, "Couldn't send path\n");
+		return -1;
+	}
+	if (eal_mp_sync_receive_request(socket) != SOCKET_OK) {
+		RTE_LOG(INFO, EAL, "Didn't get path ack\n");
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * Send argc and then every argv string (with its terminating NUL) over
+ * socket, waiting for a SOCKET_OK reply after each request and each payload.
+ * Nothing is sent when argc is 0.
+ * Returns 0 on success, -1 on any failure.
+ */
+static int
+sendargs(int socket, int argc, char **argv) {
+	if (argc == 0) {
+		/* no arguments to be sent */
+		return 0;
+	}
+	if (eal_mp_sync_send_request(socket, SOCKET_REQ_ARGC)) {
+		RTE_LOG(INFO, EAL, "Couldn't send argc request\n");
+		return -1;
+	}
+	if (eal_mp_sync_receive_request(socket) != SOCKET_OK) {
+		RTE_LOG(INFO, EAL, "Didn't get argc ack\n");
+		return -1;
+	}
+	if (eal_mp_sync_send_data(socket, &argc, sizeof(argc))) {
+		RTE_LOG(INFO, EAL, "Couldn't send argc\n");
+		return -1;
+	}
+	if (eal_mp_sync_receive_request(socket) != SOCKET_OK) {
+		RTE_LOG(INFO, EAL, "Didn't get argc ack\n");
+		return -1;
+	}
+
+	for (int i = 0; i < argc; i++) {
+		char *str = argv[i];
+		int len = strlen(str) + 1;
+
+		if (eal_mp_sync_send_request(socket, SOCKET_REQ_ARGV)) {
+			RTE_LOG(INFO, EAL, "Couldn't send argv request\n");
+			return -1;
+		}
+		/* this reply belongs to the argv request, so say so in the log */
+		if (eal_mp_sync_receive_request(socket) != SOCKET_OK) {
+			RTE_LOG(INFO, EAL, "Didn't get argv ack\n");
+			return -1;
+		}
+		if (eal_mp_sync_send_data(socket, str, len)) {
+			RTE_LOG(INFO, EAL, "Couldn't send argv\n");
+			return -1;
+		}
+		if (eal_mp_sync_receive_request(socket) != SOCKET_OK) {
+			RTE_LOG(INFO, EAL, "Didn't get argv ack\n");
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Send one of this process's descriptors over socket: stdin, stdout,
+ * stderr, or the log file (stderr when rte_logs.file is NULL).
+ * Returns 0 on success, -1 on failure or for an unknown fd_type.
+ */
+static int
+sendfd(int socket, enum fd_type t) {
+	int fd, req;
+	switch (t) {
+		case STDIN:
+			fd = STDIN_FILENO;
+			req = SOCKET_REQ_STDIN;
+			break;
+		case STDOUT:
+			fd = STDOUT_FILENO;
+			req = SOCKET_REQ_STDOUT;
+			break;
+		case STDERR:
+			fd = STDERR_FILENO;
+			req = SOCKET_REQ_STDERR;
+			break;
+		case LOGFILE:
+			fd = rte_logs.file == NULL ? STDERR_FILENO : fileno(rte_logs.file);
+			req = SOCKET_REQ_LOGFILE;
+			break;
+		default:
+			/* never send an uninitialized fd/request pair */
+			RTE_LOG(INFO, EAL, "Unknown fd type\n");
+			return -1;
+	}
+	if (eal_mp_sync_send_request(socket, req)) {
+		RTE_LOG(INFO, EAL, "Couldn't send fd request\n");
+		return -1;
+	}
+	if (eal_mp_sync_receive_request(socket) != SOCKET_OK) {
+		RTE_LOG(INFO, EAL, "Didn't get fd request ack\n");
+		return -1;
+	}
+	if (eal_mp_sync_send_fd(socket, fd)) {
+		RTE_LOG(INFO, EAL, "Couldn't send fd\n");
+		return -1;
+	}
+	if (eal_mp_sync_receive_request(socket) != SOCKET_OK) {
+		RTE_LOG(INFO, EAL, "Didn't get fd ack\n");
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * Send the fork request over socket, wait for its SOCKET_OK reply, then
+ * receive the lock file path into str (at most sz bytes).
+ * Returns 0 on success, -1 on any failure.
+ */
+static int
+reqfork(int socket, char *str, int sz)
+{
+	int reply;
+
+	if (eal_mp_sync_send_request(socket, SOCKET_REQ_FORK) != 0) {
+		RTE_LOG(INFO, EAL, "Couldn't send fork request\n");
+		return -1;
+	}
+
+	reply = eal_mp_sync_receive_request(socket);
+	if (reply != SOCKET_OK) {
+		RTE_LOG(INFO, EAL, "Didn't get fork request ack\n");
+		return -1;
+	}
+
+	if (eal_mp_sync_receive_data(socket, str, sz) != 0) {
+		RTE_LOG(INFO, EAL, "Couldn't receive lockfile path\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Hand this process's executable path, stdio/log descriptors and arguments
+ * to the listener, request a fork, then block until the forked secondary
+ * has exited.
+ *
+ * The forked process holds an exclusive flock() on the lock file whose path
+ * is returned by the fork request, so acquiring that lock here succeeds
+ * only once it has died. The lock file is removed afterwards.
+ *
+ * Returns 0 once the forked secondary has exited, -1 on any failure. The
+ * socket and the lock file descriptor are closed on every path.
+ */
+int eal_secondary_init(int argc, char **argv) {
+	/* one spare zeroed byte keeps the received path NUL-terminated */
+	char path[4096 + 1] = "";
+	int ret = -1;
+	int fd = -1;
+	int sock;
+
+	RTE_LOG(INFO, EAL, "Secondary process initializing\n");
+
+	sock = connect_to_primary();
+	if (sock < 0) {
+		RTE_LOG(INFO, EAL, "Couldn't connect to primary\n");
+		return -1;
+	}
+	if (sendpath(sock)) {
+		RTE_LOG(INFO, EAL, "Sending path failed\n");
+		goto out;
+	}
+	if (sendfd(sock, STDIN)) {
+		RTE_LOG(INFO, EAL, "Sending stdin failed\n");
+		goto out;
+	}
+	if (sendfd(sock, STDOUT)) {
+		RTE_LOG(INFO, EAL, "Sending stdout failed\n");
+		goto out;
+	}
+	if (sendfd(sock, STDERR)) {
+		RTE_LOG(INFO, EAL, "Sending stderr failed\n");
+		goto out;
+	}
+	if (sendfd(sock, LOGFILE)) {
+		RTE_LOG(INFO, EAL, "Sending logfile failed\n");
+		goto out;
+	}
+	if (sendargs(sock, argc, argv)) {
+		RTE_LOG(INFO, EAL, "Sending args failed\n");
+		goto out;
+	}
+	if (reqfork(sock, path, sizeof(path) - 1)) {
+		RTE_LOG(INFO, EAL, "Fork failed\n");
+		goto out;
+	}
+	close(sock);
+	sock = -1;
+
+	/* at this point, the file is locked by the forked secondary */
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0) {
+		RTE_LOG(INFO, EAL, "open failed for %s: %s\n", path, strerror(errno));
+		goto out;
+	}
+
+	/* blocking call - if succeeded, that means secondary is dead */
+	if (flock(fd, LOCK_EX) < 0) {
+		RTE_LOG(INFO, EAL, "Lock failed: %s\n", strerror(errno));
+		goto out;
+	}
+
+	RTE_LOG(INFO, EAL, "Secondary process exited\n");
+	unlink(path);
+	ret = 0;
+
+out:
+	if (fd >= 0)
+		close(fd);
+	if (sock >= 0)
+		close(sock);
+	return ret;
+}
-- 
2.7.4



More information about the dev mailing list