[dpdk-dev] [PATCH v1 2/3] examples/l2fwd-keepalive: add IPC liveness reporting

Remy Horton remy.horton at intel.com
Fri Apr 29 07:41:06 CEST 2016


Signed-off-by: Remy Horton <remy.horton at intel.com>
---
 examples/Makefile                          |   1 +
 examples/l2fwd-keepalive/Makefile          |   4 +-
 examples/l2fwd-keepalive/ka-agent/Makefile |  51 +++++++++++
 examples/l2fwd-keepalive/ka-agent/main.c   | 128 ++++++++++++++++++++++++++++
 examples/l2fwd-keepalive/main.c            |  22 ++++-
 examples/l2fwd-keepalive/shm.c             | 130 +++++++++++++++++++++++++++++
 examples/l2fwd-keepalive/shm.h             | 102 ++++++++++++++++++++++
 7 files changed, 434 insertions(+), 4 deletions(-)
 create mode 100644 examples/l2fwd-keepalive/ka-agent/Makefile
 create mode 100644 examples/l2fwd-keepalive/ka-agent/main.c
 create mode 100644 examples/l2fwd-keepalive/shm.c
 create mode 100644 examples/l2fwd-keepalive/shm.h

diff --git a/examples/Makefile b/examples/Makefile
index b28b30e..bd688b9 100644
--- a/examples/Makefile
+++ b/examples/Makefile
@@ -64,6 +64,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_CRYPTODEV) += l2fwd-crypto
 DIRS-$(CONFIG_RTE_LIBRTE_IVSHMEM) += l2fwd-ivshmem
 DIRS-$(CONFIG_RTE_LIBRTE_JOBSTATS) += l2fwd-jobstats
 DIRS-y += l2fwd-keepalive
+DIRS-y += l2fwd-keepalive/ka-agent
 DIRS-$(CONFIG_RTE_LIBRTE_LPM) += l3fwd
 DIRS-$(CONFIG_RTE_LIBRTE_ACL) += l3fwd-acl
 ifeq ($(CONFIG_RTE_LIBRTE_LPM),y)
diff --git a/examples/l2fwd-keepalive/Makefile b/examples/l2fwd-keepalive/Makefile
index 568edcb..3fcf513 100644
--- a/examples/l2fwd-keepalive/Makefile
+++ b/examples/l2fwd-keepalive/Makefile
@@ -1,6 +1,6 @@
 #   BSD LICENSE
 #
-#   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+#   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
 #   All rights reserved.
 #
 #   Redistribution and use in source and binary forms, with or without
@@ -42,7 +42,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
 APP = l2fwd-keepalive
 
 # all source are stored in SRCS-y
-SRCS-y := main.c
+SRCS-y := main.c shm.c
 
 CFLAGS += -O3
 CFLAGS += $(WERROR_FLAGS)
diff --git a/examples/l2fwd-keepalive/ka-agent/Makefile b/examples/l2fwd-keepalive/ka-agent/Makefile
new file mode 100644
index 0000000..4eaac76
--- /dev/null
+++ b/examples/l2fwd-keepalive/ka-agent/Makefile
@@ -0,0 +1,51 @@
+#   BSD LICENSE
+#
+#   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = ka-agent
+
+# all sources are stored in SRCS-y
+SRCS-y := main.c
+
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)/../
+EXTRA_CFLAGS += -O3 -g -Wfatal-errors
+# shm_open() is provided by librt on glibc releases before 2.34
+LDFLAGS += -lrt
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/l2fwd-keepalive/ka-agent/main.c b/examples/l2fwd-keepalive/ka-agent/main.c
new file mode 100644
index 0000000..f05e3a5
--- /dev/null
+++ b/examples/l2fwd-keepalive/ka-agent/main.c
@@ -0,0 +1,128 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <errno.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/wait.h>
+#include <sys/queue.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <time.h>
+
+#include <rte_keepalive.h>
+
+#include <shm.h>
+
+/* Open and map the keepalive SHM block; logs and returns NULL on error. */
+static struct rte_keepalive_shm *ka_shm_create(void)
+{
+	struct rte_keepalive_shm *shm;
+	int fd;
+
+	/* No O_CREAT: the block must already exist under this name. */
+	fd = shm_open(RTE_KEEPALIVE_SHM_NAME, O_RDWR, 0666);
+	if (fd < 0) {
+		printf("Failed to open %s as SHM:%s\n",
+			RTE_KEEPALIVE_SHM_NAME, strerror(errno));
+		return NULL;
+	}
+
+	shm = (struct rte_keepalive_shm *) mmap(0, sizeof(*shm),
+		PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+	/* Report before close(), which is allowed to overwrite errno. */
+	if (shm == MAP_FAILED)
+		printf("Failed to mmap SHM:%s\n", strerror(errno));
+
+	/* The descriptor is not needed once mmap() has returned. */
+	close(fd);
+	return shm == MAP_FAILED ? NULL : shm;
+}
+
+/* Report dead cores from the keepalive SHM; returns 1 if mapping fails. */
+int main(void)
+{
+	struct rte_keepalive_shm *shm = ka_shm_create();
+	struct timespec timeout = { .tv_nsec = 0 };
+	time_t start_time;
+	const char *sep;
+	int cnt_cores, i;
+
+	if (shm == NULL) {
+		printf("Unable to access shared core state\n");
+		return 1;
+	}
+	while (shm->magic != RTE_KEEPALIVE_SHM_MAGIC) {
+		printf("Shared KA memory not setup. Sleeping..\n");
+		sleep(5);
+	}
+	start_time = shm->time_of_init;
+	while (1) {
+		if (start_time != shm->time_of_init) {
+			printf("Signature mismatch.\n");
+			break;
+		}
+		/* sem_timedwait() takes an absolute deadline, not a delay. */
+		timeout.tv_sec = time(NULL) + 2;
+		if (sem_timedwait(&shm->core_died, &timeout) == -1)
+			continue;
+
+		cnt_cores = 0;
+		for (i = 0; i < RTE_KEEPALIVE_MAXCORES; i++)
+			if (shm->core_state[i] == RTE_KEEPALIVE_SHM_DEAD)
+				cnt_cores++;
+		if (cnt_cores == 0) {
+			/* Can happen if core was restarted since Semaphore
+			 * was sent, due to agent being offline.
+			 */
+			printf("Warning: Empty dead core report\n");
+			continue;
+		}
+
+		/* Separator goes before each id except the first. */
+		printf("%i dead cores: ", cnt_cores);
+		sep = "";
+		for (i = 0; i < RTE_KEEPALIVE_MAXCORES; i++)
+			if (shm->core_state[i] == RTE_KEEPALIVE_SHM_DEAD) {
+				printf("%s%d", sep, i);
+				sep = ", ";
+			}
+		printf("\n");
+	}
+	if (munmap(shm, sizeof(struct rte_keepalive_shm)) != 0)
+		printf("Warning: munmap() failed\n");
+	return 0;
+}
diff --git a/examples/l2fwd-keepalive/main.c b/examples/l2fwd-keepalive/main.c
index 8da89aa..8ebf1fd 100644
--- a/examples/l2fwd-keepalive/main.c
+++ b/examples/l2fwd-keepalive/main.c
@@ -72,6 +72,8 @@
 #include <rte_timer.h>
 #include <rte_keepalive.h>
 
+#include "shm.h"
+
 #define RTE_LOGTYPE_L2FWD RTE_LOGTYPE_USER1
 
 #define NB_MBUF   8192
@@ -523,17 +525,25 @@ check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
 }
 
 static void
-dead_core(__attribute__((unused)) void *ptr_data, const int id_core)
+dead_core(void *ptr_data, const int id_core)
 {
 	printf("Dead core %i - restarting..\n", id_core);
 	if (rte_eal_get_lcore_state(id_core) == FINISHED) {
 		rte_eal_wait_lcore(id_core);
 		rte_eal_remote_launch(l2fwd_launch_one_lcore, NULL, id_core);
+		rte_keepalive_shm_dead((struct rte_keepalive_shm *)ptr_data,
+			id_core);
 	} else {
 		printf("..false positive!\n");
 	}
 }
 
+static void
+alive_core(void *ptr_data, const int id_core)
+{
+	rte_keepalive_shm_alive((struct rte_keepalive_shm *)ptr_data, id_core);
+}
+
 int
 main(int argc, char **argv)
 {
@@ -725,10 +735,18 @@ main(int argc, char **argv)
 	rte_timer_init(&stats_timer);
 
 	if (check_period > 0) {
+		struct rte_keepalive_shm *ka_shm;
+
+		ka_shm = rte_keepalive_shm_create();
+		if (ka_shm == NULL)
+			rte_exit(EXIT_FAILURE,
+				"rte_keepalive_shm_create() failed");
 		rte_global_keepalive_info =
-			rte_keepalive_create(&dead_core, NULL);
+			rte_keepalive_create(&dead_core, ka_shm);
 		if (rte_global_keepalive_info == NULL)
 			rte_exit(EXIT_FAILURE, "init_keep_alive() failed");
+		rte_keepalive_register_alive_callback(rte_global_keepalive_info,
+			alive_core, ka_shm);
 		rte_timer_init(&hb_timer);
 		if (rte_timer_reset(&hb_timer,
 				(check_period * rte_get_timer_hz()) / 1000,
diff --git a/examples/l2fwd-keepalive/shm.c b/examples/l2fwd-keepalive/shm.c
new file mode 100644
index 0000000..f94ddc2
--- /dev/null
+++ b/examples/l2fwd-keepalive/shm.c
@@ -0,0 +1,130 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <time.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_keepalive.h>
+
+#include "shm.h"
+
+/* Create and initialise the keepalive SHM block; NULL (logged) on failure. */
+struct rte_keepalive_shm *rte_keepalive_shm_create(void)
+{
+	int fd;
+	int idx_core;
+	struct rte_keepalive_shm *ka_shm;
+
+	/* If any existing object is not unlinked, it makes it all too easy
+	 * for clients to end up with stale shared memory blocks when
+	 * restarted. Unlinking makes sure subsequent shm_open by clients
+	 * will get the new block mapped below.
+	 */
+	if (shm_unlink(RTE_KEEPALIVE_SHM_NAME) == -1 && errno != ENOENT)
+		printf("Warning: Error unlinking stale %s (%s)\n",
+			RTE_KEEPALIVE_SHM_NAME, strerror(errno));
+
+	fd = shm_open(RTE_KEEPALIVE_SHM_NAME,
+		O_CREAT | O_TRUNC | O_RDWR, 0666);
+	if (fd < 0) {
+		RTE_LOG(INFO, EAL, "Failed to open %s as SHM (%s)\n",
+			RTE_KEEPALIVE_SHM_NAME, strerror(errno));
+		return NULL;
+	}
+	if (ftruncate(fd, sizeof(struct rte_keepalive_shm)) != 0) {
+		RTE_LOG(INFO, EAL,
+			"Failed to resize SHM (%s)\n", strerror(errno));
+		close(fd);
+		return NULL;
+	}
+
+	ka_shm = (struct rte_keepalive_shm *) mmap(
+		0, sizeof(struct rte_keepalive_shm),
+		PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+	/* Log before close(), which is allowed to overwrite errno. */
+	if (ka_shm == MAP_FAILED)
+		RTE_LOG(INFO, EAL,
+			"Failed to mmap SHM (%s)\n", strerror(errno));
+	close(fd);
+	if (ka_shm == MAP_FAILED)
+		return NULL;
+
+	memset(ka_shm, 0, sizeof(struct rte_keepalive_shm));
+
+	/* Initialize the semaphores for IPC/SHM use */
+	if (sem_init(&ka_shm->core_died, 1, 0) != 0) {
+		RTE_LOG(INFO, EAL, "Failed to setup SHM semaphore (%s)\n",
+			strerror(errno));
+		munmap(ka_shm, sizeof(struct rte_keepalive_shm));
+		return NULL;
+	}
+
+	/* Set all cores to 'not present' */
+	for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES; idx_core++)
+		ka_shm->core_state[idx_core] = -1;
+
+	/* Magic goes last: it tells the agent that setup has finished. */
+	ka_shm->time_of_init = time(NULL);
+	ka_shm->magic = RTE_KEEPALIVE_SHM_MAGIC;
+
+	return ka_shm;
+}
+
+/* Relay 'alive' for id_core. No bounds check is done on id_core. */
+void rte_keepalive_shm_alive(struct rte_keepalive_shm *shm, const int id_core)
+{
+	shm->core_state[id_core] = RTE_KEEPALIVE_SHM_ALIVE;
+}
+
+/* Relay 'dead' for id_core and post core_died to wake a waiting agent. */
+void rte_keepalive_shm_dead(struct rte_keepalive_shm *shm, const int id_core)
+{
+	int pending = 0;
+
+	shm->core_state[id_core] = RTE_KEEPALIVE_SHM_DEAD;
+
+	if (sem_getvalue(&shm->core_died, &pending) == -1) {
+		RTE_LOG(INFO, EAL, "Semaphore check failed(%s)\n",
+			strerror(errno));
+		return;
+	}
+
+	/* Skip the post once two or more are already pending, so the
+	 * semaphore cannot keep growing while no agent is listening.
+	 */
+	if (pending <= 1 && sem_post(&shm->core_died) != 0)
+		RTE_LOG(INFO, EAL,
+			"Failed to increment semaphore (%s)\n",
+			strerror(errno));
+}
diff --git a/examples/l2fwd-keepalive/shm.h b/examples/l2fwd-keepalive/shm.h
new file mode 100644
index 0000000..f2a58cc
--- /dev/null
+++ b/examples/l2fwd-keepalive/shm.h
@@ -0,0 +1,102 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define RTE_KEEPALIVE_SHM_NAME "/dpdk_keepalive_shm_name"
+#define RTE_KEEPALIVE_SHM_MAGIC 0xff00aaf0
+
+#define RTE_KEEPALIVE_SHM_ALIVE 1
+#define RTE_KEEPALIVE_SHM_DEAD 2
+
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <semaphore.h>
+
+/**
+ * Keepalive SHM structure.
+ *
+ * The shared memory allocated by the primary is this size, and contains the
+ * information as contained within this struct. A secondary may open the SHM,
+ * and read the contents.
+ */
+struct rte_keepalive_shm {
+	/** Set to RTE_KEEPALIVE_SHM_MAGIC once the block is initialised. */
+	uint32_t magic;
+
+	/**
+	 * Time keepalive started, as returned by time() at creation.
+	 * The agent stops monitoring if it sees this value change.
+	 */
+	uint32_t time_of_init;
+
+	/** IPC semaphore (process-shared). Posted when a core dies. */
+	sem_t core_died;
+
+	/**
+	 * Relayed status of each core, indexed by core id.
+	 *
+	 * Each entry takes on one of the following values:
+	 *  -1 if core not monitored by keepalive
+	 *   RTE_KEEPALIVE_SHM_ALIVE (1) if core alive (the normal state)
+	 *   RTE_KEEPALIVE_SHM_DEAD (2) if core is dead.
+	 *   @note: State 0 (MISSING) is not relayed.
+	 */
+	int32_t core_state[RTE_KEEPALIVE_MAXCORES];
+};
+
+/**
+ * Create shared host memory keepalive object.
+ * @return
+ *  Pointer to SHM keepalive structure, or NULL on failure.
+ */
+struct rte_keepalive_shm *rte_keepalive_shm_create(void);
+
+/**
+ * Registers given core as 'alive'
+ * @param *shm
+ *  Pointer to SHM keepalive structure.
+ * @param id_core
+ *  Id of affected core
+ */
+void rte_keepalive_shm_alive(struct rte_keepalive_shm *shm, const int id_core);
+
+/**
+ * Registers given core as 'dead'
+ * @param *shm
+ *  Pointer to SHM keepalive structure.
+ * @param id_core
+ *  Id of affected core
+ */
+void rte_keepalive_shm_dead(struct rte_keepalive_shm *shm, const int id_core);
-- 
2.5.5



More information about the dev mailing list