[dpdk-dev] [PATCH 7/8] eal/bsdapp: add alarm support

Anatoly Burakov anatoly.burakov at intel.com
Fri Jun 15 16:25:07 CEST 2018


Implement EAL alarm API support for FreeBSD. The implementation
is largely identical to the Linux version, with one key
difference.

The alarm API is a little Linux-centric in that it expects the
alarm API itself to manage alarm timeouts, without involvement of
the interrupt thread. This works on Linux because Linux has the
timerfd API, which allows waiting for timer events on an fd.

On FreeBSD, however, there are no timerfds, and timer events are
set up directly in kevent. The interrupt thread has no way to receive
timeout information from the alarm API, so we also add a small
back-channel that lets it query the soonest alarm timeout from the
alarm API.

Signed-off-by: Anatoly Burakov <anatoly.burakov at intel.com>
---
 lib/librte_eal/bsdapp/eal/eal_alarm.c         | 299 +++++++++++++++++-
 lib/librte_eal/bsdapp/eal/eal_alarm_private.h |  19 ++
 lib/librte_eal/bsdapp/eal/eal_interrupts.c    |  29 +-
 3 files changed, 334 insertions(+), 13 deletions(-)
 create mode 100644 lib/librte_eal/bsdapp/eal/eal_alarm_private.h

diff --git a/lib/librte_eal/bsdapp/eal/eal_alarm.c b/lib/librte_eal/bsdapp/eal/eal_alarm.c
index eb3913c97..55763e520 100644
--- a/lib/librte_eal/bsdapp/eal/eal_alarm.c
+++ b/lib/librte_eal/bsdapp/eal/eal_alarm.c
@@ -1,31 +1,314 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2010-2014 Intel Corporation
+ * Copyright(c) 2018 Intel Corporation
  */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
+#include <time.h>
 #include <errno.h>
 
 #include <rte_alarm.h>
+#include <rte_cycles.h>
 #include <rte_common.h>
+#include <rte_errno.h>
+#include <rte_interrupts.h>
+#include <rte_spinlock.h>
+
 #include "eal_private.h"
+#include "eal_alarm_private.h"
+
+#define NS_PER_US 1000 /* nanoseconds per microsecond */
+
+#ifdef CLOCK_MONOTONIC_RAW /* glibc bits/time.h name; fall back if absent */
+#define CLOCK_TYPE_ID CLOCK_MONOTONIC_RAW
+#else
+#define CLOCK_TYPE_ID CLOCK_MONOTONIC
+#endif
+
+struct alarm_entry { /* one pending alarm, linked into alarm_list */
+	LIST_ENTRY(alarm_entry) next; /* list linkage */
+	struct rte_intr_handle handle; /* NOTE(review): unused here - confirm */
+	struct timespec time; /* absolute expiry time (CLOCK_TYPE_ID) */
+	rte_eal_alarm_callback cb_fn; /* function to call on expiry */
+	void *cb_arg; /* argument handed to cb_fn */
+	volatile uint8_t executing; /* set to 1 while cb_fn runs */
+	volatile pthread_t executing_id; /* thread running cb_fn */
+};
+
+static LIST_HEAD(alarm_list, alarm_entry) alarm_list = LIST_HEAD_INITIALIZER();
+static rte_spinlock_t alarm_list_lk = RTE_SPINLOCK_INITIALIZER; /* guards list */
+
+static struct rte_intr_handle intr_handle = {.fd = -1 }; /* fd set at init */
+static void eal_alarm_callback(void *arg);
 
 int
 rte_eal_alarm_init(void)
 {
+	intr_handle.type = RTE_INTR_HANDLE_ALARM;
+	/* on FreeBSD, timers don't use fd's and their identifiers live in a
+	 * separate namespace, so any value would do. however, the EAL interrupt
+	 * handler expects fd's to be unique, so use a real fd as timer ident.
+	 */
+	intr_handle.fd = open("/dev/zero", O_RDONLY);
+	if (intr_handle.fd < 0)
+		return -1; /* fail init instead of running with fd -1 */
+
+	return 0;
+}
+
+/* Three-way comparison of two timestamps, seconds first, then nanoseconds.
+ * Returns -1 if now is earlier than at, 1 if it is later, 0 if both are
+ * equal.
+ */
+static inline int
+timespec_cmp(const struct timespec *now, const struct timespec *at)
+{
+	if (now->tv_sec != at->tv_sec)
+		return (now->tv_sec < at->tv_sec) ? -1 : 1;
+	if (now->tv_nsec != at->tv_nsec)
+		return (now->tv_nsec < at->tv_nsec) ? -1 : 1;
+	return 0;
+}
+
+/* Nanoseconds from now until at, or 0 when at is not in the future. */
+static inline uint64_t
+diff_ns(const struct timespec *now, const struct timespec *at)
+{
+	uint64_t now_ns, at_ns;
+
+	if (timespec_cmp(now, at) >= 0)
+		return 0;
+	/* widen first: a 32-bit time_t would overflow in tv_sec * NS_PER_S */
+	now_ns = (uint64_t)now->tv_sec * NS_PER_S + now->tv_nsec;
+	at_ns = (uint64_t)at->tv_sec * NS_PER_S + at->tv_nsec;
+	return at_ns - now_ns;
+}
+
+/* Store in *val the nanoseconds until the soonest alarm expires (0 if it is
+ * already due). Returns 0 on success, -1 if the clock cannot be read or no
+ * alarm is pending. Reads alarm_list without taking alarm_list_lk.
+ */
+int
+eal_alarm_get_timeout_ns(uint64_t *val)
+{
+	struct alarm_entry *head;
+	struct timespec cur;
+
+	if (clock_gettime(CLOCK_TYPE_ID, &cur) < 0)
+		return -1;
+	head = LIST_FIRST(&alarm_list);
+	if (head == NULL)
+		return -1;
+	*val = diff_ns(&cur, &head->time);
 	return 0;
 }
 
+/* Unregister eal_alarm_callback for the head alarm, retrying on -EAGAIN.
+ * NOTE(review): only called with an empty list, where it is a no-op.
+ */
+static int
+unregister_current_callback(void)
+{
+	struct alarm_entry *head = LIST_FIRST(&alarm_list);
+	int rc;
+
+	if (head == NULL)
+		return 0;
+	do {
+		rc = rte_intr_callback_unregister(&intr_handle,
+				eal_alarm_callback, &head->time);
+	} while (rc == -EAGAIN);
+	return rc;
+}
+
+/* Register eal_alarm_callback with the interrupt API for the head alarm.
+ * Returns 0 for an empty list, else rte_intr_callback_register()'s result.
+ */
+static int
+register_first_callback(void)
+{
+	struct alarm_entry *head = LIST_FIRST(&alarm_list);
+
+	/* nothing pending, nothing to arm */
+	if (head == NULL)
+		return 0;
+
+	return rte_intr_callback_register(&intr_handle,
+			eal_alarm_callback, &head->time);
+}
+
+/* Callback registered with the interrupt API: run due alarms, then re-arm. */
+static void
+eal_alarm_callback(void *arg __rte_unused)
+{
+	struct timespec now;
+	struct alarm_entry *ap;
+
+	/* read the clock first: failing here must not leave the lock held */
+	if (clock_gettime(CLOCK_TYPE_ID, &now) < 0)
+		return;
+
+	rte_spinlock_lock(&alarm_list_lk);
+	ap = LIST_FIRST(&alarm_list);
+	while (ap != NULL && timespec_cmp(&now, &ap->time) >= 0) {
+		ap->executing = 1;
+		ap->executing_id = pthread_self();
+		rte_spinlock_unlock(&alarm_list_lk);
+
+		/* the user callback runs with the list unlocked */
+		ap->cb_fn(ap->cb_arg);
+
+		rte_spinlock_lock(&alarm_list_lk);
+		LIST_REMOVE(ap, next);
+		free(ap);
+		ap = LIST_FIRST(&alarm_list);
+	}
+
+	/* timer has been deleted from the kqueue, so recreate it if needed */
+	register_first_callback();
+
+	rte_spinlock_unlock(&alarm_list_lk);
+}
+
 
 int
-rte_eal_alarm_set(uint64_t us __rte_unused,
-		rte_eal_alarm_callback cb_fn __rte_unused,
-		void *cb_arg __rte_unused)
+rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb_fn, void *cb_arg)
 {
-	return -ENOTSUP;
+	/* arm a one-shot alarm us microseconds from now; 0 or -errno */
+	struct alarm_entry *ap, *new_alarm;
+	struct timespec now;
+	uint64_t ns;
+
+	/* check parameters, also ensure the ns math below cannot overflow */
+	if (us < 1 || us > (UINT64_MAX - NS_PER_S) / NS_PER_US || cb_fn == NULL)
+		return -EINVAL;
+
+	/* use current time to calculate absolute time of alarm */
+	if (clock_gettime(CLOCK_TYPE_ID, &now) < 0)
+		return -errno;
+
+	new_alarm = calloc(1, sizeof(*new_alarm));
+	if (new_alarm == NULL)
+		return -ENOMEM;
+
+	ns = us * NS_PER_US;
+	new_alarm->cb_fn = cb_fn;
+	new_alarm->cb_arg = cb_arg;
+	new_alarm->time.tv_nsec = (now.tv_nsec + ns) % NS_PER_S;
+	new_alarm->time.tv_sec = now.tv_sec + ((now.tv_nsec + ns) / NS_PER_S);
+
+	rte_spinlock_lock(&alarm_list_lk);
+	/* keep the list ordered by expiry time, soonest first */
+	if (LIST_EMPTY(&alarm_list))
+		LIST_INSERT_HEAD(&alarm_list, new_alarm, next);
+	else {
+		LIST_FOREACH(ap, &alarm_list, next) {
+			if (timespec_cmp(&new_alarm->time, &ap->time) < 0) {
+				LIST_INSERT_BEFORE(ap, new_alarm, next);
+				break;
+			}
+			if (LIST_NEXT(ap, next) == NULL) {
+				LIST_INSERT_AFTER(ap, new_alarm, next);
+				break;
+			}
+		}
+	}
+
+	/* re-register first callback just in case */
+	register_first_callback();
+
+	rte_spinlock_unlock(&alarm_list_lk);
+
+	return 0;
 }
 
 int
-rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn __rte_unused,
-		void *cb_arg __rte_unused)
+rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn, void *cb_arg)
 {
-	return -ENOTSUP;
+	struct alarm_entry *ap, *ap_prev;
+	int count = 0; /* number of alarms removed; the return value */
+	int err = 0; /* errno value to publish through rte_errno, if any */
+	int executing; /* matching alarms running on other threads */
+
+	if (!cb_fn) {
+		rte_errno = EINVAL;
+		return -1;
+	}
+
+	do {
+		executing = 0;
+		rte_spinlock_lock(&alarm_list_lk);
+		/* remove matches at list head; (void *)-1 matches any arg */
+		while (1) {
+			ap = LIST_FIRST(&alarm_list);
+			if (ap == NULL)
+				break;
+			if (cb_fn != ap->cb_fn)
+				break;
+			if (cb_arg != ap->cb_arg && cb_arg != (void *) -1)
+				break;
+			if (ap->executing == 0) {
+				LIST_REMOVE(ap, next);
+				free(ap);
+				count++;
+			} else {
+				/* If calling from another thread, note that the
+				 * alarm is executing so the outer loop spins
+				 * until it finishes. Otherwise we are trying to
+				 * cancel ourselves - report EINPROGRESS.
+				 */
+				if (pthread_equal(ap->executing_id,
+						pthread_self()) == 0)
+					executing++;
+				else
+					err = EINPROGRESS;
+
+				break;
+			}
+		}
+		ap_prev = ap;
+
+		/* now go through list, removing entries not at start */
+		LIST_FOREACH(ap, &alarm_list, next) {
+			/* this won't be true first time through */
+			if (cb_fn == ap->cb_fn &&
+					(cb_arg == (void *)-1 ||
+					 cb_arg == ap->cb_arg)) {
+				if (ap->executing == 0) {
+					LIST_REMOVE(ap, next);
+					free(ap);
+					count++;
+					ap = ap_prev;
+				} else if (pthread_equal(ap->executing_id,
+							 pthread_self()) == 0) {
+					executing++;
+				} else {
+					err = EINPROGRESS;
+				}
+			}
+			ap_prev = ap;
+		}
+		rte_spinlock_unlock(&alarm_list_lk);
+	} while (executing != 0); /* spin until other threads finish */
+
+	if (count == 0 && err == 0)
+		rte_errno = ENOENT;
+	else if (err)
+		rte_errno = err;
+
+	rte_spinlock_lock(&alarm_list_lk);
+
+	/* unregister if no alarms left, otherwise re-register first */
+	if (LIST_EMPTY(&alarm_list))
+		unregister_current_callback();
+	else
+		register_first_callback();
+
+	rte_spinlock_unlock(&alarm_list_lk);
+
+	return count;
 }
diff --git a/lib/librte_eal/bsdapp/eal/eal_alarm_private.h b/lib/librte_eal/bsdapp/eal/eal_alarm_private.h
new file mode 100644
index 000000000..65c711518
--- /dev/null
+++ b/lib/librte_eal/bsdapp/eal/eal_alarm_private.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef EAL_ALARM_PRIVATE_H
+#define EAL_ALARM_PRIVATE_H
+
+#include <inttypes.h>
+
+/*
+ * Back-channel between the alarm and interrupt APIs: on FreeBSD the timer
+ * period is set up inside the interrupt API, not inside the alarm API as on
+ * Linux. Stores the time until the soonest alarm, in nanoseconds, in *val.
+ * Returns 0 on success, -1 if no alarm is pending or the clock read fails.
+ */
+int
+eal_alarm_get_timeout_ns(uint64_t *val);
+
+#endif /* EAL_ALARM_PRIVATE_H */
diff --git a/lib/librte_eal/bsdapp/eal/eal_interrupts.c b/lib/librte_eal/bsdapp/eal/eal_interrupts.c
index 0fe068894..70ef844ff 100644
--- a/lib/librte_eal/bsdapp/eal/eal_interrupts.c
+++ b/lib/librte_eal/bsdapp/eal/eal_interrupts.c
@@ -16,6 +16,7 @@
 #include <rte_interrupts.h>
 
 #include "eal_private.h"
+#include "eal_alarm_private.h"
 
 #define MAX_INTR_EVENTS 16
 
@@ -56,7 +57,22 @@ static volatile int kq = -1;
 static int
 intr_source_to_kevent(const struct rte_intr_handle *ih, struct kevent *ke)
 {
-	ke->filter = EVFILT_READ;
+	/* alarm callbacks are special case */
+	if (ih->type == RTE_INTR_HANDLE_ALARM) {
+		uint64_t timeout_ns;
+
+		/* get soonest alarm timeout */
+		if (eal_alarm_get_timeout_ns(&timeout_ns) < 0)
+			return -1;
+
+		ke->filter = EVFILT_TIMER;
+		/* timers are one shot */
+		ke->flags |= EV_ONESHOT;
+		ke->fflags = NOTE_NSECONDS;
+		ke->data = timeout_ns;
+	} else {
+		ke->filter = EVFILT_READ;
+	}
 	ke->ident = ih->fd;
 
 	return 0;
@@ -122,8 +138,10 @@ rte_intr_callback_register(const struct rte_intr_handle *intr_handle,
 		}
 	}
 
-	/* add events to the queue */
-	if (add_event) {
+	/* add events to the queue. timer events are special as we need to
+	 * re-set the timer.
+	 */
+	if (add_event || src->intr_handle.type == RTE_INTR_HANDLE_ALARM) {
 		struct kevent ke;
 
 		memset(&ke, 0, sizeof(ke));
@@ -218,8 +236,9 @@ rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle,
 		if (kevent(kq, &ke, 1, NULL, 0, NULL) < 0) {
 			RTE_LOG(ERR, EAL, "Error removing fd %d kevent, %s\n",
 				src->intr_handle.fd, strerror(errno));
-			ret = -errno;
-			goto out;
+			/* removing a non-existent event is an expected
+			 * condition in some cases (e.g. oneshot events).
+			 */
 		}
 
 		/*walk through the callbacks and remove all that match. */
-- 
2.17.1


More information about the dev mailing list