[dpdk-dev] [PATCH v6 04/10] eal: implement functions for thread affinity management

Narcisa Ana Maria Vasile navasile at linux.microsoft.com
Sat Apr 3 03:39:02 CEST 2021


From: Narcisa Vasile <navasile at microsoft.com>

Implement functions for getting and setting the CPU affinity of a thread.

On Windows, a thread's affinity is described by a single GROUP_AFFINITY,
so it cannot span multiple processor groups. The Windows EAL therefore
records a (group, mask) pair for every logical processor while building
the CPU map and uses that map to translate between rte_cpuset_t and
GROUP_AFFINITY; cpusets spanning more than one processor group are
rejected with EINVAL.
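
For illustration, a minimal usage sketch (not part of this patch) that
pins the calling thread to lcores 0 and 1 and then reads the affinity
back; it assumes rte_thread_self() from this series, and on Windows both
lcores must belong to the same processor group:

  #include <rte_thread.h>

  static int
  pin_current_thread(void)
  {
      rte_cpuset_t cpuset;
      int ret;

      CPU_ZERO(&cpuset);
      CPU_SET(0, &cpuset);
      CPU_SET(1, &cpuset);

      /* Both calls return 0 on success or a positive errno-style value. */
      ret = rte_thread_set_affinity_by_id(rte_thread_self(),
                                          sizeof(cpuset), &cpuset);
      if (ret != 0)
          return ret;

      return rte_thread_get_affinity_by_id(rte_thread_self(),
                                           sizeof(cpuset), &cpuset);
  }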

Signed-off-by: Narcisa Vasile <navasile at microsoft.com>
Signed-off-by: Dmitry Malloy <dmitrym at microsoft.com>
---
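
Note for reviewers (illustration only, not part of the patch):
SetThreadGroupAffinity() accepts a single GROUP_AFFINITY, i.e. one
(processor group, KAFFINITY mask) pair, which is why
rte_convert_cpuset_to_affinity() below returns EINVAL for cpusets that
span more than one group. The hypothetical helper below shows how a flat
CPU index maps onto such a pair when every group is fully populated; the
patch derives the real mapping from GetLogicalProcessorInformationEx()
instead, so sparsely populated groups are handled correctly.

  #include <limits.h>
  #include <string.h>
  #include <windows.h>

  /* Number of bits in a KAFFINITY mask (64 on 64-bit Windows);
   * EAL_PROCESSOR_GROUP_SIZE plays this role in the EAL.
   */
  #define GROUP_SIZE (sizeof(KAFFINITY) * CHAR_BIT)

  /* Hypothetical helper: map a flat CPU index to a (group, mask) pair,
   * assuming densely populated processor groups.
   */
  static GROUP_AFFINITY
  flat_cpu_index_to_group_affinity(unsigned int cpu_index)
  {
      GROUP_AFFINITY affinity;

      memset(&affinity, 0, sizeof(affinity));
      affinity.Group = (USHORT)(cpu_index / GROUP_SIZE);
      affinity.Mask = (KAFFINITY)1 << (cpu_index % GROUP_SIZE);
      return affinity;
  }
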
 lib/librte_eal/common/rte_thread.c   |  13 ++
 lib/librte_eal/include/rte_thread.h  |  41 +++++++
 lib/librte_eal/windows/eal_lcore.c   | 170 +++++++++++++++++++--------
 lib/librte_eal/windows/eal_windows.h |  10 ++
 lib/librte_eal/windows/rte_thread.c  | 132 ++++++++++++++++++++-
 5 files changed, 318 insertions(+), 48 deletions(-)

diff --git a/lib/librte_eal/common/rte_thread.c b/lib/librte_eal/common/rte_thread.c
index 0bd1b115d..4f93e3ff1 100644
--- a/lib/librte_eal/common/rte_thread.c
+++ b/lib/librte_eal/common/rte_thread.c
@@ -29,6 +29,19 @@ rte_thread_equal(rte_thread_t t1, rte_thread_t t2)
 	return pthread_equal(t1, t2);
 }
 
+int
+rte_thread_set_affinity_by_id(rte_thread_t thread_id, size_t cpuset_size,
+			const rte_cpuset_t *cpuset)
+{
+	return pthread_setaffinity_np(thread_id, cpuset_size, cpuset);
+}
+
+int
+rte_thread_get_affinity_by_id(rte_thread_t thread_id, size_t cpuset_size,
+		rte_cpuset_t *cpuset)
+{
+	return pthread_getaffinity_np(thread_id, cpuset_size, cpuset);
+}
+
 int
 rte_thread_attr_init(rte_thread_attr_t *attr)
 {
diff --git a/lib/librte_eal/include/rte_thread.h b/lib/librte_eal/include/rte_thread.h
index 2d7b3bc05..4b1e3dfe8 100644
--- a/lib/librte_eal/include/rte_thread.h
+++ b/lib/librte_eal/include/rte_thread.h
@@ -73,6 +73,47 @@ rte_thread_t rte_thread_self(void);
 __rte_experimental
 int rte_thread_equal(rte_thread_t t1, rte_thread_t t2);
 
+/**
+ * Set the affinity of thread 'thread_id' to the cpu set
+ * specified by 'cpuset'.
+ *
+ * @param thread_id
+ *   Id of the thread for which to set the affinity.
+ *
+ * @param cpuset_size
+ *   Size of the cpu set, in bytes.
+ *
+ * @param cpuset
+ *   Pointer to the CPU affinity to set.
+ *
+ * @return
+ *   On success, return 0.
+ *   On failure, return a positive errno-style error number.
+ */
+__rte_experimental
+int rte_thread_set_affinity_by_id(rte_thread_t thread_id, size_t cpuset_size,
+				  const rte_cpuset_t *cpuset);
+
+/**
+ * Get the affinity of thread 'thread_id' and store it
+ * in 'cpuset'.
+ *
+ * @param thread_id
+ *   Id of the thread for which to get the affinity.
+ *
+ * @param cpuset_size
+ *   Size of the cpu set, in bytes.
+ *
+ * @param cpuset
+ *   Pointer for storing the affinity value.
+ *
+ * @return
+ *   On success, return 0.
+ *   On failure, return a positive errno-style error number.
+ */
+__rte_experimental
+int rte_thread_get_affinity_by_id(rte_thread_t thread_id, size_t cpuset_size,
+				  rte_cpuset_t *cpuset);
+
 /**
  * Initialize the attributes of a thread.
  * These attributes can be passed to the rte_thread_create() function
diff --git a/lib/librte_eal/windows/eal_lcore.c b/lib/librte_eal/windows/eal_lcore.c
index a85149be9..023c5c895 100644
--- a/lib/librte_eal/windows/eal_lcore.c
+++ b/lib/librte_eal/windows/eal_lcore.c
@@ -2,7 +2,6 @@
  * Copyright(c) 2019 Intel Corporation
  */
 
-#include <pthread.h>
 #include <stdbool.h>
 #include <stdint.h>
 
@@ -28,13 +27,15 @@ struct socket_map {
 };
 
 struct cpu_map {
-	unsigned int socket_count;
 	unsigned int lcore_count;
+	unsigned int socket_count;
+	unsigned int cpu_count;
 	struct lcore_map lcores[RTE_MAX_LCORE];
 	struct socket_map sockets[RTE_MAX_NUMA_NODES];
+	GROUP_AFFINITY cpus[CPU_SETSIZE];
 };
 
-static struct cpu_map cpu_map = { 0 };
+static struct cpu_map cpu_map;
 
 /* eal_create_cpu_map() is called before logging is initialized */
 static void
@@ -48,13 +49,111 @@ log_early(const char *format, ...)
 	va_end(va);
 }
 
+/* Build the map from CPU index to (processor group, group-relative
+ * affinity mask) used for managing thread affinity.
+ */
+static int
+eal_query_group_affinity(void)
+{
+	SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *infos = NULL;
+	DWORD infos_size = 0;
+	int ret = 0;
+
+	if (!GetLogicalProcessorInformationEx(RelationGroup, NULL,
+					      &infos_size)) {
+		DWORD error = GetLastError();
+		if (error != ERROR_INSUFFICIENT_BUFFER) {
+			log_early("Cannot get group information size, "
+				  "error %lu\n", error);
+			rte_errno = EINVAL;
+			ret = -1;
+			goto cleanup;
+		}
+	}
+
+	infos = malloc(infos_size);
+	if (infos == NULL) {
+		log_early("Cannot allocate memory for group information\n");
+		rte_errno = ENOMEM;
+		ret = -1;
+		goto cleanup;
+	}
+
+	if (!GetLogicalProcessorInformationEx(RelationGroup, infos,
+					      &infos_size)) {
+		log_early("Cannot get group information, error %lu\n",
+			  GetLastError());
+		rte_errno = EINVAL;
+		ret = -1;
+		goto cleanup;
+	}
+
+	cpu_map.cpu_count = 0;
+	USHORT group_count = infos->Group.ActiveGroupCount;
+	for (USHORT group_number = 0; group_number < group_count; group_number++) {
+		KAFFINITY affinity = infos->Group.GroupInfo[group_number].ActiveProcessorMask;
+
+		for (unsigned int i = 0; i < EAL_PROCESSOR_GROUP_SIZE; i++) {
+			if ((affinity & ((KAFFINITY)1 << i)) == 0)
+				continue;
+			/* Do not overflow the map on systems with more
+			 * logical processors than CPU_SETSIZE.
+			 */
+			if (cpu_map.cpu_count == RTE_DIM(cpu_map.cpus))
+				goto cleanup;
+			cpu_map.cpus[cpu_map.cpu_count].Group = group_number;
+			cpu_map.cpus[cpu_map.cpu_count].Mask = (KAFFINITY)1 << i;
+			cpu_map.cpu_count++;
+		}
+	}
+
+cleanup:
+	free(infos);
+	return ret;
+}
+
+/* Add the lcores of a NUMA node to the CPU map, merging duplicate
+ * reports of a node that spans multiple processor groups.
+ * Return true if the lcore or socket map is full.
+ */
+static bool
+eal_check_for_duplicate_numa(const SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *info)
+{
+	const unsigned int node_id = info->NumaNode.NodeNumber;
+	const GROUP_AFFINITY *cores = &info->NumaNode.GroupMask;
+	struct lcore_map *lcore;
+	unsigned int socket_id;
+
+	/* NUMA node may be reported multiple times if it includes
+	 * cores from different processor groups, e. g. 80 cores
+	 * of a physical processor comprise one NUMA node, but two
+	 * processor groups, because group size is limited by 32/64.
+	 */
+	for (socket_id = 0; socket_id < cpu_map.socket_count; socket_id++) {
+		if (cpu_map.sockets[socket_id].node_id == node_id)
+			break;
+	}
+
+	if (socket_id == cpu_map.socket_count) {
+		if (socket_id == RTE_DIM(cpu_map.sockets))
+			return true;
+
+		cpu_map.sockets[socket_id].node_id = node_id;
+		cpu_map.socket_count++;
+	}
+
+	for (unsigned int i = 0; i < EAL_PROCESSOR_GROUP_SIZE; i++) {
+		if ((cores->Mask & ((KAFFINITY)1 << i)) == 0)
+			continue;
+
+		if (cpu_map.lcore_count == RTE_DIM(cpu_map.lcores))
+			return true;
+
+		lcore = &cpu_map.lcores[cpu_map.lcore_count];
+		lcore->socket_id = socket_id;
+		lcore->core_id = cores->Group * EAL_PROCESSOR_GROUP_SIZE + i;
+		cpu_map.lcore_count++;
+	}
+	return false;
+}
+
 int
 eal_create_cpu_map(void)
 {
 	SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *infos, *info;
 	DWORD infos_size;
 	bool full = false;
+	int ret = 0;
 
+	infos = NULL;
 	infos_size = 0;
 	if (!GetLogicalProcessorInformationEx(
 			RelationNumaNode, NULL, &infos_size)) {
@@ -79,57 +178,27 @@ eal_create_cpu_map(void)
 		log_early("Cannot get NUMA node information, error %lu\n",
 			GetLastError());
 		rte_errno = EINVAL;
-		return -1;
+		ret = -1;
+		goto exit;
 	}
 
 	info = infos;
 	while ((uint8_t *)info - (uint8_t *)infos < infos_size) {
-		unsigned int node_id = info->NumaNode.NodeNumber;
-		GROUP_AFFINITY *cores = &info->NumaNode.GroupMask;
-		struct lcore_map *lcore;
-		unsigned int i, socket_id;
-
-		/* NUMA node may be reported multiple times if it includes
-		 * cores from different processor groups, e. g. 80 cores
-		 * of a physical processor comprise one NUMA node, but two
-		 * processor groups, because group size is limited by 32/64.
-		 */
-		for (socket_id = 0; socket_id < cpu_map.socket_count;
-		    socket_id++) {
-			if (cpu_map.sockets[socket_id].node_id == node_id)
-				break;
-		}
-
-		if (socket_id == cpu_map.socket_count) {
-			if (socket_id == RTE_DIM(cpu_map.sockets)) {
-				full = true;
-				goto exit;
-			}
-
-			cpu_map.sockets[socket_id].node_id = node_id;
-			cpu_map.socket_count++;
-		}
-
-		for (i = 0; i < EAL_PROCESSOR_GROUP_SIZE; i++) {
-			if ((cores->Mask & ((KAFFINITY)1 << i)) == 0)
-				continue;
-
-			if (cpu_map.lcore_count == RTE_DIM(cpu_map.lcores)) {
-				full = true;
-				goto exit;
-			}
-
-			lcore = &cpu_map.lcores[cpu_map.lcore_count];
-			lcore->socket_id = socket_id;
-			lcore->core_id =
-				cores->Group * EAL_PROCESSOR_GROUP_SIZE + i;
-			cpu_map.lcore_count++;
-		}
+		if (eal_check_for_duplicate_numa(info)) {
+			full = true;
+			break;
+		}
 
 		info = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)(
 			(uint8_t *)info + info->Size);
 	}
 
+	if (eal_query_group_affinity()) {
+		/*
+		 * No need to set rte_errno here.
+		 * It is set by eal_query_group_affinity().
+		 */
+		ret = -1;
+		goto exit;
+	}
 exit:
 	if (full) {
 		/* Not a fatal error, but important for troubleshooting. */
@@ -139,7 +208,7 @@ eal_create_cpu_map(void)
 
 	free(infos);
 
-	return 0;
+	return ret;
 }
 
 int
@@ -165,3 +234,12 @@ eal_socket_numa_node(unsigned int socket_id)
 {
 	return cpu_map.sockets[socket_id].node_id;
 }
+
+PGROUP_AFFINITY
+eal_get_cpu_affinity(size_t cpu_index)
+{
+	if (cpu_index < CPU_SETSIZE)
+		return &cpu_map.cpus[cpu_index];
+
+	return NULL;
+}
diff --git a/lib/librte_eal/windows/eal_windows.h b/lib/librte_eal/windows/eal_windows.h
index 478accc1b..dc5dc8240 100644
--- a/lib/librte_eal/windows/eal_windows.h
+++ b/lib/librte_eal/windows/eal_windows.h
@@ -55,6 +55,16 @@ int eal_thread_create(pthread_t *thread);
  */
 unsigned int eal_socket_numa_node(unsigned int socket_id);
 
+/**
+ * Get pointer to the group affinity for the cpu.
+ *
+ * @param cpu_index
+ *  Index of the cpu, as it comes from rte_cpuset_t.
+ * @return
+ *  Pointer to the group affinity for the cpu.
+ */
+PGROUP_AFFINITY eal_get_cpu_affinity(size_t cpu_index);
+
 /**
  * Schedule code for execution in the interrupt thread.
  *
diff --git a/lib/librte_eal/windows/rte_thread.c b/lib/librte_eal/windows/rte_thread.c
index ecd2f810f..2fa130b1f 100644
--- a/lib/librte_eal/windows/rte_thread.c
+++ b/lib/librte_eal/windows/rte_thread.c
@@ -4,9 +4,9 @@
  */
 
 #include <rte_common.h>
-#include <rte_errno.h>
 #include <rte_thread.h>
-#include <rte_windows.h>
+
+#include "eal_windows.h"
 
 struct eal_tls_key {
 	DWORD thread_index;
@@ -69,6 +69,134 @@ rte_thread_equal(rte_thread_t t1, rte_thread_t t2)
 	return t1 == t2 ? 1 : 0;
 }
 
+static int
+rte_convert_cpuset_to_affinity(const rte_cpuset_t *cpuset,
+			       PGROUP_AFFINITY affinity)
+{
+	int ret = 0;
+	PGROUP_AFFINITY cpu_affinity = NULL;
+
+	memset(affinity, 0, sizeof(GROUP_AFFINITY));
+	affinity->Group = (USHORT)-1;
+
+	/* Check that all cpus of the set belong to the same processor group and
+	 * accumulate thread affinity to be applied.
+	 */
+	for (unsigned int cpu_idx = 0; cpu_idx < CPU_SETSIZE; cpu_idx++) {
+		if (!CPU_ISSET(cpu_idx, cpuset))
+			continue;
+
+		cpu_affinity = eal_get_cpu_affinity(cpu_idx);
+
+		if (affinity->Group == (USHORT)-1) {
+			affinity->Group = cpu_affinity->Group;
+		} else if (affinity->Group != cpu_affinity->Group) {
+			ret = EINVAL;
+			goto cleanup;
+		}
+
+		affinity->Mask |= cpu_affinity->Mask;
+	}
+
+	if (affinity->Mask == 0) {
+		ret = EINVAL;
+		goto cleanup;
+	}
+
+cleanup:
+	return ret;
+}
+
+int
+rte_thread_set_affinity_by_id(rte_thread_t thread_id,
+			    size_t cpuset_size,
+			    const rte_cpuset_t *cpuset)
+{
+	int ret = 0;
+	GROUP_AFFINITY thread_affinity;
+	HANDLE thread_handle = NULL;
+
+	if (cpuset == NULL || cpuset_size < sizeof(*cpuset)) {
+		ret = EINVAL;
+		goto cleanup;
+	}
+
+	ret = rte_convert_cpuset_to_affinity(cpuset, &thread_affinity);
+	if (ret != 0) {
+		RTE_LOG(DEBUG, EAL, "Unable to convert cpuset to thread affinity\n");
+		goto cleanup;
+	}
+
+	thread_handle = OpenThread(THREAD_ALL_ACCESS, FALSE, thread_id);
+	if (thread_handle == NULL) {
+		ret = rte_thread_translate_win32_error();
+		RTE_LOG_WIN32_ERR("OpenThread()");
+		goto cleanup;
+	}
+
+	if (!SetThreadGroupAffinity(thread_handle, &thread_affinity, NULL)) {
+		ret = rte_thread_translate_win32_error();
+		RTE_LOG_WIN32_ERR("SetThreadGroupAffinity()");
+		goto cleanup;
+	}
+
+cleanup:
+	if (thread_handle != NULL) {
+		CloseHandle(thread_handle);
+		thread_handle = NULL;
+	}
+
+	return ret;
+}
+
+int
+rte_thread_get_affinity_by_id(rte_thread_t thread_id, size_t cpuset_size,
+			rte_cpuset_t *cpuset)
+{
+	HANDLE thread_handle = NULL;
+	PGROUP_AFFINITY cpu_affinity;
+	GROUP_AFFINITY thread_affinity;
+	int ret = 0;
+
+	if (cpuset == NULL || cpuset_size < sizeof(*cpuset)) {
+		ret = EINVAL;
+		goto cleanup;
+	}
+
+	thread_handle = OpenThread(THREAD_ALL_ACCESS, FALSE, thread_id);
+	if (thread_handle == NULL) {
+		ret = rte_thread_translate_win32_error();
+		RTE_LOG_WIN32_ERR("OpenThread()");
+		goto cleanup;
+	}
+
+	/* obtain previous thread affinity */
+	if (!GetThreadGroupAffinity(thread_handle, &thread_affinity)) {
+		ret = rte_thread_translate_win32_error();
+		RTE_LOG_WIN32_ERR("GetThreadGroupAffinity()");
+		goto cleanup;
+	}
+
+	CPU_ZERO(cpuset);
+
+	/* Convert affinity to DPDK cpu set */
+	for (unsigned int cpu_idx = 0; cpu_idx < CPU_SETSIZE; cpu_idx++) {
+		cpu_affinity = eal_get_cpu_affinity(cpu_idx);
+
+		if ((cpu_affinity->Group == thread_affinity.Group) &&
+		   ((cpu_affinity->Mask & thread_affinity.Mask) != 0)) {
+			CPU_SET(cpu_idx, cpuset);
+		}
+	}
+
+cleanup:
+	if (thread_handle != NULL) {
+		CloseHandle(thread_handle);
+		thread_handle = NULL;
+	}
+	return ret;
+}
+
 int
 rte_thread_attr_init(rte_thread_attr_t *attr)
 {
-- 
2.30.0.vfs.0.2