[dpdk-dev] [PATCH v9 03/12] eal: introduce memory management wrappers

Dmitry Kozlyuk dmitry.kozliuk at gmail.com
Mon Jun 15 02:43:45 CEST 2020


Introduce OS-independent wrappers for memory management operations used
across DPDK and specifically in common code of EAL:

* rte_mem_map()
* rte_mem_unmap()
* rte_mem_page_size()
* rte_mem_lock()

Windows uses different APIs for memory mapping and reservation, while
Unices reserve memory by mapping it. Introduce EAL private functions to
support memory reservation in common code:

* eal_mem_reserve()
* eal_mem_free()
* eal_mem_set_dump()

Wrappers follow POSIX semantics limited to DPDK tasks, but their
signatures deliberately differ from POSIX ones to be more safe and
expressive. New symbols are internal. Being thin wrappers, they require
no special maintenance.

Signed-off-by: Dmitry Kozlyuk <dmitry.kozliuk at gmail.com>
---

Not adding rte_eal_paging.h to Doxygen index because, to my
understanding, it only contains public API, and it was decided to keep
rte_eal_paging.h functions private.

 lib/librte_eal/common/eal_common_fbarray.c |  40 +++---
 lib/librte_eal/common/eal_common_memory.c  |  61 ++++-----
 lib/librte_eal/common/eal_private.h        |  78 ++++++++++-
 lib/librte_eal/freebsd/Makefile            |   1 +
 lib/librte_eal/include/rte_eal_paging.h    |  98 +++++++++++++
 lib/librte_eal/linux/Makefile              |   1 +
 lib/librte_eal/linux/eal_memalloc.c        |   5 +-
 lib/librte_eal/rte_eal_version.map         |   9 ++
 lib/librte_eal/unix/eal_unix_memory.c      | 152 +++++++++++++++++++++
 lib/librte_eal/unix/meson.build            |   1 +
 10 files changed, 381 insertions(+), 65 deletions(-)
 create mode 100644 lib/librte_eal/include/rte_eal_paging.h
 create mode 100644 lib/librte_eal/unix/eal_unix_memory.c

diff --git a/lib/librte_eal/common/eal_common_fbarray.c b/lib/librte_eal/common/eal_common_fbarray.c
index c52ddb967..fd0292a64 100644
--- a/lib/librte_eal/common/eal_common_fbarray.c
+++ b/lib/librte_eal/common/eal_common_fbarray.c
@@ -5,15 +5,16 @@
 #include <fcntl.h>
 #include <inttypes.h>
 #include <limits.h>
-#include <sys/mman.h>
 #include <stdint.h>
 #include <errno.h>
 #include <string.h>
 #include <unistd.h>
 
 #include <rte_common.h>
-#include <rte_log.h>
+#include <rte_eal_paging.h>
 #include <rte_errno.h>
+#include <rte_log.h>
+#include <rte_memory.h>
 #include <rte_spinlock.h>
 #include <rte_tailq.h>
 
@@ -90,12 +91,9 @@ resize_and_map(int fd, void *addr, size_t len)
 		return -1;
 	}
 
-	map_addr = mmap(addr, len, PROT_READ | PROT_WRITE,
-			MAP_SHARED | MAP_FIXED, fd, 0);
+	map_addr = rte_mem_map(addr, len, RTE_PROT_READ | RTE_PROT_WRITE,
+			RTE_MAP_SHARED | RTE_MAP_FORCE_ADDRESS, fd, 0);
 	if (map_addr != addr) {
-		RTE_LOG(ERR, EAL, "mmap() failed: %s\n", strerror(errno));
-		/* pass errno up the chain */
-		rte_errno = errno;
 		return -1;
 	}
 	return 0;
@@ -733,7 +731,7 @@ rte_fbarray_init(struct rte_fbarray *arr, const char *name, unsigned int len,
 		return -1;
 	}
 
-	page_sz = sysconf(_SC_PAGESIZE);
+	page_sz = rte_mem_page_size();
 	if (page_sz == (size_t)-1) {
 		free(ma);
 		return -1;
@@ -754,11 +752,13 @@ rte_fbarray_init(struct rte_fbarray *arr, const char *name, unsigned int len,
 
 	if (internal_config.no_shconf) {
 		/* remap virtual area as writable */
-		void *new_data = mmap(data, mmap_len, PROT_READ | PROT_WRITE,
-				MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, fd, 0);
-		if (new_data == MAP_FAILED) {
+		static const int flags = RTE_MAP_FORCE_ADDRESS |
+			RTE_MAP_PRIVATE | RTE_MAP_ANONYMOUS;
+		void *new_data = rte_mem_map(data, mmap_len,
+			RTE_PROT_READ | RTE_PROT_WRITE, flags, fd, 0);
+		if (new_data == NULL) {
 			RTE_LOG(DEBUG, EAL, "%s(): couldn't remap anonymous memory: %s\n",
-					__func__, strerror(errno));
+					__func__, rte_strerror(rte_errno));
 			goto fail;
 		}
 	} else {
@@ -820,7 +820,7 @@ rte_fbarray_init(struct rte_fbarray *arr, const char *name, unsigned int len,
 	return 0;
 fail:
 	if (data)
-		munmap(data, mmap_len);
+		rte_mem_unmap(data, mmap_len);
 	if (fd >= 0)
 		close(fd);
 	free(ma);
@@ -858,7 +858,7 @@ rte_fbarray_attach(struct rte_fbarray *arr)
 		return -1;
 	}
 
-	page_sz = sysconf(_SC_PAGESIZE);
+	page_sz = rte_mem_page_size();
 	if (page_sz == (size_t)-1) {
 		free(ma);
 		return -1;
@@ -909,7 +909,7 @@ rte_fbarray_attach(struct rte_fbarray *arr)
 	return 0;
 fail:
 	if (data)
-		munmap(data, mmap_len);
+		rte_mem_unmap(data, mmap_len);
 	if (fd >= 0)
 		close(fd);
 	free(ma);
@@ -937,8 +937,7 @@ rte_fbarray_detach(struct rte_fbarray *arr)
 	 * really do anything about it, things will blow up either way.
 	 */
 
-	size_t page_sz = sysconf(_SC_PAGESIZE);
-
+	size_t page_sz = rte_mem_page_size();
 	if (page_sz == (size_t)-1)
 		return -1;
 
@@ -957,7 +956,7 @@ rte_fbarray_detach(struct rte_fbarray *arr)
 		goto out;
 	}
 
-	munmap(arr->data, mmap_len);
+	rte_mem_unmap(arr->data, mmap_len);
 
 	/* area is unmapped, close fd and remove the tailq entry */
 	if (tmp->fd >= 0)
@@ -992,8 +991,7 @@ rte_fbarray_destroy(struct rte_fbarray *arr)
 	 * really do anything about it, things will blow up either way.
 	 */
 
-	size_t page_sz = sysconf(_SC_PAGESIZE);
-
+	size_t page_sz = rte_mem_page_size();
 	if (page_sz == (size_t)-1)
 		return -1;
 
@@ -1042,7 +1040,7 @@ rte_fbarray_destroy(struct rte_fbarray *arr)
 		}
 		close(fd);
 	}
-	munmap(arr->data, mmap_len);
+	rte_mem_unmap(arr->data, mmap_len);
 
 	/* area is unmapped, remove the tailq entry */
 	TAILQ_REMOVE(&mem_area_tailq, tmp, next);
diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c
index 4c897a13f..aa377990f 100644
--- a/lib/librte_eal/common/eal_common_memory.c
+++ b/lib/librte_eal/common/eal_common_memory.c
@@ -11,13 +11,13 @@
 #include <string.h>
 #include <unistd.h>
 #include <inttypes.h>
-#include <sys/mman.h>
 #include <sys/queue.h>
 
 #include <rte_fbarray.h>
 #include <rte_memory.h>
 #include <rte_eal.h>
 #include <rte_eal_memconfig.h>
+#include <rte_eal_paging.h>
 #include <rte_errno.h>
 #include <rte_log.h>
 
@@ -40,18 +40,10 @@
 static void *next_baseaddr;
 static uint64_t system_page_sz;
 
-#ifdef RTE_EXEC_ENV_LINUX
-#define RTE_DONTDUMP MADV_DONTDUMP
-#elif defined RTE_EXEC_ENV_FREEBSD
-#define RTE_DONTDUMP MADV_NOCORE
-#else
-#error "madvise doesn't support this OS"
-#endif
-
 #define MAX_MMAP_WITH_DEFINED_ADDR_TRIES 5
 void *
 eal_get_virtual_area(void *requested_addr, size_t *size,
-		size_t page_sz, int flags, int mmap_flags)
+	size_t page_sz, int flags, int reserve_flags)
 {
 	bool addr_is_hint, allow_shrink, unmap, no_align;
 	uint64_t map_sz;
@@ -59,9 +51,7 @@ eal_get_virtual_area(void *requested_addr, size_t *size,
 	uint8_t try = 0;
 
 	if (system_page_sz == 0)
-		system_page_sz = sysconf(_SC_PAGESIZE);
-
-	mmap_flags |= MAP_PRIVATE | MAP_ANONYMOUS;
+		system_page_sz = rte_mem_page_size();
 
 	RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zx bytes\n", *size);
 
@@ -105,24 +95,24 @@ eal_get_virtual_area(void *requested_addr, size_t *size,
 			return NULL;
 		}
 
-		mapped_addr = mmap(requested_addr, (size_t)map_sz, PROT_NONE,
-				mmap_flags, -1, 0);
-		if (mapped_addr == MAP_FAILED && allow_shrink)
+		mapped_addr = eal_mem_reserve(
+			requested_addr, (size_t)map_sz, reserve_flags);
+		if ((mapped_addr == NULL) && allow_shrink)
 			*size -= page_sz;
 
-		if (mapped_addr != MAP_FAILED && addr_is_hint &&
-		    mapped_addr != requested_addr) {
+		if ((mapped_addr != NULL) && addr_is_hint &&
+				(mapped_addr != requested_addr)) {
 			try++;
 			next_baseaddr = RTE_PTR_ADD(next_baseaddr, page_sz);
 			if (try <= MAX_MMAP_WITH_DEFINED_ADDR_TRIES) {
 				/* hint was not used. Try with another offset */
-				munmap(mapped_addr, map_sz);
-				mapped_addr = MAP_FAILED;
+				eal_mem_free(mapped_addr, map_sz);
+				mapped_addr = NULL;
 				requested_addr = next_baseaddr;
 			}
 		}
 	} while ((allow_shrink || addr_is_hint) &&
-		 mapped_addr == MAP_FAILED && *size > 0);
+		(mapped_addr == NULL) && (*size > 0));
 
 	/* align resulting address - if map failed, we will ignore the value
 	 * anyway, so no need to add additional checks.
@@ -132,20 +122,17 @@ eal_get_virtual_area(void *requested_addr, size_t *size,
 
 	if (*size == 0) {
 		RTE_LOG(ERR, EAL, "Cannot get a virtual area of any size: %s\n",
-			strerror(errno));
-		rte_errno = errno;
+			rte_strerror(rte_errno));
 		return NULL;
-	} else if (mapped_addr == MAP_FAILED) {
+	} else if (mapped_addr == NULL) {
 		RTE_LOG(ERR, EAL, "Cannot get a virtual area: %s\n",
-			strerror(errno));
-		/* pass errno up the call chain */
-		rte_errno = errno;
+			rte_strerror(rte_errno));
 		return NULL;
 	} else if (requested_addr != NULL && !addr_is_hint &&
 			aligned_addr != requested_addr) {
 		RTE_LOG(ERR, EAL, "Cannot get a virtual area at requested address: %p (got %p)\n",
 			requested_addr, aligned_addr);
-		munmap(mapped_addr, map_sz);
+		eal_mem_free(mapped_addr, map_sz);
 		rte_errno = EADDRNOTAVAIL;
 		return NULL;
 	} else if (requested_addr != NULL && addr_is_hint &&
@@ -161,7 +148,7 @@ eal_get_virtual_area(void *requested_addr, size_t *size,
 		aligned_addr, *size);
 
 	if (unmap) {
-		munmap(mapped_addr, map_sz);
+		eal_mem_free(mapped_addr, map_sz);
 	} else if (!no_align) {
 		void *map_end, *aligned_end;
 		size_t before_len, after_len;
@@ -179,19 +166,17 @@ eal_get_virtual_area(void *requested_addr, size_t *size,
 		/* unmap space before aligned mmap address */
 		before_len = RTE_PTR_DIFF(aligned_addr, mapped_addr);
 		if (before_len > 0)
-			munmap(mapped_addr, before_len);
+			eal_mem_free(mapped_addr, before_len);
 
 		/* unmap space after aligned end mmap address */
 		after_len = RTE_PTR_DIFF(map_end, aligned_end);
 		if (after_len > 0)
-			munmap(aligned_end, after_len);
+			eal_mem_free(aligned_end, after_len);
 	}
 
 	if (!unmap) {
 		/* Exclude these pages from a core dump. */
-		if (madvise(aligned_addr, *size, RTE_DONTDUMP) != 0)
-			RTE_LOG(DEBUG, EAL, "madvise failed: %s\n",
-				strerror(errno));
+		eal_mem_set_dump(aligned_addr, *size, false);
 	}
 
 	return aligned_addr;
@@ -547,10 +532,10 @@ rte_eal_memdevice_init(void)
 int
 rte_mem_lock_page(const void *virt)
 {
-	unsigned long virtual = (unsigned long)virt;
-	int page_size = getpagesize();
-	unsigned long aligned = (virtual & ~(page_size - 1));
-	return mlock((void *)aligned, page_size);
+	uintptr_t virtual = (uintptr_t)virt;
+	size_t page_size = rte_mem_page_size();
+	uintptr_t aligned = RTE_PTR_ALIGN_FLOOR(virtual, page_size);
+	return rte_mem_lock((void *)aligned, page_size);
 }
 
 int
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index 6733a2321..1696345c2 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -11,6 +11,7 @@
 
 #include <rte_dev.h>
 #include <rte_lcore.h>
+#include <rte_memory.h>
 
 /**
  * Structure storing internal configuration (per-lcore)
@@ -202,6 +203,24 @@ int rte_eal_alarm_init(void);
  */
 int rte_eal_check_module(const char *module_name);
 
+/**
+ * Memory reservation flags.
+ */
+enum eal_mem_reserve_flags {
+	/**
+	 * Reserve hugepages. May be unsupported by some platforms.
+	 */
+	EAL_RESERVE_HUGEPAGES = 1 << 0,
+	/**
+	 * Force reserving memory at the requested address.
+	 * This can be a destructive action depending on the implementation.
+	 *
+	 * @see RTE_MAP_FORCE_ADDRESS for description of possible consequences
+	 *      (although implementations are not required to use it).
+	 */
+	EAL_RESERVE_FORCE_ADDRESS = 1 << 1
+};
+
 /**
  * Get virtual area of specified size from the OS.
  *
@@ -215,8 +234,8 @@ int rte_eal_check_module(const char *module_name);
  *   Page size on which to align requested virtual area.
  * @param flags
  *   EAL_VIRTUAL_AREA_* flags.
- * @param mmap_flags
- *   Extra flags passed directly to mmap().
+ * @param reserve_flags
+ *   Extra flags passed directly to eal_mem_reserve().
  *
  * @return
  *   Virtual area address if successful.
@@ -233,7 +252,7 @@ int rte_eal_check_module(const char *module_name);
 /**< immediately unmap reserved virtual area. */
 void *
 eal_get_virtual_area(void *requested_addr, size_t *size,
-		size_t page_sz, int flags, int mmap_flags);
+		size_t page_sz, int flags, int reserve_flags);
 
 /**
  * Get cpu core_id.
@@ -493,4 +512,57 @@ eal_file_lock(int fd, enum eal_flock_op op, enum eal_flock_mode mode);
 int
 eal_file_truncate(int fd, ssize_t size);
 
+/**
+ * Reserve a region of virtual memory.
+ *
+ * Use eal_mem_free() to free reserved memory.
+ *
+ * @param requested_addr
+ *  A desired reservation address which must be page-aligned.
+ *  The system might not respect it.
+ *  NULL means the address will be chosen by the system.
+ * @param size
+ *  Reservation size. Must be a multiple of system page size.
+ * @param flags
+ *  Reservation options, a combination of eal_mem_reserve_flags.
+ * @returns
+ *  Starting address of the reserved area on success, NULL on failure.
+ *  Callers must not access this memory until remapping it.
+ */
+void *
+eal_mem_reserve(void *requested_addr, size_t size, int flags);
+
+/**
+ * Free memory obtained by eal_mem_reserve() or eal_mem_alloc().
+ *
+ * If *virt* and *size* describe a part of the reserved region,
+ * only this part of the region is freed (accurately up to the system
+ * page size). If *virt* points to allocated memory, *size* must match
+ * the one specified on allocation. The behavior is undefined
+ * if the memory pointed by *virt* is obtained from another source
+ * than listed above.
+ *
+ * @param virt
+ *  A virtual address in a region previously reserved.
+ * @param size
+ *  Number of bytes to unreserve.
+ */
+void
+eal_mem_free(void *virt, size_t size);
+
+/**
+ * Configure memory region inclusion into dumps.
+ *
+ * @param virt
+ *  Starting address of the region.
+ * @param size
+ *  Size of the region.
+ * @param dump
+ *  True to include memory into dumps, false to exclude.
+ * @return
+ *  0 on success, (-1) on failure and rte_errno is set.
+ */
+int
+eal_mem_set_dump(void *virt, size_t size, bool dump);
+
 #endif /* _EAL_PRIVATE_H_ */
diff --git a/lib/librte_eal/freebsd/Makefile b/lib/librte_eal/freebsd/Makefile
index 0f8741d96..2374ba0b7 100644
--- a/lib/librte_eal/freebsd/Makefile
+++ b/lib/librte_eal/freebsd/Makefile
@@ -77,6 +77,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += rte_reciprocal.c
 
 # from unix dir
 SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += eal_file.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += eal_unix_memory.c
 
 # from arch dir
 SRCS-$(CONFIG_RTE_EXEC_ENV_FREEBSD) += rte_cpuflags.c
diff --git a/lib/librte_eal/include/rte_eal_paging.h b/lib/librte_eal/include/rte_eal_paging.h
new file mode 100644
index 000000000..ed98e70e9
--- /dev/null
+++ b/lib/librte_eal/include/rte_eal_paging.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Dmitry Kozlyuk
+ */
+
+#include <stdint.h>
+
+#include <rte_compat.h>
+
+/**
+ * @file
+ * @internal
+ *
+ * Wrappers for OS facilities related to memory paging, used across DPDK.
+ */
+
+/** Memory protection flags. */
+enum rte_mem_prot {
+	RTE_PROT_READ = 1 << 0,   /**< Read access. */
+	RTE_PROT_WRITE = 1 << 1,  /**< Write access. */
+	RTE_PROT_EXECUTE = 1 << 2 /**< Code execution. */
+};
+
+/** Additional flags for memory mapping. */
+enum rte_map_flags {
+	/** Changes to the mapped memory are visible to other processes. */
+	RTE_MAP_SHARED = 1 << 0,
+	/** Mapping is not backed by a regular file. */
+	RTE_MAP_ANONYMOUS = 1 << 1,
+	/** Copy-on-write mapping, changes are invisible to other processes. */
+	RTE_MAP_PRIVATE = 1 << 2,
+	/**
+	 * Force mapping to the requested address. This flag should be used
+	 * with caution, because to fulfill the request implementation
+	 * may remove all other mappings in the requested region. However,
+	 * it is not required to do so, thus mapping with this flag may fail.
+	 */
+	RTE_MAP_FORCE_ADDRESS = 1 << 3
+};
+
+/**
+ * Map a portion of an opened file or the page file into memory.
+ *
+ * This function is similar to POSIX mmap(3) with common MAP_ANONYMOUS
+ * extension, except for the return value.
+ *
+ * @param requested_addr
+ *  Desired virtual address for mapping. Can be NULL to let OS choose.
+ * @param size
+ *  Size of the mapping in bytes.
+ * @param prot
+ *  Protection flags, a combination of rte_mem_prot values.
+ * @param flags
+ *  Additional mapping flags, a combination of rte_map_flags.
+ * @param fd
+ *  Mapped file descriptor. Can be negative for anonymous mapping.
+ * @param offset
+ *  Offset of the mapped region in fd. Must be 0 for anonymous mappings.
+ * @return
+ *  Mapped address or NULL on failure and rte_errno is set to OS error.
+ */
+__rte_internal
+void *
+rte_mem_map(void *requested_addr, size_t size, int prot, int flags,
+	int fd, size_t offset);
+
+/**
+ * OS-independent implementation of POSIX munmap(3).
+ */
+__rte_internal
+int
+rte_mem_unmap(void *virt, size_t size);
+
+/**
+ * Get system page size. This function never fails.
+ *
+ * @return
+ *   Page size in bytes.
+ */
+__rte_internal
+size_t
+rte_mem_page_size(void);
+
+/**
+ * Lock in physical memory all pages crossed by the address region.
+ *
+ * @param virt
+ *   Base virtual address of the region.
+ * @param size
+ *   Size of the region.
+ * @return
+ *   0 on success, negative on error.
+ *
+ * @see rte_mem_page_size() to retrieve the page size.
+ * @see rte_mem_lock_page() to lock an entire single page.
+ */
+__rte_internal
+int
+rte_mem_lock(const void *virt, size_t size);
diff --git a/lib/librte_eal/linux/Makefile b/lib/librte_eal/linux/Makefile
index 331489f99..8febf2212 100644
--- a/lib/librte_eal/linux/Makefile
+++ b/lib/librte_eal/linux/Makefile
@@ -84,6 +84,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += rte_reciprocal.c
 
 # from unix dir
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_file.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += eal_unix_memory.c
 
 # from arch dir
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUX) += rte_cpuflags.c
diff --git a/lib/librte_eal/linux/eal_memalloc.c b/lib/librte_eal/linux/eal_memalloc.c
index 2c717f8bd..bf29b83c6 100644
--- a/lib/librte_eal/linux/eal_memalloc.c
+++ b/lib/librte_eal/linux/eal_memalloc.c
@@ -630,7 +630,7 @@ alloc_seg(struct rte_memseg *ms, void *addr, int socket_id,
 mapped:
 	munmap(addr, alloc_sz);
 unmapped:
-	flags = MAP_FIXED;
+	flags = EAL_RESERVE_FORCE_ADDRESS;
 	new_addr = eal_get_virtual_area(addr, &alloc_sz, alloc_sz, 0, flags);
 	if (new_addr != addr) {
 		if (new_addr != NULL)
@@ -687,8 +687,7 @@ free_seg(struct rte_memseg *ms, struct hugepage_info *hi,
 		return -1;
 	}
 
-	if (madvise(ms->addr, ms->len, MADV_DONTDUMP) != 0)
-		RTE_LOG(DEBUG, EAL, "madvise failed: %s\n", strerror(errno));
+	eal_mem_set_dump(ms->addr, ms->len, false);
 
 	exit_early = false;
 
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index d8038749a..196eef5af 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -387,3 +387,12 @@ EXPERIMENTAL {
 	rte_trace_regexp;
 	rte_trace_save;
 };
+
+INTERNAL {
+	global:
+
+	rte_mem_lock;
+	rte_mem_map;
+	rte_mem_page_size;
+	rte_mem_unmap;
+};
diff --git a/lib/librte_eal/unix/eal_unix_memory.c b/lib/librte_eal/unix/eal_unix_memory.c
new file mode 100644
index 000000000..ec7156df9
--- /dev/null
+++ b/lib/librte_eal/unix/eal_unix_memory.c
@@ -0,0 +1,152 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Dmitry Kozlyuk
+ */
+
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include <rte_eal_paging.h>
+#include <rte_errno.h>
+#include <rte_log.h>
+
+#include "eal_private.h"
+
+#ifdef RTE_EXEC_ENV_LINUX
+#define EAL_DONTDUMP MADV_DONTDUMP
+#define EAL_DODUMP   MADV_DODUMP
+#elif defined RTE_EXEC_ENV_FREEBSD
+#define EAL_DONTDUMP MADV_NOCORE
+#define EAL_DODUMP   MADV_CORE
+#else
+#error "madvise doesn't support this OS"
+#endif
+
+static void *
+mem_map(void *requested_addr, size_t size, int prot, int flags,
+	int fd, size_t offset)
+{
+	void *virt = mmap(requested_addr, size, prot, flags, fd, offset);
+	if (virt == MAP_FAILED) {
+		RTE_LOG(DEBUG, EAL,
+			"Cannot mmap(%p, 0x%zx, 0x%x, 0x%x, %d, 0x%zx): %s\n",
+			requested_addr, size, prot, flags, fd, offset,
+			strerror(errno));
+		rte_errno = errno;
+		return NULL;
+	}
+	return virt;
+}
+
+static int
+mem_unmap(void *virt, size_t size)
+{
+	int ret = munmap(virt, size);
+	if (ret < 0) {
+		RTE_LOG(DEBUG, EAL, "Cannot munmap(%p, 0x%zx): %s\n",
+			virt, size, strerror(errno));
+		rte_errno = errno;
+	}
+	return ret;
+}
+
+void *
+eal_mem_reserve(void *requested_addr, size_t size, int flags)
+{
+	int sys_flags = MAP_PRIVATE | MAP_ANONYMOUS;
+
+	if (flags & EAL_RESERVE_HUGEPAGES) {
+#ifdef MAP_HUGETLB
+		sys_flags |= MAP_HUGETLB;
+#else
+		rte_errno = ENOTSUP;
+		return NULL;
+#endif
+	}
+
+	if (flags & EAL_RESERVE_FORCE_ADDRESS)
+		sys_flags |= MAP_FIXED;
+
+	return mem_map(requested_addr, size, PROT_NONE, sys_flags, -1, 0);
+}
+
+void
+eal_mem_free(void *virt, size_t size)
+{
+	mem_unmap(virt, size);
+}
+
+int
+eal_mem_set_dump(void *virt, size_t size, bool dump)
+{
+	int flags = dump ? EAL_DODUMP : EAL_DONTDUMP;
+	int ret = madvise(virt, size, flags);
+	if (ret) {
+		RTE_LOG(DEBUG, EAL, "madvise(%p, %#zx, %d) failed: %s\n",
+				virt, size, flags, strerror(rte_errno));
+		rte_errno = errno;
+	}
+	return ret;
+}
+
+static int
+mem_rte_to_sys_prot(int prot)
+{
+	int sys_prot = PROT_NONE;
+
+	if (prot & RTE_PROT_READ)
+		sys_prot |= PROT_READ;
+	if (prot & RTE_PROT_WRITE)
+		sys_prot |= PROT_WRITE;
+	if (prot & RTE_PROT_EXECUTE)
+		sys_prot |= PROT_EXEC;
+
+	return sys_prot;
+}
+
+void *
+rte_mem_map(void *requested_addr, size_t size, int prot, int flags,
+	int fd, size_t offset)
+{
+	int sys_flags = 0;
+	int sys_prot;
+
+	sys_prot = mem_rte_to_sys_prot(prot);
+
+	if (flags & RTE_MAP_SHARED)
+		sys_flags |= MAP_SHARED;
+	if (flags & RTE_MAP_ANONYMOUS)
+		sys_flags |= MAP_ANONYMOUS;
+	if (flags & RTE_MAP_PRIVATE)
+		sys_flags |= MAP_PRIVATE;
+	if (flags & RTE_MAP_FORCE_ADDRESS)
+		sys_flags |= MAP_FIXED;
+
+	return mem_map(requested_addr, size, sys_prot, sys_flags, fd, offset);
+}
+
+int
+rte_mem_unmap(void *virt, size_t size)
+{
+	return mem_unmap(virt, size);
+}
+
+size_t
+rte_mem_page_size(void)
+{
+	static size_t page_size;
+
+	if (!page_size)
+		page_size = sysconf(_SC_PAGESIZE);
+
+	return page_size;
+}
+
+int
+rte_mem_lock(const void *virt, size_t size)
+{
+	int ret = mlock(virt, size);
+	if (ret)
+		rte_errno = errno;
+	return ret;
+}
diff --git a/lib/librte_eal/unix/meson.build b/lib/librte_eal/unix/meson.build
index 21029ba1a..e733910a1 100644
--- a/lib/librte_eal/unix/meson.build
+++ b/lib/librte_eal/unix/meson.build
@@ -3,4 +3,5 @@
 
 sources += files(
 	'eal_file.c',
+	'eal_unix_memory.c',
 )
-- 
2.25.4



More information about the dev mailing list