[dpdk-dev] [RFC 14/23] eal/memory: add support for dynamic unmapping of pages

Anatoly Burakov anatoly.burakov at intel.com
Tue Dec 19 12:04:40 CET 2017


Nothing calls this code yet, but the support is now in place. Also, add
cleanup to the allocation procedures: if we fail to allocate all of the
pages that were requested, any pages that were already allocated are now
freed back to the system.

Signed-off-by: Anatoly Burakov <anatoly.burakov at intel.com>
---
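Notes: below is a minimal sketch of how the new free path is intended to
pair with the allocation API from earlier in this series (the helper
function name is hypothetical, and error handling is elided):

	#include <rte_lcore.h>
	#include <rte_memory.h>
	#include "eal_memalloc.h"

	/* allocate a single 2M page on the current socket, then free it */
	static int
	example_alloc_then_free(void)
	{
		struct rte_memseg *ms;

		ms = eal_memalloc_alloc_page(RTE_PGSIZE_2M, rte_socket_id());
		if (ms == NULL)
			return -1;

		/* ... use ms->addr ... */

		/* remaps the page away and releases its memseg list slot */
		return eal_memalloc_free_page(ms);
	}
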
 lib/librte_eal/common/eal_memalloc.h       |   3 +
 lib/librte_eal/linuxapp/eal/eal_memalloc.c | 131 ++++++++++++++++++++++++++++-
 2 files changed, 133 insertions(+), 1 deletion(-)

diff --git a/lib/librte_eal/common/eal_memalloc.h b/lib/librte_eal/common/eal_memalloc.h
index 59fd330..47e4367 100755
--- a/lib/librte_eal/common/eal_memalloc.h
+++ b/lib/librte_eal/common/eal_memalloc.h
@@ -44,4 +44,7 @@ int
 eal_memalloc_alloc_page_bulk(struct rte_memseg **ms, int n, uint64_t size,
 		int socket, bool exact);
 
+int
+eal_memalloc_free_page(struct rte_memseg *ms);
+
 #endif // EAL_MEMALLOC_H
diff --git a/lib/librte_eal/linuxapp/eal/eal_memalloc.c b/lib/librte_eal/linuxapp/eal/eal_memalloc.c
index 527c2f6..13172a0 100755
--- a/lib/librte_eal/linuxapp/eal/eal_memalloc.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memalloc.c
@@ -109,6 +109,18 @@ huge_recover_sigbus(void)
 	}
 }
 
+/*
+ * uses fstat to check whether a file occupies any blocks on disk; a file
+ * whose contents have been entirely hole-punched out has zero blocks
+ */
+static bool
+is_zero_length(int fd)
+{
+	struct stat st;
+	if (fstat(fd, &st) < 0)
+		return false;
+	return st.st_blocks == 0;
+}
+
 #ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
 static bool
 prepare_numa(int *oldpolicy, struct bitmask *oldmask, int socket_id) {
@@ -267,6 +279,61 @@ alloc_page(struct rte_memseg *ms, void *addr, uint64_t size, int socket_id,
 	return ret;
 }
 
+static int
+free_page(struct rte_memseg *ms, struct hugepage_info *hi, unsigned list_idx,
+		unsigned seg_idx) {
+	uint64_t fa_offset;
+	char path[PATH_MAX];
+	int fd;
+
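+	/* byte offset of this segment within the per-list file; only
+	 * meaningful in single-file-segments mode
+	 */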
+	fa_offset = seg_idx * ms->hugepage_sz;
+
+	if (internal_config.single_file_segments) {
+		eal_get_hugefile_path(path, sizeof(path), hi->hugedir, list_idx);
+	} else {
+		eal_get_hugefile_path(path, sizeof(path), hi->hugedir,
+				list_idx * RTE_MAX_MEMSEG_PER_LIST + seg_idx);
+	}
+
+	/*
+	 * unmap the page by mapping anonymous memory over it with MAP_FIXED.
+	 * MAP_FIXED replaces the old mapping atomically, so the VA space
+	 * stays reserved for us and there is no window in which another
+	 * thread could grab this address range.
+	 */
+	if (mmap(ms->addr, ms->hugepage_sz, PROT_READ,
+			MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0) ==
+				MAP_FAILED) {
+		RTE_LOG(DEBUG, EAL, "couldn't unmap page\n");
+		return -1;
+	}
+
+	if (internal_config.single_file_segments) {
+		/* now, truncate or remove the original file */
+		fd = open(path, O_RDWR, 0600);
+		if (fd < 0) {
+			RTE_LOG(DEBUG, EAL, "%s(): open failed: %s\n", __func__,
+					strerror(errno));
+			/* TODO: proper error handling */
+			return -1;
+		}
+
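+		/*
+		 * the file is shared with other segments in this list, so we
+		 * cannot simply unlink it; punch a hole over this segment's
+		 * range instead. once every segment has been punched out, the
+		 * file occupies zero blocks on disk and can be unlinked.
+		 */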
+		if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+				fa_offset, ms->hugepage_sz)) {
+			RTE_LOG(DEBUG, EAL, "Page deallocation failed: %s\n",
+				strerror(errno));
+		}
+		if (is_zero_length(fd)) {
+			unlink(path);
+		}
+		close(fd);
+	} else {
+		unlink(path);
+	}
+
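+	/* wipe the metadata so the segment slot reads as empty */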
+	memset(ms, 0, sizeof(*ms));
+
+	return 0;
+}
+
 int
 eal_memalloc_alloc_page_bulk(struct rte_memseg **ms, int n,
 		uint64_t size, int socket, bool exact) {
@@ -274,7 +341,7 @@ eal_memalloc_alloc_page_bulk(struct rte_memseg **ms, int n,
 	struct rte_memseg_list *msl = NULL;
 	void *addr;
 	unsigned msl_idx;
-	int cur_idx, next_idx, end_idx, i, ret = 0;
+	int cur_idx, next_idx, start_idx, end_idx, i, j, ret = 0;
 #ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
 	bool have_numa;
 	int oldpolicy;
@@ -366,6 +433,7 @@ eal_memalloc_alloc_page_bulk(struct rte_memseg **ms, int n,
 	}
 
 	end_idx = cur_idx + n;
+	start_idx = cur_idx;
 
 #ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
 	have_numa = prepare_numa(&oldpolicy, oldmask, socket);
@@ -387,6 +455,20 @@ eal_memalloc_alloc_page_bulk(struct rte_memseg **ms, int n,
 				ret = i;
 				goto restore_numa;
 			}
+			RTE_LOG(DEBUG, EAL, "exact number of pages was requested, but only %i were allocated, so freeing them\n",
+				i);
+
+			/* clean up */
+			for (j = start_idx; j < cur_idx; j++) {
+				struct rte_memseg *tmp;
+				struct rte_fbarray *arr = &msl->memseg_arr;
+
+				tmp = rte_fbarray_get(arr, j);
+				if (free_page(tmp, hi, msl_idx, j))
+					rte_panic("Cannot free page\n");
+
+				rte_fbarray_set_used(arr, j, false);
+			}
 			if (ms)
 				memset(ms, 0, sizeof(struct rte_memseg*) * n);
 			ret = -1;
@@ -414,3 +496,50 @@ eal_memalloc_alloc_page(uint64_t size, int socket) {
 		return NULL;
 	return ms;
 }
+
+int
+eal_memalloc_free_page(struct rte_memseg *ms) {
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	struct rte_memseg_list *msl = NULL;
+	unsigned msl_idx, seg_idx;
+	struct hugepage_info *hi = NULL;
+
+	/* dynamic free not supported in legacy mode */
+	if (internal_config.legacy_mem)
+		return -1;
+
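+	/* find the hugepage_info entry matching this segment's page size */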
+	for (int i = 0; i < (int) RTE_DIM(internal_config.hugepage_info); i++) {
+		if (ms->hugepage_sz ==
+				internal_config.hugepage_info[i].hugepage_sz) {
+			hi = &internal_config.hugepage_info[i];
+			break;
+		}
+	}
+	if (!hi) {
+		RTE_LOG(ERR, EAL, "Can't find relevant hugepage_info entry\n");
+		return -1;
+	}
+
+	for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {
+		uintptr_t start_addr, end_addr;
+		struct rte_memseg_list *cur = &mcfg->memsegs[msl_idx];
+
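+		/* check if the address falls within this list's VA range */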
+		start_addr = (uintptr_t) cur->base_va;
+		end_addr = start_addr +
+				cur->memseg_arr.capacity * cur->hugepage_sz;
+
+		if ((uintptr_t) ms->addr < start_addr ||
+				(uintptr_t) ms->addr >= end_addr) {
+			continue;
+		}
+		msl = cur;
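+		/* segment index is the offset into the list, in pages */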
+		seg_idx = RTE_PTR_DIFF(ms->addr, start_addr) / ms->hugepage_sz;
+		break;
+	}
+	if (!msl) {
+		RTE_LOG(ERR, EAL, "Couldn't find memseg list\n");
+		return -1;
+	}
+	/* free the page first; release its slot in the list only on success */
+	if (free_page(ms, hi, msl_idx, seg_idx))
+		return -1;
+	rte_fbarray_set_used(&msl->memseg_arr, seg_idx, false);
+	return 0;
+}
-- 
2.7.4


