[dpdk-dev] [PATCH v2 15/41] eal: add support for unmapping pages at runtime

Anatoly Burakov anatoly.burakov at intel.com
Wed Mar 7 17:56:43 CET 2018


This isn't used anywhere yet, but the support is now there. Also,
add cleanup to the allocation procedures, so that if we fail to
allocate everything we asked for, we can free all of it back.
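
A minimal sketch of how a caller might exercise the new API (hypothetical
caller, not part of this patch; assumes the internal eal_memalloc.h header
and an initialized allocator):

#include <rte_memory.h>
#include <rte_log.h>
#include "eal_memalloc.h"

/* hypothetical internal caller, for illustration only */
static int
grow_then_shrink(uint64_t page_sz, int socket)
{
	struct rte_memseg *pages[8];
	int n, i;

	/* ask for exactly 8 pages; with exact == true, a partial allocation
	 * is now cleaned up internally and -1 is returned
	 */
	n = eal_memalloc_alloc_page_bulk(pages, 8, page_sz, socket, true);
	if (n < 0)
		return -1;

	/* ... use the memory ... */

	/* give the pages back one at a time */
	for (i = 0; i < n; i++)
		if (eal_memalloc_free_page(pages[i]) < 0)
			RTE_LOG(ERR, EAL, "cannot free page %i\n", i);

	return 0;
}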

Signed-off-by: Anatoly Burakov <anatoly.burakov at intel.com>
---
 lib/librte_eal/common/eal_memalloc.h       |   3 +
 lib/librte_eal/linuxapp/eal/eal_memalloc.c | 148 ++++++++++++++++++++++++++++-
 2 files changed, 146 insertions(+), 5 deletions(-)

diff --git a/lib/librte_eal/common/eal_memalloc.h b/lib/librte_eal/common/eal_memalloc.h
index c1076cf..adf59c4 100644
--- a/lib/librte_eal/common/eal_memalloc.h
+++ b/lib/librte_eal/common/eal_memalloc.h
@@ -16,4 +16,7 @@ int
 eal_memalloc_alloc_page_bulk(struct rte_memseg **ms, int n, uint64_t size,
 		int socket, bool exact);
 
+int
+eal_memalloc_free_page(struct rte_memseg *ms);
+
 #endif // EAL_MEMALLOC_H
diff --git a/lib/librte_eal/linuxapp/eal/eal_memalloc.c b/lib/librte_eal/linuxapp/eal/eal_memalloc.c
index 1ba1201..bbeeeba 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memalloc.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memalloc.c
@@ -499,6 +499,64 @@ alloc_page(struct rte_memseg *ms, void *addr, uint64_t size, int socket_id,
 	return -1;
 }
 
+static int
+free_page(struct rte_memseg *ms, struct hugepage_info *hi,
+		unsigned int list_idx, unsigned int seg_idx)
+{
+	uint64_t map_offset;
+	char path[PATH_MAX];
+	int fd, ret;
+
+	if (mmap(ms->addr, ms->hugepage_sz, PROT_READ,
+			MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0) ==
+				MAP_FAILED) {
+		RTE_LOG(DEBUG, EAL, "couldn't unmap page\n");
+		return -1;
+	}
+
+	fd = get_page_fd(path, sizeof(path), hi, list_idx, seg_idx);
+	if (fd < 0)
+		return -1;
+
+	if (internal_config.single_file_segments) {
+		map_offset = seg_idx * ms->hugepage_sz;
+		if (resize_hugefile(fd, map_offset, ms->hugepage_sz, false))
+			return -1;
+		/* if file is zero-length, we've already shrunk it, so it's
+		 * safe to remove.
+		 */
+		if (is_zero_length(fd)) {
+			struct msl_entry *te = get_msl_entry_by_idx(list_idx);
+			if (te != NULL && te->fd >= 0) {
+				close(te->fd);
+				te->fd = -1;
+			}
+			unlink(path);
+		}
+		ret = 0;
+	} else {
+		/* if we're able to take out a write lock, we're the last one
+		 * holding onto this page.
+		 */
+
+		ret = lock(fd, 0, ms->hugepage_sz, F_WRLCK);
+		if (ret >= 0) {
+			/* no one else is using this page */
+			if (ret == 1)
+				unlink(path);
+			ret = lock(fd, 0, ms->hugepage_sz, F_UNLCK);
+			if (ret != 1)
+				RTE_LOG(ERR, EAL, "%s(): unable to unlock file %s\n",
+					__func__, path);
+		}
+		close(fd);
+	}
+
+	memset(ms, 0, sizeof(*ms));
+
+	return ret;
+}
+
 int
 eal_memalloc_alloc_page_bulk(struct rte_memseg **ms, int n,
 		uint64_t size, int socket, bool exact)
@@ -507,7 +565,7 @@ eal_memalloc_alloc_page_bulk(struct rte_memseg **ms, int n,
 	struct rte_memseg_list *msl = NULL;
 	void *addr;
 	unsigned int msl_idx;
-	int cur_idx, end_idx, i, ret = -1;
+	int cur_idx, start_idx, end_idx, i, j, ret = -1;
 #ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES
 	bool have_numa;
 	int oldpolicy;
@@ -557,6 +615,7 @@ eal_memalloc_alloc_page_bulk(struct rte_memseg **ms, int n,
 			continue;
 
 		end_idx = cur_idx + n;
+		start_idx = cur_idx;
 
 		for (i = 0; cur_idx < end_idx; cur_idx++, i++) {
 			struct rte_memseg *cur;
@@ -567,25 +626,56 @@ eal_memalloc_alloc_page_bulk(struct rte_memseg **ms, int n,
 
 			if (alloc_page(cur, addr, size, socket, hi, msl_idx,
 					cur_idx)) {
+
 				RTE_LOG(DEBUG, EAL, "attempted to allocate %i pages, but only %i were allocated\n",
 					n, i);
 
-				/* if exact number wasn't requested, stop */
-				if (!exact)
+				/* if exact number of pages wasn't requested,
+				 * failing to allocate is not an error. we could
+				 * of course try other lists to see if there are
+				 * better fits, but a bird in the hand...
+				 */
+				if (!exact) {
 					ret = i;
-				goto restore_numa;
+					goto restore_numa;
+				}
+				RTE_LOG(DEBUG, EAL, "exact number of pages was requested, so freeing %i allocated pages\n",
+					i);
+
+				/* clean up */
+				for (j = start_idx; j < cur_idx; j++) {
+					struct rte_memseg *tmp;
+					struct rte_fbarray *arr =
+							&msl->memseg_arr;
+
+					tmp = rte_fbarray_get(arr, j);
+					if (free_page(tmp, hi,
+							msl_idx, j))
+						rte_panic("Cannot free page\n");
+
+					rte_fbarray_set_free(arr, j);
+				}
+				/* clear the list */
+				if (ms)
+					memset(ms, 0, sizeof(*ms) * n);
+
+				/* try next list */
+				goto next_list;
 			}
 			if (ms)
 				ms[i] = cur;
 
 			rte_fbarray_set_used(&msl->memseg_arr, cur_idx);
 		}
+		/* we allocated all pages */
 		ret = n;
 
 		break;
+next_list:
+		/* dummy semi-colon to make label work */;
 	}
 	/* we didn't break */
-	if (!msl) {
+	if (msl_idx == RTE_MAX_MEMSEG_LISTS) {
 		RTE_LOG(ERR, EAL, "%s(): couldn't find suitable memseg_list\n",
 			__func__);
 	}
@@ -607,3 +697,51 @@ eal_memalloc_alloc_page(uint64_t size, int socket)
 	/* return pointer to newly allocated memseg */
 	return ms;
 }
+
+int
+eal_memalloc_free_page(struct rte_memseg *ms)
+{
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	struct rte_memseg_list *msl = NULL;
+	unsigned int msl_idx, seg_idx;
+	struct hugepage_info *hi = NULL;
+	int i;
+
+	/* dynamic free not supported in legacy mode */
+	if (internal_config.legacy_mem)
+		return -1;
+
+	for (i = 0; i < (int) RTE_DIM(internal_config.hugepage_info); i++) {
+		if (ms->hugepage_sz ==
+				internal_config.hugepage_info[i].hugepage_sz) {
+			hi = &internal_config.hugepage_info[i];
+			break;
+		}
+	}
+	if (!hi) {
+		RTE_LOG(ERR, EAL, "Can't find relevant hugepage_info entry\n");
+		return -1;
+	}
+
+	for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {
+		uintptr_t start_addr, end_addr;
+		struct rte_memseg_list *cur = &mcfg->memsegs[msl_idx];
+
+		start_addr = (uintptr_t) cur->base_va;
+		end_addr = start_addr + cur->memseg_arr.len * cur->hugepage_sz;
+
+		if ((uintptr_t) ms->addr < start_addr ||
+				(uintptr_t) ms->addr >= end_addr) {
+			continue;
+		}
+		msl = cur;
+		seg_idx = RTE_PTR_DIFF(ms->addr, start_addr) / ms->hugepage_sz;
+		break;
+	}
+	if (!msl) {
+		RTE_LOG(ERR, EAL, "Couldn't find memseg list\n");
+		return -1;
+	}
+	rte_fbarray_set_free(&msl->memseg_arr, seg_idx);
+	return free_page(ms, hi, msl_idx, seg_idx);
+}
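
The non-single-file path in free_page() relies on fcntl() advisory locks:
every process that maps the page keeps a read lock on the backing hugepage
file, so being able to take a write lock means we are the last user and the
file can be unlinked. A standalone, simplified illustration of that idea
(the patch itself goes through the EAL lock() helper rather than calling
fcntl() directly):

#include <errno.h>
#include <fcntl.h>

/* return 1 if we are the last user of fd (write lock acquired),
 * 0 if another process still holds a read lock, -1 on error
 */
static int
last_user(int fd, off_t len)
{
	struct flock fl = {
		.l_type = F_WRLCK,
		.l_whence = SEEK_SET,
		.l_start = 0,
		.l_len = len,
	};

	if (fcntl(fd, F_SETLK, &fl) == 0)
		return 1;
	if (errno == EAGAIN || errno == EACCES)
		return 0;
	return -1;
}

When this returns 1, it is safe to unlink() the backing file before dropping
the lock, which mirrors the F_WRLCK/unlink/F_UNLCK sequence above.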
-- 
2.7.4

