[dpdk-dev] [PATCH v5 11/11] eal/windows: implement basic memory management

Dmitry Kozlyuk dmitry.kozliuk at gmail.com
Mon May 25 02:37:20 CEST 2020


Basic memory management supports core libraries and PMDs operating in
IOVA as PA mode. It uses a kernel-mode driver, virt2phys, to obtain
IOVAs of hugepages allocated from user-mode. Multi-process mode is not
implemented and is forcefully disabled at startup.

Signed-off-by: Dmitry Kozlyuk <dmitry.kozliuk at gmail.com>
---
 config/meson.build                            |  12 +-
 doc/guides/windows_gsg/run_apps.rst           |  54 +-
 lib/librte_eal/common/meson.build             |  11 +
 lib/librte_eal/common/rte_malloc.c            |   9 +
 lib/librte_eal/rte_eal_exports.def            | 119 +++
 lib/librte_eal/windows/eal.c                  | 146 +++-
 lib/librte_eal/windows/eal_memalloc.c         | 442 +++++++++++
 lib/librte_eal/windows/eal_memory.c           | 710 ++++++++++++++++++
 lib/librte_eal/windows/eal_mp.c               | 103 +++
 lib/librte_eal/windows/eal_windows.h          |  75 ++
 lib/librte_eal/windows/include/meson.build    |   1 +
 lib/librte_eal/windows/include/rte_os.h       |   9 +
 .../windows/include/rte_virt2phys.h           |  34 +
 lib/librte_eal/windows/include/rte_windows.h  |   2 +
 lib/librte_eal/windows/include/unistd.h       |   3 +
 lib/librte_eal/windows/meson.build            |   5 +
 16 files changed, 1728 insertions(+), 7 deletions(-)
 create mode 100644 lib/librte_eal/windows/eal_memalloc.c
 create mode 100644 lib/librte_eal/windows/eal_memory.c
 create mode 100644 lib/librte_eal/windows/eal_mp.c
 create mode 100644 lib/librte_eal/windows/include/rte_virt2phys.h

diff --git a/config/meson.build b/config/meson.build
index c1e80de4b..d3f05f878 100644
--- a/config/meson.build
+++ b/config/meson.build
@@ -261,15 +261,21 @@ if is_freebsd
 endif
 
 if is_windows
-	# Minimum supported API is Windows 7.
-	add_project_arguments('-D_WIN32_WINNT=0x0601', language: 'c')
+	# VirtualAlloc2() is available since Windows 10 / Server 2016.
+	add_project_arguments('-D_WIN32_WINNT=0x0A00', language: 'c')
 
 	# Use MinGW-w64 stdio, because DPDK assumes ANSI-compliant formatting.
 	if cc.get_id() == 'gcc'
 		add_project_arguments('-D__USE_MINGW_ANSI_STDIO', language: 'c')
 	endif
 
-	add_project_link_arguments('-ladvapi32', language: 'c')
+	# Contrary to docs, VirtualAlloc2() is exported by mincore.lib
+	# in Windows SDK, while MinGW exports it by advapi32.a.
+	if is_ms_linker
+		add_project_link_arguments('-lmincore', language: 'c')
+	endif
+
+	add_project_link_arguments('-ladvapi32', '-lsetupapi', language: 'c')
 endif
 
 if get_option('b_lto')
diff --git a/doc/guides/windows_gsg/run_apps.rst b/doc/guides/windows_gsg/run_apps.rst
index 21ac7f6c1..78e5a614f 100644
--- a/doc/guides/windows_gsg/run_apps.rst
+++ b/doc/guides/windows_gsg/run_apps.rst
@@ -7,10 +7,10 @@ Running DPDK Applications
 Grant *Lock pages in memory* Privilege
 --------------------------------------
 
-Use of hugepages ("large pages" in Windows terminolocy) requires
+Use of hugepages ("large pages" in Windows terminology) requires
 ``SeLockMemoryPrivilege`` for the user running an application.
 
-1. Open *Local Security Policy* snap in, either:
+1. Open *Local Security Policy* snap-in, either:
 
    * Control Panel / Computer Management / Local Security Policy;
    * or Win+R, type ``secpol``, press Enter.
@@ -24,7 +24,55 @@ Use of hugepages ("large pages" in Windows terminolocy) requires
 
 See `Large-Page Support`_ in MSDN for details.
 
-.. _Large-page Support: https://docs.microsoft.com/en-us/windows/win32/memory/large-page-support
+.. _Large-Page Support: https://docs.microsoft.com/en-us/windows/win32/memory/large-page-support
+
+
+Load virt2phys Driver
+---------------------
+
+Access to physical addresses is provided by a kernel-mode driver, virt2phys.
+It is mandatory at least for using hardware PMDs, but may also be required
+for mempools.
+
+Refer to documentation in ``dpdk-kmods`` repository for details on system
+setup, driver build and installation. This driver is not signed, so signature
+checking must be disabled to load it.
+
+.. warning::
+
+    Disabling driver signature enforcement weakens OS security.
+    It is discouraged in production environments.
+
+Compiled package consists of ``virt2phys.inf``, ``virt2phys.cat``,
+and ``virt2phys.sys``. It can be installed as follows
+from Elevated Command Prompt:
+
+.. code-block:: console
+
+    pnputil /add-driver Z:\path\to\virt2phys.inf /install
+
+On Windows Server additional steps are required:
+
+1. From Device Manager, Action menu, select "Add legacy hardware".
+2. It will launch the "Add Hardware Wizard". Click "Next".
+3. Select second option "Install the hardware that I manually select
+   from a list (Advanced)".
+4. On the next screen, "Kernel bypass" will be shown as a device class.
+5. Select it, and click "Next".
+6. The previously installed drivers will now be installed for the
+   "Virtual to physical address translator" device.
+
+When loaded successfully, the driver is shown in *Device Manager* as *Virtual
+to physical address translator* device under *Kernel bypass* category.
+Installed driver persists across reboots.
+
+If DPDK is unable to communicate with the driver, a warning is printed
+on initialization (debug-level logs provide more details):
+
+.. code-block:: text
+
+    EAL: Cannot open virt2phys driver interface
+
 
 
 Run the ``helloworld`` Example
diff --git a/lib/librte_eal/common/meson.build b/lib/librte_eal/common/meson.build
index 4e9208129..310844269 100644
--- a/lib/librte_eal/common/meson.build
+++ b/lib/librte_eal/common/meson.build
@@ -8,13 +8,24 @@ if is_windows
 		'eal_common_bus.c',
 		'eal_common_class.c',
 		'eal_common_devargs.c',
+		'eal_common_dynmem.c',
 		'eal_common_errno.c',
+		'eal_common_fbarray.c',
 		'eal_common_launch.c',
 		'eal_common_lcore.c',
 		'eal_common_log.c',
+		'eal_common_mcfg.c',
+		'eal_common_memalloc.c',
+		'eal_common_memory.c',
+		'eal_common_memzone.c',
 		'eal_common_options.c',
+		'eal_common_string_fns.c',
+		'eal_common_tailqs.c',
 		'eal_common_thread.c',
 		'eal_common_trace_points.c',
+		'malloc_elem.c',
+		'malloc_heap.c',
+		'rte_malloc.c',
 	)
 	subdir_done()
 endif
diff --git a/lib/librte_eal/common/rte_malloc.c b/lib/librte_eal/common/rte_malloc.c
index f1b73168b..34b416927 100644
--- a/lib/librte_eal/common/rte_malloc.c
+++ b/lib/librte_eal/common/rte_malloc.c
@@ -20,7 +20,16 @@
 #include <rte_lcore.h>
 #include <rte_common.h>
 #include <rte_spinlock.h>
+
+#ifndef RTE_EXEC_ENV_WINDOWS
 #include <rte_eal_trace.h>
+#else
+/* Suppress -Wempty-body for tracepoints used as "if" body. */
+#define rte_eal_trace_mem_malloc(...) do {} while (0)
+#define rte_eal_trace_mem_zmalloc(...) do {} while (0)
+#define rte_eal_trace_mem_realloc(...) do {} while (0)
+#define rte_eal_trace_mem_free(...) do {} while (0)
+#endif
 
 #include <rte_malloc.h>
 #include "malloc_elem.h"
diff --git a/lib/librte_eal/rte_eal_exports.def b/lib/librte_eal/rte_eal_exports.def
index 12a6c79d6..854b83bcd 100644
--- a/lib/librte_eal/rte_eal_exports.def
+++ b/lib/librte_eal/rte_eal_exports.def
@@ -1,9 +1,128 @@
 EXPORTS
 	__rte_panic
+	rte_calloc
+	rte_calloc_socket
 	rte_eal_get_configuration
+	rte_eal_has_hugepages
 	rte_eal_init
+	rte_eal_iova_mode
 	rte_eal_mp_remote_launch
 	rte_eal_mp_wait_lcore
+	rte_eal_process_type
 	rte_eal_remote_launch
+	rte_eal_tailq_lookup
+	rte_eal_tailq_register
+	rte_eal_using_phys_addrs
+	rte_free
 	rte_log
+	rte_malloc
+	rte_malloc_dump_stats
+	rte_malloc_get_socket_stats
+	rte_malloc_set_limit
+	rte_malloc_socket
+	rte_malloc_validate
+	rte_malloc_virt2iova
+	rte_mcfg_mem_read_lock
+	rte_mcfg_mem_read_unlock
+	rte_mcfg_mem_write_lock
+	rte_mcfg_mem_write_unlock
+	rte_mcfg_mempool_read_lock
+	rte_mcfg_mempool_read_unlock
+	rte_mcfg_mempool_write_lock
+	rte_mcfg_mempool_write_unlock
+	rte_mcfg_tailq_read_lock
+	rte_mcfg_tailq_read_unlock
+	rte_mcfg_tailq_write_lock
+	rte_mcfg_tailq_write_unlock
+	rte_mem_lock_page
+	rte_mem_virt2iova
+	rte_mem_virt2phy
+	rte_memory_get_nchannel
+	rte_memory_get_nrank
+	rte_memzone_dump
+	rte_memzone_free
+	rte_memzone_lookup
+	rte_memzone_reserve
+	rte_memzone_reserve_aligned
+	rte_memzone_reserve_bounded
+	rte_memzone_walk
 	rte_vlog
+	rte_realloc
+	rte_zmalloc
+	rte_zmalloc_socket
+
+	rte_mp_action_register
+	rte_mp_action_unregister
+	rte_mp_reply
+	rte_mp_sendmsg
+
+	rte_fbarray_attach
+	rte_fbarray_destroy
+	rte_fbarray_detach
+	rte_fbarray_dump_metadata
+	rte_fbarray_find_contig_free
+	rte_fbarray_find_contig_used
+	rte_fbarray_find_idx
+	rte_fbarray_find_next_free
+	rte_fbarray_find_next_n_free
+	rte_fbarray_find_next_n_used
+	rte_fbarray_find_next_used
+	rte_fbarray_get
+	rte_fbarray_init
+	rte_fbarray_is_used
+	rte_fbarray_set_free
+	rte_fbarray_set_used
+	rte_malloc_dump_heaps
+	rte_mem_alloc_validator_register
+	rte_mem_alloc_validator_unregister
+	rte_mem_check_dma_mask
+	rte_mem_event_callback_register
+	rte_mem_event_callback_unregister
+	rte_mem_iova2virt
+	rte_mem_virt2memseg
+	rte_mem_virt2memseg_list
+	rte_memseg_contig_walk
+	rte_memseg_list_walk
+	rte_memseg_walk
+	rte_mp_request_async
+	rte_mp_request_sync
+
+	rte_fbarray_find_prev_free
+	rte_fbarray_find_prev_n_free
+	rte_fbarray_find_prev_n_used
+	rte_fbarray_find_prev_used
+	rte_fbarray_find_rev_contig_free
+	rte_fbarray_find_rev_contig_used
+	rte_memseg_contig_walk_thread_unsafe
+	rte_memseg_list_walk_thread_unsafe
+	rte_memseg_walk_thread_unsafe
+
+	rte_malloc_heap_create
+	rte_malloc_heap_destroy
+	rte_malloc_heap_get_socket
+	rte_malloc_heap_memory_add
+	rte_malloc_heap_memory_attach
+	rte_malloc_heap_memory_detach
+	rte_malloc_heap_memory_remove
+	rte_malloc_heap_socket_is_external
+	rte_mem_check_dma_mask_thread_unsafe
+	rte_mem_set_dma_mask
+	rte_memseg_get_fd
+	rte_memseg_get_fd_offset
+	rte_memseg_get_fd_offset_thread_unsafe
+	rte_memseg_get_fd_thread_unsafe
+
+	rte_extmem_attach
+	rte_extmem_detach
+	rte_extmem_register
+	rte_extmem_unregister
+
+	rte_fbarray_find_biggest_free
+	rte_fbarray_find_biggest_used
+	rte_fbarray_find_rev_biggest_free
+	rte_fbarray_find_rev_biggest_used
+
+	rte_get_page_size
+	rte_mem_lock
+	rte_mem_map
+	rte_mem_unmap
diff --git a/lib/librte_eal/windows/eal.c b/lib/librte_eal/windows/eal.c
index 7c2fcc860..d7020ffa8 100644
--- a/lib/librte_eal/windows/eal.c
+++ b/lib/librte_eal/windows/eal.c
@@ -94,6 +94,24 @@ eal_proc_type_detect(void)
 	return ptype;
 }
 
+enum rte_proc_type_t
+rte_eal_process_type(void)
+{
+	return rte_config.process_type;
+}
+
+int
+rte_eal_has_hugepages(void)
+{
+	return !internal_config.no_hugetlbfs;
+}
+
+enum rte_iova_mode
+rte_eal_iova_mode(void)
+{
+	return rte_config.iova_mode;
+}
+
 /* display usage */
 static void
 eal_usage(const char *prgname)
@@ -225,6 +243,89 @@ rte_eal_init_alert(const char *msg)
 	RTE_LOG(ERR, EAL, "%s\n", msg);
 }
 
+int
+eal_file_truncate(int fd, ssize_t size)
+{
+	HANDLE handle;
+	DWORD ret;
+	LONG low = (LONG)((size_t)size);
+	LONG high = (LONG)((size_t)size >> 32);
+
+	handle = (HANDLE)_get_osfhandle(fd);
+	if (handle == INVALID_HANDLE_VALUE) {
+		rte_errno = EBADF;
+		return -1;
+	}
+
+	ret = SetFilePointer(handle, low, &high, FILE_BEGIN);
+	if (ret == INVALID_SET_FILE_POINTER) {
+		RTE_LOG_WIN32_ERR("SetFilePointer()");
+		rte_errno = EINVAL;
+		return -1;
+	}
+
+	return 0;
+}
+
+static int
+lock_file(HANDLE handle, enum eal_flock_op op, enum eal_flock_mode mode)
+{
+	DWORD sys_flags = 0;
+	OVERLAPPED overlapped;
+
+	if (op == EAL_FLOCK_EXCLUSIVE)
+		sys_flags |= LOCKFILE_EXCLUSIVE_LOCK;
+	if (mode == EAL_FLOCK_RETURN)
+		sys_flags |= LOCKFILE_FAIL_IMMEDIATELY;
+
+	memset(&overlapped, 0, sizeof(overlapped));
+	if (!LockFileEx(handle, sys_flags, 0, 0, 0, &overlapped)) {
+		if ((sys_flags & LOCKFILE_FAIL_IMMEDIATELY) &&
+			(GetLastError() == ERROR_IO_PENDING)) {
+			rte_errno = EWOULDBLOCK;
+		} else {
+			RTE_LOG_WIN32_ERR("LockFileEx()");
+			rte_errno = EINVAL;
+		}
+		return -1;
+	}
+
+	return 0;
+}
+
+static int
+unlock_file(HANDLE handle)
+{
+	if (!UnlockFileEx(handle, 0, 0, 0, NULL)) {
+		RTE_LOG_WIN32_ERR("UnlockFileEx()");
+		rte_errno = EINVAL;
+		return -1;
+	}
+	return 0;
+}
+
+int
+eal_file_lock(int fd, enum eal_flock_op op, enum eal_flock_mode mode)
+{
+	HANDLE handle = (HANDLE)_get_osfhandle(fd);
+
+	if (handle == INVALID_HANDLE_VALUE) {
+		rte_errno = EBADF;
+		return -1;
+	}
+
+	switch (op) {
+	case EAL_FLOCK_EXCLUSIVE:
+	case EAL_FLOCK_SHARED:
+		return lock_file(handle, op, mode);
+	case EAL_FLOCK_UNLOCK:
+		return unlock_file(handle);
+	default:
+		rte_errno = EINVAL;
+		return -1;
+	}
+}
+
 /* Stubs to enable EAL trace point compilation
  * until eal_common_trace.c can be compiled.
  */
@@ -256,7 +357,7 @@ __rte_trace_point_register(rte_trace_point_t *trace, const char *name,
 	return -ENOTSUP;
 }
 
-/* Launch threads, called at application init(). */
+ /* Launch threads, called at application init(). */
 int
 rte_eal_init(int argc, char **argv)
 {
@@ -279,6 +380,13 @@ rte_eal_init(int argc, char **argv)
 	if (fctret < 0)
 		exit(1);
 
+	/* Prevent creation of shared memory files. */
+	if (internal_config.in_memory == 0) {
+		RTE_LOG(WARNING, EAL, "Multi-process support is requested, "
+			"but not available.\n");
+		internal_config.in_memory = 1;
+	}
+
 	if (!internal_config.no_hugetlbfs && (eal_hugepage_info_init() < 0)) {
 		rte_eal_init_alert("Cannot get hugepage information");
 		rte_errno = EACCES;
@@ -290,6 +398,42 @@ rte_eal_init(int argc, char **argv)
 			internal_config.memory = MEMSIZE_IF_NO_HUGE_PAGE;
 	}
 
+	if (eal_mem_win32api_init() < 0) {
+		rte_eal_init_alert("Cannot access Win32 memory management");
+		rte_errno = ENOTSUP;
+		return -1;
+	}
+
+	if (eal_mem_virt2iova_init() < 0) {
+		/* Non-fatal error if physical addresses are not required. */
+		RTE_LOG(WARNING, EAL, "Cannot access virt2phys driver, "
+			"PA will not be available\n");
+	}
+
+	if (rte_eal_memzone_init() < 0) {
+		rte_eal_init_alert("Cannot init memzone");
+		rte_errno = ENODEV;
+		return -1;
+	}
+
+	if (rte_eal_memory_init() < 0) {
+		rte_eal_init_alert("Cannot init memory");
+		rte_errno = ENOMEM;
+		return -1;
+	}
+
+	if (rte_eal_malloc_heap_init() < 0) {
+		rte_eal_init_alert("Cannot init malloc heap");
+		rte_errno = ENODEV;
+		return -1;
+	}
+
+	if (rte_eal_tailqs_init() < 0) {
+		rte_eal_init_alert("Cannot init tail queues for objects");
+		rte_errno = EFAULT;
+		return -1;
+	}
+
 	eal_thread_init_master(rte_config.master_lcore);
 
 	RTE_LCORE_FOREACH_SLAVE(i) {
diff --git a/lib/librte_eal/windows/eal_memalloc.c b/lib/librte_eal/windows/eal_memalloc.c
new file mode 100644
index 000000000..844910f1f
--- /dev/null
+++ b/lib/librte_eal/windows/eal_memalloc.c
@@ -0,0 +1,442 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2020 Dmitry Kozlyuk
+ */
+
+#include <rte_errno.h>
+#include <rte_os.h>
+#include <rte_windows.h>
+
+#include "eal_internal_cfg.h"
+#include "eal_memalloc.h"
+#include "eal_memcfg.h"
+#include "eal_private.h"
+#include "eal_windows.h"
+
+int
+eal_memalloc_get_seg_fd(int list_idx, int seg_idx)
+{
+	/* Hugepages have no associated files in Windows. */
+	RTE_SET_USED(list_idx);
+	RTE_SET_USED(seg_idx);
+	EAL_LOG_NOT_IMPLEMENTED();
+	return -1;
+}
+
+int
+eal_memalloc_get_seg_fd_offset(int list_idx, int seg_idx, size_t *offset)
+{
+	/* Hugepages have no associated files in Windows. */
+	RTE_SET_USED(list_idx);
+	RTE_SET_USED(seg_idx);
+	RTE_SET_USED(offset);
+	EAL_LOG_NOT_IMPLEMENTED();
+	return -1;
+}
+
+static int
+alloc_seg(struct rte_memseg *ms, void *requested_addr, int socket_id,
+	struct hugepage_info *hi)
+{
+	HANDLE current_process;
+	unsigned int numa_node;
+	size_t alloc_sz;
+	void *addr;
+	rte_iova_t iova = RTE_BAD_IOVA;
+	PSAPI_WORKING_SET_EX_INFORMATION info;
+	PSAPI_WORKING_SET_EX_BLOCK *page;
+
+	if (ms->len > 0) {
+		/* If a segment is already allocated as needed, return it. */
+		if ((ms->addr == requested_addr) &&
+			(ms->socket_id == socket_id) &&
+			(ms->hugepage_sz == hi->hugepage_sz)) {
+			return 0;
+		}
+
+		/* Bugcheck, should not happen. */
+		RTE_LOG(DEBUG, EAL, "Attempted to reallocate segment %p "
+			"(size %zu) on socket %d", ms->addr,
+			ms->len, ms->socket_id);
+		return -1;
+	}
+
+	current_process = GetCurrentProcess();
+	numa_node = eal_socket_numa_node(socket_id);
+	alloc_sz = hi->hugepage_sz;
+
+	if (requested_addr == NULL) {
+		/* Request a new chunk of memory from OS. */
+		addr = eal_mem_alloc_socket(alloc_sz, socket_id);
+		if (addr == NULL) {
+			RTE_LOG(DEBUG, EAL, "Cannot allocate %zu bytes "
+				"on socket %d\n", alloc_sz, socket_id);
+			return -1;
+		}
+	} else {
+		/* Requested address is already reserved, commit memory. */
+		addr = eal_mem_commit(requested_addr, alloc_sz, socket_id);
+
+		/* During commitment, memory is temporary freed and might
+		 * be allocated by different non-EAL thread. This is a fatal
+		 * error, because it breaks MSL assumptions.
+		 */
+		if ((addr != NULL) && (addr != requested_addr)) {
+			RTE_LOG(CRIT, EAL, "Address %p occupied by an alien "
+				" allocation - MSL is not VA-contiguous!\n",
+				requested_addr);
+			return -1;
+		}
+
+		if (addr == NULL) {
+			RTE_LOG(DEBUG, EAL, "Cannot commit reserved memory %p "
+				"(size %zu) on socket %d\n",
+				requested_addr, alloc_sz, socket_id);
+			return -1;
+		}
+	}
+
+	/* Force OS to allocate a physical page and select a NUMA node.
+	 * Hugepages are not pageable in Windows, so there's no race
+	 * for physical address.
+	 */
+	*(volatile int *)addr = *(volatile int *)addr;
+
+	/* Only try to obtain IOVA if it's available, so that applications
+	 * that do not need IOVA can use this allocator.
+	 */
+	if (rte_eal_using_phys_addrs()) {
+		iova = rte_mem_virt2iova(addr);
+		if (iova == RTE_BAD_IOVA) {
+			RTE_LOG(DEBUG, EAL,
+				"Cannot get IOVA of allocated segment\n");
+			goto error;
+		}
+	}
+
+	/* Only "Ex" function can handle hugepages. */
+	info.VirtualAddress = addr;
+	if (!QueryWorkingSetEx(current_process, &info, sizeof(info))) {
+		RTE_LOG_WIN32_ERR("QueryWorkingSetEx(%p)", addr);
+		goto error;
+	}
+
+	page = &info.VirtualAttributes;
+	if (!page->Valid || !page->LargePage) {
+		RTE_LOG(DEBUG, EAL, "Got regular page instead of a hugepage\n");
+		goto error;
+	}
+	if (page->Node != numa_node) {
+		RTE_LOG(DEBUG, EAL,
+			"NUMA node hint %u (socket %d) not respected, got %u\n",
+			numa_node, socket_id, page->Node);
+		goto error;
+	}
+
+	ms->addr = addr;
+	ms->hugepage_sz = hi->hugepage_sz;
+	ms->len = alloc_sz;
+	ms->nchannel = rte_memory_get_nchannel();
+	ms->nrank = rte_memory_get_nrank();
+	ms->iova = iova;
+	ms->socket_id = socket_id;
+
+	return 0;
+
+error:
+	/* Only jump here when `addr` and `alloc_sz` are valid. */
+	if (eal_mem_decommit(addr, alloc_sz) && (rte_errno == EADDRNOTAVAIL)) {
+		/* During decommitment, memory is temporarily returned
+		 * to the system and the address may become unavailable.
+		 */
+		RTE_LOG(CRIT, EAL, "Address %p occupied by an alien "
+			" allocation - MSL is not VA-contiguous!\n", addr);
+	}
+	return -1;
+}
+
+static int
+free_seg(struct rte_memseg *ms)
+{
+	if (eal_mem_decommit(ms->addr, ms->len)) {
+		if (rte_errno == EADDRNOTAVAIL) {
+			/* See alloc_seg() for explanation. */
+			RTE_LOG(CRIT, EAL, "Address %p occupied by an alien "
+				" allocation - MSL is not VA-contiguous!\n",
+				ms->addr);
+		}
+		return -1;
+	}
+
+	/* Must clear the segment, because alloc_seg() inspects it. */
+	memset(ms, 0, sizeof(*ms));
+	return 0;
+}
+
+struct alloc_walk_param {
+	struct hugepage_info *hi;
+	struct rte_memseg **ms;
+	size_t page_sz;
+	unsigned int segs_allocated;
+	unsigned int n_segs;
+	int socket;
+	bool exact;
+};
+
+static int
+alloc_seg_walk(const struct rte_memseg_list *msl, void *arg)
+{
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	struct alloc_walk_param *wa = arg;
+	struct rte_memseg_list *cur_msl;
+	size_t page_sz;
+	int cur_idx, start_idx, j;
+	unsigned int msl_idx, need, i;
+
+	if (msl->page_sz != wa->page_sz)
+		return 0;
+	if (msl->socket_id != wa->socket)
+		return 0;
+
+	page_sz = (size_t)msl->page_sz;
+
+	msl_idx = msl - mcfg->memsegs;
+	cur_msl = &mcfg->memsegs[msl_idx];
+
+	need = wa->n_segs;
+
+	/* try finding space in memseg list */
+	if (wa->exact) {
+		/* if we require exact number of pages in a list, find them */
+		cur_idx = rte_fbarray_find_next_n_free(
+			&cur_msl->memseg_arr, 0, need);
+		if (cur_idx < 0)
+			return 0;
+		start_idx = cur_idx;
+	} else {
+		int cur_len;
+
+		/* we don't require exact number of pages, so we're going to go
+		 * for best-effort allocation. that means finding the biggest
+		 * unused block, and going with that.
+		 */
+		cur_idx = rte_fbarray_find_biggest_free(
+			&cur_msl->memseg_arr, 0);
+		if (cur_idx < 0)
+			return 0;
+		start_idx = cur_idx;
+		/* adjust the size to possibly be smaller than original
+		 * request, but do not allow it to be bigger.
+		 */
+		cur_len = rte_fbarray_find_contig_free(
+			&cur_msl->memseg_arr, cur_idx);
+		need = RTE_MIN(need, (unsigned int)cur_len);
+	}
+
+	for (i = 0; i < need; i++, cur_idx++) {
+		struct rte_memseg *cur;
+		void *map_addr;
+
+		cur = rte_fbarray_get(&cur_msl->memseg_arr, cur_idx);
+		map_addr = RTE_PTR_ADD(cur_msl->base_va, cur_idx * page_sz);
+
+		if (alloc_seg(cur, map_addr, wa->socket, wa->hi)) {
+			RTE_LOG(DEBUG, EAL, "attempted to allocate %i segments, "
+				"but only %i were allocated\n", need, i);
+
+			/* if exact number wasn't requested, stop */
+			if (!wa->exact)
+				goto out;
+
+			/* clean up */
+			for (j = start_idx; j < cur_idx; j++) {
+				struct rte_memseg *tmp;
+				struct rte_fbarray *arr = &cur_msl->memseg_arr;
+
+				tmp = rte_fbarray_get(arr, j);
+				rte_fbarray_set_free(arr, j);
+
+				if (free_seg(tmp))
+					RTE_LOG(DEBUG, EAL, "Cannot free page\n");
+			}
+			/* clear the list */
+			if (wa->ms)
+				memset(wa->ms, 0, sizeof(*wa->ms) * wa->n_segs);
+
+			return -1;
+		}
+		if (wa->ms)
+			wa->ms[i] = cur;
+
+		rte_fbarray_set_used(&cur_msl->memseg_arr, cur_idx);
+	}
+
+out:
+	wa->segs_allocated = i;
+	if (i > 0)
+		cur_msl->version++;
+
+	/* if we didn't allocate any segments, move on to the next list */
+	return i > 0;
+}
+
+struct free_walk_param {
+	struct hugepage_info *hi;
+	struct rte_memseg *ms;
+};
+static int
+free_seg_walk(const struct rte_memseg_list *msl, void *arg)
+{
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	struct rte_memseg_list *found_msl;
+	struct free_walk_param *wa = arg;
+	uintptr_t start_addr, end_addr;
+	int msl_idx, seg_idx, ret;
+
+	start_addr = (uintptr_t) msl->base_va;
+	end_addr = start_addr + msl->len;
+
+	if ((uintptr_t)wa->ms->addr < start_addr ||
+		(uintptr_t)wa->ms->addr >= end_addr)
+		return 0;
+
+	msl_idx = msl - mcfg->memsegs;
+	seg_idx = RTE_PTR_DIFF(wa->ms->addr, start_addr) / msl->page_sz;
+
+	/* msl is const */
+	found_msl = &mcfg->memsegs[msl_idx];
+	found_msl->version++;
+
+	rte_fbarray_set_free(&found_msl->memseg_arr, seg_idx);
+
+	ret = free_seg(wa->ms);
+
+	return (ret < 0) ? (-1) : 1;
+}
+
+int
+eal_memalloc_alloc_seg_bulk(struct rte_memseg **ms, int n_segs,
+		size_t page_sz, int socket, bool exact)
+{
+	unsigned int i;
+	int ret = -1;
+	struct alloc_walk_param wa;
+	struct hugepage_info *hi = NULL;
+
+	if (internal_config.legacy_mem) {
+		RTE_LOG(ERR, EAL, "dynamic allocation not supported in legacy mode\n");
+		return -ENOTSUP;
+	}
+
+	for (i = 0; i < internal_config.num_hugepage_sizes; i++) {
+		struct hugepage_info *hpi = &internal_config.hugepage_info[i];
+		if (page_sz == hpi->hugepage_sz) {
+			hi = hpi;
+			break;
+		}
+	}
+	if (!hi) {
+		RTE_LOG(ERR, EAL, "cannot find relevant hugepage_info entry\n");
+		return -1;
+	}
+
+	memset(&wa, 0, sizeof(wa));
+	wa.exact = exact;
+	wa.hi = hi;
+	wa.ms = ms;
+	wa.n_segs = n_segs;
+	wa.page_sz = page_sz;
+	wa.socket = socket;
+	wa.segs_allocated = 0;
+
+	/* memalloc is locked, so it's safe to use thread-unsafe version */
+	ret = rte_memseg_list_walk_thread_unsafe(alloc_seg_walk, &wa);
+	if (ret == 0) {
+		RTE_LOG(ERR, EAL, "cannot find suitable memseg_list\n");
+		ret = -1;
+	} else if (ret > 0) {
+		ret = (int)wa.segs_allocated;
+	}
+
+	return ret;
+}
+
+struct rte_memseg *
+eal_memalloc_alloc_seg(size_t page_sz, int socket)
+{
+	struct rte_memseg *ms = NULL;
+	eal_memalloc_alloc_seg_bulk(&ms, 1, page_sz, socket, true);
+	return ms;
+}
+
+int
+eal_memalloc_free_seg_bulk(struct rte_memseg **ms, int n_segs)
+{
+	int seg, ret = 0;
+
+	/* dynamic free not supported in legacy mode */
+	if (internal_config.legacy_mem)
+		return -1;
+
+	for (seg = 0; seg < n_segs; seg++) {
+		struct rte_memseg *cur = ms[seg];
+		struct hugepage_info *hi = NULL;
+		struct free_walk_param wa;
+		size_t i;
+		int walk_res;
+
+		/* if this page is marked as unfreeable, fail */
+		if (cur->flags & RTE_MEMSEG_FLAG_DO_NOT_FREE) {
+			RTE_LOG(DEBUG, EAL, "Page is not allowed to be freed\n");
+			ret = -1;
+			continue;
+		}
+
+		memset(&wa, 0, sizeof(wa));
+
+		for (i = 0; i < RTE_DIM(internal_config.hugepage_info); i++) {
+			hi = &internal_config.hugepage_info[i];
+			if (cur->hugepage_sz == hi->hugepage_sz)
+				break;
+		}
+		if (i == RTE_DIM(internal_config.hugepage_info)) {
+			RTE_LOG(ERR, EAL, "Can't find relevant hugepage_info entry\n");
+			ret = -1;
+			continue;
+		}
+
+		wa.ms = cur;
+		wa.hi = hi;
+
+		/* memalloc is locked, so it's safe to use thread-unsafe version
+		 */
+		walk_res = rte_memseg_list_walk_thread_unsafe(free_seg_walk,
+				&wa);
+		if (walk_res == 1)
+			continue;
+		if (walk_res == 0)
+			RTE_LOG(ERR, EAL, "Couldn't find memseg list\n");
+		ret = -1;
+	}
+	return ret;
+}
+
+int
+eal_memalloc_free_seg(struct rte_memseg *ms)
+{
+	return eal_memalloc_free_seg_bulk(&ms, 1);
+}
+
+int
+eal_memalloc_sync_with_primary(void)
+{
+	/* No multi-process support. */
+	EAL_LOG_NOT_IMPLEMENTED();
+	return -1;
+}
+
+int
+eal_memalloc_init(void)
+{
+	/* No action required. */
+	return 0;
+}
diff --git a/lib/librte_eal/windows/eal_memory.c b/lib/librte_eal/windows/eal_memory.c
new file mode 100644
index 000000000..ec40bab16
--- /dev/null
+++ b/lib/librte_eal/windows/eal_memory.c
@@ -0,0 +1,710 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2020 Dmitry Kozlyuk
+ */
+
+#include <inttypes.h>
+#include <io.h>
+
+#include <rte_errno.h>
+#include <rte_memory.h>
+
+#include "eal_internal_cfg.h"
+#include "eal_memalloc.h"
+#include "eal_memcfg.h"
+#include "eal_options.h"
+#include "eal_private.h"
+#include "eal_windows.h"
+
+#include <rte_virt2phys.h>
+
+/* MinGW-w64 headers lack VirtualAlloc2() in some distributions.
+ * Provide a copy of definitions and code to load it dynamically.
+ * Note: definitions are copied verbatim from Microsoft documentation
+ * and don't follow DPDK code style.
+ *
+ * MEM_RESERVE_PLACEHOLDER being defined means VirtualAlloc2() is present too.
+ */
+#ifndef MEM_PRESERVE_PLACEHOLDER
+
+/* https://docs.microsoft.com/en-us/windows/win32/api/winnt/ne-winnt-mem_extended_parameter_type */
+typedef enum MEM_EXTENDED_PARAMETER_TYPE {
+	MemExtendedParameterInvalidType,
+	MemExtendedParameterAddressRequirements,
+	MemExtendedParameterNumaNode,
+	MemExtendedParameterPartitionHandle,
+	MemExtendedParameterUserPhysicalHandle,
+	MemExtendedParameterAttributeFlags,
+	MemExtendedParameterMax
+} *PMEM_EXTENDED_PARAMETER_TYPE;
+
+#define MEM_EXTENDED_PARAMETER_TYPE_BITS 4
+
+/* https://docs.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-mem_extended_parameter */
+typedef struct MEM_EXTENDED_PARAMETER {
+	struct {
+		DWORD64 Type : MEM_EXTENDED_PARAMETER_TYPE_BITS;
+		DWORD64 Reserved : 64 - MEM_EXTENDED_PARAMETER_TYPE_BITS;
+	} DUMMYSTRUCTNAME;
+	union {
+		DWORD64 ULong64;
+		PVOID   Pointer;
+		SIZE_T  Size;
+		HANDLE  Handle;
+		DWORD   ULong;
+	} DUMMYUNIONNAME;
+} MEM_EXTENDED_PARAMETER, *PMEM_EXTENDED_PARAMETER;
+
+/* https://docs.microsoft.com/en-us/windows/win32/api/memoryapi/nf-memoryapi-virtualalloc2 */
+typedef PVOID (*VirtualAlloc2_type)(
+	HANDLE                 Process,
+	PVOID                  BaseAddress,
+	SIZE_T                 Size,
+	ULONG                  AllocationType,
+	ULONG                  PageProtection,
+	MEM_EXTENDED_PARAMETER *ExtendedParameters,
+	ULONG                  ParameterCount
+);
+
+/* VirtualAlloc2() flags. */
+#define MEM_COALESCE_PLACEHOLDERS 0x00000001
+#define MEM_PRESERVE_PLACEHOLDER  0x00000002
+#define MEM_REPLACE_PLACEHOLDER   0x00004000
+#define MEM_RESERVE_PLACEHOLDER   0x00040000
+
+/* Named exactly as the function, so that user code does not depend
+ * on it being found at compile time or dynamically.
+ */
+static VirtualAlloc2_type VirtualAlloc2;
+
+int
+eal_mem_win32api_init(void)
+{
+	/* Contrary to the docs, VirtualAlloc2() is not in kernel32.dll,
+	 * see https://github.com/MicrosoftDocs/feedback/issues/1129.
+	 */
+	static const char library_name[] = "kernelbase.dll";
+	static const char function[] = "VirtualAlloc2";
+
+	HMODULE library = NULL;
+	int ret = 0;
+
+	/* Already done. */
+	if (VirtualAlloc2 != NULL)
+		return 0;
+
+	library = LoadLibraryA(library_name);
+	if (library == NULL) {
+		RTE_LOG_WIN32_ERR("LoadLibraryA(\"%s\")", library_name);
+		return -1;
+	}
+
+	VirtualAlloc2 = (VirtualAlloc2_type)(
+		(void *)GetProcAddress(library, function));
+	if (VirtualAlloc2 == NULL) {
+		RTE_LOG_WIN32_ERR("GetProcAddress(\"%s\", \"%s\")\n",
+			library_name, function);
+
+		/* Contrary to the docs, Server 2016 is not supported. */
+		RTE_LOG(ERR, EAL, "Windows 10 or Windows Server 2019 "
+			" is required for memory management\n");
+		ret = -1;
+	}
+
+	FreeLibrary(library);
+
+	return ret;
+}
+
+#else
+
+/* Stub in case VirtualAlloc2() is provided by the compiler. */
+int
+eal_mem_win32api_init(void)
+{
+	return 0;
+}
+
+#endif /* defined(MEM_RESERVE_PLACEHOLDER) */
+
+static HANDLE virt2phys_device = INVALID_HANDLE_VALUE;
+
+int
+eal_mem_virt2iova_init(void)
+{
+	HDEVINFO list = INVALID_HANDLE_VALUE;
+	SP_DEVICE_INTERFACE_DATA ifdata;
+	SP_DEVICE_INTERFACE_DETAIL_DATA *detail = NULL;
+	DWORD detail_size;
+	int ret = -1;
+
+	list = SetupDiGetClassDevs(
+		&GUID_DEVINTERFACE_VIRT2PHYS, NULL, NULL,
+		DIGCF_DEVICEINTERFACE | DIGCF_PRESENT);
+	if (list == INVALID_HANDLE_VALUE) {
+		RTE_LOG_WIN32_ERR("SetupDiGetClassDevs()");
+		goto exit;
+	}
+
+	ifdata.cbSize = sizeof(ifdata);
+	if (!SetupDiEnumDeviceInterfaces(
+		list, NULL, &GUID_DEVINTERFACE_VIRT2PHYS, 0, &ifdata)) {
+		RTE_LOG_WIN32_ERR("SetupDiEnumDeviceInterfaces()");
+		goto exit;
+	}
+
+	if (!SetupDiGetDeviceInterfaceDetail(
+		list, &ifdata, NULL, 0, &detail_size, NULL)) {
+		if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
+			RTE_LOG_WIN32_ERR(
+				"SetupDiGetDeviceInterfaceDetail(probe)");
+			goto exit;
+		}
+	}
+
+	detail = malloc(detail_size);
+	if (detail == NULL) {
+		RTE_LOG(ERR, EAL, "Cannot allocate virt2phys "
+			"device interface detail data\n");
+		goto exit;
+	}
+
+	detail->cbSize = sizeof(*detail);
+	if (!SetupDiGetDeviceInterfaceDetail(
+		list, &ifdata, detail, detail_size, NULL, NULL)) {
+		RTE_LOG_WIN32_ERR("SetupDiGetDeviceInterfaceDetail(read)");
+		goto exit;
+	}
+
+	RTE_LOG(DEBUG, EAL, "Found virt2phys device: %s\n", detail->DevicePath);
+
+	virt2phys_device = CreateFile(
+		detail->DevicePath, 0, 0, NULL, OPEN_EXISTING, 0, NULL);
+	if (virt2phys_device == INVALID_HANDLE_VALUE) {
+		RTE_LOG_WIN32_ERR("CreateFile()");
+		goto exit;
+	}
+
+	/* Indicate success. */
+	ret = 0;
+
+exit:
+	if (detail != NULL)
+		free(detail);
+	if (list != INVALID_HANDLE_VALUE)
+		SetupDiDestroyDeviceInfoList(list);
+
+	return ret;
+}
+
+phys_addr_t
+rte_mem_virt2phy(const void *virt)
+{
+	LARGE_INTEGER phys;
+	DWORD bytes_returned;
+
+	if (virt2phys_device == INVALID_HANDLE_VALUE)
+		return RTE_BAD_PHYS_ADDR;
+
+	if (!DeviceIoControl(
+			virt2phys_device, IOCTL_VIRT2PHYS_TRANSLATE,
+			&virt, sizeof(virt), &phys, sizeof(phys),
+			&bytes_returned, NULL)) {
+		RTE_LOG_WIN32_ERR("DeviceIoControl(IOCTL_VIRT2PHYS_TRANSLATE)");
+		return RTE_BAD_PHYS_ADDR;
+	}
+
+	return phys.QuadPart;
+}
+
+/* Windows currently only supports IOVA as PA. */
+rte_iova_t
+rte_mem_virt2iova(const void *virt)
+{
+	phys_addr_t phys;
+
+	if (virt2phys_device == INVALID_HANDLE_VALUE)
+		return RTE_BAD_IOVA;
+
+	phys = rte_mem_virt2phy(virt);
+	if (phys == RTE_BAD_PHYS_ADDR)
+		return RTE_BAD_IOVA;
+
+	return (rte_iova_t)phys;
+}
+
+/* Always using physical addresses under Windows if they can be obtained. */
+int
+rte_eal_using_phys_addrs(void)
+{
+	return virt2phys_device != INVALID_HANDLE_VALUE;
+}
+
+/* Approximate error mapping from VirtualAlloc2() to POSIX mmap(3). */
+static void
+set_errno_from_win32_alloc_error(DWORD code)
+{
+	switch (code) {
+	case ERROR_SUCCESS:
+		rte_errno = 0;
+		break;
+
+	case ERROR_INVALID_ADDRESS:
+		/* A valid requested address is not available. */
+	case ERROR_COMMITMENT_LIMIT:
+		/* May occcur when committing regular memory. */
+	case ERROR_NO_SYSTEM_RESOURCES:
+		/* Occurs when the system runs out of hugepages. */
+		rte_errno = ENOMEM;
+		break;
+
+	case ERROR_INVALID_PARAMETER:
+	default:
+		rte_errno = EINVAL;
+		break;
+	}
+}
+
+void *
+eal_mem_reserve(void *requested_addr, size_t size, int flags)
+{
+	HANDLE process;
+	void *virt;
+
+	/* Windows requires hugepages to be committed. */
+	if (flags & EAL_RESERVE_HUGEPAGES) {
+		rte_errno = ENOTSUP;
+		return NULL;
+	}
+
+	process = GetCurrentProcess();
+
+	virt = VirtualAlloc2(process, requested_addr, size,
+		MEM_RESERVE | MEM_RESERVE_PLACEHOLDER, PAGE_NOACCESS,
+		NULL, 0);
+	if (virt == NULL) {
+		DWORD err = GetLastError();
+		RTE_LOG_WIN32_ERR("VirtualAlloc2()");
+		set_errno_from_win32_alloc_error(err);
+		return NULL;
+	}
+
+	if ((flags & EAL_RESERVE_FORCE_ADDRESS) && (virt != requested_addr)) {
+		if (!VirtualFreeEx(process, virt, 0, MEM_RELEASE))
+			RTE_LOG_WIN32_ERR("VirtualFreeEx()");
+		rte_errno = ENOMEM;
+		return NULL;
+	}
+
+	return virt;
+}
+
+void *
+eal_mem_alloc_socket(size_t size, int socket_id)
+{
+	DWORD flags = MEM_RESERVE | MEM_COMMIT;
+	void *addr;
+
+	flags = MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES;
+	addr = VirtualAllocExNuma(GetCurrentProcess(), NULL, size, flags,
+		PAGE_READWRITE, eal_socket_numa_node(socket_id));
+	if (addr == NULL)
+		rte_errno = ENOMEM;
+	return addr;
+}
+
+void *
+eal_mem_commit(void *requested_addr, size_t size, int socket_id)
+{
+	HANDLE process;
+	MEM_EXTENDED_PARAMETER param;
+	DWORD param_count = 0;
+	DWORD flags;
+	void *addr;
+
+	process = GetCurrentProcess();
+
+	if (requested_addr != NULL) {
+		MEMORY_BASIC_INFORMATION info;
+
+		if (VirtualQueryEx(process, requested_addr, &info,
+				sizeof(info)) != sizeof(info)) {
+			RTE_LOG_WIN32_ERR("VirtualQuery(%p)", requested_addr);
+			return NULL;
+		}
+
+		/* Split reserved region if only a part is committed. */
+		flags = MEM_RELEASE | MEM_PRESERVE_PLACEHOLDER;
+		if ((info.RegionSize > size) && !VirtualFreeEx(
+				process, requested_addr, size, flags)) {
+			RTE_LOG_WIN32_ERR(
+				"VirtualFreeEx(%p, %zu, preserve placeholder)",
+				requested_addr, size);
+			return NULL;
+		}
+
+		/* Temporarily release the region to be committed.
+		 *
+		 * There is an inherent race for this memory range
+		 * if another thread allocates memory via OS API.
+		 * However, VirtualAlloc2(MEM_REPLACE_PLACEHOLDER)
+		 * doesn't work with MEM_LARGE_PAGES on Windows Server.
+		 */
+		if (!VirtualFreeEx(process, requested_addr, 0, MEM_RELEASE)) {
+			RTE_LOG_WIN32_ERR("VirtualFreeEx(%p, 0, release)",
+				requested_addr);
+			return NULL;
+		}
+	}
+
+	if (socket_id != SOCKET_ID_ANY) {
+		param_count = 1;
+		memset(&param, 0, sizeof(param));
+		param.Type = MemExtendedParameterNumaNode;
+		param.ULong = eal_socket_numa_node(socket_id);
+	}
+
+	flags = MEM_RESERVE | MEM_COMMIT | MEM_LARGE_PAGES;
+	addr = VirtualAlloc2(process, requested_addr, size,
+		flags, PAGE_READWRITE, &param, param_count);
+	if (addr == NULL) {
+		/* Logging may overwrite GetLastError() result. */
+		DWORD err = GetLastError();
+		RTE_LOG_WIN32_ERR("VirtualAlloc2(%p, %zu, commit large pages)",
+			requested_addr, size);
+		set_errno_from_win32_alloc_error(err);
+		return NULL;
+	}
+
+	if ((requested_addr != NULL) && (addr != requested_addr)) {
+		/* We lost the race for the requested_addr. */
+		if (!VirtualFreeEx(process, addr, 0, MEM_RELEASE))
+			RTE_LOG_WIN32_ERR("VirtualFreeEx(%p, release)", addr);
+
+		rte_errno = EADDRNOTAVAIL;
+		return NULL;
+	}
+
+	return addr;
+}
+
+int
+eal_mem_decommit(void *addr, size_t size)
+{
+	HANDLE process;
+	void *stub;
+	DWORD flags;
+
+	process = GetCurrentProcess();
+
+	/* Hugepages cannot be decommited on Windows,
+	 * so free them and replace the block with a placeholder.
+	 * There is a race for VA in this block until VirtualAlloc2 call.
+	 */
+	if (!VirtualFreeEx(process, addr, 0, MEM_RELEASE)) {
+		RTE_LOG_WIN32_ERR("VirtualFreeEx(%p, 0, release)", addr);
+		return -1;
+	}
+
+	flags = MEM_RESERVE | MEM_RESERVE_PLACEHOLDER;
+	stub = VirtualAlloc2(
+		process, addr, size, flags, PAGE_NOACCESS, NULL, 0);
+	if (stub == NULL) {
+		/* We lost the race for the VA. */
+		if (!VirtualFreeEx(process, stub, 0, MEM_RELEASE))
+			RTE_LOG_WIN32_ERR("VirtualFreeEx(%p, release)", stub);
+		rte_errno = EADDRNOTAVAIL;
+		return -1;
+	}
+
+	/* No need to join reserved regions adjascent to the freed one:
+	 * eal_mem_commit() will just pick up the page-size placeholder
+	 * created here.
+	 */
+	return 0;
+}
+
+/**
+ * Free a reserved memory region in full or in part.
+ *
+ * @param addr
+ *  Starting address of the area to free.
+ * @param size
+ *  Number of bytes to free. Must be a multiple of page size.
+ * @param reserved
+ *  Fail if the region is not in reserved state.
+ * @return
+ *  * 0 on successful deallocation;
+ *  * 1 if region mut be in reserved state but it is not;
+ *  * (-1) on system API failures.
+ */
+static int
+mem_free(void *addr, size_t size, bool reserved)
+{
+	MEMORY_BASIC_INFORMATION info;
+	HANDLE process;
+
+	process = GetCurrentProcess();
+
+	if (VirtualQueryEx(
+			process, addr, &info, sizeof(info)) != sizeof(info)) {
+		RTE_LOG_WIN32_ERR("VirtualQueryEx(%p)", addr);
+		return -1;
+	}
+
+	if (reserved && (info.State != MEM_RESERVE))
+		return 1;
+
+	/* Free complete region. */
+	if ((addr == info.AllocationBase) && (size == info.RegionSize)) {
+		if (!VirtualFreeEx(process, addr, 0, MEM_RELEASE)) {
+			RTE_LOG_WIN32_ERR("VirtualFreeEx(%p, 0, release)",
+				addr);
+		}
+		return 0;
+	}
+
+	/* Split the part to be freed and the remaining reservation. */
+	if (!VirtualFreeEx(process, addr, size,
+			MEM_RELEASE | MEM_PRESERVE_PLACEHOLDER)) {
+		RTE_LOG_WIN32_ERR(
+			"VirtualFreeEx(%p, %zu, preserve placeholder)",
+			addr, size);
+		return -1;
+	}
+
+	/* Actually free reservation part. */
+	if (!VirtualFreeEx(process, addr, 0, MEM_RELEASE)) {
+		RTE_LOG_WIN32_ERR("VirtualFreeEx(%p, 0, release)", addr);
+		return -1;
+	}
+
+	return 0;
+}
+
+void
+eal_mem_free(void *virt, size_t size)
+{
+	mem_free(virt, size, false);
+}
+
+int
+eal_mem_set_dump(void *virt, size_t size, bool dump)
+{
+	RTE_SET_USED(virt);
+	RTE_SET_USED(size);
+	RTE_SET_USED(dump);
+
+	/* Windows does not dump reserved memory by default.
+	 *
+	 * There is <werapi.h> to include or exclude regions from the dump,
+	 * but this is not currently required by EAL.
+	 */
+
+	rte_errno = ENOTSUP;
+	return -1;
+}
+
+void *
+rte_mem_map(void *requested_addr, size_t size, int prot, int flags,
+	int fd, size_t offset)
+{
+	HANDLE file_handle = INVALID_HANDLE_VALUE;
+	HANDLE mapping_handle = INVALID_HANDLE_VALUE;
+	DWORD sys_prot = 0;
+	DWORD sys_access = 0;
+	DWORD size_high = (DWORD)(size >> 32);
+	DWORD size_low = (DWORD)size;
+	DWORD offset_high = (DWORD)(offset >> 32);
+	DWORD offset_low = (DWORD)offset;
+	LPVOID virt = NULL;
+
+	if (prot & RTE_PROT_EXECUTE) {
+		if (prot & RTE_PROT_READ) {
+			sys_prot = PAGE_EXECUTE_READ;
+			sys_access = FILE_MAP_READ | FILE_MAP_EXECUTE;
+		}
+		if (prot & RTE_PROT_WRITE) {
+			sys_prot = PAGE_EXECUTE_READWRITE;
+			sys_access = FILE_MAP_WRITE | FILE_MAP_EXECUTE;
+		}
+	} else {
+		if (prot & RTE_PROT_READ) {
+			sys_prot = PAGE_READONLY;
+			sys_access = FILE_MAP_READ;
+		}
+		if (prot & RTE_PROT_WRITE) {
+			sys_prot = PAGE_READWRITE;
+			sys_access = FILE_MAP_WRITE;
+		}
+	}
+
+	if (flags & RTE_MAP_PRIVATE)
+		sys_access |= FILE_MAP_COPY;
+
+	if ((flags & RTE_MAP_ANONYMOUS) == 0)
+		file_handle = (HANDLE)_get_osfhandle(fd);
+
+	mapping_handle = CreateFileMapping(
+		file_handle, NULL, sys_prot, size_high, size_low, NULL);
+	if (mapping_handle == INVALID_HANDLE_VALUE) {
+		RTE_LOG_WIN32_ERR("CreateFileMapping()");
+		return NULL;
+	}
+
+	/* There is a race for the requested_addr between mem_free()
+	 * and MapViewOfFileEx(). MapViewOfFile3() that can replace a reserved
+	 * region with a mapping in a single operation, but it does not support
+	 * private mappings.
+	 */
+	if (requested_addr != NULL) {
+		int ret = mem_free(requested_addr, size, true);
+		if (ret) {
+			if (ret > 0) {
+				RTE_LOG(ERR, EAL, "Cannot map memory "
+					"to a region not reserved\n");
+				rte_errno = EADDRNOTAVAIL;
+			}
+			return NULL;
+		}
+	}
+
+	virt = MapViewOfFileEx(mapping_handle, sys_access,
+		offset_high, offset_low, size, requested_addr);
+	if (!virt) {
+		RTE_LOG_WIN32_ERR("MapViewOfFileEx()");
+		return NULL;
+	}
+
+	if ((flags & RTE_MAP_FORCE_ADDRESS) && (virt != requested_addr)) {
+		if (!UnmapViewOfFile(virt))
+			RTE_LOG_WIN32_ERR("UnmapViewOfFile()");
+		virt = NULL;
+	}
+
+	if (!CloseHandle(mapping_handle))
+		RTE_LOG_WIN32_ERR("CloseHandle()");
+
+	return virt;
+}
+
+int
+rte_mem_unmap(void *virt, size_t size)
+{
+	RTE_SET_USED(size);
+
+	if (!UnmapViewOfFile(virt)) {
+		RTE_LOG_WIN32_ERR("UnmapViewOfFile()");
+		rte_errno = EINVAL;
+		return -1;
+	}
+	return 0;
+}
+
+uint64_t
+eal_get_baseaddr(void)
+{
+	/* Windows strategy for memory allocation is undocumented.
+	 * Returning 0 here effectively disables address guessing
+	 * unless user provides an address hint.
+	 */
+	return 0;
+}
+
+size_t
+rte_get_page_size(void)
+{
+	static SYSTEM_INFO info;
+
+	if (info.dwPageSize == 0)
+		GetSystemInfo(&info);
+
+	return info.dwPageSize;
+}
+
+int
+rte_mem_lock(const void *virt, size_t size)
+{
+	/* VirtualLock() takes `void*`, work around compiler warning. */
+	void *addr = (void *)((uintptr_t)virt);
+
+	if (!VirtualLock(addr, size)) {
+		RTE_LOG_WIN32_ERR("VirtualLock(%p %#zx)", virt, size);
+		return -1;
+	}
+
+	return 0;
+}
+
+int
+rte_eal_memseg_init(void)
+{
+	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+		EAL_LOG_NOT_IMPLEMENTED();
+		return -1;
+	}
+
+	return eal_dynmem_memseg_lists_init();
+}
+
+static int
+eal_nohuge_init(void)
+{
+	struct rte_mem_config *mcfg;
+	struct rte_memseg_list *msl;
+	int n_segs;
+	uint64_t mem_sz, page_sz;
+	void *addr;
+
+	mcfg = rte_eal_get_configuration()->mem_config;
+
+	/* nohuge mode is legacy mode */
+	internal_config.legacy_mem = 1;
+
+	msl = &mcfg->memsegs[0];
+
+	mem_sz = internal_config.memory;
+	page_sz = RTE_PGSIZE_4K;
+	n_segs = mem_sz / page_sz;
+
+	if (eal_memseg_list_init_named(
+			msl, "nohugemem", page_sz, n_segs, 0, true)) {
+		return -1;
+	}
+
+	addr = VirtualAlloc(
+		NULL, mem_sz, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
+	if (addr == NULL) {
+		RTE_LOG_WIN32_ERR("VirtualAlloc(size=%#zx)", mem_sz);
+		RTE_LOG(ERR, EAL, "Cannot allocate memory\n");
+		return -1;
+	}
+
+	msl->base_va = addr;
+	msl->len = mem_sz;
+
+	eal_memseg_list_populate(msl, addr, n_segs);
+
+	if (mcfg->dma_maskbits &&
+		rte_mem_check_dma_mask_thread_unsafe(mcfg->dma_maskbits)) {
+		RTE_LOG(ERR, EAL,
+			"%s(): couldn't allocate memory due to IOVA "
+			"exceeding limits of current DMA mask.\n", __func__);
+		return -1;
+	}
+
+	return 0;
+}
+
+int
+rte_eal_hugepage_init(void)
+{
+	return internal_config.no_hugetlbfs ?
+		eal_nohuge_init() : eal_dynmem_hugepage_init();
+}
+
+int
+rte_eal_hugepage_attach(void)
+{
+	EAL_LOG_NOT_IMPLEMENTED();
+	return -1;
+}
diff --git a/lib/librte_eal/windows/eal_mp.c b/lib/librte_eal/windows/eal_mp.c
new file mode 100644
index 000000000..16a5e8ba0
--- /dev/null
+++ b/lib/librte_eal/windows/eal_mp.c
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2020 Dmitry Kozlyuk
+ */
+
+/**
+ * @file Multiprocess support stubs
+ *
+ * Stubs must log an error until implemented. If success is required
+ * for non-multiprocess operation, stub must log a warning and a comment
+ * must document what requires success emulation.
+ */
+
+#include <rte_eal.h>
+#include <rte_errno.h>
+
+#include "eal_private.h"
+#include "eal_windows.h"
+#include "malloc_mp.h"
+
+void
+rte_mp_channel_cleanup(void)
+{
+	EAL_LOG_NOT_IMPLEMENTED();
+}
+
+int
+rte_mp_action_register(const char *name, rte_mp_t action)
+{
+	RTE_SET_USED(name);
+	RTE_SET_USED(action);
+	EAL_LOG_NOT_IMPLEMENTED();
+	return -1;
+}
+
+void
+rte_mp_action_unregister(const char *name)
+{
+	RTE_SET_USED(name);
+	EAL_LOG_NOT_IMPLEMENTED();
+}
+
+int
+rte_mp_sendmsg(struct rte_mp_msg *msg)
+{
+	RTE_SET_USED(msg);
+	EAL_LOG_NOT_IMPLEMENTED();
+	return -1;
+}
+
+int
+rte_mp_request_sync(struct rte_mp_msg *req, struct rte_mp_reply *reply,
+	const struct timespec *ts)
+{
+	RTE_SET_USED(req);
+	RTE_SET_USED(reply);
+	RTE_SET_USED(ts);
+	EAL_LOG_NOT_IMPLEMENTED();
+	return -1;
+}
+
+int
+rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts,
+		rte_mp_async_reply_t clb)
+{
+	RTE_SET_USED(req);
+	RTE_SET_USED(ts);
+	RTE_SET_USED(clb);
+	EAL_LOG_NOT_IMPLEMENTED();
+	return -1;
+}
+
+int
+rte_mp_reply(struct rte_mp_msg *msg, const char *peer)
+{
+	RTE_SET_USED(msg);
+	RTE_SET_USED(peer);
+	EAL_LOG_NOT_IMPLEMENTED();
+	return -1;
+}
+
+int
+register_mp_requests(void)
+{
+	/* Non-stub function succeeds if multi-process is not supported. */
+	EAL_LOG_STUB();
+	return 0;
+}
+
+int
+request_to_primary(struct malloc_mp_req *req)
+{
+	RTE_SET_USED(req);
+	EAL_LOG_NOT_IMPLEMENTED();
+	return -1;
+}
+
+int
+request_sync(void)
+{
+	/* Common memory allocator depends on this function success. */
+	EAL_LOG_STUB();
+	return 0;
+}
diff --git a/lib/librte_eal/windows/eal_windows.h b/lib/librte_eal/windows/eal_windows.h
index 390d2fd66..caabffedf 100644
--- a/lib/librte_eal/windows/eal_windows.h
+++ b/lib/librte_eal/windows/eal_windows.h
@@ -9,8 +9,24 @@
  * @file Facilities private to Windows EAL
  */
 
+#include <rte_errno.h>
 #include <rte_windows.h>
 
+/**
+ * Log current function as not implemented and set rte_errno.
+ */
+#define EAL_LOG_NOT_IMPLEMENTED() \
+	do { \
+		RTE_LOG(DEBUG, EAL, "%s() is not implemented\n", __func__); \
+		rte_errno = ENOTSUP; \
+	} while (0)
+
+/**
+ * Log current function as a stub.
+ */
+#define EAL_LOG_STUB() \
+	RTE_LOG(DEBUG, EAL, "Windows: %s() is a stub\n", __func__)
+
 /**
  * Create a map of processors and cores on the system.
  */
@@ -36,4 +52,63 @@ int eal_thread_create(pthread_t *thread);
  */
 unsigned int eal_socket_numa_node(unsigned int socket_id);
 
+/**
+ * Open virt2phys driver interface device.
+ *
+ * @return 0 on success, (-1) on failure.
+ */
+int eal_mem_virt2iova_init(void);
+
+/**
+ * Locate Win32 memory management routines in system libraries.
+ *
+ * @return 0 on success, (-1) on failure.
+ */
+int eal_mem_win32api_init(void);
+
+/**
+ * Allocate new memory in hugepages on the specified NUMA node.
+ *
+ * @param size
+ *  Number of bytes to allocate. Must be a multiple of huge page size.
+ * @param socket_id
+ *  Socket ID.
+ * @return
+ *  Address of the memory allocated on success or NULL on failure.
+ */
+void *eal_mem_alloc_socket(size_t size, int socket_id);
+
+/**
+ * Commit memory previously reserved with eal_mem_reserve()
+ * or decommitted from hugepages by eal_mem_decommit().
+ *
+ * @param requested_addr
+ *  Address within a reserved region. Must not be NULL.
+ * @param size
+ *  Number of bytes to commit. Must be a multiple of page size.
+ * @param socket_id
+ *  Socket ID to allocate on. Can be SOCKET_ID_ANY.
+ * @return
+ *  On success, address of the committed memory, that is, requested_addr.
+ *  On failure, NULL and rte_errno is set.
+ */
+void *eal_mem_commit(void *requested_addr, size_t size, int socket_id);
+
+/**
+ * Put allocated or committed memory back into reserved state.
+ *
+ * @param addr
+ *  Address of the region to decommit.
+ * @param size
+ *  Number of bytes to decommit, must be the size of a page
+ *  (hugepage or regular one).
+ *
+ * The *addr* and *size* must match location and size
+ * of a previously allocated or committed region.
+ *
+ * @return
+ *  0 on success, (-1) on failure.
+ */
+int eal_mem_decommit(void *addr, size_t size);
+
 #endif /* _EAL_WINDOWS_H_ */
diff --git a/lib/librte_eal/windows/include/meson.build b/lib/librte_eal/windows/include/meson.build
index 5fb1962ac..b3534b025 100644
--- a/lib/librte_eal/windows/include/meson.build
+++ b/lib/librte_eal/windows/include/meson.build
@@ -5,5 +5,6 @@ includes += include_directories('.')
 
 headers += files(
         'rte_os.h',
+        'rte_virt2phys.h',
         'rte_windows.h',
 )
diff --git a/lib/librte_eal/windows/include/rte_os.h b/lib/librte_eal/windows/include/rte_os.h
index 510e39e03..fdd4a6f40 100644
--- a/lib/librte_eal/windows/include/rte_os.h
+++ b/lib/librte_eal/windows/include/rte_os.h
@@ -36,6 +36,15 @@ extern "C" {
 
 #define strncasecmp(s1, s2, count)        _strnicmp(s1, s2, count)
 
+/* MinGW-w64 7.0.0 defines _open() and open() as inlines, 6.0.0 doesn't.
+ * Other pairs of functions are only defined, not declared.
+ */
+#if !defined RTE_TOOLCHAIN_GCC || defined NO_OLDNAMES
+#define open _open
+#endif
+#define close _close
+#define unlink _unlink
+
 /* cpu_set macros implementation */
 #define RTE_CPU_AND(dst, src1, src2) CPU_AND(dst, src1, src2)
 #define RTE_CPU_OR(dst, src1, src2) CPU_OR(dst, src1, src2)
diff --git a/lib/librte_eal/windows/include/rte_virt2phys.h b/lib/librte_eal/windows/include/rte_virt2phys.h
new file mode 100644
index 000000000..4bb2b4aaf
--- /dev/null
+++ b/lib/librte_eal/windows/include/rte_virt2phys.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2020 Dmitry Kozlyuk
+ */
+
+/**
+ * @file virt2phys driver interface
+ */
+
+/**
+ * Driver device interface GUID {539c2135-793a-4926-afec-d3a1b61bbc8a}.
+ */
+DEFINE_GUID(GUID_DEVINTERFACE_VIRT2PHYS,
+	0x539c2135, 0x793a, 0x4926,
+	0xaf, 0xec, 0xd3, 0xa1, 0xb6, 0x1b, 0xbc, 0x8a);
+
+/**
+ * Driver device type for IO control codes.
+ */
+#define VIRT2PHYS_DEVTYPE 0x8000
+
+/**
+ * Translate a valid non-paged virtual address to a physical address.
+ *
+ * Note: A physical address zero (0) is reported if input address
+ * is paged out or not mapped. However, if input is a valid mapping
+ * of I/O port 0x0000, output is also zero. There is no way
+ * to distinguish between these cases by return value only.
+ *
+ * Input: a non-paged virtual address (PVOID).
+ *
+ * Output: the corresponding physical address (LARGE_INTEGER).
+ */
+#define IOCTL_VIRT2PHYS_TRANSLATE CTL_CODE( \
+	VIRT2PHYS_DEVTYPE, 0x800, METHOD_BUFFERED, FILE_ANY_ACCESS)
diff --git a/lib/librte_eal/windows/include/rte_windows.h b/lib/librte_eal/windows/include/rte_windows.h
index ed6e4c148..899ed7d87 100644
--- a/lib/librte_eal/windows/include/rte_windows.h
+++ b/lib/librte_eal/windows/include/rte_windows.h
@@ -23,6 +23,8 @@
 
 #include <basetsd.h>
 #include <psapi.h>
+#include <setupapi.h>
+#include <winioctl.h>
 
 /* Have GUIDs defined. */
 #ifndef INITGUID
diff --git a/lib/librte_eal/windows/include/unistd.h b/lib/librte_eal/windows/include/unistd.h
index 757b7f3c5..6b33005b2 100644
--- a/lib/librte_eal/windows/include/unistd.h
+++ b/lib/librte_eal/windows/include/unistd.h
@@ -9,4 +9,7 @@
  * as Microsoft libc does not contain unistd.h. This may be removed
  * in future releases.
  */
+
+#include <io.h>
+
 #endif /* _UNISTD_H_ */
diff --git a/lib/librte_eal/windows/meson.build b/lib/librte_eal/windows/meson.build
index 52978e9d7..f2387762a 100644
--- a/lib/librte_eal/windows/meson.build
+++ b/lib/librte_eal/windows/meson.build
@@ -9,7 +9,12 @@ sources += files(
 	'eal_hugepages.c',
 	'eal_lcore.c',
 	'eal_log.c',
+	'eal_memalloc.c',
+	'eal_memory.c',
+	'eal_mp.c',
 	'eal_thread.c',
 	'fnmatch.c',
 	'getopt.c',
 )
+
+dpdk_conf.set10('RTE_EAL_NUMA_AWARE_HUGEPAGES', true)
-- 
2.25.4



More information about the dev mailing list