[dpdk-dev] [PATCH 1/4] mem: add --single-file to create single	mem-backed file
    Jianfeng Tan 
    jianfeng.tan at intel.com
       
    Sun Jan 10 12:42:59 CET 2016
    
    
  
Originally, there're two cons in using hugepage: a. needs root
privilege to touch /proc/self/pagemap, which is a premise to
alllocate physically contiguous memseg; b. possibly too many
hugepage file are created, especially used with 2M hugepage.
For virtual devices, they don't care about physical-contiguity
of allocated hugepages at all. Option --single-file is to
provide a way to allocate all hugepages into single mem-backed
file.
Known issue:
a. single-file option relys on kernel to allocate numa-affinitive
memory.
b. possible ABI break, originally, --no-huge uses anonymous memory
instead of file-backed way to create memory.
Signed-off-by: Huawei Xie <huawei.xie at intel.com>
Signed-off-by: Jianfeng Tan <jianfeng.tan at intel.com>
---
 lib/librte_eal/common/eal_common_options.c | 17 +++++++++++
 lib/librte_eal/common/eal_internal_cfg.h   |  1 +
 lib/librte_eal/common/eal_options.h        |  2 ++
 lib/librte_eal/linuxapp/eal/eal_memory.c   | 45 ++++++++++++++++++++++++++----
 4 files changed, 60 insertions(+), 5 deletions(-)
diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index 29942ea..65bccbd 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -95,6 +95,7 @@ eal_long_options[] = {
 	{OPT_VFIO_INTR,         1, NULL, OPT_VFIO_INTR_NUM        },
 	{OPT_VMWARE_TSC_MAP,    0, NULL, OPT_VMWARE_TSC_MAP_NUM   },
 	{OPT_XEN_DOM0,          0, NULL, OPT_XEN_DOM0_NUM         },
+	{OPT_SINGLE_FILE,       0, NULL, OPT_SINGLE_FILE_NUM      },
 	{0,                     0, NULL, 0                        }
 };
 
@@ -897,6 +898,10 @@ eal_parse_common_option(int opt, const char *optarg,
 		}
 		break;
 
+	case OPT_SINGLE_FILE_NUM:
+		conf->single_file = 1;
+		break;
+
 	/* don't know what to do, leave this to caller */
 	default:
 		return 1;
@@ -956,6 +961,16 @@ eal_check_common_options(struct internal_config *internal_cfg)
 			"be specified together with --"OPT_NO_HUGE"\n");
 		return -1;
 	}
+	if (internal_cfg->single_file && internal_cfg->force_sockets == 1) {
+		RTE_LOG(ERR, EAL, "Option --"OPT_SINGLE_FILE" cannot "
+			"be specified together with --"OPT_SOCKET_MEM"\n");
+		return -1;
+	}
+	if (internal_cfg->single_file && internal_cfg->hugepage_unlink) {
+		RTE_LOG(ERR, EAL, "Option --"OPT_HUGE_UNLINK" cannot "
+			"be specified together with --"OPT_SINGLE_FILE"\n");
+		return -1;
+	}
 
 	if (internal_cfg->no_hugetlbfs && internal_cfg->hugepage_unlink) {
 		RTE_LOG(ERR, EAL, "Option --"OPT_HUGE_UNLINK" cannot "
@@ -994,6 +1009,8 @@ eal_common_usage(void)
 	       "  -n CHANNELS         Number of memory channels\n"
 	       "  -m MB               Memory to allocate (see also --"OPT_SOCKET_MEM")\n"
 	       "  -r RANKS            Force number of memory ranks (don't detect)\n"
+	       "  --"OPT_SINGLE_FILE" Create just single file for shared memory, and \n"
+	       "                      do not promise physical contiguity of memseg\n"
 	       "  -b, --"OPT_PCI_BLACKLIST" Add a PCI device in black list.\n"
 	       "                      Prevent EAL from using this PCI device. The argument\n"
 	       "                      format is <domain:bus:devid.func>.\n"
diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
index 5f1367e..9117ed9 100644
--- a/lib/librte_eal/common/eal_internal_cfg.h
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -61,6 +61,7 @@ struct hugepage_info {
  */
 struct internal_config {
 	volatile size_t memory;           /**< amount of asked memory */
+	volatile unsigned single_file;    /**< mmap all hugepages in single file */
 	volatile unsigned force_nchannel; /**< force number of channels */
 	volatile unsigned force_nrank;    /**< force number of ranks */
 	volatile unsigned no_hugetlbfs;   /**< true to disable hugetlbfs */
diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
index a881c62..e5da14a 100644
--- a/lib/librte_eal/common/eal_options.h
+++ b/lib/librte_eal/common/eal_options.h
@@ -83,6 +83,8 @@ enum {
 	OPT_VMWARE_TSC_MAP_NUM,
 #define OPT_XEN_DOM0          "xen-dom0"
 	OPT_XEN_DOM0_NUM,
+#define OPT_SINGLE_FILE       "single-file"
+	OPT_SINGLE_FILE_NUM,
 	OPT_LONG_MAX_NUM
 };
 
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index 846fd31..2bb1163 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -80,6 +80,10 @@
 #include <errno.h>
 #include <sys/ioctl.h>
 #include <sys/time.h>
+#include <mntent.h>
+#include <sys/mman.h>
+#include <sys/file.h>
+#include <sys/vfs.h>
 
 #include <rte_log.h>
 #include <rte_memory.h>
@@ -92,6 +96,9 @@
 #include <rte_common.h>
 #include <rte_string_fns.h>
 
+#define _GNU_SOURCE
+#include <sys/syscall.h>
+
 #include "eal_private.h"
 #include "eal_internal_cfg.h"
 #include "eal_filesystem.h"
@@ -768,6 +775,7 @@ create_shared_memory(const char *filename, const size_t mem_size)
 	}
 	retval = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 	close(fd);
+
 	return retval;
 }
 
@@ -1110,10 +1118,34 @@ rte_eal_hugepage_init(void)
 	/* get pointer to global configuration */
 	mcfg = rte_eal_get_configuration()->mem_config;
 
-	/* hugetlbfs can be disabled */
-	if (internal_config.no_hugetlbfs) {
+	/* when hugetlbfs is disabled or single-file option is specified */
+	if (internal_config.no_hugetlbfs || internal_config.single_file) {
+		int fd;
+		uint64_t pagesize;
+		unsigned socket_id;
+		char filepath[MAX_HUGEPAGE_PATH];
+
+		syscall(SYS_getcpu, NULL, &socket_id, NULL);
+
+		if (internal_config.no_hugetlbfs) {
+			eal_get_hugefile_path(filepath, sizeof(filepath),
+					"/dev/shm", 0);
+			pagesize = RTE_PGSIZE_4K;
+		} else {
+			struct hugepage_info *hpi = &internal_config.hugepage_info[0];
+			eal_get_hugefile_path(filepath, sizeof(filepath),
+					hpi->hugedir, 0);
+			pagesize = hpi->hugepage_sz;
+		}
+		fd = open(filepath, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR);
+		if (fd < 0) {
+			RTE_LOG(ERR, EAL, "%s: open %s failed: %s\n", __func__,
+					filepath, strerror(errno));
+			return -1;
+		}
+
 		addr = mmap(NULL, internal_config.memory, PROT_READ | PROT_WRITE,
-				MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+				MAP_SHARED | MAP_POPULATE, fd, 0);
 		if (addr == MAP_FAILED) {
 			RTE_LOG(ERR, EAL, "%s: mmap() failed: %s\n", __func__,
 					strerror(errno));
@@ -1121,9 +1153,12 @@ rte_eal_hugepage_init(void)
 		}
 		mcfg->memseg[0].phys_addr = (phys_addr_t)(uintptr_t)addr;
 		mcfg->memseg[0].addr = addr;
-		mcfg->memseg[0].hugepage_sz = RTE_PGSIZE_4K;
+		mcfg->memseg[0].hugepage_sz = pagesize;
 		mcfg->memseg[0].len = internal_config.memory;
-		mcfg->memseg[0].socket_id = 0;
+		mcfg->memseg[0].socket_id = socket_id;
+
+		close(fd);
+
 		return 0;
 	}
 
-- 
2.1.4
    
    
More information about the dev
mailing list