[dpdk-dev] [RFC 4/5] virtio/container: adjust memory initialization process

Jianfeng Tan jianfeng.tan at intel.com
Thu Nov 5 19:31:15 CET 2015


When using virtio for container, we should specify --no-huge so
that in memory initialization, shm_open() is used to alloc memory
from tmpfs filesystem /dev/shm/.

Signed-off-by: Huawei Xie <huawei.xie at intel.com>
Signed-off-by: Jianfeng Tan <jianfeng.tan at intel.com>
---
 lib/librte_eal/common/include/rte_memory.h |  5 +++
 lib/librte_eal/linuxapp/eal/eal_memory.c   | 58 ++++++++++++++++++++++++++++--
 lib/librte_mempool/rte_mempool.c           | 16 ++++-----
 3 files changed, 69 insertions(+), 10 deletions(-)

diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h
index 1bed415..9c1effc 100644
--- a/lib/librte_eal/common/include/rte_memory.h
+++ b/lib/librte_eal/common/include/rte_memory.h
@@ -100,6 +100,7 @@ struct rte_memseg {
 	int32_t socket_id;          /**< NUMA socket ID. */
 	uint32_t nchannel;          /**< Number of channels. */
 	uint32_t nrank;             /**< Number of ranks. */
+	int fd;                     /**< fd used for share this memory */
 #ifdef RTE_LIBRTE_XEN_DOM0
 	 /**< store segment MFNs */
 	uint64_t mfn[DOM0_NUM_MEMBLOCK];
@@ -128,6 +129,10 @@ int rte_mem_lock_page(const void *virt);
  */
 phys_addr_t rte_mem_virt2phy(const void *virt);
 
+
+int
+rte_memseg_info_get(int index, int *pfd, uint64_t *psize, void **paddr);
+
 /**
  * Get the layout of the available physical memory.
  *
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index ac2745e..9abbfc6 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -80,6 +80,9 @@
 #include <errno.h>
 #include <sys/ioctl.h>
 #include <sys/time.h>
+#include <mntent.h>
+#include <sys/mman.h>
+#include <sys/file.h>
 
 #include <rte_log.h>
 #include <rte_memory.h>
@@ -143,6 +146,18 @@ rte_mem_lock_page(const void *virt)
 	return mlock((void*)aligned, page_size);
 }
 
+int
+rte_memseg_info_get(int index, int *pfd, uint64_t *psize, void **paddr)
+{
+	struct rte_mem_config *mcfg;
+	mcfg = rte_eal_get_configuration()->mem_config;
+
+	*pfd = mcfg->memseg[index].fd;
+	*psize = (uint64_t)mcfg->memseg[index].len;
+	*paddr = (void *)(uint64_t)mcfg->memseg[index].addr;
+	return 0;
+}
+
 /*
  * Get physical address of any mapped virtual address in the current process.
  */
@@ -1044,6 +1059,42 @@ calc_num_pages_per_socket(uint64_t * memory,
 	return total_num_pages;
 }
 
+static void *
+rte_eal_shm_create(int *pfd)
+{
+	int ret, fd;
+	char filepath[256];
+	void *vaddr;
+	uint64_t size = internal_config.memory;
+
+	sprintf(filepath, "/%s_cvio", internal_config.hugefile_prefix);
+
+	fd = shm_open(filepath, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR);
+	if (fd < 0) {
+		rte_panic("shm_open %s failed: %s\n", filepath, strerror(errno));
+	}
+	ret = flock(fd, LOCK_EX);
+	if (ret < 0) {
+		close(fd);
+		rte_panic("flock %s failed: %s\n", filepath, strerror(errno));
+	}
+
+	ret = ftruncate(fd, size);
+	if (ret < 0) {
+		rte_panic("ftruncate failed: %s\n", strerror(errno));
+	}
+	/* flag: MAP_HUGETLB */
+	vaddr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+	if (vaddr == MAP_FAILED) {
+		rte_panic("mmap failed: %s\n", strerror(errno));
+	}
+	memset(vaddr, 0, size);
+	*pfd = fd;
+
+	return vaddr;
+}
+
+
 /*
  * Prepare physical memory mapping: fill configuration structure with
  * these infos, return 0 on success.
@@ -1072,7 +1123,9 @@ rte_eal_hugepage_init(void)
 	int new_pages_count[MAX_HUGEPAGE_SIZES];
 #endif
 
+#ifndef RTE_VIRTIO_VDEV
 	test_proc_pagemap_readable();
+#endif
 
 	memset(used_hp, 0, sizeof(used_hp));
 
@@ -1081,8 +1134,8 @@ rte_eal_hugepage_init(void)
 
 	/* hugetlbfs can be disabled */
 	if (internal_config.no_hugetlbfs) {
-		addr = mmap(NULL, internal_config.memory, PROT_READ | PROT_WRITE,
-				MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+		int fd;
+		addr = rte_eal_shm_create(&fd);
 		if (addr == MAP_FAILED) {
 			RTE_LOG(ERR, EAL, "%s: mmap() failed: %s\n", __func__,
 					strerror(errno));
@@ -1093,6 +1146,7 @@ rte_eal_hugepage_init(void)
 		mcfg->memseg[0].hugepage_sz = RTE_PGSIZE_4K;
 		mcfg->memseg[0].len = internal_config.memory;
 		mcfg->memseg[0].socket_id = 0;
+		mcfg->memseg[0].fd = fd;
 		return 0;
 	}
 
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index e57cbbd..8f8852b 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -453,13 +453,6 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 		rte_errno = EINVAL;
 		return NULL;
 	}
-
-	/* check that we have both VA and PA */
-	if (vaddr != NULL && paddr == NULL) {
-		rte_errno = EINVAL;
-		return NULL;
-	}
-
 	/* Check that pg_num and pg_shift parameters are valid. */
 	if (pg_num < RTE_DIM(mp->elt_pa) || pg_shift > MEMPOOL_PG_SHIFT_MAX) {
 		rte_errno = EINVAL;
@@ -596,8 +589,15 @@ rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size,
 
 	/* mempool elements in a separate chunk of memory. */
 	} else {
+		/* when VA is specified, PA should be specified? */
+		if (rte_eal_has_hugepages()) {
+			if (paddr == NULL) {
+				rte_errno = EINVAL;
+				return NULL;
+			}
+			memcpy(mp->elt_pa, paddr, sizeof (mp->elt_pa[0]) * pg_num);
+		}
 		mp->elt_va_start = (uintptr_t)vaddr;
-		memcpy(mp->elt_pa, paddr, sizeof (mp->elt_pa[0]) * pg_num);
 	}
 
 	mp->elt_va_end = mp->elt_va_start;
-- 
2.1.4



More information about the dev mailing list