[dpdk-dev] [RFC PATCH 4/5] EAL: Add new EAL "--shm" option.

Tetsuya Mukawa mukawa at igel.co.jp
Thu Jan 21 12:07:57 CET 2016


This is a temporary patch to map EAL memory below 16TB (1 << 44).

The patch adds a new EAL "--shm" option. If the option is specified,
EAL allocates a single file on hugetlbfs and maps all EAL memory from
it. This memory is used to share memory between the DPDK application
and a QEMU ivshmem device.

Signed-off-by: Tetsuya Mukawa <mukawa at igel.co.jp>
---
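
(Reviewer note, not part of the commit: a minimal usage sketch of the
new API. It assumes the application was started with "--shm", so that
all EAL memory sits in memseg 0; the helper name and the output format
are illustrative only.)

#include <stdio.h>
#include <inttypes.h>
#include <rte_memory.h>

/* Fetch the fd, size and address of memseg 0, so the fd can later be
 * handed to the process that backs the QEMU ivshmem device. */
static int
get_shm_segment(void)
{
	int fd;
	uint64_t size;
	void *addr;

	if (rte_memseg_info_get(0, &fd, &size, &addr) != 0)
		return -1;

	printf("memseg 0: fd=%d size=%" PRIu64 " addr=%p\n",
			fd, size, addr);
	return fd;
}
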
 lib/librte_eal/common/eal_common_options.c |  5 ++
 lib/librte_eal/common/eal_internal_cfg.h   |  1 +
 lib/librte_eal/common/eal_options.h        |  2 +
 lib/librte_eal/common/include/rte_memory.h |  5 ++
 lib/librte_eal/linuxapp/eal/eal_memory.c   | 76 ++++++++++++++++++++++++++++++
 5 files changed, 89 insertions(+)
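
(Also illustrative, not part of the patch: once the fd is known, it can
be passed to a peer process over a connected UNIX domain socket with
SCM_RIGHTS, the usual transport for ivshmem-style fd sharing. A sketch
under those assumptions:)

#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

/* Send one fd over an already-connected AF_UNIX socket. */
static int
send_fd(int sock, int fd)
{
	struct msghdr msg;
	struct cmsghdr *cmsg;
	char buf[CMSG_SPACE(sizeof(int))];
	char dummy = 0;
	struct iovec iov = { .iov_base = &dummy, .iov_len = 1 };

	memset(&msg, 0, sizeof(msg));
	memset(buf, 0, sizeof(buf));
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = buf;
	msg.msg_controllen = sizeof(buf);

	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_RIGHTS;
	cmsg->cmsg_len = CMSG_LEN(sizeof(int));
	memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));

	return (sendmsg(sock, &msg, 0) < 0) ? -1 : 0;
}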

diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index 29942ea..a752bf3 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -86,6 +86,7 @@ eal_long_options[] = {
 	{OPT_NO_HUGE,           0, NULL, OPT_NO_HUGE_NUM          },
 	{OPT_NO_PCI,            0, NULL, OPT_NO_PCI_NUM           },
 	{OPT_NO_SHCONF,         0, NULL, OPT_NO_SHCONF_NUM        },
+	{OPT_SHM,               0, NULL, OPT_SHM_NUM              },
 	{OPT_PCI_BLACKLIST,     1, NULL, OPT_PCI_BLACKLIST_NUM    },
 	{OPT_PCI_WHITELIST,     1, NULL, OPT_PCI_WHITELIST_NUM    },
 	{OPT_PROC_TYPE,         1, NULL, OPT_PROC_TYPE_NUM        },
@@ -834,6 +835,10 @@ eal_parse_common_option(int opt, const char *optarg,
 		conf->no_hugetlbfs = 1;
 		break;
 
+	case OPT_SHM_NUM:
+		conf->shm = 1;
+		break;
+
 	case OPT_NO_PCI_NUM:
 		conf->no_pci = 1;
 		break;
diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
index 5f1367e..362ce12 100644
--- a/lib/librte_eal/common/eal_internal_cfg.h
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -66,6 +66,7 @@ struct internal_config {
 	volatile unsigned no_hugetlbfs;   /**< true to disable hugetlbfs */
 	unsigned hugepage_unlink;         /**< true to unlink backing files */
 	volatile unsigned xen_dom0_support; /**< support app running on Xen Dom0*/
+	volatile unsigned shm;            /**< true to create shared memory for ivshmem */
 	volatile unsigned no_pci;         /**< true to disable PCI */
 	volatile unsigned no_hpet;        /**< true to disable HPET */
 	volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping
diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
index a881c62..c1e586a 100644
--- a/lib/librte_eal/common/eal_options.h
+++ b/lib/librte_eal/common/eal_options.h
@@ -55,6 +55,8 @@ enum {
 	OPT_HUGE_DIR_NUM,
 #define OPT_HUGE_UNLINK       "huge-unlink"
 	OPT_HUGE_UNLINK_NUM,
+#define OPT_SHM               "shm"
+	OPT_SHM_NUM,
 #define OPT_LCORES            "lcores"
 	OPT_LCORES_NUM,
 #define OPT_LOG_LEVEL         "log-level"
diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h
index 9c9e40f..3ad155b 100644
--- a/lib/librte_eal/common/include/rte_memory.h
+++ b/lib/librte_eal/common/include/rte_memory.h
@@ -102,6 +102,7 @@ struct rte_memseg {
 	int32_t socket_id;          /**< NUMA socket ID. */
 	uint32_t nchannel;          /**< Number of channels. */
 	uint32_t nrank;             /**< Number of ranks. */
+	int fd;                     /**< fd used to share this memory */
 #ifdef RTE_LIBRTE_XEN_DOM0
 	 /**< store segment MFNs */
 	uint64_t mfn[DOM0_NUM_MEMBLOCK];
@@ -130,6 +131,10 @@ int rte_mem_lock_page(const void *virt);
  */
 phys_addr_t rte_mem_virt2phy(const void *virt);
 
+
+int
+rte_memseg_info_get(int index, int *pfd, uint64_t *psize, void **paddr);
+
 /**
  * Get the layout of the available physical memory.
  *
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index 846fd31..7122f16 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -150,6 +150,21 @@ rte_mem_lock_page(const void *virt)
 	return mlock((void*)aligned, page_size);
 }
 
+int
+rte_memseg_info_get(int index, int *pfd, uint64_t *psize, void **paddr)
+{
+	struct rte_mem_config *mcfg;
+	mcfg = rte_eal_get_configuration()->mem_config;
+
+	if (pfd != NULL)
+		*pfd = mcfg->memseg[index].fd;
+	if (psize != NULL)
+		*psize = (uint64_t)mcfg->memseg[index].len;
+	if (paddr != NULL)
+		*paddr = mcfg->memseg[index].addr;
+	return 0;
+}
+
 /*
  * Get physical address of any mapped virtual address in the current process.
  */
@@ -1075,6 +1090,46 @@ calc_num_pages_per_socket(uint64_t * memory,
 	return total_num_pages;
 }
 
+static void *
+rte_eal_shm_create(int *pfd, const char *hugedir)
+{
+	int ret, fd;
+	char filepath[256];
+	void *vaddr;
+	uint64_t size = internal_config.memory;
+
+	snprintf(filepath, sizeof(filepath), "%s/%s_cvio", hugedir,
+			internal_config.hugefile_prefix);
+
+	fd = open(filepath, O_CREAT | O_RDWR, 0600);
+	if (fd < 0)
+		rte_panic("open %s failed: %s\n", filepath, strerror(errno));
+
+	ret = flock(fd, LOCK_EX);
+	if (ret < 0) {
+		close(fd);
+		rte_panic("flock %s failed: %s\n", filepath, strerror(errno));
+	}
+
+	ret = ftruncate(fd, size);
+	if (ret < 0)
+		rte_panic("ftruncate failed: %s\n", strerror(errno));
+
+	/*
+	 * Here, we need to map below (1 << 44).
+	 * This is a temporary implementation.
+	 */
+	vaddr = mmap((void *)(1ULL << 43), size, PROT_READ | PROT_WRITE,
+			MAP_SHARED | MAP_FIXED, fd, 0);
+	if (vaddr != MAP_FAILED) {
+		memset(vaddr, 0, size);
+		*pfd = fd;
+	} else
+		close(fd);
+
+	return vaddr;
+}
+
 /*
  * Prepare physical memory mapping: fill configuration structure with
  * these infos, return 0 on success.
@@ -1127,6 +1182,27 @@ rte_eal_hugepage_init(void)
 		return 0;
 	}
 
+	/* create shared memory consisting of only one file */
+	if (internal_config.shm) {
+		int fd;
+		struct hugepage_info *hpi;
+
+		hpi = &internal_config.hugepage_info[0];
+		addr = rte_eal_shm_create(&fd, hpi->hugedir);
+		if (addr == MAP_FAILED) {
+			RTE_LOG(ERR, EAL, "%s: mmap() failed: %s\n", __func__,
+					strerror(errno));
+			return -1;
+		}
+		mcfg->memseg[0].phys_addr = rte_mem_virt2phy(addr);
+		mcfg->memseg[0].addr = addr;
+		mcfg->memseg[0].hugepage_sz = hpi->hugepage_sz;
+		mcfg->memseg[0].len = internal_config.memory;
+		mcfg->memseg[0].socket_id = 0;
+		mcfg->memseg[0].fd = fd;
+		return 0;
+	}
+
 /* check if app runs on Xen Dom0 */
 	if (internal_config.xen_dom0_support) {
 #ifdef RTE_LIBRTE_XEN_DOM0
-- 
2.1.4