[dpdk-dev] [PATCH] librte_eal: add APIs to speedup virt2iova/phys

Li Feng fengli at smartx.com
Mon Apr 20 13:09:53 CEST 2020


Use pread() instead of lseek() + read() when reading /proc/self/pagemap.
Also add new APIs that take an already-open pagemap fd, to reduce the number
of open/close/lseek system calls when the user needs to convert a large range
of virtual address space:
    - rte_mem_virt2iova_with_fd
    - rte_mem_virt2phy_with_fd

These APIs are intended to be used by SPDK in spdk_mem_register().

Signed-off-by: Li Feng <fengli at smartx.com>
---
 lib/librte_eal/freebsd/eal_memory.c | 18 ++++++++++++++
 lib/librte_eal/include/rte_memory.h | 36 +++++++++++++++++++++++++++
 lib/librte_eal/linux/eal_memory.c   | 49 +++++++++++++++++++++++--------------
 lib/librte_eal/rte_eal_version.map  |  3 +++
 4 files changed, 88 insertions(+), 18 deletions(-)

diff --git a/lib/librte_eal/freebsd/eal_memory.c b/lib/librte_eal/freebsd/eal_memory.c
index a97d8f0f0..fc0debf23 100644
--- a/lib/librte_eal/freebsd/eal_memory.c
+++ b/lib/librte_eal/freebsd/eal_memory.c
@@ -44,12 +44,30 @@ rte_mem_virt2phy(const void *virtaddr)
 	(void)virtaddr;
 	return RTE_BAD_IOVA;
 }
+
 rte_iova_t
 rte_mem_virt2iova(const void *virtaddr)
 {
 	return rte_mem_virt2phy(virtaddr);
 }
 
+phys_addr_t
+rte_mem_virt2phy_with_fd(int fd, const void *virtaddr)
+{
+	/*
+	 * XXX not implemented: fd and virtaddr are ignored, lookup always fails.
+	 */
+	(void)fd;
+	(void)virtaddr;
+	return RTE_BAD_IOVA;
+}
+
+rte_iova_t
+rte_mem_virt2iova_with_fd(int fd, const void *virtaddr)
+{
+	return rte_mem_virt2phy_with_fd(fd, virtaddr); /* PA lookup only */
+}
+
 int
 rte_eal_hugepage_init(void)
 {
diff --git a/lib/librte_eal/include/rte_memory.h b/lib/librte_eal/include/rte_memory.h
index 3d8d0bd69..c75782fa7 100644
--- a/lib/librte_eal/include/rte_memory.h
+++ b/lib/librte_eal/include/rte_memory.h
@@ -108,6 +108,23 @@ int rte_mem_lock_page(const void *virt);
 phys_addr_t rte_mem_virt2phy(const void *virt);
 
 /**
+ * Get physical address of any mapped virtual address in the current process.
+ * It is found by reading from fd, an open file descriptor for the
+ * /proc/self/pagemap special file. This is an optimization of
+ * rte_mem_virt2phy() for callers that need to translate many addresses.
+ * The page must be locked.
+ *
+ * @param fd
+ *   An open file descriptor for /proc/self/pagemap.
+ * @param virt
+ *   The virtual address.
+ * @return
+ *   The physical address or RTE_BAD_IOVA on error.
+ */
+__rte_experimental
+phys_addr_t rte_mem_virt2phy_with_fd(int fd, const void *virt);
+
+/**
  * Get IO virtual address of any mapped virtual address in the current process.
  *
  * @note This function will not check internal page table. Instead, in IOVA as
@@ -123,6 +140,25 @@ phys_addr_t rte_mem_virt2phy(const void *virt);
 rte_iova_t rte_mem_virt2iova(const void *virt);
 
 /**
+ * Get IO virtual address of any mapped virtual address in the current process.
+ *
+ * @note This function will not check internal page table. Instead, in IOVA as
+ *       PA mode, it will fall back to getting real physical address (which may
+ *       not match the expected IOVA, such as what was specified for external
+ *       memory).
+ *
+ * @param fd
+ *   An open file descriptor for /proc/self/pagemap.
+ * @param virt
+ *   The virtual address.
+ * @return
+ *   The IO address or RTE_BAD_IOVA on error.
+ */
+__rte_experimental
+rte_iova_t rte_mem_virt2iova_with_fd(int fd, const void *virt);
+
+
+/**
  * Get virtual memory address corresponding to iova address.
  *
  * @note This function read-locks the memory hotplug subsystem, and thus cannot
diff --git a/lib/librte_eal/linux/eal_memory.c b/lib/librte_eal/linux/eal_memory.c
index 7a9c97ff8..918796700 100644
--- a/lib/librte_eal/linux/eal_memory.c
+++ b/lib/librte_eal/linux/eal_memory.c
@@ -91,11 +91,11 @@ uint64_t eal_get_baseaddr(void)
 
 /*
  * Get physical address of any mapped virtual address in the current process.
+ * fd is an already-open pagemap descriptor, which avoids repeated open/close.
  */
 phys_addr_t
-rte_mem_virt2phy(const void *virtaddr)
-{
-	int fd, retval;
+rte_mem_virt2phy_with_fd(int fd, const void *virtaddr) {
+	int retval;
 	uint64_t page, physaddr;
 	unsigned long virt_pfn;
 	int page_size;
@@ -107,24 +107,10 @@ rte_mem_virt2phy(const void *virtaddr)
 	/* standard page size */
 	page_size = getpagesize();
 
-	fd = open("/proc/self/pagemap", O_RDONLY);
-	if (fd < 0) {
-		RTE_LOG(INFO, EAL, "%s(): cannot open /proc/self/pagemap: %s\n",
-			__func__, strerror(errno));
-		return RTE_BAD_IOVA;
-	}
-
 	virt_pfn = (unsigned long)virtaddr / page_size;
 	offset = sizeof(uint64_t) * virt_pfn;
-	if (lseek(fd, offset, SEEK_SET) == (off_t) -1) {
-		RTE_LOG(INFO, EAL, "%s(): seek error in /proc/self/pagemap: %s\n",
-				__func__, strerror(errno));
-		close(fd);
-		return RTE_BAD_IOVA;
-	}
 
-	retval = read(fd, &page, PFN_MASK_SIZE);
-	close(fd);
+	retval = pread(fd, &page, PFN_MASK_SIZE, offset);
 	if (retval < 0) {
 		RTE_LOG(INFO, EAL, "%s(): cannot read /proc/self/pagemap: %s\n",
 				__func__, strerror(errno));
@@ -149,6 +135,33 @@ rte_mem_virt2phy(const void *virtaddr)
 	return physaddr;
 }
 
+/*
+ * Get the physical address of virtaddr; opens/closes pagemap on each call.
+ */
+phys_addr_t
+rte_mem_virt2phy(const void *virtaddr)
+{
+	phys_addr_t pa;
+	int pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
+
+	if (pagemap_fd < 0) {
+		RTE_LOG(INFO, EAL, "%s(): cannot open /proc/self/pagemap: %s\n",
+			__func__, strerror(errno));
+		return RTE_BAD_IOVA;
+	}
+	pa = rte_mem_virt2phy_with_fd(pagemap_fd, virtaddr);
+	close(pagemap_fd);
+	return pa;
+}
+
+rte_iova_t
+rte_mem_virt2iova_with_fd(int fd, const void *virtaddr)
+{
+	/* In IOVA-as-VA mode the address is its own IOVA; fd is not read. */
+	return (rte_eal_iova_mode() == RTE_IOVA_VA) ?
+		(uintptr_t)virtaddr : rte_mem_virt2phy_with_fd(fd, virtaddr);
+}
+
 rte_iova_t
 rte_mem_virt2iova(const void *virtaddr)
 {
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index f9ede5b41..fc3a436e7 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -338,4 +338,7 @@ EXPERIMENTAL {
 
 	# added in 20.05
 	rte_log_can_log;
+
+	rte_mem_virt2iova_with_fd;
+	rte_mem_virt2phy_with_fd;
 };
-- 
2.11.0


-- 
The SmartX email address is only for business purpose. Any sent message 
that is not related to the business is not authorized or permitted by 
SmartX.
本邮箱为北京志凌海纳科技有限公司(SmartX)工作邮箱. 如本邮箱发出的邮件与工作无关,该邮件未得到本公司任何的明示或默示的授权.




More information about the dev mailing list