[dpdk-dev] [PATCH] eal/vfio: share the default container in multi process

Darek Stojaczyk dariusz.stojaczyk at intel.com
Wed Oct 3 14:39:25 CEST 2018


Until now, each process in MP had a separate container
and relied on the primary process to register all memsegs.

Mapping external memory via rte_vfio_container_dma_map()
in secondary processes was broken, because the default
(process-local) container had no groups bound. There was
not even a way to bind any groups to it, because the container
fd was deeply encapsulated within EAL.

This patch introduces a new SOCKET_REQ_DEFAULT_CONTAINER
message type for MP synchronization, makes all processes
within a MP party use a single default container, and hence
fixes rte_vfio_container_dma_map() for secondary processes.

As far as I can tell, this behavior has always been the same,
but it became invalid/insufficient once mapping external
memory was allowed.

Fixes: 73a639085938 ("vfio: allow to map other memory regions")
Cc: anatoly.burakov at intel.com
Cc: pawelx.wodkowski at intel.com
Cc: gowrishankar.m at linux.vnet.ibm.com
Cc: stable at dpdk.org

While here, fix up the comment on rte_vfio_get_container_fd().
This function always opens a new container, never reuses
an old one.

Signed-off-by: Darek Stojaczyk <dariusz.stojaczyk at intel.com>
---
 lib/librte_eal/common/include/rte_vfio.h      |  2 +-
 lib/librte_eal/linuxapp/eal/eal_vfio.c        | 49 ++++++++++++++++++-
 lib/librte_eal/linuxapp/eal/eal_vfio.h        |  4 ++
 .../linuxapp/eal/eal_vfio_mp_sync.c           | 11 +++++
 4 files changed, 64 insertions(+), 2 deletions(-)

diff --git a/lib/librte_eal/common/include/rte_vfio.h b/lib/librte_eal/common/include/rte_vfio.h
index 5ca13fcce..1064426eb 100644
--- a/lib/librte_eal/common/include/rte_vfio.h
+++ b/lib/librte_eal/common/include/rte_vfio.h
@@ -227,7 +227,7 @@ rte_vfio_get_group_num(const char *sysfs_base,
 		      const char *dev_addr, int *iommu_group_num);
 
 /**
- * Open VFIO container fd or get an existing one
+ * Open a new VFIO container fd
  *
  * This function is only relevant to linux and will return
  * an error on BSD.
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c
index c68dc38e0..ee4a1d90b 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c
@@ -896,7 +896,15 @@ rte_vfio_enable(const char *modname)
 		return 0;
 	}
 
-	default_vfio_cfg->vfio_container_fd = rte_vfio_get_container_fd();
+	if (internal_config.process_type == RTE_PROC_PRIMARY) {
+		/* open a new container */
+		default_vfio_cfg->vfio_container_fd =
+				rte_vfio_get_container_fd();
+	} else {
+		/* get the default container from the primary process */
+		default_vfio_cfg->vfio_container_fd =
+				vfio_get_default_container_fd();
+	}
 
 	/* check if we have VFIO driver enabled */
 	if (default_vfio_cfg->vfio_container_fd != -1) {
@@ -916,6 +924,45 @@ rte_vfio_is_enabled(const char *modname)
 	return default_vfio_cfg->vfio_enabled && mod_available;
 }
 
+/* get the default container fd, or -1; secondaries ask the primary */
+int
+vfio_get_default_container_fd(void)
+{
+	struct rte_mp_msg mp_req, *mp_rep;
+	struct rte_mp_reply mp_reply;
+	struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
+	struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param;
+	int container_fd;
+
+	if (default_vfio_cfg->vfio_enabled)
+		return default_vfio_cfg->vfio_container_fd;
+
+	/* the primary has nobody to request the container fd from */
+	if (internal_config.process_type == RTE_PROC_PRIMARY)
+		return -1;
+
+	p->req = SOCKET_REQ_DEFAULT_CONTAINER;
+	strcpy(mp_req.name, EAL_VFIO_MP);
+	mp_req.len_param = sizeof(*p);
+	mp_req.num_fds = 0;
+
+	if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 &&
+	    mp_reply.nb_received == 1) {
+		mp_rep = &mp_reply.msgs[0];
+		p = (struct vfio_mp_param *)mp_rep->param;
+		if (p->result == SOCKET_OK && mp_rep->num_fds == 1) {
+			/* mp_rep points into msgs; read the fd before free */
+			container_fd = mp_rep->fds[0];
+			free(mp_reply.msgs);
+			return container_fd;
+		}
+		free(mp_reply.msgs);
+	}
+
+	RTE_LOG(ERR, EAL, "  cannot request default container fd\n");
+	return -1;
+}
+
 const struct vfio_iommu_type *
 vfio_set_iommu_type(int vfio_container_fd)
 {
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h
index 68d4750a5..63ae115c3 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.h
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h
@@ -115,6 +115,9 @@ struct vfio_iommu_type {
 	vfio_dma_func_t dma_map_func;
 };
 
+/* get the vfio container that devices are bound to by default */
+int vfio_get_default_container_fd(void);
+
 /* pick IOMMU type. returns a pointer to vfio_iommu_type or NULL for error */
 const struct vfio_iommu_type *
 vfio_set_iommu_type(int vfio_container_fd);
@@ -129,6 +132,7 @@ int vfio_mp_sync_setup(void);
 
 #define SOCKET_REQ_CONTAINER 0x100
 #define SOCKET_REQ_GROUP 0x200
+#define SOCKET_REQ_DEFAULT_CONTAINER 0x400
 #define SOCKET_OK 0x0
 #define SOCKET_NO_FD 0x1
 #define SOCKET_ERR 0xFF
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c
index 680a24aae..a1e8c834f 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c
@@ -66,6 +66,17 @@ vfio_mp_primary(const struct rte_mp_msg *msg, const void *peer)
 			reply.fds[0] = fd;
 		}
 		break;
+	case SOCKET_REQ_DEFAULT_CONTAINER:
+		r->req = SOCKET_REQ_DEFAULT_CONTAINER;
+		fd = vfio_get_default_container_fd();
+		if (fd < 0)
+			r->result = SOCKET_ERR;
+		else {
+			r->result = SOCKET_OK;
+			reply.num_fds = 1;
+			reply.fds[0] = fd;
+		}
+		break;
 	default:
 		RTE_LOG(ERR, EAL, "vfio received invalid message!\n");
 		return -1;
-- 
2.17.1



More information about the dev mailing list