[dpdk-dev] [PATCH v7] vfio: change to use generic multi-process channel

Tan, Jianfeng jianfeng.tan at intel.com
Sun Apr 15 17:10:44 CEST 2018


Sorry, I forgot to include the version change log. FYI:

v6->v7:
   - Rebase on master.
v5->v6: (Address comments from Anatoly)
   - Naming, return checking, logging.
   - Move vfio action register after rte_bus_probe().


On 4/15/2018 11:06 PM, Jianfeng Tan wrote:
> Previously, vfio used its own private channel for the secondary
> process to get the container fd and group fd from the primary process.
>
> This patch changes it to use the generic mp channel.
>
> Test:
>    1. Bind two NICs to vfio-pci.
>
>    2. Start the primary and secondary process.
>      $ (symmetric_mp) -c 2 -- -p 3 --num-procs=2 --proc-id=0
>      $ (symmetric_mp) -c 4 --proc-type=auto -- -p 3 \
> 				--num-procs=2 --proc-id=1
>
> Cc: anatoly.burakov at intel.com
>
> Signed-off-by: Jianfeng Tan <jianfeng.tan at intel.com>
> Acked-by: Anatoly Burakov <anatoly.burakov at intel.com>
> ---
>   lib/librte_eal/linuxapp/eal/eal.c              |  22 +-
>   lib/librte_eal/linuxapp/eal/eal_vfio.c         | 178 +++++------
>   lib/librte_eal/linuxapp/eal/eal_vfio.h         |  17 +-
>   lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c | 410 ++++---------------------
>   4 files changed, 148 insertions(+), 479 deletions(-)
>
> diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
> index 99c2242..21afa73 100644
> --- a/lib/librte_eal/linuxapp/eal/eal.c
> +++ b/lib/librte_eal/linuxapp/eal/eal.c
> @@ -696,24 +696,8 @@ rte_eal_iopl_init(void)
>   #ifdef VFIO_PRESENT
>   static int rte_eal_vfio_setup(void)
>   {
> -	int vfio_enabled = 0;
> -
>   	if (rte_vfio_enable("vfio"))
>   		return -1;
> -	vfio_enabled = rte_vfio_is_enabled("vfio");
> -
> -	if (vfio_enabled) {
> -
> -		/* if we are primary process, create a thread to communicate with
> -		 * secondary processes. the thread will use a socket to wait for
> -		 * requests from secondary process to send open file descriptors,
> -		 * because VFIO does not allow multiple open descriptors on a group or
> -		 * VFIO container.
> -		 */
> -		if (internal_config.process_type == RTE_PROC_PRIMARY &&
> -				vfio_mp_sync_setup() < 0)
> -			return -1;
> -	}
>   
>   	return 0;
>   }
> @@ -970,6 +954,12 @@ rte_eal_init(int argc, char **argv)
>   		return -1;
>   	}
>   
> +#ifdef VFIO_PRESENT
> +	/* Register mp action after probe() so that we got enough info */
> +	if (rte_vfio_is_enabled("vfio") && vfio_mp_sync_setup() < 0)
> +		return -1;
> +#endif
> +
>   	/* initialize default service/lcore mappings and start running. Ignore
>   	 * -ENOTSUP, as it indicates no service coremask passed to EAL.
>   	 */
> diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c
> index 16ee730..957a537 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_vfio.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c
> @@ -1,5 +1,5 @@
>   /* SPDX-License-Identifier: BSD-3-Clause
> - * Copyright(c) 2010-2014 Intel Corporation
> + * Copyright(c) 2010-2018 Intel Corporation
>    */
>   
>   #include <inttypes.h>
> @@ -290,6 +290,10 @@ rte_vfio_get_group_fd(int iommu_group_num)
>   	int vfio_group_fd;
>   	char filename[PATH_MAX];
>   	struct vfio_group *cur_grp;
> +	struct rte_mp_msg mp_req, *mp_rep;
> +	struct rte_mp_reply mp_reply;
> +	struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
> +	struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param;
>   
>   	/* check if we already have the group descriptor open */
>   	for (i = 0; i < VFIO_MAX_GROUPS; i++)
> @@ -350,50 +354,34 @@ rte_vfio_get_group_fd(int iommu_group_num)
>   		return vfio_group_fd;
>   	}
>   	/* if we're in a secondary process, request group fd from the primary
> -	 * process via our socket
> +	 * process via mp channel.
>   	 */
> -	else {
> -		int socket_fd, ret;
> -
> -		socket_fd = vfio_mp_sync_connect_to_primary();
> -
> -		if (socket_fd < 0) {
> -			RTE_LOG(ERR, EAL, "  cannot connect to primary process!\n");
> -			return -1;
> -		}
> -		if (vfio_mp_sync_send_request(socket_fd, SOCKET_REQ_GROUP) < 0) {
> -			RTE_LOG(ERR, EAL, "  cannot request container fd!\n");
> -			close(socket_fd);
> -			return -1;
> -		}
> -		if (vfio_mp_sync_send_request(socket_fd, iommu_group_num) < 0) {
> -			RTE_LOG(ERR, EAL, "  cannot send group number!\n");
> -			close(socket_fd);
> -			return -1;
> -		}
> -		ret = vfio_mp_sync_receive_request(socket_fd);
> -		switch (ret) {
> -		case SOCKET_NO_FD:
> -			close(socket_fd);
> -			return 0;
> -		case SOCKET_OK:
> -			vfio_group_fd = vfio_mp_sync_receive_fd(socket_fd);
> -			/* if we got the fd, store it and return it */
> -			if (vfio_group_fd > 0) {
> -				close(socket_fd);
> -				cur_grp->group_num = iommu_group_num;
> -				cur_grp->fd = vfio_group_fd;
> -				vfio_cfg.vfio_active_groups++;
> -				return vfio_group_fd;
> -			}
> -			/* fall-through on error */
> -		default:
> -			RTE_LOG(ERR, EAL, "  cannot get container fd!\n");
> -			close(socket_fd);
> -			return -1;
> +	p->req = SOCKET_REQ_GROUP;
> +	p->group_num = iommu_group_num;
> +	strcpy(mp_req.name, EAL_VFIO_MP);
> +	mp_req.len_param = sizeof(*p);
> +	mp_req.num_fds = 0;
> +
> +	vfio_group_fd = -1;
> +	if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 &&
> +	    mp_reply.nb_received == 1) {
> +		mp_rep = &mp_reply.msgs[0];
> +		p = (struct vfio_mp_param *)mp_rep->param;
> +		if (p->result == SOCKET_OK && mp_rep->num_fds == 1) {
> +			cur_grp->group_num = iommu_group_num;
> +			vfio_group_fd = mp_rep->fds[0];
> +			cur_grp->fd = vfio_group_fd;
> +			vfio_cfg.vfio_active_groups++;
> +		} else if (p->result == SOCKET_NO_FD) {
> +			RTE_LOG(ERR, EAL, "  bad VFIO group fd\n");
> +			vfio_group_fd = 0;
>   		}
> +		free(mp_reply.msgs);
>   	}
> -	return -1;
> +
> +	if (vfio_group_fd < 0)
> +		RTE_LOG(ERR, EAL, "  cannot request group fd\n");
> +	return vfio_group_fd;
>   }
>   
>   
> @@ -481,7 +469,10 @@ int
>   rte_vfio_clear_group(int vfio_group_fd)
>   {
>   	int i;
> -	int socket_fd, ret;
> +	struct rte_mp_msg mp_req, *mp_rep;
> +	struct rte_mp_reply mp_reply;
> +	struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
> +	struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param;
>   
>   	if (internal_config.process_type == RTE_PROC_PRIMARY) {
>   
> @@ -495,43 +486,27 @@ rte_vfio_clear_group(int vfio_group_fd)
>   		return 0;
>   	}
>   
> -	/* This is just for SECONDARY processes */
> -	socket_fd = vfio_mp_sync_connect_to_primary();
> -
> -	if (socket_fd < 0) {
> -		RTE_LOG(ERR, EAL, "  cannot connect to primary process!\n");
> -		return -1;
> -	}
> -
> -	if (vfio_mp_sync_send_request(socket_fd, SOCKET_CLR_GROUP) < 0) {
> -		RTE_LOG(ERR, EAL, "  cannot request container fd!\n");
> -		close(socket_fd);
> -		return -1;
> -	}
> +	p->req = SOCKET_CLR_GROUP;
> +	p->group_num = vfio_group_fd;
> +	strcpy(mp_req.name, EAL_VFIO_MP);
> +	mp_req.len_param = sizeof(*p);
> +	mp_req.num_fds = 0;
> +
> +	if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 &&
> +	    mp_reply.nb_received == 1) {
> +		mp_rep = &mp_reply.msgs[0];
> +		p = (struct vfio_mp_param *)mp_rep->param;
> +		if (p->result == SOCKET_OK) {
> +			free(mp_reply.msgs);
> +			return 0;
> +		} else if (p->result == SOCKET_NO_FD)
> +			RTE_LOG(ERR, EAL, "  BAD VFIO group fd!\n");
> +		else
> +			RTE_LOG(ERR, EAL, "  no such VFIO group fd!\n");
>   
> -	if (vfio_mp_sync_send_request(socket_fd, vfio_group_fd) < 0) {
> -		RTE_LOG(ERR, EAL, "  cannot send group fd!\n");
> -		close(socket_fd);
> -		return -1;
> +		free(mp_reply.msgs);
>   	}
>   
> -	ret = vfio_mp_sync_receive_request(socket_fd);
> -	switch (ret) {
> -	case SOCKET_NO_FD:
> -		RTE_LOG(ERR, EAL, "  BAD VFIO group fd!\n");
> -		close(socket_fd);
> -		break;
> -	case SOCKET_OK:
> -		close(socket_fd);
> -		return 0;
> -	case SOCKET_ERR:
> -		RTE_LOG(ERR, EAL, "  Socket error\n");
> -		close(socket_fd);
> -		break;
> -	default:
> -		RTE_LOG(ERR, EAL, "  UNKNOWN reply, %d\n", ret);
> -		close(socket_fd);
> -	}
>   	return -1;
>   }
>   
> @@ -924,6 +899,11 @@ int
>   rte_vfio_get_container_fd(void)
>   {
>   	int ret, vfio_container_fd;
> +	struct rte_mp_msg mp_req, *mp_rep;
> +	struct rte_mp_reply mp_reply;
> +	struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
> +	struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param;
> +
>   
>   	/* if we're in a primary process, try to open the container */
>   	if (internal_config.process_type == RTE_PROC_PRIMARY) {
> @@ -954,33 +934,29 @@ rte_vfio_get_container_fd(void)
>   		}
>   
>   		return vfio_container_fd;
> -	} else {
> -		/*
> -		 * if we're in a secondary process, request container fd from the
> -		 * primary process via our socket
> -		 */
> -		int socket_fd;
> -
> -		socket_fd = vfio_mp_sync_connect_to_primary();
> -		if (socket_fd < 0) {
> -			RTE_LOG(ERR, EAL, "  cannot connect to primary process!\n");
> -			return -1;
> -		}
> -		if (vfio_mp_sync_send_request(socket_fd, SOCKET_REQ_CONTAINER) < 0) {
> -			RTE_LOG(ERR, EAL, "  cannot request container fd!\n");
> -			close(socket_fd);
> -			return -1;
> -		}
> -		vfio_container_fd = vfio_mp_sync_receive_fd(socket_fd);
> -		if (vfio_container_fd < 0) {
> -			RTE_LOG(ERR, EAL, "  cannot get container fd!\n");
> -			close(socket_fd);
> -			return -1;
> +	}
> +	/*
> +	 * if we're in a secondary process, request container fd from the
> +	 * primary process via mp channel
> +	 */
> +	p->req = SOCKET_REQ_CONTAINER;
> +	strcpy(mp_req.name, EAL_VFIO_MP);
> +	mp_req.len_param = sizeof(*p);
> +	mp_req.num_fds = 0;
> +
> +	vfio_container_fd = -1;
> +	if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 &&
> +	    mp_reply.nb_received == 1) {
> +		mp_rep = &mp_reply.msgs[0];
> +		p = (struct vfio_mp_param *)mp_rep->param;
> +		if (p->result == SOCKET_OK && mp_rep->num_fds == 1) {
> +			free(mp_reply.msgs);
> +			return mp_rep->fds[0];
>   		}
> -		close(socket_fd);
> -		return vfio_container_fd;
> +		free(mp_reply.msgs);
>   	}
>   
> +	RTE_LOG(ERR, EAL, "  cannot request container fd\n");
>   	return -1;
>   }
>   
> diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h
> index c788bba..c8c6ee4 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_vfio.h
> +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h
> @@ -84,15 +84,6 @@ struct vfio_iommu_spapr_tce_info {
>   #define VFIO_MAX_GROUPS RTE_MAX_VFIO_GROUPS
>   
>   /*
> - * Function prototypes for VFIO multiprocess sync functions
> - */
> -int vfio_mp_sync_send_request(int socket, int req);
> -int vfio_mp_sync_receive_request(int socket);
> -int vfio_mp_sync_send_fd(int socket, int fd);
> -int vfio_mp_sync_receive_fd(int socket);
> -int vfio_mp_sync_connect_to_primary(void);
> -
> -/*
>    * we don't need to store device fd's anywhere since they can be obtained from
>    * the group fd via an ioctl() call.
>    */
> @@ -141,6 +132,8 @@ vfio_has_supported_extensions(int vfio_container_fd);
>   
>   int vfio_mp_sync_setup(void);
>   
> +#define EAL_VFIO_MP "eal_vfio_mp_sync"
> +
>   #define SOCKET_REQ_CONTAINER 0x100
>   #define SOCKET_REQ_GROUP 0x200
>   #define SOCKET_CLR_GROUP 0x300
> @@ -148,6 +141,12 @@ int vfio_mp_sync_setup(void);
>   #define SOCKET_NO_FD 0x1
>   #define SOCKET_ERR 0xFF
>   
> +struct vfio_mp_param {
> +	int req;
> +	int result;
> +	int group_num;
> +};
> +
>   #endif /* VFIO_PRESENT */
>   
>   #endif /* EAL_VFIO_H_ */
> diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c
> index e19b571..9c202bb 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c
> @@ -1,32 +1,16 @@
>   /* SPDX-License-Identifier: BSD-3-Clause
> - * Copyright(c) 2010-2014 Intel Corporation
> + * Copyright(c) 2010-2018 Intel Corporation
>    */
>   
> +#include <unistd.h>
>   #include <string.h>
> -#include <fcntl.h>
> -#include <sys/socket.h>
> -#include <pthread.h>
> -
> -/* sys/un.h with __USE_MISC uses strlen, which is unsafe */
> -#ifdef __USE_MISC
> -#define REMOVED_USE_MISC
> -#undef __USE_MISC
> -#endif
> -#include <sys/un.h>
> -/* make sure we redefine __USE_MISC only if it was previously undefined */
> -#ifdef REMOVED_USE_MISC
> -#define __USE_MISC
> -#undef REMOVED_USE_MISC
> -#endif
>   
> +#include <rte_compat.h>
>   #include <rte_log.h>
> -#include <rte_eal_memconfig.h>
> -#include <rte_malloc.h>
>   #include <rte_vfio.h>
> +#include <rte_eal.h>
>   
> -#include "eal_filesystem.h"
>   #include "eal_vfio.h"
> -#include "eal_thread.h"
>   
>   /**
>    * @file
> @@ -37,358 +21,78 @@
>   
>   #ifdef VFIO_PRESENT
>   
> -#define SOCKET_PATH_FMT "%s/.%s_mp_socket"
> -#define CMSGLEN (CMSG_LEN(sizeof(int)))
> -#define FD_TO_CMSGHDR(fd, chdr) \
> -		do {\
> -			(chdr).cmsg_len = CMSGLEN;\
> -			(chdr).cmsg_level = SOL_SOCKET;\
> -			(chdr).cmsg_type = SCM_RIGHTS;\
> -			memcpy((chdr).__cmsg_data, &(fd), sizeof(fd));\
> -		} while (0)
> -#define CMSGHDR_TO_FD(chdr, fd) \
> -			memcpy(&(fd), (chdr).__cmsg_data, sizeof(fd))
> -
> -static pthread_t socket_thread;
> -static int mp_socket_fd;
> -
> -
> -/* get socket path (/var/run if root, $HOME otherwise) */
> -static void
> -get_socket_path(char *buffer, int bufsz)
> -{
> -	const char *dir = "/var/run";
> -	const char *home_dir = getenv("HOME");
> -
> -	if (getuid() != 0 && home_dir != NULL)
> -		dir = home_dir;
> -
> -	/* use current prefix as file path */
> -	snprintf(buffer, bufsz, SOCKET_PATH_FMT, dir,
> -			internal_config.hugefile_prefix);
> -}
> -
> -
> -
> -/*
> - * data flow for socket comm protocol:
> - * 1. client sends SOCKET_REQ_CONTAINER or SOCKET_REQ_GROUP
> - * 1a. in case of SOCKET_REQ_GROUP, client also then sends group number
> - * 2. server receives message
> - * 2a. in case of invalid group, SOCKET_ERR is sent back to client
> - * 2b. in case of unbound group, SOCKET_NO_FD is sent back to client
> - * 2c. in case of valid group, SOCKET_OK is sent and immediately followed by fd
> - *
> - * in case of any error, socket is closed.
> - */
> -
> -/* send a request, return -1 on error */
> -int
> -vfio_mp_sync_send_request(int socket, int req)
> -{
> -	struct msghdr hdr;
> -	struct iovec iov;
> -	int buf;
> -	int ret;
> -
> -	memset(&hdr, 0, sizeof(hdr));
> -
> -	buf = req;
> -
> -	hdr.msg_iov = &iov;
> -	hdr.msg_iovlen = 1;
> -	iov.iov_base = (char *) &buf;
> -	iov.iov_len = sizeof(buf);
> -
> -	ret = sendmsg(socket, &hdr, 0);
> -	if (ret < 0)
> -		return -1;
> -	return 0;
> -}
> -
> -/* receive a request and return it */
> -int
> -vfio_mp_sync_receive_request(int socket)
> -{
> -	int buf;
> -	struct msghdr hdr;
> -	struct iovec iov;
> -	int ret, req;
> -
> -	memset(&hdr, 0, sizeof(hdr));
> -
> -	buf = SOCKET_ERR;
> -
> -	hdr.msg_iov = &iov;
> -	hdr.msg_iovlen = 1;
> -	iov.iov_base = (char *) &buf;
> -	iov.iov_len = sizeof(buf);
> -
> -	ret = recvmsg(socket, &hdr, 0);
> -	if (ret < 0)
> -		return -1;
> -
> -	req = buf;
> -
> -	return req;
> -}
> -
> -/* send OK in message, fd in control message */
> -int
> -vfio_mp_sync_send_fd(int socket, int fd)
> +static int
> +vfio_mp_primary(const struct rte_mp_msg *msg, const void *peer)
>   {
> -	int buf;
> -	struct msghdr hdr;
> -	struct cmsghdr *chdr;
> -	char chdr_buf[CMSGLEN];
> -	struct iovec iov;
> +	int fd = -1;
>   	int ret;
> +	struct rte_mp_msg reply;
> +	struct vfio_mp_param *r = (struct vfio_mp_param *)reply.param;
> +	const struct vfio_mp_param *m =
> +		(const struct vfio_mp_param *)msg->param;
>   
> -	chdr = (struct cmsghdr *) chdr_buf;
> -	memset(chdr, 0, sizeof(chdr_buf));
> -	memset(&hdr, 0, sizeof(hdr));
> -
> -	hdr.msg_iov = &iov;
> -	hdr.msg_iovlen = 1;
> -	iov.iov_base = (char *) &buf;
> -	iov.iov_len = sizeof(buf);
> -	hdr.msg_control = chdr;
> -	hdr.msg_controllen = CMSGLEN;
> -
> -	buf = SOCKET_OK;
> -	FD_TO_CMSGHDR(fd, *chdr);
> -
> -	ret = sendmsg(socket, &hdr, 0);
> -	if (ret < 0)
> -		return -1;
> -	return 0;
> -}
> -
> -/* receive OK in message, fd in control message */
> -int
> -vfio_mp_sync_receive_fd(int socket)
> -{
> -	int buf;
> -	struct msghdr hdr;
> -	struct cmsghdr *chdr;
> -	char chdr_buf[CMSGLEN];
> -	struct iovec iov;
> -	int ret, req, fd;
> -
> -	buf = SOCKET_ERR;
> -
> -	chdr = (struct cmsghdr *) chdr_buf;
> -	memset(chdr, 0, sizeof(chdr_buf));
> -	memset(&hdr, 0, sizeof(hdr));
> -
> -	hdr.msg_iov = &iov;
> -	hdr.msg_iovlen = 1;
> -	iov.iov_base = (char *) &buf;
> -	iov.iov_len = sizeof(buf);
> -	hdr.msg_control = chdr;
> -	hdr.msg_controllen = CMSGLEN;
> -
> -	ret = recvmsg(socket, &hdr, 0);
> -	if (ret < 0)
> -		return -1;
> -
> -	req = buf;
> -
> -	if (req != SOCKET_OK)
> -		return -1;
> -
> -	CMSGHDR_TO_FD(*chdr, fd);
> -
> -	return fd;
> -}
> -
> -/* connect socket_fd in secondary process to the primary process's socket */
> -int
> -vfio_mp_sync_connect_to_primary(void)
> -{
> -	struct sockaddr_un addr;
> -	socklen_t sockaddr_len;
> -	int socket_fd;
> -
> -	/* set up a socket */
> -	socket_fd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
> -	if (socket_fd < 0) {
> -		RTE_LOG(ERR, EAL, "Failed to create socket!\n");
> +	if (msg->len_param != sizeof(*m)) {
> +		RTE_LOG(ERR, EAL, "vfio received invalid message!\n");
>   		return -1;
>   	}
>   
> -	get_socket_path(addr.sun_path, sizeof(addr.sun_path));
> -	addr.sun_family = AF_UNIX;
> -
> -	sockaddr_len = sizeof(struct sockaddr_un);
> -
> -	if (connect(socket_fd, (struct sockaddr *) &addr, sockaddr_len) == 0)
> -		return socket_fd;
> -
> -	/* if connect failed */
> -	close(socket_fd);
> -	return -1;
> -}
> -
> +	memset(&reply, 0, sizeof(reply));
>   
> -
> -/*
> - * socket listening thread for primary process
> - */
> -static __attribute__((noreturn)) void *
> -vfio_mp_sync_thread(void __rte_unused * arg)
> -{
> -	int ret, fd, vfio_data;
> -
> -	/* wait for requests on the socket */
> -	for (;;) {
> -		int conn_sock;
> -		struct sockaddr_un addr;
> -		socklen_t sockaddr_len = sizeof(addr);
> -
> -		/* this is a blocking call */
> -		conn_sock = accept(mp_socket_fd, (struct sockaddr *) &addr,
> -				&sockaddr_len);
> -
> -		/* just restart on error */
> -		if (conn_sock == -1)
> -			continue;
> -
> -		/* set socket to linger after close */
> -		struct linger l;
> -		l.l_onoff = 1;
> -		l.l_linger = 60;
> -
> -		if (setsockopt(conn_sock, SOL_SOCKET, SO_LINGER, &l, sizeof(l)) < 0)
> -			RTE_LOG(WARNING, EAL, "Cannot set SO_LINGER option "
> -					"on listen socket (%s)\n", strerror(errno));
> -
> -		ret = vfio_mp_sync_receive_request(conn_sock);
> -
> -		switch (ret) {
> -		case SOCKET_REQ_CONTAINER:
> -			fd = rte_vfio_get_container_fd();
> -			if (fd < 0)
> -				vfio_mp_sync_send_request(conn_sock, SOCKET_ERR);
> -			else
> -				vfio_mp_sync_send_fd(conn_sock, fd);
> -			if (fd >= 0)
> -				close(fd);
> -			break;
> -		case SOCKET_REQ_GROUP:
> -			/* wait for group number */
> -			vfio_data = vfio_mp_sync_receive_request(conn_sock);
> -			if (vfio_data < 0) {
> -				close(conn_sock);
> -				continue;
> -			}
> -
> -			fd = rte_vfio_get_group_fd(vfio_data);
> -
> -			if (fd < 0)
> -				vfio_mp_sync_send_request(conn_sock, SOCKET_ERR);
> +	switch (m->req) {
> +	case SOCKET_REQ_GROUP:
> +		r->req = SOCKET_REQ_GROUP;
> +		r->group_num = m->group_num;
> +		fd = rte_vfio_get_group_fd(m->group_num);
> +		if (fd < 0)
> +			r->result = SOCKET_ERR;
> +		else if (fd == 0)
>   			/* if VFIO group exists but isn't bound to VFIO driver */
> -			else if (fd == 0)
> -				vfio_mp_sync_send_request(conn_sock, SOCKET_NO_FD);
> +			r->result = SOCKET_NO_FD;
> +		else {
>   			/* if group exists and is bound to VFIO driver */
> -			else {
> -				vfio_mp_sync_send_request(conn_sock, SOCKET_OK);
> -				vfio_mp_sync_send_fd(conn_sock, fd);
> -			}
> -			break;
> -		case SOCKET_CLR_GROUP:
> -			/* wait for group fd */
> -			vfio_data = vfio_mp_sync_receive_request(conn_sock);
> -			if (vfio_data < 0) {
> -				close(conn_sock);
> -				continue;
> -			}
> -
> -			ret = rte_vfio_clear_group(vfio_data);
> -
> -			if (ret < 0)
> -				vfio_mp_sync_send_request(conn_sock, SOCKET_NO_FD);
> -			else
> -				vfio_mp_sync_send_request(conn_sock, SOCKET_OK);
> -			break;
> -		default:
> -			vfio_mp_sync_send_request(conn_sock, SOCKET_ERR);
> -			break;
> +			r->result = SOCKET_OK;
> +			reply.num_fds = 1;
> +			reply.fds[0] = fd;
>   		}
> -		close(conn_sock);
> -	}
> -}
> -
> -static int
> -vfio_mp_sync_socket_setup(void)
> -{
> -	int ret, socket_fd;
> -	struct sockaddr_un addr;
> -	socklen_t sockaddr_len;
> -
> -	/* set up a socket */
> -	socket_fd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
> -	if (socket_fd < 0) {
> -		RTE_LOG(ERR, EAL, "Failed to create socket!\n");
> -		return -1;
> -	}
> -
> -	get_socket_path(addr.sun_path, sizeof(addr.sun_path));
> -	addr.sun_family = AF_UNIX;
> -
> -	sockaddr_len = sizeof(struct sockaddr_un);
> -
> -	unlink(addr.sun_path);
> -
> -	ret = bind(socket_fd, (struct sockaddr *) &addr, sockaddr_len);
> -	if (ret) {
> -		RTE_LOG(ERR, EAL, "Failed to bind socket: %s!\n", strerror(errno));
> -		close(socket_fd);
> -		return -1;
> -	}
> -
> -	ret = listen(socket_fd, 50);
> -	if (ret) {
> -		RTE_LOG(ERR, EAL, "Failed to listen: %s!\n", strerror(errno));
> -		close(socket_fd);
> +		break;
> +	case SOCKET_CLR_GROUP:
> +		r->req = SOCKET_CLR_GROUP;
> +		r->group_num = m->group_num;
> +		if (rte_vfio_clear_group(m->group_num) < 0)
> +			r->result = SOCKET_NO_FD;
> +		else
> +			r->result = SOCKET_OK;
> +		break;
> +	case SOCKET_REQ_CONTAINER:
> +		r->req = SOCKET_REQ_CONTAINER;
> +		fd = rte_vfio_get_container_fd();
> +		if (fd < 0)
> +			r->result = SOCKET_ERR;
> +		else {
> +			r->result = SOCKET_OK;
> +			reply.num_fds = 1;
> +			reply.fds[0] = fd;
> +		}
> +		break;
> +	default:
> +		RTE_LOG(ERR, EAL, "vfio received invalid message!\n");
>   		return -1;
>   	}
>   
> -	/* save the socket in local configuration */
> -	mp_socket_fd = socket_fd;
> +	strcpy(reply.name, EAL_VFIO_MP);
> +	reply.len_param = sizeof(*r);
>   
> -	return 0;
> +	ret = rte_mp_reply(&reply, peer);
> +	if (m->req == SOCKET_REQ_CONTAINER && fd >= 0)
> +		close(fd);
> +	return ret;
>   }
>   
> -/*
> - * set up a local socket and tell it to listen for incoming connections
> - */
>   int
>   vfio_mp_sync_setup(void)
>   {
> -	int ret;
> -	char thread_name[RTE_MAX_THREAD_NAME_LEN];
> -
> -	if (vfio_mp_sync_socket_setup() < 0) {
> -		RTE_LOG(ERR, EAL, "Failed to set up local socket!\n");
> -		return -1;
> -	}
> -
> -	ret = pthread_create(&socket_thread, NULL,
> -			vfio_mp_sync_thread, NULL);
> -	if (ret) {
> -		RTE_LOG(ERR, EAL,
> -			"Failed to create thread for communication with secondary processes!\n");
> -		close(mp_socket_fd);
> -		return -1;
> -	}
> -
> -	/* Set thread_name for aid in debugging. */
> -	snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "vfio-sync");
> -	ret = rte_thread_setname(socket_thread, thread_name);
> -	if (ret)
> -		RTE_LOG(DEBUG, EAL,
> -			"Failed to set thread name for secondary processes!\n");
> +	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
> +		return rte_mp_action_register(EAL_VFIO_MP, vfio_mp_primary);
>   
>   	return 0;
>   }



More information about the dev mailing list