[dpdk-dev] [PATCH RFC v2 08/12] lib/librte_vhost: vhost-user support

Linhaifeng haifeng.lin at huawei.com
Thu Dec 11 07:04:08 CET 2014



On 2014/12/11 5:37, Huawei Xie wrote:
> vhost-user support
> 
> 
> Signed-off-by: Huawei Xie <huawei.xie at intel.com>
> ---
>  lib/librte_vhost/Makefile                     |   5 +-
>  lib/librte_vhost/vhost-net.h                  |   4 +
>  lib/librte_vhost/vhost_cuse/virtio-net-cdev.c |   9 +
>  lib/librte_vhost/vhost_user/vhost-net-user.c  | 422 ++++++++++++++++++++++++++
>  lib/librte_vhost/vhost_user/vhost-net-user.h  | 108 +++++++
>  lib/librte_vhost/vhost_user/virtio-net-user.c | 199 ++++++++++++
>  lib/librte_vhost/vhost_user/virtio-net-user.h |  48 +++
>  lib/librte_vhost/virtio-net.c                 |  16 +-
>  lib/librte_vhost/virtio-net.h                 |  43 +++
>  9 files changed, 842 insertions(+), 12 deletions(-)
>  create mode 100644 lib/librte_vhost/vhost_user/vhost-net-user.c
>  create mode 100644 lib/librte_vhost/vhost_user/vhost-net-user.h
>  create mode 100644 lib/librte_vhost/vhost_user/virtio-net-user.c
>  create mode 100644 lib/librte_vhost/vhost_user/virtio-net-user.h
>  create mode 100644 lib/librte_vhost/virtio-net.h
> 
> diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
> index e0d0ef6..b2f14a0 100644
> --- a/lib/librte_vhost/Makefile
> +++ b/lib/librte_vhost/Makefile
> @@ -34,10 +34,11 @@ include $(RTE_SDK)/mk/rte.vars.mk
>  # library name
>  LIB = librte_vhost.a
>  
> -CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -I vhost_cuse -O3 -D_FILE_OFFSET_BITS=64 -lfuse
> +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -I vhost_cuse -I vhost_user -O3 -D_FILE_OFFSET_BITS=64 -lfuse
>  LDFLAGS += -lfuse
>  # all source are stored in SRCS-y
> -SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_cuse/vhost-net-cdev.c vhost_cuse/virtio-net-cdev.c virtio-net.c vhost_rxtx.c
> +#SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_cuse/vhost-net-cdev.c vhost_cuse/virtio-net-cdev.c virtio-net.c vhost_rxtx.c
> +SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := vhost_user/vhost-net-user.c vhost_user/virtio-net-user.c vhost_user/fd_man.c virtio-net.c vhost_rxtx.c
>  
>  # install includes
>  SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h
> diff --git a/lib/librte_vhost/vhost-net.h b/lib/librte_vhost/vhost-net.h
> index f7e96fd..f9ec40b 100644
> --- a/lib/librte_vhost/vhost-net.h
> +++ b/lib/librte_vhost/vhost-net.h
> @@ -41,8 +41,12 @@
>  
>  #include <rte_log.h>
>  
> +#include "rte_virtio_net.h"
> +
>  #define VHOST_MEMORY_MAX_NREGIONS 8
>  
> +extern struct vhost_net_device_ops const *ops;
> +
>  /* Macros for printing using RTE_LOG */
>  #define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1
>  #define RTE_LOGTYPE_VHOST_DATA   RTE_LOGTYPE_USER1
> diff --git a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
> index edcbc10..8ac3360 100644
> --- a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
> +++ b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
> @@ -268,6 +268,7 @@ cuse_set_mem_table(struct vhost_device_ctx ctx,
>  	struct vhost_memory_region *mem_regions = (void *)(uintptr_t)
>  		((uint64_t)(uintptr_t)mem_regions_addr + size);
>  	uint64_t base_address = 0, mapped_address, mapped_size;
> +	struct virtio_dev *dev;
>  
>  	for (idx = 0; idx < nregions; idx++) {
>  		regions[idx].guest_phys_address =
> @@ -335,6 +336,14 @@ cuse_set_mem_table(struct vhost_device_ctx ctx,
>  			regions[idx].guest_phys_address;
>  	}
>  
> +	dev = get_device(ctx);
> +	if (dev && dev->mem && dev->mmaped_address) {
> +		munmap((void *)(uintptr_t)dev->mmaped_address,
> +			(size_t)dev->mmaped_size);
> +		free(dev->mem);
> +		dev->mem = NULL;
> +	}
> +
>  	ops->set_mem_table(ctx, &regions[0], valid_regions);
>  	return 0;
>  }
> diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c
> new file mode 100644
> index 0000000..841d7e6
> --- /dev/null
> +++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
> @@ -0,0 +1,422 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> + *   All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of Intel Corporation nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <limits.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +#include <string.h>
> +#include <sys/types.h>
> +#include <sys/socket.h>
> +#include <sys/un.h>
> +#include <errno.h>
> +
> +#include <rte_log.h>
> +#include <rte_virtio_net.h>
> +
> +#include "fd_man.h"
> +#include "vhost-net-user.h"
> +#include "vhost-net.h"
> +#include "virtio-net-user.h"
> +
> +static void vserver_new_vq_conn(int fd, uint64_t data);
> +static void vserver_message_handler(int fd, uint64_t dat);
> +struct vhost_net_device_ops const *ops;
> +
> +static struct vhost_server *g_vhost_server;

Only support one vhost-user port ?

> +
> +static const char *vhost_message_str[VHOST_USER_MAX] = {
> +	[VHOST_USER_NONE] = "VHOST_USER_NONE",
> +	[VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES",
> +	[VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES",
> +	[VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER",
> +	[VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER",
> +	[VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE",
> +	[VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE",
> +	[VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD",
> +	[VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM",
> +	[VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR",
> +	[VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE",
> +	[VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE",
> +	[VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK",
> +	[VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL",
> +	[VHOST_USER_SET_VRING_ERR]  = "VHOST_USER_SET_VRING_ERR"
> +};
> +
> +/**
> + * Create a unix domain socket, bind to path and listen for connection.
> + * @return
> + *  socket fd or -1 on failure
> + */
> +static int
> +uds_socket(const char *path)
> +{
> +	struct sockaddr_un un;
> +	int sockfd;
> +	int ret;
> +
> +	if (path == NULL)
> +		return -1;
> +
> +	sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
> +	if (sockfd < 0)
> +		return -1;
> +	RTE_LOG(INFO, VHOST_CONFIG, "socket created, fd:%d\n", sockfd);
> +
> +	memset(&un, 0, sizeof(un));
> +	un.sun_family = AF_UNIX;
> +	snprintf(un.sun_path, sizeof(un.sun_path), "%s", path);
> +	ret = bind(sockfd, (struct sockaddr *)&un, sizeof(un));
> +	if (ret == -1)
> +		goto err;
> +	RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path);
> +
> +	ret = listen(sockfd, 1);
> +	if (ret == -1)
> +		goto err;
> +
> +	return sockfd;
> +
> +err:
> +	close(sockfd);
> +	return -1;
> +}
> +
> +/* return bytes# of read on success or negative val on failure. */
> +static int
> +read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
> +{
> +	struct iovec iov;
> +	struct msghdr msgh = { 0 };
> +	size_t fdsize = fd_num * sizeof(int);
> +	char control[CMSG_SPACE(fdsize)];
> +	struct cmsghdr *cmsg;
> +	int ret;
> +
> +	iov.iov_base = buf;
> +	iov.iov_len  = buflen;
> +
> +	msgh.msg_iov = &iov;
> +	msgh.msg_iovlen = 1;
> +	msgh.msg_control = control;
> +	msgh.msg_controllen = sizeof(control);
> +
> +	ret = recvmsg(sockfd, &msgh, 0);
> +	if (ret <= 0) {
> +		RTE_LOG(ERR, VHOST_CONFIG, "recvmsg failed\n");
> +		return ret;
> +	}
> +
> +	if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
> +		RTE_LOG(ERR, VHOST_CONFIG, "Truncted msg\n");
> +		return -1;
> +	}
> +
> +	for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL;
> +		cmsg = CMSG_NXTHDR(&msgh, cmsg)) {
> +		if ((cmsg->cmsg_level == SOL_SOCKET) &&
> +			(cmsg->cmsg_type == SCM_RIGHTS)) {
> +			memcpy(fds, CMSG_DATA(cmsg), fdsize);
> +			break;
> +		}
> +	}
> +
> +	return ret;
> +}
> +
> +/* return bytes# of read on success or negative val on failure. */
> +static int
> +read_vhost_message(int sockfd, struct VhostUserMsg *msg)
> +{
> +	int ret;
> +
> +	ret = read_fd_message(sockfd, (char *)msg, VHOST_USER_HDR_SIZE,
> +		msg->fds, VHOST_MEMORY_MAX_NREGIONS);
> +	if (ret <= 0)
> +		return ret;
> +
> +	if (msg && msg->size) {
> +		if (msg->size > sizeof(msg->payload)) {
> +			RTE_LOG(ERR, VHOST_CONFIG,
> +				"invalid msg size: %d\n", msg->size);
> +			return -1;
> +		}
> +		ret = read(sockfd, &msg->payload, msg->size);
> +		if (ret <= 0)
> +			return ret;
> +		if (ret != (int)msg->size) {
> +			RTE_LOG(ERR, VHOST_CONFIG,
> +				"read control message failed\n");
> +			return -1;
> +		}
> +	}
> +
> +	return ret;
> +}
> +
> +static int
> +send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
> +{
> +
> +	struct iovec iov;
> +	struct msghdr msgh = { 0 };
> +	size_t fdsize = fd_num * sizeof(int);
> +	char control[CMSG_SPACE(fdsize)];
> +	struct cmsghdr *cmsg;
> +	int ret;
> +
> +	iov.iov_base = buf;
> +	iov.iov_len = buflen;
> +
> +	msgh.msg_iov = &iov;
> +	msgh.msg_iovlen = 1;
> +
> +	if (fds && fd_num > 0) {
> +		msgh.msg_control = control;
> +		msgh.msg_controllen = sizeof(control);
> +		cmsg = CMSG_FIRSTHDR(&msgh);
> +		cmsg->cmsg_len = CMSG_LEN(fdsize);
> +		cmsg->cmsg_level = SOL_SOCKET;
> +		cmsg->cmsg_type = SCM_RIGHTS;
> +		memcpy(CMSG_DATA(cmsg), fds, fdsize);
> +	} else {
> +		msgh.msg_control = NULL;
> +		msgh.msg_controllen = 0;
> +	}
> +
> +	do {
> +		ret = sendmsg(sockfd, &msgh, 0);
> +	} while (ret < 0 && errno == EINTR);
> +
> +	if (ret < 0) {
> +		RTE_LOG(ERR, VHOST_CONFIG,  "sendmsg error\n");
> +		return ret;
> +	}
> +
> +	return ret;
> +}
> +
> +static int
> +send_vhost_message(int sockfd, struct VhostUserMsg *msg)
> +{
> +	int ret;
> +
> +	if (!msg)
> +		return 0;
> +
> +	msg->flags &= ~VHOST_USER_VERSION_MASK;
> +	msg->flags |= VHOST_USER_VERSION;
> +	sg->flags |= VHOST_USER_REPLY_MASK;
> +
> +	ret = send_fd_message(sockfd, (char *)msg,
> +		VHOST_USER_HDR_SIZE + msg->size, NULL, 0);
> +
> +	return ret;
> +}
> +
> +/* call back when there is new virtio connection.  */
> +static void
> +vserver_new_vq_conn(int fd, uint64_t dat)

why not use vserver_new_vq_conn(int fd, void* dat) ?


> +{
> +	struct vhost_server *vserver = (void *)(uintptr_t)dat;
> +	int conn_fd;
> +	uint32_t fh;
> +	struct vhost_device_ctx vdev_ctx = { 0 };
> +
> +	conn_fd = accept(fd, NULL, NULL);
> +	RTE_LOG(INFO, VHOST_CONFIG,
> +		"new virtio connection is %d\n", conn_fd);
> +	if (conn_fd < 0)
> +		return;
> +
> +	fh = ops->new_device(vdev_ctx);
> +	RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", fh);
> +
> +	fdset_add(&vserver->fdset,
> +		conn_fd, vserver_message_handler, NULL, fh);
> +}
> +
> +/* callback when there is message on the connfd */
> +static void
> +vserver_message_handler(int connfd, uint64_t dat)
> +{
> +	struct vhost_device_ctx ctx;
> +	uint32_t fh = (uint32_t)dat;
> +	struct VhostUserMsg msg;
> +	uint64_t features;
> +	int ret;
> +
> +	ctx.fh = fh;
> +	ret = read_vhost_message(connfd, &msg);
> +	if (ret < 0) {
> +		RTE_LOG(ERR, VHOST_CONFIG,
> +			"vhost read message failed\n");
> +
> +		/*TODO: cleanup */
> +		close(connfd);
> +		fdset_del(&g_vhost_server->fdset, connfd);
> +		ops->destroy_device(ctx);
> +
> +		return;
> +	} else if (ret == 0) {
> +		RTE_LOG(INFO, VHOST_CONFIG,
> +			"vhost peer closed\n");
> +
> +		/*TODO: cleanup */
> +		close(connfd);
> +		fdset_del(&g_vhost_server->fdset, connfd);
> +		ops->destroy_device(ctx);
> +
> +		return;
> +	}
> +	if (msg.request > VHOST_USER_MAX) {
> +		RTE_LOG(ERR, VHOST_CONFIG,
> +			"vhost read incorrect message\n");
> +
> +		/*TODO: cleanup */
> +		close(connfd);
> +		fdset_del(&g_vhost_server->fdset, connfd);
> +
> +		return;
> +	}
> +
> +	RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
> +		vhost_message_str[msg.request]);
> +	switch (msg.request) {
> +	case VHOST_USER_GET_FEATURES:
> +		ret = ops->get_features(ctx, &features);
> +		msg.payload.u64 = ret;
> +		msg.size = sizeof(msg.payload.u64);
> +		send_vhost_message(connfd, &msg);
> +		break;
> +	case VHOST_USER_SET_FEATURES:
> +		ops->set_features(ctx, &features);
> +		break;
> +
> +	case VHOST_USER_SET_OWNER:
> +		ops->set_owner(ctx);
> +		break;
> +	case VHOST_USER_RESET_OWNER:
> +		ops->reset_owner(ctx);
> +		break;
> +
> +	case VHOST_USER_SET_MEM_TABLE:
> +		user_set_mem_table(ctx, &msg);
> +		break;
> +
> +	case VHOST_USER_SET_LOG_BASE:
> +	case VHOST_USER_SET_LOG_FD:
> +		RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n");
> +		break;
> +
> +	case VHOST_USER_SET_VRING_NUM:
> +		ops->set_vring_num(ctx, &msg.payload.state);
> +		break;
> +	case VHOST_USER_SET_VRING_ADDR:
> +		ops->set_vring_addr(ctx, &msg.payload.addr);
> +		break;
> +	case VHOST_USER_SET_VRING_BASE:
> +		ops->set_vring_base(ctx, &msg.payload.state);
> +		break;
> +
> +	case VHOST_USER_GET_VRING_BASE:
> +		ret = user_get_vring_base(ctx, &msg.payload.state);
> +		msg.size = sizeof(msg.payload.state);
> +		send_vhost_message(connfd, &msg);
> +		break;
> +
> +	case VHOST_USER_SET_VRING_KICK:
> +		user_set_vring_kick(ctx, &msg);
> +		break;
> +	case VHOST_USER_SET_VRING_CALL:
> +		user_set_vring_call(ctx, &msg);
> +		break;
> +
> +	case VHOST_USER_SET_VRING_ERR:
> +		RTE_LOG(INFO, VHOST_CONFIG, "not implemented\n");
> +		break;
> +
> +	default:
> +		break;
> +
> +	}
> +}
> +
> +
> +/**
> + * Creates and initialise the vhost server.
> + */
> +int
> +rte_vhost_driver_register(const char *path)
> +{
> +
> +	struct vhost_server *vserver;
> +
> +	if (g_vhost_server != NULL)
> +		return -1;
> +
> +	vserver = calloc(sizeof(struct vhost_server), 1);
> +	if (vserver == NULL)
> +		return -1;
> +
> +	fdset_init(&vserver->fdset);
> +
> +	unlink(path);
> +
> +	vserver->listenfd = uds_socket(path);
> +	if (vserver->listenfd < 0) {
> +		free(vserver);
> +		return -1;
> +	}
> +	vserver->path = path;
> +
> +	fdset_add(&vserver->fdset, vserver->listenfd,
> +		vserver_new_vq_conn, NULL,
> +		(uint64_t)(uintptr_t)vserver);
> +
> +	ops = get_virtio_net_callbacks();
> +
> +	g_vhost_server = vserver;
> +
> +	return 0;
> +}
> +
> +
> +int
> +rte_vhost_driver_session_start(void)
> +{
> +	fdset_event_dispatch(&g_vhost_server->fdset);
> +	return 0;
> +}
> +
> diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.h b/lib/librte_vhost/vhost_user/vhost-net-user.h
> new file mode 100644
> index 0000000..c138844
> --- /dev/null
> +++ b/lib/librte_vhost/vhost_user/vhost-net-user.h
> @@ -0,0 +1,108 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> + *   All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of Intel Corporation nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#ifndef _VHOST_NET_USER_H
> +#define _VHOST_NET_USER_H
> +
> +#include <stdint.h>
> +#include <linux/vhost.h>
> +
> +#include "fd_man.h"
> +
> +struct vhost_server {
> +	const char *path; /**< The path the uds is bind to. */
> +	int listenfd;     /**< The listener sockfd. */
> +	struct fdset fdset; /**< The fd list this vhost server manages. */
> +};
> +
> +/* refer to hw/virtio/vhost-user.c */
> +
> +#define VHOST_MEMORY_MAX_NREGIONS    8
> +
> +typedef enum VhostUserRequest {
> +	VHOST_USER_NONE = 0,
> +	VHOST_USER_GET_FEATURES = 1,
> +	VHOST_USER_SET_FEATURES = 2,
> +	VHOST_USER_SET_OWNER = 3,
> +	VHOST_USER_RESET_OWNER = 4,
> +	VHOST_USER_SET_MEM_TABLE = 5,
> +	VHOST_USER_SET_LOG_BASE = 6,
> +	VHOST_USER_SET_LOG_FD = 7,
> +	VHOST_USER_SET_VRING_NUM = 8,
> +	VHOST_USER_SET_VRING_ADDR = 9,
> +	VHOST_USER_SET_VRING_BASE = 10,
> +	VHOST_USER_GET_VRING_BASE = 11,
> +	VHOST_USER_SET_VRING_KICK = 12,
> +	VHOST_USER_SET_VRING_CALL = 13,
> +	VHOST_USER_SET_VRING_ERR = 14,
> +	VHOST_USER_MAX
> +} VhostUserRequest;
> +
> +typedef struct VhostUserMemoryRegion {
> +	uint64_t guest_phys_addr;
> +	uint64_t memory_size;
> +	uint64_t userspace_addr;
> +	uint64_t mmap_offset;
> +} VhostUserMemoryRegion;
> +
> +typedef struct VhostUserMemory {
> +	uint32_t nregions;
> +	uint32_t padding;
> +	VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
> +} VhostUserMemory;
> +
> +typedef struct VhostUserMsg {
> +	VhostUserRequest request;
> +
> +#define VHOST_USER_VERSION_MASK     (0x3)
> +#define VHOST_USER_REPLY_MASK       (0x1 << 2)
> +	uint32_t flags;
> +	uint32_t size; /* the following payload size */
> +	union {
> +#define VHOST_USER_VRING_IDX_MASK   (0xff)
> +#define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
> +		uint64_t u64;
> +		struct vhost_vring_state state;
> +		struct vhost_vring_addr addr;
> +	VhostUserMemory memory;
> +	} payload;
> +	int fds[VHOST_MEMORY_MAX_NREGIONS];
> +} __attribute((packed)) VhostUserMsg;
> +
> +#define VHOST_USER_HDR_SIZE (intptr_t)(&((VhostUserMsg *)0)->payload.u64)
> +
> +/* The version of the protocol we support */
> +#define VHOST_USER_VERSION    (0x1)
> +
> +/*****************************************************************************/
> +#endif
> diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c b/lib/librte_vhost/vhost_user/virtio-net-user.c
> new file mode 100644
> index 0000000..ad59fcc
> --- /dev/null
> +++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
> @@ -0,0 +1,199 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> + *   All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of Intel Corporation nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +#include <sys/mman.h>
> +
> +#include <rte_log.h>
> +
> +#include "virtio-net.h"
> +#include "virtio-net-user.h"
> +#include "vhost-net-user.h"
> +#include "vhost-net.h"
> +
> +int
> +user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
> +{
> +	unsigned int idx;
> +	struct VhostUserMemory memory = pmsg->payload.memory;
> +	struct virtio_memory_regions regions[VHOST_MEMORY_MAX_NREGIONS];
> +	uint64_t mapped_address, base_address = 0;
> +
> +	for (idx = 0; idx < memory.nregions; idx++) {
> +		if (memory.regions[idx].guest_phys_addr == 0)
> +			base_address = memory.regions[idx].userspace_addr;
> +	}
> +	if (base_address == 0) {

when will base_address is 0? how to test to run this code?

> +		RTE_LOG(ERR, VHOST_CONFIG,
> +			"couldn't find the mem region whose GPA is 0.\n");
> +		return -1;
> +	}
> +
> +	for (idx = 0; idx < memory.nregions; idx++) {
> +		regions[idx].guest_phys_address =
> +			memory.regions[idx].guest_phys_addr;
> +		regions[idx].guest_phys_address_end =
> +			memory.regions[idx].guest_phys_addr +
> +			memory.regions[idx].memory_size;
> +		regions[idx].memory_size = memory.regions[idx].memory_size;
> +		regions[idx].userspace_address =
> +			memory.regions[idx].userspace_addr;
> +
> +		/* This is ugly */
> +		mapped_address = (uint64_t)(uintptr_t)mmap(NULL,
> +			regions[idx].memory_size +
> +				memory.regions[idx].mmap_offset,
> +			PROT_READ | PROT_WRITE, MAP_SHARED,
> +			pmsg->fds[idx],
> +			0);

Can you mmap the region if gpa is 0? When i run VM with two numa node (qemu will create two hugepage file) found that always failed to mmap with the region which gpa is 0.

BTW can we ensure the memory regions cover with all the memory of hugepage for VM?

> +		RTE_LOG(INFO, VHOST_CONFIG,
> +			"mapped region %d to %p\n",
> +			idx, (void *)mapped_address);
> +
> +		if (mapped_address == (uint64_t)(uintptr_t)MAP_FAILED) {
> +			RTE_LOG(ERR, VHOST_CONFIG,
> +				"mmap qemu guest failed.\n");
> +			return -1;
> +		}
> +
> +		mapped_address +=  memory.regions[idx].mmap_offset;
> +
> +		regions[idx].address_offset = mapped_address -
> +			regions[idx].guest_phys_address;
> +		LOG_DEBUG(VHOST_CONFIG,
> +			"REGION: %u GPA: %p QEMU VA: %p SIZE (%"PRIu64")\n",
> +			idx,
> +			(void *)(uintptr_t)regions[idx].guest_phys_address,
> +			(void *)(uintptr_t)regions[idx].userspace_address,
> +			 regions[idx].memory_size);
> +	}
> +	ops->set_mem_table(ctx, regions, memory.nregions);
> +	return 0;
> +}
> +
> +
> +static int
> +virtio_is_ready(struct virtio_net *dev)
> +{
> +	struct vhost_virtqueue *rvq, *tvq;
> +
> +	/* mq support in future.*/
> +	rvq = dev->virtqueue[VIRTIO_RXQ];
> +	tvq = dev->virtqueue[VIRTIO_TXQ];
> +	if (rvq && tvq && rvq->desc && tvq->desc &&
> +		(rvq->kickfd != (eventfd_t)-1) &&
> +		(rvq->callfd != (eventfd_t)-1) &&
> +		(tvq->kickfd != (eventfd_t)-1) &&
> +		(tvq->callfd != (eventfd_t)-1)) {
> +		RTE_LOG(INFO, VHOST_CONFIG,
> +			"virtio is now ready for processing.\n");
> +		return 1;
> +	}
> +	RTE_LOG(INFO, VHOST_CONFIG,
> +		"virtio isn't ready for processing.\n");
> +	return 0;
> +}
> +
> +void
> +user_set_vring_call(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
> +{
> +	struct vhost_vring_file file;
> +
> +	file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
> +	file.fd = pmsg->fds[0];
> +	RTE_LOG(INFO, VHOST_CONFIG,
> +		"vring call idx:%d file:%d\n", file.index, file.fd);
> +	ops->set_vring_call(ctx, &file);
> +}
> +
> +
> +/*
> + *  In vhost-user, when we receive kick message, will test whether virtio
> + *  device is ready for packet processing.
> + */
> +void
> +user_set_vring_kick(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
> +{
> +	struct vhost_vring_file file;
> +	struct virtio_net *dev = get_device(ctx);
> +
> +	file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
> +	file.fd = pmsg->fds[0];
> +	RTE_LOG(INFO, VHOST_CONFIG,
> +		"vring kick idx:%d file:%d\n", file.index, file.fd);
> +	ops->set_vring_kick(ctx, &file);
> +
> +	if (virtio_is_ready(dev) &&
> +		!(dev->flags & VIRTIO_DEV_RUNNING))
> +			notify_ops->new_device(dev);
> +
> +}
> +
> +/*
> + * when virtio is stopped, qemu will send us the GET_VRING_BASE message.
> + */
> +int
> +user_get_vring_base(struct vhost_device_ctx ctx,
> +	struct vhost_vring_state *state)
> +{
> +	struct virtio_net *dev = get_device(ctx);
> +
> +	/* We have to stop the queue (virtio) if it is running. */
> +	if (dev->flags & VIRTIO_DEV_RUNNING)
> +		notify_ops->destroy_device(dev);
> +
> +	/* Here we are safe to get the last used index */
> +	ops->get_vring_base(ctx, state->index, state);
> +
> +	RTE_LOG(INFO, VHOST_CONFIG,
> +		"vring base idx:%d file:%d\n", state->index, state->num);
> +	/*
> +	 * Based on current qemu vhost-user implementation, this message is
> +	 * sent and only sent in vhost_vring_stop.
> +	 * TODO: cleanup the vring, it isn't usable since here.
> +	 */
> +	if (dev->virtqueue[VIRTIO_RXQ]->callfd) {
> +		close(dev->virtqueue[VIRTIO_RXQ]->callfd);
> +		dev->virtqueue[VIRTIO_RXQ]->callfd = (eventfd_t)-1;
> +	}
> +	if (dev->virtqueue[VIRTIO_TXQ]->callfd) {
> +		close(dev->virtqueue[VIRTIO_TXQ]->callfd);
> +		dev->virtqueue[VIRTIO_TXQ]->callfd = (eventfd_t)-1;
> +	}
> +
> +	return 0;
> +
> +}
> diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.h b/lib/librte_vhost/vhost_user/virtio-net-user.h
> new file mode 100644
> index 0000000..0f6a75a
> --- /dev/null
> +++ b/lib/librte_vhost/vhost_user/virtio-net-user.h
> @@ -0,0 +1,48 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> + *   All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of Intel Corporation nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#ifndef _VIRTIO_NET_USER_H
> +#define _VIRTIO_NET_USER_H
> +
> +#include "vhost-net.h"
> +#include "vhost-net-user.h"
> +
> +int user_set_mem_table(struct vhost_device_ctx, struct VhostUserMsg *);
> +
> +void user_set_vring_call(struct vhost_device_ctx, struct VhostUserMsg *);
> +
> +void user_set_vring_kick(struct vhost_device_ctx, struct VhostUserMsg *);
> +
> +int user_get_vring_base(struct vhost_device_ctx, struct vhost_vring_state *);
> +
> +#endif
> diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
> index f81e459..0b49f1b 100644
> --- a/lib/librte_vhost/virtio-net.c
> +++ b/lib/librte_vhost/virtio-net.c
> @@ -46,6 +46,7 @@
>  #include <rte_virtio_net.h>
>  
>  #include "vhost-net.h"
> +#include "virtio-net.h"
>  
>  /*
>   * Device linked list structure for configuration.
> @@ -56,7 +57,7 @@ struct virtio_net_config_ll {
>  };
>  
>  /* device ops to add/remove device to/from data core. */
> -static struct virtio_net_device_ops const *notify_ops;
> +struct virtio_net_device_ops const *notify_ops;
>  /* root address of the linked list of managed virtio devices */
>  static struct virtio_net_config_ll *ll_root;
>  
> @@ -83,8 +84,9 @@ qva_to_vva(struct virtio_net *dev, uint64_t qemu_va)
>  		if ((qemu_va >= region->userspace_address) &&
>  			(qemu_va <= region->userspace_address +
>  			region->memory_size)) {
> -			vhost_va = dev->mem->mapped_address + qemu_va -
> -					dev->mem->base_address;
> +			vhost_va = qemu_va + region->guest_phys_address +
> +				region->address_offset -
> +				region->userspace_address;
>  			break;
>  		}
>  	}
> @@ -114,7 +116,7 @@ get_config_ll_entry(struct vhost_device_ctx ctx)
>   * Searches the configuration core linked list and
>   * retrieves the device if it exists.
>   */
> -static struct virtio_net *
> +struct virtio_net *
>  get_device(struct vhost_device_ctx ctx)
>  {
>  	struct virtio_net_config_ll *ll_dev;
> @@ -450,12 +452,6 @@ set_mem_table(struct vhost_device_ctx ctx,
>  	if (dev == NULL)
>  		return -1;
>  
> -	if (dev->mem) {
> -		munmap((void *)(uintptr_t)dev->mem->mapped_address,
> -			(size_t)dev->mem->mapped_size);
> -		free(dev->mem);
> -	}
> -
>  	/* Malloc the memory structure depending on the number of regions. */
>  	mem = calloc(1, sizeof(struct virtio_memory) +
>  		(sizeof(struct virtio_memory_regions) * nregions));
> diff --git a/lib/librte_vhost/virtio-net.h b/lib/librte_vhost/virtio-net.h
> new file mode 100644
> index 0000000..da4ade0
> --- /dev/null
> +++ b/lib/librte_vhost/virtio-net.h
> @@ -0,0 +1,43 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
> + *   All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of Intel Corporation nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#ifndef _VIRTIO_NET_H
> +#define _VIRTIO_NET_H
> +
> +#include "vhost-net.h"
> +#include "rte_virtio_net.h"
> +
> +struct virtio_net_device_ops const *notify_ops;
> +struct virtio_net * get_device(struct vhost_device_ctx ctx);
> +
> +#endif
> 

-- 
Regards,
Haifeng



More information about the dev mailing list