[dpdk-dev] [PATCH v1 2/2] virtio: Extend virtio-net PMD to support container environment

Pavel Fedin p.fedin at samsung.com
Mon Dec 28 12:57:41 CET 2015


 Hello!

> -----Original Message-----
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Tetsuya Mukawa
> Sent: Wednesday, December 16, 2015 11:37 AM
> To: dev at dpdk.org
> Cc: nakajima.yoshihiro at lab.ntt.co.jp; mst at redhat.com
> Subject: [dpdk-dev] [PATCH v1 2/2] virtio: Extend virtio-net PMD to support container
> environment
> 
> The patch adds a new virtio-net PMD configuration that allows the PMD to
> work on host as if the PMD is in VM.
> Here is new configuration for virtio-net PMD.
>  - CONFIG_RTE_LIBRTE_VIRTIO_HOST_MODE
> To use this mode, EAL needs physically contiguous memory. To allocate
> such memory, enable below option, and add "--contig-mem" option to
> application command line.
>  - CONFIG_RTE_EAL_SINGLE_FILE_SEGMENTS
> 
> To prepare virtio-net device on host, the users need to invoke QEMU process
> in special qtest mode. This mode is mainly used for testing QEMU devices
> from outer process. In this mode, no guest runs.
> Here is QEMU command line.
> 
>  $ qemu-system-x86_64 \
> 		-machine pc-i440fx-1.4,accel=qtest \
> 		-display none -qtest-log /dev/null \
> 		-qtest unix:/tmp/socket,server \
> 		-netdev type=tap,script=/etc/qemu-ifup,id=net0,queues=1 \
> 		-device virtio-net-pci,netdev=net0,mq=on \
> 		-chardev socket,id=chr1,path=/tmp/ivshmem,server \
> 		-device ivshmem,size=1G,chardev=chr1,vectors=1
> 
> * QEMU process is needed per port.
> * In most cases, just using above command is enough.
> * The vhost backends like vhost-net and vhost-user can be specified.
> * Only the "pc-i440fx-1.4" machine has been checked, but other machines
>   may work. It depends on whether the machine has a piix3 south bridge.
>   If the machine doesn't have one, virtio-net PMD cannot receive status
>   changed interrupts.
> * Should not add "--enable-kvm" to QEMU command line.
> 
> After invoking QEMU, the PMD can connect to QEMU process using unix
> domain sockets. Over these sockets, virtio-net, ivshmem and piix3
> device in QEMU are probed by the PMD.
> Here is example of command line.
> 
>  $ testpmd -c f -n 1 -m 1024 --contig-mem \
>          --vdev="eth_virtio_net0,qtest=/tmp/socket,ivshmem=/tmp/ivshmem" \
>          -- --disable-hw-vlan --txqflags=0xf00 -i
> 
> Please specify same unix domain sockets and memory size in both QEMU and
> DPDK command lines like above.
> The shared memory size should be a power of 2, because ivshmem only accepts
> such memory sizes.
> 
> Also, "--contig-mem" option is needed for the PMD like above. This option
> allocates contiguous memory, and creates one hugepage file on hugetlbfs.
> If there is not enough contiguous memory, initialization will fail.
> 
> This contiguous memory is used as shared memory between DPDK application
> and ivshmem device in QEMU.
> 
> Signed-off-by: Tetsuya Mukawa <mukawa at igel.co.jp>
> ---
>  config/common_linuxapp             |    1 +
>  drivers/net/virtio/Makefile        |    4 +
>  drivers/net/virtio/qtest.c         | 1107 ++++++++++++++++++++++++++++++++++++
>  drivers/net/virtio/virtio_ethdev.c |  341 ++++++++++-
>  drivers/net/virtio/virtio_ethdev.h |   12 +
>  drivers/net/virtio/virtio_pci.h    |   25 +
>  6 files changed, 1461 insertions(+), 29 deletions(-)
>  create mode 100644 drivers/net/virtio/qtest.c
> 
> diff --git a/config/common_linuxapp b/config/common_linuxapp
> index 74bc515..eaa720c 100644
> --- a/config/common_linuxapp
> +++ b/config/common_linuxapp
> @@ -269,6 +269,7 @@ CONFIG_RTE_LIBRTE_PMD_SZEDATA2=n
>  # Compile burst-oriented VIRTIO PMD driver
>  #
>  CONFIG_RTE_LIBRTE_VIRTIO_PMD=y
> +CONFIG_RTE_LIBRTE_VIRTIO_HOST_MODE=n
>  CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_INIT=n
>  CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_RX=n
>  CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_TX=n
> diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile
> index 43835ba..697e629 100644
> --- a/drivers/net/virtio/Makefile
> +++ b/drivers/net/virtio/Makefile
> @@ -52,6 +52,10 @@ SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx.c
>  SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_ethdev.c
>  SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx_simple.c
> 
> +ifeq ($(CONFIG_RTE_LIBRTE_VIRTIO_HOST_MODE),y)
> +	SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += qtest.c
> +endif
> +
>  # this lib depends upon:
>  DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_eal lib/librte_ether
>  DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_mempool lib/librte_mbuf
> diff --git a/drivers/net/virtio/qtest.c b/drivers/net/virtio/qtest.c
> new file mode 100644
> index 0000000..4ffdefb
> --- /dev/null
> +++ b/drivers/net/virtio/qtest.c
> @@ -0,0 +1,1107 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2015 IGEL Co., Ltd. All rights reserved.
> + *   All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of IGEL Co., Ltd. nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +#include <stdint.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include <sys/types.h>
> +#include <sys/socket.h>
> +#include <sys/un.h>
> +#include <sys/queue.h>
> +#include <signal.h>
> +#include <pthread.h>
> +#include <sys/stat.h>
> +#include <fcntl.h>
> +
> +#include <rte_memory.h>
> +#include <rte_malloc.h>
> +#include <rte_common.h>
> +#include <rte_interrupts.h>
> +
> +#include "virtio_pci.h"
> +#include "virtio_logs.h"
> +#include "virtio_ethdev.h"
> +
> +#define NB_BUS                          256
> +#define NB_DEVICE                       32
> +#define NB_BAR                          6
> +
> +/* PCI common configuration registers */
> +#define REG_ADDR_VENDOR_ID              0x0
> +#define REG_ADDR_DEVICE_ID              0x2
> +#define REG_ADDR_COMMAND                0x4
> +#define REG_ADDR_STATUS                 0x6
> +#define REG_ADDR_REVISION_ID            0x8
> +#define REG_ADDR_CLASS_CODE             0x9
> +#define REG_ADDR_CACHE_LINE_S           0xc
> +#define REG_ADDR_LAT_TIMER              0xd
> +#define REG_ADDR_HEADER_TYPE            0xe
> +#define REG_ADDR_BIST                   0xf
> +#define REG_ADDR_BAR0                   0x10
> +#define REG_ADDR_BAR1                   0x14
> +#define REG_ADDR_BAR2                   0x18
> +#define REG_ADDR_BAR3                   0x1c
> +#define REG_ADDR_BAR4                   0x20
> +#define REG_ADDR_BAR5                   0x24
> +
> +/* PCI common configuration register values */
> +#define REG_VAL_COMMAND_IO              0x1
> +#define REG_VAL_COMMAND_MEMORY          0x2
> +#define REG_VAL_COMMAND_MASTER          0x4
> +#define REG_VAL_HEADER_TYPE_ENDPOINT    0x0
> +#define REG_VAL_BAR_MEMORY              0x0
> +#define REG_VAL_BAR_IO                  0x1
> +#define REG_VAL_BAR_LOCATE_32           0x0
> +#define REG_VAL_BAR_LOCATE_UNDER_1MB    0x2
> +#define REG_VAL_BAR_LOCATE_64           0x4
> +
> +/* PIIX3 configuration registers */
> +#define PIIX3_REG_ADDR_PIRQA            0x60
> +#define PIIX3_REG_ADDR_PIRQB            0x61
> +#define PIIX3_REG_ADDR_PIRQC            0x62
> +#define PIIX3_REG_ADDR_PIRQD            0x63
> +
> +/* Device information */
> +#define VIRTIO_NET_DEVICE_ID            0x1000
> +#define VIRTIO_NET_VENDOR_ID            0x1af4
> +#define VIRTIO_NET_IO_START             0xc000
> +#define VIRTIO_NET_IRQ_NUM              10
> +#define IVSHMEM_DEVICE_ID               0x1110
> +#define IVSHMEM_VENDOR_ID               0x1af4
> +#define IVSHMEM_MEMORY_START            0x1000
> +#define IVSHMEM_PROTOCOL_VERSION        0
> +#define PIIX3_DEVICE_ID                 0x7000
> +#define PIIX3_VENDOR_ID                 0x8086
> +
> +/*
> + * Build the value written to the PCI configuration address port (0xcf8):
> + * enable bit 31, bus in bits 23:16, device in bits 15:11, function in
> + * bits 10:8 and the dword-aligned register offset in bits 7:2.
> + * All arithmetic is done in uint32_t so that setting bit 31 never shifts
> + * into the sign bit of an int, and the function number is limited to its
> + * 3-bit field so it cannot spill into the device field.
> + */
> +#define PCI_CONFIG_ADDR(_bus, _device, _function, _offset) ( \
> +	(UINT32_C(1) << 31) | ((uint32_t)(_bus) & 0xff) << 16 | \
> +	((uint32_t)(_device) & 0x1f) << 11 | \
> +	((uint32_t)(_function) & 0x7) << 8 | ((uint32_t)(_offset) & 0xfc))
> +
> +static char interrupt_message[32]; /* qtest line prefix that announces our IRQ; filled in by qtest_intr_initialize() */
> +
> +enum qtest_pci_bar_type { /* expected kind of a BAR, verified in qtest_init_pci_device() */
> +	QTEST_PCI_BAR_DISABLE = 0, /* BAR entry unused; skipped during init */
> +	QTEST_PCI_BAR_IO, /* I/O space BAR */
> +	QTEST_PCI_BAR_MEMORY_UNDER_1MB, /* memory BAR flagged as located under 1MB */
> +	QTEST_PCI_BAR_MEMORY_32, /* memory BAR with a 32-bit address */
> +	QTEST_PCI_BAR_MEMORY_64 /* memory BAR with a 64-bit address (accessed as two dwords) */
> +};
> +
> +struct qtest_pci_bar { /* one base address register of an emulated PCI device */
> +	enum qtest_pci_bar_type type; /* kind of BAR the device is expected to expose */
> +	uint8_t addr; /* configuration-space offset of the BAR register (REG_ADDR_BARn) */
> +	uint64_t region_start; /* address programmed into the BAR */
> +	uint64_t region_size; /* size probed by writing all-ones to the BAR */
> +};
> +
> +struct qtest_session; /* forward declaration, needed by the init callback below */
> +TAILQ_HEAD(qtest_pci_device_list, qtest_pci_device); /* list head type for target devices */
> +struct qtest_pci_device { /* one PCI device the PMD looks for inside QEMU */
> +	TAILQ_ENTRY(qtest_pci_device) next; /* linkage in qtest_session.head */
> +	const char *name; /* human readable name, also used to look the device up */
> +	uint16_t device_id; /* PCI device ID matched during the bus scan */
> +	uint16_t vendor_id; /* PCI vendor ID matched during the bus scan */
> +	uint8_t bus_addr; /* bus number, filled in by qtest_find_pci_device() */
> +	uint8_t device_addr; /* device (slot) number, filled in by qtest_find_pci_device() */
> +	struct qtest_pci_bar bar[NB_BAR]; /* BARs to verify, size and program */
> +	int (*init)(struct qtest_session *s, struct qtest_pci_device *dev); /* per-device init, returns 0 on success */
> +};
> +
> +union qtest_pipefds { /* two views of one pipe: array form for pipe(), named form for users */
> +	struct {
> +		int pipefd[2]; /* passed to pipe() */
> +	};
> +	struct {
> +		int readfd; /* overlays pipefd[0] */
> +		int writefd; /* overlays pipefd[1] */
> +	};
> +};
> +
> +struct qtest_session { /* state of one connection to a QEMU process in qtest mode */
> +	int qtest_socket; /* socket carrying qtest commands and replies */
> +	pthread_mutex_t qtest_session_lock; /* held around each command/reply exchange */
> +
> +	struct qtest_pci_device_list head; /* devices registered by qtest_register_target_devices() */
> +	int ivshmem_socket; /* socket used to hand the shared memory fd to ivshmem */
> +
> +	pthread_t event_th; /* thread running qtest_event_handler() */
> +	union qtest_pipefds msgfds; /* pipe relaying normal qtest replies to the requester */
> +
> +	pthread_t intr_th; /* presumably the thread running qtest_intr_handler() -- TODO confirm */
> +	union qtest_pipefds irqfds; /* pipe relaying interrupt notifications */
> +	rte_atomic16_t enable_intr; /* non-zero while interrupts are relayed to irqfds */
> +	rte_intr_callback_fn cb; /* callback invoked by qtest_intr_handler() */
> +	void *cb_arg; /* argument passed to cb */
> +};
> +
> +/*
> + * Write "count" bytes from "buf" to "fd", retrying after short writes
> + * and after EINTR.
> + * Returns the number of bytes written, or -1 if write() fails with any
> + * error other than EINTR.
> + */
> +static int
> +qtest_write(int fd, char *buf, size_t count)
> +{
> +	size_t left = count;
> +	size_t done = 0;
> +	int rc = 0;
> +
> +	while (left != 0) {
> +		rc = write(fd, buf, left);
> +		/* everything that was still pending went out in one call */
> +		if (rc == (int)left)
> +			break;
> +		if (rc != -1) {
> +			/* short write: advance and try again */
> +			done += rc;
> +			buf += rc;
> +			left -= rc;
> +			continue;
> +		}
> +		if (errno != EINTR)
> +			return rc;
> +	}
> +	return done + rc;
> +}
> +
> +/*
> + * Read from "fd" into "buf" until "count" bytes have arrived or the
> + * last byte received is a newline, retrying after EINTR.
> + *
> + * Returns the number of bytes stored in "buf", or -1 on a read error.
> + * End-of-file before any byte was received is also reported as -1 so
> + * that callers stop instead of polling a closed descriptor forever;
> + * end-of-file after some data returns the bytes received so far.
> + */
> +static int
> +qtest_read(int fd, char *buf, size_t count)
> +{
> +	size_t len = count;
> +	size_t total_len = 0;
> +	ssize_t ret;
> +
> +	while (len > 0) {
> +		ret = read(fd, buf, len);
> +		/* check errors first: buf[ret - 1] is only valid for ret > 0 */
> +		if (ret < 0) {
> +			if (errno == EINTR)
> +				continue;
> +			return -1;
> +		}
> +		if (ret == 0) {
> +			/* peer closed the descriptor */
> +			if (total_len == 0)
> +				return -1;
> +			break;
> +		}
> +		total_len += ret;
> +		if ((size_t)ret == len || buf[ret - 1] == '\n')
> +			break;
> +		buf += ret;
> +		len -= ret;
> +	}
> +	return total_len;
> +}
> +
> +/*
> + * To know QTest protocol specification, see below QEMU source code.
> + *  - qemu/qtest.c
> + */
> +/*
> + * Issue a qtest "in<type>" command for I/O port "addr" and return the
> + * value parsed from the "OK 0x..." reply relayed by the event handler.
> + * "type" must be 'l', 'w' or 'b'; anything else panics.
> + * Returns 0 if the command cannot be sent or no usable reply arrives.
> + */
> +static uint32_t
> +qtest_in(struct qtest_session *s, uint16_t addr, char type)
> +{
> +	char buf[1024];
> +	int ret;
> +
> +	if ((type != 'l') && (type != 'w') && (type != 'b'))
> +		rte_panic("Invalid value\n");
> +
> +	snprintf(buf, sizeof(buf), "in%c 0x%x\n", type, addr);
> +	/* write to qtest socket */
> +	ret = qtest_write(s->qtest_socket, buf, strlen(buf));
> +	if (ret < 0) {
> +		PMD_DRV_LOG(ERR, "Failed to send qtest command\n");
> +		return 0;
> +	}
> +	/* read reply from event handler, keeping room for the terminator */
> +	ret = qtest_read(s->msgfds.readfd, buf, sizeof(buf) - 1);
> +	if (ret < (int)strlen("OK ")) {
> +		PMD_DRV_LOG(ERR, "Failed to receive qtest reply\n");
> +		return 0;
> +	}
> +	buf[ret] = '\0';
> +	return strtoul(buf + strlen("OK "), NULL, 16);
> +}
> +
> +/*
> + * Issue a qtest "out<type>" command that stores "val" to I/O port
> + * "addr", then consume the reply relayed by the event handler.
> + * "type" must be 'l', 'w' or 'b'; anything else panics.
> + */
> +static void
> +qtest_out(struct qtest_session *s, uint16_t addr, uint32_t val, char type)
> +{
> +	char cmd[1024];
> +	int rc __rte_unused;
> +
> +	switch (type) {
> +	case 'l':
> +	case 'w':
> +	case 'b':
> +		break;
> +	default:
> +		rte_panic("Invalid value\n");
> +	}
> +
> +	snprintf(cmd, sizeof(cmd), "out%c 0x%x 0x%x\n", type, addr, val);
> +	/* send the command over the qtest socket */
> +	rc = qtest_write(s->qtest_socket, cmd, strlen(cmd));
> +	/* the reply comes back through the message pipe */
> +	rc = qtest_read(s->msgfds.readfd, cmd, sizeof(cmd));
> +}
> +
> +/*
> + * qtest_pci_read/write are based on PCI configuration space specification.
> + * According to the spec, access size of read()/write() should be 4 bytes.
> + */
> +/*
> + * Read one byte of PCI configuration space at bus/device/function and
> + * "offset". The containing dword is read through ports 0xcf8/0xcfc and
> + * the requested byte is extracted from it.
> + */
> +static int
> +qtest_pci_readb(struct qtest_session *s, uint8_t bus, uint8_t device,
> +		uint8_t function, uint8_t offset)
> +{
> +	uint32_t tmp;
> +
> +	tmp = PCI_CONFIG_ADDR(bus, device, function, offset);
> +
> +	/* pthread mutex calls report failure as a non-zero error number */
> +	if (pthread_mutex_lock(&s->qtest_session_lock) != 0)
> +		rte_panic("Cannot lock mutex\n");
> +
> +	qtest_out(s, 0xcf8, tmp, 'l');
> +	tmp = qtest_in(s, 0xcfc, 'l');
> +
> +	if (pthread_mutex_unlock(&s->qtest_session_lock) != 0)
> +		rte_panic("Cannot unlock mutex\n");
> +
> +	return (tmp >> ((offset & 0x3) * 8)) & 0xff;
> +}
> +
> +/*
> + * Write one byte of PCI configuration space at bus/device/function and
> + * "offset" using a read-modify-write of the containing dword.
> + */
> +static void
> +qtest_pci_writeb(struct qtest_session *s, uint8_t bus, uint8_t device,
> +		uint8_t function, uint8_t offset, uint8_t value)
> +{
> +	uint32_t addr, tmp, pos;
> +
> +	addr = PCI_CONFIG_ADDR(bus, device, function, offset);
> +	pos = (offset % 4) * 8;
> +
> +	/* pthread mutex calls report failure as a non-zero error number */
> +	if (pthread_mutex_lock(&s->qtest_session_lock) != 0)
> +		rte_panic("Cannot lock mutex\n");
> +
> +	qtest_out(s, 0xcf8, addr, 'l');
> +	tmp = qtest_in(s, 0xcfc, 'l');
> +	/* shift in uint32_t: pos may be 24, which overflows a plain int */
> +	tmp = (tmp & ~((uint32_t)0xff << pos)) | ((uint32_t)value << pos);
> +
> +	qtest_out(s, 0xcf8, addr, 'l');
> +	qtest_out(s, 0xcfc, tmp, 'l');
> +
> +	if (pthread_mutex_unlock(&s->qtest_session_lock) != 0)
> +		rte_panic("Cannot unlock mutex\n");
> +}
> +
> +/*
> + * Read one dword of PCI configuration space at bus/device/function and
> + * "offset" through ports 0xcf8/0xcfc.
> + */
> +static uint32_t
> +qtest_pci_readl(struct qtest_session *s, uint8_t bus, uint8_t device,
> +		uint8_t function, uint8_t offset)
> +{
> +	uint32_t tmp;
> +
> +	tmp = PCI_CONFIG_ADDR(bus, device, function, offset);
> +
> +	/* pthread mutex calls report failure as a non-zero error number */
> +	if (pthread_mutex_lock(&s->qtest_session_lock) != 0)
> +		rte_panic("Cannot lock mutex\n");
> +
> +	qtest_out(s, 0xcf8, tmp, 'l');
> +	tmp = qtest_in(s, 0xcfc, 'l');
> +
> +	if (pthread_mutex_unlock(&s->qtest_session_lock) != 0)
> +		rte_panic("Cannot unlock mutex\n");
> +
> +	return tmp;
> +}
> +
> +/*
> + * Write one dword of PCI configuration space at bus/device/function and
> + * "offset" through ports 0xcf8/0xcfc.
> + */
> +static void
> +qtest_pci_writel(struct qtest_session *s, uint8_t bus, uint8_t device,
> +		uint8_t function, uint8_t offset, uint32_t value)
> +{
> +	uint32_t tmp;
> +
> +	tmp = PCI_CONFIG_ADDR(bus, device, function, offset);
> +
> +	/* pthread mutex calls report failure as a non-zero error number */
> +	if (pthread_mutex_lock(&s->qtest_session_lock) != 0)
> +		rte_panic("Cannot lock mutex\n");
> +
> +	qtest_out(s, 0xcf8, tmp, 'l');
> +	qtest_out(s, 0xcfc, value, 'l');
> +
> +	if (pthread_mutex_unlock(&s->qtest_session_lock) != 0)
> +		rte_panic("Cannot unlock mutex\n");
> +}
> +
> +/*
> + * Read a 64-bit value from PCI configuration space as two dwords:
> + * the low half at "offset" and the high half at "offset" + 4.
> + * Both accesses happen under a single hold of the session lock.
> + */
> +static uint64_t
> +qtest_pci_readq(struct qtest_session *s, uint8_t bus, uint8_t device,
> +		uint8_t function, uint8_t offset)
> +{
> +	uint32_t tmp;
> +	uint64_t val;
> +
> +	tmp = PCI_CONFIG_ADDR(bus, device, function, offset);
> +
> +	/* pthread mutex calls report failure as a non-zero error number */
> +	if (pthread_mutex_lock(&s->qtest_session_lock) != 0)
> +		rte_panic("Cannot lock mutex\n");
> +
> +	qtest_out(s, 0xcf8, tmp, 'l');
> +	val = (uint64_t)qtest_in(s, 0xcfc, 'l');
> +
> +	tmp = PCI_CONFIG_ADDR(bus, device, function, offset + 4);
> +
> +	qtest_out(s, 0xcf8, tmp, 'l');
> +	val |= (uint64_t)qtest_in(s, 0xcfc, 'l') << 32;
> +
> +	if (pthread_mutex_unlock(&s->qtest_session_lock) != 0)
> +		rte_panic("Cannot unlock mutex\n");
> +
> +	return val;
> +}
> +
> +/*
> + * Write a 64-bit value to PCI configuration space as two dwords:
> + * the low half at "offset" and the high half at "offset" + 4.
> + * Both accesses happen under a single hold of the session lock.
> + */
> +static void
> +qtest_pci_writeq(struct qtest_session *s, uint8_t bus, uint8_t device,
> +		uint8_t function, uint8_t offset, uint64_t value)
> +{
> +	uint32_t tmp;
> +
> +	tmp = PCI_CONFIG_ADDR(bus, device, function, offset);
> +
> +	/* pthread mutex calls report failure as a non-zero error number */
> +	if (pthread_mutex_lock(&s->qtest_session_lock) != 0)
> +		rte_panic("Cannot lock mutex\n");
> +
> +	qtest_out(s, 0xcf8, tmp, 'l');
> +	qtest_out(s, 0xcfc, (uint32_t)(value & 0xffffffff), 'l');
> +
> +	tmp = PCI_CONFIG_ADDR(bus, device, function, offset + 4);
> +
> +	qtest_out(s, 0xcf8, tmp, 'l');
> +	qtest_out(s, 0xcfc, (uint32_t)(value >> 32), 'l');
> +
> +	if (pthread_mutex_unlock(&s->qtest_session_lock) != 0)
> +		rte_panic("Cannot unlock mutex\n");
> +}
> +
> +/*
> + * virtio_ioport_read/write are used by virtio-net PMD
> + */
> +/*
> + * Write "val" to the virtio-net I/O register at "addr", which is an
> + * offset from VIRTIO_NET_IO_START. "type" selects the access width
> + * ('l', 'w' or 'b'). The exchange runs under the session lock.
> + */
> +void
> +virtio_ioport_write(struct virtio_hw *hw, uint64_t addr, uint64_t val, char type)
> +{
> +	struct qtest_session *s = (struct qtest_session *)hw->qsession;
> +
> +	/* pthread mutex calls report failure as a non-zero error number */
> +	if (pthread_mutex_lock(&s->qtest_session_lock) != 0)
> +		rte_panic("Cannot lock mutex\n");
> +
> +	qtest_out(s, VIRTIO_NET_IO_START + (uint16_t)addr, val, type);
> +
> +	if (pthread_mutex_unlock(&s->qtest_session_lock) != 0)
> +		rte_panic("Cannot unlock mutex\n");
> +}
> +
> +/*
> + * Read the virtio-net I/O register at "addr", which is an offset from
> + * VIRTIO_NET_IO_START. "type" selects the access width ('l', 'w' or
> + * 'b'). The exchange runs under the session lock.
> + */
> +uint32_t
> +virtio_ioport_read(struct virtio_hw *hw, uint64_t addr, char type)
> +{
> +	struct qtest_session *s = (struct qtest_session *)hw->qsession;
> +	uint32_t val;
> +
> +	/* pthread mutex calls report failure as a non-zero error number */
> +	if (pthread_mutex_lock(&s->qtest_session_lock) != 0)
> +		rte_panic("Cannot lock mutex\n");
> +
> +	val = qtest_in(s, VIRTIO_NET_IO_START + (uint16_t)addr, type);
> +
> +	if (pthread_mutex_unlock(&s->qtest_session_lock) != 0)
> +		rte_panic("Cannot unlock mutex\n");
> +
> +	return val;
> +}
> +
> +/*
> + * Start relaying interrupts for the port whose rte_eth_dev_data is
> + * passed as "data". Always returns 0.
> + */
> +int
> +qtest_intr_enable(void *data)
> +{
> +	struct rte_eth_dev_data *eth_data = data;
> +	struct virtio_hw *hw = eth_data->dev_private;
> +	struct qtest_session *session = hw->qsession;
> +
> +	rte_atomic16_set(&session->enable_intr, 1);
> +	return 0;
> +}
> +
> +/*
> + * Stop relaying interrupts for the port whose rte_eth_dev_data is
> + * passed as "data". Always returns 0.
> + */
> +int
> +qtest_intr_disable(void *data)
> +{
> +	struct rte_eth_dev_data *eth_data = data;
> +	struct virtio_hw *hw = eth_data->dev_private;
> +	struct qtest_session *session = hw->qsession;
> +
> +	rte_atomic16_set(&session->enable_intr, 0);
> +	return 0;
> +}
> +
> +/*
> + * Install the interrupt callback and its argument for the port whose
> + * rte_eth_dev_data is passed as "data", then enable interrupt relay.
> + */
> +void
> +qtest_intr_callback_register(void *data,
> +		rte_intr_callback_fn cb, void *cb_arg)
> +{
> +	struct rte_eth_dev_data *eth_data = data;
> +	struct virtio_hw *hw = eth_data->dev_private;
> +	struct qtest_session *session = hw->qsession;
> +
> +	/* publish the callback before interrupts can be relayed */
> +	session->cb = cb;
> +	session->cb_arg = cb_arg;
> +	rte_atomic16_set(&session->enable_intr, 1);
> +}
> +
> +/*
> + * Disable interrupt relay for the port whose rte_eth_dev_data is passed
> + * as "data" and clear the installed callback. The callback arguments
> + * are ignored.
> + */
> +void
> +qtest_intr_callback_unregister(void *data,
> +		rte_intr_callback_fn cb __rte_unused,
> +		void *cb_arg __rte_unused)
> +{
> +	struct rte_eth_dev_data *eth_data = data;
> +	struct virtio_hw *hw = eth_data->dev_private;
> +	struct qtest_session *session = hw->qsession;
> +
> +	/* stop the relay first, then drop the callback */
> +	rte_atomic16_set(&session->enable_intr, 0);
> +	session->cb = NULL;
> +	session->cb_arg = NULL;
> +}
> +
> +/*
> + * Thread body: wait for interrupt notifications on the irq pipe and
> + * invoke the registered callback for each one.
> + * "data" is the qtest session. Returns NULL once reading the pipe fails.
> + */
> +static void *
> +qtest_intr_handler(void *data) {
> +	struct qtest_session *s = (struct qtest_session *)data;
> +	rte_intr_callback_fn cb;
> +	char buf[1];
> +	int ret;
> +
> +	for (;;) {
> +		ret = qtest_read(s->irqfds.readfd, buf, sizeof(buf));
> +		if (ret < 0)
> +			return NULL;
> +		/*
> +		 * A notification may still be queued in the pipe after the
> +		 * callback was unregistered (cb reset to NULL); skip it
> +		 * instead of calling through a NULL pointer.
> +		 */
> +		cb = s->cb;
> +		if (cb != NULL)
> +			cb(NULL, s->cb_arg);
> +	}
> +}
> +
> +/*
> + * Ask QEMU to report interrupts over the qtest socket.
> + * Records the "IRQ raise <n>" text that identifies the virtio-net
> + * interrupt, sends "irq_intercept_in ioapic" and discards the reply.
> + * "data" is the port's rte_eth_dev_data.
> + * Returns 0 on success, -1 if the command or its reply fails.
> + */
> +static int
> +qtest_intr_initialize(void *data)
> +{
> +	struct virtio_hw *hw = ((struct rte_eth_dev_data *)data)->dev_private;
> +	struct qtest_session *s;
> +	char buf[1024];
> +	int ret;
> +
> +	s = (struct qtest_session *)hw->qsession;
> +
> +	/* This message will come when interrupt occurs */
> +	snprintf(interrupt_message, sizeof(interrupt_message),
> +			"IRQ raise %d", VIRTIO_NET_IRQ_NUM);
> +
> +	snprintf(buf, sizeof(buf), "irq_intercept_in ioapic\n");
> +
> +	/* pthread mutex calls report failure as a non-zero error number */
> +	if (pthread_mutex_lock(&s->qtest_session_lock) != 0)
> +		rte_panic("Cannot lock mutex\n");
> +
> +	/* To enable interrupt, send "irq_intercept_in" message to QEMU */
> +	ret = qtest_write(s->qtest_socket, buf, strlen(buf));
> +	if (ret < 0) {
> +		pthread_mutex_unlock(&s->qtest_session_lock);
> +		return -1;
> +	}
> +
> +	/* just ignore QEMU response */
> +	ret = qtest_read(s->msgfds.readfd, buf, sizeof(buf));
> +	if (ret < 0) {
> +		pthread_mutex_unlock(&s->qtest_session_lock);
> +		return -1;
> +	}
> +
> +	if (pthread_mutex_unlock(&s->qtest_session_lock) != 0)
> +		rte_panic("Cannot unlock mutex\n");
> +
> +	return 0;
> +}
> +
> +/*
> + * Dispatch one line received from QEMU. Lines starting with the
> + * interrupt message are turned into a one-byte notification on the irq
> + * pipe (dropped while interrupts are disabled); every other line is
> + * forwarded to the message pipe. Panics if a pipe write fails.
> + */
> +static void
> +qtest_handle_one_message(struct qtest_session *s, char *buf)
> +{
> +	size_t prefix_len = strlen(interrupt_message);
> +
> +	if (strncmp(buf, interrupt_message, prefix_len) != 0) {
> +		/* ordinary reply: hand it to whoever issued the command */
> +		if (qtest_write(s->msgfds.writefd, buf, strlen(buf)) < 0)
> +			rte_panic("cannot relay normal message\n");
> +		return;
> +	}
> +
> +	if (rte_atomic16_read(&s->enable_intr) == 0)
> +		return;
> +
> +	/* interrupt: wake the interrupt thread through its pipe */
> +	if (write(s->irqfds.writefd, "1", 1) < 0)
> +		rte_panic("cannot relay interrupt\n");
> +}
> +
> +/*
> + * Return a pointer to the text following the first newline in "p", or
> + * NULL when "p" has no newline or nothing follows it.
> + */
> +static char *
> +qtest_get_next_message(char *p)
> +{
> +	char *newline = strchr(p, '\n');
> +
> +	if (newline == NULL)
> +		return NULL;
> +	return (newline[1] != '\0') ? newline + 1 : NULL;
> +}
> +
> +/*
> + * Close the descriptor stored in "*fd" when it is greater than zero and
> + * mark it closed by storing -1. Other values are left untouched.
> + */
> +static void
> +qtest_close_one_socket(int *fd)
> +{
> +	if (*fd <= 0)
> +		return;
> +
> +	close(*fd);
> +	*fd = -1;
> +}
> +
> +/*
> + * Close every descriptor owned by the session: the qtest socket, both
> + * ends of the message and irq pipes, and the ivshmem socket.
> + */
> +static void
> +qtest_close_sockets(struct qtest_session *s)
> +{
> +	int *fds[] = {
> +		&s->qtest_socket,
> +		&s->msgfds.readfd,
> +		&s->msgfds.writefd,
> +		&s->irqfds.readfd,
> +		&s->irqfds.writefd,
> +		&s->ivshmem_socket,
> +	};
> +	size_t i;
> +
> +	for (i = 0; i < sizeof(fds) / sizeof(fds[0]); i++)
> +		qtest_close_one_socket(fds[i]);
> +}
> +
> +/*
> + * This thread relays QTest response using pipe.
> + * The function is needed because we need to separate IRQ message from others.
> + */
> +/*
> + * Thread body: read lines from the qtest socket and dispatch each one
> + * with qtest_handle_one_message(). "data" is the qtest session.
> + * When reading fails, all session descriptors are closed and the
> + * thread returns NULL.
> + */
> +static void *
> +qtest_event_handler(void *data) {
> +	struct qtest_session *s = (struct qtest_session *)data;
> +	char buf[1024];
> +	char *p;
> +	int ret;
> +
> +	for (;;) {
> +		memset(buf, 0, sizeof(buf));
> +		/*
> +		 * Leave the last byte untouched so the buffer is always
> +		 * NUL terminated for the string functions used below.
> +		 */
> +		ret = qtest_read(s->qtest_socket, buf, sizeof(buf) - 1);
> +		if (ret < 0) {
> +			qtest_close_sockets(s);
> +			return NULL;
> +		}
> +
> +		/* may receive multiple messages at the same time */
> +		p = buf;
> +		do {
> +			qtest_handle_one_message(s, p);
> +		} while ((p = qtest_get_next_message(p)) != NULL);
> +	}
> +	return NULL;
> +}
> +
> +/*
> + * Initialize the piix3 device: route the interrupt pin used by the
> + * virtio-net slot to VIRTIO_NET_IRQ_NUM.
> + * Returns 0 on success, or -1 when no virtio-net device with a
> + * non-zero slot number is registered in the session.
> + */
> +static int
> +qtest_init_piix3_device(struct qtest_session *s, struct qtest_pci_device *dev)
> +{
> +	uint8_t bus, device, virtio_net_slot = 0;
> +	struct qtest_pci_device *tmpdev;
> +	/* PIRQ routing register for each PCI slot; 0xff means "none" */
> +	static const uint8_t pcislot2regaddr[] = {	0xff,
> +							0xff,
> +							0xff,
> +							PIIX3_REG_ADDR_PIRQC,
> +							PIIX3_REG_ADDR_PIRQD,
> +							PIIX3_REG_ADDR_PIRQA,
> +							PIIX3_REG_ADDR_PIRQB};
> +	const size_t nb_slots =
> +		sizeof(pcislot2regaddr) / sizeof(pcislot2regaddr[0]);
> +
> +	bus = dev->bus_addr;
> +	device = dev->device_addr;
> +
> +	PMD_DRV_LOG(INFO,
> +		"Find %s on virtual PCI bus: %04x:%02x:00.0\n",
> +		dev->name, bus, device);
> +
> +	/* Get slot id that is connected to virtio-net */
> +	TAILQ_FOREACH(tmpdev, &s->head, next) {
> +		if (strcmp(tmpdev->name, "virtio-net") == 0) {
> +			virtio_net_slot = tmpdev->device_addr;
> +			break;
> +		}
> +	}
> +
> +	if (virtio_net_slot == 0)
> +		return -1;
> +
> +	/*
> +	 * Set interrupt routing for virtio-net device.
> +	 * Here is i440fx/piix3 connection settings
> +	 * ---------------------------------------
> +	 * PCI Slot3 -> PIRQC
> +	 * PCI Slot4 -> PIRQD
> +	 * PCI Slot5 -> PIRQA
> +	 * PCI Slot6 -> PIRQB
> +	 *
> +	 * Slot numbers go up to 31 but the table only covers slots 0-6,
> +	 * so the index is range checked before the lookup.
> +	 */
> +	if (virtio_net_slot < nb_slots &&
> +			pcislot2regaddr[virtio_net_slot] != 0xff) {
> +		qtest_pci_writeb(s, bus, device, 0,
> +				pcislot2regaddr[virtio_net_slot],
> +				VIRTIO_NET_IRQ_NUM);
> +	}
> +
> +	return 0;
> +}
> +
> +/*
> + * Common initialization of PCI device.
> + * To know detail, see pci specification.
> + */
> +/*
> + * Generic PCI endpoint bring-up for "dev":
> + *  1. verify the header type and that each described BAR has the
> + *     expected kind,
> + *  2. enable I/O, memory and bus-master access,
> + *  3. probe each BAR's size by writing all-ones,
> + *  4. program each BAR with its region_start.
> + * Returns 0 on success, -1 on an unexpected header or BAR type.
> + */
> +static int
> +qtest_init_pci_device(struct qtest_session *s, struct qtest_pci_device *dev)
> +{
> +	uint8_t i, bus, device;
> +	uint32_t val;
> +	uint64_t val64;
> +
> +	bus = dev->bus_addr;
> +	device = dev->device_addr;
> +
> +	PMD_DRV_LOG(INFO,
> +		"Find %s on virtual PCI bus: %04x:%02x:00.0\n",
> +		dev->name, bus, device);
> +
> +	/* Check header type */
> +	val = qtest_pci_readb(s, bus, device, 0, REG_ADDR_HEADER_TYPE);
> +	if (val != REG_VAL_HEADER_TYPE_ENDPOINT) {
> +		PMD_DRV_LOG(ERR, "Unexpected header type %u\n", val);
> +		return -1;
> +	}
> +
> +	/* Check BAR type */
> +	for (i = 0; i < NB_BAR; i++) {
> +		val = qtest_pci_readl(s, bus, device, 0, dev->bar[i].addr);
> +
> +		switch (dev->bar[i].type) {
> +		case QTEST_PCI_BAR_IO:
> +			if ((val & 0x1) != REG_VAL_BAR_IO)
> +				goto error;
> +			break;
> +		case QTEST_PCI_BAR_MEMORY_UNDER_1MB:
> +			if ((val & 0x1) != REG_VAL_BAR_MEMORY)
> +				goto error;
> +			if ((val & 0x6) != REG_VAL_BAR_LOCATE_UNDER_1MB)
> +				goto error;
> +			break;
> +		case QTEST_PCI_BAR_MEMORY_32:
> +			if ((val & 0x1) != REG_VAL_BAR_MEMORY)
> +				goto error;
> +			if ((val & 0x6) != REG_VAL_BAR_LOCATE_32)
> +				goto error;
> +			break;
> +		case QTEST_PCI_BAR_MEMORY_64:
> +			if ((val & 0x1) != REG_VAL_BAR_MEMORY)
> +				goto error;
> +			if ((val & 0x6) != REG_VAL_BAR_LOCATE_64)
> +				goto error;
> +			break;
> +		case QTEST_PCI_BAR_DISABLE:
> +			break;
> +		}
> +	}
> +
> +	/*
> +	 * Enable device.
> +	 * The dword at REG_ADDR_COMMAND also holds the status register in
> +	 * its upper half. Status bits are cleared by writing 1, so the
> +	 * value read is masked to the command half before it is written
> +	 * back; writing zeros to the status half leaves it unchanged.
> +	 */
> +	val = qtest_pci_readl(s, bus, device, 0, REG_ADDR_COMMAND);
> +	val &= 0xffff;
> +	val |= REG_VAL_COMMAND_IO | REG_VAL_COMMAND_MEMORY | REG_VAL_COMMAND_MASTER;
> +	qtest_pci_writel(s, bus, device, 0, REG_ADDR_COMMAND, val);
> +
> +	/* Calculate BAR size: write all-ones, read back, negate the mask */
> +	for (i = 0; i < NB_BAR; i++) {
> +		switch (dev->bar[i].type) {
> +		case QTEST_PCI_BAR_IO:
> +		case QTEST_PCI_BAR_MEMORY_UNDER_1MB:
> +		case QTEST_PCI_BAR_MEMORY_32:
> +			qtest_pci_writel(s, bus, device, 0,
> +					dev->bar[i].addr, 0xffffffff);
> +			val = qtest_pci_readl(s, bus, device,
> +					0, dev->bar[i].addr);
> +			dev->bar[i].region_size = ~(val & 0xfffffff0) + 1;
> +			break;
> +		case QTEST_PCI_BAR_MEMORY_64:
> +			qtest_pci_writeq(s, bus, device, 0,
> +					dev->bar[i].addr, 0xffffffffffffffff);
> +			val64 = qtest_pci_readq(s, bus, device,
> +					0, dev->bar[i].addr);
> +			dev->bar[i].region_size =
> +					~(val64 & 0xfffffffffffffff0) + 1;
> +			break;
> +		case QTEST_PCI_BAR_DISABLE:
> +			break;
> +		}
> +	}
> +
> +	/* Set BAR region */
> +	for (i = 0; i < NB_BAR; i++) {
> +		switch (dev->bar[i].type) {
> +		case QTEST_PCI_BAR_IO:
> +		case QTEST_PCI_BAR_MEMORY_UNDER_1MB:
> +		case QTEST_PCI_BAR_MEMORY_32:
> +			qtest_pci_writel(s, bus, device, 0, dev->bar[i].addr,
> +				dev->bar[i].region_start);
> +			break;
> +		case QTEST_PCI_BAR_MEMORY_64:
> +			qtest_pci_writeq(s, bus, device, 0, dev->bar[i].addr,
> +				dev->bar[i].region_start);
> +			break;
> +		case QTEST_PCI_BAR_DISABLE:
> +			continue;
> +		}
> +		/* uint64_t is not "long" on every target: print as long long */
> +		PMD_DRV_LOG(INFO, "Set BAR of %s device: 0x%llx - 0x%llx\n",
> +			dev->name,
> +			(unsigned long long)dev->bar[i].region_start,
> +			(unsigned long long)(dev->bar[i].region_start +
> +				dev->bar[i].region_size));
> +	}
> +
> +	return 0;
> +
> +error:
> +	PMD_DRV_LOG(ERR, "Unexpected BAR type\n");
> +	return -1;
> +}
> +
> +/*
> + * Read the vendor/device ID dword at bus/device (function 0) and, if
> + * it matches a registered target device, record the location in the
> + * first matching entry.
> + */
> +static void
> +qtest_find_pci_device(struct qtest_session *s, uint16_t bus, uint8_t device)
> +{
> +	struct qtest_pci_device *candidate;
> +	uint32_t found_id, wanted_id;
> +
> +	found_id = qtest_pci_readl(s, bus, device, 0, 0);
> +	TAILQ_FOREACH(candidate, &s->head, next) {
> +		/* config dword 0: device ID in the upper half, vendor ID below */
> +		wanted_id = (uint32_t)candidate->device_id << 16 |
> +				candidate->vendor_id;
> +		if (found_id != wanted_id)
> +			continue;
> +
> +		/* device is found, then store it */
> +		candidate->bus_addr = bus;
> +		candidate->device_addr = device;
> +		break;
> +	}
> +}
> +
> +/*
> + * Scan every bus/device pair for the registered target devices, then
> + * run each device's init callback in list order.
> + * Returns 0 on success, or the first non-zero value returned by an
> + * init callback.
> + */
> +static int
> +qtest_init_pci_devices(struct qtest_session *s)
> +{
> +	struct qtest_pci_device *target;
> +	unsigned int bus_no, dev_no;
> +	int rc;
> +
> +	/* Find devices */
> +	for (bus_no = 0; bus_no < NB_BUS; bus_no++)
> +		for (dev_no = 0; dev_no < NB_DEVICE; dev_no++)
> +			qtest_find_pci_device(s, bus_no, dev_no);
> +
> +	/* Initialize devices */
> +	TAILQ_FOREACH(target, &s->head, next) {
> +		rc = target->init(s, target);
> +		if (rc != 0)
> +			return rc;
> +	}
> +
> +	return 0;
> +}
> +
> +/*
> + * Return the PCI ID that identifies a virtio-net device, with both
> + * subsystem IDs set to PCI_ANY_ID.
> + * Fields are set by name so the result does not depend on the member
> + * order of struct rte_pci_id (the positional form listed the device ID
> + * before the vendor ID).
> + */
> +struct rte_pci_id
> +qtest_get_pci_id_of_virtio_net(void)
> +{
> +	struct rte_pci_id id = {
> +		.vendor_id = VIRTIO_NET_VENDOR_ID,
> +		.device_id = VIRTIO_NET_DEVICE_ID,
> +		.subsystem_vendor_id = PCI_ANY_ID,
> +		.subsystem_device_id = PCI_ANY_ID,
> +	};
> +
> +	return id;
> +}
> +
> +/*
> + * Build the session's list of target devices: virtio-net, ivshmem and
> + * piix3, in that order.
> + * Returns 0 on success, -1 if the first EAL memory segment's length is
> + * not a power of 2 or if an allocation fails (nothing is leaked).
> + */
> +static int
> +qtest_register_target_devices(struct qtest_session *s)
> +{
> +	struct qtest_pci_device *virtio_net, *ivshmem, *piix3;
> +	const struct rte_memseg *ms;
> +
> +	ms = rte_eal_get_physmem_layout();
> +	/* if EAL memory size isn't pow of 2, ivshmem refuse it */
> +	if ((ms[0].len & (ms[0].len - 1)) != 0) {
> +		PMD_DRV_LOG(ERR, "memory size must be power of 2\n");
> +		return -1;
> +	}
> +
> +	/*
> +	 * calloc() zeroes all three entries, so fields that are never
> +	 * assigned below (unused BARs, bus/device address of a device
> +	 * that is not found) start out as 0 / QTEST_PCI_BAR_DISABLE.
> +	 */
> +	virtio_net = calloc(1, sizeof(*virtio_net));
> +	ivshmem = calloc(1, sizeof(*ivshmem));
> +	piix3 = calloc(1, sizeof(*piix3));
> +	if (virtio_net == NULL || ivshmem == NULL || piix3 == NULL) {
> +		free(virtio_net);
> +		free(ivshmem);
> +		free(piix3);
> +		return -1;
> +	}
> +
> +	TAILQ_INIT(&s->head);
> +
> +	virtio_net->name = "virtio-net";
> +	virtio_net->device_id = VIRTIO_NET_DEVICE_ID;
> +	virtio_net->vendor_id = VIRTIO_NET_VENDOR_ID;
> +	virtio_net->init = qtest_init_pci_device;
> +	virtio_net->bar[0].addr = REG_ADDR_BAR0;
> +	virtio_net->bar[0].type = QTEST_PCI_BAR_IO;
> +	virtio_net->bar[0].region_start = VIRTIO_NET_IO_START;
> +	TAILQ_INSERT_TAIL(&s->head, virtio_net, next);
> +
> +	ivshmem->name = "ivshmem";
> +	ivshmem->device_id = IVSHMEM_DEVICE_ID;
> +	ivshmem->vendor_id = IVSHMEM_VENDOR_ID;
> +	ivshmem->init = qtest_init_pci_device;
> +	ivshmem->bar[0].addr = REG_ADDR_BAR0;
> +	ivshmem->bar[0].type = QTEST_PCI_BAR_MEMORY_32;
> +	ivshmem->bar[0].region_start = IVSHMEM_MEMORY_START;
> +	ivshmem->bar[1].addr = REG_ADDR_BAR2;
> +	ivshmem->bar[1].type = QTEST_PCI_BAR_MEMORY_64;
> +	/* In host mode, only one memory segment is valid */
> +	ivshmem->bar[1].region_start = ms[0].phys_addr;
> +	TAILQ_INSERT_TAIL(&s->head, ivshmem, next);
> +
> +	/* piix3 is needed to route irqs from virtio-net to ioapic */
> +	piix3->name = "piix3";
> +	piix3->device_id = PIIX3_DEVICE_ID;
> +	piix3->vendor_id = PIIX3_VENDOR_ID;
> +	piix3->init = qtest_init_piix3_device;
> +	TAILQ_INSERT_TAIL(&s->head, piix3, next);
> +
> +	return 0;
> +}
> +
> +/*
> + * Send one 64-bit value to the ivshmem socket. When "shm_fd" is not
> + * negative, the descriptor is attached as SCM_RIGHTS ancillary data.
> + * sendmsg() is retried after EINTR.
> + * Returns the sendmsg() result: bytes sent, or a negative value on
> + * failure.
> + */
> +static int
> +qtest_send_message_to_ivshmem(int sock_fd, uint64_t client_id, int shm_fd)
> +{
> +	size_t fdsize = sizeof(int);
> +	char control[CMSG_SPACE(fdsize)];
> +	struct iovec payload;
> +	struct msghdr msg;
> +	struct cmsghdr *hdr;
> +	int rc;
> +
> +	payload.iov_base = &client_id;
> +	payload.iov_len = sizeof(client_id);
> +
> +	memset(&msg, 0, sizeof(msg));
> +	msg.msg_iov = &payload;
> +	msg.msg_iovlen = 1;
> +
> +	if (shm_fd >= 0) {
> +		/* pass the descriptor along with the payload */
> +		msg.msg_control = &control;
> +		msg.msg_controllen = sizeof(control);
> +		hdr = CMSG_FIRSTHDR(&msg);
> +		hdr->cmsg_len = CMSG_LEN(fdsize);
> +		hdr->cmsg_level = SOL_SOCKET;
> +		hdr->cmsg_type = SCM_RIGHTS;
> +		memcpy(CMSG_DATA(hdr), &shm_fd, fdsize);
> +	}
> +
> +	for (;;) {
> +		rc = sendmsg(sock_fd, &msg, 0);
> +		if (rc >= 0 || errno != EINTR)
> +			break;
> +	}
> +
> +	if (rc < 0)
> +		PMD_DRV_LOG(ERR, "sendmsg error\n");
> +
> +	return rc;
> +}
> +
> +/*
> + * Open the file that backs the first EAL memory segment.
> + * /proc/self/maps is searched for the mapping whose start address
> + * equals the segment's virtual address; the path on that line is then
> + * opened read/write.
> + * Returns the new descriptor, or -1 if the maps file cannot be opened,
> + * no matching mapping with a path exists, or open() fails.
> + */
> +static int
> +qtest_open_shared_memory(void)
> +{
> +	const struct rte_memseg *ms;
> +	int shm_fd = -1;
> +	uint64_t vaddr;
> +	char buf[1024];
> +	char *p;
> +	FILE *f;
> +
> +	ms = rte_eal_get_physmem_layout();
> +	f = fopen("/proc/self/maps", "r");
> +	if (f == NULL)
> +		return -1;
> +
> +	/* parse maps */
> +	while (fgets(buf, sizeof(buf), f) != NULL) {
> +		/* get vaddr; strtoull keeps all 64 bits on every target */
> +		vaddr = strtoull(buf, NULL, 16);
> +
> +		/* check if this region is EAL memory */
> +		if (vaddr != ms[0].addr_64)
> +			continue;
> +
> +		p = strchr(buf, '/');
> +		if (p != NULL) {
> +			/* drop the trailing newline, if there is one */
> +			p[strcspn(p, "\n")] = '\0';
> +			shm_fd = open(p, O_RDWR);
> +		}
> +		/* fall out of the loop so the maps file is always closed */
> +		break;
> +	}
> +	fclose(f);
> +
> +	return shm_fd;
> +}
> +
> +/*
> + * Hand the EAL memory to the ivshmem device: send the protocol
> + * version, then client id 0, then the descriptor of the file backing
> + * the EAL memory.
> + * The descriptor is closed again on every path.
> + * Returns 0 on success, -1 on failure.
> + */
> +static int
> +qtest_setup_shared_memory(struct qtest_session *s)
> +{
> +	int shm_fd, ret;
> +	int rc = -1;
> +
> +	/* To share DPDK EAL memory, open EAL memory again */
> +	shm_fd = qtest_open_shared_memory();
> +	if (shm_fd < 0) {
> +		PMD_DRV_LOG(ERR,
> +			"Failed to open EAL memory\n");
> +		return -1;
> +	}
> +
> +	/* send our protocol version first */
> +	ret = qtest_send_message_to_ivshmem(s->ivshmem_socket,
> +			IVSHMEM_PROTOCOL_VERSION, -1);
> +	if (ret < 0) {
> +		PMD_DRV_LOG(ERR,
> +			"Failed to send protocol version to ivshmem\n");
> +		goto out;
> +	}
> +
> +	/* send client id */
> +	ret = qtest_send_message_to_ivshmem(s->ivshmem_socket, 0, -1);
> +	if (ret < 0) {
> +		PMD_DRV_LOG(ERR, "Failed to send VMID to ivshmem\n");
> +		goto out;
> +	}
> +
> +	/* send message to ivshmem */
> +	ret = qtest_send_message_to_ivshmem(s->ivshmem_socket, -1, shm_fd);
> +	if (ret < 0) {
> +		PMD_DRV_LOG(ERR,
> +			"Failed to send file descriptor to ivshmem\n");
> +		goto out;
> +	}
> +
> +	rc = 0;
> +out:
> +	/* close EAL memory again */
> +	close(shm_fd);
> +
> +	return rc;
> +}
> +
> +int
> +qtest_vdev_init(struct rte_eth_dev_data *data,
> +		int qtest_socket, int ivshmem_socket)
> +{
> +	struct virtio_hw *hw = ((struct rte_eth_dev_data *)data)->dev_private;
> +	struct qtest_session *s;
> +	int ret;
> +
> +	s = rte_zmalloc(NULL, sizeof(*s), RTE_CACHE_LINE_SIZE);
> +
> +	ret = pipe(s->msgfds.pipefd);
> +	if (ret != 0) {
> +		PMD_DRV_LOG(ERR, "Failed to initialize message pipe\n");
> +		return -1;
> +	}
> +
> +	ret = pipe(s->irqfds.pipefd);
> +	if (ret != 0) {
> +		PMD_DRV_LOG(ERR, "Failed to initialize irq pipe\n");
> +		return -1;
> +	}
> +
> +	ret = qtest_register_target_devices(s);
> +	if (ret != 0) {
> +		PMD_DRV_LOG(ERR, "Failed to initialize qtest session\n");
> +		return -1;
> +	}
> +
> +	ret = pthread_mutex_init(&s->qtest_session_lock, NULL);
> +	if (ret != 0) {
> +		PMD_DRV_LOG(ERR, "Failed to initialize mutex\n");
> +		return -1;
> +	}
> +
> +	rte_atomic16_set(&s->enable_intr, 0);
> +	s->qtest_socket = qtest_socket;
> +	s->ivshmem_socket = ivshmem_socket;
> +	hw->qsession = (void *)s;
> +
> +	ret = pthread_create(&s->event_th, NULL, qtest_event_handler, s);
> +	if (ret != 0) {
> +		PMD_DRV_LOG(ERR, "Failed to create event handler\n");
> +		return -1;
> +	}
> +
> +	ret = pthread_create(&s->intr_th, NULL, qtest_intr_handler, s);
> +	if (ret != 0) {
> +		PMD_DRV_LOG(ERR, "Failed to create interrupt handler\n");
> +		return -1;
> +	}
> +
> +	ret = qtest_intr_initialize(data);
> +	if (ret != 0) {
> +		PMD_DRV_LOG(ERR, "Failed to initialize interrupt\n");
> +		return -1;
> +	}
> +
> +	ret = qtest_setup_shared_memory(s);
> +	if (ret != 0) {
> +		PMD_DRV_LOG(ERR, "Failed to setup shared memory\n");
> +		return -1;
> +	}
> +
> +	ret = qtest_init_pci_devices(s);
> +	if (ret != 0) {
> +		PMD_DRV_LOG(ERR, "Failed to initialize devices\n");
> +		return -1;
> +	}
> +
> +	return 0;
> +}
> +
> +static void
> +qtest_remove_target_devices(struct qtest_session *s)
> +{
> +	struct qtest_pci_device *dev, *next;
> +
> +	for (dev = TAILQ_FIRST(&s->head); dev != NULL; dev = next) {
> +		next = TAILQ_NEXT(dev, next);
> +		TAILQ_REMOVE(&s->head, dev, next);
> +		free(dev);
> +	}
> +}
> +
> +void
> +qtest_vdev_uninit(struct rte_eth_dev_data *data)
> +{
> +	struct virtio_hw *hw = ((struct rte_eth_dev_data *)data)->dev_private;
> +	struct qtest_session *s;
> +
> +	s = (struct qtest_session *)hw->qsession;
> +
> +	qtest_close_sockets(s);
> +
> +	pthread_cancel(s->event_th);
> +	pthread_join(s->event_th, NULL);
> +
> +	pthread_cancel(s->intr_th);
> +	pthread_join(s->intr_th, NULL);
> +
> +	pthread_mutex_destroy(&s->qtest_session_lock);
> +
> +	qtest_remove_target_devices(s);
> +
> +	rte_free(s);
> +}
> diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
> index d928339..234b561 100644
> --- a/drivers/net/virtio/virtio_ethdev.c
> +++ b/drivers/net/virtio/virtio_ethdev.c
> @@ -36,6 +36,11 @@
>  #include <stdio.h>
>  #include <errno.h>
>  #include <unistd.h>
> +#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
> +#include <sys/types.h>
> +#include <sys/socket.h>
> +#include <sys/un.h>
> +#endif
>  #ifdef RTE_EXEC_ENV_LINUXAPP
>  #include <dirent.h>
>  #include <fcntl.h>
> @@ -56,6 +61,10 @@
>  #include <rte_memory.h>
>  #include <rte_eal.h>
>  #include <rte_dev.h>
> +#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
> +#include <rte_eal_memconfig.h>
> +#include <rte_kvargs.h>
> +#endif
> 
>  #include "virtio_ethdev.h"
>  #include "virtio_pci.h"
> @@ -491,8 +500,12 @@ virtio_dev_close(struct rte_eth_dev *dev)
>  	PMD_INIT_LOG(DEBUG, "virtio_dev_close");
> 
>  	/* reset the NIC */
> -	if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)
> +	if (((dev->dev_type == RTE_ETH_DEV_PCI) &&
> +			(pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)) ||
> +			((dev->dev_type == RTE_ETH_DEV_VIRTUAL) &&
> +			(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC))) {
>  		vtpci_irq_config(hw, VIRTIO_MSI_NO_VECTOR);
> +	}
>  	vtpci_reset(hw);
>  	hw->started = 0;
>  	virtio_dev_free_mbufs(dev);
> @@ -1233,15 +1246,22 @@ virtio_interrupt_handler(__rte_unused struct rte_intr_handle *handle,
>  	isr = vtpci_isr(hw);
>  	PMD_DRV_LOG(INFO, "interrupt status = %#x", isr);
> 
> -	if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0)
> -		PMD_DRV_LOG(ERR, "interrupt enable failed");
> +	if (dev->dev_type == RTE_ETH_DEV_PCI) {
> +		if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0)
> +			PMD_DRV_LOG(ERR, "interrupt enable failed");
> +	}
> +#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
> +	else if (dev->dev_type == RTE_ETH_DEV_VIRTUAL) {
> +		if (qtest_intr_enable(dev->data) < 0)
> +			PMD_DRV_LOG(ERR, "interrupt enable failed");
> +	}
> +#endif
> 
>  	if (isr & VIRTIO_PCI_ISR_CONFIG) {
>  		if (virtio_dev_link_update(dev, 0) == 0)
>  			_rte_eth_dev_callback_process(dev,
>  						      RTE_ETH_EVENT_INTR_LSC);
>  	}
> -
>  }
> 
>  static void
> @@ -1264,7 +1284,8 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
>  	struct virtio_hw *hw = eth_dev->data->dev_private;
>  	struct virtio_net_config *config;
>  	struct virtio_net_config local_config;
> -	struct rte_pci_device *pci_dev;
> +	struct rte_pci_device *pci_dev = eth_dev->pci_dev;
> +	struct rte_pci_id id;
> 
>  	RTE_BUILD_BUG_ON(RTE_PKTMBUF_HEADROOM < sizeof(struct virtio_net_hdr));
> 
> @@ -1285,13 +1306,20 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
>  		return -ENOMEM;
>  	}
> 
> -	pci_dev = eth_dev->pci_dev;
> +	if (eth_dev->dev_type == RTE_ETH_DEV_PCI) {
> +		if (virtio_resource_init(pci_dev) < 0)
> +			return -1;
> 
> -	if (virtio_resource_init(pci_dev) < 0)
> -		return -1;
> -
> -	hw->use_msix = virtio_has_msix(&pci_dev->addr);
> -	hw->io_base = (uint32_t)(uintptr_t)pci_dev->mem_resource[0].addr;
> +		hw->use_msix = virtio_has_msix(&pci_dev->addr);
> +		hw->io_base = (uint32_t)(uintptr_t)pci_dev->mem_resource[0].addr;
> +	}
> +#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
> +	else if (eth_dev->dev_type == RTE_ETH_DEV_VIRTUAL) {
> +		hw->use_msix = 0;
> +		hw->io_base = 0;
> +		eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
> +	}
> +#endif
> 
>  	/* Reset the device although not necessary at startup */
>  	vtpci_reset(hw);
> @@ -1304,8 +1332,14 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
>  	virtio_negotiate_features(hw);
> 
>  	/* If host does not support status then disable LSC */
> -	if (!vtpci_with_feature(hw, VIRTIO_NET_F_STATUS))
> -		pci_dev->driver->drv_flags &= ~RTE_PCI_DRV_INTR_LSC;
> +	if (!vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) {
> +		if (eth_dev->dev_type == RTE_ETH_DEV_PCI)
> +			pci_dev->driver->drv_flags &= ~RTE_PCI_DRV_INTR_LSC;
> +#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
> +		else if (eth_dev->dev_type == RTE_ETH_DEV_VIRTUAL)
> +			eth_dev->data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC;
> +#endif
> +	}
> 
>  	rte_eth_copy_pci_info(eth_dev, pci_dev);
> 
> @@ -1383,14 +1417,30 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
> 
>  	PMD_INIT_LOG(DEBUG, "hw->max_rx_queues=%d   hw->max_tx_queues=%d",
>  			hw->max_rx_queues, hw->max_tx_queues);
> +
> +	memset(&id, 0, sizeof(id));
> +	if (eth_dev->dev_type == RTE_ETH_DEV_PCI)
> +		id = pci_dev->id;
> +#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
> +	else if (eth_dev->dev_type == RTE_ETH_DEV_VIRTUAL)
> +		id = qtest_get_pci_id_of_virtio_net();
> +#endif
> +
>  	PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x",
> -			eth_dev->data->port_id, pci_dev->id.vendor_id,
> -			pci_dev->id.device_id);
> +			eth_dev->data->port_id,
> +			id.vendor_id, id.device_id);
> 
>  	/* Setup interrupt callback  */
> -	if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)
> +	if ((eth_dev->dev_type == RTE_ETH_DEV_PCI) &&
> +			(pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC))
>  		rte_intr_callback_register(&pci_dev->intr_handle,
> -				   virtio_interrupt_handler, eth_dev);
> +				virtio_interrupt_handler, eth_dev);
> +#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
> +	else if ((eth_dev->dev_type == RTE_ETH_DEV_VIRTUAL) &&
> +			(eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC))
> +		qtest_intr_callback_register(eth_dev->data,
> +				virtio_interrupt_handler, eth_dev);
> +#endif
> 
>  	virtio_dev_cq_start(eth_dev);
> 
> @@ -1424,10 +1474,17 @@ eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev)
>  	eth_dev->data->mac_addrs = NULL;
> 
>  	/* reset interrupt callback  */
> -	if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)
> +	if ((eth_dev->dev_type == RTE_ETH_DEV_PCI) &&
> +			(pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC))
>  		rte_intr_callback_unregister(&pci_dev->intr_handle,
>  						virtio_interrupt_handler,
>  						eth_dev);
> +#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
> +	else if ((eth_dev->dev_type == RTE_ETH_DEV_VIRTUAL) &&
> +			(eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC))
> +		qtest_intr_callback_unregister(eth_dev->data,
> +				virtio_interrupt_handler, eth_dev);
> +#endif
> 
>  	PMD_INIT_LOG(DEBUG, "dev_uninit completed");
> 
> @@ -1491,11 +1548,15 @@ virtio_dev_configure(struct rte_eth_dev *dev)
>  		return -ENOTSUP;
>  	}
> 
> -	if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)
> +	if (((dev->dev_type == RTE_ETH_DEV_PCI) &&
> +			(pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)) ||
> +			((dev->dev_type == RTE_ETH_DEV_VIRTUAL) &&
> +			(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC))) {
>  		if (vtpci_irq_config(hw, 0) == VIRTIO_MSI_NO_VECTOR) {
>  			PMD_DRV_LOG(ERR, "failed to set config vector");
>  			return -EBUSY;
>  		}
> +	}
> 
>  	return 0;
>  }
> @@ -1510,15 +1571,31 @@ virtio_dev_start(struct rte_eth_dev *dev)
> 
>  	/* check if lsc interrupt feature is enabled */
>  	if (dev->data->dev_conf.intr_conf.lsc) {
> -		if (!(pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)) {
> -			PMD_DRV_LOG(ERR, "link status not supported by host");
> -			return -ENOTSUP;
> -		}
> +		if (dev->dev_type == RTE_ETH_DEV_PCI) {
> +			if (!(pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)) {
> +				PMD_DRV_LOG(ERR,
> +					"link status not supported by host");
> +				return -ENOTSUP;
> +			}
> 
> -		if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0) {
> -			PMD_DRV_LOG(ERR, "interrupt enable failed");
> -			return -EIO;
> +			if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0) {
> +				PMD_DRV_LOG(ERR, "interrupt enable failed");
> +				return -EIO;
> +			}
>  		}
> +#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
> +		else if (dev->dev_type == RTE_ETH_DEV_VIRTUAL) {
> +			if (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
> +				PMD_DRV_LOG(ERR,
> +					"link status not supported by host");
> +				return -ENOTSUP;
> +			}
> +			if (qtest_intr_enable(dev->data) < 0) {
> +				PMD_DRV_LOG(ERR, "interrupt enable failed");
> +				return -EIO;
> +			}
> +		}
> +#endif
>  	}
> 
>  	/* Initialize Link state */
> @@ -1615,8 +1692,15 @@ virtio_dev_stop(struct rte_eth_dev *dev)
> 
>  	PMD_INIT_LOG(DEBUG, "stop");
> 
> -	if (dev->data->dev_conf.intr_conf.lsc)
> -		rte_intr_disable(&dev->pci_dev->intr_handle);
> +	if (dev->data->dev_conf.intr_conf.lsc) {
> +		if (dev->dev_type == RTE_ETH_DEV_PCI)
> +			rte_intr_disable(&dev->pci_dev->intr_handle);
> +
> +#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
> +		if (dev->dev_type == RTE_ETH_DEV_VIRTUAL)
> +			qtest_intr_disable(dev->data);
> +#endif
> +	}
> 
>  	memset(&link, 0, sizeof(link));
>  	virtio_dev_atomic_write_link_status(dev, &link);
> @@ -1661,7 +1745,13 @@ virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info
> *dev_info)
>  {
>  	struct virtio_hw *hw = dev->data->dev_private;
> 
> -	dev_info->driver_name = dev->driver->pci_drv.name;
> +	if (dev->dev_type == RTE_ETH_DEV_PCI)
> +		dev_info->driver_name = dev->driver->pci_drv.name;
> +#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
> +	else if (dev->dev_type == RTE_ETH_DEV_VIRTUAL)
> +		dev_info->driver_name =  dev->data->drv_name;
> +#endif
> +
>  	dev_info->max_rx_queues = (uint16_t)hw->max_rx_queues;
>  	dev_info->max_tx_queues = (uint16_t)hw->max_tx_queues;
>  	dev_info->min_rx_bufsize = VIRTIO_MIN_RX_BUFSIZE;
> @@ -1689,3 +1779,196 @@ static struct rte_driver rte_virtio_driver = {
>  };
> 
>  PMD_REGISTER_DRIVER(rte_virtio_driver);
> +
> +#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
> +
> +#define ETH_VIRTIO_NET_ARG_QTEST_PATH           "qtest"
> +#define ETH_VIRTIO_NET_ARG_IVSHMEM_PATH         "ivshmem"
> +
> +static const char *valid_args[] = {
> +	ETH_VIRTIO_NET_ARG_QTEST_PATH,
> +	ETH_VIRTIO_NET_ARG_IVSHMEM_PATH,
> +	NULL
> +};
> +
> +static int
> +get_string_arg(const char *key __rte_unused,
> +		const char *value, void *extra_args)
> +{
> +	int ret, fd, loop = 3;
> +	int *pfd = extra_args;
> +	struct sockaddr_un sa = {0};
> +
> +	if ((value == NULL) || (extra_args == NULL))
> +		return -EINVAL;
> +
> +	fd = socket(AF_UNIX, SOCK_STREAM, 0);
> +	if (fd < 0)
> +		return -1;
> +
> +	sa.sun_family = AF_UNIX;
> +	strncpy(sa.sun_path, value, sizeof(sa.sun_path));
> +
> +	while (loop--) {
> +		/*
> +		 * may need to wait for qtest and ivshmem
> +		 * sockets are prepared by QEMU.
> +		 */
> +		ret = connect(fd, (struct sockaddr *)&sa,
> +				sizeof(struct sockaddr_un));
> +		if (ret != 0)
> +			sleep(1);
> +		else
> +			break;
> +	}
> +
> +	if (ret != 0) {
> +		close(fd);
> +		return -1;
> +	}
> +
> +	*pfd = fd;
> +
> +	return 0;
> +}
> +
> +static struct rte_eth_dev *
> +virtio_net_eth_dev_alloc(const char *name)
> +{
> +	struct rte_eth_dev *eth_dev;
> +	struct rte_eth_dev_data *data;
> +	struct virtio_hw *hw;
> +
> +	eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL);
> +	if (eth_dev == NULL)
> +		rte_panic("cannot alloc rte_eth_dev\n");
> +
> +	data = eth_dev->data;
> +
> +	hw = rte_zmalloc(NULL, sizeof(*hw), 0);
> +	if (!hw)
> +		rte_panic("malloc virtio_hw failed\n");
> +
> +	data->dev_private = hw;
> +	eth_dev->driver = &rte_virtio_pmd;
> +	return eth_dev;
> +}
> +
> +/*
> + * Initialization when "CONFIG_RTE_LIBRTE_VIRTIO_HOST_MODE" is enabled.
> + */
> +static int
> +rte_virtio_net_pmd_init(const char *name, const char *params)
> +{
> +	struct rte_kvargs *kvlist = NULL;
> +	struct rte_eth_dev *eth_dev = NULL;
> +	int ret, qtest_sock, ivshmem_sock;
> +	struct rte_mem_config *mcfg;
> +
> +	if (params == NULL || params[0] == '\0')
> +		goto error;
> +
> +	/* get pointer to global configuration */
> +	mcfg = rte_eal_get_configuration()->mem_config;
> +
> +	/* Check if EAL memory consists of one memory segment */
> +	if ((RTE_MAX_MEMSEG > 1) && (mcfg->memseg[1].addr != NULL)) {
> +		PMD_INIT_LOG(ERR, "Non contigious memory");
> +		goto error;
> +	}
> +
> +	kvlist = rte_kvargs_parse(params, valid_args);
> +	if (!kvlist) {
> +		PMD_INIT_LOG(ERR, "error when parsing param");
> +		goto error;
> +	}
> +
> +	if (rte_kvargs_count(kvlist, ETH_VIRTIO_NET_ARG_IVSHMEM_PATH) == 1) {
> +		ret = rte_kvargs_process(kvlist, ETH_VIRTIO_NET_ARG_IVSHMEM_PATH,
> +				&get_string_arg, &ivshmem_sock);
> +		if (ret != 0) {
> +			PMD_INIT_LOG(ERR,
> +				"Failed to connect to ivshmem socket");
> +			goto error;
> +		}
> +	} else {
> +		PMD_INIT_LOG(ERR, "No argument specified for %s",
> +				ETH_VIRTIO_NET_ARG_IVSHMEM_PATH);
> +		goto error;
> +	}
> +
> +	if (rte_kvargs_count(kvlist, ETH_VIRTIO_NET_ARG_QTEST_PATH) == 1) {
> +		ret = rte_kvargs_process(kvlist, ETH_VIRTIO_NET_ARG_QTEST_PATH,
> +				&get_string_arg, &qtest_sock);
> +		if (ret != 0) {
> +			PMD_INIT_LOG(ERR,
> +				"Failed to connect to qtest socket");
> +			goto error;
> +		}
> +	} else {
> +		PMD_INIT_LOG(ERR, "No argument specified for %s",
> +				ETH_VIRTIO_NET_ARG_QTEST_PATH);
> +		goto error;
> +	}
> +
> +	eth_dev = virtio_net_eth_dev_alloc(name);
> +
> +	qtest_vdev_init(eth_dev->data, qtest_sock, ivshmem_sock);
> +
> +	/* originally, this will be called in rte_eal_pci_probe() */
> +	eth_virtio_dev_init(eth_dev);
> +
> +	eth_dev->driver = NULL;
> +	eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE;
> +	eth_dev->data->kdrv = RTE_KDRV_NONE;
> +	eth_dev->data->drv_name = "rte_virtio_pmd";
> +
> +	rte_kvargs_free(kvlist);
> +	return 0;
> +
> +error:
> +	rte_kvargs_free(kvlist);
> +	return -EFAULT;
> +}
> +
> +/*
> + * Finalization when "CONFIG_RTE_LIBRTE_VIRTIO_HOST_MODE" is enabled.
> + */
> +static int
> +rte_virtio_net_pmd_uninit(const char *name)
> +{
> +	struct rte_eth_dev *eth_dev = NULL;
> +	int ret;
> +
> +	if (name == NULL)
> +		return -EINVAL;
> +
> +	/* find the ethdev entry */
> +	eth_dev = rte_eth_dev_allocated(name);
> +	if (eth_dev == NULL)
> +		return -ENODEV;
> +
> +	ret = eth_virtio_dev_uninit(eth_dev);
> +	if (ret != 0)
> +		return -EFAULT;
> +
> +	qtest_vdev_uninit(eth_dev->data);
> +	rte_free(eth_dev->data->dev_private);
> +
> +	ret = rte_eth_dev_release_port(eth_dev);
> +	if (ret != 0)
> +		return -EFAULT;
> +
> +	return 0;
> +}
> +
> +static struct rte_driver rte_virtio_net_driver = {
> +	.name   = "eth_virtio_net",
> +	.type   = PMD_VDEV,
> +	.init   = rte_virtio_net_pmd_init,
> +	.uninit = rte_virtio_net_pmd_uninit,
> +};
> +
> +PMD_REGISTER_DRIVER(rte_virtio_net_driver);
> +
> +#endif /* RTE_LIBRTE_VIRTIO_HOST_MODE */
> diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h
> index ae2d47d..eefc7be 100644
> --- a/drivers/net/virtio/virtio_ethdev.h
> +++ b/drivers/net/virtio/virtio_ethdev.h
> @@ -122,5 +122,17 @@ uint16_t virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf
> **tx_pkts,
>  #define VTNET_LRO_FEATURES (VIRTIO_NET_F_GUEST_TSO4 | \
>  			    VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_ECN)
> 
> +#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
> +int qtest_vdev_init(struct rte_eth_dev_data *data,
> +		int qtest_socket, int ivshmem_socket);
> +void qtest_vdev_uninit(struct rte_eth_dev_data *data);
> +void qtest_intr_callback_register(void *data,
> +		rte_intr_callback_fn cb, void *cb_arg);
> +void qtest_intr_callback_unregister(void *data,
> +		rte_intr_callback_fn cb, void *cb_arg);
> +int qtest_intr_enable(void *data);
> +int qtest_intr_disable(void *data);
> +struct rte_pci_id qtest_get_pci_id_of_virtio_net(void);
> +#endif /* RTE_LIBRTE_VIRTIO_HOST_MODE */
> 
>  #endif /* _VIRTIO_ETHDEV_H_ */
> diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
> index 47f722a..d4ede73 100644
> --- a/drivers/net/virtio/virtio_pci.h
> +++ b/drivers/net/virtio/virtio_pci.h
> @@ -165,6 +165,9 @@ struct virtqueue;
> 
>  struct virtio_hw {
>  	struct virtqueue *cvq;
> +#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
> +	void        *qsession;
> +#endif
>  	uint32_t    io_base;
>  	uint32_t    guest_features;
>  	uint32_t    max_tx_queues;
> @@ -226,6 +229,26 @@ outl_p(unsigned int data, unsigned int port)
>  }
>  #endif
> 
> +#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
> +
> +uint32_t virtio_ioport_read(struct virtio_hw *, uint64_t, char type);
> +void virtio_ioport_write(struct virtio_hw *, uint64_t, uint64_t, char type);
> +
> +#define VIRTIO_READ_REG_1(hw, reg) \
> +	virtio_ioport_read(hw, reg, 'b')
> +#define VIRTIO_WRITE_REG_1(hw, reg, value) \
> +	virtio_ioport_write(hw, reg, value, 'b')
> +#define VIRTIO_READ_REG_2(hw, reg) \
> +	virtio_ioport_read(hw, reg, 'w')
> +#define VIRTIO_WRITE_REG_2(hw, reg, value) \
> +	virtio_ioport_write(hw, reg, value, 'w')
> +#define VIRTIO_READ_REG_4(hw, reg) \
> +	virtio_ioport_read(hw, reg, 'l')
> +#define VIRTIO_WRITE_REG_4(hw, reg, value) \
> +	virtio_ioport_write(hw, reg, value, 'l')
> +
> +#else /* RTE_LIBRTE_VIRTIO_HOST_MODE */
> +

 I have a concern about such compile-time switches. What if we want the same build to work with both 'real' virtio devices and socket-based ones?
Shouldn't we introduce some function pointers here, so that the register access routines can be switched at runtime?

>  #define VIRTIO_PCI_REG_ADDR(hw, reg) \
>  	(unsigned short)((hw)->io_base + (reg))
> 
> @@ -244,6 +267,8 @@ outl_p(unsigned int data, unsigned int port)
>  #define VIRTIO_WRITE_REG_4(hw, reg, value) \
>  	outl_p((unsigned int)(value), (VIRTIO_PCI_REG_ADDR((hw), (reg))))
> 
> +#endif /* RTE_LIBRTE_VIRTIO_HOST_MODE */
> +
>  static inline int
>  vtpci_with_feature(struct virtio_hw *hw, uint32_t bit)
>  {
> --
> 2.1.4

Kind regards,
Pavel Fedin
Expert Engineer
Samsung Electronics Research center Russia




More information about the dev mailing list