[dpdk-dev] [PATCH v1 2/2] virtio: Extend virtio-net PMD to support container environment

Tetsuya Mukawa mukawa at igel.co.jp
Wed Dec 16 09:37:29 CET 2015


The patch adds a new virtio-net PMD configuration that allows the PMD to
work on host as if the PMD is in VM.
Here is new configuration for virtio-net PMD.
 - CONFIG_RTE_LIBRTE_VIRTIO_HOST_MODE
To use this mode, EAL needs physically contiguous memory. To allocate
such memory, enable below option, and add "--contig-mem" option to
application command line.
 - CONFIG_RTE_EAL_SINGLE_FILE_SEGMENTS

To prepare virtio-net device on host, the users need to invoke QEMU process
in special qtest mode. This mode is mainly used for testing QEMU devices
from outer process. In this mode, no guest runs.
Here is QEMU command line.

 $ qemu-system-x86_64 \
		-machine pc-i440fx-1.4,accel=qtest \
		-display none -qtest-log /dev/null \
		-qtest unix:/tmp/socket,server \
		-netdev type=tap,script=/etc/qemu-ifup,id=net0,queues=1 \
		-device virtio-net-pci,netdev=net0,mq=on \
		-chardev socket,id=chr1,path=/tmp/ivshmem,server \
		-device ivshmem,size=1G,chardev=chr1,vectors=1

* QEMU process is needed per port.
* In most cases, just using above command is enough.
* The vhost backends like vhost-net and vhost-user can be specified.
* Only checked "pc-i440fx-1.4" machine, but may work with other
  machines. It depends on a machine has piix3 south bridge.
  If the machine doesn't have, virtio-net PMD cannot receive status
  changed interrupts.
* Should not add "--enable-kvm" to QEMU command line.

After invoking QEMU, the PMD can connect to QEMU process using unix
domain sockets. Over these sockets, virtio-net, ivshmem and piix3
device in QEMU are probed by the PMD.
Here is example of command line.

 $ testpmd -c f -n 1 -m 1024 --contig-mem \
         --vdev="eth_virtio_net0,qtest=/tmp/socket,ivshmem=/tmp/ivshmem" \
         -- --disable-hw-vlan --txqflags=0xf00 -i

Please specify same unix domain sockets and memory size in both QEMU and
DPDK command lines like above.
The share memory size should be power of 2, because ivshmem only accepts
such memry size.

Also, "--contig-mem" option is needed for the PMD like above. This option
allocates contiguous memory, and create one hugepage file on hugetlbfs.
If there is no enough contiguous memory, initialization will be failed.

This contiguous memory is used as shared memory between DPDK application
and ivshmem device in QEMU.

Signed-off-by: Tetsuya Mukawa <mukawa at igel.co.jp>
---
 config/common_linuxapp             |    1 +
 drivers/net/virtio/Makefile        |    4 +
 drivers/net/virtio/qtest.c         | 1107 ++++++++++++++++++++++++++++++++++++
 drivers/net/virtio/virtio_ethdev.c |  341 ++++++++++-
 drivers/net/virtio/virtio_ethdev.h |   12 +
 drivers/net/virtio/virtio_pci.h    |   25 +
 6 files changed, 1461 insertions(+), 29 deletions(-)
 create mode 100644 drivers/net/virtio/qtest.c

diff --git a/config/common_linuxapp b/config/common_linuxapp
index 74bc515..eaa720c 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -269,6 +269,7 @@ CONFIG_RTE_LIBRTE_PMD_SZEDATA2=n
 # Compile burst-oriented VIRTIO PMD driver
 #
 CONFIG_RTE_LIBRTE_VIRTIO_PMD=y
+CONFIG_RTE_LIBRTE_VIRTIO_HOST_MODE=n
 CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_INIT=n
 CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_RX=n
 CONFIG_RTE_LIBRTE_VIRTIO_DEBUG_TX=n
diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile
index 43835ba..697e629 100644
--- a/drivers/net/virtio/Makefile
+++ b/drivers/net/virtio/Makefile
@@ -52,6 +52,10 @@ SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx.c
 SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_ethdev.c
 SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx_simple.c
 
+ifeq ($(CONFIG_RTE_LIBRTE_VIRTIO_HOST_MODE),y)
+	SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += qtest.c
+endif
+
 # this lib depends upon:
 DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_eal lib/librte_ether
 DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_mempool lib/librte_mbuf
diff --git a/drivers/net/virtio/qtest.c b/drivers/net/virtio/qtest.c
new file mode 100644
index 0000000..4ffdefb
--- /dev/null
+++ b/drivers/net/virtio/qtest.c
@@ -0,0 +1,1107 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2015 IGEL Co., Ltd. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of IGEL Co., Ltd. nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/queue.h>
+#include <signal.h>
+#include <pthread.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include <rte_memory.h>
+#include <rte_malloc.h>
+#include <rte_common.h>
+#include <rte_interrupts.h>
+
+#include "virtio_pci.h"
+#include "virtio_logs.h"
+#include "virtio_ethdev.h"
+
+#define NB_BUS                          256
+#define NB_DEVICE                       32
+#define NB_BAR                          6
+
+/* PCI common configuration registers */
+#define REG_ADDR_VENDOR_ID              0x0
+#define REG_ADDR_DEVICE_ID              0x2
+#define REG_ADDR_COMMAND                0x4
+#define REG_ADDR_STATUS                 0x6
+#define REG_ADDR_REVISION_ID            0x8
+#define REG_ADDR_CLASS_CODE             0x9
+#define REG_ADDR_CACHE_LINE_S           0xc
+#define REG_ADDR_LAT_TIMER              0xd
+#define REG_ADDR_HEADER_TYPE            0xe
+#define REG_ADDR_BIST                   0xf
+#define REG_ADDR_BAR0                   0x10
+#define REG_ADDR_BAR1                   0x14
+#define REG_ADDR_BAR2                   0x18
+#define REG_ADDR_BAR3                   0x1c
+#define REG_ADDR_BAR4                   0x20
+#define REG_ADDR_BAR5                   0x24
+
+/* PCI common configuration register values */
+#define REG_VAL_COMMAND_IO              0x1
+#define REG_VAL_COMMAND_MEMORY          0x2
+#define REG_VAL_COMMAND_MASTER          0x4
+#define REG_VAL_HEADER_TYPE_ENDPOINT    0x0
+#define REG_VAL_BAR_MEMORY              0x0
+#define REG_VAL_BAR_IO                  0x1
+#define REG_VAL_BAR_LOCATE_32           0x0
+#define REG_VAL_BAR_LOCATE_UNDER_1MB    0x2
+#define REG_VAL_BAR_LOCATE_64           0x4
+
+/* PIIX3 configuration registers */
+#define PIIX3_REG_ADDR_PIRQA            0x60
+#define PIIX3_REG_ADDR_PIRQB            0x61
+#define PIIX3_REG_ADDR_PIRQC            0x62
+#define PIIX3_REG_ADDR_PIRQD            0x63
+
+/* Device information */
+#define VIRTIO_NET_DEVICE_ID            0x1000
+#define VIRTIO_NET_VENDOR_ID            0x1af4
+#define VIRTIO_NET_IO_START             0xc000
+#define VIRTIO_NET_IRQ_NUM              10
+#define IVSHMEM_DEVICE_ID               0x1110
+#define IVSHMEM_VENDOR_ID               0x1af4
+#define IVSHMEM_MEMORY_START            0x1000
+#define IVSHMEM_PROTOCOL_VERSION        0
+#define PIIX3_DEVICE_ID                 0x7000
+#define PIIX3_VENDOR_ID                 0x8086
+
+#define PCI_CONFIG_ADDR(_bus, _device, _function, _offset) ( \
+	(1 << 31) | ((_bus) & 0xff) << 16 | ((_device) & 0x1f) << 11 | \
+	((_function) & 0xf) << 8 | ((_offset) & 0xfc))
+
+static char interrupt_message[32];
+
+enum qtest_pci_bar_type {
+	QTEST_PCI_BAR_DISABLE = 0,
+	QTEST_PCI_BAR_IO,
+	QTEST_PCI_BAR_MEMORY_UNDER_1MB,
+	QTEST_PCI_BAR_MEMORY_32,
+	QTEST_PCI_BAR_MEMORY_64
+};
+
+struct qtest_pci_bar {
+	enum qtest_pci_bar_type type;
+	uint8_t addr;
+	uint64_t region_start;
+	uint64_t region_size;
+};
+
+struct qtest_session;
+TAILQ_HEAD(qtest_pci_device_list, qtest_pci_device);
+struct qtest_pci_device {
+	TAILQ_ENTRY(qtest_pci_device) next;
+	const char *name;
+	uint16_t device_id;
+	uint16_t vendor_id;
+	uint8_t bus_addr;
+	uint8_t device_addr;
+	struct qtest_pci_bar bar[NB_BAR];
+	int (*init)(struct qtest_session *s, struct qtest_pci_device *dev);
+};
+
+union qtest_pipefds {
+	struct {
+		int pipefd[2];
+	};
+	struct {
+		int readfd;
+		int writefd;
+	};
+};
+
+struct qtest_session {
+	int qtest_socket;
+	pthread_mutex_t qtest_session_lock;
+
+	struct qtest_pci_device_list head;
+	int ivshmem_socket;
+
+	pthread_t event_th;
+	union qtest_pipefds msgfds;
+
+	pthread_t intr_th;
+	union qtest_pipefds irqfds;
+	rte_atomic16_t enable_intr;
+	rte_intr_callback_fn cb;
+	void *cb_arg;
+};
+
+static int
+qtest_write(int fd, char *buf, size_t count)
+{
+	size_t len = count;
+	size_t total_len = 0;
+	int ret = 0;
+
+	while (len > 0) {
+		ret = write(fd, buf, len);
+		if (ret == (int)len)
+			break;
+		if (ret == -1) {
+			if (errno == EINTR)
+				continue;
+			return ret;
+		}
+		total_len += ret;
+		buf += ret;
+		len -= ret;
+	}
+	return total_len + ret;
+}
+
+static int
+qtest_read(int fd, char *buf, size_t count)
+{
+	size_t len = count;
+	size_t total_len = 0;
+	int ret = 0;
+
+	while (len > 0) {
+		ret = read(fd, buf, len);
+		if (ret == (int)len)
+			break;
+		if (*(buf + ret - 1) == '\n')
+			break;
+		if (ret == -1) {
+			if (errno == EINTR)
+				continue;
+			return ret;
+		}
+		total_len += ret;
+		buf += ret;
+		len -= ret;
+	}
+	return total_len + ret;
+}
+
+/*
+ * To know QTest protocol specification, see below QEMU source code.
+ *  - qemu/qtest.c
+ */
+static uint32_t
+qtest_in(struct qtest_session *s, uint16_t addr, char type)
+{
+	char buf[1024];
+	int ret;
+
+	if ((type != 'l') && (type != 'w') && (type != 'b'))
+		rte_panic("Invalid value\n");
+
+	snprintf(buf, sizeof(buf), "in%c 0x%x\n", type, addr);
+	/* write to qtest socket */
+	ret = qtest_write(s->qtest_socket, buf, strlen(buf));
+	/* read reply from event handler */
+	ret = qtest_read(s->msgfds.readfd, buf, sizeof(buf));
+	buf[ret] = '\0';
+	return strtoul(buf + strlen("OK "), NULL, 16);
+}
+
+static void
+qtest_out(struct qtest_session *s, uint16_t addr, uint32_t val, char type)
+{
+	char buf[1024];
+	int ret __rte_unused;
+
+	if ((type != 'l') && (type != 'w') && (type != 'b'))
+		rte_panic("Invalid value\n");
+
+	snprintf(buf, sizeof(buf), "out%c 0x%x 0x%x\n", type, addr, val);
+	/* write to qtest socket */
+	ret = qtest_write(s->qtest_socket, buf, strlen(buf));
+	/* read reply from event handler */
+	ret = qtest_read(s->msgfds.readfd, buf, sizeof(buf));
+}
+
+/*
+ * qtest_pci_read/write are based on PCI configuration space specification.
+ * Accroding to the spec, access size of read()/write() should be 4 bytes.
+ */
+static int
+qtest_pci_readb(struct qtest_session *s, uint8_t bus, uint8_t device,
+		uint8_t function, uint8_t offset)
+{
+	uint32_t tmp;
+
+	tmp = PCI_CONFIG_ADDR(bus, device, function, offset);
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_out(s, 0xcf8, tmp, 'l');
+	tmp = qtest_in(s, 0xcfc, 'l');
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot unlock mutex\n");
+
+	return (tmp >> ((offset & 0x3) * 8)) & 0xff;
+}
+
+static void
+qtest_pci_writeb(struct qtest_session *s, uint8_t bus, uint8_t device,
+		uint8_t function, uint8_t offset, uint8_t value)
+{
+	uint32_t addr, tmp, pos;
+
+	addr = PCI_CONFIG_ADDR(bus, device, function, offset);
+	pos = (offset % 4) * 8;
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_out(s, 0xcf8, addr, 'l');
+	tmp = qtest_in(s, 0xcfc, 'l');
+	tmp = (tmp & ~(0xff << pos)) | (value << pos);
+
+	qtest_out(s, 0xcf8, addr, 'l');
+	qtest_out(s, 0xcfc, tmp, 'l');
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot unlock mutex\n");
+}
+
+static uint32_t
+qtest_pci_readl(struct qtest_session *s, uint8_t bus, uint8_t device,
+		uint8_t function, uint8_t offset)
+{
+	uint32_t tmp;
+
+	tmp = PCI_CONFIG_ADDR(bus, device, function, offset);
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_out(s, 0xcf8, tmp, 'l');
+	tmp = qtest_in(s, 0xcfc, 'l');
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot unlock mutex\n");
+
+	return tmp;
+}
+
+static void
+qtest_pci_writel(struct qtest_session *s, uint8_t bus, uint8_t device,
+		uint8_t function, uint8_t offset, uint32_t value)
+{
+	uint32_t tmp;
+
+	tmp = PCI_CONFIG_ADDR(bus, device, function, offset);
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_out(s, 0xcf8, tmp, 'l');
+	qtest_out(s, 0xcfc, value, 'l');
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot unlock mutex\n");
+}
+
+static uint64_t
+qtest_pci_readq(struct qtest_session *s, uint8_t bus, uint8_t device,
+		uint8_t function, uint8_t offset)
+{
+	uint32_t tmp;
+	uint64_t val;
+
+	tmp = PCI_CONFIG_ADDR(bus, device, function, offset);
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_out(s, 0xcf8, tmp, 'l');
+	val = (uint64_t)qtest_in(s, 0xcfc, 'l');
+
+	tmp = PCI_CONFIG_ADDR(bus, device, function, offset + 4);
+
+	qtest_out(s, 0xcf8, tmp, 'l');
+	val |= (uint64_t)qtest_in(s, 0xcfc, 'l') << 32;
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot unlock mutex\n");
+
+	return val;
+}
+
+static void
+qtest_pci_writeq(struct qtest_session *s, uint8_t bus, uint8_t device,
+		uint8_t function, uint8_t offset, uint64_t value)
+{
+	uint32_t tmp;
+
+	tmp = PCI_CONFIG_ADDR(bus, device, function, offset);
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_out(s, 0xcf8, tmp, 'l');
+	qtest_out(s, 0xcfc, (uint32_t)(value & 0xffffffff), 'l');
+
+	tmp = PCI_CONFIG_ADDR(bus, device, function, offset + 4);
+
+	qtest_out(s, 0xcf8, tmp, 'l');
+	qtest_out(s, 0xcfc, (uint32_t)(value >> 32), 'l');
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot unlock mutex\n");
+}
+
+/*
+ * virtio_ioport_read/write are Used by virtio-net PMD
+ */
+void
+virtio_ioport_write(struct virtio_hw *hw, uint64_t addr, uint64_t val, char type)
+{
+	struct qtest_session *s = (struct qtest_session *)hw->qsession;
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	qtest_out(s, VIRTIO_NET_IO_START + (uint16_t)addr, val, type);
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+}
+
+uint32_t
+virtio_ioport_read(struct virtio_hw *hw, uint64_t addr, char type)
+{
+	struct qtest_session *s = (struct qtest_session *)hw->qsession;
+	uint32_t val;
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	val = qtest_in(s, VIRTIO_NET_IO_START + (uint16_t)addr, type);
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	return val;
+}
+
+int
+qtest_intr_enable(void *data)
+{
+	struct virtio_hw *hw = ((struct rte_eth_dev_data *)data)->dev_private;
+	struct qtest_session *s;
+
+	s = (struct qtest_session *)hw->qsession;
+	rte_atomic16_set(&s->enable_intr, 1);
+
+	return 0;
+}
+
+int
+qtest_intr_disable(void *data)
+{
+	struct virtio_hw *hw = ((struct rte_eth_dev_data *)data)->dev_private;
+	struct qtest_session *s;
+
+	s = (struct qtest_session *)hw->qsession;
+	rte_atomic16_set(&s->enable_intr, 0);
+
+	return 0;
+}
+
+void
+qtest_intr_callback_register(void *data,
+		rte_intr_callback_fn cb, void *cb_arg)
+{
+	struct virtio_hw *hw = ((struct rte_eth_dev_data *)data)->dev_private;
+	struct qtest_session *s;
+
+	s = (struct qtest_session *)hw->qsession;
+	s->cb = cb;
+	s->cb_arg = cb_arg;
+	rte_atomic16_set(&s->enable_intr, 1);
+}
+
+void
+qtest_intr_callback_unregister(void *data,
+		rte_intr_callback_fn cb __rte_unused,
+		void *cb_arg __rte_unused)
+{
+	struct virtio_hw *hw = ((struct rte_eth_dev_data *)data)->dev_private;
+	struct qtest_session *s;
+
+	s = (struct qtest_session *)hw->qsession;
+	rte_atomic16_set(&s->enable_intr, 0);
+	s->cb = NULL;
+	s->cb_arg = NULL;
+}
+
+static void *
+qtest_intr_handler(void *data) {
+	struct qtest_session *s = (struct qtest_session *)data;
+	char buf[1];
+	int ret;
+
+	for (;;) {
+		ret = qtest_read(s->irqfds.readfd, buf, sizeof(buf));
+		if (ret < 0)
+			return NULL;
+		s->cb(NULL, s->cb_arg);
+	}
+	return NULL;
+}
+
+static int
+qtest_intr_initialize(void *data)
+{
+	struct virtio_hw *hw = ((struct rte_eth_dev_data *)data)->dev_private;
+	struct qtest_session *s;
+	char buf[1024];
+	int ret;
+
+	s = (struct qtest_session *)hw->qsession;
+
+	/* This message will come when interrupt occurs */
+	snprintf(interrupt_message, sizeof(interrupt_message),
+			"IRQ raise %d", VIRTIO_NET_IRQ_NUM);
+
+	snprintf(buf, sizeof(buf), "irq_intercept_in ioapic\n");
+
+	if (pthread_mutex_lock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	/* To enable interrupt, send "irq_intercept_in" message to QEMU */
+	ret = qtest_write(s->qtest_socket, buf, strlen(buf));
+	if (ret < 0) {
+		pthread_mutex_unlock(&s->qtest_session_lock);
+		return -1;
+	}
+
+	/* just ignore QEMU response */
+	ret = qtest_read(s->msgfds.readfd, buf, sizeof(buf));
+	if (ret < 0) {
+		pthread_mutex_unlock(&s->qtest_session_lock);
+		return -1;
+	}
+
+	if (pthread_mutex_unlock(&s->qtest_session_lock) < 0)
+		rte_panic("Cannot lock mutex\n");
+
+	return 0;
+}
+
+static void
+qtest_handle_one_message(struct qtest_session *s, char *buf)
+{
+	int ret;
+
+	if (strncmp(buf, interrupt_message, strlen(interrupt_message)) == 0) {
+		if (rte_atomic16_read(&s->enable_intr) == 0)
+			return;
+
+		/* relay interrupt to pipe */
+		ret = write(s->irqfds.writefd, "1", 1);
+		if (ret < 0)
+			rte_panic("cannot relay interrupt\n");
+	} else {
+		/* relay normal message to pipe */
+		ret = qtest_write(s->msgfds.writefd, buf, strlen(buf));
+		if (ret < 0)
+			rte_panic("cannot relay normal message\n");
+	}
+}
+
+static char *
+qtest_get_next_message(char *p)
+{
+	p = strchr(p, '\n');
+	if ((p == NULL) || (*(p + 1) == '\0'))
+		return NULL;
+	return p + 1;
+}
+
+static void
+qtest_close_one_socket(int *fd)
+{
+	if (*fd > 0) {
+		close(*fd);
+		*fd = -1;
+	}
+}
+
+static void
+qtest_close_sockets(struct qtest_session *s)
+{
+	qtest_close_one_socket(&s->qtest_socket);
+	qtest_close_one_socket(&s->msgfds.readfd);
+	qtest_close_one_socket(&s->msgfds.writefd);
+	qtest_close_one_socket(&s->irqfds.readfd);
+	qtest_close_one_socket(&s->irqfds.writefd);
+	qtest_close_one_socket(&s->ivshmem_socket);
+}
+
+/*
+ * This thread relays QTest response using pipe.
+ * The function is needed because we need to separate IRQ message from others.
+ */
+static void *
+qtest_event_handler(void *data) {
+	struct qtest_session *s = (struct qtest_session *)data;
+	char buf[1024];
+	char *p;
+	int ret;
+
+	for (;;) {
+		memset(buf, 0, sizeof(buf));
+		ret = qtest_read(s->qtest_socket, buf, sizeof(buf));
+		if (ret < 0) {
+			qtest_close_sockets(s);
+			return NULL;
+		}
+
+		/* may receive multiple messages at the same time */
+		p = buf;
+		do {
+			qtest_handle_one_message(s, p);
+		} while ((p = qtest_get_next_message(p)) != NULL);
+	}
+	return NULL;
+}
+
+static int
+qtest_init_piix3_device(struct qtest_session *s, struct qtest_pci_device *dev)
+{
+	uint8_t bus, device, virtio_net_slot = 0;
+	struct qtest_pci_device *tmpdev;
+	uint8_t pcislot2regaddr[] = {	0xff,
+					0xff,
+					0xff,
+					PIIX3_REG_ADDR_PIRQC,
+					PIIX3_REG_ADDR_PIRQD,
+					PIIX3_REG_ADDR_PIRQA,
+					PIIX3_REG_ADDR_PIRQB};
+
+	bus = dev->bus_addr;
+	device = dev->device_addr;
+
+	PMD_DRV_LOG(INFO,
+		"Find %s on virtual PCI bus: %04x:%02x:00.0\n",
+		dev->name, bus, device);
+
+	/* Get slot id that is connected to virtio-net */
+	TAILQ_FOREACH(tmpdev, &s->head, next) {
+		if (strcmp(tmpdev->name, "virtio-net") == 0) {
+			virtio_net_slot = tmpdev->device_addr;
+			break;
+		}
+	}
+
+	if (virtio_net_slot == 0)
+		return -1;
+
+	/*
+	 * Set interrupt routing for virtio-net device.
+	 * Here is i440fx/piix3 connection settings
+	 * ---------------------------------------
+	 * PCI Slot3 -> PIRQC
+	 * PCI Slot4 -> PIRQD
+	 * PCI Slot5 -> PIRQA
+	 * PCI Slot6 -> PIRQB
+	 */
+	if (pcislot2regaddr[virtio_net_slot] != 0xff) {
+		qtest_pci_writeb(s, bus, device, 0,
+				pcislot2regaddr[virtio_net_slot],
+				VIRTIO_NET_IRQ_NUM);
+	}
+
+	return 0;
+}
+
+/*
+ * Common initialization of PCI device.
+ * To know detail, see pci specification.
+ */
+static int
+qtest_init_pci_device(struct qtest_session *s, struct qtest_pci_device *dev)
+{
+	uint8_t i, bus, device;
+	uint32_t val;
+	uint64_t val64;
+
+	bus = dev->bus_addr;
+	device = dev->device_addr;
+
+	PMD_DRV_LOG(INFO,
+		"Find %s on virtual PCI bus: %04x:%02x:00.0\n",
+		dev->name, bus, device);
+
+	/* Check header type */
+	val = qtest_pci_readb(s, bus, device, 0, REG_ADDR_HEADER_TYPE);
+	if (val != REG_VAL_HEADER_TYPE_ENDPOINT) {
+		PMD_DRV_LOG(ERR, "Unexpected header type %d\n", val);
+		return -1;
+	}
+
+	/* Check BAR type */
+	for (i = 0; i < NB_BAR; i++) {
+		val = qtest_pci_readl(s, bus, device, 0, dev->bar[i].addr);
+
+		switch (dev->bar[i].type) {
+		case QTEST_PCI_BAR_IO:
+			if ((val & 0x1) != REG_VAL_BAR_IO)
+				goto error;
+			break;
+		case QTEST_PCI_BAR_MEMORY_UNDER_1MB:
+			if ((val & 0x1) != REG_VAL_BAR_MEMORY)
+				goto error;
+			if ((val & 0x6) != REG_VAL_BAR_LOCATE_UNDER_1MB)
+				goto error;
+			break;
+		case QTEST_PCI_BAR_MEMORY_32:
+			if ((val & 0x1) != REG_VAL_BAR_MEMORY)
+				goto error;
+			if ((val & 0x6) != REG_VAL_BAR_LOCATE_32)
+				goto error;
+			break;
+		case QTEST_PCI_BAR_MEMORY_64:
+			if ((val & 0x1) != REG_VAL_BAR_MEMORY)
+				goto error;
+			if ((val & 0x6) != REG_VAL_BAR_LOCATE_64)
+				goto error;
+			break;
+		case QTEST_PCI_BAR_DISABLE:
+			break;
+		}
+	}
+
+	/* Enable device */
+	val = qtest_pci_readl(s, bus, device, 0, REG_ADDR_COMMAND);
+	val |= REG_VAL_COMMAND_IO | REG_VAL_COMMAND_MEMORY | REG_VAL_COMMAND_MASTER;
+	qtest_pci_writel(s, bus, device, 0, REG_ADDR_COMMAND, val);
+
+	/* Calculate BAR size */
+	for (i = 0; i < NB_BAR; i++) {
+		switch (dev->bar[i].type) {
+		case QTEST_PCI_BAR_IO:
+		case QTEST_PCI_BAR_MEMORY_UNDER_1MB:
+		case QTEST_PCI_BAR_MEMORY_32:
+			qtest_pci_writel(s, bus, device, 0,
+					dev->bar[i].addr, 0xffffffff);
+			val = qtest_pci_readl(s, bus, device,
+					0, dev->bar[i].addr);
+			dev->bar[i].region_size = ~(val & 0xfffffff0) + 1;
+			break;
+		case QTEST_PCI_BAR_MEMORY_64:
+			qtest_pci_writeq(s, bus, device, 0,
+					dev->bar[i].addr, 0xffffffffffffffff);
+			val64 = qtest_pci_readq(s, bus, device,
+					0, dev->bar[i].addr);
+			dev->bar[i].region_size =
+					~(val64 & 0xfffffffffffffff0) + 1;
+			break;
+		case QTEST_PCI_BAR_DISABLE:
+			break;
+		}
+	}
+
+	/* Set BAR region */
+	for (i = 0; i < NB_BAR; i++) {
+		switch (dev->bar[i].type) {
+		case QTEST_PCI_BAR_IO:
+		case QTEST_PCI_BAR_MEMORY_UNDER_1MB:
+		case QTEST_PCI_BAR_MEMORY_32:
+			qtest_pci_writel(s, bus, device, 0, dev->bar[i].addr,
+				dev->bar[i].region_start);
+			PMD_DRV_LOG(INFO, "Set BAR of %s device: 0x%lx - 0x%lx\n",
+				dev->name, dev->bar[i].region_start,
+				dev->bar[i].region_start + dev->bar[i].region_size);
+			break;
+		case QTEST_PCI_BAR_MEMORY_64:
+			qtest_pci_writeq(s, bus, device, 0, dev->bar[i].addr,
+				dev->bar[i].region_start);
+			PMD_DRV_LOG(INFO, "Set BAR of %s device: 0x%lx - 0x%lx\n",
+				dev->name, dev->bar[i].region_start,
+				dev->bar[i].region_start + dev->bar[i].region_size);
+			break;
+		case QTEST_PCI_BAR_DISABLE:
+			break;
+		}
+	}
+
+	return 0;
+
+error:
+	PMD_DRV_LOG(ERR, "Unexpected BAR type\n");
+	return -1;
+}
+
+static void
+qtest_find_pci_device(struct qtest_session *s, uint16_t bus, uint8_t device)
+{
+	struct qtest_pci_device *dev;
+	uint32_t val;
+
+	val = qtest_pci_readl(s, bus, device, 0, 0);
+	TAILQ_FOREACH(dev, &s->head, next) {
+		if (val == ((uint32_t)dev->device_id << 16 | dev->vendor_id)) {
+			/* device is found, then store it */
+			dev->bus_addr = bus;
+			dev->device_addr = device;
+			return;
+		}
+	}
+}
+
+static int
+qtest_init_pci_devices(struct qtest_session *s)
+{
+	struct qtest_pci_device *dev;
+	uint16_t bus;
+	uint8_t device;
+	int ret;
+
+	/* Find devices */
+	bus = 0;
+	do {
+		device = 0;
+		do {
+			qtest_find_pci_device(s, bus, device);
+		} while (device++ != NB_DEVICE - 1);
+	} while (bus++ != NB_BUS - 1);
+
+	/* Initialize devices */
+	TAILQ_FOREACH(dev, &s->head, next) {
+		ret = dev->init(s, dev);
+		if (ret != 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+struct rte_pci_id
+qtest_get_pci_id_of_virtio_net(void)
+{
+	struct rte_pci_id id =  {VIRTIO_NET_DEVICE_ID,
+		VIRTIO_NET_VENDOR_ID, PCI_ANY_ID, PCI_ANY_ID};
+
+	return id;
+}
+
+static int
+qtest_register_target_devices(struct qtest_session *s)
+{
+	struct qtest_pci_device *virtio_net, *ivshmem, *piix3;
+	const struct rte_memseg *ms;
+
+	ms = rte_eal_get_physmem_layout();
+	/* if EAL memory size isn't pow of 2, ivshmem refuse it */
+	if ((ms[0].len & (ms[0].len - 1)) != 0) {
+		PMD_DRV_LOG(ERR, "memory size must be power of 2\n");
+		return -1;
+	}
+
+	virtio_net = malloc(sizeof(*virtio_net));
+	if (virtio_net == NULL)
+		return -1;
+
+	ivshmem = malloc(sizeof(*ivshmem));
+	if (ivshmem == NULL)
+		return -1;
+
+	piix3 = malloc(sizeof(*piix3));
+	if (piix3 == NULL)
+		return -1;
+
+	memset(virtio_net, 0, sizeof(*virtio_net));
+	memset(ivshmem, 0, sizeof(*ivshmem));
+
+	TAILQ_INIT(&s->head);
+
+	virtio_net->name = "virtio-net";
+	virtio_net->device_id = VIRTIO_NET_DEVICE_ID;
+	virtio_net->vendor_id = VIRTIO_NET_VENDOR_ID;
+	virtio_net->init = qtest_init_pci_device;
+	virtio_net->bar[0].addr = REG_ADDR_BAR0;
+	virtio_net->bar[0].type = QTEST_PCI_BAR_IO;
+	virtio_net->bar[0].region_start = VIRTIO_NET_IO_START;
+	TAILQ_INSERT_TAIL(&s->head, virtio_net, next);
+
+	ivshmem->name = "ivshmem";
+	ivshmem->device_id = IVSHMEM_DEVICE_ID;
+	ivshmem->vendor_id = IVSHMEM_VENDOR_ID;
+	ivshmem->init = qtest_init_pci_device;
+	ivshmem->bar[0].addr = REG_ADDR_BAR0;
+	ivshmem->bar[0].type = QTEST_PCI_BAR_MEMORY_32;
+	ivshmem->bar[0].region_start = IVSHMEM_MEMORY_START;
+	ivshmem->bar[1].addr = REG_ADDR_BAR2;
+	ivshmem->bar[1].type = QTEST_PCI_BAR_MEMORY_64;
+	/* In host mode, only one memory segment is vaild */
+	ivshmem->bar[1].region_start = ms[0].phys_addr;
+	TAILQ_INSERT_TAIL(&s->head, ivshmem, next);
+
+	/* piix3 is needed to route irqs from virtio-net to ioapic */
+	piix3->name = "piix3";
+	piix3->device_id = PIIX3_DEVICE_ID;
+	piix3->vendor_id = PIIX3_VENDOR_ID;
+	piix3->init = qtest_init_piix3_device;
+	TAILQ_INSERT_TAIL(&s->head, piix3, next);
+
+	return 0;
+}
+
+static int
+qtest_send_message_to_ivshmem(int sock_fd, uint64_t client_id, int shm_fd)
+{
+	struct iovec iov;
+	struct msghdr msgh;
+	size_t fdsize = sizeof(int);
+	char control[CMSG_SPACE(fdsize)];
+	struct cmsghdr *cmsg;
+	int ret;
+
+	memset(&msgh, 0, sizeof(msgh));
+	iov.iov_base = &client_id;
+	iov.iov_len = sizeof(client_id);
+
+	msgh.msg_iov = &iov;
+	msgh.msg_iovlen = 1;
+
+	if (shm_fd >= 0) {
+		msgh.msg_control = &control;
+		msgh.msg_controllen = sizeof(control);
+		cmsg = CMSG_FIRSTHDR(&msgh);
+		cmsg->cmsg_len = CMSG_LEN(fdsize);
+		cmsg->cmsg_level = SOL_SOCKET;
+		cmsg->cmsg_type = SCM_RIGHTS;
+		memcpy(CMSG_DATA(cmsg), &shm_fd, fdsize);
+	}
+
+	do {
+		ret = sendmsg(sock_fd, &msgh, 0);
+	} while (ret < 0 && errno == EINTR);
+
+	if (ret < 0) {
+		PMD_DRV_LOG(ERR, "sendmsg error\n");
+		return ret;
+	}
+
+	return ret;
+}
+
+static int
+qtest_open_shared_memory(void)
+{
+	const struct rte_memseg *ms;
+	int shm_fd = -1;
+	uint64_t vaddr;
+	char buf[1024];
+	char *p;
+	FILE *f;
+
+	ms = rte_eal_get_physmem_layout();
+	f = fopen("/proc/self/maps", "r");
+	if (f == NULL)
+		return -1;
+
+	/* parse maps */
+	while (fgets(buf, sizeof(buf), f) != NULL) {
+		/* get vaddr */
+		vaddr = strtoul(buf, NULL, 16);
+
+		/* check if this region is EAL memory */
+		if (vaddr == ms[0].addr_64) {
+			p = strchr(buf, '/');
+			if (p == NULL)
+				return -1;
+			buf[strlen(buf) - 1] = '\0';
+			shm_fd = open(p, O_RDWR);
+			break;
+		}
+	}
+	fclose(f);
+
+	return shm_fd;
+}
+
+static int
+qtest_setup_shared_memory(struct qtest_session *s)
+{
+	int shm_fd, ret;
+
+	/* To share DPDK EAL memory, open EAL memory again */
+	shm_fd = qtest_open_shared_memory();
+	if (shm_fd < 0) {
+		PMD_DRV_LOG(ERR,
+			"Failed to open EAL memory\n");
+		return -1;
+	}
+
+	/* send our protocol version first */
+	ret = qtest_send_message_to_ivshmem(s->ivshmem_socket,
+			IVSHMEM_PROTOCOL_VERSION, -1);
+	if (ret < 0) {
+		PMD_DRV_LOG(ERR,
+			"Failed to send protocol version to ivshmem\n");
+		return -1;
+	}
+
+	/* send client id */
+	ret = qtest_send_message_to_ivshmem(s->ivshmem_socket, 0, -1);
+	if (ret < 0) {
+		PMD_DRV_LOG(ERR, "Failed to send VMID to ivshmem\n");
+		return -1;
+	}
+
+	/* send message to ivshmem */
+	ret = qtest_send_message_to_ivshmem(s->ivshmem_socket, -1, shm_fd);
+	if (ret < 0) {
+		PMD_DRV_LOG(ERR, "Failed to file descriptor to ivshmem\n");
+		return -1;
+	}
+
+	/* close EAL memory again */
+	close(shm_fd);
+
+	return 0;
+}
+
+int
+qtest_vdev_init(struct rte_eth_dev_data *data,
+		int qtest_socket, int ivshmem_socket)
+{
+	struct virtio_hw *hw = ((struct rte_eth_dev_data *)data)->dev_private;
+	struct qtest_session *s;
+	int ret;
+
+	s = rte_zmalloc(NULL, sizeof(*s), RTE_CACHE_LINE_SIZE);
+
+	ret = pipe(s->msgfds.pipefd);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to initialize message pipe\n");
+		return -1;
+	}
+
+	ret = pipe(s->irqfds.pipefd);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to initialize irq pipe\n");
+		return -1;
+	}
+
+	ret = qtest_register_target_devices(s);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to initialize qtest session\n");
+		return -1;
+	}
+
+	ret = pthread_mutex_init(&s->qtest_session_lock, NULL);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to initialize mutex\n");
+		return -1;
+	}
+
+	rte_atomic16_set(&s->enable_intr, 0);
+	s->qtest_socket = qtest_socket;
+	s->ivshmem_socket = ivshmem_socket;
+	hw->qsession = (void *)s;
+
+	ret = pthread_create(&s->event_th, NULL, qtest_event_handler, s);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to create event handler\n");
+		return -1;
+	}
+
+	ret = pthread_create(&s->intr_th, NULL, qtest_intr_handler, s);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to create interrupt handler\n");
+		return -1;
+	}
+
+	ret = qtest_intr_initialize(data);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to initialize interrupt\n");
+		return -1;
+	}
+
+	ret = qtest_setup_shared_memory(s);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to setup shared memory\n");
+		return -1;
+	}
+
+	ret = qtest_init_pci_devices(s);
+	if (ret != 0) {
+		PMD_DRV_LOG(ERR, "Failed to initialize devices\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+static void
+qtest_remove_target_devices(struct qtest_session *s)
+{
+	struct qtest_pci_device *dev, *next;
+
+	for (dev = TAILQ_FIRST(&s->head); dev != NULL; dev = next) {
+		next = TAILQ_NEXT(dev, next);
+		TAILQ_REMOVE(&s->head, dev, next);
+		free(dev);
+	}
+}
+
+void
+qtest_vdev_uninit(struct rte_eth_dev_data *data)
+{
+	struct virtio_hw *hw = ((struct rte_eth_dev_data *)data)->dev_private;
+	struct qtest_session *s;
+
+	s = (struct qtest_session *)hw->qsession;
+
+	qtest_close_sockets(s);
+
+	pthread_cancel(s->event_th);
+	pthread_join(s->event_th, NULL);
+
+	pthread_cancel(s->intr_th);
+	pthread_join(s->intr_th, NULL);
+
+	pthread_mutex_destroy(&s->qtest_session_lock);
+
+	qtest_remove_target_devices(s);
+
+	rte_free(s);
+}
diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index d928339..234b561 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -36,6 +36,11 @@
 #include <stdio.h>
 #include <errno.h>
 #include <unistd.h>
+#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#endif
 #ifdef RTE_EXEC_ENV_LINUXAPP
 #include <dirent.h>
 #include <fcntl.h>
@@ -56,6 +61,10 @@
 #include <rte_memory.h>
 #include <rte_eal.h>
 #include <rte_dev.h>
+#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
+#include <rte_eal_memconfig.h>
+#include <rte_kvargs.h>
+#endif
 
 #include "virtio_ethdev.h"
 #include "virtio_pci.h"
@@ -491,8 +500,12 @@ virtio_dev_close(struct rte_eth_dev *dev)
 	PMD_INIT_LOG(DEBUG, "virtio_dev_close");
 
 	/* reset the NIC */
-	if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)
+	if (((dev->dev_type == RTE_ETH_DEV_PCI) &&
+			(pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)) ||
+			((dev->dev_type == RTE_ETH_DEV_VIRTUAL) &&
+			(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC))) {
 		vtpci_irq_config(hw, VIRTIO_MSI_NO_VECTOR);
+	}
 	vtpci_reset(hw);
 	hw->started = 0;
 	virtio_dev_free_mbufs(dev);
@@ -1233,15 +1246,22 @@ virtio_interrupt_handler(__rte_unused struct rte_intr_handle *handle,
 	isr = vtpci_isr(hw);
 	PMD_DRV_LOG(INFO, "interrupt status = %#x", isr);
 
-	if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0)
-		PMD_DRV_LOG(ERR, "interrupt enable failed");
+	if (dev->dev_type == RTE_ETH_DEV_PCI) {
+		if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0)
+			PMD_DRV_LOG(ERR, "interrupt enable failed");
+	}
+#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
+	else if (dev->dev_type == RTE_ETH_DEV_VIRTUAL) {
+		if (qtest_intr_enable(dev->data) < 0)
+			PMD_DRV_LOG(ERR, "interrupt enable failed");
+	}
+#endif
 
 	if (isr & VIRTIO_PCI_ISR_CONFIG) {
 		if (virtio_dev_link_update(dev, 0) == 0)
 			_rte_eth_dev_callback_process(dev,
 						      RTE_ETH_EVENT_INTR_LSC);
 	}
-
 }
 
 static void
@@ -1264,7 +1284,8 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
 	struct virtio_hw *hw = eth_dev->data->dev_private;
 	struct virtio_net_config *config;
 	struct virtio_net_config local_config;
-	struct rte_pci_device *pci_dev;
+	struct rte_pci_device *pci_dev = eth_dev->pci_dev;
+	struct rte_pci_id id;
 
 	RTE_BUILD_BUG_ON(RTE_PKTMBUF_HEADROOM < sizeof(struct virtio_net_hdr));
 
@@ -1285,13 +1306,20 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
 		return -ENOMEM;
 	}
 
-	pci_dev = eth_dev->pci_dev;
+	if (eth_dev->dev_type == RTE_ETH_DEV_PCI) {
+		if (virtio_resource_init(pci_dev) < 0)
+			return -1;
 
-	if (virtio_resource_init(pci_dev) < 0)
-		return -1;
-
-	hw->use_msix = virtio_has_msix(&pci_dev->addr);
-	hw->io_base = (uint32_t)(uintptr_t)pci_dev->mem_resource[0].addr;
+		hw->use_msix = virtio_has_msix(&pci_dev->addr);
+		hw->io_base = (uint32_t)(uintptr_t)pci_dev->mem_resource[0].addr;
+	}
+#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
+	else if (eth_dev->dev_type == RTE_ETH_DEV_VIRTUAL) {
+		hw->use_msix = 0;
+		hw->io_base = 0;
+		eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
+	}
+#endif
 
 	/* Reset the device although not necessary at startup */
 	vtpci_reset(hw);
@@ -1304,8 +1332,14 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
 	virtio_negotiate_features(hw);
 
 	/* If host does not support status then disable LSC */
-	if (!vtpci_with_feature(hw, VIRTIO_NET_F_STATUS))
-		pci_dev->driver->drv_flags &= ~RTE_PCI_DRV_INTR_LSC;
+	if (!vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) {
+		if (eth_dev->dev_type == RTE_ETH_DEV_PCI)
+			pci_dev->driver->drv_flags &= ~RTE_PCI_DRV_INTR_LSC;
+#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
+		else if (eth_dev->dev_type == RTE_ETH_DEV_VIRTUAL)
+			eth_dev->data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC;
+#endif
+	}
 
 	rte_eth_copy_pci_info(eth_dev, pci_dev);
 
@@ -1383,14 +1417,30 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
 
 	PMD_INIT_LOG(DEBUG, "hw->max_rx_queues=%d   hw->max_tx_queues=%d",
 			hw->max_rx_queues, hw->max_tx_queues);
+
+	memset(&id, 0, sizeof(id));
+	if (eth_dev->dev_type == RTE_ETH_DEV_PCI)
+		id = pci_dev->id;
+#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
+	else if (eth_dev->dev_type == RTE_ETH_DEV_VIRTUAL)
+		id = qtest_get_pci_id_of_virtio_net();
+#endif
+
 	PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x",
-			eth_dev->data->port_id, pci_dev->id.vendor_id,
-			pci_dev->id.device_id);
+			eth_dev->data->port_id,
+			id.vendor_id, id.device_id);
 
 	/* Setup interrupt callback  */
-	if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)
+	if ((eth_dev->dev_type == RTE_ETH_DEV_PCI) &&
+			(pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC))
 		rte_intr_callback_register(&pci_dev->intr_handle,
-				   virtio_interrupt_handler, eth_dev);
+				virtio_interrupt_handler, eth_dev);
+#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
+	else if ((eth_dev->dev_type == RTE_ETH_DEV_VIRTUAL) &&
+			(eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC))
+		qtest_intr_callback_register(eth_dev->data,
+				virtio_interrupt_handler, eth_dev);
+#endif
 
 	virtio_dev_cq_start(eth_dev);
 
@@ -1424,10 +1474,17 @@ eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev)
 	eth_dev->data->mac_addrs = NULL;
 
 	/* reset interrupt callback  */
-	if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)
+	if ((eth_dev->dev_type == RTE_ETH_DEV_PCI) &&
+			(pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC))
 		rte_intr_callback_unregister(&pci_dev->intr_handle,
 						virtio_interrupt_handler,
 						eth_dev);
+#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
+	else if ((eth_dev->dev_type == RTE_ETH_DEV_VIRTUAL) &&
+			(eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC))
+		qtest_intr_callback_unregister(eth_dev->data,
+				virtio_interrupt_handler, eth_dev);
+#endif
 
 	PMD_INIT_LOG(DEBUG, "dev_uninit completed");
 
@@ -1491,11 +1548,15 @@ virtio_dev_configure(struct rte_eth_dev *dev)
 		return -ENOTSUP;
 	}
 
-	if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)
+	if (((dev->dev_type == RTE_ETH_DEV_PCI) &&
+			(pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)) ||
+			((dev->dev_type == RTE_ETH_DEV_VIRTUAL) &&
+			(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC))) {
 		if (vtpci_irq_config(hw, 0) == VIRTIO_MSI_NO_VECTOR) {
 			PMD_DRV_LOG(ERR, "failed to set config vector");
 			return -EBUSY;
 		}
+	}
 
 	return 0;
 }
@@ -1510,15 +1571,31 @@ virtio_dev_start(struct rte_eth_dev *dev)
 
 	/* check if lsc interrupt feature is enabled */
 	if (dev->data->dev_conf.intr_conf.lsc) {
-		if (!(pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)) {
-			PMD_DRV_LOG(ERR, "link status not supported by host");
-			return -ENOTSUP;
-		}
+		if (dev->dev_type == RTE_ETH_DEV_PCI) {
+			if (!(pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)) {
+				PMD_DRV_LOG(ERR,
+					"link status not supported by host");
+				return -ENOTSUP;
+			}
 
-		if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0) {
-			PMD_DRV_LOG(ERR, "interrupt enable failed");
-			return -EIO;
+			if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0) {
+				PMD_DRV_LOG(ERR, "interrupt enable failed");
+				return -EIO;
+			}
 		}
+#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
+		else if (dev->dev_type == RTE_ETH_DEV_VIRTUAL) {
+			if (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
+				PMD_DRV_LOG(ERR,
+					"link status not supported by host");
+				return -ENOTSUP;
+			}
+			if (qtest_intr_enable(dev->data) < 0) {
+				PMD_DRV_LOG(ERR, "interrupt enable failed");
+				return -EIO;
+			}
+		}
+#endif
 	}
 
 	/* Initialize Link state */
@@ -1615,8 +1692,15 @@ virtio_dev_stop(struct rte_eth_dev *dev)
 
 	PMD_INIT_LOG(DEBUG, "stop");
 
-	if (dev->data->dev_conf.intr_conf.lsc)
-		rte_intr_disable(&dev->pci_dev->intr_handle);
+	if (dev->data->dev_conf.intr_conf.lsc) {
+		if (dev->dev_type == RTE_ETH_DEV_PCI)
+			rte_intr_disable(&dev->pci_dev->intr_handle);
+
+#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
+		if (dev->dev_type == RTE_ETH_DEV_VIRTUAL)
+			qtest_intr_disable(dev->data);
+#endif
+	}
 
 	memset(&link, 0, sizeof(link));
 	virtio_dev_atomic_write_link_status(dev, &link);
@@ -1661,7 +1745,13 @@ virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 {
 	struct virtio_hw *hw = dev->data->dev_private;
 
-	dev_info->driver_name = dev->driver->pci_drv.name;
+	if (dev->dev_type == RTE_ETH_DEV_PCI)
+		dev_info->driver_name = dev->driver->pci_drv.name;
+#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
+	else if (dev->dev_type == RTE_ETH_DEV_VIRTUAL)
+		dev_info->driver_name =  dev->data->drv_name;
+#endif
+
 	dev_info->max_rx_queues = (uint16_t)hw->max_rx_queues;
 	dev_info->max_tx_queues = (uint16_t)hw->max_tx_queues;
 	dev_info->min_rx_bufsize = VIRTIO_MIN_RX_BUFSIZE;
@@ -1689,3 +1779,196 @@ static struct rte_driver rte_virtio_driver = {
 };
 
 PMD_REGISTER_DRIVER(rte_virtio_driver);
+
+#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
+
+#define ETH_VIRTIO_NET_ARG_QTEST_PATH           "qtest"
+#define ETH_VIRTIO_NET_ARG_IVSHMEM_PATH         "ivshmem"
+
+static const char *valid_args[] = {
+	ETH_VIRTIO_NET_ARG_QTEST_PATH,
+	ETH_VIRTIO_NET_ARG_IVSHMEM_PATH,
+	NULL
+};
+
+static int
+get_string_arg(const char *key __rte_unused,
+		const char *value, void *extra_args)
+{
+	int ret, fd, loop = 3;
+	int *pfd = extra_args;
+	struct sockaddr_un sa = {0};
+
+	if ((value == NULL) || (extra_args == NULL))
+		return -EINVAL;
+
+	fd = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (fd < 0)
+		return -1;
+
+	sa.sun_family = AF_UNIX;
+	strncpy(sa.sun_path, value, sizeof(sa.sun_path));
+
+	while (loop--) {
+		/*
+		 * may need to wait for qtest and ivshmem
+		 * sockets are prepared by QEMU.
+		 */
+		ret = connect(fd, (struct sockaddr *)&sa,
+				sizeof(struct sockaddr_un));
+		if (ret != 0)
+			sleep(1);
+		else
+			break;
+	}
+
+	if (ret != 0) {
+		close(fd);
+		return -1;
+	}
+
+	*pfd = fd;
+
+	return 0;
+}
+
+static struct rte_eth_dev *
+virtio_net_eth_dev_alloc(const char *name)
+{
+	struct rte_eth_dev *eth_dev;
+	struct rte_eth_dev_data *data;
+	struct virtio_hw *hw;
+
+	eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL);
+	if (eth_dev == NULL)
+		rte_panic("cannot alloc rte_eth_dev\n");
+
+	data = eth_dev->data;
+
+	hw = rte_zmalloc(NULL, sizeof(*hw), 0);
+	if (!hw)
+		rte_panic("malloc virtio_hw failed\n");
+
+	data->dev_private = hw;
+	eth_dev->driver = &rte_virtio_pmd;
+	return eth_dev;
+}
+
+/*
+ * Initialization when "CONFIG_RTE_LIBRTE_VIRTIO_HOST_MODE" is enabled.
+ */
+static int
+rte_virtio_net_pmd_init(const char *name, const char *params)
+{
+	struct rte_kvargs *kvlist = NULL;
+	struct rte_eth_dev *eth_dev = NULL;
+	int ret, qtest_sock, ivshmem_sock;
+	struct rte_mem_config *mcfg;
+
+	if (params == NULL || params[0] == '\0')
+		goto error;
+
+	/* get pointer to global configuration */
+	mcfg = rte_eal_get_configuration()->mem_config;
+
+	/* Check if EAL memory consists of one memory segment */
+	if ((RTE_MAX_MEMSEG > 1) && (mcfg->memseg[1].addr != NULL)) {
+		PMD_INIT_LOG(ERR, "Non contigious memory");
+		goto error;
+	}
+
+	kvlist = rte_kvargs_parse(params, valid_args);
+	if (!kvlist) {
+		PMD_INIT_LOG(ERR, "error when parsing param");
+		goto error;
+	}
+
+	if (rte_kvargs_count(kvlist, ETH_VIRTIO_NET_ARG_IVSHMEM_PATH) == 1) {
+		ret = rte_kvargs_process(kvlist, ETH_VIRTIO_NET_ARG_IVSHMEM_PATH,
+				&get_string_arg, &ivshmem_sock);
+		if (ret != 0) {
+			PMD_INIT_LOG(ERR,
+				"Failed to connect to ivshmem socket");
+			goto error;
+		}
+	} else {
+		PMD_INIT_LOG(ERR, "No argument specified for %s",
+				ETH_VIRTIO_NET_ARG_IVSHMEM_PATH);
+		goto error;
+	}
+
+	if (rte_kvargs_count(kvlist, ETH_VIRTIO_NET_ARG_QTEST_PATH) == 1) {
+		ret = rte_kvargs_process(kvlist, ETH_VIRTIO_NET_ARG_QTEST_PATH,
+				&get_string_arg, &qtest_sock);
+		if (ret != 0) {
+			PMD_INIT_LOG(ERR,
+				"Failed to connect to qtest socket");
+			goto error;
+		}
+	} else {
+		PMD_INIT_LOG(ERR, "No argument specified for %s",
+				ETH_VIRTIO_NET_ARG_QTEST_PATH);
+		goto error;
+	}
+
+	eth_dev = virtio_net_eth_dev_alloc(name);
+
+	qtest_vdev_init(eth_dev->data, qtest_sock, ivshmem_sock);
+
+	/* originally, this will be called in rte_eal_pci_probe() */
+	eth_virtio_dev_init(eth_dev);
+
+	eth_dev->driver = NULL;
+	eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE;
+	eth_dev->data->kdrv = RTE_KDRV_NONE;
+	eth_dev->data->drv_name = "rte_virtio_pmd";
+
+	rte_kvargs_free(kvlist);
+	return 0;
+
+error:
+	rte_kvargs_free(kvlist);
+	return -EFAULT;
+}
+
+/*
+ * Finalization when "CONFIG_RTE_LIBRTE_VIRTIO_HOST_MODE" is enabled.
+ */
+static int
+rte_virtio_net_pmd_uninit(const char *name)
+{
+	struct rte_eth_dev *eth_dev = NULL;
+	int ret;
+
+	if (name == NULL)
+		return -EINVAL;
+
+	/* find the ethdev entry */
+	eth_dev = rte_eth_dev_allocated(name);
+	if (eth_dev == NULL)
+		return -ENODEV;
+
+	ret = eth_virtio_dev_uninit(eth_dev);
+	if (ret != 0)
+		return -EFAULT;
+
+	qtest_vdev_uninit(eth_dev->data);
+	rte_free(eth_dev->data->dev_private);
+
+	ret = rte_eth_dev_release_port(eth_dev);
+	if (ret != 0)
+		return -EFAULT;
+
+	return 0;
+}
+
+static struct rte_driver rte_virtio_net_driver = {
+	.name   = "eth_virtio_net",
+	.type   = PMD_VDEV,
+	.init   = rte_virtio_net_pmd_init,
+	.uninit = rte_virtio_net_pmd_uninit,
+};
+
+PMD_REGISTER_DRIVER(rte_virtio_net_driver);
+
+#endif /* RTE_LIBRTE_VIRTIO_HOST_MODE */
diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h
index ae2d47d..eefc7be 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -122,5 +122,17 @@ uint16_t virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
 #define VTNET_LRO_FEATURES (VIRTIO_NET_F_GUEST_TSO4 | \
 			    VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_ECN)
 
+#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
+int qtest_vdev_init(struct rte_eth_dev_data *data,
+		int qtest_socket, int ivshmem_socket);
+void qtest_vdev_uninit(struct rte_eth_dev_data *data);
+void qtest_intr_callback_register(void *data,
+		rte_intr_callback_fn cb, void *cb_arg);
+void qtest_intr_callback_unregister(void *data,
+		rte_intr_callback_fn cb, void *cb_arg);
+int qtest_intr_enable(void *data);
+int qtest_intr_disable(void *data);
+struct rte_pci_id qtest_get_pci_id_of_virtio_net(void);
+#endif /* RTE_LIBRTE_VIRTIO_HOST_MODE */
 
 #endif /* _VIRTIO_ETHDEV_H_ */
diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
index 47f722a..d4ede73 100644
--- a/drivers/net/virtio/virtio_pci.h
+++ b/drivers/net/virtio/virtio_pci.h
@@ -165,6 +165,9 @@ struct virtqueue;
 
 struct virtio_hw {
 	struct virtqueue *cvq;
+#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
+	void        *qsession;
+#endif
 	uint32_t    io_base;
 	uint32_t    guest_features;
 	uint32_t    max_tx_queues;
@@ -226,6 +229,26 @@ outl_p(unsigned int data, unsigned int port)
 }
 #endif
 
+#ifdef RTE_LIBRTE_VIRTIO_HOST_MODE
+
+uint32_t virtio_ioport_read(struct virtio_hw *, uint64_t, char type);
+void virtio_ioport_write(struct virtio_hw *, uint64_t, uint64_t, char type);
+
+#define VIRTIO_READ_REG_1(hw, reg) \
+	virtio_ioport_read(hw, reg, 'b')
+#define VIRTIO_WRITE_REG_1(hw, reg, value) \
+	virtio_ioport_write(hw, reg, value, 'b')
+#define VIRTIO_READ_REG_2(hw, reg) \
+	virtio_ioport_read(hw, reg, 'w')
+#define VIRTIO_WRITE_REG_2(hw, reg, value) \
+	virtio_ioport_write(hw, reg, value, 'w')
+#define VIRTIO_READ_REG_4(hw, reg) \
+	virtio_ioport_read(hw, reg, 'l')
+#define VIRTIO_WRITE_REG_4(hw, reg, value) \
+	virtio_ioport_write(hw, reg, value, 'l')
+
+#else /* RTE_LIBRTE_VIRTIO_HOST_MODE */
+
 #define VIRTIO_PCI_REG_ADDR(hw, reg) \
 	(unsigned short)((hw)->io_base + (reg))
 
@@ -244,6 +267,8 @@ outl_p(unsigned int data, unsigned int port)
 #define VIRTIO_WRITE_REG_4(hw, reg, value) \
 	outl_p((unsigned int)(value), (VIRTIO_PCI_REG_ADDR((hw), (reg))))
 
+#endif /* RTE_LIBRTE_VIRTIO_HOST_MODE */
+
 static inline int
 vtpci_with_feature(struct virtio_hw *hw, uint32_t bit)
 {
-- 
2.1.4



More information about the dev mailing list