[dpdk-dev] [PATCH 04/11] net/vhostpci: add basic framework

Zhiyong Yang zhiyong.yang at intel.com
Thu Nov 30 10:46:50 CET 2017


This commit introduces the vhostpci framework to DPDK, including:

1. Registration of the vhostpci PMD.
2. The PCI device probe and remove functions.
3. Memory allocation and initialization for the vhostpci_net PMD.
4. The start, stop, close and info_get functions.

Signed-off-by: Zhiyong Yang <zhiyong.yang at intel.com>
---
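
Note on the init flow: device initialization follows the modern virtio
handshake. Below is a hedged sketch of the sequence implemented by
vhostpci_init_device() and vhostpci_negotiate_features(), using only the
vhpci_*/VTPCI_OPS helpers added in this patch; the function itself is
illustrative and not part of the patch (the patch additionally gates the
FEATURES_OK step on hw->modern):

    static int
    init_handshake_sketch(struct vhostpci_hw *hw, uint64_t req_features)
    {
        uint64_t host_features;

        /* 1. Tell the device it has been noticed and will be driven. */
        vhpci_set_status(hw, VIRTIO_CONFIG_STATUS_ACK);
        vhpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER);

        /* 2. Keep only the features both sides support. */
        host_features = VTPCI_OPS(hw)->get_features(hw);
        hw->guest_features = req_features;
        hw->guest_features = vhpci_negotiate_features(hw, host_features);

        /* 3. The device may still reject the negotiated set. */
        vhpci_set_status(hw, VIRTIO_CONFIG_STATUS_FEATURES_OK);
        if (!(vhpci_get_status(hw) & VIRTIO_CONFIG_STATUS_FEATURES_OK))
            return -1;

        /* 4. DRIVER_OK: the device goes live. */
        vhpci_init_complete(hw);
        return 0;
    }
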
 config/common_linuxapp                 |   1 +
 drivers/net/vhostpci/Makefile          |   2 +
 drivers/net/vhostpci/vhostpci_ethdev.c | 539 +++++++++++++++++++++++++++++++++
 drivers/net/vhostpci/vhostpci_pci.c    | 334 ++++++++++++++++++++
 mk/rte.app.mk                          |   1 +
 5 files changed, 877 insertions(+)
 create mode 100644 drivers/net/vhostpci/vhostpci_ethdev.c
 create mode 100644 drivers/net/vhostpci/vhostpci_pci.c

diff --git a/config/common_linuxapp b/config/common_linuxapp
index 74c7d64ec..d5e2132a3 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -49,4 +49,5 @@ CONFIG_RTE_LIBRTE_PMD_TAP=y
 CONFIG_RTE_LIBRTE_AVP_PMD=y
 CONFIG_RTE_LIBRTE_NFP_PMD=y
 CONFIG_RTE_LIBRTE_POWER=y
+CONFIG_RTE_LIBRTE_VHOSTPCI_PMD=y
 CONFIG_RTE_VIRTIO_USER=y
diff --git a/drivers/net/vhostpci/Makefile b/drivers/net/vhostpci/Makefile
index 3467e7cbe..3089e54d8 100644
--- a/drivers/net/vhostpci/Makefile
+++ b/drivers/net/vhostpci/Makefile
@@ -48,5 +48,7 @@ LIBABIVER := 1
 #
 # all source are stored in SRCS-y
 #
+SRCS-$(CONFIG_RTE_LIBRTE_VHOSTPCI_PMD) += vhostpci_pci.c
+SRCS-$(CONFIG_RTE_LIBRTE_VHOSTPCI_PMD) += vhostpci_ethdev.c
 
 include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/vhostpci/vhostpci_ethdev.c b/drivers/net/vhostpci/vhostpci_ethdev.c
new file mode 100644
index 000000000..873ff7482
--- /dev/null
+++ b/drivers/net/vhostpci/vhostpci_ethdev.c
@@ -0,0 +1,539 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdbool.h>
+#include <inttypes.h>
+
+#include <rte_eal.h>
+#include <rte_dev.h>
+#include <rte_pci.h>
+#include <rte_ether.h>
+#include <rte_common.h>
+#include <rte_memory.h>
+#include <rte_ethdev.h>
+#include <rte_memcpy.h>
+#include <rte_malloc.h>
+#include <rte_atomic.h>
+#include <rte_memzone.h>
+#include <rte_bus_pci.h>
+#include <rte_ethdev_pci.h>
+
+#include "vhostpci_logs.h"
+#include "vhostpci_ethdev.h"
+
+static void
+vhostpci_dev_info_get(struct rte_eth_dev *dev,
+		struct rte_eth_dev_info *dev_info);
+
+static void
+vhostpci_get_hwaddr(struct vhostpci_hw *hw);
+
+static int
+vhostpci_dev_configure(struct rte_eth_dev *dev);
+
+static int
+eth_vhostpci_dev_init(struct rte_eth_dev *eth_dev);
+
+static int
+vhostpci_dev_atomic_write_link_status(struct rte_eth_dev *dev,
+		struct rte_eth_link *link);
+
+static int
+vhostpci_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features);
+
+static int
+vhostpci_dev_start(struct rte_eth_dev *dev);
+
+static void
+update_queuing_status(struct rte_eth_dev *dev);
+
+static void
+vhostpci_dev_close(struct rte_eth_dev *dev);
+
+static void
+vhostpci_dev_stop(struct rte_eth_dev *dev);
+
+static const struct eth_dev_ops vhostpci_eth_dev_ops = {
+	.dev_start               = vhostpci_dev_start,
+	.dev_stop                = vhostpci_dev_stop,
+	.dev_close               = vhostpci_dev_close,
+	.dev_infos_get           = vhostpci_dev_info_get,
+	.dev_configure           = vhostpci_dev_configure,
+};
+
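+/*
+ * Even ring indices are RX and odd indices are TX, so the parity of idx
+ * must match the direction and idx must be within the vring count.
+ */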
+static inline bool
+is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t nr_vring)
+{
+	return (is_tx ^ (idx & 1)) == 0 && idx < nr_vring;
+}
+
+static int
+vhostpci_negotiate_features(struct vhostpci_hw *hw, uint64_t req_features);
+
+static inline int
+vhostpci_pci_with_feature(struct vhostpci_hw *hw, uint64_t bit)
+{
+	return (hw->guest_features & (1ULL << bit)) != 0;
+}
+
+static int
+vhostpci_dev_start(struct rte_eth_dev *dev)
+{
+	struct vhostpci_hw *hw = dev->data->dev_private;
+
+	hw->started = 1;
+	update_queuing_status(dev);
+
+	return 0;
+}
+
+static void
+vhostpci_get_hwaddr(struct vhostpci_hw *hw)
+{
+	if (vhostpci_pci_with_feature(hw, VIRTIO_NET_F_MAC)) {
+		vhpci_read_dev_config(hw,
+			offsetof(struct vpnet_pci_config, mac),
+			&hw->mac_addr, ETHER_ADDR_LEN);
+	} else {
+		eth_random_addr(&hw->mac_addr[0]);
+	}
+}
+
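+/*
+ * Propagate the started/stopped state to every queue and spin until any
+ * rx/tx burst still inside the datapath has drained.
+ */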
+static void
+update_queuing_status(struct rte_eth_dev *dev)
+{
+	struct vhostpci_hw *hw = dev->data->dev_private;
+	struct vhostpci_queue *vq;
+	int i;
+	int allow_queuing = 1;
+
+	if (hw->started == 0)
+		allow_queuing = 0;
+
+	/* Wait until rx/tx_pkt_burst stops accessing vhost device */
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		vq = dev->data->rx_queues[i];
+		if (vq == NULL)
+			continue;
+		rte_atomic32_set(&vq->allow_queuing, allow_queuing);
+		while (rte_atomic32_read(&vq->while_queuing))
+			rte_pause();
+	}
+
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		vq = dev->data->tx_queues[i];
+		if (vq == NULL)
+			continue;
+		rte_atomic32_set(&vq->allow_queuing, allow_queuing);
+		while (rte_atomic32_read(&vq->while_queuing))
+			rte_pause();
+	}
+}
+
+static int
+vhostpci_negotiate_features(struct vhostpci_hw *hw, uint64_t req_features)
+{
+	uint64_t host_features;
+
+	/* Prepare guest_features: features that the driver wants to support */
+	PMD_INIT_LOG(DEBUG, "guest_features before negotiate = %" PRIx64,
+		req_features);
+
+	/* Read device (host) feature bits */
+	host_features = VTPCI_OPS(hw)->get_features(hw);
+	PMD_INIT_LOG(DEBUG, "host_features before negotiate = %" PRIx64,
+		host_features);
+
+	/**
+	 * Negotiate features: a subset of the device's feature bits is
+	 * written back as the guest feature bits.
+	 */
+	hw->guest_features = req_features;
+	hw->guest_features = vhpci_negotiate_features(hw, host_features);
+	PMD_INIT_LOG(DEBUG, "features after negotiate = %" PRIx64,
+		hw->guest_features);
+
+	if (hw->modern) {
+		if (!vhostpci_pci_with_feature(hw, VIRTIO_F_VERSION_1)) {
+			PMD_INIT_LOG(ERR,
+				"VIRTIO_F_VERSION_1 feature is not enabled.");
+			return -1;
+		}
+		vhpci_set_status(hw, VIRTIO_CONFIG_STATUS_FEATURES_OK);
+		if (!(vhpci_get_status(hw) &
+				VIRTIO_CONFIG_STATUS_FEATURES_OK)) {
+			PMD_INIT_LOG(ERR,
+				"failed to set FEATURES_OK status!");
+			return -1;
+		}
+	}
+
+	hw->req_guest_features = req_features;
+
+	return 0;
+}
+
+static void
+vhostpci_dev_info_get(struct rte_eth_dev *dev,
+		struct rte_eth_dev_info *dev_info)
+{
+	struct vhostpci_hw *hw = dev->data->dev_private;
+
+	dev_info->speed_capa = ETH_LINK_SPEED_10G; /* fake value */
+
+	dev_info->pci_dev = dev->device ? RTE_ETH_DEV_TO_PCI(dev) : NULL;
+
+	dev_info->max_rx_queues =
+		RTE_MIN(hw->max_queue_pairs, VHOSTPCI_MAX_RX_QUEUES);
+	dev_info->max_tx_queues =
+		RTE_MIN(hw->max_queue_pairs, VHOSTPCI_MAX_TX_QUEUES);
+
+	dev_info->min_rx_bufsize = VHOSTPCI_MIN_RX_BUFSIZE;
+	dev_info->max_rx_pktlen = VHOSTPCI_MAX_RX_PKTLEN;
+	dev_info->max_mac_addrs = VHOSTPCI_MAX_MAC_ADDRS;
+
+	dev_info->rx_offload_capa = 0;
+	dev_info->tx_offload_capa = 0;
+}
+
+static int
+vhostpci_dev_configure(struct rte_eth_dev *dev __rte_unused)
+{
+	return 0;
+}
+
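+/*
+ * Overwrite the 64-bit link status in one shot; rte_atomic64_cmpset()
+ * makes the update atomic with respect to concurrent readers, and the
+ * function returns -1 if a concurrent writer raced us.
+ */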
+static int
+vhostpci_dev_atomic_write_link_status(struct rte_eth_dev *dev,
+		struct rte_eth_link *link)
+{
+	struct rte_eth_link *dst = &(dev->data->dev_link);
+	struct rte_eth_link *src = link;
+
+	if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
+			*(uint64_t *)src) == 0)
+		return -1;
+
+	return 0;
+}
+
+/* negotiate features and complete device initialization */
+static int
+vhostpci_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features)
+{
+	struct vhostpci_hw *hw = eth_dev->data->dev_private;
+	struct rte_pci_device *pci_dev;
+
+	/* Tell the device we have noticed it. */
+	vhpci_set_status(hw, VIRTIO_CONFIG_STATUS_ACK);
+
+	/* Tell the device we know how to drive it. */
+	vhpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER);
+	if (vhostpci_negotiate_features(hw, req_features) < 0)
+		return -1;
+
+	pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
+	rte_eth_copy_pci_info(eth_dev, pci_dev);
+
+	/* Set up the rx header size; only the MRG header is supported. */
+	if (vhostpci_pci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF) ||
+	    vhostpci_pci_with_feature(hw, VIRTIO_F_VERSION_1))
+		hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+	else
+		return -1;
+
+	hw->vpnet->vhost_hlen = hw->vtnet_hdr_size;
+
+	/* Copy the permanent MAC address into vhostpci_hw. */
+	vhostpci_get_hwaddr(hw);
+	ether_addr_copy((struct ether_addr *)hw->mac_addr,
+			&eth_dev->data->mac_addrs[0]);
+	PMD_INIT_LOG(DEBUG,
+		     "PORT MAC: %02X:%02X:%02X:%02X:%02X:%02X",
+		     hw->mac_addr[0], hw->mac_addr[1], hw->mac_addr[2],
+		     hw->mac_addr[3], hw->mac_addr[4], hw->mac_addr[5]);
+
+	/* support one queue pair by default */
+	hw->max_queue_pairs = VHOSTPCI_MAX_QUEUE_PAIRS;
+
+	vhpci_init_complete(hw);
+
+	if (pci_dev)
+		PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x",
+			eth_dev->data->port_id, pci_dev->id.vendor_id,
+			pci_dev->id.device_id);
+
+	return 0;
+}
+
+static void
+vhostpci_free_queues(struct rte_eth_dev *dev)
+{
+	struct vhostpci_hw *hw = dev->data->dev_private;
+	uint32_t i;
+
+	for (i = 0; i < hw->max_queue_pairs; i++) {
+		if (dev->data->rx_queues[i] != NULL) {
+			rte_free(dev->data->rx_queues[i]);
+			dev->data->rx_queues[i] = NULL;
+		}
+
+		if (dev->data->tx_queues[i] != NULL) {
+			rte_free(dev->data->tx_queues[i]);
+			dev->data->tx_queues[i] = NULL;
+		}
+	}
+}
+
+static void
+vhostpci_dev_stop(struct rte_eth_dev *dev)
+{
+	struct rte_eth_link link;
+	struct vhostpci_hw *hw = dev->data->dev_private;
+	struct rte_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf;
+
+	hw->started = 0;
+
+	if (intr_conf->lsc)
+		rte_intr_disable(dev->intr_handle);
+
+	memset(&link, 0, sizeof(link));
+	vhostpci_dev_atomic_write_link_status(dev, &link);
+}
+
+static int
+vhostpci_net_free(struct rte_eth_dev *dev)
+{
+	struct vhostpci_hw *hw = dev->data->dev_private;
+	struct vhostpci_net *vpnet = hw->vpnet;
+	struct vhostpci_virtqueue *vq;
+	int i;
+
+	if (vpnet == NULL)
+		return -1;
+
+	for (i = 0; i < VHOSTPCI_MAX_QUEUE_PAIRS * 2; i++) {
+		vq = vpnet->virtqueue[i];
+
+		if (vpnet->virtqueue[i] == NULL)
+			continue;
+
+		if (vq->shadow_used_ring != NULL) {
+			rte_free(vq->shadow_used_ring);
+			vq->shadow_used_ring = NULL;
+		}
+
+		if (vq->batch_copy_elems != NULL) {
+			rte_free(vq->batch_copy_elems);
+			vq->batch_copy_elems = NULL;
+		}
+
+		rte_free(vpnet->virtqueue[i]);
+		vpnet->virtqueue[i] = NULL;
+	}
+
+	rte_free(hw->vpnet);
+	hw->vpnet = NULL;
+
+	return 0;
+}
+
+static void
+vhostpci_dev_close(struct rte_eth_dev *dev)
+{
+	struct rte_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf;
+
+	PMD_INIT_LOG(DEBUG, "vhostpci_dev_close");
+
+	if (intr_conf->lsc)
+		rte_intr_disable(dev->intr_handle);
+
+	vhostpci_net_free(dev);
+
+	vhostpci_free_queues(dev);
+}
+
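+/*
+ * Allocate the vhostpci_net instance plus, per virtqueue, a shadow used
+ * ring and a batch-copy array, all on the device's NUMA node.
+ */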
+static int
+vhostpci_net_allocate(struct rte_eth_dev *dev)
+{
+	struct vhostpci_hw *hw = dev->data->dev_private;
+	struct vring_used_elem *shadow_used_ring;
+	struct vhostpci_net *vpnet;
+	struct vhostpci_virtqueue *vq;
+	struct batch_copy_elem *batch_copy_elems;
+	int i;
+
+	vpnet = rte_zmalloc_socket(NULL,
+		sizeof(struct vhostpci_net), RTE_CACHE_LINE_SIZE,
+		dev->device->numa_node);
+	if (vpnet == NULL) {
+		rte_eth_dev_release_port(dev);
+		return -1;
+	}
+
+	hw->vpnet = vpnet;
+	for (i = 0; i < VHOSTPCI_MAX_QUEUE_PAIRS * 2; i++) {
+		vq = rte_zmalloc_socket(NULL, sizeof(*vq), RTE_CACHE_LINE_SIZE,
+			dev->device->numa_node);
+		if (vq == NULL) {
+			vhostpci_net_free(dev);
+			rte_eth_dev_release_port(dev);
+			return -1;
+		}
+
+		vpnet->virtqueue[i] = vq;
+		vq->size = VHOSTPCI_NUM_DESCRIPTORS;
+
+		shadow_used_ring = rte_zmalloc_socket(NULL,
+				sizeof(struct vring_used_elem) * vq->size,
+				RTE_CACHE_LINE_SIZE, dev->device->numa_node);
+		if (shadow_used_ring == NULL) {
+			vhostpci_net_free(dev);
+			rte_eth_dev_release_port(dev);
+			return -1;
+		}
+		vq->shadow_used_ring = shadow_used_ring;
+
+		batch_copy_elems = rte_zmalloc_socket(NULL,
+			sizeof(struct batch_copy_elem) * vq->size,
+			RTE_CACHE_LINE_SIZE, dev->device->numa_node);
+		if (!batch_copy_elems) {
+			vhostpci_net_free(dev);
+			rte_eth_dev_release_port(dev);
+			return -1;
+		}
+		vq->batch_copy_elems = batch_copy_elems;
+	}
+
+	return 0;
+}
+
+static int
+eth_vhostpci_dev_init(struct rte_eth_dev *eth_dev)
+{
+	struct vhostpci_hw *hw = eth_dev->data->dev_private;
+	int ret;
+
+	eth_dev->dev_ops = &vhostpci_eth_dev_ops;
+
+	/* Allocate memory for storing MAC addresses */
+	eth_dev->data->mac_addrs = rte_zmalloc("vhostpci",
+			VHOSTPCI_MAX_MAC_ADDRS * ETHER_ADDR_LEN, 0);
+
+	if (eth_dev->data->mac_addrs == NULL) {
+		PMD_INIT_LOG(ERR,
+			"Failed to allocate %d bytes needed to store MAC "
+			"addresses",
+			VHOSTPCI_MAX_MAC_ADDRS * ETHER_ADDR_LEN);
+
+		return -ENOMEM;
+	}
+
+	hw->port_id = eth_dev->data->port_id;
+	ret = vhostpci_pci_init(RTE_ETH_DEV_TO_PCI(eth_dev), hw);
+	if (ret)
+		return ret;
+
+	ret = vhostpci_net_allocate(eth_dev);
+	if (ret)
+		return ret;
+
+	ret = vhostpci_init_device(eth_dev,
+			VHOSTPCI_PMD_DEFAULT_GUEST_FEATURES);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static int
+eth_vhostpci_dev_uninit(struct rte_eth_dev *eth_dev)
+{
+	if (rte_eal_process_type() == RTE_PROC_SECONDARY)
+		return -EPERM;
+
+	vhostpci_dev_stop(eth_dev);
+	vhostpci_dev_close(eth_dev);
+
+	eth_dev->dev_ops = NULL;
+	eth_dev->tx_pkt_burst = NULL;
+	eth_dev->rx_pkt_burst = NULL;
+
+	rte_free(eth_dev->data->mac_addrs);
+	eth_dev->data->mac_addrs = NULL;
+
+	if (eth_dev->device != NULL)
+		rte_pci_unmap_device(RTE_ETH_DEV_TO_PCI(eth_dev));
+
+	PMD_INIT_LOG(DEBUG, "dev_uninit completed");
+
+	return 0;
+}
+
+static int
+eth_vhostpci_probe(struct rte_pci_driver *pci_drv __rte_unused,
+		   struct rte_pci_device *pci_dev)
+{
+	return rte_eth_dev_pci_generic_probe(pci_dev,
+			sizeof(struct vhostpci_hw), eth_vhostpci_dev_init);
+}
+
+static int
+eth_vhostpci_remove(struct rte_pci_device *pci_dev)
+{
+	return rte_eth_dev_pci_generic_remove(pci_dev,
+			eth_vhostpci_dev_uninit);
+}
+
+/**
+ * The set of PCI devices this driver supports
+ */
+const struct rte_pci_id pci_id_vhostpci_map[] = {
+	{ RTE_PCI_DEVICE(VHOST_PCI_VENDORID, VHOST_PCI_NET_MODERN_DEVICEID) },
+	{ .vendor_id = 0, /* sentinel */ },
+};
+
+static struct rte_pci_driver rte_vhostpci_pmd = {
+	.driver = {
+		.name = "net_vhostpci",
+	},
+	.id_table = pci_id_vhostpci_map,
+	.drv_flags = 0,
+	.probe = eth_vhostpci_probe,
+	.remove = eth_vhostpci_remove,
+};
+
+RTE_PMD_REGISTER_PCI(net_vhostpci, rte_vhostpci_pmd);
+RTE_PMD_REGISTER_PCI_TABLE(net_vhostpci, pci_id_vhostpci_map);
diff --git a/drivers/net/vhostpci/vhostpci_pci.c b/drivers/net/vhostpci/vhostpci_pci.c
new file mode 100644
index 000000000..9ec7ee6e6
--- /dev/null
+++ b/drivers/net/vhostpci/vhostpci_pci.c
@@ -0,0 +1,334 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_io.h>
+
+#include "vhostpci_pci.h"
+#include "vhostpci_logs.h"
+
+static void
+modern_read_dev_config(struct vhostpci_hw *hw, size_t offset,
+		       void *dst, int length);
+
+static void
+modern_write_dev_config(struct vhostpci_hw *hw, size_t offset,
+			const void *src, int length);
+
+static uint8_t
+modern_get_status(struct vhostpci_hw *hw);
+
+static void
+modern_set_status(struct vhostpci_hw *hw, uint8_t status);
+
+static uint64_t
+modern_get_features(struct vhostpci_hw *hw);
+
+static void
+modern_set_features(struct vhostpci_hw *hw, uint64_t features);
+
+static uint8_t
+modern_get_isr(struct vhostpci_hw *hw);
+
+const struct vpnet_pci_ops vpnet_modern_ops = {
+	.read_dev_cfg	= modern_read_dev_config,
+	.write_dev_cfg	= modern_write_dev_config,
+	.get_status	= modern_get_status,
+	.set_status	= modern_set_status,
+	.get_features	= modern_get_features,
+	.set_features	= modern_set_features,
+	.get_isr	= modern_get_isr,
+};
+
+struct vhostpci_hw_internal vhostpci_hw_internal[RTE_MAX_ETHPORTS];
+
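+/*
+ * Read the device config area; retry if the generation counter changes
+ * mid-read, which means the device updated the config underneath us.
+ */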
+static void
+modern_read_dev_config(struct vhostpci_hw *hw, size_t offset,
+		       void *dst, int length)
+{
+	int i;
+	uint8_t *p;
+	uint8_t old_gen, new_gen;
+
+	do {
+		old_gen = rte_read8(&hw->common_cfg->config_generation);
+
+		p = dst;
+		for (i = 0;  i < length; i++)
+			*p++ = rte_read8((uint8_t *)hw->dev_cfg + offset + i);
+
+		new_gen = rte_read8(&hw->common_cfg->config_generation);
+	} while (old_gen != new_gen);
+}
+
+static void
+modern_write_dev_config(struct vhostpci_hw *hw, size_t offset,
+			const void *src, int length)
+{
+	int i;
+	const uint8_t *p = src;
+
+	for (i = 0;  i < length; i++)
+		rte_write8((*p++), (((uint8_t *)hw->dev_cfg) + offset + i));
+}
+
+static uint8_t
+modern_get_status(struct vhostpci_hw *hw)
+{
+	return rte_read8(&hw->common_cfg->device_status);
+}
+
+static void
+modern_set_status(struct vhostpci_hw *hw, uint8_t status)
+{
+	rte_write8(status, &hw->common_cfg->device_status);
+}
+
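+/*
+ * The 64-bit feature set is exposed through a 32-bit window: select
+ * word 0 or 1, then read/write the corresponding half.
+ */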
+static uint64_t
+modern_get_features(struct vhostpci_hw *hw)
+{
+	uint32_t features_lo, features_hi;
+
+	rte_write32(0, &hw->common_cfg->device_feature_select);
+	features_lo = rte_read32(&hw->common_cfg->device_feature);
+
+	rte_write32(1, &hw->common_cfg->device_feature_select);
+	features_hi = rte_read32(&hw->common_cfg->device_feature);
+
+	return ((uint64_t)features_hi << 32) | features_lo;
+}
+
+static void
+modern_set_features(struct vhostpci_hw *hw, uint64_t features)
+{
+	rte_write32(0, &hw->common_cfg->guest_feature_select);
+	rte_write32(features & ((1ULL << 32) - 1),
+		    &hw->common_cfg->guest_feature);
+
+	rte_write32(1, &hw->common_cfg->guest_feature_select);
+	rte_write32(features >> 32,
+		    &hw->common_cfg->guest_feature);
+}
+
+static uint8_t
+modern_get_isr(struct vhostpci_hw *hw)
+{
+	return rte_read8(hw->isr);
+}
+
+uint64_t
+vhpci_negotiate_features(struct vhostpci_hw *hw, uint64_t host_features)
+{
+	uint64_t features;
+
+	/**
+	 * Limit negotiated features to what the driver, virtqueue, and
+	 * host all support.
+	 */
+	features = host_features & hw->guest_features;
+	VTPCI_OPS(hw)->set_features(hw, features);
+
+	return features;
+}
+
+uint8_t
+vhpci_isr(struct vhostpci_hw *hw)
+{
+	return VTPCI_OPS(hw)->get_isr(hw);
+}
+
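+/*
+ * Status bits accumulate: OR the new bit into the current status unless
+ * a reset (status 0) is requested.
+ */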
+void
+vhpci_set_status(struct vhostpci_hw *hw, uint8_t status)
+{
+	if (status != VIRTIO_CONFIG_STATUS_RESET)
+		status |= VTPCI_OPS(hw)->get_status(hw);
+
+	VTPCI_OPS(hw)->set_status(hw, status);
+}
+
+uint8_t
+vhpci_get_status(struct vhostpci_hw *hw)
+{
+	return VTPCI_OPS(hw)->get_status(hw);
+}
+
+void
+vhpci_init_complete(struct vhostpci_hw *hw)
+{
+	vhpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER_OK);
+}
+
+void
+vhpci_read_dev_config(struct vhostpci_hw *hw, size_t offset,
+		      void *dst, int length)
+{
+	VTPCI_OPS(hw)->read_dev_cfg(hw, offset, dst, length);
+}
+
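+/*
+ * Validate a virtio capability's bar/offset/length and translate it to
+ * an address within the mapped bar; return NULL on any inconsistency.
+ */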
+static void *
+get_cfg_addr(struct rte_pci_device *dev, struct vpnet_pci_cap *cap)
+{
+	uint8_t bar     = cap->bar;
+	uint32_t length = cap->length;
+	uint32_t offset = cap->offset;
+	uint8_t *base;
+
+	if (bar >= PCI_MAX_RESOURCE) {
+		PMD_INIT_LOG(ERR, "invalid bar: %u", bar);
+		return NULL;
+	}
+
+	if (offset + length < offset) {
+		PMD_INIT_LOG(ERR, "offset(%u) + length(%u) overflows",
+			offset, length);
+		return NULL;
+	}
+
+	if (offset + length > dev->mem_resource[bar].len) {
+		PMD_INIT_LOG(ERR,
+			"invalid cap: overflows bar space: %u > %" PRIu64,
+			offset + length, dev->mem_resource[bar].len);
+		return NULL;
+	}
+
+	base = dev->mem_resource[bar].addr;
+	if (base == NULL) {
+		PMD_INIT_LOG(ERR, "bar %u base addr is NULL", bar);
+		return NULL;
+	}
+
+	return base + offset;
+}
+
+/**
+ * The following macros are derived from linux/pci_regs.h; we cannot
+ * simply include that header here, as it does not exist on non-Linux
+ * platforms.
+ */
+#define PCI_CAPABILITY_LIST	0x34
+#define PCI_CAP_ID_VNDR		0x09
+#define PCI_CAP_ID_MSIX		0x11
+
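+/*
+ * Walk the PCI capability list and record the mapped addresses of the
+ * vendor-specific (virtio modern) configuration structures.
+ */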
+static int
+vhostpci_pci_read_caps(struct rte_pci_device *dev, struct vhostpci_hw *hw)
+{
+	uint8_t pos;
+	struct vpnet_pci_cap cap;
+	int ret;
+
+	if (rte_pci_map_device(dev)) {
+		PMD_INIT_LOG(DEBUG, "failed to map pci device!");
+		return -1;
+	}
+
+	ret = rte_pci_read_config(dev, &pos, 1, PCI_CAPABILITY_LIST);
+	if (ret < 0) {
+		PMD_INIT_LOG(DEBUG, "failed to read pci capability list");
+		return -1;
+	}
+
+	while (pos) {
+		ret = rte_pci_read_config(dev, &cap, sizeof(cap), pos);
+		if (ret < 0) {
+			PMD_INIT_LOG(ERR,
+				"failed to read pci cap at pos: %x", pos);
+			break;
+		}
+
+		if (cap.cap_vndr != PCI_CAP_ID_VNDR) {
+			PMD_INIT_LOG(DEBUG,
+				"[%2x] skipping non VNDR cap id: %02x",
+				pos, cap.cap_vndr);
+			goto next;
+		}
+
+		PMD_INIT_LOG(DEBUG,
+			"[%2x] cfg type: %u, bar: %u, offset: %04x, len: %u",
+			pos, cap.cfg_type, cap.bar, cap.offset, cap.length);
+
+		switch (cap.cfg_type) {
+		case VIRTIO_PCI_CAP_COMMON_CFG:
+			hw->common_cfg = get_cfg_addr(dev, &cap);
+			break;
+		case VIRTIO_PCI_CAP_NOTIFY_CFG:
+			rte_pci_read_config(dev, &hw->notify_off_multiplier,
+					4, pos + sizeof(cap));
+			hw->notify_base = get_cfg_addr(dev, &cap);
+			break;
+		case VIRTIO_PCI_CAP_DEVICE_CFG:
+			hw->dev_cfg = get_cfg_addr(dev, &cap);
+			break;
+		case VIRTIO_PCI_CAP_ISR_CFG:
+			hw->isr = get_cfg_addr(dev, &cap);
+			break;
+		}
+
+next:
+		pos = cap.cap_next;
+	}
+
+	if (hw->common_cfg == NULL || hw->notify_base == NULL ||
+	    hw->dev_cfg == NULL    || hw->isr == NULL) {
+		PMD_INIT_LOG(INFO, "no modern virtio pci device found.");
+		return -1;
+	}
+
+	PMD_INIT_LOG(INFO, "found modern virtio pci device.");
+
+	PMD_INIT_LOG(DEBUG, "common cfg mapped at: %p", hw->common_cfg);
+	PMD_INIT_LOG(DEBUG, "device cfg mapped at: %p", hw->dev_cfg);
+	PMD_INIT_LOG(DEBUG, "isr cfg mapped at: %p", hw->isr);
+	PMD_INIT_LOG(DEBUG, "notify base: %p, notify off multiplier: %u",
+		hw->notify_base, hw->notify_off_multiplier);
+
+	return 0;
+}
+
+int
+vhostpci_pci_init(struct rte_pci_device *dev, struct vhostpci_hw *hw)
+{
+	if (vhostpci_pci_read_caps(dev, hw) == 0) {
+		PMD_INIT_LOG(INFO, "modern vhostpci device detected.");
+		vhostpci_hw_internal[hw->port_id].vtpci_ops = &vpnet_modern_ops;
+		hw->modern = 1;
+		return 0;
+	}
+
+	hw->modern = 0;
+	return -1;
+}
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index 6a6a7452e..1c8b8a202 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -160,6 +160,7 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD)     += -lrte_pmd_virtio
 ifeq ($(CONFIG_RTE_LIBRTE_VHOST),y)
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_VHOST)      += -lrte_pmd_vhost
 endif # $(CONFIG_RTE_LIBRTE_VHOST)
+_LDLIBS-$(CONFIG_RTE_LIBRTE_VHOSTPCI_PMD)   += -lrte_pmd_vhostpci
 _LDLIBS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD)    += -lrte_pmd_vmxnet3_uio
 
 ifeq ($(CONFIG_RTE_LIBRTE_CRYPTODEV),y)
-- 
2.13.3