[dpdk-dev] [RFC PATCH 3/6] lib_vhost: Set memory layout for multiple queues mode

Ouyang Changchun changchun.ouyang at intel.com
Thu May 7 15:00:42 CEST 2015


QEMU sends separate commands, in order, to set the memory layout for each
queue of a virtio device; accordingly, vhost needs to keep memory layout
information for each queue of the virtio device.

This also requires adjusting the interface of the function gpa_to_vva
slightly, by introducing a queue index that specifies which queue of the
device to use when looking up the vhost virtual address for the incoming
guest physical address.

Signed-off-by: Changchun Ouyang <changchun.ouyang at intel.com>
---
 examples/vhost/main.c                         | 21 +++++-----
 lib/librte_vhost/rte_virtio_net.h             | 10 +++--
 lib/librte_vhost/vhost_cuse/virtio-net-cdev.c | 57 ++++++++++++++------------
 lib/librte_vhost/vhost_rxtx.c                 | 21 +++++-----
 lib/librte_vhost/vhost_user/virtio-net-user.c | 59 ++++++++++++++-------------
 lib/librte_vhost/virtio-net.c                 | 26 +++++++-----
 6 files changed, 106 insertions(+), 88 deletions(-)

diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index c3fcb80..87dfc67 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -1467,11 +1467,11 @@ attach_rxmbuf_zcp(struct virtio_net *dev)
 		desc = &vq->desc[desc_idx];
 		if (desc->flags & VRING_DESC_F_NEXT) {
 			desc = &vq->desc[desc->next];
-			buff_addr = gpa_to_vva(dev, desc->addr);
+			buff_addr = gpa_to_vva(dev, 0, desc->addr);
 			phys_addr = gpa_to_hpa(vdev, desc->addr, desc->len,
 					&addr_type);
 		} else {
-			buff_addr = gpa_to_vva(dev,
+			buff_addr = gpa_to_vva(dev, 0,
 					desc->addr + vq->vhost_hlen);
 			phys_addr = gpa_to_hpa(vdev,
 					desc->addr + vq->vhost_hlen,
@@ -1723,7 +1723,7 @@ virtio_dev_rx_zcp(struct virtio_net *dev, struct rte_mbuf **pkts,
 			rte_pktmbuf_data_len(buff), 0);
 
 		/* Buffer address translation for virtio header. */
-		buff_hdr_addr = gpa_to_vva(dev, desc->addr);
+		buff_hdr_addr = gpa_to_vva(dev, 0, desc->addr);
 		packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;
 
 		/*
@@ -1947,7 +1947,7 @@ virtio_dev_tx_zcp(struct virtio_net *dev)
 		desc = &vq->desc[desc->next];
 
 		/* Buffer address translation. */
-		buff_addr = gpa_to_vva(dev, desc->addr);
+		buff_addr = gpa_to_vva(dev, 0, desc->addr);
 		/* Need check extra VLAN_HLEN size for inserting VLAN tag */
 		phys_addr = gpa_to_hpa(vdev, desc->addr, desc->len + VLAN_HLEN,
 			&addr_type);
@@ -2605,13 +2605,14 @@ new_device (struct virtio_net *dev)
 	dev->priv = vdev;
 
 	if (zero_copy) {
-		vdev->nregions_hpa = dev->mem->nregions;
-		for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++) {
+		struct virtio_memory *dev_mem = dev->mem_arr[0];
+		vdev->nregions_hpa = dev_mem->nregions;
+		for (regionidx = 0; regionidx < dev_mem->nregions; regionidx++) {
 			vdev->nregions_hpa
 				+= check_hpa_regions(
-					dev->mem->regions[regionidx].guest_phys_address
-					+ dev->mem->regions[regionidx].address_offset,
-					dev->mem->regions[regionidx].memory_size);
+					dev_mem->regions[regionidx].guest_phys_address
+					+ dev_mem->regions[regionidx].address_offset,
+					dev_mem->regions[regionidx].memory_size);
 
 		}
 
@@ -2627,7 +2628,7 @@ new_device (struct virtio_net *dev)
 
 
 		if (fill_hpa_memory_regions(
-			vdev->regions_hpa, dev->mem
+			vdev->regions_hpa, dev_mem
 			) != vdev->nregions_hpa) {
 
 			RTE_LOG(ERR, VHOST_CONFIG,
diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
index 5fb6006..c10c023 100644
--- a/lib/librte_vhost/rte_virtio_net.h
+++ b/lib/librte_vhost/rte_virtio_net.h
@@ -99,14 +99,15 @@ struct vhost_virtqueue {
  * Device structure contains all configuration information relating to the device.
  */
 struct virtio_net {
-	struct virtio_memory	*mem;		/**< QEMU memory and memory region information. */
 	struct vhost_virtqueue	*virtqueue[VIRTIO_QNUM * VIRTIO_MAX_VIRTQUEUES]; /**< Contains all virtqueue information. */
+	struct virtio_memory    *mem_arr[VIRTIO_MAX_VIRTQUEUES];        /**< Array for QEMU memory and memory region information. */
 	uint64_t		features;	/**< Negotiated feature set. */
 	uint64_t		device_fh;	/**< device identifier. */
 	uint32_t		flags;		/**< Device flags. Only used to check if device is running on data core. */
 #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ)
 	char			ifname[IF_NAME_SZ];	/**< Name of the tap device or socket path. */
 	uint32_t                num_virt_queues;
+	uint32_t                mem_idx;        /** Used in set memory layout, unique for each queue within virtio device. */
 	void			*priv;		/**< private context */
 } __rte_cache_aligned;
 
@@ -153,14 +154,15 @@ rte_vring_available_entries(struct virtio_net *dev, uint16_t queue_id)
  * This is used to convert guest virtio buffer addresses.
  */
 static inline uint64_t __attribute__((always_inline))
-gpa_to_vva(struct virtio_net *dev, uint64_t guest_pa)
+gpa_to_vva(struct virtio_net *dev, uint32_t q_idx, uint64_t guest_pa)
 {
 	struct virtio_memory_regions *region;
+	struct virtio_memory * dev_mem = dev->mem_arr[q_idx];
 	uint32_t regionidx;
 	uint64_t vhost_va = 0;
 
-	for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++) {
-		region = &dev->mem->regions[regionidx];
+	for (regionidx = 0; regionidx < dev_mem->nregions; regionidx++) {
+		region = &dev_mem->regions[regionidx];
 		if ((guest_pa >= region->guest_phys_address) &&
 			(guest_pa <= region->guest_phys_address_end)) {
 			vhost_va = region->address_offset + guest_pa;
diff --git a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
index ae2c3fa..d90a47b 100644
--- a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
+++ b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c
@@ -273,28 +273,32 @@ cuse_set_mem_table(struct vhost_device_ctx ctx,
 		((uint64_t)(uintptr_t)mem_regions_addr + size);
 	uint64_t base_address = 0, mapped_address, mapped_size;
 	struct virtio_net *dev;
+	struct virtio_memory * dev_mem = NULL;
 
 	dev = get_device(ctx);
 	if (dev == NULL)
-		return -1;
-
-	if (dev->mem && dev->mem->mapped_address) {
-		munmap((void *)(uintptr_t)dev->mem->mapped_address,
-			(size_t)dev->mem->mapped_size);
-		free(dev->mem);
-		dev->mem = NULL;
+		goto error;
+
+	dev_mem = dev->mem_arr[dev->mem_idx];
+	if (dev_mem && dev_mem->mapped_address) {
+		munmap((void *)(uintptr_t)dev_mem->mapped_address,
+			(size_t)dev_mem->mapped_size);
+		free(dev_mem);
+		dev->mem_arr[dev->mem_idx] = NULL;
 	}
 
-	dev->mem = calloc(1, sizeof(struct virtio_memory) +
+	mem_arr[dev->mem_idx] = calloc(1, sizeof(struct virtio_memory) +
 		sizeof(struct virtio_memory_regions) * nregions);
-	if (dev->mem == NULL) {
+	dev_mem = dev->mem_arr[dev->mem_idx];
+
+	if (dev_mem == NULL) {
 		RTE_LOG(ERR, VHOST_CONFIG,
-			"(%"PRIu64") Failed to allocate memory for dev->mem\n",
-			dev->device_fh);
-		return -1;
+			"(%"PRIu64") Failed to allocate memory for dev->mem_arr[%d]\n",
+			dev->device_fh, dev->mem_idx);
+		goto error;
 	}
 
-	pregion = &dev->mem->regions[0];
+	pregion = &dev_mem->regions[0];
 
 	for (idx = 0; idx < nregions; idx++) {
 		pregion[idx].guest_phys_address =
@@ -320,14 +324,12 @@ cuse_set_mem_table(struct vhost_device_ctx ctx,
 				pregion[idx].userspace_address;
 			/* Map VM memory file */
 			if (host_memory_map(ctx.pid, base_address,
-				&mapped_address, &mapped_size) != 0) {
-				free(dev->mem);
-				dev->mem = NULL;
-				return -1;
-			}
-			dev->mem->mapped_address = mapped_address;
-			dev->mem->base_address = base_address;
-			dev->mem->mapped_size = mapped_size;
+				&mapped_address, &mapped_size) != 0)
+				goto free;
+
+			dev_mem->mapped_address = mapped_address;
+			dev_mem->base_address = base_address;
+			dev_mem->mapped_size = mapped_size;
 		}
 	}
 
@@ -335,9 +337,7 @@ cuse_set_mem_table(struct vhost_device_ctx ctx,
 	if (base_address == 0) {
 		RTE_LOG(ERR, VHOST_CONFIG,
 			"Failed to find base address of qemu memory file.\n");
-		free(dev->mem);
-		dev->mem = NULL;
-		return -1;
+		goto free;
 	}
 
 	valid_regions = nregions;
@@ -369,9 +369,16 @@ cuse_set_mem_table(struct vhost_device_ctx ctx,
 			pregion[idx].userspace_address -
 			pregion[idx].guest_phys_address;
 	}
-	dev->mem->nregions = valid_regions;
 
+	dev_mem->nregions = valid_regions;
+	dev->mem_idx++;
 	return 0;
+
+free:
+	free(dev_mem);
+	dev->mem_arr[dev->mem_idx] = NULL;
+error:
+	return -1;
 }
 
 /*
diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c
index d8dd5ec..d255369 100644
--- a/lib/librte_vhost/vhost_rxtx.c
+++ b/lib/librte_vhost/vhost_rxtx.c
@@ -119,7 +119,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 		buff = pkts[packet_success];
 
 		/* Convert from gpa to vva (guest physical addr -> vhost virtual addr) */
-		buff_addr = gpa_to_vva(dev, desc->addr);
+		buff_addr = gpa_to_vva(dev, queue_id / VIRTIO_QNUM, desc->addr);
 		/* Prefetch buffer address. */
 		rte_prefetch0((void *)(uintptr_t)buff_addr);
 
@@ -135,7 +135,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 			desc->len = vq->vhost_hlen;
 			desc = &vq->desc[desc->next];
 			/* Buffer address translation. */
-			buff_addr = gpa_to_vva(dev, desc->addr);
+			buff_addr = gpa_to_vva(dev, queue_id / VIRTIO_QNUM, desc->addr);
 			desc->len = rte_pktmbuf_data_len(buff);
 		} else {
 			buff_addr += vq->vhost_hlen;
@@ -215,9 +215,9 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t queue_id,
 	 * Convert from gpa to vva
 	 * (guest physical addr -> vhost virtual addr)
 	 */
-	vb_addr =
-		gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
 	vq = dev->virtqueue[queue_id];
+	vb_addr = gpa_to_vva(dev, queue_id / VIRTIO_QNUM,
+			vq->buf_vec[vec_idx].buf_addr);
 	vb_hdr_addr = vb_addr;
 
 	/* Prefetch buffer address. */
@@ -259,8 +259,8 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t queue_id,
 		}
 
 		vec_idx++;
-		vb_addr =
-			gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr);
+		vb_addr = gpa_to_vva(dev, queue_id / VIRTIO_QNUM,
+			vq->buf_vec[vec_idx].buf_addr);
 
 		/* Prefetch buffer address. */
 		rte_prefetch0((void *)(uintptr_t)vb_addr);
@@ -305,7 +305,7 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t queue_id,
 			}
 
 			vec_idx++;
-			vb_addr = gpa_to_vva(dev,
+			vb_addr = gpa_to_vva(dev, queue_id / VIRTIO_QNUM,
 				vq->buf_vec[vec_idx].buf_addr);
 			vb_offset = 0;
 			vb_avail = vq->buf_vec[vec_idx].buf_len;
@@ -349,7 +349,7 @@ copy_from_mbuf_to_vring(struct virtio_net *dev, uint16_t queue_id,
 
 					/* Get next buffer from buf_vec. */
 					vec_idx++;
-					vb_addr = gpa_to_vva(dev,
+					vb_addr = gpa_to_vva(dev, queue_id / VIRTIO_QNUM,
 						vq->buf_vec[vec_idx].buf_addr);
 					vb_avail =
 						vq->buf_vec[vec_idx].buf_len;
@@ -588,7 +588,7 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
 		desc = &vq->desc[desc->next];
 
 		/* Buffer address translation. */
-		vb_addr = gpa_to_vva(dev, desc->addr);
+		vb_addr = gpa_to_vva(dev, queue_id / VIRTIO_QNUM, desc->addr);
 		/* Prefetch buffer address. */
 		rte_prefetch0((void *)(uintptr_t)vb_addr);
 
@@ -694,7 +694,8 @@ rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id,
 					desc = &vq->desc[desc->next];
 
 					/* Buffer address translation. */
-					vb_addr = gpa_to_vva(dev, desc->addr);
+					vb_addr = gpa_to_vva(dev,
+						queue_id / VIRTIO_QNUM, desc->addr);
 					/* Prefetch buffer address. */
 					rte_prefetch0((void *)(uintptr_t)vb_addr);
 					vb_offset = 0;
diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c b/lib/librte_vhost/vhost_user/virtio-net-user.c
index 031712c..2690749 100644
--- a/lib/librte_vhost/vhost_user/virtio-net-user.c
+++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
@@ -70,17 +70,17 @@ get_blk_size(int fd)
 }
 
 static void
-free_mem_region(struct virtio_net *dev)
+free_mem_region(struct virtio_memory *dev_mem)
 {
 	struct orig_region_map *region;
 	unsigned int idx;
 	uint64_t alignment;
 
-	if (!dev || !dev->mem)
+	if (!dev_mem)
 		return;
 
-	region = orig_region(dev->mem, dev->mem->nregions);
-	for (idx = 0; idx < dev->mem->nregions; idx++) {
+	region = orig_region(dev_mem, dev_mem->nregions);
+	for (idx = 0; idx < dev_mem->nregions; idx++) {
 		if (region[idx].mapped_address) {
 			alignment = region[idx].blksz;
 			munmap((void *)
@@ -103,37 +103,37 @@ user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
 	unsigned int idx = 0;
 	struct orig_region_map *pregion_orig;
 	uint64_t alignment;
+	struct virtio_memory *dev_mem = NULL;
 
 	/* unmap old memory regions one by one*/
 	dev = get_device(ctx);
 	if (dev == NULL)
 		return -1;
 
-	/* Remove from the data plane. */
-	if (dev->flags & VIRTIO_DEV_RUNNING)
-		notify_ops->destroy_device(dev);
-
-	if (dev->mem) {
-		free_mem_region(dev);
-		free(dev->mem);
-		dev->mem = NULL;
+	dev_mem = dev->mem_arr[dev->mem_idx];
+	if (dev_mem) {
+		free_mem_region(dev_mem);
+		free(dev_mem);
+		dev->mem_arr[dev->mem_idx] = NULL;
 	}
 
-	dev->mem = calloc(1,
+	dev->mem_arr[dev->mem_idx] = calloc(1,
 		sizeof(struct virtio_memory) +
 		sizeof(struct virtio_memory_regions) * memory.nregions +
 		sizeof(struct orig_region_map) * memory.nregions);
-	if (dev->mem == NULL) {
+
+	dev_mem = dev->mem_arr[dev->mem_idx];
+	if (dev_mem == NULL) {
 		RTE_LOG(ERR, VHOST_CONFIG,
-			"(%"PRIu64") Failed to allocate memory for dev->mem\n",
-			dev->device_fh);
+			"(%"PRIu64") Failed to allocate memory for dev->mem_arr[%d]\n",
+			dev->device_fh, dev->mem_idx);
 		return -1;
 	}
-	dev->mem->nregions = memory.nregions;
+	dev_mem->nregions = memory.nregions;
 
-	pregion_orig = orig_region(dev->mem, memory.nregions);
+	pregion_orig = orig_region(dev_mem, memory.nregions);
 	for (idx = 0; idx < memory.nregions; idx++) {
-		pregion = &dev->mem->regions[idx];
+		pregion = &dev_mem->regions[idx];
 		pregion->guest_phys_address =
 			memory.regions[idx].guest_phys_addr;
 		pregion->guest_phys_address_end =
@@ -175,9 +175,9 @@ user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
 			pregion->guest_phys_address;
 
 		if (memory.regions[idx].guest_phys_addr == 0) {
-			dev->mem->base_address =
+			dev_mem->base_address =
 				memory.regions[idx].userspace_addr;
-			dev->mem->mapped_address =
+			dev_mem->mapped_address =
 				pregion->address_offset;
 		}
 
@@ -189,6 +189,7 @@ user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg)
 			 pregion->memory_size);
 	}
 
+	dev->mem_idx++;
 	return 0;
 
 err_mmap:
@@ -200,8 +201,8 @@ err_mmap:
 					alignment));
 		close(pregion_orig[idx].fd);
 	}
-	free(dev->mem);
-	dev->mem = NULL;
+	free(dev_mem);
+	dev->mem_arr[dev->mem_idx] = NULL;
 	return -1;
 }
 
@@ -309,13 +310,15 @@ void
 user_destroy_device(struct vhost_device_ctx ctx)
 {
 	struct virtio_net *dev = get_device(ctx);
+	uint32_t i; /* index into the per-queue memory layout array */
 
 	if (dev && (dev->flags & VIRTIO_DEV_RUNNING))
 		notify_ops->destroy_device(dev);
 
-	if (dev && dev->mem) {
-		free_mem_region(dev);
-		free(dev->mem);
-		dev->mem = NULL;
-	}
+	for (i = 0; dev && i < dev->num_virt_queues; i++) /* dev may be NULL */
+		if (dev->mem_arr[i]) { /* release this queue's memory layout */
+			free_mem_region(dev->mem_arr[i]);
+			free(dev->mem_arr[i]);
+			dev->mem_arr[i] = NULL; /* avoid a dangling pointer */
+		}
 }
diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c
index 55b7440..11834a3 100644
--- a/lib/librte_vhost/virtio-net.c
+++ b/lib/librte_vhost/virtio-net.c
@@ -77,15 +77,16 @@ static uint32_t q_num = 1;
  * used to convert the ring addresses to our address space.
  */
 static uint64_t
-qva_to_vva(struct virtio_net *dev, uint64_t qemu_va)
+qva_to_vva(struct virtio_net *dev, uint32_t q_idx, uint64_t qemu_va)
 {
 	struct virtio_memory_regions *region;
 	uint64_t vhost_va = 0;
 	uint32_t regionidx = 0;
+	struct virtio_memory *dev_mem = dev->mem_arr[q_idx];
 
 	/* Find the region where the address lives. */
-	for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++) {
-		region = &dev->mem->regions[regionidx];
+	for (regionidx = 0; regionidx < dev_mem->nregions; regionidx++) {
+		region = &dev_mem->regions[regionidx];
 		if ((qemu_va >= region->userspace_address) &&
 			(qemu_va <= region->userspace_address +
 			region->memory_size)) {
@@ -182,10 +183,13 @@ cleanup_device(struct virtio_net *dev)
 	uint32_t q_idx;
 
 	/* Unmap QEMU memory file if mapped. */
-	if (dev->mem) {
-		munmap((void *)(uintptr_t)dev->mem->mapped_address,
-			(size_t)dev->mem->mapped_size);
-		free(dev->mem);
+	for (q_idx = 0; q_idx < dev->num_virt_queues; q_idx++) {
+		struct virtio_memory * dev_mem = dev->mem_arr[q_idx];
+		if (dev_mem) {
+			munmap((void *)(uintptr_t)dev_mem->mapped_address,
+				(size_t)dev_mem->mapped_size);
+			free(dev_mem);
+		}
 	}
 
 	/* Close any event notifiers opened by device. */
@@ -260,7 +264,7 @@ init_device(struct virtio_net *dev)
 	 * Virtqueues have already been malloced so
 	 * we don't want to set them to NULL.
 	 */
-	vq_offset = offsetof(struct virtio_net, mem);
+	vq_offset = offsetof(struct virtio_net, mem_arr);
 
 	/* Set everything to 0. */
 	memset((void *)(uintptr_t)((uint64_t)(uintptr_t)dev + vq_offset), 0,
@@ -529,7 +533,7 @@ set_vring_addr(struct vhost_device_ctx ctx, struct vhost_vring_addr *addr)
 
 	/* The addresses are converted from QEMU virtual to Vhost virtual. */
 	vq->desc = (struct vring_desc *)(uintptr_t)qva_to_vva(dev,
-			addr->desc_user_addr);
+			addr->index / 2, addr->desc_user_addr);
 	if (vq->desc == 0) {
 		RTE_LOG(ERR, VHOST_CONFIG,
 			"(%"PRIu64") Failed to find desc ring address.\n",
@@ -538,7 +542,7 @@ set_vring_addr(struct vhost_device_ctx ctx, struct vhost_vring_addr *addr)
 	}
 
 	vq->avail = (struct vring_avail *)(uintptr_t)qva_to_vva(dev,
-			addr->avail_user_addr);
+			addr->index / 2, addr->avail_user_addr);
 	if (vq->avail == 0) {
 		RTE_LOG(ERR, VHOST_CONFIG,
 			"(%"PRIu64") Failed to find avail ring address.\n",
@@ -547,7 +551,7 @@ set_vring_addr(struct vhost_device_ctx ctx, struct vhost_vring_addr *addr)
 	}
 
 	vq->used = (struct vring_used *)(uintptr_t)qva_to_vva(dev,
-			addr->used_user_addr);
+			addr->index / 2, addr->used_user_addr);
 	if (vq->used == 0) {
 		RTE_LOG(ERR, VHOST_CONFIG,
 			"(%"PRIu64") Failed to find used ring address.\n",
-- 
1.8.4.2



More information about the dev mailing list