[dpdk-dev] [PATCH 2/9] vhost: provide helpers for virtio ring relay

Tiwei Bie tiwei.bie at intel.com
Tue Dec 4 07:22:41 CET 2018


On Wed, Nov 28, 2018 at 05:46:00PM +0800, Xiao Wang wrote:
[...]
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice
> + *
> + * Synchronize the available ring from guest to mediate ring, help to
> + * check desc validity to protect against malicious guest driver.
> + *
> + * @param vid
> + *  vhost device id
> + * @param qid
> + *  vhost queue id
> + * @param m_vring
> + *  mediate virtio ring pointer
> + * @return
> + *  number of synced available entries on success, -1 on failure
> + */
> +int __rte_experimental
> +rte_vdpa_relay_avail_ring(int vid, int qid, struct vring *m_vring);
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice
> + *
> + * Synchronize the used ring from mediate ring to guest, log dirty
> + * page for each Rx buffer used.
> + *
> + * @param vid
> + *  vhost device id
> + * @param qid
> + *  vhost queue id
> + * @param m_vring
> + *  mediate virtio ring pointer
> + * @return
> + *  number of synced used entries on success, -1 on failure
> + */
> +int __rte_experimental
> +rte_vdpa_relay_used_ring(int vid, int qid, struct vring *m_vring);

The above APIs are split-ring specific. We also need to take the packed
ring into consideration.
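
One option is to keep the split-ring logic in a helper and dispatch on the
negotiated ring layout inside the entry point, roughly like this (untested
sketch; vdpa_relay_avail_split()/vdpa_relay_avail_packed() are placeholder
names, and the m_vring argument would likely need rework for packed ring
since struct vring only describes the split layout):

int
rte_vdpa_relay_avail_ring(int vid, int qid, struct vring *m_vring)
{
	struct virtio_net *dev = get_device(vid);

	if (!dev)
		return -1;

	/* dispatch on the negotiated ring layout */
	if (vq_is_packed(dev))
		return vdpa_relay_avail_packed(dev, qid, m_vring);

	return vdpa_relay_avail_split(dev, qid, m_vring);
}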

>  #endif /* _RTE_VDPA_H_ */
[...]
> diff --git a/lib/librte_vhost/vdpa.c b/lib/librte_vhost/vdpa.c
> index e7d849ee0..e41117776 100644
> --- a/lib/librte_vhost/vdpa.c
> +++ b/lib/librte_vhost/vdpa.c
> @@ -122,3 +122,176 @@ rte_vdpa_get_device_num(void)
>  {
>  	return vdpa_device_num;
>  }
> +
> +static int
> +invalid_desc_check(struct virtio_net *dev, struct vhost_virtqueue *vq,
> +		uint64_t desc_iova, uint64_t desc_len, uint8_t perm)
> +{
> +	uint64_t desc_addr, desc_chunck_len;
> +
> +	while (desc_len) {
> +		desc_chunck_len = desc_len;
> +		desc_addr = vhost_iova_to_vva(dev, vq,
> +				desc_iova,
> +				&desc_chunck_len,
> +				perm);
> +
> +		if (!desc_addr)
> +			return -1;
> +
> +		desc_len -= desc_chunck_len;
> +		desc_iova += desc_chunck_len;
> +	}
> +
> +	return 0;
> +}
> +
> +int
> +rte_vdpa_relay_avail_ring(int vid, int qid, struct vring *m_vring)
> +{
> +	struct virtio_net *dev = get_device(vid);
> +	uint16_t idx, idx_m, desc_id;
> +	struct vring_desc desc;
> +	struct vhost_virtqueue *vq;
> +	struct vring_desc *desc_ring;
> +	struct vring_desc *idesc = NULL;
> +	uint64_t dlen;
> +	int ret;
> +
> +	if (!dev)
> +		return -1;
> +
> +	vq = dev->virtqueue[qid];

Better to also validate qid.
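
Something like this before the dereference should do (untested; assuming
dev->nr_vring holds the number of virtqueues):

	/* reject out-of-range queue ids before dereferencing virtqueue[] */
	if (qid < 0 || qid >= (int)dev->nr_vring)
		return -1;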

> +	idx = vq->avail->idx;
> +	idx_m = m_vring->avail->idx;
> +	ret = idx - idx_m;

Need to cast (idx - idx_m) to uint16_t, i.e. ret = (uint16_t)(idx - idx_m);
otherwise the count is wrong when the 16-bit ring indexes wrap around.

> +
> +	while (idx_m != idx) {
> +		/* avail entry copy */
> +		desc_id = vq->avail->ring[idx_m % vq->size];

idx_m & (vq->size - 1) should be faster, as the split ring size is always
a power of two.

> +		m_vring->avail->ring[idx_m % vq->size] = desc_id;
> +		desc_ring = vq->desc;
> +
> +		if (vq->desc[desc_id].flags & VRING_DESC_F_INDIRECT) {
> +			dlen = vq->desc[desc_id].len;
> +			desc_ring = (struct vring_desc *)(uintptr_t)
> +			vhost_iova_to_vva(dev, vq, vq->desc[desc_id].addr,

The indent needs to be fixed.

> +						&dlen,
> +						VHOST_ACCESS_RO);
> +			if (unlikely(!desc_ring))
> +				return -1;
> +
> +			if (unlikely(dlen < vq->desc[idx].len)) {
> +				idesc = alloc_copy_ind_table(dev, vq,
> +					vq->desc[idx].addr, vq->desc[idx].len);
> +				if (unlikely(!idesc))
> +					return -1;
> +
> +				desc_ring = idesc;
> +			}
> +
> +			desc_id = 0;
> +		}
> +
> +		/* check if the buf addr is within the guest memory */
> +		do {
> +			desc = desc_ring[desc_id];
> +			if (invalid_desc_check(dev, vq, desc.addr, desc.len,
> +						VHOST_ACCESS_RW))

The return value should be checked with < 0; otherwise invalid_desc_check()
should return bool.

Also, checking every descriptor with VHOST_ACCESS_RW may be too strict; we
may just have RO access for some buffers.

> +				return -1;

The memory allocated for idesc, if any, will leak in this case.
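
Combining the three points, the check inside the loop could look roughly
like this (untested; one option is to pick the permission from the
descriptor's WRITE flag, with perm being a new local uint8_t):

			/* device-writable buffers need write access,
			 * the rest only read access */
			perm = desc.flags & VRING_DESC_F_WRITE ?
				VHOST_ACCESS_WO : VHOST_ACCESS_RO;
			if (invalid_desc_check(dev, vq, desc.addr, desc.len,
						perm) < 0) {
				/* don't leak the copied indirect table */
				if (unlikely(idesc))
					free_ind_table(idesc);
				return -1;
			}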

> +			desc_id = desc.next;
> +		} while (desc.flags & VRING_DESC_F_NEXT);
> +
> +		if (unlikely(!!idesc)) {

The !! isn't needed.

> +			free_ind_table(idesc);
> +			idesc = NULL;
> +		}
> +
> +		idx_m++;
> +	}
> +

A write barrier is needed here, so the relayed avail entries are visible
before the index update.
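
E.g. insert something like this before the index update (assuming
rte_smp_wmb() is strong enough for whoever consumes the mediated ring):

	/* make the relayed avail entries visible before publishing the index */
	rte_smp_wmb();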

> +	m_vring->avail->idx = idx;
> +
> +	if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
> +		vhost_avail_event(vq) = vq->avail->idx;

Need to use idx instead of vq->avail->idx, which may have already been
changed by the driver.
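
I.e. something along the lines of:

	if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
		vhost_avail_event(vq) = idx;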

> +
> +	return ret;
> +}
> +
> +int
> +rte_vdpa_relay_used_ring(int vid, int qid, struct vring *m_vring)
> +{
> +	struct virtio_net *dev = get_device(vid);
> +	uint16_t idx, idx_m, desc_id;
> +	struct vhost_virtqueue *vq;
> +	struct vring_desc desc;
> +	struct vring_desc *desc_ring;
> +	struct vring_desc *idesc = NULL;
> +	uint64_t dlen;
> +	int ret;
> +
> +	if (!dev)
> +		return -1;
> +
> +	vq = dev->virtqueue[qid];

Better to also validate qid.

> +	idx = vq->used->idx;
> +	idx_m = m_vring->used->idx;
> +	ret = idx_m - idx;

Same as above: (idx_m - idx) needs to be cast to uint16_t to handle index
wrap-around.

> +
> +	while (idx != idx_m) {
> +		/* copy used entry, used ring logging is not covered here */

The used ring logging is actually covered here, by the following call to
vhost_log_used_vring() after the used ring is changed, so this comment is
misleading.

> +		vq->used->ring[idx % vq->size] =

idx & (vq->size - 1) should be faster.

> +			m_vring->used->ring[idx % vq->size];
> +
> +		/* dirty page logging for used ring */
> +		vhost_log_used_vring(dev, vq,
> +			offsetof(struct vring_used, ring[idx % vq->size]),
> +			sizeof(struct vring_used_elem));
> +
> +		desc_id = vq->used->ring[idx % vq->size].id;
> +		desc_ring = vq->desc;
> +
> +		if (vq->desc[desc_id].flags & VRING_DESC_F_INDIRECT) {
> +			dlen = vq->desc[desc_id].len;
> +			desc_ring = (struct vring_desc *)(uintptr_t)
> +			vhost_iova_to_vva(dev, vq, vq->desc[desc_id].addr,

The indent needs to be fixed.

> +						&dlen,
> +						VHOST_ACCESS_RO);
> +			if (unlikely(!desc_ring))
> +				return -1;
> +
> +			if (unlikely(dlen < vq->desc[idx].len)) {
> +				idesc = alloc_copy_ind_table(dev, vq,
> +					vq->desc[idx].addr, vq->desc[idx].len);
> +				if (unlikely(!idesc))
> +					return -1;
> +
> +				desc_ring = idesc;
> +			}
> +
> +			desc_id = 0;
> +		}
> +
> +		/* dirty page logging for Rx buffer */

"Rx" is a networking term, but this API isn't net specific, so better to
avoid it in the comment.

> +		do {
> +			desc = desc_ring[desc_id];
> +			if (desc.flags & VRING_DESC_F_WRITE)
> +				vhost_log_write(dev, desc.addr, desc.len);
> +			desc_id = desc.next;
> +		} while (desc.flags & VRING_DESC_F_NEXT);
> +
> +		if (unlikely(!!idesc)) {

The !! isn't needed.

> +			free_ind_table(idesc);
> +			idesc = NULL;
> +		}
> +
> +		idx++;
> +	}
> +

A write barrier is needed here as well, before updating the used index.

> +	vq->used->idx = idx_m;
> +
> +	if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
> +		vring_used_event(m_vring) = m_vring->used->idx;
> +
> +	return ret;
> +}
[...]

