[dpdk-dev] [PATCH 2/9] vhost: provide helpers for virtio ring relay
Tiwei Bie
tiwei.bie at intel.com
Tue Dec 4 07:22:41 CET 2018
On Wed, Nov 28, 2018 at 05:46:00PM +0800, Xiao Wang wrote:
[...]
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice
> + *
> + * Synchronize the available ring from guest to mediate ring, help to
> + * check desc validity to protect against malicious guest driver.
> + *
> + * @param vid
> + * vhost device id
> + * @param qid
> + * vhost queue id
> + * @param m_vring
> + * mediate virtio ring pointer
> + * @return
> + * number of synced available entries on success, -1 on failure
> + */
> +int __rte_experimental
> +rte_vdpa_relay_avail_ring(int vid, int qid, struct vring *m_vring);
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice
> + *
> + * Synchronize the used ring from mediate ring to guest, log dirty
> + * page for each Rx buffer used.
> + *
> + * @param vid
> + * vhost device id
> + * @param qid
> + * vhost queue id
> + * @param m_vring
> + * mediate virtio ring pointer
> + * @return
> + * number of synced used entries on success, -1 on failure
> + */
> +int __rte_experimental
> +rte_vdpa_relay_used_ring(int vid, int qid, struct vring *m_vring);
The above APIs are split-ring specific. We also need to take
the packed ring into consideration.
> #endif /* _RTE_VDPA_H_ */
[...]
> diff --git a/lib/librte_vhost/vdpa.c b/lib/librte_vhost/vdpa.c
> index e7d849ee0..e41117776 100644
> --- a/lib/librte_vhost/vdpa.c
> +++ b/lib/librte_vhost/vdpa.c
> @@ -122,3 +122,176 @@ rte_vdpa_get_device_num(void)
> {
> return vdpa_device_num;
> }
> +
> +static int
> +invalid_desc_check(struct virtio_net *dev, struct vhost_virtqueue *vq,
> + uint64_t desc_iova, uint64_t desc_len, uint8_t perm)
> +{
> + uint64_t desc_addr, desc_chunck_len;
> +
> + while (desc_len) {
> + desc_chunck_len = desc_len;
> + desc_addr = vhost_iova_to_vva(dev, vq,
> + desc_iova,
> + &desc_chunck_len,
> + perm);
> +
> + if (!desc_addr)
> + return -1;
> +
> + desc_len -= desc_chunck_len;
> + desc_iova += desc_chunck_len;
> + }
> +
> + return 0;
> +}
> +
> +int
> +rte_vdpa_relay_avail_ring(int vid, int qid, struct vring *m_vring)
> +{
> + struct virtio_net *dev = get_device(vid);
> + uint16_t idx, idx_m, desc_id;
> + struct vring_desc desc;
> + struct vhost_virtqueue *vq;
> + struct vring_desc *desc_ring;
> + struct vring_desc *idesc = NULL;
> + uint64_t dlen;
> + int ret;
> +
> + if (!dev)
> + return -1;
> +
> + vq = dev->virtqueue[qid];
Better to also validate qid.
> + idx = vq->avail->idx;
> + idx_m = m_vring->avail->idx;
> + ret = idx - idx_m;
Need to cast (idx - idx_m) to uint16_t, so that the count is still
correct when the 16-bit index wraps around.
> +
> + while (idx_m != idx) {
> + /* avail entry copy */
> + desc_id = vq->avail->ring[idx_m % vq->size];
idx_m & (vq->size - 1) should be faster (the split ring size is
always a power of 2).
> + m_vring->avail->ring[idx_m % vq->size] = desc_id;
> + desc_ring = vq->desc;
> +
> + if (vq->desc[desc_id].flags & VRING_DESC_F_INDIRECT) {
> + dlen = vq->desc[desc_id].len;
> + desc_ring = (struct vring_desc *)(uintptr_t)
> + vhost_iova_to_vva(dev, vq, vq->desc[desc_id].addr,
The indent needs to be fixed.
> + &dlen,
> + VHOST_ACCESS_RO);
> + if (unlikely(!desc_ring))
> + return -1;
> +
> + if (unlikely(dlen < vq->desc[idx].len)) {
> + idesc = alloc_copy_ind_table(dev, vq,
> + vq->desc[idx].addr, vq->desc[idx].len);
> + if (unlikely(!idesc))
> + return -1;
> +
> + desc_ring = idesc;
> + }
> +
> + desc_id = 0;
> + }
> +
> + /* check if the buf addr is within the guest memory */
> + do {
> + desc = desc_ring[desc_id];
> + if (invalid_desc_check(dev, vq, desc.addr, desc.len,
> + VHOST_ACCESS_RW))
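The return value should be checked with < 0; otherwise the helper
should return bool.
Also, we may only have RO access to a buffer, so RW access should
not be required unconditionally.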
> + return -1;
The memory allocated for idesc, if any, will leak in this case.
> + desc_id = desc.next;
> + } while (desc.flags & VRING_DESC_F_NEXT);
> +
> + if (unlikely(!!idesc)) {
The !! isn't needed.
> + free_ind_table(idesc);
> + idesc = NULL;
> + }
> +
> + idx_m++;
> + }
> +
A barrier is needed here, so that the avail ring entries are
written before the avail index is updated.
> + m_vring->avail->idx = idx;
> +
> + if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
> + vhost_avail_event(vq) = vq->avail->idx;
Need to use idx instead of vq->avail->idx, which may
have already been changed by the driver.
> +
> + return ret;
> +}
> +
> +int
> +rte_vdpa_relay_used_ring(int vid, int qid, struct vring *m_vring)
> +{
> + struct virtio_net *dev = get_device(vid);
> + uint16_t idx, idx_m, desc_id;
> + struct vhost_virtqueue *vq;
> + struct vring_desc desc;
> + struct vring_desc *desc_ring;
> + struct vring_desc *idesc = NULL;
> + uint64_t dlen;
> + int ret;
> +
> + if (!dev)
> + return -1;
> +
> + vq = dev->virtqueue[qid];
Better to also validate qid.
> + idx = vq->used->idx;
> + idx_m = m_vring->used->idx;
> + ret = idx_m - idx;
Need to cast (idx_m - idx) to uint16_t, so that the count is still
correct when the 16-bit index wraps around.
> +
> + while (idx != idx_m) {
> + /* copy used entry, used ring logging is not covered here */
The used ring logging has been covered here by the following call
to vhost_log_used_vring() after used ring is changed.
> + vq->used->ring[idx % vq->size] =
idx & (vq->size - 1) should be faster.
> + m_vring->used->ring[idx % vq->size];
> +
> + /* dirty page logging for used ring */
> + vhost_log_used_vring(dev, vq,
> + offsetof(struct vring_used, ring[idx % vq->size]),
> + sizeof(struct vring_used_elem));
> +
> + desc_id = vq->used->ring[idx % vq->size].id;
> + desc_ring = vq->desc;
> +
> + if (vq->desc[desc_id].flags & VRING_DESC_F_INDIRECT) {
> + dlen = vq->desc[desc_id].len;
> + desc_ring = (struct vring_desc *)(uintptr_t)
> + vhost_iova_to_vva(dev, vq, vq->desc[desc_id].addr,
The indent needs to be fixed.
> + &dlen,
> + VHOST_ACCESS_RO);
> + if (unlikely(!desc_ring))
> + return -1;
> +
> + if (unlikely(dlen < vq->desc[idx].len)) {
> + idesc = alloc_copy_ind_table(dev, vq,
> + vq->desc[idx].addr, vq->desc[idx].len);
> + if (unlikely(!idesc))
> + return -1;
> +
> + desc_ring = idesc;
> + }
> +
> + desc_id = 0;
> + }
> +
> + /* dirty page logging for Rx buffer */
"Rx" is a net-specific term, but this API isn't net specific.
> + do {
> + desc = desc_ring[desc_id];
> + if (desc.flags & VRING_DESC_F_WRITE)
> + vhost_log_write(dev, desc.addr, desc.len);
> + desc_id = desc.next;
> + } while (desc.flags & VRING_DESC_F_NEXT);
> +
> + if (unlikely(!!idesc)) {
The !! isn't needed.
> + free_ind_table(idesc);
> + idesc = NULL;
> + }
> +
> + idx++;
> + }
> +
A barrier is needed here, so that the used ring entries are
written before the used index is updated.
> + vq->used->idx = idx_m;
> +
> + if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
> + vring_used_event(m_vring) = m_vring->used->idx;
> +
> + return ret;
> +}
[...]
More information about the dev
mailing list