[dpdk-dev] [PATCH v2 06/13] vdpa/mlx5: prepare virtio queues

Matan Azrad matan at mellanox.com
Fri Jan 31 08:34:48 CET 2020



From: Maxime Coquelin
> On 1/29/20 11:09 AM, Matan Azrad wrote:
> > The HW virtq object represents an emulated context for a VIRTIO_NET
> > virtqueue which was created and managed by a VIRTIO_NET driver as
> > defined in VIRTIO Specification.
> >
> > Add support to prepare and release all the basic HW resources needed
> > for the user virtqs emulation according to the rte_vhost configurations.
> >
> > This patch prepares the basic configurations needed by DevX commands
> > to create a virtq.
> >
> > Add new file mlx5_vdpa_virtq.c to manage virtq operations.
> >
> > Signed-off-by: Matan Azrad <matan at mellanox.com>
> > Acked-by: Viacheslav Ovsiienko <viacheslavo at mellanox.com>
> > ---
> >  drivers/vdpa/mlx5/Makefile          |   1 +
> >  drivers/vdpa/mlx5/meson.build       |   1 +
> >  drivers/vdpa/mlx5/mlx5_vdpa.c       |   1 +
> >  drivers/vdpa/mlx5/mlx5_vdpa.h       |  36 ++++++
> >  drivers/vdpa/mlx5/mlx5_vdpa_virtq.c | 212 ++++++++++++++++++++++++++++++++++++
> >  5 files changed, 251 insertions(+)
> >  create mode 100644 drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
> >
> > diff --git a/drivers/vdpa/mlx5/Makefile b/drivers/vdpa/mlx5/Makefile
> > index 7f13756..353e262 100644
> > --- a/drivers/vdpa/mlx5/Makefile
> > +++ b/drivers/vdpa/mlx5/Makefile
> > @@ -10,6 +10,7 @@ LIB = librte_pmd_mlx5_vdpa.a
> >  SRCS-$(CONFIG_RTE_LIBRTE_MLX5_VDPA_PMD) += mlx5_vdpa.c
> >  SRCS-$(CONFIG_RTE_LIBRTE_MLX5_VDPA_PMD) += mlx5_vdpa_mem.c
> >  SRCS-$(CONFIG_RTE_LIBRTE_MLX5_VDPA_PMD) += mlx5_vdpa_event.c
> > +SRCS-$(CONFIG_RTE_LIBRTE_MLX5_VDPA_PMD) += mlx5_vdpa_virtq.c
> >
> >  # Basic CFLAGS.
> >  CFLAGS += -O3
> > diff --git a/drivers/vdpa/mlx5/meson.build b/drivers/vdpa/mlx5/meson.build
> > index c609f7c..e017f95 100644
> > --- a/drivers/vdpa/mlx5/meson.build
> > +++ b/drivers/vdpa/mlx5/meson.build
> > @@ -14,6 +14,7 @@ sources = files(
> >  	'mlx5_vdpa.c',
> >  	'mlx5_vdpa_mem.c',
> >  	'mlx5_vdpa_event.c',
> > +	'mlx5_vdpa_virtq.c',
> >  )
> >  cflags_options = [
> >  	'-std=c11',
> > diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.c b/drivers/vdpa/mlx5/mlx5_vdpa.c
> > index c67f93d..4d30b35 100644
> > --- a/drivers/vdpa/mlx5/mlx5_vdpa.c
> > +++ b/drivers/vdpa/mlx5/mlx5_vdpa.c
> > @@ -229,6 +229,7 @@
> >  		goto error;
> >  	}
> >  	SLIST_INIT(&priv->mr_list);
> > +	SLIST_INIT(&priv->virtq_list);
> >  	pthread_mutex_lock(&priv_list_lock);
> >  	TAILQ_INSERT_TAIL(&priv_list, priv, next);
> >  	pthread_mutex_unlock(&priv_list_lock);
> > diff --git a/drivers/vdpa/mlx5/mlx5_vdpa.h b/drivers/vdpa/mlx5/mlx5_vdpa.h
> > index 30030b7..a7e2185 100644
> > --- a/drivers/vdpa/mlx5/mlx5_vdpa.h
> > +++ b/drivers/vdpa/mlx5/mlx5_vdpa.h
> > @@ -53,6 +53,19 @@ struct mlx5_vdpa_query_mr {
> >  	int is_indirect;
> >  };
> >
> > +struct mlx5_vdpa_virtq {
> > +	SLIST_ENTRY(mlx5_vdpa_virtq) next;
> > +	uint16_t index;
> > +	uint16_t vq_size;
> > +	struct mlx5_devx_obj *virtq;
> > +	struct mlx5_vdpa_event_qp eqp;
> > +	struct {
> > +		struct mlx5dv_devx_umem *obj;
> > +		void *buf;
> > +		uint32_t size;
> > +	} umems[3];
> > +};
> > +
> >  struct mlx5_vdpa_priv {
> >  	TAILQ_ENTRY(mlx5_vdpa_priv) next;
> >  	int id; /* vDPA device id. */
> > @@ -69,6 +82,10 @@ struct mlx5_vdpa_priv {
> >  	struct mlx5dv_devx_event_channel *eventc;
> >  	struct mlx5dv_devx_uar *uar;
> >  	struct rte_intr_handle intr_handle;
> > +	struct mlx5_devx_obj *td;
> > +	struct mlx5_devx_obj *tis;
> > +	uint16_t nr_virtqs;
> > +	SLIST_HEAD(virtq_list, mlx5_vdpa_virtq) virtq_list;
> >  	SLIST_HEAD(mr_list, mlx5_vdpa_query_mr) mr_list;
> >  };
> >
> > @@ -146,4 +163,23 @@ int mlx5_vdpa_event_qp_create(struct mlx5_vdpa_priv *priv, uint16_t desc_n,
> >   */
> >  void mlx5_vdpa_cqe_event_unset(struct mlx5_vdpa_priv *priv);
> >
> > +/**
> > + * Release all the virtqs and all their related resources.
> > + *
> > + * @param[in] priv
> > + *   The vdpa driver private structure.
> > + */
> > +void mlx5_vdpa_virtqs_release(struct mlx5_vdpa_priv *priv);
> > +
> > +/**
> > + * Create all the HW virtqs resources and all their related resources.
> > + *
> > + * @param[in] priv
> > + *   The vdpa driver private structure.
> > + *
> > + * @return
> > + *   0 on success, a negative errno value otherwise and rte_errno is set.
> > + */
> > +int mlx5_vdpa_virtqs_prepare(struct mlx5_vdpa_priv *priv);
> > +
> >  #endif /* RTE_PMD_MLX5_VDPA_H_ */
> > diff --git a/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c b/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
> > new file mode 100644
> > index 0000000..781bccf
> > --- /dev/null
> > +++ b/drivers/vdpa/mlx5/mlx5_vdpa_virtq.c
> > @@ -0,0 +1,212 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright 2019 Mellanox Technologies, Ltd
> > + */
> > +#include <string.h>
> > +
> > +#include <rte_malloc.h>
> > +#include <rte_errno.h>
> > +
> > +#include <mlx5_common.h>
> > +
> > +#include "mlx5_vdpa_utils.h"
> > +#include "mlx5_vdpa.h"
> > +
> > +
> > +static int
> > +mlx5_vdpa_virtq_unset(struct mlx5_vdpa_virtq *virtq)
> > +{
> > +	int i;
> > +
> > +	if (virtq->virtq) {
> > +		claim_zero(mlx5_devx_cmd_destroy(virtq->virtq));
> > +		virtq->virtq = NULL;
> > +	}
> > +	for (i = 0; i < 3; ++i) {
> > +		if (virtq->umems[i].obj)
> > +			claim_zero(mlx5_glue->devx_umem_dereg
> > +							 (virtq->umems[i].obj));
> > +		if (virtq->umems[i].buf)
> > +			rte_free(virtq->umems[i].buf);
> > +	}
> > +	memset(&virtq->umems, 0, sizeof(virtq->umems));
> > +	if (virtq->eqp.fw_qp)
> > +		mlx5_vdpa_event_qp_destroy(&virtq->eqp);
> > +	return 0;
> > +}
> > +
> > +void
> > +mlx5_vdpa_virtqs_release(struct mlx5_vdpa_priv *priv)
> > +{
> > +	struct mlx5_vdpa_virtq *entry;
> > +	struct mlx5_vdpa_virtq *next;
> > +
> > +	entry = SLIST_FIRST(&priv->virtq_list);
> > +	while (entry) {
> > +		next = SLIST_NEXT(entry, next);
> > +		mlx5_vdpa_virtq_unset(entry);
> > +		SLIST_REMOVE(&priv->virtq_list, entry, mlx5_vdpa_virtq, next);
> > +		rte_free(entry);
> > +		entry = next;
> > +	}
> > +	SLIST_INIT(&priv->virtq_list);
> > +	if (priv->tis) {
> > +		claim_zero(mlx5_devx_cmd_destroy(priv->tis));
> > +		priv->tis = NULL;
> > +	}
> > +	if (priv->td) {
> > +		claim_zero(mlx5_devx_cmd_destroy(priv->td));
> > +		priv->td = NULL;
> > +	}
> > +}
> > +
> > +static uint64_t
> > +mlx5_vdpa_hva_to_gpa(struct rte_vhost_memory *mem, uint64_t hva)
> > +{
> > +	struct rte_vhost_mem_region *reg;
> > +	uint32_t i;
> > +	uint64_t gpa = 0;
> > +
> > +	for (i = 0; i < mem->nregions; i++) {
> > +		reg = &mem->regions[i];
> > +		if (hva >= reg->host_user_addr &&
> > +		    hva < reg->host_user_addr + reg->size) {
> > +			gpa = hva - reg->host_user_addr + reg->guest_phys_addr;
> > +			break;
> > +		}
> > +	}
> > +	return gpa;
> > +}
> 
> I think you may need a third parameter for the size to map.
> Otherwise, you would be vulnerable to CVE-2018-1059.

Yes, I just read it and understand that the virtio descriptor queues/packets data may be non-contiguous in the guest physical memory, and it may even be undefined here in the rte_vhost library. Is that right?

Don't you think that rte_vhost should validate it? At least that all the queues' memory is mapped?

Can you elaborate on why this may happen? A QEMU bug?

In any case, from the Mellanox perspective it is OK, at least for the packet data, since if the guest tries to access a physical address which is not mapped, the packet will be ignored by the HW.
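Just to illustrate what Maxime suggests, a range-checked variant of the helper could look roughly like the sketch below (illustration only, not part of this patch; the extra "size" parameter and the boundary check are the suggested addition):

    static uint64_t
    mlx5_vdpa_hva_to_gpa(struct rte_vhost_memory *mem, uint64_t hva,
                         uint64_t size)
    {
            struct rte_vhost_mem_region *reg;
            uint32_t i;

            for (i = 0; i < mem->nregions; i++) {
                    reg = &mem->regions[i];
                    /* The whole [hva, hva + size) range must fit in one region. */
                    if (hva >= reg->host_user_addr &&
                        hva + size <= reg->host_user_addr + reg->size)
                            return hva - reg->host_user_addr +
                                   reg->guest_phys_addr;
            }
            return 0;
    }

Callers would then pass the size of the ring being translated, e.g. vq.size * sizeof(struct vring_desc) for the descriptor ring, so a ring that crosses a region boundary or is partially unmapped is rejected instead of being programmed into the HW.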


 
> > +
> > +static int
> > +mlx5_vdpa_virtq_setup(struct mlx5_vdpa_priv *priv,
> > +		      struct mlx5_vdpa_virtq *virtq, int index)
> > +{
> > +	struct rte_vhost_vring vq;
> > +	struct mlx5_devx_virtq_attr attr = {0};
> > +	uint64_t gpa;
> > +	int ret;
> > +	int i;
> > +	uint16_t last_avail_idx;
> > +	uint16_t last_used_idx;
> > +
> > +	ret = rte_vhost_get_vhost_vring(priv->vid, index, &vq);
> > +	if (ret)
> > +		return -1;
> > +	virtq->index = index;
> > +	virtq->vq_size = vq.size;
> > +	/*
> > +	 * No need for event QPs creation when the guest is in poll mode or
> > +	 * when the capability allows it.
> > +	 */
> > +	attr.event_mode = vq.callfd != -1 || !(priv->caps.event_mode & (1 <<
> > +					MLX5_VIRTQ_EVENT_MODE_NO_MSIX)) ?
> > +					MLX5_VIRTQ_EVENT_MODE_QP :
> > +					MLX5_VIRTQ_EVENT_MODE_NO_MSIX;
> > +	if (attr.event_mode == MLX5_VIRTQ_EVENT_MODE_QP) {
> > +		ret = mlx5_vdpa_event_qp_create(priv, vq.size, vq.callfd,
> > +						&virtq->eqp);
> > +		if (ret) {
> > +			DRV_LOG(ERR, "Failed to create event QPs for virtq
> %d.",
> > +				index);
> > +			return -1;
> > +		}
> > +		attr.qp_id = virtq->eqp.fw_qp->id;
> > +	} else {
> > +		DRV_LOG(INFO, "Virtq %d is, for sure, working by poll mode,
> no"
> > +			" need event QPs and event mechanism.", index);
> > +	}
> > +	/* Setup 3 UMEMs for each virtq. */
> > +	for (i = 0; i < 3; ++i) {
> > +		virtq->umems[i].size = priv->caps.umems[i].a * vq.size +
> > +							  priv->caps.umems[i].b;
> > +		virtq->umems[i].buf = rte_zmalloc(__func__,
> > +						  virtq->umems[i].size, 4096);
> > +		if (!virtq->umems[i].buf) {
> > +			DRV_LOG(ERR, "Cannot allocate umem %d memory
> for virtq"
> > +				" %u.", i, index);
> > +			goto error;
> > +		}
> > +		virtq->umems[i].obj = mlx5_glue->devx_umem_reg(priv->ctx,
> > +							virtq->umems[i].buf,
> > +							virtq->umems[i].size,
> > +							IBV_ACCESS_LOCAL_WRITE);
> > +		if (!virtq->umems[i].obj) {
> > +			DRV_LOG(ERR, "Failed to register umem %d for virtq
> %u.",
> > +				i, index);
> > +			goto error;
> > +		}
> > +		attr.umems[i].id = virtq->umems[i].obj->umem_id;
> > +		attr.umems[i].offset = 0;
> > +		attr.umems[i].size = virtq->umems[i].size;
> > +	}
> > +	gpa = mlx5_vdpa_hva_to_gpa(priv->vmem, (uint64_t)(uintptr_t)vq.desc);
> > +	if (!gpa) {
> > +		DRV_LOG(ERR, "Fail to get GPA for descriptor ring.");
> > +		goto error;
> > +	}
> > +	attr.desc_addr = gpa;
> > +	gpa = mlx5_vdpa_hva_to_gpa(priv->vmem, (uint64_t)(uintptr_t)vq.used);
> > +	if (!gpa) {
> > +		DRV_LOG(ERR, "Fail to get GPA for used ring.");
> > +		goto error;
> > +	}
> > +	attr.used_addr = gpa;
> > +	gpa = mlx5_vdpa_hva_to_gpa(priv->vmem, (uint64_t)(uintptr_t)vq.avail);
> > +	if (!gpa) {
> > +		DRV_LOG(ERR, "Fail to get GPA for available ring.");
> > +		goto error;
> > +	}
> > +	attr.available_addr = gpa;
> > +	rte_vhost_get_vring_base(priv->vid, index, &last_avail_idx,
> > +				 &last_used_idx);
> > +	DRV_LOG(INFO, "vid %d: Init last_avail_idx=%d, last_used_idx=%d
> for "
> > +		"virtq %d.", priv->vid, last_avail_idx, last_used_idx, index);
> > +	attr.hw_available_index = last_avail_idx;
> > +	attr.hw_used_index = last_used_idx;
> > +	attr.q_size = vq.size;
> > +	attr.mkey = priv->gpa_mkey_index;
> > +	attr.tis_id = priv->tis->id;
> > +	attr.queue_index = index;
> > +	virtq->virtq = mlx5_devx_cmd_create_virtq(priv->ctx, &attr);
> > +	if (!virtq->virtq)
> > +		goto error;
> > +	return 0;
> > +error:
> > +	mlx5_vdpa_virtq_unset(virtq);
> > +	return -1;
> > +}
> > +
> > +int
> > +mlx5_vdpa_virtqs_prepare(struct mlx5_vdpa_priv *priv)
> > +{
> > +	struct mlx5_devx_tis_attr tis_attr = {0};
> > +	struct mlx5_vdpa_virtq *virtq;
> > +	uint32_t i;
> > +	uint16_t nr_vring = rte_vhost_get_vring_num(priv->vid);
> > +
> > +	priv->td = mlx5_devx_cmd_create_td(priv->ctx);
> > +	if (!priv->td) {
> > +		DRV_LOG(ERR, "Failed to create transport domain.");
> > +		return -rte_errno;
> > +	}
> > +	tis_attr.transport_domain = priv->td->id;
> > +	priv->tis = mlx5_devx_cmd_create_tis(priv->ctx, &tis_attr);
> > +	if (!priv->tis) {
> > +		DRV_LOG(ERR, "Failed to create TIS.");
> > +		goto error;
> > +	}
> > +	for (i = 0; i < nr_vring; i++) {
> > +		virtq = rte_zmalloc(__func__, sizeof(*virtq), 0);
> > +		if (!virtq || mlx5_vdpa_virtq_setup(priv, virtq, i)) {
> > +			if (virtq)
> > +				rte_free(virtq);
> > +			goto error;
> > +		}
> > +		SLIST_INSERT_HEAD(&priv->virtq_list, virtq, next);
> > +	}
> > +	priv->nr_virtqs = nr_vring;
> > +	return 0;
> > +error:
> > +	mlx5_vdpa_virtqs_release(priv);
> > +	return -1;
> > +}
> >


