[dpdk-dev] [PATCH v5 1/2] vhost: support inflight share memory protocol feature

Tiwei Bie tiwei.bie at intel.com
Mon Aug 26 10:28:11 CEST 2019


On Wed, Aug 07, 2019 at 02:24:59AM +0800, JinYu wrote:
> This patch introduces two new messages VHOST_USER_GET_INFLIGHT_FD
> and VHOST_USER_SET_INFLIGHT_FD to support transferring a shared
> buffer between qemu and backend.
> 
> Firstly, qemu uses VHOST_USER_GET_INFLIGHT_FD to get the
> shared buffer from backend. Then qemu should send it back
> through VHOST_USER_SET_INFLIGHT_FD each time we start vhost-user.
> 
> This shared buffer is used to process inflight I/O when backend
> reconnect.
> 
> Signed-off-by: Lin Li <lilin24 at baidu.com>
> Signed-off-by: Xun Ni <nixun at baidu.com>
> Signed-off-by: Yu Zhang <zhangyu31 at baidu.com>
> Signed-off-by: JinYu <jin.yu at intel.com>

s/JinYu/Jin Yu/

> ---
> v1 - specify the APIs are split-ring only
> v2 - fix APIs and judge split or packed
> v3 - Add rte_vhost_ prefix and fix one issue.
> v4 - add the packed ring support
> v5 - revise get_vring_base func depend on Tiwei's suggestion
> ---
>  lib/librte_vhost/rte_vhost.h           | 255 ++++++++++++++-
>  lib/librte_vhost/rte_vhost_version.map |  12 +
>  lib/librte_vhost/vhost.c               | 396 +++++++++++++++++++++-
>  lib/librte_vhost/vhost.h               |  61 ++--
>  lib/librte_vhost/vhost_user.c          | 437 ++++++++++++++++++++++++-
>  lib/librte_vhost/vhost_user.h          |  13 +-
>  6 files changed, 1128 insertions(+), 46 deletions(-)

This patch is too big; please split it into smaller patches.
E.g. rte_vhost_vq_is_packed() can be introduced in a separate patch.

> 
> diff --git a/lib/librte_vhost/rte_vhost.h b/lib/librte_vhost/rte_vhost.h
> index 0226b3eff..3f01429b1 100644
> --- a/lib/librte_vhost/rte_vhost.h
> +++ b/lib/librte_vhost/rte_vhost.h
> @@ -11,6 +11,7 @@
>   */
>  
>  #include <stdint.h>
> +#include <stdbool.h>
>  #include <sys/eventfd.h>
>  
>  #include <rte_memory.h>
> @@ -71,6 +72,10 @@ extern "C" {
>  #define VHOST_USER_PROTOCOL_F_HOST_NOTIFIER 11
>  #endif
>  
> +#ifndef VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD
> +#define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD 12
> +#endif
> +
>  /** Indicate whether protocol features negotiation is supported. */
>  #ifndef VHOST_USER_F_PROTOCOL_FEATURES
>  #define VHOST_USER_F_PROTOCOL_FEATURES	30
> @@ -98,10 +103,92 @@ struct rte_vhost_memory {
>  	struct rte_vhost_mem_region regions[];
>  };
>  
> +struct inflight_desc_packed {
> +	uint8_t inflight;
> +	uint8_t padding;
> +	uint16_t next;
> +	uint16_t last;
> +	uint16_t num;
> +	uint64_t counter;
> +	uint16_t id;
> +	uint16_t flags;
> +	uint32_t len;
> +	uint64_t addr;
> +};

Why doesn't struct inflight_desc_split have to be part of the
vhost API, while struct inflight_desc_packed does?

> +
> +struct inflight_info_packed {
> +	uint64_t features;
> +	uint16_t version;
> +	uint16_t desc_num;
> +	uint16_t free_head;
> +	uint16_t old_free_head;
> +	uint16_t used_idx;
> +	uint16_t old_used_idx;
> +	uint8_t used_wrap_counter;
> +	uint8_t old_used_wrap_counter;
> +	uint8_t padding[7];
> +	struct inflight_desc_packed desc[0];
> +};
> +
> +struct rte_vhost_resubmit_desc {
> +	uint16_t index;
> +	uint64_t counter;
> +};
> +
> +struct rte_vhost_resubmit_info {
> +	struct rte_vhost_resubmit_desc	*resubmit_list;
> +	uint16_t resubmit_num;
> +};
> +
> +struct rte_vhost_ring_inflight {
> +	union {
> +		struct inflight_info_split *inflight_split;

struct inflight_info_split is used but not declared.

> +		struct inflight_info_packed *inflight_packed;
> +	};
> +
> +	struct rte_vhost_resubmit_info *resubmit_inflight;
> +};
> +
> +/*
> + * Declare below packed ring defines unconditionally
> + * as Kernel header might use different names.
> + */
> +#ifndef VIRTIO_F_RING_PACKED
> +#define VIRTIO_F_RING_PACKED 34
> +
> +#define VRING_DESC_F_AVAIL	(1ULL << 7)
> +#define VRING_DESC_F_USED	(1ULL << 15)

You shouldn't put the above macros under the VIRTIO_F_RING_PACKED
guard, as the kernel headers don't define them. When the kernel
headers do define VIRTIO_F_RING_PACKED, this whole block is skipped,
the macros are left undefined, and the build breaks.

Besides, it doesn't seem like a good idea to make them part of the
vhost API.

> +
> +struct vring_packed_desc {
> +	uint64_t addr;
> +	uint32_t len;
> +	uint16_t id;
> +	uint16_t flags;
> +};
> +
> +#define VRING_EVENT_F_ENABLE 0x0
> +#define VRING_EVENT_F_DISABLE 0x1
> +#define VRING_EVENT_F_DESC 0x2
> +struct vring_packed_desc_event {
> +	uint16_t off_wrap;
> +	uint16_t flags;
> +};

You just need to declare, rather than define,
vring_packed_desc/vring_packed_desc_event in the vhost header.

> +#endif
> +
>  struct rte_vhost_vring {
> -	struct vring_desc	*desc;
> -	struct vring_avail	*avail;
> -	struct vring_used	*used;
> +	union {
> +		struct vring_desc	*desc;
> +		struct vring_packed_desc *desc_packed;
> +	};
> +	union {
> +		struct vring_avail	*avail;
> +		struct vring_packed_desc_event *driver_event;
> +	};
> +	union {
> +		struct vring_used	*used;
> +		struct vring_packed_desc_event *device_event;
> +	};
> +
>  	uint64_t		log_guest_addr;
>  
>  	/** Deprecated, use rte_vhost_vring_call() instead. */
> @@ -603,6 +690,33 @@ uint16_t rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
>   */
>  int rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem);
>  
> +/**
> + * Get vq is packed
> + *
> + * @param vid
> + *  vhost device ID
> + * @return
> + *  0 on success, -1 on failure

Return value should be true or false.

> + */
> +int __rte_experimental
> +rte_vhost_vq_is_packed(int vid);
> +
> +/**
> + * Get guest inflight vring info, including inflight ring and resubmit list.
> + *
> + * @param vid
> + *  vhost device ID
> + * @param vring_idx
> + *  vring index
> + * @param vring
> + *  the structure to hold the requested inflight vring info
> + * @return
> + *  0 on success, -1 on failure
> + */
> +int __rte_experimental

You should put the `__rte_experimental` tag at the beginning of the
declaration.
See this for more details:
http://git.dpdk.org/dpdk/commit/lib/librte_vhost?id=18218713bf4248c4c6b97a12231e7d59b8a86865

> +rte_vhost_get_vhost_ring_inflight(int vid, uint16_t vring_idx,
> +	struct rte_vhost_ring_inflight *vring);
> +
>  /**
>   * Get guest vring info, including the vring address, vring size, etc.
>   *
> @@ -616,7 +730,7 @@ int rte_vhost_get_mem_table(int vid, struct rte_vhost_memory **mem);
>   *  0 on success, -1 on failure
>   */
>  int rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx,
> -			      struct rte_vhost_vring *vring);
> +	struct rte_vhost_vring *vring);
>  
>  /**
>   * Notify the guest that used descriptors have been added to the vring.  This
> @@ -631,6 +745,112 @@ int rte_vhost_get_vhost_vring(int vid, uint16_t vring_idx,
>   */
>  int rte_vhost_vring_call(int vid, uint16_t vring_idx);
>  
> +/**
> + * set split inflight descriptor.

Please capitalize the first letter consistently.

> + *
> + * This function save descriptors that has been comsumed in available
> + * ring
> + *
> + * @param vid
> + *  vhost device ID
> + * @param vring_idx
> + *  vring index
> + * @param idx
> + *  inflight entry index
> + * @return
> + *  0 on success, -1 on failure
> + */
> +int __rte_experimental
> +rte_vhost_set_inflight_desc_split(int vid, uint16_t vring_idx,
> +	uint16_t idx);
> +
> +/**
> + * set packed inflight descriptor and get corresponding inflight entry
> + *
> + * This function save descriptors that has been comsumed
> + *
> + * @param vid
> + *  vhost device ID
> + * @param vring_idx
> + *  vring index
> + * @param idx
> + *  inflight entry index
> + * @return
> + *  0 on success, -1 on failure
> + */
> +int __rte_experimental
> +rte_vhost_set_inflight_desc_packed(int vid, uint16_t vring_idx,
> +	uint16_t head, uint16_t last, uint16_t *inflight_entry);
> +
> +/**
> + * save the head of list that the last batch of used descriptors.
> + *
> + * @param vid
> + *  vhost device ID
> + * @param vring_idx
> + *  vring index
> + * @param idx
> + *  descriptor entry index
> + * @return
> + *  0 on success, -1 on failure
> + */
> +int __rte_experimental
> +rte_vhost_set_last_inflight_io_split(int vid,
> +	uint16_t vring_idx, uint16_t idx);
> +
> +/**
> + * update the inflight free_head, used_idx and used_wrap_counter.
> + *
> + * This function will update status first before updating descriptors
> + * to used
> + *
> + * @param vid
> + *  vhost device ID
> + * @param vring_idx
> + *  vring index
> + * @param idx
> + *  inflight entry index
> + * @return
> + *  0 on success, -1 on failure
> + */
> +int __rte_experimental
> +rte_vhost_set_last_inflight_io_packed(int vid,
> +	uint16_t vring_idx, uint16_t head);
> +
> +/**
> + * clear the split inflight status.
> + *
> + * @param vid
> + *  vhost device ID
> + * @param vring_idx
> + *  vring index
> + * @param last_used_idx
> + *  last used idx of used ring
> + * @param idx
> + *  inflight entry index
> + * @return
> + *  0 on success, -1 on failure
> + */
> +int __rte_experimental
> +rte_vhost_clr_inflight_desc_split(int vid, uint16_t vring_idx,
> +	uint16_t last_used_idx, uint16_t idx);
> +
> +/**
> + * clear the packed inflight status.
> + *
> + * @param vid
> + *  vhost device ID
> + * @param vring_idx
> + *  vring index
> + * @param head
> + *  inflight entry index
> + * @return
> + *  0 on success, -1 on failure
> + */
> +int __rte_experimental
> +rte_vhost_clr_inflight_desc_packed(int vid, uint16_t vring_idx,
> +	uint16_t head);
> +
>  /**
>   * Get vhost RX queue avail count.
>   *
> @@ -656,7 +876,8 @@ uint32_t rte_vhost_rx_queue_count(int vid, uint16_t qid);
>   *  0 on success, -1 on failure
>   */
>  int __rte_experimental
> -rte_vhost_get_log_base(int vid, uint64_t *log_base, uint64_t *log_size);
> +rte_vhost_get_log_base(int vid, uint64_t *log_base,
> +	uint64_t *log_size);

This change isn't necessary.

>  
>  /**
>   * Get last_avail/used_idx of the vhost virtqueue
> @@ -676,6 +897,28 @@ int __rte_experimental
>  rte_vhost_get_vring_base(int vid, uint16_t queue_id,
>  		uint16_t *last_avail_idx, uint16_t *last_used_idx);
>  
> +/**
> + * Get last_avail/last_used of the vhost virtqueue
> + *
> + * This function is designed for the reconnection and it's specific for
> + * the packed ring as we can get the two parameters from the inflight
> + * queueregion
> + *
> + * @param vid
> + *  vhost device ID
> + * @param queue_id
> + *  vhost queue index
> + * @param last_avail_idx
> + *  vhost last_avail_idx to get
> + * @param last_used_idx
> + *  vhost last_used_idx to get
> + * @return
> + *  0 on success, -1 on failure
> + */
> +int __rte_experimental
> +rte_vhost_get_vring_base_from_inflight(int vid,
> +	uint16_t queue_id, uint16_t *last_avail_idx, uint16_t *last_used_idx);
> +
>  /**
>   * Set last_avail/used_idx of the vhost virtqueue
>   *
> @@ -692,7 +935,7 @@ rte_vhost_get_vring_base(int vid, uint16_t queue_id,
>   */
>  int __rte_experimental
>  rte_vhost_set_vring_base(int vid, uint16_t queue_id,
> -		uint16_t last_avail_idx, uint16_t last_used_idx);
> +	uint16_t last_avail_idx, uint16_t last_used_idx);
>  
>  /**
>   * Register external message handling callbacks
> diff --git a/lib/librte_vhost/rte_vhost_version.map b/lib/librte_vhost/rte_vhost_version.map
> index 5f1d4a75c..99f1134ea 100644
> --- a/lib/librte_vhost/rte_vhost_version.map
> +++ b/lib/librte_vhost/rte_vhost_version.map
> @@ -87,4 +87,16 @@ EXPERIMENTAL {
>  	rte_vdpa_relay_vring_used;
>  	rte_vhost_extern_callback_register;
>  	rte_vhost_driver_set_protocol_features;
> +	rte_vhost_set_inflight_desc_split;
> +	rte_vhost_clr_inflight_desc_split;
> +	rte_vhost_set_last_inflight_io_split;
> +	rte_vhost_get_vhost_ring_inflight;
> +	rte_vhost_vq_is_packed;
> +	rte_vhost_set_inflight_desc_packed;
> +	rte_vhost_clr_inflight_desc_packed;
> +	rte_vhost_set_last_inflight_io_packed;
> +	rte_vhost_get_vring_base_counter;
> +	rte_vhost_get_vring_base_from_inflight;
> +	rte_vhost_get_vring_base_counter_from_inflight;
> +	rte_vhost_set_vring_base_counter;
>  };



> @@ -939,13 +1270,48 @@ int rte_vhost_get_log_base(int vid, uint64_t *log_base,
>  int rte_vhost_get_vring_base(int vid, uint16_t queue_id,
>  		uint16_t *last_avail_idx, uint16_t *last_used_idx)
>  {
> +	struct vhost_virtqueue *vq;
>  	struct virtio_net *dev = get_device(vid);
>  
>  	if (dev == NULL || last_avail_idx == NULL || last_used_idx == NULL)
>  		return -1;
>  
> -	*last_avail_idx = dev->virtqueue[queue_id]->last_avail_idx;
> -	*last_used_idx = dev->virtqueue[queue_id]->last_used_idx;
> +	vq = dev->virtqueue[queue_id];
> +	if (!vq)
> +		return -1;
> +
> +	if (vq_is_packed(dev)) {
> +		*last_avail_idx = (vq->avail_wrap_counter << 15) |
> +					vq->last_avail_idx;
> +		*last_used_idx = (vq->used_wrap_counter << 15) |
> +					vq->last_used_idx;
> +	} else {
> +		*last_avail_idx = vq->last_avail_idx;
> +		*last_used_idx = vq->last_used_idx;
> +	}

This should be a fix. Without this change, users of this API
can't use the packed ring properly.

Please do this in a separate patch, add a "Fixes:" line and Cc stable.

> +
> +	return 0;
> +}
> +
> +int rte_vhost_get_vring_base_from_inflight(int vid,
> +	uint16_t queue_id, uint16_t *last_avail_idx, uint16_t *last_used_idx)
> +{
> +	struct inflight_info_packed *inflight_info;
> +	struct virtio_net *dev = get_device(vid);
> +
> +	if (dev == NULL || last_avail_idx == NULL || last_used_idx == NULL)
> +		return -1;
> +
> +	if (!vq_is_packed(dev))
> +		return -1;
> +
> +	inflight_info = dev->virtqueue[queue_id]->inflight_packed;
> +	if (!inflight_info)
> +		return -1;
> +
> +	*last_avail_idx = (inflight_info->old_used_wrap_counter << 15) |
> +				inflight_info->old_used_idx;
> +	*last_used_idx = *last_avail_idx;
>  
>  	return 0;
>  }
> @@ -953,13 +1319,25 @@ int rte_vhost_get_vring_base(int vid, uint16_t queue_id,
>  int rte_vhost_set_vring_base(int vid, uint16_t queue_id,
>  		uint16_t last_avail_idx, uint16_t last_used_idx)
>  {
> +	struct vhost_virtqueue *vq;
>  	struct virtio_net *dev = get_device(vid);
>  
>  	if (!dev)
>  		return -1;
>  
> -	dev->virtqueue[queue_id]->last_avail_idx = last_avail_idx;
> -	dev->virtqueue[queue_id]->last_used_idx = last_used_idx;
> +	vq = dev->virtqueue[queue_id];
> +	if (!vq)
> +		return -1;
> +
> +	if (vq_is_packed(dev)) {
> +		vq->last_avail_idx = last_avail_idx & 0x7fff;
> +		vq->avail_wrap_counter = !!(last_avail_idx & (1 << 15));
> +		vq->last_used_idx = last_used_idx & 0x7fff;
> +		vq->used_wrap_counter = !!(last_used_idx & (1 << 15));
> +	} else {
> +		vq->last_avail_idx = last_avail_idx;
> +		vq->last_used_idx = last_used_idx;
> +	}

Ditto.

>  
>  	return 0;
>  }
> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> index 884befa85..e9d0b983d 100644
> --- a/lib/librte_vhost/vhost.h
> +++ b/lib/librte_vhost/vhost.h
> @@ -88,6 +88,22 @@ struct vring_used_elem_packed {
>  	uint32_t count;
>  };
>  
> +struct inflight_desc_split {
> +	uint8_t		inflight;
> +	uint8_t		padding[5];
> +	uint16_t	next;
> +	uint64_t	counter;
> +};
> +
> +struct inflight_info_split {
> +	uint64_t		features;
> +	uint16_t		version;
> +	uint16_t		desc_num;
> +	uint16_t		last_inflight_io;
> +	uint16_t		used_idx;
> +	struct inflight_desc_split desc[0];
> +};
> +


More information about the dev mailing list