[dpdk-dev] [PATCH 02/10] vdpa/sfc: add support for device initialization

Xia, Chenbo chenbo.xia at intel.com
Mon Aug 30 12:52:10 CEST 2021


Hi Vijay,

> -----Original Message-----
> From: Vijay Srivastava <vijay.srivastava at xilinx.com>
> Sent: Wednesday, July 7, 2021 12:44 AM
> To: dev at dpdk.org
> Cc: maxime.coquelin at redhat.com; Xia, Chenbo <chenbo.xia at intel.com>;
> andrew.rybchenko at oktetlabs.ru; Vijay Kumar Srivastava <vsrivast at xilinx.com>
> Subject: [PATCH 02/10] vdpa/sfc: add support for device initialization
> 
> From: Vijay Kumar Srivastava <vsrivast at xilinx.com>
> 
> Add HW initialization and vDPA device registration support.
> 
> Signed-off-by: Vijay Kumar Srivastava <vsrivast at xilinx.com>
> ---
>  doc/guides/vdpadevs/sfc.rst       |   6 +
>  drivers/vdpa/sfc/meson.build      |   3 +
>  drivers/vdpa/sfc/sfc_vdpa.c       |  23 +++
>  drivers/vdpa/sfc/sfc_vdpa.h       |  49 +++++-
>  drivers/vdpa/sfc/sfc_vdpa_debug.h |  21 +++
>  drivers/vdpa/sfc/sfc_vdpa_hw.c    | 322
> ++++++++++++++++++++++++++++++++++++++
>  drivers/vdpa/sfc/sfc_vdpa_log.h   |   3 +
>  drivers/vdpa/sfc/sfc_vdpa_mcdi.c  |  74 +++++++++
>  drivers/vdpa/sfc/sfc_vdpa_ops.c   | 129 +++++++++++++++
>  drivers/vdpa/sfc/sfc_vdpa_ops.h   |  36 +++++
>  10 files changed, 665 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/vdpa/sfc/sfc_vdpa_debug.h
>  create mode 100644 drivers/vdpa/sfc/sfc_vdpa_hw.c
>  create mode 100644 drivers/vdpa/sfc/sfc_vdpa_mcdi.c
>  create mode 100644 drivers/vdpa/sfc/sfc_vdpa_ops.c
>  create mode 100644 drivers/vdpa/sfc/sfc_vdpa_ops.h
> 
> diff --git a/doc/guides/vdpadevs/sfc.rst b/doc/guides/vdpadevs/sfc.rst
> index 59f990b..abb5900 100644
> --- a/doc/guides/vdpadevs/sfc.rst
> +++ b/doc/guides/vdpadevs/sfc.rst
> @@ -95,3 +95,9 @@ SFC vDPA PMD provides the following log types available for
> control:
>    Matches a subset of per-port log types registered during runtime.
>    A full name for a particular type may be obtained by appending a
>    dot and a PCI device identifier (``XXXX:XX:XX.X``) to the prefix.
> +
> +- ``pmd.vdpa.sfc.mcdi`` (default level is **notice**)
> +
> +  Extra logging of the communication with the NIC's management CPU.
> +  The format of the log is consumed by the netlogdecode cross-platform
> +  tool. May be managed per-port, as explained above.
> diff --git a/drivers/vdpa/sfc/meson.build b/drivers/vdpa/sfc/meson.build
> index d916389..aac7c51 100644
> --- a/drivers/vdpa/sfc/meson.build
> +++ b/drivers/vdpa/sfc/meson.build
> @@ -30,4 +30,7 @@ endforeach
>  deps += ['common_sfc_efx', 'bus_pci']
>  sources = files(
>  	'sfc_vdpa.c',
> +	'sfc_vdpa_hw.c',
> +	'sfc_vdpa_mcdi.c',
> +	'sfc_vdpa_ops.c',
>  )
> diff --git a/drivers/vdpa/sfc/sfc_vdpa.c b/drivers/vdpa/sfc/sfc_vdpa.c
> index d8faaca..12e8d6e 100644
> --- a/drivers/vdpa/sfc/sfc_vdpa.c
> +++ b/drivers/vdpa/sfc/sfc_vdpa.c
> @@ -232,6 +232,19 @@ struct sfc_vdpa_adapter *
>  		goto fail_vfio_setup;
>  	}
> 
> +	sfc_vdpa_log_init(sva, "hw init");
> +	if (sfc_vdpa_hw_init(sva) != 0) {
> +		sfc_vdpa_err(sva, "failed to init HW %s", pci_dev->name);
> +		goto fail_hw_init;
> +	}
> +
> +	sfc_vdpa_log_init(sva, "dev init");
> +	sva->ops_data = sfc_vdpa_device_init(sva, SFC_VDPA_AS_VF);
> +	if (sva->ops_data == NULL) {
> +		sfc_vdpa_err(sva, "failed vDPA dev init %s", pci_dev->name);
> +		goto fail_dev_init;
> +	}
> +
>  	pthread_mutex_lock(&sfc_vdpa_adapter_list_lock);
>  	TAILQ_INSERT_TAIL(&sfc_vdpa_adapter_list, sva, next);
>  	pthread_mutex_unlock(&sfc_vdpa_adapter_list_lock);
> @@ -240,6 +253,12 @@ struct sfc_vdpa_adapter *
> 
>  	return 0;
> 
> +fail_dev_init:
> +	sfc_vdpa_hw_fini(sva);
> +
> +fail_hw_init:
> +	sfc_vdpa_vfio_teardown(sva);
> +
>  fail_vfio_setup:
>  fail_set_log_prefix:
>  	rte_free(sva);
> @@ -266,6 +285,10 @@ struct sfc_vdpa_adapter *
>  	TAILQ_REMOVE(&sfc_vdpa_adapter_list, sva, next);
>  	pthread_mutex_unlock(&sfc_vdpa_adapter_list_lock);
> 
> +	sfc_vdpa_device_fini(sva->ops_data);
> +
> +	sfc_vdpa_hw_fini(sva);
> +
>  	sfc_vdpa_vfio_teardown(sva);
> 
>  	rte_free(sva);
> diff --git a/drivers/vdpa/sfc/sfc_vdpa.h b/drivers/vdpa/sfc/sfc_vdpa.h
> index 3b77900..fb97258 100644
> --- a/drivers/vdpa/sfc/sfc_vdpa.h
> +++ b/drivers/vdpa/sfc/sfc_vdpa.h
> @@ -11,14 +11,38 @@
> 
>  #include <rte_bus_pci.h>
> 
> +#include "sfc_efx.h"
> +#include "sfc_efx_mcdi.h"
> +#include "sfc_vdpa_debug.h"
>  #include "sfc_vdpa_log.h"
> +#include "sfc_vdpa_ops.h"
> +
> +#define SFC_VDPA_DEFAULT_MCDI_IOVA		0x200000000000
> 
>  /* Adapter private data */
>  struct sfc_vdpa_adapter {
>  	TAILQ_ENTRY(sfc_vdpa_adapter)	next;
> +	/*
> +	 * PMD setup and configuration is not thread safe. Since it is not
> +	 * performance sensitive, it is better to guarantee thread-safety
> +	 * and add device level lock. vDPA control operations which
> +	 * change its state should acquire the lock.
> +	 */
> +	rte_spinlock_t			lock;
>  	struct rte_pci_device		*pdev;
>  	struct rte_pci_addr		pci_addr;
> 
> +	efx_family_t			family;
> +	efx_nic_t			*nic;
> +	rte_spinlock_t			nic_lock;
> +
> +	efsys_bar_t			mem_bar;
> +
> +	struct sfc_efx_mcdi		mcdi;
> +	size_t				mcdi_buff_size;
> +
> +	uint32_t			max_queue_count;
> +
>  	char				log_prefix[SFC_VDPA_LOG_PREFIX_MAX];
>  	uint32_t			logtype_main;
> 
> @@ -26,6 +50,7 @@ struct sfc_vdpa_adapter {
>  	int				vfio_dev_fd;
>  	int				vfio_container_fd;
>  	int				iommu_group_num;
> +	struct sfc_vdpa_ops_data	*ops_data;
>  };
> 
>  uint32_t
> @@ -36,5 +61,27 @@ struct sfc_vdpa_adapter {
>  struct sfc_vdpa_adapter *
>  sfc_vdpa_get_adapter_by_dev(struct rte_pci_device *pdev);
> 
> -#endif  /* _SFC_VDPA_H */
> +int
> +sfc_vdpa_hw_init(struct sfc_vdpa_adapter *sva);
> +void
> +sfc_vdpa_hw_fini(struct sfc_vdpa_adapter *sa);

Better to align the name here: sa -> sva

> 
> +int
> +sfc_vdpa_mcdi_init(struct sfc_vdpa_adapter *sva);
> +void
> +sfc_vdpa_mcdi_fini(struct sfc_vdpa_adapter *sva);
> +
> +int
> +sfc_vdpa_dma_alloc(struct sfc_vdpa_adapter *sva, const char *name,
> +		   size_t len, efsys_mem_t *esmp);
> +
> +void
> +sfc_vdpa_dma_free(struct sfc_vdpa_adapter *sva, efsys_mem_t *esmp);
> +
> +static inline struct sfc_vdpa_adapter *
> +sfc_vdpa_adapter_by_dev_handle(void *dev_handle)
> +{
> +	return (struct sfc_vdpa_adapter *)dev_handle;
> +}
> +
> +#endif  /* _SFC_VDPA_H */
> diff --git a/drivers/vdpa/sfc/sfc_vdpa_debug.h
> b/drivers/vdpa/sfc/sfc_vdpa_debug.h
> new file mode 100644
> index 0000000..cfa8cc5
> --- /dev/null
> +++ b/drivers/vdpa/sfc/sfc_vdpa_debug.h
> @@ -0,0 +1,21 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + *
> + * Copyright(c) 2020-2021 Xilinx, Inc.
> + */
> +
> +#ifndef _SFC_VDPA_DEBUG_H_
> +#define _SFC_VDPA_DEBUG_H_
> +
> +#include <rte_debug.h>
> +
> +#ifdef RTE_LIBRTE_SFC_VDPA_DEBUG
> +/* Avoid dependency from RTE_LOG_DP_LEVEL to be able to enable debug check
> + * in the driver only.
> + */
> +#define SFC_VDPA_ASSERT(exp)			RTE_VERIFY(exp)
> +#else
> +/* If the driver debug is not enabled, follow DPDK debug/non-debug */
> +#define SFC_VDPA_ASSERT(exp)			RTE_ASSERT(exp)
> +#endif
> +
> +#endif /* _SFC_VDPA_DEBUG_H_ */
> diff --git a/drivers/vdpa/sfc/sfc_vdpa_hw.c b/drivers/vdpa/sfc/sfc_vdpa_hw.c
> new file mode 100644
> index 0000000..83f3696
> --- /dev/null
> +++ b/drivers/vdpa/sfc/sfc_vdpa_hw.c
> @@ -0,0 +1,322 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + *
> + * Copyright(c) 2020-2021 Xilinx, Inc.
> + */
> +
> +#include <unistd.h>
> +
> +#include <rte_common.h>
> +#include <rte_errno.h>
> +#include <rte_vfio.h>
> +
> +#include "efx.h"
> +#include "sfc_vdpa.h"
> +#include "sfc_vdpa_ops.h"
> +
> +extern uint32_t sfc_logtype_driver;
> +
> +#ifndef PAGE_SIZE
> +#define PAGE_SIZE   (sysconf(_SC_PAGESIZE))
> +#endif
> +
> +int
> +sfc_vdpa_dma_alloc(struct sfc_vdpa_adapter *sva, const char *name,
> +		   size_t len, efsys_mem_t *esmp)
> +{
> +	void *mcdi_buf;
> +	uint64_t mcdi_iova;
> +	size_t mcdi_buff_size;
> +	int ret;
> +
> +	mcdi_buff_size = RTE_ALIGN_CEIL(len, PAGE_SIZE);
> +
> +	sfc_vdpa_log_init(sva, "name=%s, len=%zu", name, len);
> +
> +	mcdi_buf = rte_zmalloc(name, mcdi_buff_size, PAGE_SIZE);
> +	if (mcdi_buf == NULL) {
> +		sfc_vdpa_err(sva, "cannot reserve memory for %s: len=%#x: %s",
> +			     name, (unsigned int)len, rte_strerror(rte_errno));
> +		return -ENOMEM;
> +	}
> +
> +	/* IOVA address for MCDI would be re-calculated if mapping

What is MCDI?

> +	 * using default IOVA would fail.
> +	 * TODO: Earlier there was no way to get valid IOVA range.
> +	 * Recently a patch has been submitted to get the IOVA range
> +	 * using ioctl. VFIO_IOMMU_GET_INFO. This patch is available
> +	 * in the kernel version >= 5.4. Support to get the default
> +	 * IOVA address for MCDI buffer using available IOVA range
> +	 * would be added later. Meanwhile default IOVA for MCDI buffer
> +	 * is kept at high mem at 2TB. In case of overlap new available
> +	 * addresses would be searched and same would be used.
> +	 */
> +	mcdi_iova = SFC_VDPA_DEFAULT_MCDI_IOVA;
> +
> +	do {
> +		ret = rte_vfio_container_dma_map(sva->vfio_container_fd,
> +						 (uint64_t)mcdi_buf, mcdi_iova,
> +						 mcdi_buff_size);
> +		if (ret == 0)
> +			break;
> +
> +		mcdi_iova = mcdi_iova >> 1;
> +		if (mcdi_iova < mcdi_buff_size)	{
> +			sfc_vdpa_err(sva,
> +				     "DMA mapping failed for MCDI : %s",
> +				     rte_strerror(rte_errno));
> +			return ret;
> +		}
> +
> +	} while (ret < 0);

Is this DMA region for some hardware-specific control msg?

And how do you make sure this IOVA space you defined in this driver will
not conflict with the IOVA space that vdpa device consumer (Most likely QEMU)
defines (If QEMU, IOVA = guest physical address)

Thanks,
Chenbo

> +
> +	esmp->esm_addr = mcdi_iova;
> +	esmp->esm_base = mcdi_buf;
> +	sva->mcdi_buff_size = mcdi_buff_size;
> +
> +	sfc_vdpa_info(sva,
> +		      "DMA name=%s len=%zu => virt=%p iova=%" PRIx64,
> +		      name, len, esmp->esm_base, esmp->esm_addr);
> +
> +	return 0;
> +}
> +



More information about the dev mailing list