[dpdk-dev] [PATCH 5/6] net/mlx5: support PCI device DMA map and unmap

Gaëtan Rivet gaetan.rivet at 6wind.com
Wed Feb 13 12:35:04 CET 2019


On Wed, Feb 13, 2019 at 11:10:25AM +0200, Shahaf Shuler wrote:
> The implementation reuses the external memory registration work done by
> commit[1].
> 
> Note about representors:
> 
> The current representor design will not work
> with those map and unmap functions. The reason is that for representors
> we have multiple IB devices sharing the same PCI function, so mapping will
> happen only on one of the representors and not all of them.
> 
> While it is possible to implement such support, the IB representor
> design is going to be changed during DPDK19.05. The new design will have
> a single IB device for all representors, hence sharing of a single
> memory region between all representors will be possible.
> 
> [1]
> commit 7e43a32ee060
> ("net/mlx5: support externally allocated static memory")
> 
> Signed-off-by: Shahaf Shuler <shahafs at mellanox.com>
> ---
>  drivers/net/mlx5/mlx5.c      |   2 +
>  drivers/net/mlx5/mlx5_mr.c   | 146 ++++++++++++++++++++++++++++++++++++++
>  drivers/net/mlx5/mlx5_rxtx.h |   5 ++
>  3 files changed, 153 insertions(+)
> 
> diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
> index a913a5955f..7c91701713 100644
> --- a/drivers/net/mlx5/mlx5.c
> +++ b/drivers/net/mlx5/mlx5.c
> @@ -1626,6 +1626,8 @@ static struct rte_pci_driver mlx5_driver = {
>  	.id_table = mlx5_pci_id_map,
>  	.probe = mlx5_pci_probe,
>  	.remove = mlx5_pci_remove,
> +	.map = mlx5_dma_map,
> +	.unmap = mlx5_dma_unmap,
>  	.drv_flags = (RTE_PCI_DRV_INTR_LSC | RTE_PCI_DRV_INTR_RMV |
>  		      RTE_PCI_DRV_PROBE_AGAIN),
>  };
> diff --git a/drivers/net/mlx5/mlx5_mr.c b/drivers/net/mlx5/mlx5_mr.c
> index 32be6a5445..7059181959 100644
> --- a/drivers/net/mlx5/mlx5_mr.c
> +++ b/drivers/net/mlx5/mlx5_mr.c
> @@ -14,6 +14,7 @@
>  #include <rte_mempool.h>
>  #include <rte_malloc.h>
>  #include <rte_rwlock.h>
> +#include <rte_bus_pci.h>
>  
>  #include "mlx5.h"
>  #include "mlx5_mr.h"
> @@ -1215,6 +1216,151 @@ mlx5_mr_update_ext_mp_cb(struct rte_mempool *mp, void *opaque,
>  }
>  
>  /**
> + * Finds the first ethdev that matches the PCI device.
> + * Multiple ethdevs per PCI device exist only with representors.
> + * In such a case, it is enough to get only one of the ports as they all share
> + * the same ibv context.
> + *
> + * @param pdev
> + *   Pointer to the PCI device.
> + *
> + * @return
> + *   Pointer to the ethdev if found, NULL otherwise.
> + */
> +static struct rte_eth_dev *
> +pci_dev_to_eth_dev(struct rte_pci_device *pdev)
> +{
> +	struct rte_eth_dev *dev;
> +	const char *drv_name;
> +	uint16_t port_id = 0;
> +
> +	/**
> +	 *  We really need to iterate all eth devices regardless of
> +	 *  their owner.
> +	 */
> +	while (port_id < RTE_MAX_ETHPORTS) {
> +		port_id = rte_eth_find_next(port_id);
> +		if (port_id >= RTE_MAX_ETHPORTS)
> +			break;
> +		dev = &rte_eth_devices[port_id];
> +		drv_name = dev->device->driver->name;
> +		if (!strncmp(drv_name, MLX5_DRIVER_NAME,
> +			     sizeof(MLX5_DRIVER_NAME) + 1) &&
> +		    pdev == RTE_DEV_TO_PCI(dev->device)) {
> +			/* found the PCI device. */
> +			return dev;
> +		}
> +	}
> +	return NULL;
> +}

Might I interest you in the new API?

   {
           struct rte_dev_iterator it;
           struct rte_device *dev;
   
           RTE_DEV_FOREACH(dev, "class=eth", &it)
                   if (dev == &pdev->device)
                           return it.class_device;
           return NULL;
   }

> +
> +/**
> + * DPDK callback to DMA map external memory to a PCI device.
> + *
> + * @param pdev
> + *   Pointer to the PCI device.
> + * @param addr
> + *   Starting virtual address of memory to be mapped.
> + * @param iova
> + *   Starting IOVA address of memory to be mapped.
> + * @param len
> + *   Length of memory segment being mapped.
> + *
> + * @return
> + *   0 on success, negative value on error.
> + */
> +int
> +mlx5_dma_map(struct rte_pci_device *pdev, void *addr,
> +	     uint64_t iova __rte_unused, size_t len)
> +{
> +	struct rte_eth_dev *dev;
> +	struct mlx5_mr *mr;
> +	struct priv *priv;
> +
> +	dev = pci_dev_to_eth_dev(pdev);
> +	if (!dev) {
> +		DRV_LOG(WARNING, "unable to find matching ethdev "
> +				 "to PCI device %p", (void *)pdev);
> +		return -1;
> +	}
> +	priv = dev->data->dev_private;
> +	mr = mlx5_create_mr_ext(dev, (uintptr_t)addr, len, SOCKET_ID_ANY);
> +	if (!mr) {
> +		DRV_LOG(WARNING,
> +			"port %u unable to dma map", dev->data->port_id);
> +		return -1;
> +	}
> +	rte_rwlock_write_lock(&priv->mr.rwlock);
> +	LIST_INSERT_HEAD(&priv->mr.mr_list, mr, mr);
> +	/* Insert to the global cache table. */
> +	mr_insert_dev_cache(dev, mr);
> +	rte_rwlock_write_unlock(&priv->mr.rwlock);
> +	return 0;
> +}
> +
> +/**
> + * DPDK callback to DMA unmap external memory from a PCI device.
> + *
> + * @param pdev
> + *   Pointer to the PCI device.
> + * @param addr
> + *   Starting virtual address of memory to be unmapped.
> + * @param iova
> + *   Starting IOVA address of memory to be unmapped.
> + * @param len
> + *   Length of memory segment being unmapped.
> + *
> + * @return
> + *   0 on success, negative value on error.
> + */
> +int
> +mlx5_dma_unmap(struct rte_pci_device *pdev, void *addr,
> +	       uint64_t iova __rte_unused, size_t len __rte_unused)
> +{
> +	struct rte_eth_dev *dev;
> +	struct priv *priv;
> +	struct mlx5_mr *mr;
> +	struct mlx5_mr_cache entry;
> +
> +	dev = pci_dev_to_eth_dev(pdev);
> +	if (!dev) {
> +		DRV_LOG(WARNING, "unable to find matching ethdev "
> +				 "to PCI device %p", (void *)pdev);
> +		return -1;
> +	}
> +	priv = dev->data->dev_private;
> +	rte_rwlock_read_lock(&priv->mr.rwlock);
> +	mr = mr_lookup_dev_list(dev, &entry, (uintptr_t)addr);
> +	if (!mr) {
> +		DRV_LOG(WARNING, "address 0x%" PRIxPTR " wasn't registered "
> +				 "to PCI device %p", (uintptr_t)addr,
> +				 (void *)pdev);
> +		rte_rwlock_read_unlock(&priv->mr.rwlock);
> +		return -1;
> +	}
> +	LIST_REMOVE(mr, mr);
> +	LIST_INSERT_HEAD(&priv->mr.mr_free_list, mr, mr);
> +	DEBUG("port %u remove MR(%p) from list", dev->data->port_id,
> +	      (void *)mr);
> +	mr_rebuild_dev_cache(dev);
> +	/*
> +	 * Flush local caches by propagating invalidation across cores.
> +	 * rte_smp_wmb() is enough to synchronize this event. If one of
> +	 * freed memsegs is seen by other core, that means the memseg
> +	 * has been allocated by allocator, which will come after this
> +	 * free call. Therefore, this store instruction (incrementing
> +	 * generation below) will be guaranteed to be seen by other core
> +	 * before the core sees the newly allocated memory.
> +	 */
> +	++priv->mr.dev_gen;
> +	DEBUG("broadcasting local cache flush, gen=%d",
> +			priv->mr.dev_gen);
> +	rte_smp_wmb();
> +	rte_rwlock_read_unlock(&priv->mr.rwlock);
> +	return 0;
> +}
> +
> +/**
>   * Register MR for entire memory chunks in a Mempool having externally allocated
>   * memory and fill in local cache.
>   *
> diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
> index c2529f96bc..f3f84dbac3 100644
> --- a/drivers/net/mlx5/mlx5_rxtx.h
> +++ b/drivers/net/mlx5/mlx5_rxtx.h
> @@ -28,6 +28,7 @@
>  #include <rte_atomic.h>
>  #include <rte_spinlock.h>
>  #include <rte_io.h>
> +#include <rte_bus_pci.h>
>  
>  #include "mlx5_utils.h"
>  #include "mlx5.h"
> @@ -367,6 +368,10 @@ uint32_t mlx5_rx_addr2mr_bh(struct mlx5_rxq_data *rxq, uintptr_t addr);
>  uint32_t mlx5_tx_mb2mr_bh(struct mlx5_txq_data *txq, struct rte_mbuf *mb);
>  uint32_t mlx5_tx_update_ext_mp(struct mlx5_txq_data *txq, uintptr_t addr,
>  			       struct rte_mempool *mp);
> +int mlx5_dma_map(struct rte_pci_device *pdev, void *addr, uint64_t iova,
> +		 size_t len);
> +int mlx5_dma_unmap(struct rte_pci_device *pdev, void *addr, uint64_t iova,
> +		   size_t len);
>  
>  /**
>   * Provide safe 64bit store operation to mlx5 UAR region for both 32bit and
> -- 
> 2.12.0
> 

-- 
Gaëtan Rivet
6WIND


More information about the dev mailing list