[dpdk-dev] [PATCH] net/mlx5: fix transmit doorbell register write barrier

Matan Azrad matan at mellanox.com
Sun Sep 22 08:58:56 CEST 2019


Hi Slava

Questions inline.

From: Viacheslav Ovsiienko
> The rdma core library can map doorbell register in two ways, depending on
> the environment variable "MLX5_SHUT_UP_BF":
> 
>   - as regular cached memory, the variable is either missing or
>     set to zero. This type of mapping may cause the significant
>     doorbell register writing latency and requires explicit
>     memory write barrier to mitigate this issue and prevent
>     write combining.
> 
>   - as non-cached memory, the variable is present and set to
>     not "0" value. This type of mapping may cause performance
>     impact under heavy loading conditions but the explicit write
>     memory barrier is not required and it may improve core
>     performance.
> 
> This patch checks the mapping type and provides the memory barrier after
> writing to tx doorbell register if it is needed.
> The mapping type is extracted directly from the uar_mmap_offset field in
> the queue properties.
> 
> Fixes: 18a1c20044c0 ("net/mlx5: implement Tx burst template")
> 
> Signed-off-by: Viacheslav Ovsiienko <viacheslavo at mellanox.com>
> ---
>  drivers/net/mlx5/Makefile    |  5 +++++
>  drivers/net/mlx5/meson.build |  2 ++
>  drivers/net/mlx5/mlx5_defs.h |  8 ++++++++  drivers/net/mlx5/mlx5_rxtx.c
> | 17 ++++++++++++++++-  drivers/net/mlx5/mlx5_rxtx.h |  1 +
> drivers/net/mlx5/mlx5_txq.c  | 17 ++++++++++++++++-
>  6 files changed, 48 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile index
> d89a7b5..7100fa3 100644
> --- a/drivers/net/mlx5/Makefile
> +++ b/drivers/net/mlx5/Makefile
> @@ -189,6 +189,11 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-
> config-h.sh
>  		func mlx5dv_dr_action_create_dest_devx_tir \
>  		$(AUTOCONF_OUTPUT)
>  	$Q sh -- '$<' '$@' \
> +		HAVE_MLX5DV_MMAP_GET_NC_PAGES_CMD \
> +		infiniband/mlx5dv.h \
> +		enum MLX5_MMAP_GET_NC_PAGES_CMD \
> +		$(AUTOCONF_OUTPUT)
> +	$Q sh -- '$<' '$@' \
>  		HAVE_ETHTOOL_LINK_MODE_25G \
>  		/usr/include/linux/ethtool.h \
>  		enum ETHTOOL_LINK_MODE_25000baseCR_Full_BIT \ diff --
> git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build index
> fb764fa..e56e018 100644
> --- a/drivers/net/mlx5/meson.build
> +++ b/drivers/net/mlx5/meson.build
> @@ -128,6 +128,8 @@ if build
>  		'mlx5dv_devx_obj_query_async' ],
>  		[ 'HAVE_MLX5DV_DR_ACTION_DEST_DEVX_TIR',
> 'infiniband/mlx5dv.h',
>  		'mlx5dv_dr_action_create_dest_devx_tir' ],
> +		[ 'HAVE_MLX5DV_MMAP_GET_NC_PAGES_CMD',
> 'infiniband/mlx5dv.h',
> +		'MLX5_MMAP_GET_NC_PAGES_CMD' ],
>  		[ 'HAVE_MLX5DV_DR', 'infiniband/mlx5dv.h',
>  		'MLX5DV_DR_DOMAIN_TYPE_NIC_RX' ],
>  		[ 'HAVE_MLX5DV_DR_ESWITCH', 'infiniband/mlx5dv.h', diff --
> git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h index
> d7440fd..03a8086 100644
> --- a/drivers/net/mlx5/mlx5_defs.h
> +++ b/drivers/net/mlx5/mlx5_defs.h
> @@ -115,6 +115,14 @@
>  #define MLX5_UAR_PAGE_NUM_MAX 64
>  #define MLX5_UAR_PAGE_NUM_MASK ((MLX5_UAR_PAGE_NUM_MAX) -
> 1)
> 
> +/* Fields of memory mapping type in offset parameter of mmap() */
> +#define MLX5_UAR_MMAP_CMD_SHIFT 8 #define
> MLX5_UAR_MMAP_CMD_MASK 0xff
> +
> +#ifndef HAVE_MLX5DV_MMAP_GET_NC_PAGES_CMD #define
> +MLX5_MMAP_GET_NC_PAGES_CMD 3 #endif
> +
>  /* Log 2 of the default number of strides per WQE for Multi-Packet RQ. */
> #define MLX5_MPRQ_STRIDE_NUM_N 6U
> 
> diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
> index f540977..fa3aa15 100644
> --- a/drivers/net/mlx5/mlx5_rxtx.c
> +++ b/drivers/net/mlx5/mlx5_rxtx.c
> @@ -4730,8 +4730,23 @@ enum mlx5_txcmp_code {
>  	 * to improve latencies. The pure software related data treatment
>  	 * can be completed after doorbell. Tx CQEs for this SQ are
>  	 * processed in this thread only by the polling.
> +	 *
> +	 * The rdma core library can map doorbell register in two ways,
> +	 * depending on the environment variable "MLX5_SHUT_UP_BF":
> +	 *
> +	 * - as regular cached memory, the variable is either missing or
> +	 *   set to zero. This type of mapping may cause the significant
> +	 *   doorbell register writing latency and requires explicit
> +	 *   memory write barrier to mitigate this issue and prevent
> +	 *   write combining.
> +	 *
> +	 * - as non-cached memory, the variable is present and set to
> +	 *   not "0" value. This type of mapping may cause performance
> +	 *   impact under heavy loading conditions but the explicit write
> +	 *   memory barrier is not required and it may improve core
> +	 *   performance.
>  	 */
> -	mlx5_tx_dbrec_cond_wmb(txq, loc.wqe_last, 0);
> +	mlx5_tx_dbrec_cond_wmb(txq, loc.wqe_last, !txq->db_nc);
>  	/* Not all of the mbufs may be stored into elts yet. */
>  	part = MLX5_TXOFF_CONFIG(INLINE) ? 0 : loc.pkts_sent -
> loc.pkts_copy;
>  	if (!MLX5_TXOFF_CONFIG(INLINE) && part) { diff --git
> a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h index
> 4f73d91..ae3a763 100644
> --- a/drivers/net/mlx5/mlx5_rxtx.h
> +++ b/drivers/net/mlx5/mlx5_rxtx.h
> @@ -277,6 +277,7 @@ struct mlx5_txq_data {
>  	/* When set TX offload for tunneled packets are supported. */
>  	uint16_t swp_en:1; /* Whether SW parser is enabled. */
>  	uint16_t vlan_en:1; /* VLAN insertion in WQE is supported. */
> +	uint16_t db_nc:1; /* Doorbell mapped to non-cached region. */
>  	uint16_t inlen_send; /* Ordinary send data inline size. */
>  	uint16_t inlen_empw; /* eMPW max packet size to inline. */
>  	uint16_t inlen_mode; /* Minimal data length to inline. */ diff --git
> a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c index
> 2b7d6c0..7ec2ef3 100644
> --- a/drivers/net/mlx5/mlx5_txq.c
> +++ b/drivers/net/mlx5/mlx5_txq.c
> @@ -18,6 +18,7 @@
>  #pragma GCC diagnostic ignored "-Wpedantic"
>  #endif
>  #include <infiniband/verbs.h>
> +#include <infiniband/mlx5dv.h>
>  #ifdef PEDANTIC
>  #pragma GCC diagnostic error "-Wpedantic"
>  #endif
> @@ -241,14 +242,21 @@
>  {
>  	struct mlx5_priv *priv = txq_ctrl->priv;
>  	struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
> +	const size_t page_size = sysconf(_SC_PAGESIZE);
> +	unsigned int cmd;
>  #ifndef RTE_ARCH_64
>  	unsigned int lock_idx;
> -	const size_t page_size = sysconf(_SC_PAGESIZE);
>  #endif
> 
>  	assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
>  	assert(ppriv);
>  	ppriv->uar_table[txq_ctrl->txq.idx] = txq_ctrl->bf_reg;
> +	/* Check the doorbell register mapping type. */
> +	cmd = txq_ctrl->uar_mmap_offset / page_size;
> +	cmd >>= MLX5_UAR_MMAP_CMD_SHIFT;
> +	cmd &= MLX5_UAR_MMAP_CMD_MASK;
> +	if (cmd == MLX5_MMAP_GET_NC_PAGES_CMD)
> +		txq_ctrl->txq.db_nc = 1;

Are you sure we can't retrieve the value in compile time?

>  #ifndef RTE_ARCH_64
>  	/* Assign an UAR lock according to UAR page number */
>  	lock_idx = (txq_ctrl->uar_mmap_offset / page_size) & @@ -281,6
> +289,7 @@
>  	uintptr_t uar_va;
>  	uintptr_t offset;
>  	const size_t page_size = sysconf(_SC_PAGESIZE);
> +	unsigned int cmd;
> 
>  	assert(ppriv);
>  	/*
> @@ -300,6 +309,12 @@
>  	}
>  	addr = RTE_PTR_ADD(addr, offset);
>  	ppriv->uar_table[txq->idx] = addr;
> +	/* Check the doorbell register mapping type. */
> +	cmd = txq_ctrl->uar_mmap_offset / page_size;
> +	cmd >>= MLX5_UAR_MMAP_CMD_SHIFT;
> +	cmd &= MLX5_UAR_MMAP_CMD_MASK;
> +	if (cmd == MLX5_MMAP_GET_NC_PAGES_CMD)
> +		txq_ctrl->txq.db_nc = 1;

Looks like double code.
Maybe it is better to get the value by a MACRO?


>  	return 0;
>  }
> 
> --
> 1.8.3.1



More information about the dev mailing list