[dpdk-dev] [PATCH] net/mlx5: fix transmit doorbell register write barrier
Matan Azrad
matan at mellanox.com
Sun Sep 22 08:58:56 CEST 2019
Hi Slava
Questions inline.
From: Viacheslav Ovsiienko
> The rdma core library can map doorbell register in two ways, depending on
> the environment variable "MLX5_SHUT_UP_BF":
>
> - as regular cached memory, the variable is either missing or
> set to zero. This type of mapping may cause the significant
> doorbell register writing latency and requires explicit
> memory write barrier to mitigate this issue and prevent
> write combining.
>
> - as non-cached memory, the variable is present and set to
> not "0" value. This type of mapping may cause performance
> impact under heavy loading conditions but the explicit write
> memory barrier is not required and it may improve core
> performance.
>
> This patch checks the mapping type and provides the memory barrier after
> writing to tx doorbell register if it is needed.
> The mapping type is extracted directly from the uar_mmap_offset field in
> the queue properties.
>
> Fixes: 18a1c20044c0 ("net/mlx5: implement Tx burst template")
>
> Signed-off-by: Viacheslav Ovsiienko <viacheslavo at mellanox.com>
> ---
> drivers/net/mlx5/Makefile | 5 +++++
> drivers/net/mlx5/meson.build | 2 ++
> drivers/net/mlx5/mlx5_defs.h | 8 ++++++++ drivers/net/mlx5/mlx5_rxtx.c
> | 17 ++++++++++++++++- drivers/net/mlx5/mlx5_rxtx.h | 1 +
> drivers/net/mlx5/mlx5_txq.c | 17 ++++++++++++++++-
> 6 files changed, 48 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile index
> d89a7b5..7100fa3 100644
> --- a/drivers/net/mlx5/Makefile
> +++ b/drivers/net/mlx5/Makefile
> @@ -189,6 +189,11 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-
> config-h.sh
> func mlx5dv_dr_action_create_dest_devx_tir \
> $(AUTOCONF_OUTPUT)
> $Q sh -- '$<' '$@' \
> + HAVE_MLX5DV_MMAP_GET_NC_PAGES_CMD \
> + infiniband/mlx5dv.h \
> + enum MLX5_MMAP_GET_NC_PAGES_CMD \
> + $(AUTOCONF_OUTPUT)
> + $Q sh -- '$<' '$@' \
> HAVE_ETHTOOL_LINK_MODE_25G \
> /usr/include/linux/ethtool.h \
> enum ETHTOOL_LINK_MODE_25000baseCR_Full_BIT \ diff --
> git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build index
> fb764fa..e56e018 100644
> --- a/drivers/net/mlx5/meson.build
> +++ b/drivers/net/mlx5/meson.build
> @@ -128,6 +128,8 @@ if build
> 'mlx5dv_devx_obj_query_async' ],
> [ 'HAVE_MLX5DV_DR_ACTION_DEST_DEVX_TIR',
> 'infiniband/mlx5dv.h',
> 'mlx5dv_dr_action_create_dest_devx_tir' ],
> + [ 'HAVE_MLX5DV_MMAP_GET_NC_PAGES_CMD',
> 'infiniband/mlx5dv.h',
> + 'MLX5_MMAP_GET_NC_PAGES_CMD' ],
> [ 'HAVE_MLX5DV_DR', 'infiniband/mlx5dv.h',
> 'MLX5DV_DR_DOMAIN_TYPE_NIC_RX' ],
> [ 'HAVE_MLX5DV_DR_ESWITCH', 'infiniband/mlx5dv.h', diff --
> git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h index
> d7440fd..03a8086 100644
> --- a/drivers/net/mlx5/mlx5_defs.h
> +++ b/drivers/net/mlx5/mlx5_defs.h
> @@ -115,6 +115,14 @@
> #define MLX5_UAR_PAGE_NUM_MAX 64
> #define MLX5_UAR_PAGE_NUM_MASK ((MLX5_UAR_PAGE_NUM_MAX) -
> 1)
>
> +/* Fields of memory mapping type in offset parameter of mmap() */
> +#define MLX5_UAR_MMAP_CMD_SHIFT 8 #define
> MLX5_UAR_MMAP_CMD_MASK 0xff
> +
> +#ifndef HAVE_MLX5DV_MMAP_GET_NC_PAGES_CMD #define
> +MLX5_MMAP_GET_NC_PAGES_CMD 3 #endif
> +
> /* Log 2 of the default number of strides per WQE for Multi-Packet RQ. */
> #define MLX5_MPRQ_STRIDE_NUM_N 6U
>
> diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
> index f540977..fa3aa15 100644
> --- a/drivers/net/mlx5/mlx5_rxtx.c
> +++ b/drivers/net/mlx5/mlx5_rxtx.c
> @@ -4730,8 +4730,23 @@ enum mlx5_txcmp_code {
> * to improve latencies. The pure software related data treatment
> * can be completed after doorbell. Tx CQEs for this SQ are
> * processed in this thread only by the polling.
> + *
> + * The rdma core library can map doorbell register in two ways,
> + * depending on the environment variable "MLX5_SHUT_UP_BF":
> + *
> + * - as regular cached memory, the variable is either missing or
> + * set to zero. This type of mapping may cause the significant
> + * doorbell register writing latency and requires explicit
> + * memory write barrier to mitigate this issue and prevent
> + * write combining.
> + *
> + * - as non-cached memory, the variable is present and set to
> + * not "0" value. This type of mapping may cause performance
> + * impact under heavy loading conditions but the explicit write
> + * memory barrier is not required and it may improve core
> + * performance.
> */
> - mlx5_tx_dbrec_cond_wmb(txq, loc.wqe_last, 0);
> + mlx5_tx_dbrec_cond_wmb(txq, loc.wqe_last, !txq->db_nc);
> /* Not all of the mbufs may be stored into elts yet. */
> part = MLX5_TXOFF_CONFIG(INLINE) ? 0 : loc.pkts_sent -
> loc.pkts_copy;
> if (!MLX5_TXOFF_CONFIG(INLINE) && part) { diff --git
> a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h index
> 4f73d91..ae3a763 100644
> --- a/drivers/net/mlx5/mlx5_rxtx.h
> +++ b/drivers/net/mlx5/mlx5_rxtx.h
> @@ -277,6 +277,7 @@ struct mlx5_txq_data {
> /* When set TX offload for tunneled packets are supported. */
> uint16_t swp_en:1; /* Whether SW parser is enabled. */
> uint16_t vlan_en:1; /* VLAN insertion in WQE is supported. */
> + uint16_t db_nc:1; /* Doorbell mapped to non-cached region. */
> uint16_t inlen_send; /* Ordinary send data inline size. */
> uint16_t inlen_empw; /* eMPW max packet size to inline. */
> uint16_t inlen_mode; /* Minimal data length to inline. */ diff --git
> a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c index
> 2b7d6c0..7ec2ef3 100644
> --- a/drivers/net/mlx5/mlx5_txq.c
> +++ b/drivers/net/mlx5/mlx5_txq.c
> @@ -18,6 +18,7 @@
> #pragma GCC diagnostic ignored "-Wpedantic"
> #endif
> #include <infiniband/verbs.h>
> +#include <infiniband/mlx5dv.h>
> #ifdef PEDANTIC
> #pragma GCC diagnostic error "-Wpedantic"
> #endif
> @@ -241,14 +242,21 @@
> {
> struct mlx5_priv *priv = txq_ctrl->priv;
> struct mlx5_proc_priv *ppriv = MLX5_PROC_PRIV(PORT_ID(priv));
> + const size_t page_size = sysconf(_SC_PAGESIZE);
> + unsigned int cmd;
> #ifndef RTE_ARCH_64
> unsigned int lock_idx;
> - const size_t page_size = sysconf(_SC_PAGESIZE);
> #endif
>
> assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
> assert(ppriv);
> ppriv->uar_table[txq_ctrl->txq.idx] = txq_ctrl->bf_reg;
> + /* Check the doorbell register mapping type. */
> + cmd = txq_ctrl->uar_mmap_offset / page_size;
> + cmd >>= MLX5_UAR_MMAP_CMD_SHIFT;
> + cmd &= MLX5_UAR_MMAP_CMD_MASK;
> + if (cmd == MLX5_MMAP_GET_NC_PAGES_CMD)
> + txq_ctrl->txq.db_nc = 1;
Are you sure we can't retrieve the value in compile time?
> #ifndef RTE_ARCH_64
> /* Assign an UAR lock according to UAR page number */
> lock_idx = (txq_ctrl->uar_mmap_offset / page_size) & @@ -281,6
> +289,7 @@
> uintptr_t uar_va;
> uintptr_t offset;
> const size_t page_size = sysconf(_SC_PAGESIZE);
> + unsigned int cmd;
>
> assert(ppriv);
> /*
> @@ -300,6 +309,12 @@
> }
> addr = RTE_PTR_ADD(addr, offset);
> ppriv->uar_table[txq->idx] = addr;
> + /* Check the doorbell register mapping type. */
> + cmd = txq_ctrl->uar_mmap_offset / page_size;
> + cmd >>= MLX5_UAR_MMAP_CMD_SHIFT;
> + cmd &= MLX5_UAR_MMAP_CMD_MASK;
> + if (cmd == MLX5_MMAP_GET_NC_PAGES_CMD)
> + txq_ctrl->txq.db_nc = 1;
Looks like double code.
Maybe it is better to get the value by a MACRO?
> return 0;
> }
>
> --
> 1.8.3.1
More information about the dev
mailing list