[dpdk-dev] [PATCH v3] net/mlx5: add support for 32bit systems

Moti Haimovsky motih at mellanox.com
Thu Jul 12 14:01:31 CEST 2018


This patch adds support for building and running mlx5 PMD on
32bit systems such as i686.

The main issue to tackle was handling the 32bit access to the UAR
as quoted from the mlx5 PRM:
QP and CQ DoorBells require 64-bit writes. For best performance, it
is recommended to execute the QP/CQ DoorBell as a single 64-bit write
operation. For platforms that do not support 64 bit writes, it is
possible to issue the 64 bits DoorBells through two consecutive
writes,
each write 32 bits, as described below:
* The order of writing each of the Dwords is from lower to upper
  addresses.
* No other DoorBell can be rung (or even start ringing) in the midst
 of an on-going write of a DoorBell over a given UAR page.
The last rule implies that in a multi-threaded environment, the access
to a UAR page (which can be accessible by all threads in the process)
must be synchronized (for example, using a semaphore) unless an atomic
write of 64 bits in a single bus operation is guaranteed. Such a
synchronization is not required for when ringing DoorBells on different
UAR pages.

Signed-off-by: Moti Haimovsky <motih at mellanox.com>
---
v3:
* Rebased upon latest changes in mlx5 PMD and rdma-core.

v2:
* Fixed coding style issues.
* Modified documentation according to review inputs.
* Fixed merge conflicts.
---
 doc/guides/nics/features/mlx5.ini |  1 +
 doc/guides/nics/mlx5.rst          |  6 +++-
 drivers/net/mlx5/mlx5.c           |  8 ++++-
 drivers/net/mlx5/mlx5.h           |  5 +++
 drivers/net/mlx5/mlx5_defs.h      | 18 ++++++++--
 drivers/net/mlx5/mlx5_rxq.c       |  6 +++-
 drivers/net/mlx5/mlx5_rxtx.c      | 22 +++++++------
 drivers/net/mlx5/mlx5_rxtx.h      | 69 ++++++++++++++++++++++++++++++++++++++-
 drivers/net/mlx5/mlx5_txq.c       | 13 +++++++-
 9 files changed, 131 insertions(+), 17 deletions(-)

diff --git a/doc/guides/nics/features/mlx5.ini b/doc/guides/nics/features/mlx5.ini
index e75b14b..b28b43e 100644
--- a/doc/guides/nics/features/mlx5.ini
+++ b/doc/guides/nics/features/mlx5.ini
@@ -43,5 +43,6 @@ Multiprocess aware   = Y
 Other kdrv           = Y
 ARMv8                = Y
 Power8               = Y
+x86-32               = Y
 x86-64               = Y
 Usage doc            = Y
diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index 0d0d217..ebf2336 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -49,7 +49,7 @@ libibverbs.
 Features
 --------
 
-- Multi arch support: x86_64, POWER8, ARMv8.
+- Multi arch support: x86_64, POWER8, ARMv8, i686.
 - Multiple TX and RX queues.
 - Support for scattered TX and RX frames.
 - IPv4, IPv6, TCPv4, TCPv6, UDPv4 and UDPv6 RSS on any number of queues.
@@ -489,6 +489,10 @@ RMDA Core with Linux Kernel
 - Minimal kernel version : v4.14 or the most recent 4.14-rc (see `Linux installation documentation`_)
 - Minimal rdma-core version: v15+ commit 0c5f5765213a ("Merge pull request #227 from yishaih/tm")
   (see `RDMA Core installation documentation`_)
+- When building for i686 use:
+
+  - rdma-core version 18.0 or above built with 32bit support.
+  - Kernel version 4.14.41 or above.
 
 .. _`Linux installation documentation`: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git/plain/Documentation/admin-guide/README.rst
 .. _`RDMA Core installation documentation`: https://raw.githubusercontent.com/linux-rdma/rdma-core/master/README.md
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index dda50b8..15f1a17 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -598,7 +598,7 @@
 	rte_memseg_walk(find_lower_va_bound, &addr);
 
 	/* keep distance to hugepages to minimize potential conflicts. */
-	addr = RTE_PTR_SUB(addr, MLX5_UAR_OFFSET + MLX5_UAR_SIZE);
+	addr = RTE_PTR_SUB(addr, (uintptr_t)(MLX5_UAR_OFFSET + MLX5_UAR_SIZE));
 	/* anonymous mmap, no real memory consumption. */
 	addr = mmap(addr, MLX5_UAR_SIZE,
 		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
@@ -939,6 +939,12 @@
 	priv->device_attr = attr;
 	priv->pd = pd;
 	priv->mtu = ETHER_MTU;
+#ifndef RTE_ARCH_64
+	/* Initialize UAR access locks for 32bit implementations. */
+	rte_spinlock_init(&priv->uar_lock_cq);
+	for (i = 0; i < MLX5_UAR_PAGE_NUM_MAX; i++)
+		rte_spinlock_init(&priv->uar_lock[i]);
+#endif
 	/* Some internal functions rely on Netlink sockets, open them now. */
 	priv->nl_socket_rdma = mlx5_nl_init(0, NETLINK_RDMA);
 	priv->nl_socket_route =	mlx5_nl_init(RTMGRP_LINK, NETLINK_ROUTE);
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 131be33..896158a 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -215,6 +215,11 @@ struct priv {
 	int nl_socket_rdma; /* Netlink socket (NETLINK_RDMA). */
 	int nl_socket_route; /* Netlink socket (NETLINK_ROUTE). */
 	uint32_t nl_sn; /* Netlink message sequence number. */
+#ifndef RTE_ARCH_64
+	rte_spinlock_t uar_lock_cq; /* CQs share a common distinct UAR */
+	rte_spinlock_t uar_lock[MLX5_UAR_PAGE_NUM_MAX];
+	/* UAR same-page access control required in 32bit implementations. */
+#endif
 };
 
 #define PORT_ID(priv) ((priv)->dev_data->port_id)
diff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h
index 5bbbec2..f6ec415 100644
--- a/drivers/net/mlx5/mlx5_defs.h
+++ b/drivers/net/mlx5/mlx5_defs.h
@@ -87,14 +87,28 @@
 #define MLX5_LINK_STATUS_TIMEOUT 10
 
 /* Reserved address space for UAR mapping. */
-#define MLX5_UAR_SIZE (1ULL << 32)
+#define MLX5_UAR_SIZE (1ULL << (sizeof(uintptr_t) * 4))
 
 /* Offset of reserved UAR address space to hugepage memory. Offset is used here
  * to minimize possibility of address next to hugepage being used by other code
  * in either primary or secondary process, failing to map TX UAR would make TX
  * packets invisible to HW.
  */
-#define MLX5_UAR_OFFSET (1ULL << 32)
+#define MLX5_UAR_OFFSET (1ULL << (sizeof(uintptr_t) * 4))
+
+/* Maximum number of UAR pages used by a port,
+ * These are the size and mask for an array of mutexes used to synchronize
+ * the access to port's UARs on platforms that do not support 64 bit writes.
+ * In such systems it is possible to issue the 64 bits DoorBells through two
+ * consecutive writes, each write 32 bits. The access to a UAR page (which can
+ * be accessible by all threads in the process) must be synchronized
+ * (for example, using a semaphore). Such a synchronization is not required
+ * when ringing DoorBells on different UAR pages.
+ * A port with 512 Tx queues uses 8, 4kBytes, UAR pages which are shared
+ * among the ports.
+ */
+#define MLX5_UAR_PAGE_NUM_MAX 64
+#define MLX5_UAR_PAGE_NUM_MASK ((MLX5_UAR_PAGE_NUM_MAX) - 1)
 
 /* Log 2 of the default number of strides per WQE for Multi-Packet RQ. */
 #define MLX5_MPRQ_STRIDE_NUM_N 6U
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 071740b..16e1641 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -647,7 +647,8 @@
 	doorbell = (uint64_t)doorbell_hi << 32;
 	doorbell |=  rxq->cqn;
 	rxq->cq_db[MLX5_CQ_ARM_DB] = rte_cpu_to_be_32(doorbell_hi);
-	rte_write64(rte_cpu_to_be_64(doorbell), cq_db_reg);
+	mlx5_uar_write64(rte_cpu_to_be_64(doorbell),
+			 cq_db_reg, rxq->uar_lock_cq);
 }
 
 /**
@@ -1449,6 +1450,9 @@ struct mlx5_rxq_ctrl *
 	tmpl->rxq.elts_n = log2above(desc);
 	tmpl->rxq.elts =
 		(struct rte_mbuf *(*)[1 << tmpl->rxq.elts_n])(tmpl + 1);
+#ifndef RTE_ARCH_64
+	tmpl->rxq.uar_lock_cq = &priv->uar_lock_cq;
+#endif
 	tmpl->idx = idx;
 	rte_atomic32_inc(&tmpl->refcnt);
 	LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index a7ed8d8..52a1074 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -495,6 +495,7 @@
 	volatile struct mlx5_wqe_ctrl *last_wqe = NULL;
 	unsigned int segs_n = 0;
 	const unsigned int max_inline = txq->max_inline;
+	uint64_t addr_64;
 
 	if (unlikely(!pkts_n))
 		return 0;
@@ -711,12 +712,12 @@
 			ds = 3;
 use_dseg:
 			/* Add the remaining packet as a simple ds. */
-			addr = rte_cpu_to_be_64(addr);
+			addr_64 = rte_cpu_to_be_64(addr);
 			*dseg = (rte_v128u32_t){
 				rte_cpu_to_be_32(length),
 				mlx5_tx_mb2mr(txq, buf),
-				addr,
-				addr >> 32,
+				addr_64,
+				addr_64 >> 32,
 			};
 			++ds;
 			if (!segs_n)
@@ -750,12 +751,12 @@
 		total_length += length;
 #endif
 		/* Store segment information. */
-		addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, uintptr_t));
+		addr_64 = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, uintptr_t));
 		*dseg = (rte_v128u32_t){
 			rte_cpu_to_be_32(length),
 			mlx5_tx_mb2mr(txq, buf),
-			addr,
-			addr >> 32,
+			addr_64,
+			addr_64 >> 32,
 		};
 		(*txq->elts)[++elts_head & elts_m] = buf;
 		if (--segs_n)
@@ -1450,6 +1451,7 @@
 	unsigned int mpw_room = 0;
 	unsigned int inl_pad = 0;
 	uint32_t inl_hdr;
+	uint64_t addr_64;
 	struct mlx5_mpw mpw = {
 		.state = MLX5_MPW_STATE_CLOSED,
 	};
@@ -1586,13 +1588,13 @@
 					((uintptr_t)mpw.data.raw +
 					 inl_pad);
 			(*txq->elts)[elts_head++ & elts_m] = buf;
-			addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
-								 uintptr_t));
+			addr_64 = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
+								    uintptr_t));
 			*dseg = (rte_v128u32_t) {
 				rte_cpu_to_be_32(length),
 				mlx5_tx_mb2mr(txq, buf),
-				addr,
-				addr >> 32,
+				addr_64,
+				addr_64 >> 32,
 			};
 			mpw.data.raw = (volatile void *)(dseg + 1);
 			mpw.total_len += (inl_pad + sizeof(*dseg));
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index a04a84f..992e977 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -26,6 +26,8 @@
 #include <rte_common.h>
 #include <rte_hexdump.h>
 #include <rte_atomic.h>
+#include <rte_spinlock.h>
+#include <rte_io.h>
 
 #include "mlx5_utils.h"
 #include "mlx5.h"
@@ -118,6 +120,10 @@ struct mlx5_rxq_data {
 	void *cq_uar; /* CQ user access region. */
 	uint32_t cqn; /* CQ number. */
 	uint8_t cq_arm_sn; /* CQ arm seq number. */
+#ifndef RTE_ARCH_64
+	rte_spinlock_t *uar_lock_cq;
+	/* CQ (UAR) access lock required for 32bit implementations */
+#endif
 	uint32_t tunnel; /* Tunnel information. */
 } __rte_cache_aligned;
 
@@ -198,6 +204,10 @@ struct mlx5_txq_data {
 	volatile void *bf_reg; /* Blueflame register remapped. */
 	struct rte_mbuf *(*elts)[]; /* TX elements. */
 	struct mlx5_txq_stats stats; /* TX queue counters. */
+#ifndef RTE_ARCH_64
+	rte_spinlock_t *uar_lock;
+	/* UAR access lock required for 32bit implementations */
+#endif
 } __rte_cache_aligned;
 
 /* Verbs Rx queue elements. */
@@ -353,6 +363,63 @@ uint16_t mlx5_rx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts,
 uint32_t mlx5_rx_addr2mr_bh(struct mlx5_rxq_data *rxq, uintptr_t addr);
 uint32_t mlx5_tx_addr2mr_bh(struct mlx5_txq_data *txq, uintptr_t addr);
 
+/**
+ * Provide safe 64bit store operation to mlx5 UAR region for both 32bit and
+ * 64bit architectures.
+ *
+ * @param val
+ *   value to write in CPU endian format.
+ * @param addr
+ *   Address to write to.
+ * @param lock
+ *   Address of the lock to use for that UAR access.
+ */
+static __rte_always_inline void
+__mlx5_uar_write64_relaxed(uint64_t val, volatile void *addr,
+			   rte_spinlock_t *lock __rte_unused)
+{
+#ifdef RTE_ARCH_64
+	rte_write64_relaxed(val, addr);
+#else /* !RTE_ARCH_64 */
+	rte_spinlock_lock(lock);
+	rte_write32_relaxed(val, addr);
+	rte_io_wmb();
+	rte_write32_relaxed(val >> 32,
+			    (volatile void *)((volatile char *)addr + 4));
+	rte_spinlock_unlock(lock);
+#endif
+}
+
+/**
+ * Provide safe 64bit store operation to mlx5 UAR region for both 32bit and
+ * 64bit architectures while guaranteeing the order of execution with the
+ * code being executed.
+ *
+ * @param val
+ *   value to write in CPU endian format.
+ * @param addr
+ *   Address to write to.
+ * @param lock
+ *   Address of the lock to use for that UAR access.
+ */
+static __rte_always_inline void
+__mlx5_uar_write64(uint64_t val, volatile void *addr, rte_spinlock_t *lock)
+{
+	rte_io_wmb();
+	__mlx5_uar_write64_relaxed(val, addr, lock);
+}
+
+/* Assist macros, used instead of directly calling the functions they wrap. */
+#ifdef RTE_ARCH_64
+#define mlx5_uar_write64_relaxed(val, dst, lock) \
+		__mlx5_uar_write64_relaxed(val, dst, NULL)
+#define mlx5_uar_write64(val, dst, lock) __mlx5_uar_write64(val, dst, NULL)
+#else
+#define mlx5_uar_write64_relaxed(val, dst, lock) \
+		__mlx5_uar_write64_relaxed(val, dst, lock)
+#define mlx5_uar_write64(val, dst, lock) __mlx5_uar_write64(val, dst, lock)
+#endif
+
 #ifndef NDEBUG
 /**
  * Verify or set magic value in CQE.
@@ -619,7 +686,7 @@ uint16_t mlx5_rx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts,
 	*txq->qp_db = rte_cpu_to_be_32(txq->wqe_ci);
 	/* Ensure ordering between DB record and BF copy. */
 	rte_wmb();
-	*dst = *src;
+	mlx5_uar_write64_relaxed(*src, dst, txq->uar_lock);
 	if (cond)
 		rte_wmb();
 }
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 5057561..f9bc473 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -255,6 +255,9 @@
 	struct mlx5_txq_ctrl *txq_ctrl;
 	int already_mapped;
 	size_t page_size = sysconf(_SC_PAGESIZE);
+#ifndef RTE_ARCH_64
+	unsigned int lock_idx;
+#endif
 
 	memset(pages, 0, priv->txqs_n * sizeof(uintptr_t));
 	/*
@@ -281,7 +284,7 @@
 		}
 		/* new address in reserved UAR address space. */
 		addr = RTE_PTR_ADD(priv->uar_base,
-				   uar_va & (MLX5_UAR_SIZE - 1));
+				   uar_va & (uintptr_t)(MLX5_UAR_SIZE - 1));
 		if (!already_mapped) {
 			pages[pages_n++] = uar_va;
 			/* fixed mmap to specified address in reserved
@@ -305,6 +308,12 @@
 		else
 			assert(txq_ctrl->txq.bf_reg ==
 			       RTE_PTR_ADD((void *)addr, off));
+#ifndef RTE_ARCH_64
+		/* Assign a UAR lock according to UAR page number */
+		lock_idx = (txq_ctrl->uar_mmap_offset / page_size) &
+			   MLX5_UAR_PAGE_NUM_MASK;
+		txq->uar_lock = &priv->uar_lock[lock_idx];
+#endif
 	}
 	return 0;
 }
@@ -511,6 +520,8 @@ struct mlx5_txq_ibv *
 	rte_atomic32_inc(&txq_ibv->refcnt);
 	if (qp.comp_mask & MLX5DV_QP_MASK_UAR_MMAP_OFFSET) {
 		txq_ctrl->uar_mmap_offset = qp.uar_mmap_offset;
+		DRV_LOG(DEBUG, "port %u: uar_mmap_offset 0x%lx",
+			dev->data->port_id, txq_ctrl->uar_mmap_offset);
 	} else {
 		DRV_LOG(ERR,
 			"port %u failed to retrieve UAR info, invalid"
-- 
1.8.3.1



More information about the dev mailing list