[dpdk-dev] [PATCH v2] net/mlx5: add support for 32bit systems

Moti Haimovsky motih at mellanox.com
Mon Jul 2 13:11:40 CEST 2018


This patch adds support for building and running mlx5 PMD on
32bit systems such as i686.

The main issue to tackle was handling the 32bit access to the UAR
as quoted from the mlx5 PRM:
QP and CQ DoorBells require 64-bit writes. For best performance, it
is recommended to execute the QP/CQ DoorBell as a single 64-bit write
operation. For platforms that do not support 64 bit writes, it is
possible to issue the 64 bits DoorBells through two consecutive writes,
each write 32 bits, as described below:
* The order of writing each of the Dwords is from lower to upper
  addresses.
* No other DoorBell can be rung (or even start ringing) in the midst of
  an on-going write of a DoorBell over a given UAR page.
The last rule implies that in a multi-threaded environment, the access
to a UAR page (which can be accessible by all threads in the process)
must be synchronized (for example, using a semaphore) unless an atomic
write of 64 bits in a single bus operation is guaranteed. Such a
synchronization is not required for when ringing DoorBells on different
UAR pages.

Signed-off-by: Moti Haimovsky <motih at mellanox.com>
---
v2:
* Fixed coding style issues.
* Modified documentation according to review inputs.
* Fixed merge conflicts.
---
 doc/guides/nics/features/mlx5.ini |  1 +
 doc/guides/nics/mlx5.rst          |  6 +++-
 drivers/net/mlx5/mlx5.c           |  8 ++++-
 drivers/net/mlx5/mlx5.h           |  5 +++
 drivers/net/mlx5/mlx5_defs.h      | 18 ++++++++--
 drivers/net/mlx5/mlx5_rxq.c       |  6 +++-
 drivers/net/mlx5/mlx5_rxtx.c      | 22 +++++++------
 drivers/net/mlx5/mlx5_rxtx.h      | 69 ++++++++++++++++++++++++++++++++++++++-
 drivers/net/mlx5/mlx5_txq.c       | 13 +++++++-
 9 files changed, 131 insertions(+), 17 deletions(-)

diff --git a/doc/guides/nics/features/mlx5.ini b/doc/guides/nics/features/mlx5.ini
index e75b14b..b28b43e 100644
--- a/doc/guides/nics/features/mlx5.ini
+++ b/doc/guides/nics/features/mlx5.ini
@@ -43,5 +43,6 @@ Multiprocess aware   = Y
 Other kdrv           = Y
 ARMv8                = Y
 Power8               = Y
+x86-32               = Y
 x86-64               = Y
 Usage doc            = Y
diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index 7dd9c1c..5fbad60 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -49,7 +49,7 @@ libibverbs.
 Features
 --------
 
-- Multi arch support: x86_64, POWER8, ARMv8.
+- Multi arch support: x86_64, POWER8, ARMv8, i686.
 - Multiple TX and RX queues.
 - Support for scattered TX and RX frames.
 - IPv4, IPv6, TCPv4, TCPv6, UDPv4 and UDPv6 RSS on any number of queues.
@@ -477,6 +477,10 @@ RMDA Core with Linux Kernel
 - Minimal kernel version : v4.14 or the most recent 4.14-rc (see `Linux installation documentation`_)
 - Minimal rdma-core version: v15+ commit 0c5f5765213a ("Merge pull request #227 from yishaih/tm")
   (see `RDMA Core installation documentation`_)
+- When building for i686 use:
+
+  - rdma-core version 18.0 or above built with 32bit support.
+  - Kernel version 4.14.41 or above.
 
 .. _`Linux installation documentation`: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git/plain/Documentation/admin-guide/README.rst
 .. _`RDMA Core installation documentation`: https://raw.githubusercontent.com/linux-rdma/rdma-core/master/README.md
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index f0e6ed7..5d0f706 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -567,7 +567,7 @@
 	rte_memseg_walk(find_lower_va_bound, &addr);
 
 	/* keep distance to hugepages to minimize potential conflicts. */
-	addr = RTE_PTR_SUB(addr, MLX5_UAR_OFFSET + MLX5_UAR_SIZE);
+	addr = RTE_PTR_SUB(addr, (uintptr_t)(MLX5_UAR_OFFSET + MLX5_UAR_SIZE));
 	/* anonymous mmap, no real memory consumption. */
 	addr = mmap(addr, MLX5_UAR_SIZE,
 		    PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
@@ -953,6 +953,12 @@
 		priv->port = port;
 		priv->pd = pd;
 		priv->mtu = ETHER_MTU;
+#ifndef RTE_ARCH_64
+		/* Initialize UAR access locks for 32bit implementations. */
+		rte_spinlock_init(&priv->uar_lock_cq);
+		for (i = 0; i < MLX5_UAR_PAGE_NUM_MAX; i++)
+			rte_spinlock_init(&priv->uar_lock[i]);
+#endif
 		err = mlx5_args(&config, pci_dev->device.devargs);
 		if (err) {
 			err = rte_errno;
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 997b04a..2da32cd 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -198,6 +198,11 @@ struct priv {
 	/* Context for Verbs allocator. */
 	int nl_socket; /* Netlink socket. */
 	uint32_t nl_sn; /* Netlink message sequence number. */
+#ifndef RTE_ARCH_64
+	rte_spinlock_t uar_lock_cq; /* CQs share a common distinct UAR */
+	rte_spinlock_t uar_lock[MLX5_UAR_PAGE_NUM_MAX];
+	/* UAR same-page access control required in 32bit implementations. */
+#endif
 };
 
 #define PORT_ID(priv) ((priv)->dev_data->port_id)
diff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h
index 5bbbec2..f6ec415 100644
--- a/drivers/net/mlx5/mlx5_defs.h
+++ b/drivers/net/mlx5/mlx5_defs.h
@@ -87,14 +87,28 @@
 #define MLX5_LINK_STATUS_TIMEOUT 10
 
 /* Reserved address space for UAR mapping. */
-#define MLX5_UAR_SIZE (1ULL << 32)
+#define MLX5_UAR_SIZE (1ULL << (sizeof(uintptr_t) * 4))
 
 /* Offset of reserved UAR address space to hugepage memory. Offset is used here
  * to minimize possibility of address next to hugepage being used by other code
  * in either primary or secondary process, failing to map TX UAR would make TX
  * packets invisible to HW.
  */
-#define MLX5_UAR_OFFSET (1ULL << 32)
+#define MLX5_UAR_OFFSET (1ULL << (sizeof(uintptr_t) * 4))
+
+/* Maximum number of UAR pages used by a port,
+ * These are the size and mask for an array of mutexes used to synchronize
+ * the access to port's UARs on platforms that do not support 64 bit writes.
+ * In such systems it is possible to issue the 64 bits DoorBells through two
+ * consecutive writes, each write 32 bits. The access to a UAR page (which can
+ * be accessible by all threads in the process) must be synchronized
+ * (for example, using a semaphore). Such a synchronization is not required
+ * when ringing DoorBells on different UAR pages.
+ * A port with 512 Tx queues uses 8, 4kBytes, UAR pages which are shared
+ * among the ports.
+ */
+#define MLX5_UAR_PAGE_NUM_MAX 64
+#define MLX5_UAR_PAGE_NUM_MASK ((MLX5_UAR_PAGE_NUM_MAX) - 1)
 
 /* Log 2 of the default number of strides per WQE for Multi-Packet RQ. */
 #define MLX5_MPRQ_STRIDE_NUM_N 6U
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index fd0df17..3c036a8 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -645,7 +645,8 @@
 	doorbell = (uint64_t)doorbell_hi << 32;
 	doorbell |=  rxq->cqn;
 	rxq->cq_db[MLX5_CQ_ARM_DB] = rte_cpu_to_be_32(doorbell_hi);
-	rte_write64(rte_cpu_to_be_64(doorbell), cq_db_reg);
+	mlx5_uar_write64(rte_cpu_to_be_64(doorbell),
+			 cq_db_reg, rxq->uar_lock_cq);
 }
 
 /**
@@ -1447,6 +1448,9 @@ struct mlx5_rxq_ctrl *
 	tmpl->rxq.elts_n = log2above(desc);
 	tmpl->rxq.elts =
 		(struct rte_mbuf *(*)[1 << tmpl->rxq.elts_n])(tmpl + 1);
+#ifndef RTE_ARCH_64
+	tmpl->rxq.uar_lock_cq = &priv->uar_lock_cq;
+#endif
 	tmpl->idx = idx;
 	rte_atomic32_inc(&tmpl->refcnt);
 	LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index a7ed8d8..52a1074 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -495,6 +495,7 @@
 	volatile struct mlx5_wqe_ctrl *last_wqe = NULL;
 	unsigned int segs_n = 0;
 	const unsigned int max_inline = txq->max_inline;
+	uint64_t addr_64;
 
 	if (unlikely(!pkts_n))
 		return 0;
@@ -711,12 +712,12 @@
 			ds = 3;
 use_dseg:
 			/* Add the remaining packet as a simple ds. */
-			addr = rte_cpu_to_be_64(addr);
+			addr_64 = rte_cpu_to_be_64(addr);
 			*dseg = (rte_v128u32_t){
 				rte_cpu_to_be_32(length),
 				mlx5_tx_mb2mr(txq, buf),
-				addr,
-				addr >> 32,
+				addr_64,
+				addr_64 >> 32,
 			};
 			++ds;
 			if (!segs_n)
@@ -750,12 +751,12 @@
 		total_length += length;
 #endif
 		/* Store segment information. */
-		addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, uintptr_t));
+		addr_64 = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, uintptr_t));
 		*dseg = (rte_v128u32_t){
 			rte_cpu_to_be_32(length),
 			mlx5_tx_mb2mr(txq, buf),
-			addr,
-			addr >> 32,
+			addr_64,
+			addr_64 >> 32,
 		};
 		(*txq->elts)[++elts_head & elts_m] = buf;
 		if (--segs_n)
@@ -1450,6 +1451,7 @@
 	unsigned int mpw_room = 0;
 	unsigned int inl_pad = 0;
 	uint32_t inl_hdr;
+	uint64_t addr_64;
 	struct mlx5_mpw mpw = {
 		.state = MLX5_MPW_STATE_CLOSED,
 	};
@@ -1586,13 +1588,13 @@
 					((uintptr_t)mpw.data.raw +
 					 inl_pad);
 			(*txq->elts)[elts_head++ & elts_m] = buf;
-			addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
-								 uintptr_t));
+			addr_64 = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
+								    uintptr_t));
 			*dseg = (rte_v128u32_t) {
 				rte_cpu_to_be_32(length),
 				mlx5_tx_mb2mr(txq, buf),
-				addr,
-				addr >> 32,
+				addr_64,
+				addr_64 >> 32,
 			};
 			mpw.data.raw = (volatile void *)(dseg + 1);
 			mpw.total_len += (inl_pad + sizeof(*dseg));
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 0007be0..2448d73 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -26,6 +26,8 @@
 #include <rte_common.h>
 #include <rte_hexdump.h>
 #include <rte_atomic.h>
+#include <rte_spinlock.h>
+#include <rte_io.h>
 
 #include "mlx5_utils.h"
 #include "mlx5.h"
@@ -115,6 +117,10 @@ struct mlx5_rxq_data {
 	void *cq_uar; /* CQ user access region. */
 	uint32_t cqn; /* CQ number. */
 	uint8_t cq_arm_sn; /* CQ arm seq number. */
+#ifndef RTE_ARCH_64
+	rte_spinlock_t *uar_lock_cq;
+	/* CQ (UAR) access lock required for 32bit implementations */
+#endif
 	uint32_t tunnel; /* Tunnel information. */
 } __rte_cache_aligned;
 
@@ -196,6 +202,10 @@ struct mlx5_txq_data {
 	volatile void *bf_reg; /* Blueflame register remapped. */
 	struct rte_mbuf *(*elts)[]; /* TX elements. */
 	struct mlx5_txq_stats stats; /* TX queue counters. */
+#ifndef RTE_ARCH_64
+	rte_spinlock_t *uar_lock;
+	/* UAR access lock required for 32bit implementations */
+#endif
 } __rte_cache_aligned;
 
 /* Verbs Rx queue elements. */
@@ -348,6 +358,63 @@ uint16_t mlx5_rx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts,
 uint32_t mlx5_rx_addr2mr_bh(struct mlx5_rxq_data *rxq, uintptr_t addr);
 uint32_t mlx5_tx_addr2mr_bh(struct mlx5_txq_data *txq, uintptr_t addr);
 
+/**
+ * Provide safe 64bit store operation to mlx5 UAR region for both 32bit and
+ * 64bit architectures.
+ *
+ * @param val
+ *   value to write in CPU endian format.
+ * @param addr
+ *   Address to write to.
+ * @param lock
+ *   Address of the lock to use for that UAR access.
+ */
+static __rte_always_inline void
+__mlx5_uar_write64_relaxed(uint64_t val, volatile void *addr,
+			   rte_spinlock_t *lock __rte_unused)
+{
+#ifdef RTE_ARCH_64
+	rte_write64_relaxed(val, addr);
+#else /* !RTE_ARCH_64 */
+	rte_spinlock_lock(lock);
+	rte_write32_relaxed(val, addr);
+	rte_io_wmb();
+	rte_write32_relaxed(val >> 32,
+			    (volatile void *)((volatile char *)addr + 4));
+	rte_spinlock_unlock(lock);
+#endif
+}
+
+/**
+ * Provide safe 64bit store operation to mlx5 UAR region for both 32bit and
+ * 64bit architectures while guaranteeing the order of execution with the
+ * code being executed.
+ *
+ * @param val
+ *   value to write in CPU endian format.
+ * @param addr
+ *   Address to write to.
+ * @param lock
+ *   Address of the lock to use for that UAR access.
+ */
+static __rte_always_inline void
+__mlx5_uar_write64(uint64_t val, volatile void *addr, rte_spinlock_t *lock)
+{
+	rte_io_wmb();
+	__mlx5_uar_write64_relaxed(val, addr, lock);
+}
+
+/* Assist macros, used instead of directly calling the functions they wrap. */
+#ifdef RTE_ARCH_64
+#define mlx5_uar_write64_relaxed(val, dst, lock) \
+		__mlx5_uar_write64_relaxed(val, dst, NULL)
+#define mlx5_uar_write64(val, dst, lock) __mlx5_uar_write64(val, dst, NULL)
+#else
+#define mlx5_uar_write64_relaxed(val, dst, lock) \
+		__mlx5_uar_write64_relaxed(val, dst, lock)
+#define mlx5_uar_write64(val, dst, lock) __mlx5_uar_write64(val, dst, lock)
+#endif
+
 #ifndef NDEBUG
 /**
  * Verify or set magic value in CQE.
@@ -614,7 +681,7 @@ uint16_t mlx5_rx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts,
 	*txq->qp_db = rte_cpu_to_be_32(txq->wqe_ci);
 	/* Ensure ordering between DB record and BF copy. */
 	rte_wmb();
-	*dst = *src;
+	mlx5_uar_write64_relaxed(*src, dst, txq->uar_lock);
 	if (cond)
 		rte_wmb();
 }
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 669b913..dc786d4 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -255,6 +255,9 @@
 	struct mlx5_txq_ctrl *txq_ctrl;
 	int already_mapped;
 	size_t page_size = sysconf(_SC_PAGESIZE);
+#ifndef RTE_ARCH_64
+	unsigned int lock_idx;
+#endif
 
 	memset(pages, 0, priv->txqs_n * sizeof(uintptr_t));
 	/*
@@ -281,7 +284,7 @@
 		}
 		/* new address in reserved UAR address space. */
 		addr = RTE_PTR_ADD(priv->uar_base,
-				   uar_va & (MLX5_UAR_SIZE - 1));
+				   uar_va & (uintptr_t)(MLX5_UAR_SIZE - 1));
 		if (!already_mapped) {
 			pages[pages_n++] = uar_va;
 			/* fixed mmap to specified address in reserved
@@ -305,6 +308,12 @@
 		else
 			assert(txq_ctrl->txq.bf_reg ==
 			       RTE_PTR_ADD((void *)addr, off));
+#ifndef RTE_ARCH_64
+		/* Assign a UAR lock according to UAR page number */
+		lock_idx = (txq_ctrl->uar_mmap_offset / page_size) &
+			   MLX5_UAR_PAGE_NUM_MASK;
+		txq->uar_lock = &priv->uar_lock[lock_idx];
+#endif
 	}
 	return 0;
 }
@@ -511,6 +520,8 @@ struct mlx5_txq_ibv *
 	rte_atomic32_inc(&txq_ibv->refcnt);
 	if (qp.comp_mask & MLX5DV_QP_MASK_UAR_MMAP_OFFSET) {
 		txq_ctrl->uar_mmap_offset = qp.uar_mmap_offset;
+		DRV_LOG(DEBUG, "port %u: uar_mmap_offset 0x%lx",
+			dev->data->port_id, txq_ctrl->uar_mmap_offset);
 	} else {
 		DRV_LOG(ERR,
 			"port %u failed to retrieve UAR info, invalid"
-- 
1.8.3.1



More information about the dev mailing list