[PATCH 1/2] common/mlx5: add device duplication function

Gregory Etelson getelson at nvidia.com
Thu Mar 13 09:33:50 CET 2025


From: Michael Baum <michaelba at nvidia.com>

Add a function for creating an additional CTX for the same device,
based on the existing CTX.

Signed-off-by: Michael Baum <michaelba at nvidia.com>
---
 drivers/common/mlx5/linux/mlx5_common_os.c   | 52 ++++++++++++++++++++
 drivers/common/mlx5/mlx5_common.h            |  4 ++
 drivers/common/mlx5/version.map              |  1 +
 drivers/common/mlx5/windows/mlx5_common_os.c | 31 ++++++++++++
 4 files changed, 88 insertions(+)

diff --git a/drivers/common/mlx5/linux/mlx5_common_os.c b/drivers/common/mlx5/linux/mlx5_common_os.c
index 41345e1597..2f5032f0da 100644
--- a/drivers/common/mlx5/linux/mlx5_common_os.c
+++ b/drivers/common/mlx5/linux/mlx5_common_os.c
@@ -873,6 +873,58 @@ mlx5_os_open_device(struct mlx5_common_device *cdev, uint32_t classes)
 	return 0;
 }
 
+/**
+ * API function to obtain a new InfiniBand (IB) context for a given common device.
+ *
+ * This function provides a port-agnostic IB context for a physical device, enabling the
+ * device to create and manage resources that can be initialized when a port starts and
+ * released when another port stops.
+ *
+ * On Linux, the new context is imported from a duplicate of the existing context's command FD.
+ *
+ * @param cdev
+ *   Pointer to the mlx5 device structure.
+ *
+ * @return
+ *   Pointer to an `ibv_context` on success, or NULL on failure, with `rte_errno` set.
+ */
+void *
+mlx5_os_get_physical_device_ctx(struct mlx5_common_device *cdev)
+{
+	struct ibv_context *ctx = NULL;
+	int cmd_fd = ((struct ibv_context *)cdev->ctx)->cmd_fd;
+	int new_cmd_fd;
+
+	/*
+	 * Duplicate the command FD to pass it as input to the import device function.
+	 * If the import function succeeds, the new device context takes ownership of
+	 * this FD, which will be freed when the new device is closed.
+	 * If the import function fails, we are responsible for closing this FD.
+	 */
+	new_cmd_fd = dup(cmd_fd);
+	if (new_cmd_fd < 0) {
+		rte_errno = errno; /* Save before logging can overwrite errno. */
+		DRV_LOG(ERR,
+			"Failed to duplicate FD %d for IB device \"%s\": %s",
+			cmd_fd, mlx5_os_get_ctx_device_name(cdev->ctx),
+			rte_strerror(rte_errno));
+		return NULL;
+	}
+	/* Attempt to import the duplicated FD to create a new device context. */
+	ctx = mlx5_glue->import_device(new_cmd_fd);
+	if (!ctx) {
+		rte_errno = errno; /* Save before close() can overwrite errno. */
+		DRV_LOG(ERR, "Failed to import IB device \"%s\": %s",
+			mlx5_os_get_ctx_device_name(cdev->ctx),
+			rte_strerror(rte_errno));
+		close(new_cmd_fd);
+		return NULL;
+	}
+	DRV_LOG(INFO, "IB device \"%s\" successfully imported, old_fd=%d, new_fd=%d",
+		mlx5_os_get_ctx_device_name(cdev->ctx), cmd_fd, new_cmd_fd);
+	return (void *)ctx;
+}
+
 int
 mlx5_get_device_guid(const struct rte_pci_addr *dev, uint8_t *guid, size_t len)
 {
diff --git a/drivers/common/mlx5/mlx5_common.h b/drivers/common/mlx5/mlx5_common.h
index e7bd4c6ec4..bea1382911 100644
--- a/drivers/common/mlx5/mlx5_common.h
+++ b/drivers/common/mlx5/mlx5_common.h
@@ -651,6 +651,10 @@ mlx5_devx_uar_release(struct mlx5_uar *uar);
 
 /* mlx5_common_os.c */
 
+__rte_internal
+void *
+mlx5_os_get_physical_device_ctx(struct mlx5_common_device *cdev);
+
 int mlx5_os_open_device(struct mlx5_common_device *cdev, uint32_t classes);
 int mlx5_os_pd_prepare(struct mlx5_common_device *cdev);
 int mlx5_os_pd_release(struct mlx5_common_device *cdev);
diff --git a/drivers/common/mlx5/version.map b/drivers/common/mlx5/version.map
index 8301485839..819e6b96cb 100644
--- a/drivers/common/mlx5/version.map
+++ b/drivers/common/mlx5/version.map
@@ -150,6 +150,7 @@ INTERNAL {
 	mlx5_nl_rdma_monitor_info_get; # WINDOWS_NO_EXPORT
 	mlx5_nl_rdma_monitor_cap_get; # WINDOWS_NO_EXPORT
 
+	mlx5_os_get_physical_device_ctx;
 	mlx5_os_umem_dereg;
 	mlx5_os_umem_reg;
 
diff --git a/drivers/common/mlx5/windows/mlx5_common_os.c b/drivers/common/mlx5/windows/mlx5_common_os.c
index 2b6058eb3e..68f1981193 100644
--- a/drivers/common/mlx5/windows/mlx5_common_os.c
+++ b/drivers/common/mlx5/windows/mlx5_common_os.c
@@ -266,6 +266,37 @@ mlx5_os_open_device(struct mlx5_common_device *cdev, uint32_t classes)
 	return -rte_errno;
 }
 
+/**
+ * Open an additional device context for the device behind a common device.
+ *
+ * The returned context is not tied to any single port, so resources created
+ * on it can be set up when one port starts and torn down when another port
+ * stops.
+ *
+ * On Windows a fresh context is opened for the device; the existing
+ * cdev->ctx is only used to name the device in the error log.
+ *
+ * @param cdev
+ *   Pointer to the mlx5 device structure.
+ *
+ * @return
+ *   The newly opened device context on success, or NULL on failure; the
+ *   error is reported through `rte_errno` by mlx5_os_open_device().
+ */
+void *
+mlx5_os_get_physical_device_ctx(struct mlx5_common_device *cdev)
+{
+	struct mlx5_common_device scratch = { .dev = cdev->dev };
+	int ret = mlx5_os_open_device(&scratch, MLX5_CLASS_ETH);
+
+	if (ret >= 0)
+		return scratch.ctx;
+	DRV_LOG(ERR, "Failed to duplicate DevX device \"%s\": %s",
+		mlx5_os_get_ctx_device_name(cdev->ctx),
+		rte_strerror(rte_errno));
+	return NULL;
+}
+
 /**
  * Register umem.
  *
-- 
2.45.2



More information about the dev mailing list