[dpdk-dev] [PATCH v2 4/4] net/mlx5: spawn rdma-core dependency plug-in

Adrien Mazarguil adrien.mazarguil at 6wind.com
Fri Jan 26 15:19:04 CET 2018


When mlx5 is not compiled directly as an independent shared object (e.g.
CONFIG_RTE_BUILD_SHARED_LIB not enabled for performance reasons), DPDK
applications inherit its dependencies on libibverbs and libmlx5 through
rte.app.mk.

This is an issue both when DPDK is delivered as a binary package (Linux
distributions) and for end users because rdma-core then propagates as a
mandatory dependency for everything.

Application writers relying on binary DPDK packages are not necessarily
aware of this fact and may end up delivering packages with broken
dependencies.

This patch therefore introduces an intermediate internal plug-in
hard-linked with rdma-core (to preserve symbol versioning) loaded by the
PMD through dlopen(), so that a missing rdma-core does not cause unresolved
symbols, allowing applications to start normally.

Signed-off-by: Adrien Mazarguil <adrien.mazarguil at 6wind.com>
---
 config/common_base        |  1 +
 doc/guides/nics/mlx5.rst  | 13 +++++++
 drivers/net/mlx5/Makefile | 41 ++++++++++++++++++++++
 drivers/net/mlx5/mlx5.c   | 79 ++++++++++++++++++++++++++++++++++++++++++
 mk/rte.app.mk             |  4 +++
 5 files changed, 138 insertions(+)

diff --git a/config/common_base b/config/common_base
index f29f9e3a0..5564576fc 100644
--- a/config/common_base
+++ b/config/common_base
@@ -306,6 +306,7 @@ CONFIG_RTE_LIBRTE_MLX4_TX_MP_CACHE=8
 #
 CONFIG_RTE_LIBRTE_MLX5_PMD=n
 CONFIG_RTE_LIBRTE_MLX5_DEBUG=n
+CONFIG_RTE_LIBRTE_MLX5_DLOPEN_DEPS=y
 CONFIG_RTE_LIBRTE_MLX5_TX_MP_CACHE=8
 
 #
diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index 2e6d1e45a..c8bd02112 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -159,6 +159,19 @@ These options can be modified in the ``.config`` file.
 
   Toggle compilation of librte_pmd_mlx5 itself.
 
+- ``CONFIG_RTE_LIBRTE_MLX5_DLOPEN_DEPS`` (default **y**)
+
+  Build PMD with additional code to make it loadable without hard
+  dependencies on **libibverbs** or **libmlx5**, which may not be installed
+  on the target system.
+
+  In this mode, their presence is still required for it to run properly;
+  however, their absence won't prevent a DPDK application from starting (with
+  ``CONFIG_RTE_BUILD_SHARED_LIB`` disabled) and they won't show up as
+  missing with ``ldd(1)``.
+
+  This option has no performance impact.
+
 - ``CONFIG_RTE_LIBRTE_MLX5_DEBUG`` (default **n**)
 
   Toggle debugging code and stricter compilation flags. Enabling this option
diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index bdec30692..761e60afd 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -53,7 +53,11 @@ SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rss.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_mr.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_socket.c
+ifeq ($(CONFIG_RTE_LIBRTE_MLX5_DLOPEN_DEPS),y)
+SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_glue_lib.c
+else
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_glue.c
+endif
 
 # Basic CFLAGS.
 CFLAGS += -O3
@@ -65,7 +69,12 @@ CFLAGS += -D_DEFAULT_SOURCE
 CFLAGS += -D_XOPEN_SOURCE=600
 CFLAGS += $(WERROR_FLAGS)
 CFLAGS += -Wno-strict-prototypes
+ifeq ($(CONFIG_RTE_LIBRTE_MLX5_DLOPEN_DEPS),y)
+CFLAGS_mlx5_glue.o += -fPIC
+LDLIBS += -ldl
+else
 LDLIBS += -libverbs -lmlx5
+endif
 LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
 LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
 LDLIBS += -lrte_bus_pci
@@ -158,7 +167,39 @@ mlx5_autoconf.h: mlx5_autoconf.h.new
 
 $(SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD):.c=.o): mlx5_autoconf.h
 
+# Generate dependency plug-in for rdma-core when the PMD must not be linked
+# directly, so that applications do not inherit this dependency.
+
+ifeq ($(CONFIG_RTE_LIBRTE_MLX5_DLOPEN_DEPS),y)
+
+mlx5_glue_lib.c: mlx5_glue_lib.so
+	$Q printf '#include <stddef.h>\n' > $@
+	$Q printf '#include <stdint.h>\n\n' >> $@
+	$Q printf 'const uint8_t mlx5_glue_lib[][16] = {\n' >> $@
+	$Q od -vt x1 $< | \
+	sed -ne '/^[[:xdigit:]]\{1,\}/{' \
+		-e 's///;' \
+		-e '/^$$/d; ' \
+		-e 's/[[:space:]]*$$//;' \
+		-e 's/[[:space:]]\{1,\}/\\x/g;' \
+		-e 's/^/	"/;' \
+		-e 's/$$/",/;' \
+		-e 'p;' \
+		-e '}' >> $@
+	$Q printf '};\n\n' >> $@
+	$Q printf 'const size_t mlx5_glue_lib_size = %u;\n' \
+		$$(wc -c < $<) >> $@
+
+mlx5_glue_lib.so: mlx5_glue.o
+	$Q $(LD) $(LDFLAGS) $(EXTRA_LDFLAGS) \
+		-s -shared -o $@ $< -libverbs -lmlx5
+
+mlx5_glue.o: mlx5_autoconf.h
+
+endif
+
 clean_mlx5: FORCE
 	$Q rm -f -- mlx5_autoconf.h mlx5_autoconf.h.new
+	$Q rm -f -- mlx5_glue.o mlx5_glue_lib.*
 
 clean: clean_mlx5
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index b911d0ad4..05c9768d0 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -35,6 +35,7 @@
 #include <unistd.h>
 #include <string.h>
 #include <assert.h>
+#include <dlfcn.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <errno.h>
@@ -57,6 +58,7 @@
 #include <rte_pci.h>
 #include <rte_bus_pci.h>
 #include <rte_common.h>
+#include <rte_config.h>
 #include <rte_eal_memconfig.h>
 #include <rte_kvargs.h>
 
@@ -1075,6 +1077,78 @@ static struct rte_pci_driver mlx5_driver = {
 	.drv_flags = RTE_PCI_DRV_INTR_LSC | RTE_PCI_DRV_INTR_RMV,
 };
 
+#ifdef RTE_LIBRTE_MLX5_DLOPEN_DEPS
+
+extern const uint8_t mlx5_glue_lib[][16];
+extern const size_t mlx5_glue_lib_size;
+
+/**
+ * Initialization routine for run-time dependency on rdma-core.
+ */
+static int
+mlx5_glue_init(void)
+{
+	char file[] = "/tmp/" MLX5_DRIVER_NAME "_XXXXXX";
+	int fd = mkstemp(file); /* Template in file[] is rewritten in place. */
+	size_t off = 0;
+	void *handle = NULL;
+	void **sym;
+	const char *dlmsg;
+
+	if (fd == -1) {
+		rte_errno = errno;
+		goto glue_error;
+	}
+	while (off != mlx5_glue_lib_size) { /* Dump embedded library. */
+		ssize_t ret;
+
+		ret = write(fd, (const uint8_t *)mlx5_glue_lib + off,
+			    mlx5_glue_lib_size - off);
+		if (ret == -1) {
+			if (errno != EINTR) {
+				rte_errno = errno;
+				goto glue_error;
+			}
+			ret = 0; /* Interrupted: retry at the same offset. */
+		}
+		off += ret; /* Short writes resume where they stopped. */
+	}
+	close(fd);
+	fd = -1; /* Tells glue_error the descriptor is already closed. */
+	handle = dlopen(file, RTLD_LAZY);
+	unlink(file); /* Removed whether dlopen() succeeded or not. */
+	if (!handle) {
+		rte_errno = EINVAL;
+		dlmsg = dlerror();
+		if (dlmsg)
+			ERROR("cannot load glue library: %s", dlmsg);
+		goto glue_error;
+	}
+	sym = dlsym(handle, "mlx5_glue");
+	if (!sym || !*sym) { /* Symbol missing or pointer unset. */
+		rte_errno = EINVAL;
+		dlmsg = dlerror();
+		if (dlmsg)
+			ERROR("cannot resolve glue symbol: %s", dlmsg);
+		goto glue_error;
+	}
+	mlx5_glue = *sym; /* Handle is kept open on success. */
+	return 0;
+glue_error:
+	if (handle)
+		dlclose(handle);
+	if (fd != -1) { /* Failed before the file was closed/unlinked. */
+		close(fd);
+		unlink(file);
+	}
+	ERROR("cannot initialize PMD due to missing run-time"
+	      " dependency on rdma-core libraries (libibverbs,"
+	      " libmlx5)");
+	return -rte_errno; /* Negated rte_errno set at the failure site. */
+}
+
+#endif
+
 /**
  * Driver initialization routine.
  */
@@ -1094,6 +1168,11 @@ rte_mlx5_pmd_init(void)
 	/* Match the size of Rx completion entry to the size of a cacheline. */
 	if (RTE_CACHE_LINE_SIZE == 128)
 		setenv("MLX5_CQE_SIZE", "128", 0);
+#ifdef RTE_LIBRTE_MLX5_DLOPEN_DEPS
+	if (mlx5_glue_init())
+		return;
+	assert(mlx5_glue);
+#endif
 	mlx5_glue->fork_init();
 	rte_pci_register(&mlx5_driver);
 }
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index fdbe36630..88f626049 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -148,7 +148,11 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_MLX4_PMD)       += -lrte_pmd_mlx4 -ldl
 else
 _LDLIBS-$(CONFIG_RTE_LIBRTE_MLX4_PMD)       += -lrte_pmd_mlx4 -libverbs -lmlx4
 endif
+ifeq ($(CONFIG_RTE_LIBRTE_MLX5_DLOPEN_DEPS),y)
+_LDLIBS-$(CONFIG_RTE_LIBRTE_MLX5_PMD)       += -lrte_pmd_mlx5 -ldl
+else
 _LDLIBS-$(CONFIG_RTE_LIBRTE_MLX5_PMD)       += -lrte_pmd_mlx5 -libverbs -lmlx5
+endif
 _LDLIBS-$(CONFIG_RTE_LIBRTE_MRVL_PMD)       += -lrte_pmd_mrvl -L$(LIBMUSDK_PATH)/lib -lmusdk
 _LDLIBS-$(CONFIG_RTE_LIBRTE_NFP_PMD)        += -lrte_pmd_nfp
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_NULL)       += -lrte_pmd_null
-- 
2.11.0


More information about the dev mailing list