[dpdk-dev] [PATCH v2 2/7] net/mlx5: add allocate memory from system devarg

Suanming Mou suanmingm at mellanox.com
Thu Jul 16 11:20:11 CEST 2020


Currently, for MLX5 PMD, once millions of flows created, the memory
consumption of the flows are also very huge. For the system with limited
memory, it means the system need to reserve most of the memory as huge
page memory to serve the flows in advance. And other normal applications
will have no chance to use this reserved memory any more. While most of
the time, the system will not have lots of flows, the  reserved huge page
memory becomes a bit waste of memory at most of the time.

By the new sys_mem_en devarg, once set it to be true, it allows the PMD
allocate the memory from system by default with the new add mlx5 memory
management functions. Only once the MLX5_MEM_RTE flag is set, the memory
will be allocate from rte, otherwise, it allocates memory from system.

So in this case, the system with limited memory no need to reserve most
of the memory for hugepage. Only some needed memory for datapath objects
will be enough to allocated with explicitly flag. Other memory will be
allocated from system. For system with enough memory, no need to care
about the devarg, the memory will always be from rte hugepage.

One restriction is that for DPDK application with multiple PCI devices,
if the sys_mem_en devargs are different between the devices, the
sys_mem_en only gets the value from the first device devargs, and print
out a message to warn that.

Signed-off-by: Suanming Mou <suanmingm at mellanox.com>
Acked-by: Matan Azrad <matan at mellanox.com>
---
 doc/guides/nics/mlx5.rst         | 7 +++++++
 drivers/net/mlx5/linux/mlx5_os.c | 2 ++
 drivers/net/mlx5/mlx5.c          | 6 ++++++
 drivers/net/mlx5/mlx5.h          | 1 +
 4 files changed, 16 insertions(+)

diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index 4b6d8fb..d86b5c7 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -879,6 +879,13 @@ Driver options
 
   By default, the PMD will set this value to 0.
 
+- ``sys_mem_en`` parameter [int]
+
+  A nonzero value enables the PMD memory management function allocate memory
+  from system by default without explicitly rte memory flag.
+
+  By default, the PMD will set this value to 0.
+
 .. _mlx5_firmware_config:
 
 Firmware configuration
diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 2dc57b2..d5acef0 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -43,6 +43,7 @@
 #include <mlx5_common.h>
 #include <mlx5_common_mp.h>
 #include <mlx5_common_mr.h>
+#include <mlx5_malloc.h>
 
 #include "mlx5_defs.h"
 #include "mlx5.h"
@@ -495,6 +496,7 @@
 			strerror(rte_errno));
 		goto error;
 	}
+	mlx5_malloc_mem_select(config.sys_mem_en);
 	sh = mlx5_alloc_shared_dev_ctx(spawn, &config);
 	if (!sh)
 		return NULL;
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 0c654ed..9b17266 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -167,6 +167,9 @@
 /* Flow memory reclaim mode. */
 #define MLX5_RECLAIM_MEM "reclaim_mem_mode"
 
+/* The default memory alloctor used in PMD. */
+#define MLX5_SYS_MEM_EN "sys_mem_en"
+
 static const char *MZ_MLX5_PMD_SHARED_DATA = "mlx5_pmd_shared_data";
 
 /* Shared memory between primary and secondary processes. */
@@ -1374,6 +1377,8 @@ struct mlx5_dev_ctx_shared *
 			return -rte_errno;
 		}
 		config->reclaim_mode = tmp;
+	} else if (strcmp(MLX5_SYS_MEM_EN, key) == 0) {
+		config->sys_mem_en = !!tmp;
 	} else {
 		DRV_LOG(WARNING, "%s: unknown parameter", key);
 		rte_errno = EINVAL;
@@ -1430,6 +1435,7 @@ struct mlx5_dev_ctx_shared *
 		MLX5_CLASS_ARG_NAME,
 		MLX5_HP_BUF_SIZE,
 		MLX5_RECLAIM_MEM,
+		MLX5_SYS_MEM_EN,
 		NULL,
 	};
 	struct rte_kvargs *kvlist;
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 46e66eb..967f5d8 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -216,6 +216,7 @@ struct mlx5_dev_config {
 	unsigned int devx:1; /* Whether devx interface is available or not. */
 	unsigned int dest_tir:1; /* Whether advanced DR API is available. */
 	unsigned int reclaim_mode:2; /* Memory reclaim mode. */
+	unsigned int sys_mem_en:1; /* The default memory allocator. */
 	struct {
 		unsigned int enabled:1; /* Whether MPRQ is enabled. */
 		unsigned int stride_num_n; /* Number of strides. */
-- 
1.8.3.1



More information about the dev mailing list