[PATCH v2 6/6] eal/memory: add page size VA limits EAL parameter
Burakov, Anatoly
anatoly.burakov at intel.com
Mon Jun 1 11:21:41 CEST 2026
On 5/26/2026 6:16 PM, Bruce Richardson wrote:
> On Fri, Mar 13, 2026 at 04:06:37PM +0000, Anatoly Burakov wrote:
>> Currently, the VA space limits placed on DPDK memory are only informed by
>> the default configuration coming from `rte_config.h` file. Add an EAL flag
>> to specify per-page size memory limits explicitly, thereby overriding the
>> default VA space reservations.
>>
>> Signed-off-by: Anatoly Burakov <anatoly.burakov at intel.com>
>> ---
> Acked-by: Bruce Richardson <bruce.richardson at intel.com>
>
> CI reports some errors with 32-bit builds. Also a couple of small comments
> inline below.
>
> Thanks for the series, looks like some nice simplification.
>
>> app/test/test.c | 1 +
>> app/test/test_eal_flags.c | 126 ++++++++++++++++++
>> doc/guides/linux_gsg/linux_eal_parameters.rst | 13 ++
>> .../prog_guide/env_abstraction_layer.rst | 27 +++-
>> lib/eal/common/eal_common_dynmem.c | 9 ++
>> lib/eal/common/eal_common_options.c | 121 +++++++++++++++++
>> lib/eal/common/eal_internal_cfg.h | 6 +
>> lib/eal/common/eal_option_list.h | 1 +
>> 8 files changed, 302 insertions(+), 2 deletions(-)
>>
>> diff --git a/app/test/test.c b/app/test/test.c
>> index 58ef52f312..c610c3588e 100644
>> --- a/app/test/test.c
>> +++ b/app/test/test.c
>> @@ -80,6 +80,7 @@ do_recursive_call(void)
>> { "test_memory_flags", no_action },
>> { "test_file_prefix", no_action },
>> { "test_no_huge_flag", no_action },
>> + { "test_pagesz_mem_flags", no_action },
>> { "test_panic", test_panic },
>> { "test_exit", test_exit },
>> #ifdef RTE_LIB_TIMER
>> diff --git a/app/test/test_eal_flags.c b/app/test/test_eal_flags.c
>> index b3a8d0ae6f..4e1038be75 100644
>> --- a/app/test/test_eal_flags.c
>> +++ b/app/test/test_eal_flags.c
>> @@ -95,6 +95,14 @@ test_misc_flags(void)
>> return TEST_SKIPPED;
>> }
>>
>> +static int
>> +test_pagesz_mem_flags(void)
>> +{
>> + printf("pagesz_mem_flags not supported on Windows, skipping test\n");
>> + return TEST_SKIPPED;
>> +}
>> +
>> +
>> #else
>>
>> #include <libgen.h>
>> @@ -1502,6 +1510,123 @@ populate_socket_mem_param(int num_sockets, const char *mem,
>> offset += written;
>> }
>>
>> +/*
>> + * Tests for correct handling of --pagesz-mem flag
>> + */
>> +static int
>> +test_pagesz_mem_flags(void)
>> +{
>> +#ifdef RTE_EXEC_ENV_FREEBSD
>> + /* FreeBSD does not support --pagesz-mem */
>> + return 0;
>> +#else
>> + const char *in_memory = "--in-memory";
>> +
>> + /* invalid: no value */
>> + const char * const argv0[] = {prgname, eal_debug_logs, no_pci,
>> + "--file-prefix=" memtest, in_memory, "--pagesz-mem="};
>> +
>> + /* invalid: no colon (missing limit) */
>> + const char * const argv1[] = {prgname, eal_debug_logs, no_pci,
>> + "--file-prefix=" memtest, in_memory, "--pagesz-mem=2M"};
>> +
>> + /* invalid: colon present but limit is empty */
>> + const char * const argv2[] = {prgname, eal_debug_logs, no_pci,
>> + "--file-prefix=" memtest, in_memory, "--pagesz-mem=2M:"};
>> +
>> + /* invalid: limit not aligned to page size (3M is not a multiple of 2M) */
>> + const char * const argv3[] = {prgname, eal_debug_logs, no_pci,
>> + "--file-prefix=" memtest, in_memory, "--pagesz-mem=2M:3M"};
>> +
>> + /* invalid: garbage value */
>> + const char * const argv4[] = {prgname, eal_debug_logs, no_pci,
>> + "--file-prefix=" memtest, in_memory, "--pagesz-mem=garbage"};
>> +
>> + /* invalid: garbage value */
>> + const char * const argv5[] = {prgname, eal_debug_logs, no_pci,
>> + "--file-prefix=" memtest, in_memory, "--pagesz-mem=2M:garbage"};
>> +
>> + /* invalid: --pagesz-mem combined with --no-huge */
>> + const char * const argv6[] = {prgname, eal_debug_logs, no_pci,
>> + "--file-prefix=" memtest, in_memory, no_huge, "--pagesz-mem=2M:2M"};
>> +
>> + /* valid: single well-formed aligned pair */
>> + const char * const argv7[] = {prgname, eal_debug_logs, no_pci,
>> + "--file-prefix=" memtest, in_memory, "--pagesz-mem=2M:64M"};
>> +
>> + /* valid: multiple occurrences */
>> + const char * const argv8[] = {prgname, eal_debug_logs, no_pci,
>> + "--file-prefix=" memtest, in_memory,
>> + "--pagesz-mem=2M:64M", "--pagesz-mem=1K:8K"};
>> +
>> + /* valid: fake page size set to zero (ignored but syntactically valid) */
>> + const char * const argv9[] = {prgname, eal_debug_logs, no_pci,
>> + "--file-prefix=" memtest, in_memory, "--pagesz-mem=1K:0"};
>> +
>> + /* invalid: page size must be a power of two */
>> + const char * const argv10[] = {prgname, eal_debug_logs, no_pci,
>> + "--file-prefix=" memtest, in_memory, "--pagesz-mem=3M:6M"};
>> +
>> + if (launch_proc(argv0) == 0) {
>> + printf("Error (line %d) - process run ok with empty --pagesz-mem!\n",
>> + __LINE__);
>> + return -1;
>> + }
>> + if (launch_proc(argv1) == 0) {
>> + printf("Error (line %d) - process run ok with --pagesz-mem missing colon!\n",
>> + __LINE__);
>> + return -1;
>> + }
>> + if (launch_proc(argv2) == 0) {
>> + printf("Error (line %d) - process run ok with --pagesz-mem missing limit!\n",
>> + __LINE__);
>> + return -1;
>> + }
>> + if (launch_proc(argv3) == 0) {
>> + printf("Error (line %d) - process run ok with --pagesz-mem unaligned limit!\n",
>> + __LINE__);
>> + return -1;
>> + }
>> + if (launch_proc(argv4) == 0) {
>> + printf("Error (line %d) - process run ok with --pagesz-mem garbage value!\n",
>> + __LINE__);
>> + return -1;
>> + }
>> + if (launch_proc(argv5) == 0) {
>> + printf("Error (line %d) - process run ok with --pagesz-mem garbage value!\n",
>> + __LINE__);
>> + return -1;
>> + }
>> + if (launch_proc(argv6) == 0) {
>> + printf("Error (line %d) - process run ok with --pagesz-mem and --no-huge!\n",
>> + __LINE__);
>> + return -1;
>> + }
>> + if (launch_proc(argv7) != 0) {
>> + printf("Error (line %d) - process failed with valid --pagesz-mem!\n",
>> + __LINE__);
>> + return -1;
>> + }
>> + if (launch_proc(argv8) != 0) {
>> + printf("Error (line %d) - process failed with multiple valid --pagesz-mem!\n",
>> + __LINE__);
>> + return -1;
>> + }
>> + if (launch_proc(argv9) != 0) {
>> + printf("Error (line %d) - process failed with --pagesz-mem zero limit!\n",
>> + __LINE__);
>> + return -1;
>> + }
>> + if (launch_proc(argv10) == 0) {
>> + printf("Error (line %d) - process run ok with non-power-of-two pagesz!\n",
>> + __LINE__);
>> + return -1;
>> + }
>> +
>> + return 0;
>> +#endif /* !RTE_EXEC_ENV_FREEBSD */
>> +}
>> +
>> /*
>> * Tests for correct handling of -m and --socket-mem flags
>> */
>> @@ -1683,5 +1808,6 @@ REGISTER_FAST_TEST(eal_flags_b_opt_autotest, NOHUGE_SKIP, ASAN_SKIP, test_invali
>> REGISTER_FAST_TEST(eal_flags_vdev_opt_autotest, NOHUGE_SKIP, ASAN_SKIP, test_invalid_vdev_flag);
>> REGISTER_FAST_TEST(eal_flags_r_opt_autotest, NOHUGE_SKIP, ASAN_SKIP, test_invalid_r_flag);
>> REGISTER_FAST_TEST(eal_flags_mem_autotest, NOHUGE_SKIP, ASAN_SKIP, test_memory_flags);
>> +REGISTER_FAST_TEST(eal_flags_pagesz_mem_autotest, NOHUGE_SKIP, ASAN_SKIP, test_pagesz_mem_flags);
>> REGISTER_FAST_TEST(eal_flags_file_prefix_autotest, NOHUGE_SKIP, ASAN_SKIP, test_file_prefix);
>> REGISTER_FAST_TEST(eal_flags_misc_autotest, NOHUGE_SKIP, ASAN_SKIP, test_misc_flags);
>> diff --git a/doc/guides/linux_gsg/linux_eal_parameters.rst b/doc/guides/linux_gsg/linux_eal_parameters.rst
>> index 7c5b26ce26..ce38dd128a 100644
>> --- a/doc/guides/linux_gsg/linux_eal_parameters.rst
>> +++ b/doc/guides/linux_gsg/linux_eal_parameters.rst
>> @@ -75,6 +75,19 @@ Memory-related options
>> Place a per-NUMA node upper limit on memory use (non-legacy memory mode only).
>> 0 will disable the limit for a particular NUMA node.
>>
>> +* ``--pagesz-mem <page size:limit>``
>> +
>> + Set memory limit per hugepage size.
>> + Each time the option is used, provide a single ``<pagesz>:<limit>`` pair;
>> + repeat the option to specify additional page sizes.
>> + Both values support K/M/G/T suffixes (for example ``2M:32G``).
>> +
>> + The memory limit must be a multiple of page size.
>> +
>> + For example::
>> +
>> + --pagesz-mem 2M:32G --pagesz-mem 1G:512G
>> +
>> * ``--single-file-segments``
>>
>> Create fewer files in hugetlbfs (non-legacy mode only).
>> diff --git a/doc/guides/prog_guide/env_abstraction_layer.rst b/doc/guides/prog_guide/env_abstraction_layer.rst
>> index 63e0568afa..e2adf0a184 100644
>> --- a/doc/guides/prog_guide/env_abstraction_layer.rst
>> +++ b/doc/guides/prog_guide/env_abstraction_layer.rst
>> @@ -204,13 +204,36 @@ of virtual memory being preallocated at startup by editing the following config
>> variables:
>>
>> * ``RTE_MAX_MEMSEG_LISTS`` controls how many segment lists can DPDK have
>> -* ``RTE_MAX_MEMSEG_PER_TYPE`` controls how many segments each memory type
>> +* ``RTE_MAX_MEMSEG_PER_TYPE`` sets the default number of segments each memory type
>> can have (where "type" is defined as "page size + NUMA node" combination)
>> -* ``RTE_MAX_MEM_MB_PER_TYPE`` controls how much megabytes of memory each
>> +* ``RTE_MAX_MEM_MB_PER_TYPE`` sets the default amount of memory each
>> memory type can address
>>
>> Normally, these options do not need to be changed.
>>
>> +Runtime Override of Per-Page-Size Memory Limits
>> +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
>> +
>> +By default, DPDK uses compile-time configured limits for memory allocation per page size
>> +(as set by ``RTE_MAX_MEM_MB_PER_TYPE``).
>> +These limits apply uniformly across all NUMA nodes for a given page size.
>> +
>> +It is possible to override these defaults at runtime using the ``--pagesz-mem`` option,
>> +which allows specifying custom memory limits for each page size. This is useful when:
>> +
>> +* The default limits may be insufficient or excessive for your workload
>> +* You want to dedicate more memory to specific page sizes
>> +
>> +The ``--pagesz-mem`` option accepts exactly one ``<pagesz>:<limit>`` pair per
>> +occurrence, where ``pagesz`` is a page size (e.g., ``2M``, ``4M``, ``1G``)
>> +and ``limit`` is the maximum memory to reserve for that page size (e.g., ``64G``, ``512M``).
>> +Both values support standard binary suffixes (K, M, G, T).
>> +Memory limits must be aligned to their corresponding page size.
>> +
>> +Multiple page sizes can be specified by repeating the option::
>> +
>> + --pagesz-mem 2M:64G --pagesz-mem 1G:512G
>> +
>> .. note::
>>
>> Preallocated virtual memory is not to be confused with preallocated hugepage
>> diff --git a/lib/eal/common/eal_common_dynmem.c b/lib/eal/common/eal_common_dynmem.c
>> index c33fbdea6d..7096f46ff3 100644
>> --- a/lib/eal/common/eal_common_dynmem.c
>> +++ b/lib/eal/common/eal_common_dynmem.c
>> @@ -127,6 +127,11 @@ eal_dynmem_memseg_lists_init(void)
>> mem_va_len += type->mem_sz;
>> }
>>
>> + if (mem_va_len == 0) {
>> + EAL_LOG(ERR, "No virtual memory will be reserved");
>> + goto out;
>> + }
>> +
>> mem_va_addr = eal_get_virtual_area(NULL, &mem_va_len,
>> mem_va_page_sz, 0, 0);
>> if (mem_va_addr == NULL) {
>> @@ -141,6 +146,10 @@ eal_dynmem_memseg_lists_init(void)
>> uint64_t pagesz;
>> int socket_id;
>>
>> + /* skip page sizes with zero memory limit */
>> + if (type->n_segs == 0)
>> + continue;
>> +
>> pagesz = type->page_sz;
>> socket_id = type->socket_id;
>>
>> diff --git a/lib/eal/common/eal_common_options.c b/lib/eal/common/eal_common_options.c
>> index bbc4427524..0532d27aaa 100644
>> --- a/lib/eal/common/eal_common_options.c
>> +++ b/lib/eal/common/eal_common_options.c
>> @@ -21,6 +21,7 @@
>> #endif
>>
>> #include <rte_string_fns.h>
>> +#include <rte_common.h>
>> #include <rte_eal.h>
>> #include <rte_log.h>
>> #include <rte_lcore.h>
>> @@ -233,6 +234,20 @@ eal_collate_args(int argc, char **argv)
>> EAL_LOG(ERR, "Options allow (-a) and block (-b) can't be used at the same time");
>> return -1;
>> }
>> +#ifdef RTE_EXEC_ENV_FREEBSD
>> + if (!TAILQ_EMPTY(&args.pagesz_mem)) {
>> + EAL_LOG(ERR, "Option pagesz-mem is not supported on FreeBSD");
>> + return -1;
>> + }
>> +#endif
>> + if (!TAILQ_EMPTY(&args.pagesz_mem) && args.no_huge) {
>> + EAL_LOG(ERR, "Options pagesz-mem and no-huge can't be used at the same time");
>> + return -1;
>> + }
>> + if (!TAILQ_EMPTY(&args.pagesz_mem) && args.legacy_mem) {
>> + EAL_LOG(ERR, "Options pagesz-mem and legacy-mem can't be used at the same time");
>> + return -1;
>> + }
>>
>> /* for non-list args, we can just check for zero/null values using macro */
>> if (CONFLICTING_OPTIONS(args, coremask, lcores) ||
>> @@ -511,7 +526,10 @@ eal_reset_internal_config(struct internal_config *internal_cfg)
>> sizeof(internal_cfg->hugepage_info[0]));
>> internal_cfg->hugepage_info[i].lock_descriptor = -1;
>> internal_cfg->hugepage_mem_sz_limits[i] = 0;
>> + internal_cfg->pagesz_mem_overrides[i].pagesz = 0;
>> + internal_cfg->pagesz_mem_overrides[i].limit = 0;
>> }
>> + internal_cfg->num_pagesz_mem_overrides = 0;
>> internal_cfg->base_virtaddr = 0;
>>
>> /* if set to NONE, interrupt mode is determined automatically */
>> @@ -1867,6 +1885,96 @@ eal_parse_socket_arg(char *strval, volatile uint64_t *socket_arg)
>> return 0;
>> }
>>
>> +static int
>> +eal_parse_pagesz_mem(char *strval, struct internal_config *internal_cfg)
>> +{
>> + char strval_cpy[1024];
>> + char *fields[3];
>> + char *pagesz_str, *mem_str;
>> + int arg_num;
>> + int len;
>> + unsigned int i;
>> + uint64_t pagesz, mem_limit;
>> + struct pagesz_mem_override *pmo;
>> +
>> + len = strnlen(strval, 1024);
>> + if (len >= 1024) {
>> + EAL_LOG(ERR, "--pagesz-mem parameter is too long");
>> + return -1;
>> + }
>> +
>> + rte_strlcpy(strval_cpy, strval, sizeof(strval_cpy));
>> +
>> + /* parse exactly one pagesz:mem pair per --pagesz-mem option */
>> + arg_num = rte_strsplit(strval_cpy, len, fields, RTE_DIM(fields), ':');
>> + if (arg_num != 2 || fields[0][0] == '\0' || fields[1][0] == '\0') {
>> + EAL_LOG(ERR, "--pagesz-mem parameter format is invalid, expected <pagesz>:<limit>");
>> + return -1;
>> + }
>> + pagesz_str = fields[0];
>> + mem_str = fields[1];
>> +
>> + /* reject accidental multiple pairs in one option */
>> + if (strchr(mem_str, ',') != NULL) {
>> + EAL_LOG(ERR, "--pagesz-mem accepts one <pagesz>:<limit> pair per option");
>> + return -1;
>> + }
>
> If multiple options are given, then the rte_strsplit should return >2 when
> splitting on ":". I'd suggest checking for the comma first, before even
> doing the strlcpy.
>
The intention is to accept exactly one pair per option, so multiple
pairs in a single option are not, um, an option. Effectively, there
should *not* be commas anywhere in the string, so we could check for commas.
However, *technically* there could be characters other than commas, and
they should have no effect on parsing other than to trigger an error,
so perhaps a specific check for commas is not needed, as any deviation
from `--pagesz-mem=A:B` will be invalid.
--
Thanks,
Anatoly
More information about the dev
mailing list