[EXTERNAL] [RFC] app/testpmd: add configurable flow count for txonly multi-flow

Long Li longli at microsoft.com
Fri Feb 27 00:51:27 CET 2026


> Subject: [EXTERNAL] [RFC] app/testpmd: add configurable flow count for txonly
> multi-flow
> 
> Started with Long's patch to change the port and added a parameter.
> This is a suggestion only *DO NOT MERGE*
> Not sure what a good name for the parameter is; this is just a quick hack.
> 
> The txonly multi-flow mode generates 64 unique UDP source ports per lcore by
> cycling the high byte from 0xC0 to 0xFF. On SmartNICs with limited hardware
> flow table caching, this fixed count can exhaust the flow cache and degrade
> receive-side performance.
> 
> Add --txonly-nb-flows=N command line parameter and 'set txonly-nb-flows'
> runtime command to limit the number of unique source ports per lcore to
> between 1 and 64. The default remains 64 to preserve existing behavior.
> 
> The source port encoding is unchanged: the low byte carries the lcore ID (avoiding
> atomics) and the high byte cycles through N values starting at 0xC0. Total unique
> flows = txonly_nb_flows * active_lcores.
> 
> Reported-by: Long Li <longli at microsoft.com>
> Signed-off-by: Stephen Hemminger <stephen at networkplumber.org>

Thank you, worked well for Azure VM at 200Gb/s.

Tested-by: Long Li <longli at microsoft.com>


> ---
> 
>  app/test-pmd/cmdline.c                      | 49 +++++++++++++++++++++
>  app/test-pmd/parameters.c                   | 13 ++++++
>  app/test-pmd/testpmd.c                      |  3 ++
>  app/test-pmd/testpmd.h                      |  1 +
>  app/test-pmd/txonly.c                       | 24 +++++-----
>  doc/guides/testpmd_app_ug/run_app.rst       |  8 ++++
>  doc/guides/testpmd_app_ug/testpmd_funcs.rst | 16 +++++++
>  7 files changed, 104 insertions(+), 10 deletions(-)
> 
> diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
> index c33c66f327..debd226762 100644
> --- a/app/test-pmd/cmdline.c
> +++ b/app/test-pmd/cmdline.c
> @@ -384,6 +384,10 @@ static void cmd_help_long_parsed(void *parsed_result,
>  			"    Set the scheduling on timestamps"
>  			" timings for the TXONLY mode\n\n"
> 
> +			"set txonly-nb-flows (N)\n"
> +			"    Set the number of flows per lcore in"
> +			" txonly multi-flow mode (1-64)\n\n"
> +
>  			"set corelist (x[,y]*)\n"
>  			"    Set the list of forwarding cores.\n\n"
> 
> @@ -4612,6 +4616,50 @@ static cmdline_parse_inst_t cmd_set_txtimes = {
>  	},
>  };
> 
> +/* *** SET NUMBER OF FLOWS IN TXONLY MULTI-FLOW MODE *** */
> +
> +struct cmd_set_txonly_nb_flows_result {
> +	cmdline_fixed_string_t cmd_keyword;
> +	cmdline_fixed_string_t name;
> +	uint16_t value;
> +};
> +
> +static void
> +cmd_set_txonly_nb_flows_parsed(void *parsed_result,
> +			       __rte_unused struct cmdline *cl,
> +			       __rte_unused void *data)
> +{
> +	struct cmd_set_txonly_nb_flows_result *res = parsed_result;
> +
> +	if (res->value < 1 || res->value > 64) {
> +		fprintf(stderr, "txonly-nb-flows must be >= 1 and <= 64\n");
> +		return;
> +	}
> +	txonly_nb_flows = res->value;
> +}
> +
> +static cmdline_parse_token_string_t cmd_set_txonly_nb_flows_keyword =
> +	TOKEN_STRING_INITIALIZER(struct cmd_set_txonly_nb_flows_result,
> +				 cmd_keyword, "set");
> +static cmdline_parse_token_string_t cmd_set_txonly_nb_flows_name =
> +	TOKEN_STRING_INITIALIZER(struct cmd_set_txonly_nb_flows_result,
> +				 name, "txonly-nb-flows");
> +static cmdline_parse_token_num_t cmd_set_txonly_nb_flows_value =
> +	TOKEN_NUM_INITIALIZER(struct cmd_set_txonly_nb_flows_result,
> +			      value, RTE_UINT16);
> +
> +static cmdline_parse_inst_t cmd_set_txonly_nb_flows = {
> +	.f = cmd_set_txonly_nb_flows_parsed,
> +	.data = NULL,
> +	.help_str = "set txonly-nb-flows <N>",
> +	.tokens = {
> +		(void *)&cmd_set_txonly_nb_flows_keyword,
> +		(void *)&cmd_set_txonly_nb_flows_name,
> +		(void *)&cmd_set_txonly_nb_flows_value,
> +		NULL,
> +	},
> +};
> +
>  /* *** ADD/REMOVE ALL VLAN IDENTIFIERS TO/FROM A PORT VLAN RX FILTER *** */
>  struct cmd_rx_vlan_filter_all_result {
>  	cmdline_fixed_string_t rx_vlan;
> @@ -14102,6 +14150,7 @@ static cmdline_parse_ctx_t builtin_ctx[] = {
>  	&cmd_set_txpkts,
>  	&cmd_set_txsplit,
>  	&cmd_set_txtimes,
> +	&cmd_set_txonly_nb_flows,
>  	&cmd_set_fwd_list,
>  	&cmd_set_fwd_mask,
>  	&cmd_set_fwd_mode,
> diff --git a/app/test-pmd/parameters.c b/app/test-pmd/parameters.c
> index f2037925c2..2432d720fb 100644
> --- a/app/test-pmd/parameters.c
> +++ b/app/test-pmd/parameters.c
> @@ -193,6 +193,8 @@ enum {
>  	TESTPMD_OPT_MULTI_RX_MEMPOOL_NUM,
>  #define TESTPMD_OPT_TXONLY_MULTI_FLOW "txonly-multi-flow"
>  	TESTPMD_OPT_TXONLY_MULTI_FLOW_NUM,
> +#define TESTPMD_OPT_TXONLY_NB_FLOWS "txonly-nb-flows"
> +	TESTPMD_OPT_TXONLY_NB_FLOWS_NUM,
>  #define TESTPMD_OPT_RXQ_SHARE "rxq-share"
>  	TESTPMD_OPT_RXQ_SHARE_NUM,
>  #define TESTPMD_OPT_ETH_LINK_SPEED "eth-link-speed"
> @@ -348,6 +350,7 @@ static const struct option long_options[] = {
>  	REQUIRED_ARG(TESTPMD_OPT_TXPKTS),
>  	NO_ARG(TESTPMD_OPT_MULTI_RX_MEMPOOL),
>  	NO_ARG(TESTPMD_OPT_TXONLY_MULTI_FLOW),
> +	REQUIRED_ARG(TESTPMD_OPT_TXONLY_NB_FLOWS),
>  	OPTIONAL_ARG(TESTPMD_OPT_RXQ_SHARE),
>  	REQUIRED_ARG(TESTPMD_OPT_ETH_LINK_SPEED),
>  	NO_ARG(TESTPMD_OPT_DISABLE_LINK_CHECK),
> @@ -499,6 +502,8 @@ usage(char* progname)
>  		" or total packet length.\n");
>  	printf("  --multi-rx-mempool: enable multi-rx-mempool support\n");
>  	printf("  --txonly-multi-flow: generate multiple flows in txonly mode\n");
> +	printf("  --txonly-nb-flows=N: number of flows per lcore in txonly"
> +	       " multi-flow mode (1-64, default 64)\n");
>  	printf("  --tx-ip=src,dst: IP addresses in Tx-only mode\n");
>  	printf("  --tx-udp=src[,dst]: UDP ports in Tx-only mode\n");
>  	printf("  --eth-link-speed: force link speed.\n");
> @@ -1566,6 +1571,14 @@ launch_args_parse(int argc, char** argv)
>  		case TESTPMD_OPT_TXONLY_MULTI_FLOW_NUM:
>  			txonly_multi_flow = 1;
>  			break;
> +		case TESTPMD_OPT_TXONLY_NB_FLOWS_NUM:
> +			n = atoi(optarg);
> +			if (n >= 1 && n <= 64)
> +				txonly_nb_flows = (uint16_t)n;
> +			else
> +				rte_exit(EXIT_FAILURE,
> +					 "txonly-nb-flows must be >= 1 and <= 64\n");
> +			break;
>  		case TESTPMD_OPT_RXQ_SHARE_NUM:
>  			if (optarg == NULL) {
>  				rxq_share = UINT32_MAX;
> diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
> index fbacee89ea..6661bf16cd 100644
> --- a/app/test-pmd/testpmd.c
> +++ b/app/test-pmd/testpmd.c
> @@ -287,6 +287,9 @@ enum tx_pkt_split tx_pkt_split = TX_PKT_SPLIT_OFF;
>  uint8_t txonly_multi_flow;
>  /**< Whether multiple flows are generated in TXONLY mode. */
> 
> +uint16_t txonly_nb_flows = 64;
> +/**< Number of unique flows per lcore in TXONLY multi-flow mode. */
> +
>  uint32_t tx_pkt_times_inter;
>  /**< Timings for send scheduling in TXONLY mode, time between bursts. */
> 
> diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
> index f319471c73..13c3915848 100644
> --- a/app/test-pmd/testpmd.h
> +++ b/app/test-pmd/testpmd.h
> @@ -673,6 +673,7 @@ enum tx_pkt_split {
>  extern enum tx_pkt_split tx_pkt_split;
> 
>  extern uint8_t txonly_multi_flow;
> +extern uint16_t txonly_nb_flows; /**< Number of flows in txonly multi-flow */
> 
>  extern uint32_t rxq_share;
> 
> diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c
> index bdcf6ea660..7ba9abf656 100644
> --- a/app/test-pmd/txonly.c
> +++ b/app/test-pmd/txonly.c
> @@ -223,18 +223,22 @@ pkt_burst_prepare(struct rte_mbuf *pkt, struct rte_mempool *mbp,
>  				sizeof(struct rte_ether_hdr) +
>  				sizeof(struct rte_ipv4_hdr));
>  		/*
> -		 * Generate multiple flows by varying UDP source port.
> -		 * This enables packets are well distributed by RSS in
> -		 * receiver side if any and txonly mode can be a decent
> -		 * packet generator for developer's quick performance
> -		 * regression test.
> +		 * Generate a configurable number of flows per lcore by
> +		 * varying the UDP source port. The low byte is the lcore
> +		 * ID, ensuring each lcore produces unique ports without
> +		 * atomic operations. The high byte cycles through
> +		 * txonly_nb_flows values starting at 0xC0, keeping ports
> +		 * in the ephemeral range 49152-65535 (RFC 6335).
>  		 *
> -		 * Only ports in the range 49152 (0xC000) and 65535 (0xFFFF)
> -		 * will be used, with the least significant byte representing
> -		 * the lcore ID. As such, the most significant byte will cycle
> -		 * through 0xC0 and 0xFF.
> +		 * Total unique flows = txonly_nb_flows * active_lcores.
> +		 *
> +		 * Note: lcore IDs above 255 will alias in the low byte,
> +		 * causing flow overlap between those lcores. This is
> +		 * acceptable as the total flow count at that scale
> +		 * already exceeds typical hardware flow table sizes.
>  		 */
> -		src_port = ((src_var++ | 0xC0) << 8) + rte_lcore_id();
> +		src_port = (((src_var++ % txonly_nb_flows) + 0xC0) << 8)
> +			   + rte_lcore_id();
>  		udp_hdr->src_port = rte_cpu_to_be_16(src_port);
>  		RTE_PER_LCORE(_src_port_var) = src_var;
>  	}
> diff --git a/doc/guides/testpmd_app_ug/run_app.rst b/doc/guides/testpmd_app_ug/run_app.rst
> index 97d6c75716..a4a57ea383 100644
> --- a/doc/guides/testpmd_app_ug/run_app.rst
> +++ b/doc/guides/testpmd_app_ug/run_app.rst
> @@ -386,6 +386,14 @@ The command line options are:
> 
>      Generate multiple flows in txonly mode.
> 
> +*   ``--txonly-nb-flows=N``
> +
> +    Set the number of unique flows per lcore when txonly multi-flow mode
> +    is enabled. Valid range is 1 to 64. Default is 64, which preserves
> +    the original behavior. Reducing this value limits the number of unique
> +    UDP source ports generated, which can prevent exhaustion of hardware
> +    flow table entries on SmartNICs.
> +
>  *   ``--rxq-share=[X]``
> 
>      Create queues in shared Rx queue mode if device supports.
> diff --git a/doc/guides/testpmd_app_ug/testpmd_funcs.rst b/doc/guides/testpmd_app_ug/testpmd_funcs.rst
> index 62bb167d56..ff1c8a444d 100644
> --- a/doc/guides/testpmd_app_ug/testpmd_funcs.rst
> +++ b/doc/guides/testpmd_app_ug/testpmd_funcs.rst
> @@ -924,6 +924,22 @@ Where:
> 
>  * ``rand`` same as 'on', but number of segments per each packet is a random value between 1 and total number of segments.
> 
> +set txonly-nb-flows
> +~~~~~~~~~~~~~~~~~~~
> +
> +Set the number of unique flows per lcore in txonly multi-flow mode::
> +
> +   testpmd> set txonly-nb-flows <N>
> +
> +Where ``N`` is the number of unique UDP source port values each lcore
> +will cycle through, in the range 1 to 64. Default is 64.
> +
> +Each lcore generates unique flows by combining the flow index with its
> +lcore ID, so the total number of unique flows across the system is
> +``txonly-nb-flows * active_lcores``. Reducing this value can prevent
> +exhaustion of hardware flow table entries on SmartNICs that have
> +limited flow caching capacity.
> +
>  set corelist
>  ~~~~~~~~~~~~
> 
> --
> 2.51.0



More information about the dev mailing list