[PATCH v5 2/2] app/testpmd: support multiple cores processing one TC
Chengwen Feng
fengchengwen at huawei.com
Mon Nov 10 08:36:10 CET 2025
Currently, one TC can be processed by only one core; when there are a
large number of small packets, this core becomes a bottleneck.

This commit adds support for processing one TC with multiple cores, via
the command:
  set dcb fwd_tc_cores (tc_cores)
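For example, with a port that supports DCB (the port id, queue numbers,
TC number and core count below are only illustrative):
  testpmd> port stop all
  testpmd> port config 0 dcb vt off 4 pfc off
  testpmd> port config all rxq 16
  testpmd> port config all txq 16
  testpmd> port start all
  testpmd> set nbcore 8
  testpmd> set dcb fwd_tc_cores 2
  testpmd> start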
Signed-off-by: Chengwen Feng <fengchengwen at huawei.com>
---
app/test-pmd/cmdline.c | 48 ++++++++++++
app/test-pmd/config.c | 82 +++++++++++++++++----
app/test-pmd/testpmd.c | 9 +++
app/test-pmd/testpmd.h | 1 +
doc/guides/testpmd_app_ug/testpmd_funcs.rst | 8 ++
5 files changed, 132 insertions(+), 16 deletions(-)
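
For illustration, a minimal standalone sketch of the per-TC queue split that
dcb_fwd_config_setup() performs below (simplified to a single VMDQ pool;
split_tc_queues() and its parameters are illustrative names, not testpmd
symbols):

#include <stdint.h>
#include <stdio.h>

/*
 * Split one TC's queue range [base, base + nb_queue) evenly across
 * tc_cores cores. nb_queue must be divisible by tc_cores; the patch
 * rejects other configurations in dcb_fwd_check_cores_per_tc().
 */
static void
split_tc_queues(uint16_t base, uint16_t nb_queue, uint8_t tc_cores)
{
	uint16_t per_core;
	uint8_t core;

	if (tc_cores == 0 || nb_queue % tc_cores != 0) {
		printf("nb_queue %u cannot be split over %u cores\n",
		       (unsigned int)nb_queue, (unsigned int)tc_cores);
		return;
	}

	per_core = nb_queue / tc_cores; /* queues polled by each core */
	for (core = 0; core < tc_cores; core++)
		printf("core %u polls queues [%u, %u]\n",
		       (unsigned int)core,
		       (unsigned int)(base + per_core * core),
		       (unsigned int)(base + per_core * core + per_core - 1));
}

int
main(void)
{
	/* A TC with 8 Rx queues starting at queue 16, split over 2 cores:
	 * core 0 -> [16, 19], core 1 -> [20, 23].
	 */
	split_tc_queues(16, 8, 2);
	return 0;
}
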
diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
index cbd6020bc6..97dbc008af 100644
--- a/app/test-pmd/cmdline.c
+++ b/app/test-pmd/cmdline.c
@@ -6280,6 +6280,53 @@ static cmdline_parse_inst_t cmd_set_dcb_fwd_tc = {
},
};
+/* *** set dcb forward cores per TC *** */
+struct cmd_set_dcb_fwd_tc_cores_result {
+ cmdline_fixed_string_t set;
+ cmdline_fixed_string_t dcb;
+ cmdline_fixed_string_t fwd_tc_cores;
+ uint8_t tc_cores;
+};
+
+static void cmd_set_dcb_fwd_tc_cores_parsed(void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ __rte_unused void *data)
+{
+ struct cmd_set_dcb_fwd_tc_cores_result *res = parsed_result;
+ if (res->tc_cores == 0) {
+ fprintf(stderr, "Cores per-TC should not be zero!\n");
+ return;
+ }
+ dcb_fwd_tc_cores = res->tc_cores;
+ printf("Set cores-per-TC: %u\n", dcb_fwd_tc_cores);
+}
+
+static cmdline_parse_token_string_t cmd_set_dcb_fwd_tc_cores_set =
+ TOKEN_STRING_INITIALIZER(struct cmd_set_dcb_fwd_tc_cores_result,
+ set, "set");
+static cmdline_parse_token_string_t cmd_set_dcb_fwd_tc_cores_dcb =
+ TOKEN_STRING_INITIALIZER(struct cmd_set_dcb_fwd_tc_cores_result,
+ dcb, "dcb");
+static cmdline_parse_token_string_t cmd_set_dcb_fwd_tc_cores_fwdtccores =
+ TOKEN_STRING_INITIALIZER(struct cmd_set_dcb_fwd_tc_cores_result,
+ fwd_tc_cores, "fwd_tc_cores");
+static cmdline_parse_token_num_t cmd_set_dcb_fwd_tc_cores_tccores =
+ TOKEN_NUM_INITIALIZER(struct cmd_set_dcb_fwd_tc_cores_result,
+ tc_cores, RTE_UINT8);
+
+static cmdline_parse_inst_t cmd_set_dcb_fwd_tc_cores = {
+ .f = cmd_set_dcb_fwd_tc_cores_parsed,
+ .data = NULL,
+ .help_str = "config DCB forwarding cores per TC, 1 means one core processes all queues of a TC.",
+ .tokens = {
+ (void *)&cmd_set_dcb_fwd_tc_cores_set,
+ (void *)&cmd_set_dcb_fwd_tc_cores_dcb,
+ (void *)&cmd_set_dcb_fwd_tc_cores_fwdtccores,
+ (void *)&cmd_set_dcb_fwd_tc_cores_tccores,
+ NULL,
+ },
+};
+
/* *** SET BURST TX DELAY TIME RETRY NUMBER *** */
struct cmd_set_burst_tx_retry_result {
cmdline_fixed_string_t set;
@@ -14060,6 +14107,7 @@ static cmdline_parse_ctx_t builtin_ctx[] = {
&cmd_set_fwd_mode,
&cmd_set_fwd_retry_mode,
&cmd_set_dcb_fwd_tc,
+ &cmd_set_dcb_fwd_tc_cores,
&cmd_set_burst_tx_retry,
&cmd_set_promisc_mode_one,
&cmd_set_promisc_mode_all,
diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
index 88c1e99c5e..b5dc90fe05 100644
--- a/app/test-pmd/config.c
+++ b/app/test-pmd/config.c
@@ -5112,6 +5112,36 @@ rss_fwd_config_setup(void)
}
}
+static int
+dcb_fwd_check_cores_per_tc(void)
+{
+ struct rte_eth_dcb_info dcb_info = {0};
+ uint32_t port, tc, vmdq_idx;
+
+ if (dcb_fwd_tc_cores == 1)
+ return 0;
+
+ for (port = 0; port < nb_fwd_ports; port++) {
+ (void)rte_eth_dev_get_dcb_info(fwd_ports_ids[port], &dcb_info);
+ for (tc = 0; tc < dcb_info.nb_tcs; tc++) {
+ for (vmdq_idx = 0; vmdq_idx < RTE_ETH_MAX_VMDQ_POOL; vmdq_idx++) {
+ if (dcb_info.tc_queue.tc_rxq[vmdq_idx][tc].nb_queue == 0)
+ break;
+ /* make sure nb_rx_queue is divisible by dcb_fwd_tc_cores. */
+ if (dcb_info.tc_queue.tc_rxq[vmdq_idx][tc].nb_queue %
+ dcb_fwd_tc_cores)
+ return -1;
+ /* make sure nb_tx_queue is divisible by dcb_fwd_tc_cores. */
+ if (dcb_info.tc_queue.tc_txq[vmdq_idx][tc].nb_queue %
+ dcb_fwd_tc_cores)
+ return -1;
+ }
+ }
+ }
+
+ return 0;
+}
+
static uint16_t
get_fwd_port_total_tc_num(void)
{
@@ -5164,14 +5194,17 @@ dcb_fwd_tc_update_dcb_info(struct rte_eth_dcb_info *org_dcb_info)
}
/**
- * For the DCB forwarding test, each core is assigned on each traffic class.
+ * For the DCB forwarding test, each core is assigned to one traffic class
+ * by default:
+ * Each core is assigned a multi-stream, each stream being composed of
+ * a RX queue to poll on a RX port for input messages, associated with
+ * a TX queue of a TX port where to send forwarded packets. All RX and
+ * TX queues are mapping to the same traffic class.
+ * If VMDQ and DCB co-exist, each traffic class on different POOLs share
+ * the same core.
*
- * Each core is assigned a multi-stream, each stream being composed of
- * a RX queue to poll on a RX port for input messages, associated with
- * a TX queue of a TX port where to send forwarded packets. All RX and
- * TX queues are mapping to the same traffic class.
- * If VMDQ and DCB co-exist, each traffic class on different POOLs share
- * the same core
+ * If the user sets cores-per-TC to another value (e.g. 2), then multiple
+ * cores will process one TC.
*/
static void
dcb_fwd_config_setup(void)
@@ -5182,6 +5215,7 @@ dcb_fwd_config_setup(void)
lcoreid_t lc_id;
uint16_t nb_rx_queue, nb_tx_queue;
uint16_t i, j, k, sm_id = 0;
+ uint16_t sub_core_idx = 0;
uint16_t total_tc_num;
struct rte_port *port;
uint8_t tc = 0;
@@ -5212,6 +5246,13 @@ dcb_fwd_config_setup(void)
}
}
+ ret = dcb_fwd_check_cores_per_tc();
+ if (ret != 0) {
+ fprintf(stderr, "Error: forwarding cores-per-TC check failed!\n");
+ cur_fwd_config.nb_fwd_lcores = 0;
+ return;
+ }
+
total_tc_num = get_fwd_port_total_tc_num();
if (total_tc_num == 0) {
fprintf(stderr, "Error: total forwarding TC num is zero!\n");
@@ -5219,12 +5260,16 @@ dcb_fwd_config_setup(void)
return;
}
- cur_fwd_config.nb_fwd_lcores = (lcoreid_t) nb_fwd_lcores;
+ if (nb_fwd_lcores < total_tc_num * dcb_fwd_tc_cores) {
+ fprintf(stderr, "Error: the number of forwarding cores is insufficient!\n");
+ cur_fwd_config.nb_fwd_lcores = 0;
+ return;
+ }
+
+ cur_fwd_config.nb_fwd_lcores = total_tc_num * dcb_fwd_tc_cores;
cur_fwd_config.nb_fwd_ports = nb_fwd_ports;
cur_fwd_config.nb_fwd_streams =
(streamid_t) (nb_rxq * cur_fwd_config.nb_fwd_ports);
- if (cur_fwd_config.nb_fwd_lcores > total_tc_num)
- cur_fwd_config.nb_fwd_lcores = total_tc_num;
/* reinitialize forwarding streams */
init_fwd_streams();
@@ -5247,10 +5292,12 @@ dcb_fwd_config_setup(void)
break;
k = fwd_lcores[lc_id]->stream_nb +
fwd_lcores[lc_id]->stream_idx;
- rxq = rxp_dcb_info.tc_queue.tc_rxq[i][tc].base;
- txq = txp_dcb_info.tc_queue.tc_txq[i][tc].base;
- nb_rx_queue = rxp_dcb_info.tc_queue.tc_rxq[i][tc].nb_queue;
- nb_tx_queue = txp_dcb_info.tc_queue.tc_txq[i][tc].nb_queue;
+ nb_rx_queue = rxp_dcb_info.tc_queue.tc_rxq[i][tc].nb_queue /
+ dcb_fwd_tc_cores;
+ nb_tx_queue = txp_dcb_info.tc_queue.tc_txq[i][tc].nb_queue /
+ dcb_fwd_tc_cores;
+ rxq = rxp_dcb_info.tc_queue.tc_rxq[i][tc].base + nb_rx_queue * sub_core_idx;
+ txq = txp_dcb_info.tc_queue.tc_txq[i][tc].base + nb_tx_queue * sub_core_idx;
for (j = 0; j < nb_rx_queue; j++) {
struct fwd_stream *fs;
@@ -5262,11 +5309,14 @@ dcb_fwd_config_setup(void)
fs->peer_addr = fs->tx_port;
fs->retry_enabled = retry_enabled;
}
- fwd_lcores[lc_id]->stream_nb +=
- rxp_dcb_info.tc_queue.tc_rxq[i][tc].nb_queue;
+ sub_core_idx++;
+ fwd_lcores[lc_id]->stream_nb += nb_rx_queue;
}
sm_id = (streamid_t) (sm_id + fwd_lcores[lc_id]->stream_nb);
+ if (sub_core_idx < dcb_fwd_tc_cores)
+ continue;
+ sub_core_idx = 0;
tc++;
if (tc < rxp_dcb_info.nb_tcs)
continue;
diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index 9d0ce5660c..8cfb570da2 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -216,6 +216,15 @@ struct fwd_engine * fwd_engines[] = {
* If bit-n in tc-mask is 1, then TC-n's forwarding is enabled, and vice versa.
*/
uint8_t dcb_fwd_tc_mask = DEFAULT_DCB_FWD_TC_MASK;
+/*
+ * Number of polling cores per TC in DCB forwarding.
+ * E.g. 1 indicates that one core processes all queues of a TC,
+ * 2 indicates that two cores process all queues of a TC. If there
+ * is a TC with 8 queues, then queues [0, 3] belong to the first core
+ * and queues [4, 7] belong to the second core.
+ * ...
+ */
+uint8_t dcb_fwd_tc_cores = 1;
struct rte_mempool *mempools[RTE_MAX_NUMA_NODES * MAX_SEGS_BUFFER_SPLIT];
uint16_t mempool_flags;
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index 1ada0de450..492b5757f1 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -486,6 +486,7 @@ extern cmdline_parse_inst_t cmd_set_flex_spec_pattern;
#define DEFAULT_DCB_FWD_TC_MASK 0xFF
extern uint8_t dcb_fwd_tc_mask;
+extern uint8_t dcb_fwd_tc_cores;
extern uint16_t mempool_flags;
diff --git a/doc/guides/testpmd_app_ug/testpmd_funcs.rst b/doc/guides/testpmd_app_ug/testpmd_funcs.rst
index 628f17fed7..209e88d531 100644
--- a/doc/guides/testpmd_app_ug/testpmd_funcs.rst
+++ b/doc/guides/testpmd_app_ug/testpmd_funcs.rst
@@ -1885,6 +1885,14 @@ forwarding is enabled, and vice versa::
testpmd> set dcb fwd_tc (tc_mask)
+set dcb fwd_tc_cores
+~~~~~~~~~~~~~~~~~~~~
+
+Configure the number of DCB forwarding cores per TC. 1 means one core processes
+all queues of a TC, 2 means two cores process all queues of a TC, and so on::
+
+ testpmd> set dcb fwd_tc_cores (tc_cores)
+
Port Functions
--------------
--
2.17.1