patch 'app/dma-perf: fix on-flight DMA when verifying data' has been queued to stable release 23.11.6

Shani Peretz shperetz at nvidia.com
Thu Dec 25 10:17:51 CET 2025


Hi,

FYI, your patch has been queued to stable release 23.11.6

Note it hasn't been pushed to http://dpdk.org/browse/dpdk-stable yet.
It will be pushed if I get no objections before 12/30/25. So please
shout if anyone has objections.

Also note that after the patch there's a diff of the upstream commit vs the
patch applied to the branch. This will indicate if there was any rebasing
needed to apply to the stable branch. If there were code changes for rebasing
(ie: not only metadata diffs), please double check that the rebase was
correctly done.

Queued patches are on a temporary branch at:
https://github.com/shanipr/dpdk-stable

This queued commit can be viewed at:
https://github.com/shanipr/dpdk-stable/commit/548e62ca1a3df0b3a832f43dd8889cd908a99e27

Thanks.

Shani

---
>From 548e62ca1a3df0b3a832f43dd8889cd908a99e27 Mon Sep 17 00:00:00 2001
From: Chengwen Feng <fengchengwen at huawei.com>
Date: Tue, 25 Nov 2025 09:51:29 +0800
Subject: [PATCH] app/dma-perf: fix on-flight DMA when verifying data

[ upstream commit d1b3b669674a17c58eabf3d631b21aaad7232403 ]

There may be on-flight DMA when verify_data() runs, because the DMA device
may still be working when the worker exits.

This commit adds a wait-for-DMA-completion stage before the worker exits.

Fixes: 623dc9364dc6 ("app/dma-perf: introduce DMA performance test")
Cc: stable at dpdk.org

Signed-off-by: Chengwen Feng <fengchengwen at huawei.com>
---
 app/test-dma-perf/benchmark.c | 45 +++++++++++++++++++++++++++--------
 1 file changed, 35 insertions(+), 10 deletions(-)

diff --git a/app/test-dma-perf/benchmark.c b/app/test-dma-perf/benchmark.c
index 9b1f58c78c..084c953b02 100644
--- a/app/test-dma-perf/benchmark.c
+++ b/app/test-dma-perf/benchmark.c
@@ -18,7 +18,6 @@
 #define MAX_DMA_CPL_NB 255
 
 #define TEST_WAIT_U_SECOND 10000
-#define POLL_MAX 1000
 
 #define CSV_LINE_DMA_FMT "Scenario %u,%u,%s,%u,%u,%u,%u,%.2lf,%" PRIu64 ",%.3lf,%.3lf\n"
 #define CSV_LINE_CPU_FMT "Scenario %u,%u,NA,NA,NA,%u,%u,%.2lf,%" PRIu64 ",%.3lf,%.3lf\n"
@@ -215,6 +214,40 @@ do_dma_submit_and_poll(uint16_t dev_id, uint64_t *async_cnt,
 	worker_info->total_cpl += nr_cpl;
 }
 
+static int
+do_dma_submit_and_wait_cpl(uint16_t dev_id, uint64_t async_cnt)
+{
+#define MAX_WAIT_MSEC	1000
+#define MAX_POLL	1000
+#define DEQ_SZ		64
+	enum rte_dma_vchan_status st;
+	uint32_t poll_cnt = 0;
+	uint32_t wait_ms = 0;
+	uint16_t nr_cpl;
+
+	rte_dma_submit(dev_id, 0);
+
+	if (rte_dma_vchan_status(dev_id, 0, &st) < 0) {
+		rte_delay_ms(MAX_WAIT_MSEC);
+		goto wait_cpl;
+	}
+
+	while (st == RTE_DMA_VCHAN_ACTIVE && wait_ms++ < MAX_WAIT_MSEC) {
+		rte_delay_ms(1);
+		rte_dma_vchan_status(dev_id, 0, &st);
+	}
+
+wait_cpl:
+	while ((async_cnt > 0) && (poll_cnt++ < MAX_POLL)) {
+		nr_cpl = rte_dma_completed(dev_id, 0, MAX_DMA_CPL_NB, NULL, NULL);
+		async_cnt -= nr_cpl;
+	}
+	if (async_cnt > 0)
+		PRINT_ERR("Error: wait DMA %u failed!\n", dev_id);
+
+	return async_cnt == 0 ? 0 : -1;
+}
+
 static inline int
 do_dma_mem_copy(void *p)
 {
@@ -226,10 +259,8 @@ do_dma_mem_copy(void *p)
 	const uint32_t buf_size = para->buf_size;
 	struct rte_mbuf **srcs = para->srcs;
 	struct rte_mbuf **dsts = para->dsts;
-	uint16_t nr_cpl;
 	uint64_t async_cnt = 0;
 	uint32_t i;
-	uint32_t poll_cnt = 0;
 	int ret;
 
 	worker_info->stop_flag = false;
@@ -260,13 +291,7 @@ dma_copy:
 			break;
 	}
 
-	rte_dma_submit(dev_id, 0);
-	while ((async_cnt > 0) && (poll_cnt++ < POLL_MAX)) {
-		nr_cpl = rte_dma_completed(dev_id, 0, MAX_DMA_CPL_NB, NULL, NULL);
-		async_cnt -= nr_cpl;
-	}
-
-	return 0;
+	return do_dma_submit_and_wait_cpl(dev_id, async_cnt);
 }
 
 static inline int
-- 
2.43.0

---
  Diff of the applied patch vs upstream commit (please double-check if non-empty):
---
--- -	2025-12-25 11:16:37.814178465 +0200
+++ 0030-app-dma-perf-fix-on-flight-DMA-when-verifying-data.patch	2025-12-25 11:16:35.518105000 +0200
@@ -1 +1 @@
-From d1b3b669674a17c58eabf3d631b21aaad7232403 Mon Sep 17 00:00:00 2001
+From 548e62ca1a3df0b3a832f43dd8889cd908a99e27 Mon Sep 17 00:00:00 2001
@@ -3 +3 @@
-Date: Mon, 20 Oct 2025 12:11:03 +0800
+Date: Tue, 25 Nov 2025 09:51:29 +0800
@@ -5,0 +6,2 @@
+[ upstream commit d1b3b669674a17c58eabf3d631b21aaad7232403 ]
+
@@ -16,2 +18,2 @@
- app/test-dma-perf/benchmark.c | 71 ++++++++++++++++++++++-------------
- 1 file changed, 44 insertions(+), 27 deletions(-)
+ app/test-dma-perf/benchmark.c | 45 +++++++++++++++++++++++++++--------
+ 1 file changed, 35 insertions(+), 10 deletions(-)
@@ -20 +22 @@
-index 6643ccc95f..4ce95d0f7b 100644
+index 9b1f58c78c..084c953b02 100644
@@ -23 +25 @@
-@@ -19,7 +19,6 @@
+@@ -18,7 +18,6 @@
@@ -31 +33 @@
-@@ -293,6 +292,45 @@ do_dma_submit_and_poll(uint16_t dev_id, uint64_t *async_cnt,
+@@ -215,6 +214,40 @@ do_dma_submit_and_poll(uint16_t dev_id, uint64_t *async_cnt,
@@ -36 +38 @@
-+do_dma_submit_and_wait_cpl(uint16_t dev_id, uint64_t async_cnt, bool use_ops)
++do_dma_submit_and_wait_cpl(uint16_t dev_id, uint64_t async_cnt)
@@ -41 +42,0 @@
-+	struct rte_dma_op *op[DEQ_SZ];
@@ -47,2 +48 @@
-+	if (!use_ops)
-+		rte_dma_submit(dev_id, 0);
++	rte_dma_submit(dev_id, 0);
@@ -62,4 +62 @@
-+		if (use_ops)
-+			nr_cpl = rte_dma_dequeue_ops(dev_id, 0, op, DEQ_SZ);
-+		else
-+			nr_cpl = rte_dma_completed(dev_id, 0, MAX_DMA_CPL_NB, NULL, NULL);
++		nr_cpl = rte_dma_completed(dev_id, 0, MAX_DMA_CPL_NB, NULL, NULL);
@@ -75 +72 @@
- do_dma_plain_mem_copy(void *p)
+ do_dma_mem_copy(void *p)
@@ -77 +74 @@
-@@ -304,10 +342,8 @@ do_dma_plain_mem_copy(void *p)
+@@ -226,10 +259,8 @@ do_dma_mem_copy(void *p)
@@ -88,25 +85 @@
-@@ -338,13 +374,7 @@ dma_copy:
- 			break;
- 	}
- 
--	rte_dma_submit(dev_id, 0);
--	while ((async_cnt > 0) && (poll_cnt++ < POLL_MAX)) {
--		nr_cpl = rte_dma_completed(dev_id, 0, MAX_DMA_CPL_NB, NULL, NULL);
--		async_cnt -= nr_cpl;
--	}
--
--	return 0;
-+	return do_dma_submit_and_wait_cpl(dev_id, async_cnt, false);
- }
- 
- static inline int
-@@ -360,8 +390,6 @@ do_dma_sg_mem_copy(void *p)
- 	const uint16_t dev_id = para->dev_id;
- 	uint32_t nr_buf = para->nr_buf;
- 	uint64_t async_cnt = 0;
--	uint32_t poll_cnt = 0;
--	uint16_t nr_cpl;
- 	uint32_t i, j;
- 	int ret;
- 
-@@ -397,13 +425,7 @@ dma_copy:
+@@ -260,13 +291,7 @@ dma_copy:
@@ -123,28 +96 @@
-+	return do_dma_submit_and_wait_cpl(dev_id, async_cnt, false);
- }
- 
- static inline int
-@@ -414,11 +436,11 @@ do_dma_enq_deq_mem_copy(void *p)
- 	volatile struct worker_info *worker_info = &(para->worker_info);
- 	struct rte_dma_op **dma_ops = para->dma_ops;
- 	uint16_t kick_batch = para->kick_batch, sz;
--	uint16_t enq, deq, poll_cnt;
--	uint64_t tenq, tdeq;
- 	const uint16_t dev_id = para->dev_id;
- 	uint32_t nr_buf = para->nr_buf;
- 	struct rte_dma_op *op[DEQ_SZ];
-+	uint64_t tenq, tdeq;
-+	uint16_t enq, deq;
- 	uint32_t i;
- 
- 	worker_info->stop_flag = false;
-@@ -454,11 +476,7 @@ do_dma_enq_deq_mem_copy(void *p)
- 			break;
- 	}
- 
--	poll_cnt = 0;
--	while ((tenq != tdeq) && (poll_cnt++ < POLL_MAX))
--		tdeq += rte_dma_dequeue_ops(dev_id, 0, op, DEQ_SZ);
--
--	return 0;
-+	return do_dma_submit_and_wait_cpl(dev_id, tenq - tdeq, true);
++	return do_dma_submit_and_wait_cpl(dev_id, async_cnt);
@@ -154,8 +99,0 @@
-@@ -614,7 +632,6 @@ setup_memory_env(struct test_configure *cfg, uint32_t nr_buf,
- 		}
- 
- 		if (cfg->use_ops) {
--
- 			nr_buf /= RTE_MAX(nb_src_sges, nb_dst_sges);
- 			*dma_ops = rte_zmalloc(NULL, nr_buf * (sizeof(struct rte_dma_op *)),
- 					       RTE_CACHE_LINE_SIZE);


More information about the stable mailing list