[dpdk-stable] patch 'net/mlx5: reduce Tx completion index memory loads' has been queued to stable release 19.11.3
luca.boccassi at gmail.com
luca.boccassi at gmail.com
Tue May 19 14:53:57 CEST 2020
Hi,
FYI, your patch has been queued to stable release 19.11.3
Note it hasn't been pushed to http://dpdk.org/browse/dpdk-stable yet.
It will be pushed if I get no objections before 05/21/20. So please
shout if anyone has objections.
Also note that after the patch there's a diff of the upstream commit vs the
patch applied to the branch. This will indicate if there was any rebasing
needed to apply to the stable branch. If there were code changes for rebasing
(i.e., not only metadata diffs), please double-check that the rebase was
correctly done.
Thanks.
Luca Boccassi
---
From 6e0c764098edb83a98edfabff16ada2886db66a4 Mon Sep 17 00:00:00 2001
From: Alexander Kozyrev <akozyrev at mellanox.com>
Date: Mon, 16 Mar 2020 15:34:59 +0000
Subject: [PATCH] net/mlx5: reduce Tx completion index memory loads
[ upstream commit 8d4659e70fc081ab220e44552af194900d720d2e ]
There is a non-optimal check if doorbell is needed present in the
mlx5_tx_handle_completion() function. Advancing a copy of the txq
consumer index and checking this copy with initial value causes
unnecessary memory loads and hurts the performance. It is better to
have a simple small boolean variable for this purpose. That allows
to eliminate all the excessive memory operations with the txq consumer
index and restore the performance of the tx completions.
Fixes: 1fd9af05e44e ("net/mlx5: update Tx error handling routine")
Signed-off-by: Alexander Kozyrev <akozyrev at mellanox.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo at mellanox.com>
---
drivers/net/mlx5/mlx5_rxtx.c | 30 ++++++++++++++----------------
1 file changed, 14 insertions(+), 16 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index ea1050f9cd..905a84d4dc 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -2071,7 +2071,7 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq,
{
unsigned int count = MLX5_TX_COMP_MAX_CQE;
volatile struct mlx5_cqe *last_cqe = NULL;
- uint16_t ci = txq->cq_ci;
+ bool ring_doorbell = false;
int ret;
static_assert(MLX5_CQE_STATUS_HW_OWN < 0, "Must be negative value");
@@ -2079,8 +2079,8 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq,
do {
volatile struct mlx5_cqe *cqe;
- cqe = &txq->cqes[ci & txq->cqe_m];
- ret = check_cqe(cqe, txq->cqe_s, ci);
+ cqe = &txq->cqes[txq->cq_ci & txq->cqe_m];
+ ret = check_cqe(cqe, txq->cqe_s, txq->cq_ci);
if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
if (likely(ret != MLX5_CQE_STATUS_ERR)) {
/* No new CQEs in completion queue. */
@@ -2094,7 +2094,6 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq,
* here, before we might perform SQ reset.
*/
rte_wmb();
- txq->cq_ci = ci;
ret = mlx5_tx_error_cqe_handle
(txq, (volatile struct mlx5_err_cqe *)cqe);
if (unlikely(ret < 0)) {
@@ -2110,15 +2109,18 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq,
* MLX5_CQE_SYNDROME_WR_FLUSH_ERR status.
* The send queue is supposed to be empty.
*/
- ++ci;
- txq->cq_pi = ci;
+ ring_doorbell = true;
+ ++txq->cq_ci;
+ txq->cq_pi = txq->cq_ci;
last_cqe = NULL;
continue;
}
/* Normal transmit completion. */
- assert(ci != txq->cq_pi);
- assert((txq->fcqs[ci & txq->cqe_m] >> 16) == cqe->wqe_counter);
- ++ci;
+ assert(txq->cq_ci != txq->cq_pi);
+ assert((txq->fcqs[txq->cq_ci & txq->cqe_m] >> 16) ==
+ cqe->wqe_counter);
+ ring_doorbell = true;
+ ++txq->cq_ci;
last_cqe = cqe;
/*
* We have to restrict the amount of processed CQEs
@@ -2131,14 +2133,10 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq,
if (likely(--count == 0))
break;
} while (true);
- if (likely(ci != txq->cq_ci)) {
- /*
- * Update completion queue consuming index
- * and ring doorbell to notify hardware.
- */
+ if (likely(ring_doorbell)) {
+ /* Ring doorbell to notify hardware. */
rte_compiler_barrier();
- txq->cq_ci = ci;
- *txq->cq_db = rte_cpu_to_be_32(ci);
+ *txq->cq_db = rte_cpu_to_be_32(txq->cq_ci);
mlx5_tx_comp_flush(txq, last_cqe, olx);
}
}
--
2.20.1
---
Diff of the applied patch vs upstream commit (please double-check if non-empty):
---
--- - 2020-05-19 13:56:20.777894123 +0100
+++ 0047-net-mlx5-reduce-Tx-completion-index-memory-loads.patch 2020-05-19 13:56:18.279502899 +0100
@@ -1,8 +1,10 @@
-From 8d4659e70fc081ab220e44552af194900d720d2e Mon Sep 17 00:00:00 2001
+From 6e0c764098edb83a98edfabff16ada2886db66a4 Mon Sep 17 00:00:00 2001
From: Alexander Kozyrev <akozyrev at mellanox.com>
Date: Mon, 16 Mar 2020 15:34:59 +0000
Subject: [PATCH] net/mlx5: reduce Tx completion index memory loads
+[ upstream commit 8d4659e70fc081ab220e44552af194900d720d2e ]
+
There is a non-optimal check if doorbell is needed present in the
mlx5_tx_handle_completion() function. Advancing a copy of the txq
consumer index and checking this copy with initial value causes
@@ -12,19 +14,18 @@
index and restore the performance of the tx completions.
Fixes: 1fd9af05e44e ("net/mlx5: update Tx error handling routine")
-Cc: stable at dpdk.org
Signed-off-by: Alexander Kozyrev <akozyrev at mellanox.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo at mellanox.com>
---
- drivers/net/mlx5/mlx5_rxtx.c | 29 +++++++++++++----------------
- 1 file changed, 13 insertions(+), 16 deletions(-)
+ drivers/net/mlx5/mlx5_rxtx.c | 30 ++++++++++++++----------------
+ 1 file changed, 14 insertions(+), 16 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
-index 5ac63da803..f3bf763769 100644
+index ea1050f9cd..905a84d4dc 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
-@@ -2160,7 +2160,7 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq,
+@@ -2071,7 +2071,7 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq,
{
unsigned int count = MLX5_TX_COMP_MAX_CQE;
volatile struct mlx5_cqe *last_cqe = NULL;
@@ -33,7 +34,7 @@
int ret;
static_assert(MLX5_CQE_STATUS_HW_OWN < 0, "Must be negative value");
-@@ -2168,8 +2168,8 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq,
+@@ -2079,8 +2079,8 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq,
do {
volatile struct mlx5_cqe *cqe;
@@ -44,7 +45,7 @@
if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
if (likely(ret != MLX5_CQE_STATUS_ERR)) {
/* No new CQEs in completion queue. */
-@@ -2183,7 +2183,6 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq,
+@@ -2094,7 +2094,6 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq,
* here, before we might perform SQ reset.
*/
rte_wmb();
@@ -52,7 +53,7 @@
ret = mlx5_tx_error_cqe_handle
(txq, (volatile struct mlx5_err_cqe *)cqe);
if (unlikely(ret < 0)) {
-@@ -2199,16 +2198,18 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq,
+@@ -2110,15 +2109,18 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq,
* MLX5_CQE_SYNDROME_WR_FLUSH_ERR status.
* The send queue is supposed to be empty.
*/
@@ -65,18 +66,18 @@
continue;
}
/* Normal transmit completion. */
-- MLX5_ASSERT(ci != txq->cq_pi);
-- MLX5_ASSERT((txq->fcqs[ci & txq->cqe_m] >> 16) ==
-+ MLX5_ASSERT(txq->cq_ci != txq->cq_pi);
-+ MLX5_ASSERT((txq->fcqs[txq->cq_ci & txq->cqe_m] >> 16) ==
- cqe->wqe_counter);
+- assert(ci != txq->cq_pi);
+- assert((txq->fcqs[ci & txq->cqe_m] >> 16) == cqe->wqe_counter);
- ++ci;
++ assert(txq->cq_ci != txq->cq_pi);
++ assert((txq->fcqs[txq->cq_ci & txq->cqe_m] >> 16) ==
++ cqe->wqe_counter);
+ ring_doorbell = true;
+ ++txq->cq_ci;
last_cqe = cqe;
/*
* We have to restrict the amount of processed CQEs
-@@ -2221,14 +2222,10 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq,
+@@ -2131,14 +2133,10 @@ mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq,
if (likely(--count == 0))
break;
} while (true);
More information about the stable
mailing list