[PATCH v2] net/mlx5: fix age checking crash
Rongwei Liu
rongweil at nvidia.com
Thu Oct 9 08:29:24 CEST 2025
When aging is configured, there is a background thread
which queries all the counters in the pool.
Meantime, per queue flow insertion/deletion/update changes
the counter pool too. It introduces a race condition between
resetting counters's in_used and age_idx fields during flow deletion
and reading them in the background thread.
To resolve it, all key members of counter's struct
are placed in a single uint32_t and they are accessed atomically.
To avoid the occasional timestamp equalization with age_idx,
query_gen_when_free is moved out of the union. The total memory
size is kept the same.
Fixes: 04a4de756e14 ("net/mlx5: support flow age action with HWS")
Cc: michaelba at nvidia.com
Cc: stable at dpdk.org
Signed-off-by: Rongwei Liu <rongweil at nvidia.com>
Acked-by: Dariusz Sosnowski <dsosnowski at nvidia.com>
v2: fix clang compilation error
---
drivers/net/mlx5/mlx5_flow_hw.c | 5 +-
drivers/net/mlx5/mlx5_hws_cnt.c | 8 +-
drivers/net/mlx5/mlx5_hws_cnt.h | 127 ++++++++++++++++++++------------
3 files changed, 85 insertions(+), 55 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_flow_hw.c b/drivers/net/mlx5/mlx5_flow_hw.c
index 9a0aa1827e..491a78a0de 100644
--- a/drivers/net/mlx5/mlx5_flow_hw.c
+++ b/drivers/net/mlx5/mlx5_flow_hw.c
@@ -3232,7 +3232,7 @@ flow_hw_shared_action_construct(struct rte_eth_dev *dev, uint32_t queue,
return -1;
if (action_flags & MLX5_FLOW_ACTION_COUNT) {
cnt_queue = mlx5_hws_cnt_get_queue(priv, &queue);
- if (mlx5_hws_cnt_pool_get(priv->hws_cpool, cnt_queue, &age_cnt, idx) < 0)
+ if (mlx5_hws_cnt_pool_get(priv->hws_cpool, cnt_queue, &age_cnt, idx, 0) < 0)
return -1;
flow->flags |= MLX5_FLOW_HW_FLOW_FLAG_CNT_ID;
flow->cnt_id = age_cnt;
@@ -3668,7 +3668,8 @@ flow_hw_actions_construct(struct rte_eth_dev *dev,
/* Fall-through. */
case RTE_FLOW_ACTION_TYPE_COUNT:
cnt_queue = mlx5_hws_cnt_get_queue(priv, &queue);
- ret = mlx5_hws_cnt_pool_get(priv->hws_cpool, cnt_queue, &cnt_id, age_idx);
+ ret = mlx5_hws_cnt_pool_get(priv->hws_cpool, cnt_queue, &cnt_id,
+ age_idx, 0);
if (ret != 0) {
rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ACTION,
action, "Failed to allocate flow counter");
diff --git a/drivers/net/mlx5/mlx5_hws_cnt.c b/drivers/net/mlx5/mlx5_hws_cnt.c
index 5c738f38ca..fdb44f5a32 100644
--- a/drivers/net/mlx5/mlx5_hws_cnt.c
+++ b/drivers/net/mlx5/mlx5_hws_cnt.c
@@ -63,8 +63,8 @@ __mlx5_hws_cnt_svc(struct mlx5_dev_ctx_shared *sh,
uint32_t ret __rte_unused;
reset_cnt_num = rte_ring_count(reset_list);
- cpool->query_gen++;
mlx5_aso_cnt_query(sh, cpool);
+ __atomic_store_n(&cpool->query_gen, cpool->query_gen + 1, __ATOMIC_RELEASE);
zcdr.n1 = 0;
zcdu.n1 = 0;
ret = rte_ring_enqueue_zc_burst_elem_start(reuse_list,
@@ -134,14 +134,14 @@ mlx5_hws_aging_check(struct mlx5_priv *priv, struct mlx5_hws_cnt_pool *cpool)
uint32_t nb_alloc_cnts = mlx5_hws_cnt_pool_get_size(cpool);
uint16_t expected1 = HWS_AGE_CANDIDATE;
uint16_t expected2 = HWS_AGE_CANDIDATE_INSIDE_RING;
- uint32_t i;
+ uint32_t i, age_idx, in_use;
cpool->time_of_last_age_check = curr_time;
for (i = 0; i < nb_alloc_cnts; ++i) {
- uint32_t age_idx = cpool->pool[i].age_idx;
uint64_t hits;
- if (!cpool->pool[i].in_used || age_idx == 0)
+ mlx5_hws_cnt_get_all(&cpool->pool[i], &in_use, NULL, &age_idx);
+ if (!in_use || age_idx == 0)
continue;
param = mlx5_ipool_get(age_info->ages_ipool, age_idx);
if (unlikely(param == NULL)) {
diff --git a/drivers/net/mlx5/mlx5_hws_cnt.h b/drivers/net/mlx5/mlx5_hws_cnt.h
index 38a9c19449..6db92a2cb7 100644
--- a/drivers/net/mlx5/mlx5_hws_cnt.h
+++ b/drivers/net/mlx5/mlx5_hws_cnt.h
@@ -42,33 +42,36 @@ struct mlx5_hws_cnt_dcs_mng {
struct mlx5_hws_cnt_dcs dcs[MLX5_HWS_CNT_DCS_NUM];
};
-struct mlx5_hws_cnt {
- struct flow_counter_stats reset;
- bool in_used; /* Indicator whether this counter in used or in pool. */
- union {
- struct {
- uint32_t share:1;
- /*
- * share will be set to 1 when this counter is used as
- * indirect action.
- */
- uint32_t age_idx:24;
- /*
- * When this counter uses for aging, it save the index
- * of AGE parameter. For pure counter (without aging)
- * this index is zero.
- */
- };
- /* This struct is only meaningful when user own this counter. */
- uint32_t query_gen_when_free;
+union mlx5_hws_cnt_state {
+ uint32_t data;
+ struct {
+ uint32_t in_used:1;
+ /* Indicator whether this counter in used or in pool. */
+ uint32_t share:1;
+ /*
+ * share will be set to 1 when this counter is used as
+ * indirect action.
+ */
+ uint32_t age_idx:24;
/*
- * When PMD own this counter (user put back counter to PMD
- * counter pool, i.e), this field recorded value of counter
- * pools query generation at time user release the counter.
+ * When this counter uses for aging, it stores the index
+ * of AGE parameter. Otherwise, this index is zero.
*/
};
};
+struct mlx5_hws_cnt {
+ struct flow_counter_stats reset;
+ union mlx5_hws_cnt_state cnt_state;
+ /* This struct is only meaningful when user own this counter. */
+ uint32_t query_gen_when_free;
+ /*
+ * When PMD own this counter (user put back counter to PMD
+ * counter pool, i.e), this field recorded value of counter
+ * pools query generation at time user release the counter.
+ */
+};
+
struct mlx5_hws_cnt_raw_data_mng {
struct flow_counter_stats *raw;
struct mlx5_pmd_mr mr;
@@ -197,6 +200,42 @@ mlx5_hws_cnt_id_valid(cnt_id_t cnt_id)
MLX5_INDIRECT_ACTION_TYPE_COUNT ? true : false;
}
+static __rte_always_inline void
+mlx5_hws_cnt_set_age_idx(struct mlx5_hws_cnt *cnt, uint32_t value)
+{
+ union mlx5_hws_cnt_state cnt_state;
+
+ cnt_state.data = __atomic_load_n(&cnt->cnt_state.data, __ATOMIC_ACQUIRE);
+ cnt_state.age_idx = value;
+ __atomic_store_n(&cnt->cnt_state.data, cnt_state.data, __ATOMIC_RELEASE);
+}
+
+static __rte_always_inline void
+mlx5_hws_cnt_set_all(struct mlx5_hws_cnt *cnt, uint32_t in_used, uint32_t share, uint32_t age_idx)
+{
+ union mlx5_hws_cnt_state cnt_state;
+
+ cnt_state.in_used = !!in_used;
+ cnt_state.share = !!share;
+ cnt_state.age_idx = age_idx;
+ __atomic_store_n(&cnt->cnt_state.data, cnt_state.data, __ATOMIC_RELAXED);
+}
+
+static __rte_always_inline void
+mlx5_hws_cnt_get_all(struct mlx5_hws_cnt *cnt, uint32_t *in_used, uint32_t *share,
+ uint32_t *age_idx)
+{
+ union mlx5_hws_cnt_state cnt_state;
+
+ cnt_state.data = __atomic_load_n(&cnt->cnt_state.data, __ATOMIC_ACQUIRE);
+ if (in_used != NULL)
+ *in_used = cnt_state.in_used;
+ if (share != NULL)
+ *share = cnt_state.share;
+ if (age_idx != NULL)
+ *age_idx = cnt_state.age_idx;
+}
+
/**
* Generate Counter id from internal index.
*
@@ -424,9 +463,9 @@ mlx5_hws_cnt_pool_put(struct mlx5_hws_cnt_pool *cpool, uint32_t *queue,
hpool = mlx5_hws_cnt_host_pool(cpool);
iidx = mlx5_hws_cnt_iidx(hpool, *cnt_id);
- hpool->pool[iidx].in_used = false;
+ mlx5_hws_cnt_set_all(&hpool->pool[iidx], 0, 0, 0);
hpool->pool[iidx].query_gen_when_free =
- rte_atomic_load_explicit(&hpool->query_gen, rte_memory_order_relaxed);
+ __atomic_load_n(&hpool->query_gen, __ATOMIC_RELAXED);
if (likely(queue != NULL) && cpool->cfg.host_cpool == NULL)
qcache = hpool->cache->qcache[*queue];
if (unlikely(qcache == NULL)) {
@@ -480,7 +519,7 @@ mlx5_hws_cnt_pool_put(struct mlx5_hws_cnt_pool *cpool, uint32_t *queue,
*/
static __rte_always_inline int
mlx5_hws_cnt_pool_get(struct mlx5_hws_cnt_pool *cpool, uint32_t *queue,
- cnt_id_t *cnt_id, uint32_t age_idx)
+ cnt_id_t *cnt_id, uint32_t age_idx, uint32_t shared)
{
unsigned int ret;
struct rte_ring_zc_data zcdc = {0};
@@ -508,10 +547,7 @@ mlx5_hws_cnt_pool_get(struct mlx5_hws_cnt_pool *cpool, uint32_t *queue,
__hws_cnt_query_raw(cpool, *cnt_id,
&cpool->pool[iidx].reset.hits,
&cpool->pool[iidx].reset.bytes);
- cpool->pool[iidx].share = 0;
- MLX5_ASSERT(!cpool->pool[iidx].in_used);
- cpool->pool[iidx].in_used = true;
- cpool->pool[iidx].age_idx = age_idx;
+ mlx5_hws_cnt_set_all(&cpool->pool[iidx], 1, shared, age_idx);
return 0;
}
ret = rte_ring_dequeue_zc_burst_elem_start(qcache, sizeof(cnt_id_t), 1,
@@ -531,7 +567,8 @@ mlx5_hws_cnt_pool_get(struct mlx5_hws_cnt_pool *cpool, uint32_t *queue,
*cnt_id = (*(cnt_id_t *)zcdc.ptr1);
iidx = mlx5_hws_cnt_iidx(cpool, *cnt_id);
query_gen = cpool->pool[iidx].query_gen_when_free;
- if (cpool->query_gen == query_gen) { /* counter is waiting to reset. */
+ /* counter is waiting to reset. */
+ if (__atomic_load_n(&cpool->query_gen, __ATOMIC_RELAXED) == query_gen) {
rte_ring_dequeue_zc_elem_finish(qcache, 0);
/* write-back counter to reset list. */
mlx5_hws_cnt_pool_cache_flush(cpool, *queue);
@@ -549,10 +586,7 @@ mlx5_hws_cnt_pool_get(struct mlx5_hws_cnt_pool *cpool, uint32_t *queue,
__hws_cnt_query_raw(cpool, *cnt_id, &cpool->pool[iidx].reset.hits,
&cpool->pool[iidx].reset.bytes);
rte_ring_dequeue_zc_elem_finish(qcache, 1);
- cpool->pool[iidx].share = 0;
- MLX5_ASSERT(!cpool->pool[iidx].in_used);
- cpool->pool[iidx].in_used = true;
- cpool->pool[iidx].age_idx = age_idx;
+ mlx5_hws_cnt_set_all(&cpool->pool[iidx], 1, shared, age_idx);
return 0;
}
@@ -611,24 +645,15 @@ mlx5_hws_cnt_shared_get(struct mlx5_hws_cnt_pool *cpool, cnt_id_t *cnt_id,
uint32_t age_idx)
{
struct mlx5_hws_cnt_pool *hpool = mlx5_hws_cnt_host_pool(cpool);
- uint32_t iidx;
- int ret;
- ret = mlx5_hws_cnt_pool_get(hpool, NULL, cnt_id, age_idx);
- if (ret != 0)
- return ret;
- iidx = mlx5_hws_cnt_iidx(hpool, *cnt_id);
- hpool->pool[iidx].share = 1;
- return 0;
+ return mlx5_hws_cnt_pool_get(hpool, NULL, cnt_id, age_idx, 1);
}
static __rte_always_inline void
mlx5_hws_cnt_shared_put(struct mlx5_hws_cnt_pool *cpool, cnt_id_t *cnt_id)
{
struct mlx5_hws_cnt_pool *hpool = mlx5_hws_cnt_host_pool(cpool);
- uint32_t iidx = mlx5_hws_cnt_iidx(hpool, *cnt_id);
- hpool->pool[iidx].share = 0;
mlx5_hws_cnt_pool_put(hpool, NULL, cnt_id);
}
@@ -637,8 +662,10 @@ mlx5_hws_cnt_is_shared(struct mlx5_hws_cnt_pool *cpool, cnt_id_t cnt_id)
{
struct mlx5_hws_cnt_pool *hpool = mlx5_hws_cnt_host_pool(cpool);
uint32_t iidx = mlx5_hws_cnt_iidx(hpool, cnt_id);
+ uint32_t share;
- return hpool->pool[iidx].share ? true : false;
+ mlx5_hws_cnt_get_all(&hpool->pool[iidx], NULL, &share, NULL);
+ return !!share;
}
static __rte_always_inline void
@@ -648,8 +675,8 @@ mlx5_hws_cnt_age_set(struct mlx5_hws_cnt_pool *cpool, cnt_id_t cnt_id,
struct mlx5_hws_cnt_pool *hpool = mlx5_hws_cnt_host_pool(cpool);
uint32_t iidx = mlx5_hws_cnt_iidx(hpool, cnt_id);
- MLX5_ASSERT(hpool->pool[iidx].share);
- hpool->pool[iidx].age_idx = age_idx;
+ MLX5_ASSERT(hpool->pool[iidx].cnt_state.share);
+ mlx5_hws_cnt_set_age_idx(&hpool->pool[iidx], age_idx);
}
static __rte_always_inline uint32_t
@@ -657,9 +684,11 @@ mlx5_hws_cnt_age_get(struct mlx5_hws_cnt_pool *cpool, cnt_id_t cnt_id)
{
struct mlx5_hws_cnt_pool *hpool = mlx5_hws_cnt_host_pool(cpool);
uint32_t iidx = mlx5_hws_cnt_iidx(hpool, cnt_id);
+ uint32_t age_idx, share;
- MLX5_ASSERT(hpool->pool[iidx].share);
- return hpool->pool[iidx].age_idx;
+ mlx5_hws_cnt_get_all(&hpool->pool[iidx], NULL, &share, &age_idx);
+ MLX5_ASSERT(share);
+ return age_idx;
}
static __rte_always_inline cnt_id_t
--
2.27.0
More information about the dev
mailing list