[PATCH] net/mlx5: fix spurious CPU wakeups caused by invalid CQE
    Thomas Monjalon 
    thomas at monjalon.net
       
    Tue Oct 28 15:53:50 CET 2025
    
    
  
Cc mlx5 maintainers
15/10/2025 15:39, Sivaprasad Tummala:
> Previously, the PMD used a common monitor callback to determine
> CQE ownership for power-aware polling. However, when a CQE contained
> an invalid opcode(MLX5_CQE_INVALID), ownership bit was not reliable.
> As a result, the monitor condition could falsely indicate CQE
> availability and cause the CPU to wake up unnecessarily during
> low traffic periods.
> 
> This resulted in spurious wakeups in monitor-wait mode and reduced
> the expected power savings, as cores exited the sleep state even
> when no valid CQEs were available.
> 
> This patch introduces a dedicated callback that skips invalid CQEs
> and optimizes power efficiency by preventing false wakeups caused
> by hardware-owned or invalid entries.
> 
> Fixes: a8f0df6bf98d ("net/mlx5: support power monitoring")
> Cc: akozyrev at nvidia.com
> Cc: stable at dpdk.org
> 
> Signed-off-by: Sivaprasad Tummala <sivaprasad.tummala at amd.com>
> ---
>  drivers/net/mlx5/mlx5_rx.c | 17 ++++++++++++++++-
>  1 file changed, 16 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/mlx5/mlx5_rx.c b/drivers/net/mlx5/mlx5_rx.c
> index 420a03068d..2765b4b730 100644
> --- a/drivers/net/mlx5/mlx5_rx.c
> +++ b/drivers/net/mlx5/mlx5_rx.c
> @@ -295,6 +295,20 @@ mlx5_monitor_callback(const uint64_t value,
>  	return (value & m) == v ? -1 : 0;
>  }
>  
> +static int
> +mlx5_monitor_cqe_own_callback(const uint64_t value,
> +		const uint64_t opaque[RTE_POWER_MONITOR_OPAQUE_SZ])
> +{
> +	const uint64_t m = opaque[CLB_MSK_IDX];
> +	const uint64_t v = opaque[CLB_VAL_IDX];
> +	const uint64_t match = ((value & m) == v);
> +	const uint64_t opcode = MLX5_CQE_OPCODE(value);
> +	const uint64_t valid_op = (opcode ^ MLX5_CQE_INVALID);
> +
> +	/* ownership bit is not valid for invalid opcode; CQE is HW owned */
> +	return -(match & valid_op);
> +}
> +
>  int mlx5_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
>  {
>  	struct mlx5_rxq_data *rxq = rx_queue;
> @@ -312,12 +326,13 @@ int mlx5_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
>  		pmc->addr = &cqe->validity_iteration_count;
>  		pmc->opaque[CLB_VAL_IDX] = vic;
>  		pmc->opaque[CLB_MSK_IDX] = MLX5_CQE_VIC_INIT;
> +		pmc->fn = mlx5_monitor_callback;
>  	} else {
>  		pmc->addr = &cqe->op_own;
>  		pmc->opaque[CLB_VAL_IDX] = !!idx;
>  		pmc->opaque[CLB_MSK_IDX] = MLX5_CQE_OWNER_MASK;
> +		pmc->fn = mlx5_monitor_cqe_own_callback;
>  	}
> -	pmc->fn = mlx5_monitor_callback;
>  	pmc->size = sizeof(uint8_t);
>  	return 0;
>  }
    
    
More information about the stable
mailing list