[PATCH v2 05/10] net/mlx5: support per-queue rate limiting
Vincent Jardin
vjardin at free.fr
Wed Mar 11 00:26:48 CET 2026
Wire rte_eth_set_queue_rate_limit() to the mlx5 PMD. The callback
allocates a per-queue PP index with the requested data rate, then
modifies the live SQ via modify_bitmask bit 0 to apply the new
packet_pacing_rate_limit_index — no queue teardown required.
Setting tx_rate=0 clears the PP index on the SQ and frees the index.
Capability check uses hca_attr.qos.packet_pacing directly (not
dev_cap.txpp_en, which additionally requires Clock Queue prerequisites).
This allows per-queue rate limiting without the tx_pp devarg.
The callback rejects hairpin queues and queues whose SQ is not
yet created.
testpmd usage (no testpmd changes needed):
set port 0 queue 0 rate 1000
set port 0 queue 1 rate 5000
set port 0 queue 0 rate 0 # disable
Supported hardware:
- ConnectX-6 Dx: full support, per-SQ rate via HW rate table
- ConnectX-7/8: full support, coexists with wait-on-time scheduling
- BlueField-2/3: full support as DPU rep ports
Not supported:
- ConnectX-5: packet_pacing exists but dynamic SQ modify may not
work on all firmware versions
- ConnectX-4 Lx and earlier: no packet_pacing capability
Signed-off-by: Vincent Jardin <vjardin at free.fr>
---
drivers/net/mlx5/mlx5.c | 2 +
drivers/net/mlx5/mlx5_tx.h | 2 +
drivers/net/mlx5/mlx5_txq.c | 103 ++++++++++++++++++++++++++++++++++++
3 files changed, 107 insertions(+)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 4d3bfddc36..c390406ac7 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -2690,6 +2690,7 @@ const struct eth_dev_ops mlx5_dev_ops = {
.map_aggr_tx_affinity = mlx5_map_aggr_tx_affinity,
.rx_metadata_negotiate = mlx5_flow_rx_metadata_negotiate,
.get_restore_flags = mlx5_get_restore_flags,
+ .set_queue_rate_limit = mlx5_set_queue_rate_limit,
};
/* Available operations from secondary process. */
@@ -2783,6 +2784,7 @@ const struct eth_dev_ops mlx5_dev_ops_isolate = {
.count_aggr_ports = mlx5_count_aggr_ports,
.map_aggr_tx_affinity = mlx5_map_aggr_tx_affinity,
.get_restore_flags = mlx5_get_restore_flags,
+ .set_queue_rate_limit = mlx5_set_queue_rate_limit,
};
/**
diff --git a/drivers/net/mlx5/mlx5_tx.h b/drivers/net/mlx5/mlx5_tx.h
index b1b3653247..3a37f5bb4d 100644
--- a/drivers/net/mlx5/mlx5_tx.h
+++ b/drivers/net/mlx5/mlx5_tx.h
@@ -222,6 +222,8 @@ struct mlx5_txq_ctrl *mlx5_txq_get(struct rte_eth_dev *dev, uint16_t idx);
int mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx);
int mlx5_txq_releasable(struct rte_eth_dev *dev, uint16_t idx);
int mlx5_txq_verify(struct rte_eth_dev *dev);
+int mlx5_set_queue_rate_limit(struct rte_eth_dev *dev, uint16_t queue_idx,
+ uint32_t tx_rate);
int mlx5_txq_get_sqn(struct mlx5_txq_ctrl *txq);
void mlx5_txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl);
void mlx5_txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl);
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index fa9bb48fd4..7863b529f6 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -1363,6 +1363,109 @@ mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx)
return 0;
}
+/**
+ * Set per-queue packet pacing rate limit.
+ *
+ * Implements the .set_queue_rate_limit eth_dev op: allocates (or frees)
+ * a packet-pacing (PP) rate-limit index for the requested rate and points
+ * the live SQ at it via DevX MODIFY_SQ, so the queue is never torn down
+ * or restarted.
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ * @param queue_idx
+ * TX queue index.
+ * @param tx_rate
+ * TX rate in Mbps, 0 to disable rate limiting.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_set_queue_rate_limit(struct rte_eth_dev *dev, uint16_t queue_idx,
+ uint32_t tx_rate)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+ struct mlx5_dev_ctx_shared *sh = priv->sh;
+ struct mlx5_txq_ctrl *txq_ctrl;
+ struct mlx5_devx_modify_sq_attr sq_attr = { 0 };
+ int ret;
+
+ /*
+ * Check the raw HCA capability rather than dev_cap.txpp_en: the
+ * latter is only set together with the tx_pp devarg and Clock
+ * Queue prerequisites, which per-queue rate limiting does not need.
+ */
+ if (!sh->cdev->config.hca_attr.qos.packet_pacing) {
+ DRV_LOG(ERR, "Port %u packet pacing not supported.",
+ dev->data->port_id);
+ rte_errno = ENOTSUP;
+ return -rte_errno;
+ }
+ if (queue_idx >= dev->data->nb_tx_queues) {
+ DRV_LOG(ERR, "Port %u Tx queue %u out of range.",
+ dev->data->port_id, queue_idx);
+ rte_errno = EINVAL;
+ return -rte_errno;
+ }
+ if (priv->txqs == NULL || (*priv->txqs)[queue_idx] == NULL) {
+ DRV_LOG(ERR, "Port %u Tx queue %u not configured.",
+ dev->data->port_id, queue_idx);
+ rte_errno = EINVAL;
+ return -rte_errno;
+ }
+ txq_ctrl = container_of((*priv->txqs)[queue_idx],
+ struct mlx5_txq_ctrl, txq);
+ /* Hairpin queues have no driver-owned SQ that could be modified. */
+ if (txq_ctrl->is_hairpin) {
+ DRV_LOG(ERR, "Port %u Tx queue %u is hairpin.",
+ dev->data->port_id, queue_idx);
+ rte_errno = EINVAL;
+ return -rte_errno;
+ }
+ /* The SQ object only exists once the queue has been started. */
+ if (txq_ctrl->obj == NULL || txq_ctrl->obj->sq == NULL) {
+ DRV_LOG(ERR, "Port %u Tx queue %u SQ not ready.",
+ dev->data->port_id, queue_idx);
+ rte_errno = EINVAL;
+ return -rte_errno;
+ }
+ if (tx_rate == 0) {
+ /* Disable rate limiting. */
+ /*
+ * NOTE(review): pp_id == 0 is used as the "no PP index"
+ * sentinel here and below - confirm that
+ * mlx5_txq_alloc_pp_rate_limit() can never hand out
+ * index 0 for a valid allocation.
+ */
+ if (txq_ctrl->rl.pp_id == 0)
+ return 0; /* Already disabled. */
+ /*
+ * RDY->RDY transition with rl_update set: only the
+ * packet_pacing_rate_limit_index field of the SQ context
+ * is modified; index 0 detaches the SQ from any rate.
+ */
+ sq_attr.sq_state = MLX5_SQC_STATE_RDY;
+ sq_attr.state = MLX5_SQC_STATE_RDY;
+ sq_attr.rl_update = 1;
+ sq_attr.packet_pacing_rate_limit_index = 0;
+ ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
+ if (ret) {
+ DRV_LOG(ERR,
+ "Port %u Tx queue %u failed to clear rate.",
+ dev->data->port_id, queue_idx);
+ rte_errno = -ret;
+ return ret;
+ }
+ /* SQ no longer references the index; safe to free it now. */
+ mlx5_txq_free_pp_rate_limit(&txq_ctrl->rl);
+ DRV_LOG(DEBUG, "Port %u Tx queue %u rate limit disabled.",
+ dev->data->port_id, queue_idx);
+ return 0;
+ }
+ /* Allocate a new PP index for the requested rate into a temp. */
+ struct mlx5_txq_rate_limit new_rl = { 0 };
+
+ ret = mlx5_txq_alloc_pp_rate_limit(sh, &new_rl, tx_rate);
+ if (ret)
+ return ret;
+ /* Modify live SQ to use the new PP index. */
+ /*
+ * NOTE(review): assumes the SQ is currently in RDY state; the
+ * modify will fail if the queue is stopped (SQ in RST). The error
+ * is propagated, but worth confirming against mlx5_txq_stop().
+ */
+ sq_attr.sq_state = MLX5_SQC_STATE_RDY;
+ sq_attr.state = MLX5_SQC_STATE_RDY;
+ sq_attr.rl_update = 1;
+ sq_attr.packet_pacing_rate_limit_index = new_rl.pp_id;
+ ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq, &sq_attr);
+ if (ret) {
+ DRV_LOG(ERR, "Port %u Tx queue %u failed to set rate %u Mbps.",
+ dev->data->port_id, queue_idx, tx_rate);
+ /* Roll back the temp allocation; the SQ is unchanged. */
+ mlx5_txq_free_pp_rate_limit(&new_rl);
+ rte_errno = -ret;
+ return ret;
+ }
+ /* SQ updated — release old PP context, install new one. */
+ /*
+ * (Presumably freeing a still-zeroed rl is a no-op when no rate
+ * was previously set - TODO confirm in the free helper.)
+ */
+ mlx5_txq_free_pp_rate_limit(&txq_ctrl->rl);
+ txq_ctrl->rl = new_rl;
+ DRV_LOG(DEBUG, "Port %u Tx queue %u rate set to %u Mbps (PP idx %u).",
+ dev->data->port_id, queue_idx, tx_rate, txq_ctrl->rl.pp_id);
+ return 0;
+}
+
/**
* Verify if the queue can be released.
*
--
2.43.0
More information about the dev
mailing list