[PATCH 09/19] common/cnxk: add support for per packet SQ count update

Nithin Dabilpuram ndabilpuram at marvell.com
Mon Sep 1 09:30:25 CEST 2025


From: Satha Rao <skoteshwar at marvell.com>

The SQ context is extended with a new feature: if enabled, the counter is
updated when a packet is processed, whether it is transmitted or dropped.

Signed-off-by: Satha Rao <skoteshwar at marvell.com>
---
 drivers/common/cnxk/hw/nix.h                  | 47 +++++++------
 drivers/common/cnxk/roc_features.h            |  6 ++
 drivers/common/cnxk/roc_nix.h                 |  3 +
 drivers/common/cnxk/roc_nix_queue.c           | 70 ++++++++++++++++++-
 drivers/common/cnxk/roc_nix_tm.c              |  2 +-
 drivers/common/cnxk/roc_nix_tm_ops.c          |  8 ++-
 drivers/common/cnxk/roc_platform.h            |  6 ++
 .../common/cnxk/roc_platform_base_symbols.c   |  1 +
 8 files changed, 118 insertions(+), 25 deletions(-)

diff --git a/drivers/common/cnxk/hw/nix.h b/drivers/common/cnxk/hw/nix.h
index 8956b95040..314beb9e0b 100644
--- a/drivers/common/cnxk/hw/nix.h
+++ b/drivers/common/cnxk/hw/nix.h
@@ -2092,21 +2092,25 @@ struct nix_cn20k_sq_ctx_hw_s {
 	uint64_t default_chan : 12;
 	uint64_t sdp_mcast : 1;
 	uint64_t sso_ena : 1;
-	uint64_t dse_rsvd1 : 28;
+	uint64_t dse_rsvd1 : 10;
+	uint64_t update_sq_count : 2;
+	uint64_t seb_count : 16;
 	uint64_t sqb_enqueue_count : 16; /* W4 */
 	uint64_t tail_offset : 6;
 	uint64_t lmt_dis : 1;
 	uint64_t smq_rr_weight : 14;
-	uint64_t dnq_rsvd1 : 27;
+	uint64_t dnq_rsvd1 : 4;
+	uint64_t sq_count_iova_lo : 23;
 	uint64_t tail_sqb : 64; /* W5 */
 	uint64_t next_sqb : 64; /* W6 */
-	uint64_t smq : 11; /* W7 */
+	uint64_t smq : 11;	/* W7 */
 	uint64_t smq_pend : 1;
 	uint64_t smq_next_sq : 20;
 	uint64_t smq_next_sq_vld : 1;
 	uint64_t mnq_dis : 1;
-	uint64_t scm1_rsvd2 : 30;
-	uint64_t smenq_sqb : 64; /* W8 */
+	uint64_t scm1_rsvd2 : 7;
+	uint64_t sq_count_iova_hi : 23;
+	uint64_t smenq_sqb : 64;   /* W8 */
 	uint64_t smenq_offset : 6; /* W9 */
 	uint64_t cq_limit : 8;
 	uint64_t smq_rr_count : 32;
@@ -2122,7 +2126,7 @@ struct nix_cn20k_sq_ctx_hw_s {
 	uint64_t smenq_next_sqb_vld : 1;
 	uint64_t scm_dq_rsvd1 : 9;
 	uint64_t smenq_next_sqb : 64; /* W11 */
-	uint64_t age_drop_octs : 32; /* W12 */
+	uint64_t age_drop_octs : 32;  /* W12 */
 	uint64_t age_drop_pkts : 32;
 	uint64_t drop_pkts : 48; /* W13 */
 	uint64_t drop_octs_lsw : 16;
@@ -2160,19 +2164,20 @@ struct nix_cn20k_sq_ctx_s {
 	uint64_t lmt_dis : 1;
 	uint64_t mnq_dis : 1;
 	uint64_t smq_next_sq : 20;
-	uint64_t smq_lso_segnum :  8;
-	uint64_t tail_offset :  6;
-	uint64_t smenq_offset :  6;
-	uint64_t head_offset :  6;
-	uint64_t smenq_next_sqb_vld :  1;
-	uint64_t smq_pend :  1;
-	uint64_t smq_next_sq_vld :  1;
-	uint64_t reserved_253_255 :  3;
-	uint64_t next_sqb : 64; /* W4 */
-	uint64_t tail_sqb : 64; /* W5 */
-	uint64_t smenq_sqb : 64; /* W6 */
-	uint64_t smenq_next_sqb : 64; /* W7 */
-	uint64_t head_sqb : 64; /* W8 */
+	uint64_t smq_lso_segnum : 8;
+	uint64_t tail_offset : 6;
+	uint64_t smenq_offset : 6;
+	uint64_t head_offset : 6;
+	uint64_t smenq_next_sqb_vld : 1;
+	uint64_t smq_pend : 1;
+	uint64_t smq_next_sq_vld : 1;
+	uint64_t update_sq_count : 2;
+	uint64_t reserved_255_255 : 1;
+	uint64_t next_sqb : 64;	       /* W4 */
+	uint64_t tail_sqb : 64;	       /* W5 */
+	uint64_t smenq_sqb : 64;       /* W6 */
+	uint64_t smenq_next_sqb : 64;  /* W7 */
+	uint64_t head_sqb : 64;	       /* W8 */
 	uint64_t reserved_576_583 : 8; /* W9 */
 	uint64_t vfi_lso_total : 18;
 	uint64_t vfi_lso_sizem1 : 3;
@@ -2183,7 +2188,7 @@ struct nix_cn20k_sq_ctx_s {
 	uint64_t vfi_lso_vld : 1;
 	uint64_t reserved_630_639 : 10;
 	uint64_t scm_lso_rem : 18; /* W10 */
-	uint64_t reserved_658_703 : 46;
+	uint64_t sq_count_iova : 46;
 	uint64_t octs : 48; /* W11 */
 	uint64_t reserved_752_767 : 16;
 	uint64_t pkts : 48; /* W12 */
@@ -2193,7 +2198,7 @@ struct nix_cn20k_sq_ctx_s {
 	uint64_t drop_octs : 48; /* W14 */
 	uint64_t reserved_944_959 : 16;
 	uint64_t drop_pkts : 48; /* W15 */
-	uint64_t reserved_1008_1023 : 16;
+	uint64_t seb_count : 16;
 };
 
 /* [CN10K, .) NIX sq context hardware structure */
diff --git a/drivers/common/cnxk/roc_features.h b/drivers/common/cnxk/roc_features.h
index 48ba2fade7..62a1b9e0b2 100644
--- a/drivers/common/cnxk/roc_features.h
+++ b/drivers/common/cnxk/roc_features.h
@@ -120,4 +120,10 @@ roc_feature_nix_has_plain_pkt_reassembly(void)
 	return roc_model_is_cn20k();
 }
 
+static inline bool
+roc_feature_nix_has_sq_cnt_update(void)
+{
+	return roc_model_is_cn20k(); /* SQ count update is reported for CN20K models */
+}
+
 #endif
diff --git a/drivers/common/cnxk/roc_nix.h b/drivers/common/cnxk/roc_nix.h
index 35eb855986..e070db1baa 100644
--- a/drivers/common/cnxk/roc_nix.h
+++ b/drivers/common/cnxk/roc_nix.h
@@ -409,6 +409,8 @@ struct roc_nix_sq {
 	void *lmt_addr;
 	void *sqe_mem;
 	void *fc;
+	void *sq_cnt_ptr;
+	uint8_t update_sq_cnt;
 	uint8_t tc;
 	bool enable;
 };
@@ -989,6 +991,7 @@ int __roc_api roc_nix_sq_fini(struct roc_nix_sq *sq);
 int __roc_api roc_nix_sq_ena_dis(struct roc_nix_sq *sq, bool enable);
 void __roc_api roc_nix_sq_head_tail_get(struct roc_nix *roc_nix, uint16_t qid,
 					uint32_t *head, uint32_t *tail);
+int __roc_api roc_nix_sq_cnt_update(struct roc_nix_sq *sq, bool enable);
 
 /* PTP */
 int __roc_api roc_nix_ptp_rx_ena_dis(struct roc_nix *roc_nix, int enable);
diff --git a/drivers/common/cnxk/roc_nix_queue.c b/drivers/common/cnxk/roc_nix_queue.c
index e19a6877e6..356367624f 100644
--- a/drivers/common/cnxk/roc_nix_queue.c
+++ b/drivers/common/cnxk/roc_nix_queue.c
@@ -1464,7 +1464,7 @@ sqb_pool_populate(struct roc_nix *roc_nix, struct roc_nix_sq *sq)
 
 	if (roc_nix->sqb_slack)
 		nb_sqb_bufs += roc_nix->sqb_slack;
-	else
+	else if (!sq->sq_cnt_ptr)
 		nb_sqb_bufs += PLT_MAX((int)thr, (int)ROC_NIX_SQB_SLACK_DFLT);
 	/* Explicitly set nat_align alone as by default pool is with both
 	 * nat_align and buf_offset = 1 which we don't want for SQB.
@@ -1473,7 +1473,9 @@ sqb_pool_populate(struct roc_nix *roc_nix, struct roc_nix_sq *sq)
 	pool.nat_align = 1;
 
 	memset(&aura, 0, sizeof(aura));
-	aura.fc_ena = 1;
+	/* Disable SQ pool FC updates when SQ count updates are used */
+	if (!sq->sq_cnt_ptr)
+		aura.fc_ena = 1;
 	if (roc_model_is_cn9k() || roc_errata_npa_has_no_fc_stype_ststp())
 		aura.fc_stype = 0x0; /* STF */
 	else
@@ -1827,6 +1829,11 @@ sq_init(struct nix *nix, struct roc_nix_sq *sq, uint32_t rr_quantum, uint16_t sm
 	aq->sq.sq_int_ena |= BIT(NIX_SQINT_SEND_ERR);
 	aq->sq.sq_int_ena |= BIT(NIX_SQINT_MNQ_ERR);
 
+	/* HW atomic update of SQ count */
+	if (sq->sq_cnt_ptr) {
+		aq->sq.sq_count_iova = ((uintptr_t)sq->sq_cnt_ptr) >> 3;
+		aq->sq.update_sq_count = sq->update_sq_cnt;
+	}
 	/* Many to one reduction */
 	aq->sq.qint_idx = sq->qid % nix->qints;
 	if (roc_errata_nix_assign_incorrect_qint()) {
@@ -2133,3 +2140,62 @@ roc_nix_q_err_cb_unregister(struct roc_nix *roc_nix)
 
 	dev->ops->q_err_cb = NULL;
 }
+
+int
+roc_nix_sq_cnt_update(struct roc_nix_sq *sq, bool enable)
+{
+	struct nix *nix = roc_nix_to_nix_priv(sq->roc_nix);
+	struct mbox *mbox = mbox_get((&nix->dev)->mbox);
+	int64_t __rte_atomic *sq_cntm = (int64_t __rte_atomic *)sq->sq_cnt_ptr;
+	struct nix_cn20k_aq_enq_rsp *rsp;
+	struct nix_cn20k_aq_enq_req *aq;
+	int rc;
+
+	aq = mbox_alloc_msg_nix_cn20k_aq_enq(mbox);
+	if (!aq) {
+		mbox_put(mbox);
+		return -ENOSPC;
+	}
+
+	aq->qidx = sq->qid;
+	aq->ctype = NIX_AQ_CTYPE_SQ;
+	aq->op = NIX_AQ_INSTOP_READ; /* read back the current SQ context */
+	rc = mbox_process_msg(mbox, (void *)&rsp);
+	if (rc) {
+		mbox_put(mbox);
+		return rc;
+	}
+
+	/* Nothing to do if update_sq_count in the SQ context already matches the request */
+	if ((enable && rsp->sq.update_sq_count) || (!enable && !rsp->sq.update_sq_count)) {
+		mbox_put(mbox);
+		return 0;
+	}
+
+	/* Write update_sq_count: sq->update_sq_cnt on enable, 0 on disable */
+	aq = mbox_alloc_msg_nix_cn20k_aq_enq(mbox);
+	if (!aq) {
+		mbox_put(mbox);
+		return -ENOSPC;
+	}
+
+	aq->qidx = sq->qid;
+	aq->ctype = NIX_AQ_CTYPE_SQ;
+	aq->op = NIX_AQ_INSTOP_WRITE;
+	aq->sq_mask.update_sq_count = ~aq->sq_mask.update_sq_count;
+	aq->sq.update_sq_count = enable;
+	if (enable)
+		aq->sq.update_sq_count = sq->update_sq_cnt;
+	rc = mbox_process(mbox);
+	if (rc) {
+		mbox_put(mbox);
+		return rc;
+	}
+	if (enable) /* counter memory is set to nb_desc on enable, to 0 on disable */
+		plt_atomic_store_explicit(sq_cntm, sq->nb_desc, plt_memory_order_relaxed);
+	else
+		plt_atomic_store_explicit(sq_cntm, 0, plt_memory_order_relaxed);
+
+	mbox_put(mbox);
+	return 0;
+}
diff --git a/drivers/common/cnxk/roc_nix_tm.c b/drivers/common/cnxk/roc_nix_tm.c
index 2771fd8fc4..76c0f01884 100644
--- a/drivers/common/cnxk/roc_nix_tm.c
+++ b/drivers/common/cnxk/roc_nix_tm.c
@@ -601,7 +601,7 @@ roc_nix_tm_sq_flush_spin(struct roc_nix_sq *sq)
 
 		/* SQ reached quiescent state */
 		if (sqb_cnt <= 1 && head_off == tail_off &&
-		    (*(volatile uint64_t *)sq->fc == sq->aura_sqb_bufs)) {
+		    (sq->sq_cnt_ptr || (*(volatile uint64_t *)sq->fc == sq->aura_sqb_bufs))) {
 			break;
 		}
 
diff --git a/drivers/common/cnxk/roc_nix_tm_ops.c b/drivers/common/cnxk/roc_nix_tm_ops.c
index 951c310a56..09d014a276 100644
--- a/drivers/common/cnxk/roc_nix_tm_ops.c
+++ b/drivers/common/cnxk/roc_nix_tm_ops.c
@@ -19,6 +19,12 @@ roc_nix_tm_sq_aura_fc(struct roc_nix_sq *sq, bool enable)
 	plt_tm_dbg("Setting SQ %u SQB aura FC to %s", sq->qid,
 		   enable ? "enable" : "disable");
 
+	/* For cn20K, if the SQ count pointer was allocated, enable/disable
+	 * SQ count updates based on the enable field.
+	 */
+	if (sq->sq_cnt_ptr)
+		return roc_nix_sq_cnt_update(sq, enable);
+
 	lf = idev_npa_obj_get();
 	if (!lf)
 		return NPA_ERR_DEVICE_NOT_BOUNDED;
@@ -554,7 +560,7 @@ roc_nix_tm_hierarchy_disable(struct roc_nix *roc_nix)
 		tail_off = (val >> 28) & 0x3F;
 
 		if (sqb_cnt > 1 || head_off != tail_off ||
-		    (*(uint64_t *)sq->fc != sq->aura_sqb_bufs))
+		    (!sq->sq_cnt_ptr && (*(uint64_t *)sq->fc != sq->aura_sqb_bufs)))
 			plt_err("Failed to gracefully flush sq %u", sq->qid);
 	}
 
diff --git a/drivers/common/cnxk/roc_platform.h b/drivers/common/cnxk/roc_platform.h
index ff3a25e57f..e22a50d47a 100644
--- a/drivers/common/cnxk/roc_platform.h
+++ b/drivers/common/cnxk/roc_platform.h
@@ -212,6 +212,12 @@ plt_thread_is_valid(plt_thread_t thr)
 #define plt_io_rmb()		rte_io_rmb()
 #define plt_atomic_thread_fence rte_atomic_thread_fence
 
+#define plt_atomic_store_explicit rte_atomic_store_explicit
+#define plt_atomic_load_explicit  rte_atomic_load_explicit
+#define plt_memory_order_release  rte_memory_order_release
+#define plt_memory_order_acquire  rte_memory_order_acquire
+#define plt_memory_order_relaxed  rte_memory_order_relaxed
+
 #define plt_bit_relaxed_get32   rte_bit_relaxed_get32
 #define plt_bit_relaxed_set32   rte_bit_relaxed_set32
 #define plt_bit_relaxed_clear32 rte_bit_relaxed_clear32
diff --git a/drivers/common/cnxk/roc_platform_base_symbols.c b/drivers/common/cnxk/roc_platform_base_symbols.c
index 7174e5fe08..5f75d11e24 100644
--- a/drivers/common/cnxk/roc_platform_base_symbols.c
+++ b/drivers/common/cnxk/roc_platform_base_symbols.c
@@ -361,6 +361,7 @@ RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_rss_reta_get)
 RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_rss_flowkey_set)
 RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_rss_default_setup)
 RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_num_xstats_get)
+RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_sq_cnt_update)
 RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_stats_get)
 RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_stats_reset)
 RTE_EXPORT_INTERNAL_SYMBOL(roc_nix_stats_queue_get)
-- 
2.34.1



More information about the dev mailing list