[PATCH v5 10/18] net/mlx5: add HW steering connection tracking support
    Suanming Mou 
    suanmingm at nvidia.com
       
    Thu Oct 20 05:22:02 CEST 2022
    
    
  
This commit adds the support of connection tracking to HW steering as
SW steering did before.
Different with SW steering implementation, take advantage of HW steering
bulk action allocation support, in HW steering only one single CT pool
is needed.
An indexed pool is introduced to record allocated actions from bulk and
CT action state etc. Once one CT action is allocated from bulk, one
indexed object will also be allocated from the indexed pool, similar for
deallocate. That makes mlx5_aso_ct_action can also be managed by that
indexed pool, no need to be reserved from mlx5_aso_ct_pool. The single
CT pool is also saved to mlx5_aso_ct_action struct directly.
The ASO operation functions are shared with SW steering implementation.
Signed-off-by: Suanming Mou <suanmingm at nvidia.com>
---
 doc/guides/nics/mlx5.rst               |   2 +-
 doc/guides/rel_notes/release_22_11.rst |   1 +
 drivers/net/mlx5/linux/mlx5_os.c       |   8 +-
 drivers/net/mlx5/mlx5.c                |   3 +-
 drivers/net/mlx5/mlx5.h                |  54 +++-
 drivers/net/mlx5/mlx5_flow.c           |   1 +
 drivers/net/mlx5/mlx5_flow.h           |   7 +
 drivers/net/mlx5/mlx5_flow_aso.c       | 212 ++++++++++----
 drivers/net/mlx5/mlx5_flow_dv.c        |  28 +-
 drivers/net/mlx5/mlx5_flow_hw.c        | 381 ++++++++++++++++++++++++-
 10 files changed, 619 insertions(+), 78 deletions(-)
diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index 0c7bd042a4..e499c38dcf 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -506,7 +506,7 @@ Limitations
   - Cannot co-exist with ASO meter, ASO age action in a single flow rule.
   - Flow rules insertion rate and memory consumption need more optimization.
   - 256 ports maximum.
-  - 4M connections maximum.
+  - 4M connections maximum with ``dv_flow_en`` 1 mode. 16M with ``dv_flow_en`` 2.
 
 - Multi-thread flow insertion:
 
diff --git a/doc/guides/rel_notes/release_22_11.rst b/doc/guides/rel_notes/release_22_11.rst
index 64fbecd7b3..1ec218a5d1 100644
--- a/doc/guides/rel_notes/release_22_11.rst
+++ b/doc/guides/rel_notes/release_22_11.rst
@@ -244,6 +244,7 @@ New Features
     - Support of FDB.
     - Support of meter.
     - Support of counter.
+    - Support of CT.
 
 * **Rewritten pmdinfo script.**
 
diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 78cc44fae8..55801682cc 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -1349,9 +1349,11 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
 			DRV_LOG(DEBUG, "Flow Hit ASO is supported.");
 		}
 #endif /* HAVE_MLX5_DR_CREATE_ACTION_ASO */
-#if defined(HAVE_MLX5_DR_CREATE_ACTION_ASO) && \
-	defined(HAVE_MLX5_DR_ACTION_ASO_CT)
-		if (hca_attr->ct_offload && priv->mtr_color_reg == REG_C_3) {
+#if defined (HAVE_MLX5_DR_CREATE_ACTION_ASO) && \
+    defined (HAVE_MLX5_DR_ACTION_ASO_CT)
+		/* HWS create CT ASO SQ based on HWS configure queue number. */
+		if (sh->config.dv_flow_en != 2 &&
+		    hca_attr->ct_offload && priv->mtr_color_reg == REG_C_3) {
 			err = mlx5_flow_aso_ct_mng_init(sh);
 			if (err) {
 				err = -err;
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index e7a4aac354..6490ac636c 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -755,7 +755,8 @@ mlx5_flow_aso_ct_mng_init(struct mlx5_dev_ctx_shared *sh)
 
 	if (sh->ct_mng)
 		return 0;
-	sh->ct_mng = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*sh->ct_mng),
+	sh->ct_mng = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*sh->ct_mng) +
+				 sizeof(struct mlx5_aso_sq) * MLX5_ASO_CT_SQ_NUM,
 				 RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
 	if (!sh->ct_mng) {
 		DRV_LOG(ERR, "ASO CT management allocation failed.");
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index b8663e0322..9c080e5eac 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -44,6 +44,8 @@
 
 #define MLX5_SH(dev) (((struct mlx5_priv *)(dev)->data->dev_private)->sh)
 
+#define MLX5_HW_INV_QUEUE UINT32_MAX
+
 /*
  * Number of modification commands.
  * The maximal actions amount in FW is some constant, and it is 16 in the
@@ -1164,7 +1166,12 @@ enum mlx5_aso_ct_state {
 
 /* Generic ASO connection tracking structure. */
 struct mlx5_aso_ct_action {
-	LIST_ENTRY(mlx5_aso_ct_action) next; /* Pointer to the next ASO CT. */
+	union {
+		LIST_ENTRY(mlx5_aso_ct_action) next;
+		/* Pointer to the next ASO CT. Used only in SWS. */
+		struct mlx5_aso_ct_pool *pool;
+		/* Pointer to action pool. Used only in HWS. */
+	};
 	void *dr_action_orig; /* General action object for original dir. */
 	void *dr_action_rply; /* General action object for reply dir. */
 	uint32_t refcnt; /* Action used count in device flows. */
@@ -1178,28 +1185,48 @@ struct mlx5_aso_ct_action {
 #define MLX5_ASO_CT_UPDATE_STATE(c, s) \
 	__atomic_store_n(&((c)->state), (s), __ATOMIC_RELAXED)
 
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
+
 /* ASO connection tracking software pool definition. */
 struct mlx5_aso_ct_pool {
 	uint16_t index; /* Pool index in pools array. */
+	/* Free ASO CT index in the pool. Used by HWS. */
+	struct mlx5_indexed_pool *cts;
 	struct mlx5_devx_obj *devx_obj;
-	/* The first devx object in the bulk, used for freeing (not yet). */
-	struct mlx5_aso_ct_action actions[MLX5_ASO_CT_ACTIONS_PER_POOL];
+	union {
+		void *dummy_action;
+		/* Dummy action to increase the reference count in the driver. */
+		struct mlx5dr_action *dr_action;
+		/* HWS action. */
+	};
+	struct mlx5_aso_sq *sq; /* Async ASO SQ. */
+	struct mlx5_aso_sq *shared_sq; /* Shared ASO SQ. */
+	struct mlx5_aso_ct_action actions[0];
 	/* CT action structures bulk. */
 };
 
 LIST_HEAD(aso_ct_list, mlx5_aso_ct_action);
 
+#define MLX5_ASO_CT_SQ_NUM 16
+
 /* Pools management structure for ASO connection tracking pools. */
 struct mlx5_aso_ct_pools_mng {
 	struct mlx5_aso_ct_pool **pools;
 	uint16_t n; /* Total number of pools. */
 	uint16_t next; /* Number of pools in use, index of next free pool. */
+	uint32_t nb_sq; /* Number of ASO SQ. */
 	rte_spinlock_t ct_sl; /* The ASO CT free list lock. */
 	rte_rwlock_t resize_rwl; /* The ASO CT pool resize lock. */
 	struct aso_ct_list free_cts; /* Free ASO CT objects list. */
-	struct mlx5_aso_sq aso_sq; /* ASO queue objects. */
+	struct mlx5_aso_sq aso_sqs[0]; /* ASO queue objects. */
 };
 
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-Wpedantic"
+#endif
+
 /* LAG attr. */
 struct mlx5_lag {
 	uint8_t tx_remap_affinity[16]; /* The PF port number of affinity */
@@ -1337,8 +1364,7 @@ struct mlx5_dev_ctx_shared {
 	rte_spinlock_t geneve_tlv_opt_sl; /* Lock for geneve tlv resource */
 	struct mlx5_flow_mtr_mng *mtrmng;
 	/* Meter management structure. */
-	struct mlx5_aso_ct_pools_mng *ct_mng;
-	/* Management data for ASO connection tracking. */
+	struct mlx5_aso_ct_pools_mng *ct_mng; /* Management data for ASO CT in HWS only. */
 	struct mlx5_lb_ctx self_lb; /* QP to enable self loopback for Devx. */
 	unsigned int flow_max_priority;
 	enum modify_reg flow_mreg_c[MLX5_MREG_C_NUM];
@@ -1654,6 +1680,9 @@ struct mlx5_priv {
 	/* HW steering create ongoing rte flow table list header. */
 	LIST_HEAD(flow_hw_tbl_ongo, rte_flow_template_table) flow_hw_tbl_ongo;
 	struct mlx5_indexed_pool *acts_ipool; /* Action data indexed pool. */
+	struct mlx5_aso_ct_pools_mng *ct_mng;
+	/* Management data for ASO connection tracking. */
+	struct mlx5_aso_ct_pool *hws_ctpool; /* HW steering's CT pool. */
 #endif
 };
 
@@ -2053,15 +2082,15 @@ int mlx5_aso_meter_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
 		struct mlx5_aso_mtr *mtr, struct mlx5_mtr_bulk *bulk);
 int mlx5_aso_mtr_wait(struct mlx5_dev_ctx_shared *sh,
 		struct mlx5_aso_mtr *mtr);
-int mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
+int mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh, uint32_t queue,
 			      struct mlx5_aso_ct_action *ct,
 			      const struct rte_flow_action_conntrack *profile);
-int mlx5_aso_ct_wait_ready(struct mlx5_dev_ctx_shared *sh,
+int mlx5_aso_ct_wait_ready(struct mlx5_dev_ctx_shared *sh, uint32_t queue,
 			   struct mlx5_aso_ct_action *ct);
-int mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
+int mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh, uint32_t queue,
 			     struct mlx5_aso_ct_action *ct,
 			     struct rte_flow_action_conntrack *profile);
-int mlx5_aso_ct_available(struct mlx5_dev_ctx_shared *sh,
+int mlx5_aso_ct_available(struct mlx5_dev_ctx_shared *sh, uint32_t queue,
 			  struct mlx5_aso_ct_action *ct);
 uint32_t
 mlx5_get_supported_sw_parsing_offloads(const struct mlx5_hca_attr *attr);
@@ -2072,6 +2101,11 @@ int mlx5_aso_cnt_queue_init(struct mlx5_dev_ctx_shared *sh);
 void mlx5_aso_cnt_queue_uninit(struct mlx5_dev_ctx_shared *sh);
 int mlx5_aso_cnt_query(struct mlx5_dev_ctx_shared *sh,
 		struct mlx5_hws_cnt_pool *cpool);
+int mlx5_aso_ct_queue_init(struct mlx5_dev_ctx_shared *sh,
+			   struct mlx5_aso_ct_pools_mng *ct_mng,
+			   uint32_t nb_queues);
+int mlx5_aso_ct_queue_uninit(struct mlx5_dev_ctx_shared *sh,
+			     struct mlx5_aso_ct_pools_mng *ct_mng);
 
 /* mlx5_flow_flex.c */
 
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 38932fe9d7..7c3295609d 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -49,6 +49,7 @@ struct flow_hw_port_info mlx5_flow_hw_port_infos[RTE_MAX_ETHPORTS];
  */
 uint32_t mlx5_flow_hw_avl_tags_init_cnt;
 enum modify_reg mlx5_flow_hw_avl_tags[MLX5_FLOW_HW_TAGS_MAX] = {REG_NON};
+enum modify_reg mlx5_flow_hw_aso_tag;
 
 struct tunnel_default_miss_ctx {
 	uint16_t *queue;
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 210cc9ae3e..7e90eac2d0 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -82,6 +82,10 @@ enum {
 #define MLX5_INDIRECT_ACT_CT_GET_IDX(index) \
 	((index) & ((1 << MLX5_INDIRECT_ACT_CT_OWNER_SHIFT) - 1))
 
+#define MLX5_ACTION_CTX_CT_GET_IDX  MLX5_INDIRECT_ACT_CT_GET_IDX
+#define MLX5_ACTION_CTX_CT_GET_OWNER MLX5_INDIRECT_ACT_CT_GET_OWNER
+#define MLX5_ACTION_CTX_CT_GEN_IDX MLX5_INDIRECT_ACT_CT_GEN_IDX
+
 /* Matches on selected register. */
 struct mlx5_rte_flow_item_tag {
 	enum modify_reg id;
@@ -1455,6 +1459,7 @@ extern struct flow_hw_port_info mlx5_flow_hw_port_infos[RTE_MAX_ETHPORTS];
 #define MLX5_FLOW_HW_TAGS_MAX 8
 extern uint32_t mlx5_flow_hw_avl_tags_init_cnt;
 extern enum modify_reg mlx5_flow_hw_avl_tags[];
+extern enum modify_reg mlx5_flow_hw_aso_tag;
 
 /*
  * Get metadata match tag and mask for given rte_eth_dev port.
@@ -1531,6 +1536,8 @@ flow_hw_get_reg_id(enum rte_flow_item_type type, uint32_t id)
 		 * REG_B case should be rejected on pattern template validation.
 		 */
 		return REG_A;
+	case RTE_FLOW_ITEM_TYPE_CONNTRACK:
+		return mlx5_flow_hw_aso_tag;
 	case RTE_FLOW_ITEM_TYPE_TAG:
 		MLX5_ASSERT(id < MLX5_FLOW_HW_TAGS_MAX);
 		return mlx5_flow_hw_avl_tags[id];
diff --git a/drivers/net/mlx5/mlx5_flow_aso.c b/drivers/net/mlx5/mlx5_flow_aso.c
index ed9272e583..c00c07b891 100644
--- a/drivers/net/mlx5/mlx5_flow_aso.c
+++ b/drivers/net/mlx5/mlx5_flow_aso.c
@@ -313,16 +313,8 @@ mlx5_aso_queue_init(struct mlx5_dev_ctx_shared *sh,
 		mlx5_aso_mtr_init_sq(&sh->mtrmng->pools_mng.sq);
 		break;
 	case ASO_OPC_MOD_CONNECTION_TRACKING:
-		/* 64B per object for query. */
-		if (mlx5_aso_reg_mr(cdev, 64 * sq_desc_n,
-				    &sh->ct_mng->aso_sq.mr))
+		if (mlx5_aso_ct_queue_init(sh, sh->ct_mng, MLX5_ASO_CT_SQ_NUM))
 			return -1;
-		if (mlx5_aso_sq_create(cdev, &sh->ct_mng->aso_sq,
-				       sh->tx_uar.obj, MLX5_ASO_QUEUE_LOG_DESC)) {
-			mlx5_aso_dereg_mr(cdev, &sh->ct_mng->aso_sq.mr);
-			return -1;
-		}
-		mlx5_aso_ct_init_sq(&sh->ct_mng->aso_sq);
 		break;
 	default:
 		DRV_LOG(ERR, "Unknown ASO operation mode");
@@ -343,7 +335,7 @@ void
 mlx5_aso_queue_uninit(struct mlx5_dev_ctx_shared *sh,
 		      enum mlx5_access_aso_opc_mod aso_opc_mod)
 {
-	struct mlx5_aso_sq *sq;
+	struct mlx5_aso_sq *sq = NULL;
 
 	switch (aso_opc_mod) {
 	case ASO_OPC_MOD_FLOW_HIT:
@@ -354,14 +346,14 @@ mlx5_aso_queue_uninit(struct mlx5_dev_ctx_shared *sh,
 		sq = &sh->mtrmng->pools_mng.sq;
 		break;
 	case ASO_OPC_MOD_CONNECTION_TRACKING:
-		mlx5_aso_dereg_mr(sh->cdev, &sh->ct_mng->aso_sq.mr);
-		sq = &sh->ct_mng->aso_sq;
+		mlx5_aso_ct_queue_uninit(sh, sh->ct_mng);
 		break;
 	default:
 		DRV_LOG(ERR, "Unknown ASO operation mode");
 		return;
 	}
-	mlx5_aso_destroy_sq(sq);
+	if (sq)
+		mlx5_aso_destroy_sq(sq);
 }
 
 /**
@@ -903,6 +895,89 @@ mlx5_aso_mtr_wait(struct mlx5_dev_ctx_shared *sh,
 	return -1;
 }
 
+static inline struct mlx5_aso_sq*
+__mlx5_aso_ct_get_sq_in_hws(uint32_t queue,
+			    struct mlx5_aso_ct_pool *pool)
+{
+	return (queue == MLX5_HW_INV_QUEUE) ?
+		pool->shared_sq : &pool->sq[queue];
+}
+
+static inline struct mlx5_aso_sq*
+__mlx5_aso_ct_get_sq_in_sws(struct mlx5_dev_ctx_shared *sh,
+			    struct mlx5_aso_ct_action *ct)
+{
+	return &sh->ct_mng->aso_sqs[ct->offset & (MLX5_ASO_CT_SQ_NUM - 1)];
+}
+
+static inline struct mlx5_aso_ct_pool*
+__mlx5_aso_ct_get_pool(struct mlx5_dev_ctx_shared *sh,
+		       struct mlx5_aso_ct_action *ct)
+{
+	if (likely(sh->config.dv_flow_en == 2))
+		return ct->pool;
+	return container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+}
+
+int
+mlx5_aso_ct_queue_uninit(struct mlx5_dev_ctx_shared *sh,
+			 struct mlx5_aso_ct_pools_mng *ct_mng)
+{
+	uint32_t i;
+
+	/* 64B per object for query. */
+	for (i = 0; i < ct_mng->nb_sq; i++) {
+		if (ct_mng->aso_sqs[i].mr.addr)
+			mlx5_aso_dereg_mr(sh->cdev, &ct_mng->aso_sqs[i].mr);
+		mlx5_aso_destroy_sq(&ct_mng->aso_sqs[i]);
+	}
+	return 0;
+}
+
+/**
+ * API to create and initialize CT Send Queue used for ASO access.
+ *
+ * @param[in] sh
+ *   Pointer to shared device context.
+ * @param[in] ct_mng
+ *   Pointer to the CT management struct.
+ * *param[in] nb_queues
+ *   Number of queues to be allocated.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_aso_ct_queue_init(struct mlx5_dev_ctx_shared *sh,
+		       struct mlx5_aso_ct_pools_mng *ct_mng,
+		       uint32_t nb_queues)
+{
+	uint32_t i;
+
+	/* 64B per object for query. */
+	for (i = 0; i < nb_queues; i++) {
+		if (mlx5_aso_reg_mr(sh->cdev, 64 * (1 << MLX5_ASO_QUEUE_LOG_DESC),
+				    &ct_mng->aso_sqs[i].mr))
+			goto error;
+		if (mlx5_aso_sq_create(sh->cdev, &ct_mng->aso_sqs[i],
+				       sh->tx_uar.obj,
+				       MLX5_ASO_QUEUE_LOG_DESC))
+			goto error;
+		mlx5_aso_ct_init_sq(&ct_mng->aso_sqs[i]);
+	}
+	ct_mng->nb_sq = nb_queues;
+	return 0;
+error:
+	do {
+		if (ct_mng->aso_sqs[i].mr.addr)
+			mlx5_aso_dereg_mr(sh->cdev, &ct_mng->aso_sqs[i].mr);
+		if (&ct_mng->aso_sqs[i])
+			mlx5_aso_destroy_sq(&ct_mng->aso_sqs[i]);
+	} while (i--);
+	ct_mng->nb_sq = 0;
+	return -1;
+}
+
 /*
  * Post a WQE to the ASO CT SQ to modify the context.
  *
@@ -918,11 +993,12 @@ mlx5_aso_mtr_wait(struct mlx5_dev_ctx_shared *sh,
  */
 static uint16_t
 mlx5_aso_ct_sq_enqueue_single(struct mlx5_dev_ctx_shared *sh,
+			      struct mlx5_aso_sq *sq,
 			      struct mlx5_aso_ct_action *ct,
-			      const struct rte_flow_action_conntrack *profile)
+			      const struct rte_flow_action_conntrack *profile,
+			      bool need_lock)
 {
 	volatile struct mlx5_aso_wqe *wqe = NULL;
-	struct mlx5_aso_sq *sq = &sh->ct_mng->aso_sq;
 	uint16_t size = 1 << sq->log_desc_n;
 	uint16_t mask = size - 1;
 	uint16_t res;
@@ -931,11 +1007,13 @@ mlx5_aso_ct_sq_enqueue_single(struct mlx5_dev_ctx_shared *sh,
 	void *orig_dir;
 	void *reply_dir;
 
-	rte_spinlock_lock(&sq->sqsl);
+	if (need_lock)
+		rte_spinlock_lock(&sq->sqsl);
 	/* Prevent other threads to update the index. */
 	res = size - (uint16_t)(sq->head - sq->tail);
 	if (unlikely(!res)) {
-		rte_spinlock_unlock(&sq->sqsl);
+		if (need_lock)
+			rte_spinlock_unlock(&sq->sqsl);
 		DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send");
 		return 0;
 	}
@@ -945,7 +1023,7 @@ mlx5_aso_ct_sq_enqueue_single(struct mlx5_dev_ctx_shared *sh,
 	MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_WAIT);
 	sq->elts[sq->head & mask].ct = ct;
 	sq->elts[sq->head & mask].query_data = NULL;
-	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	pool = __mlx5_aso_ct_get_pool(sh, ct);
 	/* Each WQE will have a single CT object. */
 	wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
 						  ct->offset);
@@ -1028,7 +1106,8 @@ mlx5_aso_ct_sq_enqueue_single(struct mlx5_dev_ctx_shared *sh,
 	mlx5_doorbell_ring(&sh->tx_uar.bf_db, *(volatile uint64_t *)wqe,
 			   sq->pi, &sq->sq_obj.db_rec[MLX5_SND_DBR],
 			   !sh->tx_uar.dbnc);
-	rte_spinlock_unlock(&sq->sqsl);
+	if (need_lock)
+		rte_spinlock_unlock(&sq->sqsl);
 	return 1;
 }
 
@@ -1080,10 +1159,11 @@ mlx5_aso_ct_status_update(struct mlx5_aso_sq *sq, uint16_t num)
  */
 static int
 mlx5_aso_ct_sq_query_single(struct mlx5_dev_ctx_shared *sh,
-			    struct mlx5_aso_ct_action *ct, char *data)
+			    struct mlx5_aso_sq *sq,
+			    struct mlx5_aso_ct_action *ct, char *data,
+			    bool need_lock)
 {
 	volatile struct mlx5_aso_wqe *wqe = NULL;
-	struct mlx5_aso_sq *sq = &sh->ct_mng->aso_sq;
 	uint16_t size = 1 << sq->log_desc_n;
 	uint16_t mask = size - 1;
 	uint16_t res;
@@ -1098,10 +1178,12 @@ mlx5_aso_ct_sq_query_single(struct mlx5_dev_ctx_shared *sh,
 	} else if (state == ASO_CONNTRACK_WAIT) {
 		return 0;
 	}
-	rte_spinlock_lock(&sq->sqsl);
+	if (need_lock)
+		rte_spinlock_lock(&sq->sqsl);
 	res = size - (uint16_t)(sq->head - sq->tail);
 	if (unlikely(!res)) {
-		rte_spinlock_unlock(&sq->sqsl);
+		if (need_lock)
+			rte_spinlock_unlock(&sq->sqsl);
 		DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send");
 		return 0;
 	}
@@ -1113,7 +1195,7 @@ mlx5_aso_ct_sq_query_single(struct mlx5_dev_ctx_shared *sh,
 	wqe_idx = sq->head & mask;
 	sq->elts[wqe_idx].ct = ct;
 	sq->elts[wqe_idx].query_data = data;
-	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	pool = __mlx5_aso_ct_get_pool(sh, ct);
 	/* Each WQE will have a single CT object. */
 	wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
 						  ct->offset);
@@ -1141,7 +1223,8 @@ mlx5_aso_ct_sq_query_single(struct mlx5_dev_ctx_shared *sh,
 	mlx5_doorbell_ring(&sh->tx_uar.bf_db, *(volatile uint64_t *)wqe,
 			   sq->pi, &sq->sq_obj.db_rec[MLX5_SND_DBR],
 			   !sh->tx_uar.dbnc);
-	rte_spinlock_unlock(&sq->sqsl);
+	if (need_lock)
+		rte_spinlock_unlock(&sq->sqsl);
 	return 1;
 }
 
@@ -1152,9 +1235,10 @@ mlx5_aso_ct_sq_query_single(struct mlx5_dev_ctx_shared *sh,
  *   Pointer to the CT pools management structure.
  */
 static void
-mlx5_aso_ct_completion_handle(struct mlx5_aso_ct_pools_mng *mng)
+mlx5_aso_ct_completion_handle(struct mlx5_dev_ctx_shared *sh __rte_unused,
+			      struct mlx5_aso_sq *sq,
+			      bool need_lock)
 {
-	struct mlx5_aso_sq *sq = &mng->aso_sq;
 	struct mlx5_aso_cq *cq = &sq->cq;
 	volatile struct mlx5_cqe *restrict cqe;
 	const uint32_t cq_size = 1 << cq->log_desc_n;
@@ -1165,10 +1249,12 @@ mlx5_aso_ct_completion_handle(struct mlx5_aso_ct_pools_mng *mng)
 	uint16_t n = 0;
 	int ret;
 
-	rte_spinlock_lock(&sq->sqsl);
+	if (need_lock)
+		rte_spinlock_lock(&sq->sqsl);
 	max = (uint16_t)(sq->head - sq->tail);
 	if (unlikely(!max)) {
-		rte_spinlock_unlock(&sq->sqsl);
+		if (need_lock)
+			rte_spinlock_unlock(&sq->sqsl);
 		return;
 	}
 	next_idx = cq->cq_ci & mask;
@@ -1199,7 +1285,8 @@ mlx5_aso_ct_completion_handle(struct mlx5_aso_ct_pools_mng *mng)
 		rte_io_wmb();
 		cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
 	}
-	rte_spinlock_unlock(&sq->sqsl);
+	if (need_lock)
+		rte_spinlock_unlock(&sq->sqsl);
 }
 
 /*
@@ -1207,6 +1294,8 @@ mlx5_aso_ct_completion_handle(struct mlx5_aso_ct_pools_mng *mng)
  *
  * @param[in] sh
  *   Pointer to mlx5_dev_ctx_shared object.
+ * @param[in] queue
+ *   The queue index.
  * @param[in] ct
  *   Pointer to connection tracking offload object.
  * @param[in] profile
@@ -1217,21 +1306,26 @@ mlx5_aso_ct_completion_handle(struct mlx5_aso_ct_pools_mng *mng)
  */
 int
 mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
+			  uint32_t queue,
 			  struct mlx5_aso_ct_action *ct,
 			  const struct rte_flow_action_conntrack *profile)
 {
 	uint32_t poll_wqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
-	struct mlx5_aso_ct_pool *pool;
+	struct mlx5_aso_ct_pool *pool = __mlx5_aso_ct_get_pool(sh, ct);
+	struct mlx5_aso_sq *sq;
+	bool need_lock = !!(queue == MLX5_HW_INV_QUEUE);
 
-	MLX5_ASSERT(ct);
+	if (sh->config.dv_flow_en == 2)
+		sq = __mlx5_aso_ct_get_sq_in_hws(queue, pool);
+	else
+		sq = __mlx5_aso_ct_get_sq_in_sws(sh, ct);
 	do {
-		mlx5_aso_ct_completion_handle(sh->ct_mng);
-		if (mlx5_aso_ct_sq_enqueue_single(sh, ct, profile))
+		mlx5_aso_ct_completion_handle(sh, sq, need_lock);
+		if (mlx5_aso_ct_sq_enqueue_single(sh, sq, ct, profile, need_lock))
 			return 0;
 		/* Waiting for wqe resource. */
 		rte_delay_us_sleep(10u);
 	} while (--poll_wqe_times);
-	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
 	DRV_LOG(ERR, "Fail to send WQE for ASO CT %d in pool %d",
 		ct->offset, pool->index);
 	return -1;
@@ -1242,6 +1336,8 @@ mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
  *
  * @param[in] sh
  *   Pointer to mlx5_dev_ctx_shared object.
+ * @param[in] queue
+ *   The queue which CT works on..
  * @param[in] ct
  *   Pointer to connection tracking offload object.
  *
@@ -1249,25 +1345,29 @@ mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
  *   0 on success, -1 on failure.
  */
 int
-mlx5_aso_ct_wait_ready(struct mlx5_dev_ctx_shared *sh,
+mlx5_aso_ct_wait_ready(struct mlx5_dev_ctx_shared *sh, uint32_t queue,
 		       struct mlx5_aso_ct_action *ct)
 {
-	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
 	uint32_t poll_cqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
-	struct mlx5_aso_ct_pool *pool;
+	struct mlx5_aso_ct_pool *pool = __mlx5_aso_ct_get_pool(sh, ct);
+	struct mlx5_aso_sq *sq;
+	bool need_lock = !!(queue == MLX5_HW_INV_QUEUE);
 
+	if (sh->config.dv_flow_en == 2)
+		sq = __mlx5_aso_ct_get_sq_in_hws(queue, pool);
+	else
+		sq = __mlx5_aso_ct_get_sq_in_sws(sh, ct);
 	if (__atomic_load_n(&ct->state, __ATOMIC_RELAXED) ==
 	    ASO_CONNTRACK_READY)
 		return 0;
 	do {
-		mlx5_aso_ct_completion_handle(mng);
+		mlx5_aso_ct_completion_handle(sh, sq, need_lock);
 		if (__atomic_load_n(&ct->state, __ATOMIC_RELAXED) ==
 		    ASO_CONNTRACK_READY)
 			return 0;
 		/* Waiting for CQE ready, consider should block or sleep. */
 		rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
 	} while (--poll_cqe_times);
-	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
 	DRV_LOG(ERR, "Fail to poll CQE for ASO CT %d in pool %d",
 		ct->offset, pool->index);
 	return -1;
@@ -1363,18 +1463,24 @@ mlx5_aso_ct_obj_analyze(struct rte_flow_action_conntrack *profile,
  */
 int
 mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
+			 uint32_t queue,
 			 struct mlx5_aso_ct_action *ct,
 			 struct rte_flow_action_conntrack *profile)
 {
 	uint32_t poll_wqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
-	struct mlx5_aso_ct_pool *pool;
+	struct mlx5_aso_ct_pool *pool = __mlx5_aso_ct_get_pool(sh, ct);
+	struct mlx5_aso_sq *sq;
+	bool need_lock = !!(queue == MLX5_HW_INV_QUEUE);
 	char out_data[64 * 2];
 	int ret;
 
-	MLX5_ASSERT(ct);
+	if (sh->config.dv_flow_en == 2)
+		sq = __mlx5_aso_ct_get_sq_in_hws(queue, pool);
+	else
+		sq = __mlx5_aso_ct_get_sq_in_sws(sh, ct);
 	do {
-		mlx5_aso_ct_completion_handle(sh->ct_mng);
-		ret = mlx5_aso_ct_sq_query_single(sh, ct, out_data);
+		mlx5_aso_ct_completion_handle(sh, sq, need_lock);
+		ret = mlx5_aso_ct_sq_query_single(sh, sq, ct, out_data, need_lock);
 		if (ret < 0)
 			return ret;
 		else if (ret > 0)
@@ -1383,12 +1489,11 @@ mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
 		else
 			rte_delay_us_sleep(10u);
 	} while (--poll_wqe_times);
-	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
 	DRV_LOG(ERR, "Fail to send WQE for ASO CT %d in pool %d",
 		ct->offset, pool->index);
 	return -1;
 data_handle:
-	ret = mlx5_aso_ct_wait_ready(sh, ct);
+	ret = mlx5_aso_ct_wait_ready(sh, queue, ct);
 	if (!ret)
 		mlx5_aso_ct_obj_analyze(profile, out_data);
 	return ret;
@@ -1408,13 +1513,20 @@ mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
  */
 int
 mlx5_aso_ct_available(struct mlx5_dev_ctx_shared *sh,
+		      uint32_t queue,
 		      struct mlx5_aso_ct_action *ct)
 {
-	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
+	struct mlx5_aso_ct_pool *pool = __mlx5_aso_ct_get_pool(sh, ct);
+	struct mlx5_aso_sq *sq;
+	bool need_lock = !!(queue == MLX5_HW_INV_QUEUE);
 	uint32_t poll_cqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
 	enum mlx5_aso_ct_state state =
 				__atomic_load_n(&ct->state, __ATOMIC_RELAXED);
 
+	if (sh->config.dv_flow_en == 2)
+		sq = __mlx5_aso_ct_get_sq_in_hws(queue, pool);
+	else
+		sq = __mlx5_aso_ct_get_sq_in_sws(sh, ct);
 	if (state == ASO_CONNTRACK_FREE) {
 		rte_errno = ENXIO;
 		return -rte_errno;
@@ -1423,13 +1535,13 @@ mlx5_aso_ct_available(struct mlx5_dev_ctx_shared *sh,
 		return 0;
 	}
 	do {
-		mlx5_aso_ct_completion_handle(mng);
+		mlx5_aso_ct_completion_handle(sh, sq, need_lock);
 		state = __atomic_load_n(&ct->state, __ATOMIC_RELAXED);
 		if (state == ASO_CONNTRACK_READY ||
 		    state == ASO_CONNTRACK_QUERY)
 			return 0;
-		/* Waiting for CQE ready, consider should block or sleep. */
-		rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
+		/* Waiting for CQE ready, consider should block or sleep.  */
+		rte_delay_us_block(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
 	} while (--poll_cqe_times);
 	rte_errno = EBUSY;
 	return -rte_errno;
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index a0bcaa5c53..ea13345baf 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -12813,6 +12813,7 @@ flow_dv_ct_pool_create(struct rte_eth_dev *dev,
 	struct mlx5_devx_obj *obj = NULL;
 	uint32_t i;
 	uint32_t log_obj_size = rte_log2_u32(MLX5_ASO_CT_ACTIONS_PER_POOL);
+	size_t mem_size;
 
 	obj = mlx5_devx_cmd_create_conn_track_offload_obj(priv->sh->cdev->ctx,
 							  priv->sh->cdev->pdn,
@@ -12822,7 +12823,10 @@ flow_dv_ct_pool_create(struct rte_eth_dev *dev,
 		DRV_LOG(ERR, "Failed to create conn_track_offload_obj using DevX.");
 		return NULL;
 	}
-	pool = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*pool), 0, SOCKET_ID_ANY);
+	mem_size = sizeof(struct mlx5_aso_ct_action) *
+		   MLX5_ASO_CT_ACTIONS_PER_POOL +
+		   sizeof(*pool);
+	pool = mlx5_malloc(MLX5_MEM_ZERO, mem_size, 0, SOCKET_ID_ANY);
 	if (!pool) {
 		rte_errno = ENOMEM;
 		claim_zero(mlx5_devx_cmd_destroy(obj));
@@ -12962,10 +12966,13 @@ flow_dv_translate_create_conntrack(struct rte_eth_dev *dev,
 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
 					  "Failed to allocate CT object");
 	ct = flow_aso_ct_get_by_dev_idx(dev, idx);
-	if (mlx5_aso_ct_update_by_wqe(sh, ct, pro))
-		return rte_flow_error_set(error, EBUSY,
-					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
-					  "Failed to update CT");
+	if (mlx5_aso_ct_update_by_wqe(sh, MLX5_HW_INV_QUEUE, ct, pro)) {
+		flow_dv_aso_ct_dev_release(dev, idx);
+		rte_flow_error_set(error, EBUSY,
+				   RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+				   "Failed to update CT");
+		return 0;
+	}
 	ct->is_original = !!pro->is_original_dir;
 	ct->peer = pro->peer_port;
 	return idx;
@@ -14160,7 +14167,7 @@ flow_dv_translate(struct rte_eth_dev *dev,
 						RTE_FLOW_ERROR_TYPE_ACTION,
 						NULL,
 						"Failed to get CT object.");
-			if (mlx5_aso_ct_available(priv->sh, ct))
+			if (mlx5_aso_ct_available(priv->sh, MLX5_HW_INV_QUEUE, ct))
 				return rte_flow_error_set(error, rte_errno,
 						RTE_FLOW_ERROR_TYPE_ACTION,
 						NULL,
@@ -15768,14 +15775,15 @@ __flow_dv_action_ct_update(struct rte_eth_dev *dev, uint32_t idx,
 		ret = mlx5_validate_action_ct(dev, new_prf, error);
 		if (ret)
 			return ret;
-		ret = mlx5_aso_ct_update_by_wqe(priv->sh, ct, new_prf);
+		ret = mlx5_aso_ct_update_by_wqe(priv->sh, MLX5_HW_INV_QUEUE,
+						ct, new_prf);
 		if (ret)
 			return rte_flow_error_set(error, EIO,
 					RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
 					NULL,
 					"Failed to send CT context update WQE");
-		/* Block until ready or a failure. */
-		ret = mlx5_aso_ct_available(priv->sh, ct);
+		/* Block until ready or a failure, default is asynchronous. */
+		ret = mlx5_aso_ct_available(priv->sh, MLX5_HW_INV_QUEUE, ct);
 		if (ret)
 			rte_flow_error_set(error, rte_errno,
 					   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
@@ -16604,7 +16612,7 @@ flow_dv_action_query(struct rte_eth_dev *dev,
 							ct->peer;
 		((struct rte_flow_action_conntrack *)data)->is_original_dir =
 							ct->is_original;
-		if (mlx5_aso_ct_query_by_wqe(priv->sh, ct, data))
+		if (mlx5_aso_ct_query_by_wqe(priv->sh, MLX5_HW_INV_QUEUE, ct, data))
 			return rte_flow_error_set(error, EIO,
 					RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
 					NULL,
diff --git a/drivers/net/mlx5/mlx5_flow_hw.c b/drivers/net/mlx5/mlx5_flow_hw.c
index 5b7ef1be68..535df6ba5d 100644
--- a/drivers/net/mlx5/mlx5_flow_hw.c
+++ b/drivers/net/mlx5/mlx5_flow_hw.c
@@ -15,6 +15,14 @@
 /* The maximum actions support in the flow. */
 #define MLX5_HW_MAX_ACTS 16
 
+/*
+ * The default ipool threshold value indicates which per_core_cache
+ * value to set.
+ */
+#define MLX5_HW_IPOOL_SIZE_THRESHOLD (1 << 19)
+/* The default min local cache size. */
+#define MLX5_HW_IPOOL_CACHE_MIN (1 << 9)
+
 /* Default push burst threshold. */
 #define BURST_THR 32u
 
@@ -324,6 +332,25 @@ flow_hw_tir_action_register(struct rte_eth_dev *dev,
 	return hrxq;
 }
 
+static __rte_always_inline int
+flow_hw_ct_compile(struct rte_eth_dev *dev,
+		   uint32_t queue, uint32_t idx,
+		   struct mlx5dr_rule_action *rule_act)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_action *ct;
+
+	ct = mlx5_ipool_get(priv->hws_ctpool->cts, MLX5_ACTION_CTX_CT_GET_IDX(idx));
+	if (!ct || mlx5_aso_ct_available(priv->sh, queue, ct))
+		return -1;
+	rule_act->action = priv->hws_ctpool->dr_action;
+	rule_act->aso_ct.offset = ct->offset;
+	rule_act->aso_ct.direction = ct->is_original ?
+		MLX5DR_ACTION_ASO_CT_DIRECTION_INITIATOR :
+		MLX5DR_ACTION_ASO_CT_DIRECTION_RESPONDER;
+	return 0;
+}
+
 /**
  * Destroy DR actions created by action template.
  *
@@ -640,6 +667,11 @@ flow_hw_shared_action_translate(struct rte_eth_dev *dev,
 			action_src, action_dst, act_idx))
 			return -1;
 		break;
+	case MLX5_INDIRECT_ACTION_TYPE_CT:
+		if (flow_hw_ct_compile(dev, MLX5_HW_INV_QUEUE,
+				       idx, &acts->rule_acts[action_dst]))
+			return -1;
+		break;
 	default:
 		DRV_LOG(WARNING, "Unsupported shared action type:%d", type);
 		break;
@@ -1083,6 +1115,7 @@ __flow_hw_actions_translate(struct rte_eth_dev *dev,
 	bool reformat_used = false;
 	uint16_t action_pos;
 	uint16_t jump_pos;
+	uint32_t ct_idx;
 	int err;
 
 	flow_hw_modify_field_init(&mhdr, at);
@@ -1305,6 +1338,20 @@ __flow_hw_actions_translate(struct rte_eth_dev *dev,
 				goto err;
 			}
 			break;
+		case RTE_FLOW_ACTION_TYPE_CONNTRACK:
+			action_pos = at->actions_off[actions - at->actions];
+			if (masks->conf) {
+				ct_idx = MLX5_ACTION_CTX_CT_GET_IDX
+					 ((uint32_t)(uintptr_t)actions->conf);
+				if (flow_hw_ct_compile(dev, MLX5_HW_INV_QUEUE, ct_idx,
+						       &acts->rule_acts[action_pos]))
+					goto err;
+			} else if (__flow_hw_act_data_general_append
+					(priv, acts, actions->type,
+					 actions - action_start, action_pos)) {
+				goto err;
+			}
+			break;
 		case RTE_FLOW_ACTION_TYPE_END:
 			actions_end = true;
 			break;
@@ -1479,6 +1526,8 @@ flow_hw_shared_action_get(struct rte_eth_dev *dev,
  *
  * @param[in] dev
  *   Pointer to the rte_eth_dev data structure.
+ * @param[in] queue
+ *   The flow creation queue index.
  * @param[in] action
  *   Pointer to the shared indirect rte_flow action.
  * @param[in] table
@@ -1492,7 +1541,7 @@ flow_hw_shared_action_get(struct rte_eth_dev *dev,
  *    0 on success, negative value otherwise and rte_errno is set.
  */
 static __rte_always_inline int
-flow_hw_shared_action_construct(struct rte_eth_dev *dev,
+flow_hw_shared_action_construct(struct rte_eth_dev *dev, uint32_t queue,
 				const struct rte_flow_action *action,
 				struct rte_flow_template_table *table,
 				const uint8_t it_idx,
@@ -1532,6 +1581,10 @@ flow_hw_shared_action_construct(struct rte_eth_dev *dev,
 				&rule_act->counter.offset))
 			return -1;
 		break;
+	case MLX5_INDIRECT_ACTION_TYPE_CT:
+		if (flow_hw_ct_compile(dev, queue, idx, rule_act))
+			return -1;
+		break;
 	default:
 		DRV_LOG(WARNING, "Unsupported shared action type:%d", type);
 		break;
@@ -1727,6 +1780,7 @@ flow_hw_actions_construct(struct rte_eth_dev *dev,
 		uint64_t item_flags;
 		struct mlx5_hw_jump_action *jump;
 		struct mlx5_hrxq *hrxq;
+		uint32_t ct_idx;
 		cnt_id_t cnt_id;
 
 		action = &actions[act_data->action_src];
@@ -1735,7 +1789,7 @@ flow_hw_actions_construct(struct rte_eth_dev *dev,
 		switch (act_data->type) {
 		case RTE_FLOW_ACTION_TYPE_INDIRECT:
 			if (flow_hw_shared_action_construct
-					(dev, action, table, it_idx,
+					(dev, queue, action, table, it_idx,
 					 &rule_acts[act_data->action_dst]))
 				return -1;
 			break;
@@ -1860,6 +1914,13 @@ flow_hw_actions_construct(struct rte_eth_dev *dev,
 				return ret;
 			job->flow->cnt_id = act_data->shared_counter.id;
 			break;
+		case RTE_FLOW_ACTION_TYPE_CONNTRACK:
+			ct_idx = MLX5_ACTION_CTX_CT_GET_IDX
+				 ((uint32_t)(uintptr_t)action->conf);
+			if (flow_hw_ct_compile(dev, queue, ct_idx,
+					       &rule_acts[act_data->action_dst]))
+				return -1;
+			break;
 		default:
 			break;
 		}
@@ -2391,6 +2452,8 @@ flow_hw_table_create(struct rte_eth_dev *dev,
 	if (nb_flows < cfg.trunk_size) {
 		cfg.per_core_cache = 0;
 		cfg.trunk_size = nb_flows;
+	} else if (nb_flows <= MLX5_HW_IPOOL_SIZE_THRESHOLD) {
+		cfg.per_core_cache = MLX5_HW_IPOOL_CACHE_MIN;
 	}
 	/* Check if we requires too many templates. */
 	if (nb_item_templates > max_tpl ||
@@ -2927,6 +2990,9 @@ flow_hw_actions_validate(struct rte_eth_dev *dev,
 		case RTE_FLOW_ACTION_TYPE_COUNT:
 			/* TODO: Validation logic */
 			break;
+		case RTE_FLOW_ACTION_TYPE_CONNTRACK:
+			/* TODO: Validation logic */
+			break;
 		case RTE_FLOW_ACTION_TYPE_END:
 			actions_end = true;
 			break;
@@ -2953,6 +3019,7 @@ static enum mlx5dr_action_type mlx5_hw_dr_action_types[] = {
 	[RTE_FLOW_ACTION_TYPE_MODIFY_FIELD] = MLX5DR_ACTION_TYP_MODIFY_HDR,
 	[RTE_FLOW_ACTION_TYPE_REPRESENTED_PORT] = MLX5DR_ACTION_TYP_VPORT,
 	[RTE_FLOW_ACTION_TYPE_COUNT] = MLX5DR_ACTION_TYP_CTR,
+	[RTE_FLOW_ACTION_TYPE_CONNTRACK] = MLX5DR_ACTION_TYP_ASO_CT,
 };
 
 static int
@@ -2981,6 +3048,11 @@ flow_hw_dr_actions_template_handle_shared(const struct rte_flow_action *mask,
 		action_types[*curr_off] = MLX5DR_ACTION_TYP_CTR;
 		*curr_off = *curr_off + 1;
 		break;
+	case RTE_FLOW_ACTION_TYPE_CONNTRACK:
+		at->actions_off[action_src] = *curr_off;
+		action_types[*curr_off] = MLX5DR_ACTION_TYP_ASO_CT;
+		*curr_off = *curr_off + 1;
+		break;
 	default:
 		DRV_LOG(WARNING, "Unsupported shared action type: %d", type);
 		return -EINVAL;
@@ -3435,6 +3507,7 @@ flow_hw_pattern_validate(struct rte_eth_dev *dev,
 		case RTE_FLOW_ITEM_TYPE_GRE_OPTION:
 		case RTE_FLOW_ITEM_TYPE_ICMP:
 		case RTE_FLOW_ITEM_TYPE_ICMP6:
+		case RTE_FLOW_ITEM_TYPE_CONNTRACK:
 			break;
 		case RTE_FLOW_ITEM_TYPE_END:
 			items_end = true;
@@ -4627,6 +4700,97 @@ flow_hw_create_ctrl_tables(struct rte_eth_dev *dev)
 	return -EINVAL;
 }
 
+static void
+flow_hw_ct_mng_destroy(struct rte_eth_dev *dev,
+		       struct mlx5_aso_ct_pools_mng *ct_mng)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+
+	mlx5_aso_ct_queue_uninit(priv->sh, ct_mng);
+	mlx5_free(ct_mng);
+}
+
+static void
+flow_hw_ct_pool_destroy(struct rte_eth_dev *dev __rte_unused,
+			struct mlx5_aso_ct_pool *pool)
+{
+	if (pool->dr_action)
+		mlx5dr_action_destroy(pool->dr_action);
+	if (pool->devx_obj)
+		claim_zero(mlx5_devx_cmd_destroy(pool->devx_obj));
+	if (pool->cts)
+		mlx5_ipool_destroy(pool->cts);
+	mlx5_free(pool);
+}
+
+static struct mlx5_aso_ct_pool *
+flow_hw_ct_pool_create(struct rte_eth_dev *dev,
+		       const struct rte_flow_port_attr *port_attr)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_pool *pool;
+	struct mlx5_devx_obj *obj;
+	uint32_t nb_cts = rte_align32pow2(port_attr->nb_conn_tracks);
+	uint32_t log_obj_size = rte_log2_u32(nb_cts);
+	struct mlx5_indexed_pool_config cfg = {
+		.size = sizeof(struct mlx5_aso_ct_action),
+		.trunk_size = 1 << 12,
+		.per_core_cache = 1 << 13,
+		.need_lock = 1,
+		.release_mem_en = !!priv->sh->config.reclaim_mode,
+		.malloc = mlx5_malloc,
+		.free = mlx5_free,
+		.type = "mlx5_hw_ct_action",
+	};
+	int reg_id;
+	uint32_t flags;
+
+	pool = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*pool), 0, SOCKET_ID_ANY);
+	if (!pool) {
+		rte_errno = ENOMEM;
+		return NULL;
+	}
+	obj = mlx5_devx_cmd_create_conn_track_offload_obj(priv->sh->cdev->ctx,
+							  priv->sh->cdev->pdn,
+							  log_obj_size);
+	if (!obj) {
+		rte_errno = ENODATA;
+		DRV_LOG(ERR, "Failed to create conn_track_offload_obj using DevX.");
+		goto err;
+	}
+	pool->devx_obj = obj;
+	reg_id = mlx5_flow_get_reg_id(dev, MLX5_ASO_CONNTRACK, 0, NULL);
+	flags = MLX5DR_ACTION_FLAG_HWS_RX | MLX5DR_ACTION_FLAG_HWS_TX;
+	if (priv->sh->config.dv_esw_en && priv->master)
+		flags |= MLX5DR_ACTION_FLAG_HWS_FDB;
+	pool->dr_action = mlx5dr_action_create_aso_ct(priv->dr_ctx,
+						      (struct mlx5dr_devx_obj *)obj,
+						      reg_id - REG_C_0, flags);
+	if (!pool->dr_action)
+		goto err;
+	/*
+	 * No need for local cache if CT number is a small number. Since
+	 * flow insertion rate will be very limited in that case. Here let's
+	 * set the number to less than default trunk size 4K.
+	 */
+	if (nb_cts <= cfg.trunk_size) {
+		cfg.per_core_cache = 0;
+		cfg.trunk_size = nb_cts;
+	} else if (nb_cts <= MLX5_HW_IPOOL_SIZE_THRESHOLD) {
+		cfg.per_core_cache = MLX5_HW_IPOOL_CACHE_MIN;
+	}
+	pool->cts = mlx5_ipool_create(&cfg);
+	if (!pool->cts)
+		goto err;
+	pool->sq = priv->ct_mng->aso_sqs;
+	/* Assign the last extra ASO SQ as public SQ. */
+	pool->shared_sq = &priv->ct_mng->aso_sqs[priv->nb_queue - 1];
+	return pool;
+err:
+	flow_hw_ct_pool_destroy(dev, pool);
+	return NULL;
+}
+
 /**
  * Configure port HWS resources.
  *
@@ -4809,6 +4973,20 @@ flow_hw_configure(struct rte_eth_dev *dev,
 	}
 	if (_queue_attr)
 		mlx5_free(_queue_attr);
+	if (port_attr->nb_conn_tracks) {
+		mem_size = sizeof(struct mlx5_aso_sq) * nb_q_updated +
+			   sizeof(*priv->ct_mng);
+		priv->ct_mng = mlx5_malloc(MLX5_MEM_ZERO, mem_size,
+					   RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
+		if (!priv->ct_mng)
+			goto err;
+		if (mlx5_aso_ct_queue_init(priv->sh, priv->ct_mng, nb_q_updated))
+			goto err;
+		priv->hws_ctpool = flow_hw_ct_pool_create(dev, port_attr);
+		if (!priv->hws_ctpool)
+			goto err;
+		priv->sh->ct_aso_en = 1;
+	}
 	if (port_attr->nb_counters) {
 		priv->hws_cpool = mlx5_hws_cnt_pool_create(dev, port_attr,
 				nb_queue);
@@ -4817,6 +4995,14 @@ flow_hw_configure(struct rte_eth_dev *dev,
 	}
 	return 0;
 err:
+	if (priv->hws_ctpool) {
+		flow_hw_ct_pool_destroy(dev, priv->hws_ctpool);
+		priv->hws_ctpool = NULL;
+	}
+	if (priv->ct_mng) {
+		flow_hw_ct_mng_destroy(dev, priv->ct_mng);
+		priv->ct_mng = NULL;
+	}
 	flow_hw_free_vport_actions(priv);
 	for (i = 0; i < MLX5_HW_ACTION_FLAG_MAX; i++) {
 		if (priv->hw_drop[i])
@@ -4890,6 +5076,14 @@ flow_hw_resource_release(struct rte_eth_dev *dev)
 	}
 	if (priv->hws_cpool)
 		mlx5_hws_cnt_pool_destroy(priv->sh, priv->hws_cpool);
+	if (priv->hws_ctpool) {
+		flow_hw_ct_pool_destroy(dev, priv->hws_ctpool);
+		priv->hws_ctpool = NULL;
+	}
+	if (priv->ct_mng) {
+		flow_hw_ct_mng_destroy(dev, priv->ct_mng);
+		priv->ct_mng = NULL;
+	}
 	mlx5_free(priv->hw_q);
 	priv->hw_q = NULL;
 	claim_zero(mlx5dr_context_close(priv->dr_ctx));
@@ -4958,6 +5152,7 @@ void flow_hw_init_tags_set(struct rte_eth_dev *dev)
 		unset |= 1 << (REG_C_1 - REG_C_0);
 	masks &= ~unset;
 	if (mlx5_flow_hw_avl_tags_init_cnt) {
+		MLX5_ASSERT(mlx5_flow_hw_aso_tag == priv->mtr_color_reg);
 		for (i = 0; i < MLX5_FLOW_HW_TAGS_MAX; i++) {
 			if (mlx5_flow_hw_avl_tags[i] != REG_NON && !!((1 << i) & masks)) {
 				copy[mlx5_flow_hw_avl_tags[i] - REG_C_0] =
@@ -4980,6 +5175,7 @@ void flow_hw_init_tags_set(struct rte_eth_dev *dev)
 		}
 	}
 	priv->sh->hws_tags = 1;
+	mlx5_flow_hw_aso_tag = (enum modify_reg)priv->mtr_color_reg;
 	mlx5_flow_hw_avl_tags_init_cnt++;
 }
 
@@ -5050,6 +5246,170 @@ flow_hw_clear_flow_metadata_config(void)
 	mlx5_flow_hw_flow_metadata_xmeta_en = 0;
 }
 
+static int
+flow_hw_conntrack_destroy(struct rte_eth_dev *dev __rte_unused,
+			  uint32_t idx,
+			  struct rte_flow_error *error)
+{
+	uint16_t owner = (uint16_t)MLX5_ACTION_CTX_CT_GET_OWNER(idx);
+	uint32_t ct_idx = MLX5_ACTION_CTX_CT_GET_IDX(idx);
+	struct rte_eth_dev *owndev = &rte_eth_devices[owner];
+	struct mlx5_priv *priv = owndev->data->dev_private;
+	struct mlx5_aso_ct_pool *pool = priv->hws_ctpool;
+	struct mlx5_aso_ct_action *ct;
+
+	ct = mlx5_ipool_get(pool->cts, ct_idx);
+	if (!ct) {
+		return rte_flow_error_set(error, EINVAL,
+				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				NULL,
+				"Invalid CT destruction index");
+	}
+	__atomic_store_n(&ct->state, ASO_CONNTRACK_FREE,
+				 __ATOMIC_RELAXED);
+	mlx5_ipool_free(pool->cts, ct_idx);
+	return 0;
+}
+
+static int
+flow_hw_conntrack_query(struct rte_eth_dev *dev, uint32_t idx,
+			struct rte_flow_action_conntrack *profile,
+			struct rte_flow_error *error)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_pool *pool = priv->hws_ctpool;
+	struct mlx5_aso_ct_action *ct;
+	uint16_t owner = (uint16_t)MLX5_ACTION_CTX_CT_GET_OWNER(idx);
+	uint32_t ct_idx;
+
+	if (owner != PORT_ID(priv))
+		return rte_flow_error_set(error, EACCES,
+				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				NULL,
+				"Can't query CT object owned by another port");
+	ct_idx = MLX5_ACTION_CTX_CT_GET_IDX(idx);
+	ct = mlx5_ipool_get(pool->cts, ct_idx);
+	if (!ct) {
+		return rte_flow_error_set(error, EINVAL,
+				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				NULL,
+				"Invalid CT query index");
+	}
+	profile->peer_port = ct->peer;
+	profile->is_original_dir = ct->is_original;
+	if (mlx5_aso_ct_query_by_wqe(priv->sh, MLX5_HW_INV_QUEUE, ct, profile))
+		return rte_flow_error_set(error, EIO,
+				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				NULL,
+				"Failed to query CT context");
+	return 0;
+}
+
+
+static int
+flow_hw_conntrack_update(struct rte_eth_dev *dev, uint32_t queue,
+			 const struct rte_flow_modify_conntrack *action_conf,
+			 uint32_t idx, struct rte_flow_error *error)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_pool *pool = priv->hws_ctpool;
+	struct mlx5_aso_ct_action *ct;
+	const struct rte_flow_action_conntrack *new_prf;
+	uint16_t owner = (uint16_t)MLX5_ACTION_CTX_CT_GET_OWNER(idx);
+	uint32_t ct_idx;
+	int ret = 0;
+
+	if (PORT_ID(priv) != owner)
+		return rte_flow_error_set(error, EACCES,
+					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					  NULL,
+					  "Can't update CT object owned by another port");
+	ct_idx = MLX5_ACTION_CTX_CT_GET_IDX(idx);
+	ct = mlx5_ipool_get(pool->cts, ct_idx);
+	if (!ct) {
+		return rte_flow_error_set(error, EINVAL,
+				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				NULL,
+				"Invalid CT update index");
+	}
+	new_prf = &action_conf->new_ct;
+	if (action_conf->direction)
+		ct->is_original = !!new_prf->is_original_dir;
+	if (action_conf->state) {
+		/* Only validate the profile when it needs to be updated. */
+		ret = mlx5_validate_action_ct(dev, new_prf, error);
+		if (ret)
+			return ret;
+		ret = mlx5_aso_ct_update_by_wqe(priv->sh, queue, ct, new_prf);
+		if (ret)
+			return rte_flow_error_set(error, EIO,
+					RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					NULL,
+					"Failed to send CT context update WQE");
+		if (queue != MLX5_HW_INV_QUEUE)
+			return 0;
+		/* Block until ready or a failure in synchronous mode. */
+		ret = mlx5_aso_ct_available(priv->sh, queue, ct);
+		if (ret)
+			rte_flow_error_set(error, rte_errno,
+					   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					   NULL,
+					   "Timeout to get the CT update");
+	}
+	return ret;
+}
+
+static struct rte_flow_action_handle *
+flow_hw_conntrack_create(struct rte_eth_dev *dev, uint32_t queue,
+			 const struct rte_flow_action_conntrack *pro,
+			 struct rte_flow_error *error)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_pool *pool = priv->hws_ctpool;
+	struct mlx5_aso_ct_action *ct;
+	uint32_t ct_idx = 0;
+	int ret;
+	bool async = !!(queue != MLX5_HW_INV_QUEUE);
+
+	if (!pool) {
+		rte_flow_error_set(error, EINVAL,
+				   RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+				   "CT is not enabled");
+		return 0;
+	}
+	ct = mlx5_ipool_zmalloc(pool->cts, &ct_idx);
+	if (!ct) {
+		rte_flow_error_set(error, rte_errno,
+				   RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+				   "Failed to allocate CT object");
+		return 0;
+	}
+	ct->offset = ct_idx - 1;
+	ct->is_original = !!pro->is_original_dir;
+	ct->peer = pro->peer_port;
+	ct->pool = pool;
+	if (mlx5_aso_ct_update_by_wqe(priv->sh, queue, ct, pro)) {
+		mlx5_ipool_free(pool->cts, ct_idx);
+		rte_flow_error_set(error, EBUSY,
+				   RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+				   "Failed to update CT");
+		return 0;
+	}
+	if (!async) {
+		ret = mlx5_aso_ct_available(priv->sh, queue, ct);
+		if (ret) {
+			mlx5_ipool_free(pool->cts, ct_idx);
+			rte_flow_error_set(error, rte_errno,
+					   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					   NULL,
+					   "Timeout to get the CT update");
+			return 0;
+		}
+	}
+	return (struct rte_flow_action_handle *)(uintptr_t)
+		MLX5_ACTION_CTX_CT_GEN_IDX(PORT_ID(priv), ct_idx);
+}
+
 /**
  * Create shared action.
  *
@@ -5097,6 +5457,9 @@ flow_hw_action_handle_create(struct rte_eth_dev *dev, uint32_t queue,
 			handle = (struct rte_flow_action_handle *)
 				 (uintptr_t)cnt_id;
 		break;
+	case RTE_FLOW_ACTION_TYPE_CONNTRACK:
+		handle = flow_hw_conntrack_create(dev, queue, action->conf, error);
+		break;
 	default:
 		handle = flow_dv_action_create(dev, conf, action, error);
 	}
@@ -5132,10 +5495,18 @@ flow_hw_action_handle_update(struct rte_eth_dev *dev, uint32_t queue,
 			     void *user_data,
 			     struct rte_flow_error *error)
 {
+	uint32_t act_idx = (uint32_t)(uintptr_t)handle;
+	uint32_t type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET;
+
 	RTE_SET_USED(queue);
 	RTE_SET_USED(attr);
 	RTE_SET_USED(user_data);
-	return flow_dv_action_update(dev, handle, update, error);
+	switch (type) {
+	case MLX5_INDIRECT_ACTION_TYPE_CT:
+		return flow_hw_conntrack_update(dev, queue, update, act_idx, error);
+	default:
+		return flow_dv_action_update(dev, handle, update, error);
+	}
 }
 
 /**
@@ -5174,6 +5545,8 @@ flow_hw_action_handle_destroy(struct rte_eth_dev *dev, uint32_t queue,
 	switch (type) {
 	case MLX5_INDIRECT_ACTION_TYPE_COUNT:
 		return mlx5_hws_cnt_shared_put(priv->hws_cpool, &act_idx);
+	case MLX5_INDIRECT_ACTION_TYPE_CT:
+		return flow_hw_conntrack_destroy(dev, act_idx, error);
 	default:
 		return flow_dv_action_destroy(dev, handle, error);
 	}
@@ -5327,6 +5700,8 @@ flow_hw_action_query(struct rte_eth_dev *dev,
 	switch (type) {
 	case MLX5_INDIRECT_ACTION_TYPE_COUNT:
 		return flow_hw_query_counter(dev, act_idx, data, error);
+	case MLX5_INDIRECT_ACTION_TYPE_CT:
+		return flow_hw_conntrack_query(dev, act_idx, data, error);
 	default:
 		return flow_dv_action_query(dev, handle, data, error);
 	}
-- 
2.25.1
    
    
More information about the dev
mailing list