[dpdk-dev] [PATCH v7 09/17] net/mlx5: add ASO CT query implementation

Bing Zhao bingz at nvidia.com
Wed May 5 14:23:20 CEST 2021


After a connection tracking context is created and in use by flows,
the HW updates the context automatically each time a packet passes
the CT validation. E.g., the ACK, SEQ, window and state of CT can be
updated by traffic in both directions.

In order to query the updated contents of this context, a WQE should
be posted to the SQ with a return buffer. The data will be filled
into the buffer, and the profile will then be filled from that data.

The context may be updated while the query command is executing, so
the result of the query command may not be the latest one.

Signed-off-by: Bing Zhao <bingz at nvidia.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo at nvidia.com>
---
 drivers/net/mlx5/mlx5.h          |  10 +-
 drivers/net/mlx5/mlx5_flow_aso.c | 245 +++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_flow_dv.c  |  19 +++
 3 files changed, 273 insertions(+), 1 deletion(-)

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 0f2a26efc0..6d3f89519d 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -493,7 +493,10 @@ struct mlx5_aso_sq_elem {
 			uint16_t burst_size;
 		};
 		struct mlx5_aso_mtr *mtr;
-		struct mlx5_aso_ct_action *ct;
+		struct {
+			struct mlx5_aso_ct_action *ct;
+			char *query_data;
+		};
 	};
 };
 
@@ -1707,5 +1710,10 @@ int mlx5_aso_mtr_wait(struct mlx5_dev_ctx_shared *sh,
 int mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
 			      struct mlx5_aso_ct_action *ct,
 			      const struct rte_flow_action_conntrack *profile);
+int mlx5_aso_ct_wait_ready(struct mlx5_dev_ctx_shared *sh,
+			   struct mlx5_aso_ct_action *ct);
+int mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
+			     struct mlx5_aso_ct_action *ct,
+			     struct rte_flow_action_conntrack *profile);
 
 #endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_flow_aso.c b/drivers/net/mlx5/mlx5_flow_aso.c
index 37cb43147a..92fa9ede60 100644
--- a/drivers/net/mlx5/mlx5_flow_aso.c
+++ b/drivers/net/mlx5/mlx5_flow_aso.c
@@ -933,6 +933,7 @@ mlx5_aso_ct_sq_enqueue_single(struct mlx5_aso_ct_pools_mng *mng,
 	/* Fill next WQE. */
 	MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_WAIT);
 	sq->elts[sq->head & mask].ct = ct;
+	sq->elts[sq->head & mask].query_data = NULL;
 	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
 	/* Each WQE will have a single CT object. */
 	wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
@@ -1048,9 +1049,95 @@ mlx5_aso_ct_status_update(struct mlx5_aso_sq *sq, uint16_t num)
 		ct = sq->elts[idx].ct;
 		MLX5_ASSERT(ct);
 		MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_READY);
+		if (sq->elts[idx].query_data)
+			rte_memcpy(sq->elts[idx].query_data,
+				   (char *)((uintptr_t)sq->mr.addr + idx * 64),
+				   64);
 	}
 }
 
+/*
+ * Post a WQE to the ASO CT SQ to query the current context.
+ *
+ * @param[in] mng
+ *   Pointer to the CT pools management structure.
+ * @param[in] ct
+ *   Pointer to the generic CT structure related to the context.
+ * @param[in] data
+ *   Buffer stored in the SQ element; filled by mlx5_aso_ct_status_update().
+ *
+ * @return
+ *   1 if WQE posted, 0 if CT is in WAIT or SQ is full, -1 if CT is free.
+ */
+static int
+mlx5_aso_ct_sq_query_single(struct mlx5_aso_ct_pools_mng *mng,
+			    struct mlx5_aso_ct_action *ct, char *data)
+{
+	volatile struct mlx5_aso_wqe *wqe = NULL;
+	struct mlx5_aso_sq *sq = &mng->aso_sq;
+	uint16_t size = 1 << sq->log_desc_n;
+	uint16_t mask = size - 1;
+	uint16_t res;
+	uint16_t wqe_idx;
+	struct mlx5_aso_ct_pool *pool;
+	enum mlx5_aso_ct_state state =
+				__atomic_load_n(&ct->state, __ATOMIC_RELAXED);
+
+	if (state == ASO_CONNTRACK_FREE) {
+		DRV_LOG(ERR, "Fail: No context to query");
+		return -1;
+	} else if (state == ASO_CONNTRACK_WAIT) {
+		return 0;
+	}
+	rte_spinlock_lock(&sq->sqsl);
+	res = size - (uint16_t)(sq->head - sq->tail);
+	if (unlikely(!res)) {
+		rte_spinlock_unlock(&sq->sqsl);
+		DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send");
+		return 0;
+	}
+	MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_QUERY);
+	wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
+	/* Prefetch the WQE slot following the one being filled. */
+	rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
+	/* Fill next WQE. */
+	wqe_idx = sq->head & mask;
+	sq->elts[wqe_idx].ct = ct;
+	sq->elts[wqe_idx].query_data = data;
+	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	/* Each WQE will have a single CT object. */
+	wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
+						  ct->offset);
+	wqe->general_cseg.opcode = rte_cpu_to_be_32(MLX5_OPCODE_ACCESS_ASO |
+			(ASO_OPC_MOD_CONNECTION_TRACKING <<
+			 WQE_CSEG_OPC_MOD_OFFSET) |
+			sq->pi << WQE_CSEG_WQE_INDEX_OFFSET);
+	/*
+	 * No write request is required.
+	 * ASO_OPER_LOGICAL_AND and ASO_OP_ALWAYS_FALSE are both 0.
+	 * "BYTEWISE_64BYTE" is needed for a whole context.
+	 * Set to 0 directly to reduce an endian swap. (Modify should rewrite.)
+	 * "data_mask" is ignored.
+	 * Buffer address was already filled during initialization.
+	 */
+	wqe->aso_cseg.operand_masks = rte_cpu_to_be_32(BYTEWISE_64BYTE <<
+					ASO_CSEG_DATA_MASK_MODE_OFFSET);
+	wqe->aso_cseg.data_mask = 0;
+	sq->head++;
+	/*
+	 * Each WQE contains 2 WQEBB's, even though
+	 * data segment is not used in this case.
+	 */
+	sq->pi += 2;
+	rte_io_wmb();
+	sq->sq_obj.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(sq->pi);
+	rte_wmb();
+	*sq->uar_addr = *(volatile uint64_t *)wqe; /* Assume 64 bit ARCH. */
+	rte_wmb();
+	rte_spinlock_unlock(&sq->sqsl);
+	return 1;
+}
+
 /*
  * Handle completions from WQEs sent to ASO CT.
  *
@@ -1143,3 +1230,161 @@ mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
 		ct->offset, pool->index);
 	return -1;
 }
+
+/*
+ * Poll the ASO CT completions until the CT object state becomes READY.
+ *
+ * @param[in] sh
+ *   Pointer to mlx5_dev_ctx_shared object.
+ * @param[in] ct
+ *   Pointer to connection tracking offload object.
+ *
+ * @return
+ *   0 when the state is READY, -1 when the polling attempts are exhausted.
+ */
+int
+mlx5_aso_ct_wait_ready(struct mlx5_dev_ctx_shared *sh,
+		       struct mlx5_aso_ct_action *ct)
+{
+	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
+	uint32_t poll_cqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
+	struct mlx5_aso_ct_pool *pool;
+
+	if (__atomic_load_n(&ct->state, __ATOMIC_RELAXED) ==
+	    ASO_CONNTRACK_READY)
+		return 0;
+	do {
+		mlx5_aso_ct_completion_handle(mng);
+		if (__atomic_load_n(&ct->state, __ATOMIC_RELAXED) ==
+		    ASO_CONNTRACK_READY)
+			return 0;
+		/* Not READY yet: sleep before the next completion poll. */
+		rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
+	} while (--poll_cqe_times);
+	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	DRV_LOG(ERR, "Fail to poll CQE for ASO CT %d in pool %d",
+		ct->offset, pool->index);
+	return -1;
+}
+
+/*
+ * Convert the hardware conntrack data format into the profile.
+ *
+ * @param[out] profile
+ *   Pointer to conntrack profile to be filled after query.
+ * @param[in] wdata
+ *   Pointer to data fetched from hardware.
+ */
+static inline void
+mlx5_aso_ct_obj_analyze(struct rte_flow_action_conntrack *profile,
+			char *wdata)
+{
+	void *o_dir = MLX5_ADDR_OF(conn_track_aso, wdata, original_dir);
+	void *r_dir = MLX5_ADDR_OF(conn_track_aso, wdata, reply_dir);
+
+	/* MLX5_GET16 should be taken into consideration. */
+	profile->state = (enum rte_flow_conntrack_state)
+			 MLX5_GET(conn_track_aso, wdata, state);
+	profile->enable = !MLX5_GET(conn_track_aso, wdata, freeze_track);
+	profile->selective_ack = MLX5_GET(conn_track_aso, wdata,
+					  sack_permitted);
+	profile->live_connection = MLX5_GET(conn_track_aso, wdata,
+					    connection_assured);
+	profile->challenge_ack_passed = MLX5_GET(conn_track_aso, wdata,
+						 challenged_acked);
+	profile->max_ack_window = MLX5_GET(conn_track_aso, wdata,
+					   max_ack_window);
+	profile->retransmission_limit = MLX5_GET(conn_track_aso, wdata,
+						 retranmission_limit);
+	profile->last_window = MLX5_GET(conn_track_aso, wdata, last_win);
+	profile->last_direction = MLX5_GET(conn_track_aso, wdata, last_dir);
+	profile->last_index = (enum rte_flow_conntrack_tcp_last_index)
+			      MLX5_GET(conn_track_aso, wdata, last_index);
+	profile->last_seq = MLX5_GET(conn_track_aso, wdata, last_seq);
+	profile->last_ack = MLX5_GET(conn_track_aso, wdata, last_ack);
+	profile->last_end = MLX5_GET(conn_track_aso, wdata, last_end);
+	profile->liberal_mode = MLX5_GET(conn_track_aso, wdata,
+				reply_direction_tcp_liberal_enabled) |
+				MLX5_GET(conn_track_aso, wdata,
+				original_direction_tcp_liberal_enabled);
+	/* Both direction liberal bits are merged into liberal_mode above. */
+	profile->reply_dir.scale = MLX5_GET(conn_track_aso, wdata,
+					    reply_direction_tcp_scale);
+	profile->reply_dir.close_initiated = MLX5_GET(conn_track_aso, wdata,
+					reply_direction_tcp_close_initiated);
+	profile->reply_dir.data_unacked = MLX5_GET(conn_track_aso, wdata,
+					reply_direction_tcp_data_unacked);
+	profile->reply_dir.last_ack_seen = MLX5_GET(conn_track_aso, wdata,
+					reply_direction_tcp_max_ack);
+	profile->reply_dir.sent_end = MLX5_GET(tcp_window_params,
+					       r_dir, sent_end);
+	profile->reply_dir.reply_end = MLX5_GET(tcp_window_params,
+						r_dir, reply_end);
+	profile->reply_dir.max_win = MLX5_GET(tcp_window_params,
+					      r_dir, max_win);
+	profile->reply_dir.max_ack = MLX5_GET(tcp_window_params,
+					      r_dir, max_ack);
+	profile->original_dir.scale = MLX5_GET(conn_track_aso, wdata,
+					       original_direction_tcp_scale);
+	profile->original_dir.close_initiated = MLX5_GET(conn_track_aso, wdata,
+					original_direction_tcp_close_initiated);
+	profile->original_dir.data_unacked = MLX5_GET(conn_track_aso, wdata,
+					original_direction_tcp_data_unacked);
+	profile->original_dir.last_ack_seen = MLX5_GET(conn_track_aso, wdata,
+					original_direction_tcp_max_ack);
+	profile->original_dir.sent_end = MLX5_GET(tcp_window_params,
+						  o_dir, sent_end);
+	profile->original_dir.reply_end = MLX5_GET(tcp_window_params,
+						   o_dir, reply_end);
+	profile->original_dir.max_win = MLX5_GET(tcp_window_params,
+						 o_dir, max_win);
+	profile->original_dir.max_ack = MLX5_GET(tcp_window_params,
+						 o_dir, max_ack);
+}
+
+/*
+ * Query connection tracking information parameter by send WQE.
+ * NOTE(review): on wait timeout the SQ element keeps the out_data pointer.
+ * @param[in] sh
+ *   Pointer to mlx5_dev_ctx_shared object.
+ * @param[in] ct
+ *   Pointer to connection tracking offload object.
+ * @param[out] profile
+ *   Pointer to connection tracking TCP information.
+ *
+ * @return
+ *   0 on success, -1 on failure.
+ */
+int
+mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
+			 struct mlx5_aso_ct_action *ct,
+			 struct rte_flow_action_conntrack *profile)
+{
+	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
+	uint32_t poll_wqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
+	struct mlx5_aso_ct_pool *pool;
+	char out_data[64 * 2];
+	int ret;
+
+	MLX5_ASSERT(ct);
+	do {
+		mlx5_aso_ct_completion_handle(mng);
+		ret = mlx5_aso_ct_sq_query_single(mng, ct, out_data);
+		if (ret < 0)
+			return ret;
+		else if (ret > 0)
+			goto data_handle;
+		/* Waiting for wqe resource or state. */
+		else
+			rte_delay_us_sleep(10u);
+	} while (--poll_wqe_times);
+	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
+	DRV_LOG(ERR, "Fail to send WQE for ASO CT %d in pool %d",
+		ct->offset, pool->index);
+	return -1;
+data_handle:
+	ret = mlx5_aso_ct_wait_ready(sh, ct);
+	if (!ret)
+		mlx5_aso_ct_obj_analyze(profile, out_data);
+	return ret;
+}
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 3b84dea34b..e1beb83e92 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -14808,6 +14808,8 @@ flow_dv_action_query(struct rte_eth_dev *dev,
 	uint32_t act_idx = (uint32_t)(uintptr_t)handle;
 	uint32_t type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET;
 	uint32_t idx = act_idx & ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_aso_ct_action *ct;
 
 	switch (type) {
 	case MLX5_INDIRECT_ACTION_TYPE_AGE:
@@ -14823,6 +14825,23 @@ flow_dv_action_query(struct rte_eth_dev *dev,
 		return 0;
 	case MLX5_INDIRECT_ACTION_TYPE_COUNT:
 		return flow_dv_query_count(dev, idx, data, error);
+	case MLX5_INDIRECT_ACTION_TYPE_CT:
+		ct = flow_aso_ct_get_by_idx(dev, idx);
+		if (!ct->refcnt)
+			return rte_flow_error_set(error, EFAULT,
+					RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					NULL,
+					"CT object is inactive");
+		((struct rte_flow_action_conntrack *)data)->peer_port =
+							ct->peer;
+		((struct rte_flow_action_conntrack *)data)->is_original_dir =
+							ct->is_original;
+		if (mlx5_aso_ct_query_by_wqe(priv->sh, ct, data))
+			return rte_flow_error_set(error, EIO,
+					RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					NULL,
+					"Failed to query CT context");
+		return 0;
 	default:
 		return rte_flow_error_set(error, ENOTSUP,
 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
-- 
2.26.2



More information about the dev mailing list