[dpdk-dev] [PATCH 7/7] net/mlx5: add minimal required Tx data inline

Viacheslav Ovsiienko viacheslavo at mellanox.com
Thu Jul 4 18:29:27 CEST 2019


Tx packet data may be inlined into the transmit descriptor.
Under some circumstances ConnectX NICs may require data to be
inlined for correct operation. The exact amount of data may
depend on the NIC operation mode, requested Tx offloads,
E-Switch configuration, etc.

The number of data bytes to inline may be specified with the
devargs key "txq_inline_min". If this key is present, the
specified value (which may be aligned by the driver in order
not to exceed the limits and to provide better descriptor
space utilization) will be used by the driver, and it
is guaranteed that the requested data bytes are inlined into
the descriptor beside other inline settings.

If the "txq_inline_min" key is not present, the value may
be queried by the driver from the NIC via DevX, if this
feature is available. This patch provides the implementation
of this query.

Signed-off-by: Viacheslav Ovsiienko <viacheslavo at mellanox.com>
---
 drivers/net/mlx5/mlx5.c           |  93 +++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5.h           |   4 ++
 drivers/net/mlx5/mlx5_defs.h      |  18 ++++++
 drivers/net/mlx5/mlx5_devx_cmds.c | 100 ++++++++++++++++++++++++++++++-
 drivers/net/mlx5/mlx5_prm.h       | 121 +++++++++++++++++++++++++++++++++++++-
 5 files changed, 334 insertions(+), 2 deletions(-)

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index e803f08..ce3a62b 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -1045,6 +1045,97 @@ struct mlx5_dev_spawn_data {
 }
 
 /**
+ * Configures the minimal amount of data to inline into WQE
+ * while sending packets.
+ *
+ * - the txq_inline_min has the maximal priority, if this
+ *   key is specified in devargs
+ * - if DevX is enabled the inline mode is queried from the
+ *   device (HCA attributes and NIC vport context if needed).
+ * - otherwise L2 mode (18 bytes) is assumed for ConnectX-4/4LX
+ *   and none (0 bytes) for other NICs
+ *
+ * @param spawn
+ *   Verbs device parameters (name, port, switch_info) to spawn.
+ * @param config
+ *   Device configuration parameters.
+ */
+static void
+mlx5_set_min_inline(struct mlx5_dev_spawn_data *spawn,
+		    struct mlx5_dev_config *config)
+{
+	if (config->txq_inline_min != MLX5_ARG_UNSET) {
+		/* Application defines size of inlined data explicitly. */
+		goto exit;
+	}
+	if (config->hca_attr.eth_net_offloads) {
+		/* We have DevX enabled, inline mode queried successfully. */
+		switch (config->hca_attr.wqe_inline_mode) {
+		case MLX5_CAP_INLINE_MODE_L2:
+			/* outer L2 header must be inlined. */
+			config->txq_inline_min = MLX5_INLINE_HSIZE_L2;
+			goto exit;
+		case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
+			/* No inline data are required by NIC. */
+			config->txq_inline_min = MLX5_INLINE_HSIZE_NONE;
+			goto exit;
+		case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
+			/* inline mode is defined by NIC vport context. */
+			if (!config->hca_attr.eth_virt)
+				break;
+			switch (config->hca_attr.vport_inline_mode) {
+			case MLX5_INLINE_MODE_NONE:
+				config->txq_inline_min =
+					MLX5_INLINE_HSIZE_NONE;
+				goto exit;
+			case MLX5_INLINE_MODE_L2:
+				config->txq_inline_min =
+					MLX5_INLINE_HSIZE_L2;
+				goto exit;
+			case MLX5_INLINE_MODE_IP:
+				config->txq_inline_min =
+					MLX5_INLINE_HSIZE_L3;
+				goto exit;
+			case MLX5_INLINE_MODE_TCP_UDP:
+				config->txq_inline_min =
+					MLX5_INLINE_HSIZE_L4;
+				goto exit;
+			case MLX5_INLINE_MODE_INNER_L2:
+				config->txq_inline_min =
+					MLX5_INLINE_HSIZE_INNER_L2;
+				goto exit;
+			case MLX5_INLINE_MODE_INNER_IP:
+				config->txq_inline_min =
+					MLX5_INLINE_HSIZE_INNER_L3;
+				goto exit;
+			case MLX5_INLINE_MODE_INNER_TCP_UDP:
+				config->txq_inline_min =
+					MLX5_INLINE_HSIZE_INNER_L4;
+				goto exit;
+			}
+		}
+	}
+	/*
+	 * We get here if we are unable to deduce
+	 * inline data size with DevX. Try PCI ID
+	 * to determine old NICs.
+	 */
+	switch (spawn->pci_dev->id.device_id) {
+	case PCI_DEVICE_ID_MELLANOX_CONNECTX4:
+	case PCI_DEVICE_ID_MELLANOX_CONNECTX4VF:
+	case PCI_DEVICE_ID_MELLANOX_CONNECTX4LX:
+	case PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF:
+		config->txq_inline_min = MLX5_INLINE_HSIZE_L2;
+		break;
+	default:
+		config->txq_inline_min = MLX5_INLINE_HSIZE_NONE;
+		break;
+	}
+exit:
+	DRV_LOG(DEBUG, "min tx inline configured: %d", config->txq_inline_min);
+}
+
+/**
  * Spawn an Ethernet device from Verbs information.
  *
  * @param dpdk_dev
@@ -1529,6 +1620,8 @@ struct mlx5_dev_spawn_data {
 #else
 	config.dv_esw_en = 0;
 #endif
+	/* Detect minimal data bytes to inline. */
+	mlx5_set_min_inline(spawn, &config);
 	/* Store device configuration on private structure. */
 	priv->config = config;
 	if (config.dv_flow_en) {
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index ac7ea1d..db462bc 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -160,6 +160,10 @@ struct mlx5_devx_counter_set {
 /* HCA attributes. */
 struct mlx5_hca_attr {
 	uint32_t eswitch_manager:1;
+	uint32_t eth_net_offloads:1;
+	uint32_t eth_virt:1;
+	uint32_t wqe_inline_mode:2;
+	uint32_t vport_inline_mode:3;
 };
 
 /* Flow list . */
diff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h
index 873a595..8c118d5 100644
--- a/drivers/net/mlx5/mlx5_defs.h
+++ b/drivers/net/mlx5/mlx5_defs.h
@@ -60,6 +60,24 @@
 /* Maximum Packet headers size (L2+L3+L4) for TSO. */
 #define MLX5_MAX_TSO_HEADER (128u + 34u)
 
+/* Inline data size required by NICs. */
+#define MLX5_INLINE_HSIZE_NONE 0
+#define MLX5_INLINE_HSIZE_L2 (sizeof(struct rte_ether_hdr) + \
+			      sizeof(struct rte_vlan_hdr))
+#define MLX5_INLINE_HSIZE_L3 (MLX5_INLINE_HSIZE_L2 + \
+			      sizeof(struct rte_ipv6_hdr))
+#define MLX5_INLINE_HSIZE_L4 (MLX5_INLINE_HSIZE_L3 + \
+			      sizeof(struct rte_tcp_hdr))
+#define MLX5_INLINE_HSIZE_INNER_L2 (MLX5_INLINE_HSIZE_L3 + \
+				    sizeof(struct rte_udp_hdr) + \
+				    sizeof(struct rte_vxlan_hdr) + \
+				    sizeof(struct rte_ether_hdr) + \
+				    sizeof(struct rte_vlan_hdr))
+#define MLX5_INLINE_HSIZE_INNER_L3 (MLX5_INLINE_HSIZE_INNER_L2 + \
+				    sizeof(struct rte_ipv6_hdr))
+#define MLX5_INLINE_HSIZE_INNER_L4 (MLX5_INLINE_HSIZE_INNER_L3 + \
+				    sizeof(struct rte_tcp_hdr))
+
 /* Threshold of buffer replenishment for vectorized Rx. */
 #define MLX5_VPMD_RXQ_RPLNSH_THRESH(n) \
 	(RTE_MIN(MLX5_VPMD_RX_MAX_BURST, (unsigned int)(n) >> 2))
diff --git a/drivers/net/mlx5/mlx5_devx_cmds.c b/drivers/net/mlx5/mlx5_devx_cmds.c
index e5776c4..de470a6 100644
--- a/drivers/net/mlx5/mlx5_devx_cmds.c
+++ b/drivers/net/mlx5/mlx5_devx_cmds.c
@@ -107,6 +107,59 @@ int mlx5_devx_cmd_flow_counter_free(struct mlx5dv_devx_obj *obj)
 }
 
 /**
+ * Query NIC vport context.
+ * Currently fills only the minimal inline mode attribute.
+ *
+ * @param[in] ctx
+ *   ibv contexts returned from mlx5dv_open_device.
+ * @param[in] vport
+ *   vport index
+ * @param[out] attr
+ *   Attributes device values.
+ *
+ * @return
+ *   0 on success, a negative value otherwise.
+ */
+static int
+mlx5_devx_cmd_query_nic_vport_context(struct ibv_context *ctx,
+				      unsigned int vport,
+				      struct mlx5_hca_attr *attr)
+{
+	uint32_t in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {0};
+	uint32_t out[MLX5_ST_SZ_DW(query_nic_vport_context_out)] = {0};
+	void *vctx;
+	int status, syndrome, rc;
+
+	/* Query NIC vport context to determine inline mode. */
+	MLX5_SET(query_nic_vport_context_in, in, opcode,
+		 MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT);
+	MLX5_SET(query_nic_vport_context_in, in, vport_number, vport);
+	if (vport)
+		MLX5_SET(query_nic_vport_context_in, in, other_vport, 1);
+	rc = mlx5_glue->devx_general_cmd(ctx,
+					 in, sizeof(in),
+					 out, sizeof(out));
+	if (rc)
+		goto error;
+	status = MLX5_GET(query_nic_vport_context_out, out, status);
+	syndrome = MLX5_GET(query_nic_vport_context_out, out, syndrome);
+	if (status) {
+		DRV_LOG(DEBUG, "Failed to query NIC vport context, "
+			"status %x, syndrome = %x",
+			status, syndrome);
+		return -1;
+	}
+	vctx = MLX5_ADDR_OF(query_nic_vport_context_out, out,
+			    nic_vport_context);
+	attr->vport_inline_mode = MLX5_GET(nic_vport_context, vctx,
+					   min_wqe_inline_mode);
+	return 0;
+error:
+	rc = (rc > 0) ? -rc : rc;
+	return rc;
+}
+
+/**
  * Query HCA attributes.
  * Using those attributes we can check on run time if the device
  * is having the required capabilities.
@@ -136,7 +189,7 @@ int mlx5_devx_cmd_flow_counter_free(struct mlx5dv_devx_obj *obj)
 	rc = mlx5_glue->devx_general_cmd(ctx,
 					 in, sizeof(in), out, sizeof(out));
 	if (rc)
-		return rc;
+		goto error;
 	status = MLX5_GET(query_hca_cap_out, out, status);
 	syndrome = MLX5_GET(query_hca_cap_out, out, syndrome);
 	if (status) {
@@ -147,5 +200,50 @@ int mlx5_devx_cmd_flow_counter_free(struct mlx5dv_devx_obj *obj)
 	}
 	hcattr = MLX5_ADDR_OF(query_hca_cap_out, out, capability);
 	attr->eswitch_manager = MLX5_GET(cmd_hca_cap, hcattr, eswitch_manager);
+	attr->eth_net_offloads = MLX5_GET(cmd_hca_cap, hcattr,
+					  eth_net_offloads);
+	attr->eth_virt = MLX5_GET(cmd_hca_cap, hcattr, eth_virt);
+	if (!attr->eth_net_offloads)
+		return 0;
+
+	/* Query HCA offloads for Ethernet protocol. */
+	memset(in, 0, sizeof(in));
+	memset(out, 0, sizeof(out));
+	MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
+	MLX5_SET(query_hca_cap_in, in, op_mod,
+		 MLX5_GET_HCA_CAP_OP_MOD_ETHERNET |
+		 MLX5_HCA_CAP_OPMOD_GET_CUR);
+
+	rc = mlx5_glue->devx_general_cmd(ctx,
+					 in, sizeof(in),
+					 out, sizeof(out));
+	if (rc) {
+		attr->eth_net_offloads = 0;
+		goto error;
+	}
+	status = MLX5_GET(query_hca_cap_out, out, status);
+	syndrome = MLX5_GET(query_hca_cap_out, out, syndrome);
+	if (status) {
+		DRV_LOG(DEBUG, "Failed to query devx HCA capabilities, "
+			"status %x, syndrome = %x",
+			status, syndrome);
+		attr->eth_net_offloads = 0;
+		return -1;
+	}
+	hcattr = MLX5_ADDR_OF(query_hca_cap_out, out, capability);
+	attr->wqe_inline_mode = MLX5_GET(eth_offload_cap, hcattr,
+					 wqe_inline_mode);
+	if (attr->wqe_inline_mode != MLX5_CAP_INLINE_MODE_VPORT_CONTEXT)
+		return 0;
+	if (attr->eth_virt) {
+		rc = mlx5_devx_cmd_query_nic_vport_context(ctx, 0, attr);
+		if (rc) {
+			attr->eth_virt = 0;
+			goto error;
+		}
+	}
 	return 0;
+error:
+	rc = (rc > 0) ? -rc : rc;
+	return rc;
 }
diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h
index 471a3e3..765b3f1 100644
--- a/drivers/net/mlx5/mlx5_prm.h
+++ b/drivers/net/mlx5/mlx5_prm.h
@@ -600,6 +600,7 @@ enum {
 
 enum {
 	MLX5_CMD_OP_QUERY_HCA_CAP = 0x100,
+	MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT = 0x754,
 	MLX5_CMD_OP_ALLOC_FLOW_COUNTER = 0x939,
 	MLX5_CMD_OP_QUERY_FLOW_COUNTER = 0x93b,
 };
@@ -664,7 +665,8 @@ struct mlx5_ifc_query_flow_counter_in_bits {
 
 enum {
 	MLX5_GET_HCA_CAP_OP_MOD_GENERAL_DEVICE = 0x0 << 1,
-	MLX5_GET_HCA_CAP_OP_MOD_QOS_CAP        = 0xc << 1,
+	MLX5_GET_HCA_CAP_OP_MOD_ETHERNET = 0x1 << 1,
+	MLX5_GET_HCA_CAP_OP_MOD_QOS_CAP = 0xc << 1,
 };
 
 enum {
@@ -672,6 +674,23 @@ enum {
 	MLX5_HCA_CAP_OPMOD_GET_CUR   = 1,
 };
 
+enum {
+	MLX5_CAP_INLINE_MODE_L2,
+	MLX5_CAP_INLINE_MODE_VPORT_CONTEXT,
+	MLX5_CAP_INLINE_MODE_NOT_REQUIRED,
+};
+
+enum {
+	MLX5_INLINE_MODE_NONE,
+	MLX5_INLINE_MODE_L2,
+	MLX5_INLINE_MODE_IP,
+	MLX5_INLINE_MODE_TCP_UDP,
+	MLX5_INLINE_MODE_RESERVED4,
+	MLX5_INLINE_MODE_INNER_L2,
+	MLX5_INLINE_MODE_INNER_IP,
+	MLX5_INLINE_MODE_INNER_TCP_UDP,
+};
+
 struct mlx5_ifc_cmd_hca_cap_bits {
 	u8 reserved_at_0[0x30];
 	u8 vhca_id[0x10];
@@ -942,6 +961,42 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8 reserved_at_61f[0x1e1];
 };
 
+struct mlx5_ifc_eth_offload_cap_bits {
+	u8 csum_cap[0x1];
+	u8 vlan_cap[0x1];
+	u8 lro_cap[0x1];
+	u8 lro_psh_flag[0x1];
+	u8 lro_time_stamp[0x1];
+	u8 reserved_at_5[0x2];
+	u8 wqe_vlan_insert[0x1];
+	u8 self_lb_en_modifiable[0x1];
+	u8 reserved_at_9[0x2];
+	u8 max_lso_cap[0x5];
+	u8 multi_pkt_send_wqe[0x2];
+	u8 wqe_inline_mode[0x2];
+	u8 rss_ind_tbl_cap[0x4];
+	u8 reg_umr_sq[0x1];
+	u8 scatter_fcs[0x1];
+	u8 enhanced_multi_pkt_send_wqe[0x1];
+	u8 tunnel_lso_const_out_ip_id[0x1];
+	u8 reserved_at_1c[0x2];
+	u8 tunnel_stateless_gre[0x1];
+	u8 tunnel_stateless_vxlan[0x1];
+	u8 swp[0x1];
+	u8 swp_csum[0x1];
+	u8 swp_lso[0x1];
+	u8 reserved_at_23[0xd];
+	u8 max_vxlan_udp_ports[0x8];
+	u8 reserved_at_38[0x6];
+	u8 max_geneve_opt_len[0x1];
+	u8 tunnel_stateless_geneve_rx[0x1];
+	u8 reserved_at_40[0x10];
+	u8 lro_min_mss_size[0x10];
+	u8 reserved_at_60[0x120];
+	u8 lro_timer_supported_periods[4][0x20];
+	u8 reserved_at_200[0x600];
+};
+
 struct mlx5_ifc_qos_cap_bits {
 	u8 packet_pacing[0x1];
 	u8 esw_scheduling[0x1];
@@ -969,6 +1024,7 @@ struct mlx5_ifc_qos_cap_bits {
 
 union mlx5_ifc_hca_cap_union_bits {
 	struct mlx5_ifc_cmd_hca_cap_bits cmd_hca_cap;
+	struct mlx5_ifc_eth_offload_cap_bits eth_offload_cap;
 	struct mlx5_ifc_qos_cap_bits qos_cap;
 	u8 reserved_at_0[0x8000];
 };
@@ -989,6 +1045,69 @@ struct mlx5_ifc_query_hca_cap_in_bits {
 	u8 reserved_at_40[0x40];
 };
 
+struct mlx5_ifc_mac_address_layout_bits {
+	u8 reserved_at_0[0x10];
+	u8 mac_addr_47_32[0x10];
+	u8 mac_addr_31_0[0x20];
+};
+
+struct mlx5_ifc_nic_vport_context_bits {
+	u8 reserved_at_0[0x5];
+	u8 min_wqe_inline_mode[0x3];
+	u8 reserved_at_8[0x15];
+	u8 disable_mc_local_lb[0x1];
+	u8 disable_uc_local_lb[0x1];
+	u8 roce_en[0x1];
+	u8 arm_change_event[0x1];
+	u8 reserved_at_21[0x1a];
+	u8 event_on_mtu[0x1];
+	u8 event_on_promisc_change[0x1];
+	u8 event_on_vlan_change[0x1];
+	u8 event_on_mc_address_change[0x1];
+	u8 event_on_uc_address_change[0x1];
+	u8 reserved_at_40[0xc];
+	u8 affiliation_criteria[0x4];
+	u8 affiliated_vhca_id[0x10];
+	u8 reserved_at_60[0xd0];
+	u8 mtu[0x10];
+	u8 system_image_guid[0x40];
+	u8 port_guid[0x40];
+	u8 node_guid[0x40];
+	u8 reserved_at_200[0x140];
+	u8 qkey_violation_counter[0x10];
+	u8 reserved_at_350[0x430];
+	u8 promisc_uc[0x1];
+	u8 promisc_mc[0x1];
+	u8 promisc_all[0x1];
+	u8 reserved_at_783[0x2];
+	u8 allowed_list_type[0x3];
+	u8 reserved_at_788[0xc];
+	u8 allowed_list_size[0xc];
+	struct mlx5_ifc_mac_address_layout_bits permanent_address;
+	u8 reserved_at_7e0[0x20];
+};
+
+struct mlx5_ifc_query_nic_vport_context_out_bits {
+	u8 status[0x8];
+	u8 reserved_at_8[0x18];
+	u8 syndrome[0x20];
+	u8 reserved_at_40[0x40];
+	struct mlx5_ifc_nic_vport_context_bits nic_vport_context;
+};
+
+struct mlx5_ifc_query_nic_vport_context_in_bits {
+	u8 opcode[0x10];
+	u8 reserved_at_10[0x10];
+	u8 reserved_at_20[0x10];
+	u8 op_mod[0x10];
+	u8 other_vport[0x1];
+	u8 reserved_at_41[0xf];
+	u8 vport_number[0x10];
+	u8 reserved_at_60[0x5];
+	u8 allowed_list_type[0x3];
+	u8 reserved_at_68[0x18];
+};
+
 /* CQE format mask. */
 #define MLX5E_CQE_FORMAT_MASK 0xc
 
-- 
1.8.3.1



More information about the dev mailing list