[dpdk-dev] [PATCH 10/13] cxgbe: update RX path for Chelsio T6

Rahul Lakkireddy rahul.lakkireddy at chelsio.com
Sat May 27 05:46:25 CEST 2017


Update RX path to reflect Chelsio T6 register value changes.
Update the ingress packing boundary value based on the PCIe Maximum
Payload Size configured for the device.  Update the ingress padding
boundary value based on the smallest memory controller bus width
possible.  Enforce alignment of the free list buffer start address.

Signed-off-by: Rahul Lakkireddy <rahul.lakkireddy at chelsio.com>
Signed-off-by: Kumar Sanghvi <kumaras at chelsio.com>
---
 drivers/net/cxgbe/base/adapter.h        |   4 +-
 drivers/net/cxgbe/base/common.h         |   1 +
 drivers/net/cxgbe/base/t4_hw.c          | 104 +++++++++++++++++++++++++++++---
 drivers/net/cxgbe/base/t4_regs_values.h |   7 ++-
 drivers/net/cxgbe/sge.c                 |  48 ++++-----------
 5 files changed, 117 insertions(+), 47 deletions(-)

diff --git a/drivers/net/cxgbe/base/adapter.h b/drivers/net/cxgbe/base/adapter.h
index 2680790..cc89e49 100644
--- a/drivers/net/cxgbe/base/adapter.h
+++ b/drivers/net/cxgbe/base/adapter.h
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2014-2016 Chelsio Communications.
+ *   Copyright(c) 2014-2017 Chelsio Communications.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -459,7 +459,9 @@ static inline void t4_write_reg64(struct adapter *adapter, u32 reg_addr,
 #define PCI_CAP_ID_EXP          0x10    /* PCI Express */
 #define PCI_CAP_LIST_ID         0       /* Capability ID */
 #define PCI_CAP_LIST_NEXT       1       /* Next capability in the list */
+#define PCI_EXP_DEVCTL          0x0008  /* Device control */
 #define PCI_EXP_DEVCTL2         40      /* Device Control 2 */
+#define PCI_EXP_DEVCTL_PAYLOAD  0x00E0  /* Max payload */
 #define PCI_CAP_ID_VPD          0x03    /* Vital Product Data */
 #define PCI_VPD_ADDR            2       /* Address to access (15 bits!) */
 #define PCI_VPD_ADDR_F          0x8000  /* Write 0, 1 indicates completion */
diff --git a/drivers/net/cxgbe/base/common.h b/drivers/net/cxgbe/base/common.h
index 9383628..5765bf3 100644
--- a/drivers/net/cxgbe/base/common.h
+++ b/drivers/net/cxgbe/base/common.h
@@ -282,6 +282,7 @@ int t4_fw_bye(struct adapter *adap, unsigned int mbox);
 int t4_fw_reset(struct adapter *adap, unsigned int mbox, int reset);
 int t4_fw_halt(struct adapter *adap, unsigned int mbox, int reset);
 int t4_fw_restart(struct adapter *adap, unsigned int mbox, int reset);
+int t4_fl_pkt_align(struct adapter *adap);
 int t4_fixup_host_params_compat(struct adapter *adap, unsigned int page_size,
 				unsigned int cache_line_size,
 				enum chip_type chip_compat);
diff --git a/drivers/net/cxgbe/base/t4_hw.c b/drivers/net/cxgbe/base/t4_hw.c
index 4e1545a..96d4bfd 100644
--- a/drivers/net/cxgbe/base/t4_hw.c
+++ b/drivers/net/cxgbe/base/t4_hw.c
@@ -3358,6 +3358,49 @@ int t4_fw_restart(struct adapter *adap, unsigned int mbox, int reset)
 }
 
 /**
+ * t4_fl_pkt_align - return the fl packet alignment
+ * @adap: the adapter
+ *
+ * T4 has a single field to specify both the packing and padding boundary.
+ * T5 onwards has separate fields for these, and hence the alignment for
+ * the next packet offset is the maximum of the two.
+ */
+int t4_fl_pkt_align(struct adapter *adap)
+{
+	u32 sge_control, sge_control2;
+	unsigned int ingpadboundary, ingpackboundary, fl_align, ingpad_shift;
+
+	sge_control = t4_read_reg(adap, A_SGE_CONTROL);
+
+	/* T4 uses a single control field to specify both the PCIe Padding and
+	 * Packing Boundary.  T5 introduced the ability to specify these
+	 * separately.  The actual Ingress Packet Data alignment boundary
+	 * within Packed Buffer Mode is the maximum of these two
+	 * specifications.
+	 */
+	if (CHELSIO_CHIP_VERSION(adap->params.chip) <= CHELSIO_T5)
+		ingpad_shift = X_INGPADBOUNDARY_SHIFT;
+	else
+		ingpad_shift = X_T6_INGPADBOUNDARY_SHIFT;
+
+	ingpadboundary = 1 << (G_INGPADBOUNDARY(sge_control) + ingpad_shift);
+
+	fl_align = ingpadboundary;
+	if (!is_t4(adap->params.chip)) {
+		sge_control2 = t4_read_reg(adap, A_SGE_CONTROL2);
+		ingpackboundary = G_INGPACKBOUNDARY(sge_control2);
+		if (ingpackboundary == X_INGPACKBOUNDARY_16B)
+			ingpackboundary = 16;
+		else
+			ingpackboundary = 1 << (ingpackboundary +
+					X_INGPACKBOUNDARY_SHIFT);
+
+		fl_align = max(ingpadboundary, ingpackboundary);
+	}
+	return fl_align;
+}
+
+/**
  * t4_fixup_host_params_compat - fix up host-dependent parameters
  * @adap: the adapter
  * @page_size: the host's Base Page Size
@@ -3402,6 +3445,10 @@ int t4_fixup_host_params_compat(struct adapter *adap,
 						  X_INGPADBOUNDARY_SHIFT) |
 				V_EGRSTATUSPAGESIZE(stat_len != 64));
 	else {
+		unsigned int pack_align;
+		unsigned int ingpad, ingpack;
+		unsigned int pcie_cap;
+
 		/*
 		 * T5 introduced the separation of the Free List Padding and
 		 * Packing Boundaries.  Thus, we can select a smaller Padding
@@ -3415,12 +3462,34 @@ int t4_fixup_host_params_compat(struct adapter *adap,
 		 * Size (the minimum unit of transfer to/from Memory).  If we
 		 * have a Padding Boundary which is smaller than the Memory
 		 * Line Size, that'll involve a Read-Modify-Write cycle on the
-		 * Memory Controller which is never good.  For T5 the smallest
-		 * Padding Boundary which we can select is 32 bytes which is
-		 * larger than any known Memory Controller Line Size so we'll
-		 * use that.
+		 * Memory Controller which is never good.
 		 */
 
+		/* We want the Packing Boundary to be based on the Cache Line
+		 * Size in order to help avoid False Sharing performance
+		 * issues between CPUs, etc.  We also want the Packing
+		 * Boundary to incorporate the PCI-E Maximum Payload Size.  We
+		 * get best performance when the Packing Boundary is a
+		 * multiple of the Maximum Payload Size.
+		 */
+		pack_align = fl_align;
+		pcie_cap = t4_os_find_pci_capability(adap, PCI_CAP_ID_EXP);
+		if (pcie_cap) {
+			unsigned int mps, mps_log;
+			u16 devctl;
+
+			/* The PCIe Device Control Maximum Payload Size field
+			 * [bits 7:5] encodes sizes as powers of 2 starting at
+			 * 128 bytes.
+			 */
+			t4_os_pci_read_cfg2(adap, pcie_cap + PCI_EXP_DEVCTL,
+					    &devctl);
+			mps_log = ((devctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5) + 7;
+			mps = 1 << mps_log;
+			if (mps > pack_align)
+				pack_align = mps;
+		}
+
 		/*
 		 * N.B. T5 has a different interpretation of the "0" value for
 		 * the Packing Boundary.  This corresponds to 16 bytes instead
@@ -3429,19 +3498,36 @@ int t4_fixup_host_params_compat(struct adapter *adap,
 		 * on the other hand, if we wanted 32 bytes, the best we can
 		 * really do is 64 bytes ...
 		 */
-		if (fl_align <= 32) {
+		if (pack_align <= 16) {
+			ingpack = X_INGPACKBOUNDARY_16B;
+			fl_align = 16;
+		} else if (pack_align == 32) {
+			ingpack = X_INGPACKBOUNDARY_64B;
 			fl_align = 64;
-			fl_align_log = 6;
+		} else {
+			unsigned int pack_align_log = cxgbe_fls(pack_align) - 1;
+
+			ingpack = pack_align_log - X_INGPACKBOUNDARY_SHIFT;
+			fl_align = pack_align;
 		}
+
+		/* Use the smallest Ingress Padding which isn't smaller than
+		 * the Memory Controller Read/Write Size.  We take that to be
+		 * 8 bytes, as we know of no system with a wider Memory
+		 * Controller Bus Width.  T5 can't go below 32 bytes, though.
+		 */
+		if (is_t5(adap->params.chip))
+			ingpad = X_INGPADBOUNDARY_32B;
+		else
+			ingpad = X_T6_INGPADBOUNDARY_8B;
 		t4_set_reg_field(adap, A_SGE_CONTROL,
 				 V_INGPADBOUNDARY(M_INGPADBOUNDARY) |
 				 F_EGRSTATUSPAGESIZE,
-				 V_INGPADBOUNDARY(X_INGPCIEBOUNDARY_32B) |
+				 V_INGPADBOUNDARY(ingpad) |
 				 V_EGRSTATUSPAGESIZE(stat_len != 64));
 		t4_set_reg_field(adap, A_SGE_CONTROL2,
 				 V_INGPACKBOUNDARY(M_INGPACKBOUNDARY),
-				 V_INGPACKBOUNDARY(fl_align_log -
-						   X_INGPACKBOUNDARY_SHIFT));
+				 V_INGPACKBOUNDARY(ingpack));
 	}
 
 	/*
diff --git a/drivers/net/cxgbe/base/t4_regs_values.h b/drivers/net/cxgbe/base/t4_regs_values.h
index d7d3144..1326594 100644
--- a/drivers/net/cxgbe/base/t4_regs_values.h
+++ b/drivers/net/cxgbe/base/t4_regs_values.h
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2014-2015 Chelsio Communications.
+ *   Copyright(c) 2014-2017 Chelsio Communications.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -55,10 +55,15 @@
 #define X_RXPKTCPLMODE_SPLIT		1
 #define X_INGPCIEBOUNDARY_32B		0
 #define X_INGPADBOUNDARY_SHIFT		5
+#define X_INGPADBOUNDARY_32B		0
+
+#define X_T6_INGPADBOUNDARY_SHIFT	3
+#define X_T6_INGPADBOUNDARY_8B		0
 
 /* CONTROL2 register */
 #define X_INGPACKBOUNDARY_SHIFT		5
 #define X_INGPACKBOUNDARY_16B		0
+#define X_INGPACKBOUNDARY_64B		1
 
 /* GTS register */
 #define X_TIMERREG_RESTART_COUNTER	6
diff --git a/drivers/net/cxgbe/sge.c b/drivers/net/cxgbe/sge.c
index 699b577..b16a0bf 100644
--- a/drivers/net/cxgbe/sge.c
+++ b/drivers/net/cxgbe/sge.c
@@ -420,7 +420,9 @@ static unsigned int refill_fl_usembufs(struct adapter *adap, struct sge_fl *q,
 		mbuf->nb_segs = 1;
 		mbuf->port = rxq->rspq.port_id;
 
-		mapping = (dma_addr_t)(mbuf->buf_physaddr + mbuf->data_off);
+		mapping = (dma_addr_t)RTE_ALIGN(mbuf->buf_physaddr +
+						mbuf->data_off,
+						adap->sge.fl_align);
 		mapping |= buf_size_idx;
 		*d++ = cpu_to_be64(mapping);
 		set_rx_sw_desc(sd, mbuf, mapping);
@@ -1684,8 +1686,7 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
 	if (fl) {
 		struct sge_eth_rxq *rxq = container_of(fl, struct sge_eth_rxq,
 						       fl);
-		enum chip_type chip = (enum chip_type)CHELSIO_CHIP_VERSION(
-				adap->params.chip);
+		unsigned int chip_ver = CHELSIO_CHIP_VERSION(adap->params.chip);
 
 		/*
 		 * Allocate the ring for the hardware free list (with space
@@ -1731,9 +1732,12 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
 		 * Hence maximum allowed burst size will be 448 bytes.
 		 */
 		c.fl0dcaen_to_fl0cidxfthresh =
-			htons(V_FW_IQ_CMD_FL0FBMIN(X_FETCHBURSTMIN_128B) |
-			      V_FW_IQ_CMD_FL0FBMAX((chip <= CHELSIO_T5) ?
-			      X_FETCHBURSTMAX_512B : X_FETCHBURSTMAX_256B));
+			htons(V_FW_IQ_CMD_FL0FBMIN(chip_ver <= CHELSIO_T5 ?
+						   X_FETCHBURSTMIN_128B :
+						   X_FETCHBURSTMIN_64B) |
+			      V_FW_IQ_CMD_FL0FBMAX(chip_ver <= CHELSIO_T5 ?
+						   X_FETCHBURSTMAX_512B :
+						   X_FETCHBURSTMAX_256B));
 		c.fl0size = htons(flsz);
 		c.fl0addr = cpu_to_be64(fl->addr);
 	}
@@ -2189,8 +2193,7 @@ static int t4_sge_init_soft(struct adapter *adap)
 int t4_sge_init(struct adapter *adap)
 {
 	struct sge *s = &adap->sge;
-	u32 sge_control, sge_control2, sge_conm_ctrl;
-	unsigned int ingpadboundary, ingpackboundary;
+	u32 sge_control, sge_conm_ctrl;
 	int ret, egress_threshold;
 
 	/*
@@ -2200,34 +2203,7 @@ int t4_sge_init(struct adapter *adap)
 	sge_control = t4_read_reg(adap, A_SGE_CONTROL);
 	s->pktshift = G_PKTSHIFT(sge_control);
 	s->stat_len = (sge_control & F_EGRSTATUSPAGESIZE) ? 128 : 64;
-
-	/*
-	 * T4 uses a single control field to specify both the PCIe Padding and
-	 * Packing Boundary.  T5 introduced the ability to specify these
-	 * separately.  The actual Ingress Packet Data alignment boundary
-	 * within Packed Buffer Mode is the maximum of these two
-	 * specifications.
-	 */
-	ingpadboundary = 1 << (G_INGPADBOUNDARY(sge_control) +
-			 X_INGPADBOUNDARY_SHIFT);
-	s->fl_align = ingpadboundary;
-
-	if (!is_t4(adap->params.chip) && !adap->use_unpacked_mode) {
-		/*
-		 * T5 has a weird interpretation of one of the PCIe Packing
-		 * Boundary values.  No idea why ...
-		 */
-		sge_control2 = t4_read_reg(adap, A_SGE_CONTROL2);
-		ingpackboundary = G_INGPACKBOUNDARY(sge_control2);
-		if (ingpackboundary == X_INGPACKBOUNDARY_16B)
-			ingpackboundary = 16;
-		else
-			ingpackboundary = 1 << (ingpackboundary +
-					  X_INGPACKBOUNDARY_SHIFT);
-
-		s->fl_align = max(ingpadboundary, ingpackboundary);
-	}
-
+	s->fl_align = t4_fl_pkt_align(adap);
 	ret = t4_sge_init_soft(adap);
 	if (ret < 0) {
 		dev_err(adap, "%s: t4_sge_init_soft failed, error %d\n",
-- 
2.5.3



More information about the dev mailing list