[dpdk-dev] [PATCH 6/7] net/cxgbe: implement flow query operation

Rahul Lakkireddy rahul.lakkireddy at chelsio.com
Fri Jun 8 19:58:16 CEST 2018


From: Shagun Agrawal <shaguna at chelsio.com>

Add API to query filter hit and byte counts from hardware.

Signed-off-by: Shagun Agrawal <shaguna at chelsio.com>
Signed-off-by: Kumar Sanghvi <kumaras at chelsio.com>
Signed-off-by: Rahul Lakkireddy <rahul.lakkireddy at chelsio.com>
---
 drivers/net/cxgbe/base/adapter.h |   1 +
 drivers/net/cxgbe/base/common.h  |  15 +++
 drivers/net/cxgbe/base/t4_hw.c   | 209 +++++++++++++++++++++++++++++++++++++++
 drivers/net/cxgbe/base/t4_hw.h   |   4 +
 drivers/net/cxgbe/base/t4_regs.h |  16 +++
 drivers/net/cxgbe/cxgbe_filter.c |  62 ++++++++++++
 drivers/net/cxgbe/cxgbe_filter.h |   2 +
 drivers/net/cxgbe/cxgbe_flow.c   |  62 +++++++++++-
 drivers/net/cxgbe/cxgbe_main.c   |   1 +
 9 files changed, 371 insertions(+), 1 deletion(-)

diff --git a/drivers/net/cxgbe/base/adapter.h b/drivers/net/cxgbe/base/adapter.h
index 7f9ddae01..de46ecfe3 100644
--- a/drivers/net/cxgbe/base/adapter.h
+++ b/drivers/net/cxgbe/base/adapter.h
@@ -319,6 +319,7 @@ struct adapter {
 	unsigned int vpd_flag;
 
 	int use_unpacked_mode; /* unpacked rx mode state */
+	rte_spinlock_t win0_lock;
 
 	struct tid_info tids;     /* Info used to access TID related tables */
 };
diff --git a/drivers/net/cxgbe/base/common.h b/drivers/net/cxgbe/base/common.h
index c80304b24..e524f7931 100644
--- a/drivers/net/cxgbe/base/common.h
+++ b/drivers/net/cxgbe/base/common.h
@@ -18,6 +18,9 @@ extern "C" {
 
 #define CXGBE_PAGE_SIZE RTE_PGSIZE_4K
 
+#define T4_MEMORY_WRITE 0
+#define T4_MEMORY_READ  1
+
 enum {
 	MAX_NPORTS     = 4,     /* max # of ports */
 };
@@ -47,6 +50,8 @@ enum cc_fec {
 	FEC_BASER_RS = 1 << 2,    /* BaseR/Reed-Solomon */
 };
 
+enum { MEM_EDC0, MEM_EDC1, MEM_MC, MEM_MC0 = MEM_MC, MEM_MC1 };
+
 struct port_stats {
 	u64 tx_octets;            /* total # of octets in good frames */
 	u64 tx_frames;            /* all good frames */
@@ -502,5 +507,15 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size);
 int t4_seeprom_read(struct adapter *adapter, u32 addr, u32 *data);
 int t4_seeprom_write(struct adapter *adapter, u32 addr, u32 data);
 int t4_seeprom_wp(struct adapter *adapter, int enable);
+int t4_memory_rw_addr(struct adapter *adap, int win,
+		      u32 addr, u32 len, void *hbuf, int dir);
+int t4_memory_rw_mtype(struct adapter *adap, int win, int mtype, u32 maddr,
+		       u32 len, void *hbuf, int dir);
+static inline int t4_memory_rw(struct adapter *adap, int win, int mtype,
+			       u32 maddr, u32 len, void *hbuf, int dir)
+{
+	/* Alias that forwards every argument to t4_memory_rw_mtype(). */
+	return t4_memory_rw_mtype(adap, win, mtype, maddr, len, hbuf, dir);
+}
 fw_port_cap32_t fwcaps16_to_caps32(fw_port_cap16_t caps16);
 #endif /* __CHELSIO_COMMON_H */
diff --git a/drivers/net/cxgbe/base/t4_hw.c b/drivers/net/cxgbe/base/t4_hw.c
index c146c911e..66d080476 100644
--- a/drivers/net/cxgbe/base/t4_hw.c
+++ b/drivers/net/cxgbe/base/t4_hw.c
@@ -5215,3 +5215,212 @@ int t4_port_init(struct adapter *adap, int mbox, int pf, int vf)
 	}
 	return 0;
 }
+
+/**
+ * t4_memory_rw_addr - read/write adapter memory via PCIE memory window
+ * @adap: the adapter
+ * @win: PCI-E Memory Window to use
+ * @addr: address within adapter memory
+ * @len: amount of memory to transfer
+ * @hbuf: host memory buffer
+ * @dir: direction of transfer T4_MEMORY_READ (1) or T4_MEMORY_WRITE (0)
+ *
+ * Reads/writes an [almost] arbitrary memory region in the firmware: the
+ * firmware memory address and host buffer must be aligned on 32-bit
+ * boundaries; the length may be arbitrary.
+ *
+ * NOTES:
+ *  1. The memory is transferred as a raw byte sequence from/to the
+ *     firmware's memory.  If this memory contains data structures which
+ *     contain multi-byte integers, it's the caller's responsibility to
+ *     perform appropriate byte order conversions.
+ *
+ *  2. It is the Caller's responsibility to ensure that no other code
+ *     uses the specified PCI-E Memory Window while this routine is
+ *     using it.  This is typically done via the use of OS-specific
+ *     locks, etc.
+ */
+int t4_memory_rw_addr(struct adapter *adap, int win, u32 addr,
+		      u32 len, void *hbuf, int dir)
+{
+	u32 pos, offset, resid;
+	u32 win_pf, mem_reg, mem_aperture, mem_base;
+	u32 *buf;
+
+	/* Argument sanity checks ...*/
+	if (addr & 0x3 || (uintptr_t)hbuf & 0x3)
+		return -EINVAL;
+	buf = (u32 *)hbuf;
+
+	/* It's convenient to be able to handle lengths which aren't a
+	 * multiple of 32-bits because we often end up transferring files to
+	 * the firmware.  So we'll handle that by normalizing the length here
+	 * and then handling any residual transfer at the end.
+	 */
+	resid = len & 0x3;
+	len -= resid;
+
+	/* Each PCI-E Memory Window is programmed with a window size -- or
+	 * "aperture" -- which controls the granularity of its mapping onto
+	 * adapter memory.  We need to grab that aperture in order to know
+	 * how to use the specified window.  The window is also programmed
+	 * with the base address of the Memory Window in BAR0's address
+	 * space.  For T4 this is an absolute PCI-E Bus Address.  For T5
+	 * the address is relative to BAR0.
+	 */
+	mem_reg = t4_read_reg(adap,
+			      PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN,
+						  win));
+	mem_aperture = 1U << (G_WINDOW(mem_reg) + X_WINDOW_SHIFT);
+	mem_base = G_PCIEOFST(mem_reg) << X_PCIEOFST_SHIFT;
+
+	win_pf = is_t4(adap->params.chip) ? 0 : V_PFNUM(adap->pf);
+
+	/* Calculate our initial PCI-E Memory Window Position and Offset into
+	 * that Window.
+	 */
+	pos = addr & ~(mem_aperture - 1);
+	offset = addr - pos;
+
+	/* Set up initial PCI-E Memory Window to cover the start of our
+	 * transfer.  (Read it back to ensure that changes propagate before we
+	 * attempt to use the new value.)
+	 */
+	t4_write_reg(adap,
+		     PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, win),
+		     pos | win_pf);
+	t4_read_reg(adap,
+		    PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, win));
+
+	/* Transfer data to/from the adapter as long as there's an integral
+	 * number of 32-bit transfers to complete.
+	 *
+	 * A note on Endianness issues:
+	 *
+	 * The "register" reads and writes below from/to the PCI-E Memory
+	 * Window invoke the standard adapter Big-Endian to PCI-E Link
+	 * Little-Endian "swizzle."  As a result, if we have the following
+	 * data in adapter memory:
+	 *
+	 *     Memory:  ... | b0 | b1 | b2 | b3 | ...
+	 *     Address:      i+0  i+1  i+2  i+3
+	 *
+	 * Then a read of the adapter memory via the PCI-E Memory Window
+	 * will yield:
+	 *
+	 *     x = readl(i)
+	 *         31                  0
+	 *         [ b3 | b2 | b1 | b0 ]
+	 *
+	 * If this value is stored into local memory on a Little-Endian system
+	 * it will show up correctly in local memory as:
+	 *
+	 *     ( ..., b0, b1, b2, b3, ... )
+	 *
+	 * But on a Big-Endian system, the store will show up in memory
+	 * incorrectly swizzled as:
+	 *
+	 *     ( ..., b3, b2, b1, b0, ... )
+	 *
+	 * So we need to account for this in the reads and writes to the
+	 * PCI-E Memory Window below by undoing the register read/write
+	 * swizzles.
+	 */
+	while (len > 0) {
+		if (dir == T4_MEMORY_READ)
+			*buf++ = le32_to_cpu((__le32)t4_read_reg(adap,
+								 mem_base +
+								 offset));
+		else
+			t4_write_reg(adap, mem_base + offset,
+				     (u32)cpu_to_le32(*buf++));
+		offset += sizeof(__be32);
+		len -= sizeof(__be32);
+
+		/* If we've reached the end of our current window aperture,
+		 * move the PCI-E Memory Window on to the next.  Note that
+		 * doing this here after "len" may be 0 allows us to set up
+		 * the PCI-E Memory Window for a possible final residual
+		 * transfer below ...
+		 */
+		if (offset == mem_aperture) {
+			pos += mem_aperture;
+			offset = 0;
+			t4_write_reg(adap,
+				PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET,
+						    win), pos | win_pf);
+			t4_read_reg(adap,
+				PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET,
+						    win));
+		}
+	}
+
+	/* If the original length wasn't a multiple of 32-bits, finish off
+	 * the residual amount here.  Only the first "resid" bytes of the
+	 * last word belong to the host buffer; writes pad the rest with
+	 * zeros.  The PCI-E Memory Window was already moved above.
+	 */
+	if (resid) {
+		union {
+			u32 word;
+			unsigned char byte[4];
+		} last;
+		unsigned char *bp = (unsigned char *)buf;
+		u32 i;
+
+		if (dir == T4_MEMORY_READ) {
+			last.word = le32_to_cpu((__le32)t4_read_reg(adap,
+								    mem_base +
+								    offset));
+			for (i = 0; i < resid; i++)
+				bp[i] = last.byte[i];
+		} else {
+			last.word = 0;
+			for (i = 0; i < resid; i++)
+				last.byte[i] = bp[i];
+			t4_write_reg(adap, mem_base + offset,
+				     (u32)cpu_to_le32(last.word));
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * t4_memory_rw_mtype - read/write EDC 0, EDC 1 or MC via PCIE memory window
+ * @adap: the adapter
+ * @win: PCI-E Memory Window to use
+ * @mtype: memory type: MEM_EDC0, MEM_EDC1 or MEM_MC
+ * @maddr: address within indicated memory type
+ * @len: amount of memory to transfer
+ * @hbuf: host memory buffer
+ * @dir: direction of transfer T4_MEMORY_READ (1) or T4_MEMORY_WRITE (0)
+ *
+ * Reads/writes adapter memory using t4_memory_rw_addr().  This routine
+ * provides an (memory type, address within memory type) interface.
+ */
+int t4_memory_rw_mtype(struct adapter *adap, int win, int mtype, u32 maddr,
+		       u32 len, void *hbuf, int dir)
+{
+	u32 edc_mb, mc_mb;
+	u32 base;
+
+	/* Work out where the requested memory type starts:
+	 * MEM_EDC0 = 0
+	 * MEM_EDC1 = 1
+	 * MEM_MC   = 2 -- MEM_MC for chips with only 1 memory controller
+	 * MEM_MC1  = 3 -- for chips with 2 memory controllers (e.g. T5)
+	 * Sizes read from the BAR registers are scaled by 1024 * 1024.
+	 */
+	edc_mb = G_EDRAM0_SIZE(t4_read_reg(adap, A_MA_EDRAM0_BAR));
+	if (mtype == MEM_MC1) {
+		/* MEM_MC1 also skips the size in A_MA_EXT_MEMORY0_BAR. */
+		mc_mb = G_EXT_MEM0_SIZE(t4_read_reg(adap,
+						    A_MA_EXT_MEMORY0_BAR));
+		base = (MEM_MC0 * edc_mb + mc_mb) * 1024 * 1024;
+	} else {
+		base = mtype * (edc_mb * 1024 * 1024);
+	}
+
+	return t4_memory_rw_addr(adap, win, base + maddr, len, hbuf, dir);
+}
diff --git a/drivers/net/cxgbe/base/t4_hw.h b/drivers/net/cxgbe/base/t4_hw.h
index ac12afc04..e77563dfa 100644
--- a/drivers/net/cxgbe/base/t4_hw.h
+++ b/drivers/net/cxgbe/base/t4_hw.h
@@ -42,6 +42,10 @@ enum {
 	SGE_MAX_WR_NDESC = SGE_MAX_WR_LEN / SGE_EQ_IDXSIZE,
 };
 
+enum {
+	TCB_SIZE        = 128,   /* TCB size */
+};
+
 struct sge_qstat {                /* data written to SGE queue status entries */
 	__be32 qid;
 	__be16 cidx;
diff --git a/drivers/net/cxgbe/base/t4_regs.h b/drivers/net/cxgbe/base/t4_regs.h
index c0d6ddcac..fd8f9cf27 100644
--- a/drivers/net/cxgbe/base/t4_regs.h
+++ b/drivers/net/cxgbe/base/t4_regs.h
@@ -458,6 +458,7 @@
 #define F_CRXPKTENC    V_CRXPKTENC(1U)
 
 #define TP_BASE_ADDR 0x7d00
+#define A_TP_CMM_TCB_BASE 0x7d10
 
 #define A_TP_TIMER_RESOLUTION 0x7d90
 
@@ -574,6 +575,21 @@
 #define S_RM_OVLAN	9
 #define V_RM_OVLAN(x)	((x) << S_RM_OVLAN)
 
+/* registers for module MA */
+#define A_MA_EDRAM0_BAR 0x77c0
+
+#define S_EDRAM0_SIZE    0
+#define M_EDRAM0_SIZE    0xfffU
+#define V_EDRAM0_SIZE(x) ((x) << S_EDRAM0_SIZE)
+#define G_EDRAM0_SIZE(x) (((x) >> S_EDRAM0_SIZE) & M_EDRAM0_SIZE)
+
+#define A_MA_EXT_MEMORY0_BAR 0x77c8
+
+#define S_EXT_MEM0_SIZE    0
+#define M_EXT_MEM0_SIZE    0xfffU
+#define V_EXT_MEM0_SIZE(x) ((x) << S_EXT_MEM0_SIZE)
+#define G_EXT_MEM0_SIZE(x) (((x) >> S_EXT_MEM0_SIZE) & M_EXT_MEM0_SIZE)
+
 /* registers for module MPS */
 #define MPS_BASE_ADDR 0x9000
 #define T4VF_MPS_BASE_ADDR 0x0100
diff --git a/drivers/net/cxgbe/cxgbe_filter.c b/drivers/net/cxgbe/cxgbe_filter.c
index 8129ed01f..2fc580e61 100644
--- a/drivers/net/cxgbe/cxgbe_filter.c
+++ b/drivers/net/cxgbe/cxgbe_filter.c
@@ -545,3 +545,65 @@ void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl)
 			t4_complete(&ctx->completion);
 	}
 }
+
+/*
+ * Store in *c the packet hit count of filter fidx, or its byte count when
+ * get_byte is true (0 on chips other than T5/T6).  Returns 0, -ERANGE or
+ * -EINVAL for an out-of-range or invalid filter, or a t4_memory_rw() error.
+ */
+int cxgbe_get_filter_count(struct adapter *adapter, unsigned int fidx,
+			   u64 *c, bool get_byte)
+{
+	struct filter_entry *f;
+	unsigned int tcb_base, tcbaddr;
+	int ret;
+
+	tcb_base = t4_read_reg(adapter, A_TP_CMM_TCB_BASE);
+	if (fidx >= adapter->tids.nftids)
+		return -ERANGE;
+
+	f = &adapter->tids.ftid_tab[fidx];
+	if (!f->valid)
+		return -EINVAL;
+
+	tcbaddr = tcb_base + f->tid * TCB_SIZE;
+
+	/* Only T5/T6 are handled; never return success with *c unset. */
+	if (!is_t5(adapter->params.chip) && !is_t6(adapter->params.chip)) {
+		*c = 0;
+		return 0;
+	}
+
+	/* The hit count is a 32-bit Big Endian value in the TCB {timestamp}
+	 * field, seen at word 6 (offset 24) rather than W_TCB_TIMESTAMP (5).
+	 * The byte count is a 64-bit Big Endian value starting at word 4.
+	 */
+	if (get_byte) {
+		unsigned int word_offset = 4;
+		__be64 be64_byte_count;
+
+		t4_os_lock(&adapter->win0_lock);
+		ret = t4_memory_rw(adapter, MEMWIN_NIC, MEM_EDC0,
+				   tcbaddr + word_offset * sizeof(__be32),
+				   sizeof(be64_byte_count), &be64_byte_count,
+				   T4_MEMORY_READ);
+		t4_os_unlock(&adapter->win0_lock);
+		if (ret < 0)
+			return ret;
+		*c = be64_to_cpu(be64_byte_count);
+	} else {
+		unsigned int word_offset = 6;
+		__be32 be32_count;
+
+		t4_os_lock(&adapter->win0_lock);
+		ret = t4_memory_rw(adapter, MEMWIN_NIC, MEM_EDC0,
+				   tcbaddr + word_offset * sizeof(__be32),
+				   sizeof(be32_count), &be32_count,
+				   T4_MEMORY_READ);
+		t4_os_unlock(&adapter->win0_lock);
+		if (ret < 0)
+			return ret;
+		*c = (u64)be32_to_cpu(be32_count);
+	}
+	return 0;
+}
diff --git a/drivers/net/cxgbe/cxgbe_filter.h b/drivers/net/cxgbe/cxgbe_filter.h
index e0ba6a4d3..3c81c1a64 100644
--- a/drivers/net/cxgbe/cxgbe_filter.h
+++ b/drivers/net/cxgbe/cxgbe_filter.h
@@ -220,4 +220,6 @@ int cxgbe_del_filter(struct rte_eth_dev *dev, unsigned int filter_id,
 		     struct filter_ctx *ctx);
 int cxgbe_alloc_ftid(struct adapter *adap, unsigned int family);
 int validate_filter(struct adapter *adap, struct ch_filter_specification *fs);
+int cxgbe_get_filter_count(struct adapter *adapter, unsigned int fidx,
+			   u64 *c, bool get_byte);
 #endif /* _CXGBE_FILTER_H_ */
diff --git a/drivers/net/cxgbe/cxgbe_flow.c b/drivers/net/cxgbe/cxgbe_flow.c
index 1584df392..89490ecc2 100644
--- a/drivers/net/cxgbe/cxgbe_flow.c
+++ b/drivers/net/cxgbe/cxgbe_flow.c
@@ -522,6 +522,66 @@ cxgbe_flow_destroy(struct rte_eth_dev *dev, struct rte_flow *flow,
 	return 0;
 }
 
+/* Fetch the packet hit count, then the byte count, of the flow's filter. */
+static int __cxgbe_flow_query(struct rte_flow *flow, u64 *count,
+			      u64 *byte_count)
+{
+	struct adapter *adapter = ethdev2adap(flow->dev);
+	int err;
+
+	err = cxgbe_get_filter_count(adapter, flow->fidx, count, false);
+	if (err != 0)
+		return err;
+	return cxgbe_get_filter_count(adapter, flow->fidx, byte_count, true);
+}
+
+/*
+ * rte_flow query op: fill the rte_flow_query_count at data with the flow's
+ * hit and byte counts.  Only RTE_FLOW_ACTION_TYPE_COUNT is supported.
+ * Returns 0 on success, else the result of rte_flow_error_set().
+ */
+static int
+cxgbe_flow_query(struct rte_eth_dev *dev, struct rte_flow *flow,
+		 const struct rte_flow_action *action, void *data,
+		 struct rte_flow_error *e)
+{
+	const struct ch_filter_specification *fs;
+	struct rte_flow_query_count *c;
+	struct filter_entry *f;
+	int ret;
+
+	RTE_SET_USED(dev);
+
+	f = flow->f;
+	fs = &f->fs;
+
+	if (action->type != RTE_FLOW_ACTION_TYPE_COUNT)
+		return rte_flow_error_set(e, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+					  "only count supported for query");
+
+	/*
+	 * Chelsio-specific fs may be examined here, but rte_flow_query_count
+	 * is kept out of the lower layer so it stays rte_flow agnostic.
+	 */
+	if (!fs->hitcnts)
+		return rte_flow_error_set(e, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION,
+					  fs, "filter hit counters were not"
+					  " enabled during filter creation");
+
+	c = (struct rte_flow_query_count *)data;
+	ret = __cxgbe_flow_query(flow, &c->hits, &c->bytes);
+	if (ret)
+		return rte_flow_error_set(e, -ret, RTE_FLOW_ERROR_TYPE_ACTION,
+					  f, "cxgbe pmd failed to"
+					  " perform query");
+
+	c->bytes_set = 1;
+	c->hits_set = 1;
+
+	return 0; /* success / partial_success */
+}
+
 static int
 cxgbe_flow_validate(struct rte_eth_dev *dev,
 		    const struct rte_flow_attr *attr,
@@ -577,7 +637,7 @@ static const struct rte_flow_ops cxgbe_flow_ops = {
 	.create		= cxgbe_flow_create,
 	.destroy	= cxgbe_flow_destroy,
 	.flush		= NULL,
-	.query		= NULL,
+	.query		= cxgbe_flow_query,
 	.isolate	= NULL,
 };
 
diff --git a/drivers/net/cxgbe/cxgbe_main.c b/drivers/net/cxgbe/cxgbe_main.c
index a00e0700d..21ad380ae 100644
--- a/drivers/net/cxgbe/cxgbe_main.c
+++ b/drivers/net/cxgbe/cxgbe_main.c
@@ -1527,6 +1527,7 @@ int cxgbe_probe(struct adapter *adapter)
 
 	t4_os_lock_init(&adapter->mbox_lock);
 	TAILQ_INIT(&adapter->mbox_list);
+	t4_os_lock_init(&adapter->win0_lock);
 
 	err = t4_prep_adapter(adapter);
 	if (err)
-- 
2.14.1



More information about the dev mailing list