[dpdk-dev] [PATCH v4 29/62] net/qede/base: optimize cache-line access

Rasesh Mody rasesh.mody at cavium.com
Tue Mar 28 08:51:59 CEST 2017


Optimize cache-line access in ecore_chain -
re-arrange fields so that fields that are needed for fastpath
[mostly produce/consume and their derivatives] are in the first cache
line, and the rest are in the second.

This is true for both PBL and NEXT_PTR kind of chains.
Advancing a page in a SINGLE_PAGE chain would still require the 2nd
cache line as well, but as far as we know only SPQ uses it and so it
isn't considered as 'fastpath'.

Signed-off-by: Rasesh Mody <rasesh.mody at cavium.com>
---
 drivers/net/qede/base/ecore_chain.h       |  143 ++++++++++++++++-------------
 drivers/net/qede/base/ecore_dev.c         |   14 +--
 drivers/net/qede/base/ecore_sp_commands.c |    4 +-
 3 files changed, 89 insertions(+), 72 deletions(-)

diff --git a/drivers/net/qede/base/ecore_chain.h b/drivers/net/qede/base/ecore_chain.h
index 61e39b5..ba272a9 100644
--- a/drivers/net/qede/base/ecore_chain.h
+++ b/drivers/net/qede/base/ecore_chain.h
@@ -59,25 +59,6 @@ struct ecore_chain_ext_pbl {
 	void *p_pbl_virt;
 };
 
-struct ecore_chain_pbl {
-	/* Base address of a pre-allocated buffer for pbl */
-	dma_addr_t p_phys_table;
-	void *p_virt_table;
-
-	/* Table for keeping the virtual addresses of the chain pages,
-	 * respectively to the physical addresses in the pbl table.
-	 */
-	void **pp_virt_addr_tbl;
-
-	/* Index to current used page by producer/consumer */
-	union {
-		struct ecore_chain_pbl_u16 pbl16;
-		struct ecore_chain_pbl_u32 pbl32;
-	} u;
-
-	bool external;
-};
-
 struct ecore_chain_u16 {
 	/* Cyclic index of next element to produce/consme */
 	u16 prod_idx;
@@ -91,40 +72,75 @@ struct ecore_chain_u32 {
 };
 
 struct ecore_chain {
-	/* Address of first page of the chain */
-	void *p_virt_addr;
-	dma_addr_t p_phys_addr;
-
+	/* fastpath portion of the chain - required for commands such
+	 * as produce / consume.
+	 */
 	/* Point to next element to produce/consume */
 	void *p_prod_elem;
 	void *p_cons_elem;
 
-	enum ecore_chain_mode mode;
-	enum ecore_chain_use_mode intended_use;
+	/* Fastpath portions of the PBL [if exists] */
+
+	struct {
+		/* Table for keeping the virtual addresses of the chain pages,
+		 * respectively to the physical addresses in the pbl table.
+		 */
+		void		**pp_virt_addr_tbl;
+
+		union {
+			struct ecore_chain_pbl_u16	u16;
+			struct ecore_chain_pbl_u32	u32;
+		} c;
+	} pbl;
 
-	enum ecore_chain_cnt_type cnt_type;
 	union {
 		struct ecore_chain_u16 chain16;
 		struct ecore_chain_u32 chain32;
 	} u;
 
-	u32 page_cnt;
+	/* Capacity counts only usable elements */
+	u32				capacity;
+	u32				page_cnt;
 
-	/* Number of elements - capacity is for usable elements only,
-	 * while size will contain total number of elements [for entire chain].
+	/* A u8 would suffice for mode, but it would save us a lot of headaches
+	 * on castings & defaults.
 	 */
-	u32 capacity;
-	u32 size;
+	enum ecore_chain_mode		mode;
 
 	/* Elements information for fast calculations */
 	u16 elem_per_page;
 	u16 elem_per_page_mask;
-	u16 elem_unusable;
-	u16 usable_per_page;
 	u16 elem_size;
 	u16 next_page_mask;
+	u16 usable_per_page;
+	u8 elem_unusable;
 
-	struct ecore_chain_pbl pbl;
+	u8				cnt_type;
+
+	/* Slowpath of the chain - required for initialization and destruction,
+	 * but isn't involved in regular functionality.
+	 */
+
+	/* Base address of a pre-allocated buffer for pbl */
+	struct {
+		dma_addr_t		p_phys_table;
+		void			*p_virt_table;
+	} pbl_sp;
+
+	/* Address of first page of the chain - the address is required
+	 * for fastpath operation [consume/produce] but only for the SINGLE
+	 * flavour which isn't considered fastpath [== SPQ].
+	 */
+	void				*p_virt_addr;
+	dma_addr_t			p_phys_addr;
+
+	/* Total number of elements [for entire chain] */
+	u32				size;
+
+	u8				intended_use;
+
+	/* TBD - do we really need this? Couldn't find usage for it */
+	bool				b_external_pbl;
 
 	void *dp_ctx;
 };
@@ -135,8 +151,8 @@ struct ecore_chain {
 
 #define UNUSABLE_ELEMS_PER_PAGE(elem_size, mode)		\
 	  ((mode == ECORE_CHAIN_MODE_NEXT_PTR) ?		\
-	   (1 + ((sizeof(struct ecore_chain_next) - 1) /		\
-	   (elem_size))) : 0)
+	   (u8)(1 + ((sizeof(struct ecore_chain_next) - 1) /	\
+		     (elem_size))) : 0)
 
 #define USABLE_ELEMS_PER_PAGE(elem_size, mode)		\
 	((u32)(ELEMS_PER_PAGE(elem_size) -			\
@@ -245,7 +261,7 @@ u16 ecore_chain_get_usable_per_page(struct ecore_chain *p_chain)
 }
 
 static OSAL_INLINE
-u16 ecore_chain_get_unusable_per_page(struct ecore_chain *p_chain)
+u8 ecore_chain_get_unusable_per_page(struct ecore_chain *p_chain)
 {
 	return p_chain->elem_unusable;
 }
@@ -263,7 +279,7 @@ static OSAL_INLINE u32 ecore_chain_get_page_cnt(struct ecore_chain *p_chain)
 static OSAL_INLINE
 dma_addr_t ecore_chain_get_pbl_phys(struct ecore_chain *p_chain)
 {
-	return p_chain->pbl.p_phys_table;
+	return p_chain->pbl_sp.p_phys_table;
 }
 
 /**
@@ -288,9 +304,9 @@ ecore_chain_advance_page(struct ecore_chain *p_chain, void **p_next_elem,
 		p_next = (struct ecore_chain_next *)(*p_next_elem);
 		*p_next_elem = p_next->next_virt;
 		if (is_chain_u16(p_chain))
-			*(u16 *)idx_to_inc += p_chain->elem_unusable;
+			*(u16 *)idx_to_inc += (u16)p_chain->elem_unusable;
 		else
-			*(u32 *)idx_to_inc += p_chain->elem_unusable;
+			*(u32 *)idx_to_inc += (u16)p_chain->elem_unusable;
 		break;
 	case ECORE_CHAIN_MODE_SINGLE:
 		*p_next_elem = p_chain->p_virt_addr;
@@ -391,7 +407,7 @@ static OSAL_INLINE void *ecore_chain_produce(struct ecore_chain *p_chain)
 		if ((p_chain->u.chain16.prod_idx &
 		     p_chain->elem_per_page_mask) == p_chain->next_page_mask) {
 			p_prod_idx = &p_chain->u.chain16.prod_idx;
-			p_prod_page_idx = &p_chain->pbl.u.pbl16.prod_page_idx;
+			p_prod_page_idx = &p_chain->pbl.c.u16.prod_page_idx;
 			ecore_chain_advance_page(p_chain, &p_chain->p_prod_elem,
 						 p_prod_idx, p_prod_page_idx);
 		}
@@ -400,7 +416,7 @@ static OSAL_INLINE void *ecore_chain_produce(struct ecore_chain *p_chain)
 		if ((p_chain->u.chain32.prod_idx &
 		     p_chain->elem_per_page_mask) == p_chain->next_page_mask) {
 			p_prod_idx = &p_chain->u.chain32.prod_idx;
-			p_prod_page_idx = &p_chain->pbl.u.pbl32.prod_page_idx;
+			p_prod_page_idx = &p_chain->pbl.c.u32.prod_page_idx;
 			ecore_chain_advance_page(p_chain, &p_chain->p_prod_elem,
 						 p_prod_idx, p_prod_page_idx);
 		}
@@ -465,7 +481,7 @@ static OSAL_INLINE void *ecore_chain_consume(struct ecore_chain *p_chain)
 		if ((p_chain->u.chain16.cons_idx &
 		     p_chain->elem_per_page_mask) == p_chain->next_page_mask) {
 			p_cons_idx = &p_chain->u.chain16.cons_idx;
-			p_cons_page_idx = &p_chain->pbl.u.pbl16.cons_page_idx;
+			p_cons_page_idx = &p_chain->pbl.c.u16.cons_page_idx;
 			ecore_chain_advance_page(p_chain, &p_chain->p_cons_elem,
 						 p_cons_idx, p_cons_page_idx);
 		}
@@ -474,7 +490,7 @@ static OSAL_INLINE void *ecore_chain_consume(struct ecore_chain *p_chain)
 		if ((p_chain->u.chain32.cons_idx &
 		     p_chain->elem_per_page_mask) == p_chain->next_page_mask) {
 			p_cons_idx = &p_chain->u.chain32.cons_idx;
-			p_cons_page_idx = &p_chain->pbl.u.pbl32.cons_page_idx;
+			p_cons_page_idx = &p_chain->pbl.c.u32.cons_page_idx;
 			ecore_chain_advance_page(p_chain, &p_chain->p_cons_elem,
 						 p_cons_idx, p_cons_page_idx);
 		}
@@ -518,25 +534,26 @@ static OSAL_INLINE void ecore_chain_reset(struct ecore_chain *p_chain)
 		u32 reset_val = p_chain->page_cnt - 1;
 
 		if (is_chain_u16(p_chain)) {
-			p_chain->pbl.u.pbl16.prod_page_idx = (u16)reset_val;
-			p_chain->pbl.u.pbl16.cons_page_idx = (u16)reset_val;
+			p_chain->pbl.c.u16.prod_page_idx = (u16)reset_val;
+			p_chain->pbl.c.u16.cons_page_idx = (u16)reset_val;
 		} else {
-			p_chain->pbl.u.pbl32.prod_page_idx = reset_val;
-			p_chain->pbl.u.pbl32.cons_page_idx = reset_val;
+			p_chain->pbl.c.u32.prod_page_idx = reset_val;
+			p_chain->pbl.c.u32.cons_page_idx = reset_val;
 		}
 	}
 
 	switch (p_chain->intended_use) {
-	case ECORE_CHAIN_USE_TO_CONSUME_PRODUCE:
-	case ECORE_CHAIN_USE_TO_PRODUCE:
-			/* Do nothing */
-			break;
-
 	case ECORE_CHAIN_USE_TO_CONSUME:
-			/* produce empty elements */
-			for (i = 0; i < p_chain->capacity; i++)
+		/* produce empty elements */
+		for (i = 0; i < p_chain->capacity; i++)
 			ecore_chain_recycle_consumed(p_chain);
-			break;
+		break;
+
+	case ECORE_CHAIN_USE_TO_CONSUME_PRODUCE:
+	case ECORE_CHAIN_USE_TO_PRODUCE:
+	default:
+		/* Do nothing */
+		break;
 	}
 }
 
@@ -563,9 +580,9 @@ ecore_chain_init_params(struct ecore_chain *p_chain, u32 page_cnt, u8 elem_size,
 	p_chain->p_virt_addr = OSAL_NULL;
 	p_chain->p_phys_addr = 0;
 	p_chain->elem_size = elem_size;
-	p_chain->intended_use = intended_use;
+	p_chain->intended_use = (u8)intended_use;
 	p_chain->mode = mode;
-	p_chain->cnt_type = cnt_type;
+	p_chain->cnt_type = (u8)cnt_type;
 
 	p_chain->elem_per_page = ELEMS_PER_PAGE(elem_size);
 	p_chain->usable_per_page = USABLE_ELEMS_PER_PAGE(elem_size, mode);
@@ -577,9 +594,9 @@ ecore_chain_init_params(struct ecore_chain *p_chain, u32 page_cnt, u8 elem_size,
 	p_chain->page_cnt = page_cnt;
 	p_chain->capacity = p_chain->usable_per_page * page_cnt;
 	p_chain->size = p_chain->elem_per_page * page_cnt;
-	p_chain->pbl.external = false;
-	p_chain->pbl.p_phys_table = 0;
-	p_chain->pbl.p_virt_table = OSAL_NULL;
+	p_chain->b_external_pbl = false;
+	p_chain->pbl_sp.p_phys_table = 0;
+	p_chain->pbl_sp.p_virt_table = OSAL_NULL;
 	p_chain->pbl.pp_virt_addr_tbl = OSAL_NULL;
 
 	p_chain->dp_ctx = dp_ctx;
@@ -623,8 +640,8 @@ static OSAL_INLINE void ecore_chain_init_pbl_mem(struct ecore_chain *p_chain,
 						 dma_addr_t p_phys_pbl,
 						 void **pp_virt_addr_tbl)
 {
-	p_chain->pbl.p_phys_table = p_phys_pbl;
-	p_chain->pbl.p_virt_table = p_virt_pbl;
+	p_chain->pbl_sp.p_phys_table = p_phys_pbl;
+	p_chain->pbl_sp.p_virt_table = p_virt_pbl;
 	p_chain->pbl.pp_virt_addr_tbl = pp_virt_addr_tbl;
 }
 
diff --git a/drivers/net/qede/base/ecore_dev.c b/drivers/net/qede/base/ecore_dev.c
index c895656..1c08d4a 100644
--- a/drivers/net/qede/base/ecore_dev.c
+++ b/drivers/net/qede/base/ecore_dev.c
@@ -3559,13 +3559,13 @@ static void ecore_chain_free_pbl(struct ecore_dev *p_dev,
 				 struct ecore_chain *p_chain)
 {
 	void **pp_virt_addr_tbl = p_chain->pbl.pp_virt_addr_tbl;
-	u8 *p_pbl_virt = (u8 *)p_chain->pbl.p_virt_table;
+	u8 *p_pbl_virt = (u8 *)p_chain->pbl_sp.p_virt_table;
 	u32 page_cnt = p_chain->page_cnt, i, pbl_size;
 
 	if (!pp_virt_addr_tbl)
 		return;
 
-	if (!p_chain->pbl.p_virt_table)
+	if (!p_pbl_virt)
 		goto out;
 
 	for (i = 0; i < page_cnt; i++) {
@@ -3581,10 +3581,10 @@ static void ecore_chain_free_pbl(struct ecore_dev *p_dev,
 
 	pbl_size = page_cnt * ECORE_CHAIN_PBL_ENTRY_SIZE;
 
-	if (!p_chain->pbl.external)
-		OSAL_DMA_FREE_COHERENT(p_dev, p_chain->pbl.p_virt_table,
-				       p_chain->pbl.p_phys_table, pbl_size);
-out:
+	if (!p_chain->b_external_pbl)
+		OSAL_DMA_FREE_COHERENT(p_dev, p_chain->pbl_sp.p_virt_table,
+				       p_chain->pbl_sp.p_phys_table, pbl_size);
+ out:
 	OSAL_VFREE(p_dev, p_chain->pbl.pp_virt_addr_tbl);
 }
 
@@ -3716,7 +3716,7 @@ ecore_chain_alloc_pbl(struct ecore_dev *p_dev,
 	} else {
 		p_pbl_virt = ext_pbl->p_pbl_virt;
 		p_pbl_phys = ext_pbl->p_pbl_phys;
-		p_chain->pbl.external = true;
+		p_chain->b_external_pbl = true;
 	}
 
 	ecore_chain_init_pbl_mem(p_chain, p_pbl_virt, p_pbl_phys,
diff --git a/drivers/net/qede/base/ecore_sp_commands.c b/drivers/net/qede/base/ecore_sp_commands.c
index 23ebab7..b831970 100644
--- a/drivers/net/qede/base/ecore_sp_commands.c
+++ b/drivers/net/qede/base/ecore_sp_commands.c
@@ -379,11 +379,11 @@ enum _ecore_status_t ecore_sp_pf_start(struct ecore_hwfn *p_hwfn,
 
 	/* Place EQ address in RAMROD */
 	DMA_REGPAIR_LE(p_ramrod->event_ring_pbl_addr,
-		       p_hwfn->p_eq->chain.pbl.p_phys_table);
+		       p_hwfn->p_eq->chain.pbl_sp.p_phys_table);
 	page_cnt = (u8)ecore_chain_get_page_cnt(&p_hwfn->p_eq->chain);
 	p_ramrod->event_ring_num_pages = page_cnt;
 	DMA_REGPAIR_LE(p_ramrod->consolid_q_pbl_addr,
-		       p_hwfn->p_consq->chain.pbl.p_phys_table);
+		       p_hwfn->p_consq->chain.pbl_sp.p_phys_table);
 
 	ecore_tunn_set_pf_start_params(p_hwfn, p_tunn,
 				       &p_ramrod->tunnel_config);
-- 
1.7.10.3



More information about the dev mailing list