[PATCH v4 1/2] mempool: cache align mempool cache objects
    Morten Brørup 
    mb at smartsharesystems.com
       
    Fri Oct 28 08:41:51 CEST 2022
    
    
  
Add __rte_cache_aligned to the objs array.
It makes no difference in the general case, but if get/put operations are
always 32 objects, it will reduce the number of memory (or last level
cache) accesses from five to four 64 B cache lines for every get/put
operation.
For readability reasons, an example using 16 objects follows:
Currently, with 16 objects (128B), we access to 3
cache lines:
      ┌────────┐
      │len     │
cache │********│---
line0 │********│ ^
      │********│ |
      ├────────┤ | 16 objects
      │********│ | 128B
cache │********│ |
line1 │********│ |
      │********│ |
      ├────────┤ |
      │********│_v_
cache │        │
line2 │        │
      │        │
      └────────┘
With the alignment, it is also 3 cache lines:
      ┌────────┐
      │len     │
cache │        │
line0 │        │
      │        │
      ├────────┤---
      │********│ ^
cache │********│ |
line1 │********│ |
      │********│ |
      ├────────┤ | 16 objects
      │********│ | 128B
cache │********│ |
line2 │********│ |
      │********│ v
      └────────┘---
However, accessing the objects at the bottom of the mempool cache is a
special case, where cache line0 is also used for objects.
Consider the next burst (and any following bursts):
Current:
      ┌────────┐
      │len     │
cache │        │
line0 │        │
      │        │
      ├────────┤
      │        │
cache │        │
line1 │        │
      │        │
      ├────────┤
      │        │
cache │********│---
line2 │********│ ^
      │********│ |
      ├────────┤ | 16 objects
      │********│ | 128B
cache │********│ |
line3 │********│ |
      │********│ |
      ├────────┤ |
      │********│_v_
cache │        │
line4 │        │
      │        │
      └────────┘
4 cache lines touched, incl. line0 for len.
With the proposed alignment:
      ┌────────┐
      │len     │
cache │        │
line0 │        │
      │        │
      ├────────┤
      │        │
cache │        │
line1 │        │
      │        │
      ├────────┤
      │        │
cache │        │
line2 │        │
      │        │
      ├────────┤
      │********│---
cache │********│ ^
line3 │********│ |
      │********│ | 16 objects
      ├────────┤ | 128B
      │********│ |
cache │********│ |
line4 │********│ |
      │********│_v_
      └────────┘
Only 3 cache lines touched, incl. line0 for len.
Credits go to Olivier Matz for the nice ASCII graphics.
v4:
* No changes. Added reviewed- and acked-by tags.
v3:
* No changes. Made part of a series.
v2:
* No such version.
Signed-off-by: Morten Brørup <mb at smartsharesystems.com>
Reviewed-by: Andrew Rybchenko <andrew.rybchenko at oktetlabs.ru>
Acked-by: Olivier Matz <olivier.matz at 6wind.com>
---
 lib/mempool/rte_mempool.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/lib/mempool/rte_mempool.h b/lib/mempool/rte_mempool.h
index 1f5707f46a..3725a72951 100644
--- a/lib/mempool/rte_mempool.h
+++ b/lib/mempool/rte_mempool.h
@@ -86,11 +86,13 @@ struct rte_mempool_cache {
 	uint32_t size;	      /**< Size of the cache */
 	uint32_t flushthresh; /**< Threshold before we flush excess elements */
 	uint32_t len;	      /**< Current cache count */
-	/*
+	/**
+	 * Cache objects
+	 *
 	 * Cache is allocated to this size to allow it to overflow in certain
 	 * cases to avoid needless emptying of cache.
 	 */
-	void *objs[RTE_MEMPOOL_CACHE_MAX_SIZE * 2]; /**< Cache objects */
+	void *objs[RTE_MEMPOOL_CACHE_MAX_SIZE * 2] __rte_cache_aligned;
 } __rte_cache_aligned;
 
 /**
-- 
2.17.1
    
    
More information about the dev
mailing list