[dpdk-dev] [PATCH v2 5/6] mempool: prevent objects from being across pages
Olivier Matz
olivier.matz at 6wind.com
Wed Oct 30 15:36:18 CET 2019
When populating a mempool, ensure that objects are not located across
several pages, unless the user did not request IOVA-contiguous objects.
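
The test at the heart of the change is simple: an object crosses a page
boundary iff its first and last bytes fall on different pg_sz-aligned
pages. A minimal standalone sketch of the check_obj_bounds() helper
added below (the name obj_crosses_page and the masking style are
illustrative; the patch itself uses RTE_PTR_ALIGN):

    #include <stdint.h>
    #include <stddef.h>

    /* Nonzero iff [obj, obj + elt_sz) straddles a pg_sz boundary.
     * Assumes pg_sz is a power of two. pg_sz == 0 means "no
     * constraint"; objects bigger than a page are handled by the
     * chunk size logic instead. */
    static int
    obj_crosses_page(uintptr_t obj, size_t pg_sz, size_t elt_sz)
    {
            if (pg_sz == 0 || elt_sz > pg_sz)
                    return 0;
            return (obj & ~(pg_sz - 1)) !=
                    ((obj + elt_sz - 1) & ~(pg_sz - 1));
    }

    /* e.g. with pg_sz = 4096 and elt_sz = 1000, an object at 3500
     * ends at 4499 and crosses; populate() then skips to 4096. */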
Signed-off-by: Vamsi Krishna Attunuru <vattunuru at marvell.com>
Signed-off-by: Olivier Matz <olivier.matz at 6wind.com>
---
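
A worked example of the sizing logic in otx2_npa_calc_mem_size() below
(numbers are made up; npa_mem_size_sketch is a hypothetical helper that
ignores the total_elt_sz == 0, pg_shift == 0 and obj_per_page == 0
branches):

    #include <stddef.h>
    #include <stdio.h>

    static size_t
    npa_mem_size_sketch(size_t total_elt_sz, unsigned int pg_shift,
                    size_t obj_num)
    {
            size_t pg_sz = (size_t)1 << pg_shift;
            size_t obj_per_page = pg_sz / total_elt_sz;
            size_t objs_in_last_page, mem_size;

            obj_per_page--; /* one object of margin to fix alignment */

            objs_in_last_page = ((obj_num - 1) % obj_per_page) + 1;
            /* room for the last page */
            mem_size = objs_in_last_page * total_elt_sz;
            /* room for the other, fully populated pages */
            mem_size += ((obj_num - objs_in_last_page) /
                            obj_per_page) << pg_shift;
            /* worst case: allocator returns an unaligned address */
            mem_size += total_elt_sz - 1;
            return mem_size;
    }

    int main(void)
    {
            /* 1000-byte objects, 4 KiB pages, 5 objects: 3 objects
             * fit per page after the margin, so 2 land in the last
             * page -> 2 * 1000 + 1 * 4096 + 999 = 7095 bytes. */
            printf("%zu\n", npa_mem_size_sketch(1000, 12, 5));
            return 0;
    }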
drivers/mempool/octeontx2/Makefile | 3 +
drivers/mempool/octeontx2/meson.build | 3 +
drivers/mempool/octeontx2/otx2_mempool_ops.c | 119 ++++++++++++++++---
lib/librte_mempool/rte_mempool.c | 23 ++--
lib/librte_mempool/rte_mempool_ops_default.c | 32 ++++-
5 files changed, 147 insertions(+), 33 deletions(-)
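
One detail worth calling out in otx2_npa_populate(): the padding up to
the next multiple of total_elt_sz is now computed through (va - 1), so
that an already-aligned address yields 0 bytes of padding, where the
previous total_elt_sz - (addr % total_elt_sz) form returned a full
total_elt_sz. A hypothetical helper showing the identity in isolation:

    #include <stdint.h>
    #include <stddef.h>

    /* Padding from addr (addr > 0) up to the next multiple of elt_sz;
     * same expression as used twice in otx2_npa_populate(). */
    static size_t
    pad_to_multiple(uintptr_t addr, size_t elt_sz)
    {
            return elt_sz - ((addr - 1) % elt_sz + 1);
    }

    /* pad_to_multiple(0x1000, 0x100) == 0x00 (already aligned)
     * pad_to_multiple(0x1001, 0x100) == 0xff (round up to 0x1100) */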
diff --git a/drivers/mempool/octeontx2/Makefile b/drivers/mempool/octeontx2/Makefile
index 87cce22c6..d781cbfc6 100644
--- a/drivers/mempool/octeontx2/Makefile
+++ b/drivers/mempool/octeontx2/Makefile
@@ -27,6 +27,9 @@ EXPORT_MAP := rte_mempool_octeontx2_version.map
LIBABIVER := 1
+# for rte_mempool_get_page_size
+CFLAGS += -DALLOW_EXPERIMENTAL_API
+
#
# all source are stored in SRCS-y
#
diff --git a/drivers/mempool/octeontx2/meson.build b/drivers/mempool/octeontx2/meson.build
index 9fde40f0e..28f9634da 100644
--- a/drivers/mempool/octeontx2/meson.build
+++ b/drivers/mempool/octeontx2/meson.build
@@ -21,3 +21,6 @@ foreach flag: extra_flags
endforeach
deps += ['eal', 'mbuf', 'kvargs', 'bus_pci', 'common_octeontx2', 'mempool']
+
+# for rte_mempool_get_page_size
+allow_experimental_apis = true
diff --git a/drivers/mempool/octeontx2/otx2_mempool_ops.c b/drivers/mempool/octeontx2/otx2_mempool_ops.c
index d769575f4..47117aec6 100644
--- a/drivers/mempool/octeontx2/otx2_mempool_ops.c
+++ b/drivers/mempool/octeontx2/otx2_mempool_ops.c
@@ -713,12 +713,76 @@ static ssize_t
otx2_npa_calc_mem_size(const struct rte_mempool *mp, uint32_t obj_num,
uint32_t pg_shift, size_t *min_chunk_size, size_t *align)
{
- /*
- * Simply need space for one more object to be able to
- * fulfill alignment requirements.
- */
- return rte_mempool_op_calc_mem_size_default(mp, obj_num + 1, pg_shift,
- min_chunk_size, align);
+ size_t total_elt_sz;
+ size_t obj_per_page, pg_sz, objs_in_last_page;
+ size_t mem_size;
+
+ /* derived from rte_mempool_op_calc_mem_size_default() */
+
+ total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
+
+ if (total_elt_sz == 0) {
+ mem_size = 0;
+ } else if (pg_shift == 0) {
+ /* one object margin to fix alignment */
+ mem_size = total_elt_sz * (obj_num + 1);
+ } else {
+ pg_sz = (size_t)1 << pg_shift;
+ obj_per_page = pg_sz / total_elt_sz;
+
+ /* we need to keep one object to fix alignment */
+ if (obj_per_page > 0)
+ obj_per_page--;
+
+ if (obj_per_page == 0) {
+ /*
+ * Note that if object size is bigger than page size,
+ * then it is assumed that pages are grouped in subsets
+ * of physically contiguous pages big enough to store
+ * at least one object.
+ */
+ mem_size = RTE_ALIGN_CEIL(2 * total_elt_sz,
+ pg_sz) * obj_num;
+ } else {
+ /* In the best case, the allocator will return a
+ * page-aligned address. For example, with 5 objs,
+ * the required space is as below:
+ * | page0 | page1 | page2 (last) |
+ * |obj0 |obj1 |xxx|obj2 |obj3 |xxx|obj4|
+ * <------------- mem_size ------------->
+ */
+ objs_in_last_page = ((obj_num - 1) % obj_per_page) + 1;
+ /* room required for the last page */
+ mem_size = objs_in_last_page * total_elt_sz;
+ /* room required for other pages */
+ mem_size += ((obj_num - objs_in_last_page) /
+ obj_per_page) << pg_shift;
+
+ /* In the worst case, the allocator returns a
+ * non-aligned pointer, wasting up to
+ * total_elt_sz. Add a margin for that.
+ */
+ mem_size += total_elt_sz - 1;
+ }
+ }
+
+ *min_chunk_size = total_elt_sz * 2;
+ *align = RTE_CACHE_LINE_SIZE;
+
+ return mem_size;
+}
+
+/* Returns -1 if object crosses a page boundary, else returns 0 */
+static int
+check_obj_bounds(char *obj, size_t pg_sz, size_t elt_sz)
+{
+ if (pg_sz == 0)
+ return 0;
+ if (elt_sz > pg_sz)
+ return 0;
+ if (RTE_PTR_ALIGN(obj, pg_sz) != RTE_PTR_ALIGN(obj + elt_sz - 1, pg_sz))
+ return -1;
+ return 0;
}
static int
@@ -726,8 +790,12 @@ otx2_npa_populate(struct rte_mempool *mp, unsigned int max_objs, void *vaddr,
rte_iova_t iova, size_t len,
rte_mempool_populate_obj_cb_t *obj_cb, void *obj_cb_arg)
{
- size_t total_elt_sz;
+ char *va = vaddr;
+ size_t total_elt_sz, pg_sz;
size_t off;
+ unsigned int i;
+ void *obj;
+ int ret;
if (iova == RTE_BAD_IOVA)
return -EINVAL;
@@ -735,22 +803,45 @@ otx2_npa_populate(struct rte_mempool *mp, unsigned int max_objs, void *vaddr,
total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
/* Align object start address to a multiple of total_elt_sz */
- off = total_elt_sz - ((uintptr_t)vaddr % total_elt_sz);
+ off = total_elt_sz - (((uintptr_t)(va - 1) % total_elt_sz) + 1);
if (len < off)
return -EINVAL;
- vaddr = (char *)vaddr + off;
- iova += off;
- len -= off;
- npa_lf_aura_op_range_set(mp->pool_id, iova, iova + len);
+ npa_lf_aura_op_range_set(mp->pool_id, iova + off, iova + len);
if (npa_lf_aura_range_update_check(mp->pool_id) < 0)
return -EBUSY;
- return rte_mempool_op_populate_default(mp, max_objs, vaddr, iova, len,
- obj_cb, obj_cb_arg);
+ /* the following is derived from rte_mempool_op_populate_default() */
+
+ ret = rte_mempool_get_page_size(mp, &pg_sz);
+ if (ret < 0)
+ return ret;
+
+ for (i = 0; i < max_objs; i++) {
+ /* prevent objects from crossing page boundaries, and align
+ * the offset to a multiple of total_elt_sz.
+ */
+ if (check_obj_bounds(va + off, pg_sz, total_elt_sz) < 0) {
+ off += RTE_PTR_ALIGN_CEIL(va + off, pg_sz) - (va + off);
+ off += total_elt_sz - (((uintptr_t)(va + off - 1) %
+ total_elt_sz) + 1);
+ }
+
+ if (off + total_elt_sz > len)
+ break;
+
+ off += mp->header_size;
+ obj = va + off;
+ obj_cb(mp, obj_cb_arg, obj,
+ (iova == RTE_BAD_IOVA) ? RTE_BAD_IOVA : (iova + off));
+ rte_mempool_ops_enqueue_bulk(mp, &obj, 1);
+ off += mp->elt_size + mp->trailer_size;
+ }
+
+ return i;
}
static struct rte_mempool_ops otx2_npa_ops = {
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index 758c5410b..d3db9273d 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -431,8 +431,6 @@ rte_mempool_get_page_size(struct rte_mempool *mp, size_t *pg_sz)
if (!need_iova_contig_obj)
*pg_sz = 0;
- else if (!alloc_in_ext_mem && rte_eal_iova_mode() == RTE_IOVA_VA)
- *pg_sz = 0;
else if (rte_eal_has_hugepages() || alloc_in_ext_mem)
*pg_sz = get_min_page_size(mp->socket_id);
else
@@ -481,17 +479,15 @@ rte_mempool_populate_default(struct rte_mempool *mp)
* then just set page shift and page size to 0, because the user has
* indicated that there's no need to care about anything.
*
- * if we do need contiguous objects, there is also an option to reserve
- * the entire mempool memory as one contiguous block of memory, in
- * which case the page shift and alignment wouldn't matter as well.
+ * if we do need contiguous objects (if a mempool driver has its
+ * own calc_size() method returning min_chunk_size = mem_size),
+ * there is also an option to reserve the entire mempool memory
+ * as one contiguous block of memory.
*
* if we require contiguous objects, but not necessarily the entire
- * mempool reserved space to be contiguous, then there are two options.
- *
- * if our IO addresses are virtual, not actual physical (IOVA as VA
- * case), then no page shift needed - our memory allocation will give us
- * contiguous IO memory as far as the hardware is concerned, so
- * act as if we're getting contiguous memory.
+ * mempool reserved space to be contiguous, pg_sz will be != 0,
+ * and the default ops->populate() will take care of not placing
+ * objects across pages.
*
* if our IO addresses are physical, we may get memory from bigger
* pages, or we might get memory from smaller pages, and how much of it
@@ -504,11 +500,6 @@ rte_mempool_populate_default(struct rte_mempool *mp)
*
* If we fail to get enough contiguous memory, then we'll go and
* reserve space in smaller chunks.
- *
- * We also have to take into account the fact that memory that we're
- * going to allocate from can belong to an externally allocated memory
- * area, in which case the assumption of IOVA as VA mode being
- * synonymous with IOVA contiguousness will not hold.
*/
need_iova_contig_obj = !(mp->flags & MEMPOOL_F_NO_IOVA_CONTIG);
diff --git a/lib/librte_mempool/rte_mempool_ops_default.c b/lib/librte_mempool/rte_mempool_ops_default.c
index f6aea7662..e5cd4600f 100644
--- a/lib/librte_mempool/rte_mempool_ops_default.c
+++ b/lib/librte_mempool/rte_mempool_ops_default.c
@@ -61,21 +61,47 @@ rte_mempool_op_calc_mem_size_default(const struct rte_mempool *mp,
return mem_size;
}
+/* Returns -1 if object crosses a page boundary, else returns 0 */
+static int
+check_obj_bounds(char *obj, size_t pg_sz, size_t elt_sz)
+{
+ if (pg_sz == 0)
+ return 0;
+ if (elt_sz > pg_sz)
+ return 0;
+ if (RTE_PTR_ALIGN(obj, pg_sz) != RTE_PTR_ALIGN(obj + elt_sz - 1, pg_sz))
+ return -1;
+ return 0;
+}
+
int
rte_mempool_op_populate_default(struct rte_mempool *mp, unsigned int max_objs,
void *vaddr, rte_iova_t iova, size_t len,
rte_mempool_populate_obj_cb_t *obj_cb, void *obj_cb_arg)
{
- size_t total_elt_sz;
+ char *va = vaddr;
+ size_t total_elt_sz, pg_sz;
size_t off;
unsigned int i;
void *obj;
+ int ret;
+
+ ret = rte_mempool_get_page_size(mp, &pg_sz);
+ if (ret < 0)
+ return ret;
total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
- for (off = 0, i = 0; off + total_elt_sz <= len && i < max_objs; i++) {
+ for (off = 0, i = 0; i < max_objs; i++) {
+ /* prevent objects from crossing page boundaries */
+ if (check_obj_bounds(va + off, pg_sz, total_elt_sz) < 0)
+ off += RTE_PTR_ALIGN_CEIL(va + off, pg_sz) - (va + off);
+
+ if (off + total_elt_sz > len)
+ break;
+
off += mp->header_size;
- obj = (char *)vaddr + off;
+ obj = va + off;
obj_cb(mp, obj_cb_arg, obj,
(iova == RTE_BAD_IOVA) ? RTE_BAD_IOVA : (iova + off));
rte_mempool_ops_enqueue_bulk(mp, &obj, 1);
--
2.20.1