[dpdk-dev] [PATCH 30/41] eal: enable callbacks on malloc/free and mp sync
Anatoly Burakov
anatoly.burakov at intel.com
Sat Mar 3 14:46:18 CET 2018
Also, rewrite VFIO to rely on memory callbacks instead of manually
registering memory with VFIO. Callbacks will only be registered if
VFIO is enabled.
Signed-off-by: Anatoly Burakov <anatoly.burakov at intel.com>
---
lib/librte_eal/common/malloc_heap.c | 21 +++++++++++++++++
lib/librte_eal/linuxapp/eal/eal_memalloc.c | 37 +++++++++++++++++++++---------
lib/librte_eal/linuxapp/eal/eal_vfio.c | 35 ++++++++++++++++++++++++++++
3 files changed, 82 insertions(+), 11 deletions(-)
diff --git a/lib/librte_eal/common/malloc_heap.c b/lib/librte_eal/common/malloc_heap.c
index 9109555..9d055c8 100644
--- a/lib/librte_eal/common/malloc_heap.c
+++ b/lib/librte_eal/common/malloc_heap.c
@@ -223,6 +223,7 @@ try_expand_heap_primary(struct malloc_heap *heap, uint64_t pg_sz,
void *map_addr;
size_t map_len;
int n_pages;
+ bool callback_triggered = false;
map_len = RTE_ALIGN_CEIL(align + elt_size +
MALLOC_ELEM_TRAILER_LEN, pg_sz);
@@ -242,14 +243,25 @@ try_expand_heap_primary(struct malloc_heap *heap, uint64_t pg_sz,
map_addr = ms[0]->addr;
+ /* notify user about changes in memory map */
+ eal_memalloc_notify(RTE_MEM_EVENT_ALLOC, map_addr, map_len);
+
/* notify other processes that this has happened */
if (request_sync()) {
/* we couldn't ensure all processes have mapped memory,
* so free it back and notify everyone that it's been
* freed back.
+ *
+ * technically, we could've avoided adding memory addresses to
+ * the map, but that would've led to inconsistent behavior
+ * between primary and secondary processes, as those get
+ * callbacks during sync. therefore, force primary process to
+ * do alloc-and-rollback syncs as well.
*/
+ callback_triggered = true;
goto free_elem;
}
+
heap->total_size += map_len;
RTE_LOG(DEBUG, EAL, "Heap on socket %d was expanded by %zdMB\n",
@@ -260,6 +272,9 @@ try_expand_heap_primary(struct malloc_heap *heap, uint64_t pg_sz,
return 0;
free_elem:
+ if (callback_triggered)
+ eal_memalloc_notify(RTE_MEM_EVENT_FREE, map_addr, map_len);
+
rollback_expand_heap(ms, n_pages, elem, map_addr, map_len);
request_sync();
@@ -615,6 +630,10 @@ malloc_heap_free(struct malloc_elem *elem)
heap->total_size -= n_pages * msl->hugepage_sz;
if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ /* notify user about changes in memory map */
+ eal_memalloc_notify(RTE_MEM_EVENT_FREE,
+ aligned_start, aligned_len);
+
/* don't care if any of this fails */
malloc_heap_free_pages(aligned_start, aligned_len);
@@ -637,6 +656,8 @@ malloc_heap_free(struct malloc_elem *elem)
* already removed from the heap, so it is, for all intents and
* purposes, hidden from the rest of DPDK even if some other
* process (including this one) may have these pages mapped.
+ *
+ * notifications about deallocated memory happen during sync.
*/
request_to_primary(&req);
}
diff --git a/lib/librte_eal/linuxapp/eal/eal_memalloc.c b/lib/librte_eal/linuxapp/eal/eal_memalloc.c
index 227d703..1008fae 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memalloc.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memalloc.c
@@ -34,7 +34,6 @@
#include <rte_eal.h>
#include <rte_memory.h>
#include <rte_spinlock.h>
-#include <rte_vfio.h>
#include "eal_filesystem.h"
#include "eal_internal_cfg.h"
@@ -480,10 +479,6 @@ alloc_page(struct rte_memseg *ms, void *addr, uint64_t size, int socket_id,
ms->iova = iova;
ms->socket_id = socket_id;
- /* map the segment so that VFIO has access to it */
- if (rte_eal_iova_mode() == RTE_IOVA_VA &&
- rte_vfio_dma_map(ms->addr_64, iova, size))
- RTE_LOG(DEBUG, EAL, "Cannot register segment with VFIO\n");
return 0;
mapped:
@@ -515,12 +510,6 @@ free_page(struct rte_memseg *ms, struct hugepage_info *hi,
char path[PATH_MAX];
int fd, ret;
- /* unmap the segment from VFIO */
- if (rte_eal_iova_mode() == RTE_IOVA_VA &&
- rte_vfio_dma_unmap(ms->addr_64, ms->iova, ms->len)) {
- RTE_LOG(DEBUG, EAL, "Cannot unregister segment with VFIO\n");
- }
-
if (mmap(ms->addr, ms->hugepage_sz, PROT_READ,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0) ==
MAP_FAILED) {
@@ -808,6 +797,19 @@ sync_chunk(struct rte_memseg_list *primary_msl,
diff_len = RTE_MIN(chunk_len, diff_len);
+ /* if we are freeing memory, notify the application */
+ if (!used) {
+ struct rte_memseg *ms;
+ void *start_va;
+ size_t len;
+
+ ms = rte_fbarray_get(l_arr, start);
+ start_va = ms->addr;
+ len = ms->len * diff_len;
+
+ eal_memalloc_notify(RTE_MEM_EVENT_FREE, start_va, len);
+ }
+
for (i = 0; i < diff_len; i++) {
struct rte_memseg *p_ms, *l_ms;
int seg_idx = start + i;
@@ -834,6 +836,19 @@ sync_chunk(struct rte_memseg_list *primary_msl,
}
}
+ /* if we just allocated memory, notify the application */
+ if (used) {
+ struct rte_memseg *ms;
+ void *start_va;
+ size_t len;
+
+ ms = rte_fbarray_get(l_arr, start);
+ start_va = ms->addr;
+ len = ms->len * diff_len;
+
+ eal_memalloc_notify(RTE_MEM_EVENT_ALLOC, start_va, len);
+ }
+
/* calculate how much we can advance until next chunk */
diff_len = used ?
rte_fbarray_find_contig_used(l_arr, start) :
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c
index 8fe8984..d3c3b70 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c
@@ -214,6 +214,37 @@ vfio_group_device_count(int vfio_group_fd)
return vfio_cfg.vfio_groups[i].devices;
}
+static void
+vfio_mem_event_callback(enum rte_mem_event type, const void *addr, size_t len)
+{
+ struct rte_memseg_list *msl;
+ struct rte_memseg *ms;
+ size_t cur_len = 0;
+ uint64_t pgsz;
+
+ msl = rte_mem_virt2memseg_list(addr);
+ pgsz = msl->hugepage_sz;
+
+ while (cur_len < len) {
+ const void *va = RTE_PTR_ADD(addr, cur_len);
+ uint64_t vfio_va, iova;
+
+ ms = rte_mem_virt2memseg(va, msl);
+ vfio_va = (uint64_t) (uintptr_t) va;
+ iova = ms->iova;
+
+ /* this never gets called in legacy mode, so we can be sure that
+ * each segment is a single page.
+ */
+ if (type == RTE_MEM_EVENT_ALLOC)
+ rte_vfio_dma_map(vfio_va, iova, pgsz);
+ else
+ rte_vfio_dma_unmap(vfio_va, iova, pgsz);
+
+ cur_len += pgsz;
+ }
+}
+
int
rte_vfio_clear_group(int vfio_group_fd)
{
@@ -507,6 +538,10 @@ rte_vfio_enable(const char *modname)
if (vfio_cfg.vfio_container_fd != -1) {
RTE_LOG(NOTICE, EAL, "VFIO support initialized\n");
vfio_cfg.vfio_enabled = 1;
+
+ /* register callback for mem events */
+ rte_mem_event_register_callback("vfio_mem_event_clb",
+ vfio_mem_event_callback);
} else {
RTE_LOG(NOTICE, EAL, "VFIO support could not be initialized\n");
}
--
2.7.4
More information about the dev
mailing list