[dpdk-dev] [PATCH v6 48/70] eal: add shared indexed file-backed array

Anatoly Burakov anatoly.burakov at intel.com
Wed Apr 11 14:30:23 CEST 2018


rte_fbarray is a simple indexed array stored in shared memory
via mapping files into memory. Rationale for its existence is the
following: since we are going to map memory page-by-page, there
could be quite a lot of memory segments to keep track of (for
smaller page sizes, page count can easily reach thousands). We
can't really make page lists truly dynamic and infinitely expandable,
because that involves reallocating memory (which is a big no-no in
multiprocess). What we can do instead is have a maximum capacity as
something really, really large, and decide at allocation time how
big the array is going to be. We map the entire file into memory,
which makes it possible to use fbarray as shared memory, provided
the structure itself is allocated in shared memory. Per-fbarray
locking is also used to avoid index data races (but not contents
data races - that is up to user application to synchronize).

In addition, in understanding that we will frequently need to scan
this array for free space and iterating over array linearly can
become slow, rte_fbarray provides facilities to index array's
usage. The following use cases are covered:
 - find next free/used slot (useful either for adding new elements
   to fbarray, or walking the list)
 - find starting index for next N free/used slots (useful for when
   we want to allocate chunk of VA-contiguous memory composed of
   several pages)
 - find how many contiguous free/used slots there are, starting
   from specified index (useful for when we want to figure out
   how many pages we have until next hole in allocated memory, to
   speed up some bulk operations where we would otherwise have to
   walk the array and add pages one by one)

This is accomplished by storing a usage mask in-memory, right
after the data section of the array, and using some bit-level
magic to figure out the info we need.

Signed-off-by: Anatoly Burakov <anatoly.burakov at intel.com>
Tested-by: Santosh Shukla <santosh.shukla at caviumnetworks.com>
Tested-by: Hemant Agrawal <hemant.agrawal at nxp.com>
Tested-by: Gowrishankar Muthukrishnan <gowrishankar.m at linux.vnet.ibm.com>
---
 lib/librte_eal/bsdapp/eal/Makefile          |   1 +
 lib/librte_eal/common/Makefile              |   2 +-
 lib/librte_eal/common/eal_common_fbarray.c  | 859 ++++++++++++++++++++++++++++
 lib/librte_eal/common/eal_filesystem.h      |  13 +
 lib/librte_eal/common/include/rte_fbarray.h | 353 ++++++++++++
 lib/librte_eal/common/meson.build           |   2 +
 lib/librte_eal/linuxapp/eal/Makefile        |   1 +
 lib/librte_eal/rte_eal_version.map          |  16 +
 8 files changed, 1246 insertions(+), 1 deletion(-)
 create mode 100644 lib/librte_eal/common/eal_common_fbarray.c
 create mode 100644 lib/librte_eal/common/include/rte_fbarray.h

diff --git a/lib/librte_eal/bsdapp/eal/Makefile b/lib/librte_eal/bsdapp/eal/Makefile
index ed1d17b..1b43d77 100644
--- a/lib/librte_eal/bsdapp/eal/Makefile
+++ b/lib/librte_eal/bsdapp/eal/Makefile
@@ -53,6 +53,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_dev.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_options.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_thread.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_proc.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_fbarray.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_malloc.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_elem.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_heap.c
diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
index ea824a3..48f870f 100644
--- a/lib/librte_eal/common/Makefile
+++ b/lib/librte_eal/common/Makefile
@@ -16,7 +16,7 @@ INC += rte_pci_dev_feature_defs.h rte_pci_dev_features.h
 INC += rte_malloc.h rte_keepalive.h rte_time.h
 INC += rte_service.h rte_service_component.h
 INC += rte_bitmap.h rte_vfio.h rte_hypervisor.h rte_test.h
-INC += rte_reciprocal.h
+INC += rte_reciprocal.h rte_fbarray.h
 
 GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_prefetch.h
 GENERIC_INC += rte_spinlock.h rte_memcpy.h rte_cpuflags.h rte_rwlock.h
diff --git a/lib/librte_eal/common/eal_common_fbarray.c b/lib/librte_eal/common/eal_common_fbarray.c
new file mode 100644
index 0000000..f65875d
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_fbarray.c
@@ -0,0 +1,859 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017-2018 Intel Corporation
+ */
+
+#include <inttypes.h>
+#include <sys/mman.h>
+#include <stdint.h>
+#include <errno.h>
+#include <sys/file.h>
+#include <string.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_errno.h>
+#include <rte_spinlock.h>
+#include <rte_tailq.h>
+
+#include "eal_filesystem.h"
+#include "eal_private.h"
+
+#include "rte_fbarray.h"
+
+#define MASK_SHIFT 6ULL
+#define MASK_ALIGN (1 << MASK_SHIFT)
+#define MASK_LEN_TO_IDX(x) ((x) >> MASK_SHIFT)
+#define MASK_LEN_TO_MOD(x) ((x) - RTE_ALIGN_FLOOR(x, MASK_ALIGN))
+#define MASK_GET_IDX(idx, mod) ((idx << MASK_SHIFT) + mod)
+
+/*
+ * This is a mask that is always stored at the end of array, to provide fast
+ * way of finding free/used spots without looping through each element.
+ */
+
+struct used_mask {
+	int n_masks;
+	uint64_t data[];
+};
+
+static size_t
+calc_mask_size(int len)
+{
+	/* mask must be multiple of MASK_ALIGN, even though length of array
+	 * itself may not be aligned on that boundary.
+	 */
+	len = RTE_ALIGN_CEIL(len, MASK_ALIGN);
+	return sizeof(struct used_mask) +
+			sizeof(uint64_t) * MASK_LEN_TO_IDX(len);
+}
+
+static size_t
+calc_data_size(size_t page_sz, int elt_sz, int len)
+{
+	size_t data_sz = elt_sz * len;
+	size_t msk_sz = calc_mask_size(len);
+	return RTE_ALIGN_CEIL(data_sz + msk_sz, page_sz);
+}
+
+static struct used_mask *
+get_used_mask(void *data, int elt_sz, int len)
+{
+	return (struct used_mask *) RTE_PTR_ADD(data, elt_sz * len);
+}
+
+static int
+resize_and_map(int fd, void *addr, size_t len)
+{
+	char path[PATH_MAX];
+	void *map_addr;
+
+	if (ftruncate(fd, len)) {
+		RTE_LOG(ERR, EAL, "Cannot truncate %s\n", path);
+		/* pass errno up the chain */
+		rte_errno = errno;
+		return -1;
+	}
+
+	map_addr = mmap(addr, len, PROT_READ | PROT_WRITE,
+			MAP_SHARED | MAP_FIXED, fd, 0);
+	if (map_addr != addr) {
+		RTE_LOG(ERR, EAL, "mmap() failed: %s\n", strerror(errno));
+		/* pass errno up the chain */
+		rte_errno = errno;
+		return -1;
+	}
+	return 0;
+}
+
+static int
+find_next_n(const struct rte_fbarray *arr, int start, int n, bool used)
+{
+	const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
+			arr->len);
+	int msk_idx, lookahead_idx, first, first_mod;
+	int last, last_mod, last_msk;
+	uint64_t ignore_msk;
+
+	/*
+	 * mask only has granularity of MASK_ALIGN, but start may not be aligned
+	 * on that boundary, so construct a special mask to exclude anything we
+	 * don't want to see to avoid confusing ctz.
+	 */
+	first = MASK_LEN_TO_IDX(start);
+	first_mod = MASK_LEN_TO_MOD(start);
+	ignore_msk = ~((1ULL << first_mod) - 1);
+
+	/* array length may not be aligned, so calculate ignore mask for last
+	 * mask index.
+	 */
+	last = MASK_LEN_TO_IDX(arr->len);
+	last_mod = MASK_LEN_TO_MOD(arr->len);
+	last_msk = ~(-(1ULL) << last_mod);
+
+	for (msk_idx = first; msk_idx < msk->n_masks; msk_idx++) {
+		uint64_t cur_msk, lookahead_msk;
+		int run_start, clz, left;
+		bool found = false;
+		/*
+		 * The process of getting n consecutive bits for arbitrary n is
+		 * a bit involved, but here it is in a nutshell:
+		 *
+		 *  1. let n be the number of consecutive bits we're looking for
+		 *  2. check if n can fit in one mask, and if so, do n-1
+		 *     rshift-ands to see if there is an appropriate run inside
+		 *     our current mask
+		 *    2a. if we found a run, bail out early
+		 *    2b. if we didn't find a run, proceed
+		 *  3. invert the mask and count leading zeroes (that is, count
+		 *     how many consecutive set bits we had starting from the
+		 *     end of current mask) as k
+		 *    3a. if k is 0, continue to next mask
+		 *    3b. if k is not 0, we have a potential run
+		 *  4. to satisfy our requirements, next mask must have n-k
+		 *     consecutive set bits right at the start, so we will do
+		 *     (n-k-1) rshift-ands and check if first bit is set.
+		 *
+		 * Step 4 will need to be repeated if (n-k) > MASK_ALIGN until
+		 * we either run out of masks, lose the run, or find what we
+		 * were looking for.
+		 */
+		cur_msk = msk->data[msk_idx];
+		left = n;
+
+		/* if we're looking for free spaces, invert the mask */
+		if (!used)
+			cur_msk = ~cur_msk;
+
+		/* combine current ignore mask with last index ignore mask */
+		if (msk_idx == last)
+			ignore_msk |= last_msk;
+
+		/* if we have an ignore mask, ignore once */
+		if (ignore_msk) {
+			cur_msk &= ignore_msk;
+			ignore_msk = 0;
+		}
+
+		/* if n can fit in within a single mask, do a search */
+		if (n <= MASK_ALIGN) {
+			uint64_t tmp_msk = cur_msk;
+			int s_idx;
+			for (s_idx = 0; s_idx < n - 1; s_idx++)
+				tmp_msk &= tmp_msk >> 1ULL;
+			/* we found what we were looking for */
+			if (tmp_msk != 0) {
+				run_start = __builtin_ctzll(tmp_msk);
+				return MASK_GET_IDX(msk_idx, run_start);
+			}
+		}
+
+		/*
+		 * we didn't find our run within the mask, or n > MASK_ALIGN,
+		 * so we're going for plan B.
+		 */
+
+		/* count leading zeroes on inverted mask */
+		clz = __builtin_clzll(~cur_msk);
+
+		/* if there aren't any runs at the end either, just continue */
+		if (clz == 0)
+			continue;
+
+		/* we have a partial run at the end, so try looking ahead */
+		run_start = MASK_ALIGN - clz;
+		left -= clz;
+
+		for (lookahead_idx = msk_idx + 1; lookahead_idx < msk->n_masks;
+				lookahead_idx++) {
+			int s_idx, need;
+			lookahead_msk = msk->data[lookahead_idx];
+
+			/* if we're looking for free space, invert the mask */
+			if (!used)
+				lookahead_msk = ~lookahead_msk;
+
+			/* figure out how many consecutive bits we need here */
+			need = RTE_MIN(left, MASK_ALIGN);
+
+			for (s_idx = 0; s_idx < need - 1; s_idx++)
+				lookahead_msk &= lookahead_msk >> 1ULL;
+
+			/* if first bit is not set, we've lost the run */
+			if ((lookahead_msk & 1) == 0) {
+				/*
+				 * we've scanned this far, so we know there are
+				 * no runs in the space we've lookahead-scanned
+				 * as well, so skip that on next iteration.
+				 */
+				ignore_msk = ~((1ULL << need) - 1);
+				msk_idx = lookahead_idx;
+				break;
+			}
+
+			left -= need;
+
+			/* check if we've found what we were looking for */
+			if (left == 0) {
+				found = true;
+				break;
+			}
+		}
+
+		/* we didn't find anything, so continue */
+		if (!found)
+			continue;
+
+		return MASK_GET_IDX(msk_idx, run_start);
+	}
+	/* we didn't find anything */
+	rte_errno = used ? -ENOENT : -ENOSPC;
+	return -1;
+}
+
+static int
+find_next(const struct rte_fbarray *arr, int start, bool used)
+{
+	const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
+			arr->len);
+	int idx, first, first_mod;
+	int last, last_mod, last_msk;
+	uint64_t ignore_msk;
+
+	/*
+	 * mask only has granularity of MASK_ALIGN, but start may not be aligned
+	 * on that boundary, so construct a special mask to exclude anything we
+	 * don't want to see to avoid confusing ctz.
+	 */
+	first = MASK_LEN_TO_IDX(start);
+	first_mod = MASK_LEN_TO_MOD(start);
+	ignore_msk = ~((1ULL << first_mod) - 1ULL);
+
+	/* array length may not be aligned, so calculate ignore mask for last
+	 * mask index.
+	 */
+	last = MASK_LEN_TO_IDX(arr->len);
+	last_mod = MASK_LEN_TO_MOD(arr->len);
+	last_msk = ~(-(1ULL) << last_mod);
+
+	for (idx = first; idx < msk->n_masks; idx++) {
+		uint64_t cur = msk->data[idx];
+		int found;
+
+		/* if we're looking for free entries, invert mask */
+		if (!used)
+			cur = ~cur;
+
+		if (idx == last)
+			cur &= last_msk;
+
+		/* ignore everything before start on first iteration */
+		if (idx == first)
+			cur &= ignore_msk;
+
+		/* check if we have any entries */
+		if (cur == 0)
+			continue;
+
+		/*
+		 * find first set bit - that will correspond to whatever it is
+		 * that we're looking for.
+		 */
+		found = __builtin_ctzll(cur);
+		return MASK_GET_IDX(idx, found);
+	}
+	/* we didn't find anything */
+	rte_errno = used ? -ENOENT : -ENOSPC;
+	return -1;
+}
+
+static int
+find_contig(const struct rte_fbarray *arr, int start, bool used)
+{
+	const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
+			arr->len);
+	int idx, first, first_mod;
+	int last, last_mod, last_msk;
+	int need_len, result = 0;
+
+	/* array length may not be aligned, so calculate ignore mask for last
+	 * mask index.
+	 */
+	last = MASK_LEN_TO_IDX(arr->len);
+	last_mod = MASK_LEN_TO_MOD(arr->len);
+	last_msk = ~(-(1ULL) << last_mod);
+
+	first = MASK_LEN_TO_IDX(start);
+	first_mod = MASK_LEN_TO_MOD(start);
+	for (idx = first; idx < msk->n_masks; idx++, result += need_len) {
+		uint64_t cur = msk->data[idx];
+		int run_len;
+
+		need_len = MASK_ALIGN;
+
+		/* if we're looking for free entries, invert mask */
+		if (!used)
+			cur = ~cur;
+
+		/* if this is last mask, ignore everything after last bit */
+		if (idx == last)
+			cur &= last_msk;
+
+		/* ignore everything before start on first iteration */
+		if (idx == first) {
+			cur >>= first_mod;
+			/* at the start, we don't need the full mask len */
+			need_len -= first_mod;
+		}
+
+		/* we will be looking for zeroes, so invert the mask */
+		cur = ~cur;
+
+		/* if mask is zero, we have a complete run */
+		if (cur == 0)
+			continue;
+
+		/*
+		 * see if current run ends before mask end.
+		 */
+		run_len = __builtin_ctzll(cur);
+
+		/* add however many zeroes we've had in the last run and quit */
+		if (run_len < need_len) {
+			result += run_len;
+			break;
+		}
+	}
+	return result;
+}
+
+static int
+set_used(struct rte_fbarray *arr, int idx, bool used)
+{
+	struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz, arr->len);
+	uint64_t msk_bit = 1ULL << MASK_LEN_TO_MOD(idx);
+	int msk_idx = MASK_LEN_TO_IDX(idx);
+	bool already_used;
+	int ret = -1;
+
+	if (arr == NULL || idx < 0 || idx >= arr->len) {
+		rte_errno = EINVAL;
+		return -1;
+	}
+	ret = 0;
+
+	/* prevent array from changing under us */
+	rte_rwlock_write_lock(&arr->rwlock);
+
+	already_used = (msk->data[msk_idx] & msk_bit) != 0;
+
+	/* nothing to be done */
+	if (used == already_used)
+		goto out;
+
+	if (used) {
+		msk->data[msk_idx] |= msk_bit;
+		arr->count++;
+	} else {
+		msk->data[msk_idx] &= ~msk_bit;
+		arr->count--;
+	}
+out:
+	rte_rwlock_write_unlock(&arr->rwlock);
+
+	return ret;
+}
+
+static int
+fully_validate(const char *name, unsigned int elt_sz, unsigned int len)
+{
+	if (name == NULL || elt_sz == 0 || len == 0) {
+		rte_errno = EINVAL;
+		return -1;
+	}
+
+	if (strnlen(name, RTE_FBARRAY_NAME_LEN) == RTE_FBARRAY_NAME_LEN) {
+		rte_errno = ENAMETOOLONG;
+		return -1;
+	}
+	return 0;
+}
+
+int __rte_experimental
+rte_fbarray_init(struct rte_fbarray *arr, const char *name, int len, int elt_sz)
+{
+	size_t page_sz, mmap_len;
+	char path[PATH_MAX];
+	struct used_mask *msk;
+	void *data = NULL;
+	int fd = -1;
+
+	if (arr == NULL) {
+		rte_errno = EINVAL;
+		return -1;
+	}
+
+	if (fully_validate(name, elt_sz, len))
+		return -1;
+
+	page_sz = sysconf(_SC_PAGESIZE);
+
+	/* calculate our memory limits */
+	mmap_len = calc_data_size(page_sz, elt_sz, len);
+
+	data = eal_get_virtual_area(NULL, &mmap_len, page_sz, 0, 0);
+	if (data == NULL)
+		goto fail;
+
+	eal_get_fbarray_path(path, sizeof(path), name);
+
+	/*
+	 * Each fbarray is unique to process namespace, i.e. the filename
+	 * depends on process prefix. Try to take out a lock and see if we
+	 * succeed. If we don't, someone else is using it already.
+	 */
+	fd = open(path, O_CREAT | O_RDWR, 0600);
+	if (fd < 0) {
+		RTE_LOG(DEBUG, EAL, "%s(): couldn't open %s: %s\n", __func__,
+				path, strerror(errno));
+		rte_errno = errno;
+		goto fail;
+	} else if (flock(fd, LOCK_EX | LOCK_NB)) {
+		RTE_LOG(DEBUG, EAL, "%s(): couldn't lock %s: %s\n", __func__,
+				path, strerror(errno));
+		rte_errno = EBUSY;
+		goto fail;
+	}
+
+	/* take out a non-exclusive lock, so that other processes could still
+	 * attach to it, but no other process could reinitialize it.
+	 */
+	if (flock(fd, LOCK_SH | LOCK_NB)) {
+		rte_errno = errno;
+		goto fail;
+	}
+
+	if (resize_and_map(fd, data, mmap_len))
+		goto fail;
+
+	/* we've mmap'ed the file, we can now close the fd */
+	close(fd);
+
+	/* initialize the data */
+	memset(data, 0, mmap_len);
+
+	/* populate data structure */
+	snprintf(arr->name, sizeof(arr->name), "%s", name);
+	arr->data = data;
+	arr->len = len;
+	arr->elt_sz = elt_sz;
+	arr->count = 0;
+
+	msk = get_used_mask(data, elt_sz, len);
+	msk->n_masks = MASK_LEN_TO_IDX(RTE_ALIGN_CEIL(len, MASK_ALIGN));
+
+	rte_rwlock_init(&arr->rwlock);
+
+	return 0;
+fail:
+	if (data)
+		munmap(data, mmap_len);
+	if (fd >= 0)
+		close(fd);
+	return -1;
+}
+
+int __rte_experimental
+rte_fbarray_attach(struct rte_fbarray *arr)
+{
+	size_t page_sz, mmap_len;
+	char path[PATH_MAX];
+	void *data = NULL;
+	int fd = -1;
+
+	if (arr == NULL) {
+		rte_errno = EINVAL;
+		return -1;
+	}
+
+	/*
+	 * we don't need to synchronize attach as two values we need (element
+	 * size and array length) are constant for the duration of life of
+	 * the array, so the parts we care about will not race.
+	 */
+
+	if (fully_validate(arr->name, arr->elt_sz, arr->len))
+		return -1;
+
+	page_sz = sysconf(_SC_PAGESIZE);
+
+	mmap_len = calc_data_size(page_sz, arr->elt_sz, arr->len);
+
+	data = eal_get_virtual_area(arr->data, &mmap_len, page_sz, 0, 0);
+	if (data == NULL)
+		goto fail;
+
+	eal_get_fbarray_path(path, sizeof(path), arr->name);
+
+	fd = open(path, O_RDWR);
+	if (fd < 0) {
+		rte_errno = errno;
+		goto fail;
+	}
+
+	/* lock the file, to let others know we're using it */
+	if (flock(fd, LOCK_SH | LOCK_NB)) {
+		rte_errno = errno;
+		goto fail;
+	}
+
+	if (resize_and_map(fd, data, mmap_len))
+		goto fail;
+
+	close(fd);
+
+	/* we're done */
+
+	return 0;
+fail:
+	if (data)
+		munmap(data, mmap_len);
+	if (fd >= 0)
+		close(fd);
+	return -1;
+}
+
+int __rte_experimental
+rte_fbarray_detach(struct rte_fbarray *arr)
+{
+	if (arr == NULL) {
+		rte_errno = EINVAL;
+		return -1;
+	}
+
+	/*
+	 * we don't need to synchronize detach as two values we need (element
+	 * size and total capacity) are constant for the duration of life of
+	 * the array, so the parts we care about will not race. if the user is
+	 * detaching while doing something else in the same process, we can't
+	 * really do anything about it, things will blow up either way.
+	 */
+
+	size_t page_sz = sysconf(_SC_PAGESIZE);
+
+	/* this may already be unmapped (e.g. repeated call from previously
+	 * failed destroy(), but this is on user, we can't (easily) know if this
+	 * is still mapped.
+	 */
+	munmap(arr->data, calc_data_size(page_sz, arr->elt_sz, arr->len));
+
+	return 0;
+}
+
+int __rte_experimental
+rte_fbarray_destroy(struct rte_fbarray *arr)
+{
+	int fd, ret;
+	char path[PATH_MAX];
+
+	ret = rte_fbarray_detach(arr);
+	if (ret)
+		return ret;
+
+	/* try deleting the file */
+	eal_get_fbarray_path(path, sizeof(path), arr->name);
+
+	fd = open(path, O_RDONLY);
+	if (flock(fd, LOCK_EX | LOCK_NB)) {
+		RTE_LOG(DEBUG, EAL, "Cannot destroy fbarray - another process is using it\n");
+		rte_errno = EBUSY;
+		ret = -1;
+	} else {
+		ret = 0;
+		unlink(path);
+		memset(arr, 0, sizeof(*arr));
+	}
+	close(fd);
+
+	return ret;
+}
+
+void * __rte_experimental
+rte_fbarray_get(const struct rte_fbarray *arr, int idx)
+{
+	void *ret = NULL;
+	if (arr == NULL || idx < 0) {
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
+	if (idx >= arr->len) {
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
+	ret = RTE_PTR_ADD(arr->data, idx * arr->elt_sz);
+
+	return ret;
+}
+
+int __rte_experimental
+rte_fbarray_set_used(struct rte_fbarray *arr, int idx)
+{
+	return set_used(arr, idx, true);
+}
+
+int __rte_experimental
+rte_fbarray_set_free(struct rte_fbarray *arr, int idx)
+{
+	return set_used(arr, idx, false);
+}
+
+int __rte_experimental
+rte_fbarray_is_used(struct rte_fbarray *arr, int idx)
+{
+	struct used_mask *msk;
+	int msk_idx;
+	uint64_t msk_bit;
+	int ret = -1;
+
+	if (arr == NULL || idx < 0 || idx >= arr->len) {
+		rte_errno = EINVAL;
+		return -1;
+	}
+
+	/* prevent array from changing under us */
+	rte_rwlock_read_lock(&arr->rwlock);
+
+	msk = get_used_mask(arr->data, arr->elt_sz, arr->len);
+	msk_idx = MASK_LEN_TO_IDX(idx);
+	msk_bit = 1ULL << MASK_LEN_TO_MOD(idx);
+
+	ret = (msk->data[msk_idx] & msk_bit) != 0;
+
+	rte_rwlock_read_unlock(&arr->rwlock);
+
+	return ret;
+}
+
+int __rte_experimental
+rte_fbarray_find_next_free(struct rte_fbarray *arr, int start)
+{
+	int ret = -1;
+
+	if (arr == NULL || start < 0 || start >= arr->len) {
+		rte_errno = EINVAL;
+		return -1;
+	}
+
+	/* prevent array from changing under us */
+	rte_rwlock_read_lock(&arr->rwlock);
+
+	if (arr->len == arr->count) {
+		rte_errno = ENOSPC;
+		goto out;
+	}
+
+	ret = find_next(arr, start, false);
+out:
+	rte_rwlock_read_unlock(&arr->rwlock);
+	return ret;
+}
+
+int __rte_experimental
+rte_fbarray_find_next_used(struct rte_fbarray *arr, int start)
+{
+	int ret = -1;
+
+	if (arr == NULL || start < 0 || start >= arr->len) {
+		rte_errno = EINVAL;
+		return -1;
+	}
+
+	/* prevent array from changing under us */
+	rte_rwlock_read_lock(&arr->rwlock);
+
+	if (arr->count == 0) {
+		rte_errno = ENOENT;
+		goto out;
+	}
+
+	ret = find_next(arr, start, true);
+out:
+	rte_rwlock_read_unlock(&arr->rwlock);
+	return ret;
+}
+
+int __rte_experimental
+rte_fbarray_find_next_n_free(struct rte_fbarray *arr, int start, int n)
+{
+	int ret = -1;
+
+	if (arr == NULL || start < 0 || start >= arr->len ||
+			n < 0 || n > arr->len) {
+		rte_errno = EINVAL;
+		return -1;
+	}
+
+	/* prevent array from changing under us */
+	rte_rwlock_read_lock(&arr->rwlock);
+
+	if (arr->len == arr->count || arr->len - arr->count < n) {
+		rte_errno = ENOSPC;
+		goto out;
+	}
+
+	ret = find_next_n(arr, start, n, false);
+out:
+	rte_rwlock_read_unlock(&arr->rwlock);
+	return ret;
+}
+
+int __rte_experimental
+rte_fbarray_find_next_n_used(struct rte_fbarray *arr, int start, int n)
+{
+	int ret = -1;
+
+	if (arr == NULL || start < 0 || start >= arr->len ||
+			n < 0 || n > arr->len) {
+		rte_errno = EINVAL;
+		return -1;
+	}
+
+	/* prevent array from changing under us */
+	rte_rwlock_read_lock(&arr->rwlock);
+
+	if (arr->count < n) {
+		rte_errno = ENOENT;
+		goto out;
+	}
+
+	ret = find_next_n(arr, start, n, true);
+out:
+	rte_rwlock_read_unlock(&arr->rwlock);
+	return ret;
+}
+
+int __rte_experimental
+rte_fbarray_find_contig_free(struct rte_fbarray *arr, int start)
+{
+	int ret = -1;
+
+	if (arr == NULL || start < 0 || start >= arr->len) {
+		rte_errno = EINVAL;
+		return -1;
+	}
+
+	/* prevent array from changing under us */
+	rte_rwlock_read_lock(&arr->rwlock);
+
+	if (arr->len == arr->count) {
+		rte_errno = ENOSPC;
+		goto out;
+	}
+
+	if (arr->count == 0) {
+		ret = arr->len - start;
+		goto out;
+	}
+
+	ret = find_contig(arr, start, false);
+out:
+	rte_rwlock_read_unlock(&arr->rwlock);
+	return ret;
+}
+
+int __rte_experimental
+rte_fbarray_find_contig_used(struct rte_fbarray *arr, int start)
+{
+	int ret = -1;
+
+	if (arr == NULL || start < 0 || start >= arr->len) {
+		rte_errno = EINVAL;
+		return -1;
+	}
+
+	/* prevent array from changing under us */
+	rte_rwlock_read_lock(&arr->rwlock);
+
+	ret = find_contig(arr, start, true);
+
+	rte_rwlock_read_unlock(&arr->rwlock);
+	return ret;
+}
+
+int __rte_experimental
+rte_fbarray_find_idx(const struct rte_fbarray *arr, const void *elt)
+{
+	void *end;
+	int ret = -1;
+
+	/*
+	 * no need to synchronize as it doesn't matter if underlying data
+	 * changes - we're doing pointer arithmetic here.
+	 */
+
+	if (arr == NULL || elt == NULL) {
+		rte_errno = EINVAL;
+		return -1;
+	}
+	end = RTE_PTR_ADD(arr->data, arr->elt_sz * arr->len);
+	if (elt < arr->data || elt >= end) {
+		rte_errno = EINVAL;
+		return -1;
+	}
+
+	ret = RTE_PTR_DIFF(elt, arr->data) / arr->elt_sz;
+
+	return ret;
+}
+
+void __rte_experimental
+rte_fbarray_dump_metadata(struct rte_fbarray *arr, FILE *f)
+{
+	struct used_mask *msk;
+	int i;
+
+	if (arr == NULL || f == NULL) {
+		rte_errno = EINVAL;
+		return;
+	}
+
+	if (fully_validate(arr->name, arr->elt_sz, arr->len)) {
+		fprintf(f, "Invalid file-backed array\n");
+		goto out;
+	}
+
+	/* prevent array from changing under us */
+	rte_rwlock_read_lock(&arr->rwlock);
+
+	fprintf(f, "File-backed array: %s\n", arr->name);
+	fprintf(f, "size: %i occupied: %i elt_sz: %i\n",
+			arr->len, arr->count, arr->elt_sz);
+
+	msk = get_used_mask(arr->data, arr->elt_sz, arr->len);
+
+	for (i = 0; i < msk->n_masks; i++)
+		fprintf(f, "msk idx %i: 0x%016" PRIx64 "\n", i, msk->data[i]);
+out:
+	rte_rwlock_read_unlock(&arr->rwlock);
+}
diff --git a/lib/librte_eal/common/eal_filesystem.h b/lib/librte_eal/common/eal_filesystem.h
index 4708dd5..1c6048b 100644
--- a/lib/librte_eal/common/eal_filesystem.h
+++ b/lib/librte_eal/common/eal_filesystem.h
@@ -13,6 +13,7 @@
 
 /** Path of rte config file. */
 #define RUNTIME_CONFIG_FMT "%s/.%s_config"
+#define FBARRAY_FMT "%s/%s_%s"
 
 #include <stdint.h>
 #include <limits.h>
@@ -55,6 +56,18 @@ eal_mp_socket_path(void)
 	return buffer;
 }
 
+static inline const char *
+eal_get_fbarray_path(char *buffer, size_t buflen, const char *name) {
+	const char *directory = "/tmp";
+	const char *home_dir = getenv("HOME");
+
+	if (getuid() != 0 && home_dir != NULL)
+		directory = home_dir;
+	snprintf(buffer, buflen - 1, FBARRAY_FMT, directory,
+			internal_config.hugefile_prefix, name);
+	return buffer;
+}
+
 /** Path of hugepage info file. */
 #define HUGEPAGE_INFO_FMT "%s/.%s_hugepage_info"
 
diff --git a/lib/librte_eal/common/include/rte_fbarray.h b/lib/librte_eal/common/include/rte_fbarray.h
new file mode 100644
index 0000000..97df945
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_fbarray.h
@@ -0,0 +1,353 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017-2018 Intel Corporation
+ */
+
+#ifndef RTE_FBARRAY_H
+#define RTE_FBARRAY_H
+
+/**
+ * @file
+ *
+ * File-backed shared indexed array for DPDK.
+ *
+ * Basic workflow is expected to be the following:
+ *  1) Allocate array either using ``rte_fbarray_init()`` or
+ *     ``rte_fbarray_attach()`` (depending on whether it's shared between
+ *     multiple DPDK processes)
+ *  2) find free spots using ``rte_fbarray_find_next_free()``
+ *  3) get pointer to data in the free spot using ``rte_fbarray_get()``, and
+ *     copy data into the pointer (element size is fixed)
+ *  4) mark entry as used using ``rte_fbarray_set_used()``
+ *
+ * Calls to ``rte_fbarray_init()`` and ``rte_fbarray_destroy()`` will have
+ * consequences for all processes, while calls to ``rte_fbarray_attach()`` and
+ * ``rte_fbarray_detach()`` will only have consequences within a single process.
+ * Therefore, it is safe to call ``rte_fbarray_attach()`` or
+ * ``rte_fbarray_detach()`` while another process is using ``rte_fbarray``,
+ * provided no other thread within the same process will try to use
+ * ``rte_fbarray`` before attaching or after detaching. It is not safe to call
+ * ``rte_fbarray_init()`` or ``rte_fbarray_destroy()`` while another thread or
+ * another process is using ``rte_fbarray``.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdbool.h>
+#include <stdio.h>
+
+#include <rte_compat.h>
+#include <rte_rwlock.h>
+
+#define RTE_FBARRAY_NAME_LEN 64
+
+struct rte_fbarray {
+	char name[RTE_FBARRAY_NAME_LEN]; /**< name associated with an array */
+	int count;                       /**< number of entries stored */
+	int len;                         /**< current length of the array */
+	int elt_sz;                      /**< size of each element */
+	void *data;                      /**< data pointer */
+	rte_rwlock_t rwlock;             /**< multiprocess lock */
+};
+
+/**
+ * Set up ``rte_fbarray`` structure and allocate underlying resources.
+ *
+ * Call this function to correctly set up ``rte_fbarray`` and allocate
+ * underlying files that will be backing the data in the current process. Note
+ * that in order to use and share ``rte_fbarray`` between multiple processes,
+ * data pointed to by ``arr`` pointer must itself be allocated in shared memory.
+ *
+ * @param arr
+ *   Valid pointer to allocated ``rte_fbarray`` structure.
+ *
+ * @param name
+ *   Unique name to be assigned to this array.
+ *
+ * @param len
+ *   Number of elements initially available in the array.
+ *
+ * @param elt_sz
+ *   Size of each element.
+ *
+ * @return
+ *  - 0 on success.
+ *  - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_init(struct rte_fbarray *arr, const char *name, int len,
+		int elt_sz);
+
+
+/**
+ * Attach to a file backing an already allocated and correctly set up
+ * ``rte_fbarray`` structure.
+ *
+ * Call this function to attach to file that will be backing the data in the
+ * current process. The structure must have been previously correctly set up
+ * with a call to ``rte_fbarray_init()``. Calls to ``rte_fbarray_attach()`` are
+ * usually meant to be performed in a multiprocessing scenario, with data
+ * pointed to by ``arr`` pointer allocated in shared memory.
+ *
+ * @param arr
+ *   Valid pointer to allocated and correctly set up rte_fbarray structure.
+ *
+ * @return
+ *  - 0 on success.
+ *  - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_attach(struct rte_fbarray *arr);
+
+
+/**
+ * Deallocate resources for an already allocated and correctly set up
+ * ``rte_fbarray`` structure, and remove the underlying file.
+ *
+ * Call this function to deallocate all resources associated with an
+ * ``rte_fbarray`` structure within the current process. This will also
+ * zero-fill data pointed to by ``arr`` pointer and remove the underlying file
+ * backing the data, so it is expected that by the time this function is called,
+ * all other processes have detached from this ``rte_fbarray``.
+ *
+ * @param arr
+ *   Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @return
+ *  - 0 on success.
+ *  - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_destroy(struct rte_fbarray *arr);
+
+
+/**
+ * Deallocate resources for an already allocated and correctly set up
+ * ``rte_fbarray`` structure.
+ *
+ * Call this function to deallocate all resources associated with an
+ * ``rte_fbarray`` structure within current process.
+ *
+ * @param arr
+ *   Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @return
+ *  - 0 on success.
+ *  - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_detach(struct rte_fbarray *arr);
+
+
+/**
+ * Get pointer to element residing at specified index.
+ *
+ * @param arr
+ *   Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param idx
+ *   Index of an element to get a pointer to.
+ *
+ * @return
+ *  - non-NULL pointer on success.
+ *  - NULL on failure, with ``rte_errno`` indicating reason for failure.
+ */
+void * __rte_experimental
+rte_fbarray_get(const struct rte_fbarray *arr, int idx);
+
+
+/**
+ * Find index of a specified element within the array.
+ *
+ * @param arr
+ *   Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param elt
+ *   Pointer to element to find index to.
+ *
+ * @return
+ *  - non-negative integer on success.
+ *  - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_idx(const struct rte_fbarray *arr, const void *elt);
+
+
+/**
+ * Mark specified element as used.
+ *
+ * @param arr
+ *   Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param idx
+ *   Element index to mark as used.
+ *
+ * @return
+ *  - 0 on success.
+ *  - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_set_used(struct rte_fbarray *arr, int idx);
+
+
+/**
+ * Mark specified element as free.
+ *
+ * @param arr
+ *   Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param idx
+ *   Element index to mark as free.
+ *
+ * @return
+ *  - 0 on success.
+ *  - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_set_free(struct rte_fbarray *arr, int idx);
+
+
+/**
+ * Check whether element at specified index is marked as used.
+ *
+ * @param arr
+ *   Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param idx
+ *   Element index to check as used.
+ *
+ * @return
+ *  - 1 if element is used.
+ *  - 0 if element is unused.
+ *  - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_is_used(struct rte_fbarray *arr, int idx);
+
+
+/**
+ * Find index of next free element, starting at specified index.
+ *
+ * @param arr
+ *   Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ *   Element index to start search from.
+ *
+ * @return
+ *  - non-negative integer on success.
+ *  - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_next_free(struct rte_fbarray *arr, int start);
+
+
+/**
+ * Find index of next used element, starting at specified index.
+ *
+ * @param arr
+ *   Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ *   Element index to start search from.
+ *
+ * @return
+ *  - non-negative integer on success.
+ *  - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_next_used(struct rte_fbarray *arr, int start);
+
+
+/**
+ * Find index of next chunk of ``n`` free elements, starting at specified index.
+ *
+ * @param arr
+ *   Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ *   Element index to start search from.
+ *
+ * @param n
+ *   Number of free elements to look for.
+ *
+ * @return
+ *  - non-negative integer on success.
+ *  - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_next_n_free(struct rte_fbarray *arr, int start, int n);
+
+
+/**
+ * Find index of next chunk of ``n`` used elements, starting at specified index.
+ *
+ * @param arr
+ *   Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ *   Element index to start search from.
+ *
+ * @param n
+ *   Number of used elements to look for.
+ *
+ * @return
+ *  - non-negative integer on success.
+ *  - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_next_n_used(struct rte_fbarray *arr, int start, int n);
+
+
+/**
+ * Find how many more free entries there are, starting at specified index.
+ *
+ * @param arr
+ *   Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ *   Element index to start search from.
+ *
+ * @return
+ *  - non-negative integer on success.
+ *  - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_contig_free(struct rte_fbarray *arr, int start);
+
+
+/**
+ * Find how many more used entries there are, starting at specified index.
+ *
+ * @param arr
+ *   Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ *   Element index to start search from.
+ *
+ * @return
+ *  - non-negative integer on success.
+ *  - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_contig_used(struct rte_fbarray *arr, int start);
+
+
+/**
+ * Dump ``rte_fbarray`` metadata.
+ *
+ * @param arr
+ *   Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param f
+ *   File object to dump information into.
+ */
+void __rte_experimental
+rte_fbarray_dump_metadata(struct rte_fbarray *arr, FILE *f);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RTE_FBARRAY_H */
diff --git a/lib/librte_eal/common/meson.build b/lib/librte_eal/common/meson.build
index 82b8910..7d02191 100644
--- a/lib/librte_eal/common/meson.build
+++ b/lib/librte_eal/common/meson.build
@@ -11,6 +11,7 @@ common_sources = files(
 	'eal_common_devargs.c',
 	'eal_common_dev.c',
 	'eal_common_errno.c',
+	'eal_common_fbarray.c',
 	'eal_common_hexdump.c',
 	'eal_common_launch.c',
 	'eal_common_lcore.c',
@@ -51,6 +52,7 @@ common_headers = files(
 	'include/rte_eal_memconfig.h',
 	'include/rte_eal_interrupts.h',
 	'include/rte_errno.h',
+	'include/rte_fbarray.h',
 	'include/rte_hexdump.h',
 	'include/rte_interrupts.h',
 	'include/rte_keepalive.h',
diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
index b9c7727..c407a43 100644
--- a/lib/librte_eal/linuxapp/eal/Makefile
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -61,6 +61,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_dev.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_options.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_thread.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_proc.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_fbarray.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_malloc.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += malloc_elem.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += malloc_heap.c
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index 76209f9..0f542b1 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -221,6 +221,22 @@ EXPERIMENTAL {
 	rte_eal_hotplug_add;
 	rte_eal_hotplug_remove;
 	rte_eal_mbuf_user_pool_ops;
+	rte_fbarray_attach;
+	rte_fbarray_destroy;
+	rte_fbarray_detach;
+	rte_fbarray_dump_metadata;
+	rte_fbarray_find_idx;
+	rte_fbarray_find_next_free;
+	rte_fbarray_find_next_used;
+	rte_fbarray_find_next_n_free;
+	rte_fbarray_find_next_n_used;
+	rte_fbarray_find_contig_free;
+	rte_fbarray_find_contig_used;
+	rte_fbarray_get;
+	rte_fbarray_init;
+	rte_fbarray_is_used;
+	rte_fbarray_set_free;
+	rte_fbarray_set_used;
 	rte_log_register_type_and_pick_level;
 	rte_malloc_dump_heaps;
 	rte_mem_iova2virt;
-- 
2.7.4


More information about the dev mailing list