[dpdk-dev] [PATCH v4 3/8] eal: introduce memory management wrappers

Burakov, Anatoly anatoly.burakov at intel.com
Wed Apr 29 19:13:49 CEST 2020


On 29-Apr-20 12:50 AM, Dmitry Kozlyuk wrote:
> Introduce OS-independent wrappers for memory management operations used
> across DPDK and specifically in common code of EAL:
> 
> * rte_mem_map()
> * rte_mem_unmap()
> * rte_get_page_size()
> * rte_mem_lock()
> 
> Windows uses different APIs for memory mapping and reservation, while
> Unices reserve memory by mapping it. Introduce EAL private functions to
> support memory reservation in common code:
> 
> * eal_mem_reserve()
> * eal_mem_free()
> * eal_mem_set_dump()
> 
> Wrappers follow POSIX semantics limited to DPDK tasks, but their
> signatures deliberately differ from POSIX ones to be more safe and
> expressive.
> 
> Signed-off-by: Dmitry Kozlyuk <dmitry.kozliuk at gmail.com>
> ---

<snip>

>   	RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zx bytes\n", *size);
>   
> @@ -105,24 +94,24 @@ eal_get_virtual_area(void *requested_addr, size_t *size,
>   			return NULL;
>   		}
>   
> -		mapped_addr = mmap(requested_addr, (size_t)map_sz, PROT_NONE,
> -				mmap_flags, -1, 0);
> -		if (mapped_addr == MAP_FAILED && allow_shrink)
> -			*size -= page_sz;
> +		mapped_addr = eal_mem_reserve(
> +			requested_addr, (size_t)map_sz, reserve_flags);
> +		if ((mapped_addr == NULL) && allow_shrink)
> +			size -= page_sz;

Should be *size -= page_sz, size is a pointer in this case.

>   
> -		if (mapped_addr != MAP_FAILED && addr_is_hint &&
> -		    mapped_addr != requested_addr) {
> +		if ((mapped_addr != NULL) && addr_is_hint &&
> +				(mapped_addr != requested_addr)) {
>   			try++;
>   			next_baseaddr = RTE_PTR_ADD(next_baseaddr, page_sz);
>   			if (try <= MAX_MMAP_WITH_DEFINED_ADDR_TRIES) {
>   				/* hint was not used. Try with another offset */
> -				munmap(mapped_addr, map_sz);
> -				mapped_addr = MAP_FAILED;
> +				eal_mem_free(mapped_addr, *size);

Why change map_sz to *size?

> +				mapped_addr = NULL;
>   				requested_addr = next_baseaddr;
>   			}
>   		}
>   	} while ((allow_shrink || addr_is_hint) &&
> -		 mapped_addr == MAP_FAILED && *size > 0);
> +		(mapped_addr == NULL) && (*size > 0));
>   

<snip>

> @@ -547,10 +531,10 @@ rte_eal_memdevice_init(void)
>   int
>   rte_mem_lock_page(const void *virt)
>   {
> -	unsigned long virtual = (unsigned long)virt;
> -	int page_size = getpagesize();
> -	unsigned long aligned = (virtual & ~(page_size - 1));
> -	return mlock((void *)aligned, page_size);
> +	uintptr_t virtual = (uintptr_t)virt;
> +	int page_size = rte_get_page_size();
> +	uintptr_t aligned = (virtual & ~(page_size - 1));

Might as well fix to use macros? e.g.

size_t pagesz = rte_get_page_size();
return rte_mem_lock(RTE_PTR_ALIGN(virt, pagesz), pagesz);

(also, note that rte_get_page_size() returns size_t, not int)

> +	return rte_mem_lock((void *)aligned, page_size);
>   }
>   
>   int
> diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
> index 3aafd892f..67ca83e47 100644
> --- a/lib/librte_eal/common/eal_private.h
> +++ b/lib/librte_eal/common/eal_private.h
> @@ -11,6 +11,7 @@
>   

<snip>

> + *  Reservation size. Must be a multiple of system page size.
> + * @param flags
> + *  Reservation options, a combination of eal_mem_reserve_flags.
> + * @returns
> + *  Starting address of the reserved area on success, NULL on failure.
> + *  Callers must not access this memory until remapping it.
> + */
> +void *eal_mem_reserve(void *requested_addr, size_t size, int flags);

Should we also require requested_addr to be page-aligned?

Also, here and in other added API's, nitpick but our coding style guide 
(and the code style in this file) suggests that return value should be 
on a separate line, e.g.

void *
eal_mem_reserve(...);

> +
> +/**
> + * Free memory obtained by eal_mem_reserve() or eal_mem_alloc().
> + *
> + * If *virt* and *size* describe a part of the reserved region,
> + * only this part of the region is freed (accurately up to the system
> + * page size). If *virt* points to allocated memory, *size* must match
> + * the one specified on allocation. The behavior is undefined
> + * if the memory pointed by *virt* is obtained from another source
> + * than listed above.
> + *

<snip>

> +}
> +
> +static int
> +mem_rte_to_sys_prot(int prot)
> +{
> +	int sys_prot = 0;

Maybe set it to PROT_NONE to make it more obvious?

> +
> +	if (prot & RTE_PROT_READ)
> +		sys_prot |= PROT_READ;
> +	if (prot & RTE_PROT_WRITE)
> +		sys_prot |= PROT_WRITE;
> +	if (prot & RTE_PROT_EXECUTE)
> +		sys_prot |= PROT_EXEC;
> +
> +	return sys_prot;
> +}
> +
> +void *
> +rte_mem_map(void *requested_addr, size_t size, int prot, int flags,
> +	int fd, size_t offset)
> +{
> +	int sys_prot = 0;

Not necessary to initialize sys_prot (and it's counter-productive as 
compiler warning about uninitialized usage is a *good thing*!).

> +	int sys_flags = 0;
> +
> +	sys_prot = mem_rte_to_sys_prot(prot);
> +
> +	if (flags & RTE_MAP_SHARED)
> +		sys_flags |= MAP_SHARED;
> +	if (flags & RTE_MAP_ANONYMOUS)
> +		sys_flags |= MAP_ANONYMOUS;
> +	if (flags & RTE_MAP_PRIVATE)
> +		sys_flags |= MAP_PRIVATE;
> +	if (flags & RTE_MAP_FORCE_ADDRESS)
> +		sys_flags |= MAP_FIXED;
> +
> +	return mem_map(requested_addr, size, sys_prot, sys_flags, fd, offset);
> +}
> +
> +int
> +rte_mem_unmap(void *virt, size_t size)
> +{
> +	return mem_unmap(virt, size);
> +}
> +
> +size_t
> +rte_get_page_size(void)
> +{
> +	return sysconf(_SC_PAGESIZE);

Can we perhaps cache this value?

> +}
> +
> +int
> +rte_mem_lock(const void *virt, size_t size)
> +{
> +	return mlock(virt, size);

This call can fail. It should pass errno as rte_errno as well, just like 
all other calls from this family.

Also, if the implementation "may require" page alignment, how about 
requiring it unconditionally?

> +}
> diff --git a/lib/librte_eal/unix/meson.build b/lib/librte_eal/unix/meson.build
> index cfa1b4ef9..5734f26ad 100644
> --- a/lib/librte_eal/unix/meson.build
> +++ b/lib/librte_eal/unix/meson.build
> @@ -3,4 +3,5 @@
>   
>   sources += files(
>   	'eal_unix.c',
> +	'eal_unix_memory.c',
>   )
> 


-- 
Thanks,
Anatoly


More information about the dev mailing list