[dpdk-dev] [PATCH v3 5/9] gpudev: add memory API
Mattias Rönnblom
hofors at lysator.liu.se
Fri Oct 29 21:38:23 CEST 2021
On 2021-10-09 03:53, eagostini at nvidia.com wrote:
> From: Elena Agostini <eagostini at nvidia.com>
>
> In heterogeneous computing system, processing is not only in the CPU.
> Some tasks can be delegated to devices working in parallel.
> Such workload distribution can be achieved by sharing some memory.
>
> As a first step, the features are focused on memory management.
> A function allows to allocate memory inside the device,
> or in the main (CPU) memory while making it visible for the device.
> This memory may be used to save packets or for synchronization data.
>
> The next step should focus on GPU processing task control.
>
> Signed-off-by: Elena Agostini <eagostini at nvidia.com>
> Signed-off-by: Thomas Monjalon <thomas at monjalon.net>
> ---
> app/test-gpudev/main.c | 118 +++++++++++++++++++++++++
> doc/guides/gpus/features/default.ini | 3 +
> doc/guides/prog_guide/gpudev.rst | 19 ++++
> doc/guides/rel_notes/release_21_11.rst | 1 +
> lib/gpudev/gpudev.c | 101 +++++++++++++++++++++
> lib/gpudev/gpudev_driver.h | 12 +++
> lib/gpudev/rte_gpudev.h | 95 ++++++++++++++++++++
> lib/gpudev/version.map | 4 +
> 8 files changed, 353 insertions(+)
>
> diff --git a/app/test-gpudev/main.c b/app/test-gpudev/main.c
> index 6a73a54e84..98c02a3ee0 100644
> --- a/app/test-gpudev/main.c
> +++ b/app/test-gpudev/main.c
> @@ -62,6 +62,110 @@ args_parse(int argc, char **argv)
> }
> }
>
> +static int
> +alloc_gpu_memory(uint16_t gpu_id)
> +{
> + void * ptr_1 = NULL;
Delete space between '*' and 'p'.
> + void * ptr_2 = NULL;
> + size_t buf_bytes = 1024;
> + int ret = 0;
This initialization is redundant.
> +
> + printf("\n=======> TEST: Allocate GPU memory\n");
> +
> + /* Alloc memory on GPU 0 */
> + ptr_1 = rte_gpu_malloc(gpu_id, buf_bytes);
> + if(ptr_1 == NULL)
> + {
Misplaced brace: the opening brace belongs on the same line as the "if".
Also, write "if (" (with a space) rather than "if(".
> + fprintf(stderr, "rte_gpu_malloc GPU memory returned error\n");
> + return -1;
> + }
> + printf("GPU memory allocated at 0x%p %zdB\n", ptr_1, buf_bytes);
> +
> + ptr_2 = rte_gpu_malloc(gpu_id, buf_bytes);
> + if(ptr_2 == NULL)
> + {
Same brace placement and "if(" spacing issues again, here and throughout this file.
> + fprintf(stderr, "rte_gpu_malloc GPU memory returned error\n");
> + return -1;
> + }
> + printf("GPU memory allocated at 0x%p %zdB\n", ptr_2, buf_bytes);
> +
> + ret = rte_gpu_free(gpu_id, (uint8_t*)(ptr_1)+0x700);
> + if(ret < 0)
> + {
> + printf("GPU memory 0x%p + 0x700 NOT freed because of memory address not recognized by driver\n", ptr_1);
> + }
> + else
> + {
> + fprintf(stderr, "rte_gpu_free erroneusly freed GPU memory 0x%p + 0x700\n", ptr_1);
> + return -1;
> + }
> +
> + ret = rte_gpu_free(gpu_id, ptr_2);
> + if(ret < 0)
> + {
> + fprintf(stderr, "rte_gpu_free returned error %d\n", ret);
> + return -1;
> + }
> + printf("GPU memory 0x%p freed\n", ptr_2);
> +
> + ret = rte_gpu_free(gpu_id, ptr_1);
> + if(ret < 0)
> + {
> + fprintf(stderr, "rte_gpu_free returned error %d\n", ret);
> + return -1;
> + }
> + printf("GPU memory 0x%p freed\n", ptr_1);
> +
> + return 0;
> +}
> +
> +static int
> +register_cpu_memory(uint16_t gpu_id)
> +{
> + void * ptr = NULL;
> + size_t buf_bytes = 1024;
> + int ret = 0;
> +
> + printf("\n=======> TEST: Register CPU memory\n");
> +
> + /* Alloc memory on CPU visible from GPU 0 */
> + ptr = rte_zmalloc(NULL, buf_bytes, 0);
> + if (ptr == NULL) {
> + fprintf(stderr, "Failed to allocate CPU memory.\n");
> + return -1;
> + }
> +
> + ret = rte_gpu_register(gpu_id, buf_bytes, ptr);
> + if(ret < 0)
> + {
> + fprintf(stderr, "rte_gpu_register CPU memory returned error %d\n", ret);
> + return -1;
> + }
> + printf("CPU memory registered at 0x%p %zdB\n", ptr, buf_bytes);
> +
> + ret = rte_gpu_unregister(gpu_id, (uint8_t*)(ptr)+0x700);
> + if(ret < 0)
> + {
> + printf("CPU memory 0x%p + 0x700 NOT unregistered because of memory address not recognized by driver\n", ptr);
> + }
> + else
> + {
> + fprintf(stderr, "rte_gpu_free erroneusly freed GPU memory 0x%p + 0x700\n", ptr);
> + return -1;
> + }
> + printf("CPU memory 0x%p unregistered\n", ptr);
> +
> + ret = rte_gpu_unregister(gpu_id, ptr);
> + if(ret < 0)
> + {
> + fprintf(stderr, "rte_gpu_unregister returned error %d\n", ret);
> + return -1;
> + }
> + printf("CPU memory 0x%p unregistered\n", ptr);
> +
> + return 0;
> +}
> +
> int
> main(int argc, char **argv)
> {
> @@ -99,6 +203,20 @@ main(int argc, char **argv)
> }
> printf("\n\n");
>
> + if(nb_gpus == 0)
> + {
> + fprintf(stderr, "Need at least one GPU on the system to run the example\n");
> + return EXIT_FAILURE;
> + }
> +
> + gpu_id = 0;
> +
> + /**
> + * Memory tests
> + */
> + alloc_gpu_memory(gpu_id);
> + register_cpu_memory(gpu_id);
> +
> /* clean up the EAL */
> rte_eal_cleanup();
> printf("Bye...\n");
> diff --git a/doc/guides/gpus/features/default.ini b/doc/guides/gpus/features/default.ini
> index ec7a545eb7..87e9966424 100644
> --- a/doc/guides/gpus/features/default.ini
> +++ b/doc/guides/gpus/features/default.ini
> @@ -8,3 +8,6 @@
> ;
> [Features]
> Get device info =
> +Share CPU memory with device =
> +Allocate device memory =
> +Free memory =
> diff --git a/doc/guides/prog_guide/gpudev.rst b/doc/guides/prog_guide/gpudev.rst
> index 7694639489..9aca69038c 100644
> --- a/doc/guides/prog_guide/gpudev.rst
> +++ b/doc/guides/prog_guide/gpudev.rst
> @@ -30,6 +30,8 @@ Features
> This library provides a number of features:
>
> - Interoperability with device-specific library through generic handlers.
> +- Allocate and free memory on the device.
> +- Register CPU memory to make it visible from the device.
>
>
> API Overview
> @@ -46,3 +48,20 @@ that will be registered internally by the driver as an additional device (child)
> connected to a physical device (parent).
> Each device (parent or child) is represented through a ID
> required to indicate which device a given operation should be executed on.
> +
> +Memory Allocation
> +~~~~~~~~~~~~~~~~~
> +
> +gpudev can allocate on an input given GPU device a memory area
> +returning the pointer to that memory.
> +Later, it's also possible to free that memory with gpudev.
> +GPU memory allocated outside of the gpudev library
> +(e.g. with GPU-specific library) cannot be freed by the gpudev library.
> +
> +Memory Registration
> +~~~~~~~~~~~~~~~~~~~
> +
> +gpudev can register a CPU memory area to make it visible from a GPU device.
> +Later, it's also possible to unregister that memory with gpudev.
> +CPU memory registered outside of the gpudev library
> +(e.g. with GPU specific library) cannot be unregistered by the gpudev library.
> diff --git a/doc/guides/rel_notes/release_21_11.rst b/doc/guides/rel_notes/release_21_11.rst
> index 4986a35b50..c4ac5e3053 100644
> --- a/doc/guides/rel_notes/release_21_11.rst
> +++ b/doc/guides/rel_notes/release_21_11.rst
> @@ -65,6 +65,7 @@ New Features
> * **Introduced GPU device class with first features:**
>
> * Device information
> + * Memory management
>
> * **Added new RSS offload types for IPv4/L4 checksum in RSS flow.**
>
> diff --git a/lib/gpudev/gpudev.c b/lib/gpudev/gpudev.c
> index f0690cf730..1d8318f769 100644
> --- a/lib/gpudev/gpudev.c
> +++ b/lib/gpudev/gpudev.c
> @@ -6,6 +6,7 @@
> #include <rte_tailq.h>
> #include <rte_string_fns.h>
> #include <rte_memzone.h>
> +#include <rte_malloc.h>
> #include <rte_errno.h>
> #include <rte_log.h>
>
> @@ -523,3 +524,103 @@ rte_gpu_info_get(int16_t dev_id, struct rte_gpu_info *info)
> }
> return GPU_DRV_RET(dev->ops.dev_info_get(dev, info));
> }
> +
> +void *
> +rte_gpu_malloc(int16_t dev_id, size_t size)
> +{
> + struct rte_gpu *dev;
> + void *ptr;
> + int ret;
> +
> + dev = gpu_get_by_id(dev_id);
> + if (dev == NULL) {
> + GPU_LOG(ERR, "alloc mem for invalid device ID %d", dev_id);
> + rte_errno = ENODEV;
> + return NULL;
> + }
> +
> + if (dev->ops.mem_alloc == NULL) {
> + GPU_LOG(ERR, "mem allocation not supported");
> + rte_errno = ENOTSUP;
> + return NULL;
> + }
> +
> + if (size == 0) /* dry-run */
> + return NULL;
> +
> + ret = dev->ops.mem_alloc(dev, size, &ptr);
> +
> + switch (ret) {
> + case 0:
> + return ptr;
> + case -ENOMEM:
> + case -E2BIG:
> + rte_errno = -ret;
> + return NULL;
> + default:
> + rte_errno = -EPERM;
> + return NULL;
> + }
> +}
> +
> +int
> +rte_gpu_register(int16_t dev_id, size_t size, void * ptr)
> +{
> + struct rte_gpu *dev;
> +
> + dev = gpu_get_by_id(dev_id);
> + if (dev == NULL) {
> + GPU_LOG(ERR, "alloc mem for invalid device ID %d", dev_id);
> + rte_errno = ENODEV;
> + return -rte_errno;
> + }
> +
> + if (dev->ops.mem_register == NULL) {
> + GPU_LOG(ERR, "mem registration not supported");
> + rte_errno = ENOTSUP;
> + return -rte_errno;
> + }
> +
> + if (size == 0 || ptr == NULL) /* dry-run */
> + return -EINVAL;
> +
> + return GPU_DRV_RET(dev->ops.mem_register(dev, size, ptr));
> +}
> +
> +int
> +rte_gpu_unregister(int16_t dev_id, void * ptr)
> +{
> + struct rte_gpu *dev;
> +
> + dev = gpu_get_by_id(dev_id);
> + if (dev == NULL) {
> + GPU_LOG(ERR, "unregister mem for invalid device ID %d", dev_id);
> + rte_errno = ENODEV;
> + return -rte_errno;
> + }
> +
> + if (dev->ops.mem_unregister == NULL) {
> + rte_errno = ENOTSUP;
> + return -rte_errno;
> + }
> + return GPU_DRV_RET(dev->ops.mem_unregister(dev, ptr));
> +}
> +
> +int
> +rte_gpu_free(int16_t dev_id, void *ptr)
> +{
> + struct rte_gpu *dev;
> +
> + dev = gpu_get_by_id(dev_id);
> + if (dev == NULL) {
> + GPU_LOG(ERR, "free mem for invalid device ID %d", dev_id);
> + rte_errno = ENODEV;
> + return -rte_errno;
> + }
> +
> + if (dev->ops.mem_free == NULL) {
> + rte_errno = ENOTSUP;
> + return -rte_errno;
> + }
> + return GPU_DRV_RET(dev->ops.mem_free(dev, ptr));
> +}
> diff --git a/lib/gpudev/gpudev_driver.h b/lib/gpudev/gpudev_driver.h
> index 9459c7e30f..11015944a6 100644
> --- a/lib/gpudev/gpudev_driver.h
> +++ b/lib/gpudev/gpudev_driver.h
> @@ -27,12 +27,24 @@ enum rte_gpu_state {
> struct rte_gpu;
> typedef int (rte_gpu_close_t)(struct rte_gpu *dev);
> typedef int (rte_gpu_info_get_t)(struct rte_gpu *dev, struct rte_gpu_info *info);
> +typedef int (rte_gpu_mem_alloc_t)(struct rte_gpu *dev, size_t size, void **ptr);
> +typedef int (rte_gpu_free_t)(struct rte_gpu *dev, void *ptr);
> +typedef int (rte_gpu_mem_register_t)(struct rte_gpu *dev, size_t size, void *ptr);
> +typedef int (rte_gpu_mem_unregister_t)(struct rte_gpu *dev, void *ptr);
>
> struct rte_gpu_ops {
> /* Get device info. If NULL, info is just copied. */
> rte_gpu_info_get_t *dev_info_get;
> /* Close device or child context. */
> rte_gpu_close_t *dev_close;
> + /* Allocate memory in device. */
> + rte_gpu_mem_alloc_t *mem_alloc;
> + /* Register CPU memory in device. */
> + rte_gpu_mem_register_t *mem_register;
> + /* Free memory allocated or registered in device. */
> + rte_gpu_free_t *mem_free;
> + /* Unregister CPU memory in device. */
> + rte_gpu_mem_unregister_t *mem_unregister;
> };
>
> struct rte_gpu_mpshared {
> diff --git a/lib/gpudev/rte_gpudev.h b/lib/gpudev/rte_gpudev.h
> index df75dbdbab..3c276581c0 100644
> --- a/lib/gpudev/rte_gpudev.h
> +++ b/lib/gpudev/rte_gpudev.h
> @@ -9,6 +9,7 @@
> #include <stdint.h>
> #include <stdbool.h>
>
> +#include <rte_bitops.h>
> #include <rte_compat.h>
>
> /**
> @@ -292,6 +293,100 @@ int rte_gpu_callback_unregister(int16_t dev_id, enum rte_gpu_event event,
> __rte_experimental
> int rte_gpu_info_get(int16_t dev_id, struct rte_gpu_info *info);
>
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice.
> + *
> + * Allocate a chunk of memory usable by the device.
> + *
> + * @param dev_id
> + * Device ID requiring allocated memory.
> + * @param size
> + * Number of bytes to allocate.
> + * Requesting 0 will do nothing.
> + *
> + * @return
> + * A pointer to the allocated memory, otherwise NULL and rte_errno is set:
> + * - ENODEV if invalid dev_id
> + * - EINVAL if reserved flags
> + * - ENOTSUP if operation not supported by the driver
> + * - E2BIG if size is higher than limit
> + * - ENOMEM if out of space
> + * - EPERM if driver error
> + */
> +__rte_experimental
> +void *rte_gpu_malloc(int16_t dev_id, size_t size)
> +__rte_alloc_size(2);
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice.
> + *
> + * Deallocate a chunk of memory allocated with rte_gpu_malloc().
> + *
> + * @param dev_id
> + * Reference device ID.
> + * @param ptr
> + * Pointer to the memory area to be deallocated.
> + * NULL is a no-op accepted value.
> + *
> + * @return
> + * 0 on success, -rte_errno otherwise:
I don't think you are supposed to set rte_errno when it's not needed,
and it isn't needed here, since you return the error code directly.
> + * - ENODEV if invalid dev_id
> + * - ENOTSUP if operation not supported by the driver
> + * - EPERM if driver error
> + */
> +__rte_experimental
> +int rte_gpu_free(int16_t dev_id, void *ptr);
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice.
> + *
> + * Register a chunk of memory on the CPU usable by the device.
> + *
> + * @param dev_id
> + * Device ID requiring allocated memory.
> + * @param size
> + * Number of bytes to allocate.
> + * Requesting 0 will do nothing.
> + * @param ptr
> + * Pointer to the memory area to be registered.
> + * NULL is a no-op accepted value.
> +
> + * @return
> + * A pointer to the allocated memory, otherwise NULL and rte_errno is set:
> + * - ENODEV if invalid dev_id
> + * - EINVAL if reserved flags
> + * - ENOTSUP if operation not supported by the driver
> + * - E2BIG if size is higher than limit
> + * - ENOMEM if out of space
> + * - EPERM if driver error
> + */
> +__rte_experimental
> +int rte_gpu_register(int16_t dev_id, size_t size, void * ptr);
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change without prior notice.
> + *
> + * Deregister a chunk of memory previusly registered with rte_gpu_mem_register()
> + *
> + * @param dev_id
> + * Reference device ID.
> + * @param ptr
> + * Pointer to the memory area to be unregistered.
> + * NULL is a no-op accepted value.
> + *
> + * @return
> + * 0 on success, -rte_errno otherwise:
> + * - ENODEV if invalid dev_id
> + * - ENOTSUP if operation not supported by the driver
> + * - EPERM if driver error
> + */
> +__rte_experimental
> +int rte_gpu_unregister(int16_t dev_id, void *ptr);
> +
> #ifdef __cplusplus
> }
> #endif
> diff --git a/lib/gpudev/version.map b/lib/gpudev/version.map
> index 58dc632393..d4a65ebd52 100644
> --- a/lib/gpudev/version.map
> +++ b/lib/gpudev/version.map
> @@ -8,9 +8,13 @@ EXPERIMENTAL {
> rte_gpu_close;
> rte_gpu_count_avail;
> rte_gpu_find_next;
> + rte_gpu_free;
> rte_gpu_info_get;
> rte_gpu_init;
> rte_gpu_is_valid;
> + rte_gpu_malloc;
> + rte_gpu_register;
> + rte_gpu_unregister;
> };
>
> INTERNAL {
>
More information about the dev
mailing list