[dpdk-dev] [PATCH v3 5/9] gpudev: add memory API
Elena Agostini
eagostini at nvidia.com
Mon Nov 8 16:16:30 CET 2021
> From: Mattias Rönnblom <hofors at lysator.liu.se>
> Date: Friday, 29 October 2021 at 21:38
> To: Elena Agostini <eagostini at nvidia.com>, dev at dpdk.org <dev at dpdk.org>
> Cc: NBU-Contact-Thomas Monjalon <thomas at monjalon.net>
> Subject: Re: [dpdk-dev] [PATCH v3 5/9] gpudev: add memory API
>
> On 2021-10-09 03:53, eagostini at nvidia.com wrote:
> > From: Elena Agostini <eagostini at nvidia.com>
> >
> > In heterogeneous computing system, processing is not only in the CPU.
> > Some tasks can be delegated to devices working in parallel.
> > Such workload distribution can be achieved by sharing some memory.
> >
> > As a first step, the features are focused on memory management.
> > A function allows to allocate memory inside the device,
> > or in the main (CPU) memory while making it visible for the device.
> > This memory may be used to save packets or for synchronization data.
> >
> > The next step should focus on GPU processing task control.
> >
> > Signed-off-by: Elena Agostini <eagostini at nvidia.com>
> > Signed-off-by: Thomas Monjalon <thomas at monjalon.net>
> > ---
> > app/test-gpudev/main.c | 118 +++++++++++++++++++++++++
> > doc/guides/gpus/features/default.ini | 3 +
> > doc/guides/prog_guide/gpudev.rst | 19 ++++
> > doc/guides/rel_notes/release_21_11.rst | 1 +
> > lib/gpudev/gpudev.c | 101 +++++++++++++++++++++
> > lib/gpudev/gpudev_driver.h | 12 +++
> > lib/gpudev/rte_gpudev.h | 95 ++++++++++++++++++++
> > lib/gpudev/version.map | 4 +
> > 8 files changed, 353 insertions(+)
> >
> > diff --git a/app/test-gpudev/main.c b/app/test-gpudev/main.c
> > index 6a73a54e84..98c02a3ee0 100644
> > --- a/app/test-gpudev/main.c
> > +++ b/app/test-gpudev/main.c
> > @@ -62,6 +62,110 @@ args_parse(int argc, char **argv)
> > }
> > }
> >
> > +static int
> > +alloc_gpu_memory(uint16_t gpu_id)
> > +{
> > + void * ptr_1 = NULL;
>
> Delete space between '*' and 'p'.
>
Thanks Mattias, I addressed all of your comments and re-ran the checkpatch script
on all the gpudev-related files.
> > + void * ptr_2 = NULL;
> > + size_t buf_bytes = 1024;
> > + int ret = 0;
>
> This initialization is redundant.
>
> > +
> > + printf("\n=======> TEST: Allocate GPU memory\n");
> > +
> > + /* Alloc memory on GPU 0 */
> > + ptr_1 = rte_gpu_malloc(gpu_id, buf_bytes);
> > + if(ptr_1 == NULL)
> > + {
>
> Misplaced braces.
>
> "if (" rather than "if(".
>
> > + fprintf(stderr, "rte_gpu_malloc GPU memory returned error\n");
> > + return -1;
> > + }
> > + printf("GPU memory allocated at 0x%p %zdB\n", ptr_1, buf_bytes);
> > +
> > + ptr_2 = rte_gpu_malloc(gpu_id, buf_bytes);
> > + if(ptr_2 == NULL)
> > + {
>
> Again, and throughout this file.
>
> > + fprintf(stderr, "rte_gpu_malloc GPU memory returned error\n");
> > + return -1;
> > + }
> > + printf("GPU memory allocated at 0x%p %zdB\n", ptr_2, buf_bytes);
> > +
> > + ret = rte_gpu_free(gpu_id, (uint8_t*)(ptr_1)+0x700);
> > + if(ret < 0)
> > + {
> > + printf("GPU memory 0x%p + 0x700 NOT freed because of memory address not recognized by driver\n", ptr_1);
> > + }
> > + else
> > + {
> > + fprintf(stderr, "rte_gpu_free erroneusly freed GPU memory 0x%p + 0x700\n", ptr_1);
> > + return -1;
> > + }
> > +
> > + ret = rte_gpu_free(gpu_id, ptr_2);
> > + if(ret < 0)
> > + {
> > + fprintf(stderr, "rte_gpu_free returned error %d\n", ret);
> > + return -1;
> > + }
> > + printf("GPU memory 0x%p freed\n", ptr_2);
> > +
> > + ret = rte_gpu_free(gpu_id, ptr_1);
> > + if(ret < 0)
> > + {
> > + fprintf(stderr, "rte_gpu_free returned error %d\n", ret);
> > + return -1;
> > + }
> > + printf("GPU memory 0x%p freed\n", ptr_1);
> > +
> > + return 0;
> > +}
> > +
> > +static int
> > +register_cpu_memory(uint16_t gpu_id)
> > +{
> > + void * ptr = NULL;
> > + size_t buf_bytes = 1024;
> > + int ret = 0;
> > +
> > + printf("\n=======> TEST: Register CPU memory\n");
> > +
> > + /* Alloc memory on CPU visible from GPU 0 */
> > + ptr = rte_zmalloc(NULL, buf_bytes, 0);
> > + if (ptr == NULL) {
> > + fprintf(stderr, "Failed to allocate CPU memory.\n");
> > + return -1;
> > + }
> > +
> > + ret = rte_gpu_register(gpu_id, buf_bytes, ptr);
> > + if(ret < 0)
> > + {
> > + fprintf(stderr, "rte_gpu_register CPU memory returned error %d\n", ret);
> > + return -1;
> > + }
> > + printf("CPU memory registered at 0x%p %zdB\n", ptr, buf_bytes);
> > +
> > + ret = rte_gpu_unregister(gpu_id, (uint8_t*)(ptr)+0x700);
> > + if(ret < 0)
> > + {
> > + printf("CPU memory 0x%p + 0x700 NOT unregistered because of memory address not recognized by driver\n", ptr);
> > + }
> > + else
> > + {
> > + fprintf(stderr, "rte_gpu_free erroneusly freed GPU memory 0x%p + 0x700\n", ptr);
> > + return -1;
> > + }
> > + printf("CPU memory 0x%p unregistered\n", ptr);
> > +
> > + ret = rte_gpu_unregister(gpu_id, ptr);
> > + if(ret < 0)
> > + {
> > + fprintf(stderr, "rte_gpu_unregister returned error %d\n", ret);
> > + return -1;
> > + }
> > + printf("CPU memory 0x%p unregistered\n", ptr);
> > +
> > + return 0;
> > +}
> > +
> > int
> > main(int argc, char **argv)
> > {
> > @@ -99,6 +203,20 @@ main(int argc, char **argv)
> > }
> > printf("\n\n");
> >
> > + if(nb_gpus == 0)
> > + {
> > + fprintf(stderr, "Need at least one GPU on the system to run the example\n");
> > + return EXIT_FAILURE;
> > + }
> > +
> > + gpu_id = 0;
> > +
> > + /**
> > + * Memory tests
> > + */
> > + alloc_gpu_memory(gpu_id);
> > + register_cpu_memory(gpu_id);
> > +
> > /* clean up the EAL */
> > rte_eal_cleanup();
> > printf("Bye...\n");
> > diff --git a/doc/guides/gpus/features/default.ini b/doc/guides/gpus/features/default.ini
> > index ec7a545eb7..87e9966424 100644
> > --- a/doc/guides/gpus/features/default.ini
> > +++ b/doc/guides/gpus/features/default.ini
> > @@ -8,3 +8,6 @@
> > ;
> > [Features]
> > Get device info =
> > +Share CPU memory with device =
> > +Allocate device memory =
> > +Free memory =
> > diff --git a/doc/guides/prog_guide/gpudev.rst b/doc/guides/prog_guide/gpudev.rst
> > index 7694639489..9aca69038c 100644
> > --- a/doc/guides/prog_guide/gpudev.rst
> > +++ b/doc/guides/prog_guide/gpudev.rst
> > @@ -30,6 +30,8 @@ Features
> > This library provides a number of features:
> >
> > - Interoperability with device-specific library through generic handlers.
> > +- Allocate and free memory on the device.
> > +- Register CPU memory to make it visible from the device.
> >
> >
> > API Overview
> > @@ -46,3 +48,20 @@ that will be registered internally by the driver as an additional device (child)
> > connected to a physical device (parent).
> > Each device (parent or child) is represented through a ID
> > required to indicate which device a given operation should be executed on.
> > +
> > +Memory Allocation
> > +~~~~~~~~~~~~~~~~~
> > +
> > +gpudev can allocate on an input given GPU device a memory area
> > +returning the pointer to that memory.
> > +Later, it's also possible to free that memory with gpudev.
> > +GPU memory allocated outside of the gpudev library
> > +(e.g. with GPU-specific library) cannot be freed by the gpudev library.
> > +
> > +Memory Registration
> > +~~~~~~~~~~~~~~~~~~~
> > +
> > +gpudev can register a CPU memory area to make it visible from a GPU device.
> > +Later, it's also possible to unregister that memory with gpudev.
> > +CPU memory registered outside of the gpudev library
> > +(e.g. with GPU specific library) cannot be unregistered by the gpudev library.
> > diff --git a/doc/guides/rel_notes/release_21_11.rst b/doc/guides/rel_notes/release_21_11.rst
> > index 4986a35b50..c4ac5e3053 100644
> > --- a/doc/guides/rel_notes/release_21_11.rst
> > +++ b/doc/guides/rel_notes/release_21_11.rst
> > @@ -65,6 +65,7 @@ New Features
> > * **Introduced GPU device class with first features:**
> >
> > * Device information
> > + * Memory management
> >
> > * **Added new RSS offload types for IPv4/L4 checksum in RSS flow.**
> >
> > diff --git a/lib/gpudev/gpudev.c b/lib/gpudev/gpudev.c
> > index f0690cf730..1d8318f769 100644
> > --- a/lib/gpudev/gpudev.c
> > +++ b/lib/gpudev/gpudev.c
> > @@ -6,6 +6,7 @@
> > #include <rte_tailq.h>
> > #include <rte_string_fns.h>
> > #include <rte_memzone.h>
> > +#include <rte_malloc.h>
> > #include <rte_errno.h>
> > #include <rte_log.h>
> >
> > @@ -523,3 +524,103 @@ rte_gpu_info_get(int16_t dev_id, struct rte_gpu_info *info)
> > }
> > return GPU_DRV_RET(dev->ops.dev_info_get(dev, info));
> > }
> > +
> > +void *
> > +rte_gpu_malloc(int16_t dev_id, size_t size)
> > +{
> > + struct rte_gpu *dev;
> > + void *ptr;
> > + int ret;
> > +
> > + dev = gpu_get_by_id(dev_id);
> > + if (dev == NULL) {
> > + GPU_LOG(ERR, "alloc mem for invalid device ID %d", dev_id);
> > + rte_errno = ENODEV;
> > + return NULL;
> > + }
> > +
> > + if (dev->ops.mem_alloc == NULL) {
> > + GPU_LOG(ERR, "mem allocation not supported");
> > + rte_errno = ENOTSUP;
> > + return NULL;
> > + }
> > +
> > + if (size == 0) /* dry-run */
> > + return NULL;
> > +
> > + ret = dev->ops.mem_alloc(dev, size, &ptr);
> > +
> > + switch (ret) {
> > + case 0:
> > + return ptr;
> > + case -ENOMEM:
> > + case -E2BIG:
> > + rte_errno = -ret;
> > + return NULL;
> > + default:
> > + rte_errno = -EPERM;
> > + return NULL;
> > + }
> > +}
> > +
> > +int
> > +rte_gpu_register(int16_t dev_id, size_t size, void * ptr)
> > +{
> > + struct rte_gpu *dev;
> > +
> > + dev = gpu_get_by_id(dev_id);
> > + if (dev == NULL) {
> > + GPU_LOG(ERR, "alloc mem for invalid device ID %d", dev_id);
> > + rte_errno = ENODEV;
> > + return -rte_errno;
> > + }
> > +
> > + if (dev->ops.mem_register == NULL) {
> > + GPU_LOG(ERR, "mem registration not supported");
> > + rte_errno = ENOTSUP;
> > + return -rte_errno;
> > + }
> > +
> > + if (size == 0 || ptr == NULL) /* dry-run */
> > + return -EINVAL;
> > +
> > + return GPU_DRV_RET(dev->ops.mem_register(dev, size, ptr));
> > +}
> > +
> > +int
> > +rte_gpu_unregister(int16_t dev_id, void * ptr)
> > +{
> > + struct rte_gpu *dev;
> > +
> > + dev = gpu_get_by_id(dev_id);
> > + if (dev == NULL) {
> > + GPU_LOG(ERR, "unregister mem for invalid device ID %d", dev_id);
> > + rte_errno = ENODEV;
> > + return -rte_errno;
> > + }
> > +
> > + if (dev->ops.mem_unregister == NULL) {
> > + rte_errno = ENOTSUP;
> > + return -rte_errno;
> > + }
> > + return GPU_DRV_RET(dev->ops.mem_unregister(dev, ptr));
> > +}
> > +
> > +int
> > +rte_gpu_free(int16_t dev_id, void *ptr)
> > +{
> > + struct rte_gpu *dev;
> > +
> > + dev = gpu_get_by_id(dev_id);
> > + if (dev == NULL) {
> > + GPU_LOG(ERR, "free mem for invalid device ID %d", dev_id);
> > + rte_errno = ENODEV;
> > + return -rte_errno;
> > + }
> > +
> > + if (dev->ops.mem_free == NULL) {
> > + rte_errno = ENOTSUP;
> > + return -rte_errno;
> > + }
> > + return GPU_DRV_RET(dev->ops.mem_free(dev, ptr));
> > +}
> > diff --git a/lib/gpudev/gpudev_driver.h b/lib/gpudev/gpudev_driver.h
> > index 9459c7e30f..11015944a6 100644
> > --- a/lib/gpudev/gpudev_driver.h
> > +++ b/lib/gpudev/gpudev_driver.h
> > @@ -27,12 +27,24 @@ enum rte_gpu_state {
> > struct rte_gpu;
> > typedef int (rte_gpu_close_t)(struct rte_gpu *dev);
> > typedef int (rte_gpu_info_get_t)(struct rte_gpu *dev, struct rte_gpu_info *info);
> > +typedef int (rte_gpu_mem_alloc_t)(struct rte_gpu *dev, size_t size, void **ptr);
> > +typedef int (rte_gpu_free_t)(struct rte_gpu *dev, void *ptr);
> > +typedef int (rte_gpu_mem_register_t)(struct rte_gpu *dev, size_t size, void *ptr);
> > +typedef int (rte_gpu_mem_unregister_t)(struct rte_gpu *dev, void *ptr);
> >
> > struct rte_gpu_ops {
> > /* Get device info. If NULL, info is just copied. */
> > rte_gpu_info_get_t *dev_info_get;
> > /* Close device or child context. */
> > rte_gpu_close_t *dev_close;
> > + /* Allocate memory in device. */
> > + rte_gpu_mem_alloc_t *mem_alloc;
> > + /* Register CPU memory in device. */
> > + rte_gpu_mem_register_t *mem_register;
> > + /* Free memory allocated or registered in device. */
> > + rte_gpu_free_t *mem_free;
> > + /* Unregister CPU memory in device. */
> > + rte_gpu_mem_unregister_t *mem_unregister;
> > };
> >
> > struct rte_gpu_mpshared {
> > diff --git a/lib/gpudev/rte_gpudev.h b/lib/gpudev/rte_gpudev.h
> > index df75dbdbab..3c276581c0 100644
> > --- a/lib/gpudev/rte_gpudev.h
> > +++ b/lib/gpudev/rte_gpudev.h
> > @@ -9,6 +9,7 @@
> > #include <stdint.h>
> > #include <stdbool.h>
> >
> > +#include <rte_bitops.h>
> > #include <rte_compat.h>
> >
> > /**
> > @@ -292,6 +293,100 @@ int rte_gpu_callback_unregister(int16_t dev_id, enum rte_gpu_event event,
> > __rte_experimental
> > int rte_gpu_info_get(int16_t dev_id, struct rte_gpu_info *info);
> >
> > +/**
> > + * @warning
> > + * @b EXPERIMENTAL: this API may change without prior notice.
> > + *
> > + * Allocate a chunk of memory usable by the device.
> > + *
> > + * @param dev_id
> > + * Device ID requiring allocated memory.
> > + * @param size
> > + * Number of bytes to allocate.
> > + * Requesting 0 will do nothing.
> > + *
> > + * @return
> > + * A pointer to the allocated memory, otherwise NULL and rte_errno is set:
> > + * - ENODEV if invalid dev_id
> > + * - EINVAL if reserved flags
> > + * - ENOTSUP if operation not supported by the driver
> > + * - E2BIG if size is higher than limit
> > + * - ENOMEM if out of space
> > + * - EPERM if driver error
> > + */
> > +__rte_experimental
> > +void *rte_gpu_malloc(int16_t dev_id, size_t size)
> > +__rte_alloc_size(2);
> > +
> > +/**
> > + * @warning
> > + * @b EXPERIMENTAL: this API may change without prior notice.
> > + *
> > + * Deallocate a chunk of memory allocated with rte_gpu_malloc().
> > + *
> > + * @param dev_id
> > + * Reference device ID.
> > + * @param ptr
> > + * Pointer to the memory area to be deallocated.
> > + * NULL is a no-op accepted value.
> > + *
> > + * @return
> > + * 0 on success, -rte_errno otherwise:
>
> I don't think you are supposed to set rte_errno if it's not needed,
> which is not the case here (since you return the error code).
>
> > + * - ENODEV if invalid dev_id
> > + * - ENOTSUP if operation not supported by the driver
> > + * - EPERM if driver error
> > + */
> > +__rte_experimental
> > +int rte_gpu_free(int16_t dev_id, void *ptr);
> > +
> > +/**
> > + * @warning
> > + * @b EXPERIMENTAL: this API may change without prior notice.
> > + *
> > + * Register a chunk of memory on the CPU usable by the device.
> > + *
> > + * @param dev_id
> > + * Device ID requiring allocated memory.
> > + * @param size
> > + * Number of bytes to allocate.
> > + * Requesting 0 will do nothing.
> > + * @param ptr
> > + * Pointer to the memory area to be registered.
> > + * NULL is a no-op accepted value.
> > +
> > + * @return
> > + * A pointer to the allocated memory, otherwise NULL and rte_errno is set:
> > + * - ENODEV if invalid dev_id
> > + * - EINVAL if reserved flags
> > + * - ENOTSUP if operation not supported by the driver
> > + * - E2BIG if size is higher than limit
> > + * - ENOMEM if out of space
> > + * - EPERM if driver error
> > + */
> > +__rte_experimental
> > +int rte_gpu_register(int16_t dev_id, size_t size, void * ptr);
> > +
> > +/**
> > + * @warning
> > + * @b EXPERIMENTAL: this API may change without prior notice.
> > + *
> > + * Deregister a chunk of memory previusly registered with rte_gpu_mem_register()
> > + *
> > + * @param dev_id
> > + * Reference device ID.
> > + * @param ptr
> > + * Pointer to the memory area to be unregistered.
> > + * NULL is a no-op accepted value.
> > + *
> > + * @return
> > + * 0 on success, -rte_errno otherwise:
> > + * - ENODEV if invalid dev_id
> > + * - ENOTSUP if operation not supported by the driver
> > + * - EPERM if driver error
> > + */
> > +__rte_experimental
> > +int rte_gpu_unregister(int16_t dev_id, void *ptr);
> > +
> > #ifdef __cplusplus
> > }
> > #endif
> > diff --git a/lib/gpudev/version.map b/lib/gpudev/version.map
> > index 58dc632393..d4a65ebd52 100644
> > --- a/lib/gpudev/version.map
> > +++ b/lib/gpudev/version.map
> > @@ -8,9 +8,13 @@ EXPERIMENTAL {
> > rte_gpu_close;
> > rte_gpu_count_avail;
> > rte_gpu_find_next;
> > + rte_gpu_free;
> > rte_gpu_info_get;
> > rte_gpu_init;
> > rte_gpu_is_valid;
> > + rte_gpu_malloc;
> > + rte_gpu_register;
> > + rte_gpu_unregister;
> > };
> >
> > INTERNAL {
> >
More information about the dev
mailing list