[dpdk-dev] [PATCH 2/5] lib/ring: add template to support different element sizes

Honnappa Nagarahalli Honnappa.Nagarahalli at arm.com
Thu Oct 3 05:33:37 CEST 2019


<snip>

> >
> > > > Add templates to support creating ring APIs with different ring
> > > > element sizes.
> > > >
> > > > Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli at arm.com>
> > > > Reviewed-by: Dharmik Thakkar <dharmik.thakkar at arm.com>
> > > > Reviewed-by: Gavin Hu <gavin.hu at arm.com>
> > > > Reviewed-by: Ruifeng Wang <ruifeng.wang at arm.com>
> > > > ---
> > > >  lib/librte_ring/Makefile            |   4 +-
> > > >  lib/librte_ring/meson.build         |   4 +-
> > > >  lib/librte_ring/rte_ring_template.c |  46 ++++
> > > > lib/librte_ring/rte_ring_template.h | 330
> > > > ++++++++++++++++++++++++++++
> > > >  4 files changed, 382 insertions(+), 2 deletions(-)  create mode
> > > > 100644 lib/librte_ring/rte_ring_template.c
> > > >  create mode 100644 lib/librte_ring/rte_ring_template.h
> > > >
> > > > diff --git a/lib/librte_ring/Makefile b/lib/librte_ring/Makefile
> > > > index
> > > > 4c8410229..818898110 100644
> > > > --- a/lib/librte_ring/Makefile
> > > > +++ b/lib/librte_ring/Makefile
> > > > @@ -19,6 +19,8 @@ SRCS-$(CONFIG_RTE_LIBRTE_RING) := rte_ring.c  #
> > > > install includes  SYMLINK-$(CONFIG_RTE_LIBRTE_RING)-include :=
> > > > rte_ring.h \
> > > >  					rte_ring_generic.h \
> > > > -					rte_ring_c11_mem.h
> > > > +					rte_ring_c11_mem.h \
> > > > +					rte_ring_template.h \
> > > > +					rte_ring_template.c
> > > >
> > > >  include $(RTE_SDK)/mk/rte.lib.mk
> > > > diff --git a/lib/librte_ring/meson.build
> > > > b/lib/librte_ring/meson.build index 74219840a..e4e208a7c 100644
> > > > --- a/lib/librte_ring/meson.build
> > > > +++ b/lib/librte_ring/meson.build
> > > > @@ -5,7 +5,9 @@ version = 2
> > > >  sources = files('rte_ring.c')
> > > >  headers = files('rte_ring.h',
> > > >  		'rte_ring_c11_mem.h',
> > > > -		'rte_ring_generic.h')
> > > > +		'rte_ring_generic.h',
> > > > +		'rte_ring_template.h',
> > > > +		'rte_ring_template.c')
> > > >
> > > >  # rte_ring_create_elem and rte_ring_get_memsize_elem are
> > > > experimental allow_experimental_apis = true diff --git
> > > > a/lib/librte_ring/rte_ring_template.c
> > > > b/lib/librte_ring/rte_ring_template.c
> > > > new file mode 100644
> > > > index 000000000..1ca593f95
> > > > --- /dev/null
> > > > +++ b/lib/librte_ring/rte_ring_template.c
> > > > @@ -0,0 +1,46 @@
> > > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > > + * Copyright (c) 2019 Arm Limited  */
> > > > +
> > > > +#include <stdio.h>
> > > > +#include <stdarg.h>
> > > > +#include <string.h>
> > > > +#include <stdint.h>
> > > > +#include <inttypes.h>
> > > > +#include <errno.h>
> > > > +#include <sys/queue.h>
> > > > +
> > > > +#include <rte_common.h>
> > > > +#include <rte_log.h>
> > > > +#include <rte_memory.h>
> > > > +#include <rte_memzone.h>
> > > > +#include <rte_malloc.h>
> > > > +#include <rte_launch.h>
> > > > +#include <rte_eal.h>
> > > > +#include <rte_eal_memconfig.h>
> > > > +#include <rte_atomic.h>
> > > > +#include <rte_per_lcore.h>
> > > > +#include <rte_lcore.h>
> > > > +#include <rte_branch_prediction.h> #include <rte_errno.h>
> > > > +#include <rte_string_fns.h> #include <rte_spinlock.h> #include
> > > > +<rte_tailq.h>
> > > > +
> > > > +#include "rte_ring.h"
> > > > +
> > > > +/* return the size of memory occupied by a ring */ ssize_t
> > > > +__RTE_RING_CONCAT(rte_ring_get_memsize)(unsigned count) {
> > > > +	return rte_ring_get_memsize_elem(count,
> > > RTE_RING_TMPLT_ELEM_SIZE); }
> > > > +
> > > > +/* create the ring */
> > > > +struct rte_ring *
> > > > +__RTE_RING_CONCAT(rte_ring_create)(const char *name, unsigned
> count,
> > > > +		int socket_id, unsigned flags)
> > > > +{
> > > > +	return rte_ring_create_elem(name, count,
> > > RTE_RING_TMPLT_ELEM_SIZE,
> > > > +		socket_id, flags);
> > > > +}
> > > > diff --git a/lib/librte_ring/rte_ring_template.h
> > > > b/lib/librte_ring/rte_ring_template.h
> > > > new file mode 100644
> > > > index 000000000..b9b14dfbb
> > > > --- /dev/null
> > > > +++ b/lib/librte_ring/rte_ring_template.h
> > > > @@ -0,0 +1,330 @@
> > > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > > + * Copyright (c) 2019 Arm Limited  */
> > > > +
> > > > +#ifndef _RTE_RING_TEMPLATE_H_
> > > > +#define _RTE_RING_TEMPLATE_H_
> > > > +
> > > > +#ifdef __cplusplus
> > > > +extern "C" {
> > > > +#endif
> > > > +
> > > > +#include <stdio.h>
> > > > +#include <stdint.h>
> > > > +#include <sys/queue.h>
> > > > +#include <errno.h>
> > > > +#include <rte_common.h>
> > > > +#include <rte_config.h>
> > > > +#include <rte_memory.h>
> > > > +#include <rte_lcore.h>
> > > > +#include <rte_atomic.h>
> > > > +#include <rte_branch_prediction.h> #include <rte_memzone.h>
> > > > +#include <rte_pause.h> #include <rte_ring.h>
> > > > +
> > > > +/* Ring API suffix name - used to append to API names */ #ifndef
> > > > +RTE_RING_TMPLT_API_SUFFIX #error RTE_RING_TMPLT_API_SUFFIX
> not
> > > > +defined #endif
> > > > +
> > > > +/* Ring's element size in bits, should be a power of 2 */ #ifndef
> > > > +RTE_RING_TMPLT_ELEM_SIZE #error RTE_RING_TMPLT_ELEM_SIZE
> not
> > > defined
> > > > +#endif
> > > > +
> > > > +/* Type of ring elements */
> > > > +#ifndef RTE_RING_TMPLT_ELEM_TYPE
> > > > +#error RTE_RING_TMPLT_ELEM_TYPE not defined #endif
> > > > +
> > > > +#define _rte_fuse(a, b) a##_##b
> > > > +#define __rte_fuse(a, b) _rte_fuse(a, b) #define
> > > > +__RTE_RING_CONCAT(a) __rte_fuse(a, RTE_RING_TMPLT_API_SUFFIX)
> > > > +
> > > > +/* Calculate the memory size needed for a ring */
> > > > +RTE_RING_TMPLT_EXPERIMENTAL ssize_t
> > > > +__RTE_RING_CONCAT(rte_ring_get_memsize)(unsigned count);
> > > > +
> > > > +/* Create a new ring named *name* in memory. */
> > > > +RTE_RING_TMPLT_EXPERIMENTAL struct rte_ring *
> > > > +__RTE_RING_CONCAT(rte_ring_create)(const char *name, unsigned
> count,
> > > > +					int socket_id, unsigned flags);
> > >
> > >
> > > Just an idea - probably same thing can be achieved in a different way.
> > > Instead of all these defines - replace ENQUEUE_PTRS/DEQUEUE_PTRS
> > > macros with static inline functions and then make all internal functions,
> i.e.
> > > __rte_ring_do_dequeue()
> > > to accept enqueue/dequeue function pointer as a parameter.
> > > Then let's say the default rte_ring_mc_dequeue_bulk will do:
> > >
> > > rte_ring_mc_dequeue_bulk(struct rte_ring *r, void **obj_table,
> > >                 unsigned int n, unsigned int *available) {
> > >         return __rte_ring_do_dequeue(r, obj_table, n,
> RTE_RING_QUEUE_FIXED,
> > >                         __IS_MC, available, dequeue_ptr_default); }
> > >
> > > Then if someone would like to define ring functions for elt_size==X,
> > > all he would need to do:
> > > 1. define his own enqueue/dequeuer functions.
> > > 2. do something like:
> > > rte_ring_mc_dequeue_bulk(struct rte_ring *r, void **obj_table,
> > >                 unsigned int n, unsigned int *available) {
> > >         return __rte_ring_do_dequeue(r, obj_table, n,
> RTE_RING_QUEUE_FIXED,
> > >                         __IS_MC, available, dequeue_X); }
> > >
> > > Konstantin
> > Thanks for the feedback/idea. The goal of this patch was to make it
> > simple enough to define APIs to store any element size without code
> duplication.
> 
> Well, then if we store elt_size inside the ring, it should be easy enough to add
> to the API generic functions that would use memcpy(or rte_memcpy) for
> enqueue/dequeue.
> Yes, it might be slower than existing (8B per elem), but might be still
> acceptable.
The element size will be a constant in most use cases. If we keep the element size as a parameter, it allows the compiler to do any loop unrolling and auto-vectorization optimizations on copying.
Storing the element size will result in additional memory access.

> 
> > With this patch, the user has to write ~4 lines of code to get APIs for
> > any element size. I would like to keep that goal the same.
> >
> > If we have to avoid the macro-fest, the main problem that needs to be
> > addressed is - how to represent different sizes of element types in a generic
> way? IMO, we can do this by defining the element type to be a multiple of
> uint32_t (I do not think we need to go to uint16_t).
> >
> > For ex:
> > rte_ring_mp_enqueue_bulk_objs(struct rte_ring *r,
> >                 uint32_t *obj_table, unsigned int num_objs,
> >                 unsigned int n,
> >                 enum rte_ring_queue_behavior behavior, unsigned int is_sp,
> >                 unsigned int *free_space) { }
> >
> > This approach would ensure that we have generic enough APIs and they
> > can be used for elements of any size. But the element itself needs to be a
> multiple of 32b - I think this should not be a concern.
> >
> > The API suffix definitely needs to be better, any suggestions?
> 
> >
> > >
> > >
> > > > +
> > > > +/**
> > > > + * @internal Enqueue several objects on the ring  */ static
> > > > +__rte_always_inline unsigned int
> > > > +__RTE_RING_CONCAT(__rte_ring_do_enqueue)(struct rte_ring *r,
> > > > +		RTE_RING_TMPLT_ELEM_TYPE const *obj_table, unsigned int
> > > n,
> > > > +		enum rte_ring_queue_behavior behavior, unsigned int is_sp,
> > > > +		unsigned int *free_space)
> > > > +{
> > > > +	uint32_t prod_head, prod_next;
> > > > +	uint32_t free_entries;
> > > > +
> > > > +	n = __rte_ring_move_prod_head(r, is_sp, n, behavior,
> > > > +			&prod_head, &prod_next, &free_entries);
> > > > +	if (n == 0)
> > > > +		goto end;
> > > > +
> > > > +	ENQUEUE_PTRS(r, &r[1], prod_head, obj_table, n,
> > > > +		RTE_RING_TMPLT_ELEM_TYPE);
> > > > +
> > > > +	update_tail(&r->prod, prod_head, prod_next, is_sp, 1);
> > > > +end:
> > > > +	if (free_space != NULL)
> > > > +		*free_space = free_entries - n;
> > > > +	return n;
> > > > +}
> > > > +
> > > > +/**
> > > > + * @internal Dequeue several objects from the ring  */ static
> > > > +__rte_always_inline unsigned int
> > > > +__RTE_RING_CONCAT(__rte_ring_do_dequeue)(struct rte_ring *r,
> > > > +	RTE_RING_TMPLT_ELEM_TYPE *obj_table, unsigned int n,
> > > > +	enum rte_ring_queue_behavior behavior, unsigned int is_sc,
> > > > +	unsigned int *available)
> > > > +{
> > > > +	uint32_t cons_head, cons_next;
> > > > +	uint32_t entries;
> > > > +
> > > > +	n = __rte_ring_move_cons_head(r, (int)is_sc, n, behavior,
> > > > +			&cons_head, &cons_next, &entries);
> > > > +	if (n == 0)
> > > > +		goto end;
> > > > +
> > > > +	DEQUEUE_PTRS(r, &r[1], cons_head, obj_table, n,
> > > > +		RTE_RING_TMPLT_ELEM_TYPE);
> > > > +
> > > > +	update_tail(&r->cons, cons_head, cons_next, is_sc, 0);
> > > > +
> > > > +end:
> > > > +	if (available != NULL)
> > > > +		*available = entries - n;
> > > > +	return n;
> > > > +}
> > > > +
> > > > +
> > > > +/**
> > > > + * Enqueue several objects on the ring (multi-producers safe).
> > > > + */
> > > > +static __rte_always_inline unsigned int
> > > > +__RTE_RING_CONCAT(rte_ring_mp_enqueue_bulk)(struct rte_ring *r,
> > > > +	RTE_RING_TMPLT_ELEM_TYPE const *obj_table, unsigned int n,
> > > > +	unsigned int *free_space)
> > > > +{
> > > > +	return __RTE_RING_CONCAT(__rte_ring_do_enqueue)(r, obj_table, n,
> > > > +			RTE_RING_QUEUE_FIXED, __IS_MP, free_space); }
> > > > +
> > > > +/**
> > > > + * Enqueue several objects on a ring (NOT multi-producers safe).
> > > > + */
> > > > +static __rte_always_inline unsigned int
> > > > +__RTE_RING_CONCAT(rte_ring_sp_enqueue_bulk)(struct rte_ring *r,
> > > > +	RTE_RING_TMPLT_ELEM_TYPE const *obj_table, unsigned int n,
> > > > +	unsigned int *free_space)
> > > > +{
> > > > +	return __RTE_RING_CONCAT(__rte_ring_do_enqueue)(r, obj_table, n,
> > > > +			RTE_RING_QUEUE_FIXED, __IS_SP, free_space); }
> > > > +
> > > > +/**
> > > > + * Enqueue several objects on a ring.
> > > > + */
> > > > +static __rte_always_inline unsigned int
> > > > +__RTE_RING_CONCAT(rte_ring_enqueue_bulk)(struct rte_ring *r,
> > > > +	RTE_RING_TMPLT_ELEM_TYPE const *obj_table, unsigned int n,
> > > > +	unsigned int *free_space)
> > > > +{
> > > > +	return __RTE_RING_CONCAT(__rte_ring_do_enqueue)(r, obj_table, n,
> > > > +			RTE_RING_QUEUE_FIXED, r->prod.single,
> free_space); }
> > > > +
> > > > +/**
> > > > + * Enqueue one object on a ring (multi-producers safe).
> > > > + */
> > > > +static __rte_always_inline int
> > > > +__RTE_RING_CONCAT(rte_ring_mp_enqueue)(struct rte_ring *r,
> > > > +	RTE_RING_TMPLT_ELEM_TYPE obj)
> > > > +{
> > > > +	return __RTE_RING_CONCAT(rte_ring_mp_enqueue_bulk)(r, &obj, 1,
> > > NULL) ?
> > > > +			0 : -ENOBUFS;
> > > > +}
> > > > +
> > > > +/**
> > > > + * Enqueue one object on a ring (NOT multi-producers safe).
> > > > + */
> > > > +static __rte_always_inline int
> > > > +__RTE_RING_CONCAT(rte_ring_sp_enqueue)(struct rte_ring *r,
> > > > +	RTE_RING_TMPLT_ELEM_TYPE obj)
> > > > +{
> > > > +	return __RTE_RING_CONCAT(rte_ring_sp_enqueue_bulk)(r, &obj, 1,
> > > NULL) ?
> > > > +			0 : -ENOBUFS;
> > > > +}
> > > > +
> > > > +/**
> > > > + * Enqueue one object on a ring.
> > > > + */
> > > > +static __rte_always_inline int
> > > > +__RTE_RING_CONCAT(rte_ring_enqueue)(struct rte_ring *r,
> > > > +	RTE_RING_TMPLT_ELEM_TYPE *obj)
> > > > +{
> > > > +	return __RTE_RING_CONCAT(rte_ring_enqueue_bulk)(r, obj, 1,
> > > NULL) ?
> > > > +			0 : -ENOBUFS;
> > > > +}
> > > > +
> > > > +/**
> > > > + * Dequeue several objects from a ring (multi-consumers safe).
> > > > + */
> > > > +static __rte_always_inline unsigned int
> > > > +__RTE_RING_CONCAT(rte_ring_mc_dequeue_bulk)(struct rte_ring *r,
> > > > +	RTE_RING_TMPLT_ELEM_TYPE *obj_table, unsigned int n,
> > > > +	unsigned int *available)
> > > > +{
> > > > +	return __RTE_RING_CONCAT(__rte_ring_do_dequeue)(r, obj_table, n,
> > > > +			RTE_RING_QUEUE_FIXED, __IS_MC, available); }
> > > > +
> > > > +/**
> > > > + * Dequeue several objects from a ring (NOT multi-consumers safe).
> > > > + */
> > > > +static __rte_always_inline unsigned int
> > > > +__RTE_RING_CONCAT(rte_ring_sc_dequeue_bulk)(struct rte_ring *r,
> > > > +	RTE_RING_TMPLT_ELEM_TYPE *obj_table, unsigned int n,
> > > > +	unsigned int *available)
> > > > +{
> > > > +	return __RTE_RING_CONCAT(__rte_ring_do_dequeue)(r, obj_table, n,
> > > > +			RTE_RING_QUEUE_FIXED, __IS_SC, available); }
> > > > +
> > > > +/**
> > > > + * Dequeue several objects from a ring.
> > > > + */
> > > > +static __rte_always_inline unsigned int
> > > > +__RTE_RING_CONCAT(rte_ring_dequeue_bulk)(struct rte_ring *r,
> > > > +	RTE_RING_TMPLT_ELEM_TYPE *obj_table, unsigned int n,
> > > > +	unsigned int *available)
> > > > +{
> > > > +	return __RTE_RING_CONCAT(__rte_ring_do_dequeue)(r, obj_table, n,
> > > > +			RTE_RING_QUEUE_FIXED, r->cons.single, available); }
> > > > +
> > > > +/**
> > > > + * Dequeue one object from a ring (multi-consumers safe).
> > > > + */
> > > > +static __rte_always_inline int
> > > > +__RTE_RING_CONCAT(rte_ring_mc_dequeue)(struct rte_ring *r,
> > > > +	RTE_RING_TMPLT_ELEM_TYPE *obj_p) {
> > > > +	return __RTE_RING_CONCAT(rte_ring_mc_dequeue_bulk)(r, obj_p, 1,
> > > NULL) ?
> > > > +			0 : -ENOENT;
> > > > +}
> > > > +
> > > > +/**
> > > > + * Dequeue one object from a ring (NOT multi-consumers safe).
> > > > + */
> > > > +static __rte_always_inline int
> > > > +__RTE_RING_CONCAT(rte_ring_sc_dequeue)(struct rte_ring *r,
> > > > +	RTE_RING_TMPLT_ELEM_TYPE *obj_p) {
> > > > +	return __RTE_RING_CONCAT(rte_ring_sc_dequeue_bulk)(r, obj_p, 1,
> > > NULL) ?
> > > > +			0 : -ENOENT;
> > > > +}
> > > > +
> > > > +/**
> > > > + * Dequeue one object from a ring.
> > > > + */
> > > > +static __rte_always_inline int
> > > > +__RTE_RING_CONCAT(rte_ring_dequeue)(struct rte_ring *r,
> > > > +	RTE_RING_TMPLT_ELEM_TYPE *obj_p) {
> > > > +	return __RTE_RING_CONCAT(rte_ring_dequeue_bulk)(r, obj_p, 1,
> > > NULL) ?
> > > > +			0 : -ENOENT;
> > > > +}
> > > > +
> > > > +/**
> > > > + * Enqueue several objects on the ring (multi-producers safe).
> > > > + */
> > > > +static __rte_always_inline unsigned
> > > > +__RTE_RING_CONCAT(rte_ring_mp_enqueue_burst)(struct rte_ring *r,
> > > > +	RTE_RING_TMPLT_ELEM_TYPE *obj_table,
> > > > +			 unsigned int n, unsigned int *free_space) {
> > > > +	return __RTE_RING_CONCAT(__rte_ring_do_enqueue)(r, obj_table, n,
> > > > +			RTE_RING_QUEUE_VARIABLE, __IS_MP, free_space); }
> > > > +
> > > > +/**
> > > > + * Enqueue several objects on a ring (NOT multi-producers safe).
> > > > + */
> > > > +static __rte_always_inline unsigned
> > > > +__RTE_RING_CONCAT(rte_ring_sp_enqueue_burst)(struct rte_ring *r,
> > > > +	RTE_RING_TMPLT_ELEM_TYPE *obj_table,
> > > > +			 unsigned int n, unsigned int *free_space) {
> > > > +	return __RTE_RING_CONCAT(__rte_ring_do_enqueue)(r, obj_table, n,
> > > > +			RTE_RING_QUEUE_VARIABLE, __IS_SP, free_space); }
> > > > +
> > > > +/**
> > > > + * Enqueue several objects on a ring.
> > > > + */
> > > > +static __rte_always_inline unsigned
> > > > +__RTE_RING_CONCAT(rte_ring_enqueue_burst)(struct rte_ring *r,
> > > > +	RTE_RING_TMPLT_ELEM_TYPE *obj_table, unsigned int n,
> > > > +	unsigned int *free_space)
> > > > +{
> > > > +	return __RTE_RING_CONCAT(__rte_ring_do_enqueue)(r, obj_table, n,
> > > > +			RTE_RING_QUEUE_VARIABLE, r->prod.single,
> > > free_space);
> > > > +}
> > > > +
> > > > +/**
> > > > + * Dequeue several objects from a ring (multi-consumers safe).
> > > > +When the
> > > request
> > > > + * objects are more than the available objects, only dequeue the
> > > > + actual
> > > number
> > > > + * of objects
> > > > + */
> > > > +static __rte_always_inline unsigned
> > > > +__RTE_RING_CONCAT(rte_ring_mc_dequeue_burst)(struct rte_ring *r,
> > > > +	RTE_RING_TMPLT_ELEM_TYPE *obj_table, unsigned int n,
> > > > +	unsigned int *available)
> > > > +{
> > > > +	return __RTE_RING_CONCAT(__rte_ring_do_dequeue)(r, obj_table, n,
> > > > +			RTE_RING_QUEUE_VARIABLE, __IS_MC, available); }
> > > > +
> > > > +/**
> > > > + * Dequeue several objects from a ring (NOT multi-consumers
> > > > +safe).When
> > > the
> > > > + * request objects are more than the available objects, only
> > > > +dequeue the
> > > > + * actual number of objects
> > > > + */
> > > > +static __rte_always_inline unsigned
> > > > +__RTE_RING_CONCAT(rte_ring_sc_dequeue_burst)(struct rte_ring *r,
> > > > +	RTE_RING_TMPLT_ELEM_TYPE *obj_table, unsigned int n,
> > > > +	unsigned int *available)
> > > > +{
> > > > +	return __RTE_RING_CONCAT(__rte_ring_do_dequeue)(r, obj_table, n,
> > > > +			RTE_RING_QUEUE_VARIABLE, __IS_SC, available); }
> > > > +
> > > > +/**
> > > > + * Dequeue multiple objects from a ring up to a maximum number.
> > > > + */
> > > > +static __rte_always_inline unsigned
> > > > +__RTE_RING_CONCAT(rte_ring_dequeue_burst)(struct rte_ring *r,
> > > > +	RTE_RING_TMPLT_ELEM_TYPE *obj_table, unsigned int n,
> > > > +	unsigned int *available)
> > > > +{
> > > > +	return __RTE_RING_CONCAT(__rte_ring_do_dequeue)(r, obj_table, n,
> > > > +				RTE_RING_QUEUE_VARIABLE,
> > > > +				r->cons.single, available);
> > > > +}
> > > > +
> > > > +#ifdef __cplusplus
> > > > +}
> > > > +#endif
> > > > +
> > > > +#endif /* _RTE_RING_TEMPLATE_H_ */
> > > > --
> > > > 2.17.1



More information about the dev mailing list