[dpdk-dev] [PATCH v4 1/2] lib/ring: apis to support configurable element size
Ananyev, Konstantin
konstantin.ananyev at intel.com
Mon Oct 21 11:04:39 CEST 2019
> >
> > fix patch
> > =======
> >
> > From a2be5a9b136333a56d466ef042c655e522ca7012 Mon Sep 17 00:00:00
> > 2001
> > From: Konstantin Ananyev <konstantin.ananyev at intel.com>
> > Date: Fri, 18 Oct 2019 15:50:43 +0100
> > Subject: [PATCH] fix1
> >
> > Signed-off-by: Konstantin Ananyev <konstantin.ananyev at intel.com>
> > ---
> > lib/librte_ring/rte_ring_elem.h | 4 ++--
> > 1 file changed, 2 insertions(+), 2 deletions(-)
> >
> > diff --git a/lib/librte_ring/rte_ring_elem.h b/lib/librte_ring/rte_ring_elem.h
> > index 92e92f150..5e1819069 100644
> > --- a/lib/librte_ring/rte_ring_elem.h
> > +++ b/lib/librte_ring/rte_ring_elem.h
> > @@ -118,7 +118,7 @@ struct rte_ring *rte_ring_create_elem(const char
> > *name, unsigned count,
> > uint32_t sz = n * (esize / sizeof(uint32_t)); \
> > if (likely(idx + n < size)) { \
> > for (i = 0; i < (sz & ((~(unsigned)0x7))); i += 8, idx += 8) { \
> > - memcpy (ring + i, obj + i, 8 * sizeof (uint32_t)); \
> > + memcpy (ring + idx, obj + i, 8 * sizeof
> > + (uint32_t)); \
> > } \
> > switch (n & 0x7) { \
> > case 7: \
> > @@ -153,7 +153,7 @@ struct rte_ring *rte_ring_create_elem(const char
> > *name, unsigned count,
> > uint32_t sz = n * (esize / sizeof(uint32_t)); \
> > if (likely(idx + n < size)) { \
> > for (i = 0; i < (sz & ((~(unsigned)0x7))); i += 8, idx += 8) { \
> > - memcpy (obj + i, ring + i, 8 * sizeof (uint32_t)); \
> > + memcpy (obj + i, ring + idx, 8 * sizeof
> Actually, this fix alone is not enough. 'idx' needs to be normalized to elements of type 'uint32_t'.
>
> > + (uint32_t)); \
> > } \
> > switch (n & 0x7) { \
> > case 7: \
> > --
> > 2.17.1
> >
> > update patch (remove macros)
> > =========================
> >
> > From 18b388e877b97e243f807f27a323e876b30869dd Mon Sep 17 00:00:00
> > 2001
> > From: Konstantin Ananyev <konstantin.ananyev at intel.com>
> > Date: Fri, 18 Oct 2019 17:35:43 +0100
> > Subject: [PATCH] update1
> >
> > Signed-off-by: Konstantin Ananyev <konstantin.ananyev at intel.com>
> > ---
> > lib/librte_ring/rte_ring_elem.h | 141 ++++++++++++++++----------------
> > 1 file changed, 70 insertions(+), 71 deletions(-)
> >
> > diff --git a/lib/librte_ring/rte_ring_elem.h b/lib/librte_ring/rte_ring_elem.h
> > index 5e1819069..eb706b12f 100644
> > --- a/lib/librte_ring/rte_ring_elem.h
> > +++ b/lib/librte_ring/rte_ring_elem.h
> > @@ -109,75 +109,74 @@ __rte_experimental struct rte_ring
> > *rte_ring_create_elem(const char *name, unsigned count,
> > unsigned esize, int socket_id, unsigned flags);
> >
> > -#define ENQUEUE_PTRS_GEN(r, ring_start, prod_head, obj_table, esize, n)
> > do { \
> > - unsigned int i; \
> > - const uint32_t size = (r)->size; \
> > - uint32_t idx = prod_head & (r)->mask; \
> > - uint32_t *ring = (uint32_t *)ring_start; \
> > - uint32_t *obj = (uint32_t *)obj_table; \
> > - uint32_t sz = n * (esize / sizeof(uint32_t)); \
> > - if (likely(idx + n < size)) { \
> > - for (i = 0; i < (sz & ((~(unsigned)0x7))); i += 8, idx += 8) { \
> > - memcpy (ring + idx, obj + i, 8 * sizeof (uint32_t)); \
> > - } \
> > - switch (n & 0x7) { \
> > - case 7: \
> > - ring[idx++] = obj[i++]; /* fallthrough */ \
> > - case 6: \
> > - ring[idx++] = obj[i++]; /* fallthrough */ \
> > - case 5: \
> > - ring[idx++] = obj[i++]; /* fallthrough */ \
> > - case 4: \
> > - ring[idx++] = obj[i++]; /* fallthrough */ \
> > - case 3: \
> > - ring[idx++] = obj[i++]; /* fallthrough */ \
> > - case 2: \
> > - ring[idx++] = obj[i++]; /* fallthrough */ \
> > - case 1: \
> > - ring[idx++] = obj[i++]; /* fallthrough */ \
> > - } \
> > - } else { \
> > - for (i = 0; idx < size; i++, idx++)\
> > - ring[idx] = obj[i]; \
> > - for (idx = 0; i < n; i++, idx++) \
> > - ring[idx] = obj[i]; \
> > - } \
> > -} while (0)
> > -
> > -#define DEQUEUE_PTRS_GEN(r, ring_start, cons_head, obj_table, esize, n)
> > do { \
> > - unsigned int i; \
> > - uint32_t idx = cons_head & (r)->mask; \
> > - const uint32_t size = (r)->size; \
> > - uint32_t *ring = (uint32_t *)ring_start; \
> > - uint32_t *obj = (uint32_t *)obj_table; \
> > - uint32_t sz = n * (esize / sizeof(uint32_t)); \
> > - if (likely(idx + n < size)) { \
> > - for (i = 0; i < (sz & ((~(unsigned)0x7))); i += 8, idx += 8) { \
> > - memcpy (obj + i, ring + idx, 8 * sizeof (uint32_t)); \
> > - } \
> > - switch (n & 0x7) { \
> > - case 7: \
> > - obj[i++] = ring[idx++]; /* fallthrough */ \
> > - case 6: \
> > - obj[i++] = ring[idx++]; /* fallthrough */ \
> > - case 5: \
> > - obj[i++] = ring[idx++]; /* fallthrough */ \
> > - case 4: \
> > - obj[i++] = ring[idx++]; /* fallthrough */ \
> > - case 3: \
> > - obj[i++] = ring[idx++]; /* fallthrough */ \
> > - case 2: \
> > - obj[i++] = ring[idx++]; /* fallthrough */ \
> > - case 1: \
> > - obj[i++] = ring[idx++]; /* fallthrough */ \
> > - } \
> > - } else { \
> > - for (i = 0; idx < size; i++, idx++) \
> > - obj[i] = ring[idx]; \
> > - for (idx = 0; i < n; i++, idx++) \
> > - obj[i] = ring[idx]; \
> > - } \
> > -} while (0)
> > +static __rte_always_inline void
> > +copy_elems(uint32_t du32[], const uint32_t su32[], uint32_t num,
> > +uint32_t esize) {
> > + uint32_t i, sz;
> > +
> > + sz = (num * esize) / sizeof(uint32_t);
> > +
> > + for (i = 0; i < (sz & ~7); i += 8)
> > + memcpy(du32 + i, su32 + i, 8 * sizeof(uint32_t));
> > +
> > + switch (sz & 7) {
> > + case 7: du32[sz - 7] = su32[sz - 7]; /* fallthrough */
> > + case 6: du32[sz - 6] = su32[sz - 6]; /* fallthrough */
> > + case 5: du32[sz - 5] = su32[sz - 5]; /* fallthrough */
> > + case 4: du32[sz - 4] = su32[sz - 4]; /* fallthrough */
> > + case 3: du32[sz - 3] = su32[sz - 3]; /* fallthrough */
> > + case 2: du32[sz - 2] = su32[sz - 2]; /* fallthrough */
> > + case 1: du32[sz - 1] = su32[sz - 1]; /* fallthrough */
> > + }
> > +}
> > +
> > +static __rte_always_inline void
> > +enqueue_elems(struct rte_ring *r, void *ring_start, uint32_t prod_head,
> > + void *obj_table, uint32_t num, uint32_t esize) {
> > + uint32_t idx, n;
> > + uint32_t *du32;
> > + const uint32_t *su32;
> > +
> > + const uint32_t size = r->size;
> > +
> > + idx = prod_head & (r)->mask;
> Same here: 'idx' needs to be normalized to elements of type 'uint32_t', and similar fixes are needed for the other variables.
Oops, true, my bad.
> I have applied your
> suggestion in 6/6 in v6 along with my corrections. The rte_ring_elem test cases are added in 3/6. I have verified that they are running
> fine (they are done for 64b alone, will add more). Hopefully, there are no more errors.
Cool, we'll re-run the perf test on my box.
Thanks
Konstantin
>
> > +
> > + du32 = (uint32_t *)ring_start + idx;
> > + su32 = obj_table;
> > +
> > + if (idx + num < size)
> > + copy_elems(du32, su32, num, esize);
> > + else {
> > + n = size - idx;
> > + copy_elems(du32, su32, n, esize);
> > + copy_elems(ring_start, su32 + n, num - n, esize);
> > + }
> > +}
> > +
> > +static __rte_always_inline void
> > +dequeue_elems(struct rte_ring *r, void *ring_start, uint32_t cons_head,
> > + void *obj_table, uint32_t num, uint32_t esize) {
> > + uint32_t idx, n;
> > + uint32_t *du32;
> > + const uint32_t *su32;
> > +
> > + const uint32_t size = r->size;
> > +
> > + idx = cons_head & (r)->mask;
> > +
> > + su32 = (uint32_t *)ring_start + idx;
> > + du32 = obj_table;
> > +
> > + if (idx + num < size)
> > + copy_elems(du32, su32, num, esize);
> > + else {
> > + n = size - idx;
> > + copy_elems(du32, su32, n, esize);
> > + copy_elems(du32 + n, ring_start, num - n, esize);
> > + }
> > +}
> >
> > /* Between load and load. there might be cpu reorder in weak model
> > * (powerpc/arm).
> > @@ -232,7 +231,7 @@ __rte_ring_do_enqueue_elem(struct rte_ring *r, void
> > * const obj_table,
> > if (n == 0)
> > goto end;
> >
> > - ENQUEUE_PTRS_GEN(r, &r[1], prod_head, obj_table, esize, n);
> > + enqueue_elems(r, &r[1], prod_head, obj_table, n, esize);
> >
> > update_tail(&r->prod, prod_head, prod_next, is_sp, 1);
> > end:
> > @@ -279,7 +278,7 @@ __rte_ring_do_dequeue_elem(struct rte_ring *r, void
> > *obj_table,
> > if (n == 0)
> > goto end;
> >
> > - DEQUEUE_PTRS_GEN(r, &r[1], cons_head, obj_table, esize, n);
> > + dequeue_elems(r, &r[1], cons_head, obj_table, n, esize);
> >
> > update_tail(&r->cons, cons_head, cons_next, is_sc, 0);
> >
> > --
> > 2.17.1
> >
More information about the dev
mailing list