[dpdk-dev] [PATCH v3 1/2] mbuf: support attaching external buffer to mbuf

Yongseok Koh yskoh at mellanox.com
Thu Apr 19 03:11:04 CEST 2018


This patch introduces a new way of attaching an external buffer to a mbuf.

Attaching an external buffer is quite similar to mbuf indirection in
replacing buffer addresses and length of a mbuf, but a few differences:
  - As refcnt of a direct mbuf is at least 2, the buffer area of a direct
    mbuf must be read-only. But external buffer has its own refcnt and it
    starts from 1. Unless multiple mbufs are attached to a mbuf having an
    external buffer, the external buffer is writable.
  - There's no need to allocate buffer from a mempool. Any buffer can be
    attached with appropriate free callback.
  - Smaller metadata is required to maintain shared data such as refcnt.

Signed-off-by: Yongseok Koh <yskoh at mellanox.com>
---

Submitting only non-mlx5 patches to meet deadline for RC1. mlx5 patches
will be submitted separately rebased on a differnet patchset which
accommodates new memory hotplug design to mlx PMDs.

v3:
* implement external buffer attachment instead of introducing buf_off for
  mbuf indirection.

 lib/librte_mbuf/rte_mbuf.h | 276 ++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 248 insertions(+), 28 deletions(-)

diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index 06eceba37..e64160c81 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -326,7 +326,7 @@ extern "C" {
 		PKT_TX_MACSEC |		 \
 		PKT_TX_SEC_OFFLOAD)
 
-#define __RESERVED           (1ULL << 61) /**< reserved for future mbuf use */
+#define EXT_ATTACHED_MBUF    (1ULL << 61) /**< Mbuf having external buffer */
 
 #define IND_ATTACHED_MBUF    (1ULL << 62) /**< Indirect attached mbuf */
 
@@ -568,6 +568,24 @@ struct rte_mbuf {
 
 } __rte_cache_aligned;
 
+/**
+ * Function typedef of callback to free externally attached buffer.
+ */
+typedef void (*rte_mbuf_extbuf_free_callback_t)(void *addr, void *opaque);
+
+/**
+ * Shared data at the end of an external buffer.
+ */
+struct rte_mbuf_ext_shared_info {
+	rte_mbuf_extbuf_free_callback_t free_cb; /**< Free callback function */
+	void *fcb_opaque;                        /**< Free callback argument */
+	RTE_STD_C11
+	union {
+		rte_atomic16_t refcnt_atomic; /**< Atomically accessed refcnt */
+		uint16_t refcnt;          /**< Non-atomically accessed refcnt */
+	};
+};
+
 /**< Maximum number of nb_segs allowed. */
 #define RTE_MBUF_MAX_NB_SEGS	UINT16_MAX
 
@@ -693,9 +711,14 @@ rte_mbuf_to_baddr(struct rte_mbuf *md)
 #define RTE_MBUF_INDIRECT(mb)   ((mb)->ol_flags & IND_ATTACHED_MBUF)
 
 /**
+ * Returns TRUE if given mbuf has external buffer, or FALSE otherwise.
+ */
+#define RTE_MBUF_HAS_EXTBUF(mb) ((mb)->ol_flags & EXT_ATTACHED_MBUF)
+
+/**
  * Returns TRUE if given mbuf is direct, or FALSE otherwise.
  */
-#define RTE_MBUF_DIRECT(mb)     (!RTE_MBUF_INDIRECT(mb))
+#define RTE_MBUF_DIRECT(mb) (!RTE_MBUF_INDIRECT(mb) && !RTE_MBUF_HAS_EXTBUF(mb))
 
 /**
  * Private data in case of pktmbuf pool.
@@ -821,6 +844,58 @@ rte_mbuf_refcnt_set(struct rte_mbuf *m, uint16_t new_value)
 
 #endif /* RTE_MBUF_REFCNT_ATOMIC */
 
+/**
+ * Reads the refcnt of an external buffer.
+ *
+ * @param shinfo
+ *   Shared data of the external buffer.
+ * @return
+ *   Reference count number.
+ */
+static inline uint16_t
+rte_extbuf_refcnt_read(const struct rte_mbuf_ext_shared_info *shinfo)
+{
+	return (uint16_t)(rte_atomic16_read(&shinfo->refcnt_atomic));
+}
+
+/**
+ * Set refcnt of an external buffer.
+ *
+ * @param shinfo
+ *   Shared data of the external buffer.
+ * @param new_value
+ *   Value set
+ */
+static inline void
+rte_extbuf_refcnt_set(struct rte_mbuf_ext_shared_info *shinfo,
+	uint16_t new_value)
+{
+	rte_atomic16_set(&shinfo->refcnt_atomic, new_value);
+}
+
+/**
+ * Add given value to refcnt of an external buffer and return its new
+ * value.
+ *
+ * @param shinfo
+ *   Shared data of the external buffer.
+ * @param value
+ *   Value to add/subtract
+ * @return
+ *   Updated value
+ */
+static inline uint16_t
+rte_extbuf_refcnt_update(struct rte_mbuf_ext_shared_info *shinfo,
+	int16_t value)
+{
+	if (likely(rte_extbuf_refcnt_read(shinfo) == 1)) {
+		rte_extbuf_refcnt_set(shinfo, 1 + value);
+		return 1 + value;
+	}
+
+	return (uint16_t)rte_atomic16_add_return(&shinfo->refcnt_atomic, value);
+}
+
 /** Mbuf prefetch */
 #define RTE_MBUF_PREFETCH_TO_FREE(m) do {       \
 	if ((m) != NULL)                        \
@@ -1195,11 +1270,120 @@ static inline int rte_pktmbuf_alloc_bulk(struct rte_mempool *pool,
 }
 
 /**
+ * Return shared data of external buffer of a mbuf.
+ *
+ * @param m
+ *   The pointer to the mbuf.
+ * @return
+ *   The address of the shared data.
+ */
+static inline struct rte_mbuf_ext_shared_info *
+rte_mbuf_ext_shinfo(struct rte_mbuf *m)
+{
+	return (struct rte_mbuf_ext_shared_info *)
+		RTE_PTR_ADD(m->buf_addr, m->buf_len);
+}
+
+/**
+ * Attach an external buffer to a mbuf.
+ *
+ * User-managed anonymous buffer can be attached to an mbuf. When attaching
+ * it, corresponding free callback function and its argument should be
+ * provided. This callback function will be called once all the mbufs are
+ * detached from the buffer.
+ *
+ * More mbufs can be attached to the same external buffer by
+ * ``rte_pktmbuf_attach()`` once the external buffer has been attached by
+ * this API.
+ *
+ * Detachment can be done by either ``rte_pktmbuf_detach_extbuf()`` or
+ * ``rte_pktmbuf_detach()``.
+ *
+ * A few bytes in the trailer of the provided buffer will be dedicated for
+ * shared data (``struct rte_mbuf_ext_shared_info``) to store refcnt,
+ * callback function and so on. The shared data can be referenced by
+ * ``rte_mbuf_ext_shinfo()``
+ *
+ * Attaching an external buffer is quite similar to mbuf indirection in
+ * replacing buffer addresses and length of a mbuf, but a few differences:
+ * - As refcnt of a direct mbuf is at least 2, the buffer area of a direct
+ *   mbuf must be read-only. But external buffer has its own refcnt and it
+ *   starts from 1. Unless multiple mbufs are attached to a mbuf having an
+ *   external buffer, the external buffer is writable.
+ * - There's no need to allocate buffer from a mempool. Any buffer can be
+ *   attached with appropriate free callback.
+ * - Smaller metadata is required to maintain shared data such as refcnt.
+ *
+ * @warning
+ * @b EXPERIMENTAL: This API may change without prior notice.
+ * Once external buffer is enabled by allowing experimental API,
+ * ``RTE_MBUF_DIRECT()`` and ``RTE_MBUF_INDIRECT()`` are no longer
+ * exclusive. A mbuf can be consiered direct if it is neither indirect nor
+ * having external buffer.
+ *
+ * @param m
+ *   The pointer to the mbuf.
+ * @param buf_addr
+ *   The pointer to the external buffer we're attaching to.
+ * @param buf_len
+ *   The size of the external buffer we're attaching to. This must be larger
+ *   than the size of ``struct rte_mbuf_ext_shared_info`` and padding for
+ *   alignment. If not enough, this function will return NULL.
+ * @param free_cb
+ *   Free callback function to call when the external buffer needs to be freed.
+ * @param fcb_opaque
+ *   Argument for the free callback function.
+ * @return
+ *   A pointer to the new start of the data on success, return NULL otherwise.
+ */
+static inline char * __rte_experimental
+rte_pktmbuf_attach_extbuf(struct rte_mbuf *m, void *buf_addr,
+	uint16_t buf_len, rte_mbuf_extbuf_free_callback_t free_cb,
+	void *fcb_opaque)
+{
+	void *buf_end = RTE_PTR_ADD(buf_addr, buf_len);
+	struct rte_mbuf_ext_shared_info *shinfo;
+
+	shinfo = RTE_PTR_ALIGN_FLOOR(RTE_PTR_SUB(buf_end, sizeof(*shinfo)),
+			sizeof(uintptr_t));
+
+	if ((void *)shinfo <= buf_addr)
+		return NULL;
+
+	m->buf_addr = buf_addr;
+	m->buf_iova = rte_mempool_virt2iova(buf_addr);
+	m->buf_len = RTE_PTR_DIFF(shinfo, buf_addr);
+	m->data_len = 0;
+
+	rte_pktmbuf_reset_headroom(m);
+	m->ol_flags |= EXT_ATTACHED_MBUF;
+
+	rte_extbuf_refcnt_set(shinfo, 1);
+	shinfo->free_cb = free_cb;
+	shinfo->fcb_opaque = fcb_opaque;
+
+	return (char *)m->buf_addr + m->data_off;
+}
+
+/**
+ * Detach the external buffer attached to a mbuf, same as
+ * ``rte_pktmbuf_detach()``
+ *
+ * @param m
+ *   The mbuf having external buffer.
+ */
+#define rte_pktmbuf_detach_extbuf(m) rte_pktmbuf_detach(m)
+
+/**
  * Attach packet mbuf to another packet mbuf.
  *
- * After attachment we refer the mbuf we attached as 'indirect',
- * while mbuf we attached to as 'direct'.
- * The direct mbuf's reference counter is incremented.
+ * If the mbuf we are attaching to isn't a direct buffer and is attached to
+ * an external buffer, the mbuf being attached will be attached to the
+ * external buffer instead of mbuf indirection.
+ *
+ * Otherwise, the mbuf will be indirectly attached. After attachment we
+ * refer the mbuf we attached as 'indirect', while mbuf we attached to as
+ * 'direct'.  The direct mbuf's reference counter is incremented.
  *
  * Right now, not supported:
  *  - attachment for already indirect mbuf (e.g. - mi has to be direct).
@@ -1213,19 +1397,18 @@ static inline int rte_pktmbuf_alloc_bulk(struct rte_mempool *pool,
  */
 static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *m)
 {
-	struct rte_mbuf *md;
-
 	RTE_ASSERT(RTE_MBUF_DIRECT(mi) &&
 	    rte_mbuf_refcnt_read(mi) == 1);
 
-	/* if m is not direct, get the mbuf that embeds the data */
-	if (RTE_MBUF_DIRECT(m))
-		md = m;
-	else
-		md = rte_mbuf_from_indirect(m);
+	if (RTE_MBUF_HAS_EXTBUF(m)) {
+		rte_extbuf_refcnt_update(rte_mbuf_ext_shinfo(m), 1);
+		mi->ol_flags = m->ol_flags;
+	} else {
+		rte_mbuf_refcnt_update(rte_mbuf_from_indirect(m), 1);
+		mi->priv_size = m->priv_size;
+		mi->ol_flags = m->ol_flags | IND_ATTACHED_MBUF;
+	}
 
-	rte_mbuf_refcnt_update(md, 1);
-	mi->priv_size = m->priv_size;
 	mi->buf_iova = m->buf_iova;
 	mi->buf_addr = m->buf_addr;
 	mi->buf_len = m->buf_len;
@@ -1241,7 +1424,6 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *m)
 	mi->next = NULL;
 	mi->pkt_len = mi->data_len;
 	mi->nb_segs = 1;
-	mi->ol_flags = m->ol_flags | IND_ATTACHED_MBUF;
 	mi->packet_type = m->packet_type;
 	mi->timestamp = m->timestamp;
 
@@ -1250,12 +1432,53 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *m)
 }
 
 /**
- * Detach an indirect packet mbuf.
+ * @internal used by rte_pktmbuf_detach().
+ *
+ * Decrement the reference counter of the external buffer. When the
+ * reference counter becomes 0, the buffer is freed by pre-registered
+ * callback.
+ */
+static inline void
+__rte_pktmbuf_free_extbuf(struct rte_mbuf *m)
+{
+	struct rte_mbuf_ext_shared_info *shinfo;
+
+	RTE_ASSERT(RTE_MBUF_HAS_EXTBUF(m));
+
+	shinfo = rte_mbuf_ext_shinfo(m);
+
+	if (rte_extbuf_refcnt_update(shinfo, -1) == 0)
+		shinfo->free_cb(m->buf_addr, shinfo->fcb_opaque);
+}
+
+/**
+ * @internal used by rte_pktmbuf_detach().
  *
+ * Decrement the direct mbuf's reference counter. When the reference
+ * counter becomes 0, the direct mbuf is freed.
+ */
+static inline void
+__rte_pktmbuf_free_direct(struct rte_mbuf *m)
+{
+	struct rte_mbuf *md = rte_mbuf_from_indirect(m);
+
+	RTE_ASSERT(RTE_MBUF_INDIRECT(m));
+
+	if (rte_mbuf_refcnt_update(md, -1) == 0) {
+		md->next = NULL;
+		md->nb_segs = 1;
+		rte_mbuf_refcnt_set(md, 1);
+		rte_mbuf_raw_free(md);
+	}
+}
+
+/**
+ * Detach a packet mbuf from external buffer or direct buffer.
+ *
+ *  - decrement refcnt and free the external/direct buffer if refcnt
+ *    becomes zero.
  *  - restore original mbuf address and length values.
  *  - reset pktmbuf data and data_len to their default values.
- *  - decrement the direct mbuf's reference counter. When the
- *  reference counter becomes 0, the direct mbuf is freed.
  *
  * All other fields of the given packet mbuf will be left intact.
  *
@@ -1264,10 +1487,14 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *m)
  */
 static inline void rte_pktmbuf_detach(struct rte_mbuf *m)
 {
-	struct rte_mbuf *md = rte_mbuf_from_indirect(m);
 	struct rte_mempool *mp = m->pool;
 	uint32_t mbuf_size, buf_len, priv_size;
 
+	if (RTE_MBUF_HAS_EXTBUF(m))
+		__rte_pktmbuf_free_extbuf(m);
+	else
+		__rte_pktmbuf_free_direct(m);
+
 	priv_size = rte_pktmbuf_priv_size(mp);
 	mbuf_size = sizeof(struct rte_mbuf) + priv_size;
 	buf_len = rte_pktmbuf_data_room_size(mp);
@@ -1279,13 +1506,6 @@ static inline void rte_pktmbuf_detach(struct rte_mbuf *m)
 	rte_pktmbuf_reset_headroom(m);
 	m->data_len = 0;
 	m->ol_flags = 0;
-
-	if (rte_mbuf_refcnt_update(md, -1) == 0) {
-		md->next = NULL;
-		md->nb_segs = 1;
-		rte_mbuf_refcnt_set(md, 1);
-		rte_mbuf_raw_free(md);
-	}
 }
 
 /**
@@ -1309,7 +1529,7 @@ rte_pktmbuf_prefree_seg(struct rte_mbuf *m)
 
 	if (likely(rte_mbuf_refcnt_read(m) == 1)) {
 
-		if (RTE_MBUF_INDIRECT(m))
+		if (!RTE_MBUF_DIRECT(m))
 			rte_pktmbuf_detach(m);
 
 		if (m->next != NULL) {
@@ -1321,7 +1541,7 @@ rte_pktmbuf_prefree_seg(struct rte_mbuf *m)
 
 	} else if (__rte_mbuf_refcnt_update(m, -1) == 0) {
 
-		if (RTE_MBUF_INDIRECT(m))
+		if (!RTE_MBUF_DIRECT(m))
 			rte_pktmbuf_detach(m);
 
 		if (m->next != NULL) {
-- 
2.11.0



More information about the dev mailing list