[PATCH dpdk v2 2/2] graph: replace circular buffer with priority-based bitmap

kirankumark at marvell.com kirankumark at marvell.com
Tue Jun 16 08:30:26 CEST 2026


This will break the ABI. Please check and fix.


>  	rte_node_process_t process; /**< Node process function. */
>  	rte_node_init_t init;       /**< Node init function. */
>  	rte_node_fini_t fini;       /**< Node fini function. */
> diff --git a/lib/graph/rte_graph_model_mcore_dispatch.h b/lib/graph/rte_graph_model_mcore_dispatch.h
> index f9ff3daa88ec..50a473564b56 100644
> --- a/lib/graph/rte_graph_model_mcore_dispatch.h
> +++ b/lib/graph/rte_graph_model_mcore_dispatch.h
> @@ -77,9 +77,13 @@ int rte_graph_model_mcore_dispatch_node_lcore_affinity_set(const char *name,
>  							   unsigned int lcore_id);
>
>  /**
> - * Perform graph walk on the circular buffer and invoke the process function
> + * Perform graph walk on the pending bitmap and invoke the process function
>   * of the nodes and collect the stats.
>   *
> + * Nodes are visited in scheduling order (lowest priority value first).
> + * Source nodes are seeded into the pending bitmap at the start of each walk.
> + * Nodes with different lcore affinity are dispatched to their target lcore.
> + *
>   * @param graph
>   *   Graph pointer returned from rte_graph_lookup function.
>   *
> @@ -88,20 +92,28 @@ int rte_graph_model_mcore_dispatch_node_lcore_affinity_set(const char *name,
>  static inline void
>  rte_graph_walk_mcore_dispatch(struct rte_graph *graph)
>  {
> -	const rte_graph_off_t *cir_start = graph->cir_start;
> -	const rte_node_t mask = graph->cir_mask;
> -	uint32_t head = graph->head;
> +	const uint16_t nwords = graph->nb_sched_words;
>  	struct rte_node *node;
> +	uint16_t word, bit;
>
>  	if (graph->dispatch.wq != NULL)
>  		__rte_graph_mcore_dispatch_sched_wq_process(graph);
>
> -	while (likely(head != graph->tail)) {
> -		node = (struct rte_node *)RTE_PTR_ADD(graph, cir_start[(int32_t)head++]);
> +	/* Seed pending bitmap with source nodes bound to this lcore */
> +	for (word = 0; word < nwords; word++)
> +		graph->pending[word] |= graph->src_pending[word];
>
> -		/* skip the src nodes which not bind with current worker */
> -		if ((int32_t)head < 1 && node->dispatch.lcore_id != graph->dispatch.lcore_id)
> -			continue;
> +	for (;;) {
> +		/* find first word with any pending bit */
> +		for (word = 0; word < nwords; word++)
> +			if (graph->pending[word])
> +				break;
> +		if (word == nwords)
> +			break; /* no more pending nodes */
> +
> +		bit = rte_ctz64(graph->pending[word]);
> +		graph->pending[word] &= ~(1ULL << bit);
> +		node = __rte_graph_pending_node(graph, word, bit);
>
>  		/* Schedule the node until all task/objs are done */
>  		if (node->dispatch.lcore_id != RTE_MAX_LCORE &&
> @@ -111,11 +123,7 @@ rte_graph_walk_mcore_dispatch(struct rte_graph *graph)
>  			continue;
>
>  		__rte_node_process(graph, node);
> -
> -		head = likely((int32_t)head > 0) ? head & mask : head;
>  	}
> -
> -	graph->tail = 0;
>  }
>
>  #ifdef __cplusplus
> diff --git a/lib/graph/rte_graph_model_rtc.h b/lib/graph/rte_graph_model_rtc.h
> index 4b6236e301e3..38feb3e1ca88 100644
> --- a/lib/graph/rte_graph_model_rtc.h
> +++ b/lib/graph/rte_graph_model_rtc.h
> @@ -6,9 +6,12 @@
>  #include "rte_graph_worker_common.h"
>
>  /**
> - * Perform graph walk on the circular buffer and invoke the process function
> + * Perform graph walk on the pending bitmap and invoke the process function
>   * of the nodes and collect the stats.
>   *
> + * Nodes are visited in scheduling order (lowest priority value first).
> + * Source nodes are seeded into the pending bitmap at the start of each walk.
> + *
>   * @param graph
>   *   Graph pointer returned from rte_graph_lookup function.
>   *
> @@ -17,30 +20,52 @@
>  static inline void
>  rte_graph_walk_rtc(struct rte_graph *graph)
>  {
> -	const rte_graph_off_t *cir_start = graph->cir_start;
> -	const rte_node_t mask = graph->cir_mask;
> -	uint32_t head = graph->head;
> +	const uint16_t nwords = graph->nb_sched_words;
>  	struct rte_node *node;
> +	uint16_t word, bit;
>
>  	/*
> -	 * Walk on the source node(s) ((cir_start - head) -> cir_start) and then
> -	 * on the pending streams (cir_start -> (cir_start + mask) -> cir_start)
> -	 * in a circular buffer fashion.
> +	 * Nodes are assigned a bit position (sched_idx) sorted by (priority,
> +	 * node_id) at graph creation time. Source nodes are forced to INT16_MIN
> +	 * priority so they always come first.
>  	 *
> -	 *	+-----+ <= cir_start - head [number of source nodes]
> -	 *	|     |
> -	 *	| ... | <= source nodes
> -	 *	|     |
> -	 *	+-----+ <= cir_start [head = 0] [tail = 0]
> -	 *	|     |
> -	 *	| ... | <= pending streams
> -	 *	|     |
> -	 *	+-----+ <= cir_start + mask
> +	 * sched_table[] maps bit positions to node offsets:
> +	 *
> +	 *   pending[]         sched_table[]
> +	 *   +----------+      +------------------+
> +	 *   | word 0   | ---> | src_node_0       | bit 0 (prio=INT16_MIN)
> +	 *   | 1100...1 |      | src_node_1       | bit 1 (prio=INT16_MIN)
> +	 *   |          |      | mpls_input       | bit 2 (prio=-10)
> +	 *   |          |      | ipv4_input       | bit 3 (prio=0)
> +	 *   |          |      | ...              |
> +	 *   +----------+      +------------------+
> +	 *   | word 1   | ---> | ip4_rewrite      | bit 64 (prio=10)
> +	 *   | ...      |      | ...              |
> +	 *   +----------+      +------------------+
> +	 *
> +	 * Walk: for each word, find lowest set bit (rte_ctz64), process that
> +	 * node, clear the bit, re-read the word (processing may have set new
> +	 * bits), repeat.
> +	 *
> +	 * After each node is processed, restart scanning from word 0 since
> +	 * processing may set bits in any word, including earlier ones.
>  	 */
> -	while (likely(head != graph->tail)) {
> -		node = (struct rte_node *)RTE_PTR_ADD(graph, cir_start[(int32_t)head++]);
> +
> +	/* Seed pending bitmap with source nodes */
> +	for (word = 0; word < nwords; word++)
> +		graph->pending[word] |= graph->src_pending[word];
> +
> +	for (;;) {
> +		/* find first word with any pending bit */
> +		for (word = 0; word < nwords; word++)
> +			if (graph->pending[word])
> +				break;
> +		if (word == nwords)
> +			break; /* no more pending nodes */
> +
> +		bit = rte_ctz64(graph->pending[word]);
> +		graph->pending[word] &= ~(1ULL << bit);
> +		node = __rte_graph_pending_node(graph, word, bit);
>  		__rte_node_process(graph, node);
> -		head = likely((int32_t)head > 0) ? head & mask : head;
>  	}
> -	graph->tail = 0;
>  }
> diff --git a/lib/graph/rte_graph_worker.h b/lib/graph/rte_graph_worker.h
> index b0f952a82cc9..e513d7a655d9 100644
> --- a/lib/graph/rte_graph_worker.h
> +++ b/lib/graph/rte_graph_worker.h
> @@ -14,7 +14,7 @@ extern "C" {
>  #endif
>
>  /**
> - * Perform graph walk on the circular buffer and invoke the process function
> + * Perform graph walk on the pending bitmap and invoke the process function
>   * of the nodes and collect the stats.
>   *
>   * @param graph
> diff --git a/lib/graph/rte_graph_worker_common.h b/lib/graph/rte_graph_worker_common.h
> index 4ab53a533e4c..0e60486043d8 100644
> --- a/lib/graph/rte_graph_worker_common.h
> +++ b/lib/graph/rte_graph_worker_common.h
> @@ -49,15 +49,14 @@ SLIST_HEAD(rte_graph_rq_head, rte_graph);
>   */
>  struct __rte_cache_aligned rte_graph {
>  	/* Fast path area. */
> -	uint32_t tail;		     /**< Tail of circular buffer. */
> -	uint32_t head;		     /**< Head of circular buffer. */
> -	uint32_t cir_mask;	     /**< Circular buffer wrap around mask. */
>  	rte_node_t nb_nodes;	     /**< Number of nodes in the graph. */
> -	rte_graph_off_t *cir_start;  /**< Pointer to circular buffer. */
>  	rte_graph_off_t nodes_start; /**< Offset at which node memory starts. */
> +	rte_graph_off_t *sched_table; /**< Node offset indexed by sched_idx. */
> +	uint64_t *pending;	     /**< Bitmap of pending nodes. */
> +	uint64_t *src_pending;	     /**< Bitmap of source nodes (constant). */
> +	uint16_t nb_sched_words;     /**< Number of uint64_t words in pending bitmaps. */
>  	uint8_t model;		     /**< graph model */
> -	uint8_t reserved1;	     /**< Reserved for future use. */
> -	uint16_t reserved2;	     /**< Reserved for future use. */
> +	/* 26 bytes padding */
>  	union {
>  		/* Fast schedule area for mcore dispatch model */
>  		struct {
> @@ -98,6 +97,7 @@ struct __rte_cache_aligned rte_node {
>  	rte_node_t id;		/**< Node identifier. */
>  	rte_node_t parent_id;	/**< Parent Node identifier. */
>  	rte_edge_t nb_edges;	/**< Number of edges from this node. */
> +	uint16_t sched_idx;	/**< Bit position in pending bitmap. */
>  	uint32_t realloc_count;	/**< Number of times realloced. */
>
>  	char parent[RTE_NODE_NAMESIZE];	/**< Parent node name. */
> @@ -132,7 +132,7 @@ struct __rte_cache_aligned rte_node {
>  		}; /**< Node Context. */
>  		uint16_t size;		/**< Total number of objects available. */
>  		uint16_t idx;		/**< Number of objects used. */
> -		rte_graph_off_t off;	/**< Offset of node in the graph reel. */
> +		rte_graph_off_t off;	/**< Offset of node in the graph memory. */
>  		uint64_t total_cycles;	/**< Cycles spent in this node. */
>  		uint64_t total_calls;	/**< Calls done to this node. */
>  		uint64_t total_objs;	/**< Objects processed by this node. */
> @@ -187,12 +187,12 @@ void __rte_node_stream_alloc_size(struct rte_graph *graph,
>  /**
>   * @internal
>   *
> - * Enqueue a given node to the tail of the graph reel.
> + * Process a node's pending objects and collect stats.
>   *
>   * @param graph
>   *   Pointer Graph object.
>   * @param node
> - *   Pointer to node object to be enqueued.
> + *   Pointer to node object to be processed.
>   */
>  static __rte_always_inline void
>  __rte_node_process(struct rte_graph *graph, struct rte_node *node)
> @@ -220,21 +220,42 @@ __rte_node_process(struct rte_graph *graph, struct rte_node *node)
>  /**
>   * @internal
>   *
> - * Enqueue a given node to the tail of the graph reel.
> + * Get a pointer to a node from the scheduling table.
>   *
>   * @param graph
>   *   Pointer Graph object.
> + * @param word
> + *   Offset in the pending bitmap.
> + * @param bit
> + *   Bit number.
> + *
> + * @return
> + *   Pointer to the node.
> + */
> +static __rte_always_inline struct rte_node *
> +__rte_graph_pending_node(struct rte_graph *graph, uint16_t word, uint16_t bit)
> +{
> +	const uint16_t index = (word * sizeof(*graph->pending) * CHAR_BIT) + bit;
> +	const rte_graph_off_t node_offset = graph->sched_table[index];
> +	return RTE_PTR_ADD(graph, node_offset);
> +}
> +
> +/**
> + * @internal
> + *
> + * Mark a node as pending in the graph scheduling bitmap.
> + *
> + * @param bitmap
> + *   Either graph->pending or graph->src_pending.
>   * @param node
> - *   Pointer to node object to be enqueued.
> + *   Pointer to node object to be marked pending.
>   */
>  static __rte_always_inline void
> -__rte_node_enqueue_tail_update(struct rte_graph *graph, struct rte_node *node)
> +__rte_node_pending_set(uint64_t *bitmap, struct rte_node *node)
>  {
> -	uint32_t tail;
> -
> -	tail = graph->tail;
> -	graph->cir_start[tail++] = node->off;
> -	graph->tail = tail & graph->cir_mask;
> +	const uint16_t word = node->sched_idx / (sizeof(*bitmap) * CHAR_BIT);
> +	const uint16_t bit = node->sched_idx % (sizeof(*bitmap) * CHAR_BIT);
> +	bitmap[word] |= 1ULL << bit;
>  }
>
>  /**
> @@ -242,8 +263,8 @@ __rte_node_enqueue_tail_update(struct rte_graph *graph, struct rte_node *node)
>   *
>   * Enqueue sequence prologue function.
>   *
> - * Updates the node to tail of graph reel and resizes the number of objects
> - * available in the stream as needed.
> + * Marks the node as pending in the scheduling bitmap and resizes the number
> + * of objects available in the stream as needed.
>   *
>   * @param graph
>   *   Pointer to the graph object.
> @@ -259,9 +280,8 @@ __rte_node_enqueue_prologue(struct rte_graph *graph, struct rte_node *node,
>  			    const uint16_t idx, const uint16_t space)
>  {
>
> -	/* Add to the pending stream list if the node is new */
>  	if (idx == 0)
> -		__rte_node_enqueue_tail_update(graph, node);
> +		__rte_node_pending_set(graph->pending, node);
>
>  	if (unlikely(node->size < (idx + space)))
>  		__rte_node_stream_alloc_size(graph, node, node->size + space);
> @@ -293,7 +313,7 @@ __rte_node_next_node_get(struct rte_node *node, rte_edge_t next)
>
>  /**
>   * Enqueue the objs to next node for further processing and set
> - * the next node to pending state in the circular buffer.
> + * the next node to pending state in the scheduling bitmap.
>   *
>   * @param graph
>   *   Graph pointer returned from rte_graph_lookup().
> @@ -321,7 +341,7 @@ rte_node_enqueue(struct rte_graph *graph, struct rte_node *node,
>
>  /**
>   * Enqueue only one obj to next node for further processing and
> - * set the next node to pending state in the circular buffer.
> + * set the next node to pending state in the scheduling bitmap.
>   *
>   * @param graph
>   *   Graph pointer returned from rte_graph_lookup().
> @@ -347,7 +367,7 @@ rte_node_enqueue_x1(struct rte_graph *graph, struct rte_node *node,
>
>  /**
>   * Enqueue only two objs to next node for further processing and
> - * set the next node to pending state in the circular buffer.
> + * set the next node to pending state in the scheduling bitmap.
>   * Same as rte_node_enqueue_x1 but enqueue two objs.
>   *
>   * @param graph
> @@ -377,7 +397,7 @@ rte_node_enqueue_x2(struct rte_graph *graph, struct rte_node *node,
>
>  /**
>   * Enqueue only four objs to next node for further processing and
> - * set the next node to pending state in the circular buffer.
> + * set the next node to pending state in the scheduling bitmap.
>   * Same as rte_node_enqueue_x1 but enqueue four objs.
>   *
>   * @param graph
> @@ -414,7 +434,7 @@ rte_node_enqueue_x4(struct rte_graph *graph, struct rte_node *node,
>
>  /**
>   * Enqueue objs to multiple next nodes for further processing and
> - * set the next nodes to pending state in the circular buffer.
> + * set the next nodes to pending state in the scheduling bitmap.
>   * objs[i] will be enqueued to nexts[i].
>   *
>   * @param graph
> @@ -472,7 +492,7 @@ rte_node_next_stream_get(struct rte_graph *graph, struct rte_node *node,
>  }
>
>  /**
> - * Put the next stream to pending state in the circular buffer
> + * Put the next stream to pending state in the scheduling bitmap
>   * for further processing. Should be invoked after rte_node_next_stream_get().
>   *
>   * @param graph
> @@ -496,8 +516,7 @@ rte_node_next_stream_put(struct rte_graph *graph, struct rte_node *node,
>
>  	node = __rte_node_next_node_get(node, next);
>  	if (node->idx == 0)
> -		__rte_node_enqueue_tail_update(graph, node);
> -
> +		__rte_node_pending_set(graph->pending, node);
>  	node->idx += idx;
>  }
>
> @@ -530,7 +549,7 @@ rte_node_next_stream_move(struct rte_graph *graph, struct rte_node *src,
>  		src->objs = dobjs;
>  		src->size = dsz;
>  		dst->idx = src->idx;
> -		__rte_node_enqueue_tail_update(graph, dst);
> +		__rte_node_pending_set(graph->pending, dst);
>  	} else { /* Move the objects from src node to dst node */
>  		rte_node_enqueue(graph, src, next, src->objs, src->idx);
>  	}
> --
> 2.54.0
>


More information about the dev mailing list