[dpdk-dev] [PATCH v2 2/4] event/cnxk: add Rx adapter fastpath ops

pbhagavatula at marvell.com pbhagavatula at marvell.com
Mon May 24 14:23:00 CEST 2021


From: Pavan Nikhilesh <pbhagavatula at marvell.com>

Add support for event eth Rx adapter fastpath operations.

Signed-off-by: Pavan Nikhilesh <pbhagavatula at marvell.com>
---
 drivers/event/cnxk/cn10k_eventdev.c | 115 ++++++++-
 drivers/event/cnxk/cn10k_worker.c   | 164 +++++++++----
 drivers/event/cnxk/cn10k_worker.h   |  91 +++++--
 drivers/event/cnxk/cn9k_eventdev.c  | 254 ++++++++++++++++++-
 drivers/event/cnxk/cn9k_worker.c    | 364 +++++++++++++++++++---------
 drivers/event/cnxk/cn9k_worker.h    | 158 +++++++++---
 drivers/event/cnxk/meson.build      |   7 +
 7 files changed, 931 insertions(+), 222 deletions(-)

diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 66040df060..b1ad5b2878 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -247,17 +247,120 @@ static void
 cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 {
 	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	const event_dequeue_t sso_hws_deq[2][2][2][2] = {
+#define R(name, f3, f2, f1, f0, flags)                                         \
+	[f3][f2][f1][f0] = cn10k_sso_hws_deq_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_deq_burst[2][2][2][2] = {
+#define R(name, f3, f2, f1, f0, flags)                                         \
+	[f3][f2][f1][f0] = cn10k_sso_hws_deq_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_t sso_hws_tmo_deq[2][2][2][2] = {
+#define R(name, f3, f2, f1, f0, flags)                                         \
+	[f3][f2][f1][f0] = cn10k_sso_hws_tmo_deq_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_tmo_deq_burst[2][2][2][2] = {
+#define R(name, f3, f2, f1, f0, flags)                                         \
+	[f3][f2][f1][f0] = cn10k_sso_hws_tmo_deq_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_t sso_hws_deq_seg[2][2][2][2] = {
+#define R(name, f3, f2, f1, f0, flags)                                         \
+	[f3][f2][f1][f0] = cn10k_sso_hws_deq_seg_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_deq_seg_burst[2][2][2][2] = {
+#define R(name, f3, f2, f1, f0, flags)                                         \
+	[f3][f2][f1][f0] = cn10k_sso_hws_deq_seg_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_t sso_hws_tmo_deq_seg[2][2][2][2] = {
+#define R(name, f3, f2, f1, f0, flags)                                         \
+	[f3][f2][f1][f0] = cn10k_sso_hws_tmo_deq_seg_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_tmo_deq_seg_burst[2][2][2][2] = {
+#define R(name, f3, f2, f1, f0, flags)                                         \
+	[f3][f2][f1][f0] = cn10k_sso_hws_tmo_deq_seg_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
 
 	event_dev->enqueue = cn10k_sso_hws_enq;
 	event_dev->enqueue_burst = cn10k_sso_hws_enq_burst;
 	event_dev->enqueue_new_burst = cn10k_sso_hws_enq_new_burst;
 	event_dev->enqueue_forward_burst = cn10k_sso_hws_enq_fwd_burst;
-
-	event_dev->dequeue = cn10k_sso_hws_deq;
-	event_dev->dequeue_burst = cn10k_sso_hws_deq_burst;
-	if (dev->is_timeout_deq) {
-		event_dev->dequeue = cn10k_sso_hws_tmo_deq;
-		event_dev->dequeue_burst = cn10k_sso_hws_tmo_deq_burst;
+	if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) {
+		event_dev->dequeue = sso_hws_deq_seg
+			[!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+			[!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+			[!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+			[!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+		event_dev->dequeue_burst = sso_hws_deq_seg_burst
+			[!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+			[!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+			[!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+			[!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+		if (dev->is_timeout_deq) {
+			event_dev->dequeue = sso_hws_tmo_deq_seg
+				[!!(dev->rx_offloads &
+				    NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+				[!!(dev->rx_offloads &
+				    NIX_RX_OFFLOAD_CHECKSUM_F)]
+				[!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+				[!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+			event_dev->dequeue_burst = sso_hws_tmo_deq_seg_burst
+				[!!(dev->rx_offloads &
+				    NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+				[!!(dev->rx_offloads &
+				    NIX_RX_OFFLOAD_CHECKSUM_F)]
+				[!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+				[!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+		}
+	} else {
+		event_dev->dequeue = sso_hws_deq
+			[!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+			[!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+			[!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+			[!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+		event_dev->dequeue_burst = sso_hws_deq_burst
+			[!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+			[!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+			[!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+			[!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+		if (dev->is_timeout_deq) {
+			event_dev->dequeue = sso_hws_tmo_deq
+				[!!(dev->rx_offloads &
+				    NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+				[!!(dev->rx_offloads &
+				    NIX_RX_OFFLOAD_CHECKSUM_F)]
+				[!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+				[!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+			event_dev->dequeue_burst = sso_hws_tmo_deq_burst
+				[!!(dev->rx_offloads &
+				    NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+				[!!(dev->rx_offloads &
+				    NIX_RX_OFFLOAD_CHECKSUM_F)]
+				[!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+				[!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+		}
 	}
 }
 
diff --git a/drivers/event/cnxk/cn10k_worker.c b/drivers/event/cnxk/cn10k_worker.c
index e2aa534c64..4365aec992 100644
--- a/drivers/event/cnxk/cn10k_worker.c
+++ b/drivers/event/cnxk/cn10k_worker.c
@@ -60,56 +60,118 @@ cn10k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
 	return 1;
 }
 
-uint16_t __rte_hot
-cn10k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
-	struct cn10k_sso_hws *ws = port;
-
-	RTE_SET_USED(timeout_ticks);
-
-	if (ws->swtag_req) {
-		ws->swtag_req = 0;
-		cnxk_sso_hws_swtag_wait(ws->tag_wqe_op);
-		return 1;
+#define R(name, f3, f2, f1, f0, flags)                                         \
+	uint16_t __rte_hot cn10k_sso_hws_deq_##name(                           \
+		void *port, struct rte_event *ev, uint64_t timeout_ticks)      \
+	{                                                                      \
+		struct cn10k_sso_hws *ws = port;                               \
+                                                                               \
+		RTE_SET_USED(timeout_ticks);                                   \
+                                                                               \
+		if (ws->swtag_req) {                                           \
+			ws->swtag_req = 0;                                     \
+			cnxk_sso_hws_swtag_wait(ws->tag_wqe_op);               \
+			return 1;                                              \
+		}                                                              \
+                                                                               \
+		return cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem);  \
+	}                                                                      \
+                                                                               \
+	uint16_t __rte_hot cn10k_sso_hws_deq_burst_##name(                     \
+		void *port, struct rte_event ev[], uint16_t nb_events,         \
+		uint64_t timeout_ticks)                                        \
+	{                                                                      \
+		RTE_SET_USED(nb_events);                                       \
+                                                                               \
+		return cn10k_sso_hws_deq_##name(port, ev, timeout_ticks);      \
+	}                                                                      \
+                                                                               \
+	uint16_t __rte_hot cn10k_sso_hws_tmo_deq_##name(                       \
+		void *port, struct rte_event *ev, uint64_t timeout_ticks)      \
+	{                                                                      \
+		struct cn10k_sso_hws *ws = port;                               \
+		uint16_t ret = 1;                                              \
+		uint64_t iter;                                                 \
+                                                                               \
+		if (ws->swtag_req) {                                           \
+			ws->swtag_req = 0;                                     \
+			cnxk_sso_hws_swtag_wait(ws->tag_wqe_op);               \
+			return ret;                                            \
+		}                                                              \
+                                                                               \
+		ret = cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem);   \
+		for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)     \
+			ret = cn10k_sso_hws_get_work(ws, ev, flags,            \
+						     ws->lookup_mem);          \
+                                                                               \
+		return ret;                                                    \
+	}                                                                      \
+                                                                               \
+	uint16_t __rte_hot cn10k_sso_hws_tmo_deq_burst_##name(                 \
+		void *port, struct rte_event ev[], uint16_t nb_events,         \
+		uint64_t timeout_ticks)                                        \
+	{                                                                      \
+		RTE_SET_USED(nb_events);                                       \
+                                                                               \
+		return cn10k_sso_hws_tmo_deq_##name(port, ev, timeout_ticks);  \
+	}                                                                      \
+                                                                               \
+	uint16_t __rte_hot cn10k_sso_hws_deq_seg_##name(                       \
+		void *port, struct rte_event *ev, uint64_t timeout_ticks)      \
+	{                                                                      \
+		struct cn10k_sso_hws *ws = port;                               \
+                                                                               \
+		RTE_SET_USED(timeout_ticks);                                   \
+                                                                               \
+		if (ws->swtag_req) {                                           \
+			ws->swtag_req = 0;                                     \
+			cnxk_sso_hws_swtag_wait(ws->tag_wqe_op);               \
+			return 1;                                              \
+		}                                                              \
+                                                                               \
+		return cn10k_sso_hws_get_work(                                 \
+			ws, ev, flags | NIX_RX_MULTI_SEG_F, ws->lookup_mem);   \
+	}                                                                      \
+                                                                               \
+	uint16_t __rte_hot cn10k_sso_hws_deq_seg_burst_##name(                 \
+		void *port, struct rte_event ev[], uint16_t nb_events,         \
+		uint64_t timeout_ticks)                                        \
+	{                                                                      \
+		RTE_SET_USED(nb_events);                                       \
+                                                                               \
+		return cn10k_sso_hws_deq_seg_##name(port, ev, timeout_ticks);  \
+	}                                                                      \
+                                                                               \
+	uint16_t __rte_hot cn10k_sso_hws_tmo_deq_seg_##name(                   \
+		void *port, struct rte_event *ev, uint64_t timeout_ticks)      \
+	{                                                                      \
+		struct cn10k_sso_hws *ws = port;                               \
+		uint16_t ret = 1;                                              \
+		uint64_t iter;                                                 \
+                                                                               \
+		if (ws->swtag_req) {                                           \
+			ws->swtag_req = 0;                                     \
+			cnxk_sso_hws_swtag_wait(ws->tag_wqe_op);               \
+			return ret;                                            \
+		}                                                              \
+                                                                               \
+		ret = cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem);   \
+		for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)     \
+			ret = cn10k_sso_hws_get_work(ws, ev, flags,            \
+						     ws->lookup_mem);          \
+                                                                               \
+		return ret;                                                    \
+	}                                                                      \
+                                                                               \
+	uint16_t __rte_hot cn10k_sso_hws_tmo_deq_seg_burst_##name(             \
+		void *port, struct rte_event ev[], uint16_t nb_events,         \
+		uint64_t timeout_ticks)                                        \
+	{                                                                      \
+		RTE_SET_USED(nb_events);                                       \
+                                                                               \
+		return cn10k_sso_hws_tmo_deq_seg_##name(port, ev,              \
+							timeout_ticks);        \
 	}
 
-	return cn10k_sso_hws_get_work(ws, ev);
-}
-
-uint16_t __rte_hot
-cn10k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
-			uint64_t timeout_ticks)
-{
-	RTE_SET_USED(nb_events);
-
-	return cn10k_sso_hws_deq(port, ev, timeout_ticks);
-}
-
-uint16_t __rte_hot
-cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
-	struct cn10k_sso_hws *ws = port;
-	uint16_t ret = 1;
-	uint64_t iter;
-
-	if (ws->swtag_req) {
-		ws->swtag_req = 0;
-		cnxk_sso_hws_swtag_wait(ws->tag_wqe_op);
-		return ret;
-	}
-
-	ret = cn10k_sso_hws_get_work(ws, ev);
-	for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
-		ret = cn10k_sso_hws_get_work(ws, ev);
-
-	return ret;
-}
-
-uint16_t __rte_hot
-cn10k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[],
-			    uint16_t nb_events, uint64_t timeout_ticks)
-{
-	RTE_SET_USED(nb_events);
-
-	return cn10k_sso_hws_tmo_deq(port, ev, timeout_ticks);
-}
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index 085857bccf..ad320d2dc0 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -83,20 +83,40 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws,
 		cn10k_sso_hws_fwd_group(ws, ev, grp);
 }
 
+static __rte_always_inline void
+cn10k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id,
+		  const uint32_t tag, const uint32_t flags,
+		  const void *const lookup_mem)
+{
+	union mbuf_initializer mbuf_init = {
+		.fields = {.data_off = RTE_PKTMBUF_HEADROOM,
+			   .refcnt = 1,
+			   .nb_segs = 1,
+			   .port = port_id},
+	};
+
+	cn10k_nix_cqe_to_mbuf((struct nix_cqe_hdr_s *)wqe, tag,
+			      (struct rte_mbuf *)mbuf, lookup_mem,
+			      mbuf_init.value, flags);
+}
+
 static __rte_always_inline uint16_t
-cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev)
+cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev,
+		       const uint32_t flags, void *lookup_mem)
 {
 	union {
 		__uint128_t get_work;
 		uint64_t u64[2];
 	} gw;
+	uint64_t mbuf;
 
 	gw.get_work = ws->gw_wdata;
 #if defined(RTE_ARCH_ARM64) && !defined(__clang__)
 	asm volatile(
 		PLT_CPU_FEATURE_PREAMBLE
 		"caspl %[wdata], %H[wdata], %[wdata], %H[wdata], [%[gw_loc]]\n"
-		: [wdata] "+r"(gw.get_work)
+		"sub %[mbuf], %H[wdata], #0x80				\n"
+		: [wdata] "+r"(gw.get_work), [mbuf] "=&r"(mbuf)
 		: [gw_loc] "r"(ws->getwrk_op)
 		: "memory");
 #else
@@ -104,11 +124,25 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev)
 	do {
 		roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op);
 	} while (gw.u64[0] & BIT_ULL(63));
+	mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
 #endif
 	gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
 		    (gw.u64[0] & (0x3FFull << 36)) << 4 |
 		    (gw.u64[0] & 0xffffffff);
 
+	if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) {
+		if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+		    RTE_EVENT_TYPE_ETHDEV) {
+			uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+
+			gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]);
+			cn10k_wqe_to_mbuf(gw.u64[1], mbuf, port,
+					  gw.u64[0] & 0xFFFFF, flags,
+					  lookup_mem);
+			gw.u64[1] = mbuf;
+		}
+	}
+
 	ev->event = gw.u64[0];
 	ev->u64 = gw.u64[1];
 
@@ -123,6 +157,7 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev)
 		__uint128_t get_work;
 		uint64_t u64[2];
 	} gw;
+	uint64_t mbuf;
 
 #ifdef RTE_ARCH_ARM64
 	asm volatile(PLT_CPU_FEATURE_PREAMBLE
@@ -133,19 +168,34 @@ cn10k_sso_hws_get_work_empty(struct cn10k_sso_hws *ws, struct rte_event *ev)
 		     "		ldp %[tag], %[wqp], [%[tag_loc]]	\n"
 		     "		tbnz %[tag], 63, rty%=			\n"
 		     "done%=:	dmb ld					\n"
-		     : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
+		     "		sub %[mbuf], %[wqp], #0x80		\n"
+		     : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]),
+		       [mbuf] "=&r"(mbuf)
 		     : [tag_loc] "r"(ws->tag_wqe_op)
 		     : "memory");
 #else
 	do {
 		roc_load_pair(gw.u64[0], gw.u64[1], ws->tag_wqe_op);
 	} while (gw.u64[0] & BIT_ULL(63));
+	mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
 #endif
 
 	gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
 		    (gw.u64[0] & (0x3FFull << 36)) << 4 |
 		    (gw.u64[0] & 0xffffffff);
 
+	if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) {
+		if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+		    RTE_EVENT_TYPE_ETHDEV) {
+			uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+
+			gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]);
+			cn10k_wqe_to_mbuf(gw.u64[1], mbuf, port,
+					  gw.u64[0] & 0xFFFFF, 0, NULL);
+			gw.u64[1] = mbuf;
+		}
+	}
+
 	ev->event = gw.u64[0];
 	ev->u64 = gw.u64[1];
 
@@ -164,16 +214,29 @@ uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port,
 					       const struct rte_event ev[],
 					       uint16_t nb_events);
 
-uint16_t __rte_hot cn10k_sso_hws_deq(void *port, struct rte_event *ev,
-				     uint64_t timeout_ticks);
-uint16_t __rte_hot cn10k_sso_hws_deq_burst(void *port, struct rte_event ev[],
-					   uint16_t nb_events,
-					   uint64_t timeout_ticks);
-uint16_t __rte_hot cn10k_sso_hws_tmo_deq(void *port, struct rte_event *ev,
-					 uint64_t timeout_ticks);
-uint16_t __rte_hot cn10k_sso_hws_tmo_deq_burst(void *port,
-					       struct rte_event ev[],
-					       uint16_t nb_events,
-					       uint64_t timeout_ticks);
+#define R(name, f3, f2, f1, f0, flags)                                         \
+	uint16_t __rte_hot cn10k_sso_hws_deq_##name(                           \
+		void *port, struct rte_event *ev, uint64_t timeout_ticks);     \
+	uint16_t __rte_hot cn10k_sso_hws_deq_burst_##name(                     \
+		void *port, struct rte_event ev[], uint16_t nb_events,         \
+		uint64_t timeout_ticks);                                       \
+	uint16_t __rte_hot cn10k_sso_hws_tmo_deq_##name(                       \
+		void *port, struct rte_event *ev, uint64_t timeout_ticks);     \
+	uint16_t __rte_hot cn10k_sso_hws_tmo_deq_burst_##name(                 \
+		void *port, struct rte_event ev[], uint16_t nb_events,         \
+		uint64_t timeout_ticks);                                       \
+	uint16_t __rte_hot cn10k_sso_hws_deq_seg_##name(                       \
+		void *port, struct rte_event *ev, uint64_t timeout_ticks);     \
+	uint16_t __rte_hot cn10k_sso_hws_deq_seg_burst_##name(                 \
+		void *port, struct rte_event ev[], uint16_t nb_events,         \
+		uint64_t timeout_ticks);                                       \
+	uint16_t __rte_hot cn10k_sso_hws_tmo_deq_seg_##name(                   \
+		void *port, struct rte_event *ev, uint64_t timeout_ticks);     \
+	uint16_t __rte_hot cn10k_sso_hws_tmo_deq_seg_burst_##name(             \
+		void *port, struct rte_event ev[], uint16_t nb_events,         \
+		uint64_t timeout_ticks);
+
+NIX_RX_FASTPATH_MODES
+#undef R
 
 #endif
diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c
index 8e6bf54df9..16acea4cda 100644
--- a/drivers/event/cnxk/cn9k_eventdev.c
+++ b/drivers/event/cnxk/cn9k_eventdev.c
@@ -252,17 +252,179 @@ static void
 cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 {
 	struct cnxk_sso_evdev *dev = cnxk_sso_pmd_priv(event_dev);
+	/* Single WS modes */
+	const event_dequeue_t sso_hws_deq[2][2][2][2] = {
+#define R(name, f3, f2, f1, f0, flags)                                         \
+	[f3][f2][f1][f0] = cn9k_sso_hws_deq_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_deq_burst[2][2][2][2] = {
+#define R(name, f3, f2, f1, f0, flags)                                         \
+	[f3][f2][f1][f0] = cn9k_sso_hws_deq_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_t sso_hws_tmo_deq[2][2][2][2] = {
+#define R(name, f3, f2, f1, f0, flags)                                         \
+	[f3][f2][f1][f0] = cn9k_sso_hws_tmo_deq_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_tmo_deq_burst[2][2][2][2] = {
+#define R(name, f3, f2, f1, f0, flags)                                         \
+	[f3][f2][f1][f0] = cn9k_sso_hws_tmo_deq_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_t sso_hws_deq_seg[2][2][2][2] = {
+#define R(name, f3, f2, f1, f0, flags)                                         \
+	[f3][f2][f1][f0] = cn9k_sso_hws_deq_seg_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_deq_seg_burst[2][2][2][2] = {
+#define R(name, f3, f2, f1, f0, flags)                                         \
+	[f3][f2][f1][f0] = cn9k_sso_hws_deq_seg_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_t sso_hws_tmo_deq_seg[2][2][2][2] = {
+#define R(name, f3, f2, f1, f0, flags)                                         \
+	[f3][f2][f1][f0] = cn9k_sso_hws_tmo_deq_seg_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_tmo_deq_seg_burst[2][2][2][2] = {
+#define R(name, f3, f2, f1, f0, flags)                                         \
+	[f3][f2][f1][f0] = cn9k_sso_hws_tmo_deq_seg_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	/* Dual WS modes */
+	const event_dequeue_t sso_hws_dual_deq[2][2][2][2] = {
+#define R(name, f3, f2, f1, f0, flags)                                         \
+	[f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_dual_deq_burst[2][2][2][2] = {
+#define R(name, f3, f2, f1, f0, flags)                                         \
+	[f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_t sso_hws_dual_tmo_deq[2][2][2][2] = {
+#define R(name, f3, f2, f1, f0, flags)                                         \
+	[f3][f2][f1][f0] = cn9k_sso_hws_dual_tmo_deq_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_dual_tmo_deq_burst[2][2][2][2] = {
+#define R(name, f3, f2, f1, f0, flags)                                         \
+	[f3][f2][f1][f0] = cn9k_sso_hws_dual_tmo_deq_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_t sso_hws_dual_deq_seg[2][2][2][2] = {
+#define R(name, f3, f2, f1, f0, flags)                                         \
+	[f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_seg_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_dual_deq_seg_burst[2][2][2][2] = {
+#define R(name, f3, f2, f1, f0, flags)                                         \
+	[f3][f2][f1][f0] = cn9k_sso_hws_dual_deq_seg_burst_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_t sso_hws_dual_tmo_deq_seg[2][2][2][2] = {
+#define R(name, f3, f2, f1, f0, flags)                                         \
+	[f3][f2][f1][f0] = cn9k_sso_hws_dual_tmo_deq_seg_##name,
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};
+
+	const event_dequeue_burst_t sso_hws_dual_tmo_deq_seg_burst[2][2][2][2] =
+		{
+#define R(name, f3, f2, f1, f0, flags)                                         \
+	[f3][f2][f1][f0] = cn9k_sso_hws_dual_tmo_deq_seg_burst_##name,
+			NIX_RX_FASTPATH_MODES
+#undef R
+		};
 
 	event_dev->enqueue = cn9k_sso_hws_enq;
 	event_dev->enqueue_burst = cn9k_sso_hws_enq_burst;
 	event_dev->enqueue_new_burst = cn9k_sso_hws_enq_new_burst;
 	event_dev->enqueue_forward_burst = cn9k_sso_hws_enq_fwd_burst;
-
-	event_dev->dequeue = cn9k_sso_hws_deq;
-	event_dev->dequeue_burst = cn9k_sso_hws_deq_burst;
-	if (dev->deq_tmo_ns) {
-		event_dev->dequeue = cn9k_sso_hws_tmo_deq;
-		event_dev->dequeue_burst = cn9k_sso_hws_tmo_deq_burst;
+	if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) {
+		event_dev->dequeue = sso_hws_deq_seg
+			[!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+			[!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+			[!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+			[!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+		event_dev->dequeue_burst = sso_hws_deq_seg_burst
+			[!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+			[!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+			[!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+			[!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+		if (dev->is_timeout_deq) {
+			event_dev->dequeue = sso_hws_tmo_deq_seg
+				[!!(dev->rx_offloads &
+				    NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+				[!!(dev->rx_offloads &
+				    NIX_RX_OFFLOAD_CHECKSUM_F)]
+				[!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+				[!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+			event_dev->dequeue_burst = sso_hws_tmo_deq_seg_burst
+				[!!(dev->rx_offloads &
+				    NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+				[!!(dev->rx_offloads &
+				    NIX_RX_OFFLOAD_CHECKSUM_F)]
+				[!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+				[!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+		}
+	} else {
+		event_dev->dequeue = sso_hws_deq
+			[!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+			[!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+			[!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+			[!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+		event_dev->dequeue_burst = sso_hws_deq_burst
+			[!!(dev->rx_offloads & NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+			[!!(dev->rx_offloads & NIX_RX_OFFLOAD_CHECKSUM_F)]
+			[!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+			[!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+		if (dev->is_timeout_deq) {
+			event_dev->dequeue = sso_hws_tmo_deq
+				[!!(dev->rx_offloads &
+				    NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+				[!!(dev->rx_offloads &
+				    NIX_RX_OFFLOAD_CHECKSUM_F)]
+				[!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+				[!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+			event_dev->dequeue_burst = sso_hws_tmo_deq_burst
+				[!!(dev->rx_offloads &
+				    NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+				[!!(dev->rx_offloads &
+				    NIX_RX_OFFLOAD_CHECKSUM_F)]
+				[!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+				[!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+		}
 	}
 
 	if (dev->dual_ws) {
@@ -272,14 +434,82 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 		event_dev->enqueue_forward_burst =
 			cn9k_sso_hws_dual_enq_fwd_burst;
 
-		event_dev->dequeue = cn9k_sso_hws_dual_deq;
-		event_dev->dequeue_burst = cn9k_sso_hws_dual_deq_burst;
-		if (dev->deq_tmo_ns) {
-			event_dev->dequeue = cn9k_sso_hws_dual_tmo_deq;
-			event_dev->dequeue_burst =
-				cn9k_sso_hws_dual_tmo_deq_burst;
+		if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) {
+			event_dev->dequeue = sso_hws_dual_deq_seg
+				[!!(dev->rx_offloads &
+				    NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+				[!!(dev->rx_offloads &
+				    NIX_RX_OFFLOAD_CHECKSUM_F)]
+				[!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+				[!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+			event_dev->dequeue_burst = sso_hws_dual_deq_seg_burst
+				[!!(dev->rx_offloads &
+				    NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+				[!!(dev->rx_offloads &
+				    NIX_RX_OFFLOAD_CHECKSUM_F)]
+				[!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+				[!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+			if (dev->is_timeout_deq) {
+				event_dev->dequeue = sso_hws_dual_tmo_deq_seg
+					[!!(dev->rx_offloads &
+					    NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+					[!!(dev->rx_offloads &
+					    NIX_RX_OFFLOAD_CHECKSUM_F)]
+					[!!(dev->rx_offloads &
+					    NIX_RX_OFFLOAD_PTYPE_F)]
+					[!!(dev->rx_offloads &
+					    NIX_RX_OFFLOAD_RSS_F)];
+				event_dev->dequeue_burst =
+					sso_hws_dual_tmo_deq_seg_burst
+						[!!(dev->rx_offloads &
+						    NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+						[!!(dev->rx_offloads &
+						    NIX_RX_OFFLOAD_CHECKSUM_F)]
+						[!!(dev->rx_offloads &
+						    NIX_RX_OFFLOAD_PTYPE_F)]
+						[!!(dev->rx_offloads &
+						    NIX_RX_OFFLOAD_RSS_F)];
+			}
+		} else {
+			event_dev->dequeue = sso_hws_dual_deq
+				[!!(dev->rx_offloads &
+				    NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+				[!!(dev->rx_offloads &
+				    NIX_RX_OFFLOAD_CHECKSUM_F)]
+				[!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+				[!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+			event_dev->dequeue_burst = sso_hws_dual_deq_burst
+				[!!(dev->rx_offloads &
+				    NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+				[!!(dev->rx_offloads &
+				    NIX_RX_OFFLOAD_CHECKSUM_F)]
+				[!!(dev->rx_offloads & NIX_RX_OFFLOAD_PTYPE_F)]
+				[!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
+			if (dev->is_timeout_deq) {
+				event_dev->dequeue = sso_hws_dual_tmo_deq
+					[!!(dev->rx_offloads &
+					    NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+					[!!(dev->rx_offloads &
+					    NIX_RX_OFFLOAD_CHECKSUM_F)]
+					[!!(dev->rx_offloads &
+					    NIX_RX_OFFLOAD_PTYPE_F)]
+					[!!(dev->rx_offloads &
+					    NIX_RX_OFFLOAD_RSS_F)];
+				event_dev->dequeue_burst =
+					sso_hws_dual_tmo_deq_burst
+						[!!(dev->rx_offloads &
+						    NIX_RX_OFFLOAD_MARK_UPDATE_F)]
+						[!!(dev->rx_offloads &
+						    NIX_RX_OFFLOAD_CHECKSUM_F)]
+						[!!(dev->rx_offloads &
+						    NIX_RX_OFFLOAD_PTYPE_F)]
+						[!!(dev->rx_offloads &
+						    NIX_RX_OFFLOAD_RSS_F)];
+			}
 		}
 	}
+
+	rte_mb();
 }
 
 static void *
diff --git a/drivers/event/cnxk/cn9k_worker.c b/drivers/event/cnxk/cn9k_worker.c
index 9ceacc98dd..0f031a5fa3 100644
--- a/drivers/event/cnxk/cn9k_worker.c
+++ b/drivers/event/cnxk/cn9k_worker.c
@@ -60,59 +60,121 @@ cn9k_sso_hws_enq_fwd_burst(void *port, const struct rte_event ev[],
 	return 1;
 }
 
-uint16_t __rte_hot
-cn9k_sso_hws_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
-	struct cn9k_sso_hws *ws = port;
-
-	RTE_SET_USED(timeout_ticks);
-
-	if (ws->swtag_req) {
-		ws->swtag_req = 0;
-		cnxk_sso_hws_swtag_wait(ws->tag_op);
-		return 1;
+#define R(name, f3, f2, f1, f0, flags)                                         \
+	uint16_t __rte_hot cn9k_sso_hws_deq_##name(                            \
+		void *port, struct rte_event *ev, uint64_t timeout_ticks)      \
+	{                                                                      \
+		struct cn9k_sso_hws *ws = port;                                \
+                                                                               \
+		RTE_SET_USED(timeout_ticks);                                   \
+                                                                               \
+		if (ws->swtag_req) {                                           \
+			ws->swtag_req = 0;                                     \
+			cnxk_sso_hws_swtag_wait(ws->tag_op);                   \
+			return 1;                                              \
+		}                                                              \
+                                                                               \
+		return cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem);   \
+	}                                                                      \
+                                                                               \
+	uint16_t __rte_hot cn9k_sso_hws_deq_burst_##name(                      \
+		void *port, struct rte_event ev[], uint16_t nb_events,         \
+		uint64_t timeout_ticks)                                        \
+	{                                                                      \
+		RTE_SET_USED(nb_events);                                       \
+                                                                               \
+		return cn9k_sso_hws_deq_##name(port, ev, timeout_ticks);       \
+	}                                                                      \
+                                                                               \
+	uint16_t __rte_hot cn9k_sso_hws_tmo_deq_##name(                        \
+		void *port, struct rte_event *ev, uint64_t timeout_ticks)      \
+	{                                                                      \
+		struct cn9k_sso_hws *ws = port;                                \
+		uint16_t ret = 1;                                              \
+		uint64_t iter;                                                 \
+                                                                               \
+		if (ws->swtag_req) {                                           \
+			ws->swtag_req = 0;                                     \
+			cnxk_sso_hws_swtag_wait(ws->tag_op);                   \
+			return ret;                                            \
+		}                                                              \
+                                                                               \
+		ret = cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem);    \
+		for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)     \
+			ret = cn9k_sso_hws_get_work(ws, ev, flags,             \
+						    ws->lookup_mem);           \
+                                                                               \
+		return ret;                                                    \
+	}                                                                      \
+                                                                               \
+	uint16_t __rte_hot cn9k_sso_hws_tmo_deq_burst_##name(                  \
+		void *port, struct rte_event ev[], uint16_t nb_events,         \
+		uint64_t timeout_ticks)                                        \
+	{                                                                      \
+		RTE_SET_USED(nb_events);                                       \
+                                                                               \
+		return cn9k_sso_hws_tmo_deq_##name(port, ev, timeout_ticks);   \
+	}                                                                      \
+                                                                               \
+	uint16_t __rte_hot cn9k_sso_hws_deq_seg_##name(                        \
+		void *port, struct rte_event *ev, uint64_t timeout_ticks)      \
+	{                                                                      \
+		struct cn9k_sso_hws *ws = port;                                \
+                                                                               \
+		RTE_SET_USED(timeout_ticks);                                   \
+                                                                               \
+		if (ws->swtag_req) {                                           \
+			ws->swtag_req = 0;                                     \
+			cnxk_sso_hws_swtag_wait(ws->tag_op);                   \
+			return 1;                                              \
+		}                                                              \
+                                                                               \
+		return cn9k_sso_hws_get_work(                                  \
+			ws, ev, flags | NIX_RX_MULTI_SEG_F, ws->lookup_mem);   \
+	}                                                                      \
+                                                                               \
+	uint16_t __rte_hot cn9k_sso_hws_deq_seg_burst_##name(                  \
+		void *port, struct rte_event ev[], uint16_t nb_events,         \
+		uint64_t timeout_ticks)                                        \
+	{                                                                      \
+		RTE_SET_USED(nb_events);                                       \
+                                                                               \
+		return cn9k_sso_hws_deq_seg_##name(port, ev, timeout_ticks);   \
+	}                                                                      \
+                                                                               \
+	uint16_t __rte_hot cn9k_sso_hws_tmo_deq_seg_##name(                    \
+		void *port, struct rte_event *ev, uint64_t timeout_ticks)      \
+	{                                                                      \
+		struct cn9k_sso_hws *ws = port;                                \
+		uint16_t ret = 1;                                              \
+		uint64_t iter;                                                 \
+                                                                               \
+		if (ws->swtag_req) {                                           \
+			ws->swtag_req = 0;                                     \
+			cnxk_sso_hws_swtag_wait(ws->tag_op);                   \
+			return ret;                                            \
+		}                                                              \
+                                                                               \
+		ret = cn9k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem);    \
+		for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)     \
+			ret = cn9k_sso_hws_get_work(ws, ev, flags,             \
+						    ws->lookup_mem);           \
+                                                                               \
+		return ret;                                                    \
+	}                                                                      \
+                                                                               \
+	uint16_t __rte_hot cn9k_sso_hws_tmo_deq_seg_burst_##name(              \
+		void *port, struct rte_event ev[], uint16_t nb_events,         \
+		uint64_t timeout_ticks)                                        \
+	{                                                                      \
+		RTE_SET_USED(nb_events);                                       \
+                                                                               \
+		return cn9k_sso_hws_tmo_deq_seg_##name(port, ev,               \
+						       timeout_ticks);         \
 	}
 
-	return cn9k_sso_hws_get_work(ws, ev);
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_deq_burst(void *port, struct rte_event ev[], uint16_t nb_events,
-		       uint64_t timeout_ticks)
-{
-	RTE_SET_USED(nb_events);
-
-	return cn9k_sso_hws_deq(port, ev, timeout_ticks);
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_tmo_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
-	struct cn9k_sso_hws *ws = port;
-	uint16_t ret = 1;
-	uint64_t iter;
-
-	if (ws->swtag_req) {
-		ws->swtag_req = 0;
-		cnxk_sso_hws_swtag_wait(ws->tag_op);
-		return ret;
-	}
-
-	ret = cn9k_sso_hws_get_work(ws, ev);
-	for (iter = 1; iter < timeout_ticks && (ret == 0); iter++)
-		ret = cn9k_sso_hws_get_work(ws, ev);
-
-	return ret;
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[],
-			   uint16_t nb_events, uint64_t timeout_ticks)
-{
-	RTE_SET_USED(nb_events);
-
-	return cn9k_sso_hws_tmo_deq(port, ev, timeout_ticks);
-}
+NIX_RX_FASTPATH_MODES
+#undef R
 
 /* Dual ws ops. */
 
@@ -172,65 +234,145 @@ cn9k_sso_hws_dual_enq_fwd_burst(void *port, const struct rte_event ev[],
 	return 1;
 }
 
-uint16_t __rte_hot
-cn9k_sso_hws_dual_deq(void *port, struct rte_event *ev, uint64_t timeout_ticks)
-{
-	struct cn9k_sso_hws_dual *dws = port;
-	uint16_t gw;
-
-	RTE_SET_USED(timeout_ticks);
-	if (dws->swtag_req) {
-		dws->swtag_req = 0;
-		cnxk_sso_hws_swtag_wait(dws->ws_state[!dws->vws].tag_op);
-		return 1;
+#define R(name, f3, f2, f1, f0, flags)                                         \
+	uint16_t __rte_hot cn9k_sso_hws_dual_deq_##name(                       \
+		void *port, struct rte_event *ev, uint64_t timeout_ticks)      \
+	{                                                                      \
+		struct cn9k_sso_hws_dual *dws = port;                          \
+		uint16_t gw;                                                   \
+                                                                               \
+		RTE_SET_USED(timeout_ticks);                                   \
+		if (dws->swtag_req) {                                          \
+			dws->swtag_req = 0;                                    \
+			cnxk_sso_hws_swtag_wait(                               \
+				dws->ws_state[!dws->vws].tag_op);              \
+			return 1;                                              \
+		}                                                              \
+                                                                               \
+		gw = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws],      \
+						&dws->ws_state[!dws->vws], ev, \
+						flags, dws->lookup_mem);       \
+		dws->vws = !dws->vws;                                          \
+		return gw;                                                     \
+	}                                                                      \
+                                                                               \
+	uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst_##name(                 \
+		void *port, struct rte_event ev[], uint16_t nb_events,         \
+		uint64_t timeout_ticks)                                        \
+	{                                                                      \
+		RTE_SET_USED(nb_events);                                       \
+                                                                               \
+		return cn9k_sso_hws_dual_deq_##name(port, ev, timeout_ticks);  \
+	}                                                                      \
+                                                                               \
+	uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq_##name(                   \
+		void *port, struct rte_event *ev, uint64_t timeout_ticks)      \
+	{                                                                      \
+		struct cn9k_sso_hws_dual *dws = port;                          \
+		uint16_t ret = 1;                                              \
+		uint64_t iter;                                                 \
+                                                                               \
+		if (dws->swtag_req) {                                          \
+			dws->swtag_req = 0;                                    \
+			cnxk_sso_hws_swtag_wait(                               \
+				dws->ws_state[!dws->vws].tag_op);              \
+			return ret;                                            \
+		}                                                              \
+                                                                               \
+		ret = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws],     \
+						 &dws->ws_state[!dws->vws],    \
+						 ev, flags, dws->lookup_mem);  \
+		dws->vws = !dws->vws;                                          \
+		for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) {   \
+			ret = cn9k_sso_hws_dual_get_work(                      \
+				&dws->ws_state[dws->vws],                      \
+				&dws->ws_state[!dws->vws], ev, flags,          \
+				dws->lookup_mem);                              \
+			dws->vws = !dws->vws;                                  \
+		}                                                              \
+                                                                               \
+		return ret;                                                    \
+	}                                                                      \
+                                                                               \
+	uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq_burst_##name(             \
+		void *port, struct rte_event ev[], uint16_t nb_events,         \
+		uint64_t timeout_ticks)                                        \
+	{                                                                      \
+		RTE_SET_USED(nb_events);                                       \
+                                                                               \
+		return cn9k_sso_hws_dual_tmo_deq_##name(port, ev,              \
+							timeout_ticks);        \
+	}                                                                      \
+                                                                               \
+	uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_##name(                   \
+		void *port, struct rte_event *ev, uint64_t timeout_ticks)      \
+	{                                                                      \
+		struct cn9k_sso_hws_dual *dws = port;                          \
+		uint16_t gw;                                                   \
+                                                                               \
+		RTE_SET_USED(timeout_ticks);                                   \
+		if (dws->swtag_req) {                                          \
+			dws->swtag_req = 0;                                    \
+			cnxk_sso_hws_swtag_wait(                               \
+				dws->ws_state[!dws->vws].tag_op);              \
+			return 1;                                              \
+		}                                                              \
+                                                                               \
+		gw = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws],      \
+						&dws->ws_state[!dws->vws], ev, \
+						flags, dws->lookup_mem);       \
+		dws->vws = !dws->vws;                                          \
+		return gw;                                                     \
+	}                                                                      \
+                                                                               \
+	uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_burst_##name(             \
+		void *port, struct rte_event ev[], uint16_t nb_events,         \
+		uint64_t timeout_ticks)                                        \
+	{                                                                      \
+		RTE_SET_USED(nb_events);                                       \
+                                                                               \
+		return cn9k_sso_hws_dual_deq_seg_##name(port, ev,              \
+							timeout_ticks);        \
+	}                                                                      \
+                                                                               \
+	uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq_seg_##name(               \
+		void *port, struct rte_event *ev, uint64_t timeout_ticks)      \
+	{                                                                      \
+		struct cn9k_sso_hws_dual *dws = port;                          \
+		uint16_t ret = 1;                                              \
+		uint64_t iter;                                                 \
+                                                                               \
+		if (dws->swtag_req) {                                          \
+			dws->swtag_req = 0;                                    \
+			cnxk_sso_hws_swtag_wait(                               \
+				dws->ws_state[!dws->vws].tag_op);              \
+			return ret;                                            \
+		}                                                              \
+                                                                               \
+		ret = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws],     \
+						 &dws->ws_state[!dws->vws],    \
+						 ev, flags, dws->lookup_mem);  \
+		dws->vws = !dws->vws;                                          \
+		for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) {   \
+			ret = cn9k_sso_hws_dual_get_work(                      \
+				&dws->ws_state[dws->vws],                      \
+				&dws->ws_state[!dws->vws], ev, flags,          \
+				dws->lookup_mem);                              \
+			dws->vws = !dws->vws;                                  \
+		}                                                              \
+                                                                               \
+		return ret;                                                    \
+	}                                                                      \
+                                                                               \
+	uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq_seg_burst_##name(         \
+		void *port, struct rte_event ev[], uint16_t nb_events,         \
+		uint64_t timeout_ticks)                                        \
+	{                                                                      \
+		RTE_SET_USED(nb_events);                                       \
+                                                                               \
+		return cn9k_sso_hws_dual_tmo_deq_seg_##name(port, ev,          \
+							    timeout_ticks);    \
 	}
 
-	gw = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws],
-					&dws->ws_state[!dws->vws], ev);
-	dws->vws = !dws->vws;
-	return gw;
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_dual_deq_burst(void *port, struct rte_event ev[],
-			    uint16_t nb_events, uint64_t timeout_ticks)
-{
-	RTE_SET_USED(nb_events);
-
-	return cn9k_sso_hws_dual_deq(port, ev, timeout_ticks);
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_dual_tmo_deq(void *port, struct rte_event *ev,
-			  uint64_t timeout_ticks)
-{
-	struct cn9k_sso_hws_dual *dws = port;
-	uint16_t ret = 1;
-	uint64_t iter;
-
-	if (dws->swtag_req) {
-		dws->swtag_req = 0;
-		cnxk_sso_hws_swtag_wait(dws->ws_state[!dws->vws].tag_op);
-		return ret;
-	}
-
-	ret = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws],
-					 &dws->ws_state[!dws->vws], ev);
-	dws->vws = !dws->vws;
-	for (iter = 1; iter < timeout_ticks && (ret == 0); iter++) {
-		ret = cn9k_sso_hws_dual_get_work(&dws->ws_state[dws->vws],
-						 &dws->ws_state[!dws->vws], ev);
-		dws->vws = !dws->vws;
-	}
-
-	return ret;
-}
-
-uint16_t __rte_hot
-cn9k_sso_hws_dual_tmo_deq_burst(void *port, struct rte_event ev[],
-				uint16_t nb_events, uint64_t timeout_ticks)
-{
-	RTE_SET_USED(nb_events);
-
-	return cn9k_sso_hws_dual_tmo_deq(port, ev, timeout_ticks);
-}
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index f5a4401465..1fde652ff8 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -128,17 +128,38 @@ cn9k_sso_hws_dual_forward_event(struct cn9k_sso_hws_dual *dws,
 	}
 }
 
+static __rte_always_inline void
+cn9k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id,
+		 const uint32_t tag, const uint32_t flags,
+		 const void *const lookup_mem)
+{
+	union mbuf_initializer mbuf_init = {
+		.fields = {.data_off = RTE_PKTMBUF_HEADROOM,
+			   .refcnt = 1,
+			   .nb_segs = 1,
+			   .port = port_id},
+	};
+
+	cn9k_nix_cqe_to_mbuf((struct nix_cqe_hdr_s *)wqe, tag,
+			     (struct rte_mbuf *)mbuf, lookup_mem,
+			     mbuf_init.value, flags);
+}
+
 static __rte_always_inline uint16_t
 cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws,
 			   struct cn9k_sso_hws_state *ws_pair,
-			   struct rte_event *ev)
+			   struct rte_event *ev, const uint32_t flags,
+			   const void *const lookup_mem)
 {
 	const uint64_t set_gw = BIT_ULL(16) | 1;
 	union {
 		__uint128_t get_work;
 		uint64_t u64[2];
 	} gw;
+	uint64_t mbuf;
 
+	if (flags & NIX_RX_OFFLOAD_PTYPE_F)
+		rte_prefetch_non_temporal(lookup_mem);
 #ifdef RTE_ARCH_ARM64
 	asm volatile(PLT_CPU_FEATURE_PREAMBLE
 		     "rty%=:					\n"
@@ -147,7 +168,10 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws,
 		     "		tbnz %[tag], 63, rty%=		\n"
 		     "done%=:	str %[gw], [%[pong]]		\n"
 		     "		dmb ld				\n"
-		     : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
+		     "		sub %[mbuf], %[wqp], #0x80	\n"
+		     "		prfm pldl1keep, [%[mbuf]]	\n"
+		     : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]),
+		       [mbuf] "=&r"(mbuf)
 		     : [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op),
 		       [gw] "r"(set_gw), [pong] "r"(ws_pair->getwrk_op));
 #else
@@ -156,12 +180,26 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws,
 		gw.u64[0] = plt_read64(ws->tag_op);
 	gw.u64[1] = plt_read64(ws->wqp_op);
 	plt_write64(set_gw, ws_pair->getwrk_op);
+	mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
 #endif
 
 	gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
 		    (gw.u64[0] & (0x3FFull << 36)) << 4 |
 		    (gw.u64[0] & 0xffffffff);
 
+	if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) {
+		if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+		    RTE_EVENT_TYPE_ETHDEV) {
+			uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+
+			gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]);
+			cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port,
+					 gw.u64[0] & 0xFFFFF, flags,
+					 lookup_mem);
+			gw.u64[1] = mbuf;
+		}
+	}
+
 	ev->event = gw.u64[0];
 	ev->u64 = gw.u64[1];
 
@@ -169,16 +207,21 @@ cn9k_sso_hws_dual_get_work(struct cn9k_sso_hws_state *ws,
 }
 
 static __rte_always_inline uint16_t
-cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev)
+cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev,
+		      const uint32_t flags, const void *const lookup_mem)
 {
 	union {
 		__uint128_t get_work;
 		uint64_t u64[2];
 	} gw;
+	uint64_t mbuf;
 
 	plt_write64(BIT_ULL(16) | /* wait for work. */
 			    1,	  /* Use Mask set 0. */
 		    ws->getwrk_op);
+
+	if (flags & NIX_RX_OFFLOAD_PTYPE_F)
+		rte_prefetch_non_temporal(lookup_mem);
 #ifdef RTE_ARCH_ARM64
 	asm volatile(PLT_CPU_FEATURE_PREAMBLE
 		     "		ldr %[tag], [%[tag_loc]]	\n"
@@ -190,7 +233,10 @@ cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev)
 		     "		ldr %[wqp], [%[wqp_loc]]	\n"
 		     "		tbnz %[tag], 63, rty%=		\n"
 		     "done%=:	dmb ld				\n"
-		     : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
+		     "		sub %[mbuf], %[wqp], #0x80	\n"
+		     "		prfm pldl1keep, [%[mbuf]]	\n"
+		     : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]),
+		       [mbuf] "=&r"(mbuf)
 		     : [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op));
 #else
 	gw.u64[0] = plt_read64(ws->tag_op);
@@ -198,12 +244,26 @@ cn9k_sso_hws_get_work(struct cn9k_sso_hws *ws, struct rte_event *ev)
 		gw.u64[0] = plt_read64(ws->tag_op);
 
 	gw.u64[1] = plt_read64(ws->wqp_op);
+	mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
 #endif
 
 	gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
 		    (gw.u64[0] & (0x3FFull << 36)) << 4 |
 		    (gw.u64[0] & 0xffffffff);
 
+	if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) {
+		if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+		    RTE_EVENT_TYPE_ETHDEV) {
+			uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+
+			gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]);
+			cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port,
+					 gw.u64[0] & 0xFFFFF, flags,
+					 lookup_mem);
+			gw.u64[1] = mbuf;
+		}
+	}
+
 	ev->event = gw.u64[0];
 	ev->u64 = gw.u64[1];
 
@@ -218,6 +278,7 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev)
 		__uint128_t get_work;
 		uint64_t u64[2];
 	} gw;
+	uint64_t mbuf;
 
 #ifdef RTE_ARCH_ARM64
 	asm volatile(PLT_CPU_FEATURE_PREAMBLE
@@ -230,7 +291,9 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev)
 		     "		ldr %[wqp], [%[wqp_loc]]	\n"
 		     "		tbnz %[tag], 63, rty%=		\n"
 		     "done%=:	dmb ld				\n"
-		     : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1])
+		     "		sub %[mbuf], %[wqp], #0x80	\n"
+		     : [tag] "=&r"(gw.u64[0]), [wqp] "=&r"(gw.u64[1]),
+		       [mbuf] "=&r"(mbuf)
 		     : [tag_loc] "r"(ws->tag_op), [wqp_loc] "r"(ws->wqp_op));
 #else
 	gw.u64[0] = plt_read64(ws->tag_op);
@@ -238,12 +301,25 @@ cn9k_sso_hws_get_work_empty(struct cn9k_sso_hws_state *ws, struct rte_event *ev)
 		gw.u64[0] = plt_read64(ws->tag_op);
 
 	gw.u64[1] = plt_read64(ws->wqp_op);
+	mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
 #endif
 
 	gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
 		    (gw.u64[0] & (0x3FFull << 36)) << 4 |
 		    (gw.u64[0] & 0xffffffff);
 
+	if (CNXK_TT_FROM_EVENT(gw.u64[0]) != SSO_TT_EMPTY) {
+		if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+		    RTE_EVENT_TYPE_ETHDEV) {
+			uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+
+			gw.u64[0] = CNXK_CLR_SUB_EVENT(gw.u64[0]);
+			cn9k_wqe_to_mbuf(gw.u64[1], mbuf, port,
+					 gw.u64[0] & 0xFFFFF, 0, NULL);
+			gw.u64[1] = mbuf;
+		}
+	}
+
 	ev->event = gw.u64[0];
 	ev->u64 = gw.u64[1];
 
@@ -274,28 +350,54 @@ uint16_t __rte_hot cn9k_sso_hws_dual_enq_fwd_burst(void *port,
 						   const struct rte_event ev[],
 						   uint16_t nb_events);
 
-uint16_t __rte_hot cn9k_sso_hws_deq(void *port, struct rte_event *ev,
-				    uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_deq_burst(void *port, struct rte_event ev[],
-					  uint16_t nb_events,
-					  uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_tmo_deq(void *port, struct rte_event *ev,
-					uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_tmo_deq_burst(void *port, struct rte_event ev[],
-					      uint16_t nb_events,
-					      uint64_t timeout_ticks);
-
-uint16_t __rte_hot cn9k_sso_hws_dual_deq(void *port, struct rte_event *ev,
-					 uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst(void *port,
-					       struct rte_event ev[],
-					       uint16_t nb_events,
-					       uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq(void *port, struct rte_event *ev,
-					     uint64_t timeout_ticks);
-uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq_burst(void *port,
-						   struct rte_event ev[],
-						   uint16_t nb_events,
-						   uint64_t timeout_ticks);
+#define R(name, f3, f2, f1, f0, flags)                                         \
+	uint16_t __rte_hot cn9k_sso_hws_deq_##name(                            \
+		void *port, struct rte_event *ev, uint64_t timeout_ticks);     \
+	uint16_t __rte_hot cn9k_sso_hws_deq_burst_##name(                      \
+		void *port, struct rte_event ev[], uint16_t nb_events,         \
+		uint64_t timeout_ticks);                                       \
+	uint16_t __rte_hot cn9k_sso_hws_tmo_deq_##name(                        \
+		void *port, struct rte_event *ev, uint64_t timeout_ticks);     \
+	uint16_t __rte_hot cn9k_sso_hws_tmo_deq_burst_##name(                  \
+		void *port, struct rte_event ev[], uint16_t nb_events,         \
+		uint64_t timeout_ticks);                                       \
+	uint16_t __rte_hot cn9k_sso_hws_deq_seg_##name(                        \
+		void *port, struct rte_event *ev, uint64_t timeout_ticks);     \
+	uint16_t __rte_hot cn9k_sso_hws_deq_seg_burst_##name(                  \
+		void *port, struct rte_event ev[], uint16_t nb_events,         \
+		uint64_t timeout_ticks);                                       \
+	uint16_t __rte_hot cn9k_sso_hws_tmo_deq_seg_##name(                    \
+		void *port, struct rte_event *ev, uint64_t timeout_ticks);     \
+	uint16_t __rte_hot cn9k_sso_hws_tmo_deq_seg_burst_##name(              \
+		void *port, struct rte_event ev[], uint16_t nb_events,         \
+		uint64_t timeout_ticks);
+
+NIX_RX_FASTPATH_MODES
+#undef R
+
+#define R(name, f3, f2, f1, f0, flags)                                         \
+	uint16_t __rte_hot cn9k_sso_hws_dual_deq_##name(                       \
+		void *port, struct rte_event *ev, uint64_t timeout_ticks);     \
+	uint16_t __rte_hot cn9k_sso_hws_dual_deq_burst_##name(                 \
+		void *port, struct rte_event ev[], uint16_t nb_events,         \
+		uint64_t timeout_ticks);                                       \
+	uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq_##name(                   \
+		void *port, struct rte_event *ev, uint64_t timeout_ticks);     \
+	uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq_burst_##name(             \
+		void *port, struct rte_event ev[], uint16_t nb_events,         \
+		uint64_t timeout_ticks);                                       \
+	uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_##name(                   \
+		void *port, struct rte_event *ev, uint64_t timeout_ticks);     \
+	uint16_t __rte_hot cn9k_sso_hws_dual_deq_seg_burst_##name(             \
+		void *port, struct rte_event ev[], uint16_t nb_events,         \
+		uint64_t timeout_ticks);                                       \
+	uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq_seg_##name(               \
+		void *port, struct rte_event *ev, uint64_t timeout_ticks);     \
+	uint16_t __rte_hot cn9k_sso_hws_dual_tmo_deq_seg_burst_##name(         \
+		void *port, struct rte_event ev[], uint16_t nb_events,         \
+		uint64_t timeout_ticks);
+
+NIX_RX_FASTPATH_MODES
+#undef R
 
 #endif
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index 87bb9f76a9..0a3bcffd64 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -8,6 +8,13 @@ if not is_linux or not dpdk_conf.get('RTE_ARCH_64')
     subdir_done()
 endif
 
+extra_flags = ['-Wno-strict-aliasing']
+foreach flag: extra_flags
+    if cc.has_argument(flag)
+        cflags += flag
+    endif
+endforeach
+
 sources = files(
         'cn9k_eventdev.c',
         'cn9k_worker.c',
-- 
2.17.1



More information about the dev mailing list