[dpdk-dev] [DPDK 18.08] ethdev: add flow API to expand RSS flows

Nelio Laranjeiro nelio.laranjeiro at 6wind.com
Mon May 28 11:54:38 CEST 2018


Introduce a helper for PMDs to easily expand a flow item list with an
RSS action into multiple flow item lists carrying priority information.

For instance, a user item list of "eth / end" with RSS action types
"ipv4-udp ipv6-udp end" needs to be expanded into three item lists:

 - eth
 - eth / ipv4 / udp
 - eth / ipv6 / udp

to match the user request.  Some drivers cannot satisfy such a request
without this expansion; this API is there to help them.  Only PMDs
should use this API for their internal processing; the application
still handles a single flow.
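
A PMD is expected to call it along these lines (a minimal sketch; the
"supported" table contents, the buffer size and the surrounding
"pattern"/"rss" variables are illustrative assumptions, not part of
this patch):

	static const enum rte_flow_item_type expand_eth_ipv4_udp[] = {
		RTE_FLOW_ITEM_TYPE_ETH,
		RTE_FLOW_ITEM_TYPE_IPV4,
		RTE_FLOW_ITEM_TYPE_UDP,
		RTE_FLOW_ITEM_TYPE_END,
	};
	static const enum rte_flow_item_type *supported[] = {
		expand_eth_ipv4_udp,
		/* ... one list per pattern the device can match ... */
		NULL,
	};
	uint8_t buffer[2048];
	struct rte_flow_expand_rss *exp = (void *)buffer;
	int ret;

	ret = rte_flow_expand_rss(exp, sizeof(buffer), pattern,
				  rss->types, supported);
	if (ret > (int)sizeof(buffer))
		return -ENOMEM; /* or retry with a larger buffer */
	/* exp->entries expanded patterns are now in exp->patterns[]. */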

Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro at 6wind.com>
---
 lib/librte_ethdev/rte_flow.c        | 404 ++++++++++++++++++++++++++++
 lib/librte_ethdev/rte_flow_driver.h |  32 +++
 2 files changed, 436 insertions(+)

diff --git a/lib/librte_ethdev/rte_flow.c b/lib/librte_ethdev/rte_flow.c
index 7947529da..0c42fc31c 100644
--- a/lib/librte_ethdev/rte_flow.c
+++ b/lib/librte_ethdev/rte_flow.c
@@ -507,3 +507,407 @@ rte_flow_copy(struct rte_flow_desc *desc, size_t len,
 	}
 	return 0;
 }
+
+/* Copy the existing items list and expand with new items. */
+static int
+rte_flow_expand_rss_item(void *buf, size_t size,
+			 const struct rte_flow_item *items,
+			 const struct rte_flow_item *newitems)
+{
+	void *data = buf;
+	const struct rte_flow_item *item;
+	struct rte_flow_item *dst;
+	size_t data_size = 0;
+
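+	/*
+	 * The size required is always accumulated in data_size; items are
+	 * only written to the buffer while they fit, so a caller can
+	 * measure with a too-small buffer and retry.
+	 */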
+	dst = data;
+	/* Copy item structures into the buffer. */
+	for (item = items; item->type != RTE_FLOW_ITEM_TYPE_END; ++item) {
+		if (item->type == RTE_FLOW_ITEM_TYPE_VOID)
+			continue;
+		if (data_size + sizeof(*item) <= size) {
+			memcpy(dst, item, sizeof(*item));
+			++dst;
+		}
+		data_size += sizeof(*item);
+	}
+	item = newitems;
+	do {
+		if (item->type == RTE_FLOW_ITEM_TYPE_VOID) {
+			++item;
+			continue;
+		}
+		if (data_size + sizeof(*item) <= size) {
+			memcpy(dst, item, sizeof(*item));
+			++dst;
+		}
+		data_size += sizeof(*item);
+		++item;
+	} while ((item - 1)->type != RTE_FLOW_ITEM_TYPE_END);
+	/*
+	 * Copy item spec, last and mask into the buffer and set the
+	 * pointers accordingly.
+	 */
+	dst = data;
+	for (item = items; item->type != RTE_FLOW_ITEM_TYPE_END; ++item) {
+		if (item->type == RTE_FLOW_ITEM_TYPE_VOID)
+			continue;
+		if (item->spec) {
+			size_t s = flow_item_spec_copy(NULL, item, ITEM_SPEC);
+			void *addr = (data_size + s) <= size ?
+				(void *)((uintptr_t)data + data_size) :
+				NULL;
+
+			data_size += flow_item_spec_copy(addr, item, ITEM_SPEC);
+			if (addr)
+				dst->spec = addr;
+		}
+		if (item->last) {
+			size_t s = flow_item_spec_copy(NULL, item, ITEM_LAST);
+			void *addr = (data_size + s) <= size ?
+				(void *)((uintptr_t)data + data_size) :
+				NULL;
+
+			data_size += flow_item_spec_copy(addr, item, ITEM_LAST);
+			if (addr)
+				dst->last = addr;
+		}
+		if (item->mask) {
+			size_t s = flow_item_spec_copy(NULL, item, ITEM_MASK);
+			void *addr = (data_size + s) <= size ?
+				(void *)((uintptr_t)data + data_size) :
+				NULL;
+
+			data_size += flow_item_spec_copy(addr, item, ITEM_MASK);
+			if (addr)
+				dst->mask = addr;
+		}
+		if (data_size <= size)
+			++dst;
+	}
+	return data_size;
+}
+
+/** Verify the expansion is supported by the device. */
+static int
+rte_flow_expand_rss_is_supported(const enum rte_flow_item_type **supported,
+				 const enum rte_flow_item_type *expand)
+{
+	unsigned int i;
+	unsigned int sidx;
+	unsigned int eidx;
+
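+	/*
+	 * Walk the expansion and each supported list in parallel, skipping
+	 * VOID items on either side; they match only when both lists reach
+	 * END at the same position.
+	 */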
+	for (i = 0; supported[i]; ++i) {
+		sidx = 0;
+		eidx = 0;
+		while (1) {
+			if (expand[eidx] != supported[i][sidx]) {
+				break;
+			} else if ((expand[eidx] == RTE_FLOW_ITEM_TYPE_END) &&
+				   (supported[i][sidx] ==
+				    RTE_FLOW_ITEM_TYPE_END)) {
+				return 1;
+			} else if ((expand[eidx] == RTE_FLOW_ITEM_TYPE_END) ||
+				   (supported[i][sidx] ==
+				    RTE_FLOW_ITEM_TYPE_END)) {
+				break;
+			} else if (expand[eidx] == RTE_FLOW_ITEM_TYPE_VOID) {
+				++eidx;
+				continue;
+			} else if (supported[i][sidx] ==
+				   RTE_FLOW_ITEM_TYPE_VOID) {
+				++sidx;
+				continue;
+			}
+			++sidx;
+			++eidx;
+		}
+	}
+	return 0;
+}
+
+/** Update internal buffer. */
+static inline void
+rte_flow_expand_rss_update(struct rte_flow_expand_rss *buf, void *addr,
+			   uint32_t priority)
+{
+	buf->priority[buf->entries] = priority;
+	buf->patterns[buf->entries] = addr;
+	buf->entries++;
+}
+
+int
+rte_flow_expand_rss(struct rte_flow_expand_rss *buf, size_t size,
+		    const struct rte_flow_item *pat, uint64_t types,
+		    const enum rte_flow_item_type **supported)
+{
+	const struct rte_flow_item *item;
+	uint32_t priority = 0;
+	struct {
+		uint32_t eth:1; /**< Ethernet item is present. */
+		uint32_t ipv4:1; /**< IPv4 item is present. */
+		uint32_t ipv6:1; /**< IPv6 item is present. */
+		uint32_t ipv6_ex:1; /**< IPv6 EXT item is present. */
+		uint32_t udp:1; /**< UDP item is present. */
+		uint32_t tcp:1; /**< TCP item is present. */
+		uint32_t sctp:1; /**< SCTP item is present. */
+		uint32_t vxlan:1; /**< VXLAN item is present. */
+		uint32_t geneve:1; /**< GENEVE item is present. */
+		uint32_t nvgre:1; /**< NVGRE item is present. */
+	} layer = { .eth = 0 };
+	const struct rte_flow_item end[] = {
+		{ .type = RTE_FLOW_ITEM_TYPE_END },
+	};
+	void *addr = NULL;
+	uint32_t off; /**< Offset to write new item data starting from *buf. */
+	uint32_t max_entries;
+
+	for (max_entries = 0; supported[max_entries]; ++max_entries)
+		;
+	off = sizeof(*buf) +
+		/* Size for the list of patterns. */
+		sizeof(*buf->patterns) +
+		RTE_ALIGN_CEIL(max_entries * sizeof(struct rte_flow_item *),
+			       sizeof(void *)) +
+		/* Size for priorities. */
+		sizeof(*buf->priority) +
+		RTE_ALIGN_CEIL(max_entries * sizeof(uint32_t), sizeof(void *));
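+	/*
+	 * Buffer layout: the rte_flow_expand_rss header is followed by the
+	 * priority array, the pattern pointer array and finally the
+	 * expanded pattern data itself.
+	 */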
+	if (off < size) {
+		buf->priority = (void *)(buf + 1);
+		buf->patterns = (void *)&buf->priority[max_entries];
+		buf->patterns[0] = (void *)&buf->patterns[max_entries];
+		addr = buf->patterns[0];
+		buf->entries = 0;
+	}
+	/*
+	 * Parse the pattern and record the protocol layers it already
+	 * contains; RSS types which can no longer match the pattern are
+	 * skipped below.
+	 */
+	for (item = pat; item->type != RTE_FLOW_ITEM_TYPE_END; ++item) {
+		switch (item->type) {
+		case RTE_FLOW_ITEM_TYPE_ETH:
+			layer.eth = 1;
+			break;
+		case RTE_FLOW_ITEM_TYPE_IPV4:
+			layer.ipv4 = 1;
+			break;
+		case RTE_FLOW_ITEM_TYPE_IPV6:
+			layer.ipv6 = 1;
+			break;
+		case RTE_FLOW_ITEM_TYPE_IPV6_EXT:
+			layer.ipv6_ex = 1;
+			break;
+		case RTE_FLOW_ITEM_TYPE_UDP:
+			layer.udp = 1;
+			break;
+		case RTE_FLOW_ITEM_TYPE_TCP:
+			layer.tcp = 1;
+			break;
+		case RTE_FLOW_ITEM_TYPE_VXLAN:
+			layer.vxlan = 1;
+			break;
+		case RTE_FLOW_ITEM_TYPE_GENEVE:
+			layer.geneve = 1;
+			break;
+		case RTE_FLOW_ITEM_TYPE_NVGRE:
+			layer.nvgre = 1;
+			break;
+		default:
+			break;
+		}
+	}
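+	/* The user pattern itself is always the first entry, at priority 0. */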
+	off += rte_flow_expand_rss_item(addr, (off < size) ? size - off : 0,
+					pat, end);
+	if (off <= size) {
+		rte_flow_expand_rss_update(buf, addr, priority);
+		addr = (void *)((uintptr_t)buf + off);
+	}
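+	/*
+	 * Expand with an L3 item when the pattern does not already contain
+	 * one; each additional protocol layer gets its own priority level.
+	 */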
+	if ((types & ETH_RSS_IP) &&
+	    (!(layer.ipv4 || layer.ipv6 || layer.ipv6_ex))) {
+		++priority;
+		if (types & (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
+			     ETH_RSS_NONFRAG_IPV4_OTHER)) {
+			const struct rte_flow_item new[] = {
+				{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
+				{ .type = RTE_FLOW_ITEM_TYPE_END },
+			};
+			const enum rte_flow_item_type list[] = {
+				RTE_FLOW_ITEM_TYPE_ETH,
+				RTE_FLOW_ITEM_TYPE_IPV4,
+				RTE_FLOW_ITEM_TYPE_END,
+			};
+			int ret;
+
+			ret = rte_flow_expand_rss_is_supported(supported, list);
+			if (ret) {
+				off += rte_flow_expand_rss_item
+					(addr, (off <= size) ? size - off : 0,
+					 pat, new);
+				if (off <= size) {
+					rte_flow_expand_rss_update(buf, addr,
+								   priority);
+					addr = (void *)((uintptr_t)buf + off);
+				}
+			}
+		}
+		if (types & (ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
+			     ETH_RSS_NONFRAG_IPV6_OTHER)) {
+			const struct rte_flow_item new[] = {
+				{ .type = RTE_FLOW_ITEM_TYPE_IPV6 },
+				{ .type = RTE_FLOW_ITEM_TYPE_END },
+			};
+			const enum rte_flow_item_type list[] = {
+				RTE_FLOW_ITEM_TYPE_ETH,
+				RTE_FLOW_ITEM_TYPE_IPV6,
+				RTE_FLOW_ITEM_TYPE_END,
+			};
+			int ret;
+
+			ret = rte_flow_expand_rss_is_supported(supported, list);
+			if (ret) {
+				off += rte_flow_expand_rss_item
+					(addr, (off < size) ? size - off : 0,
+					 pat, new);
+				if (off <= size) {
+					rte_flow_expand_rss_update(buf, addr,
+								   priority);
+					addr = (void *)((uintptr_t)buf + off);
+				}
+			}
+		}
+		if (types & ETH_RSS_IPV6_EX) {
+			const struct rte_flow_item new[] = {
+				{ .type = RTE_FLOW_ITEM_TYPE_IPV6_EXT },
+				{ .type = RTE_FLOW_ITEM_TYPE_END },
+			};
+			const enum rte_flow_item_type list[] = {
+				RTE_FLOW_ITEM_TYPE_ETH,
+				RTE_FLOW_ITEM_TYPE_IPV6_EXT,
+				RTE_FLOW_ITEM_TYPE_END,
+			};
+			int ret;
+
+			ret = rte_flow_expand_rss_is_supported(supported, list);
+			if (ret) {
+				off += rte_flow_expand_rss_item
+					(addr, (off <= size) ? size - off : 0,
+					 pat, new);
+				if (off <= size) {
+					rte_flow_expand_rss_update(buf, addr,
+								   priority);
+					addr = (void *)((uintptr_t)buf + off);
+				}
+			}
+		}
+	}
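+	/*
+	 * Expand with an L4 item; the L3 item to prepend is inferred from
+	 * the RSS type and from the layers already present in the pattern.
+	 */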
+	if (types & (ETH_RSS_TCP | ETH_RSS_UDP)) {
+		++priority;
+		if ((types & ETH_RSS_NONFRAG_IPV4_UDP) &&
+		    !(layer.ipv6 || layer.ipv6_ex || layer.tcp || layer.udp)) {
+			const struct rte_flow_item new[] = {
+				{ .type = layer.ipv4 ?
+				  RTE_FLOW_ITEM_TYPE_VOID :
+				  RTE_FLOW_ITEM_TYPE_IPV4 },
+				{ .type = RTE_FLOW_ITEM_TYPE_UDP },
+				{ .type = RTE_FLOW_ITEM_TYPE_END },
+			};
+			const enum rte_flow_item_type list[] = {
+				RTE_FLOW_ITEM_TYPE_ETH,
+				RTE_FLOW_ITEM_TYPE_IPV4,
+				RTE_FLOW_ITEM_TYPE_UDP,
+				RTE_FLOW_ITEM_TYPE_END,
+			};
+			int ret;
+
+			ret = rte_flow_expand_rss_is_supported(supported, list);
+			if (ret) {
+				off += rte_flow_expand_rss_item
+					(addr, (off <= size) ? size - off : 0,
+					 pat, new);
+				if (off <= size) {
+					rte_flow_expand_rss_update(buf, addr,
+								   priority);
+					addr = (void *)((uintptr_t)buf + off);
+				}
+			}
+		}
+		if ((types & ETH_RSS_NONFRAG_IPV4_TCP) &&
+		    !(layer.ipv6 || layer.ipv6_ex || layer.tcp || layer.udp)) {
+			const struct rte_flow_item new[] = {
+				{ .type = layer.ipv4 ?
+				  RTE_FLOW_ITEM_TYPE_VOID :
+				  RTE_FLOW_ITEM_TYPE_IPV4 },
+				{ .type = RTE_FLOW_ITEM_TYPE_TCP },
+				{ .type = RTE_FLOW_ITEM_TYPE_END },
+			};
+			const enum rte_flow_item_type list[] = {
+				RTE_FLOW_ITEM_TYPE_ETH,
+				RTE_FLOW_ITEM_TYPE_IPV4,
+				RTE_FLOW_ITEM_TYPE_TCP,
+				RTE_FLOW_ITEM_TYPE_END,
+			};
+			int ret;
+
+			ret = rte_flow_expand_rss_is_supported(supported, list);
+			if (ret) {
+				off += rte_flow_expand_rss_item
+					(addr, (off <= size) ? size - off : 0,
+					 pat, new);
+				if (off <= size) {
+					rte_flow_expand_rss_update(buf, addr,
+								   priority);
+					addr = (void *)((uintptr_t)buf + off);
+				}
+			}
+		}
+		if ((types & ETH_RSS_NONFRAG_IPV6_UDP) &&
+		    !(layer.ipv4 || layer.tcp || layer.udp)) {
+			const struct rte_flow_item new[] = {
+				{ .type = (layer.ipv6 || layer.ipv6_ex) ?
+				  RTE_FLOW_ITEM_TYPE_VOID :
+				  RTE_FLOW_ITEM_TYPE_IPV6 },
+				{ .type = RTE_FLOW_ITEM_TYPE_UDP },
+				{ .type = RTE_FLOW_ITEM_TYPE_END },
+			};
+			const enum rte_flow_item_type list[] = {
+				RTE_FLOW_ITEM_TYPE_ETH,
+				RTE_FLOW_ITEM_TYPE_IPV6,
+				RTE_FLOW_ITEM_TYPE_UDP,
+				RTE_FLOW_ITEM_TYPE_END,
+			};
+			int ret;
+
+			ret = rte_flow_expand_rss_is_supported(supported, list);
+			if (ret) {
+				off += rte_flow_expand_rss_item
+					(addr, (off < size) ? size - off : 0,
+					 pat, new);
+				if (off <= size) {
+					rte_flow_expand_rss_update(buf, addr,
+								   priority);
+					addr = (void *)((uintptr_t)buf + off);
+				}
+			}
+		}
+		if ((types & (ETH_RSS_NONFRAG_IPV6_TCP |
+			      ETH_RSS_IPV6_TCP_EX)) &&
+		    !(layer.ipv4 || layer.tcp || layer.udp)) {
+			const struct rte_flow_item new[] = {
+				{ .type = (layer.ipv6 || layer.ipv6_ex) ?
+				  RTE_FLOW_ITEM_TYPE_VOID :
+				  RTE_FLOW_ITEM_TYPE_IPV6 },
+				{ .type = RTE_FLOW_ITEM_TYPE_TCP },
+				{ .type = RTE_FLOW_ITEM_TYPE_END },
+			};
+			const enum rte_flow_item_type list[] = {
+				RTE_FLOW_ITEM_TYPE_ETH,
+				(layer.ipv6_ex ?
+				 RTE_FLOW_ITEM_TYPE_IPV6_EXT :
+				 RTE_FLOW_ITEM_TYPE_IPV6),
+				RTE_FLOW_ITEM_TYPE_TCP,
+				RTE_FLOW_ITEM_TYPE_END,
+			};
+			int ret;
+
+			ret = rte_flow_expand_rss_is_supported(supported, list);
+			if (ret) {
+				off += rte_flow_expand_rss_item
+					(addr, (off < size) ? size - off : 0,
+					 pat, new);
+				if (off <= size) {
+					rte_flow_expand_rss_update(buf, addr,
+								   priority);
+					addr = (void *)((uintptr_t)buf + off);
+				}
+			}
+		}
+	}
+	return off;
+}
diff --git a/lib/librte_ethdev/rte_flow_driver.h b/lib/librte_ethdev/rte_flow_driver.h
index 1c90c600d..9058a8715 100644
--- a/lib/librte_ethdev/rte_flow_driver.h
+++ b/lib/librte_ethdev/rte_flow_driver.h
@@ -114,6 +114,38 @@ struct rte_flow_ops {
 const struct rte_flow_ops *
 rte_flow_ops_get(uint16_t port_id, struct rte_flow_error *error);
 
+/**
+ * Expansion structure for RSS flows.
+ */
+struct rte_flow_expand_rss {
+	uint32_t entries; /**< Number of entries in the following arrays. */
+	struct rte_flow_item **patterns; /**< Expanded pattern array. */
+	uint32_t *priority; /**< Priority offset for each expansion. */
+};
+
+/**
+ * Expand RSS flows into several possible flows according to the RSS hash
+ * fields requested and the driver capabilities.
+ *
+ * @param[in,out] buf
+ *   Buffer to store the expansion result.
+ * @param[in] size
+ *   Size in octets of the buffer.
+ * @param[in] pat
+ *   User flow pattern.
+ * @param[in] types
+ *   RSS types expected (see ETH_RSS_*).
+ * @param[in] supported
+ *   NULL-terminated list of expansion patterns supported by the device.
+ *
+ * @return
+ *   The size in octets required to store the expansion; when it exceeds
+ *   @p size, the buffer is too small and the call must be retried with a
+ *   larger buffer.
+ */
+int
+rte_flow_expand_rss(struct rte_flow_expand_rss *buf, size_t size,
+		    const struct rte_flow_item *pat, uint64_t types,
+		    const enum rte_flow_item_type **supported);
+
 #ifdef __cplusplus
 }
 #endif
-- 
2.17.0


