<div dir="ltr">Hi, I've added the Ubuntu 24.04 environment to the Community Lab but without werror enabled as the rte_pcapng.c warning would cause the builds to fail. Once the rte_pcapng.c warning is fixed, I will re-enable werror. I will send an email to the ci mailing list with the logs of the warning so that people are aware of it. <br><br>Thanks,<br>Cody</div><br><div class="gmail_quote"><div dir="ltr" class="gmail_attr">On Tue, May 21, 2024 at 4:14 PM Stephen Hemminger <<a href="mailto:stephen@networkplumber.org" target="_blank">stephen@networkplumber.org</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">There were multiple issues in the RSS queue support in the TAP<br>
driver. This required extensive rework of the BPF support.<br>
<br>
Change the BPF loading to use bpftool to<br>
create a skeleton header file, and load with libbpf.<br>
The BPF is always compiled from source so less chance that<br>
source and instructions diverge. Also resolves issue where<br>
libbpf and source get out of sync. The program<br>
is only loaded once, so if multiple rules are created<br>
only one BPF program is loaded in kernel.<br>
<br>
The new BPF program only needs a single action.<br>
No need for action and re-classification step.<br>
<br>
It also fixes the missing bits from the original.<br>
- supports setting RSS key per flow<br>
- level of hash can be L3 or L3/L4.<br>
<br>
Bugzilla ID: 1329<br>
<br>
Signed-off-by: Stephen Hemminger <<a href="mailto:stephen@networkplumber.org" target="_blank">stephen@networkplumber.org</a>><br>
---<br>
doc/guides/rel_notes/release_24_07.rst | 3 +<br>
drivers/net/tap/bpf/meson.build | 81 +++--<br>
drivers/net/tap/meson.build | 39 ++-<br>
drivers/net/tap/rte_eth_tap.c | 14 +-<br>
drivers/net/tap/rte_eth_tap.h | 6 +-<br>
drivers/net/tap/tap_flow.c | 416 ++++++-------------------<br>
drivers/net/tap/tap_flow.h | 17 +-<br>
drivers/net/tap/tap_rss.h | 10 +-<br>
drivers/net/tap/tap_tcmsgs.h | 4 +-<br>
9 files changed, 186 insertions(+), 404 deletions(-)<br>
<br>
diff --git a/doc/guides/rel_notes/release_24_07.rst b/doc/guides/rel_notes/release_24_07.rst<br>
index a6295359b1..37a6e98637 100644<br>
--- a/doc/guides/rel_notes/release_24_07.rst<br>
+++ b/doc/guides/rel_notes/release_24_07.rst<br>
@@ -59,6 +59,9 @@ New Features<br>
<br>
* Updated to support up to 8 queues when used by secondary process.<br>
<br>
+ * Fixed support of RSS flow action to work with current Linux<br>
+ kernels and BPF tooling. Will only be enabled if clang, libbpf 1.0<br>
+ and bpftool are available.<br>
<br>
Removed Items<br>
-------------<br>
diff --git a/drivers/net/tap/bpf/meson.build b/drivers/net/tap/bpf/meson.build<br>
index f2c03a19fd..df497948e2 100644<br>
--- a/drivers/net/tap/bpf/meson.build<br>
+++ b/drivers/net/tap/bpf/meson.build<br>
@@ -1,17 +1,26 @@<br>
# SPDX-License-Identifier: BSD-3-Clause<br>
# Copyright 2024 Stephen Hemminger <<a href="mailto:stephen@networkplumber.org" target="_blank">stephen@networkplumber.org</a>><br>
<br>
-enable_tap_rss = false<br>
-<br>
-libbpf = dependency('libbpf', required: false, method: 'pkg-config')<br>
+# Loading BPF requires libbpf<br>
+# and the bpf_map__XXX APIs were introduced in 0.8.0<br>
+libbpf = dependency('libbpf', version: '>= 1.0',<br>
+ required: false, method: 'pkg-config')<br>
if not libbpf.found()<br>
message('net/tap: no RSS support missing libbpf')<br>
subdir_done()<br>
endif<br>
<br>
+# Making skeleton needs bpftool<br>
# Debian install this in /usr/sbin which is not in $PATH<br>
-bpftool = find_program('bpftool', '/usr/sbin/bpftool', required: false, version: '>= 5.6.0')<br>
-if not bpftool.found()<br>
+bpftool_supports_skel = false<br>
+bpftool = find_program('bpftool', '/usr/sbin/bpftool', required: false)<br>
+if bpftool.found()<br>
+ # Some Ubuntu versions have non-functional bpftool<br>
+ bpftool_supports_skel = run_command(bpftool, 'gen', 'help',<br>
+ check:false).returncode() == 0<br>
+endif<br>
+<br>
+if not bpftool_supports_skel<br>
message('net/tap: no RSS support missing bpftool')<br>
subdir_done()<br>
endif<br>
@@ -39,43 +48,47 @@ machine_name = run_command('uname', '-m').stdout().strip()<br>
march_include_dir = '/usr/include/' + machine_name + '-linux-gnu'<br>
<br>
clang_flags = [<br>
- '-O2',<br>
- '-Wall',<br>
- '-Wextra',<br>
- '-target',<br>
- 'bpf',<br>
- '-g',<br>
- '-c',<br>
+ # these are flags used to build the BPF code<br>
+ '-O2',<br>
+ '-Wall',<br>
+ '-Wextra',<br>
+ max_queues,<br>
+ '-target',<br>
+ 'bpf',<br>
+ '-g',<br>
+ '-c',<br>
]<br>
<br>
+# Command used to compile BPF program<br>
bpf_o_cmd = [<br>
- clang,<br>
- clang_flags,<br>
- '-idirafter',<br>
- libbpf_include_dir,<br>
- '-idirafter',<br>
- march_include_dir,<br>
- '@INPUT@',<br>
- '-o',<br>
- '@OUTPUT@'<br>
+ clang,<br>
+ clang_flags,<br>
+ '-idirafter',<br>
+ libbpf_include_dir,<br>
+ '-idirafter',<br>
+ march_include_dir,<br>
+ '@INPUT@',<br>
+ '-o',<br>
+ '@OUTPUT@',<br>
]<br>
<br>
+# Command used to generate header file from BPF object<br>
skel_h_cmd = [<br>
- bpftool,<br>
- 'gen',<br>
- 'skeleton',<br>
- '@INPUT@'<br>
+ bpftool,<br>
+ 'gen',<br>
+ 'skeleton',<br>
+ '@INPUT@',<br>
]<br>
<br>
tap_rss_o = custom_target(<br>
- 'tap_rss.bpf.o',<br>
- input: 'tap_rss.c',<br>
- output: 'tap_rss.o',<br>
- command: bpf_o_cmd)<br>
+ 'tap_rss.bpf.o',<br>
+ input: 'tap_rss.c',<br>
+ output: 'tap_rss.o',<br>
+ command: bpf_o_cmd)<br>
<br>
tap_rss_skel_h = custom_target(<br>
- 'tap_rss.skel.h',<br>
- input: tap_rss_o,<br>
- output: 'tap_rss.skel.h',<br>
- command: skel_h_cmd,<br>
- capture: true)<br>
+ 'tap_rss.skel.h',<br>
+ input: tap_rss_o,<br>
+ output: 'tap_rss.skel.h',<br>
+ command: skel_h_cmd,<br>
+ capture: true)<br>
diff --git a/drivers/net/tap/meson.build b/drivers/net/tap/meson.build<br>
index 66647a1c62..5e5a3ad3c6 100644<br>
--- a/drivers/net/tap/meson.build<br>
+++ b/drivers/net/tap/meson.build<br>
@@ -5,36 +5,33 @@ if not is_linux<br>
build = false<br>
reason = 'only supported on Linux'<br>
endif<br>
+<br>
sources = files(<br>
'rte_eth_tap.c',<br>
'tap_intr.c',<br>
'tap_netlink.c',<br>
)<br>
<br>
+deps = ['bus_vdev', 'gso', 'hash']<br>
+<br>
+max_queues = '-DTAP_MAX_QUEUES=16'<br>
+cflags += max_queues<br>
+<br>
+require_iova_in_mbuf = false<br>
+<br>
if cc.has_header_symbol('linux/pkt_cls.h', 'TCA_FLOWER_ACT')<br>
cflags += '-DHAVE_TCA_FLOWER'<br>
sources += files(<br>
- 'tap_bpf_api.c',<br>
- 'tap_flow.c',<br>
- 'tap_tcmsgs.c',<br>
+ 'tap_flow.c',<br>
+ 'tap_tcmsgs.c',<br>
)<br>
-endif<br>
-<br>
-deps = ['bus_vdev', 'gso', 'hash']<br>
<br>
-cflags += '-DTAP_MAX_QUEUES=16'<br>
-<br>
-# input array for meson symbol search:<br>
-# [ "MACRO to define if found", "header for the search",<br>
-# "enum/define", "symbol to search" ]<br>
-#<br>
-args = [<br>
- [ 'HAVE_TC_ACT_BPF', 'linux/tc_act/tc_bpf.h', 'TCA_ACT_BPF_UNSPEC' ],<br>
-]<br>
-config = configuration_data()<br>
-foreach arg:args<br>
- config.set(arg[0], cc.has_header_symbol(arg[1], arg[2]))<br>
-endforeach<br>
-configure_file(output : 'tap_autoconf.h', configuration : config)<br>
+ enable_tap_rss = false<br>
<br>
-require_iova_in_mbuf = false<br>
+ subdir('bpf')<br>
+ if enable_tap_rss<br>
+ cflags += '-DHAVE_BPF_RSS'<br>
+ ext_deps += libbpf<br>
+ sources += tap_rss_skel_h<br>
+ endif<br>
+endif<br>
diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c<br>
index 9058a47295..d847565073 100644<br>
--- a/drivers/net/tap/rte_eth_tap.c<br>
+++ b/drivers/net/tap/rte_eth_tap.c<br>
@@ -1140,6 +1140,7 @@ tap_dev_close(struct rte_eth_dev *dev)<br>
tap_flow_implicit_flush(internals, NULL);<br>
tap_nl_final(internals->nlsk_fd);<br>
internals->nlsk_fd = -1;<br>
+ tap_flow_bpf_destroy(internals);<br>
}<br>
#endif<br>
<br>
@@ -1949,6 +1950,7 @@ eth_dev_tap_create(struct rte_vdev_device *vdev, const char *tap_name,<br>
strlcpy(pmd->name, tap_name, sizeof(pmd->name));<br>
pmd->type = type;<br>
pmd->ka_fd = -1;<br>
+<br>
#ifdef HAVE_TCA_FLOWER<br>
pmd->nlsk_fd = -1;<br>
#endif<br>
@@ -2031,13 +2033,6 @@ eth_dev_tap_create(struct rte_vdev_device *vdev, const char *tap_name,<br>
/* Make network device persist after application exit */<br>
pmd->persist = persist;<br>
<br>
- pmd->if_index = if_nametoindex(pmd->name);<br>
- if (!pmd->if_index) {<br>
- TAP_LOG(ERR, "%s: failed to get if_index.", pmd->name);<br>
- goto disable_rte_flow;<br>
- }<br>
-<br>
-<br>
#ifdef HAVE_TCA_FLOWER<br>
/*<br>
* Set up everything related to rte_flow:<br>
@@ -2053,6 +2048,11 @@ eth_dev_tap_create(struct rte_vdev_device *vdev, const char *tap_name,<br>
pmd->name);<br>
goto disable_rte_flow;<br>
}<br>
+ pmd->if_index = if_nametoindex(pmd->name);<br>
+ if (!pmd->if_index) {<br>
+ TAP_LOG(ERR, "%s: failed to get if_index.", pmd->name);<br>
+ goto disable_rte_flow;<br>
+ }<br>
if (qdisc_create_multiq(pmd->nlsk_fd, pmd->if_index) < 0) {<br>
TAP_LOG(ERR, "%s: failed to create multiq qdisc.",<br>
pmd->name);<br>
diff --git a/drivers/net/tap/rte_eth_tap.h b/drivers/net/tap/rte_eth_tap.h<br>
index af18b29090..ce4322ad04 100644<br>
--- a/drivers/net/tap/rte_eth_tap.h<br>
+++ b/drivers/net/tap/rte_eth_tap.h<br>
@@ -81,10 +81,8 @@ struct pmd_internals {<br>
#ifdef HAVE_TCA_FLOWER<br>
int nlsk_fd; /* Netlink socket fd */<br>
int flow_isolate; /* 1 if flow isolation is enabled */<br>
- int rss_enabled; /* 1 if RSS is enabled, else 0 */<br>
- /* implicit rules set when RSS is enabled */<br>
- int map_fd; /* BPF RSS map fd */<br>
- int bpf_fd[RTE_PMD_TAP_MAX_QUEUES];/* List of bpf fds per queue */<br>
+<br>
+ struct tap_rss *rss; /* BPF program */<br>
<br>
LIST_HEAD(tap_flows, rte_flow) flows; /* rte_flow rules */<br>
/* implicit rte_flow rules set when a remote device is active */<br>
diff --git a/drivers/net/tap/tap_flow.c b/drivers/net/tap/tap_flow.c<br>
index 45321aee86..0a90c0487b 100644<br>
--- a/drivers/net/tap/tap_flow.c<br>
+++ b/drivers/net/tap/tap_flow.c<br>
@@ -15,25 +15,19 @@<br>
#include <rte_random.h><br>
#include <rte_malloc.h><br>
#include <rte_eth_tap.h><br>
+#include <rte_uuid.h><br>
<br>
#include <tap_flow.h><br>
-#include <tap_autoconf.h><br>
#include <tap_tcmsgs.h><br>
#include <tap_rss.h><br>
<br>
-/* RSS key management */<br>
-enum bpf_rss_key_e {<br>
- KEY_CMD_GET = 1,<br>
- KEY_CMD_RELEASE,<br>
- KEY_CMD_INIT,<br>
- KEY_CMD_DEINIT,<br>
-};<br>
-<br>
-enum key_status_e {<br>
- KEY_STAT_UNSPEC,<br>
- KEY_STAT_USED,<br>
- KEY_STAT_AVAILABLE,<br>
-};<br>
+#ifdef HAVE_BPF_RSS<br>
+/* Workaround for warning in bpftool generated skeleton code */<br>
+#pragma GCC diagnostic push<br>
+#pragma GCC diagnostic ignored "-Wcast-qual"<br>
+#include "tap_rss.skel.h"<br>
+#pragma GCC diagnostic pop<br>
+#endif<br>
<br>
#define ISOLATE_HANDLE 1<br>
#define REMOTE_PROMISCUOUS_HANDLE 2<br>
@@ -41,8 +35,6 @@ enum key_status_e {<br>
struct rte_flow {<br>
LIST_ENTRY(rte_flow) next; /* Pointer to the next rte_flow structure */<br>
struct rte_flow *remote_flow; /* associated remote flow */<br>
- int bpf_fd[SEC_MAX]; /* list of bfs fds per ELF section */<br>
- uint32_t key_idx; /* RSS rule key index into BPF map */<br>
struct nlmsg msg;<br>
};<br>
<br>
@@ -69,12 +61,16 @@ struct action_data {<br>
struct skbedit {<br>
struct tc_skbedit skbedit;<br>
uint16_t queue;<br>
+ uint32_t mark;<br>
} skbedit;<br>
+#ifdef HAVE_BPF_RSS<br>
struct bpf {<br>
struct tc_act_bpf bpf;<br>
+ uint32_t map_key;<br>
int bpf_fd;<br>
const char *annotation;<br>
} bpf;<br>
+#endif<br>
};<br>
};<br>
<br>
@@ -112,13 +108,12 @@ tap_flow_isolate(struct rte_eth_dev *dev,<br>
int set,<br>
struct rte_flow_error *error);<br>
<br>
-static int bpf_rss_key(enum bpf_rss_key_e cmd, __u32 *key_idx);<br>
-static int rss_enable(struct pmd_internals *pmd,<br>
- const struct rte_flow_attr *attr,<br>
- struct rte_flow_error *error);<br>
+#ifdef HAVE_BPF_RSS<br>
+static int rss_enable(struct pmd_internals *pmd, struct rte_flow_error *error);<br>
static int rss_add_actions(struct rte_flow *flow, struct pmd_internals *pmd,<br>
const struct rte_flow_action_rss *rss,<br>
struct rte_flow_error *error);<br>
+#endif<br>
<br>
static const struct rte_flow_ops tap_flow_ops = {<br>
.validate = tap_flow_validate,<br>
@@ -853,11 +848,13 @@ add_action(struct rte_flow *flow, size_t *act_index, struct action_data *adata)<br>
&adata->mirred);<br>
} else if (strcmp("skbedit", adata->id) == 0) {<br>
tap_nlattr_add(&msg->nh, TCA_SKBEDIT_PARMS,<br>
- sizeof(adata->skbedit.skbedit),<br>
- &adata->skbedit.skbedit);<br>
- tap_nlattr_add16(&msg->nh, TCA_SKBEDIT_QUEUE_MAPPING,<br>
- adata->skbedit.queue);<br>
+ sizeof(adata->skbedit.skbedit), &adata->skbedit.skbedit);<br>
+ if (adata->skbedit.mark)<br>
+ tap_nlattr_add32(&msg->nh, TCA_SKBEDIT_MARK, adata->skbedit.mark);<br>
+ else<br>
+ tap_nlattr_add16(&msg->nh, TCA_SKBEDIT_QUEUE_MAPPING, adata->skbedit.queue);<br>
} else if (strcmp("bpf", adata->id) == 0) {<br>
+#ifdef HAVE_BPF_RSS<br>
tap_nlattr_add32(&msg->nh, TCA_ACT_BPF_FD, adata->bpf.bpf_fd);<br>
tap_nlattr_add(&msg->nh, TCA_ACT_BPF_NAME,<br>
strlen(adata->bpf.annotation) + 1,<br>
@@ -865,7 +862,12 @@ add_action(struct rte_flow *flow, size_t *act_index, struct action_data *adata)<br>
tap_nlattr_add(&msg->nh, TCA_ACT_BPF_PARMS,<br>
sizeof(adata->bpf.bpf),<br>
&adata->bpf.bpf);<br>
+#else<br>
+ TAP_LOG(ERR, "Internal error: bpf requested but not supported");<br>
+ return -1;<br>
+#endif<br>
} else {<br>
+ TAP_LOG(ERR, "Internal error: unknown action: %s", adata->id);<br>
return -1;<br>
}<br>
tap_nlattr_nested_finish(msg); /* nested TCA_ACT_OPTIONS */<br>
@@ -1104,8 +1106,7 @@ priv_flow_process(struct pmd_internals *pmd,<br>
},<br>
};<br>
<br>
- err = add_actions(flow, 1, &adata,<br>
- TCA_FLOWER_ACT);<br>
+ err = add_actions(flow, 1, &adata, TCA_FLOWER_ACT);<br>
}<br>
} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {<br>
const struct rte_flow_action_queue *queue =<br>
@@ -1135,6 +1136,7 @@ priv_flow_process(struct pmd_internals *pmd,<br>
err = add_actions(flow, 1, &adata,<br>
TCA_FLOWER_ACT);<br>
}<br>
+#ifdef HAVE_BPF_RSS<br>
} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {<br>
const struct rte_flow_action_rss *rss =<br>
(const struct rte_flow_action_rss *)<br>
@@ -1143,13 +1145,14 @@ priv_flow_process(struct pmd_internals *pmd,<br>
if (action++)<br>
goto exit_action_not_supported;<br>
<br>
- if (!pmd->rss_enabled) {<br>
- err = rss_enable(pmd, attr, error);<br>
+ if (pmd->rss == NULL) {<br>
+ err = rss_enable(pmd, error);<br>
if (err)<br>
goto exit_return_error;<br>
}<br>
if (flow)<br>
err = rss_add_actions(flow, pmd, rss, error);<br>
+#endif<br>
} else {<br>
goto exit_action_not_supported;<br>
}<br>
@@ -1246,26 +1249,17 @@ tap_flow_set_handle(struct rte_flow *flow)<br>
*<br>
*/<br>
static void<br>
-tap_flow_free(struct pmd_internals *pmd, struct rte_flow *flow)<br>
+tap_flow_free(struct pmd_internals *pmd __rte_unused, struct rte_flow *flow)<br>
{<br>
- int i;<br>
-<br>
if (!flow)<br>
return;<br>
<br>
- if (pmd->rss_enabled) {<br>
- /* Close flow BPF file descriptors */<br>
- for (i = 0; i < SEC_MAX; i++)<br>
- if (flow->bpf_fd[i] != 0) {<br>
- close(flow->bpf_fd[i]);<br>
- flow->bpf_fd[i] = 0;<br>
- }<br>
-<br>
- /* Release the map key for this RSS rule */<br>
- bpf_rss_key(KEY_CMD_RELEASE, &flow->key_idx);<br>
- flow->key_idx = 0;<br>
- }<br>
-<br>
+#ifdef HAVE_BPF_RSS<br>
+ struct tap_rss *rss = pmd->rss;<br>
+ if (rss)<br>
+ bpf_map__delete_elem(rss->maps.rss_map,<br>
+ &flow->msg.t.tcm_handle, sizeof(uint32_t), 0);<br>
+#endif<br>
/* Free flow allocated memory */<br>
rte_free(flow);<br>
}<br>
@@ -1733,14 +1727,18 @@ tap_flow_implicit_flush(struct pmd_internals *pmd, struct rte_flow_error *error)<br>
return 0;<br>
}<br>
<br>
-#define MAX_RSS_KEYS 256<br>
-#define KEY_IDX_OFFSET (3 * MAX_RSS_KEYS)<br>
-#define SEC_NAME_CLS_Q "cls_q"<br>
-<br>
-static const char *sec_name[SEC_MAX] = {<br>
- [SEC_L3_L4] = "l3_l4",<br>
-};<br>
+/**<br>
+ * Cleanup when device is closed<br>
+ */<br>
+void tap_flow_bpf_destroy(struct pmd_internals *pmd __rte_unused)<br>
+{<br>
+#ifdef HAVE_BPF_RSS<br>
+ tap_rss__destroy(pmd->rss);<br>
+ pmd->rss = NULL;<br>
+#endif<br>
+}<br>
<br>
+#ifdef HAVE_BPF_RSS<br>
/**<br>
* Enable RSS on tap: create TC rules for queuing.<br>
*<br>
@@ -1755,225 +1753,32 @@ static const char *sec_name[SEC_MAX] = {<br>
*<br>
* @return 0 on success, negative value on failure.<br>
*/<br>
-static int rss_enable(struct pmd_internals *pmd,<br>
- const struct rte_flow_attr *attr,<br>
- struct rte_flow_error *error)<br>
+static int rss_enable(struct pmd_internals *pmd, struct rte_flow_error *error)<br>
{<br>
- struct rte_flow *rss_flow = NULL;<br>
- struct nlmsg *msg = NULL;<br>
- /* 4096 is the maximum number of instructions for a BPF program */<br>
- char annotation[64];<br>
- int i;<br>
- int err = 0;<br>
-<br>
- /* unlimit locked memory */<br>
- struct rlimit memlock_limit = {<br>
- .rlim_cur = RLIM_INFINITY,<br>
- .rlim_max = RLIM_INFINITY,<br>
- };<br>
- setrlimit(RLIMIT_MEMLOCK, &memlock_limit);<br>
-<br>
- /* Get a new map key for a new RSS rule */<br>
- err = bpf_rss_key(KEY_CMD_INIT, NULL);<br>
- if (err < 0) {<br>
- rte_flow_error_set(<br>
- error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE, NULL,<br>
- "Failed to initialize BPF RSS keys");<br>
-<br>
- return -1;<br>
- }<br>
-<br>
- /*<br>
- * Create BPF RSS MAP<br>
- */<br>
- pmd->map_fd = tap_flow_bpf_rss_map_create(sizeof(__u32), /* key size */<br>
- sizeof(struct rss_key),<br>
- MAX_RSS_KEYS);<br>
- if (pmd->map_fd < 0) {<br>
- TAP_LOG(ERR,<br>
- "Failed to create BPF map (%d): %s",<br>
- errno, strerror(errno));<br>
- rte_flow_error_set(<br>
- error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE, NULL,<br>
- "Kernel too old or not configured "<br>
- "to support BPF maps");<br>
-<br>
- return -ENOTSUP;<br>
- }<br>
-<br>
- /*<br>
- * Add a rule per queue to match reclassified packets and direct them to<br>
- * the correct queue.<br>
- */<br>
- for (i = 0; i < pmd->dev->data->nb_rx_queues; i++) {<br>
- pmd->bpf_fd[i] = tap_flow_bpf_cls_q(i);<br>
- if (pmd->bpf_fd[i] < 0) {<br>
- TAP_LOG(ERR,<br>
- "Failed to load BPF section %s for queue %d",<br>
- SEC_NAME_CLS_Q, i);<br>
- rte_flow_error_set(<br>
- error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,<br>
- NULL,<br>
- "Kernel too old or not configured "<br>
- "to support BPF programs loading");<br>
-<br>
- return -ENOTSUP;<br>
- }<br>
-<br>
- rss_flow = rte_zmalloc(__func__, sizeof(struct rte_flow), 0);<br>
- if (!rss_flow) {<br>
- TAP_LOG(ERR,<br>
- "Cannot allocate memory for rte_flow");<br>
- return -1;<br>
- }<br>
- msg = &rss_flow->msg;<br>
- tc_init_msg(msg, pmd->if_index, RTM_NEWTFILTER, NLM_F_REQUEST |<br>
- NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE);<br>
- msg->t.tcm_info = TC_H_MAKE(0, htons(ETH_P_ALL));<br>
- tap_flow_set_handle(rss_flow);<br>
- uint16_t group = attr->group << GROUP_SHIFT;<br>
- uint16_t prio = group | (i + PRIORITY_OFFSET);<br>
- msg->t.tcm_info = TC_H_MAKE(prio << 16, msg->t.tcm_info);<br>
- msg->t.tcm_parent = TC_H_MAKE(MULTIQ_MAJOR_HANDLE, 0);<br>
-<br>
- tap_nlattr_add(&msg->nh, TCA_KIND, sizeof("bpf"), "bpf");<br>
- if (tap_nlattr_nested_start(msg, TCA_OPTIONS) < 0)<br>
- return -1;<br>
- tap_nlattr_add32(&msg->nh, TCA_BPF_FD, pmd->bpf_fd[i]);<br>
- snprintf(annotation, sizeof(annotation), "[%s%d]",<br>
- SEC_NAME_CLS_Q, i);<br>
- tap_nlattr_add(&msg->nh, TCA_BPF_NAME, strlen(annotation) + 1,<br>
- annotation);<br>
- /* Actions */<br>
- {<br>
- struct action_data adata = {<br>
- .id = "skbedit",<br>
- .skbedit = {<br>
- .skbedit = {<br>
- .action = TC_ACT_PIPE,<br>
- },<br>
- .queue = i,<br>
- },<br>
- };<br>
- if (add_actions(rss_flow, 1, &adata, TCA_BPF_ACT) < 0)<br>
- return -1;<br>
- }<br>
- tap_nlattr_nested_finish(msg); /* nested TCA_OPTIONS */<br>
+ int err;<br>
<br>
- /* Netlink message is now ready to be sent */<br>
- if (tap_nl_send(pmd->nlsk_fd, &msg->nh) < 0)<br>
- return -1;<br>
- err = tap_nl_recv_ack(pmd->nlsk_fd);<br>
- if (err < 0) {<br>
- TAP_LOG(ERR,<br>
- "Kernel refused TC filter rule creation (%d): %s",<br>
- errno, strerror(errno));<br>
- return err;<br>
- }<br>
+ /* Load the BPF program (defined in tap_rss.skel.h, generated by bpftool) */<br>
+ pmd->rss = tap_rss__open_and_load();<br>
+ if (pmd->rss == NULL) {<br>
+ TAP_LOG(ERR, "Failed to load BPF object: %s", strerror(errno));<br>
+ rte_flow_error_set(error, errno, RTE_FLOW_ERROR_TYPE_HANDLE, NULL,<br>
+ "BPF object could not be loaded");<br>
+ return -errno;<br>
}<br>
<br>
- pmd->rss_enabled = 1;<br>
- return err;<br>
-}<br>
-<br>
-/**<br>
- * Manage bpf RSS keys repository with operations: init, get, release<br>
- *<br>
- * @param[in] cmd<br>
- * Command on RSS keys: init, get, release<br>
- *<br>
- * @param[in, out] key_idx<br>
- * Pointer to RSS Key index (out for get command, in for release command)<br>
- *<br>
- * @return -1 if couldn't get, release or init the RSS keys, 0 otherwise.<br>
- */<br>
-static int bpf_rss_key(enum bpf_rss_key_e cmd, __u32 *key_idx)<br>
-{<br>
- __u32 i;<br>
- int err = 0;<br>
- static __u32 num_used_keys;<br>
- static __u32 rss_keys[MAX_RSS_KEYS] = {KEY_STAT_UNSPEC};<br>
- static __u32 rss_keys_initialized;<br>
- __u32 key;<br>
-<br>
- switch (cmd) {<br>
- case KEY_CMD_GET:<br>
- if (!rss_keys_initialized) {<br>
- err = -1;<br>
- break;<br>
- }<br>
-<br>
- if (num_used_keys == RTE_DIM(rss_keys)) {<br>
- err = -1;<br>
- break;<br>
- }<br>
-<br>
- *key_idx = num_used_keys % RTE_DIM(rss_keys);<br>
- while (rss_keys[*key_idx] == KEY_STAT_USED)<br>
- *key_idx = (*key_idx + 1) % RTE_DIM(rss_keys);<br>
-<br>
- rss_keys[*key_idx] = KEY_STAT_USED;<br>
-<br>
- /*<br>
- * Add an offset to key_idx in order to handle a case of<br>
- * RSS and non RSS flows mixture.<br>
- * If a non RSS flow is destroyed it has an eBPF map<br>
- * index 0 (initialized on flow creation) and might<br>
- * unintentionally remove RSS entry 0 from eBPF map.<br>
- * To avoid this issue, add an offset to the real index<br>
- * during a KEY_CMD_GET operation and subtract this offset<br>
- * during a KEY_CMD_RELEASE operation in order to restore<br>
- * the real index.<br>
- */<br>
- *key_idx += KEY_IDX_OFFSET;<br>
- num_used_keys++;<br>
- break;<br>
-<br>
- case KEY_CMD_RELEASE:<br>
- if (!rss_keys_initialized)<br>
- break;<br>
-<br>
- /*<br>
- * Subtract offset to restore real key index<br>
- * If a non RSS flow is falsely trying to release map<br>
- * entry 0 - the offset subtraction will calculate the real<br>
- * map index as an out-of-range value and the release operation<br>
- * will be silently ignored.<br>
- */<br>
- key = *key_idx - KEY_IDX_OFFSET;<br>
- if (key >= RTE_DIM(rss_keys))<br>
- break;<br>
-<br>
- if (rss_keys[key] == KEY_STAT_USED) {<br>
- rss_keys[key] = KEY_STAT_AVAILABLE;<br>
- num_used_keys--;<br>
- }<br>
- break;<br>
-<br>
- case KEY_CMD_INIT:<br>
- for (i = 0; i < RTE_DIM(rss_keys); i++)<br>
- rss_keys[i] = KEY_STAT_AVAILABLE;<br>
-<br>
- rss_keys_initialized = 1;<br>
- num_used_keys = 0;<br>
- break;<br>
-<br>
- case KEY_CMD_DEINIT:<br>
- for (i = 0; i < RTE_DIM(rss_keys); i++)<br>
- rss_keys[i] = KEY_STAT_UNSPEC;<br>
-<br>
- rss_keys_initialized = 0;<br>
- num_used_keys = 0;<br>
- break;<br>
-<br>
- default:<br>
- break;<br>
+ /* Attach the maps defined in BPF program */<br>
+ err = tap_rss__attach(pmd->rss);<br>
+ if (err < 0) {<br>
+ TAP_LOG(ERR, "Failed to attach BPF object: %d", err);<br>
+ rte_flow_error_set(error, -err, RTE_FLOW_ERROR_TYPE_HANDLE, NULL,<br>
+ "BPF object could not be attached");<br>
+ tap_flow_bpf_destroy(pmd);<br>
+ return err;<br>
}<br>
<br>
- return err;<br>
+ return 0;<br>
}<br>
<br>
-<br>
/* Default RSS hash key also used by mlx devices */<br>
static const uint8_t rss_hash_default_key[] = {<br>
0x2c, 0xc6, 0x81, 0xd1,<br>
@@ -2006,9 +1811,11 @@ static int rss_add_actions(struct rte_flow *flow, struct pmd_internals *pmd,<br>
const struct rte_flow_action_rss *rss,<br>
struct rte_flow_error *error)<br>
{<br>
+ const struct bpf_program *rss_prog = pmd->rss->progs.rss_flow_action;<br>
struct rss_key rss_entry = { };<br>
const uint8_t *key_in;<br>
uint32_t hash_type = 0;<br>
+ uint32_t handle = flow->msg.t.tcm_handle;<br>
unsigned int i;<br>
int err;<br>
<br>
@@ -2067,34 +1874,24 @@ static int rss_add_actions(struct rte_flow *flow, struct pmd_internals *pmd,<br>
else if (rss->types & (RTE_ETH_RSS_IPV6 | RTE_ETH_RSS_FRAG_IPV6 | RTE_ETH_RSS_IPV6_EX))<br>
hash_type |= RTE_BIT32(HASH_FIELD_IPV6_L3);<br>
<br>
- /* Get a new map key for a new RSS rule */<br>
- err = bpf_rss_key(KEY_CMD_GET, &flow->key_idx);<br>
- if (err < 0) {<br>
- rte_flow_error_set(<br>
- error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE, NULL,<br>
- "Failed to get BPF RSS key");<br>
-<br>
- return -1;<br>
- }<br>
+ rss_entry.hash_fields = hash_type;<br>
+ rte_convert_rss_key((const uint32_t *)key_in, (uint32_t *)rss_entry.key,<br>
+ TAP_RSS_HASH_KEY_SIZE);<br>
<br>
/* Update RSS map entry with queues */<br>
rss_entry.nb_queues = rss->queue_num;<br>
for (i = 0; i < rss->queue_num; i++)<br>
rss_entry.queues[i] = rss->queue[i];<br>
<br>
- rss_entry.hash_fields = hash_type;<br>
- rte_convert_rss_key((const uint32_t *)key_in, (uint32_t *)rss_entry.key,<br>
- TAP_RSS_HASH_KEY_SIZE);<br>
-<br>
-<br>
- /* Add this RSS entry to map */<br>
- err = tap_flow_bpf_update_rss_elem(pmd->map_fd,<br>
- &flow->key_idx, &rss_entry);<br>
<br>
+ /* Add this way for BPF to find entry in map */<br>
+ err = bpf_map__update_elem(pmd->rss->maps.rss_map,<br>
+ &handle, sizeof(handle),<br>
+ &rss_entry, sizeof(rss_entry), 0);<br>
if (err) {<br>
TAP_LOG(ERR,<br>
- "Failed to update BPF map entry #%u (%d): %s",<br>
- flow->key_idx, errno, strerror(errno));<br>
+ "Failed to update BPF map entry %#x (%d): %s",<br>
+ handle, errno, strerror(errno));<br>
rte_flow_error_set(<br>
error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE, NULL,<br>
"Kernel too old or not configured "<br>
@@ -2103,47 +1900,28 @@ static int rss_add_actions(struct rte_flow *flow, struct pmd_internals *pmd,<br>
return -ENOTSUP;<br>
}<br>
<br>
-<br>
- /*<br>
- * Load bpf rules to calculate hash for this key_idx<br>
- */<br>
-<br>
- flow->bpf_fd[SEC_L3_L4] =<br>
- tap_flow_bpf_calc_l3_l4_hash(flow->key_idx, pmd->map_fd);<br>
- if (flow->bpf_fd[SEC_L3_L4] < 0) {<br>
- TAP_LOG(ERR,<br>
- "Failed to load BPF section %s (%d): %s",<br>
- sec_name[SEC_L3_L4], errno, strerror(errno));<br>
- rte_flow_error_set(<br>
- error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE, NULL,<br>
- "Kernel too old or not configured "<br>
- "to support BPF program loading");<br>
-<br>
- return -ENOTSUP;<br>
- }<br>
-<br>
- /* Actions */<br>
- {<br>
- struct action_data adata[] = {<br>
- {<br>
- .id = "bpf",<br>
- .bpf = {<br>
- .bpf_fd = flow->bpf_fd[SEC_L3_L4],<br>
- .annotation = sec_name[SEC_L3_L4],<br>
- .bpf = {<br>
- .action = TC_ACT_PIPE,<br>
- },<br>
- },<br>
+ /* Add actions to mark packet then run the RSS BPF program */<br>
+ struct action_data adata[] = {<br>
+ {<br>
+ .id = "skbedit",<br>
+ .skbedit = {<br>
+ .skbedit.action = TC_ACT_PIPE,<br>
+ .mark = handle,<br>
},<br>
- };<br>
-<br>
- if (add_actions(flow, RTE_DIM(adata), adata,<br>
- TCA_FLOWER_ACT) < 0)<br>
- return -1;<br>
- }<br>
+ },<br>
+ {<br>
+ .id = "bpf",<br>
+ .bpf = {<br>
+ .bpf.action = TC_ACT_PIPE,<br>
+ .annotation = "tap_rss",<br>
+ .bpf_fd = bpf_program__fd(rss_prog),<br>
+ },<br>
+ },<br>
+ };<br>
<br>
- return 0;<br>
+ return add_actions(flow, RTE_DIM(adata), adata, TCA_FLOWER_ACT);<br>
}<br>
+#endif<br>
<br>
/**<br>
* Get rte_flow operations.<br>
diff --git a/drivers/net/tap/tap_flow.h b/drivers/net/tap/tap_flow.h<br>
index 240fbc3dfa..8b19347a93 100644<br>
--- a/drivers/net/tap/tap_flow.h<br>
+++ b/drivers/net/tap/tap_flow.h<br>
@@ -9,7 +9,11 @@<br>
#include <rte_flow.h><br>
#include <rte_flow_driver.h><br>
#include <rte_eth_tap.h><br>
-#include <tap_autoconf.h><br>
+<br>
+/**<br>
+ * Mask of unsupported RSS types<br>
+ */<br>
+#define TAP_RSS_HF_MASK (~(RTE_ETH_RSS_IP | RTE_ETH_RSS_UDP | RTE_ETH_RSS_TCP))<br>
<br>
/**<br>
* In TC, priority 0 means we require the kernel to allocate one for us.<br>
@@ -41,11 +45,6 @@ enum implicit_rule_index {<br>
TAP_REMOTE_MAX_IDX,<br>
};<br>
<br>
-enum bpf_fd_idx {<br>
- SEC_L3_L4,<br>
- SEC_MAX,<br>
-};<br>
-<br>
int tap_dev_flow_ops_get(struct rte_eth_dev *dev,<br>
const struct rte_flow_ops **ops);<br>
int tap_flow_flush(struct rte_eth_dev *dev, struct rte_flow_error *error);<br>
@@ -57,10 +56,6 @@ int tap_flow_implicit_destroy(struct pmd_internals *pmd,<br>
int tap_flow_implicit_flush(struct pmd_internals *pmd,<br>
struct rte_flow_error *error);<br>
<br>
-int tap_flow_bpf_cls_q(__u32 queue_idx);<br>
-int tap_flow_bpf_calc_l3_l4_hash(__u32 key_idx, int map_fd);<br>
-int tap_flow_bpf_rss_map_create(unsigned int key_size, unsigned int value_size,<br>
- unsigned int max_entries);<br>
-int tap_flow_bpf_update_rss_elem(int fd, void *key, void *value);<br>
+void tap_flow_bpf_destroy(struct pmd_internals *pmd);<br>
<br>
#endif /* _TAP_FLOW_H_ */<br>
diff --git a/drivers/net/tap/tap_rss.h b/drivers/net/tap/tap_rss.h<br>
index 6009be7031..65bd8991b1 100644<br>
--- a/drivers/net/tap/tap_rss.h<br>
+++ b/drivers/net/tap/tap_rss.h<br>
@@ -5,16 +5,14 @@<br>
#ifndef _TAP_RSS_H_<br>
#define _TAP_RSS_H_<br>
<br>
-#ifndef TAP_MAX_QUEUES<br>
-#define TAP_MAX_QUEUES 16<br>
+/* Size of the map from BPF classid to queue table */<br>
+#ifndef TAP_RSS_MAX<br>
+#define TAP_RSS_MAX 32<br>
#endif<br>
<br>
-/* Fixed RSS hash key size in bytes. */<br>
+/* Standard Toeplitz hash key size */<br>
#define TAP_RSS_HASH_KEY_SIZE 40<br>
<br>
-/* Supported RSS */<br>
-#define TAP_RSS_HF_MASK (~(RTE_ETH_RSS_IP | RTE_ETH_RSS_UDP | RTE_ETH_RSS_TCP))<br>
-<br>
/* hashed fields for RSS */<br>
enum hash_field {<br>
HASH_FIELD_IPV4_L3, /* IPv4 src/dst addr */<br>
diff --git a/drivers/net/tap/tap_tcmsgs.h b/drivers/net/tap/tap_tcmsgs.h<br>
index a64cb29d6f..9411626661 100644<br>
--- a/drivers/net/tap/tap_tcmsgs.h<br>
+++ b/drivers/net/tap/tap_tcmsgs.h<br>
@@ -6,7 +6,6 @@<br>
#ifndef _TAP_TCMSGS_H_<br>
#define _TAP_TCMSGS_H_<br>
<br>
-#include <tap_autoconf.h><br>
#include <linux/if_ether.h><br>
#include <linux/rtnetlink.h><br>
#include <linux/pkt_sched.h><br>
@@ -14,9 +13,10 @@<br>
#include <linux/tc_act/tc_mirred.h><br>
#include <linux/tc_act/tc_gact.h><br>
#include <linux/tc_act/tc_skbedit.h><br>
-#ifdef HAVE_TC_ACT_BPF<br>
+#ifdef HAVE_BPF_RSS<br>
#include <linux/tc_act/tc_bpf.h><br>
#endif<br>
+<br>
#include <inttypes.h><br>
<br>
#include <rte_ether.h><br>
-- <br>
2.43.0<br>
<br>
</blockquote></div>