[PATCH v3 2/3] node/kernel_rx: support receiving packets from kernel
Nithin Dabilpuram
nithind1988 at gmail.com
Mon Jun 5 14:50:41 CEST 2023
Acked-by: Nithin Dabilpuram <ndabilpuram at marvell.com>
On Fri, Jun 2, 2023 at 9:52 PM Vamsi Attunuru <vattunuru at marvell.com> wrote:
>
> Adds a node to receive packets from kernel over
> a raw socket.
>
> Signed-off-by: Vamsi Attunuru <vattunuru at marvell.com>
> ---
> doc/guides/prog_guide/graph_lib.rst | 8 +
> lib/node/kernel_rx.c | 276 ++++++++++++++++++++++++++++
> lib/node/kernel_rx_priv.h | 48 +++++
> lib/node/meson.build | 1 +
> 4 files changed, 333 insertions(+)
>
> diff --git a/doc/guides/prog_guide/graph_lib.rst b/doc/guides/prog_guide/graph_lib.rst
> index fa22b014f3..4b05bcee3c 100644
> --- a/doc/guides/prog_guide/graph_lib.rst
> +++ b/doc/guides/prog_guide/graph_lib.rst
> @@ -401,3 +401,11 @@ socket interface to transmit the packets, it uses the packet's destination
> IP address in sockaddr_in address structure and ``sendto`` function to send
> data on the raw socket. After sending the burst of packets to kernel,
> this node frees up the packet buffers.
> +
> +kernel_rx
> +~~~~~~~~~
> +This node is a source node which receives packets from the kernel and forwards
> +them to any of the intermediate nodes. It uses the raw socket interface to
> +receive packets from the kernel: it calls ``poll`` on the socket fd to check for
> +``POLLIN`` events, reads the packets from the raw socket into the stream buffer,
> +and calls ``rte_node_next_stream_move()`` when packets have been received.
> diff --git a/lib/node/kernel_rx.c b/lib/node/kernel_rx.c
> new file mode 100644
> index 0000000000..2dba7c8cc7
> --- /dev/null
> +++ b/lib/node/kernel_rx.c
> @@ -0,0 +1,276 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(C) 2023 Marvell International Ltd.
> + */
> +
> +#include <fcntl.h>
> +#include <poll.h>
> +#include <stdlib.h>
> +#include <sys/ioctl.h>
> +#include <sys/socket.h>
> +#include <unistd.h>
> +
> +#include <rte_debug.h>
> +#include <rte_ethdev.h>
> +#include <rte_graph.h>
> +#include <rte_graph_worker.h>
> +#include <rte_ip.h>
> +#include <rte_malloc.h>
> +#include <rte_mbuf.h>
> +#include <rte_mempool.h>
> +#include <rte_net.h>
> +
> +#include "ethdev_rx_priv.h"
> +#include "kernel_rx_priv.h"
> +#include "node_private.h"
> +
> +/*
> + * Hand out one mbuf from the per-node cache held in ctx->recv_info.
> + *
> + * When every cached mbuf has been consumed (idx >= cnt) the cache is refilled
> + * with KERN_RX_CACHE_COUNT mbufs from ctx->pktmbuf_pool.
> + *
> + * Returns the next cached mbuf, or NULL if the refill failed.
> + */
> +static inline struct rte_mbuf *
> +alloc_rx_mbuf(kernel_rx_node_ctx_t *ctx)
> +{
> +	kernel_rx_info_t *rx = ctx->recv_info;
> +
> +	if (rx->idx >= rx->cnt) {
> +		rx->idx = 0;
> +		rx->cnt = 0;
> +
> +		/*
> +		 * rte_pktmbuf_alloc_bulk() reports a status, not a count: 0 means
> +		 * all requested mbufs were allocated, a negative value means none
> +		 * were. Storing it in an unsigned counter would turn success into
> +		 * "empty" and failure into a huge bogus count.
> +		 */
> +		if (rte_pktmbuf_alloc_bulk(ctx->pktmbuf_pool, rx->rx_bufs,
> +					   KERN_RX_CACHE_COUNT) != 0)
> +			return NULL;
> +
> +		rx->cnt = KERN_RX_CACHE_COUNT;
> +	}
> +
> +	return rx->rx_bufs[rx->idx++];
> +}
> +
> +/*
> + * Refresh the parse-derived metadata of nb_pkts mbufs: the packet type
> + * reported by rte_net_get_ptype(), cleared offload flags, and the L2/L3/L4
> + * header lengths.
> + */
> +static inline void
> +mbuf_update(struct rte_mbuf **mbufs, uint16_t nb_pkts)
> +{
> +	uint16_t i;
> +
> +	for (i = 0; i < nb_pkts; i++) {
> +		/*
> +		 * Start from zeroed lengths for every packet: rte_net_get_ptype()
> +		 * does not necessarily fill every field (e.g. when the frame is
> +		 * too short to parse), and stale or uninitialized values must not
> +		 * end up in the mbuf.
> +		 */
> +		struct rte_net_hdr_lens hdr_lens = {0};
> +		struct rte_mbuf *m = mbufs[i];
> +
> +		m->packet_type = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
> +
> +		/* Clear tx_offload first; the length fields below are written after it. */
> +		m->ol_flags = 0;
> +		m->tx_offload = 0;
> +
> +		m->l2_len = hdr_lens.l2_len;
> +		m->l3_len = hdr_lens.l3_len;
> +		m->l4_len = hdr_lens.l4_len;
> +	}
> +}
> +
> +/*
> + * Run mbuf_update() over a burst of received mbufs, four at a time, while
> + * prefetching the mbufs and packet data of the groups that follow.
> + *
> + * Returns nb_pkts unchanged.
> + */
> +static uint16_t
> +recv_pkt_parse(void **objs, uint16_t nb_pkts)
> +{
> +	struct rte_mbuf **burst = (struct rte_mbuf **)objs;
> +	uint16_t remaining = nb_pkts;
> +	int k;
> +
> +	/* Warm the cache with the packet data of the first group. */
> +	if (remaining >= 4) {
> +		k = 0;
> +		while (k < 4) {
> +			rte_prefetch0(rte_pktmbuf_mtod(burst[k], void *));
> +			k++;
> +		}
> +	}
> +
> +	for (; remaining >= 12; remaining -= 4, burst += 4) {
> +		/* Prefetch next-next mbufs */
> +		for (k = 8; k < 12; k++)
> +			rte_prefetch0(burst[k]);
> +
> +		/* Prefetch next mbuf data */
> +		for (k = 4; k < 8; k++)
> +			rte_prefetch0(rte_pktmbuf_mtod(burst[k], void *));
> +
> +		/* Extract ptype of mbufs */
> +		mbuf_update(burst, 4);
> +	}
> +
> +	/* Whatever is left (fewer than 12 packets) is handled in one call. */
> +	if (remaining != 0)
> +		mbuf_update(burst, remaining);
> +
> +	return nb_pkts;
> +}
> +
> +/*
> + * Receive up to one burst of packets from the node's socket into node->objs,
> + * parse them with recv_pkt_parse() and move the stream to the next node.
> + *
> + * Returns the number of packets received (0 if the socket is not open, no
> + * mbuf could be obtained, or nothing could be read).
> + */
> +static uint16_t
> +kernel_rx_node_do(struct rte_graph *graph, struct rte_node *node, kernel_rx_node_ctx_t *ctx)
> +{
> +	kernel_rx_info_t *rx = ctx->recv_info;
> +	uint16_t next_index = rx->node_next;
> +	struct rte_mbuf **mbufs;
> +	uint16_t count = 0;
> +	int fd = rx->sock;
> +	int nb_cnt, i;
> +
> +	if (fd <= 0)
> +		return 0;
> +
> +	nb_cnt = (node->size >= RTE_GRAPH_BURST_SIZE) ? RTE_GRAPH_BURST_SIZE : node->size;
> +	mbufs = (struct rte_mbuf **)node->objs;
> +
> +	for (i = 0; i < nb_cnt; i++) {
> +		struct rte_mbuf *m = alloc_rx_mbuf(ctx);
> +		ssize_t len;
> +
> +		if (!m)
> +			break;
> +
> +		/*
> +		 * MSG_DONTWAIT: the caller only knows that at least one packet is
> +		 * pending, so once the queue is drained the next receive must
> +		 * fail with EAGAIN instead of blocking the graph worker.
> +		 * The result is kept in ssize_t so that -1 is not confused with a
> +		 * valid length.
> +		 */
> +		len = recv(fd, rte_pktmbuf_mtod(m, char *), rte_pktmbuf_tailroom(m),
> +			   MSG_DONTWAIT);
> +		if (len <= 0) {
> +			/*
> +			 * Nothing was stored in the mbuf metadata, so hand it back
> +			 * to the cache by rewinding the index. It must not also be
> +			 * freed: the cache slot would then reference a freed mbuf.
> +			 * alloc_rx_mbuf() post-increments idx, so idx >= 1 here.
> +			 */
> +			rx->idx--;
> +			break;
> +		}
> +
> +		m->port = node->id;
> +		/* Single-segment packet: data_len and pkt_len are the same. */
> +		rte_pktmbuf_data_len(m) = (uint16_t)len;
> +		rte_pktmbuf_pkt_len(m) = (uint32_t)len;
> +
> +		*mbufs++ = m;
> +		count++;
> +	}
> +
> +	if (count) {
> +		recv_pkt_parse(node->objs, count);
> +		node->idx = count;
> +
> +		/* Enqueue to next node */
> +		rte_node_next_stream_move(graph, node, next_index);
> +	}
> +
> +	return count;
> +}
> +
> +/*
> + * Process callback of the kernel_rx node: check the node's socket for
> + * readable data with a zero-timeout poll() and, if POLLIN is reported,
> + * receive a burst through kernel_rx_node_do().
> + *
> + * objs and nb_objs are unused. Returns the result of kernel_rx_node_do(),
> + * or 0 when there is nothing to read.
> + */
> +static uint16_t
> +kernel_rx_node_process(struct rte_graph *graph, struct rte_node *node, void **objs,
> +		       uint16_t nb_objs)
> +{
> +	kernel_rx_node_ctx_t *ctx = (kernel_rx_node_ctx_t *)node->ctx;
> +	struct pollfd pfd;
> +
> +	RTE_SET_USED(objs);
> +	RTE_SET_USED(nb_objs);
> +
> +	if (ctx == NULL)
> +		return 0;
> +
> +	if (ctx->recv_info->sock <= 0)
> +		return 0;
> +
> +	pfd = (struct pollfd){.fd = ctx->recv_info->sock, .events = POLLIN};
> +
> +	/* Timeout of 0: report the current state and return immediately. */
> +	if (poll(&pfd, 1, 0) <= 0)
> +		return 0;
> +
> +	if ((pfd.revents & POLLIN) == 0)
> +		return 0;
> +
> +	return kernel_rx_node_do(graph, node, ctx);
> +}
> +
> +/*
> + * Node init callback: copy the context registered for this node id into
> + * node->ctx, allocate the receive state on the graph's socket and open the
> + * raw socket used for reception.
> + *
> + * Returns 0 on success, -EINVAL if no mbuf pool was configured, -ENOMEM if
> + * the receive state cannot be allocated, or the negated errno of socket().
> + * Aborts through RTE_VERIFY() if no list element matches node->id.
> + */
> +static int
> +kernel_rx_node_init(const struct rte_graph *graph, struct rte_node *node)
> +{
> +	struct kernel_rx_node_main *rx_node_main = kernel_rx_node_data_get();
> +	kernel_rx_node_ctx_t *ctx = (kernel_rx_node_ctx_t *)node->ctx;
> +	kernel_rx_node_elem_t *elem = rx_node_main->head;
> +	kernel_rx_info_t *recv_info;
> +	int sock;
> +
> +	while (elem) {
> +		if (elem->nid == node->id) {
> +			/* Update node specific context */
> +			memcpy(ctx, &elem->ctx, sizeof(kernel_rx_node_ctx_t));
> +			break;
> +		}
> +		elem = elem->next;
> +	}
> +
> +	RTE_VERIFY(elem != NULL);
> +
> +	if (ctx->pktmbuf_pool == NULL) {
> +		node_err("kernel_rx", "Invalid mbuf pool on graph %s\n", graph->name);
> +		return -EINVAL;
> +	}
> +
> +	/* Zeroed allocation: idx, cnt and node_next all start at 0. */
> +	recv_info = rte_zmalloc_socket("kernel_rx_info", sizeof(kernel_rx_info_t),
> +				       RTE_CACHE_LINE_SIZE, graph->socket);
> +	if (!recv_info) {
> +		node_err("kernel_rx", "Kernel recv_info is NULL\n");
> +		return -ENOMEM;
> +	}
> +
> +	/*
> +	 * NOTE(review): raw(7) describes IPPROTO_RAW sockets as send-only;
> +	 * confirm that packets are actually delivered to this socket.
> +	 */
> +	sock = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
> +	if (sock < 0) {
> +		/* Save errno before logging can overwrite it. */
> +		int err = errno;
> +
> +		node_err("kernel_rx", "Unable to open RAW socket\n");
> +		/* recv_info is not yet reachable from ctx, so release it here. */
> +		rte_free(recv_info);
> +		return -err;
> +	}
> +
> +	recv_info->sock = sock;
> +	ctx->recv_info = recv_info;
> +
> +	return 0;
> +}
> +
> +/*
> + * Node fini callback: return the unused cached mbufs to their pool, close
> + * the socket and release the receive state. Safe to call when recv_info was
> + * never set.
> + */
> +static void
> +kernel_rx_node_fini(const struct rte_graph *graph __rte_unused, struct rte_node *node)
> +{
> +	kernel_rx_node_ctx_t *ctx = (kernel_rx_node_ctx_t *)node->ctx;
> +	kernel_rx_info_t *rx = ctx->recv_info;
> +	uint16_t i, end;
> +
> +	if (rx == NULL)
> +		return;
> +
> +	/*
> +	 * Entries in [idx, cnt) were allocated for the cache but never handed
> +	 * out; without this they would stay allocated after the node is gone.
> +	 * The upper bound is clamped to the array size as a safety net.
> +	 */
> +	end = rx->cnt;
> +	if (end > KERN_RX_CACHE_COUNT)
> +		end = KERN_RX_CACHE_COUNT;
> +	for (i = rx->idx; i < end; i++)
> +		rte_pktmbuf_free(rx->rx_bufs[i]);
> +
> +	close(rx->sock);
> +	rx->sock = -1;
> +	rte_free(rx);
> +
> +	ctx->recv_info = NULL;
> +}
> +
> +/*
> + * Return the single kernel_rx_node_main instance of the program. It has
> + * static storage, so its list head starts out as NULL.
> + */
> +struct kernel_rx_node_main *
> +kernel_rx_node_data_get(void)
> +{
> +	static struct kernel_rx_node_main node_main;
> +	struct kernel_rx_node_main *main_ptr = &node_main;
> +
> +	return main_ptr;
> +}
> +
> +/* Registration record of the "kernel_rx" source node and its outgoing edges. */
> +static struct rte_node_register kernel_rx_node_base = {
> +	.name = "kernel_rx",
> +	.flags = RTE_NODE_SOURCE_F,
> +
> +	/* Callbacks */
> +	.init = kernel_rx_node_init,
> +	.process = kernel_rx_node_process,
> +	.fini = kernel_rx_node_fini,
> +
> +	/* Edges, indexed by enum kernel_rx_next_nodes */
> +	.nb_edges = KERNEL_RX_NEXT_MAX,
> +	.next_nodes = {
> +		[KERNEL_RX_NEXT_PKT_CLS] = "pkt_cls",
> +		[KERNEL_RX_NEXT_IP4_LOOKUP] = "ip4_lookup",
> +	},
> +};
> +
> +/* Return the registration record of the kernel_rx node. */
> +struct rte_node_register *
> +kernel_rx_node_get(void)
> +{
> +	struct rte_node_register *reg = &kernel_rx_node_base;
> +
> +	return reg;
> +}
> +
> +RTE_NODE_REGISTER(kernel_rx_node_base);
> diff --git a/lib/node/kernel_rx_priv.h b/lib/node/kernel_rx_priv.h
> new file mode 100644
> index 0000000000..f1aa2344d7
> --- /dev/null
> +++ b/lib/node/kernel_rx_priv.h
> @@ -0,0 +1,48 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(C) 2023 Marvell International Ltd.
> + */
> +
> +#ifndef __KERNEL_RX_PRIV_H__
> +#define __KERNEL_RX_PRIV_H__
> +
> +#define KERN_RX_CACHE_COUNT 64 /* Number of entries in the rx_bufs mbuf cache. */
> +
> +typedef struct kernel_rx_info { /* Receive state of one kernel_rx node. */
> + struct rte_mbuf *rx_bufs[KERN_RX_CACHE_COUNT]; /* Cache of pre-allocated mbufs. */
> + uint16_t node_next; /* Edge index the received stream is moved to. */
> + uint16_t idx; /* Index of the next unused mbuf in rx_bufs. */
> + uint16_t cnt; /* Number of valid mbufs in rx_bufs. */
> + int sock; /* File descriptor of the receive socket. */
> +} kernel_rx_info_t;
> +
> +/* kernel_rx node context structure; node init copies it into the node's ctx area */
> +typedef struct kernel_rx_node_ctx {
> + struct rte_mempool *pktmbuf_pool; /* Pool the receive mbufs are allocated from. */
> + kernel_rx_info_t *recv_info; /* Receive state; allocated in node init, freed in node fini. */
> +} kernel_rx_node_ctx_t;
> +
> +/* kernel_rx node list element structure; node init looks its element up by nid */
> +typedef struct kernel_rx_node_elem {
> + struct kernel_rx_node_elem *next; /* Pointer to the next node element, NULL at the end. */
> + struct kernel_rx_node_ctx ctx; /* kernel_rx node context copied into the node at init. */
> + rte_node_t nid; /* Node identifier of the kernel_rx node. */
> +} kernel_rx_node_elem_t;
> +
> +enum kernel_rx_next_nodes { /* Edge indices of the kernel_rx node. */
> + KERNEL_RX_NEXT_PKT_CLS, /* Edge to the "pkt_cls" node. */
> + KERNEL_RX_NEXT_IP4_LOOKUP, /* Edge to the "ip4_lookup" node. */
> + KERNEL_RX_NEXT_MAX, /* Number of edges; not an edge itself. */
> +};
> +
> +/* kernel_rx node main structure: anchor of the list of kernel_rx node elements */
> +struct kernel_rx_node_main {
> + kernel_rx_node_elem_t *head; /* Pointer to the head node element. */
> +};
> +
> +/* Get the pointer to the kernel_rx node main structure (the node element list anchor) */
> +struct kernel_rx_node_main *kernel_rx_node_data_get(void);
> +
> +/* Get the pointer to the kernel_rx node register structure */
> +struct rte_node_register *kernel_rx_node_get(void);
> +
> +#endif /* __KERNEL_RX_PRIV_H__ */
> diff --git a/lib/node/meson.build b/lib/node/meson.build
> index 0520be23ff..1f04fd1d7e 100644
> --- a/lib/node/meson.build
> +++ b/lib/node/meson.build
> @@ -13,6 +13,7 @@ sources = files(
> 'ethdev_tx.c',
> 'ip4_lookup.c',
> 'ip4_rewrite.c',
> + 'kernel_rx.c',
> 'kernel_tx.c',
> 'log.c',
> 'null.c',
> --
> 2.25.1
>
More information about the dev
mailing list