[dpdk-dev] [PATCH v2 1/6] net/af_xdp: introduce AF XDP PMD driver

Stephen Hemminger stephen at networkplumber.org
Tue Mar 19 17:14:17 CET 2019


Lots of little review comments. This is what I saw in 30 minutes.
Expect more.


On Tue, 19 Mar 2019 15:12:51 +0800
Xiaolong Ye <xiaolong.ye at intel.com> wrote:

> +	nb_pkts = nb_pkts < ETH_AF_XDP_RX_BATCH_SIZE ?
> +		nb_pkts : ETH_AF_XDP_RX_BATCH_SIZE;

Maybe use RTE_MIN() ?

> +		mbuf = rte_pktmbuf_alloc(rxq->mb_pool);
> +		if (mbuf) {
> +			memcpy(rte_pktmbuf_mtod(mbuf, void*), pkt, len);

Space necessary in "void*"
Use rte_memcpy.

> +static void pull_umem_cq(struct xsk_umem_info *umem, int size)
> +{
> +	struct xsk_ring_cons *cq = &umem->cq;
> +	int i, n;
> +	uint32_t idx_cq;
> +	uint64_t addr;
> +
> +	n = xsk_ring_cons__peek(cq, size, &idx_cq);
> +	if (n > 0) {
> +		for (i = 0; i < n; i++) {

You don't need the if (n > 0) since if n <= 0 the for loop
would happen 0 times.

> +
> +static void kick_tx(struct pkt_tx_queue *txq)
> +{
> +	struct xsk_umem_info *umem = txq->pair->umem;
> +	int ret;
> +
> +	while (1) {
> +		ret = sendto(xsk_socket__fd(txq->pair->xsk), NULL, 0,
> +			     MSG_DONTWAIT, NULL, 0);
> +
> +		/* everything is ok */
> +		if (ret >= 0)
> +			break;

I would prefer:
	while ((send(xsk_socket__fd(fd, NULL, 0, MSG_DONTWAIT) < 0) {

Because:
	- use while() to make looping clearer rather than while(1)
	- use send() rather than sendto() because you aren't sending with addr
	- you don't care about return value (ie.ret)

> +
> +		/* some thing unexpected */
> +		if (errno != EBUSY && errno != EAGAIN)
> +			break;

What about EINTR



> +static void
> +eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
> +{
> +	struct pmd_internals *internals = dev->data->dev_private;
> +
> +	dev_info->if_index = internals->if_index;
> +	dev_info->max_mac_addrs = 1;
> +	dev_info->max_rx_pktlen = (uint32_t)ETH_FRAME_LEN;

Cast here is unnecessary.

> +	dev_info->max_rx_queues = 1;
> +	dev_info->max_tx_queues = 1;
> +	dev_info->min_rx_bufsize = 0;

dev_info is already zero, don't need to fill other values.

> +
> +static void
> +eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
> +{
> +	struct pmd_internals *internals = dev->data->dev_private;
> +
> +	dev_info->if_index = internals->if_index;
> +	dev_info->max_mac_addrs = 1;
> +	dev_info->max_rx_pktlen = (uint32_t)ETH_FRAME_LEN;
> +	dev_info->max_rx_queues = 1;
> +	dev_info->max_tx_queues = 1;
> +	dev_info->min_rx_bufsize = 0;
> +
> +	dev_info->default_rxportconf.nb_queues = 1;
> +	dev_info->default_txportconf.nb_queues = 1;
> +	dev_info->default_rxportconf.ring_size = ETH_AF_XDP_DFLT_NUM_DESCS;
> +	dev_info->default_txportconf.ring_size = ETH_AF_XDP_DFLT_NUM_DESCS;
> +}
> +
> +static int
> +eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
> +{
> +	struct pmd_internals *internals = dev->data->dev_private;
> +	struct xdp_statistics xdp_stats;
> +	struct pkt_rx_queue *rxq;
> +	socklen_t optlen;
> +	int i;
> +
> +	optlen = sizeof(struct xdp_statistics);

In theory each call to getsockopt() could change or reduce the value of
optlen. Best to initialize in loop before each call.

> +	for (i = 0; i < dev->data->nb_rx_queues; i++) {
> +		rxq = &internals->rx_queues[i];
> +		stats->q_ipackets[i] = internals->rx_queues[i].rx_pkts;
> +		stats->q_ibytes[i] = internals->rx_queues[i].rx_bytes;
> +
> +		stats->q_opackets[i] = internals->tx_queues[i].tx_pkts;
> +		stats->q_obytes[i] = internals->tx_queues[i].tx_bytes;
> +
> +		stats->ipackets += stats->q_ipackets[i];
> +		stats->ibytes += stats->q_ibytes[i];
> +		stats->imissed += internals->rx_queues[i].rx_dropped;
> +		getsockopt(xsk_socket__fd(rxq->xsk), SOL_XDP, XDP_STATISTICS,
> +				&xdp_stats, &optlen);

You need to check return value of getsockopt() otherwise coverity will complain.

> +static void
> +eth_stats_reset(struct rte_eth_dev *dev)
> +{
> +	struct pmd_internals *internals = dev->data->dev_private;
> +	int i;
> +
> +	for (i = 0; i < ETH_AF_XDP_MAX_QUEUE_PAIRS; i++) {
> +		internals->rx_queues[i].rx_pkts = 0;
> +		internals->rx_queues[i].rx_bytes = 0;
> +		internals->rx_queues[i].rx_dropped = 0;
> +
> +		internals->tx_queues[i].tx_pkts = 0;
> +		internals->tx_queues[i].err_pkts = 0;
> +		internals->tx_queues[i].tx_bytes = 0;

Put all the statistics together and use memset?

> +static struct xsk_umem_info *xdp_umem_configure(void)
> +{
> +	struct xsk_umem_info *umem;
> +	struct xsk_umem_config usr_config = {
> +		.fill_size = ETH_AF_XDP_DFLT_NUM_DESCS,
> +		.comp_size = ETH_AF_XDP_DFLT_NUM_DESCS,
> +		.frame_size = ETH_AF_XDP_FRAME_SIZE,
> +		.frame_headroom = ETH_AF_XDP_DATA_HEADROOM };
> +	void *bufs = NULL;
> +	char ring_name[0x100];

0x100 is unconvential here. Instead use RTE_RING_NAMESIZE
but variable is unnecessary, see below

> +	int ret;
> +	uint64_t i;
> +
> +	umem = calloc(1, sizeof(*umem));

Why not use rte_zmalloc_node to:
   1. work with primary/secondary
   2. guarantee memory on correct numa node?

> +	if (!umem) {
> +		RTE_LOG(ERR, AF_XDP, "Failed to allocate umem info");
> +		return NULL;
> +	}
> +
> +	snprintf(ring_name, 0x100, "af_xdp_ring");

If ring is always the same, why copy it. Just use string literal

> +/** parse name argument */
> +static int
> +parse_name_arg(const char *key __rte_unused,
> +	       const char *value, void *extra_args)
> +{
> +	char *name = extra_args;
> +
> +	if (strlen(value) > IFNAMSIZ) {

Why not:
	if (strnlen(value, IFNAMSIZ) >= IFNAMSIZ) {

> +
> +static int
> +init_internals(struct rte_vdev_device *dev,
> +	       const char *if_name,
> +	       int queue_idx,
> +	       struct rte_eth_dev **eth_dev)

If you changed the function to return the new eth_dev (and return NULL on error)
then you wouldn't have to pass eth_dev by reference.

static struct rte_eth_dev *
allocate_ethdev(struct rte_vdev_device *dev, const char *if_name, uint16_t queue_idx)
{ 

> +{
> +	const char *name = rte_vdev_device_name(dev);
> +	const unsigned int numa_node = dev->device.numa_node;
> +	struct pmd_internals *internals = NULL;

Useless initialization, first thing you do is allocate this.


> +	int ret;
> +	int i;
> +
> +	internals = rte_zmalloc_socket(name, sizeof(*internals), 0, numa_node);



More information about the dev mailing list