[dpdk-dev] [PATCH v9 04/10] baseband/acc100: add queue configuration

Chautru, Nicolas nicolas.chautru at intel.com
Wed Sep 30 03:03:29 CEST 2020


Hi Tom, 

> From: Tom Rix <trix at redhat.com>
> On 9/28/20 5:29 PM, Nicolas Chautru wrote:
> > Adding function to create and configure queues for the device. Still
> > no capability.
> >
> > Signed-off-by: Nicolas Chautru <nicolas.chautru at intel.com>
> > Reviewed-by: Rosen Xu <rosen.xu at intel.com>
> > Acked-by: Liu Tianjiao <Tianjiao.liu at intel.com>
> > ---
> >  drivers/baseband/acc100/rte_acc100_pmd.c | 420
> > ++++++++++++++++++++++++++++++-
> > drivers/baseband/acc100/rte_acc100_pmd.h |  45 ++++
> >  2 files changed, 464 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/baseband/acc100/rte_acc100_pmd.c
> > b/drivers/baseband/acc100/rte_acc100_pmd.c
> > index 7807a30..7a21c57 100644
> > --- a/drivers/baseband/acc100/rte_acc100_pmd.c
> > +++ b/drivers/baseband/acc100/rte_acc100_pmd.c
> > @@ -26,6 +26,22 @@
> >  RTE_LOG_REGISTER(acc100_logtype, pmd.bb.acc100, NOTICE);  #endif
> >
> > +/* Write to MMIO register address */
> > +static inline void
> > +mmio_write(void *addr, uint32_t value) {
> > +	*((volatile uint32_t *)(addr)) = rte_cpu_to_le_32(value); }
> > +
> > +/* Write a register of a ACC100 device */ static inline void
> > +acc100_reg_write(struct acc100_device *d, uint32_t offset, uint32_t
> > +payload) {
> > +	void *reg_addr = RTE_PTR_ADD(d->mmio_base, offset);
> > +	mmio_write(reg_addr, payload);
> > +	usleep(1000);
> rte_acc100_pmd.h defines LONG_WAIT; could this #define be used instead?

ok

> > +}
> > +
> >  /* Read a register of a ACC100 device */  static inline uint32_t
> > acc100_reg_read(struct acc100_device *d, uint32_t offset) @@ -36,6
> > +52,22 @@
> >  	return rte_le_to_cpu_32(ret);
> >  }
> >
> > +/* Basic Implementation of Log2 for exact 2^N */ static inline
> > +uint32_t log2_basic(uint32_t value)
> mirrors the function rte_bsf32

rte_bsf32 is also undefined for zero input.
I could indeed just replace __builtin_ctz() with rte_bsf32().

> > +{
> > +	return (value == 0) ? 0 : __builtin_ctz(value); }
> > +
> > +/* Calculate memory alignment offset assuming alignment is 2^N */
> > +static inline uint32_t calc_mem_alignment_offset(void
> > +*unaligned_virt_mem, uint32_t alignment) {
> > +	rte_iova_t unaligned_phy_mem =
> rte_malloc_virt2iova(unaligned_virt_mem);
> > +	return (uint32_t)(alignment -
> > +			(unaligned_phy_mem & (alignment-1))); }
> > +
> >  /* Calculate the offset of the enqueue register */  static inline
> > uint32_t  queue_offset(bool pf_device, uint8_t vf_id, uint8_t qgrp_id,
> > uint16_t aq_id) @@ -204,10 +236,393 @@
> >  			acc100_conf->q_dl_5g.aq_depth_log2);
> >  }
> >
> > +static void
> > +free_base_addresses(void **base_addrs, int size) {
> > +	int i;
> > +	for (i = 0; i < size; i++)
> > +		rte_free(base_addrs[i]);
> > +}
> > +
> > +static inline uint32_t
> > +get_desc_len(void)
> > +{
> > +	return sizeof(union acc100_dma_desc); }
> > +
> > +/* Allocate the 2 * 64MB block for the sw rings */ static int
> > +alloc_2x64mb_sw_rings_mem(struct rte_bbdev *dev, struct
> acc100_device *d,
> > +		int socket)
> see earlier comment about name of function.

replied in other patch set

> > +{
> > +	uint32_t sw_ring_size = ACC100_SIZE_64MBYTE;
> > +	d->sw_rings_base = rte_zmalloc_socket(dev->device->driver->name,
> > +			2 * sw_ring_size, RTE_CACHE_LINE_SIZE, socket);
> > +	if (d->sw_rings_base == NULL) {
> > +		rte_bbdev_log(ERR, "Failed to allocate memory for %s:%u",
> > +				dev->device->driver->name,
> > +				dev->data->dev_id);
> > +		return -ENOMEM;
> > +	}
> > +	memset(d->sw_rings_base, 0, ACC100_SIZE_64MBYTE);
> > +	uint32_t next_64mb_align_offset = calc_mem_alignment_offset(
> > +			d->sw_rings_base, ACC100_SIZE_64MBYTE);
> > +	d->sw_rings = RTE_PTR_ADD(d->sw_rings_base,
> next_64mb_align_offset);
> > +	d->sw_rings_phys = rte_malloc_virt2iova(d->sw_rings_base) +
> > +			next_64mb_align_offset;
> > +	d->sw_ring_size = MAX_QUEUE_DEPTH * get_desc_len();
> > +	d->sw_ring_max_depth = d->sw_ring_size / get_desc_len();
> > +
> > +	return 0;
> > +}
> > +
> > +/* Attempt to allocate minimised memory space for sw rings */ static
> > +void alloc_sw_rings_min_mem(struct rte_bbdev *dev, struct
> > +acc100_device *d,
> > +		uint16_t num_queues, int socket)
> > +{
> > +	rte_iova_t sw_rings_base_phy, next_64mb_align_addr_phy;
> > +	uint32_t next_64mb_align_offset;
> > +	rte_iova_t sw_ring_phys_end_addr;
> > +	void *base_addrs[SW_RING_MEM_ALLOC_ATTEMPTS];
> > +	void *sw_rings_base;
> > +	int i = 0;
> > +	uint32_t q_sw_ring_size = MAX_QUEUE_DEPTH * get_desc_len();
> > +	uint32_t dev_sw_ring_size = q_sw_ring_size * num_queues;
> > +
> > +	/* Find an aligned block of memory to store sw rings */
> > +	while (i < SW_RING_MEM_ALLOC_ATTEMPTS) {
> > +		/*
> > +		 * sw_ring allocated memory is guaranteed to be aligned to
> > +		 * q_sw_ring_size at the condition that the requested size is
> > +		 * less than the page size
> > +		 */
> > +		sw_rings_base = rte_zmalloc_socket(
> > +				dev->device->driver->name,
> > +				dev_sw_ring_size, q_sw_ring_size, socket);
> > +
> > +		if (sw_rings_base == NULL) {
> > +			rte_bbdev_log(ERR,
> > +					"Failed to allocate memory for
> %s:%u",
> > +					dev->device->driver->name,
> > +					dev->data->dev_id);
> > +			break;
> > +		}
> > +
> > +		sw_rings_base_phy = rte_malloc_virt2iova(sw_rings_base);
> > +		next_64mb_align_offset = calc_mem_alignment_offset(
> > +				sw_rings_base, ACC100_SIZE_64MBYTE);
> > +		next_64mb_align_addr_phy = sw_rings_base_phy +
> > +				next_64mb_align_offset;
> > +		sw_ring_phys_end_addr = sw_rings_base_phy +
> dev_sw_ring_size;
> > +
> > +		/* Check if the end of the sw ring memory block is before the
> > +		 * start of next 64MB aligned mem address
> > +		 */
> > +		if (sw_ring_phys_end_addr < next_64mb_align_addr_phy) {
> > +			d->sw_rings_phys = sw_rings_base_phy;
> > +			d->sw_rings = sw_rings_base;
> > +			d->sw_rings_base = sw_rings_base;
> > +			d->sw_ring_size = q_sw_ring_size;
> > +			d->sw_ring_max_depth = MAX_QUEUE_DEPTH;
> > +			break;
> > +		}
> > +		/* Store the address of the unaligned mem block */
> > +		base_addrs[i] = sw_rings_base;
> > +		i++;
> > +	}
> > +
> 
> This looks like a bug.
> 
> Freeing memory that was just allocated.
> 
> Looks like it could be part of an error handler for memory access in the loop
> failing.

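You are not the first person to raise concerns in this series about that piece of code.
I agree this is a bit convoluted, but it is functionally correct: only the blocks rejected by the 64MB-boundary check are stored in base_addrs and freed after the loop, while the block that is kept is never added to that array.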

> 
> There should be a better way to allocate aligned memory like round up the
> size and use an offset to the alignment you need.

This is actually the fallback option below, used in case the first iterative option fails (it is more wasteful in memory).
If that really looks too dodgy, we could skip the first attempt and go directly to the second option, which is more wasteful,
but the current code does what it is supposed to do, hence it is ok to me as it is.
Let me know what you think.

> 
> > +	/* Free all unaligned blocks of mem allocated in the loop */
> > +	free_base_addresses(base_addrs, i);
> > +}
> > +
> > +
> > +/* Allocate 64MB memory used for all software rings */ static int
> > +acc100_setup_queues(struct rte_bbdev *dev, uint16_t num_queues, int
> > +socket_id) {
> > +	uint32_t phys_low, phys_high, payload;
> > +	struct acc100_device *d = dev->data->dev_private;
> > +	const struct acc100_registry_addr *reg_addr;
> > +
> > +	if (d->pf_device && !d->acc100_conf.pf_mode_en) {
> > +		rte_bbdev_log(NOTICE,
> > +				"%s has PF mode disabled. This PF can't be
> used.",
> > +				dev->data->name);
> > +		return -ENODEV;
> > +	}
> > +
> > +	alloc_sw_rings_min_mem(dev, d, num_queues, socket_id);
> > +
> > +	/* If minimal memory space approach failed, then allocate
> > +	 * the 2 * 64MB block for the sw rings
> > +	 */
> > +	if (d->sw_rings == NULL)
> > +		alloc_2x64mb_sw_rings_mem(dev, d, socket_id);
> This can fail as well, but is unhandled.

ok can add. 

> > +
> > +	/* Configure ACC100 with the base address for DMA descriptor rings
> > +	 * Same descriptor rings used for UL and DL DMA Engines
> > +	 * Note : Assuming only VF0 bundle is used for PF mode
> > +	 */
> > +	phys_high = (uint32_t)(d->sw_rings_phys >> 32);
> > +	phys_low  = (uint32_t)(d->sw_rings_phys &
> ~(ACC100_SIZE_64MBYTE-1));
> > +
> > +	/* Choose correct registry addresses for the device type */
> > +	if (d->pf_device)
> > +		reg_addr = &pf_reg_addr;
> > +	else
> > +		reg_addr = &vf_reg_addr;
> could reg_addr be part of acc100_device struct ?

In my opinion, it is not really useful to keep this as part of the device data.

> > +
> > +	/* Read the populated cfg from ACC100 registers */
> > +	fetch_acc100_config(dev);
> > +
> > +	/* Mark as configured properly */
> > +	d->configured = true;
> should set configured at the end, as the function can still fail.

ok

> > +
> > +	/* Release AXI from PF */
> > +	if (d->pf_device)
> > +		acc100_reg_write(d, HWPfDmaAxiControl, 1);
> > +
> > +	acc100_reg_write(d, reg_addr->dma_ring_ul5g_hi, phys_high);
> > +	acc100_reg_write(d, reg_addr->dma_ring_ul5g_lo, phys_low);
> > +	acc100_reg_write(d, reg_addr->dma_ring_dl5g_hi, phys_high);
> > +	acc100_reg_write(d, reg_addr->dma_ring_dl5g_lo, phys_low);
> > +	acc100_reg_write(d, reg_addr->dma_ring_ul4g_hi, phys_high);
> > +	acc100_reg_write(d, reg_addr->dma_ring_ul4g_lo, phys_low);
> > +	acc100_reg_write(d, reg_addr->dma_ring_dl4g_hi, phys_high);
> > +	acc100_reg_write(d, reg_addr->dma_ring_dl4g_lo, phys_low);
> > +
> > +	/*
> > +	 * Configure Ring Size to the max queue ring size
> > +	 * (used for wrapping purpose)
> > +	 */
> > +	payload = log2_basic(d->sw_ring_size / 64);
> > +	acc100_reg_write(d, reg_addr->ring_size, payload);
> > +
> > +	/* Configure tail pointer for use when SDONE enabled */
> > +	d->tail_ptrs = rte_zmalloc_socket(
> > +			dev->device->driver->name,
> > +			ACC100_NUM_QGRPS * ACC100_NUM_AQS *
> sizeof(uint32_t),
> > +			RTE_CACHE_LINE_SIZE, socket_id);
> > +	if (d->tail_ptrs == NULL) {
> > +		rte_bbdev_log(ERR, "Failed to allocate tail ptr for %s:%u",
> > +				dev->device->driver->name,
> > +				dev->data->dev_id);
> > +		rte_free(d->sw_rings);
> > +		return -ENOMEM;
> > +	}
> > +	d->tail_ptr_phys = rte_malloc_virt2iova(d->tail_ptrs);
> > +
> > +	phys_high = (uint32_t)(d->tail_ptr_phys >> 32);
> > +	phys_low  = (uint32_t)(d->tail_ptr_phys);
> > +	acc100_reg_write(d, reg_addr->tail_ptrs_ul5g_hi, phys_high);
> > +	acc100_reg_write(d, reg_addr->tail_ptrs_ul5g_lo, phys_low);
> > +	acc100_reg_write(d, reg_addr->tail_ptrs_dl5g_hi, phys_high);
> > +	acc100_reg_write(d, reg_addr->tail_ptrs_dl5g_lo, phys_low);
> > +	acc100_reg_write(d, reg_addr->tail_ptrs_ul4g_hi, phys_high);
> > +	acc100_reg_write(d, reg_addr->tail_ptrs_ul4g_lo, phys_low);
> > +	acc100_reg_write(d, reg_addr->tail_ptrs_dl4g_hi, phys_high);
> > +	acc100_reg_write(d, reg_addr->tail_ptrs_dl4g_lo, phys_low);
> > +
> > +	d->harq_layout = rte_zmalloc_socket("HARQ Layout",
> > +			ACC100_HARQ_LAYOUT * sizeof(*d->harq_layout),
> > +			RTE_CACHE_LINE_SIZE, dev->data->socket_id);
> unchecked

ok will add. 

> > +
> > +	rte_bbdev_log_debug(
> > +			"ACC100 (%s) configured  sw_rings = %p,
> sw_rings_phys = %#"
> > +			PRIx64, dev->data->name, d->sw_rings, d-
> >sw_rings_phys);
> > +
> > +	return 0;
> > +}
> > +
> >  /* Free 64MB memory used for software rings */  static int
> > -acc100_dev_close(struct rte_bbdev *dev  __rte_unused)
> > +acc100_dev_close(struct rte_bbdev *dev)
> >  {
> > +	struct acc100_device *d = dev->data->dev_private;
> > +	if (d->sw_rings_base != NULL) {
> > +		rte_free(d->tail_ptrs);
> > +		rte_free(d->sw_rings_base);
> > +		d->sw_rings_base = NULL;
> > +	}
> > +	usleep(1000);
> similar LONG_WAIT

ok

> > +	return 0;
> > +}
> > +
> > +
> > +/**
> > + * Report a ACC100 queue index which is free
> > + * Return 0 to 16k for a valid queue_idx or -1 when no queue is
> > +available
> > + * Note : Only supporting VF0 Bundle for PF mode  */ static int
> > +acc100_find_free_queue_idx(struct rte_bbdev *dev,
> > +		const struct rte_bbdev_queue_conf *conf) {
> > +	struct acc100_device *d = dev->data->dev_private;
> > +	int op_2_acc[5] = {0, UL_4G, DL_4G, UL_5G, DL_5G};
> > +	int acc = op_2_acc[conf->op_type];
> > +	struct rte_q_topology_t *qtop = NULL;
> > +	qtopFromAcc(&qtop, acc, &(d->acc100_conf));
> > +	if (qtop == NULL)
> > +		return -1;
> > +	/* Identify matching QGroup Index which are sorted in priority order
> */
> > +	uint16_t group_idx = qtop->first_qgroup_index;
> > +	group_idx += conf->priority;
> > +	if (group_idx >= ACC100_NUM_QGRPS ||
> > +			conf->priority >= qtop->num_qgroups) {
> > +		rte_bbdev_log(INFO, "Invalid Priority on %s, priority %u",
> > +				dev->data->name, conf->priority);
> > +		return -1;
> > +	}
> > +	/* Find a free AQ_idx  */
> > +	uint16_t aq_idx;
> > +	for (aq_idx = 0; aq_idx < qtop->num_aqs_per_groups; aq_idx++) {
> > +		if (((d->q_assigned_bit_map[group_idx] >> aq_idx) & 0x1) ==
> 0) {
> > +			/* Mark the Queue as assigned */
> > +			d->q_assigned_bit_map[group_idx] |= (1 << aq_idx);
> > +			/* Report the AQ Index */
> > +			return (group_idx << GRP_ID_SHIFT) + aq_idx;
> > +		}
> > +	}
> > +	rte_bbdev_log(INFO, "Failed to find free queue on %s, priority %u",
> > +			dev->data->name, conf->priority);
> > +	return -1;
> > +}
> > +
> > +/* Setup ACC100 queue */
> > +static int
> > +acc100_queue_setup(struct rte_bbdev *dev, uint16_t queue_id,
> > +		const struct rte_bbdev_queue_conf *conf) {
> > +	struct acc100_device *d = dev->data->dev_private;
> > +	struct acc100_queue *q;
> > +	int16_t q_idx;
> > +
> > +	/* Allocate the queue data structure. */
> > +	q = rte_zmalloc_socket(dev->device->driver->name, sizeof(*q),
> > +			RTE_CACHE_LINE_SIZE, conf->socket);
> > +	if (q == NULL) {
> > +		rte_bbdev_log(ERR, "Failed to allocate queue memory");
> > +		return -ENOMEM;
> > +	}
> > +
> > +	q->d = d;
> > +	q->ring_addr = RTE_PTR_ADD(d->sw_rings, (d->sw_ring_size *
> queue_id));
> > +	q->ring_addr_phys = d->sw_rings_phys + (d->sw_ring_size *
> queue_id);
> > +
> > +	/* Prepare the Ring with default descriptor format */
> > +	union acc100_dma_desc *desc = NULL;
> > +	unsigned int desc_idx, b_idx;
> > +	int fcw_len = (conf->op_type == RTE_BBDEV_OP_LDPC_ENC ?
> > +		ACC100_FCW_LE_BLEN : (conf->op_type ==
> RTE_BBDEV_OP_TURBO_DEC ?
> > +		ACC100_FCW_TD_BLEN : ACC100_FCW_LD_BLEN));
> > +
> > +	for (desc_idx = 0; desc_idx < d->sw_ring_max_depth; desc_idx++) {
> > +		desc = q->ring_addr + desc_idx;
> > +		desc->req.word0 = ACC100_DMA_DESC_TYPE;
> > +		desc->req.word1 = 0; /**< Timestamp */
> > +		desc->req.word2 = 0;
> > +		desc->req.word3 = 0;
> > +		uint64_t fcw_offset = (desc_idx << 8) +
> ACC100_DESC_FCW_OFFSET;
> > +		desc->req.data_ptrs[0].address = q->ring_addr_phys +
> fcw_offset;
> > +		desc->req.data_ptrs[0].blen = fcw_len;
> > +		desc->req.data_ptrs[0].blkid = ACC100_DMA_BLKID_FCW;
> > +		desc->req.data_ptrs[0].last = 0;
> > +		desc->req.data_ptrs[0].dma_ext = 0;
> > +		for (b_idx = 1; b_idx < ACC100_DMA_MAX_NUM_POINTERS -
> 1;
> > +				b_idx++) {
> > +			desc->req.data_ptrs[b_idx].blkid =
> ACC100_DMA_BLKID_IN;
> > +			desc->req.data_ptrs[b_idx].last = 1;
> > +			desc->req.data_ptrs[b_idx].dma_ext = 0;
> > +			b_idx++;
> 
> This works, but it would be better to only inc the index in the for loop
> statement.
> 
> The second data set should be accessed as [b_idx+1]
> 
> And the loop inc by +2

Matter of preference maybe? 

> 
> > +			desc->req.data_ptrs[b_idx].blkid =
> > +					ACC100_DMA_BLKID_OUT_ENC;
> > +			desc->req.data_ptrs[b_idx].last = 1;
> > +			desc->req.data_ptrs[b_idx].dma_ext = 0;
> > +		}
> > +		/* Preset some fields of LDPC FCW */
> > +		desc->req.fcw_ld.FCWversion = ACC100_FCW_VER;
> > +		desc->req.fcw_ld.gain_i = 1;
> > +		desc->req.fcw_ld.gain_h = 1;
> > +	}
> > +
> > +	q->lb_in = rte_zmalloc_socket(dev->device->driver->name,
> > +			RTE_CACHE_LINE_SIZE,
> > +			RTE_CACHE_LINE_SIZE, conf->socket);
> > +	if (q->lb_in == NULL) {
> 
> q is not freed.

ok thanks

> 
> > +		rte_bbdev_log(ERR, "Failed to allocate lb_in memory");
> > +		return -ENOMEM;
> > +	}
> > +	q->lb_in_addr_phys = rte_malloc_virt2iova(q->lb_in);
> > +	q->lb_out = rte_zmalloc_socket(dev->device->driver->name,
> > +			RTE_CACHE_LINE_SIZE,
> > +			RTE_CACHE_LINE_SIZE, conf->socket);
> > +	if (q->lb_out == NULL) {
> > +		rte_bbdev_log(ERR, "Failed to allocate lb_out memory");
> > +		return -ENOMEM;
> 
> q->lb_in is not freed
> 
> q is not freed

ok too thanks

> 
> > +	}
> > +	q->lb_out_addr_phys = rte_malloc_virt2iova(q->lb_out);
> > +
> > +	/*
> > +	 * Software queue ring wraps synchronously with the HW when it
> reaches
> > +	 * the boundary of the maximum allocated queue size, no matter
> what the
> > +	 * sw queue size is. This wrapping is guarded by setting the
> wrap_mask
> > +	 * to represent the maximum queue size as allocated at the time
> when
> > +	 * the device has been setup (in configure()).
> > +	 *
> > +	 * The queue depth is set to the queue size value (conf->queue_size).
> > +	 * This limits the occupancy of the queue at any point of time, so
> that
> > +	 * the queue does not get swamped with enqueue requests.
> > +	 */
> > +	q->sw_ring_depth = conf->queue_size;
> > +	q->sw_ring_wrap_mask = d->sw_ring_max_depth - 1;
> > +
> > +	q->op_type = conf->op_type;
> > +
> > +	q_idx = acc100_find_free_queue_idx(dev, conf);
> > +	if (q_idx == -1) {
> > +		rte_free(q);
> 
> This will leak the other two ptr's
> This function needs better error handling.

Yes agreed. Thanks.

> 
> Tom
> 

Thanks for your review Tom, aiming to push the updated series tomorrow.

Nic



> > +		return -1;
> > +	}
> > +
> > +	q->qgrp_id = (q_idx >> GRP_ID_SHIFT) & 0xF;
> > +	q->vf_id = (q_idx >> VF_ID_SHIFT)  & 0x3F;
> > +	q->aq_id = q_idx & 0xF;
> > +	q->aq_depth = (conf->op_type ==  RTE_BBDEV_OP_TURBO_DEC) ?
> > +			(1 << d->acc100_conf.q_ul_4g.aq_depth_log2) :
> > +			(1 << d->acc100_conf.q_dl_4g.aq_depth_log2);
> > +
> > +	q->mmio_reg_enqueue = RTE_PTR_ADD(d->mmio_base,
> > +			queue_offset(d->pf_device,
> > +					q->vf_id, q->qgrp_id, q->aq_id));
> > +
> > +	rte_bbdev_log_debug(
> > +			"Setup dev%u q%u: qgrp_id=%u, vf_id=%u,
> aq_id=%u, aq_depth=%u, mmio_reg_enqueue=%p",
> > +			dev->data->dev_id, queue_id, q->qgrp_id, q->vf_id,
> > +			q->aq_id, q->aq_depth, q->mmio_reg_enqueue);
> > +
> > +	dev->data->queues[queue_id].queue_private = q;
> > +	return 0;
> > +}
> > +
> > +/* Release ACC100 queue */
> > +static int
> > +acc100_queue_release(struct rte_bbdev *dev, uint16_t q_id) {
> > +	struct acc100_device *d = dev->data->dev_private;
> > +	struct acc100_queue *q = dev->data->queues[q_id].queue_private;
> > +
> > +	if (q != NULL) {
> > +		/* Mark the Queue as un-assigned */
> > +		d->q_assigned_bit_map[q->qgrp_id] &= (0xFFFFFFFF -
> > +				(1 << q->aq_id));
> > +		rte_free(q->lb_in);
> > +		rte_free(q->lb_out);
> > +		rte_free(q);
> > +		dev->data->queues[q_id].queue_private = NULL;
> > +	}
> > +
> >  	return 0;
> >  }
> >
> > @@ -258,8 +673,11 @@
> >  }
> >
> >  static const struct rte_bbdev_ops acc100_bbdev_ops = {
> > +	.setup_queues = acc100_setup_queues,
> >  	.close = acc100_dev_close,
> >  	.info_get = acc100_dev_info_get,
> > +	.queue_setup = acc100_queue_setup,
> > +	.queue_release = acc100_queue_release,
> >  };
> >
> >  /* ACC100 PCI PF address map */
> > diff --git a/drivers/baseband/acc100/rte_acc100_pmd.h
> > b/drivers/baseband/acc100/rte_acc100_pmd.h
> > index 662e2c8..0e2b79c 100644
> > --- a/drivers/baseband/acc100/rte_acc100_pmd.h
> > +++ b/drivers/baseband/acc100/rte_acc100_pmd.h
> > @@ -518,11 +518,56 @@ struct acc100_registry_addr {
> >  	.ddr_range = HWVfDmaDdrBaseRangeRoVf,  };
> >
> > +/* Structure associated with each queue. */ struct
> > +__rte_cache_aligned acc100_queue {
> > +	union acc100_dma_desc *ring_addr;  /* Virtual address of sw ring */
> > +	rte_iova_t ring_addr_phys;  /* Physical address of software ring */
> > +	uint32_t sw_ring_head;  /* software ring head */
> > +	uint32_t sw_ring_tail;  /* software ring tail */
> > +	/* software ring size (descriptors, not bytes) */
> > +	uint32_t sw_ring_depth;
> > +	/* mask used to wrap enqueued descriptors on the sw ring */
> > +	uint32_t sw_ring_wrap_mask;
> > +	/* MMIO register used to enqueue descriptors */
> > +	void *mmio_reg_enqueue;
> > +	uint8_t vf_id;  /* VF ID (max = 63) */
> > +	uint8_t qgrp_id;  /* Queue Group ID */
> > +	uint16_t aq_id;  /* Atomic Queue ID */
> > +	uint16_t aq_depth;  /* Depth of atomic queue */
> > +	uint32_t aq_enqueued;  /* Count how many "batches" have been
> enqueued */
> > +	uint32_t aq_dequeued;  /* Count how many "batches" have been
> dequeued */
> > +	uint32_t irq_enable;  /* Enable ops dequeue interrupts if set to 1 */
> > +	struct rte_mempool *fcw_mempool;  /* FCW mempool */
> > +	enum rte_bbdev_op_type op_type;  /* Type of this Queue: TE or TD
> */
> > +	/* Internal Buffers for loopback input */
> > +	uint8_t *lb_in;
> > +	uint8_t *lb_out;
> > +	rte_iova_t lb_in_addr_phys;
> > +	rte_iova_t lb_out_addr_phys;
> > +	struct acc100_device *d;
> > +};
> > +
> >  /* Private data structure for each ACC100 device */  struct
> > acc100_device {
> >  	void *mmio_base;  /**< Base address of MMIO registers (BAR0) */
> > +	void *sw_rings_base;  /* Base addr of un-aligned memory for sw
> rings */
> > +	void *sw_rings;  /* 64MBs of 64MB aligned memory for sw rings */
> > +	rte_iova_t sw_rings_phys;  /* Physical address of sw_rings */
> > +	/* Virtual address of the info memory routed to the this function
> under
> > +	 * operation, whether it is PF or VF.
> > +	 */
> > +	union acc100_harq_layout_data *harq_layout;
> > +	uint32_t sw_ring_size;
> >  	uint32_t ddr_size; /* Size in kB */
> > +	uint32_t *tail_ptrs; /* Base address of response tail pointer buffer */
> > +	rte_iova_t tail_ptr_phys; /* Physical address of tail pointers */
> > +	/* Max number of entries available for each queue in device,
> depending
> > +	 * on how many queues are enabled with configure()
> > +	 */
> > +	uint32_t sw_ring_max_depth;
> >  	struct acc100_conf acc100_conf; /* ACC100 Initial configuration */
> > +	/* Bitmap capturing which Queues have already been assigned */
> > +	uint16_t q_assigned_bit_map[ACC100_NUM_QGRPS];
> >  	bool pf_device; /**< True if this is a PF ACC100 device */
> >  	bool configured; /**< True if this ACC100 device is configured */
> > };



More information about the dev mailing list