[dpdk-dev] [PATCH v9 07/10] baseband/acc100: add support for 4G processing

Chautru, Nicolas nicolas.chautru at intel.com
Wed Sep 30 21:10:38 CEST 2020


Hi Tom, 

> From: Tom Rix <trix at redhat.com>
> On 9/28/20 5:29 PM, Nicolas Chautru wrote:
> > Adding capability for 4G encode and decode processing
> >
> > Signed-off-by: Nicolas Chautru <nicolas.chautru at intel.com>
> > Acked-by: Liu Tianjiao <Tianjiao.liu at intel.com>
> > ---
> >  doc/guides/bbdevs/features/acc100.ini    |    4 +-
> >  drivers/baseband/acc100/rte_acc100_pmd.c | 1010
> > ++++++++++++++++++++++++++++--
> >  2 files changed, 945 insertions(+), 69 deletions(-)
> >
> > diff --git a/doc/guides/bbdevs/features/acc100.ini
> > b/doc/guides/bbdevs/features/acc100.ini
> > index 40c7adc..642cd48 100644
> > --- a/doc/guides/bbdevs/features/acc100.ini
> > +++ b/doc/guides/bbdevs/features/acc100.ini
> > @@ -4,8 +4,8 @@
> >  ; Refer to default.ini for the full list of available PMD features.
> >  ;
> >  [Features]
> > -Turbo Decoder (4G)     = N
> > -Turbo Encoder (4G)     = N
> > +Turbo Decoder (4G)     = Y
> > +Turbo Encoder (4G)     = Y
> >  LDPC Decoder (5G)      = Y
> >  LDPC Encoder (5G)      = Y
> >  LLR/HARQ Compression   = Y
> > diff --git a/drivers/baseband/acc100/rte_acc100_pmd.c
> > b/drivers/baseband/acc100/rte_acc100_pmd.c
> > index e484c0a..7d4c3df 100644
> > --- a/drivers/baseband/acc100/rte_acc100_pmd.c
> > +++ b/drivers/baseband/acc100/rte_acc100_pmd.c
> > @@ -339,7 +339,6 @@
> >  	free_base_addresses(base_addrs, i);
> >  }
> >
> > -
> >  /* Allocate 64MB memory used for all software rings */
> >  static int
> > acc100_setup_queues(struct rte_bbdev *dev, uint16_t num_queues, int socket_id)
> > @@ -637,6 +636,41 @@
> >
> >  	static const struct rte_bbdev_op_cap bbdev_capabilities[] = {
> >  		{
> > +			.type = RTE_BBDEV_OP_TURBO_DEC,
> > +			.cap.turbo_dec = {
> > +				.capability_flags =
> > +					RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE |
> > +					RTE_BBDEV_TURBO_CRC_TYPE_24B |
> > +					RTE_BBDEV_TURBO_HALF_ITERATION_EVEN |
> > +					RTE_BBDEV_TURBO_EARLY_TERMINATION |
> > +					RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN |
> > +					RTE_BBDEV_TURBO_MAP_DEC |
> > +					RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP |
> > +					RTE_BBDEV_TURBO_DEC_SCATTER_GATHER,
> > +				.max_llr_modulus = INT8_MAX,
> > +				.num_buffers_src =
> > +					RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> > +				.num_buffers_hard_out =
> > +					RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> > +				.num_buffers_soft_out =
> > +					RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> > +			}
> > +		},
> > +		{
> > +			.type = RTE_BBDEV_OP_TURBO_ENC,
> > +			.cap.turbo_enc = {
> > +				.capability_flags =
> > +					RTE_BBDEV_TURBO_CRC_24B_ATTACH |
> > +					RTE_BBDEV_TURBO_RV_INDEX_BYPASS |
> > +					RTE_BBDEV_TURBO_RATE_MATCH |
> > +					RTE_BBDEV_TURBO_ENC_SCATTER_GATHER,
> > +				.num_buffers_src =
> > +					RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> > +				.num_buffers_dst =
> > +					RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> > +			}
> > +		},
> > +		{
> >  			.type   = RTE_BBDEV_OP_LDPC_ENC,
> >  			.cap.ldpc_enc = {
> >  				.capability_flags =
> > @@ -719,7 +753,6 @@
> >  #endif
> >  }
> >
> > -
> >  static const struct rte_bbdev_ops acc100_bbdev_ops = {
> >  	.setup_queues = acc100_setup_queues,
> >  	.close = acc100_dev_close,
> > @@ -763,6 +796,58 @@
> >  	return tail;
> >  }
> >
> > +/* Fill in a frame control word for turbo encoding. */
> > +static inline void
> > +acc100_fcw_te_fill(const struct rte_bbdev_enc_op *op, struct acc100_fcw_te *fcw)
> > +{
> > +	fcw->code_block_mode = op->turbo_enc.code_block_mode;
> > +	if (fcw->code_block_mode == 0) { /* For TB mode */
> > +		fcw->k_neg = op->turbo_enc.tb_params.k_neg;
> > +		fcw->k_pos = op->turbo_enc.tb_params.k_pos;
> > +		fcw->c_neg = op->turbo_enc.tb_params.c_neg;
> > +		fcw->c = op->turbo_enc.tb_params.c;
> > +		fcw->ncb_neg = op->turbo_enc.tb_params.ncb_neg;
> > +		fcw->ncb_pos = op->turbo_enc.tb_params.ncb_pos;
> > +
> > +		if (check_bit(op->turbo_enc.op_flags,
> > +				RTE_BBDEV_TURBO_RATE_MATCH)) {
> > +			fcw->bypass_rm = 0;
> > +			fcw->cab = op->turbo_enc.tb_params.cab;
> > +			fcw->ea = op->turbo_enc.tb_params.ea;
> > +			fcw->eb = op->turbo_enc.tb_params.eb;
> > +		} else {
> > +			/* E is set to the encoding output size when RM is
> > +			 * bypassed.
> > +			 */
> > +			fcw->bypass_rm = 1;
> > +			fcw->cab = fcw->c_neg;
> > +			fcw->ea = 3 * fcw->k_neg + 12;
> > +			fcw->eb = 3 * fcw->k_pos + 12;
> > +		}
> > +	} else { /* For CB mode */
> > +		fcw->k_pos = op->turbo_enc.cb_params.k;
> > +		fcw->ncb_pos = op->turbo_enc.cb_params.ncb;
> > +
> > +		if (check_bit(op->turbo_enc.op_flags,
> > +				RTE_BBDEV_TURBO_RATE_MATCH)) {
> > +			fcw->bypass_rm = 0;
> > +			fcw->eb = op->turbo_enc.cb_params.e;
> > +		} else {
> > +			/* E is set to the encoding output size when RM is
> > +			 * bypassed.
> > +			 */
> > +			fcw->bypass_rm = 1;
> > +			fcw->eb = 3 * fcw->k_pos + 12;
> > +		}
> > +	}
> > +
> > +	fcw->bypass_rv_idx1 = check_bit(op->turbo_enc.op_flags,
> > +			RTE_BBDEV_TURBO_RV_INDEX_BYPASS);
> > +	fcw->code_block_crc = check_bit(op->turbo_enc.op_flags,
> > +			RTE_BBDEV_TURBO_CRC_24B_ATTACH);
> > +	fcw->rv_idx1 = op->turbo_enc.rv_index;
> > +}
> > +
> >  /* Compute value of k0.
> >   * Based on 3GPP 38.212 Table 5.4.2.1-2
> >   * Starting position of different redundancy versions, k0
> > @@ -813,6 +898,25 @@
> >  	fcw->mcb_count = num_cb;
> >  }
> >
> > +/* Fill in a frame control word for turbo decoding. */
> > +static inline void
> > +acc100_fcw_td_fill(const struct rte_bbdev_dec_op *op, struct acc100_fcw_td *fcw)
> > +{
> > +	/* Note : Early termination is always enabled for 4GUL */
> > +	fcw->fcw_ver = 1;
> > +	if (op->turbo_dec.code_block_mode == 0)
> > +		fcw->k_pos = op->turbo_dec.tb_params.k_pos;
> > +	else
> > +		fcw->k_pos = op->turbo_dec.cb_params.k;
> > +	fcw->turbo_crc_type = check_bit(op->turbo_dec.op_flags,
> > +			RTE_BBDEV_TURBO_CRC_TYPE_24B);
> > +	fcw->bypass_sb_deint = 0;
> > +	fcw->raw_decoder_input_on = 0;
> > +	fcw->max_iter = op->turbo_dec.iter_max;
> > +	fcw->half_iter_on = !check_bit(op->turbo_dec.op_flags,
> > +			RTE_BBDEV_TURBO_HALF_ITERATION_EVEN);
> > +}
> > +
> >  /* Fill in a frame control word for LDPC decoding. */
> >  static inline void
> >  acc100_fcw_ld_fill(const struct rte_bbdev_dec_op *op, struct acc100_fcw_ld *fcw,
> > @@ -1042,6 +1146,87 @@
> >  }
> >
> >  static inline int
> > +acc100_dma_desc_te_fill(struct rte_bbdev_enc_op *op,
> > +		struct acc100_dma_req_desc *desc, struct rte_mbuf **input,
> > +		struct rte_mbuf *output, uint32_t *in_offset,
> > +		uint32_t *out_offset, uint32_t *out_length,
> > +		uint32_t *mbuf_total_left, uint32_t *seg_total_left, uint8_t r)
> > +{
> > +	int next_triplet = 1; /* FCW already done */
> > +	uint32_t e, ea, eb, length;
> > +	uint16_t k, k_neg, k_pos;
> > +	uint8_t cab, c_neg;
> > +
> > +	desc->word0 = ACC100_DMA_DESC_TYPE;
> > +	desc->word1 = 0; /**< Timestamp could be disabled */
> > +	desc->word2 = 0;
> > +	desc->word3 = 0;
> > +	desc->numCBs = 1;
> > +
> > +	if (op->turbo_enc.code_block_mode == 0) {
> > +		ea = op->turbo_enc.tb_params.ea;
> > +		eb = op->turbo_enc.tb_params.eb;
> > +		cab = op->turbo_enc.tb_params.cab;
> > +		k_neg = op->turbo_enc.tb_params.k_neg;
> > +		k_pos = op->turbo_enc.tb_params.k_pos;
> > +		c_neg = op->turbo_enc.tb_params.c_neg;
> > +		e = (r < cab) ? ea : eb;
> > +		k = (r < c_neg) ? k_neg : k_pos;
> > +	} else {
> > +		e = op->turbo_enc.cb_params.e;
> > +		k = op->turbo_enc.cb_params.k;
> > +	}
> > +
> > +	if (check_bit(op->turbo_enc.op_flags, RTE_BBDEV_TURBO_CRC_24B_ATTACH))
> > +		length = (k - 24) >> 3;
> > +	else
> > +		length = k >> 3;
> > +
> > +	if (unlikely((*mbuf_total_left == 0) || (*mbuf_total_left < length))) {
> 
> similar to other patches, this check can be combined to <=
> 
> change generally

Same comment as on the other patch.

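For the v10 respin, the combined check would look roughly like this
(sketch only; assuming length is always non-zero for a valid code block,
the == 0 test is redundant):

	if (unlikely(*mbuf_total_left < length)) {
		rte_bbdev_log(ERR,
				"Mismatch between mbuf length and included CB sizes: mbuf len %u, cb len %u",
				*mbuf_total_left, length);
		return -1;
	}
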
> 
> > +		rte_bbdev_log(ERR,
> > +				"Mismatch between mbuf length and included CB sizes: mbuf len %u, cb len %u",
> > +				*mbuf_total_left, length);
> > +		return -1;
> > +	}
> > +
> > +	next_triplet = acc100_dma_fill_blk_type_in(desc, input, in_offset,
> > +			length, seg_total_left, next_triplet);
> > +	if (unlikely(next_triplet < 0)) {
> > +		rte_bbdev_log(ERR,
> > +				"Mismatch between data to process and mbuf data length in bbdev_op: %p",
> > +				op);
> > +		return -1;
> > +	}
> > +	desc->data_ptrs[next_triplet - 1].last = 1;
> > +	desc->m2dlen = next_triplet;
> > +	*mbuf_total_left -= length;
> > +
> > +	/* Set output length */
> > +	if (check_bit(op->turbo_enc.op_flags, RTE_BBDEV_TURBO_RATE_MATCH))
> > +		/* Integer round up division by 8 */
> > +		*out_length = (e + 7) >> 3;
> > +	else
> > +		*out_length = (k >> 3) * 3 + 2;
> > +
> > +	next_triplet = acc100_dma_fill_blk_type_out(desc, output, *out_offset,
> > +			*out_length, next_triplet, ACC100_DMA_BLKID_OUT_ENC);
> > +	if (unlikely(next_triplet < 0)) {
> > +		rte_bbdev_log(ERR,
> > +				"Mismatch between data to process and mbuf data length in bbdev_op: %p",
> > +				op);
> > +		return -1;
> > +	}
> > +	op->turbo_enc.output.length += *out_length;
> > +	*out_offset += *out_length;
> > +	desc->data_ptrs[next_triplet - 1].last = 1;
> > +	desc->d2mlen = next_triplet - desc->m2dlen;
> > +
> > +	desc->op_addr = op;
> > +
> > +	return 0;
> > +}
> > +
> > +static inline int
> >  acc100_dma_desc_le_fill(struct rte_bbdev_enc_op *op,
> >  		struct acc100_dma_req_desc *desc, struct rte_mbuf **input,
> >  		struct rte_mbuf *output, uint32_t *in_offset,
> > @@ -1110,6 +1295,117 @@
> >  }
> >
> >  static inline int
> > +acc100_dma_desc_td_fill(struct rte_bbdev_dec_op *op,
> > +		struct acc100_dma_req_desc *desc, struct rte_mbuf **input,
> > +		struct rte_mbuf *h_output, struct rte_mbuf *s_output,
> > +		uint32_t *in_offset, uint32_t *h_out_offset,
> > +		uint32_t *s_out_offset, uint32_t *h_out_length,
> > +		uint32_t *s_out_length, uint32_t *mbuf_total_left,
> > +		uint32_t *seg_total_left, uint8_t r)
> > +{
> > +	int next_triplet = 1; /* FCW already done */
> > +	uint16_t k;
> > +	uint16_t crc24_overlap = 0;
> > +	uint32_t e, kw;
> > +
> > +	desc->word0 = ACC100_DMA_DESC_TYPE;
> > +	desc->word1 = 0; /**< Timestamp could be disabled */
> > +	desc->word2 = 0;
> > +	desc->word3 = 0;
> > +	desc->numCBs = 1;
> > +
> > +	if (op->turbo_dec.code_block_mode == 0) {
> > +		k = (r < op->turbo_dec.tb_params.c_neg)
> > +			? op->turbo_dec.tb_params.k_neg
> > +			: op->turbo_dec.tb_params.k_pos;
> > +		e = (r < op->turbo_dec.tb_params.cab)
> > +			? op->turbo_dec.tb_params.ea
> > +			: op->turbo_dec.tb_params.eb;
> > +	} else {
> > +		k = op->turbo_dec.cb_params.k;
> > +		e = op->turbo_dec.cb_params.e;
> > +	}
> > +
> > +	if ((op->turbo_dec.code_block_mode == 0)
> > +		&& !check_bit(op->turbo_dec.op_flags,
> > +		RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP))
> > +		crc24_overlap = 24;
> > +
> > +	/* Calculates circular buffer size.
> > +	 * According to 3gpp 36.212 section 5.1.4.2
> > +	 *   Kw = 3 * Kpi,
> > +	 * where:
> > +	 *   Kpi = nCol * nRow
> > +	 * where nCol is 32 and nRow can be calculated from:
> > +	 *   D =< nCol * nRow
> > +	 * where D is the size of each output from turbo encoder block (k + 4).
> > +	 */
> > +	kw = RTE_ALIGN_CEIL(k + 4, 32) * 3;
> > +
> > +	if (unlikely((*mbuf_total_left == 0) || (*mbuf_total_left < kw))) {
> > +		rte_bbdev_log(ERR,
> > +				"Mismatch between mbuf length and included CB sizes: mbuf len %u, cb len %u",
> > +				*mbuf_total_left, kw);
> > +		return -1;
> > +	}
> > +
> > +	next_triplet = acc100_dma_fill_blk_type_in(desc, input, in_offset, kw,
> > +			seg_total_left, next_triplet);
> > +	if (unlikely(next_triplet < 0)) {
> > +		rte_bbdev_log(ERR,
> > +				"Mismatch between data to process and mbuf data length in bbdev_op: %p",
> > +				op);
> > +		return -1;
> > +	}
> > +	desc->data_ptrs[next_triplet - 1].last = 1;
> > +	desc->m2dlen = next_triplet;
> > +	*mbuf_total_left -= kw;
> > +
> > +	next_triplet = acc100_dma_fill_blk_type_out(
> > +			desc, h_output, *h_out_offset,
> > +			k >> 3, next_triplet, ACC100_DMA_BLKID_OUT_HARD);
> > +	if (unlikely(next_triplet < 0)) {
> > +		rte_bbdev_log(ERR,
> > +				"Mismatch between data to process and mbuf data length in bbdev_op: %p",
> > +				op);
> > +		return -1;
> > +	}
> > +
> > +	*h_out_length = ((k - crc24_overlap) >> 3);
> > +	op->turbo_dec.hard_output.length += *h_out_length;
> > +	*h_out_offset += *h_out_length;
> > +
> > +	/* Soft output */
> > +	if (check_bit(op->turbo_dec.op_flags, RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
> > +		if (check_bit(op->turbo_dec.op_flags,
> > +				RTE_BBDEV_TURBO_EQUALIZER))
> > +			*s_out_length = e;
> > +		else
> > +			*s_out_length = (k * 3) + 12;
> > +
> > +		next_triplet = acc100_dma_fill_blk_type_out(desc, s_output,
> > +				*s_out_offset, *s_out_length, next_triplet,
> > +				ACC100_DMA_BLKID_OUT_SOFT);
> > +		if (unlikely(next_triplet < 0)) {
> > +			rte_bbdev_log(ERR,
> > +					"Mismatch between data to process and mbuf data length in bbdev_op: %p",
> > +					op);
> > +			return -1;
> > +		}
> > +
> > +		op->turbo_dec.soft_output.length += *s_out_length;
> > +		*s_out_offset += *s_out_length;
> > +	}
> > +
> > +	desc->data_ptrs[next_triplet - 1].last = 1;
> > +	desc->d2mlen = next_triplet - desc->m2dlen;
> > +
> > +	desc->op_addr = op;
> > +
> > +	return 0;
> > +}
> > +
> > +static inline int
> >  acc100_dma_desc_ld_fill(struct rte_bbdev_dec_op *op,
> >  		struct acc100_dma_req_desc *desc,
> >  		struct rte_mbuf **input, struct rte_mbuf *h_output,
> > @@ -1374,6 +1670,57 @@
> >
> >  /* Enqueue one encode operation for ACC100 device in CB mode */
> > static inline int
> > +enqueue_enc_one_op_cb(struct acc100_queue *q, struct rte_bbdev_enc_op *op,
> > +		uint16_t total_enqueued_cbs)
> > +{
> > +	union acc100_dma_desc *desc = NULL;
> > +	int ret;
> > +	uint32_t in_offset, out_offset, out_length, mbuf_total_left,
> > +		seg_total_left;
> > +	struct rte_mbuf *input, *output_head, *output;
> > +
> > +	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
> > +			& q->sw_ring_wrap_mask);
> > +	desc = q->ring_addr + desc_idx;
> > +	acc100_fcw_te_fill(op, &desc->req.fcw_te);
> > +
> > +	input = op->turbo_enc.input.data;
> > +	output_head = output = op->turbo_enc.output.data;
> > +	in_offset = op->turbo_enc.input.offset;
> > +	out_offset = op->turbo_enc.output.offset;
> > +	out_length = 0;
> > +	mbuf_total_left = op->turbo_enc.input.length;
> > +	seg_total_left = rte_pktmbuf_data_len(op->turbo_enc.input.data)
> > +			- in_offset;
> > +
> > +	ret = acc100_dma_desc_te_fill(op, &desc->req, &input, output,
> > +			&in_offset, &out_offset, &out_length, &mbuf_total_left,
> > +			&seg_total_left, 0);
> > +
> > +	if (unlikely(ret < 0))
> > +		return ret;
> > +
> > +	mbuf_append(output_head, output, out_length);
> > +
> > +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> > +	rte_memdump(stderr, "FCW", &desc->req.fcw_te,
> > +			sizeof(desc->req.fcw_te) - 8);
> > +	rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
> > +
> > +	/* Check if any data left after processing one CB */
> > +	if (mbuf_total_left != 0) {
> > +		rte_bbdev_log(ERR,
> > +				"Some data still left after processing one CB: mbuf_total_left = %u",
> > +				mbuf_total_left);
> > +		return -EINVAL;
> > +	}
> > +#endif
> > +	/* One CB (one op) was successfully prepared to enqueue */
> > +	return 1;
> > +}
> > +
> > +/* Enqueue one encode operations for ACC100 device in CB mode */
> > +static inline int
> >  enqueue_ldpc_enc_n_op_cb(struct acc100_queue *q, struct rte_bbdev_enc_op **ops,
> >  		uint16_t total_enqueued_cbs, int16_t num)
> >  {
> > @@ -1481,78 +1828,235 @@
> >  	return 1;
> >  }
> >
> > -static inline int
> > -harq_loopback(struct acc100_queue *q, struct rte_bbdev_dec_op *op,
> > -		uint16_t total_enqueued_cbs)
> > -{
> > -	struct acc100_fcw_ld *fcw;
> > -	union acc100_dma_desc *desc;
> > -	int next_triplet = 1;
> > -	struct rte_mbuf *hq_output_head, *hq_output;
> > -	uint16_t harq_in_length = op->ldpc_dec.harq_combined_input.length;
> > -	if (harq_in_length == 0) {
> > -		rte_bbdev_log(ERR, "Loopback of invalid null size\n");
> > -		return -EINVAL;
> > -	}
> >
> > -	int h_comp = check_bit(op->ldpc_dec.op_flags,
> > -			RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION
> > -			) ? 1 : 0;
> > -	if (h_comp == 1)
> > -		harq_in_length = harq_in_length * 8 / 6;
> > -	harq_in_length = RTE_ALIGN(harq_in_length, 64);
> > -	uint16_t harq_dma_length_in = (h_comp == 0) ?
> > -			harq_in_length :
> > -			harq_in_length * 6 / 8;
> > -	uint16_t harq_dma_length_out = harq_dma_length_in;
> > -	bool ddr_mem_in = check_bit(op->ldpc_dec.op_flags,
> > -			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE);
> > -	union acc100_harq_layout_data *harq_layout = q->d->harq_layout;
> > -	uint16_t harq_index = (ddr_mem_in ?
> > -			op->ldpc_dec.harq_combined_input.offset :
> > -			op->ldpc_dec.harq_combined_output.offset)
> > -			/ ACC100_HARQ_OFFSET;
> > +/* Enqueue one encode operation for ACC100 device in TB mode. */
> > +static inline int
> > +enqueue_enc_one_op_tb(struct acc100_queue *q, struct rte_bbdev_enc_op *op,
> > +		uint16_t total_enqueued_cbs, uint8_t cbs_in_tb)
> > +{
> > +	union acc100_dma_desc *desc = NULL;
> > +	int ret;
> > +	uint8_t r, c;
> > +	uint32_t in_offset, out_offset, out_length, mbuf_total_left,
> > +		seg_total_left;
> > +	struct rte_mbuf *input, *output_head, *output;
> > +	uint16_t current_enqueued_cbs = 0;
> >
> >  	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
> >  			& q->sw_ring_wrap_mask);
> >  	desc = q->ring_addr + desc_idx;
> > -	fcw = &desc->req.fcw_ld;
> > -	/* Set the FCW from loopback into DDR */
> > -	memset(fcw, 0, sizeof(struct acc100_fcw_ld));
> > -	fcw->FCWversion = ACC100_FCW_VER;
> > -	fcw->qm = 2;
> > -	fcw->Zc = 384;
> > -	if (harq_in_length < 16 * N_ZC_1)
> > -		fcw->Zc = 16;
> > -	fcw->ncb = fcw->Zc * N_ZC_1;
> > -	fcw->rm_e = 2;
> > -	fcw->hcin_en = 1;
> > -	fcw->hcout_en = 1;
> > +	uint64_t fcw_offset = (desc_idx << 8) + ACC100_DESC_FCW_OFFSET;
> > +	acc100_fcw_te_fill(op, &desc->req.fcw_te);
> >
> > -	rte_bbdev_log(DEBUG, "Loopback IN %d Index %d offset %d length %d %d\n",
> > -			ddr_mem_in, harq_index,
> > -			harq_layout[harq_index].offset, harq_in_length,
> > -			harq_dma_length_in);
> > +	input = op->turbo_enc.input.data;
> > +	output_head = output = op->turbo_enc.output.data;
> > +	in_offset = op->turbo_enc.input.offset;
> > +	out_offset = op->turbo_enc.output.offset;
> > +	out_length = 0;
> > +	mbuf_total_left = op->turbo_enc.input.length;
> >
> > -	if (ddr_mem_in && (harq_layout[harq_index].offset > 0)) {
> > -		fcw->hcin_size0 = harq_layout[harq_index].size0;
> > -		fcw->hcin_offset = harq_layout[harq_index].offset;
> > -		fcw->hcin_size1 = harq_in_length - fcw->hcin_offset;
> > -		harq_dma_length_in = (fcw->hcin_size0 + fcw->hcin_size1);
> > -		if (h_comp == 1)
> > -			harq_dma_length_in = harq_dma_length_in * 6 / 8;
> > -	} else {
> > -		fcw->hcin_size0 = harq_in_length;
> > -	}
> > -	harq_layout[harq_index].val = 0;
> > -	rte_bbdev_log(DEBUG, "Loopback FCW Config %d %d %d\n",
> > -			fcw->hcin_size0, fcw->hcin_offset, fcw->hcin_size1);
> > -	fcw->hcout_size0 = harq_in_length;
> > -	fcw->hcin_decomp_mode = h_comp;
> > -	fcw->hcout_comp_mode = h_comp;
> > -	fcw->gain_i = 1;
> > -	fcw->gain_h = 1;
> > +	c = op->turbo_enc.tb_params.c;
> > +	r = op->turbo_enc.tb_params.r;
> >
> > -	/* Set the prefix of descriptor. This could be done at polling */
> > +	while (mbuf_total_left > 0 && r < c) {
> > +		seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
> > +		/* Set up DMA descriptor */
> > +		desc = q->ring_addr + ((q->sw_ring_head + total_enqueued_cbs)
> > +				& q->sw_ring_wrap_mask);
> > +		desc->req.data_ptrs[0].address = q->ring_addr_phys + fcw_offset;
> > +		desc->req.data_ptrs[0].blen = ACC100_FCW_TE_BLEN;
> > +
> > +		ret = acc100_dma_desc_te_fill(op, &desc->req, &input, output,
> > +				&in_offset, &out_offset, &out_length,
> > +				&mbuf_total_left, &seg_total_left, r);
> > +		if (unlikely(ret < 0))
> > +			return ret;
> > +		mbuf_append(output_head, output, out_length);
> > +
> > +		/* Set total number of CBs in TB */
> > +		desc->req.cbs_in_tb = cbs_in_tb;
> > +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> > +		rte_memdump(stderr, "FCW", &desc->req.fcw_te,
> > +				sizeof(desc->req.fcw_te) - 8);
> > +		rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
> > +#endif
> > +
> > +		if (seg_total_left == 0) {
> > +			/* Go to the next mbuf */
> > +			input = input->next;
> > +			in_offset = 0;
> > +			output = output->next;
> > +			out_offset = 0;
> > +		}
> > +
> > +		total_enqueued_cbs++;
> > +		current_enqueued_cbs++;
> > +		r++;
> > +	}
> > +
> > +	if (unlikely(desc == NULL))
> > +		return current_enqueued_cbs;
> > +
> > +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> > +	/* Check if any CBs left for processing */
> > +	if (mbuf_total_left != 0) {
> > +		rte_bbdev_log(ERR,
> > +				"Some data still left for processing: mbuf_total_left = %u",
> > +				mbuf_total_left);
> > +		return -EINVAL;
> > +	}
> > +#endif
> > +
> > +	/* Set SDone on last CB descriptor for TB mode. */
> > +	desc->req.sdone_enable = 1;
> > +	desc->req.irq_enable = q->irq_enable;
> > +
> > +	return current_enqueued_cbs;
> > +}
> > +
> > +/* Enqueue one decode operation for ACC100 device in CB mode */
> > +static inline int
> > +enqueue_dec_one_op_cb(struct acc100_queue *q, struct rte_bbdev_dec_op *op,
> > +		uint16_t total_enqueued_cbs)
> > +{
> > +	union acc100_dma_desc *desc = NULL;
> > +	int ret;
> > +	uint32_t in_offset, h_out_offset, s_out_offset, s_out_length,
> > +		h_out_length, mbuf_total_left, seg_total_left;
> > +	struct rte_mbuf *input, *h_output_head, *h_output,
> > +		*s_output_head, *s_output;
> > +
> > +	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
> > +			& q->sw_ring_wrap_mask);
> > +	desc = q->ring_addr + desc_idx;
> > +	acc100_fcw_td_fill(op, &desc->req.fcw_td);
> > +
> > +	input = op->turbo_dec.input.data;
> > +	h_output_head = h_output = op->turbo_dec.hard_output.data;
> > +	s_output_head = s_output = op->turbo_dec.soft_output.data;
> > +	in_offset = op->turbo_dec.input.offset;
> > +	h_out_offset = op->turbo_dec.hard_output.offset;
> > +	s_out_offset = op->turbo_dec.soft_output.offset;
> > +	h_out_length = s_out_length = 0;
> > +	mbuf_total_left = op->turbo_dec.input.length;
> > +	seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
> > +
> > +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> > +	if (unlikely(input == NULL)) {
> > +		rte_bbdev_log(ERR, "Invalid mbuf pointer");
> > +		return -EFAULT;
> > +	}
> > +#endif
> > +
> > +	/* Set up DMA descriptor */
> > +	desc = q->ring_addr + ((q->sw_ring_head + total_enqueued_cbs)
> > +			& q->sw_ring_wrap_mask);
> > +
> > +	ret = acc100_dma_desc_td_fill(op, &desc->req, &input, h_output,
> > +			s_output, &in_offset, &h_out_offset, &s_out_offset,
> > +			&h_out_length, &s_out_length, &mbuf_total_left,
> > +			&seg_total_left, 0);
> > +
> > +	if (unlikely(ret < 0))
> > +		return ret;
> > +
> > +	/* Hard output */
> > +	mbuf_append(h_output_head, h_output, h_out_length);
> > +
> > +	/* Soft output */
> > +	if (check_bit(op->turbo_dec.op_flags, RTE_BBDEV_TURBO_SOFT_OUTPUT))
> > +		mbuf_append(s_output_head, s_output, s_out_length);
> > +
> > +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> > +	rte_memdump(stderr, "FCW", &desc->req.fcw_td,
> > +			sizeof(desc->req.fcw_td) - 8);
> > +	rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
> > +
> > +	/* Check if any CBs left for processing */
> > +	if (mbuf_total_left != 0) {
> > +		rte_bbdev_log(ERR,
> > +				"Some data still left after processing one CB: mbuf_total_left = %u",
> > +				mbuf_total_left);
> > +		return -EINVAL;
> > +	}
> > +#endif
> logic similar to debug in mbuf_append, should be a common function.

Not exactly, unless I'm missing your point.

> > +
> > +	/* One CB (one op) was successfully prepared to enqueue */
> > +	return 1;
> > +}
> > +
> > +static inline int
> > +harq_loopback(struct acc100_queue *q, struct rte_bbdev_dec_op *op,
> > +		uint16_t total_enqueued_cbs)
> > +{
> > +	struct acc100_fcw_ld *fcw;
> > +	union acc100_dma_desc *desc;
> > +	int next_triplet = 1;
> > +	struct rte_mbuf *hq_output_head, *hq_output;
> > +	uint16_t harq_in_length = op->ldpc_dec.harq_combined_input.length;
> > +	if (harq_in_length == 0) {
> > +		rte_bbdev_log(ERR, "Loopback of invalid null size\n");
> > +		return -EINVAL;
> > +	}
> > +
> > +	int h_comp = check_bit(op->ldpc_dec.op_flags,
> > +			RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION
> > +			) ? 1 : 0;
> > +	if (h_comp == 1)
> > +		harq_in_length = harq_in_length * 8 / 6;
> > +	harq_in_length = RTE_ALIGN(harq_in_length, 64);
> > +	uint16_t harq_dma_length_in = (h_comp == 0) ?
> Can these h_comp checks be combined into a single if/else?

It may be clearer, OK.

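Something like the following, as a sketch of the single if/else (same
variables as the patch; intended to be behavior-identical to the two
separate h_comp checks):

	uint16_t harq_dma_length_in;

	if (h_comp == 1) {
		/* Compressed 6-bit LLRs: scale up to the uncompressed
		 * size before aligning, then back down for the DMA length.
		 */
		harq_in_length = RTE_ALIGN(harq_in_length * 8 / 6, 64);
		harq_dma_length_in = harq_in_length * 6 / 8;
	} else {
		harq_in_length = RTE_ALIGN(harq_in_length, 64);
		harq_dma_length_in = harq_in_length;
	}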

> > +			harq_in_length :
> > +			harq_in_length * 6 / 8;
> > +	uint16_t harq_dma_length_out = harq_dma_length_in;
> > +	bool ddr_mem_in = check_bit(op->ldpc_dec.op_flags,
> > +			RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE);
> > +	union acc100_harq_layout_data *harq_layout = q->d->harq_layout;
> > +	uint16_t harq_index = (ddr_mem_in ?
> > +			op->ldpc_dec.harq_combined_input.offset :
> > +			op->ldpc_dec.harq_combined_output.offset)
> > +			/ ACC100_HARQ_OFFSET;
> > +
> > +	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
> > +			& q->sw_ring_wrap_mask);
> > +	desc = q->ring_addr + desc_idx;
> > +	fcw = &desc->req.fcw_ld;
> > +	/* Set the FCW from loopback into DDR */
> > +	memset(fcw, 0, sizeof(struct acc100_fcw_ld));
> > +	fcw->FCWversion = ACC100_FCW_VER;
> > +	fcw->qm = 2;
> > +	fcw->Zc = 384;
> these magic numbers should have #defines

These are not magic numbers; they are actual 3GPP values.

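For anyone reading along: qm = 2 is the QPSK modulation order and Zc = 384
is the largest LDPC lifting size in TS 38.212. If named constants were
still wanted, a purely illustrative form (hypothetical macro names, not in
the patch) could be:

	#define ACC100_LPBK_QM      2   /* QPSK modulation order */
	#define ACC100_LPBK_ZC_MAX  384 /* largest lifting size Zc, TS 38.212 */
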
> > +	if (harq_in_length < 16 * N_ZC_1)
> > +		fcw->Zc = 16;
> > +	fcw->ncb = fcw->Zc * N_ZC_1;
> > +	fcw->rm_e = 2;
> > +	fcw->hcin_en = 1;
> > +	fcw->hcout_en = 1;
> > +
> > +	rte_bbdev_log(DEBUG, "Loopback IN %d Index %d offset %d length %d %d\n",
> > +			ddr_mem_in, harq_index,
> > +			harq_layout[harq_index].offset, harq_in_length,
> > +			harq_dma_length_in);
> > +
> > +	if (ddr_mem_in && (harq_layout[harq_index].offset > 0)) {
> > +		fcw->hcin_size0 = harq_layout[harq_index].size0;
> > +		fcw->hcin_offset = harq_layout[harq_index].offset;
> > +		fcw->hcin_size1 = harq_in_length - fcw->hcin_offset;
> > +		harq_dma_length_in = (fcw->hcin_size0 + fcw->hcin_size1);
> > +		if (h_comp == 1)
> > +			harq_dma_length_in = harq_dma_length_in * 6 / 8;
> > +	} else {
> > +		fcw->hcin_size0 = harq_in_length;
> > +	}
> > +	harq_layout[harq_index].val = 0;
> > +	rte_bbdev_log(DEBUG, "Loopback FCW Config %d %d %d\n",
> > +			fcw->hcin_size0, fcw->hcin_offset, fcw->hcin_size1);
> > +	fcw->hcout_size0 = harq_in_length;
> > +	fcw->hcin_decomp_mode = h_comp;
> > +	fcw->hcout_comp_mode = h_comp;
> > +	fcw->gain_i = 1;
> > +	fcw->gain_h = 1;
> > +
> > +	/* Set the prefix of descriptor. This could be done at polling */
> >  	desc->req.word0 = ACC100_DMA_DESC_TYPE;
> >  	desc->req.word1 = 0; /**< Timestamp could be disabled */
> >  	desc->req.word2 = 0;
> > @@ -1816,6 +2320,107 @@
> >  	return current_enqueued_cbs;
> >  }
> >
> > +/* Enqueue one decode operation for ACC100 device in TB mode */
> > +static inline int
> > +enqueue_dec_one_op_tb(struct acc100_queue *q, struct rte_bbdev_dec_op *op,
> > +		uint16_t total_enqueued_cbs, uint8_t cbs_in_tb)
> > +{
> > +	union acc100_dma_desc *desc = NULL;
> > +	int ret;
> > +	uint8_t r, c;
> > +	uint32_t in_offset, h_out_offset, s_out_offset, s_out_length,
> > +		h_out_length, mbuf_total_left, seg_total_left;
> > +	struct rte_mbuf *input, *h_output_head, *h_output,
> > +		*s_output_head, *s_output;
> > +	uint16_t current_enqueued_cbs = 0;
> > +
> > +	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
> > +			& q->sw_ring_wrap_mask);
> > +	desc = q->ring_addr + desc_idx;
> > +	uint64_t fcw_offset = (desc_idx << 8) + ACC100_DESC_FCW_OFFSET;
> > +	acc100_fcw_td_fill(op, &desc->req.fcw_td);
> > +
> > +	input = op->turbo_dec.input.data;
> > +	h_output_head = h_output = op->turbo_dec.hard_output.data;
> > +	s_output_head = s_output = op->turbo_dec.soft_output.data;
> > +	in_offset = op->turbo_dec.input.offset;
> > +	h_out_offset = op->turbo_dec.hard_output.offset;
> > +	s_out_offset = op->turbo_dec.soft_output.offset;
> > +	h_out_length = s_out_length = 0;
> > +	mbuf_total_left = op->turbo_dec.input.length;
> > +	c = op->turbo_dec.tb_params.c;
> > +	r = op->turbo_dec.tb_params.r;
> > +
> > +	while (mbuf_total_left > 0 && r < c) {
> > +
> > +		seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
> > +
> > +		/* Set up DMA descriptor */
> > +		desc = q->ring_addr + ((q->sw_ring_head + total_enqueued_cbs)
> > +				& q->sw_ring_wrap_mask);
> > +		desc->req.data_ptrs[0].address = q->ring_addr_phys + fcw_offset;
> > +		desc->req.data_ptrs[0].blen = ACC100_FCW_TD_BLEN;
> > +		ret = acc100_dma_desc_td_fill(op, &desc->req, &input,
> > +				h_output, s_output, &in_offset,
> &h_out_offset,
> > +				&s_out_offset, &h_out_length,
> &s_out_length,
> > +				&mbuf_total_left, &seg_total_left, r);
> > +
> > +		if (unlikely(ret < 0))
> > +			return ret;
> > +
> > +		/* Hard output */
> > +		mbuf_append(h_output_head, h_output, h_out_length);
> > +
> > +		/* Soft output */
> > +		if (check_bit(op->turbo_dec.op_flags,
> > +				RTE_BBDEV_TURBO_SOFT_OUTPUT))
> > +			mbuf_append(s_output_head, s_output, s_out_length);
> > +
> > +		/* Set total number of CBs in TB */
> > +		desc->req.cbs_in_tb = cbs_in_tb;
> > +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> > +		rte_memdump(stderr, "FCW", &desc->req.fcw_td,
> > +				sizeof(desc->req.fcw_td) - 8);
> > +		rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
> > +#endif
> > +
> > +		if (seg_total_left == 0) {
> > +			/* Go to the next mbuf */
> > +			input = input->next;
> > +			in_offset = 0;
> > +			h_output = h_output->next;
> > +			h_out_offset = 0;
> > +
> > +			if (check_bit(op->turbo_dec.op_flags,
> > +					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
> > +				s_output = s_output->next;
> > +				s_out_offset = 0;
> > +			}
> > +		}
> > +
> > +		total_enqueued_cbs++;
> > +		current_enqueued_cbs++;
> > +		r++;
> > +	}
> > +
> > +	if (unlikely(desc == NULL))
> > +		return current_enqueued_cbs;
> > +
> > +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> > +	/* Check if any CBs left for processing */
> > +	if (mbuf_total_left != 0) {
> > +		rte_bbdev_log(ERR,
> > +				"Some data still left for processing: mbuf_total_left = %u",
> > +				mbuf_total_left);
> > +		return -EINVAL;
> > +	}
> > +#endif
> > +	/* Set SDone on last CB descriptor for TB mode */
> > +	desc->req.sdone_enable = 1;
> > +	desc->req.irq_enable = q->irq_enable;
> > +
> > +	return current_enqueued_cbs;
> > +}
> >
> >  /* Calculates number of CBs in processed encoder TB based on 'r' and input
> >   * length.
> > @@ -1893,6 +2498,45 @@
> >  	return cbs_in_tb;
> >  }
> >
> > +/* Enqueue encode operations for ACC100 device in CB mode. */
> > +static uint16_t
> > +acc100_enqueue_enc_cb(struct rte_bbdev_queue_data *q_data,
> > +		struct rte_bbdev_enc_op **ops, uint16_t num)
> > +{
> > +	struct acc100_queue *q = q_data->queue_private;
> > +	int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q->sw_ring_head;
> > +	uint16_t i;
> > +	union acc100_dma_desc *desc;
> > +	int ret;
> > +
> > +	for (i = 0; i < num; ++i) {
> > +		/* Check if there is space available for further processing */
> > +		if (unlikely(avail - 1 < 0))
> > +			break;
> > +		avail -= 1;
> > +
> > +		ret = enqueue_enc_one_op_cb(q, ops[i], i);
> > +		if (ret < 0)
> > +			break;
> > +	}
> > +
> > +	if (unlikely(i == 0))
> > +		return 0; /* Nothing to enqueue */
> > +
> > +	/* Set SDone in last CB in enqueued ops for CB mode */
> > +	desc = q->ring_addr + ((q->sw_ring_head + i - 1)
> > +			& q->sw_ring_wrap_mask);
> > +	desc->req.sdone_enable = 1;
> > +	desc->req.irq_enable = q->irq_enable;
> > +
> > +	acc100_dma_enqueue(q, i, &q_data->queue_stats);
> > +
> > +	/* Update stats */
> > +	q_data->queue_stats.enqueued_count += i;
> > +	q_data->queue_stats.enqueue_err_count += num - i;
> > +	return i;
> > +}
> > +
> >  /* Check we can mux encode operations with common FCW */
> >  static inline bool
> >  check_mux(struct rte_bbdev_enc_op **ops, uint16_t num)
> >  {
> > @@ -1960,6 +2604,52 @@
> >  	return i;
> >  }
> >
> > +/* Enqueue encode operations for ACC100 device in TB mode. */
> > +static uint16_t
> > +acc100_enqueue_enc_tb(struct rte_bbdev_queue_data *q_data,
> > +		struct rte_bbdev_enc_op **ops, uint16_t num)
> > +{
> > +	struct acc100_queue *q = q_data->queue_private;
> > +	int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q->sw_ring_head;
> > +	uint16_t i, enqueued_cbs = 0;
> > +	uint8_t cbs_in_tb;
> > +	int ret;
> > +
> > +	for (i = 0; i < num; ++i) {
> > +		cbs_in_tb = get_num_cbs_in_tb_enc(&ops[i]->turbo_enc);
> > +		/* Check if there is space available for further processing */
> > +		if (unlikely(avail - cbs_in_tb < 0))
> > +			break;
> > +		avail -= cbs_in_tb;
> > +
> > +		ret = enqueue_enc_one_op_tb(q, ops[i], enqueued_cbs, cbs_in_tb);
> > +		if (ret < 0)
> > +			break;
> > +		enqueued_cbs += ret;
> > +	}
> > +
> Other similar functions have an (i == 0) check here.

ok

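To be concrete, the guard would mirror the CB path (sketch):

	if (unlikely(i == 0))
		return 0; /* Nothing to enqueue */
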
> > +	acc100_dma_enqueue(q, enqueued_cbs, &q_data->queue_stats);
> > +
> > +	/* Update stats */
> > +	q_data->queue_stats.enqueued_count += i;
> > +	q_data->queue_stats.enqueue_err_count += num - i;
> > +
> > +	return i;
> > +}
> > +
> > +/* Enqueue encode operations for ACC100 device. */
> > +static uint16_t
> > +acc100_enqueue_enc(struct rte_bbdev_queue_data *q_data,
> > +		struct rte_bbdev_enc_op **ops, uint16_t num)
> > +{
> > +	if (unlikely(num == 0))
> > +		return 0;
> num == 0 check should move into the tb/cb functions

Same comment as on the other patch: why not catch it early?

> > +	if (ops[0]->turbo_enc.code_block_mode == 0)
> > +		return acc100_enqueue_enc_tb(q_data, ops, num);
> > +	else
> > +		return acc100_enqueue_enc_cb(q_data, ops, num);
> > +}
> > +
> >  /* Enqueue encode operations for ACC100 device. */
> >  static uint16_t
> >  acc100_enqueue_ldpc_enc(struct rte_bbdev_queue_data *q_data,
> > @@ -1967,7 +2657,51 @@
> >  {
> >  	if (unlikely(num == 0))
> >  		return 0;
> > -	return acc100_enqueue_ldpc_enc_cb(q_data, ops, num);
> > +	if (ops[0]->ldpc_enc.code_block_mode == 0)
> > +		return acc100_enqueue_enc_tb(q_data, ops, num);
> > +	else
> > +		return acc100_enqueue_ldpc_enc_cb(q_data, ops, num);
> > +}
> > +
> > +
> > +/* Enqueue decode operations for ACC100 device in CB mode */
> > +static uint16_t
> > +acc100_enqueue_dec_cb(struct rte_bbdev_queue_data *q_data,
> > +		struct rte_bbdev_dec_op **ops, uint16_t num)
> > +{
> 
> Seems like the 10th variant of a similar function. Could these be combined
> into fewer functions?
> 
> Maybe by passing in a function pointer to the enqueue_dec_one_op_* helper
> that does the work?

They have variants tied to the specific operation and its constraints.
It is not obvious that a refactor would add value.

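For reference, the shape being suggested would be roughly the following
(sketch only; acc100_enqueue_dec_common and the enq_one_dec_fn typedef are
hypothetical names, not in the patch). It covers the CB-style loop; the TB
variants additionally need cbs_in_tb and per-op CB accounting, which is
why the loop bodies do not line up exactly:

	typedef int (*enq_one_dec_fn)(struct acc100_queue *q,
			struct rte_bbdev_dec_op *op, uint16_t total_enqueued_cbs);

	static uint16_t
	acc100_enqueue_dec_common(struct rte_bbdev_queue_data *q_data,
			struct rte_bbdev_dec_op **ops, uint16_t num,
			enq_one_dec_fn enq_one)
	{
		struct acc100_queue *q = q_data->queue_private;
		int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q->sw_ring_head;
		union acc100_dma_desc *desc;
		uint16_t i;

		for (i = 0; i < num; ++i) {
			/* Check if there is space available for further processing */
			if (unlikely(avail - 1 < 0))
				break;
			avail -= 1;
			if (enq_one(q, ops[i], i) < 0)
				break;
		}

		if (unlikely(i == 0))
			return 0; /* Nothing to enqueue */

		/* Set SDone in last CB in enqueued ops for CB mode */
		desc = q->ring_addr + ((q->sw_ring_head + i - 1)
				& q->sw_ring_wrap_mask);
		desc->req.sdone_enable = 1;
		desc->req.irq_enable = q->irq_enable;

		acc100_dma_enqueue(q, i, &q_data->queue_stats);

		/* Update stats */
		q_data->queue_stats.enqueued_count += i;
		q_data->queue_stats.enqueue_err_count += num - i;
		return i;
	}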

> 
> > +	struct acc100_queue *q = q_data->queue_private;
> > +	int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q->sw_ring_head;
> > +	uint16_t i;
> > +	union acc100_dma_desc *desc;
> > +	int ret;
> > +
> > +	for (i = 0; i < num; ++i) {
> > +		/* Check if there is space available for further processing */
> > +		if (unlikely(avail - 1 < 0))
> > +			break;
> > +		avail -= 1;
> > +
> > +		ret = enqueue_dec_one_op_cb(q, ops[i], i);
> > +		if (ret < 0)
> > +			break;
> > +	}
> > +
> > +	if (unlikely(i == 0))
> > +		return 0; /* Nothing to enqueue */
> > +
> > +	/* Set SDone in last CB in enqueued ops for CB mode */
> > +	desc = q->ring_addr + ((q->sw_ring_head + i - 1)
> > +			& q->sw_ring_wrap_mask);
> > +	desc->req.sdone_enable = 1;
> > +	desc->req.irq_enable = q->irq_enable;
> > +
> > +	acc100_dma_enqueue(q, i, &q_data->queue_stats);
> > +
> > +	/* Update stats */
> > +	q_data->queue_stats.enqueued_count += i;
> > +	q_data->queue_stats.enqueue_err_count += num - i;
> > +
> > +	return i;
> >  }
> >
> >  /* Check we can mux encode operations with common FCW */
> > @@ -2065,6 +2799,53 @@
> >  	return i;
> >  }
> >
> > +
> > +/* Enqueue decode operations for ACC100 device in TB mode */
> > +static uint16_t
> > +acc100_enqueue_dec_tb(struct rte_bbdev_queue_data *q_data,
> > +		struct rte_bbdev_dec_op **ops, uint16_t num)
> > +{
> 11th ;)
> > +	struct acc100_queue *q = q_data->queue_private;
> > +	int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q->sw_ring_head;
> > +	uint16_t i, enqueued_cbs = 0;
> > +	uint8_t cbs_in_tb;
> > +	int ret;
> > +
> > +	for (i = 0; i < num; ++i) {
> > +		cbs_in_tb = get_num_cbs_in_tb_dec(&ops[i]->turbo_dec);
> > +		/* Check if there is space available for further processing */
> > +		if (unlikely(avail - cbs_in_tb < 0))
> > +			break;
> > +		avail -= cbs_in_tb;
> > +
> > +		ret = enqueue_dec_one_op_tb(q, ops[i], enqueued_cbs, cbs_in_tb);
> > +		if (ret < 0)
> > +			break;
> > +		enqueued_cbs += ret;
> > +	}
> > +
> > +	acc100_dma_enqueue(q, enqueued_cbs, &q_data->queue_stats);
> > +
> > +	/* Update stats */
> > +	q_data->queue_stats.enqueued_count += i;
> > +	q_data->queue_stats.enqueue_err_count += num - i;
> > +
> > +	return i;
> > +}
> > +
> > +/* Enqueue decode operations for ACC100 device. */
> > +static uint16_t
> > +acc100_enqueue_dec(struct rte_bbdev_queue_data *q_data,
> > +		struct rte_bbdev_dec_op **ops, uint16_t num)
> > +{
> > +	if (unlikely(num == 0))
> > +		return 0;
> Similarly, move the num == 0 check to the tb/cb functions.

same comment

> > +	if (ops[0]->turbo_dec.code_block_mode == 0)
> > +		return acc100_enqueue_dec_tb(q_data, ops, num);
> > +	else
> > +		return acc100_enqueue_dec_cb(q_data, ops, num);
> > +}
> > +
> >  /* Enqueue decode operations for ACC100 device. */
> >  static uint16_t
> >  acc100_enqueue_ldpc_dec(struct rte_bbdev_queue_data *q_data,
> > @@ -2388,6 +3169,51 @@
> >  	return cb_idx;
> >  }
> >
> > +/* Dequeue encode operations from ACC100 device. */
> > +static uint16_t
> > +acc100_dequeue_enc(struct rte_bbdev_queue_data *q_data,
> > +		struct rte_bbdev_enc_op **ops, uint16_t num)
> > +{
> > +	struct acc100_queue *q = q_data->queue_private;
> > +	uint16_t dequeue_num;
> > +	uint32_t avail = q->sw_ring_head - q->sw_ring_tail;
> > +	uint32_t aq_dequeued = 0;
> > +	uint16_t i;
> > +	uint16_t dequeued_cbs = 0;
> > +	struct rte_bbdev_enc_op *op;
> > +	int ret;
> > +
> > +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> > +	if (unlikely(ops == 0 && q == NULL))
> 
> ops is a pointer so should compare with NULL
> 
> The && likely needs to be ||
> 
> Maybe print out a message so caller knows something wrong happened.

ok

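Concretely, something like this for v10 (sketch; the log text is
illustrative):

	if (unlikely(ops == NULL || q == NULL)) {
		rte_bbdev_log(ERR, "Invalid arguments: ops %p, queue %p", ops, q);
		return 0;
	}
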
> 
> > +		return 0;
> > +#endif
> > +
> > +	dequeue_num = (avail < num) ? avail : num;
> > +
> > +	for (i = 0; i < dequeue_num; ++i) {
> > +		op = (q->ring_addr + ((q->sw_ring_tail + dequeued_cbs)
> > +			& q->sw_ring_wrap_mask))->req.op_addr;
> > +		if (op->turbo_enc.code_block_mode == 0)
> > +			ret = dequeue_enc_one_op_tb(q, &ops[i], dequeued_cbs,
> > +					&aq_dequeued);
> > +		else
> > +			ret = dequeue_enc_one_op_cb(q, &ops[i], dequeued_cbs,
> > +					&aq_dequeued);
> > +
> > +		if (ret < 0)
> > +			break;
> > +		dequeued_cbs += ret;
> > +	}
> > +
> > +	q->aq_dequeued += aq_dequeued;
> > +	q->sw_ring_tail += dequeued_cbs;
> > +
> > +	/* Update dequeue stats */
> > +	q_data->queue_stats.dequeued_count += i;
> > +
> > +	return i;
> > +}
> > +
> >  /* Dequeue LDPC encode operations from ACC100 device. */
> >  static uint16_t
> >  acc100_dequeue_ldpc_enc(struct rte_bbdev_queue_data *q_data,
> > @@ -2426,6 +3252,52 @@
> >  	return dequeued_cbs;
> >  }
> >
> > +
> > +/* Dequeue decode operations from ACC100 device. */
> > +static uint16_t
> > +acc100_dequeue_dec(struct rte_bbdev_queue_data *q_data,
> > +		struct rte_bbdev_dec_op **ops, uint16_t num)
> > +{
> 
> Very similar to the enc function above; consider how to combine them into
> a single function.
> 
> Tom
> 
> > +	struct acc100_queue *q = q_data->queue_private;
> > +	uint16_t dequeue_num;
> > +	uint32_t avail = q->sw_ring_head - q->sw_ring_tail;
> > +	uint32_t aq_dequeued = 0;
> > +	uint16_t i;
> > +	uint16_t dequeued_cbs = 0;
> > +	struct rte_bbdev_dec_op *op;
> > +	int ret;
> > +
> > +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> > +	if (unlikely(ops == 0 && q == NULL))
> > +		return 0;
> > +#endif
> > +
> > +	dequeue_num = (avail < num) ? avail : num;
> > +
> > +	for (i = 0; i < dequeue_num; ++i) {
> > +		op = (q->ring_addr + ((q->sw_ring_tail + dequeued_cbs)
> > +			& q->sw_ring_wrap_mask))->req.op_addr;
> > +		if (op->turbo_dec.code_block_mode == 0)
> > +			ret = dequeue_dec_one_op_tb(q, &ops[i], dequeued_cbs,
> > +					&aq_dequeued);
> > +		else
> > +			ret = dequeue_dec_one_op_cb(q_data, q, &ops[i],
> > +					dequeued_cbs, &aq_dequeued);
> > +
> > +		if (ret < 0)
> > +			break;
> > +		dequeued_cbs += ret;
> > +	}
> > +
> > +	q->aq_dequeued += aq_dequeued;
> > +	q->sw_ring_tail += dequeued_cbs;
> > +
> > +	/* Update dequeue stats */
> > +	q_data->queue_stats.dequeued_count += i;
> > +
> > +	return i;
> > +}
> > +
> >  /* Dequeue decode operations from ACC100 device. */
> >  static uint16_t
> >  acc100_dequeue_ldpc_dec(struct rte_bbdev_queue_data *q_data,
> > @@ -2479,6 +3351,10 @@
> >  	struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev->device);
> >
> >  	dev->dev_ops = &acc100_bbdev_ops;
> > +	dev->enqueue_enc_ops = acc100_enqueue_enc;
> > +	dev->enqueue_dec_ops = acc100_enqueue_dec;
> > +	dev->dequeue_enc_ops = acc100_dequeue_enc;
> > +	dev->dequeue_dec_ops = acc100_dequeue_dec;
> >  	dev->enqueue_ldpc_enc_ops = acc100_enqueue_ldpc_enc;
> >  	dev->enqueue_ldpc_dec_ops = acc100_enqueue_ldpc_dec;
> >  	dev->dequeue_ldpc_enc_ops = acc100_dequeue_ldpc_enc;


