[dpdk-dev] [PATCH v9 07/10] baseband/acc100: add support for 4G processing

Chautru, Nicolas nicolas.chautru at intel.com
Thu Oct 1 23:46:38 CEST 2020


Hi Tom,

> From: Tom Rix <trix at redhat.com>
> On 9/30/20 12:10 PM, Chautru, Nicolas wrote:
> > Hi Tom,
> >
> >> From: Tom Rix <trix at redhat.com>
> >> On 9/28/20 5:29 PM, Nicolas Chautru wrote:
> >>> Adding capability for 4G encode and decoder processing
> >>>
> >>> Signed-off-by: Nicolas Chautru <nicolas.chautru at intel.com>
> >>> Acked-by: Liu Tianjiao <Tianjiao.liu at intel.com>
> >>> ---
> >>>  doc/guides/bbdevs/features/acc100.ini    |    4 +-
> >>>  drivers/baseband/acc100/rte_acc100_pmd.c | 1010
> >>> ++++++++++++++++++++++++++++--
> >>>  2 files changed, 945 insertions(+), 69 deletions(-)
> >>>
> >>> diff --git a/doc/guides/bbdevs/features/acc100.ini
> >>> b/doc/guides/bbdevs/features/acc100.ini
> >>> index 40c7adc..642cd48 100644
> >>> --- a/doc/guides/bbdevs/features/acc100.ini
> >>> +++ b/doc/guides/bbdevs/features/acc100.ini
> >>> @@ -4,8 +4,8 @@
> >>>  ; Refer to default.ini for the full list of available PMD features.
> >>>  ;
> >>>  [Features]
> >>> -Turbo Decoder (4G)     = N
> >>> -Turbo Encoder (4G)     = N
> >>> +Turbo Decoder (4G)     = Y
> >>> +Turbo Encoder (4G)     = Y
> >>>  LDPC Decoder (5G)      = Y
> >>>  LDPC Encoder (5G)      = Y
> >>>  LLR/HARQ Compression   = Y
> >>> diff --git a/drivers/baseband/acc100/rte_acc100_pmd.c
> >>> b/drivers/baseband/acc100/rte_acc100_pmd.c
> >>> index e484c0a..7d4c3df 100644
> >>> --- a/drivers/baseband/acc100/rte_acc100_pmd.c
> >>> +++ b/drivers/baseband/acc100/rte_acc100_pmd.c
> >>> @@ -339,7 +339,6 @@
> >>>  	free_base_addresses(base_addrs, i);  }
> >>>
> >>> -
> >>>  /* Allocate 64MB memory used for all software rings */  static int
> >>> acc100_setup_queues(struct rte_bbdev *dev, uint16_t num_queues, int
> >>> socket_id) @@ -637,6 +636,41 @@
> >>>
> >>>  	static const struct rte_bbdev_op_cap bbdev_capabilities[] = {
> >>>  		{
> >>> +			.type = RTE_BBDEV_OP_TURBO_DEC,
> >>> +			.cap.turbo_dec = {
> >>> +				.capability_flags =
> >>> +
> >> 	RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE |
> >>> +					RTE_BBDEV_TURBO_CRC_TYPE_24B
> >> |
> >>> +
> >> 	RTE_BBDEV_TURBO_HALF_ITERATION_EVEN |
> >>> +
> >> 	RTE_BBDEV_TURBO_EARLY_TERMINATION |
> >>> +
> >> 	RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN |
> >>> +					RTE_BBDEV_TURBO_MAP_DEC |
> >>> +
> >> 	RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP |
> >>> +
> >> 	RTE_BBDEV_TURBO_DEC_SCATTER_GATHER,
> >>> +				.max_llr_modulus = INT8_MAX,
> >>> +				.num_buffers_src =
> >>> +
> >> 	RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> >>> +				.num_buffers_hard_out =
> >>> +
> >> 	RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> >>> +				.num_buffers_soft_out =
> >>> +
> >> 	RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> >>> +			}
> >>> +		},
> >>> +		{
> >>> +			.type = RTE_BBDEV_OP_TURBO_ENC,
> >>> +			.cap.turbo_enc = {
> >>> +				.capability_flags =
> >>> +
> >> 	RTE_BBDEV_TURBO_CRC_24B_ATTACH |
> >>> +
> >> 	RTE_BBDEV_TURBO_RV_INDEX_BYPASS |
> >>> +					RTE_BBDEV_TURBO_RATE_MATCH |
> >>> +
> >> 	RTE_BBDEV_TURBO_ENC_SCATTER_GATHER,
> >>> +				.num_buffers_src =
> >>> +
> >> 	RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> >>> +				.num_buffers_dst =
> >>> +
> >> 	RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> >>> +			}
> >>> +		},
> >>> +		{
> >>>  			.type   = RTE_BBDEV_OP_LDPC_ENC,
> >>>  			.cap.ldpc_enc = {
> >>>  				.capability_flags =
> >>> @@ -719,7 +753,6 @@
> >>>  #endif
> >>>  }
> >>>
> >>> -
> >>>  static const struct rte_bbdev_ops acc100_bbdev_ops = {
> >>>  	.setup_queues = acc100_setup_queues,
> >>>  	.close = acc100_dev_close,
> >>> @@ -763,6 +796,58 @@
> >>>  	return tail;
> >>>  }
> >>>
> >>> +/* Fill in a frame control word for turbo encoding. */ static
> >>> +inline void acc100_fcw_te_fill(const struct rte_bbdev_enc_op *op,
> >>> +struct acc100_fcw_te *fcw) {
> >>> +	fcw->code_block_mode = op->turbo_enc.code_block_mode;
> >>> +	if (fcw->code_block_mode == 0) { /* For TB mode */
> >>> +		fcw->k_neg = op->turbo_enc.tb_params.k_neg;
> >>> +		fcw->k_pos = op->turbo_enc.tb_params.k_pos;
> >>> +		fcw->c_neg = op->turbo_enc.tb_params.c_neg;
> >>> +		fcw->c = op->turbo_enc.tb_params.c;
> >>> +		fcw->ncb_neg = op->turbo_enc.tb_params.ncb_neg;
> >>> +		fcw->ncb_pos = op->turbo_enc.tb_params.ncb_pos;
> >>> +
> >>> +		if (check_bit(op->turbo_enc.op_flags,
> >>> +				RTE_BBDEV_TURBO_RATE_MATCH)) {
> >>> +			fcw->bypass_rm = 0;
> >>> +			fcw->cab = op->turbo_enc.tb_params.cab;
> >>> +			fcw->ea = op->turbo_enc.tb_params.ea;
> >>> +			fcw->eb = op->turbo_enc.tb_params.eb;
> >>> +		} else {
> >>> +			/* E is set to the encoding output size when RM is
> >>> +			 * bypassed.
> >>> +			 */
> >>> +			fcw->bypass_rm = 1;
> >>> +			fcw->cab = fcw->c_neg;
> >>> +			fcw->ea = 3 * fcw->k_neg + 12;
> >>> +			fcw->eb = 3 * fcw->k_pos + 12;
> >>> +		}
> >>> +	} else { /* For CB mode */
> >>> +		fcw->k_pos = op->turbo_enc.cb_params.k;
> >>> +		fcw->ncb_pos = op->turbo_enc.cb_params.ncb;
> >>> +
> >>> +		if (check_bit(op->turbo_enc.op_flags,
> >>> +				RTE_BBDEV_TURBO_RATE_MATCH)) {
> >>> +			fcw->bypass_rm = 0;
> >>> +			fcw->eb = op->turbo_enc.cb_params.e;
> >>> +		} else {
> >>> +			/* E is set to the encoding output size when RM is
> >>> +			 * bypassed.
> >>> +			 */
> >>> +			fcw->bypass_rm = 1;
> >>> +			fcw->eb = 3 * fcw->k_pos + 12;
> >>> +		}
> >>> +	}
> >>> +
> >>> +	fcw->bypass_rv_idx1 = check_bit(op->turbo_enc.op_flags,
> >>> +			RTE_BBDEV_TURBO_RV_INDEX_BYPASS);
> >>> +	fcw->code_block_crc = check_bit(op->turbo_enc.op_flags,
> >>> +			RTE_BBDEV_TURBO_CRC_24B_ATTACH);
> >>> +	fcw->rv_idx1 = op->turbo_enc.rv_index; }
> >>> +
> >>>  /* Compute value of k0.
> >>>   * Based on 3GPP 38.212 Table 5.4.2.1-2
> >>>   * Starting position of different redundancy versions, k0 @@ -813,6
> >>> +898,25 @@
> >>>  	fcw->mcb_count = num_cb;
> >>>  }
> >>>
> >>> +/* Fill in a frame control word for turbo decoding. */ static
> >>> +inline void acc100_fcw_td_fill(const struct rte_bbdev_dec_op *op,
> >>> +struct acc100_fcw_td *fcw) {
> >>> +	/* Note : Early termination is always enabled for 4GUL */
> >>> +	fcw->fcw_ver = 1;
> >>> +	if (op->turbo_dec.code_block_mode == 0)
> >>> +		fcw->k_pos = op->turbo_dec.tb_params.k_pos;
> >>> +	else
> >>> +		fcw->k_pos = op->turbo_dec.cb_params.k;
> >>> +	fcw->turbo_crc_type = check_bit(op->turbo_dec.op_flags,
> >>> +			RTE_BBDEV_TURBO_CRC_TYPE_24B);
> >>> +	fcw->bypass_sb_deint = 0;
> >>> +	fcw->raw_decoder_input_on = 0;
> >>> +	fcw->max_iter = op->turbo_dec.iter_max;
> >>> +	fcw->half_iter_on = !check_bit(op->turbo_dec.op_flags,
> >>> +			RTE_BBDEV_TURBO_HALF_ITERATION_EVEN);
> >>> +}
> >>> +
> >>>  /* Fill in a frame control word for LDPC decoding. */  static
> >>> inline void  acc100_fcw_ld_fill(const struct rte_bbdev_dec_op *op,
> >>> struct acc100_fcw_ld *fcw, @@ -1042,6 +1146,87 @@  }
> >>>
> >>>  static inline int
> >>> +acc100_dma_desc_te_fill(struct rte_bbdev_enc_op *op,
> >>> +		struct acc100_dma_req_desc *desc, struct rte_mbuf **input,
> >>> +		struct rte_mbuf *output, uint32_t *in_offset,
> >>> +		uint32_t *out_offset, uint32_t *out_length,
> >>> +		uint32_t *mbuf_total_left, uint32_t *seg_total_left, uint8_t
> >> r) {
> >>> +	int next_triplet = 1; /* FCW already done */
> >>> +	uint32_t e, ea, eb, length;
> >>> +	uint16_t k, k_neg, k_pos;
> >>> +	uint8_t cab, c_neg;
> >>> +
> >>> +	desc->word0 = ACC100_DMA_DESC_TYPE;
> >>> +	desc->word1 = 0; /**< Timestamp could be disabled */
> >>> +	desc->word2 = 0;
> >>> +	desc->word3 = 0;
> >>> +	desc->numCBs = 1;
> >>> +
> >>> +	if (op->turbo_enc.code_block_mode == 0) {
> >>> +		ea = op->turbo_enc.tb_params.ea;
> >>> +		eb = op->turbo_enc.tb_params.eb;
> >>> +		cab = op->turbo_enc.tb_params.cab;
> >>> +		k_neg = op->turbo_enc.tb_params.k_neg;
> >>> +		k_pos = op->turbo_enc.tb_params.k_pos;
> >>> +		c_neg = op->turbo_enc.tb_params.c_neg;
> >>> +		e = (r < cab) ? ea : eb;
> >>> +		k = (r < c_neg) ? k_neg : k_pos;
> >>> +	} else {
> >>> +		e = op->turbo_enc.cb_params.e;
> >>> +		k = op->turbo_enc.cb_params.k;
> >>> +	}
> >>> +
> >>> +	if (check_bit(op->turbo_enc.op_flags,
> >> RTE_BBDEV_TURBO_CRC_24B_ATTACH))
> >>> +		length = (k - 24) >> 3;
> >>> +	else
> >>> +		length = k >> 3;
> >>> +
> >>> +	if (unlikely((*mbuf_total_left == 0) || (*mbuf_total_left <
> >>> +length))) {
> >> similar to other patches, this check can be combined to <=
> >>
> >> change generally
> > same comment on other patch
> >
> >>> +		rte_bbdev_log(ERR,
> >>> +				"Mismatch between mbuf length and
> >> included CB sizes: mbuf len %u, cb len %u",
> >>> +				*mbuf_total_left, length);
> >>> +		return -1;
> >>> +	}
> >>> +
> >>> +	next_triplet = acc100_dma_fill_blk_type_in(desc, input, in_offset,
> >>> +			length, seg_total_left, next_triplet);
> >>> +	if (unlikely(next_triplet < 0)) {
> >>> +		rte_bbdev_log(ERR,
> >>> +				"Mismatch between data to process and
> >> mbuf data length in bbdev_op: %p",
> >>> +				op);
> >>> +		return -1;
> >>> +	}
> >>> +	desc->data_ptrs[next_triplet - 1].last = 1;
> >>> +	desc->m2dlen = next_triplet;
> >>> +	*mbuf_total_left -= length;
> >>> +
> >>> +	/* Set output length */
> >>> +	if (check_bit(op->turbo_enc.op_flags,
> >> RTE_BBDEV_TURBO_RATE_MATCH))
> >>> +		/* Integer round up division by 8 */
> >>> +		*out_length = (e + 7) >> 3;
> >>> +	else
> >>> +		*out_length = (k >> 3) * 3 + 2;
> >>> +
> >>> +	next_triplet = acc100_dma_fill_blk_type_out(desc, output,
> >> *out_offset,
> >>> +			*out_length, next_triplet,
> >> ACC100_DMA_BLKID_OUT_ENC);
> >>> +	if (unlikely(next_triplet < 0)) {
> >>> +		rte_bbdev_log(ERR,
> >>> +				"Mismatch between data to process and
> >> mbuf data length in bbdev_op: %p",
> >>> +				op);
> >>> +		return -1;
> >>> +	}
> >>> +	op->turbo_enc.output.length += *out_length;
> >>> +	*out_offset += *out_length;
> >>> +	desc->data_ptrs[next_triplet - 1].last = 1;
> >>> +	desc->d2mlen = next_triplet - desc->m2dlen;
> >>> +
> >>> +	desc->op_addr = op;
> >>> +
> >>> +	return 0;
> >>> +}
> >>> +
> >>> +static inline int
> >>>  acc100_dma_desc_le_fill(struct rte_bbdev_enc_op *op,
> >>>  		struct acc100_dma_req_desc *desc, struct rte_mbuf **input,
> >>>  		struct rte_mbuf *output, uint32_t *in_offset, @@ -1110,6
> >> +1295,117
> >>> @@  }
> >>>
> >>>  static inline int
> >>> +acc100_dma_desc_td_fill(struct rte_bbdev_dec_op *op,
> >>> +		struct acc100_dma_req_desc *desc, struct rte_mbuf **input,
> >>> +		struct rte_mbuf *h_output, struct rte_mbuf *s_output,
> >>> +		uint32_t *in_offset, uint32_t *h_out_offset,
> >>> +		uint32_t *s_out_offset, uint32_t *h_out_length,
> >>> +		uint32_t *s_out_length, uint32_t *mbuf_total_left,
> >>> +		uint32_t *seg_total_left, uint8_t r) {
> >>> +	int next_triplet = 1; /* FCW already done */
> >>> +	uint16_t k;
> >>> +	uint16_t crc24_overlap = 0;
> >>> +	uint32_t e, kw;
> >>> +
> >>> +	desc->word0 = ACC100_DMA_DESC_TYPE;
> >>> +	desc->word1 = 0; /**< Timestamp could be disabled */
> >>> +	desc->word2 = 0;
> >>> +	desc->word3 = 0;
> >>> +	desc->numCBs = 1;
> >>> +
> >>> +	if (op->turbo_dec.code_block_mode == 0) {
> >>> +		k = (r < op->turbo_dec.tb_params.c_neg)
> >>> +			? op->turbo_dec.tb_params.k_neg
> >>> +			: op->turbo_dec.tb_params.k_pos;
> >>> +		e = (r < op->turbo_dec.tb_params.cab)
> >>> +			? op->turbo_dec.tb_params.ea
> >>> +			: op->turbo_dec.tb_params.eb;
> >>> +	} else {
> >>> +		k = op->turbo_dec.cb_params.k;
> >>> +		e = op->turbo_dec.cb_params.e;
> >>> +	}
> >>> +
> >>> +	if ((op->turbo_dec.code_block_mode == 0)
> >>> +		&& !check_bit(op->turbo_dec.op_flags,
> >>> +		RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP))
> >>> +		crc24_overlap = 24;
> >>> +
> >>> +	/* Calculates circular buffer size.
> >>> +	 * According to 3gpp 36.212 section 5.1.4.2
> >>> +	 *   Kw = 3 * Kpi,
> >>> +	 * where:
> >>> +	 *   Kpi = nCol * nRow
> >>> +	 * where nCol is 32 and nRow can be calculated from:
> >>> +	 *   D =< nCol * nRow
> >>> +	 * where D is the size of each output from turbo encoder block (k
> >>> ++
> >> 4).
> >>> +	 */
> >>> +	kw = RTE_ALIGN_CEIL(k + 4, 32) * 3;
> >>> +
> >>> +	if (unlikely((*mbuf_total_left == 0) || (*mbuf_total_left < kw))) {
> >>> +		rte_bbdev_log(ERR,
> >>> +				"Mismatch between mbuf length and
> >> included CB sizes: mbuf len %u, cb len %u",
> >>> +				*mbuf_total_left, kw);
> >>> +		return -1;
> >>> +	}
> >>> +
> >>> +	next_triplet = acc100_dma_fill_blk_type_in(desc, input, in_offset,
> >> kw,
> >>> +			seg_total_left, next_triplet);
> >>> +	if (unlikely(next_triplet < 0)) {
> >>> +		rte_bbdev_log(ERR,
> >>> +				"Mismatch between data to process and
> >> mbuf data length in bbdev_op: %p",
> >>> +				op);
> >>> +		return -1;
> >>> +	}
> >>> +	desc->data_ptrs[next_triplet - 1].last = 1;
> >>> +	desc->m2dlen = next_triplet;
> >>> +	*mbuf_total_left -= kw;
> >>> +
> >>> +	next_triplet = acc100_dma_fill_blk_type_out(
> >>> +			desc, h_output, *h_out_offset,
> >>> +			k >> 3, next_triplet,
> >> ACC100_DMA_BLKID_OUT_HARD);
> >>> +	if (unlikely(next_triplet < 0)) {
> >>> +		rte_bbdev_log(ERR,
> >>> +				"Mismatch between data to process and
> >> mbuf data length in bbdev_op: %p",
> >>> +				op);
> >>> +		return -1;
> >>> +	}
> >>> +
> >>> +	*h_out_length = ((k - crc24_overlap) >> 3);
> >>> +	op->turbo_dec.hard_output.length += *h_out_length;
> >>> +	*h_out_offset += *h_out_length;
> >>> +
> >>> +	/* Soft output */
> >>> +	if (check_bit(op->turbo_dec.op_flags,
> >> RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
> >>> +		if (check_bit(op->turbo_dec.op_flags,
> >>> +				RTE_BBDEV_TURBO_EQUALIZER))
> >>> +			*s_out_length = e;
> >>> +		else
> >>> +			*s_out_length = (k * 3) + 12;
> >>> +
> >>> +		next_triplet = acc100_dma_fill_blk_type_out(desc, s_output,
> >>> +				*s_out_offset, *s_out_length, next_triplet,
> >>> +				ACC100_DMA_BLKID_OUT_SOFT);
> >>> +		if (unlikely(next_triplet < 0)) {
> >>> +			rte_bbdev_log(ERR,
> >>> +					"Mismatch between data to process
> >> and mbuf data length in bbdev_op: %p",
> >>> +					op);
> >>> +			return -1;
> >>> +		}
> >>> +
> >>> +		op->turbo_dec.soft_output.length += *s_out_length;
> >>> +		*s_out_offset += *s_out_length;
> >>> +	}
> >>> +
> >>> +	desc->data_ptrs[next_triplet - 1].last = 1;
> >>> +	desc->d2mlen = next_triplet - desc->m2dlen;
> >>> +
> >>> +	desc->op_addr = op;
> >>> +
> >>> +	return 0;
> >>> +}
> >>> +
> >>> +static inline int
> >>>  acc100_dma_desc_ld_fill(struct rte_bbdev_dec_op *op,
> >>>  		struct acc100_dma_req_desc *desc,
> >>>  		struct rte_mbuf **input, struct rte_mbuf *h_output, @@ -
> >> 1374,6
> >>> +1670,57 @@
> >>>
> >>>  /* Enqueue one encode operations for ACC100 device in CB mode */
> >>> static inline int
> >>> +enqueue_enc_one_op_cb(struct acc100_queue *q, struct
> >> rte_bbdev_enc_op *op,
> >>> +		uint16_t total_enqueued_cbs)
> >>> +{
> >>> +	union acc100_dma_desc *desc = NULL;
> >>> +	int ret;
> >>> +	uint32_t in_offset, out_offset, out_length, mbuf_total_left,
> >>> +		seg_total_left;
> >>> +	struct rte_mbuf *input, *output_head, *output;
> >>> +
> >>> +	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
> >>> +			& q->sw_ring_wrap_mask);
> >>> +	desc = q->ring_addr + desc_idx;
> >>> +	acc100_fcw_te_fill(op, &desc->req.fcw_te);
> >>> +
> >>> +	input = op->turbo_enc.input.data;
> >>> +	output_head = output = op->turbo_enc.output.data;
> >>> +	in_offset = op->turbo_enc.input.offset;
> >>> +	out_offset = op->turbo_enc.output.offset;
> >>> +	out_length = 0;
> >>> +	mbuf_total_left = op->turbo_enc.input.length;
> >>> +	seg_total_left = rte_pktmbuf_data_len(op->turbo_enc.input.data)
> >>> +			- in_offset;
> >>> +
> >>> +	ret = acc100_dma_desc_te_fill(op, &desc->req, &input, output,
> >>> +			&in_offset, &out_offset, &out_length,
> >> &mbuf_total_left,
> >>> +			&seg_total_left, 0);
> >>> +
> >>> +	if (unlikely(ret < 0))
> >>> +		return ret;
> >>> +
> >>> +	mbuf_append(output_head, output, out_length);
> >>> +
> >>> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> >>> +	rte_memdump(stderr, "FCW", &desc->req.fcw_te,
> >>> +			sizeof(desc->req.fcw_te) - 8);
> >>> +	rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
> >>> +
> >>> +	/* Check if any data left after processing one CB */
> >>> +	if (mbuf_total_left != 0) {
> >>> +		rte_bbdev_log(ERR,
> >>> +				"Some date still left after processing one CB:
> >> mbuf_total_left = %u",
> >>> +				mbuf_total_left);
> >>> +		return -EINVAL;
> >>> +	}
> >>> +#endif
> >>> +	/* One CB (one op) was successfully prepared to enqueue */
> >>> +	return 1;
> >>> +}
> >>> +
> >>> +/* Enqueue one encode operations for ACC100 device in CB mode */
> >>> +static inline int
> >>>  enqueue_ldpc_enc_n_op_cb(struct acc100_queue *q, struct
> >> rte_bbdev_enc_op **ops,
> >>>  		uint16_t total_enqueued_cbs, int16_t num)  { @@ -1481,78
> >> +1828,235
> >>> @@
> >>>  	return 1;
> >>>  }
> >>>
> >>> -static inline int
> >>> -harq_loopback(struct acc100_queue *q, struct rte_bbdev_dec_op *op,
> >>> -		uint16_t total_enqueued_cbs) {
> >>> -	struct acc100_fcw_ld *fcw;
> >>> -	union acc100_dma_desc *desc;
> >>> -	int next_triplet = 1;
> >>> -	struct rte_mbuf *hq_output_head, *hq_output;
> >>> -	uint16_t harq_in_length = op-
> >>> ldpc_dec.harq_combined_input.length;
> >>> -	if (harq_in_length == 0) {
> >>> -		rte_bbdev_log(ERR, "Loopback of invalid null size\n");
> >>> -		return -EINVAL;
> >>> -	}
> >>>
> >>> -	int h_comp = check_bit(op->ldpc_dec.op_flags,
> >>> -			RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION
> >>> -			) ? 1 : 0;
> >>> -	if (h_comp == 1)
> >>> -		harq_in_length = harq_in_length * 8 / 6;
> >>> -	harq_in_length = RTE_ALIGN(harq_in_length, 64);
> >>> -	uint16_t harq_dma_length_in = (h_comp == 0) ?
> >>> -			harq_in_length :
> >>> -			harq_in_length * 6 / 8;
> >>> -	uint16_t harq_dma_length_out = harq_dma_length_in;
> >>> -	bool ddr_mem_in = check_bit(op->ldpc_dec.op_flags,
> >>> -
> >> 	RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE);
> >>> -	union acc100_harq_layout_data *harq_layout = q->d->harq_layout;
> >>> -	uint16_t harq_index = (ddr_mem_in ?
> >>> -			op->ldpc_dec.harq_combined_input.offset :
> >>> -			op->ldpc_dec.harq_combined_output.offset)
> >>> -			/ ACC100_HARQ_OFFSET;
> >>> +/* Enqueue one encode operations for ACC100 device in TB mode. */
> >>> +static inline int enqueue_enc_one_op_tb(struct acc100_queue *q,
> >>> +struct rte_bbdev_enc_op *op,
> >>> +		uint16_t total_enqueued_cbs, uint8_t cbs_in_tb) {
> >>> +	union acc100_dma_desc *desc = NULL;
> >>> +	int ret;
> >>> +	uint8_t r, c;
> >>> +	uint32_t in_offset, out_offset, out_length, mbuf_total_left,
> >>> +		seg_total_left;
> >>> +	struct rte_mbuf *input, *output_head, *output;
> >>> +	uint16_t current_enqueued_cbs = 0;
> >>>
> >>>  	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
> >>>  			& q->sw_ring_wrap_mask);
> >>>  	desc = q->ring_addr + desc_idx;
> >>> -	fcw = &desc->req.fcw_ld;
> >>> -	/* Set the FCW from loopback into DDR */
> >>> -	memset(fcw, 0, sizeof(struct acc100_fcw_ld));
> >>> -	fcw->FCWversion = ACC100_FCW_VER;
> >>> -	fcw->qm = 2;
> >>> -	fcw->Zc = 384;
> >>> -	if (harq_in_length < 16 * N_ZC_1)
> >>> -		fcw->Zc = 16;
> >>> -	fcw->ncb = fcw->Zc * N_ZC_1;
> >>> -	fcw->rm_e = 2;
> >>> -	fcw->hcin_en = 1;
> >>> -	fcw->hcout_en = 1;
> >>> +	uint64_t fcw_offset = (desc_idx << 8) + ACC100_DESC_FCW_OFFSET;
> >>> +	acc100_fcw_te_fill(op, &desc->req.fcw_te);
> >>>
> >>> -	rte_bbdev_log(DEBUG, "Loopback IN %d Index %d offset %d length
> >> %d %d\n",
> >>> -			ddr_mem_in, harq_index,
> >>> -			harq_layout[harq_index].offset, harq_in_length,
> >>> -			harq_dma_length_in);
> >>> +	input = op->turbo_enc.input.data;
> >>> +	output_head = output = op->turbo_enc.output.data;
> >>> +	in_offset = op->turbo_enc.input.offset;
> >>> +	out_offset = op->turbo_enc.output.offset;
> >>> +	out_length = 0;
> >>> +	mbuf_total_left = op->turbo_enc.input.length;
> >>>
> >>> -	if (ddr_mem_in && (harq_layout[harq_index].offset > 0)) {
> >>> -		fcw->hcin_size0 = harq_layout[harq_index].size0;
> >>> -		fcw->hcin_offset = harq_layout[harq_index].offset;
> >>> -		fcw->hcin_size1 = harq_in_length - fcw->hcin_offset;
> >>> -		harq_dma_length_in = (fcw->hcin_size0 + fcw->hcin_size1);
> >>> -		if (h_comp == 1)
> >>> -			harq_dma_length_in = harq_dma_length_in * 6 / 8;
> >>> -	} else {
> >>> -		fcw->hcin_size0 = harq_in_length;
> >>> -	}
> >>> -	harq_layout[harq_index].val = 0;
> >>> -	rte_bbdev_log(DEBUG, "Loopback FCW Config %d %d %d\n",
> >>> -			fcw->hcin_size0, fcw->hcin_offset, fcw->hcin_size1);
> >>> -	fcw->hcout_size0 = harq_in_length;
> >>> -	fcw->hcin_decomp_mode = h_comp;
> >>> -	fcw->hcout_comp_mode = h_comp;
> >>> -	fcw->gain_i = 1;
> >>> -	fcw->gain_h = 1;
> >>> +	c = op->turbo_enc.tb_params.c;
> >>> +	r = op->turbo_enc.tb_params.r;
> >>>
> >>> -	/* Set the prefix of descriptor. This could be done at polling */
> >>> +	while (mbuf_total_left > 0 && r < c) {
> >>> +		seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
> >>> +		/* Set up DMA descriptor */
> >>> +		desc = q->ring_addr + ((q->sw_ring_head +
> >> total_enqueued_cbs)
> >>> +				& q->sw_ring_wrap_mask);
> >>> +		desc->req.data_ptrs[0].address = q->ring_addr_phys +
> >> fcw_offset;
> >>> +		desc->req.data_ptrs[0].blen = ACC100_FCW_TE_BLEN;
> >>> +
> >>> +		ret = acc100_dma_desc_te_fill(op, &desc->req, &input,
> >> output,
> >>> +				&in_offset, &out_offset, &out_length,
> >>> +				&mbuf_total_left, &seg_total_left, r);
> >>> +		if (unlikely(ret < 0))
> >>> +			return ret;
> >>> +		mbuf_append(output_head, output, out_length);
> >>> +
> >>> +		/* Set total number of CBs in TB */
> >>> +		desc->req.cbs_in_tb = cbs_in_tb;
> >>> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> >>> +		rte_memdump(stderr, "FCW", &desc->req.fcw_te,
> >>> +				sizeof(desc->req.fcw_te) - 8);
> >>> +		rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
> >> #endif
> >>> +
> >>> +		if (seg_total_left == 0) {
> >>> +			/* Go to the next mbuf */
> >>> +			input = input->next;
> >>> +			in_offset = 0;
> >>> +			output = output->next;
> >>> +			out_offset = 0;
> >>> +		}
> >>> +
> >>> +		total_enqueued_cbs++;
> >>> +		current_enqueued_cbs++;
> >>> +		r++;
> >>> +	}
> >>> +
> >>> +	if (unlikely(desc == NULL))
> >>> +		return current_enqueued_cbs;
> >>> +
> >>> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> >>> +	/* Check if any CBs left for processing */
> >>> +	if (mbuf_total_left != 0) {
> >>> +		rte_bbdev_log(ERR,
> >>> +				"Some date still left for processing:
> >> mbuf_total_left = %u",
> >>> +				mbuf_total_left);
> >>> +		return -EINVAL;
> >>> +	}
> >>> +#endif
> >>> +
> >>> +	/* Set SDone on last CB descriptor for TB mode. */
> >>> +	desc->req.sdone_enable = 1;
> >>> +	desc->req.irq_enable = q->irq_enable;
> >>> +
> >>> +	return current_enqueued_cbs;
> >>> +}
> >>> +
> >>> +/** Enqueue one decode operations for ACC100 device in CB mode */
> >>> +static inline int enqueue_dec_one_op_cb(struct acc100_queue *q,
> >>> +struct rte_bbdev_dec_op *op,
> >>> +		uint16_t total_enqueued_cbs)
> >>> +{
> >>> +	union acc100_dma_desc *desc = NULL;
> >>> +	int ret;
> >>> +	uint32_t in_offset, h_out_offset, s_out_offset, s_out_length,
> >>> +		h_out_length, mbuf_total_left, seg_total_left;
> >>> +	struct rte_mbuf *input, *h_output_head, *h_output,
> >>> +		*s_output_head, *s_output;
> >>> +
> >>> +	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
> >>> +			& q->sw_ring_wrap_mask);
> >>> +	desc = q->ring_addr + desc_idx;
> >>> +	acc100_fcw_td_fill(op, &desc->req.fcw_td);
> >>> +
> >>> +	input = op->turbo_dec.input.data;
> >>> +	h_output_head = h_output = op->turbo_dec.hard_output.data;
> >>> +	s_output_head = s_output = op->turbo_dec.soft_output.data;
> >>> +	in_offset = op->turbo_dec.input.offset;
> >>> +	h_out_offset = op->turbo_dec.hard_output.offset;
> >>> +	s_out_offset = op->turbo_dec.soft_output.offset;
> >>> +	h_out_length = s_out_length = 0;
> >>> +	mbuf_total_left = op->turbo_dec.input.length;
> >>> +	seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
> >>> +
> >>> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> >>> +	if (unlikely(input == NULL)) {
> >>> +		rte_bbdev_log(ERR, "Invalid mbuf pointer");
> >>> +		return -EFAULT;
> >>> +	}
> >>> +#endif
> >>> +
> >>> +	/* Set up DMA descriptor */
> >>> +	desc = q->ring_addr + ((q->sw_ring_head + total_enqueued_cbs)
> >>> +			& q->sw_ring_wrap_mask);
> >>> +
> >>> +	ret = acc100_dma_desc_td_fill(op, &desc->req, &input, h_output,
> >>> +			s_output, &in_offset, &h_out_offset, &s_out_offset,
> >>> +			&h_out_length, &s_out_length, &mbuf_total_left,
> >>> +			&seg_total_left, 0);
> >>> +
> >>> +	if (unlikely(ret < 0))
> >>> +		return ret;
> >>> +
> >>> +	/* Hard output */
> >>> +	mbuf_append(h_output_head, h_output, h_out_length);
> >>> +
> >>> +	/* Soft output */
> >>> +	if (check_bit(op->turbo_dec.op_flags,
> >> RTE_BBDEV_TURBO_SOFT_OUTPUT))
> >>> +		mbuf_append(s_output_head, s_output, s_out_length);
> >>> +
> >>> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> >>> +	rte_memdump(stderr, "FCW", &desc->req.fcw_td,
> >>> +			sizeof(desc->req.fcw_td) - 8);
> >>> +	rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
> >>> +
> >>> +	/* Check if any CBs left for processing */
> >>> +	if (mbuf_total_left != 0) {
> >>> +		rte_bbdev_log(ERR,
> >>> +				"Some date still left after processing one CB:
> >> mbuf_total_left = %u",
> >>> +				mbuf_total_left);
> >>> +		return -EINVAL;
> >>> +	}
> >>> +#endif
> >> logic similar to debug in mbuf_append, should be a common function.
> > Not exactly except if I miss your point.
> 
> I look for code blocks that look similar and want you to consider if they can
> be combined
> 
> into a general function or macro.  A general function is easier to maintain. In
> this case,
> 
> it seems like logging of something is wrong with mbuf is similar to an earlier
> block of code.
> 
> Nothing is functionally wrong.

For that very example I can add a common error trap.
I'm not totally convinced of the value, and it prevents using %s with __func__ in the message, but OK.

> 
> >
> >>> +
> >>> +	/* One CB (one op) was successfully prepared to enqueue */
> >>> +	return 1;
> >>> +}
> >>> +
> >>> +static inline int
> >>> +harq_loopback(struct acc100_queue *q, struct rte_bbdev_dec_op *op,
> >>> +		uint16_t total_enqueued_cbs) {
> >>> +	struct acc100_fcw_ld *fcw;
> >>> +	union acc100_dma_desc *desc;
> >>> +	int next_triplet = 1;
> >>> +	struct rte_mbuf *hq_output_head, *hq_output;
> >>> +	uint16_t harq_in_length = op-
> >>> ldpc_dec.harq_combined_input.length;
> >>> +	if (harq_in_length == 0) {
> >>> +		rte_bbdev_log(ERR, "Loopback of invalid null size\n");
> >>> +		return -EINVAL;
> >>> +	}
> >>> +
> >>> +	int h_comp = check_bit(op->ldpc_dec.op_flags,
> >>> +			RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION
> >>> +			) ? 1 : 0;
> >>> +	if (h_comp == 1)
> >>> +		harq_in_length = harq_in_length * 8 / 6;
> >>> +	harq_in_length = RTE_ALIGN(harq_in_length, 64);
> >>> +	uint16_t harq_dma_length_in = (h_comp == 0) ?
> >> Can these h_comp checks be combined to a single if/else ?
> > it may be clearer, ok.
> >
> >
> >>> +			harq_in_length :
> >>> +			harq_in_length * 6 / 8;
> >>> +	uint16_t harq_dma_length_out = harq_dma_length_in;
> >>> +	bool ddr_mem_in = check_bit(op->ldpc_dec.op_flags,
> >>> +
> >> 	RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE);
> >>> +	union acc100_harq_layout_data *harq_layout = q->d->harq_layout;
> >>> +	uint16_t harq_index = (ddr_mem_in ?
> >>> +			op->ldpc_dec.harq_combined_input.offset :
> >>> +			op->ldpc_dec.harq_combined_output.offset)
> >>> +			/ ACC100_HARQ_OFFSET;
> >>> +
> >>> +	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
> >>> +			& q->sw_ring_wrap_mask);
> >>> +	desc = q->ring_addr + desc_idx;
> >>> +	fcw = &desc->req.fcw_ld;
> >>> +	/* Set the FCW from loopback into DDR */
> >>> +	memset(fcw, 0, sizeof(struct acc100_fcw_ld));
> >>> +	fcw->FCWversion = ACC100_FCW_VER;
> >>> +	fcw->qm = 2;
> >>> +	fcw->Zc = 384;
> >> these magic numbers should have #defines
> > These are not magic numbers, but actually 3GPP values
> ok
> >
> >>> +	if (harq_in_length < 16 * N_ZC_1)
> >>> +		fcw->Zc = 16;
> >>> +	fcw->ncb = fcw->Zc * N_ZC_1;
> >>> +	fcw->rm_e = 2;
> >>> +	fcw->hcin_en = 1;
> >>> +	fcw->hcout_en = 1;
> >>> +
> >>> +	rte_bbdev_log(DEBUG, "Loopback IN %d Index %d offset %d length
> >> %d %d\n",
> >>> +			ddr_mem_in, harq_index,
> >>> +			harq_layout[harq_index].offset, harq_in_length,
> >>> +			harq_dma_length_in);
> >>> +
> >>> +	if (ddr_mem_in && (harq_layout[harq_index].offset > 0)) {
> >>> +		fcw->hcin_size0 = harq_layout[harq_index].size0;
> >>> +		fcw->hcin_offset = harq_layout[harq_index].offset;
> >>> +		fcw->hcin_size1 = harq_in_length - fcw->hcin_offset;
> >>> +		harq_dma_length_in = (fcw->hcin_size0 + fcw->hcin_size1);
> >>> +		if (h_comp == 1)
> >>> +			harq_dma_length_in = harq_dma_length_in * 6 / 8;
> >>> +	} else {
> >>> +		fcw->hcin_size0 = harq_in_length;
> >>> +	}
> >>> +	harq_layout[harq_index].val = 0;
> >>> +	rte_bbdev_log(DEBUG, "Loopback FCW Config %d %d %d\n",
> >>> +			fcw->hcin_size0, fcw->hcin_offset, fcw->hcin_size1);
> >>> +	fcw->hcout_size0 = harq_in_length;
> >>> +	fcw->hcin_decomp_mode = h_comp;
> >>> +	fcw->hcout_comp_mode = h_comp;
> >>> +	fcw->gain_i = 1;
> >>> +	fcw->gain_h = 1;
> >>> +
> >>> +	/* Set the prefix of descriptor. This could be done at polling */
> >>>  	desc->req.word0 = ACC100_DMA_DESC_TYPE;
> >>>  	desc->req.word1 = 0; /**< Timestamp could be disabled */
> >>>  	desc->req.word2 = 0;
> >>> @@ -1816,6 +2320,107 @@
> >>>  	return current_enqueued_cbs;
> >>>  }
> >>>
> >>> +/* Enqueue one decode operations for ACC100 device in TB mode */
> >>> +static inline int enqueue_dec_one_op_tb(struct acc100_queue *q,
> >>> +struct rte_bbdev_dec_op *op,
> >>> +		uint16_t total_enqueued_cbs, uint8_t cbs_in_tb) {
> >>> +	union acc100_dma_desc *desc = NULL;
> >>> +	int ret;
> >>> +	uint8_t r, c;
> >>> +	uint32_t in_offset, h_out_offset, s_out_offset, s_out_length,
> >>> +		h_out_length, mbuf_total_left, seg_total_left;
> >>> +	struct rte_mbuf *input, *h_output_head, *h_output,
> >>> +		*s_output_head, *s_output;
> >>> +	uint16_t current_enqueued_cbs = 0;
> >>> +
> >>> +	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
> >>> +			& q->sw_ring_wrap_mask);
> >>> +	desc = q->ring_addr + desc_idx;
> >>> +	uint64_t fcw_offset = (desc_idx << 8) + ACC100_DESC_FCW_OFFSET;
> >>> +	acc100_fcw_td_fill(op, &desc->req.fcw_td);
> >>> +
> >>> +	input = op->turbo_dec.input.data;
> >>> +	h_output_head = h_output = op->turbo_dec.hard_output.data;
> >>> +	s_output_head = s_output = op->turbo_dec.soft_output.data;
> >>> +	in_offset = op->turbo_dec.input.offset;
> >>> +	h_out_offset = op->turbo_dec.hard_output.offset;
> >>> +	s_out_offset = op->turbo_dec.soft_output.offset;
> >>> +	h_out_length = s_out_length = 0;
> >>> +	mbuf_total_left = op->turbo_dec.input.length;
> >>> +	c = op->turbo_dec.tb_params.c;
> >>> +	r = op->turbo_dec.tb_params.r;
> >>> +
> >>> +	while (mbuf_total_left > 0 && r < c) {
> >>> +
> >>> +		seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
> >>> +
> >>> +		/* Set up DMA descriptor */
> >>> +		desc = q->ring_addr + ((q->sw_ring_head +
> >> total_enqueued_cbs)
> >>> +				& q->sw_ring_wrap_mask);
> >>> +		desc->req.data_ptrs[0].address = q->ring_addr_phys +
> >> fcw_offset;
> >>> +		desc->req.data_ptrs[0].blen = ACC100_FCW_TD_BLEN;
> >>> +		ret = acc100_dma_desc_td_fill(op, &desc->req, &input,
> >>> +				h_output, s_output, &in_offset,
> >> &h_out_offset,
> >>> +				&s_out_offset, &h_out_length,
> >> &s_out_length,
> >>> +				&mbuf_total_left, &seg_total_left, r);
> >>> +
> >>> +		if (unlikely(ret < 0))
> >>> +			return ret;
> >>> +
> >>> +		/* Hard output */
> >>> +		mbuf_append(h_output_head, h_output, h_out_length);
> >>> +
> >>> +		/* Soft output */
> >>> +		if (check_bit(op->turbo_dec.op_flags,
> >>> +				RTE_BBDEV_TURBO_SOFT_OUTPUT))
> >>> +			mbuf_append(s_output_head, s_output,
> >> s_out_length);
> >>> +
> >>> +		/* Set total number of CBs in TB */
> >>> +		desc->req.cbs_in_tb = cbs_in_tb;
> >>> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> >>> +		rte_memdump(stderr, "FCW", &desc->req.fcw_td,
> >>> +				sizeof(desc->req.fcw_td) - 8);
> >>> +		rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
> >> #endif
> >>> +
> >>> +		if (seg_total_left == 0) {
> >>> +			/* Go to the next mbuf */
> >>> +			input = input->next;
> >>> +			in_offset = 0;
> >>> +			h_output = h_output->next;
> >>> +			h_out_offset = 0;
> >>> +
> >>> +			if (check_bit(op->turbo_dec.op_flags,
> >>> +					RTE_BBDEV_TURBO_SOFT_OUTPUT))
> >> {
> >>> +				s_output = s_output->next;
> >>> +				s_out_offset = 0;
> >>> +			}
> >>> +		}
> >>> +
> >>> +		total_enqueued_cbs++;
> >>> +		current_enqueued_cbs++;
> >>> +		r++;
> >>> +	}
> >>> +
> >>> +	if (unlikely(desc == NULL))
> >>> +		return current_enqueued_cbs;
> >>> +
> >>> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> >>> +	/* Check if any CBs left for processing */
> >>> +	if (mbuf_total_left != 0) {
> >>> +		rte_bbdev_log(ERR,
> >>> +				"Some date still left for processing:
> >> mbuf_total_left = %u",
> >>> +				mbuf_total_left);
> >>> +		return -EINVAL;
> >>> +	}
> >>> +#endif
> >>> +	/* Set SDone on last CB descriptor for TB mode */
> >>> +	desc->req.sdone_enable = 1;
> >>> +	desc->req.irq_enable = q->irq_enable;
> >>> +
> >>> +	return current_enqueued_cbs;
> >>> +}
> >>>
> >>>  /* Calculates number of CBs in processed encoder TB based on 'r'
> >>> and
> >> input
> >>>   * length.
> >>> @@ -1893,6 +2498,45 @@
> >>>  	return cbs_in_tb;
> >>>  }
> >>>
> >>> +/* Enqueue encode operations for ACC100 device in CB mode. */
> >>> +static uint16_t acc100_enqueue_enc_cb(struct rte_bbdev_queue_data
> >> *q_data,
> >>> +		struct rte_bbdev_enc_op **ops, uint16_t num) {
> >>> +	struct acc100_queue *q = q_data->queue_private;
> >>> +	int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q-
> >>> sw_ring_head;
> >>> +	uint16_t i;
> >>> +	union acc100_dma_desc *desc;
> >>> +	int ret;
> >>> +
> >>> +	for (i = 0; i < num; ++i) {
> >>> +		/* Check if there are available space for further processing */
> >>> +		if (unlikely(avail - 1 < 0))
> >>> +			break;
> >>> +		avail -= 1;
> >>> +
> >>> +		ret = enqueue_enc_one_op_cb(q, ops[i], i);
> >>> +		if (ret < 0)
> >>> +			break;
> >>> +	}
> >>> +
> >>> +	if (unlikely(i == 0))
> >>> +		return 0; /* Nothing to enqueue */
> >>> +
> >>> +	/* Set SDone in last CB in enqueued ops for CB mode*/
> >>> +	desc = q->ring_addr + ((q->sw_ring_head + i - 1)
> >>> +			& q->sw_ring_wrap_mask);
> >>> +	desc->req.sdone_enable = 1;
> >>> +	desc->req.irq_enable = q->irq_enable;
> >>> +
> >>> +	acc100_dma_enqueue(q, i, &q_data->queue_stats);
> >>> +
> >>> +	/* Update stats */
> >>> +	q_data->queue_stats.enqueued_count += i;
> >>> +	q_data->queue_stats.enqueue_err_count += num - i;
> >>> +	return i;
> >>> +}
> >>> +
> >>>  /* Check we can mux encode operations with common FCW */  static
> >>> inline bool  check_mux(struct rte_bbdev_enc_op **ops, uint16_t num)
> >>> { @@ -1960,6 +2604,52 @@
> >>>  	return i;
> >>>  }
> >>>
> >>> +/* Enqueue encode operations for ACC100 device in TB mode. */
> >>> +static uint16_t acc100_enqueue_enc_tb(struct rte_bbdev_queue_data
> >> *q_data,
> >>> +		struct rte_bbdev_enc_op **ops, uint16_t num) {
> >>> +	struct acc100_queue *q = q_data->queue_private;
> >>> +	int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q-
> >>> sw_ring_head;
> >>> +	uint16_t i, enqueued_cbs = 0;
> >>> +	uint8_t cbs_in_tb;
> >>> +	int ret;
> >>> +
> >>> +	for (i = 0; i < num; ++i) {
> >>> +		cbs_in_tb = get_num_cbs_in_tb_enc(&ops[i]->turbo_enc);
> >>> +		/* Check if there are available space for further processing */
> >>> +		if (unlikely(avail - cbs_in_tb < 0))
> >>> +			break;
> >>> +		avail -= cbs_in_tb;
> >>> +
> >>> +		ret = enqueue_enc_one_op_tb(q, ops[i], enqueued_cbs,
> >> cbs_in_tb);
> >>> +		if (ret < 0)
> >>> +			break;
> >>> +		enqueued_cbs += ret;
> >>> +	}
> >>> +
> >> other similar functions have a (i == 0) check here.
> > ok
> >
> >>> +	acc100_dma_enqueue(q, enqueued_cbs, &q_data->queue_stats);
> >>> +
> >>> +	/* Update stats */
> >>> +	q_data->queue_stats.enqueued_count += i;
> >>> +	q_data->queue_stats.enqueue_err_count += num - i;
> >>> +
> >>> +	return i;
> >>> +}
> >>> +
> >>> +/* Enqueue encode operations for ACC100 device. */ static uint16_t
> >>> +acc100_enqueue_enc(struct rte_bbdev_queue_data *q_data,
> >>> +		struct rte_bbdev_enc_op **ops, uint16_t num) {
> >>> +	if (unlikely(num == 0))
> >>> +		return 0;
> >> num == 0 check should move into the tb/cb functions
> > same comment on other patch, why not catch it early?
> >
> >>> +	if (ops[0]->turbo_enc.code_block_mode == 0)
> >>> +		return acc100_enqueue_enc_tb(q_data, ops, num);
> >>> +	else
> >>> +		return acc100_enqueue_enc_cb(q_data, ops, num); }
> >>> +
> >>>  /* Enqueue encode operations for ACC100 device. */  static uint16_t
> >>> acc100_enqueue_ldpc_enc(struct rte_bbdev_queue_data *q_data, @@
> >>> -1967,7 +2657,51 @@  {
> >>>  	if (unlikely(num == 0))
> >>>  		return 0;
> >>> -	return acc100_enqueue_ldpc_enc_cb(q_data, ops, num);
> >>> +	if (ops[0]->ldpc_enc.code_block_mode == 0)
> >>> +		return acc100_enqueue_enc_tb(q_data, ops, num);
> >>> +	else
> >>> +		return acc100_enqueue_ldpc_enc_cb(q_data, ops, num); }
> >>> +
> >>> +
> >>> +/* Enqueue decode operations for ACC100 device in CB mode */ static
> >>> +uint16_t acc100_enqueue_dec_cb(struct rte_bbdev_queue_data
> >> *q_data,
> >>> +		struct rte_bbdev_dec_op **ops, uint16_t num) {
> >> Seems like the 10th variant of a similar function could these be
> >> combined to fewer functions ?
> >>
> >> Maybe by passing in a function pointer to the enqueue_one_dec_one*
> >> that does the work ?
> > They have some variants related to the actual operation and constraints.
> > Not obvious to have a valuable refactor.
> >
> As above nothing functionally wrong, just something to consider
> 
> ok.

I agree in principle, and this was done in a number of places (i.e. acc100_dma_fill_blk_type_out(), etc.).
I think we can look in parallel into that idea you suggested earlier of "common code" and future refactoring around that,
notably to support future generations of devices in that family with feature overlap, and hence avoid/limit code duplication moving forward across different PMDs.
More something for 2021. Thanks

> 
> Tom
> 
> >>> +	struct acc100_queue *q = q_data->queue_private;
> >>> +	int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q-
> >>> sw_ring_head;
> >>> +	uint16_t i;
> >>> +	union acc100_dma_desc *desc;
> >>> +	int ret;
> >>> +
> >>> +	for (i = 0; i < num; ++i) {
> >>> +		/* Check if there are available space for further processing */
> >>> +		if (unlikely(avail - 1 < 0))
> >>> +			break;
> >>> +		avail -= 1;
> >>> +
> >>> +		ret = enqueue_dec_one_op_cb(q, ops[i], i);
> >>> +		if (ret < 0)
> >>> +			break;
> >>> +	}
> >>> +
> >>> +	if (unlikely(i == 0))
> >>> +		return 0; /* Nothing to enqueue */
> >>> +
> >>> +	/* Set SDone in last CB in enqueued ops for CB mode*/
> >>> +	desc = q->ring_addr + ((q->sw_ring_head + i - 1)
> >>> +			& q->sw_ring_wrap_mask);
> >>> +	desc->req.sdone_enable = 1;
> >>> +	desc->req.irq_enable = q->irq_enable;
> >>> +
> >>> +	acc100_dma_enqueue(q, i, &q_data->queue_stats);
> >>> +
> >>> +	/* Update stats */
> >>> +	q_data->queue_stats.enqueued_count += i;
> >>> +	q_data->queue_stats.enqueue_err_count += num - i;
> >>> +
> >>> +	return i;
> >>>  }
> >>>
> >>>  /* Check we can mux encode operations with common FCW */ @@ -
> >> 2065,6
> >>> +2799,53 @@
> >>>  	return i;
> >>>  }
> >>>
> >>> +
> >>> +/* Enqueue decode operations for ACC100 device in TB mode */ static
> >>> +uint16_t acc100_enqueue_dec_tb(struct rte_bbdev_queue_data
> >> *q_data,
> >>> +		struct rte_bbdev_dec_op **ops, uint16_t num) {
> >> 11th ;)
> >>> +	struct acc100_queue *q = q_data->queue_private;
> >>> +	int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q-
> >>> sw_ring_head;
> >>> +	uint16_t i, enqueued_cbs = 0;
> >>> +	uint8_t cbs_in_tb;
> >>> +	int ret;
> >>> +
> >>> +	for (i = 0; i < num; ++i) {
> >>> +		cbs_in_tb = get_num_cbs_in_tb_dec(&ops[i]->turbo_dec);
> >>> +		/* Check if there are available space for further processing */
> >>> +		if (unlikely(avail - cbs_in_tb < 0))
> >>> +			break;
> >>> +		avail -= cbs_in_tb;
> >>> +
> >>> +		ret = enqueue_dec_one_op_tb(q, ops[i], enqueued_cbs,
> >> cbs_in_tb);
> >>> +		if (ret < 0)
> >>> +			break;
> >>> +		enqueued_cbs += ret;
> >>> +	}
> >>> +
> >>> +	acc100_dma_enqueue(q, enqueued_cbs, &q_data->queue_stats);
> >>> +
> >>> +	/* Update stats */
> >>> +	q_data->queue_stats.enqueued_count += i;
> >>> +	q_data->queue_stats.enqueue_err_count += num - i;
> >>> +
> >>> +	return i;
> >>> +}
> >>> +
> >>> +/* Enqueue decode operations for ACC100 device. */ static uint16_t
> >>> +acc100_enqueue_dec(struct rte_bbdev_queue_data *q_data,
> >>> +		struct rte_bbdev_dec_op **ops, uint16_t num) {
> >>> +	if (unlikely(num == 0))
> >>> +		return 0;
> >> similar move the num == 0 check to the tb/cb functions.
> > same comment
> >
> >>> +	if (ops[0]->turbo_dec.code_block_mode == 0)
> >>> +		return acc100_enqueue_dec_tb(q_data, ops, num);
> >>> +	else
> >>> +		return acc100_enqueue_dec_cb(q_data, ops, num); }
> >>> +
> >>>  /* Enqueue decode operations for ACC100 device. */  static uint16_t
> >>> acc100_enqueue_ldpc_dec(struct rte_bbdev_queue_data *q_data, @@
> >>> -2388,6 +3169,51 @@
> >>>  	return cb_idx;
> >>>  }
> >>>
> >>> +/* Dequeue encode operations from ACC100 device. */ static uint16_t
> >>> +acc100_dequeue_enc(struct rte_bbdev_queue_data *q_data,
> >>> +		struct rte_bbdev_enc_op **ops, uint16_t num) {
> >>> +	struct acc100_queue *q = q_data->queue_private;
> >>> +	uint16_t dequeue_num;
> >>> +	uint32_t avail = q->sw_ring_head - q->sw_ring_tail;
> >>> +	uint32_t aq_dequeued = 0;
> >>> +	uint16_t i;
> >>> +	uint16_t dequeued_cbs = 0;
> >>> +	struct rte_bbdev_enc_op *op;
> >>> +	int ret;
> >>> +
> >>> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> >>> +	if (unlikely(ops == 0 && q == NULL))
> >> ops is a pointer so should compare with NULL
> >>
> >> The && likely needs to be ||
> >>
> >> Maybe print out a message so caller knows something wrong happened.
> > ok
> >
> >>> +		return 0;
> >>> +#endif
> >>> +
> >>> +	dequeue_num = (avail < num) ? avail : num;
> >>> +
> >>> +	for (i = 0; i < dequeue_num; ++i) {
> >>> +		op = (q->ring_addr + ((q->sw_ring_tail + dequeued_cbs)
> >>> +			& q->sw_ring_wrap_mask))->req.op_addr;
> >>> +		if (op->turbo_enc.code_block_mode == 0)
> >>> +			ret = dequeue_enc_one_op_tb(q, &ops[i],
> >> dequeued_cbs,
> >>> +					&aq_dequeued);
> >>> +		else
> >>> +			ret = dequeue_enc_one_op_cb(q, &ops[i],
> >> dequeued_cbs,
> >>> +					&aq_dequeued);
> >>> +
> >>> +		if (ret < 0)
> >>> +			break;
> >>> +		dequeued_cbs += ret;
> >>> +	}
> >>> +
> >>> +	q->aq_dequeued += aq_dequeued;
> >>> +	q->sw_ring_tail += dequeued_cbs;
> >>> +
> >>> +	/* Update enqueue stats */
> >>> +	q_data->queue_stats.dequeued_count += i;
> >>> +
> >>> +	return i;
> >>> +}
> >>> +
> >>>  /* Dequeue LDPC encode operations from ACC100 device. */  static
> >>> uint16_t  acc100_dequeue_ldpc_enc(struct rte_bbdev_queue_data
> >> *q_data,
> >>> @@ -2426,6 +3252,52 @@
> >>>  	return dequeued_cbs;
> >>>  }
> >>>
> >>> +
> >>> +/* Dequeue decode operations from ACC100 device. */ static uint16_t
> >>> +acc100_dequeue_dec(struct rte_bbdev_queue_data *q_data,
> >>> +		struct rte_bbdev_dec_op **ops, uint16_t num) {
> >> very similar to enc function above, consider how to combine them to a
> >> single function.
> >>
> >> Tom
> >>
> >>> +	struct acc100_queue *q = q_data->queue_private;
> >>> +	uint16_t dequeue_num;
> >>> +	uint32_t avail = q->sw_ring_head - q->sw_ring_tail;
> >>> +	uint32_t aq_dequeued = 0;
> >>> +	uint16_t i;
> >>> +	uint16_t dequeued_cbs = 0;
> >>> +	struct rte_bbdev_dec_op *op;
> >>> +	int ret;
> >>> +
> >>> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> >>> +	if (unlikely(ops == 0 && q == NULL))
> >>> +		return 0;
> >>> +#endif
> >>> +
> >>> +	dequeue_num = (avail < num) ? avail : num;
> >>> +
> >>> +	for (i = 0; i < dequeue_num; ++i) {
> >>> +		op = (q->ring_addr + ((q->sw_ring_tail + dequeued_cbs)
> >>> +			& q->sw_ring_wrap_mask))->req.op_addr;
> >>> +		if (op->turbo_dec.code_block_mode == 0)
> >>> +			ret = dequeue_dec_one_op_tb(q, &ops[i],
> >> dequeued_cbs,
> >>> +					&aq_dequeued);
> >>> +		else
> >>> +			ret = dequeue_dec_one_op_cb(q_data, q, &ops[i],
> >>> +					dequeued_cbs, &aq_dequeued);
> >>> +
> >>> +		if (ret < 0)
> >>> +			break;
> >>> +		dequeued_cbs += ret;
> >>> +	}
> >>> +
> >>> +	q->aq_dequeued += aq_dequeued;
> >>> +	q->sw_ring_tail += dequeued_cbs;
> >>> +
> >>> +	/* Update enqueue stats */
> >>> +	q_data->queue_stats.dequeued_count += i;
> >>> +
> >>> +	return i;
> >>> +}
> >>> +
> >>>  /* Dequeue decode operations from ACC100 device. */  static
> >>> uint16_t acc100_dequeue_ldpc_dec(struct rte_bbdev_queue_data
> >>> *q_data, @@
> >>> -2479,6 +3351,10 @@
> >>>  	struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev->device);
> >>>
> >>>  	dev->dev_ops = &acc100_bbdev_ops;
> >>> +	dev->enqueue_enc_ops = acc100_enqueue_enc;
> >>> +	dev->enqueue_dec_ops = acc100_enqueue_dec;
> >>> +	dev->dequeue_enc_ops = acc100_dequeue_enc;
> >>> +	dev->dequeue_dec_ops = acc100_dequeue_dec;
> >>>  	dev->enqueue_ldpc_enc_ops = acc100_enqueue_ldpc_enc;
> >>>  	dev->enqueue_ldpc_dec_ops = acc100_enqueue_ldpc_dec;
> >>>  	dev->dequeue_ldpc_enc_ops = acc100_dequeue_ldpc_enc;



More information about the dev mailing list