[PATCH v3 11/12] baseband/acc: add support for VRB2 engine error detection

Maxime Coquelin maxime.coquelin at redhat.com
Tue Oct 3 19:26:27 CEST 2023



On 10/3/23 19:22, Chautru, Nicolas wrote:
> Hi Maxime,
> 
>> -----Original Message-----
>> From: Maxime Coquelin <maxime.coquelin at redhat.com>
>> Sent: Tuesday, October 3, 2023 8:16 AM
>> To: Chautru, Nicolas <nicolas.chautru at intel.com>; dev at dpdk.org
>> Cc: hemant.agrawal at nxp.com; david.marchand at redhat.com; Vargas, Hernan
>> <hernan.vargas at intel.com>
>> Subject: Re: [PATCH v3 11/12] baseband/acc: add support for VRB2 engine
>> error detection
>>
>>
>>
>> On 9/29/23 18:35, Nicolas Chautru wrote:
>>> Adding missing incremental functionality for the VRB2 variant. Notably
>>> detection of engine error during the dequeue. Minor cosmetic edits.
>>>
>>> Signed-off-by: Nicolas Chautru <nicolas.chautru at intel.com>
>>> ---
>>>    drivers/baseband/acc/rte_vrb_pmd.c  | 20 ++++++++++++--------
>>>    drivers/baseband/acc/vrb1_pf_enum.h | 17 ++++++++++++-----
>>>    2 files changed, 24 insertions(+), 13 deletions(-)
>>>
>>> diff --git a/drivers/baseband/acc/rte_vrb_pmd.c
>>> b/drivers/baseband/acc/rte_vrb_pmd.c
>>> index a9d3db86e6..3eb1a380fc 100644
>>> --- a/drivers/baseband/acc/rte_vrb_pmd.c
>>> +++ b/drivers/baseband/acc/rte_vrb_pmd.c
>>> @@ -1504,6 +1504,7 @@ vrb_fcw_td_fill(const struct rte_bbdev_dec_op
>> *op, struct acc_fcw_td *fcw)
>>>    				fcw->ea = op->turbo_dec.cb_params.e;
>>>    				fcw->eb = op->turbo_dec.cb_params.e;
>>>    			}
>>> +
>>>    			if (op->turbo_dec.rv_index == 0)
>>>    				fcw->k0_start_col = ACC_FCW_TD_RVIDX_0;
>>>    			else if (op->turbo_dec.rv_index == 1) @@ -2304,7
>> +2305,7 @@
>>> enqueue_ldpc_enc_n_op_cb(struct acc_queue *q, struct rte_bbdev_enc_op
>> **ops,
>>>    	return num;
>>>    }
>>>
>>> -/* Enqueue one encode operations for device for a partial TB
>>> +/* Enqueue one encode operations for VRB1 device for a partial TB
>>>     * all codes blocks have same configuration multiplexed on the same
>> descriptor.
>>>     */
>>>    static inline void
>>> @@ -2649,7 +2650,7 @@ enqueue_dec_one_op_cb(struct acc_queue *q,
>> struct rte_bbdev_dec_op *op,
>>>    	return 1;
>>>    }
>>>
>>> -/** Enqueue one decode operations for device in CB mode */
>>> +/** Enqueue one decode operations for device in CB mode. */
>>>    static inline int
>>>    vrb_enqueue_ldpc_dec_one_op_cb(struct acc_queue *q, struct
>> rte_bbdev_dec_op *op,
>>>    		uint16_t total_enqueued_cbs, bool same_op) @@ -2801,7
>> +2802,6 @@
>>> vrb_enqueue_ldpc_dec_one_op_tb(struct acc_queue *q, struct
>> rte_bbdev_dec_op *op,
>>>    		desc->req.data_ptrs[0].blen = ACC_FCW_LD_BLEN;
>>>    		rte_memcpy(&desc->req.fcw_ld, &desc_first->req.fcw_ld,
>> ACC_FCW_LD_BLEN);
>>>    		desc->req.fcw_ld.tb_trailer_size = (c - r - 1) * trail_len;
>>> -
>>>    		if (q->d->device_variant == VRB1_VARIANT)
>>>    			ret = vrb1_dma_desc_ld_fill(op, &desc->req, &input,
>>>    					h_output, &in_offset, &h_out_offset,
>> @@ -3226,7 +3226,6 @@
>>> vrb_enqueue_ldpc_dec_cb(struct rte_bbdev_queue_data *q_data,
>>>    			break;
>>>    		}
>>>    		avail -= 1;
>>> -
>>
>> Is it intentionnally removed?
> 
> Cosmetic but slightly more readable. I don’t have a strong rule for these.

OK, if that's intentionnal that's OK to me.

>>
>>>    		rte_bbdev_log(INFO, "Op %d %d %d %d %d %d %d %d %d %d
>> %d %d\n",
>>>    			i, ops[i]->ldpc_dec.op_flags, ops[i]-
>>> ldpc_dec.rv_index,
>>>    			ops[i]->ldpc_dec.iter_max, ops[i]-
>>> ldpc_dec.iter_count, @@
>>> -3354,6 +3353,7 @@ vrb_dequeue_enc_one_op_cb(struct acc_queue *q,
>> struct rte_bbdev_enc_op **ref_op,
>>>    	op->status |= ((rsp.input_err) ? (1 << RTE_BBDEV_DATA_ERROR) : 0);
>>>    	op->status |= ((rsp.dma_err) ? (1 << RTE_BBDEV_DRV_ERROR) : 0);
>>>    	op->status |= ((rsp.fcw_err) ? (1 << RTE_BBDEV_DRV_ERROR) : 0);
>>> +	op->status |= ((rsp.engine_hung) ? (1 << RTE_BBDEV_ENGINE_ERROR)
>> :
>>> +0);
>>>
>>>    	if (desc->req.last_desc_in_batch) {
>>>    		(*aq_dequeued)++;
>>> @@ -3470,6 +3470,7 @@ vrb_dequeue_enc_one_op_tb(struct acc_queue
>> *q, struct rte_bbdev_enc_op **ref_op,
>>>    		op->status |= ((rsp.input_err) ? (1 <<
>> RTE_BBDEV_DATA_ERROR) : 0);
>>>    		op->status |= ((rsp.dma_err) ? (1 << RTE_BBDEV_DRV_ERROR)
>> : 0);
>>>    		op->status |= ((rsp.fcw_err) ? (1 << RTE_BBDEV_DRV_ERROR) :
>> 0);
>>> +		op->status |= ((rsp.engine_hung) ? (1 <<
>> RTE_BBDEV_ENGINE_ERROR) :
>>> +0);
>>>
>>>    		if (desc->req.last_desc_in_batch) {
>>>    			(*aq_dequeued)++;
>>> @@ -3516,6 +3517,8 @@ vrb_dequeue_dec_one_op_cb(struct
>> rte_bbdev_queue_data *q_data,
>>>    	op->status |= ((rsp.input_err) ? (1 << RTE_BBDEV_DATA_ERROR) : 0);
>>>    	op->status |= ((rsp.dma_err) ? (1 << RTE_BBDEV_DRV_ERROR) : 0);
>>>    	op->status |= ((rsp.fcw_err) ? (1 << RTE_BBDEV_DRV_ERROR) : 0);
>>> +	op->status |= rsp.engine_hung << RTE_BBDEV_ENGINE_ERROR;
>>> +
>>>    	if (op->status != 0) {
>>>    		/* These errors are not expected. */
>>>    		q_data->queue_stats.dequeue_err_count++;
>>> @@ -3569,6 +3572,7 @@ vrb_dequeue_ldpc_dec_one_op_cb(struct
>> rte_bbdev_queue_data *q_data,
>>>    	op->status |= rsp.input_err << RTE_BBDEV_DATA_ERROR;
>>>    	op->status |= rsp.dma_err << RTE_BBDEV_DRV_ERROR;
>>>    	op->status |= rsp.fcw_err << RTE_BBDEV_DRV_ERROR;
>>> +	op->status |= rsp.engine_hung << RTE_BBDEV_ENGINE_ERROR;
>>>    	if (op->status != 0)
>>>    		q_data->queue_stats.dequeue_err_count++;
>>>
>>> @@ -3650,6 +3654,7 @@ vrb_dequeue_dec_one_op_tb(struct acc_queue
>> *q, struct rte_bbdev_dec_op **ref_op,
>>>    		op->status |= ((rsp.input_err) ? (1 <<
>> RTE_BBDEV_DATA_ERROR) : 0);
>>>    		op->status |= ((rsp.dma_err) ? (1 << RTE_BBDEV_DRV_ERROR)
>> : 0);
>>>    		op->status |= ((rsp.fcw_err) ? (1 << RTE_BBDEV_DRV_ERROR) :
>> 0);
>>> +		op->status |= ((rsp.engine_hung) ? (1 <<
>> RTE_BBDEV_ENGINE_ERROR) :
>>> +0);
>>
>> It kinf of highlights the need for refactoring I suggested in previous patch! It
>> would have been done in one place.
> 
> That is fair, some of the logic is fairly common indeed.
> I would create now an internal ticket to refactor some of this for next release. Thanks.

Thanks,
Maxime

>>
>>>
>>>    		if (check_bit(op->ldpc_dec.op_flags,
>> RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK))
>>>    			tb_crc_check ^= desc->rsp.add_info_1; @@ -3701,7
>> +3706,6 @@
>>> vrb_dequeue_enc(struct rte_bbdev_queue_data *q_data,
>>>    	if (avail == 0)
>>>    		return 0;
>>>    	op = acc_op_tail(q, 0);
>>> -
>>>    	cbm = op->turbo_enc.code_block_mode;
>>>
>>>    	for (i = 0; i < avail; i++) {
>>> @@ -4041,9 +4045,8 @@ vrb_enqueue_fft_one_op(struct acc_queue *q,
>> struct rte_bbdev_fft_op *op,
>>>    				&in_offset, &out_offset, &win_offset,
>> &pwr_offset);
>>>    	}
>>>    #ifdef RTE_LIBRTE_BBDEV_DEBUG
>>> -	rte_memdump(stderr, "FCW", &desc->req.fcw_fft,
>>> -			sizeof(desc->req.fcw_fft));
>>> -	rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
>>> +	rte_memdump(stderr, "FCW", fcw, 128);
>>> +	rte_memdump(stderr, "Req Desc.", desc, 128);
>>>    #endif
>>>    	return 1;
>>>    }
>>> @@ -4116,6 +4119,7 @@ vrb_dequeue_fft_one_op(struct
>> rte_bbdev_queue_data *q_data,
>>>    	op->status |= rsp.input_err << RTE_BBDEV_DATA_ERROR;
>>>    	op->status |= rsp.dma_err << RTE_BBDEV_DRV_ERROR;
>>>    	op->status |= rsp.fcw_err << RTE_BBDEV_DRV_ERROR;
>>> +	op->status |= rsp.engine_hung << RTE_BBDEV_ENGINE_ERROR;
>>>    	if (op->status != 0)
>>>    		q_data->queue_stats.dequeue_err_count++;
>>>
>>> diff --git a/drivers/baseband/acc/vrb1_pf_enum.h
>>> b/drivers/baseband/acc/vrb1_pf_enum.h
>>> index 82a36685e9..6dc359800f 100644
>>> --- a/drivers/baseband/acc/vrb1_pf_enum.h
>>> +++ b/drivers/baseband/acc/vrb1_pf_enum.h
>>> @@ -98,11 +98,18 @@ enum {
>>>    	ACC_PF_INT_DMA_UL5G_DESC_IRQ = 8,
>>>    	ACC_PF_INT_DMA_DL5G_DESC_IRQ = 9,
>>>    	ACC_PF_INT_DMA_MLD_DESC_IRQ = 10,
>>> -	ACC_PF_INT_ARAM_ECC_1BIT_ERR = 11,
>>> -	ACC_PF_INT_PARITY_ERR = 12,
>>> -	ACC_PF_INT_QMGR_ERR = 13,
>>> -	ACC_PF_INT_INT_REQ_OVERFLOW = 14,
>>> -	ACC_PF_INT_APB_TIMEOUT = 15,
>>> +	ACC_PF_INT_ARAM_ACCESS_ERR = 11,
>>> +	ACC_PF_INT_ARAM_ECC_1BIT_ERR = 12,
>>> +	ACC_PF_INT_PARITY_ERR = 13,
>>> +	ACC_PF_INT_QMGR_OVERFLOW = 14,
>>> +	ACC_PF_INT_QMGR_ERR = 15,
>>> +	ACC_PF_INT_ATS_ERR = 22,
>>> +	ACC_PF_INT_ARAM_FUUL = 23,
>>> +	ACC_PF_INT_EXTRA_READ = 24,
>>> +	ACC_PF_INT_COMPLETION_TIMEOUT = 25,
>>> +	ACC_PF_INT_CORE_HANG = 26,
>>> +	ACC_PF_INT_DMA_HANG = 28,
>>> +	ACC_PF_INT_DS_HANG = 27,
>>>    };
>>>
>>>    #endif /* VRB1_PF_ENUM_H */
>>
>>
>> Reviewed-by: Maxime Coquelin <maxime.coquelin at redhat.com>
>>
>> Thanks,
>> Maxime
> 



More information about the dev mailing list