[dpdk-dev] [PATCH 04/16] crypto/cpt/base: add hardware enq/deq API for CPT
Jerin Jacob
jerin.jacob at caviumnetworks.com
Thu Jun 14 05:20:06 CEST 2018
-----Original Message-----
> Date: Fri, 8 Jun 2018 22:15:13 +0530
> From: Anoob Joseph <anoob.joseph at caviumnetworks.com>
> To: Akhil Goyal <akhil.goyal at nxp.com>, Pablo de Lara
> <pablo.de.lara.guarch at intel.com>, Thomas Monjalon <thomas at monjalon.net>
> Cc: Ankur Dwivedi <ankur.dwivedi at cavium.com>, Jerin Jacob
> <jerin.jacob at caviumnetworks.com>, Murthy NSSR
> <Nidadavolu.Murthy at cavium.com>, Narayana Prasad
> <narayanaprasad.athreya at caviumnetworks.com>, Nithin Dabilpuram
> <nithin.dabilpuram at cavium.com>, Ragothaman Jayaraman
> <Ragothaman.Jayaraman at cavium.com>, Srisivasubramanian Srinivasan
> <Srisivasubramanian.Srinivasan at cavium.com>, dev at dpdk.org
> Subject: [PATCH 04/16] crypto/cpt/base: add hardware enq/deq API for CPT
> X-Mailer: git-send-email 2.7.4
>
> From: Ankur Dwivedi <ankur.dwivedi at cavium.com>
>
> Adds hardware enqueue/dequeue API of instructions to a queue pair
> for Cavium CPT device.
>
> Signed-off-by: Ankur Dwivedi <ankur.dwivedi at cavium.com>
> Signed-off-by: Murthy NSSR <Nidadavolu.Murthy at cavium.com>
> Signed-off-by: Nithin Dabilpuram <nithin.dabilpuram at cavium.com>
> Signed-off-by: Ragothaman Jayaraman <Ragothaman.Jayaraman at cavium.com>
> Signed-off-by: Srisivasubramanian Srinivasan <Srisivasubramanian.Srinivasan at cavium.com>
> ---
> drivers/crypto/cpt/base/cpt.h | 102 +++++++
> drivers/crypto/cpt/base/cpt_device.c | 4 +-
> drivers/crypto/cpt/base/cpt_request_mgr.c | 424 ++++++++++++++++++++++++++++++
> drivers/crypto/cpt/base/cpt_request_mgr.h | 75 ++++++
> 4 files changed, 603 insertions(+), 2 deletions(-)
> create mode 100644 drivers/crypto/cpt/base/cpt.h
> create mode 100644 drivers/crypto/cpt/base/cpt_request_mgr.c
> create mode 100644 drivers/crypto/cpt/base/cpt_request_mgr.h
>
> diff --git a/drivers/crypto/cpt/base/cpt.h b/drivers/crypto/cpt/base/cpt.h
> new file mode 100644
> index 0000000..11407ae
> --- /dev/null
> +++ b/drivers/crypto/cpt/base/cpt.h
> @@ -0,0 +1,102 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2017 Cavium, Inc
> + */
> +
> +#ifndef __BASE_CPT_H__
> +#define __BASE_CPT_H__
> +
> +/* Linux Includes */
> +#include <endian.h>
> +#include <stdint.h>
> +#include <string.h>
> +#include <stdio.h>
> +#include <stdbool.h>
> +#include <errno.h>
> +#include <sys/cdefs.h>
> +#include <unistd.h>
> +#include <assert.h>
Please keep these includes in alphabetical order.
> +
> +/* DPDK includes */
> +#include <rte_byteorder.h>
> +#include <rte_common.h>
> +#include <rte_errno.h>
> +#include <rte_memory.h>
> +#include <rte_prefetch.h>
> +
> +#include "../cpt_pmd_logs.h"
> +#include "mcode_defines.h"
> +
> +/** @cond __INTERNAL_DOCUMENTATION__ */
> +
> +/* Declarations */
> +typedef struct cpt_instance cpt_instance_t;
> +
> +/*
> + * Generic Defines
> + */
> +
> +/* Buffer pointer */
> +typedef struct buf_ptr {
> + void *vaddr;
> + phys_addr_t dma_addr;
> + uint32_t size;
> + uint32_t resv;
> +} buf_ptr_t;
> +
> +/* IOV Pointer */
> +typedef struct{
> + int buf_cnt;
> + buf_ptr_t bufs[0];
> +} iov_ptr_t;
> +
> +typedef struct app_data {
> + uint64_t pktout;
> + void *marker;
> +} app_data_t;
> +
> +/* Instance operations */
> +
> +/* Enqueue an SE/AE request */
> +int cpt_enqueue_req(cpt_instance_t *inst, void *req, uint8_t flags,
> + void *event, uint64_t event_flags);
> +
> +/* Dequeue completed SE requests as burst */
> +int32_t cpt_dequeue_burst(cpt_instance_t *instance, uint16_t cnt,
> + void *resp[], uint8_t cc[]);
> +
> +/* Marks event as done in event driven mode */
> +int32_t cpt_event_mark_done(void *marker, uint8_t *op_error);
> +
> +/* Checks queue full condition */
> +uint16_t cpt_queue_full(cpt_instance_t *instance);
> +
> +/* Misc */
> +uint32_t cpt_get_instance_count(void);
> +
> +#define ENQ_FLAG_SYNC 0x01
> +#define ENQ_FLAG_EVENT 0x02
> +#define ENQ_FLAG_NODOORBELL 0x04
> +#define ENQ_FLAG_ONLY_DOORBELL 0x08
> +
> +
> +#define OCTTX_EVENT_TAG(__flags) (__flags & 0xffffffff)
> +#define OCTTX_EVENT_GRP(__flags) ((__flags >> 32) & 0xffff)
> +#define OCTTX_EVENT_TT(__flags) ((__flags >> 48) & 0xff)
> +
> +#define OCTTX_EVENT_FLAGS(__tag, __grp, __tt) \
> + (((uint64_t)__tag & 0xffffffff) | \
> + (((uint64_t)__grp & 0xffff) << 32) | \
> + (((uint64_t)__tt & 0xff) << 48))
> +
> +
> +/* cpt instance */
> +struct cpt_instance {
> + /* 0th cache line */
> + uint32_t queue_id;
> + uint64_t rsvd;
> +};
> +
#ifndef __hot
> +#define __hot __attribute__((hot))
#endif
> +/** @endcond */
> +
> +#endif /* __BASE_CPT_H__ */
> diff --git a/drivers/crypto/cpt/base/cpt_device.c b/drivers/crypto/cpt/base/cpt_device.c
> index b7cd5b5..a50e5b8 100644
> --- a/drivers/crypto/cpt/base/cpt_device.c
> +++ b/drivers/crypto/cpt/base/cpt_device.c
> @@ -193,7 +193,7 @@ int cptvf_get_resource(struct cpt_vf *dev,
> uint64_t *next_ptr;
> uint64_t pg_sz = sysconf(_SC_PAGESIZE);
>
> - PMD_DRV_LOG(DEBUG, "Initializing csp resource %s\n", cptvf->dev_name);
> + PMD_DRV_LOG(DEBUG, "Initializing cpt resource %s\n", cptvf->dev_name);
>
> cpt_instance = &cptvf->instance;
>
> @@ -323,7 +323,7 @@ int cptvf_put_resource(cpt_instance_t *instance)
> return -EINVAL;
> }
>
> - PMD_DRV_LOG(DEBUG, "Releasing csp device %s\n", cptvf->dev_name);
> + PMD_DRV_LOG(DEBUG, "Releasing cpt device %s\n", cptvf->dev_name);
>
> rz = (struct rte_memzone *)instance->rsvd;
> rte_memzone_free(rz);
> diff --git a/drivers/crypto/cpt/base/cpt_request_mgr.c b/drivers/crypto/cpt/base/cpt_request_mgr.c
> new file mode 100644
> index 0000000..8b9b1ff
> --- /dev/null
> +++ b/drivers/crypto/cpt/base/cpt_request_mgr.c
> @@ -0,0 +1,424 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2017 Cavium, Inc
> + */
> +
> +#include "cpt_request_mgr.h"
> +#include "cpt_debug.h"
> +#include <rte_atomic.h>
> +
> +#define MOD_INC(i, l) ((i) == (l - 1) ? (i) = 0 : (i)++)
> +
> +#define __hot __attribute__((hot))
Same as above: wrap this definition in #ifndef __hot / #endif.
> +
> +static inline uint64_t cpu_cycles(void)
> +{
> + return rte_get_timer_cycles();
> +}
> +
> +static inline uint64_t cpu_cycles_freq(void)
> +{
> + return rte_get_timer_hz();
> +}
> +
> +static inline void *
> +get_cpt_inst(struct command_queue *cqueue, void *req)
> +{
> + (void)req;
> + PMD_TX_LOG(DEBUG, "CPT queue idx %u, req %p\n", cqueue->idx, req);
> + return &cqueue->qhead[cqueue->idx * CPT_INST_SIZE];
> +}
> +
> +static inline void
> +mark_cpt_inst(struct cpt_vf *cptvf,
> + struct command_queue *queue,
> + uint32_t ring_door_bell)
> +{
> +#ifdef CMD_DEBUG
> + /* DEBUG */
> + {
> + uint32_t i = queue->idx * CPT_INST_SIZE;
> + cpt_inst_s_t *cmd = (void *)&queue->qhead[i];
> + uint64_t *p = (void *)&queue->qhead[i];
> +
> + PRINT("\nQUEUE parameters:");
> + PRINT("Queue index = %u\n",
> + queue->idx);
> + PRINT("Queue HEAD = %p\n",
> + queue->qhead);
> + PRINT("Command Entry = %p\n",
> + cmd);
> +
> + PRINT("\nCPT_INST_S format:");
> + PRINT("cmd->s.doneint = %x\n", cmd->s.doneint);
> + PRINT("cmd->s.res_addr = %lx\n", cmd->s.res_addr);
> + PRINT("cmd->s.grp = %x\n", cmd->s.grp);
> + PRINT("cmd->s.tag = %x\n", cmd->s.tag);
> + PRINT("cmd->s.tt = %x\n", cmd->s.tt);
> + PRINT("cmd->s.wq_ptr = %lx\n", cmd->s.wq_ptr);
> + PRINT("cmd->s.ei0 = %lx\n", cmd->s.ei0);
> + PRINT("cmd->s.ei1 = %lx\n", cmd->s.ei1);
> + PRINT("cmd->s.ei2 = %lx\n", cmd->s.ei2);
> + PRINT("cmd->s.ei3 = %lx\n", cmd->s.ei3);
> +
> + PRINT("\nCommand dump from queue HEAD:");
> + for (i = 0; i < CPT_INST_SIZE / 8; i++)
> + PRINT("%lx\n", p[i]);
> + }
> +#endif
> + if (unlikely(++queue->idx >= DEFAULT_CMD_QCHUNK_SIZE)) {
> + uint32_t cchunk = queue->cchunk;
> + MOD_INC(cchunk, DEFAULT_CMD_QCHUNKS);
> + queue->qhead = queue->chead[cchunk].head;
> + queue->idx = 0;
> + queue->cchunk = cchunk;
> + }
> +
> + if (ring_door_bell) {
> + /* Memory barrier to flush pending writes */
> + rte_smp_wmb();
> + cptvf_write_vq_doorbell(cptvf, ring_door_bell);
> + }
> +}
> +
> +static inline uint8_t
> +check_nb_command_id(cpt_request_info_t *user_req, struct cpt_vf *cptvf)
> +{
> + uint8_t ret = ERR_REQ_PENDING;
> + volatile cpt_res_s_t *cptres;
> +
> + cptres = (volatile cpt_res_s_t *)user_req->completion_addr;
> +
> + if (unlikely(cptres->s.compcode == CPT_COMP_E_NOTDONE)) {
> + /*
> + * Wait for some time for this command to get completed
> + * before timing out
> + */
> + if (cpu_cycles() < user_req->time_out)
> + return ret;
> + /*
> + * TODO: See if alternate caddr can be used to not loop
> + * longer than needed.
> + */
> + if ((cptres->s.compcode == CPT_COMP_E_NOTDONE) &&
> + (user_req->extra_time < TIME_IN_RESET_COUNT)) {
> + user_req->extra_time++;
> + return ret;
> + }
> +
> + if (cptres->s.compcode != CPT_COMP_E_NOTDONE)
> + goto complete;
> +
> + ret = ERR_REQ_TIMEOUT;
> + PMD_DRV_LOG_RAW(ERR, "Request %p timedout\n", user_req);
> + cptvf_poll_misc(cptvf);
> + dump_cpt_request_sglist(&user_req->dbg_inst,
> + "Response Packet Gather in", 1, 1);
> + goto exit;
> + }
> +
> +complete:
> + if (likely(cptres->s.compcode == CPT_COMP_E_GOOD)) {
> + ret = 0; /* success */
> + PMD_RX_LOG(DEBUG, "MC status %.8x\n",
> + *((volatile uint32_t *)user_req->alternate_caddr));
> + PMD_RX_LOG(DEBUG, "HW status %.8x\n",
> + *((volatile uint32_t *)user_req->completion_addr));
> + } else if ((cptres->s.compcode == CPT_COMP_E_SWERR) ||
> + (cptres->s.compcode == CPT_COMP_E_FAULT)) {
> + ret = (uint8_t)*user_req->alternate_caddr;
> + if (!ret)
> + ret = ERR_BAD_ALT_CCODE;
> + PMD_RX_LOG(DEBUG, "Request %p : failed with %s : err code :"
> + "%x\n", user_req,
> + (cptres->s.compcode == CPT_COMP_E_FAULT) ?
> + "DMA Fault" : "Software error", ret);
> + } else {
> + PMD_DRV_LOG_RAW(ERR, "Request %p : unexpected completion code"
> + " %d\n",
> + user_req, cptres->s.compcode);
> + ret = (uint8_t)*user_req->alternate_caddr;
> + }
> +
> +exit:
> + dump_cpt_request_sglist(&user_req->dbg_inst,
> + "Response Packet Scatter Out", 1, 0);
> + return ret;
> +}
> +
> +
> +/*
> + * cpt_enqueue_req()
> + *
> + * SE & AE request enqueue function
> + */
> +int32_t __hot
> +cpt_enqueue_req(cpt_instance_t *instance, void *req, uint8_t flags,
> + void *event, uint64_t event_flags)
> +{
> + struct pending_queue *pqueue;
> + struct cpt_vf *cptvf;
> + cpt_inst_s_t *cpt_ist_p = NULL;
> + cpt_request_info_t *user_req = (cpt_request_info_t *)req;
> + struct command_queue *cqueue;
> + int32_t ret = 0;
> +
> +#ifdef CPTVF_STRICT_PARAM_CHECK
> + if (unlikely(!instance)) {
> + PMD_DRV_LOG_RAW(ERR, "Invalid inputs (instance: %p, req: %p)\n",
> + instance, req);
> + return -EINVAL;
> + }
> +#endif
> +
> + cptvf = (struct cpt_vf *)instance;
> + pqueue = &cptvf->pqueue;
> +
> + if (unlikely(!req)) {
> + /* ring only pending doorbells */
> + if ((flags & ENQ_FLAG_ONLY_DOORBELL) && pqueue->p_doorbell) {
> + /* Memory barrier to flush pending writes */
> + rte_smp_wmb();
Please cross-check whether rte_wmb() is intended here, as this is a barrier
between the device and the CPU.
> + cptvf_write_vq_doorbell(cptvf, pqueue->p_doorbell);
> + pqueue->p_doorbell = 0;
> + }
> + return 0;
> + }
> +
> +#if defined(ATOMIC_THROTTLING_COUNTER)
If this config option is useful for the end user, then expose it in the config
file and explain the details in the documentation.
> + /* Ask the application to try again later */
> + if (unlikely(cpt_pmd_pcount_load(&pqueue->pending_count) >=
> + DEFAULT_CMD_QLEN)) {
> + return -EAGAIN;
> + }
> +#else
> + if (unlikely(pqueue->pending_count >= DEFAULT_CMD_QLEN))
> + return -EAGAIN;
> +#endif
> + cqueue = &cptvf->cqueue;
> + cpt_ist_p = get_cpt_inst(cqueue, req);
> + rte_prefetch_non_temporal(cpt_ist_p);
> +
> + /* EI0, EI1, EI2, EI3 are already prepared */
> + /* HW W0 */
> + cpt_ist_p->u[0] = 0;
> + /* HW W1 */
> + cpt_ist_p->s.res_addr = user_req->comp_baddr;
> + /* HW W2 */
> + cpt_ist_p->u[2] = 0;
> + /* HW W3 */
> + cpt_ist_p->s.wq_ptr = 0;
> +
> + /* MC EI0 */
> + cpt_ist_p->s.ei0 = user_req->ist.ei0;
> + /* MC EI1 */
> + cpt_ist_p->s.ei1 = user_req->ist.ei1;
> + /* MC EI2 */
> + cpt_ist_p->s.ei2 = user_req->ist.ei2;
> + /* MC EI3 */
> + cpt_ist_p->s.ei3 = user_req->ist.ei3;
> +
> + PMD_TX_LOG(DEBUG, "req: %p op: %p dma_mode 0x%x se_req %u\n",
> + req,
> + user_req->op,
> + user_req->dma_mode,
> + user_req->se_req);
> +
> +#ifdef CPT_DEBUG
> + {
> + vq_cmd_word0_t vq_cmd_w0;
> + vq_cmd_word3_t vq_cmd_w3;
> +
> + vq_cmd_w3.u64 = cpt_ist_p->s.ei3;
> + vq_cmd_w0.u64 = be64toh(cpt_ist_p->s.ei0);
> + user_req->dbg_inst = *cpt_ist_p;
> +
> + if (vq_cmd_w3.s.cptr) {
> + PMD_TX_LOG(DEBUG, "Context Handle: 0x%016lx\n",
> + (uint64_t)vq_cmd_w3.s.cptr);
> + /* Dump max context i.e 448 bytes */
> + cpt_dump_buffer("CONTEXT",
> + os_iova2va((uint64_t)vq_cmd_w3.s.cptr),
> + 448);
> + }
> +
> + dump_cpt_request_info(user_req, cpt_ist_p);
> + dump_cpt_request_sglist(cpt_ist_p, "Request (src)", 1, 1);
> + dump_cpt_request_sglist(cpt_ist_p, "Request (dst)", 0, 0);
> + cpt_dump_buffer("VQ command word0", &cpt_ist_p->u[4],
> + sizeof(vq_cmd_w0));
> + cpt_dump_buffer("VQ command word1", &cpt_ist_p->u[5],
> + sizeof(uint64_t));
> + cpt_dump_buffer("VQ command word2", &cpt_ist_p->u[6],
> + sizeof(uint64_t));
> + cpt_dump_buffer("VQ command word3", &cpt_ist_p->u[7],
> + sizeof(vq_cmd_w3));
> + }
> +#endif
> +
> + if (likely(!(flags & ENQ_FLAG_SYNC))) {
> + void *op = user_req->op;
> +
> + if (unlikely(flags & ENQ_FLAG_EVENT)) {
> + app_data_t *app_data = op;
> +
> + /* Event based completion */
> + cpt_ist_p->s.tag = OCTTX_EVENT_TAG(event_flags);
> + cpt_ist_p->s.grp = OCTTX_EVENT_GRP(event_flags);
> + cpt_ist_p->s.tt = OCTTX_EVENT_TT(event_flags);
> + cpt_ist_p->s.wq_ptr = (uint64_t)event;
> +
> +#if defined(ATOMIC_THROTTLING_COUNTER)
> + app_data->marker = user_req;
> + __atomic_fetch_add(&pqueue->pending_count,
> + 1, __ATOMIC_RELAXED);
> +#else
> + rid_t *rid_e;
> + /*
> + * Mark it as in progress in pending queue, software
> + * will mark it when completion is received
> + */
> + rid_e = &pqueue->rid_queue[pqueue->enq_tail];
> + rid_e->rid = (uint64_t)user_req;
> + /* rid_e->op = op; */
> + MOD_INC(pqueue->enq_tail, DEFAULT_CMD_QLEN);
> + app_data->marker = rid_e;
> +#endif
> +
> + cpt_dump_buffer("CPT Instruction with wqe", cpt_ist_p,
> + sizeof(*cpt_ist_p));
> +
> + mark_cpt_inst(cptvf, cqueue, 1);
> +
> + } else {
> + uint32_t doorbell = 0;
> +
> + if (likely(flags & ENQ_FLAG_NODOORBELL))
> + pqueue->p_doorbell++;
> + else
> + doorbell = ++pqueue->p_doorbell;
> +
> + /* Fill time_out cycles */
> + user_req->time_out = cpu_cycles() +
> + DEFAULT_COMMAND_TIMEOUT * cpu_cycles_freq();
> + user_req->extra_time = 0;
> +
> + cpt_dump_buffer("CPT Instruction", cpt_ist_p,
> + sizeof(*cpt_ist_p));
> +
> + /* Default mode of software queue */
> + mark_cpt_inst(cptvf, cqueue, doorbell);
> +
> + pqueue->p_doorbell -= doorbell;
> + pqueue->rid_queue[pqueue->enq_tail].rid =
> + (uint64_t)user_req;
> + /* pqueue->rid_queue[pqueue->enq_tail].op = op; */
> + /* We will use soft queue length here to limit
> + * requests
> + */
> + MOD_INC(pqueue->enq_tail, DEFAULT_CMD_QLEN);
> + pqueue->pending_count += 1;
> + }
> +
> + PMD_TX_LOG(DEBUG, "Submitted NB cmd with request: %p op: %p\n",
> + user_req, op);
> + } else {
> + /*
> + * Synchronous operation,
> + * hold until completion / timeout
> + */
> + /* Fill time_out cycles */
> + user_req->time_out = cpu_cycles() +
> + DEFAULT_COMMAND_TIMEOUT * cpu_cycles_freq();
> + user_req->extra_time = 0;
> +
> + cpt_dump_buffer("CPT Instruction", cpt_ist_p,
> + sizeof(*cpt_ist_p));
> +
> + /* Default mode of software queue */
> + mark_cpt_inst(cptvf, cqueue, 1);
> +
> + do {
> + /* TODO: should we pause */
> + ret = check_nb_command_id(user_req, cptvf);
> + cptvf_poll_misc(cptvf);
> +#if 0
???
> + PMD_TX_LOG(DEBUG, "Doorbell count for cptvf %s: %u\n",
> + cptvf->dev_name,
> + cptvf_read_vq_doorbell(cptvf));
> +#endif
> + } while (ret == ERR_REQ_PENDING);
> +
More information about the dev
mailing list