<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=us-ascii">
<style type="text/css" style="display:none;"> P {margin-top:0;margin-bottom:0;} </style>
</head>
<body dir="ltr">
<div style="font-family: 'IntelOne Text'; font-size: 10pt; color: rgb(0, 0, 0);" class="elementToProof">
Acked-by: Kai Ji &lt;kai.ji@intel.com&gt;</div>
<div style="font-family: 'IntelOne Text'; font-size: 10pt; color: rgb(0, 0, 0);" class="elementToProof">
<br>
</div>
<div id="appendonsend"></div>
<hr style="display:inline-block;width:98%" tabindex="-1">
<div id="divRplyFwdMsg" dir="ltr"><font face="Calibri, sans-serif" style="font-size:11pt" color="#000000"><b>From:</b> Nicolau, Radu &lt;radu.nicolau@intel.com&gt;<br>
<b>Sent:</b> 06 August 2025 15:48<br>
<b>To:</b> dev@dpdk.org &lt;dev@dpdk.org&gt;<br>
<b>Cc:</b> Nicolau, Radu &lt;radu.nicolau@intel.com&gt;; stable@dpdk.org &lt;stable@dpdk.org&gt;; Ji, Kai &lt;kai.ji@intel.com&gt;; Fan Zhang &lt;fanzhang.oss@gmail.com&gt;<br>
<b>Subject:</b> [PATCH] crypto/qat: fix source buffer alignment</font>
<div> </div>
</div>
<div class="BodyFragment"><font size="2"><span style="font-size:11pt;">
<div class="PlainText">Fix performance regression resulting from using non cache-aligned<br>
source buffers when using cryptodev API.<br>
<br>
Fixes: fb3b9f492205 ("crypto/qat: rework burst data path")<br>
Cc: stable@dpdk.org<br>
<br>
Signed-off-by: Radu Nicolau &lt;radu.nicolau@intel.com&gt;<br>
---<br>
drivers/crypto/qat/dev/qat_crypto_pmd_gen3.c | 14 ++++++------<br>
drivers/crypto/qat/dev/qat_crypto_pmd_gen4.c | 6 ++---<br>
drivers/crypto/qat/dev/qat_crypto_pmd_gens.h | 21 ++++++++++++++++-<br>
drivers/crypto/qat/dev/qat_sym_pmd_gen1.c | 24 ++++++++++----------<br>
4 files changed, 42 insertions(+), 23 deletions(-)<br>
<br>
diff --git a/drivers/crypto/qat/dev/qat_crypto_pmd_gen3.c b/drivers/crypto/qat/dev/qat_crypto_pmd_gen3.c<br>
index 0dcb5a7cb4..c196cf3cdb 100644<br>
--- a/drivers/crypto/qat/dev/qat_crypto_pmd_gen3.c<br>
+++ b/drivers/crypto/qat/dev/qat_crypto_pmd_gen3.c<br>
@@ -422,7 +422,7 @@ qat_sym_build_op_aead_gen3(void *in_op, struct qat_sym_session *ctx,<br>
}<br>
<br>
total_len = qat_sym_build_req_set_data(req, in_op, cookie,<br>
- in_sgl.vec, in_sgl.num, out_sgl.vec, out_sgl.num);<br>
+ in_sgl.vec, in_sgl.num, out_sgl.vec, out_sgl.num, &ofs, op);<br>
if (unlikely(total_len < 0)) {<br>
op->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS;<br>
return -EINVAL;<br>
@@ -466,7 +466,7 @@ qat_sym_build_op_auth_gen3(void *in_op, struct qat_sym_session *ctx,<br>
}<br>
<br>
total_len = qat_sym_build_req_set_data(req, in_op, cookie,<br>
- in_sgl.vec, in_sgl.num, out_sgl.vec, out_sgl.num);<br>
+ in_sgl.vec, in_sgl.num, out_sgl.vec, out_sgl.num, &ofs, op);<br>
if (unlikely(total_len < 0)) {<br>
op->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS;<br>
return -EINVAL;<br>
@@ -564,7 +564,7 @@ qat_sym_dp_enqueue_single_aead_gen3(void *qp_data, uint8_t *drv_ctx,<br>
rte_mov128((uint8_t *)req, (const uint8_t *)&(ctx->fw_req));<br>
rte_prefetch0((uint8_t *)tx_queue->base_addr + tail);<br>
data_len = qat_sym_build_req_set_data(req, user_data, cookie,<br>
- data, n_data_vecs, NULL, 0);<br>
+ data, n_data_vecs, NULL, 0, NULL, NULL);<br>
if (unlikely(data_len < 0))<br>
return -1;<br>
<br>
@@ -623,7 +623,7 @@ qat_sym_dp_enqueue_aead_jobs_gen3(void *qp_data, uint8_t *drv_ctx,<br>
data_len = qat_sym_build_req_set_data(req,<br>
user_data[i], cookie,<br>
vec->src_sgl[i].vec,<br>
- vec->src_sgl[i].num, NULL, 0);<br>
+ vec->src_sgl[i].num, NULL, 0, NULL, NULL);<br>
}<br>
<br>
if (unlikely(data_len < 0) || error)<br>
@@ -677,7 +677,7 @@ qat_sym_dp_enqueue_single_auth_gen3(void *qp_data, uint8_t *drv_ctx,<br>
rte_mov128((uint8_t *)req, (const uint8_t *)&(ctx->fw_req));<br>
rte_prefetch0((uint8_t *)tx_queue->base_addr + tail);<br>
data_len = qat_sym_build_req_set_data(req, user_data, cookie,<br>
- data, n_data_vecs, NULL, 0);<br>
+ data, n_data_vecs, NULL, 0, NULL, NULL);<br>
if (unlikely(data_len < 0))<br>
return -1;<br>
<br>
@@ -732,12 +732,12 @@ qat_sym_dp_enqueue_auth_jobs_gen3(void *qp_data, uint8_t *drv_ctx,<br>
data_len = qat_sym_build_req_set_data(req,<br>
user_data[i], cookie,<br>
vec->src_sgl[i].vec, vec->src_sgl[i].num,<br>
- vec->dest_sgl[i].vec, vec->dest_sgl[i].num);<br>
+ vec->dest_sgl[i].vec, vec->dest_sgl[i].num, NULL, NULL);<br>
} else {<br>
data_len = qat_sym_build_req_set_data(req,<br>
user_data[i], cookie,<br>
vec->src_sgl[i].vec,<br>
- vec->src_sgl[i].num, NULL, 0);<br>
+ vec->src_sgl[i].num, NULL, 0, NULL, NULL);<br>
}<br>
<br>
if (unlikely(data_len < 0))<br>
diff --git a/drivers/crypto/qat/dev/qat_crypto_pmd_gen4.c b/drivers/crypto/qat/dev/qat_crypto_pmd_gen4.c<br>
index 638da1a173..f42ce7c178 100644<br>
--- a/drivers/crypto/qat/dev/qat_crypto_pmd_gen4.c<br>
+++ b/drivers/crypto/qat/dev/qat_crypto_pmd_gen4.c<br>
@@ -219,7 +219,7 @@ qat_sym_build_op_aead_gen4(void *in_op, struct qat_sym_session *ctx,<br>
}<br>
<br>
total_len = qat_sym_build_req_set_data(qat_req, in_op, cookie,<br>
- in_sgl.vec, in_sgl.num, out_sgl.vec, out_sgl.num);<br>
+ in_sgl.vec, in_sgl.num, out_sgl.vec, out_sgl.num, &ofs, op);<br>
if (unlikely(total_len < 0)) {<br>
op->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS;<br>
return -EINVAL;<br>
@@ -376,7 +376,7 @@ qat_sym_dp_enqueue_single_aead_gen4(void *qp_data, uint8_t *drv_ctx,<br>
rte_mov128((uint8_t *)req, (const uint8_t *)&(ctx->fw_req));<br>
rte_prefetch0((uint8_t *)tx_queue->base_addr + tail);<br>
data_len = qat_sym_build_req_set_data(req, user_data, cookie,<br>
- data, n_data_vecs, NULL, 0);<br>
+ data, n_data_vecs, NULL, 0, NULL, NULL);<br>
if (unlikely(data_len < 0))<br>
return -1;<br>
<br>
@@ -435,7 +435,7 @@ qat_sym_dp_enqueue_aead_jobs_gen4(void *qp_data, uint8_t *drv_ctx,<br>
data_len = qat_sym_build_req_set_data(req,<br>
user_data[i], cookie,<br>
vec->src_sgl[i].vec,<br>
- vec->src_sgl[i].num, NULL, 0);<br>
+ vec->src_sgl[i].num, NULL, 0, NULL, NULL);<br>
}<br>
<br>
if (unlikely(data_len < 0) || error)<br>
diff --git a/drivers/crypto/qat/dev/qat_crypto_pmd_gens.h b/drivers/crypto/qat/dev/qat_crypto_pmd_gens.h<br>
index 1f19c69f88..67dc889b50 100644<br>
--- a/drivers/crypto/qat/dev/qat_crypto_pmd_gens.h<br>
+++ b/drivers/crypto/qat/dev/qat_crypto_pmd_gens.h<br>
@@ -430,7 +430,8 @@ static __rte_always_inline int32_t<br>
qat_sym_build_req_set_data(struct icp_qat_fw_la_bulk_req *req,<br>
void *opaque, struct qat_sym_op_cookie *cookie,<br>
struct rte_crypto_vec *src_vec, uint16_t n_src,<br>
- struct rte_crypto_vec *dst_vec, uint16_t n_dst)<br>
+ struct rte_crypto_vec *dst_vec, uint16_t n_dst,<br>
+ union rte_crypto_sym_ofs *ofs, struct rte_crypto_op *op)<br>
{<br>
struct qat_sgl *list;<br>
uint32_t i;<br>
@@ -502,6 +503,24 @@ qat_sym_build_req_set_data(struct icp_qat_fw_la_bulk_req *req,<br>
dst_data_start = src_data_start;<br>
}<br>
<br>
+ /* For crypto API only try to align the in-place buffers*/<br>
+ if (op != NULL && likely(n_dst == 0)) {<br>
+ uint16_t offset = src_data_start & RTE_CACHE_LINE_MASK;<br>
+ if (offset) {<br>
+ rte_iova_t buff_addr = rte_mbuf_iova_get(op->sym->m_src);<br>
+ /* make sure src_data_start is still within the buffer */<br>
+ if (src_data_start - offset >= buff_addr) {<br>
+ src_data_start -= offset;<br>
+ dst_data_start = src_data_start;<br>
+ ofs->ofs.auth.head += offset;<br>
+ ofs->ofs.cipher.head += offset;<br>
+ tl_src += offset;<br>
+ total_len_src = tl_src;<br>
+ total_len_dst = tl_src;<br>
+ }<br>
+ }<br>
+ }<br>
+<br>
req->comn_mid.src_data_addr = src_data_start;<br>
req->comn_mid.dest_data_addr = dst_data_start;<br>
req->comn_mid.src_length = total_len_src;<br>
diff --git a/drivers/crypto/qat/dev/qat_sym_pmd_gen1.c b/drivers/crypto/qat/dev/qat_sym_pmd_gen1.c<br>
index 8cb85fd8df..6da0f6c645 100644<br>
--- a/drivers/crypto/qat/dev/qat_sym_pmd_gen1.c<br>
+++ b/drivers/crypto/qat/dev/qat_sym_pmd_gen1.c<br>
@@ -242,7 +242,7 @@ qat_sym_build_op_cipher_gen1(void *in_op, struct qat_sym_session *ctx,<br>
}<br>
<br>
total_len = qat_sym_build_req_set_data(req, in_op, cookie,<br>
- in_sgl.vec, in_sgl.num, out_sgl.vec, out_sgl.num);<br>
+ in_sgl.vec, in_sgl.num, out_sgl.vec, out_sgl.num, &ofs, op);<br>
if (unlikely(total_len < 0)) {<br>
op->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS;<br>
return -EINVAL;<br>
@@ -294,7 +294,7 @@ qat_sym_build_op_auth_gen1(void *in_op, struct qat_sym_session *ctx,<br>
req->comn_hdr.serv_specif_flags, 0);<br>
<br>
total_len = qat_sym_build_req_set_data(req, in_op, cookie,<br>
- in_sgl.vec, in_sgl.num, out_sgl.vec, out_sgl.num);<br>
+ in_sgl.vec, in_sgl.num, out_sgl.vec, out_sgl.num, &ofs, op);<br>
if (unlikely(total_len < 0)) {<br>
op->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS;<br>
return -EINVAL;<br>
@@ -339,7 +339,7 @@ qat_sym_build_op_aead_gen1(void *in_op, struct qat_sym_session *ctx,<br>
}<br>
<br>
total_len = qat_sym_build_req_set_data(req, in_op, cookie,<br>
- in_sgl.vec, in_sgl.num, out_sgl.vec, out_sgl.num);<br>
+ in_sgl.vec, in_sgl.num, out_sgl.vec, out_sgl.num, &ofs, op);<br>
if (unlikely(total_len < 0)) {<br>
op->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS;<br>
return -EINVAL;<br>
@@ -384,7 +384,7 @@ qat_sym_build_op_chain_gen1(void *in_op, struct qat_sym_session *ctx,<br>
}<br>
<br>
total_len = qat_sym_build_req_set_data(req, in_op, cookie,<br>
- in_sgl.vec, in_sgl.num, out_sgl.vec, out_sgl.num);<br>
+ in_sgl.vec, in_sgl.num, out_sgl.vec, out_sgl.num, &ofs, op);<br>
if (unlikely(total_len < 0)) {<br>
op->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS;<br>
return -EINVAL;<br>
@@ -512,7 +512,7 @@ qat_sym_dp_enqueue_single_cipher_gen1(void *qp_data, uint8_t *drv_ctx,<br>
rte_prefetch0((uint8_t *)tx_queue->base_addr + tail);<br>
<br>
data_len = qat_sym_build_req_set_data(req, user_data, cookie,<br>
- data, n_data_vecs, NULL, 0);<br>
+ data, n_data_vecs, NULL, 0, NULL, NULL);<br>
if (unlikely(data_len < 0))<br>
return -1;<br>
<br>
@@ -571,7 +571,7 @@ qat_sym_dp_enqueue_cipher_jobs_gen1(void *qp_data, uint8_t *drv_ctx,<br>
data_len = qat_sym_build_req_set_data(req,<br>
user_data[i], cookie,<br>
vec->src_sgl[i].vec,<br>
- vec->src_sgl[i].num, NULL, 0);<br>
+ vec->src_sgl[i].num, NULL, 0, NULL, NULL);<br>
}<br>
<br>
if (unlikely(data_len < 0 || error))<br>
@@ -623,7 +623,7 @@ qat_sym_dp_enqueue_single_auth_gen1(void *qp_data, uint8_t *drv_ctx,<br>
rte_mov128((uint8_t *)req, (const uint8_t *)&(ctx->fw_req));<br>
rte_prefetch0((uint8_t *)tx_queue->base_addr + tail);<br>
data_len = qat_sym_build_req_set_data(req, user_data, cookie,<br>
- data, n_data_vecs, NULL, 0);<br>
+ data, n_data_vecs, NULL, 0, NULL, NULL);<br>
if (unlikely(data_len < 0))<br>
return -1;<br>
<br>
@@ -690,7 +690,7 @@ qat_sym_dp_enqueue_auth_jobs_gen1(void *qp_data, uint8_t *drv_ctx,<br>
data_len = qat_sym_build_req_set_data(req,<br>
user_data[i], cookie,<br>
vec->src_sgl[i].vec,<br>
- vec->src_sgl[i].num, NULL, 0);<br>
+ vec->src_sgl[i].num, NULL, 0, NULL, NULL);<br>
}<br>
<br>
if (unlikely(data_len < 0 || error))<br>
@@ -747,7 +747,7 @@ qat_sym_dp_enqueue_single_chain_gen1(void *qp_data, uint8_t *drv_ctx,<br>
rte_mov128((uint8_t *)req, (const uint8_t *)&(ctx->fw_req));<br>
rte_prefetch0((uint8_t *)tx_queue->base_addr + tail);<br>
data_len = qat_sym_build_req_set_data(req, user_data, cookie,<br>
- data, n_data_vecs, NULL, 0);<br>
+ data, n_data_vecs, NULL, 0, NULL, NULL);<br>
if (unlikely(data_len < 0))<br>
return -1;<br>
<br>
@@ -815,7 +815,7 @@ qat_sym_dp_enqueue_chain_jobs_gen1(void *qp_data, uint8_t *drv_ctx,<br>
data_len = qat_sym_build_req_set_data(req,<br>
user_data[i], cookie,<br>
vec->src_sgl[i].vec,<br>
- vec->src_sgl[i].num, NULL, 0);<br>
+ vec->src_sgl[i].num, NULL, 0, NULL, NULL);<br>
}<br>
<br>
if (unlikely(data_len < 0 || error))<br>
@@ -877,7 +877,7 @@ qat_sym_dp_enqueue_single_aead_gen1(void *qp_data, uint8_t *drv_ctx,<br>
rte_mov128((uint8_t *)req, (const uint8_t *)&(ctx->fw_req));<br>
rte_prefetch0((uint8_t *)tx_queue->base_addr + tail);<br>
data_len = qat_sym_build_req_set_data(req, user_data, cookie,<br>
- data, n_data_vecs, NULL, 0);<br>
+ data, n_data_vecs, NULL, 0, NULL, NULL);<br>
if (unlikely(data_len < 0))<br>
return -1;<br>
<br>
@@ -936,7 +936,7 @@ qat_sym_dp_enqueue_aead_jobs_gen1(void *qp_data, uint8_t *drv_ctx,<br>
data_len = qat_sym_build_req_set_data(req,<br>
user_data[i], cookie,<br>
vec->src_sgl[i].vec,<br>
- vec->src_sgl[i].num, NULL, 0);<br>
+ vec->src_sgl[i].num, NULL, 0, NULL, NULL);<br>
}<br>
<br>
if (unlikely(data_len < 0) || error)<br>
-- <br>
2.50.1<br>
<br>
</div>
</span></font></div>
</body>
</html>