<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=us-ascii">
<style type="text/css" style="display:none;"> P {margin-top:0;margin-bottom:0;} </style>
</head>
<body dir="ltr">
<div class="elementToProof" style="text-align: left; text-indent: 0px; line-height: 1.1; margin: 20px 0px 10px; font-family: 'IntelOne Text'; font-size: 10pt; color: rgb(0, 0, 0);">
Acked-by: Kai Ji &lt;kai.ji@intel.com&gt;</div>
<div id="appendonsend"></div>
<hr style="display:inline-block;width:98%" tabindex="-1">
<div id="divRplyFwdMsg" dir="ltr"><font face="Calibri, sans-serif" style="font-size:11pt" color="#000000"><b>From:</b> Jack Bond-Preston &lt;jack.bond-preston@foss.arm.com&gt;<br>
<b>Sent:</b> 07 June 2024 13:47<br>
<b>To:</b> Ji, Kai &lt;kai.ji@intel.com&gt;<br>
<b>Cc:</b> dev@dpdk.org &lt;dev@dpdk.org&gt;; Wathsala Vithanage &lt;wathsala.vithanage@arm.com&gt;<br>
<b>Subject:</b> [PATCH v4 3/5] crypto/openssl: per-qp cipher context clones</font>
<div> </div>
</div>
<div class="BodyFragment"><font size="2"><span style="font-size:11pt;">
<div class="PlainText">Currently EVP_CIPHER_CTXs are allocated, copied to (from<br>
openssl_session), and then freed for every cipher operation (i.e. per<br>
packet). This is very inefficient, and avoidable.<br>
<br>
Make each openssl_session hold an array of pointers to per-queue-pair<br>
cipher context copies. These are populated on first use by allocating a<br>
new context and copying from the main context. These copies can then be<br>
used in a thread-safe manner by different worker lcores simultaneously.<br>
Consequently the cipher context allocation and copy only has to happen<br>
once - the first time a given qp uses an openssl_session. This brings<br>
about a large performance boost.<br>
<br>
Throughput performance uplift measurements for AES-CBC-128 encrypt on<br>
Ampere Altra Max platform:<br>
1 worker lcore<br>
| buffer sz (B) | prev (Gbps) | optimised (Gbps) | uplift |<br>
|-----------------+---------------+--------------------+----------|<br>
| 64 | 1.51 | 2.94 | 94.4% |<br>
| 256 | 4.90 | 8.05 | 64.3% |<br>
| 1024 | 11.07 | 14.21 | 28.3% |<br>
| 2048 | 14.03 | 16.28 | 16.0% |<br>
| 4096 | 16.20 | 17.59 | 8.6% |<br>
<br>
8 worker lcores<br>
| buffer sz (B) | prev (Gbps) | optimised (Gbps) | uplift |<br>
|-----------------+---------------+--------------------+----------|<br>
| 64 | 3.05 | 23.74 | 678.8% |<br>
| 256 | 10.46 | 64.86 | 520.3% |<br>
| 1024 | 40.97 | 113.80 | 177.7% |<br>
| 2048 | 73.25 | 130.21 | 77.8% |<br>
| 4096 | 103.89 | 140.62 | 35.4% |<br>
<br>
Signed-off-by: Jack Bond-Preston &lt;jack.bond-preston@foss.arm.com&gt;<br>
Reviewed-by: Wathsala Vithanage &lt;wathsala.vithanage@arm.com&gt;<br>
---<br>
drivers/crypto/openssl/openssl_pmd_private.h | 11 +-<br>
drivers/crypto/openssl/rte_openssl_pmd.c | 105 ++++++++++++-------<br>
drivers/crypto/openssl/rte_openssl_pmd_ops.c | 34 +++++-<br>
3 files changed, 108 insertions(+), 42 deletions(-)<br>
<br>
diff --git a/drivers/crypto/openssl/openssl_pmd_private.h b/drivers/crypto/openssl/openssl_pmd_private.h<br>
index 0f038b218c..bad7dcf2f5 100644<br>
--- a/drivers/crypto/openssl/openssl_pmd_private.h<br>
+++ b/drivers/crypto/openssl/openssl_pmd_private.h<br>
@@ -166,6 +166,14 @@ struct __rte_cache_aligned openssl_session {<br>
/**< digest length */<br>
} auth;<br>
<br>
+ uint16_t ctx_copies_len;<br>
+ /* < number of entries in ctx_copies */<br>
+ EVP_CIPHER_CTX *qp_ctx[];<br>
+ /**< Flexible array member of per-queue-pair pointers to copies of EVP<br>
+ * context structure. Cipher contexts are not safe to use from multiple<br>
+ * cores simultaneously, so maintaining these copies allows avoiding<br>
+ * per-buffer copying into a temporary context.<br>
+ */<br>
};<br>
<br>
/** OPENSSL crypto private asymmetric session structure */<br>
@@ -217,7 +225,8 @@ struct __rte_cache_aligned openssl_asym_session {<br>
/** Set and validate OPENSSL crypto session parameters */<br>
extern int<br>
openssl_set_session_parameters(struct openssl_session *sess,<br>
- const struct rte_crypto_sym_xform *xform);<br>
+ const struct rte_crypto_sym_xform *xform,<br>
+ uint16_t nb_queue_pairs);<br>
<br>
/** Reset OPENSSL crypto session parameters */<br>
extern void<br>
diff --git a/drivers/crypto/openssl/rte_openssl_pmd.c b/drivers/crypto/openssl/rte_openssl_pmd.c<br>
index 70f2069985..df44cc097e 100644<br>
--- a/drivers/crypto/openssl/rte_openssl_pmd.c<br>
+++ b/drivers/crypto/openssl/rte_openssl_pmd.c<br>
@@ -467,13 +467,10 @@ openssl_set_sess_aead_dec_param(struct openssl_session *sess,<br>
return 0;<br>
}<br>
<br>
+#if (OPENSSL_VERSION_NUMBER >= 0x30000000L &amp;&amp; OPENSSL_VERSION_NUMBER &lt; 0x30200000L)<br>
static int openssl_aesni_ctx_clone(EVP_CIPHER_CTX **dest,<br>
struct openssl_session *sess)<br>
{<br>
-#if (OPENSSL_VERSION_NUMBER > 0x30200000L)<br>
- *dest = EVP_CIPHER_CTX_dup(sess->ctx);<br>
- return 0;<br>
-#elif (OPENSSL_VERSION_NUMBER > 0x30000000L)<br>
/* OpenSSL versions 3.0.0 &lt;= V &lt; 3.2.0 have no dupctx() implementation<br>
* for AES-GCM and AES-CCM. In this case, we have to create new empty<br>
* contexts and initialise, as we did the original context.<br>
@@ -489,13 +486,8 @@ static int openssl_aesni_ctx_clone(EVP_CIPHER_CTX **dest,<br>
return openssl_set_sess_aead_dec_param(sess, sess->aead_algo,<br>
sess->auth.digest_length, sess->cipher.key.data,<br>
dest);<br>
-#else<br>
- *dest = EVP_CIPHER_CTX_new();<br>
- if (EVP_CIPHER_CTX_copy(*dest, sess->cipher.ctx) != 1)<br>
- return -EINVAL;<br>
- return 0;<br>
-#endif<br>
}<br>
+#endif<br>
<br>
/** Set session cipher parameters */<br>
static int<br>
@@ -824,7 +816,8 @@ openssl_set_session_aead_parameters(struct openssl_session *sess,<br>
/** Parse crypto xform chain and set private session parameters */<br>
int<br>
openssl_set_session_parameters(struct openssl_session *sess,<br>
- const struct rte_crypto_sym_xform *xform)<br>
+ const struct rte_crypto_sym_xform *xform,<br>
+ uint16_t nb_queue_pairs)<br>
{<br>
const struct rte_crypto_sym_xform *cipher_xform = NULL;<br>
const struct rte_crypto_sym_xform *auth_xform = NULL;<br>
@@ -886,6 +879,12 @@ openssl_set_session_parameters(struct openssl_session *sess,<br>
}<br>
}<br>
<br>
+ /*<br>
+ * With only one queue pair, the array of copies is not needed.<br>
+ * Otherwise, one entry per queue pair is required.<br>
+ */<br>
+ sess->ctx_copies_len = nb_queue_pairs > 1 ? nb_queue_pairs : 0;<br>
+<br>
return 0;<br>
}<br>
<br>
@@ -893,6 +892,13 @@ openssl_set_session_parameters(struct openssl_session *sess,<br>
void<br>
openssl_reset_session(struct openssl_session *sess)<br>
{<br>
+ for (uint16_t i = 0; i &lt; sess->ctx_copies_len; i++) {<br>
+ if (sess->qp_ctx[i] != NULL) {<br>
+ EVP_CIPHER_CTX_free(sess->qp_ctx[i]);<br>
+ sess->qp_ctx[i] = NULL;<br>
+ }<br>
+ }<br>
+<br>
EVP_CIPHER_CTX_free(sess->cipher.ctx);<br>
<br>
if (sess->chain_order == OPENSSL_CHAIN_CIPHER_BPI)<br>
@@ -959,7 +965,7 @@ get_session(struct openssl_qp *qp, struct rte_crypto_op *op)<br>
sess = (struct openssl_session *)_sess->driver_priv_data;<br>
<br>
if (unlikely(openssl_set_session_parameters(sess,<br>
- op->sym->xform) != 0)) {<br>
+ op->sym->xform, 1) != 0)) {<br>
rte_mempool_put(qp->sess_mp, _sess);<br>
sess = NULL;<br>
}<br>
@@ -1607,11 +1613,45 @@ process_openssl_auth_cmac(struct rte_mbuf *mbuf_src, uint8_t *dst, int offset,<br>
# endif<br>
/*----------------------------------------------------------------------------*/<br>
<br>
+static inline EVP_CIPHER_CTX *<br>
+get_local_cipher_ctx(struct openssl_session *sess, struct openssl_qp *qp)<br>
+{<br>
+ /* If the array is not being used, just return the main context. */<br>
+ if (sess->ctx_copies_len == 0)<br>
+ return sess->cipher.ctx;<br>
+<br>
+ EVP_CIPHER_CTX **lctx = &sess->qp_ctx[qp->id];<br>
+<br>
+ if (unlikely(*lctx == NULL)) {<br>
+#if OPENSSL_VERSION_NUMBER >= 0x30200000L<br>
+ /* EVP_CIPHER_CTX_dup() added in OSSL 3.2 */<br>
+ *lctx = EVP_CIPHER_CTX_dup(sess->cipher.ctx);<br>
+ return *lctx;<br>
+#elif OPENSSL_VERSION_NUMBER >= 0x30000000L<br>
+ if (sess->chain_order == OPENSSL_CHAIN_COMBINED) {<br>
+ /* AESNI special-cased to use openssl_aesni_ctx_clone()<br>
+ * to allow for working around lack of<br>
+ * EVP_CIPHER_CTX_copy support for 3.0.0 &lt;= OSSL Version<br>
+ * &lt; 3.2.0.<br>
+ */<br>
+ if (openssl_aesni_ctx_clone(lctx, sess) != 0)<br>
+ *lctx = NULL;<br>
+ return *lctx;<br>
+ }<br>
+#endif<br>
+<br>
+ *lctx = EVP_CIPHER_CTX_new();<br>
+ EVP_CIPHER_CTX_copy(*lctx, sess->cipher.ctx);<br>
+ }<br>
+<br>
+ return *lctx;<br>
+}<br>
+<br>
/** Process auth/cipher combined operation */<br>
static void<br>
-process_openssl_combined_op<br>
- (struct rte_crypto_op *op, struct openssl_session *sess,<br>
- struct rte_mbuf *mbuf_src, struct rte_mbuf *mbuf_dst)<br>
+process_openssl_combined_op(struct openssl_qp *qp, struct rte_crypto_op *op,<br>
+ struct openssl_session *sess, struct rte_mbuf *mbuf_src,<br>
+ struct rte_mbuf *mbuf_dst)<br>
{<br>
/* cipher */<br>
uint8_t *dst = NULL, *iv, *tag, *aad;<br>
@@ -1628,11 +1668,7 @@ process_openssl_combined_op<br>
return;<br>
}<br>
<br>
- EVP_CIPHER_CTX *ctx;<br>
- if (openssl_aesni_ctx_clone(&ctx, sess) != 0) {<br>
- op->status = RTE_CRYPTO_OP_STATUS_ERROR;<br>
- return;<br>
- }<br>
+ EVP_CIPHER_CTX *ctx = get_local_cipher_ctx(sess, qp);<br>
<br>
iv = rte_crypto_op_ctod_offset(op, uint8_t *,<br>
sess->iv.offset);<br>
@@ -1688,8 +1724,6 @@ process_openssl_combined_op<br>
dst, tag, taglen, ctx);<br>
}<br>
<br>
- EVP_CIPHER_CTX_free(ctx);<br>
-<br>
if (status != 0) {<br>
if (status == (-EFAULT) &&<br>
sess->auth.operation ==<br>
@@ -1702,14 +1736,13 @@ process_openssl_combined_op<br>
<br>
/** Process cipher operation */<br>
static void<br>
-process_openssl_cipher_op<br>
- (struct rte_crypto_op *op, struct openssl_session *sess,<br>
- struct rte_mbuf *mbuf_src, struct rte_mbuf *mbuf_dst)<br>
+process_openssl_cipher_op(struct openssl_qp *qp, struct rte_crypto_op *op,<br>
+ struct openssl_session *sess, struct rte_mbuf *mbuf_src,<br>
+ struct rte_mbuf *mbuf_dst)<br>
{<br>
uint8_t *dst, *iv;<br>
int srclen, status;<br>
uint8_t inplace = (mbuf_src == mbuf_dst) ? 1 : 0;<br>
- EVP_CIPHER_CTX *ctx_copy;<br>
<br>
/*<br>
* Segmented OOP destination buffer is not supported for encryption/<br>
@@ -1728,24 +1761,22 @@ process_openssl_cipher_op<br>
<br>
iv = rte_crypto_op_ctod_offset(op, uint8_t *,<br>
sess->iv.offset);<br>
- ctx_copy = EVP_CIPHER_CTX_new();<br>
- EVP_CIPHER_CTX_copy(ctx_copy, sess->cipher.ctx);<br>
+<br>
+ EVP_CIPHER_CTX *ctx = get_local_cipher_ctx(sess, qp);<br>
<br>
if (sess->cipher.mode == OPENSSL_CIPHER_LIB)<br>
if (sess->cipher.direction == RTE_CRYPTO_CIPHER_OP_ENCRYPT)<br>
status = process_openssl_cipher_encrypt(mbuf_src, dst,<br>
op->sym->cipher.data.offset, iv,<br>
- srclen, ctx_copy, inplace);<br>
+ srclen, ctx, inplace);<br>
else<br>
status = process_openssl_cipher_decrypt(mbuf_src, dst,<br>
op->sym->cipher.data.offset, iv,<br>
- srclen, ctx_copy, inplace);<br>
+ srclen, ctx, inplace);<br>
else<br>
status = process_openssl_cipher_des3ctr(mbuf_src, dst,<br>
- op->sym->cipher.data.offset, iv, srclen,<br>
- ctx_copy);<br>
+ op->sym->cipher.data.offset, iv, srclen, ctx);<br>
<br>
- EVP_CIPHER_CTX_free(ctx_copy);<br>
if (status != 0)<br>
op->status = RTE_CRYPTO_OP_STATUS_ERROR;<br>
}<br>
@@ -3150,13 +3181,13 @@ process_op(struct openssl_qp *qp, struct rte_crypto_op *op,<br>
<br>
switch (sess->chain_order) {<br>
case OPENSSL_CHAIN_ONLY_CIPHER:<br>
- process_openssl_cipher_op(op, sess, msrc, mdst);<br>
+ process_openssl_cipher_op(qp, op, sess, msrc, mdst);<br>
break;<br>
case OPENSSL_CHAIN_ONLY_AUTH:<br>
process_openssl_auth_op(qp, op, sess, msrc, mdst);<br>
break;<br>
case OPENSSL_CHAIN_CIPHER_AUTH:<br>
- process_openssl_cipher_op(op, sess, msrc, mdst);<br>
+ process_openssl_cipher_op(qp, op, sess, msrc, mdst);<br>
/* OOP */<br>
if (msrc != mdst)<br>
copy_plaintext(msrc, mdst, op);<br>
@@ -3164,10 +3195,10 @@ process_op(struct openssl_qp *qp, struct rte_crypto_op *op,<br>
break;<br>
case OPENSSL_CHAIN_AUTH_CIPHER:<br>
process_openssl_auth_op(qp, op, sess, msrc, mdst);<br>
- process_openssl_cipher_op(op, sess, msrc, mdst);<br>
+ process_openssl_cipher_op(qp, op, sess, msrc, mdst);<br>
break;<br>
case OPENSSL_CHAIN_COMBINED:<br>
- process_openssl_combined_op(op, sess, msrc, mdst);<br>
+ process_openssl_combined_op(qp, op, sess, msrc, mdst);<br>
break;<br>
case OPENSSL_CHAIN_CIPHER_BPI:<br>
process_openssl_docsis_bpi_op(op, sess, msrc, mdst);<br>
diff --git a/drivers/crypto/openssl/rte_openssl_pmd_ops.c b/drivers/crypto/openssl/rte_openssl_pmd_ops.c<br>
index b16baaa08f..4209c6ab6f 100644<br>
--- a/drivers/crypto/openssl/rte_openssl_pmd_ops.c<br>
+++ b/drivers/crypto/openssl/rte_openssl_pmd_ops.c<br>
@@ -794,9 +794,34 @@ openssl_pmd_qp_setup(struct rte_cryptodev *dev, uint16_t qp_id,<br>
<br>
/** Returns the size of the symmetric session structure */<br>
static unsigned<br>
-openssl_pmd_sym_session_get_size(struct rte_cryptodev *dev __rte_unused)<br>
+openssl_pmd_sym_session_get_size(struct rte_cryptodev *dev)<br>
{<br>
- return sizeof(struct openssl_session);<br>
+ /*<br>
+ * For 0 qps, return the max size of the session - this is necessary if<br>
+ * the user calls into this function to create the session mempool,<br>
+ * without first configuring the number of qps for the cryptodev.<br>
+ */<br>
+ if (dev->data->nb_queue_pairs == 0) {<br>
+ unsigned int max_nb_qps = ((struct openssl_private *)<br>
+ dev->data->dev_private)->max_nb_qpairs;<br>
+ return sizeof(struct openssl_session) +<br>
+ (sizeof(void *) * max_nb_qps);<br>
+ }<br>
+<br>
+ /*<br>
+ * With only one queue pair, the thread safety of multiple context<br>
+ * copies is not necessary, so don't allocate extra memory for the<br>
+ * array.<br>
+ */<br>
+ if (dev->data->nb_queue_pairs == 1)<br>
+ return sizeof(struct openssl_session);<br>
+<br>
+ /*<br>
+ * Otherwise, the size of the flexible array member should be enough to<br>
+ * fit pointers to per-qp contexts.<br>
+ */<br>
+ return sizeof(struct openssl_session) +<br>
+ (sizeof(void *) * dev->data->nb_queue_pairs);<br>
}<br>
<br>
/** Returns the size of the asymmetric session structure */<br>
@@ -808,7 +833,7 @@ openssl_pmd_asym_session_get_size(struct rte_cryptodev *dev __rte_unused)<br>
<br>
/** Configure the session from a crypto xform chain */<br>
static int<br>
-openssl_pmd_sym_session_configure(struct rte_cryptodev *dev __rte_unused,<br>
+openssl_pmd_sym_session_configure(struct rte_cryptodev *dev,<br>
struct rte_crypto_sym_xform *xform,<br>
struct rte_cryptodev_sym_session *sess)<br>
{<br>
@@ -820,7 +845,8 @@ openssl_pmd_sym_session_configure(struct rte_cryptodev *dev __rte_unused,<br>
return -EINVAL;<br>
}<br>
<br>
- ret = openssl_set_session_parameters(sess_private_data, xform);<br>
+ ret = openssl_set_session_parameters(sess_private_data, xform,<br>
+ dev->data->nb_queue_pairs);<br>
if (ret != 0) {<br>
OPENSSL_LOG(ERR, "failed configure session parameters");<br>
<br>
-- <br>
2.34.1<br>
<br>
</div>
</span></font></div>
</body>
</html>