<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=us-ascii">
<style type="text/css" style="display:none;"> P {margin-top:0;margin-bottom:0;} </style>
</head>
<body dir="ltr">
<div class="elementToProof" style="text-align: left; text-indent: 0px; line-height: 1.1; margin: 20px 0px 10px; font-family: 'IntelOne Text'; font-size: 10pt; color: rgb(0, 0, 0);">
Acked-by: Kai Ji &lt;kai.ji@intel.com&gt;</div>
<div id="appendonsend"></div>
<hr style="display:inline-block;width:98%" tabindex="-1">
<div id="divRplyFwdMsg" dir="ltr"><font face="Calibri, sans-serif" style="font-size:11pt" color="#000000"><b>From:</b> Jack Bond-Preston &lt;jack.bond-preston@foss.arm.com&gt;<br>
<b>Sent:</b> 07 June 2024 13:47<br>
<b>To:</b> Ji, Kai &lt;kai.ji@intel.com&gt;<br>
<b>Cc:</b> dev@dpdk.org &lt;dev@dpdk.org&gt;; Wathsala Vithanage &lt;wathsala.vithanage@arm.com&gt;<br>
<b>Subject:</b> [PATCH v4 3/5] crypto/openssl: per-qp cipher context clones</font>
<div> </div>
</div>
<div class="BodyFragment"><font size="2"><span style="font-size:11pt;">
<div class="PlainText">Currently EVP_CIPHER_CTXs are allocated, copied to (from<br>
openssl_session), and then freed for every cipher operation (ie. per<br>
packet). This is very inefficient, and avoidable.<br>
<br>
Make each openssl_session hold an array of pointers to per-queue-pair<br>
cipher context copies. These are populated on first use by allocating a<br>
new context and copying from the main context. These copies can then be<br>
used in a thread-safe manner by different worker lcores simultaneously.<br>
Consequently the cipher context allocation and copy only has to happen<br>
once - the first time a given qp uses an openssl_session. This brings<br>
about a large performance boost.<br>
<br>
Throughput performance uplift measurements for AES-CBC-128 encrypt on<br>
Ampere Altra Max platform:<br>
1 worker lcore<br>
|   buffer sz (B) |   prev (Gbps) |   optimised (Gbps) |   uplift |<br>
|-----------------+---------------+--------------------+----------|<br>
|              64 |          1.51 |               2.94 |    94.4% |<br>
|             256 |          4.90 |               8.05 |    64.3% |<br>
|            1024 |         11.07 |              14.21 |    28.3% |<br>
|            2048 |         14.03 |              16.28 |    16.0% |<br>
|            4096 |         16.20 |              17.59 |     8.6% |<br>
<br>
8 worker lcores<br>
|   buffer sz (B) |   prev (Gbps) |   optimised (Gbps) |   uplift |<br>
|-----------------+---------------+--------------------+----------|<br>
|              64 |          3.05 |              23.74 |   678.8% |<br>
|             256 |         10.46 |              64.86 |   520.3% |<br>
|            1024 |         40.97 |             113.80 |   177.7% |<br>
|            2048 |         73.25 |             130.21 |    77.8% |<br>
|            4096 |        103.89 |             140.62 |    35.4% |<br>
<br>
Signed-off-by: Jack Bond-Preston &lt;jack.bond-preston@foss.arm.com&gt;<br>
Reviewed-by: Wathsala Vithanage &lt;wathsala.vithanage@arm.com&gt;<br>
---<br>
 drivers/crypto/openssl/openssl_pmd_private.h |  11 +-<br>
 drivers/crypto/openssl/rte_openssl_pmd.c     | 105 ++++++++++++-------<br>
 drivers/crypto/openssl/rte_openssl_pmd_ops.c |  34 +++++-<br>
 3 files changed, 108 insertions(+), 42 deletions(-)<br>
<br>
diff --git a/drivers/crypto/openssl/openssl_pmd_private.h b/drivers/crypto/openssl/openssl_pmd_private.h<br>
index 0f038b218c..bad7dcf2f5 100644<br>
--- a/drivers/crypto/openssl/openssl_pmd_private.h<br>
+++ b/drivers/crypto/openssl/openssl_pmd_private.h<br>
@@ -166,6 +166,14 @@ struct __rte_cache_aligned openssl_session {<br>
                 /**< digest length */<br>
         } auth;<br>
 <br>
+       uint16_t ctx_copies_len;<br>
+       /* < number of entries in ctx_copies */<br>
+       EVP_CIPHER_CTX *qp_ctx[];<br>
+       /**< Flexible array member of per-queue-pair pointers to copies of EVP<br>
+        * context structure. Cipher contexts are not safe to use from multiple<br>
+        * cores simultaneously, so maintaining these copies allows avoiding<br>
+        * per-buffer copying into a temporary context.<br>
+        */<br>
 };<br>
 <br>
 /** OPENSSL crypto private asymmetric session structure */<br>
@@ -217,7 +225,8 @@ struct __rte_cache_aligned openssl_asym_session {<br>
 /** Set and validate OPENSSL crypto session parameters */<br>
 extern int<br>
 openssl_set_session_parameters(struct openssl_session *sess,<br>
-               const struct rte_crypto_sym_xform *xform);<br>
+               const struct rte_crypto_sym_xform *xform,<br>
+               uint16_t nb_queue_pairs);<br>
 <br>
 /** Reset OPENSSL crypto session parameters */<br>
 extern void<br>
diff --git a/drivers/crypto/openssl/rte_openssl_pmd.c b/drivers/crypto/openssl/rte_openssl_pmd.c<br>
index 70f2069985..df44cc097e 100644<br>
--- a/drivers/crypto/openssl/rte_openssl_pmd.c<br>
+++ b/drivers/crypto/openssl/rte_openssl_pmd.c<br>
@@ -467,13 +467,10 @@ openssl_set_sess_aead_dec_param(struct openssl_session *sess,<br>
         return 0;<br>
 }<br>
 <br>
+#if (OPENSSL_VERSION_NUMBER >= 0x30000000L && OPENSSL_VERSION_NUMBER < 0x30200000L)<br>
 static int openssl_aesni_ctx_clone(EVP_CIPHER_CTX **dest,<br>
                 struct openssl_session *sess)<br>
 {<br>
-#if (OPENSSL_VERSION_NUMBER > 0x30200000L)<br>
-       *dest = EVP_CIPHER_CTX_dup(sess->ctx);<br>
-       return 0;<br>
-#elif (OPENSSL_VERSION_NUMBER > 0x30000000L)<br>
         /* OpenSSL versions 3.0.0 <= V < 3.2.0 have no dupctx() implementation<br>
          * for AES-GCM and AES-CCM. In this case, we have to create new empty<br>
          * contexts and initialise, as we did the original context.<br>
@@ -489,13 +486,8 @@ static int openssl_aesni_ctx_clone(EVP_CIPHER_CTX **dest,<br>
                 return openssl_set_sess_aead_dec_param(sess, sess->aead_algo,<br>
                                 sess->auth.digest_length, sess->cipher.key.data,<br>
                                 dest);<br>
-#else<br>
-       *dest = EVP_CIPHER_CTX_new();<br>
-       if (EVP_CIPHER_CTX_copy(*dest, sess->cipher.ctx) != 1)<br>
-               return -EINVAL;<br>
-       return 0;<br>
-#endif<br>
 }<br>
+#endif<br>
 <br>
 /** Set session cipher parameters */<br>
 static int<br>
@@ -824,7 +816,8 @@ openssl_set_session_aead_parameters(struct openssl_session *sess,<br>
 /** Parse crypto xform chain and set private session parameters */<br>
 int<br>
 openssl_set_session_parameters(struct openssl_session *sess,<br>
-               const struct rte_crypto_sym_xform *xform)<br>
+               const struct rte_crypto_sym_xform *xform,<br>
+               uint16_t nb_queue_pairs)<br>
 {<br>
         const struct rte_crypto_sym_xform *cipher_xform = NULL;<br>
         const struct rte_crypto_sym_xform *auth_xform = NULL;<br>
@@ -886,6 +879,12 @@ openssl_set_session_parameters(struct openssl_session *sess,<br>
                 }<br>
         }<br>
 <br>
+       /*<br>
+        * With only one queue pair, the array of copies is not needed.<br>
+        * Otherwise, one entry per queue pair is required.<br>
+        */<br>
+       sess->ctx_copies_len = nb_queue_pairs > 1 ? nb_queue_pairs : 0;<br>
+<br>
         return 0;<br>
 }<br>
 <br>
@@ -893,6 +892,13 @@ openssl_set_session_parameters(struct openssl_session *sess,<br>
 void<br>
 openssl_reset_session(struct openssl_session *sess)<br>
 {<br>
+       for (uint16_t i = 0; i < sess->ctx_copies_len; i++) {<br>
+               if (sess->qp_ctx[i] != NULL) {<br>
+                       EVP_CIPHER_CTX_free(sess->qp_ctx[i]);<br>
+                       sess->qp_ctx[i] = NULL;<br>
+               }<br>
+       }<br>
+<br>
         EVP_CIPHER_CTX_free(sess->cipher.ctx);<br>
 <br>
         if (sess->chain_order == OPENSSL_CHAIN_CIPHER_BPI)<br>
@@ -959,7 +965,7 @@ get_session(struct openssl_qp *qp, struct rte_crypto_op *op)<br>
                 sess = (struct openssl_session *)_sess->driver_priv_data;<br>
 <br>
                 if (unlikely(openssl_set_session_parameters(sess,<br>
-                               op->sym->xform) != 0)) {<br>
+                               op->sym->xform, 1) != 0)) {<br>
                         rte_mempool_put(qp->sess_mp, _sess);<br>
                         sess = NULL;<br>
                 }<br>
@@ -1607,11 +1613,45 @@ process_openssl_auth_cmac(struct rte_mbuf *mbuf_src, uint8_t *dst, int offset,<br>
 # endif<br>
 /*----------------------------------------------------------------------------*/<br>
 <br>
+static inline EVP_CIPHER_CTX *<br>
+get_local_cipher_ctx(struct openssl_session *sess, struct openssl_qp *qp)<br>
+{<br>
+       /* If the array is not being used, just return the main context. */<br>
+       if (sess->ctx_copies_len == 0)<br>
+               return sess->cipher.ctx;<br>
+<br>
+       EVP_CIPHER_CTX **lctx = &sess->qp_ctx[qp->id];<br>
+<br>
+       if (unlikely(*lctx == NULL)) {<br>
+#if OPENSSL_VERSION_NUMBER >= 0x30200000L<br>
+               /* EVP_CIPHER_CTX_dup() added in OSSL 3.2 */<br>
+               *lctx = EVP_CIPHER_CTX_dup(sess->cipher.ctx);<br>
+               return *lctx;<br>
+#elif OPENSSL_VERSION_NUMBER >= 0x30000000L<br>
+               if (sess->chain_order == OPENSSL_CHAIN_COMBINED) {<br>
+                       /* AESNI special-cased to use openssl_aesni_ctx_clone()<br>
+                        * to allow for working around lack of<br>
+                        * EVP_CIPHER_CTX_copy support for 3.0.0 <= OSSL Version<br>
+                        * < 3.2.0.<br>
+                        */<br>
+                       if (openssl_aesni_ctx_clone(lctx, sess) != 0)<br>
+                               *lctx = NULL;<br>
+                       return *lctx;<br>
+               }<br>
+#endif<br>
+<br>
+               *lctx = EVP_CIPHER_CTX_new();<br>
+               EVP_CIPHER_CTX_copy(*lctx, sess->cipher.ctx);<br>
+       }<br>
+<br>
+       return *lctx;<br>
+}<br>
+<br>
 /** Process auth/cipher combined operation */<br>
 static void<br>
-process_openssl_combined_op<br>
-               (struct rte_crypto_op *op, struct openssl_session *sess,<br>
-               struct rte_mbuf *mbuf_src, struct rte_mbuf *mbuf_dst)<br>
+process_openssl_combined_op(struct openssl_qp *qp, struct rte_crypto_op *op,<br>
+               struct openssl_session *sess, struct rte_mbuf *mbuf_src,<br>
+               struct rte_mbuf *mbuf_dst)<br>
 {<br>
         /* cipher */<br>
         uint8_t *dst = NULL, *iv, *tag, *aad;<br>
@@ -1628,11 +1668,7 @@ process_openssl_combined_op<br>
                 return;<br>
         }<br>
 <br>
-       EVP_CIPHER_CTX *ctx;<br>
-       if (openssl_aesni_ctx_clone(&ctx, sess) != 0) {<br>
-               op->status = RTE_CRYPTO_OP_STATUS_ERROR;<br>
-               return;<br>
-       }<br>
+       EVP_CIPHER_CTX *ctx = get_local_cipher_ctx(sess, qp);<br>
 <br>
         iv = rte_crypto_op_ctod_offset(op, uint8_t *,<br>
                         sess->iv.offset);<br>
@@ -1688,8 +1724,6 @@ process_openssl_combined_op<br>
                                         dst, tag, taglen, ctx);<br>
         }<br>
 <br>
-       EVP_CIPHER_CTX_free(ctx);<br>
-<br>
         if (status != 0) {<br>
                 if (status == (-EFAULT) &&<br>
                                 sess->auth.operation ==<br>
@@ -1702,14 +1736,13 @@ process_openssl_combined_op<br>
 <br>
 /** Process cipher operation */<br>
 static void<br>
-process_openssl_cipher_op<br>
-               (struct rte_crypto_op *op, struct openssl_session *sess,<br>
-               struct rte_mbuf *mbuf_src, struct rte_mbuf *mbuf_dst)<br>
+process_openssl_cipher_op(struct openssl_qp *qp, struct rte_crypto_op *op,<br>
+               struct openssl_session *sess, struct rte_mbuf *mbuf_src,<br>
+               struct rte_mbuf *mbuf_dst)<br>
 {<br>
         uint8_t *dst, *iv;<br>
         int srclen, status;<br>
         uint8_t inplace = (mbuf_src == mbuf_dst) ? 1 : 0;<br>
-       EVP_CIPHER_CTX *ctx_copy;<br>
 <br>
         /*<br>
          * Segmented OOP destination buffer is not supported for encryption/<br>
@@ -1728,24 +1761,22 @@ process_openssl_cipher_op<br>
 <br>
         iv = rte_crypto_op_ctod_offset(op, uint8_t *,<br>
                         sess->iv.offset);<br>
-       ctx_copy = EVP_CIPHER_CTX_new();<br>
-       EVP_CIPHER_CTX_copy(ctx_copy, sess->cipher.ctx);<br>
+<br>
+       EVP_CIPHER_CTX *ctx = get_local_cipher_ctx(sess, qp);<br>
 <br>
         if (sess->cipher.mode == OPENSSL_CIPHER_LIB)<br>
                 if (sess->cipher.direction == RTE_CRYPTO_CIPHER_OP_ENCRYPT)<br>
                         status = process_openssl_cipher_encrypt(mbuf_src, dst,<br>
                                         op->sym->cipher.data.offset, iv,<br>
-                                       srclen, ctx_copy, inplace);<br>
+                                       srclen, ctx, inplace);<br>
                 else<br>
                         status = process_openssl_cipher_decrypt(mbuf_src, dst,<br>
                                         op->sym->cipher.data.offset, iv,<br>
-                                       srclen, ctx_copy, inplace);<br>
+                                       srclen, ctx, inplace);<br>
         else<br>
                 status = process_openssl_cipher_des3ctr(mbuf_src, dst,<br>
-                               op->sym->cipher.data.offset, iv, srclen,<br>
-                               ctx_copy);<br>
+                               op->sym->cipher.data.offset, iv, srclen, ctx);<br>
 <br>
-       EVP_CIPHER_CTX_free(ctx_copy);<br>
         if (status != 0)<br>
                 op->status = RTE_CRYPTO_OP_STATUS_ERROR;<br>
 }<br>
@@ -3150,13 +3181,13 @@ process_op(struct openssl_qp *qp, struct rte_crypto_op *op,<br>
 <br>
         switch (sess->chain_order) {<br>
         case OPENSSL_CHAIN_ONLY_CIPHER:<br>
-               process_openssl_cipher_op(op, sess, msrc, mdst);<br>
+               process_openssl_cipher_op(qp, op, sess, msrc, mdst);<br>
                 break;<br>
         case OPENSSL_CHAIN_ONLY_AUTH:<br>
                 process_openssl_auth_op(qp, op, sess, msrc, mdst);<br>
                 break;<br>
         case OPENSSL_CHAIN_CIPHER_AUTH:<br>
-               process_openssl_cipher_op(op, sess, msrc, mdst);<br>
+               process_openssl_cipher_op(qp, op, sess, msrc, mdst);<br>
                 /* OOP */<br>
                 if (msrc != mdst)<br>
                         copy_plaintext(msrc, mdst, op);<br>
@@ -3164,10 +3195,10 @@ process_op(struct openssl_qp *qp, struct rte_crypto_op *op,<br>
                 break;<br>
         case OPENSSL_CHAIN_AUTH_CIPHER:<br>
                 process_openssl_auth_op(qp, op, sess, msrc, mdst);<br>
-               process_openssl_cipher_op(op, sess, msrc, mdst);<br>
+               process_openssl_cipher_op(qp, op, sess, msrc, mdst);<br>
                 break;<br>
         case OPENSSL_CHAIN_COMBINED:<br>
-               process_openssl_combined_op(op, sess, msrc, mdst);<br>
+               process_openssl_combined_op(qp, op, sess, msrc, mdst);<br>
                 break;<br>
         case OPENSSL_CHAIN_CIPHER_BPI:<br>
                 process_openssl_docsis_bpi_op(op, sess, msrc, mdst);<br>
diff --git a/drivers/crypto/openssl/rte_openssl_pmd_ops.c b/drivers/crypto/openssl/rte_openssl_pmd_ops.c<br>
index b16baaa08f..4209c6ab6f 100644<br>
--- a/drivers/crypto/openssl/rte_openssl_pmd_ops.c<br>
+++ b/drivers/crypto/openssl/rte_openssl_pmd_ops.c<br>
@@ -794,9 +794,34 @@ openssl_pmd_qp_setup(struct rte_cryptodev *dev, uint16_t qp_id,<br>
 <br>
 /** Returns the size of the symmetric session structure */<br>
 static unsigned<br>
-openssl_pmd_sym_session_get_size(struct rte_cryptodev *dev __rte_unused)<br>
+openssl_pmd_sym_session_get_size(struct rte_cryptodev *dev)<br>
 {<br>
-       return sizeof(struct openssl_session);<br>
+       /*<br>
+        * For 0 qps, return the max size of the session - this is necessary if<br>
+        * the user calls into this function to create the session mempool,<br>
+        * without first configuring the number of qps for the cryptodev.<br>
+        */<br>
+       if (dev->data->nb_queue_pairs == 0) {<br>
+               unsigned int max_nb_qps = ((struct openssl_private *)<br>
+                               dev->data->dev_private)->max_nb_qpairs;<br>
+               return sizeof(struct openssl_session) +<br>
+                               (sizeof(void *) * max_nb_qps);<br>
+       }<br>
+<br>
+       /*<br>
+        * With only one queue pair, the thread safety of multiple context<br>
+        * copies is not necessary, so don't allocate extra memory for the<br>
+        * array.<br>
+        */<br>
+       if (dev->data->nb_queue_pairs == 1)<br>
+               return sizeof(struct openssl_session);<br>
+<br>
+       /*<br>
+        * Otherwise, the size of the flexible array member should be enough to<br>
+        * fit pointers to per-qp contexts.<br>
+        */<br>
+       return sizeof(struct openssl_session) +<br>
+               (sizeof(void *) * dev->data->nb_queue_pairs);<br>
 }<br>
 <br>
 /** Returns the size of the asymmetric session structure */<br>
@@ -808,7 +833,7 @@ openssl_pmd_asym_session_get_size(struct rte_cryptodev *dev __rte_unused)<br>
 <br>
 /** Configure the session from a crypto xform chain */<br>
 static int<br>
-openssl_pmd_sym_session_configure(struct rte_cryptodev *dev __rte_unused,<br>
+openssl_pmd_sym_session_configure(struct rte_cryptodev *dev,<br>
                 struct rte_crypto_sym_xform *xform,<br>
                 struct rte_cryptodev_sym_session *sess)<br>
 {<br>
@@ -820,7 +845,8 @@ openssl_pmd_sym_session_configure(struct rte_cryptodev *dev __rte_unused,<br>
                 return -EINVAL;<br>
         }<br>
 <br>
-       ret = openssl_set_session_parameters(sess_private_data, xform);<br>
+       ret = openssl_set_session_parameters(sess_private_data, xform,<br>
+                       dev->data->nb_queue_pairs);<br>
         if (ret != 0) {<br>
                 OPENSSL_LOG(ERR, "failed configure session parameters");<br>
 <br>
-- <br>
2.34.1<br>
<br>
</div>
</span></font></div>
</body>
</html>