<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=us-ascii">
<style type="text/css" style="display:none;"> P {margin-top:0;margin-bottom:0;} </style>
</head>
<body dir="ltr">
<div class="elementToProof" style="font-family: Aptos, Aptos_EmbeddedFont, Aptos_MSFontService, Calibri, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">
Hi Jack,</div>
<div class="elementToProof" style="font-family: Aptos, Aptos_EmbeddedFont, Aptos_MSFontService, Calibri, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">
<br>
</div>
<div class="elementToProof" style="font-family: Aptos, Aptos_EmbeddedFont, Aptos_MSFontService, Calibri, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">
Thanks for prompt response, patch queued to 23.11.2 release candidate queue.</div>
<div class="elementToProof" style="font-family: Aptos, Aptos_EmbeddedFont, Aptos_MSFontService, Calibri, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">
<br>
</div>
<div class="elementToProof" style="font-family: Aptos, Aptos_EmbeddedFont, Aptos_MSFontService, Calibri, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">
Regards,</div>
<div class="elementToProof" style="font-family: Aptos, Aptos_EmbeddedFont, Aptos_MSFontService, Calibri, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">
Xueming</div>
<div id="appendonsend"></div>
<hr style="display:inline-block;width:98%" tabindex="-1">
<div id="divRplyFwdMsg" dir="ltr"><font face="Calibri, sans-serif" style="font-size:11pt" color="#000000"><b>From:</b> Jack Bond-Preston <jack.bond-preston@foss.arm.com><br>
<b>Sent:</b> Monday, August 12, 2024 9:46 PM<br>
<b>To:</b> Xueming Li <xuemingl@nvidia.com><br>
<b>Cc:</b> stable@dpdk.org <stable@dpdk.org>; Kai Ji <kai.ji@intel.com>; Wathsala Vithanage <wathsala.vithanage@arm.com><br>
<b>Subject:</b> [PATCH 23.11] crypto/openssl: make per-QP auth context clones</font>
<div> </div>
</div>
<div class="BodyFragment"><font size="2"><span style="font-size:11pt;">
<div class="PlainText">[ upstream commit 17d5bc6135afdb38ddf02595bfa15aa5142d80b1 ]<br>
<br>
Currently EVP auth ctxs (e.g. EVP_MD_CTX, EVP_MAC_CTX) are allocated,<br>
copied to (from openssl_session), and then freed for every auth<br>
operation (ie. per packet). This is very inefficient, and avoidable.<br>
<br>
Make each openssl_session hold an array of structures, containing<br>
pointers to per-queue-pair cipher and auth context copies. These are<br>
populated on first use by allocating a new context and copying from the<br>
main context. These copies can then be used in a thread-safe manner by<br>
different worker lcores simultaneously. Consequently the auth context<br>
allocation and copy only has to happen once - the first time a given qp<br>
uses an openssl_session. This brings about a large performance boost.<br>
<br>
Throughput performance uplift measurements for HMAC-SHA1 generate on<br>
Ampere Altra Max platform:<br>
1 worker lcore<br>
| buffer sz (B) | prev (Gbps) | optimised (Gbps) | uplift |<br>
|-----------------+---------------+--------------------+----------|<br>
| 64 | 0.63 | 1.42 | 123.5% |<br>
| 256 | 2.24 | 4.40 | 96.4% |<br>
| 1024 | 6.15 | 9.26 | 50.6% |<br>
| 2048 | 8.68 | 11.38 | 31.1% |<br>
| 4096 | 10.92 | 12.84 | 17.6% |<br>
<br>
8 worker lcores<br>
| buffer sz (B) | prev (Gbps) | optimised (Gbps) | uplift |<br>
|-----------------+---------------+--------------------+----------|<br>
| 64 | 0.93 | 11.35 | 1122.5% |<br>
| 256 | 3.70 | 35.30 | 853.7% |<br>
| 1024 | 15.22 | 74.27 | 387.8% |<br>
| 2048 | 30.20 | 91.08 | 201.6% |<br>
| 4096 | 56.92 | 102.76 | 80.5% |<br>
<br>
Cc: stable@dpdk.org<br>
<br>
Signed-off-by: Jack Bond-Preston <jack.bond-preston@foss.arm.com><br>
Acked-by: Kai Ji <kai.ji@intel.com><br>
Reviewed-by: Wathsala Vithanage <wathsala.vithanage@arm.com><br>
---<br>
drivers/crypto/openssl/compat.h | 26 +++<br>
drivers/crypto/openssl/openssl_pmd_private.h | 25 ++-<br>
drivers/crypto/openssl/rte_openssl_pmd.c | 176 +++++++++++++++----<br>
drivers/crypto/openssl/rte_openssl_pmd_ops.c | 7 +-<br>
4 files changed, 193 insertions(+), 41 deletions(-)<br>
<br>
diff --git a/drivers/crypto/openssl/compat.h b/drivers/crypto/openssl/compat.h<br>
index 9f9167c4f1..e1814fea8c 100644<br>
--- a/drivers/crypto/openssl/compat.h<br>
+++ b/drivers/crypto/openssl/compat.h<br>
@@ -5,6 +5,32 @@<br>
#ifndef __RTA_COMPAT_H__<br>
#define __RTA_COMPAT_H__<br>
<br>
+#if OPENSSL_VERSION_NUMBER >= 0x30000000L<br>
+static __rte_always_inline void<br>
+free_hmac_ctx(EVP_MAC_CTX *ctx)<br>
+{<br>
+ EVP_MAC_CTX_free(ctx);<br>
+}<br>
+<br>
+static __rte_always_inline void<br>
+free_cmac_ctx(EVP_MAC_CTX *ctx)<br>
+{<br>
+ EVP_MAC_CTX_free(ctx);<br>
+}<br>
+#else<br>
+static __rte_always_inline void<br>
+free_hmac_ctx(HMAC_CTX *ctx)<br>
+{<br>
+ HMAC_CTX_free(ctx);<br>
+}<br>
+<br>
+static __rte_always_inline void<br>
+free_cmac_ctx(CMAC_CTX *ctx)<br>
+{<br>
+ CMAC_CTX_free(ctx);<br>
+}<br>
+#endif<br>
+<br>
#if (OPENSSL_VERSION_NUMBER < 0x10100000L)<br>
<br>
static __rte_always_inline int<br>
diff --git a/drivers/crypto/openssl/openssl_pmd_private.h b/drivers/crypto/openssl/openssl_pmd_private.h<br>
index 370de1d53b..aa3f466e74 100644<br>
--- a/drivers/crypto/openssl/openssl_pmd_private.h<br>
+++ b/drivers/crypto/openssl/openssl_pmd_private.h<br>
@@ -80,6 +80,20 @@ struct openssl_qp {<br>
*/<br>
} __rte_cache_aligned;<br>
<br>
+struct evp_ctx_pair {<br>
+ EVP_CIPHER_CTX *cipher;<br>
+ union {<br>
+ EVP_MD_CTX *auth;<br>
+#if OPENSSL_VERSION_NUMBER >= 0x30000000L<br>
+ EVP_MAC_CTX *hmac;<br>
+ EVP_MAC_CTX *cmac;<br>
+#else<br>
+ HMAC_CTX *hmac;<br>
+ CMAC_CTX *cmac;<br>
+#endif<br>
+ };<br>
+};<br>
+<br>
/** OPENSSL crypto private session structure */<br>
struct openssl_session {<br>
enum openssl_chain_order chain_order;<br>
@@ -168,11 +182,12 @@ struct openssl_session {<br>
<br>
uint16_t ctx_copies_len;<br>
/* < number of entries in ctx_copies */<br>
- EVP_CIPHER_CTX *qp_ctx[];<br>
- /**< Flexible array member of per-queue-pair pointers to copies of EVP<br>
- * context structure. Cipher contexts are not safe to use from multiple<br>
- * cores simultaneously, so maintaining these copies allows avoiding<br>
- * per-buffer copying into a temporary context.<br>
+ struct evp_ctx_pair qp_ctx[];<br>
+ /**< Flexible array member of per-queue-pair structures, each containing<br>
+ * pointers to copies of the cipher and auth EVP contexts. Cipher<br>
+ * contexts are not safe to use from multiple cores simultaneously, so<br>
+ * maintaining these copies allows avoiding per-buffer copying into a<br>
+ * temporary context.<br>
*/<br>
} __rte_cache_aligned;<br>
<br>
diff --git a/drivers/crypto/openssl/rte_openssl_pmd.c b/drivers/crypto/openssl/rte_openssl_pmd.c<br>
index 7518ffa3be..101111e85b 100644<br>
--- a/drivers/crypto/openssl/rte_openssl_pmd.c<br>
+++ b/drivers/crypto/openssl/rte_openssl_pmd.c<br>
@@ -894,40 +894,45 @@ openssl_set_session_parameters(struct openssl_session *sess,<br>
void<br>
openssl_reset_session(struct openssl_session *sess)<br>
{<br>
+ /* Free all the qp_ctx entries. */<br>
for (uint16_t i = 0; i < sess->ctx_copies_len; i++) {<br>
- if (sess->qp_ctx[i] != NULL) {<br>
- EVP_CIPHER_CTX_free(sess->qp_ctx[i]);<br>
- sess->qp_ctx[i] = NULL;<br>
+ if (sess->qp_ctx[i].cipher != NULL) {<br>
+ EVP_CIPHER_CTX_free(sess->qp_ctx[i].cipher);<br>
+ sess->qp_ctx[i].cipher = NULL;<br>
+ }<br>
+<br>
+ switch (sess->auth.mode) {<br>
+ case OPENSSL_AUTH_AS_AUTH:<br>
+ EVP_MD_CTX_destroy(sess->qp_ctx[i].auth);<br>
+ sess->qp_ctx[i].auth = NULL;<br>
+ break;<br>
+ case OPENSSL_AUTH_AS_HMAC:<br>
+ free_hmac_ctx(sess->qp_ctx[i].hmac);<br>
+ sess->qp_ctx[i].hmac = NULL;<br>
+ break;<br>
+ case OPENSSL_AUTH_AS_CMAC:<br>
+ free_cmac_ctx(sess->qp_ctx[i].cmac);<br>
+ sess->qp_ctx[i].cmac = NULL;<br>
+ break;<br>
}<br>
}<br>
<br>
EVP_CIPHER_CTX_free(sess->cipher.ctx);<br>
<br>
- if (sess->chain_order == OPENSSL_CHAIN_CIPHER_BPI)<br>
- EVP_CIPHER_CTX_free(sess->cipher.bpi_ctx);<br>
-<br>
switch (sess->auth.mode) {<br>
case OPENSSL_AUTH_AS_AUTH:<br>
EVP_MD_CTX_destroy(sess->auth.auth.ctx);<br>
break;<br>
case OPENSSL_AUTH_AS_HMAC:<br>
- EVP_PKEY_free(sess->auth.hmac.pkey);<br>
-# if OPENSSL_VERSION_NUMBER >= 0x30000000L<br>
- EVP_MAC_CTX_free(sess->auth.hmac.ctx);<br>
-# else<br>
- HMAC_CTX_free(sess->auth.hmac.ctx);<br>
-# endif<br>
+ free_hmac_ctx(sess->auth.hmac.ctx);<br>
break;<br>
case OPENSSL_AUTH_AS_CMAC:<br>
-# if OPENSSL_VERSION_NUMBER >= 0x30000000L<br>
- EVP_MAC_CTX_free(sess->auth.cmac.ctx);<br>
-# else<br>
- CMAC_CTX_free(sess->auth.cmac.ctx);<br>
-# endif<br>
- break;<br>
- default:<br>
+ free_cmac_ctx(sess->auth.cmac.ctx);<br>
break;<br>
}<br>
+<br>
+ if (sess->chain_order == OPENSSL_CHAIN_CIPHER_BPI)<br>
+ EVP_CIPHER_CTX_free(sess->cipher.bpi_ctx);<br>
}<br>
<br>
/** Provide session for operation */<br>
@@ -1469,6 +1474,9 @@ process_openssl_auth_mac(struct rte_mbuf *mbuf_src, uint8_t *dst, int offset,<br>
if (m == 0)<br>
goto process_auth_err;<br>
<br>
+ if (EVP_MAC_init(ctx, NULL, 0, NULL) <= 0)<br>
+ goto process_auth_err;<br>
+<br>
src = rte_pktmbuf_mtod_offset(m, uint8_t *, offset);<br>
<br>
l = rte_pktmbuf_data_len(m) - offset;<br>
@@ -1495,11 +1503,9 @@ process_openssl_auth_mac(struct rte_mbuf *mbuf_src, uint8_t *dst, int offset,<br>
if (EVP_MAC_final(ctx, dst, &dstlen, DIGEST_LENGTH_MAX) != 1)<br>
goto process_auth_err;<br>
<br>
- EVP_MAC_CTX_free(ctx);<br>
return 0;<br>
<br>
process_auth_err:<br>
- EVP_MAC_CTX_free(ctx);<br>
OPENSSL_LOG(ERR, "Process openssl auth failed");<br>
return -EINVAL;<br>
}<br>
@@ -1618,7 +1624,7 @@ get_local_cipher_ctx(struct openssl_session *sess, struct openssl_qp *qp)<br>
if (sess->ctx_copies_len == 0)<br>
return sess->cipher.ctx;<br>
<br>
- EVP_CIPHER_CTX **lctx = &sess->qp_ctx[qp->id];<br>
+ EVP_CIPHER_CTX **lctx = &sess->qp_ctx[qp->id].cipher;<br>
<br>
if (unlikely(*lctx == NULL)) {<br>
#if OPENSSL_VERSION_NUMBER >= 0x30200000L<br>
@@ -1645,6 +1651,112 @@ get_local_cipher_ctx(struct openssl_session *sess, struct openssl_qp *qp)<br>
return *lctx;<br>
}<br>
<br>
+static inline EVP_MD_CTX *<br>
+get_local_auth_ctx(struct openssl_session *sess, struct openssl_qp *qp)<br>
+{<br>
+ /* If the array is not being used, just return the main context. */<br>
+ if (sess->ctx_copies_len == 0)<br>
+ return sess->auth.auth.ctx;<br>
+<br>
+ EVP_MD_CTX **lctx = &sess->qp_ctx[qp->id].auth;<br>
+<br>
+ if (unlikely(*lctx == NULL)) {<br>
+#if OPENSSL_VERSION_NUMBER >= 0x30100000L<br>
+ /* EVP_MD_CTX_dup() added in OSSL 3.1 */<br>
+ *lctx = EVP_MD_CTX_dup(sess->auth.auth.ctx);<br>
+#else<br>
+ *lctx = EVP_MD_CTX_new();<br>
+ EVP_MD_CTX_copy(*lctx, sess->auth.auth.ctx);<br>
+#endif<br>
+ }<br>
+<br>
+ return *lctx;<br>
+}<br>
+<br>
+#if OPENSSL_VERSION_NUMBER >= 0x30000000L<br>
+static inline EVP_MAC_CTX *<br>
+#else<br>
+static inline HMAC_CTX *<br>
+#endif<br>
+get_local_hmac_ctx(struct openssl_session *sess, struct openssl_qp *qp)<br>
+{<br>
+#if (OPENSSL_VERSION_NUMBER >= 0x30000000L && OPENSSL_VERSION_NUMBER < 0x30003000L)<br>
+ /* For OpenSSL versions 3.0.0 <= v < 3.0.3, re-initing of<br>
+ * EVP_MAC_CTXs is broken, and doesn't actually reset their<br>
+ * state. This was fixed in OSSL commit c9ddc5af5199 ("Avoid<br>
+ * undefined behavior of provided macs on EVP_MAC<br>
+ * reinitialization"). In cases where the fix is not present,<br>
+ * fall back to duplicating the context every buffer as a<br>
+ * workaround, at the cost of performance.<br>
+ */<br>
+ RTE_SET_USED(qp);<br>
+ return EVP_MAC_CTX_dup(sess->auth.hmac.ctx);<br>
+#else<br>
+ if (sess->ctx_copies_len == 0)<br>
+ return sess->auth.hmac.ctx;<br>
+<br>
+#if OPENSSL_VERSION_NUMBER >= 0x30000000L<br>
+ EVP_MAC_CTX **lctx =<br>
+#else<br>
+ HMAC_CTX **lctx =<br>
+#endif<br>
+ &sess->qp_ctx[qp->id].hmac;<br>
+<br>
+ if (unlikely(*lctx == NULL)) {<br>
+#if OPENSSL_VERSION_NUMBER >= 0x30000000L<br>
+ *lctx = EVP_MAC_CTX_dup(sess->auth.hmac.ctx);<br>
+#else<br>
+ *lctx = HMAC_CTX_new();<br>
+ HMAC_CTX_copy(*lctx, sess->auth.hmac.ctx);<br>
+#endif<br>
+ }<br>
+<br>
+ return *lctx;<br>
+#endif<br>
+}<br>
+<br>
+#if OPENSSL_VERSION_NUMBER >= 0x30000000L<br>
+static inline EVP_MAC_CTX *<br>
+#else<br>
+static inline CMAC_CTX *<br>
+#endif<br>
+get_local_cmac_ctx(struct openssl_session *sess, struct openssl_qp *qp)<br>
+{<br>
+#if (OPENSSL_VERSION_NUMBER >= 0x30000000L && OPENSSL_VERSION_NUMBER < 0x30003000L)<br>
+ /* For OpenSSL versions 3.0.0 <= v < 3.0.3, re-initing of<br>
+ * EVP_MAC_CTXs is broken, and doesn't actually reset their<br>
+ * state. This was fixed in OSSL commit c9ddc5af5199 ("Avoid<br>
+ * undefined behavior of provided macs on EVP_MAC<br>
+ * reinitialization"). In cases where the fix is not present,<br>
+ * fall back to duplicating the context every buffer as a<br>
+ * workaround, at the cost of performance.<br>
+ */<br>
+ RTE_SET_USED(qp);<br>
+ return EVP_MAC_CTX_dup(sess->auth.cmac.ctx);<br>
+#else<br>
+ if (sess->ctx_copies_len == 0)<br>
+ return sess->auth.cmac.ctx;<br>
+<br>
+#if OPENSSL_VERSION_NUMBER >= 0x30000000L<br>
+ EVP_MAC_CTX **lctx =<br>
+#else<br>
+ CMAC_CTX **lctx =<br>
+#endif<br>
+ &sess->qp_ctx[qp->id].cmac;<br>
+<br>
+ if (unlikely(*lctx == NULL)) {<br>
+#if OPENSSL_VERSION_NUMBER >= 0x30000000L<br>
+ *lctx = EVP_MAC_CTX_dup(sess->auth.cmac.ctx);<br>
+#else<br>
+ *lctx = CMAC_CTX_new();<br>
+ CMAC_CTX_copy(*lctx, sess->auth.cmac.ctx);<br>
+#endif<br>
+ }<br>
+<br>
+ return *lctx;<br>
+#endif<br>
+}<br>
+<br>
/** Process auth/cipher combined operation */<br>
static void<br>
process_openssl_combined_op(struct openssl_qp *qp, struct rte_crypto_op *op,<br>
@@ -1893,42 +2005,40 @@ process_openssl_auth_op(struct openssl_qp *qp, struct rte_crypto_op *op,<br>
<br>
switch (sess->auth.mode) {<br>
case OPENSSL_AUTH_AS_AUTH:<br>
- ctx_a = EVP_MD_CTX_create();<br>
- EVP_MD_CTX_copy_ex(ctx_a, sess->auth.auth.ctx);<br>
+ ctx_a = get_local_auth_ctx(sess, qp);<br>
status = process_openssl_auth(mbuf_src, dst,<br>
op->sym->auth.data.offset, NULL, NULL, srclen,<br>
ctx_a, sess->auth.auth.evp_algo);<br>
- EVP_MD_CTX_destroy(ctx_a);<br>
break;<br>
case OPENSSL_AUTH_AS_HMAC:<br>
+ ctx_h = get_local_hmac_ctx(sess, qp);<br>
# if OPENSSL_VERSION_NUMBER >= 0x30000000L<br>
- ctx_h = EVP_MAC_CTX_dup(sess->auth.hmac.ctx);<br>
status = process_openssl_auth_mac(mbuf_src, dst,<br>
op->sym->auth.data.offset, srclen,<br>
ctx_h);<br>
# else<br>
- ctx_h = HMAC_CTX_new();<br>
- HMAC_CTX_copy(ctx_h, sess->auth.hmac.ctx);<br>
status = process_openssl_auth_hmac(mbuf_src, dst,<br>
op->sym->auth.data.offset, srclen,<br>
ctx_h);<br>
- HMAC_CTX_free(ctx_h);<br>
# endif<br>
+#if (OPENSSL_VERSION_NUMBER >= 0x30000000L && OPENSSL_VERSION_NUMBER < 0x30003000L)<br>
+ EVP_MAC_CTX_free(ctx_h);<br>
+#endif<br>
break;<br>
case OPENSSL_AUTH_AS_CMAC:<br>
+ ctx_c = get_local_cmac_ctx(sess, qp);<br>
# if OPENSSL_VERSION_NUMBER >= 0x30000000L<br>
- ctx_c = EVP_MAC_CTX_dup(sess->auth.cmac.ctx);<br>
status = process_openssl_auth_mac(mbuf_src, dst,<br>
op->sym->auth.data.offset, srclen,<br>
ctx_c);<br>
# else<br>
- ctx_c = CMAC_CTX_new();<br>
- CMAC_CTX_copy(ctx_c, sess->auth.cmac.ctx);<br>
status = process_openssl_auth_cmac(mbuf_src, dst,<br>
op->sym->auth.data.offset, srclen,<br>
ctx_c);<br>
- CMAC_CTX_free(ctx_c);<br>
# endif<br>
+#if (OPENSSL_VERSION_NUMBER >= 0x30000000L && OPENSSL_VERSION_NUMBER < 0x30003000L)<br>
+ EVP_MAC_CTX_free(ctx_c);<br>
+#endif<br>
break;<br>
default:<br>
status = -1;<br>
diff --git a/drivers/crypto/openssl/rte_openssl_pmd_ops.c b/drivers/crypto/openssl/rte_openssl_pmd_ops.c<br>
index 4209c6ab6f..1bbb855a59 100644<br>
--- a/drivers/crypto/openssl/rte_openssl_pmd_ops.c<br>
+++ b/drivers/crypto/openssl/rte_openssl_pmd_ops.c<br>
@@ -805,7 +805,7 @@ openssl_pmd_sym_session_get_size(struct rte_cryptodev *dev)<br>
unsigned int max_nb_qps = ((struct openssl_private *)<br>
dev->data->dev_private)->max_nb_qpairs;<br>
return sizeof(struct openssl_session) +<br>
- (sizeof(void *) * max_nb_qps);<br>
+ (sizeof(struct evp_ctx_pair) * max_nb_qps);<br>
}<br>
<br>
/*<br>
@@ -818,10 +818,11 @@ openssl_pmd_sym_session_get_size(struct rte_cryptodev *dev)<br>
<br>
/*<br>
* Otherwise, the size of the flexible array member should be enough to<br>
- * fit pointers to per-qp contexts.<br>
+ * fit pointers to per-qp contexts. This is twice the number of queue<br>
+ * pairs, to allow for auth and cipher contexts.<br>
*/<br>
return sizeof(struct openssl_session) +<br>
- (sizeof(void *) * dev->data->nb_queue_pairs);<br>
+ (sizeof(struct evp_ctx_pair) * dev->data->nb_queue_pairs);<br>
}<br>
<br>
/** Returns the size of the asymmetric session structure */<br>
-- <br>
2.34.1<br>
<br>
</div>
</span></font></div>
</body>
</html>