patch 'crypto/openssl: make per-QP auth context clones' has been queued to stable release 22.11.6
luca.boccassi at gmail.com
luca.boccassi at gmail.com
Mon Jul 15 17:25:58 CEST 2024
Hi,
FYI, your patch has been queued to stable release 22.11.6
Note it hasn't been pushed to http://dpdk.org/browse/dpdk-stable yet.
It will be pushed if I get no objections before 07/17/24. So please
shout if anyone has objections.
Also note that after the patch there's a diff of the upstream commit vs the
patch applied to the branch. This will indicate if there was any rebasing
needed to apply to the stable branch. If there were code changes for rebasing
(ie: not only metadata diffs), please double check that the rebase was
correctly done.
Queued patches are on a temporary branch at:
https://github.com/bluca/dpdk-stable
This queued commit can be viewed at:
https://github.com/bluca/dpdk-stable/commit/ccb65683980ca885fa3b85420fee2d2434a96834
Thanks.
Luca Boccassi
---
>From ccb65683980ca885fa3b85420fee2d2434a96834 Mon Sep 17 00:00:00 2001
From: Jack Bond-Preston <jack.bond-preston at foss.arm.com>
Date: Wed, 3 Jul 2024 13:45:50 +0000
Subject: [PATCH] crypto/openssl: make per-QP auth context clones
[ upstream commit 17d5bc6135afdb38ddf02595bfa15aa5142d80b1 ]
Currently EVP auth ctxs (e.g. EVP_MD_CTX, EVP_MAC_CTX) are allocated,
copied to (from openssl_session), and then freed for every auth
operation (ie. per packet). This is very inefficient, and avoidable.
Make each openssl_session hold an array of structures, containing
pointers to per-queue-pair cipher and auth context copies. These are
populated on first use by allocating a new context and copying from the
main context. These copies can then be used in a thread-safe manner by
different worker lcores simultaneously. Consequently the auth context
allocation and copy only has to happen once - the first time a given qp
uses an openssl_session. This brings about a large performance boost.
Throughput performance uplift measurements for HMAC-SHA1 generate on
Ampere Altra Max platform:
1 worker lcore
| buffer sz (B) | prev (Gbps) | optimised (Gbps) | uplift |
|-----------------+---------------+--------------------+----------|
| 64 | 0.63 | 1.42 | 123.5% |
| 256 | 2.24 | 4.40 | 96.4% |
| 1024 | 6.15 | 9.26 | 50.6% |
| 2048 | 8.68 | 11.38 | 31.1% |
| 4096 | 10.92 | 12.84 | 17.6% |
8 worker lcores
| buffer sz (B) | prev (Gbps) | optimised (Gbps) | uplift |
|-----------------+---------------+--------------------+----------|
| 64 | 0.93 | 11.35 | 1122.5% |
| 256 | 3.70 | 35.30 | 853.7% |
| 1024 | 15.22 | 74.27 | 387.8% |
| 2048 | 30.20 | 91.08 | 201.6% |
| 4096 | 56.92 | 102.76 | 80.5% |
Signed-off-by: Jack Bond-Preston <jack.bond-preston at foss.arm.com>
Acked-by: Kai Ji <kai.ji at intel.com>
Reviewed-by: Wathsala Vithanage <wathsala.vithanage at arm.com>
---
drivers/crypto/openssl/compat.h | 26 +++
drivers/crypto/openssl/openssl_pmd_private.h | 25 ++-
drivers/crypto/openssl/rte_openssl_pmd.c | 190 +++++++++++++++----
drivers/crypto/openssl/rte_openssl_pmd_ops.c | 7 +-
4 files changed, 200 insertions(+), 48 deletions(-)
diff --git a/drivers/crypto/openssl/compat.h b/drivers/crypto/openssl/compat.h
index 9f9167c4f1..e1814fea8c 100644
--- a/drivers/crypto/openssl/compat.h
+++ b/drivers/crypto/openssl/compat.h
@@ -5,6 +5,32 @@
#ifndef __RTA_COMPAT_H__
#define __RTA_COMPAT_H__
+#if OPENSSL_VERSION_NUMBER >= 0x30000000L
+static __rte_always_inline void
+free_hmac_ctx(EVP_MAC_CTX *ctx)
+{
+ EVP_MAC_CTX_free(ctx);
+}
+
+static __rte_always_inline void
+free_cmac_ctx(EVP_MAC_CTX *ctx)
+{
+ EVP_MAC_CTX_free(ctx);
+}
+#else
+static __rte_always_inline void
+free_hmac_ctx(HMAC_CTX *ctx)
+{
+ HMAC_CTX_free(ctx);
+}
+
+static __rte_always_inline void
+free_cmac_ctx(CMAC_CTX *ctx)
+{
+ CMAC_CTX_free(ctx);
+}
+#endif
+
#if (OPENSSL_VERSION_NUMBER < 0x10100000L)
static __rte_always_inline int
diff --git a/drivers/crypto/openssl/openssl_pmd_private.h b/drivers/crypto/openssl/openssl_pmd_private.h
index 810b539f10..d67e39cddb 100644
--- a/drivers/crypto/openssl/openssl_pmd_private.h
+++ b/drivers/crypto/openssl/openssl_pmd_private.h
@@ -79,6 +79,20 @@ struct openssl_qp {
*/
} __rte_cache_aligned;
+struct evp_ctx_pair {
+ EVP_CIPHER_CTX *cipher;
+ union {
+ EVP_MD_CTX *auth;
+#if OPENSSL_VERSION_NUMBER >= 0x30000000L
+ EVP_MAC_CTX *hmac;
+ EVP_MAC_CTX *cmac;
+#else
+ HMAC_CTX *hmac;
+ CMAC_CTX *cmac;
+#endif
+ };
+};
+
/** OPENSSL crypto private session structure */
struct openssl_session {
enum openssl_chain_order chain_order;
@@ -167,11 +181,12 @@ struct openssl_session {
uint16_t ctx_copies_len;
/* < number of entries in ctx_copies */
- EVP_CIPHER_CTX *qp_ctx[];
- /**< Flexible array member of per-queue-pair pointers to copies of EVP
- * context structure. Cipher contexts are not safe to use from multiple
- * cores simultaneously, so maintaining these copies allows avoiding
- * per-buffer copying into a temporary context.
+ struct evp_ctx_pair qp_ctx[];
+ /**< Flexible array member of per-queue-pair structures, each containing
+ * pointers to copies of the cipher and auth EVP contexts. Cipher
+ * contexts are not safe to use from multiple cores simultaneously, so
+ * maintaining these copies allows avoiding per-buffer copying into a
+ * temporary context.
*/
} __rte_cache_aligned;
diff --git a/drivers/crypto/openssl/rte_openssl_pmd.c b/drivers/crypto/openssl/rte_openssl_pmd.c
index 62a179b6b6..72db0fd40f 100644
--- a/drivers/crypto/openssl/rte_openssl_pmd.c
+++ b/drivers/crypto/openssl/rte_openssl_pmd.c
@@ -891,40 +891,45 @@ openssl_set_session_parameters(struct openssl_session *sess,
void
openssl_reset_session(struct openssl_session *sess)
{
+ /* Free all the qp_ctx entries. */
for (uint16_t i = 0; i < sess->ctx_copies_len; i++) {
- if (sess->qp_ctx[i] != NULL) {
- EVP_CIPHER_CTX_free(sess->qp_ctx[i]);
- sess->qp_ctx[i] = NULL;
+ if (sess->qp_ctx[i].cipher != NULL) {
+ EVP_CIPHER_CTX_free(sess->qp_ctx[i].cipher);
+ sess->qp_ctx[i].cipher = NULL;
+ }
+
+ switch (sess->auth.mode) {
+ case OPENSSL_AUTH_AS_AUTH:
+ EVP_MD_CTX_destroy(sess->qp_ctx[i].auth);
+ sess->qp_ctx[i].auth = NULL;
+ break;
+ case OPENSSL_AUTH_AS_HMAC:
+ free_hmac_ctx(sess->qp_ctx[i].hmac);
+ sess->qp_ctx[i].hmac = NULL;
+ break;
+ case OPENSSL_AUTH_AS_CMAC:
+ free_cmac_ctx(sess->qp_ctx[i].cmac);
+ sess->qp_ctx[i].cmac = NULL;
+ break;
}
}
EVP_CIPHER_CTX_free(sess->cipher.ctx);
+ switch (sess->auth.mode) {
+ case OPENSSL_AUTH_AS_AUTH:
+ EVP_MD_CTX_destroy(sess->auth.auth.ctx);
+ break;
+ case OPENSSL_AUTH_AS_HMAC:
+ free_hmac_ctx(sess->auth.hmac.ctx);
+ break;
+ case OPENSSL_AUTH_AS_CMAC:
+ free_cmac_ctx(sess->auth.cmac.ctx);
+ break;
+ }
+
if (sess->chain_order == OPENSSL_CHAIN_CIPHER_BPI)
EVP_CIPHER_CTX_free(sess->cipher.bpi_ctx);
-
- switch (sess->auth.mode) {
- case OPENSSL_AUTH_AS_AUTH:
- EVP_MD_CTX_destroy(sess->auth.auth.ctx);
- break;
- case OPENSSL_AUTH_AS_HMAC:
- EVP_PKEY_free(sess->auth.hmac.pkey);
-# if OPENSSL_VERSION_NUMBER >= 0x30000000L
- EVP_MAC_CTX_free(sess->auth.hmac.ctx);
-# else
- HMAC_CTX_free(sess->auth.hmac.ctx);
-# endif
- break;
- case OPENSSL_AUTH_AS_CMAC:
-# if OPENSSL_VERSION_NUMBER >= 0x30000000L
- EVP_MAC_CTX_free(sess->auth.cmac.ctx);
-# else
- CMAC_CTX_free(sess->auth.cmac.ctx);
-# endif
- break;
- default:
- break;
- }
}
/** Provide session for operation */
@@ -1470,6 +1475,9 @@ process_openssl_auth_mac(struct rte_mbuf *mbuf_src, uint8_t *dst, int offset,
if (m == 0)
goto process_auth_err;
+ if (EVP_MAC_init(ctx, NULL, 0, NULL) <= 0)
+ goto process_auth_err;
+
src = rte_pktmbuf_mtod_offset(m, uint8_t *, offset);
l = rte_pktmbuf_data_len(m) - offset;
@@ -1496,11 +1504,9 @@ process_auth_final:
if (EVP_MAC_final(ctx, dst, &dstlen, DIGEST_LENGTH_MAX) != 1)
goto process_auth_err;
- EVP_MAC_CTX_free(ctx);
return 0;
process_auth_err:
- EVP_MAC_CTX_free(ctx);
OPENSSL_LOG(ERR, "Process openssl auth failed");
return -EINVAL;
}
@@ -1619,7 +1625,7 @@ get_local_cipher_ctx(struct openssl_session *sess, struct openssl_qp *qp)
if (sess->ctx_copies_len == 0)
return sess->cipher.ctx;
- EVP_CIPHER_CTX **lctx = &sess->qp_ctx[qp->id];
+ EVP_CIPHER_CTX **lctx = &sess->qp_ctx[qp->id].cipher;
if (unlikely(*lctx == NULL)) {
#if OPENSSL_VERSION_NUMBER >= 0x30200000L
@@ -1646,6 +1652,112 @@ get_local_cipher_ctx(struct openssl_session *sess, struct openssl_qp *qp)
return *lctx;
}
+static inline EVP_MD_CTX *
+get_local_auth_ctx(struct openssl_session *sess, struct openssl_qp *qp)
+{
+ /* If the array is not being used, just return the main context. */
+ if (sess->ctx_copies_len == 0)
+ return sess->auth.auth.ctx;
+
+ EVP_MD_CTX **lctx = &sess->qp_ctx[qp->id].auth;
+
+ if (unlikely(*lctx == NULL)) {
+#if OPENSSL_VERSION_NUMBER >= 0x30100000L
+ /* EVP_MD_CTX_dup() added in OSSL 3.1 */
+ *lctx = EVP_MD_CTX_dup(sess->auth.auth.ctx);
+#else
+ *lctx = EVP_MD_CTX_new();
+ EVP_MD_CTX_copy(*lctx, sess->auth.auth.ctx);
+#endif
+ }
+
+ return *lctx;
+}
+
+#if OPENSSL_VERSION_NUMBER >= 0x30000000L
+static inline EVP_MAC_CTX *
+#else
+static inline HMAC_CTX *
+#endif
+get_local_hmac_ctx(struct openssl_session *sess, struct openssl_qp *qp)
+{
+#if (OPENSSL_VERSION_NUMBER >= 0x30000000L && OPENSSL_VERSION_NUMBER < 0x30003000L)
+ /* For OpenSSL versions 3.0.0 <= v < 3.0.3, re-initing of
+ * EVP_MAC_CTXs is broken, and doesn't actually reset their
+ * state. This was fixed in OSSL commit c9ddc5af5199 ("Avoid
+ * undefined behavior of provided macs on EVP_MAC
+ * reinitialization"). In cases where the fix is not present,
+ * fall back to duplicating the context every buffer as a
+ * workaround, at the cost of performance.
+ */
+ RTE_SET_USED(qp);
+ return EVP_MAC_CTX_dup(sess->auth.hmac.ctx);
+#else
+ if (sess->ctx_copies_len == 0)
+ return sess->auth.hmac.ctx;
+
+#if OPENSSL_VERSION_NUMBER >= 0x30000000L
+ EVP_MAC_CTX **lctx =
+#else
+ HMAC_CTX **lctx =
+#endif
+ &sess->qp_ctx[qp->id].hmac;
+
+ if (unlikely(*lctx == NULL)) {
+#if OPENSSL_VERSION_NUMBER >= 0x30000000L
+ *lctx = EVP_MAC_CTX_dup(sess->auth.hmac.ctx);
+#else
+ *lctx = HMAC_CTX_new();
+ HMAC_CTX_copy(*lctx, sess->auth.hmac.ctx);
+#endif
+ }
+
+ return *lctx;
+#endif
+}
+
+#if OPENSSL_VERSION_NUMBER >= 0x30000000L
+static inline EVP_MAC_CTX *
+#else
+static inline CMAC_CTX *
+#endif
+get_local_cmac_ctx(struct openssl_session *sess, struct openssl_qp *qp)
+{
+#if (OPENSSL_VERSION_NUMBER >= 0x30000000L && OPENSSL_VERSION_NUMBER < 0x30003000L)
+ /* For OpenSSL versions 3.0.0 <= v < 3.0.3, re-initing of
+ * EVP_MAC_CTXs is broken, and doesn't actually reset their
+ * state. This was fixed in OSSL commit c9ddc5af5199 ("Avoid
+ * undefined behavior of provided macs on EVP_MAC
+ * reinitialization"). In cases where the fix is not present,
+ * fall back to duplicating the context every buffer as a
+ * workaround, at the cost of performance.
+ */
+ RTE_SET_USED(qp);
+ return EVP_MAC_CTX_dup(sess->auth.cmac.ctx);
+#else
+ if (sess->ctx_copies_len == 0)
+ return sess->auth.cmac.ctx;
+
+#if OPENSSL_VERSION_NUMBER >= 0x30000000L
+ EVP_MAC_CTX **lctx =
+#else
+ CMAC_CTX **lctx =
+#endif
+ &sess->qp_ctx[qp->id].cmac;
+
+ if (unlikely(*lctx == NULL)) {
+#if OPENSSL_VERSION_NUMBER >= 0x30000000L
+ *lctx = EVP_MAC_CTX_dup(sess->auth.cmac.ctx);
+#else
+ *lctx = CMAC_CTX_new();
+ CMAC_CTX_copy(*lctx, sess->auth.cmac.ctx);
+#endif
+ }
+
+ return *lctx;
+#endif
+}
+
/** Process auth/cipher combined operation */
static void
process_openssl_combined_op(struct openssl_qp *qp, struct rte_crypto_op *op,
@@ -1894,42 +2006,40 @@ process_openssl_auth_op(struct openssl_qp *qp, struct rte_crypto_op *op,
switch (sess->auth.mode) {
case OPENSSL_AUTH_AS_AUTH:
- ctx_a = EVP_MD_CTX_create();
- EVP_MD_CTX_copy_ex(ctx_a, sess->auth.auth.ctx);
+ ctx_a = get_local_auth_ctx(sess, qp);
status = process_openssl_auth(mbuf_src, dst,
op->sym->auth.data.offset, NULL, NULL, srclen,
ctx_a, sess->auth.auth.evp_algo);
- EVP_MD_CTX_destroy(ctx_a);
break;
case OPENSSL_AUTH_AS_HMAC:
+ ctx_h = get_local_hmac_ctx(sess, qp);
# if OPENSSL_VERSION_NUMBER >= 0x30000000L
- ctx_h = EVP_MAC_CTX_dup(sess->auth.hmac.ctx);
status = process_openssl_auth_mac(mbuf_src, dst,
op->sym->auth.data.offset, srclen,
ctx_h);
# else
- ctx_h = HMAC_CTX_new();
- HMAC_CTX_copy(ctx_h, sess->auth.hmac.ctx);
status = process_openssl_auth_hmac(mbuf_src, dst,
op->sym->auth.data.offset, srclen,
ctx_h);
- HMAC_CTX_free(ctx_h);
# endif
+#if (OPENSSL_VERSION_NUMBER >= 0x30000000L && OPENSSL_VERSION_NUMBER < 0x30003000L)
+ EVP_MAC_CTX_free(ctx_h);
+#endif
break;
case OPENSSL_AUTH_AS_CMAC:
+ ctx_c = get_local_cmac_ctx(sess, qp);
# if OPENSSL_VERSION_NUMBER >= 0x30000000L
- ctx_c = EVP_MAC_CTX_dup(sess->auth.cmac.ctx);
status = process_openssl_auth_mac(mbuf_src, dst,
op->sym->auth.data.offset, srclen,
ctx_c);
# else
- ctx_c = CMAC_CTX_new();
- CMAC_CTX_copy(ctx_c, sess->auth.cmac.ctx);
status = process_openssl_auth_cmac(mbuf_src, dst,
op->sym->auth.data.offset, srclen,
ctx_c);
- CMAC_CTX_free(ctx_c);
# endif
+#if (OPENSSL_VERSION_NUMBER >= 0x30000000L && OPENSSL_VERSION_NUMBER < 0x30003000L)
+ EVP_MAC_CTX_free(ctx_c);
+#endif
break;
default:
status = -1;
diff --git a/drivers/crypto/openssl/rte_openssl_pmd_ops.c b/drivers/crypto/openssl/rte_openssl_pmd_ops.c
index a448279029..18a8095ba2 100644
--- a/drivers/crypto/openssl/rte_openssl_pmd_ops.c
+++ b/drivers/crypto/openssl/rte_openssl_pmd_ops.c
@@ -788,7 +788,7 @@ openssl_pmd_sym_session_get_size(struct rte_cryptodev *dev)
unsigned int max_nb_qps = ((struct openssl_private *)
dev->data->dev_private)->max_nb_qpairs;
return sizeof(struct openssl_session) +
- (sizeof(void *) * max_nb_qps);
+ (sizeof(struct evp_ctx_pair) * max_nb_qps);
}
/*
@@ -801,10 +801,11 @@ openssl_pmd_sym_session_get_size(struct rte_cryptodev *dev)
/*
* Otherwise, the size of the flexible array member should be enough to
- * fit pointers to per-qp contexts.
+ * fit pointers to per-qp contexts. This is twice the number of queue
+ * pairs, to allow for auth and cipher contexts.
*/
return sizeof(struct openssl_session) +
- (sizeof(void *) * dev->data->nb_queue_pairs);
+ (sizeof(struct evp_ctx_pair) * dev->data->nb_queue_pairs);
}
/** Returns the size of the asymmetric session structure */
--
2.39.2
---
Diff of the applied patch vs upstream commit (please double-check if non-empty:
---
--- - 2024-07-15 16:19:35.790373889 +0100
+++ 0020-crypto-openssl-make-per-QP-auth-context-clones.patch 2024-07-15 16:19:34.492204841 +0100
@@ -1 +1 @@
-From 17d5bc6135afdb38ddf02595bfa15aa5142d80b1 Mon Sep 17 00:00:00 2001
+From ccb65683980ca885fa3b85420fee2d2434a96834 Mon Sep 17 00:00:00 2001
@@ -5,0 +6,2 @@
+[ upstream commit 17d5bc6135afdb38ddf02595bfa15aa5142d80b1 ]
+
@@ -38,2 +39,0 @@
-Cc: stable at dpdk.org
-
@@ -88 +88 @@
-index bad7dcf2f5..a50e4d4918 100644
+index 810b539f10..d67e39cddb 100644
@@ -91 +91 @@
-@@ -80,6 +80,20 @@ struct __rte_cache_aligned openssl_qp {
+@@ -79,6 +79,20 @@ struct openssl_qp {
@@ -93 +93 @@
- };
+ } __rte_cache_aligned;
@@ -110 +110 @@
- struct __rte_cache_aligned openssl_session {
+ struct openssl_session {
@@ -112 +112 @@
-@@ -168,11 +182,12 @@ struct __rte_cache_aligned openssl_session {
+@@ -167,11 +181,12 @@ struct openssl_session {
@@ -128 +128 @@
- };
+ } __rte_cache_aligned;
@@ -131 +131 @@
-index df44cc097e..7e2e505222 100644
+index 62a179b6b6..72db0fd40f 100644
@@ -134 +134 @@
-@@ -892,40 +892,45 @@ openssl_set_session_parameters(struct openssl_session *sess,
+@@ -891,40 +891,45 @@ openssl_set_session_parameters(struct openssl_session *sess,
@@ -206 +206 @@
-@@ -1471,6 +1476,9 @@ process_openssl_auth_mac(struct rte_mbuf *mbuf_src, uint8_t *dst, int offset,
+@@ -1470,6 +1475,9 @@ process_openssl_auth_mac(struct rte_mbuf *mbuf_src, uint8_t *dst, int offset,
@@ -216 +216 @@
-@@ -1497,11 +1505,9 @@ process_auth_final:
+@@ -1496,11 +1504,9 @@ process_auth_final:
@@ -228 +228 @@
-@@ -1620,7 +1626,7 @@ get_local_cipher_ctx(struct openssl_session *sess, struct openssl_qp *qp)
+@@ -1619,7 +1625,7 @@ get_local_cipher_ctx(struct openssl_session *sess, struct openssl_qp *qp)
@@ -237 +237 @@
-@@ -1647,6 +1653,112 @@ get_local_cipher_ctx(struct openssl_session *sess, struct openssl_qp *qp)
+@@ -1646,6 +1652,112 @@ get_local_cipher_ctx(struct openssl_session *sess, struct openssl_qp *qp)
@@ -350 +350 @@
-@@ -1895,42 +2007,40 @@ process_openssl_auth_op(struct openssl_qp *qp, struct rte_crypto_op *op,
+@@ -1894,42 +2006,40 @@ process_openssl_auth_op(struct openssl_qp *qp, struct rte_crypto_op *op,
@@ -403 +403 @@
-index 4209c6ab6f..1bbb855a59 100644
+index a448279029..18a8095ba2 100644
@@ -406 +406 @@
-@@ -805,7 +805,7 @@ openssl_pmd_sym_session_get_size(struct rte_cryptodev *dev)
+@@ -788,7 +788,7 @@ openssl_pmd_sym_session_get_size(struct rte_cryptodev *dev)
@@ -415 +415 @@
-@@ -818,10 +818,11 @@ openssl_pmd_sym_session_get_size(struct rte_cryptodev *dev)
+@@ -801,10 +801,11 @@ openssl_pmd_sym_session_get_size(struct rte_cryptodev *dev)
More information about the stable
mailing list