patch 'crypto/openssl: make per-QP cipher context clones' has been queued to stable release 23.11.2
Xueming Li
xuemingl at nvidia.com
Mon Aug 12 14:48:24 CEST 2024
Hi,
FYI, your patch has been queued to stable release 23.11.2
Note it hasn't been pushed to http://dpdk.org/browse/dpdk-stable yet.
It will be pushed if I get no objections before 08/14/24. So please
shout if anyone has objections.
Also note that after the patch there's a diff of the upstream commit vs the
patch applied to the branch. This will indicate if there was any rebasing
needed to apply to the stable branch. If there were code changes for rebasing
(ie: not only metadata diffs), please double check that the rebase was
correctly done.
Queued patches are on a temporary branch at:
https://git.dpdk.org/dpdk-stable/log/?h=23.11-staging
This queued commit can be viewed at:
https://git.dpdk.org/dpdk-stable/commit/?h=23.11-staging&id=4f8c97e94187b19adde37b6b01d42441e10f91da
Thanks.
Xueming Li <xuemingl at nvidia.com>
---
>From 4f8c97e94187b19adde37b6b01d42441e10f91da Mon Sep 17 00:00:00 2001
From: Jack Bond-Preston <jack.bond-preston at foss.arm.com>
Date: Wed, 3 Jul 2024 13:45:49 +0000
Subject: [PATCH] crypto/openssl: make per-QP cipher context clones
Cc: Xueming Li <xuemingl at nvidia.com>
[ upstream commit b1d71126023521fe740ec473abfe5b295035b859 ]
Currently EVP_CIPHER_CTXs are allocated, copied to (from
openssl_session), and then freed for every cipher operation (ie. per
packet). This is very inefficient, and avoidable.
Make each openssl_session hold an array of pointers to per-queue-pair
cipher context copies. These are populated on first use by allocating a
new context and copying from the main context. These copies can then be
used in a thread-safe manner by different worker lcores simultaneously.
Consequently the cipher context allocation and copy only has to happen
once - the first time a given qp uses an openssl_session. This brings
about a large performance boost.
Throughput performance uplift measurements for AES-CBC-128 encrypt on
Ampere Altra Max platform:
1 worker lcore
| buffer sz (B) | prev (Gbps) | optimised (Gbps) | uplift |
|-----------------+---------------+--------------------+----------|
| 64 | 1.51 | 2.94 | 94.4% |
| 256 | 4.90 | 8.05 | 64.3% |
| 1024 | 11.07 | 14.21 | 28.3% |
| 2048 | 14.03 | 16.28 | 16.0% |
| 4096 | 16.20 | 17.59 | 8.6% |
8 worker lcores
| buffer sz (B) | prev (Gbps) | optimised (Gbps) | uplift |
|-----------------+---------------+--------------------+----------|
| 64 | 3.05 | 23.74 | 678.8% |
| 256 | 10.46 | 64.86 | 520.3% |
| 1024 | 40.97 | 113.80 | 177.7% |
| 2048 | 73.25 | 130.21 | 77.8% |
| 4096 | 103.89 | 140.62 | 35.4% |
Signed-off-by: Jack Bond-Preston <jack.bond-preston at foss.arm.com>
Acked-by: Kai Ji <kai.ji at intel.com>
Reviewed-by: Wathsala Vithanage <wathsala.vithanage at arm.com>
---
drivers/crypto/openssl/openssl_pmd_private.h | 11 +-
drivers/crypto/openssl/rte_openssl_pmd.c | 105 ++++++++++++-------
drivers/crypto/openssl/rte_openssl_pmd_ops.c | 34 +++++-
3 files changed, 108 insertions(+), 42 deletions(-)
diff --git a/drivers/crypto/openssl/openssl_pmd_private.h b/drivers/crypto/openssl/openssl_pmd_private.h
index 334912d335..370de1d53b 100644
--- a/drivers/crypto/openssl/openssl_pmd_private.h
+++ b/drivers/crypto/openssl/openssl_pmd_private.h
@@ -166,6 +166,14 @@ struct openssl_session {
/**< digest length */
} auth;
+ uint16_t ctx_copies_len;
+ /* < number of entries in ctx_copies */
+ EVP_CIPHER_CTX *qp_ctx[];
+ /**< Flexible array member of per-queue-pair pointers to copies of EVP
+ * context structure. Cipher contexts are not safe to use from multiple
+ * cores simultaneously, so maintaining these copies allows avoiding
+ * per-buffer copying into a temporary context.
+ */
} __rte_cache_aligned;
/** OPENSSL crypto private asymmetric session structure */
@@ -217,7 +225,8 @@ struct openssl_asym_session {
/** Set and validate OPENSSL crypto session parameters */
extern int
openssl_set_session_parameters(struct openssl_session *sess,
- const struct rte_crypto_sym_xform *xform);
+ const struct rte_crypto_sym_xform *xform,
+ uint16_t nb_queue_pairs);
/** Reset OPENSSL crypto session parameters */
extern void
diff --git a/drivers/crypto/openssl/rte_openssl_pmd.c b/drivers/crypto/openssl/rte_openssl_pmd.c
index bd09d58d88..df44cc097e 100644
--- a/drivers/crypto/openssl/rte_openssl_pmd.c
+++ b/drivers/crypto/openssl/rte_openssl_pmd.c
@@ -467,13 +467,10 @@ openssl_set_sess_aead_dec_param(struct openssl_session *sess,
return 0;
}
+#if (OPENSSL_VERSION_NUMBER >= 0x30000000L && OPENSSL_VERSION_NUMBER < 0x30200000L)
static int openssl_aesni_ctx_clone(EVP_CIPHER_CTX **dest,
struct openssl_session *sess)
{
-#if (OPENSSL_VERSION_NUMBER >= 0x30200000L)
- *dest = EVP_CIPHER_CTX_dup(sess->ctx);
- return 0;
-#elif (OPENSSL_VERSION_NUMBER >= 0x30000000L)
/* OpenSSL versions 3.0.0 <= V < 3.2.0 have no dupctx() implementation
* for AES-GCM and AES-CCM. In this case, we have to create new empty
* contexts and initialise, as we did the original context.
@@ -489,13 +486,8 @@ static int openssl_aesni_ctx_clone(EVP_CIPHER_CTX **dest,
return openssl_set_sess_aead_dec_param(sess, sess->aead_algo,
sess->auth.digest_length, sess->cipher.key.data,
dest);
-#else
- *dest = EVP_CIPHER_CTX_new();
- if (EVP_CIPHER_CTX_copy(*dest, sess->cipher.ctx) != 1)
- return -EINVAL;
- return 0;
-#endif
}
+#endif
/** Set session cipher parameters */
static int
@@ -824,7 +816,8 @@ openssl_set_session_aead_parameters(struct openssl_session *sess,
/** Parse crypto xform chain and set private session parameters */
int
openssl_set_session_parameters(struct openssl_session *sess,
- const struct rte_crypto_sym_xform *xform)
+ const struct rte_crypto_sym_xform *xform,
+ uint16_t nb_queue_pairs)
{
const struct rte_crypto_sym_xform *cipher_xform = NULL;
const struct rte_crypto_sym_xform *auth_xform = NULL;
@@ -886,6 +879,12 @@ openssl_set_session_parameters(struct openssl_session *sess,
}
}
+ /*
+ * With only one queue pair, the array of copies is not needed.
+ * Otherwise, one entry per queue pair is required.
+ */
+ sess->ctx_copies_len = nb_queue_pairs > 1 ? nb_queue_pairs : 0;
+
return 0;
}
@@ -893,6 +892,13 @@ openssl_set_session_parameters(struct openssl_session *sess,
void
openssl_reset_session(struct openssl_session *sess)
{
+ for (uint16_t i = 0; i < sess->ctx_copies_len; i++) {
+ if (sess->qp_ctx[i] != NULL) {
+ EVP_CIPHER_CTX_free(sess->qp_ctx[i]);
+ sess->qp_ctx[i] = NULL;
+ }
+ }
+
EVP_CIPHER_CTX_free(sess->cipher.ctx);
if (sess->chain_order == OPENSSL_CHAIN_CIPHER_BPI)
@@ -959,7 +965,7 @@ get_session(struct openssl_qp *qp, struct rte_crypto_op *op)
sess = (struct openssl_session *)_sess->driver_priv_data;
if (unlikely(openssl_set_session_parameters(sess,
- op->sym->xform) != 0)) {
+ op->sym->xform, 1) != 0)) {
rte_mempool_put(qp->sess_mp, _sess);
sess = NULL;
}
@@ -1607,11 +1613,45 @@ process_auth_err:
# endif
/*----------------------------------------------------------------------------*/
+static inline EVP_CIPHER_CTX *
+get_local_cipher_ctx(struct openssl_session *sess, struct openssl_qp *qp)
+{
+ /* If the array is not being used, just return the main context. */
+ if (sess->ctx_copies_len == 0)
+ return sess->cipher.ctx;
+
+ EVP_CIPHER_CTX **lctx = &sess->qp_ctx[qp->id];
+
+ if (unlikely(*lctx == NULL)) {
+#if OPENSSL_VERSION_NUMBER >= 0x30200000L
+ /* EVP_CIPHER_CTX_dup() added in OSSL 3.2 */
+ *lctx = EVP_CIPHER_CTX_dup(sess->cipher.ctx);
+ return *lctx;
+#elif OPENSSL_VERSION_NUMBER >= 0x30000000L
+ if (sess->chain_order == OPENSSL_CHAIN_COMBINED) {
+ /* AESNI special-cased to use openssl_aesni_ctx_clone()
+ * to allow for working around lack of
+ * EVP_CIPHER_CTX_copy support for 3.0.0 <= OSSL Version
+ * < 3.2.0.
+ */
+ if (openssl_aesni_ctx_clone(lctx, sess) != 0)
+ *lctx = NULL;
+ return *lctx;
+ }
+#endif
+
+ *lctx = EVP_CIPHER_CTX_new();
+ EVP_CIPHER_CTX_copy(*lctx, sess->cipher.ctx);
+ }
+
+ return *lctx;
+}
+
/** Process auth/cipher combined operation */
static void
-process_openssl_combined_op
- (struct rte_crypto_op *op, struct openssl_session *sess,
- struct rte_mbuf *mbuf_src, struct rte_mbuf *mbuf_dst)
+process_openssl_combined_op(struct openssl_qp *qp, struct rte_crypto_op *op,
+ struct openssl_session *sess, struct rte_mbuf *mbuf_src,
+ struct rte_mbuf *mbuf_dst)
{
/* cipher */
uint8_t *dst = NULL, *iv, *tag, *aad;
@@ -1628,11 +1668,7 @@ process_openssl_combined_op
return;
}
- EVP_CIPHER_CTX *ctx;
- if (openssl_aesni_ctx_clone(&ctx, sess) != 0) {
- op->status = RTE_CRYPTO_OP_STATUS_ERROR;
- return;
- }
+ EVP_CIPHER_CTX *ctx = get_local_cipher_ctx(sess, qp);
iv = rte_crypto_op_ctod_offset(op, uint8_t *,
sess->iv.offset);
@@ -1688,8 +1724,6 @@ process_openssl_combined_op
dst, tag, taglen, ctx);
}
- EVP_CIPHER_CTX_free(ctx);
-
if (status != 0) {
if (status == (-EFAULT) &&
sess->auth.operation ==
@@ -1702,14 +1736,13 @@ process_openssl_combined_op
/** Process cipher operation */
static void
-process_openssl_cipher_op
- (struct rte_crypto_op *op, struct openssl_session *sess,
- struct rte_mbuf *mbuf_src, struct rte_mbuf *mbuf_dst)
+process_openssl_cipher_op(struct openssl_qp *qp, struct rte_crypto_op *op,
+ struct openssl_session *sess, struct rte_mbuf *mbuf_src,
+ struct rte_mbuf *mbuf_dst)
{
uint8_t *dst, *iv;
int srclen, status;
uint8_t inplace = (mbuf_src == mbuf_dst) ? 1 : 0;
- EVP_CIPHER_CTX *ctx_copy;
/*
* Segmented OOP destination buffer is not supported for encryption/
@@ -1728,24 +1761,22 @@ process_openssl_cipher_op
iv = rte_crypto_op_ctod_offset(op, uint8_t *,
sess->iv.offset);
- ctx_copy = EVP_CIPHER_CTX_new();
- EVP_CIPHER_CTX_copy(ctx_copy, sess->cipher.ctx);
+
+ EVP_CIPHER_CTX *ctx = get_local_cipher_ctx(sess, qp);
if (sess->cipher.mode == OPENSSL_CIPHER_LIB)
if (sess->cipher.direction == RTE_CRYPTO_CIPHER_OP_ENCRYPT)
status = process_openssl_cipher_encrypt(mbuf_src, dst,
op->sym->cipher.data.offset, iv,
- srclen, ctx_copy, inplace);
+ srclen, ctx, inplace);
else
status = process_openssl_cipher_decrypt(mbuf_src, dst,
op->sym->cipher.data.offset, iv,
- srclen, ctx_copy, inplace);
+ srclen, ctx, inplace);
else
status = process_openssl_cipher_des3ctr(mbuf_src, dst,
- op->sym->cipher.data.offset, iv, srclen,
- ctx_copy);
+ op->sym->cipher.data.offset, iv, srclen, ctx);
- EVP_CIPHER_CTX_free(ctx_copy);
if (status != 0)
op->status = RTE_CRYPTO_OP_STATUS_ERROR;
}
@@ -3150,13 +3181,13 @@ process_op(struct openssl_qp *qp, struct rte_crypto_op *op,
switch (sess->chain_order) {
case OPENSSL_CHAIN_ONLY_CIPHER:
- process_openssl_cipher_op(op, sess, msrc, mdst);
+ process_openssl_cipher_op(qp, op, sess, msrc, mdst);
break;
case OPENSSL_CHAIN_ONLY_AUTH:
process_openssl_auth_op(qp, op, sess, msrc, mdst);
break;
case OPENSSL_CHAIN_CIPHER_AUTH:
- process_openssl_cipher_op(op, sess, msrc, mdst);
+ process_openssl_cipher_op(qp, op, sess, msrc, mdst);
/* OOP */
if (msrc != mdst)
copy_plaintext(msrc, mdst, op);
@@ -3164,10 +3195,10 @@ process_op(struct openssl_qp *qp, struct rte_crypto_op *op,
break;
case OPENSSL_CHAIN_AUTH_CIPHER:
process_openssl_auth_op(qp, op, sess, msrc, mdst);
- process_openssl_cipher_op(op, sess, msrc, mdst);
+ process_openssl_cipher_op(qp, op, sess, msrc, mdst);
break;
case OPENSSL_CHAIN_COMBINED:
- process_openssl_combined_op(op, sess, msrc, mdst);
+ process_openssl_combined_op(qp, op, sess, msrc, mdst);
break;
case OPENSSL_CHAIN_CIPHER_BPI:
process_openssl_docsis_bpi_op(op, sess, msrc, mdst);
diff --git a/drivers/crypto/openssl/rte_openssl_pmd_ops.c b/drivers/crypto/openssl/rte_openssl_pmd_ops.c
index b16baaa08f..4209c6ab6f 100644
--- a/drivers/crypto/openssl/rte_openssl_pmd_ops.c
+++ b/drivers/crypto/openssl/rte_openssl_pmd_ops.c
@@ -794,9 +794,34 @@ qp_setup_cleanup:
/** Returns the size of the symmetric session structure */
static unsigned
-openssl_pmd_sym_session_get_size(struct rte_cryptodev *dev __rte_unused)
+openssl_pmd_sym_session_get_size(struct rte_cryptodev *dev)
{
- return sizeof(struct openssl_session);
+ /*
+ * For 0 qps, return the max size of the session - this is necessary if
+ * the user calls into this function to create the session mempool,
+ * without first configuring the number of qps for the cryptodev.
+ */
+ if (dev->data->nb_queue_pairs == 0) {
+ unsigned int max_nb_qps = ((struct openssl_private *)
+ dev->data->dev_private)->max_nb_qpairs;
+ return sizeof(struct openssl_session) +
+ (sizeof(void *) * max_nb_qps);
+ }
+
+ /*
+ * With only one queue pair, the thread safety of multiple context
+ * copies is not necessary, so don't allocate extra memory for the
+ * array.
+ */
+ if (dev->data->nb_queue_pairs == 1)
+ return sizeof(struct openssl_session);
+
+ /*
+ * Otherwise, the size of the flexible array member should be enough to
+ * fit pointers to per-qp contexts.
+ */
+ return sizeof(struct openssl_session) +
+ (sizeof(void *) * dev->data->nb_queue_pairs);
}
/** Returns the size of the asymmetric session structure */
@@ -808,7 +833,7 @@ openssl_pmd_asym_session_get_size(struct rte_cryptodev *dev __rte_unused)
/** Configure the session from a crypto xform chain */
static int
-openssl_pmd_sym_session_configure(struct rte_cryptodev *dev __rte_unused,
+openssl_pmd_sym_session_configure(struct rte_cryptodev *dev,
struct rte_crypto_sym_xform *xform,
struct rte_cryptodev_sym_session *sess)
{
@@ -820,7 +845,8 @@ openssl_pmd_sym_session_configure(struct rte_cryptodev *dev __rte_unused,
return -EINVAL;
}
- ret = openssl_set_session_parameters(sess_private_data, xform);
+ ret = openssl_set_session_parameters(sess_private_data, xform,
+ dev->data->nb_queue_pairs);
if (ret != 0) {
OPENSSL_LOG(ERR, "failed configure session parameters");
--
2.34.1
---
Diff of the applied patch vs upstream commit (please double-check if non-empty:
---
--- - 2024-08-12 20:44:03.442227826 +0800
+++ 0027-crypto-openssl-make-per-QP-cipher-context-clones.patch 2024-08-12 20:44:01.965069269 +0800
@@ -1 +1 @@
-From b1d71126023521fe740ec473abfe5b295035b859 Mon Sep 17 00:00:00 2001
+From 4f8c97e94187b19adde37b6b01d42441e10f91da Mon Sep 17 00:00:00 2001
@@ -4,0 +5,3 @@
+Cc: Xueming Li <xuemingl at nvidia.com>
+
+[ upstream commit b1d71126023521fe740ec473abfe5b295035b859 ]
@@ -38,2 +40,0 @@
-Cc: stable at dpdk.org
-
@@ -50 +51 @@
-index 0f038b218c..bad7dcf2f5 100644
+index 334912d335..370de1d53b 100644
@@ -53 +54 @@
-@@ -166,6 +166,14 @@ struct __rte_cache_aligned openssl_session {
+@@ -166,6 +166,14 @@ struct openssl_session {
@@ -65 +66 @@
- };
+ } __rte_cache_aligned;
@@ -68 +69 @@
-@@ -217,7 +225,8 @@ struct __rte_cache_aligned openssl_asym_session {
+@@ -217,7 +225,8 @@ struct openssl_asym_session {
More information about the stable
mailing list