[PATCH v5 4/5] crypto/openssl: per-qp auth context clones
    Jack Bond-Preston 
    jack.bond-preston at foss.arm.com
       
    Wed Jul  3 15:45:50 CEST 2024
    
    
  
Currently EVP auth ctxs (e.g. EVP_MD_CTX, EVP_MAC_CTX) are allocated,
copied to (from openssl_session), and then freed for every auth
operation (ie. per packet). This is very inefficient, and avoidable.
Make each openssl_session hold an array of structures, containing
pointers to per-queue-pair cipher and auth context copies. These are
populated on first use by allocating a new context and copying from the
main context. These copies can then be used in a thread-safe manner by
different worker lcores simultaneously. Consequently the auth context
allocation and copy only has to happen once - the first time a given qp
uses an openssl_session. This brings about a large performance boost.
Throughput performance uplift measurements for HMAC-SHA1 generate on
Ampere Altra Max platform:
1 worker lcore
|   buffer sz (B) |   prev (Gbps) |   optimised (Gbps) |   uplift |
|-----------------+---------------+--------------------+----------|
|              64 |          0.63 |               1.42 |   123.5% |
|             256 |          2.24 |               4.40 |    96.4% |
|            1024 |          6.15 |               9.26 |    50.6% |
|            2048 |          8.68 |              11.38 |    31.1% |
|            4096 |         10.92 |              12.84 |    17.6% |
8 worker lcores
|   buffer sz (B) |   prev (Gbps) |   optimised (Gbps) |   uplift |
|-----------------+---------------+--------------------+----------|
|              64 |          0.93 |              11.35 |  1122.5% |
|             256 |          3.70 |              35.30 |   853.7% |
|            1024 |         15.22 |              74.27 |   387.8% |
|            2048 |         30.20 |              91.08 |   201.6% |
|            4096 |         56.92 |             102.76 |    80.5% |
Signed-off-by: Jack Bond-Preston <jack.bond-preston at foss.arm.com>
Acked-by: Kai Ji <kai.ji at intel.com>
Reviewed-by: Wathsala Vithanage <wathsala.vithanage at arm.com>
---
 drivers/crypto/openssl/compat.h              |  26 +++
 drivers/crypto/openssl/openssl_pmd_private.h |  25 ++-
 drivers/crypto/openssl/rte_openssl_pmd.c     | 176 +++++++++++++++----
 drivers/crypto/openssl/rte_openssl_pmd_ops.c |   7 +-
 4 files changed, 193 insertions(+), 41 deletions(-)
diff --git a/drivers/crypto/openssl/compat.h b/drivers/crypto/openssl/compat.h
index 9f9167c4f1..e1814fea8c 100644
--- a/drivers/crypto/openssl/compat.h
+++ b/drivers/crypto/openssl/compat.h
@@ -5,6 +5,32 @@
 #ifndef __RTA_COMPAT_H__
 #define __RTA_COMPAT_H__
 
+#if OPENSSL_VERSION_NUMBER >= 0x30000000L
+static __rte_always_inline void
+free_hmac_ctx(EVP_MAC_CTX *ctx)
+{
+	EVP_MAC_CTX_free(ctx);
+}
+
+static __rte_always_inline void
+free_cmac_ctx(EVP_MAC_CTX *ctx)
+{
+	EVP_MAC_CTX_free(ctx);
+}
+#else
+static __rte_always_inline void
+free_hmac_ctx(HMAC_CTX *ctx)
+{
+	HMAC_CTX_free(ctx);
+}
+
+static __rte_always_inline void
+free_cmac_ctx(CMAC_CTX *ctx)
+{
+	CMAC_CTX_free(ctx);
+}
+#endif
+
 #if (OPENSSL_VERSION_NUMBER < 0x10100000L)
 
 static __rte_always_inline int
diff --git a/drivers/crypto/openssl/openssl_pmd_private.h b/drivers/crypto/openssl/openssl_pmd_private.h
index bad7dcf2f5..a50e4d4918 100644
--- a/drivers/crypto/openssl/openssl_pmd_private.h
+++ b/drivers/crypto/openssl/openssl_pmd_private.h
@@ -80,6 +80,20 @@ struct __rte_cache_aligned openssl_qp {
 	 */
 };
 
+struct evp_ctx_pair {
+	EVP_CIPHER_CTX *cipher;
+	union {
+		EVP_MD_CTX *auth;
+#if OPENSSL_VERSION_NUMBER >= 0x30000000L
+		EVP_MAC_CTX *hmac;
+		EVP_MAC_CTX *cmac;
+#else
+		HMAC_CTX *hmac;
+		CMAC_CTX *cmac;
+#endif
+	};
+};
+
 /** OPENSSL crypto private session structure */
 struct __rte_cache_aligned openssl_session {
 	enum openssl_chain_order chain_order;
@@ -168,11 +182,12 @@ struct __rte_cache_aligned openssl_session {
 
 	uint16_t ctx_copies_len;
 	/* < number of entries in ctx_copies */
-	EVP_CIPHER_CTX *qp_ctx[];
-	/**< Flexible array member of per-queue-pair pointers to copies of EVP
-	 * context structure. Cipher contexts are not safe to use from multiple
-	 * cores simultaneously, so maintaining these copies allows avoiding
-	 * per-buffer copying into a temporary context.
+	struct evp_ctx_pair qp_ctx[];
+	/**< Flexible array member of per-queue-pair structures, each containing
+	 * pointers to copies of the cipher and auth EVP contexts. Cipher
+	 * contexts are not safe to use from multiple cores simultaneously, so
+	 * maintaining these copies allows avoiding per-buffer copying into a
+	 * temporary context.
 	 */
 };
 
diff --git a/drivers/crypto/openssl/rte_openssl_pmd.c b/drivers/crypto/openssl/rte_openssl_pmd.c
index df44cc097e..7e2e505222 100644
--- a/drivers/crypto/openssl/rte_openssl_pmd.c
+++ b/drivers/crypto/openssl/rte_openssl_pmd.c
@@ -892,40 +892,45 @@ openssl_set_session_parameters(struct openssl_session *sess,
 void
 openssl_reset_session(struct openssl_session *sess)
 {
+	/* Free all the qp_ctx entries. */
 	for (uint16_t i = 0; i < sess->ctx_copies_len; i++) {
-		if (sess->qp_ctx[i] != NULL) {
-			EVP_CIPHER_CTX_free(sess->qp_ctx[i]);
-			sess->qp_ctx[i] = NULL;
+		if (sess->qp_ctx[i].cipher != NULL) {
+			EVP_CIPHER_CTX_free(sess->qp_ctx[i].cipher);
+			sess->qp_ctx[i].cipher = NULL;
+		}
+
+		switch (sess->auth.mode) {
+		case OPENSSL_AUTH_AS_AUTH:
+			EVP_MD_CTX_destroy(sess->qp_ctx[i].auth);
+			sess->qp_ctx[i].auth = NULL;
+			break;
+		case OPENSSL_AUTH_AS_HMAC:
+			free_hmac_ctx(sess->qp_ctx[i].hmac);
+			sess->qp_ctx[i].hmac = NULL;
+			break;
+		case OPENSSL_AUTH_AS_CMAC:
+			free_cmac_ctx(sess->qp_ctx[i].cmac);
+			sess->qp_ctx[i].cmac = NULL;
+			break;
 		}
 	}
 
 	EVP_CIPHER_CTX_free(sess->cipher.ctx);
 
-	if (sess->chain_order == OPENSSL_CHAIN_CIPHER_BPI)
-		EVP_CIPHER_CTX_free(sess->cipher.bpi_ctx);
-
 	switch (sess->auth.mode) {
 	case OPENSSL_AUTH_AS_AUTH:
 		EVP_MD_CTX_destroy(sess->auth.auth.ctx);
 		break;
 	case OPENSSL_AUTH_AS_HMAC:
-		EVP_PKEY_free(sess->auth.hmac.pkey);
-# if OPENSSL_VERSION_NUMBER >= 0x30000000L
-		EVP_MAC_CTX_free(sess->auth.hmac.ctx);
-# else
-		HMAC_CTX_free(sess->auth.hmac.ctx);
-# endif
+		free_hmac_ctx(sess->auth.hmac.ctx);
 		break;
 	case OPENSSL_AUTH_AS_CMAC:
-# if OPENSSL_VERSION_NUMBER >= 0x30000000L
-		EVP_MAC_CTX_free(sess->auth.cmac.ctx);
-# else
-		CMAC_CTX_free(sess->auth.cmac.ctx);
-# endif
-		break;
-	default:
+		free_cmac_ctx(sess->auth.cmac.ctx);
 		break;
 	}
+
+	if (sess->chain_order == OPENSSL_CHAIN_CIPHER_BPI)
+		EVP_CIPHER_CTX_free(sess->cipher.bpi_ctx);
 }
 
 /** Provide session for operation */
@@ -1471,6 +1476,9 @@ process_openssl_auth_mac(struct rte_mbuf *mbuf_src, uint8_t *dst, int offset,
 	if (m == 0)
 		goto process_auth_err;
 
+	if (EVP_MAC_init(ctx, NULL, 0, NULL) <= 0)
+		goto process_auth_err;
+
 	src = rte_pktmbuf_mtod_offset(m, uint8_t *, offset);
 
 	l = rte_pktmbuf_data_len(m) - offset;
@@ -1497,11 +1505,9 @@ process_openssl_auth_mac(struct rte_mbuf *mbuf_src, uint8_t *dst, int offset,
 	if (EVP_MAC_final(ctx, dst, &dstlen, DIGEST_LENGTH_MAX) != 1)
 		goto process_auth_err;
 
-	EVP_MAC_CTX_free(ctx);
 	return 0;
 
 process_auth_err:
-	EVP_MAC_CTX_free(ctx);
 	OPENSSL_LOG(ERR, "Process openssl auth failed");
 	return -EINVAL;
 }
@@ -1620,7 +1626,7 @@ get_local_cipher_ctx(struct openssl_session *sess, struct openssl_qp *qp)
 	if (sess->ctx_copies_len == 0)
 		return sess->cipher.ctx;
 
-	EVP_CIPHER_CTX **lctx = &sess->qp_ctx[qp->id];
+	EVP_CIPHER_CTX **lctx = &sess->qp_ctx[qp->id].cipher;
 
 	if (unlikely(*lctx == NULL)) {
 #if OPENSSL_VERSION_NUMBER >= 0x30200000L
@@ -1647,6 +1653,112 @@ get_local_cipher_ctx(struct openssl_session *sess, struct openssl_qp *qp)
 	return *lctx;
 }
 
+static inline EVP_MD_CTX *
+get_local_auth_ctx(struct openssl_session *sess, struct openssl_qp *qp)
+{
+	/* If the array is not being used, just return the main context. */
+	if (sess->ctx_copies_len == 0)
+		return sess->auth.auth.ctx;
+
+	EVP_MD_CTX **lctx = &sess->qp_ctx[qp->id].auth;
+
+	if (unlikely(*lctx == NULL)) {
+#if OPENSSL_VERSION_NUMBER >= 0x30100000L
+		/* EVP_MD_CTX_dup() added in OSSL 3.1 */
+		*lctx = EVP_MD_CTX_dup(sess->auth.auth.ctx);
+#else
+		*lctx = EVP_MD_CTX_new();
+		EVP_MD_CTX_copy(*lctx, sess->auth.auth.ctx);
+#endif
+	}
+
+	return *lctx;
+}
+
+#if OPENSSL_VERSION_NUMBER >= 0x30000000L
+static inline EVP_MAC_CTX *
+#else
+static inline HMAC_CTX *
+#endif
+get_local_hmac_ctx(struct openssl_session *sess, struct openssl_qp *qp)
+{
+#if (OPENSSL_VERSION_NUMBER >= 0x30000000L && OPENSSL_VERSION_NUMBER < 0x30003000L)
+	/* For OpenSSL versions 3.0.0 <= v < 3.0.3, re-initing of
+	 * EVP_MAC_CTXs is broken, and doesn't actually reset their
+	 * state. This was fixed in OSSL commit c9ddc5af5199 ("Avoid
+	 * undefined behavior of provided macs on EVP_MAC
+	 * reinitialization"). In cases where the fix is not present,
+	 * fall back to duplicating the context every buffer as a
+	 * workaround, at the cost of performance.
+	 */
+	RTE_SET_USED(qp);
+	return EVP_MAC_CTX_dup(sess->auth.hmac.ctx);
+#else
+	if (sess->ctx_copies_len == 0)
+		return sess->auth.hmac.ctx;
+
+#if OPENSSL_VERSION_NUMBER >= 0x30000000L
+	EVP_MAC_CTX **lctx =
+#else
+	HMAC_CTX **lctx =
+#endif
+		&sess->qp_ctx[qp->id].hmac;
+
+	if (unlikely(*lctx == NULL)) {
+#if OPENSSL_VERSION_NUMBER >= 0x30000000L
+		*lctx = EVP_MAC_CTX_dup(sess->auth.hmac.ctx);
+#else
+		*lctx = HMAC_CTX_new();
+		HMAC_CTX_copy(*lctx, sess->auth.hmac.ctx);
+#endif
+	}
+
+	return *lctx;
+#endif
+}
+
+#if OPENSSL_VERSION_NUMBER >= 0x30000000L
+static inline EVP_MAC_CTX *
+#else
+static inline CMAC_CTX *
+#endif
+get_local_cmac_ctx(struct openssl_session *sess, struct openssl_qp *qp)
+{
+#if (OPENSSL_VERSION_NUMBER >= 0x30000000L && OPENSSL_VERSION_NUMBER < 0x30003000L)
+	/* For OpenSSL versions 3.0.0 <= v < 3.0.3, re-initing of
+	 * EVP_MAC_CTXs is broken, and doesn't actually reset their
+	 * state. This was fixed in OSSL commit c9ddc5af5199 ("Avoid
+	 * undefined behavior of provided macs on EVP_MAC
+	 * reinitialization"). In cases where the fix is not present,
+	 * fall back to duplicating the context every buffer as a
+	 * workaround, at the cost of performance.
+	 */
+	RTE_SET_USED(qp);
+	return EVP_MAC_CTX_dup(sess->auth.cmac.ctx);
+#else
+	if (sess->ctx_copies_len == 0)
+		return sess->auth.cmac.ctx;
+
+#if OPENSSL_VERSION_NUMBER >= 0x30000000L
+	EVP_MAC_CTX **lctx =
+#else
+	CMAC_CTX **lctx =
+#endif
+		&sess->qp_ctx[qp->id].cmac;
+
+	if (unlikely(*lctx == NULL)) {
+#if OPENSSL_VERSION_NUMBER >= 0x30000000L
+		*lctx = EVP_MAC_CTX_dup(sess->auth.cmac.ctx);
+#else
+		*lctx = CMAC_CTX_new();
+		CMAC_CTX_copy(*lctx, sess->auth.cmac.ctx);
+#endif
+	}
+
+	return *lctx;
+#endif
+}
+
 /** Process auth/cipher combined operation */
 static void
 process_openssl_combined_op(struct openssl_qp *qp, struct rte_crypto_op *op,
@@ -1895,42 +2007,40 @@ process_openssl_auth_op(struct openssl_qp *qp, struct rte_crypto_op *op,
 
 	switch (sess->auth.mode) {
 	case OPENSSL_AUTH_AS_AUTH:
-		ctx_a = EVP_MD_CTX_create();
-		EVP_MD_CTX_copy_ex(ctx_a, sess->auth.auth.ctx);
+		ctx_a = get_local_auth_ctx(sess, qp);
 		status = process_openssl_auth(mbuf_src, dst,
 				op->sym->auth.data.offset, NULL, NULL, srclen,
 				ctx_a, sess->auth.auth.evp_algo);
-		EVP_MD_CTX_destroy(ctx_a);
 		break;
 	case OPENSSL_AUTH_AS_HMAC:
+		ctx_h = get_local_hmac_ctx(sess, qp);
 # if OPENSSL_VERSION_NUMBER >= 0x30000000L
-		ctx_h = EVP_MAC_CTX_dup(sess->auth.hmac.ctx);
 		status = process_openssl_auth_mac(mbuf_src, dst,
 				op->sym->auth.data.offset, srclen,
 				ctx_h);
 # else
-		ctx_h = HMAC_CTX_new();
-		HMAC_CTX_copy(ctx_h, sess->auth.hmac.ctx);
 		status = process_openssl_auth_hmac(mbuf_src, dst,
 				op->sym->auth.data.offset, srclen,
 				ctx_h);
-		HMAC_CTX_free(ctx_h);
 # endif
+#if (OPENSSL_VERSION_NUMBER >= 0x30000000L && OPENSSL_VERSION_NUMBER < 0x30003000L)
+		EVP_MAC_CTX_free(ctx_h);
+#endif
 		break;
 	case OPENSSL_AUTH_AS_CMAC:
+		ctx_c = get_local_cmac_ctx(sess, qp);
 # if OPENSSL_VERSION_NUMBER >= 0x30000000L
-		ctx_c = EVP_MAC_CTX_dup(sess->auth.cmac.ctx);
 		status = process_openssl_auth_mac(mbuf_src, dst,
 				op->sym->auth.data.offset, srclen,
 				ctx_c);
 # else
-		ctx_c = CMAC_CTX_new();
-		CMAC_CTX_copy(ctx_c, sess->auth.cmac.ctx);
 		status = process_openssl_auth_cmac(mbuf_src, dst,
 				op->sym->auth.data.offset, srclen,
 				ctx_c);
-		CMAC_CTX_free(ctx_c);
 # endif
+#if (OPENSSL_VERSION_NUMBER >= 0x30000000L && OPENSSL_VERSION_NUMBER < 0x30003000L)
+		EVP_MAC_CTX_free(ctx_c);
+#endif
 		break;
 	default:
 		status = -1;
diff --git a/drivers/crypto/openssl/rte_openssl_pmd_ops.c b/drivers/crypto/openssl/rte_openssl_pmd_ops.c
index 4209c6ab6f..1bbb855a59 100644
--- a/drivers/crypto/openssl/rte_openssl_pmd_ops.c
+++ b/drivers/crypto/openssl/rte_openssl_pmd_ops.c
@@ -805,7 +805,7 @@ openssl_pmd_sym_session_get_size(struct rte_cryptodev *dev)
 		unsigned int max_nb_qps = ((struct openssl_private *)
 				dev->data->dev_private)->max_nb_qpairs;
 		return sizeof(struct openssl_session) +
-				(sizeof(void *) * max_nb_qps);
+				(sizeof(struct evp_ctx_pair) * max_nb_qps);
 	}
 
 	/*
@@ -818,10 +818,11 @@ openssl_pmd_sym_session_get_size(struct rte_cryptodev *dev)
 
 	/*
 	 * Otherwise, the size of the flexible array member should be enough to
-	 * fit pointers to per-qp contexts.
+	 * fit pointers to per-qp contexts. This is twice the number of queue
+	 * pairs, to allow for auth and cipher contexts.
 	 */
 	return sizeof(struct openssl_session) +
-		(sizeof(void *) * dev->data->nb_queue_pairs);
+		(sizeof(struct evp_ctx_pair) * dev->data->nb_queue_pairs);
 }
 
 /** Returns the size of the asymmetric session structure */
-- 
2.34.1
    
    
More information about the dev
mailing list