Without this patch, the ChaCha block counter is not incremented on NEON rounds, resulting in incorrect calculations and corrupt packets.

This also switches to using `--no-numbered --zero-commit` so that future diffs are smaller.

Reported-by: Hans Geiblinger <cybrnook2002@yahoo.com>
Reviewed-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
Cc: David Bauer <mail@david-bauer.net>
Cc: Petr Štetiar <ynezz@true.cz>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
		
			
				
	
	
		
			252 lines
		
	
	
		
			7.8 KiB
		
	
	
	
		
			Diff
		
	
	
	
	
	
			
		
		
	
	
			252 lines
		
	
	
		
			7.8 KiB
		
	
	
	
		
			Diff
		
	
	
	
	
	
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 | 
						|
From: Ard Biesheuvel <ardb@kernel.org>
 | 
						|
Date: Fri, 8 Nov 2019 13:22:20 +0100
 | 
						|
Subject: [PATCH] crypto: x86/poly1305 - unify Poly1305 state struct with
 | 
						|
 generic code
 | 
						|
 | 
						|
commit ad8f5b88383ea685f2b8df2a12ee3e08089a1287 upstream.
 | 
						|
 | 
						|
In preparation of exposing a Poly1305 library interface directly from
 | 
						|
the accelerated x86 driver, align the state descriptor of the x86 code
 | 
						|
with the one used by the generic driver. This is needed to make the
 | 
						|
library interface unified between all implementations.
 | 
						|
 | 
						|
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
 | 
						|
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
 | 
						|
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
 | 
						|
---
 | 
						|
 arch/x86/crypto/poly1305_glue.c    | 88 ++++++++++--------------------
 | 
						|
 crypto/poly1305_generic.c          |  6 +-
 | 
						|
 include/crypto/internal/poly1305.h |  4 +-
 | 
						|
 include/crypto/poly1305.h          | 18 +++---
 | 
						|
 4 files changed, 43 insertions(+), 73 deletions(-)
 | 
						|
 | 
						|
--- a/arch/x86/crypto/poly1305_glue.c
 | 
						|
+++ b/arch/x86/crypto/poly1305_glue.c
 | 
						|
@@ -14,40 +14,14 @@
 | 
						|
 #include <linux/module.h>
 | 
						|
 #include <asm/simd.h>
 | 
						|
 
 | 
						|
-struct poly1305_simd_desc_ctx {
 | 
						|
-	struct poly1305_desc_ctx base;
 | 
						|
-	/* derived key u set? */
 | 
						|
-	bool uset;
 | 
						|
-#ifdef CONFIG_AS_AVX2
 | 
						|
-	/* derived keys r^3, r^4 set? */
 | 
						|
-	bool wset;
 | 
						|
-#endif
 | 
						|
-	/* derived Poly1305 key r^2 */
 | 
						|
-	u32 u[5];
 | 
						|
-	/* ... silently appended r^3 and r^4 when using AVX2 */
 | 
						|
-};
 | 
						|
-
 | 
						|
 asmlinkage void poly1305_block_sse2(u32 *h, const u8 *src,
 | 
						|
 				    const u32 *r, unsigned int blocks);
 | 
						|
 asmlinkage void poly1305_2block_sse2(u32 *h, const u8 *src, const u32 *r,
 | 
						|
 				     unsigned int blocks, const u32 *u);
 | 
						|
-#ifdef CONFIG_AS_AVX2
 | 
						|
 asmlinkage void poly1305_4block_avx2(u32 *h, const u8 *src, const u32 *r,
 | 
						|
 				     unsigned int blocks, const u32 *u);
 | 
						|
-static bool poly1305_use_avx2;
 | 
						|
-#endif
 | 
						|
 
 | 
						|
-static int poly1305_simd_init(struct shash_desc *desc)
 | 
						|
-{
 | 
						|
-	struct poly1305_simd_desc_ctx *sctx = shash_desc_ctx(desc);
 | 
						|
-
 | 
						|
-	sctx->uset = false;
 | 
						|
-#ifdef CONFIG_AS_AVX2
 | 
						|
-	sctx->wset = false;
 | 
						|
-#endif
 | 
						|
-
 | 
						|
-	return crypto_poly1305_init(desc);
 | 
						|
-}
 | 
						|
+static bool poly1305_use_avx2 __ro_after_init;
 | 
						|
 
 | 
						|
 static void poly1305_simd_mult(u32 *a, const u32 *b)
 | 
						|
 {
 | 
						|
@@ -63,53 +37,49 @@ static void poly1305_simd_mult(u32 *a, c
 | 
						|
 static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx,
 | 
						|
 					 const u8 *src, unsigned int srclen)
 | 
						|
 {
 | 
						|
-	struct poly1305_simd_desc_ctx *sctx;
 | 
						|
 	unsigned int blocks, datalen;
 | 
						|
 
 | 
						|
-	BUILD_BUG_ON(offsetof(struct poly1305_simd_desc_ctx, base));
 | 
						|
-	sctx = container_of(dctx, struct poly1305_simd_desc_ctx, base);
 | 
						|
-
 | 
						|
 	if (unlikely(!dctx->sset)) {
 | 
						|
 		datalen = crypto_poly1305_setdesckey(dctx, src, srclen);
 | 
						|
 		src += srclen - datalen;
 | 
						|
 		srclen = datalen;
 | 
						|
 	}
 | 
						|
 
 | 
						|
-#ifdef CONFIG_AS_AVX2
 | 
						|
-	if (poly1305_use_avx2 && srclen >= POLY1305_BLOCK_SIZE * 4) {
 | 
						|
-		if (unlikely(!sctx->wset)) {
 | 
						|
-			if (!sctx->uset) {
 | 
						|
-				memcpy(sctx->u, dctx->r.r, sizeof(sctx->u));
 | 
						|
-				poly1305_simd_mult(sctx->u, dctx->r.r);
 | 
						|
-				sctx->uset = true;
 | 
						|
+	if (IS_ENABLED(CONFIG_AS_AVX2) &&
 | 
						|
+	    poly1305_use_avx2 &&
 | 
						|
+	    srclen >= POLY1305_BLOCK_SIZE * 4) {
 | 
						|
+		if (unlikely(dctx->rset < 4)) {
 | 
						|
+			if (dctx->rset < 2) {
 | 
						|
+				dctx->r[1] = dctx->r[0];
 | 
						|
+				poly1305_simd_mult(dctx->r[1].r, dctx->r[0].r);
 | 
						|
 			}
 | 
						|
-			memcpy(sctx->u + 5, sctx->u, sizeof(sctx->u));
 | 
						|
-			poly1305_simd_mult(sctx->u + 5, dctx->r.r);
 | 
						|
-			memcpy(sctx->u + 10, sctx->u + 5, sizeof(sctx->u));
 | 
						|
-			poly1305_simd_mult(sctx->u + 10, dctx->r.r);
 | 
						|
-			sctx->wset = true;
 | 
						|
+			dctx->r[2] = dctx->r[1];
 | 
						|
+			poly1305_simd_mult(dctx->r[2].r, dctx->r[0].r);
 | 
						|
+			dctx->r[3] = dctx->r[2];
 | 
						|
+			poly1305_simd_mult(dctx->r[3].r, dctx->r[0].r);
 | 
						|
+			dctx->rset = 4;
 | 
						|
 		}
 | 
						|
 		blocks = srclen / (POLY1305_BLOCK_SIZE * 4);
 | 
						|
-		poly1305_4block_avx2(dctx->h.h, src, dctx->r.r, blocks,
 | 
						|
-				     sctx->u);
 | 
						|
+		poly1305_4block_avx2(dctx->h.h, src, dctx->r[0].r, blocks,
 | 
						|
+				     dctx->r[1].r);
 | 
						|
 		src += POLY1305_BLOCK_SIZE * 4 * blocks;
 | 
						|
 		srclen -= POLY1305_BLOCK_SIZE * 4 * blocks;
 | 
						|
 	}
 | 
						|
-#endif
 | 
						|
+
 | 
						|
 	if (likely(srclen >= POLY1305_BLOCK_SIZE * 2)) {
 | 
						|
-		if (unlikely(!sctx->uset)) {
 | 
						|
-			memcpy(sctx->u, dctx->r.r, sizeof(sctx->u));
 | 
						|
-			poly1305_simd_mult(sctx->u, dctx->r.r);
 | 
						|
-			sctx->uset = true;
 | 
						|
+		if (unlikely(dctx->rset < 2)) {
 | 
						|
+			dctx->r[1] = dctx->r[0];
 | 
						|
+			poly1305_simd_mult(dctx->r[1].r, dctx->r[0].r);
 | 
						|
+			dctx->rset = 2;
 | 
						|
 		}
 | 
						|
 		blocks = srclen / (POLY1305_BLOCK_SIZE * 2);
 | 
						|
-		poly1305_2block_sse2(dctx->h.h, src, dctx->r.r, blocks,
 | 
						|
-				     sctx->u);
 | 
						|
+		poly1305_2block_sse2(dctx->h.h, src, dctx->r[0].r,
 | 
						|
+				     blocks, dctx->r[1].r);
 | 
						|
 		src += POLY1305_BLOCK_SIZE * 2 * blocks;
 | 
						|
 		srclen -= POLY1305_BLOCK_SIZE * 2 * blocks;
 | 
						|
 	}
 | 
						|
 	if (srclen >= POLY1305_BLOCK_SIZE) {
 | 
						|
-		poly1305_block_sse2(dctx->h.h, src, dctx->r.r, 1);
 | 
						|
+		poly1305_block_sse2(dctx->h.h, src, dctx->r[0].r, 1);
 | 
						|
 		srclen -= POLY1305_BLOCK_SIZE;
 | 
						|
 	}
 | 
						|
 	return srclen;
 | 
						|
@@ -159,10 +129,10 @@ static int poly1305_simd_update(struct s
 | 
						|
 
 | 
						|
 static struct shash_alg alg = {
 | 
						|
 	.digestsize	= POLY1305_DIGEST_SIZE,
 | 
						|
-	.init		= poly1305_simd_init,
 | 
						|
+	.init		= crypto_poly1305_init,
 | 
						|
 	.update		= poly1305_simd_update,
 | 
						|
 	.final		= crypto_poly1305_final,
 | 
						|
-	.descsize	= sizeof(struct poly1305_simd_desc_ctx),
 | 
						|
+	.descsize	= sizeof(struct poly1305_desc_ctx),
 | 
						|
 	.base		= {
 | 
						|
 		.cra_name		= "poly1305",
 | 
						|
 		.cra_driver_name	= "poly1305-simd",
 | 
						|
@@ -177,14 +147,14 @@ static int __init poly1305_simd_mod_init
 | 
						|
 	if (!boot_cpu_has(X86_FEATURE_XMM2))
 | 
						|
 		return -ENODEV;
 | 
						|
 
 | 
						|
-#ifdef CONFIG_AS_AVX2
 | 
						|
-	poly1305_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
 | 
						|
+	poly1305_use_avx2 = IS_ENABLED(CONFIG_AS_AVX2) &&
 | 
						|
+			    boot_cpu_has(X86_FEATURE_AVX) &&
 | 
						|
 			    boot_cpu_has(X86_FEATURE_AVX2) &&
 | 
						|
 			    cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
 | 
						|
-	alg.descsize = sizeof(struct poly1305_simd_desc_ctx);
 | 
						|
+	alg.descsize = sizeof(struct poly1305_desc_ctx) + 5 * sizeof(u32);
 | 
						|
 	if (poly1305_use_avx2)
 | 
						|
 		alg.descsize += 10 * sizeof(u32);
 | 
						|
-#endif
 | 
						|
+
 | 
						|
 	return crypto_register_shash(&alg);
 | 
						|
 }
 | 
						|
 
 | 
						|
--- a/crypto/poly1305_generic.c
 | 
						|
+++ b/crypto/poly1305_generic.c
 | 
						|
@@ -25,7 +25,7 @@ int crypto_poly1305_init(struct shash_de
 | 
						|
 
 | 
						|
 	poly1305_core_init(&dctx->h);
 | 
						|
 	dctx->buflen = 0;
 | 
						|
-	dctx->rset = false;
 | 
						|
+	dctx->rset = 0;
 | 
						|
 	dctx->sset = false;
 | 
						|
 
 | 
						|
 	return 0;
 | 
						|
@@ -43,7 +43,7 @@ static void poly1305_blocks(struct poly1
 | 
						|
 		srclen = datalen;
 | 
						|
 	}
 | 
						|
 
 | 
						|
-	poly1305_core_blocks(&dctx->h, &dctx->r, src,
 | 
						|
+	poly1305_core_blocks(&dctx->h, dctx->r, src,
 | 
						|
 			     srclen / POLY1305_BLOCK_SIZE, 1);
 | 
						|
 }
 | 
						|
 
 | 
						|
@@ -95,7 +95,7 @@ int crypto_poly1305_final(struct shash_d
 | 
						|
 		dctx->buf[dctx->buflen++] = 1;
 | 
						|
 		memset(dctx->buf + dctx->buflen, 0,
 | 
						|
 		       POLY1305_BLOCK_SIZE - dctx->buflen);
 | 
						|
-		poly1305_core_blocks(&dctx->h, &dctx->r, dctx->buf, 1, 0);
 | 
						|
+		poly1305_core_blocks(&dctx->h, dctx->r, dctx->buf, 1, 0);
 | 
						|
 	}
 | 
						|
 
 | 
						|
 	poly1305_core_emit(&dctx->h, digest);
 | 
						|
--- a/include/crypto/internal/poly1305.h
 | 
						|
+++ b/include/crypto/internal/poly1305.h
 | 
						|
@@ -46,10 +46,10 @@ unsigned int crypto_poly1305_setdesckey(
 | 
						|
 {
 | 
						|
 	if (!dctx->sset) {
 | 
						|
 		if (!dctx->rset && srclen >= POLY1305_BLOCK_SIZE) {
 | 
						|
-			poly1305_core_setkey(&dctx->r, src);
 | 
						|
+			poly1305_core_setkey(dctx->r, src);
 | 
						|
 			src += POLY1305_BLOCK_SIZE;
 | 
						|
 			srclen -= POLY1305_BLOCK_SIZE;
 | 
						|
-			dctx->rset = true;
 | 
						|
+			dctx->rset = 1;
 | 
						|
 		}
 | 
						|
 		if (srclen >= POLY1305_BLOCK_SIZE) {
 | 
						|
 			dctx->s[0] = get_unaligned_le32(src +  0);
 | 
						|
--- a/include/crypto/poly1305.h
 | 
						|
+++ b/include/crypto/poly1305.h
 | 
						|
@@ -22,20 +22,20 @@ struct poly1305_state {
 | 
						|
 };
 | 
						|
 
 | 
						|
 struct poly1305_desc_ctx {
 | 
						|
-	/* key */
 | 
						|
-	struct poly1305_key r;
 | 
						|
-	/* finalize key */
 | 
						|
-	u32 s[4];
 | 
						|
-	/* accumulator */
 | 
						|
-	struct poly1305_state h;
 | 
						|
 	/* partial buffer */
 | 
						|
 	u8 buf[POLY1305_BLOCK_SIZE];
 | 
						|
 	/* bytes used in partial buffer */
 | 
						|
 	unsigned int buflen;
 | 
						|
-	/* r key has been set */
 | 
						|
-	bool rset;
 | 
						|
-	/* s key has been set */
 | 
						|
+	/* how many keys have been set in r[] */
 | 
						|
+	unsigned short rset;
 | 
						|
+	/* whether s[] has been set */
 | 
						|
 	bool sset;
 | 
						|
+	/* finalize key */
 | 
						|
+	u32 s[4];
 | 
						|
+	/* accumulator */
 | 
						|
+	struct poly1305_state h;
 | 
						|
+	/* key */
 | 
						|
+	struct poly1305_key r[1];
 | 
						|
 };
 | 
						|
 
 | 
						|
 #endif
 |