kernel-5.4: backport latest patches for wireguard
These are the latest patches, which just landed upstream for 5.13, will be backported by Greg into 5.10 (because of stable@), and are now in the 5.4 backport branch of wireguard: https://git.zx2c4.com/wireguard-linux/log/?h=backport-5.4.y

Cc: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Tested-by: Stijn Segers <foss@volatilesystems.org>
This commit is contained in:
		 Jason A. Donenfeld
					Jason A. Donenfeld
				
			
				
					committed by
					
						 Hauke Mehrtens
						Hauke Mehrtens
					
				
			
			
				
	
			
			
			 Hauke Mehrtens
						Hauke Mehrtens
					
				
			
						parent
						
							79481c71dc
						
					
				
				
					commit
					2a3b2f59fe
				
			| @@ -0,0 +1,60 @@ | ||||
| From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 | ||||
| From: "Maciej W. Rozycki" <macro@orcam.me.uk> | ||||
| Date: Thu, 11 Mar 2021 21:50:47 -0700 | ||||
| Subject: [PATCH] crypto: mips/poly1305 - enable for all MIPS processors | ||||
|  | ||||
| commit 6c810cf20feef0d4338e9b424ab7f2644a8b353e upstream. | ||||
|  | ||||
| The MIPS Poly1305 implementation is generic MIPS code written so as to | ||||
| support ISAs down to the original MIPS I and MIPS III for the 32-bit and | ||||
| 64-bit variants respectively.  Lift the current limitation, which enables | ||||
| the code for MIPSr1 ISA or newer processors only, and make it available | ||||
| for all MIPS processors. | ||||
|  | ||||
| Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk> | ||||
| Fixes: a11d055e7a64 ("crypto: mips/poly1305 - incorporate OpenSSL/CRYPTOGAMS optimized implementation") | ||||
| Cc: stable@vger.kernel.org # v5.5+ | ||||
| Acked-by: Jason A. Donenfeld <Jason@zx2c4.com> | ||||
| Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de> | ||||
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> | ||||
| --- | ||||
|  arch/mips/crypto/Makefile | 4 ++-- | ||||
|  crypto/Kconfig            | 2 +- | ||||
|  drivers/net/Kconfig       | 2 +- | ||||
|  3 files changed, 4 insertions(+), 4 deletions(-) | ||||
|  | ||||
| --- a/arch/mips/crypto/Makefile | ||||
| +++ b/arch/mips/crypto/Makefile | ||||
| @@ -12,8 +12,8 @@ AFLAGS_chacha-core.o += -O2 # needed to | ||||
|  obj-$(CONFIG_CRYPTO_POLY1305_MIPS) += poly1305-mips.o | ||||
|  poly1305-mips-y := poly1305-core.o poly1305-glue.o | ||||
|   | ||||
| -perlasm-flavour-$(CONFIG_CPU_MIPS32) := o32 | ||||
| -perlasm-flavour-$(CONFIG_CPU_MIPS64) := 64 | ||||
| +perlasm-flavour-$(CONFIG_32BIT) := o32 | ||||
| +perlasm-flavour-$(CONFIG_64BIT) := 64 | ||||
|   | ||||
|  quiet_cmd_perlasm = PERLASM $@ | ||||
|        cmd_perlasm = $(PERL) $(<) $(perlasm-flavour-y) $(@) | ||||
| --- a/crypto/Kconfig | ||||
| +++ b/crypto/Kconfig | ||||
| @@ -740,7 +740,7 @@ config CRYPTO_POLY1305_X86_64 | ||||
|   | ||||
|  config CRYPTO_POLY1305_MIPS | ||||
|  	tristate "Poly1305 authenticator algorithm (MIPS optimized)" | ||||
| -	depends on CPU_MIPS32 || (CPU_MIPS64 && 64BIT) | ||||
| +	depends on MIPS | ||||
|  	select CRYPTO_ARCH_HAVE_LIB_POLY1305 | ||||
|   | ||||
|  config CRYPTO_MD4 | ||||
| --- a/drivers/net/Kconfig | ||||
| +++ b/drivers/net/Kconfig | ||||
| @@ -92,7 +92,7 @@ config WIREGUARD | ||||
|  	select CRYPTO_POLY1305_ARM if ARM | ||||
|  	select CRYPTO_CURVE25519_NEON if ARM && KERNEL_MODE_NEON | ||||
|  	select CRYPTO_CHACHA_MIPS if CPU_MIPS32_R2 | ||||
| -	select CRYPTO_POLY1305_MIPS if CPU_MIPS32 || (CPU_MIPS64 && 64BIT) | ||||
| +	select CRYPTO_POLY1305_MIPS if MIPS | ||||
|  	help | ||||
|  	  WireGuard is a secure, fast, and easy to use replacement for IPSec | ||||
|  	  that uses modern cryptography and clever networking tricks. It's | ||||
| @@ -0,0 +1,24 @@ | ||||
| From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 | ||||
| From: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com> | ||||
| Date: Sat, 27 Mar 2021 19:39:43 -0700 | ||||
| Subject: [PATCH] crypto: mips: add poly1305-core.S to .gitignore | ||||
|  | ||||
| commit dc92d0df51dc61de88bf6f4884a17bf73d5c6326 upstream. | ||||
|  | ||||
| poly1305-core.S is an auto-generated file, so it should be ignored. | ||||
|  | ||||
| Fixes: a11d055e7a64 ("crypto: mips/poly1305 - incorporate OpenSSL/CRYPTOGAMS optimized implementation") | ||||
| Signed-off-by: Ilya Lipnitskiy <ilya.lipnitskiy@gmail.com> | ||||
| Cc: Ard Biesheuvel <ardb@kernel.org> | ||||
| Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de> | ||||
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> | ||||
| --- | ||||
|  arch/mips/crypto/.gitignore | 2 ++ | ||||
|  1 file changed, 2 insertions(+) | ||||
|  create mode 100644 arch/mips/crypto/.gitignore | ||||
|  | ||||
| --- /dev/null | ||||
| +++ b/arch/mips/crypto/.gitignore | ||||
| @@ -0,0 +1,2 @@ | ||||
| +# SPDX-License-Identifier: GPL-2.0-only | ||||
| +poly1305-core.S | ||||
| @@ -0,0 +1,172 @@ | ||||
| From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 | ||||
| From: Arnd Bergmann <arnd@arndb.de> | ||||
| Date: Mon, 22 Mar 2021 18:05:15 +0100 | ||||
| Subject: [PATCH] crypto: poly1305 - fix poly1305_core_setkey() declaration | ||||
| MIME-Version: 1.0 | ||||
| Content-Type: text/plain; charset=UTF-8 | ||||
| Content-Transfer-Encoding: 8bit | ||||
|  | ||||
| commit 8d195e7a8ada68928f2aedb2c18302a4518fe68e upstream. | ||||
|  | ||||
| gcc-11 points out a mismatch between the declaration and the definition | ||||
| of poly1305_core_setkey(): | ||||
|  | ||||
| lib/crypto/poly1305-donna32.c:13:67: error: argument 2 of type ‘const u8[16]’ {aka ‘const unsigned char[16]’} with mismatched bound [-Werror=array-parameter=] | ||||
|    13 | void poly1305_core_setkey(struct poly1305_core_key *key, const u8 raw_key[16]) | ||||
|       |                                                          ~~~~~~~~~^~~~~~~~~~~ | ||||
| In file included from lib/crypto/poly1305-donna32.c:11: | ||||
| include/crypto/internal/poly1305.h:21:68: note: previously declared as ‘const u8 *’ {aka ‘const unsigned char *’} | ||||
|    21 | void poly1305_core_setkey(struct poly1305_core_key *key, const u8 *raw_key); | ||||
|  | ||||
| This is harmless in principle, as the calling conventions are the same, | ||||
| but the more specific prototype allows better type checking in the | ||||
| caller. | ||||
|  | ||||
| Change the declaration to match the actual function definition. | ||||
| The poly1305_simd_init() function is a bit suspicious here, as it | ||||
| previously had a 32-byte argument type, but it looks like it needs to | ||||
| take the 16-byte POLY1305_BLOCK_SIZE array instead. | ||||
|  | ||||
| Fixes: 1c08a104360f ("crypto: poly1305 - add new 32 and 64-bit generic versions") | ||||
| Signed-off-by: Arnd Bergmann <arnd@arndb.de> | ||||
| Reviewed-by: Ard Biesheuvel <ardb@kernel.org> | ||||
| Reviewed-by: Eric Biggers <ebiggers@google.com> | ||||
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> | ||||
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> | ||||
| --- | ||||
|  arch/arm/crypto/poly1305-glue.c    | 2 +- | ||||
|  arch/arm64/crypto/poly1305-glue.c  | 2 +- | ||||
|  arch/mips/crypto/poly1305-glue.c   | 2 +- | ||||
|  arch/x86/crypto/poly1305_glue.c    | 6 +++--- | ||||
|  include/crypto/internal/poly1305.h | 3 ++- | ||||
|  include/crypto/poly1305.h          | 6 ++++-- | ||||
|  lib/crypto/poly1305-donna32.c      | 3 ++- | ||||
|  lib/crypto/poly1305-donna64.c      | 3 ++- | ||||
|  lib/crypto/poly1305.c              | 3 ++- | ||||
|  9 files changed, 18 insertions(+), 12 deletions(-) | ||||
|  | ||||
| --- a/arch/arm/crypto/poly1305-glue.c | ||||
| +++ b/arch/arm/crypto/poly1305-glue.c | ||||
| @@ -29,7 +29,7 @@ void __weak poly1305_blocks_neon(void *s | ||||
|   | ||||
|  static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); | ||||
|   | ||||
| -void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key) | ||||
| +void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE]) | ||||
|  { | ||||
|  	poly1305_init_arm(&dctx->h, key); | ||||
|  	dctx->s[0] = get_unaligned_le32(key + 16); | ||||
| --- a/arch/arm64/crypto/poly1305-glue.c | ||||
| +++ b/arch/arm64/crypto/poly1305-glue.c | ||||
| @@ -25,7 +25,7 @@ asmlinkage void poly1305_emit(void *stat | ||||
|   | ||||
|  static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); | ||||
|   | ||||
| -void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key) | ||||
| +void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE]) | ||||
|  { | ||||
|  	poly1305_init_arm64(&dctx->h, key); | ||||
|  	dctx->s[0] = get_unaligned_le32(key + 16); | ||||
| --- a/arch/mips/crypto/poly1305-glue.c | ||||
| +++ b/arch/mips/crypto/poly1305-glue.c | ||||
| @@ -17,7 +17,7 @@ asmlinkage void poly1305_init_mips(void | ||||
|  asmlinkage void poly1305_blocks_mips(void *state, const u8 *src, u32 len, u32 hibit); | ||||
|  asmlinkage void poly1305_emit_mips(void *state, u8 *digest, const u32 *nonce); | ||||
|   | ||||
| -void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key) | ||||
| +void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE]) | ||||
|  { | ||||
|  	poly1305_init_mips(&dctx->h, key); | ||||
|  	dctx->s[0] = get_unaligned_le32(key + 16); | ||||
| --- a/arch/x86/crypto/poly1305_glue.c | ||||
| +++ b/arch/x86/crypto/poly1305_glue.c | ||||
| @@ -15,7 +15,7 @@ | ||||
|  #include <asm/simd.h> | ||||
|   | ||||
|  asmlinkage void poly1305_init_x86_64(void *ctx, | ||||
| -				     const u8 key[POLY1305_KEY_SIZE]); | ||||
| +				     const u8 key[POLY1305_BLOCK_SIZE]); | ||||
|  asmlinkage void poly1305_blocks_x86_64(void *ctx, const u8 *inp, | ||||
|  				       const size_t len, const u32 padbit); | ||||
|  asmlinkage void poly1305_emit_x86_64(void *ctx, u8 mac[POLY1305_DIGEST_SIZE], | ||||
| @@ -80,7 +80,7 @@ static void convert_to_base2_64(void *ct | ||||
|  	state->is_base2_26 = 0; | ||||
|  } | ||||
|   | ||||
| -static void poly1305_simd_init(void *ctx, const u8 key[POLY1305_KEY_SIZE]) | ||||
| +static void poly1305_simd_init(void *ctx, const u8 key[POLY1305_BLOCK_SIZE]) | ||||
|  { | ||||
|  	poly1305_init_x86_64(ctx, key); | ||||
|  } | ||||
| @@ -128,7 +128,7 @@ static void poly1305_simd_emit(void *ctx | ||||
|  		poly1305_emit_avx(ctx, mac, nonce); | ||||
|  } | ||||
|   | ||||
| -void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key) | ||||
| +void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE]) | ||||
|  { | ||||
|  	poly1305_simd_init(&dctx->h, key); | ||||
|  	dctx->s[0] = get_unaligned_le32(&key[16]); | ||||
| --- a/include/crypto/internal/poly1305.h | ||||
| +++ b/include/crypto/internal/poly1305.h | ||||
| @@ -18,7 +18,8 @@ | ||||
|   * only the ε-almost-∆-universal hash function (not the full MAC) is computed. | ||||
|   */ | ||||
|   | ||||
| -void poly1305_core_setkey(struct poly1305_core_key *key, const u8 *raw_key); | ||||
| +void poly1305_core_setkey(struct poly1305_core_key *key, | ||||
| +			  const u8 raw_key[POLY1305_BLOCK_SIZE]); | ||||
|  static inline void poly1305_core_init(struct poly1305_state *state) | ||||
|  { | ||||
|  	*state = (struct poly1305_state){}; | ||||
| --- a/include/crypto/poly1305.h | ||||
| +++ b/include/crypto/poly1305.h | ||||
| @@ -58,8 +58,10 @@ struct poly1305_desc_ctx { | ||||
|  	}; | ||||
|  }; | ||||
|   | ||||
| -void poly1305_init_arch(struct poly1305_desc_ctx *desc, const u8 *key); | ||||
| -void poly1305_init_generic(struct poly1305_desc_ctx *desc, const u8 *key); | ||||
| +void poly1305_init_arch(struct poly1305_desc_ctx *desc, | ||||
| +			const u8 key[POLY1305_KEY_SIZE]); | ||||
| +void poly1305_init_generic(struct poly1305_desc_ctx *desc, | ||||
| +			   const u8 key[POLY1305_KEY_SIZE]); | ||||
|   | ||||
|  static inline void poly1305_init(struct poly1305_desc_ctx *desc, const u8 *key) | ||||
|  { | ||||
| --- a/lib/crypto/poly1305-donna32.c | ||||
| +++ b/lib/crypto/poly1305-donna32.c | ||||
| @@ -10,7 +10,8 @@ | ||||
|  #include <asm/unaligned.h> | ||||
|  #include <crypto/internal/poly1305.h> | ||||
|   | ||||
| -void poly1305_core_setkey(struct poly1305_core_key *key, const u8 raw_key[16]) | ||||
| +void poly1305_core_setkey(struct poly1305_core_key *key, | ||||
| +			  const u8 raw_key[POLY1305_BLOCK_SIZE]) | ||||
|  { | ||||
|  	/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ | ||||
|  	key->key.r[0] = (get_unaligned_le32(&raw_key[0])) & 0x3ffffff; | ||||
| --- a/lib/crypto/poly1305-donna64.c | ||||
| +++ b/lib/crypto/poly1305-donna64.c | ||||
| @@ -12,7 +12,8 @@ | ||||
|   | ||||
|  typedef __uint128_t u128; | ||||
|   | ||||
| -void poly1305_core_setkey(struct poly1305_core_key *key, const u8 raw_key[16]) | ||||
| +void poly1305_core_setkey(struct poly1305_core_key *key, | ||||
| +			  const u8 raw_key[POLY1305_BLOCK_SIZE]) | ||||
|  { | ||||
|  	u64 t0, t1; | ||||
|   | ||||
| --- a/lib/crypto/poly1305.c | ||||
| +++ b/lib/crypto/poly1305.c | ||||
| @@ -12,7 +12,8 @@ | ||||
|  #include <linux/module.h> | ||||
|  #include <asm/unaligned.h> | ||||
|   | ||||
| -void poly1305_init_generic(struct poly1305_desc_ctx *desc, const u8 *key) | ||||
| +void poly1305_init_generic(struct poly1305_desc_ctx *desc, | ||||
| +			   const u8 key[POLY1305_KEY_SIZE]) | ||||
|  { | ||||
|  	poly1305_core_setkey(&desc->core_r, key); | ||||
|  	desc->s[0] = get_unaligned_le32(key + 16); | ||||
| @@ -0,0 +1,29 @@ | ||||
| From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 | ||||
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> | ||||
| Date: Fri, 4 Jun 2021 17:17:30 +0200 | ||||
| Subject: [PATCH] wireguard: selftests: remove old conntrack kconfig value | ||||
|  | ||||
| commit acf2492b51c9a3c4dfb947f4d3477a86d315150f upstream. | ||||
|  | ||||
| On recent kernels, this config symbol is no longer used. | ||||
|  | ||||
| Reported-by: Rui Salvaterra <rsalvaterra@gmail.com> | ||||
| Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") | ||||
| Cc: stable@vger.kernel.org | ||||
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> | ||||
| Signed-off-by: David S. Miller <davem@davemloft.net> | ||||
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> | ||||
| --- | ||||
|  tools/testing/selftests/wireguard/qemu/kernel.config | 1 - | ||||
|  1 file changed, 1 deletion(-) | ||||
|  | ||||
| --- a/tools/testing/selftests/wireguard/qemu/kernel.config | ||||
| +++ b/tools/testing/selftests/wireguard/qemu/kernel.config | ||||
| @@ -19,7 +19,6 @@ CONFIG_NETFILTER_XTABLES=y | ||||
|  CONFIG_NETFILTER_XT_NAT=y | ||||
|  CONFIG_NETFILTER_XT_MATCH_LENGTH=y | ||||
|  CONFIG_NETFILTER_XT_MARK=y | ||||
| -CONFIG_NF_CONNTRACK_IPV4=y | ||||
|  CONFIG_NF_NAT_IPV4=y | ||||
|  CONFIG_IP_NF_IPTABLES=y | ||||
|  CONFIG_IP_NF_FILTER=y | ||||
| @@ -0,0 +1,31 @@ | ||||
| From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 | ||||
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> | ||||
| Date: Fri, 4 Jun 2021 17:17:31 +0200 | ||||
| Subject: [PATCH] wireguard: selftests: make sure rp_filter is disabled on | ||||
|  vethc | ||||
|  | ||||
| commit f8873d11d4121aad35024f9379e431e0c83abead upstream. | ||||
|  | ||||
| Some distros may enable strict rp_filter by default, which will prevent | ||||
| vethc from receiving the packets with an unrouteable reverse path address. | ||||
|  | ||||
| Reported-by: Hangbin Liu <liuhangbin@gmail.com> | ||||
| Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") | ||||
| Cc: stable@vger.kernel.org | ||||
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> | ||||
| Signed-off-by: David S. Miller <davem@davemloft.net> | ||||
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> | ||||
| --- | ||||
|  tools/testing/selftests/wireguard/netns.sh | 1 + | ||||
|  1 file changed, 1 insertion(+) | ||||
|  | ||||
| --- a/tools/testing/selftests/wireguard/netns.sh | ||||
| +++ b/tools/testing/selftests/wireguard/netns.sh | ||||
| @@ -363,6 +363,7 @@ ip1 -6 rule add table main suppress_pref | ||||
|  ip1 -4 route add default dev wg0 table 51820 | ||||
|  ip1 -4 rule add not fwmark 51820 table 51820 | ||||
|  ip1 -4 rule add table main suppress_prefixlength 0 | ||||
| +n1 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/vethc/rp_filter' | ||||
|  # Flood the pings instead of sending just one, to trigger routing table reference counting bugs. | ||||
|  n1 ping -W 1 -c 100 -f 192.168.99.7 | ||||
|  n1 ping -W 1 -c 100 -f abab::1111 | ||||
| @@ -0,0 +1,33 @@ | ||||
| From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 | ||||
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> | ||||
| Date: Fri, 4 Jun 2021 17:17:32 +0200 | ||||
| Subject: [PATCH] wireguard: do not use -O3 | ||||
|  | ||||
| commit cc5060ca0285efe2728bced399a1955a7ce808b2 upstream. | ||||
|  | ||||
| Apparently, various versions of gcc have O3-related miscompiles. Looking | ||||
| at the difference between -O2 and -O3 for gcc 11 doesn't indicate | ||||
| miscompiles, but the difference also doesn't seem so significant for | ||||
| performance that it's worth risking. | ||||
|  | ||||
| Link: https://lore.kernel.org/lkml/CAHk-=wjuoGyxDhAF8SsrTkN0-YfCx7E6jUN3ikC_tn2AKWTTsA@mail.gmail.com/ | ||||
| Link: https://lore.kernel.org/lkml/CAHmME9otB5Wwxp7H8bR_i2uH2esEMvoBMC8uEXBMH9p0q1s6Bw@mail.gmail.com/ | ||||
| Reported-by: Linus Torvalds <torvalds@linux-foundation.org> | ||||
| Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") | ||||
| Cc: stable@vger.kernel.org | ||||
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> | ||||
| Signed-off-by: David S. Miller <davem@davemloft.net> | ||||
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> | ||||
| --- | ||||
|  drivers/net/wireguard/Makefile | 3 +-- | ||||
|  1 file changed, 1 insertion(+), 2 deletions(-) | ||||
|  | ||||
| --- a/drivers/net/wireguard/Makefile | ||||
| +++ b/drivers/net/wireguard/Makefile | ||||
| @@ -1,5 +1,4 @@ | ||||
| -ccflags-y := -O3 | ||||
| -ccflags-y += -D'pr_fmt(fmt)=KBUILD_MODNAME ": " fmt' | ||||
| +ccflags-y := -D'pr_fmt(fmt)=KBUILD_MODNAME ": " fmt' | ||||
|  ccflags-$(CONFIG_WIREGUARD_DEBUG) += -DDEBUG | ||||
|  wireguard-y := main.o | ||||
|  wireguard-y += noise.o | ||||
| @@ -0,0 +1,66 @@ | ||||
| From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 | ||||
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> | ||||
| Date: Fri, 4 Jun 2021 17:17:33 +0200 | ||||
| Subject: [PATCH] wireguard: use synchronize_net rather than synchronize_rcu | ||||
|  | ||||
| commit 24b70eeeb4f46c09487f8155239ebfb1f875774a upstream. | ||||
|  | ||||
| Many of the synchronization points are sometimes called under the rtnl | ||||
| lock, which means we should use synchronize_net rather than | ||||
| synchronize_rcu. Under the hood, this expands to using the expedited | ||||
| flavor of function in the event that rtnl is held, in order to not stall | ||||
| other concurrent changes. | ||||
|  | ||||
| This fixes some very, very long delays when removing multiple peers at | ||||
| once, which would cause some operations to take several minutes. | ||||
|  | ||||
| Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") | ||||
| Cc: stable@vger.kernel.org | ||||
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> | ||||
| Signed-off-by: David S. Miller <davem@davemloft.net> | ||||
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> | ||||
| --- | ||||
|  drivers/net/wireguard/peer.c   | 6 +++--- | ||||
|  drivers/net/wireguard/socket.c | 2 +- | ||||
|  2 files changed, 4 insertions(+), 4 deletions(-) | ||||
|  | ||||
| --- a/drivers/net/wireguard/peer.c | ||||
| +++ b/drivers/net/wireguard/peer.c | ||||
| @@ -88,7 +88,7 @@ static void peer_make_dead(struct wg_pee | ||||
|  	/* Mark as dead, so that we don't allow jumping contexts after. */ | ||||
|  	WRITE_ONCE(peer->is_dead, true); | ||||
|   | ||||
| -	/* The caller must now synchronize_rcu() for this to take effect. */ | ||||
| +	/* The caller must now synchronize_net() for this to take effect. */ | ||||
|  } | ||||
|   | ||||
|  static void peer_remove_after_dead(struct wg_peer *peer) | ||||
| @@ -160,7 +160,7 @@ void wg_peer_remove(struct wg_peer *peer | ||||
|  	lockdep_assert_held(&peer->device->device_update_lock); | ||||
|   | ||||
|  	peer_make_dead(peer); | ||||
| -	synchronize_rcu(); | ||||
| +	synchronize_net(); | ||||
|  	peer_remove_after_dead(peer); | ||||
|  } | ||||
|   | ||||
| @@ -178,7 +178,7 @@ void wg_peer_remove_all(struct wg_device | ||||
|  		peer_make_dead(peer); | ||||
|  		list_add_tail(&peer->peer_list, &dead_peers); | ||||
|  	} | ||||
| -	synchronize_rcu(); | ||||
| +	synchronize_net(); | ||||
|  	list_for_each_entry_safe(peer, temp, &dead_peers, peer_list) | ||||
|  		peer_remove_after_dead(peer); | ||||
|  } | ||||
| --- a/drivers/net/wireguard/socket.c | ||||
| +++ b/drivers/net/wireguard/socket.c | ||||
| @@ -430,7 +430,7 @@ void wg_socket_reinit(struct wg_device * | ||||
|  	if (new4) | ||||
|  		wg->incoming_port = ntohs(inet_sk(new4)->inet_sport); | ||||
|  	mutex_unlock(&wg->socket_update_lock); | ||||
| -	synchronize_rcu(); | ||||
| +	synchronize_net(); | ||||
|  	sock_free(old4); | ||||
|  	sock_free(old6); | ||||
|  } | ||||
| @@ -0,0 +1,125 @@ | ||||
| From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 | ||||
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> | ||||
| Date: Fri, 4 Jun 2021 17:17:34 +0200 | ||||
| Subject: [PATCH] wireguard: peer: allocate in kmem_cache | ||||
|  | ||||
| commit a4e9f8e3287c9eb6bf70df982870980dd3341863 upstream. | ||||
|  | ||||
| With deployments having upwards of 600k peers now, this somewhat heavy | ||||
| structure could benefit from more fine-grained allocations. | ||||
| Specifically, instead of using a 2048-byte slab for a 1544-byte object, | ||||
| we can now use 1544-byte objects directly, thus saving almost 25% | ||||
| per-peer, or with 600k peers, that's a savings of 303 MiB. This also | ||||
| makes wireguard's memory usage more transparent in tools like slabtop | ||||
| and /proc/slabinfo. | ||||
|  | ||||
| Fixes: 8b5553ace83c ("wireguard: queueing: get rid of per-peer ring buffers") | ||||
| Suggested-by: Arnd Bergmann <arnd@arndb.de> | ||||
| Suggested-by: Matthew Wilcox <willy@infradead.org> | ||||
| Cc: stable@vger.kernel.org | ||||
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> | ||||
| Signed-off-by: David S. Miller <davem@davemloft.net> | ||||
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> | ||||
| --- | ||||
|  drivers/net/wireguard/main.c |  7 +++++++ | ||||
|  drivers/net/wireguard/peer.c | 21 +++++++++++++++++---- | ||||
|  drivers/net/wireguard/peer.h |  3 +++ | ||||
|  3 files changed, 27 insertions(+), 4 deletions(-) | ||||
|  | ||||
| --- a/drivers/net/wireguard/main.c | ||||
| +++ b/drivers/net/wireguard/main.c | ||||
| @@ -28,6 +28,10 @@ static int __init mod_init(void) | ||||
|  #endif | ||||
|  	wg_noise_init(); | ||||
|   | ||||
| +	ret = wg_peer_init(); | ||||
| +	if (ret < 0) | ||||
| +		goto err_peer; | ||||
| + | ||||
|  	ret = wg_device_init(); | ||||
|  	if (ret < 0) | ||||
|  		goto err_device; | ||||
| @@ -44,6 +48,8 @@ static int __init mod_init(void) | ||||
|  err_netlink: | ||||
|  	wg_device_uninit(); | ||||
|  err_device: | ||||
| +	wg_peer_uninit(); | ||||
| +err_peer: | ||||
|  	return ret; | ||||
|  } | ||||
|   | ||||
| @@ -51,6 +57,7 @@ static void __exit mod_exit(void) | ||||
|  { | ||||
|  	wg_genetlink_uninit(); | ||||
|  	wg_device_uninit(); | ||||
| +	wg_peer_uninit(); | ||||
|  } | ||||
|   | ||||
|  module_init(mod_init); | ||||
| --- a/drivers/net/wireguard/peer.c | ||||
| +++ b/drivers/net/wireguard/peer.c | ||||
| @@ -15,6 +15,7 @@ | ||||
|  #include <linux/rcupdate.h> | ||||
|  #include <linux/list.h> | ||||
|   | ||||
| +static struct kmem_cache *peer_cache; | ||||
|  static atomic64_t peer_counter = ATOMIC64_INIT(0); | ||||
|   | ||||
|  struct wg_peer *wg_peer_create(struct wg_device *wg, | ||||
| @@ -29,10 +30,10 @@ struct wg_peer *wg_peer_create(struct wg | ||||
|  	if (wg->num_peers >= MAX_PEERS_PER_DEVICE) | ||||
|  		return ERR_PTR(ret); | ||||
|   | ||||
| -	peer = kzalloc(sizeof(*peer), GFP_KERNEL); | ||||
| +	peer = kmem_cache_zalloc(peer_cache, GFP_KERNEL); | ||||
|  	if (unlikely(!peer)) | ||||
|  		return ERR_PTR(ret); | ||||
| -	if (dst_cache_init(&peer->endpoint_cache, GFP_KERNEL)) | ||||
| +	if (unlikely(dst_cache_init(&peer->endpoint_cache, GFP_KERNEL))) | ||||
|  		goto err; | ||||
|   | ||||
|  	peer->device = wg; | ||||
| @@ -64,7 +65,7 @@ struct wg_peer *wg_peer_create(struct wg | ||||
|  	return peer; | ||||
|   | ||||
|  err: | ||||
| -	kfree(peer); | ||||
| +	kmem_cache_free(peer_cache, peer); | ||||
|  	return ERR_PTR(ret); | ||||
|  } | ||||
|   | ||||
| @@ -193,7 +194,8 @@ static void rcu_release(struct rcu_head | ||||
|  	/* The final zeroing takes care of clearing any remaining handshake key | ||||
|  	 * material and other potentially sensitive information. | ||||
|  	 */ | ||||
| -	kzfree(peer); | ||||
| +	memzero_explicit(peer, sizeof(*peer)); | ||||
| +	kmem_cache_free(peer_cache, peer); | ||||
|  } | ||||
|   | ||||
|  static void kref_release(struct kref *refcount) | ||||
| @@ -225,3 +227,14 @@ void wg_peer_put(struct wg_peer *peer) | ||||
|  		return; | ||||
|  	kref_put(&peer->refcount, kref_release); | ||||
|  } | ||||
| + | ||||
| +int __init wg_peer_init(void) | ||||
| +{ | ||||
| +	peer_cache = KMEM_CACHE(wg_peer, 0); | ||||
| +	return peer_cache ? 0 : -ENOMEM; | ||||
| +} | ||||
| + | ||||
| +void wg_peer_uninit(void) | ||||
| +{ | ||||
| +	kmem_cache_destroy(peer_cache); | ||||
| +} | ||||
| --- a/drivers/net/wireguard/peer.h | ||||
| +++ b/drivers/net/wireguard/peer.h | ||||
| @@ -80,4 +80,7 @@ void wg_peer_put(struct wg_peer *peer); | ||||
|  void wg_peer_remove(struct wg_peer *peer); | ||||
|  void wg_peer_remove_all(struct wg_device *wg); | ||||
|   | ||||
| +int wg_peer_init(void); | ||||
| +void wg_peer_uninit(void); | ||||
| + | ||||
|  #endif /* _WG_PEER_H */ | ||||
| @@ -0,0 +1,43 @@ | ||||
| From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 | ||||
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> | ||||
| Date: Fri, 4 Jun 2021 17:17:35 +0200 | ||||
| Subject: [PATCH] wireguard: allowedips: initialize list head in selftest | ||||
|  | ||||
| commit 46cfe8eee285cde465b420637507884551f5d7ca upstream. | ||||
|  | ||||
| The randomized trie tests weren't initializing the dummy peer list head, | ||||
| resulting in a NULL pointer dereference when used. Fix this by | ||||
| initializing it in the randomized trie test, just like we do for the | ||||
| static unit test. | ||||
|  | ||||
| While we're at it, all of the other strings like this have the word | ||||
| "self-test", so add it to the missing place here. | ||||
|  | ||||
| Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") | ||||
| Cc: stable@vger.kernel.org | ||||
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> | ||||
| Signed-off-by: David S. Miller <davem@davemloft.net> | ||||
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> | ||||
| --- | ||||
|  drivers/net/wireguard/selftest/allowedips.c | 3 ++- | ||||
|  1 file changed, 2 insertions(+), 1 deletion(-) | ||||
|  | ||||
| --- a/drivers/net/wireguard/selftest/allowedips.c | ||||
| +++ b/drivers/net/wireguard/selftest/allowedips.c | ||||
| @@ -296,6 +296,7 @@ static __init bool randomized_test(void) | ||||
|  			goto free; | ||||
|  		} | ||||
|  		kref_init(&peers[i]->refcount); | ||||
| +		INIT_LIST_HEAD(&peers[i]->allowedips_list); | ||||
|  	} | ||||
|   | ||||
|  	mutex_lock(&mutex); | ||||
| @@ -333,7 +334,7 @@ static __init bool randomized_test(void) | ||||
|  			if (wg_allowedips_insert_v4(&t, | ||||
|  						    (struct in_addr *)mutated, | ||||
|  						    cidr, peer, &mutex) < 0) { | ||||
| -				pr_err("allowedips random malloc: FAIL\n"); | ||||
| +				pr_err("allowedips random self-test malloc: FAIL\n"); | ||||
|  				goto free_locked; | ||||
|  			} | ||||
|  			if (horrible_allowedips_insert_v4(&h, | ||||
| @@ -0,0 +1,237 @@ | ||||
| From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 | ||||
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> | ||||
| Date: Fri, 4 Jun 2021 17:17:36 +0200 | ||||
| Subject: [PATCH] wireguard: allowedips: remove nodes in O(1) | ||||
|  | ||||
| commit f634f418c227c912e7ea95a3299efdc9b10e4022 upstream. | ||||
|  | ||||
| Previously, deleting peers would require traversing the entire trie in | ||||
| order to rebalance nodes and safely free them. This meant that removing | ||||
| 1000 peers from a trie with a half million nodes would take an extremely | ||||
| long time, during which we're holding the rtnl lock. Large-scale users | ||||
| were reporting 200ms latencies added to the networking stack as a whole | ||||
| every time their userspace software would queue up significant removals. | ||||
| That's a serious situation. | ||||
|  | ||||
| This commit fixes that by maintaining a double pointer to the parent's | ||||
| bit pointer for each node, and then using the already existing node list | ||||
| belonging to each peer to go directly to the node, fix up its pointers, | ||||
| and free it with RCU. This means removal is O(1) instead of O(n), and we | ||||
| don't use gobs of stack. | ||||
|  | ||||
| The removal algorithm has the same downside as the code that it fixes: | ||||
| it won't collapse needlessly long runs of fillers.  We can enhance that | ||||
| in the future if it ever becomes a problem. This commit documents that | ||||
| limitation with a TODO comment in code, a small but meaningful | ||||
| improvement over the prior situation. | ||||
|  | ||||
| Currently the biggest flaw, which the next commit addresses, is that | ||||
| this increases the node size on 64-bit machines from 60 bytes to | ||||
| 68 bytes. 60 rounds up to 64, but 68 rounds up to 128. So we wind up | ||||
| using twice as much memory per node, because of power-of-two | ||||
| allocations, which is a big bummer. We'll need to figure something out | ||||
| there. | ||||
|  | ||||
| Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") | ||||
| Cc: stable@vger.kernel.org | ||||
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> | ||||
| Signed-off-by: David S. Miller <davem@davemloft.net> | ||||
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> | ||||
| --- | ||||
|  drivers/net/wireguard/allowedips.c | 132 ++++++++++++----------------- | ||||
|  drivers/net/wireguard/allowedips.h |   9 +- | ||||
|  2 files changed, 57 insertions(+), 84 deletions(-) | ||||
|  | ||||
| --- a/drivers/net/wireguard/allowedips.c | ||||
| +++ b/drivers/net/wireguard/allowedips.c | ||||
| @@ -66,60 +66,6 @@ static void root_remove_peer_lists(struc | ||||
|  	} | ||||
|  } | ||||
|   | ||||
| -static void walk_remove_by_peer(struct allowedips_node __rcu **top, | ||||
| -				struct wg_peer *peer, struct mutex *lock) | ||||
| -{ | ||||
| -#define REF(p) rcu_access_pointer(p) | ||||
| -#define DEREF(p) rcu_dereference_protected(*(p), lockdep_is_held(lock)) | ||||
| -#define PUSH(p) ({                                                             \ | ||||
| -		WARN_ON(IS_ENABLED(DEBUG) && len >= 128);                      \ | ||||
| -		stack[len++] = p;                                              \ | ||||
| -	}) | ||||
| - | ||||
| -	struct allowedips_node __rcu **stack[128], **nptr; | ||||
| -	struct allowedips_node *node, *prev; | ||||
| -	unsigned int len; | ||||
| - | ||||
| -	if (unlikely(!peer || !REF(*top))) | ||||
| -		return; | ||||
| - | ||||
| -	for (prev = NULL, len = 0, PUSH(top); len > 0; prev = node) { | ||||
| -		nptr = stack[len - 1]; | ||||
| -		node = DEREF(nptr); | ||||
| -		if (!node) { | ||||
| -			--len; | ||||
| -			continue; | ||||
| -		} | ||||
| -		if (!prev || REF(prev->bit[0]) == node || | ||||
| -		    REF(prev->bit[1]) == node) { | ||||
| -			if (REF(node->bit[0])) | ||||
| -				PUSH(&node->bit[0]); | ||||
| -			else if (REF(node->bit[1])) | ||||
| -				PUSH(&node->bit[1]); | ||||
| -		} else if (REF(node->bit[0]) == prev) { | ||||
| -			if (REF(node->bit[1])) | ||||
| -				PUSH(&node->bit[1]); | ||||
| -		} else { | ||||
| -			if (rcu_dereference_protected(node->peer, | ||||
| -				lockdep_is_held(lock)) == peer) { | ||||
| -				RCU_INIT_POINTER(node->peer, NULL); | ||||
| -				list_del_init(&node->peer_list); | ||||
| -				if (!node->bit[0] || !node->bit[1]) { | ||||
| -					rcu_assign_pointer(*nptr, DEREF( | ||||
| -					       &node->bit[!REF(node->bit[0])])); | ||||
| -					kfree_rcu(node, rcu); | ||||
| -					node = DEREF(nptr); | ||||
| -				} | ||||
| -			} | ||||
| -			--len; | ||||
| -		} | ||||
| -	} | ||||
| - | ||||
| -#undef REF | ||||
| -#undef DEREF | ||||
| -#undef PUSH | ||||
| -} | ||||
| - | ||||
|  static unsigned int fls128(u64 a, u64 b) | ||||
|  { | ||||
|  	return a ? fls64(a) + 64U : fls64(b); | ||||
| @@ -224,6 +170,7 @@ static int add(struct allowedips_node __ | ||||
|  		RCU_INIT_POINTER(node->peer, peer); | ||||
|  		list_add_tail(&node->peer_list, &peer->allowedips_list); | ||||
|  		copy_and_assign_cidr(node, key, cidr, bits); | ||||
| +		rcu_assign_pointer(node->parent_bit, trie); | ||||
|  		rcu_assign_pointer(*trie, node); | ||||
|  		return 0; | ||||
|  	} | ||||
| @@ -243,9 +190,9 @@ static int add(struct allowedips_node __ | ||||
|  	if (!node) { | ||||
|  		down = rcu_dereference_protected(*trie, lockdep_is_held(lock)); | ||||
|  	} else { | ||||
| -		down = rcu_dereference_protected(CHOOSE_NODE(node, key), | ||||
| -						 lockdep_is_held(lock)); | ||||
| +		down = rcu_dereference_protected(CHOOSE_NODE(node, key), lockdep_is_held(lock)); | ||||
|  		if (!down) { | ||||
| +			rcu_assign_pointer(newnode->parent_bit, &CHOOSE_NODE(node, key)); | ||||
|  			rcu_assign_pointer(CHOOSE_NODE(node, key), newnode); | ||||
|  			return 0; | ||||
|  		} | ||||
| @@ -254,29 +201,37 @@ static int add(struct allowedips_node __ | ||||
|  	parent = node; | ||||
|   | ||||
|  	if (newnode->cidr == cidr) { | ||||
| +		rcu_assign_pointer(down->parent_bit, &CHOOSE_NODE(newnode, down->bits)); | ||||
|  		rcu_assign_pointer(CHOOSE_NODE(newnode, down->bits), down); | ||||
| -		if (!parent) | ||||
| +		if (!parent) { | ||||
| +			rcu_assign_pointer(newnode->parent_bit, trie); | ||||
|  			rcu_assign_pointer(*trie, newnode); | ||||
| -		else | ||||
| -			rcu_assign_pointer(CHOOSE_NODE(parent, newnode->bits), | ||||
| -					   newnode); | ||||
| -	} else { | ||||
| -		node = kzalloc(sizeof(*node), GFP_KERNEL); | ||||
| -		if (unlikely(!node)) { | ||||
| -			list_del(&newnode->peer_list); | ||||
| -			kfree(newnode); | ||||
| -			return -ENOMEM; | ||||
| +		} else { | ||||
| +			rcu_assign_pointer(newnode->parent_bit, &CHOOSE_NODE(parent, newnode->bits)); | ||||
| +			rcu_assign_pointer(CHOOSE_NODE(parent, newnode->bits), newnode); | ||||
|  		} | ||||
| -		INIT_LIST_HEAD(&node->peer_list); | ||||
| -		copy_and_assign_cidr(node, newnode->bits, cidr, bits); | ||||
| +		return 0; | ||||
| +	} | ||||
| + | ||||
| +	node = kzalloc(sizeof(*node), GFP_KERNEL); | ||||
| +	if (unlikely(!node)) { | ||||
| +		list_del(&newnode->peer_list); | ||||
| +		kfree(newnode); | ||||
| +		return -ENOMEM; | ||||
| +	} | ||||
| +	INIT_LIST_HEAD(&node->peer_list); | ||||
| +	copy_and_assign_cidr(node, newnode->bits, cidr, bits); | ||||
|   | ||||
| -		rcu_assign_pointer(CHOOSE_NODE(node, down->bits), down); | ||||
| -		rcu_assign_pointer(CHOOSE_NODE(node, newnode->bits), newnode); | ||||
| -		if (!parent) | ||||
| -			rcu_assign_pointer(*trie, node); | ||||
| -		else | ||||
| -			rcu_assign_pointer(CHOOSE_NODE(parent, node->bits), | ||||
| -					   node); | ||||
| +	rcu_assign_pointer(down->parent_bit, &CHOOSE_NODE(node, down->bits)); | ||||
| +	rcu_assign_pointer(CHOOSE_NODE(node, down->bits), down); | ||||
| +	rcu_assign_pointer(newnode->parent_bit, &CHOOSE_NODE(node, newnode->bits)); | ||||
| +	rcu_assign_pointer(CHOOSE_NODE(node, newnode->bits), newnode); | ||||
| +	if (!parent) { | ||||
| +		rcu_assign_pointer(node->parent_bit, trie); | ||||
| +		rcu_assign_pointer(*trie, node); | ||||
| +	} else { | ||||
| +		rcu_assign_pointer(node->parent_bit, &CHOOSE_NODE(parent, node->bits)); | ||||
| +		rcu_assign_pointer(CHOOSE_NODE(parent, node->bits), node); | ||||
|  	} | ||||
|  	return 0; | ||||
|  } | ||||
| @@ -335,9 +290,30 @@ int wg_allowedips_insert_v6(struct allow | ||||
|  void wg_allowedips_remove_by_peer(struct allowedips *table, | ||||
|  				  struct wg_peer *peer, struct mutex *lock) | ||||
|  { | ||||
| +	struct allowedips_node *node, *child, *tmp; | ||||
| + | ||||
| +	if (list_empty(&peer->allowedips_list)) | ||||
| +		return; | ||||
|  	++table->seq; | ||||
| -	walk_remove_by_peer(&table->root4, peer, lock); | ||||
| -	walk_remove_by_peer(&table->root6, peer, lock); | ||||
| +	list_for_each_entry_safe(node, tmp, &peer->allowedips_list, peer_list) { | ||||
| +		list_del_init(&node->peer_list); | ||||
| +		RCU_INIT_POINTER(node->peer, NULL); | ||||
| +		if (node->bit[0] && node->bit[1]) | ||||
| +			continue; | ||||
| +		child = rcu_dereference_protected( | ||||
| +				node->bit[!rcu_access_pointer(node->bit[0])], | ||||
| +				lockdep_is_held(lock)); | ||||
| +		if (child) | ||||
| +			child->parent_bit = node->parent_bit; | ||||
| +		*rcu_dereference_protected(node->parent_bit, lockdep_is_held(lock)) = child; | ||||
| +		kfree_rcu(node, rcu); | ||||
| + | ||||
| +		/* TODO: Note that we currently don't walk up and down in order to | ||||
| +		 * free any potential filler nodes. This means that this function | ||||
| +		 * doesn't free up as much as it could, which could be revisited | ||||
| +		 * at some point. | ||||
| +		 */ | ||||
| +	} | ||||
|  } | ||||
|   | ||||
|  int wg_allowedips_read_node(struct allowedips_node *node, u8 ip[16], u8 *cidr) | ||||
| --- a/drivers/net/wireguard/allowedips.h | ||||
| +++ b/drivers/net/wireguard/allowedips.h | ||||
| @@ -15,14 +15,11 @@ struct wg_peer; | ||||
|  struct allowedips_node { | ||||
|  	struct wg_peer __rcu *peer; | ||||
|  	struct allowedips_node __rcu *bit[2]; | ||||
| -	/* While it may seem scandalous that we waste space for v4, | ||||
| -	 * we're alloc'ing to the nearest power of 2 anyway, so this | ||||
| -	 * doesn't actually make a difference. | ||||
| -	 */ | ||||
| -	u8 bits[16] __aligned(__alignof(u64)); | ||||
|  	u8 cidr, bit_at_a, bit_at_b, bitlen; | ||||
| +	u8 bits[16] __aligned(__alignof(u64)); | ||||
|   | ||||
| -	/* Keep rarely used list at bottom to be beyond cache line. */ | ||||
| +	/* Keep rarely used members at bottom to be beyond cache line. */ | ||||
| +	struct allowedips_node *__rcu *parent_bit; /* XXX: this puts us at 68->128 bytes instead of 60->64 bytes!! */ | ||||
|  	union { | ||||
|  		struct list_head peer_list; | ||||
|  		struct rcu_head rcu; | ||||
| @@ -0,0 +1,173 @@ | ||||
| From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 | ||||
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> | ||||
| Date: Fri, 4 Jun 2021 17:17:37 +0200 | ||||
| Subject: [PATCH] wireguard: allowedips: allocate nodes in kmem_cache | ||||
|  | ||||
| commit dc680de28ca849dfe589dc15ac56d22505f0ef11 upstream. | ||||
|  | ||||
| The previous commit moved from O(n) to O(1) for removal, but in the | ||||
| process introduced an additional pointer member to a struct that | ||||
| increased the size from 60 to 68 bytes, putting nodes in the 128-byte | ||||
| slab. With deployed systems having as many as 2 million nodes, this | ||||
| represents a significant doubling in memory usage (128 MiB -> 256 MiB). | ||||
| Fix this by using our own kmem_cache, that's sized exactly right. This | ||||
| also makes wireguard's memory usage more transparent in tools like | ||||
| slabtop and /proc/slabinfo. | ||||
|  | ||||
| Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") | ||||
| Suggested-by: Arnd Bergmann <arnd@arndb.de> | ||||
| Suggested-by: Matthew Wilcox <willy@infradead.org> | ||||
| Cc: stable@vger.kernel.org | ||||
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> | ||||
| Signed-off-by: David S. Miller <davem@davemloft.net> | ||||
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> | ||||
| --- | ||||
|  drivers/net/wireguard/allowedips.c | 31 ++++++++++++++++++++++++------ | ||||
|  drivers/net/wireguard/allowedips.h |  5 ++++- | ||||
|  drivers/net/wireguard/main.c       | 10 +++++++++- | ||||
|  3 files changed, 38 insertions(+), 8 deletions(-) | ||||
|  | ||||
| --- a/drivers/net/wireguard/allowedips.c | ||||
| +++ b/drivers/net/wireguard/allowedips.c | ||||
| @@ -6,6 +6,8 @@ | ||||
|  #include "allowedips.h" | ||||
|  #include "peer.h" | ||||
|   | ||||
| +static struct kmem_cache *node_cache; | ||||
| + | ||||
|  static void swap_endian(u8 *dst, const u8 *src, u8 bits) | ||||
|  { | ||||
|  	if (bits == 32) { | ||||
| @@ -40,6 +42,11 @@ static void push_rcu(struct allowedips_n | ||||
|  	} | ||||
|  } | ||||
|   | ||||
| +static void node_free_rcu(struct rcu_head *rcu) | ||||
| +{ | ||||
| +	kmem_cache_free(node_cache, container_of(rcu, struct allowedips_node, rcu)); | ||||
| +} | ||||
| + | ||||
|  static void root_free_rcu(struct rcu_head *rcu) | ||||
|  { | ||||
|  	struct allowedips_node *node, *stack[128] = { | ||||
| @@ -49,7 +56,7 @@ static void root_free_rcu(struct rcu_hea | ||||
|  	while (len > 0 && (node = stack[--len])) { | ||||
|  		push_rcu(stack, node->bit[0], &len); | ||||
|  		push_rcu(stack, node->bit[1], &len); | ||||
| -		kfree(node); | ||||
| +		kmem_cache_free(node_cache, node); | ||||
|  	} | ||||
|  } | ||||
|   | ||||
| @@ -164,7 +171,7 @@ static int add(struct allowedips_node __ | ||||
|  		return -EINVAL; | ||||
|   | ||||
|  	if (!rcu_access_pointer(*trie)) { | ||||
| -		node = kzalloc(sizeof(*node), GFP_KERNEL); | ||||
| +		node = kmem_cache_zalloc(node_cache, GFP_KERNEL); | ||||
|  		if (unlikely(!node)) | ||||
|  			return -ENOMEM; | ||||
|  		RCU_INIT_POINTER(node->peer, peer); | ||||
| @@ -180,7 +187,7 @@ static int add(struct allowedips_node __ | ||||
|  		return 0; | ||||
|  	} | ||||
|   | ||||
| -	newnode = kzalloc(sizeof(*newnode), GFP_KERNEL); | ||||
| +	newnode = kmem_cache_zalloc(node_cache, GFP_KERNEL); | ||||
|  	if (unlikely(!newnode)) | ||||
|  		return -ENOMEM; | ||||
|  	RCU_INIT_POINTER(newnode->peer, peer); | ||||
| @@ -213,10 +220,10 @@ static int add(struct allowedips_node __ | ||||
|  		return 0; | ||||
|  	} | ||||
|   | ||||
| -	node = kzalloc(sizeof(*node), GFP_KERNEL); | ||||
| +	node = kmem_cache_zalloc(node_cache, GFP_KERNEL); | ||||
|  	if (unlikely(!node)) { | ||||
|  		list_del(&newnode->peer_list); | ||||
| -		kfree(newnode); | ||||
| +		kmem_cache_free(node_cache, newnode); | ||||
|  		return -ENOMEM; | ||||
|  	} | ||||
|  	INIT_LIST_HEAD(&node->peer_list); | ||||
| @@ -306,7 +313,7 @@ void wg_allowedips_remove_by_peer(struct | ||||
|  		if (child) | ||||
|  			child->parent_bit = node->parent_bit; | ||||
|  		*rcu_dereference_protected(node->parent_bit, lockdep_is_held(lock)) = child; | ||||
| -		kfree_rcu(node, rcu); | ||||
| +		call_rcu(&node->rcu, node_free_rcu); | ||||
|   | ||||
|  		/* TODO: Note that we currently don't walk up and down in order to | ||||
|  		 * free any potential filler nodes. This means that this function | ||||
| @@ -350,4 +357,16 @@ struct wg_peer *wg_allowedips_lookup_src | ||||
|  	return NULL; | ||||
|  } | ||||
|   | ||||
| +int __init wg_allowedips_slab_init(void) | ||||
| +{ | ||||
| +	node_cache = KMEM_CACHE(allowedips_node, 0); | ||||
| +	return node_cache ? 0 : -ENOMEM; | ||||
| +} | ||||
| + | ||||
| +void wg_allowedips_slab_uninit(void) | ||||
| +{ | ||||
| +	rcu_barrier(); | ||||
| +	kmem_cache_destroy(node_cache); | ||||
| +} | ||||
| + | ||||
|  #include "selftest/allowedips.c" | ||||
| --- a/drivers/net/wireguard/allowedips.h | ||||
| +++ b/drivers/net/wireguard/allowedips.h | ||||
| @@ -19,7 +19,7 @@ struct allowedips_node { | ||||
|  	u8 bits[16] __aligned(__alignof(u64)); | ||||
|   | ||||
|  	/* Keep rarely used members at bottom to be beyond cache line. */ | ||||
| -	struct allowedips_node *__rcu *parent_bit; /* XXX: this puts us at 68->128 bytes instead of 60->64 bytes!! */ | ||||
| +	struct allowedips_node *__rcu *parent_bit; | ||||
|  	union { | ||||
|  		struct list_head peer_list; | ||||
|  		struct rcu_head rcu; | ||||
| @@ -53,4 +53,7 @@ struct wg_peer *wg_allowedips_lookup_src | ||||
|  bool wg_allowedips_selftest(void); | ||||
|  #endif | ||||
|   | ||||
| +int wg_allowedips_slab_init(void); | ||||
| +void wg_allowedips_slab_uninit(void); | ||||
| + | ||||
|  #endif /* _WG_ALLOWEDIPS_H */ | ||||
| --- a/drivers/net/wireguard/main.c | ||||
| +++ b/drivers/net/wireguard/main.c | ||||
| @@ -21,10 +21,15 @@ static int __init mod_init(void) | ||||
|  { | ||||
|  	int ret; | ||||
|   | ||||
| +	ret = wg_allowedips_slab_init(); | ||||
| +	if (ret < 0) | ||||
| +		goto err_allowedips; | ||||
| + | ||||
|  #ifdef DEBUG | ||||
| +	ret = -ENOTRECOVERABLE; | ||||
|  	if (!wg_allowedips_selftest() || !wg_packet_counter_selftest() || | ||||
|  	    !wg_ratelimiter_selftest()) | ||||
| -		return -ENOTRECOVERABLE; | ||||
| +		goto err_peer; | ||||
|  #endif | ||||
|  	wg_noise_init(); | ||||
|   | ||||
| @@ -50,6 +55,8 @@ err_netlink: | ||||
|  err_device: | ||||
|  	wg_peer_uninit(); | ||||
|  err_peer: | ||||
| +	wg_allowedips_slab_uninit(); | ||||
| +err_allowedips: | ||||
|  	return ret; | ||||
|  } | ||||
|   | ||||
| @@ -58,6 +65,7 @@ static void __exit mod_exit(void) | ||||
|  	wg_genetlink_uninit(); | ||||
|  	wg_device_uninit(); | ||||
|  	wg_peer_uninit(); | ||||
| +	wg_allowedips_slab_uninit(); | ||||
|  } | ||||
|   | ||||
|  module_init(mod_init); | ||||
| @@ -0,0 +1,521 @@ | ||||
| From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 | ||||
| From: "Jason A. Donenfeld" <Jason@zx2c4.com> | ||||
| Date: Fri, 4 Jun 2021 17:17:38 +0200 | ||||
| Subject: [PATCH] wireguard: allowedips: free empty intermediate nodes when | ||||
|  removing single node | ||||
|  | ||||
| commit bf7b042dc62a31f66d3a41dd4dfc7806f267b307 upstream. | ||||
|  | ||||
| When removing single nodes, it's possible that that node's parent is an | ||||
| empty intermediate node, in which case, it too should be removed. | ||||
| Otherwise the trie fills up and never is fully emptied, leading to | ||||
| gradual memory leaks over time for tries that are modified often. There | ||||
| was originally code to do this, but it was removed during refactoring in | ||||
| 2016 and never reworked. Now that we have proper parent pointers from | ||||
| the previous commits, we can implement this properly. | ||||
|  | ||||
| In order to reduce branching and expensive comparisons, we want to keep | ||||
| the double pointer for parent assignment (which lets us easily chain up | ||||
| to the root), but we still need to actually get the parent's base | ||||
| address. So encode the bit number into the last two bits of the pointer, | ||||
| and pack and unpack it as needed. This is a little bit clumsy but is the | ||||
| fastest and least memory wasteful of the compromises. Note that we align | ||||
| the root struct here to a minimum of 4, because it's embedded into a | ||||
| larger struct, and we're relying on having the bottom two bits for our | ||||
| flag, which would only be 16-bit aligned on m68k. | ||||
|  | ||||
| The existing macro-based helpers were a bit unwieldy for adding the bit | ||||
| packing to, so this commit replaces them with safer and clearer ordinary | ||||
| functions. | ||||
|  | ||||
| We add a test to the randomized/fuzzer part of the selftests, to free | ||||
| the randomized tries by-peer, refuzz it, and repeat, until it's supposed | ||||
| to be empty, and then see if that actually resulted in the whole | ||||
| thing being emptied. That combined with kmemcheck should hopefully make | ||||
| sure this commit is doing what it should. Along the way this resulted in | ||||
| various other cleanups of the tests and fixes for recent graphviz. | ||||
|  | ||||
| Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") | ||||
| Cc: stable@vger.kernel.org | ||||
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> | ||||
| Signed-off-by: David S. Miller <davem@davemloft.net> | ||||
| Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com> | ||||
| --- | ||||
|  drivers/net/wireguard/allowedips.c          | 102 ++++++------ | ||||
|  drivers/net/wireguard/allowedips.h          |   4 +- | ||||
|  drivers/net/wireguard/selftest/allowedips.c | 162 ++++++++++---------- | ||||
|  3 files changed, 137 insertions(+), 131 deletions(-) | ||||
|  | ||||
| --- a/drivers/net/wireguard/allowedips.c | ||||
| +++ b/drivers/net/wireguard/allowedips.c | ||||
| @@ -30,8 +30,11 @@ static void copy_and_assign_cidr(struct | ||||
|  	node->bitlen = bits; | ||||
|  	memcpy(node->bits, src, bits / 8U); | ||||
|  } | ||||
| -#define CHOOSE_NODE(parent, key) \ | ||||
| -	parent->bit[(key[parent->bit_at_a] >> parent->bit_at_b) & 1] | ||||
| + | ||||
| +static inline u8 choose(struct allowedips_node *node, const u8 *key) | ||||
| +{ | ||||
| +	return (key[node->bit_at_a] >> node->bit_at_b) & 1; | ||||
| +} | ||||
|   | ||||
|  static void push_rcu(struct allowedips_node **stack, | ||||
|  		     struct allowedips_node __rcu *p, unsigned int *len) | ||||
| @@ -112,7 +115,7 @@ static struct allowedips_node *find_node | ||||
|  			found = node; | ||||
|  		if (node->cidr == bits) | ||||
|  			break; | ||||
| -		node = rcu_dereference_bh(CHOOSE_NODE(node, key)); | ||||
| +		node = rcu_dereference_bh(node->bit[choose(node, key)]); | ||||
|  	} | ||||
|  	return found; | ||||
|  } | ||||
| @@ -144,8 +147,7 @@ static bool node_placement(struct allowe | ||||
|  			   u8 cidr, u8 bits, struct allowedips_node **rnode, | ||||
|  			   struct mutex *lock) | ||||
|  { | ||||
| -	struct allowedips_node *node = rcu_dereference_protected(trie, | ||||
| -						lockdep_is_held(lock)); | ||||
| +	struct allowedips_node *node = rcu_dereference_protected(trie, lockdep_is_held(lock)); | ||||
|  	struct allowedips_node *parent = NULL; | ||||
|  	bool exact = false; | ||||
|   | ||||
| @@ -155,13 +157,24 @@ static bool node_placement(struct allowe | ||||
|  			exact = true; | ||||
|  			break; | ||||
|  		} | ||||
| -		node = rcu_dereference_protected(CHOOSE_NODE(parent, key), | ||||
| -						 lockdep_is_held(lock)); | ||||
| +		node = rcu_dereference_protected(parent->bit[choose(parent, key)], lockdep_is_held(lock)); | ||||
|  	} | ||||
|  	*rnode = parent; | ||||
|  	return exact; | ||||
|  } | ||||
|   | ||||
| +static inline void connect_node(struct allowedips_node **parent, u8 bit, struct allowedips_node *node) | ||||
| +{ | ||||
| +	node->parent_bit_packed = (unsigned long)parent | bit; | ||||
| +	rcu_assign_pointer(*parent, node); | ||||
| +} | ||||
| + | ||||
| +static inline void choose_and_connect_node(struct allowedips_node *parent, struct allowedips_node *node) | ||||
| +{ | ||||
| +	u8 bit = choose(parent, node->bits); | ||||
| +	connect_node(&parent->bit[bit], bit, node); | ||||
| +} | ||||
| + | ||||
|  static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key, | ||||
|  	       u8 cidr, struct wg_peer *peer, struct mutex *lock) | ||||
|  { | ||||
| @@ -177,8 +190,7 @@ static int add(struct allowedips_node __ | ||||
|  		RCU_INIT_POINTER(node->peer, peer); | ||||
|  		list_add_tail(&node->peer_list, &peer->allowedips_list); | ||||
|  		copy_and_assign_cidr(node, key, cidr, bits); | ||||
| -		rcu_assign_pointer(node->parent_bit, trie); | ||||
| -		rcu_assign_pointer(*trie, node); | ||||
| +		connect_node(trie, 2, node); | ||||
|  		return 0; | ||||
|  	} | ||||
|  	if (node_placement(*trie, key, cidr, bits, &node, lock)) { | ||||
| @@ -197,10 +209,10 @@ static int add(struct allowedips_node __ | ||||
|  	if (!node) { | ||||
|  		down = rcu_dereference_protected(*trie, lockdep_is_held(lock)); | ||||
|  	} else { | ||||
| -		down = rcu_dereference_protected(CHOOSE_NODE(node, key), lockdep_is_held(lock)); | ||||
| +		const u8 bit = choose(node, key); | ||||
| +		down = rcu_dereference_protected(node->bit[bit], lockdep_is_held(lock)); | ||||
|  		if (!down) { | ||||
| -			rcu_assign_pointer(newnode->parent_bit, &CHOOSE_NODE(node, key)); | ||||
| -			rcu_assign_pointer(CHOOSE_NODE(node, key), newnode); | ||||
| +			connect_node(&node->bit[bit], bit, newnode); | ||||
|  			return 0; | ||||
|  		} | ||||
|  	} | ||||
| @@ -208,15 +220,11 @@ static int add(struct allowedips_node __ | ||||
|  	parent = node; | ||||
|   | ||||
|  	if (newnode->cidr == cidr) { | ||||
| -		rcu_assign_pointer(down->parent_bit, &CHOOSE_NODE(newnode, down->bits)); | ||||
| -		rcu_assign_pointer(CHOOSE_NODE(newnode, down->bits), down); | ||||
| -		if (!parent) { | ||||
| -			rcu_assign_pointer(newnode->parent_bit, trie); | ||||
| -			rcu_assign_pointer(*trie, newnode); | ||||
| -		} else { | ||||
| -			rcu_assign_pointer(newnode->parent_bit, &CHOOSE_NODE(parent, newnode->bits)); | ||||
| -			rcu_assign_pointer(CHOOSE_NODE(parent, newnode->bits), newnode); | ||||
| -		} | ||||
| +		choose_and_connect_node(newnode, down); | ||||
| +		if (!parent) | ||||
| +			connect_node(trie, 2, newnode); | ||||
| +		else | ||||
| +			choose_and_connect_node(parent, newnode); | ||||
|  		return 0; | ||||
|  	} | ||||
|   | ||||
| @@ -229,17 +237,12 @@ static int add(struct allowedips_node __ | ||||
|  	INIT_LIST_HEAD(&node->peer_list); | ||||
|  	copy_and_assign_cidr(node, newnode->bits, cidr, bits); | ||||
|   | ||||
| -	rcu_assign_pointer(down->parent_bit, &CHOOSE_NODE(node, down->bits)); | ||||
| -	rcu_assign_pointer(CHOOSE_NODE(node, down->bits), down); | ||||
| -	rcu_assign_pointer(newnode->parent_bit, &CHOOSE_NODE(node, newnode->bits)); | ||||
| -	rcu_assign_pointer(CHOOSE_NODE(node, newnode->bits), newnode); | ||||
| -	if (!parent) { | ||||
| -		rcu_assign_pointer(node->parent_bit, trie); | ||||
| -		rcu_assign_pointer(*trie, node); | ||||
| -	} else { | ||||
| -		rcu_assign_pointer(node->parent_bit, &CHOOSE_NODE(parent, node->bits)); | ||||
| -		rcu_assign_pointer(CHOOSE_NODE(parent, node->bits), node); | ||||
| -	} | ||||
| +	choose_and_connect_node(node, down); | ||||
| +	choose_and_connect_node(node, newnode); | ||||
| +	if (!parent) | ||||
| +		connect_node(trie, 2, node); | ||||
| +	else | ||||
| +		choose_and_connect_node(parent, node); | ||||
|  	return 0; | ||||
|  } | ||||
|   | ||||
| @@ -297,7 +300,8 @@ int wg_allowedips_insert_v6(struct allow | ||||
|  void wg_allowedips_remove_by_peer(struct allowedips *table, | ||||
|  				  struct wg_peer *peer, struct mutex *lock) | ||||
|  { | ||||
| -	struct allowedips_node *node, *child, *tmp; | ||||
| +	struct allowedips_node *node, *child, **parent_bit, *parent, *tmp; | ||||
| +	bool free_parent; | ||||
|   | ||||
|  	if (list_empty(&peer->allowedips_list)) | ||||
|  		return; | ||||
| @@ -307,19 +311,29 @@ void wg_allowedips_remove_by_peer(struct | ||||
|  		RCU_INIT_POINTER(node->peer, NULL); | ||||
|  		if (node->bit[0] && node->bit[1]) | ||||
|  			continue; | ||||
| -		child = rcu_dereference_protected( | ||||
| -				node->bit[!rcu_access_pointer(node->bit[0])], | ||||
| -				lockdep_is_held(lock)); | ||||
| +		child = rcu_dereference_protected(node->bit[!rcu_access_pointer(node->bit[0])], | ||||
| +						  lockdep_is_held(lock)); | ||||
|  		if (child) | ||||
| -			child->parent_bit = node->parent_bit; | ||||
| -		*rcu_dereference_protected(node->parent_bit, lockdep_is_held(lock)) = child; | ||||
| +			child->parent_bit_packed = node->parent_bit_packed; | ||||
| +		parent_bit = (struct allowedips_node **)(node->parent_bit_packed & ~3UL); | ||||
| +		*parent_bit = child; | ||||
| +		parent = (void *)parent_bit - | ||||
| +			 offsetof(struct allowedips_node, bit[node->parent_bit_packed & 1]); | ||||
| +		free_parent = !rcu_access_pointer(node->bit[0]) && | ||||
| +			      !rcu_access_pointer(node->bit[1]) && | ||||
| +			      (node->parent_bit_packed & 3) <= 1 && | ||||
| +			      !rcu_access_pointer(parent->peer); | ||||
| +		if (free_parent) | ||||
| +			child = rcu_dereference_protected( | ||||
| +					parent->bit[!(node->parent_bit_packed & 1)], | ||||
| +					lockdep_is_held(lock)); | ||||
|  		call_rcu(&node->rcu, node_free_rcu); | ||||
| - | ||||
| -		/* TODO: Note that we currently don't walk up and down in order to | ||||
| -		 * free any potential filler nodes. This means that this function | ||||
| -		 * doesn't free up as much as it could, which could be revisited | ||||
| -		 * at some point. | ||||
| -		 */ | ||||
| +		if (!free_parent) | ||||
| +			continue; | ||||
| +		if (child) | ||||
| +			child->parent_bit_packed = parent->parent_bit_packed; | ||||
| +		*(struct allowedips_node **)(parent->parent_bit_packed & ~3UL) = child; | ||||
| +		call_rcu(&parent->rcu, node_free_rcu); | ||||
|  	} | ||||
|  } | ||||
|   | ||||
| --- a/drivers/net/wireguard/allowedips.h | ||||
| +++ b/drivers/net/wireguard/allowedips.h | ||||
| @@ -19,7 +19,7 @@ struct allowedips_node { | ||||
|  	u8 bits[16] __aligned(__alignof(u64)); | ||||
|   | ||||
|  	/* Keep rarely used members at bottom to be beyond cache line. */ | ||||
| -	struct allowedips_node *__rcu *parent_bit; | ||||
| +	unsigned long parent_bit_packed; | ||||
|  	union { | ||||
|  		struct list_head peer_list; | ||||
|  		struct rcu_head rcu; | ||||
| @@ -30,7 +30,7 @@ struct allowedips { | ||||
|  	struct allowedips_node __rcu *root4; | ||||
|  	struct allowedips_node __rcu *root6; | ||||
|  	u64 seq; | ||||
| -}; | ||||
| +} __aligned(4); /* We pack the lower 2 bits of &root, but m68k only gives 16-bit alignment. */ | ||||
|   | ||||
|  void wg_allowedips_init(struct allowedips *table); | ||||
|  void wg_allowedips_free(struct allowedips *table, struct mutex *mutex); | ||||
| --- a/drivers/net/wireguard/selftest/allowedips.c | ||||
| +++ b/drivers/net/wireguard/selftest/allowedips.c | ||||
| @@ -19,32 +19,22 @@ | ||||
|   | ||||
|  #include <linux/siphash.h> | ||||
|   | ||||
| -static __init void swap_endian_and_apply_cidr(u8 *dst, const u8 *src, u8 bits, | ||||
| -					      u8 cidr) | ||||
| -{ | ||||
| -	swap_endian(dst, src, bits); | ||||
| -	memset(dst + (cidr + 7) / 8, 0, bits / 8 - (cidr + 7) / 8); | ||||
| -	if (cidr) | ||||
| -		dst[(cidr + 7) / 8 - 1] &= ~0U << ((8 - (cidr % 8)) % 8); | ||||
| -} | ||||
| - | ||||
|  static __init void print_node(struct allowedips_node *node, u8 bits) | ||||
|  { | ||||
|  	char *fmt_connection = KERN_DEBUG "\t\"%p/%d\" -> \"%p/%d\";\n"; | ||||
| -	char *fmt_declaration = KERN_DEBUG | ||||
| -		"\t\"%p/%d\"[style=%s, color=\"#%06x\"];\n"; | ||||
| +	char *fmt_declaration = KERN_DEBUG "\t\"%p/%d\"[style=%s, color=\"#%06x\"];\n"; | ||||
| +	u8 ip1[16], ip2[16], cidr1, cidr2; | ||||
|  	char *style = "dotted"; | ||||
| -	u8 ip1[16], ip2[16]; | ||||
|  	u32 color = 0; | ||||
|   | ||||
| +	if (node == NULL) | ||||
| +		return; | ||||
|  	if (bits == 32) { | ||||
|  		fmt_connection = KERN_DEBUG "\t\"%pI4/%d\" -> \"%pI4/%d\";\n"; | ||||
| -		fmt_declaration = KERN_DEBUG | ||||
| -			"\t\"%pI4/%d\"[style=%s, color=\"#%06x\"];\n"; | ||||
| +		fmt_declaration = KERN_DEBUG "\t\"%pI4/%d\"[style=%s, color=\"#%06x\"];\n"; | ||||
|  	} else if (bits == 128) { | ||||
|  		fmt_connection = KERN_DEBUG "\t\"%pI6/%d\" -> \"%pI6/%d\";\n"; | ||||
| -		fmt_declaration = KERN_DEBUG | ||||
| -			"\t\"%pI6/%d\"[style=%s, color=\"#%06x\"];\n"; | ||||
| +		fmt_declaration = KERN_DEBUG "\t\"%pI6/%d\"[style=%s, color=\"#%06x\"];\n"; | ||||
|  	} | ||||
|  	if (node->peer) { | ||||
|  		hsiphash_key_t key = { { 0 } }; | ||||
| @@ -55,24 +45,20 @@ static __init void print_node(struct all | ||||
|  			hsiphash_1u32(0xabad1dea, &key) % 200; | ||||
|  		style = "bold"; | ||||
|  	} | ||||
| -	swap_endian_and_apply_cidr(ip1, node->bits, bits, node->cidr); | ||||
| -	printk(fmt_declaration, ip1, node->cidr, style, color); | ||||
| +	wg_allowedips_read_node(node, ip1, &cidr1); | ||||
| +	printk(fmt_declaration, ip1, cidr1, style, color); | ||||
|  	if (node->bit[0]) { | ||||
| -		swap_endian_and_apply_cidr(ip2, | ||||
| -				rcu_dereference_raw(node->bit[0])->bits, bits, | ||||
| -				node->cidr); | ||||
| -		printk(fmt_connection, ip1, node->cidr, ip2, | ||||
| -		       rcu_dereference_raw(node->bit[0])->cidr); | ||||
| -		print_node(rcu_dereference_raw(node->bit[0]), bits); | ||||
| +		wg_allowedips_read_node(rcu_dereference_raw(node->bit[0]), ip2, &cidr2); | ||||
| +		printk(fmt_connection, ip1, cidr1, ip2, cidr2); | ||||
|  	} | ||||
|  	if (node->bit[1]) { | ||||
| -		swap_endian_and_apply_cidr(ip2, | ||||
| -				rcu_dereference_raw(node->bit[1])->bits, | ||||
| -				bits, node->cidr); | ||||
| -		printk(fmt_connection, ip1, node->cidr, ip2, | ||||
| -		       rcu_dereference_raw(node->bit[1])->cidr); | ||||
| -		print_node(rcu_dereference_raw(node->bit[1]), bits); | ||||
| +		wg_allowedips_read_node(rcu_dereference_raw(node->bit[1]), ip2, &cidr2); | ||||
| +		printk(fmt_connection, ip1, cidr1, ip2, cidr2); | ||||
|  	} | ||||
| +	if (node->bit[0]) | ||||
| +		print_node(rcu_dereference_raw(node->bit[0]), bits); | ||||
| +	if (node->bit[1]) | ||||
| +		print_node(rcu_dereference_raw(node->bit[1]), bits); | ||||
|  } | ||||
|   | ||||
|  static __init void print_tree(struct allowedips_node __rcu *top, u8 bits) | ||||
| @@ -121,8 +107,8 @@ static __init inline union nf_inet_addr | ||||
|  { | ||||
|  	union nf_inet_addr mask; | ||||
|   | ||||
| -	memset(&mask, 0x00, 128 / 8); | ||||
| -	memset(&mask, 0xff, cidr / 8); | ||||
| +	memset(&mask, 0, sizeof(mask)); | ||||
| +	memset(&mask.all, 0xff, cidr / 8); | ||||
|  	if (cidr % 32) | ||||
|  		mask.all[cidr / 32] = (__force u32)htonl( | ||||
|  			(0xFFFFFFFFUL << (32 - (cidr % 32))) & 0xFFFFFFFFUL); | ||||
| @@ -149,42 +135,36 @@ horrible_mask_self(struct horrible_allow | ||||
|  } | ||||
|   | ||||
|  static __init inline bool | ||||
| -horrible_match_v4(const struct horrible_allowedips_node *node, | ||||
| -		  struct in_addr *ip) | ||||
| +horrible_match_v4(const struct horrible_allowedips_node *node, struct in_addr *ip) | ||||
|  { | ||||
|  	return (ip->s_addr & node->mask.ip) == node->ip.ip; | ||||
|  } | ||||
|   | ||||
|  static __init inline bool | ||||
| -horrible_match_v6(const struct horrible_allowedips_node *node, | ||||
| -		  struct in6_addr *ip) | ||||
| +horrible_match_v6(const struct horrible_allowedips_node *node, struct in6_addr *ip) | ||||
|  { | ||||
| -	return (ip->in6_u.u6_addr32[0] & node->mask.ip6[0]) == | ||||
| -		       node->ip.ip6[0] && | ||||
| -	       (ip->in6_u.u6_addr32[1] & node->mask.ip6[1]) == | ||||
| -		       node->ip.ip6[1] && | ||||
| -	       (ip->in6_u.u6_addr32[2] & node->mask.ip6[2]) == | ||||
| -		       node->ip.ip6[2] && | ||||
| +	return (ip->in6_u.u6_addr32[0] & node->mask.ip6[0]) == node->ip.ip6[0] && | ||||
| +	       (ip->in6_u.u6_addr32[1] & node->mask.ip6[1]) == node->ip.ip6[1] && | ||||
| +	       (ip->in6_u.u6_addr32[2] & node->mask.ip6[2]) == node->ip.ip6[2] && | ||||
|  	       (ip->in6_u.u6_addr32[3] & node->mask.ip6[3]) == node->ip.ip6[3]; | ||||
|  } | ||||
|   | ||||
|  static __init void | ||||
| -horrible_insert_ordered(struct horrible_allowedips *table, | ||||
| -			struct horrible_allowedips_node *node) | ||||
| +horrible_insert_ordered(struct horrible_allowedips *table, struct horrible_allowedips_node *node) | ||||
|  { | ||||
|  	struct horrible_allowedips_node *other = NULL, *where = NULL; | ||||
|  	u8 my_cidr = horrible_mask_to_cidr(node->mask); | ||||
|   | ||||
|  	hlist_for_each_entry(other, &table->head, table) { | ||||
| -		if (!memcmp(&other->mask, &node->mask, | ||||
| -			    sizeof(union nf_inet_addr)) && | ||||
| -		    !memcmp(&other->ip, &node->ip, | ||||
| -			    sizeof(union nf_inet_addr)) && | ||||
| -		    other->ip_version == node->ip_version) { | ||||
| +		if (other->ip_version == node->ip_version && | ||||
| +		    !memcmp(&other->mask, &node->mask, sizeof(union nf_inet_addr)) && | ||||
| +		    !memcmp(&other->ip, &node->ip, sizeof(union nf_inet_addr))) { | ||||
|  			other->value = node->value; | ||||
|  			kfree(node); | ||||
|  			return; | ||||
|  		} | ||||
| +	} | ||||
| +	hlist_for_each_entry(other, &table->head, table) { | ||||
|  		where = other; | ||||
|  		if (horrible_mask_to_cidr(other->mask) <= my_cidr) | ||||
|  			break; | ||||
| @@ -201,8 +181,7 @@ static __init int | ||||
|  horrible_allowedips_insert_v4(struct horrible_allowedips *table, | ||||
|  			      struct in_addr *ip, u8 cidr, void *value) | ||||
|  { | ||||
| -	struct horrible_allowedips_node *node = kzalloc(sizeof(*node), | ||||
| -							GFP_KERNEL); | ||||
| +	struct horrible_allowedips_node *node = kzalloc(sizeof(*node), GFP_KERNEL); | ||||
|   | ||||
|  	if (unlikely(!node)) | ||||
|  		return -ENOMEM; | ||||
| @@ -219,8 +198,7 @@ static __init int | ||||
|  horrible_allowedips_insert_v6(struct horrible_allowedips *table, | ||||
|  			      struct in6_addr *ip, u8 cidr, void *value) | ||||
|  { | ||||
| -	struct horrible_allowedips_node *node = kzalloc(sizeof(*node), | ||||
| -							GFP_KERNEL); | ||||
| +	struct horrible_allowedips_node *node = kzalloc(sizeof(*node), GFP_KERNEL); | ||||
|   | ||||
|  	if (unlikely(!node)) | ||||
|  		return -ENOMEM; | ||||
| @@ -234,39 +212,43 @@ horrible_allowedips_insert_v6(struct hor | ||||
|  } | ||||
|   | ||||
|  static __init void * | ||||
| -horrible_allowedips_lookup_v4(struct horrible_allowedips *table, | ||||
| -			      struct in_addr *ip) | ||||
| +horrible_allowedips_lookup_v4(struct horrible_allowedips *table, struct in_addr *ip) | ||||
|  { | ||||
|  	struct horrible_allowedips_node *node; | ||||
| -	void *ret = NULL; | ||||
|   | ||||
|  	hlist_for_each_entry(node, &table->head, table) { | ||||
| -		if (node->ip_version != 4) | ||||
| -			continue; | ||||
| -		if (horrible_match_v4(node, ip)) { | ||||
| -			ret = node->value; | ||||
| -			break; | ||||
| -		} | ||||
| +		if (node->ip_version == 4 && horrible_match_v4(node, ip)) | ||||
| +			return node->value; | ||||
|  	} | ||||
| -	return ret; | ||||
| +	return NULL; | ||||
|  } | ||||
|   | ||||
|  static __init void * | ||||
| -horrible_allowedips_lookup_v6(struct horrible_allowedips *table, | ||||
| -			      struct in6_addr *ip) | ||||
| +horrible_allowedips_lookup_v6(struct horrible_allowedips *table, struct in6_addr *ip) | ||||
|  { | ||||
|  	struct horrible_allowedips_node *node; | ||||
| -	void *ret = NULL; | ||||
|   | ||||
|  	hlist_for_each_entry(node, &table->head, table) { | ||||
| -		if (node->ip_version != 6) | ||||
| +		if (node->ip_version == 6 && horrible_match_v6(node, ip)) | ||||
| +			return node->value; | ||||
| +	} | ||||
| +	return NULL; | ||||
| +} | ||||
| + | ||||
| + | ||||
| +static __init void | ||||
| +horrible_allowedips_remove_by_value(struct horrible_allowedips *table, void *value) | ||||
| +{ | ||||
| +	struct horrible_allowedips_node *node; | ||||
| +	struct hlist_node *h; | ||||
| + | ||||
| +	hlist_for_each_entry_safe(node, h, &table->head, table) { | ||||
| +		if (node->value != value) | ||||
|  			continue; | ||||
| -		if (horrible_match_v6(node, ip)) { | ||||
| -			ret = node->value; | ||||
| -			break; | ||||
| -		} | ||||
| +		hlist_del(&node->table); | ||||
| +		kfree(node); | ||||
|  	} | ||||
| -	return ret; | ||||
| + | ||||
|  } | ||||
|   | ||||
|  static __init bool randomized_test(void) | ||||
| @@ -397,23 +379,33 @@ static __init bool randomized_test(void) | ||||
|  		print_tree(t.root6, 128); | ||||
|  	} | ||||
|   | ||||
| -	for (i = 0; i < NUM_QUERIES; ++i) { | ||||
| -		prandom_bytes(ip, 4); | ||||
| -		if (lookup(t.root4, 32, ip) != | ||||
| -		    horrible_allowedips_lookup_v4(&h, (struct in_addr *)ip)) { | ||||
| -			pr_err("allowedips random self-test: FAIL\n"); | ||||
| -			goto free; | ||||
| +	for (j = 0;; ++j) { | ||||
| +		for (i = 0; i < NUM_QUERIES; ++i) { | ||||
| +			prandom_bytes(ip, 4); | ||||
| +			if (lookup(t.root4, 32, ip) != horrible_allowedips_lookup_v4(&h, (struct in_addr *)ip)) { | ||||
| +				horrible_allowedips_lookup_v4(&h, (struct in_addr *)ip); | ||||
| +				pr_err("allowedips random v4 self-test: FAIL\n"); | ||||
| +				goto free; | ||||
| +			} | ||||
| +			prandom_bytes(ip, 16); | ||||
| +			if (lookup(t.root6, 128, ip) != horrible_allowedips_lookup_v6(&h, (struct in6_addr *)ip)) { | ||||
| +				pr_err("allowedips random v6 self-test: FAIL\n"); | ||||
| +				goto free; | ||||
| +			} | ||||
|  		} | ||||
| +		if (j >= NUM_PEERS) | ||||
| +			break; | ||||
| +		mutex_lock(&mutex); | ||||
| +		wg_allowedips_remove_by_peer(&t, peers[j], &mutex); | ||||
| +		mutex_unlock(&mutex); | ||||
| +		horrible_allowedips_remove_by_value(&h, peers[j]); | ||||
|  	} | ||||
|   | ||||
| -	for (i = 0; i < NUM_QUERIES; ++i) { | ||||
| -		prandom_bytes(ip, 16); | ||||
| -		if (lookup(t.root6, 128, ip) != | ||||
| -		    horrible_allowedips_lookup_v6(&h, (struct in6_addr *)ip)) { | ||||
| -			pr_err("allowedips random self-test: FAIL\n"); | ||||
| -			goto free; | ||||
| -		} | ||||
| +	if (t.root4 || t.root6) { | ||||
| +		pr_err("allowedips random self-test removal: FAIL\n"); | ||||
| +		goto free; | ||||
|  	} | ||||
| + | ||||
|  	ret = true; | ||||
|   | ||||
|  free: | ||||
		Reference in New Issue
	
	Block a user