kernel: add minimal TCP state tracking to flow offload support
Fixes issues with connections hanging after >30 seconds idle time.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
@@ -0,0 +1,38 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Sun, 25 Feb 2018 15:37:27 +0100
Subject: [PATCH] netfilter: nf_flow_table: make flow_offload_dead inline

It is too trivial to keep as a separate exported function.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---

--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -103,7 +103,10 @@ void nf_flow_table_cleanup(struct net *n
 int nf_flow_table_init(struct nf_flowtable *flow_table);
 void nf_flow_table_free(struct nf_flowtable *flow_table);
 
-void flow_offload_dead(struct flow_offload *flow);
+static inline void flow_offload_dead(struct flow_offload *flow)
+{
+	flow->flags |= FLOW_OFFLOAD_DYING;
+}
 
 int nf_flow_snat_port(const struct flow_offload *flow,
 		      struct sk_buff *skb, unsigned int thoff,
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -113,12 +113,6 @@ void flow_offload_free(struct flow_offlo
 }
 EXPORT_SYMBOL_GPL(flow_offload_free);
 
-void flow_offload_dead(struct flow_offload *flow)
-{
-	flow->flags |= FLOW_OFFLOAD_DYING;
-}
-EXPORT_SYMBOL_GPL(flow_offload_dead);
-
 static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
 {
 	const struct flow_offload_tuple *tuple = data;
@@ -0,0 +1,74 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Sun, 25 Feb 2018 15:38:31 +0100
Subject: [PATCH] netfilter: nf_flow_table: add a new flow state for
 tearing down offloading

This will be used to tear down the offload entry while keeping the
conntrack entry alive.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---

--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -68,6 +68,7 @@ struct flow_offload_tuple_rhash {
 #define FLOW_OFFLOAD_SNAT	0x1
 #define FLOW_OFFLOAD_DNAT	0x2
 #define FLOW_OFFLOAD_DYING	0x4
+#define FLOW_OFFLOAD_TEARDOWN	0x8
 
 struct flow_offload {
 	struct flow_offload_tuple_rhash		tuplehash[FLOW_OFFLOAD_DIR_MAX];
@@ -108,6 +109,11 @@ static inline void flow_offload_dead(str
 	flow->flags |= FLOW_OFFLOAD_DYING;
 }
 
+static inline void flow_offload_teardown(struct flow_offload *flow)
+{
+	flow->flags |= FLOW_OFFLOAD_TEARDOWN;
+}
+
 int nf_flow_snat_port(const struct flow_offload *flow,
 		      struct sk_buff *skb, unsigned int thoff,
 		      u8 protocol, enum flow_offload_tuple_dir dir);
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -226,11 +226,6 @@ static inline bool nf_flow_has_expired(c
 	return (__s32)(flow->timeout - (u32)jiffies) <= 0;
 }
 
-static inline bool nf_flow_is_dying(const struct flow_offload *flow)
-{
-	return flow->flags & FLOW_OFFLOAD_DYING;
-}
-
 static int nf_flow_offload_gc_step(struct nf_flowtable *flow_table)
 {
 	struct flow_offload_tuple_rhash *tuplehash;
@@ -258,7 +253,8 @@ static int nf_flow_offload_gc_step(struc
 		flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
 
 		if (nf_flow_has_expired(flow) ||
-		    nf_flow_is_dying(flow))
+		    (flow->flags & (FLOW_OFFLOAD_DYING |
+				    FLOW_OFFLOAD_TEARDOWN)))
 			flow_offload_del(flow_table, flow);
 	}
 out:
@@ -419,10 +415,14 @@ static void nf_flow_table_do_cleanup(str
 {
 	struct net_device *dev = data;
 
-	if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex)
+	if (!dev) {
+		flow_offload_teardown(flow);
 		return;
+	}
 
-	flow_offload_dead(flow);
+	if (flow->tuplehash[0].tuple.iifidx == dev->ifindex ||
+	    flow->tuplehash[1].tuple.iifidx == dev->ifindex)
+		flow_offload_dead(flow);
 }
 
 static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
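Note: both flags make the GC step above remove the flow, but they signal different fates for the underlying conntrack entry, which a later patch in this series wires up. A minimal sketch of the intended distinction (the helper name here is hypothetical, not from the patch):

/* Hypothetical helper, for illustration only. */
static void example_expire_flow(struct flow_offload *flow, bool keep_ct)
{
	if (keep_ct)
		/* remove only the offload entry; conntrack entry stays alive */
		flow_offload_teardown(flow);
	else
		/* remove the offload entry and its conntrack entry */
		flow_offload_dead(flow);
}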
@@ -0,0 +1,36 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Sun, 25 Feb 2018 15:39:56 +0100
Subject: [PATCH] netfilter: nf_flow_table: in flow_offload_lookup, skip
 entries being deleted

Preparation for sending flows back to the slow path.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---

--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -178,8 +178,21 @@ struct flow_offload_tuple_rhash *
 flow_offload_lookup(struct nf_flowtable *flow_table,
 		    struct flow_offload_tuple *tuple)
 {
-	return rhashtable_lookup_fast(&flow_table->rhashtable, tuple,
-				      nf_flow_offload_rhash_params);
+	struct flow_offload_tuple_rhash *tuplehash;
+	struct flow_offload *flow;
+	int dir;
+
+	tuplehash = rhashtable_lookup_fast(&flow_table->rhashtable, tuple,
+					   nf_flow_offload_rhash_params);
+	if (!tuplehash)
+		return NULL;
+
+	dir = tuplehash->tuple.dir;
+	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+	if (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN))
+		return NULL;
+
+	return tuplehash;
 }
 EXPORT_SYMBOL_GPL(flow_offload_lookup);
 
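With dying/teardown entries hidden from lookup, a NULL result uniformly means "take the slow path". An illustrative consumer of the new semantics (the hook name and surrounding structure are assumptions, not part of the patch):

static unsigned int example_forward_hook(struct nf_flowtable *flow_table,
					 struct flow_offload_tuple *tuple,
					 struct sk_buff *skb)
{
	struct flow_offload_tuple_rhash *tuplehash;

	tuplehash = flow_offload_lookup(flow_table, tuple);
	if (!tuplehash)
		/* not offloaded, or being torn down: fall back to conntrack */
		return NF_ACCEPT;

	/* ... fast-path forwarding would happen here ... */
	return NF_STOLEN;
}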
@@ -0,0 +1,64 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Sun, 25 Feb 2018 15:41:11 +0100
Subject: [PATCH] netfilter: nf_flow_table: add support for sending flows
 back to the slow path

Reset the timeout. For TCP, also set the state so that the next
incoming packets are used to reset window tracking. This allows the
slow path to take over again once the offload state has been torn down.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---

--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -100,6 +100,36 @@ err_ct_refcnt:
 }
 EXPORT_SYMBOL_GPL(flow_offload_alloc);
 
+static void flow_offload_fixup_ct_state(struct nf_conn *ct)
+{
+	const struct nf_conntrack_l4proto *l4proto;
+	struct net *net = nf_ct_net(ct);
+	unsigned int *timeouts;
+	unsigned int timeout;
+	int l4num;
+
+	l4num = nf_ct_protonum(ct);
+	l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), l4num);
+	if (!l4proto)
+		return;
+
+	timeouts = l4proto->get_timeouts(net);
+	if (!timeouts)
+		return;
+
+	if (l4num == IPPROTO_TCP) {
+		timeout = timeouts[TCP_CONNTRACK_ESTABLISHED];
+		ct->proto.tcp.state = TCP_CONNTRACK_IGNORE;
+	} else if (l4num == IPPROTO_UDP) {
+		timeout = timeouts[UDP_CT_REPLIED];
+	} else {
+		return;
+	}
+
+	ct->timeout = nfct_time_stamp + timeout;
+	clear_bit(IPS_OFFLOAD_BIT, &ct->status);
+}
+
 void flow_offload_free(struct flow_offload *flow)
 {
 	struct flow_offload_entry *e;
@@ -107,7 +137,10 @@ void flow_offload_free(struct flow_offlo
 	dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
 	dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
 	e = container_of(flow, struct flow_offload_entry, flow);
-	nf_ct_delete(e->ct, 0, 0);
+	if (flow->flags & FLOW_OFFLOAD_DYING)
+		nf_ct_delete(e->ct, 0, 0);
+	else
+		flow_offload_fixup_ct_state(e->ct);
 	nf_ct_put(e->ct);
 	kfree_rcu(e, rcu_head);
 }
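At this point the teardown path composes end to end. A sketch of the resulting lifecycle, assuming the GC behavior from the earlier patch in this series (the trigger function below is hypothetical):

static void example_return_flow_to_slow_path(struct flow_offload *flow)
{
	/* Mark the flow. The next nf_flow_offload_gc_step() pass removes
	 * it, and flow_offload_free() then restores the conntrack timeout
	 * (TCP_CONNTRACK_ESTABLISHED / UDP_CT_REPLIED), re-arms TCP window
	 * tracking via TCP_CONNTRACK_IGNORE and clears IPS_OFFLOAD_BIT
	 * instead of deleting the entry. */
	flow_offload_teardown(flow);
}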
@@ -0,0 +1,81 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Sun, 25 Feb 2018 15:42:58 +0100
Subject: [PATCH] netfilter: nf_flow_table: tear down TCP flows if RST or
 FIN was seen

Allow the slow path to handle the shutdown of the connection with
proper timeouts.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---

--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -15,6 +15,23 @@
 #include <linux/tcp.h>
 #include <linux/udp.h>
 
+static int nf_flow_tcp_state_check(struct flow_offload *flow,
+				   struct sk_buff *skb, unsigned int thoff)
+{
+	struct tcphdr *tcph;
+
+	if (!pskb_may_pull(skb, thoff + sizeof(*tcph)))
+		return -1;
+
+	tcph = (void *)(skb_network_header(skb) + thoff);
+	if (unlikely(tcph->fin || tcph->rst)) {
+		flow_offload_teardown(flow);
+		return -1;
+	}
+
+	return 0;
+}
+
 static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
 			      __be32 addr, __be32 new_addr)
 {
@@ -118,10 +135,9 @@ static int nf_flow_dnat_ip(const struct
 }
 
 static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
-			  enum flow_offload_tuple_dir dir)
+			  unsigned int thoff, enum flow_offload_tuple_dir dir)
 {
 	struct iphdr *iph = ip_hdr(skb);
-	unsigned int thoff = iph->ihl * 4;
 
 	if (flow->flags & FLOW_OFFLOAD_SNAT &&
 	    (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
@@ -201,6 +217,7 @@ nf_flow_offload_ip_hook(void *priv, stru
 	struct flow_offload *flow;
 	struct net_device *outdev;
 	const struct rtable *rt;
+	unsigned int thoff;
 	struct iphdr *iph;
 	__be32 nexthop;
 
@@ -229,8 +246,12 @@ nf_flow_offload_ip_hook(void *priv, stru
 	if (skb_try_make_writable(skb, sizeof(*iph)))
 		return NF_DROP;
 
+	thoff = ip_hdr(skb)->ihl * 4;
+	if (nf_flow_tcp_state_check(flow, skb, thoff))
+		return NF_ACCEPT;
+
 	if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
-	    nf_flow_nat_ip(flow, skb, dir) < 0)
+	    nf_flow_nat_ip(flow, skb, thoff, dir) < 0)
 		return NF_DROP;
 
 	flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
@@ -438,6 +459,9 @@ nf_flow_offload_ipv6_hook(void *priv, st
 	if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
 		return NF_ACCEPT;
 
+	if (nf_flow_tcp_state_check(flow, skb, sizeof(*ip6h)))
+		return NF_ACCEPT;
+
 	if (skb_try_make_writable(skb, sizeof(*ip6h)))
 		return NF_DROP;
 
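The flag test itself is simple enough to check in isolation. A minimal standalone sketch using the uapi header so it builds outside the kernel (buffer validation such as pskb_may_pull() is deliberately omitted here):

#include <linux/tcp.h>
#include <stdbool.h>

/* A TCP shutdown cannot be modeled on the fast path, so a FIN or RST
 * must kick the flow back to conntrack, which then tracks the closing
 * handshake with proper timeouts. */
static bool tcp_flow_needs_teardown(const struct tcphdr *tcph)
{
	return tcph->fin || tcph->rst;
}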