kernel: fix conntrack leak for flow_offload connections
This was caused by a race condition between offload teardown and conntrack gc bumping the timeout of offloaded connections Signed-off-by: Felix Fietkau <nbd@nbd.name>
This commit is contained in:
		| @@ -0,0 +1,110 @@ | ||||
| From: Felix Fietkau <nbd@nbd.name> | ||||
| Date: Wed, 13 Jun 2018 12:33:39 +0200 | ||||
| Subject: [PATCH] netfilter: nf_flow_table: fix offloaded connection timeout | ||||
|  corner case | ||||
|  | ||||
| The full teardown of offloaded flows is deferred to a gc work item, | ||||
| however processing of packets by netfilter needs to happen immediately | ||||
| after a teardown is requested, because the conntrack state needs to be | ||||
| fixed up. | ||||
|  | ||||
| Since the IPS_OFFLOAD_BIT is still kept until the teardown is complete, | ||||
| the netfilter conntrack gc can accidentally bump the timeout of a | ||||
| connection where offload was just stopped, causing a conntrack entry | ||||
| leak. | ||||
|  | ||||
| Fix this by moving the conntrack timeout bumping from conntrack core to | ||||
| the nf_flow_offload and add a check to prevent bogus timeout bumps. | ||||
|  | ||||
| Signed-off-by: Felix Fietkau <nbd@nbd.name> | ||||
| --- | ||||
|  | ||||
| --- a/net/netfilter/nf_conntrack_core.c | ||||
| +++ b/net/netfilter/nf_conntrack_core.c | ||||
| @@ -978,18 +978,6 @@ static bool gc_worker_can_early_drop(con | ||||
|  	return false; | ||||
|  } | ||||
|   | ||||
| -#define	DAY	(86400 * HZ) | ||||
| - | ||||
| -/* Set an arbitrary timeout large enough not to ever expire, this save | ||||
| - * us a check for the IPS_OFFLOAD_BIT from the packet path via | ||||
| - * nf_ct_is_expired(). | ||||
| - */ | ||||
| -static void nf_ct_offload_timeout(struct nf_conn *ct) | ||||
| -{ | ||||
| -	if (nf_ct_expires(ct) < DAY / 2) | ||||
| -		ct->timeout = nfct_time_stamp + DAY; | ||||
| -} | ||||
| - | ||||
|  static void gc_worker(struct work_struct *work) | ||||
|  { | ||||
|  	unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u); | ||||
| @@ -1026,10 +1014,8 @@ static void gc_worker(struct work_struct | ||||
|  			tmp = nf_ct_tuplehash_to_ctrack(h); | ||||
|   | ||||
|  			scanned++; | ||||
| -			if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) { | ||||
| -				nf_ct_offload_timeout(tmp); | ||||
| +			if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) | ||||
|  				continue; | ||||
| -			} | ||||
|   | ||||
|  			if (nf_ct_is_expired(tmp)) { | ||||
|  				nf_ct_gc_expired(tmp); | ||||
| --- a/net/netfilter/nf_flow_table_core.c | ||||
| +++ b/net/netfilter/nf_flow_table_core.c | ||||
| @@ -185,8 +185,27 @@ static const struct rhashtable_params nf | ||||
|  	.automatic_shrinking	= true, | ||||
|  }; | ||||
|   | ||||
| +#define	DAY	(86400 * HZ) | ||||
| + | ||||
| +/* Set an arbitrary timeout large enough not to ever expire, this save | ||||
| + * us a check for the IPS_OFFLOAD_BIT from the packet path via | ||||
| + * nf_ct_is_expired(). | ||||
| + */ | ||||
| +static void nf_ct_offload_timeout(struct flow_offload *flow) | ||||
| +{ | ||||
| +	struct flow_offload_entry *entry; | ||||
| +	struct nf_conn *ct; | ||||
| + | ||||
| +	entry = container_of(flow, struct flow_offload_entry, flow); | ||||
| +	ct = entry->ct; | ||||
| + | ||||
| +	if (nf_ct_expires(ct) < DAY / 2) | ||||
| +		ct->timeout = nfct_time_stamp + DAY; | ||||
| +} | ||||
| + | ||||
|  int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow) | ||||
|  { | ||||
| +	nf_ct_offload_timeout(flow); | ||||
|  	flow->timeout = (u32)jiffies; | ||||
|   | ||||
|  	rhashtable_insert_fast(&flow_table->rhashtable, | ||||
| @@ -307,6 +326,8 @@ static int nf_flow_offload_gc_step(struc | ||||
|  	rhashtable_walk_start(&hti); | ||||
|   | ||||
|  	while ((tuplehash = rhashtable_walk_next(&hti))) { | ||||
| +		bool teardown; | ||||
| + | ||||
|  		if (IS_ERR(tuplehash)) { | ||||
|  			err = PTR_ERR(tuplehash); | ||||
|  			if (err != -EAGAIN) | ||||
| @@ -319,9 +340,13 @@ static int nf_flow_offload_gc_step(struc | ||||
|   | ||||
|  		flow = container_of(tuplehash, struct flow_offload, tuplehash[0]); | ||||
|   | ||||
| -		if (nf_flow_has_expired(flow) || | ||||
| -		    (flow->flags & (FLOW_OFFLOAD_DYING | | ||||
| -				    FLOW_OFFLOAD_TEARDOWN))) | ||||
| +		teardown = flow->flags & (FLOW_OFFLOAD_DYING | | ||||
| +					  FLOW_OFFLOAD_TEARDOWN); | ||||
| + | ||||
| +		if (!teardown) | ||||
| +			nf_ct_offload_timeout(flow); | ||||
| + | ||||
| +		if (nf_flow_has_expired(flow) || teardown) | ||||
|  			flow_offload_del(flow_table, flow); | ||||
|  	} | ||||
|  out: | ||||
| @@ -156,7 +156,7 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> | ||||
|  obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o | ||||
| --- a/net/netfilter/nf_flow_table_core.c | ||||
| +++ b/net/netfilter/nf_flow_table_core.c | ||||
| @@ -199,10 +199,16 @@ int flow_offload_add(struct nf_flowtable | ||||
| @@ -218,10 +218,16 @@ int flow_offload_add(struct nf_flowtable | ||||
|  } | ||||
|  EXPORT_SYMBOL_GPL(flow_offload_add); | ||||
|   | ||||
| @@ -173,7 +173,7 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> | ||||
|   | ||||
|  	rhashtable_remove_fast(&flow_table->rhashtable, | ||||
|  			       &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node, | ||||
| @@ -214,6 +220,9 @@ static void flow_offload_del(struct nf_f | ||||
| @@ -233,6 +239,9 @@ static void flow_offload_del(struct nf_f | ||||
|  	e = container_of(flow, struct flow_offload_entry, flow); | ||||
|  	clear_bit(IPS_OFFLOAD_BIT, &e->ct->status); | ||||
|   | ||||
| @@ -183,32 +183,17 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> | ||||
|  	flow_offload_free(flow); | ||||
|  } | ||||
|   | ||||
| @@ -307,6 +316,7 @@ static int nf_flow_offload_gc_step(struc | ||||
|  	rhashtable_walk_start(&hti); | ||||
| @@ -346,6 +355,9 @@ static int nf_flow_offload_gc_step(struc | ||||
|  		if (!teardown) | ||||
|  			nf_ct_offload_timeout(flow); | ||||
|   | ||||
|  	while ((tuplehash = rhashtable_walk_next(&hti))) { | ||||
| +		bool teardown; | ||||
|  		if (IS_ERR(tuplehash)) { | ||||
|  			err = PTR_ERR(tuplehash); | ||||
|  			if (err != -EAGAIN) | ||||
| @@ -319,9 +329,13 @@ static int nf_flow_offload_gc_step(struc | ||||
|   | ||||
|  		flow = container_of(tuplehash, struct flow_offload, tuplehash[0]); | ||||
|   | ||||
| -		if (nf_flow_has_expired(flow) || | ||||
| -		    (flow->flags & (FLOW_OFFLOAD_DYING | | ||||
| -				    FLOW_OFFLOAD_TEARDOWN))) | ||||
| +		teardown = flow->flags & (FLOW_OFFLOAD_DYING | | ||||
| +					  FLOW_OFFLOAD_TEARDOWN); | ||||
| + | ||||
| +		if (nf_flow_in_hw(flow) && !teardown) | ||||
| +			continue; | ||||
| + | ||||
| +		if (nf_flow_has_expired(flow) || teardown) | ||||
|  		if (nf_flow_has_expired(flow) || teardown) | ||||
|  			flow_offload_del(flow_table, flow); | ||||
|  	} | ||||
|  out: | ||||
| @@ -456,10 +470,43 @@ int nf_flow_dnat_port(const struct flow_ | ||||
| @@ -481,10 +493,43 @@ int nf_flow_dnat_port(const struct flow_ | ||||
|  } | ||||
|  EXPORT_SYMBOL_GPL(nf_flow_dnat_port); | ||||
|   | ||||
| @@ -252,7 +237,7 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> | ||||
|  	INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc); | ||||
|   | ||||
|  	err = rhashtable_init(&flowtable->rhashtable, | ||||
| @@ -497,6 +544,8 @@ static void nf_flow_table_iterate_cleanu | ||||
| @@ -522,6 +567,8 @@ static void nf_flow_table_iterate_cleanu | ||||
|  { | ||||
|  	nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev); | ||||
|  	flush_delayed_work(&flowtable->gc_work); | ||||
| @@ -261,7 +246,7 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> | ||||
|  } | ||||
|   | ||||
|  void nf_flow_table_cleanup(struct net *net, struct net_device *dev) | ||||
| @@ -510,6 +559,26 @@ void nf_flow_table_cleanup(struct net *n | ||||
| @@ -535,6 +582,26 @@ void nf_flow_table_cleanup(struct net *n | ||||
|  } | ||||
|  EXPORT_SYMBOL_GPL(nf_flow_table_cleanup); | ||||
|   | ||||
| @@ -288,7 +273,7 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> | ||||
|  void nf_flow_table_free(struct nf_flowtable *flow_table) | ||||
|  { | ||||
|  	mutex_lock(&flowtable_lock); | ||||
| @@ -519,9 +588,58 @@ void nf_flow_table_free(struct nf_flowta | ||||
| @@ -544,9 +611,58 @@ void nf_flow_table_free(struct nf_flowta | ||||
|  	nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL); | ||||
|  	WARN_ON(!nf_flow_offload_gc_step(flow_table)); | ||||
|  	rhashtable_destroy(&flow_table->rhashtable); | ||||
|   | ||||
| @@ -26,9 +26,9 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name> | ||||
|  	struct flow_offload_tuple_rhash		tuplehash[FLOW_OFFLOAD_DIR_MAX]; | ||||
| --- a/net/netfilter/nf_flow_table_core.c | ||||
| +++ b/net/netfilter/nf_flow_table_core.c | ||||
| @@ -332,7 +332,7 @@ static int nf_flow_offload_gc_step(struc | ||||
|  		teardown = flow->flags & (FLOW_OFFLOAD_DYING | | ||||
|  					  FLOW_OFFLOAD_TEARDOWN); | ||||
| @@ -355,7 +355,7 @@ static int nf_flow_offload_gc_step(struc | ||||
|  		if (!teardown) | ||||
|  			nf_ct_offload_timeout(flow); | ||||
|   | ||||
| -		if (nf_flow_in_hw(flow) && !teardown) | ||||
| +		if ((flow->flags & FLOW_OFFLOAD_KEEP) && !teardown) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Felix Fietkau
					Felix Fietkau