kernel: fix conntrack leak for flow_offload connections
The leak was caused by a race condition between offload teardown and the
conntrack gc bumping the timeout of offloaded connections.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
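As an illustration of the fixed behaviour, a small standalone userspace model follows (an assumption-laden sketch: struct conn, struct flow, offload_fixup_timeout() and flow_gc_step() are made-up stand-ins for the kernel objects, and HZ is arbitrary; only the DAY/2 refresh rule and the teardown check mirror the patch below):

/*
 * Illustrative userspace model, not kernel code: the conntrack timeout is
 * refreshed from the flow table gc, and only while the flow is not being
 * torn down, so an entry whose offload has just been stopped can expire
 * normally instead of leaking.
 */
#include <stdio.h>
#include <stdbool.h>

#define HZ   100
#define DAY  (86400 * HZ)

struct conn {
	long timeout;           /* absolute expiry time, in "jiffies" */
};

struct flow {
	struct conn *ct;
	bool teardown;          /* models FLOW_OFFLOAD_DYING/TEARDOWN */
};

static long jiffies;             /* models the kernel clock */

/* Same shape as nf_ct_offload_timeout() after the patch: only bump the
 * timeout when less than half a day of lifetime is left. */
static void offload_fixup_timeout(struct flow *flow)
{
	struct conn *ct = flow->ct;

	if (ct->timeout - jiffies < DAY / 2)
		ct->timeout = jiffies + DAY;
}

/* Flow table gc step: refresh conntrack only while still offloaded. */
static void flow_gc_step(struct flow *flow)
{
	if (!flow->teardown)
		offload_fixup_timeout(flow);
	/* expiry / teardown handling of the flow entry would follow here */
}

int main(void)
{
	struct conn ct = { .timeout = 30 * HZ };
	struct flow flow = { .ct = &ct, .teardown = false };

	flow_gc_step(&flow);            /* offloaded: bumped to ~1 day */
	printf("offloaded: expires in %ld s\n", (ct.timeout - jiffies) / HZ);

	flow.teardown = true;           /* offload torn down */
	ct.timeout = jiffies + 30 * HZ; /* conntrack restores a normal timeout */
	flow_gc_step(&flow);            /* no bump: the entry may expire */
	printf("after teardown: expires in %ld s\n", (ct.timeout - jiffies) / HZ);
	return 0;
}

Built with any C compiler, this prints an ~86400 s expiry while the flow is offloaded and 30 s once teardown has started, which is the behaviour the backported patch establishes.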
@@ -0,0 +1,110 @@
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 13 Jun 2018 12:33:39 +0200
Subject: [PATCH] netfilter: nf_flow_table: fix offloaded connection timeout
 corner case

The full teardown of offloaded flows is deferred to a gc work item,
however processing of packets by netfilter needs to happen immediately
after a teardown is requested, because the conntrack state needs to be
fixed up.

Since the IPS_OFFLOAD_BIT is still kept until the teardown is complete,
the netfilter conntrack gc can accidentally bump the timeout of a
connection where offload was just stopped, causing a conntrack entry
leak.

Fix this by moving the conntrack timeout bumping from conntrack core to
the nf_flow_offload and add a check to prevent bogus timeout bumps.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---

--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -978,18 +978,6 @@ static bool gc_worker_can_early_drop(con
 	return false;
 }
 
-#define	DAY	(86400 * HZ)
-
-/* Set an arbitrary timeout large enough not to ever expire, this save
- * us a check for the IPS_OFFLOAD_BIT from the packet path via
- * nf_ct_is_expired().
- */
-static void nf_ct_offload_timeout(struct nf_conn *ct)
-{
-	if (nf_ct_expires(ct) < DAY / 2)
-		ct->timeout = nfct_time_stamp + DAY;
-}
-
 static void gc_worker(struct work_struct *work)
 {
 	unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u);
@@ -1026,10 +1014,8 @@ static void gc_worker(struct work_struct
 			tmp = nf_ct_tuplehash_to_ctrack(h);
 
 			scanned++;
-			if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
-				nf_ct_offload_timeout(tmp);
+			if (test_bit(IPS_OFFLOAD_BIT, &tmp->status))
 				continue;
-			}
 
 			if (nf_ct_is_expired(tmp)) {
 				nf_ct_gc_expired(tmp);
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -185,8 +185,27 @@ static const struct rhashtable_params nf
 	.automatic_shrinking	= true,
 };
 
+#define	DAY	(86400 * HZ)
+
+/* Set an arbitrary timeout large enough not to ever expire, this save
+ * us a check for the IPS_OFFLOAD_BIT from the packet path via
+ * nf_ct_is_expired().
+ */
+static void nf_ct_offload_timeout(struct flow_offload *flow)
+{
+	struct flow_offload_entry *entry;
+	struct nf_conn *ct;
+
+	entry = container_of(flow, struct flow_offload_entry, flow);
+	ct = entry->ct;
+
+	if (nf_ct_expires(ct) < DAY / 2)
+		ct->timeout = nfct_time_stamp + DAY;
+}
+
 int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
 {
+	nf_ct_offload_timeout(flow);
 	flow->timeout = (u32)jiffies;
 
 	rhashtable_insert_fast(&flow_table->rhashtable,
@@ -307,6 +326,8 @@ static int nf_flow_offload_gc_step(struc
 	rhashtable_walk_start(&hti);
 
 	while ((tuplehash = rhashtable_walk_next(&hti))) {
+		bool teardown;
+
 		if (IS_ERR(tuplehash)) {
 			err = PTR_ERR(tuplehash);
 			if (err != -EAGAIN)
@@ -319,9 +340,13 @@ static int nf_flow_offload_gc_step(struc
 
 		flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
 
-		if (nf_flow_has_expired(flow) ||
-		    (flow->flags & (FLOW_OFFLOAD_DYING |
-				    FLOW_OFFLOAD_TEARDOWN)))
+		teardown = flow->flags & (FLOW_OFFLOAD_DYING |
+					  FLOW_OFFLOAD_TEARDOWN);
+
+		if (!teardown)
+			nf_ct_offload_timeout(flow);
+
+		if (nf_flow_has_expired(flow) || teardown)
 			flow_offload_del(flow_table, flow);
 	}
 out:
The remaining hunks update the existing flow-offload patches so that they
still apply on top of the new backport: hunk offsets are refreshed, and the
teardown handling that the backport now provides is dropped from them.

@@ -156,7 +156,7 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
  obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
  --- a/net/netfilter/nf_flow_table_core.c
  +++ b/net/netfilter/nf_flow_table_core.c
-@@ -199,10 +199,16 @@ int flow_offload_add(struct nf_flowtable
+@@ -218,10 +218,16 @@ int flow_offload_add(struct nf_flowtable
  }
  EXPORT_SYMBOL_GPL(flow_offload_add);
  
@@ -173,7 +173,7 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
  
  	rhashtable_remove_fast(&flow_table->rhashtable,
  			       &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
-@@ -214,6 +220,9 @@ static void flow_offload_del(struct nf_f
+@@ -233,6 +239,9 @@ static void flow_offload_del(struct nf_f
  	e = container_of(flow, struct flow_offload_entry, flow);
  	clear_bit(IPS_OFFLOAD_BIT, &e->ct->status);
  
@@ -183,32 +183,17 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
  	flow_offload_free(flow);
  }
  
-@@ -307,6 +316,7 @@ static int nf_flow_offload_gc_step(struc
- 	rhashtable_walk_start(&hti);
- 
- 	while ((tuplehash = rhashtable_walk_next(&hti))) {
-+		bool teardown;
- 		if (IS_ERR(tuplehash)) {
- 			err = PTR_ERR(tuplehash);
- 			if (err != -EAGAIN)
-@@ -319,9 +329,13 @@ static int nf_flow_offload_gc_step(struc
- 
- 		flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
- 
--		if (nf_flow_has_expired(flow) ||
--		    (flow->flags & (FLOW_OFFLOAD_DYING |
--				    FLOW_OFFLOAD_TEARDOWN)))
-+		teardown = flow->flags & (FLOW_OFFLOAD_DYING |
-+					  FLOW_OFFLOAD_TEARDOWN);
-+
+@@ -346,6 +355,9 @@ static int nf_flow_offload_gc_step(struc
+ 		if (!teardown)
+ 			nf_ct_offload_timeout(flow);
+ 
 +		if (nf_flow_in_hw(flow) && !teardown)
 +			continue;
 +
-+		if (nf_flow_has_expired(flow) || teardown)
+ 		if (nf_flow_has_expired(flow) || teardown)
  			flow_offload_del(flow_table, flow);
  	}
- out:
-@@ -456,10 +470,43 @@ int nf_flow_dnat_port(const struct flow_
+@@ -481,10 +493,43 @@ int nf_flow_dnat_port(const struct flow_
  }
  EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
  
@@ -252,7 +237,7 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
  	INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
  
  	err = rhashtable_init(&flowtable->rhashtable,
-@@ -497,6 +544,8 @@ static void nf_flow_table_iterate_cleanu
+@@ -522,6 +567,8 @@ static void nf_flow_table_iterate_cleanu
  {
  	nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev);
  	flush_delayed_work(&flowtable->gc_work);
@@ -261,7 +246,7 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
  }
  
  void nf_flow_table_cleanup(struct net *net, struct net_device *dev)
-@@ -510,6 +559,26 @@ void nf_flow_table_cleanup(struct net *n
+@@ -535,6 +582,26 @@ void nf_flow_table_cleanup(struct net *n
  }
  EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
  
@@ -288,7 +273,7 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
  void nf_flow_table_free(struct nf_flowtable *flow_table)
  {
  	mutex_lock(&flowtable_lock);
-@@ -519,9 +588,58 @@ void nf_flow_table_free(struct nf_flowta
+@@ -544,9 +611,58 @@ void nf_flow_table_free(struct nf_flowta
  	nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
  	WARN_ON(!nf_flow_offload_gc_step(flow_table));
  	rhashtable_destroy(&flow_table->rhashtable);
@@ -26,9 +26,9 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
  	struct flow_offload_tuple_rhash		tuplehash[FLOW_OFFLOAD_DIR_MAX];
  --- a/net/netfilter/nf_flow_table_core.c
  +++ b/net/netfilter/nf_flow_table_core.c
-@@ -332,7 +332,7 @@ static int nf_flow_offload_gc_step(struc
- 		teardown = flow->flags & (FLOW_OFFLOAD_DYING |
- 					  FLOW_OFFLOAD_TEARDOWN);
+@@ -355,7 +355,7 @@ static int nf_flow_offload_gc_step(struc
+ 		if (!teardown)
+ 			nf_ct_offload_timeout(flow);
  
 -		if (nf_flow_in_hw(flow) && !teardown)
 +		if ((flow->flags & FLOW_OFFLOAD_KEEP) && !teardown)