kernel: backport flow offload fixes to 5.10
Signed-off-by: Felix Fietkau <nbd@nbd.name>
This commit is contained in:
		| @@ -0,0 +1,27 @@ | ||||
| From: Pablo Neira Ayuso <pablo@netfilter.org> | ||||
| Date: Sat, 17 Jul 2021 10:10:29 +0200 | ||||
| Subject: [PATCH] netfilter: flowtable: avoid possible false sharing | ||||
|  | ||||
| The flowtable follows the same timeout approach as conntrack; use the | ||||
| same idiom as in cc16921351d8 ("netfilter: conntrack: avoid same-timeout | ||||
| update") but also include the fix provided by e37542ba111f ("netfilter: | ||||
| conntrack: avoid possible false sharing"). | ||||
|  | ||||
| Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> | ||||
| --- | ||||
|  | ||||
| --- a/net/netfilter/nf_flow_table_core.c | ||||
| +++ b/net/netfilter/nf_flow_table_core.c | ||||
| @@ -328,7 +328,11 @@ EXPORT_SYMBOL_GPL(flow_offload_add); | ||||
|  void flow_offload_refresh(struct nf_flowtable *flow_table, | ||||
|  			  struct flow_offload *flow) | ||||
|  { | ||||
| -	flow->timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow); | ||||
| +	u32 timeout; | ||||
| + | ||||
| +	timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow); | ||||
| +	if (READ_ONCE(flow->timeout) != timeout) | ||||
| +		WRITE_ONCE(flow->timeout, timeout); | ||||
|   | ||||
|  	if (likely(!nf_flowtable_hw_offload(flow_table))) | ||||
|  		return; | ||||
| @@ -98,7 +98,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name> | ||||
|  obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o | ||||
| --- /dev/null | ||||
| +++ b/net/netfilter/xt_FLOWOFFLOAD.c | ||||
| @@ -0,0 +1,657 @@ | ||||
| @@ -0,0 +1,712 @@ | ||||
| +/* | ||||
| + * Copyright (C) 2018-2021 Felix Fietkau <nbd@nbd.name> | ||||
| + * | ||||
| @@ -110,6 +110,9 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name> | ||||
| +#include <linux/init.h> | ||||
| +#include <linux/netfilter.h> | ||||
| +#include <linux/netfilter/xt_FLOWOFFLOAD.h> | ||||
| +#include <linux/if_vlan.h> | ||||
| +#include <linux/if_pppox.h> | ||||
| +#include <linux/ppp_defs.h> | ||||
| +#include <net/ip.h> | ||||
| +#include <net/netfilter/nf_conntrack.h> | ||||
| +#include <net/netfilter/nf_conntrack_extend.h> | ||||
| @@ -130,20 +133,62 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name> | ||||
| +	struct delayed_work work; | ||||
| +}; | ||||
| + | ||||
| +struct nf_forward_info { | ||||
| +	const struct net_device *indev; | ||||
| +	const struct net_device *outdev; | ||||
| +	const struct net_device *hw_outdev; | ||||
| +	struct id { | ||||
| +		__u16	id; | ||||
| +		__be16	proto; | ||||
| +	} encap[NF_FLOW_TABLE_ENCAP_MAX]; | ||||
| +	u8 num_encaps; | ||||
| +	u8 ingress_vlans; | ||||
| +	u8 h_source[ETH_ALEN]; | ||||
| +	u8 h_dest[ETH_ALEN]; | ||||
| +	enum flow_offload_xmit_type xmit_type; | ||||
| +}; | ||||
| + | ||||
| +static DEFINE_SPINLOCK(hooks_lock); | ||||
| + | ||||
| +struct xt_flowoffload_table flowtable[2]; | ||||
| + | ||||
| +static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb) | ||||
| +{ | ||||
| +	__be16 proto; | ||||
| + | ||||
| +	proto = *((__be16 *)(skb_mac_header(skb) + ETH_HLEN + | ||||
| +			     sizeof(struct pppoe_hdr))); | ||||
| +	switch (proto) { | ||||
| +	case htons(PPP_IP): | ||||
| +		return htons(ETH_P_IP); | ||||
| +	case htons(PPP_IPV6): | ||||
| +		return htons(ETH_P_IPV6); | ||||
| +	} | ||||
| + | ||||
| +	return 0; | ||||
| +} | ||||
| + | ||||
| +static unsigned int | ||||
| +xt_flowoffload_net_hook(void *priv, struct sk_buff *skb, | ||||
| +			const struct nf_hook_state *state) | ||||
| +{ | ||||
| +	struct nf_flowtable *ft = priv; | ||||
| + | ||||
| +	if (!atomic_read(&ft->rhashtable.nelems)) | ||||
| +		return NF_ACCEPT; | ||||
| +	struct vlan_ethhdr *veth; | ||||
| +	__be16 proto; | ||||
| + | ||||
| +	switch (skb->protocol) { | ||||
| +	case htons(ETH_P_8021Q): | ||||
| +		veth = (struct vlan_ethhdr *)skb_mac_header(skb); | ||||
| +		proto = veth->h_vlan_encapsulated_proto; | ||||
| +		break; | ||||
| +	case htons(ETH_P_PPP_SES): | ||||
| +		proto = nf_flow_pppoe_proto(skb); | ||||
| +		break; | ||||
| +	default: | ||||
| +		proto = skb->protocol; | ||||
| +		break; | ||||
| +	} | ||||
| + | ||||
| +	switch (proto) { | ||||
| +	case htons(ETH_P_IP): | ||||
| +		return nf_flow_offload_ip_hook(priv, skb, state); | ||||
| +	case htons(ETH_P_IPV6): | ||||
| @@ -323,7 +368,26 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name> | ||||
| +	return false; | ||||
| +} | ||||
| + | ||||
| +static bool flow_is_valid_ether_device(const struct net_device *dev) | ||||
| +static enum flow_offload_xmit_type nf_xmit_type(struct dst_entry *dst) | ||||
| +{ | ||||
| +	if (dst_xfrm(dst)) | ||||
| +		return FLOW_OFFLOAD_XMIT_XFRM; | ||||
| + | ||||
| +	return FLOW_OFFLOAD_XMIT_NEIGH; | ||||
| +} | ||||
| + | ||||
| +static void nf_default_forward_path(struct nf_flow_route *route, | ||||
| +				    struct dst_entry *dst_cache, | ||||
| +				    enum ip_conntrack_dir dir, | ||||
| +				    struct net_device **dev) | ||||
| +{ | ||||
| +	dev[!dir] = dst_cache->dev; | ||||
| +	route->tuple[!dir].in.ifindex	= dst_cache->dev->ifindex; | ||||
| +	route->tuple[dir].dst		= dst_cache; | ||||
| +	route->tuple[dir].xmit_type	= nf_xmit_type(dst_cache); | ||||
| +} | ||||
| + | ||||
| +static bool nf_is_valid_ether_device(const struct net_device *dev) | ||||
| +{ | ||||
| +	if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER || | ||||
| +	    dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr)) | ||||
| @@ -332,174 +396,181 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name> | ||||
| +	return true; | ||||
| +} | ||||
| + | ||||
| +static void | ||||
| +xt_flowoffload_route_check_path(struct nf_flow_route *route, | ||||
| +				const struct nf_conn *ct, | ||||
| +				enum ip_conntrack_dir dir, | ||||
| +				struct net_device **out_dev) | ||||
| +static void nf_dev_path_info(const struct net_device_path_stack *stack, | ||||
| +			     struct nf_forward_info *info, | ||||
| +			     unsigned char *ha) | ||||
| +{ | ||||
| +	const struct dst_entry *dst = route->tuple[dir].dst; | ||||
| +	const void *daddr = &ct->tuplehash[!dir].tuple.src.u3; | ||||
| +	struct net_device_path_stack stack; | ||||
| +	enum net_device_path_type prev_type; | ||||
| +	struct net_device *dev = dst->dev; | ||||
| +	struct neighbour *n; | ||||
| +	bool last = false; | ||||
| +	u8 nud_state; | ||||
| +	const struct net_device_path *path; | ||||
| +	int i; | ||||
| + | ||||
| +	route->tuple[!dir].in.ifindex = dev->ifindex; | ||||
| +	route->tuple[dir].out.ifindex = dev->ifindex; | ||||
| + | ||||
| +	if (route->tuple[dir].xmit_type == FLOW_OFFLOAD_XMIT_XFRM) | ||||
| +		return; | ||||
| + | ||||
| +	if ((dev->flags & IFF_LOOPBACK) || | ||||
| +	    dev->type != ARPHRD_ETHER || dev->addr_len != ETH_ALEN || | ||||
| +	    !is_valid_ether_addr(dev->dev_addr)) | ||||
| +		return; | ||||
| + | ||||
| +	n = dst_neigh_lookup(dst, daddr); | ||||
| +	if (!n) | ||||
| +		return; | ||||
| + | ||||
| +	read_lock_bh(&n->lock); | ||||
| +	nud_state = n->nud_state; | ||||
| +	memcpy(route->tuple[dir].out.h_dest, n->ha, ETH_ALEN); | ||||
| +	read_unlock_bh(&n->lock); | ||||
| +	neigh_release(n); | ||||
| + | ||||
| +	if (!(nud_state & NUD_VALID)) | ||||
| +		return; | ||||
| + | ||||
| +	if (dev_fill_forward_path(dev, route->tuple[dir].out.h_dest, &stack) || | ||||
| +	    !stack.num_paths) | ||||
| +		return; | ||||
| + | ||||
| +	prev_type = DEV_PATH_ETHERNET; | ||||
| +	for (i = 0; i <= stack.num_paths; i++) { | ||||
| +		const struct net_device_path *path = &stack.path[i]; | ||||
| +		int n_encaps = route->tuple[!dir].in.num_encaps; | ||||
| + | ||||
| +		dev = (struct net_device *)path->dev; | ||||
| +		if (flow_is_valid_ether_device(dev)) { | ||||
| +			if (route->tuple[dir].xmit_type != FLOW_OFFLOAD_XMIT_DIRECT) { | ||||
| +				memcpy(route->tuple[dir].out.h_source, | ||||
| +				       dev->dev_addr, ETH_ALEN); | ||||
| +				route->tuple[dir].out.ifindex = dev->ifindex; | ||||
| +			} | ||||
| +			route->tuple[dir].xmit_type = FLOW_OFFLOAD_XMIT_DIRECT; | ||||
| +		} | ||||
| +	memcpy(info->h_dest, ha, ETH_ALEN); | ||||
| + | ||||
| +	for (i = 0; i < stack->num_paths; i++) { | ||||
| +		path = &stack->path[i]; | ||||
| +		switch (path->type) { | ||||
| +		case DEV_PATH_PPPOE: | ||||
| +		case DEV_PATH_ETHERNET: | ||||
| +		case DEV_PATH_DSA: | ||||
| +		case DEV_PATH_VLAN: | ||||
| +			if (n_encaps >= NF_FLOW_TABLE_ENCAP_MAX || | ||||
| +			    i == stack.num_paths) { | ||||
| +				last = true; | ||||
| +		case DEV_PATH_PPPOE: | ||||
| +			info->indev = path->dev; | ||||
| +			if (is_zero_ether_addr(info->h_source)) | ||||
| +				memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN); | ||||
| + | ||||
| +			if (path->type == DEV_PATH_ETHERNET) | ||||
| +				break; | ||||
| +			if (path->type == DEV_PATH_DSA) { | ||||
| +				i = stack->num_paths; | ||||
| +				break; | ||||
| +			} | ||||
| + | ||||
| +			route->tuple[!dir].in.num_encaps++; | ||||
| +			route->tuple[!dir].in.encap[n_encaps].id = path->encap.id; | ||||
| +			route->tuple[!dir].in.encap[n_encaps].proto = path->encap.proto; | ||||
| +			/* DEV_PATH_VLAN and DEV_PATH_PPPOE */ | ||||
| +			if (info->num_encaps >= NF_FLOW_TABLE_ENCAP_MAX) { | ||||
| +				info->indev = NULL; | ||||
| +				break; | ||||
| +			} | ||||
| +			if (!info->outdev) | ||||
| +				info->outdev = path->dev; | ||||
| +			info->encap[info->num_encaps].id = path->encap.id; | ||||
| +			info->encap[info->num_encaps].proto = path->encap.proto; | ||||
| +			info->num_encaps++; | ||||
| +			if (path->type == DEV_PATH_PPPOE) | ||||
| +				memcpy(route->tuple[dir].out.h_dest, | ||||
| +				       path->encap.h_dest, ETH_ALEN); | ||||
| +				memcpy(info->h_dest, path->encap.h_dest, ETH_ALEN); | ||||
| +			break; | ||||
| +		case DEV_PATH_BRIDGE: | ||||
| +			switch (path->bridge.vlan_mode) { | ||||
| +			case DEV_PATH_BR_VLAN_TAG: | ||||
| +				if (n_encaps >= NF_FLOW_TABLE_ENCAP_MAX || | ||||
| +				    i == stack.num_paths) { | ||||
| +					last = true; | ||||
| +					break; | ||||
| +				} | ||||
| +			if (is_zero_ether_addr(info->h_source)) | ||||
| +				memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN); | ||||
| + | ||||
| +				route->tuple[!dir].in.num_encaps++; | ||||
| +				route->tuple[!dir].in.encap[n_encaps].id = | ||||
| +					path->bridge.vlan_id; | ||||
| +				route->tuple[!dir].in.encap[n_encaps].proto = | ||||
| +					path->bridge.vlan_proto; | ||||
| +			switch (path->bridge.vlan_mode) { | ||||
| +			case DEV_PATH_BR_VLAN_UNTAG_HW: | ||||
| +				info->ingress_vlans |= BIT(info->num_encaps - 1); | ||||
| +				break; | ||||
| +			case DEV_PATH_BR_VLAN_TAG: | ||||
| +				info->encap[info->num_encaps].id = path->bridge.vlan_id; | ||||
| +				info->encap[info->num_encaps].proto = path->bridge.vlan_proto; | ||||
| +				info->num_encaps++; | ||||
| +				break; | ||||
| +			case DEV_PATH_BR_VLAN_UNTAG: | ||||
| +				route->tuple[!dir].in.num_encaps--; | ||||
| +				break; | ||||
| +			case DEV_PATH_BR_VLAN_UNTAG_HW: | ||||
| +				route->tuple[!dir].in.ingress_vlans |= BIT(n_encaps - 1); | ||||
| +				info->num_encaps--; | ||||
| +				break; | ||||
| +			case DEV_PATH_BR_VLAN_KEEP: | ||||
| +				break; | ||||
| +			} | ||||
| +			break; | ||||
| +		default: | ||||
| +			last = true; | ||||
| +			info->indev = NULL; | ||||
| +			break; | ||||
| +		} | ||||
| + | ||||
| +		if (last) | ||||
| +			break; | ||||
| +	} | ||||
| +	if (!info->outdev) | ||||
| +		info->outdev = info->indev; | ||||
| + | ||||
| +	*out_dev = dev; | ||||
| +	route->tuple[dir].out.hw_ifindex = dev->ifindex; | ||||
| +	route->tuple[!dir].in.ifindex = dev->ifindex; | ||||
| +	info->hw_outdev = info->indev; | ||||
| + | ||||
| +	if (nf_is_valid_ether_device(info->indev)) | ||||
| +		info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT; | ||||
| +} | ||||
| + | ||||
| +static int | ||||
| +xt_flowoffload_route_dir(struct nf_flow_route *route, const struct nf_conn *ct, | ||||
| +			 enum ip_conntrack_dir dir, | ||||
| +			 const struct xt_action_param *par, int ifindex) | ||||
| +static int nf_dev_fill_forward_path(const struct nf_flow_route *route, | ||||
| +				     const struct dst_entry *dst_cache, | ||||
| +				     const struct nf_conn *ct, | ||||
| +				     enum ip_conntrack_dir dir, u8 *ha, | ||||
| +				     struct net_device_path_stack *stack) | ||||
| +{ | ||||
| +	struct dst_entry *dst = NULL; | ||||
| +	struct flowi fl; | ||||
| +	const void *daddr = &ct->tuplehash[!dir].tuple.src.u3; | ||||
| +	struct net_device *dev = dst_cache->dev; | ||||
| +	struct neighbour *n; | ||||
| +	u8 nud_state; | ||||
| + | ||||
| +	memset(&fl, 0, sizeof(fl)); | ||||
| +	switch (xt_family(par)) { | ||||
| +	case NFPROTO_IPV4: | ||||
| +		fl.u.ip4.daddr = ct->tuplehash[!dir].tuple.src.u3.ip; | ||||
| +		fl.u.ip4.flowi4_oif = ifindex; | ||||
| +		break; | ||||
| +	case NFPROTO_IPV6: | ||||
| +		fl.u.ip6.saddr = ct->tuplehash[!dir].tuple.dst.u3.in6; | ||||
| +		fl.u.ip6.daddr = ct->tuplehash[!dir].tuple.src.u3.in6; | ||||
| +		fl.u.ip6.flowi6_oif = ifindex; | ||||
| +		break; | ||||
| +	if (!nf_is_valid_ether_device(dev)) | ||||
| +		goto out; | ||||
| + | ||||
| +	n = dst_neigh_lookup(dst_cache, daddr); | ||||
| +	if (!n) | ||||
| +		return -1; | ||||
| + | ||||
| +	read_lock_bh(&n->lock); | ||||
| +	nud_state = n->nud_state; | ||||
| +	ether_addr_copy(ha, n->ha); | ||||
| +	read_unlock_bh(&n->lock); | ||||
| +	neigh_release(n); | ||||
| + | ||||
| +	if (!(nud_state & NUD_VALID)) | ||||
| +		return -1; | ||||
| + | ||||
| +out: | ||||
| +	return dev_fill_forward_path(dev, ha, stack); | ||||
| +} | ||||
| + | ||||
| +static void nf_dev_forward_path(struct nf_flow_route *route, | ||||
| +				const struct nf_conn *ct, | ||||
| +				enum ip_conntrack_dir dir, | ||||
| +				struct net_device **devs) | ||||
| +{ | ||||
| +	const struct dst_entry *dst = route->tuple[dir].dst; | ||||
| +	struct net_device_path_stack stack; | ||||
| +	struct nf_forward_info info = {}; | ||||
| +	unsigned char ha[ETH_ALEN]; | ||||
| +	int i; | ||||
| + | ||||
| +	if (nf_dev_fill_forward_path(route, dst, ct, dir, ha, &stack) >= 0) | ||||
| +		nf_dev_path_info(&stack, &info, ha); | ||||
| + | ||||
| +	devs[!dir] = (struct net_device *)info.indev; | ||||
| +	if (!info.indev) | ||||
| +		return; | ||||
| + | ||||
| +	route->tuple[!dir].in.ifindex = info.indev->ifindex; | ||||
| +	for (i = 0; i < info.num_encaps; i++) { | ||||
| +		route->tuple[!dir].in.encap[i].id = info.encap[i].id; | ||||
| +		route->tuple[!dir].in.encap[i].proto = info.encap[i].proto; | ||||
| +	} | ||||
| +	route->tuple[!dir].in.num_encaps = info.num_encaps; | ||||
| +	route->tuple[!dir].in.ingress_vlans = info.ingress_vlans; | ||||
| + | ||||
| +	nf_route(xt_net(par), &dst, &fl, false, xt_family(par)); | ||||
| +	if (!dst) | ||||
| +		return -ENOENT; | ||||
| + | ||||
| +	route->tuple[dir].dst = dst; | ||||
| +	if (dst_xfrm(dst)) | ||||
| +		route->tuple[dir].xmit_type = FLOW_OFFLOAD_XMIT_XFRM; | ||||
| +	else | ||||
| +		route->tuple[dir].xmit_type = FLOW_OFFLOAD_XMIT_NEIGH; | ||||
| + | ||||
| +	return 0; | ||||
| +	if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) { | ||||
| +		memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN); | ||||
| +		memcpy(route->tuple[dir].out.h_dest, info.h_dest, ETH_ALEN); | ||||
| +		route->tuple[dir].out.ifindex = info.outdev->ifindex; | ||||
| +		route->tuple[dir].out.hw_ifindex = info.hw_outdev->ifindex; | ||||
| +		route->tuple[dir].xmit_type = info.xmit_type; | ||||
| +	} | ||||
| +} | ||||
| + | ||||
| +static int | ||||
| +xt_flowoffload_route(struct sk_buff *skb, const struct nf_conn *ct, | ||||
| +		     const struct xt_action_param *par, | ||||
| +		     struct nf_flow_route *route, enum ip_conntrack_dir dir, | ||||
| +		     struct net_device **dev) | ||||
| +		     struct net_device **devs) | ||||
| +{ | ||||
| +	int ret; | ||||
| +	struct dst_entry *this_dst = skb_dst(skb); | ||||
| +	struct dst_entry *other_dst = NULL; | ||||
| +	struct flowi fl; | ||||
| + | ||||
| +	ret = xt_flowoffload_route_dir(route, ct, dir, par, | ||||
| +				       dev[dir]->ifindex); | ||||
| +	if (ret) | ||||
| +		return ret; | ||||
| +	memset(&fl, 0, sizeof(fl)); | ||||
| +	switch (xt_family(par)) { | ||||
| +	case NFPROTO_IPV4: | ||||
| +		fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip; | ||||
| +		fl.u.ip4.flowi4_oif = xt_in(par)->ifindex; | ||||
| +		break; | ||||
| +	case NFPROTO_IPV6: | ||||
| +		fl.u.ip6.saddr = ct->tuplehash[!dir].tuple.dst.u3.in6; | ||||
| +		fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6; | ||||
| +		fl.u.ip6.flowi6_oif = xt_in(par)->ifindex; | ||||
| +		break; | ||||
| +	} | ||||
| + | ||||
| +	ret = xt_flowoffload_route_dir(route, ct, !dir, par, | ||||
| +				       dev[!dir]->ifindex); | ||||
| +	if (ret) | ||||
| +		return ret; | ||||
| +	nf_route(xt_net(par), &other_dst, &fl, false, xt_family(par)); | ||||
| +	if (!other_dst) | ||||
| +		return -ENOENT; | ||||
| + | ||||
| +	xt_flowoffload_route_check_path(route, ct, dir, &dev[!dir]); | ||||
| +	xt_flowoffload_route_check_path(route, ct, !dir, &dev[dir]); | ||||
| +	nf_default_forward_path(route, this_dst, dir, devs); | ||||
| +	nf_default_forward_path(route, other_dst, !dir, devs); | ||||
| + | ||||
| +	if (route->tuple[dir].xmit_type	== FLOW_OFFLOAD_XMIT_NEIGH && | ||||
| +	    route->tuple[!dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH) { | ||||
| +		nf_dev_forward_path(route, ct, dir, devs); | ||||
| +		nf_dev_forward_path(route, ct, !dir, devs); | ||||
| +	} | ||||
| + | ||||
| +	return 0; | ||||
| +} | ||||
| @@ -542,7 +613,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name> | ||||
| +	} | ||||
| + | ||||
| +	if (nf_ct_ext_exist(ct, NF_CT_EXT_HELPER) || | ||||
| +	    ct->status & IPS_SEQ_ADJUST) | ||||
| +	    ct->status & (IPS_SEQ_ADJUST | IPS_NAT_CLASH)) | ||||
| +		return XT_CONTINUE; | ||||
| + | ||||
| +	if (!nf_ct_is_confirmed(ct)) | ||||
| @@ -586,7 +657,6 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name> | ||||
| +	xt_flowoffload_check_device(table, devs[0]); | ||||
| +	xt_flowoffload_check_device(table, devs[1]); | ||||
| + | ||||
| +	dst_release(route.tuple[dir].dst); | ||||
| +	dst_release(route.tuple[!dir].dst); | ||||
| + | ||||
| +	return XT_CONTINUE; | ||||
| @@ -594,7 +664,6 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name> | ||||
| +err_flow_add: | ||||
| +	flow_offload_free(flow); | ||||
| +err_flow_alloc: | ||||
| +	dst_release(route.tuple[dir].dst); | ||||
| +	dst_release(route.tuple[!dir].dst); | ||||
| +err_flow_route: | ||||
| +	clear_bit(IPS_OFFLOAD_BIT, &ct->status); | ||||
| @@ -661,20 +730,6 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name> | ||||
| +	.notifier_call	= flow_offload_netdev_event, | ||||
| +}; | ||||
| + | ||||
| +static unsigned int | ||||
| +nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb, | ||||
| +			  const struct nf_hook_state *state) | ||||
| +{ | ||||
| +	switch (skb->protocol) { | ||||
| +	case htons(ETH_P_IP): | ||||
| +		return nf_flow_offload_ip_hook(priv, skb, state); | ||||
| +	case htons(ETH_P_IPV6): | ||||
| +		return nf_flow_offload_ipv6_hook(priv, skb, state); | ||||
| +	} | ||||
| + | ||||
| +	return NF_ACCEPT; | ||||
| +} | ||||
| + | ||||
| +static int nf_flow_rule_route_inet(struct net *net, | ||||
| +				   const struct flow_offload *flow, | ||||
| +				   enum flow_offload_tuple_dir dir, | ||||
| @@ -704,7 +759,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name> | ||||
| +	.setup		= nf_flow_table_offload_setup, | ||||
| +	.action		= nf_flow_rule_route_inet, | ||||
| +	.free		= nf_flow_table_free, | ||||
| +	.hook		= nf_flow_offload_inet_hook, | ||||
| +	.hook		= xt_flowoffload_net_hook, | ||||
| +	.owner		= THIS_MODULE, | ||||
| +}; | ||||
| + | ||||
| @@ -766,7 +821,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name> | ||||
|  #include <net/netfilter/nf_flow_table.h> | ||||
|  #include <net/netfilter/nf_conntrack.h> | ||||
|  #include <net/netfilter/nf_conntrack_core.h> | ||||
| @@ -395,8 +394,7 @@ flow_offload_lookup(struct nf_flowtable | ||||
| @@ -401,8 +400,7 @@ flow_offload_lookup(struct nf_flowtable | ||||
|  } | ||||
|  EXPORT_SYMBOL_GPL(flow_offload_lookup); | ||||
|   | ||||
| @@ -776,7 +831,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name> | ||||
|  		      void (*iter)(struct flow_offload *flow, void *data), | ||||
|  		      void *data) | ||||
|  { | ||||
| @@ -428,6 +426,7 @@ nf_flow_table_iterate(struct nf_flowtabl | ||||
| @@ -434,6 +432,7 @@ nf_flow_table_iterate(struct nf_flowtabl | ||||
|   | ||||
|  	return err; | ||||
|  } | ||||
|   | ||||
| @@ -0,0 +1,27 @@ | ||||
| From: Felix Fietkau <nbd@nbd.name> | ||||
| Date: Fri, 6 May 2022 12:37:23 +0200 | ||||
| Subject: [PATCH] netfilter: flowtable: fix excessive hw offload attempts | ||||
|  after failure | ||||
|  | ||||
| If a flow cannot be offloaded, the code currently repeatedly tries again as | ||||
| quickly as possible, which can significantly increase system load. | ||||
| Fix this by limiting flow timeout update and hardware offload retry to once | ||||
| per second. | ||||
|  | ||||
| Signed-off-by: Felix Fietkau <nbd@nbd.name> | ||||
| --- | ||||
|  | ||||
| --- a/net/netfilter/nf_flow_table_core.c | ||||
| +++ b/net/netfilter/nf_flow_table_core.c | ||||
| @@ -331,8 +331,10 @@ void flow_offload_refresh(struct nf_flow | ||||
|  	u32 timeout; | ||||
|   | ||||
|  	timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow); | ||||
| -	if (READ_ONCE(flow->timeout) != timeout) | ||||
| +	if (timeout - READ_ONCE(flow->timeout) > HZ) | ||||
|  		WRITE_ONCE(flow->timeout, timeout); | ||||
| +	else | ||||
| +		return; | ||||
|   | ||||
|  	if (likely(!nf_flowtable_hw_offload(flow_table))) | ||||
|  		return; | ||||
| @@ -0,0 +1,64 @@ | ||||
| From: Felix Fietkau <nbd@nbd.name> | ||||
| Date: Fri, 6 May 2022 12:43:58 +0200 | ||||
| Subject: [PATCH] netfilter: nft_flow_offload: skip dst neigh lookup for | ||||
|  ppp devices | ||||
|  | ||||
| The dst entry does not contain a valid hardware address, so skip the lookup | ||||
| in order to avoid running into errors here. | ||||
| The proper hardware address is filled in from nft_dev_path_info. | ||||
|  | ||||
| Signed-off-by: Felix Fietkau <nbd@nbd.name> | ||||
| --- | ||||
|  | ||||
| --- a/net/netfilter/nft_flow_offload.c | ||||
| +++ b/net/netfilter/nft_flow_offload.c | ||||
| @@ -36,6 +36,15 @@ static void nft_default_forward_path(str | ||||
|  	route->tuple[dir].xmit_type	= nft_xmit_type(dst_cache); | ||||
|  } | ||||
|   | ||||
| +static bool nft_is_valid_ether_device(const struct net_device *dev) | ||||
| +{ | ||||
| +	if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER || | ||||
| +	    dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr)) | ||||
| +		return false; | ||||
| + | ||||
| +	return true; | ||||
| +} | ||||
| + | ||||
|  static int nft_dev_fill_forward_path(const struct nf_flow_route *route, | ||||
|  				     const struct dst_entry *dst_cache, | ||||
|  				     const struct nf_conn *ct, | ||||
| @@ -47,6 +56,9 @@ static int nft_dev_fill_forward_path(con | ||||
|  	struct neighbour *n; | ||||
|  	u8 nud_state; | ||||
|   | ||||
| +	if (!nft_is_valid_ether_device(dev)) | ||||
| +		goto out; | ||||
| + | ||||
|  	n = dst_neigh_lookup(dst_cache, daddr); | ||||
|  	if (!n) | ||||
|  		return -1; | ||||
| @@ -60,6 +72,7 @@ static int nft_dev_fill_forward_path(con | ||||
|  	if (!(nud_state & NUD_VALID)) | ||||
|  		return -1; | ||||
|   | ||||
| +out: | ||||
|  	return dev_fill_forward_path(dev, ha, stack); | ||||
|  } | ||||
|   | ||||
| @@ -78,15 +91,6 @@ struct nft_forward_info { | ||||
|  	enum flow_offload_xmit_type xmit_type; | ||||
|  }; | ||||
|   | ||||
| -static bool nft_is_valid_ether_device(const struct net_device *dev) | ||||
| -{ | ||||
| -	if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER || | ||||
| -	    dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr)) | ||||
| -		return false; | ||||
| - | ||||
| -	return true; | ||||
| -} | ||||
| - | ||||
|  static void nft_dev_path_info(const struct net_device_path_stack *stack, | ||||
|  			      struct nft_forward_info *info, | ||||
|  			      unsigned char *ha, struct nf_flowtable *flowtable) | ||||
| @@ -0,0 +1,66 @@ | ||||
| From: Felix Fietkau <nbd@nbd.name> | ||||
| Date: Fri, 6 May 2022 13:54:44 +0200 | ||||
| Subject: [PATCH] net: fix dev_fill_forward_path with pppoe + bridge | ||||
|  | ||||
| When calling dev_fill_forward_path on a pppoe device, the provided destination | ||||
| address is invalid. In order for the bridge fdb lookup to succeed, the pppoe | ||||
| code needs to update ctx->daddr to the correct value. | ||||
| Fix this by storing the address inside struct net_device_path_ctx. | ||||
|  | ||||
| Signed-off-by: Felix Fietkau <nbd@nbd.name> | ||||
| --- | ||||
|  | ||||
| --- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c | ||||
| +++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c | ||||
| @@ -91,7 +91,6 @@ mtk_flow_get_wdma_info(struct net_device | ||||
|  { | ||||
|  	struct net_device_path_ctx ctx = { | ||||
|  		.dev = dev, | ||||
| -		.daddr = addr, | ||||
|  	}; | ||||
|  	struct net_device_path path = {}; | ||||
|   | ||||
| @@ -101,6 +100,7 @@ mtk_flow_get_wdma_info(struct net_device | ||||
|  	if (!dev->netdev_ops->ndo_fill_forward_path) | ||||
|  		return -1; | ||||
|   | ||||
| +	memcpy(ctx.daddr, addr, sizeof(ctx.daddr)); | ||||
|  	if (dev->netdev_ops->ndo_fill_forward_path(&ctx, &path)) | ||||
|  		return -1; | ||||
|   | ||||
| --- a/drivers/net/ppp/pppoe.c | ||||
| +++ b/drivers/net/ppp/pppoe.c | ||||
| @@ -988,6 +988,7 @@ static int pppoe_fill_forward_path(struc | ||||
|  	path->encap.proto = htons(ETH_P_PPP_SES); | ||||
|  	path->encap.id = be16_to_cpu(po->num); | ||||
|  	memcpy(path->encap.h_dest, po->pppoe_pa.remote, ETH_ALEN); | ||||
| +	memcpy(ctx->daddr, po->pppoe_pa.remote, ETH_ALEN); | ||||
|  	path->dev = ctx->dev; | ||||
|  	ctx->dev = dev; | ||||
|   | ||||
| --- a/include/linux/netdevice.h | ||||
| +++ b/include/linux/netdevice.h | ||||
| @@ -878,7 +878,7 @@ struct net_device_path_stack { | ||||
|   | ||||
|  struct net_device_path_ctx { | ||||
|  	const struct net_device *dev; | ||||
| -	const u8		*daddr; | ||||
| +	u8			daddr[ETH_ALEN]; | ||||
|   | ||||
|  	int			num_vlans; | ||||
|  	struct { | ||||
| --- a/net/core/dev.c | ||||
| +++ b/net/core/dev.c | ||||
| @@ -863,11 +863,11 @@ int dev_fill_forward_path(const struct n | ||||
|  	const struct net_device *last_dev; | ||||
|  	struct net_device_path_ctx ctx = { | ||||
|  		.dev	= dev, | ||||
| -		.daddr	= daddr, | ||||
|  	}; | ||||
|  	struct net_device_path *path; | ||||
|  	int ret = 0; | ||||
|   | ||||
| +	memcpy(ctx.daddr, daddr, sizeof(ctx.daddr)); | ||||
|  	stack->num_paths = 0; | ||||
|  	while (ctx.dev && ctx.dev->netdev_ops->ndo_fill_forward_path) { | ||||
|  		last_dev = ctx.dev; | ||||
| @@ -0,0 +1,24 @@ | ||||
| From: Felix Fietkau <nbd@nbd.name> | ||||
| Date: Fri, 6 May 2022 15:15:06 +0200 | ||||
| Subject: [PATCH] netfilter: nft_flow_offload: fix offload with pppoe + | ||||
|  vlan | ||||
|  | ||||
| When running a combination of PPPoE on top of a VLAN, we need to set | ||||
| info->outdev to the PPPoE device, otherwise PPPoE encap is skipped | ||||
| during software offload. | ||||
|  | ||||
| Signed-off-by: Felix Fietkau <nbd@nbd.name> | ||||
| --- | ||||
|  | ||||
| --- a/net/netfilter/nft_flow_offload.c | ||||
| +++ b/net/netfilter/nft_flow_offload.c | ||||
| @@ -123,7 +123,8 @@ static void nft_dev_path_info(const stru | ||||
|  				info->indev = NULL; | ||||
|  				break; | ||||
|  			} | ||||
| -			info->outdev = path->dev; | ||||
| +			if (!info->outdev) | ||||
| +				info->outdev = path->dev; | ||||
|  			info->encap[info->num_encaps].id = path->encap.id; | ||||
|  			info->encap[info->num_encaps].proto = path->encap.proto; | ||||
|  			info->num_encaps++; | ||||
		Reference in New Issue
	
	Block a user
	 Felix Fietkau
					Felix Fietkau