171 lines
		
	
	
		
			5.0 KiB
		
	
	
	
		
			Diff
		
	
	
	
	
	
			
		
		
	
	
			171 lines
		
	
	
		
			5.0 KiB
		
	
	
	
		
			Diff
		
	
	
	
	
	
| From: Pablo Neira Ayuso <pablo@netfilter.org>
 | |
| Date: Wed, 24 Mar 2021 02:30:32 +0100
 | |
| Subject: [PATCH] net: resolve forwarding path from virtual netdevice and
 | |
|  HW destination address
 | |
| 
 | |
| This patch adds dev_fill_forward_path() which resolves the path to reach
 | |
| the real netdevice from the IP forwarding side. This function takes as
 | |
| input the netdevice and the destination hardware address and it walks
 | |
| down the devices calling .ndo_fill_forward_path() for each device until
 | |
| the real device is found.
 | |
| 
 | |
| For instance, assuming the following topology:
 | |
| 
 | |
|                IP forwarding
 | |
|               /             \
 | |
|            br0              eth0
 | |
|            / \
 | |
|        eth1  eth2
 | |
|         .
 | |
|         .
 | |
|         .
 | |
|        ethX
 | |
|  ab:cd:ef:ab:cd:ef
 | |
| 
 | |
| where eth1 and eth2 are bridge ports and eth0 provides WAN connectivity.
 | |
| ethX is the interface in another box which is connected to the eth1
 | |
| bridge port.
 | |
| 
 | |
| For packets going through IP forwarding to br0 whose destination MAC
 | |
| address is ab:cd:ef:ab:cd:ef, dev_fill_forward_path() provides the
 | |
| following path:
 | |
| 
 | |
| 	br0 -> eth1
 | |
| 
 | |
| .ndo_fill_forward_path for br0 looks up the destination MAC address in
 | |
| the FDB to get the bridge port eth1.
 | |
| 
 | |
| This information allows creating a fast path that bypasses the classic
 | |
| bridge and IP forwarding paths, so packets go directly from the bridge
 | |
| port eth1 to eth0 (wan interface) and vice versa.
 | |
| 
 | |
|              fast path
 | |
|       .------------------------.
 | |
|      /                          \
 | |
|     |           IP forwarding   |
 | |
|     |          /             \  \/
 | |
|     |       br0               eth0
 | |
|     .       / \
 | |
|      -> eth1  eth2
 | |
|         .
 | |
|         .
 | |
|         .
 | |
|        ethX
 | |
|  ab:cd:ef:ab:cd:ef
 | |
| 
 | |
| Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
 | |
| ---
 | |
| 
 | |
| --- a/include/linux/netdevice.h
 | |
| +++ b/include/linux/netdevice.h
 | |
| @@ -850,6 +850,27 @@ typedef u16 (*select_queue_fallback_t)(s
 | |
|  				       struct sk_buff *skb,
 | |
|  				       struct net_device *sb_dev);
 | |
|  
 | |
| +enum net_device_path_type {
 | |
| +	DEV_PATH_ETHERNET = 0,
 | |
| +};
 | |
| +
 | |
| +struct net_device_path {
 | |
| +	enum net_device_path_type	type;
 | |
| +	const struct net_device		*dev;
 | |
| +};
 | |
| +
 | |
| +#define NET_DEVICE_PATH_STACK_MAX	5
 | |
| +
 | |
| +struct net_device_path_stack {
 | |
| +	int			num_paths;
 | |
| +	struct net_device_path	path[NET_DEVICE_PATH_STACK_MAX];
 | |
| +};
 | |
| +
 | |
| +struct net_device_path_ctx {
 | |
| +	const struct net_device *dev;
 | |
| +	const u8		*daddr;
 | |
| +};
 | |
| +
 | |
|  enum tc_setup_type {
 | |
|  	TC_SETUP_QDISC_MQPRIO,
 | |
|  	TC_SETUP_CLSU32,
 | |
| @@ -1296,6 +1317,8 @@ struct netdev_net_notifier {
 | |
|   * struct net_device *(*ndo_get_peer_dev)(struct net_device *dev);
 | |
|   *	If a device is paired with a peer device, return the peer instance.
 | |
|   *	The caller must be under RCU read context.
 | |
| + * int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx, struct net_device_path *path);
 | |
| + *     Get the forwarding path to reach the real device from the HW destination address
 | |
|   */
 | |
|  struct net_device_ops {
 | |
|  	int			(*ndo_init)(struct net_device *dev);
 | |
| @@ -1504,6 +1527,8 @@ struct net_device_ops {
 | |
|  	int			(*ndo_tunnel_ctl)(struct net_device *dev,
 | |
|  						  struct ip_tunnel_parm *p, int cmd);
 | |
|  	struct net_device *	(*ndo_get_peer_dev)(struct net_device *dev);
 | |
| +	int                     (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx,
 | |
| +                                                         struct net_device_path *path);
 | |
|  };
 | |
|  
 | |
|  /**
 | |
| @@ -2851,6 +2876,8 @@ void dev_remove_offload(struct packet_of
 | |
|  
 | |
|  int dev_get_iflink(const struct net_device *dev);
 | |
|  int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb);
 | |
| +int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
 | |
| +			  struct net_device_path_stack *stack);
 | |
|  struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags,
 | |
|  				      unsigned short mask);
 | |
|  struct net_device *dev_get_by_name(struct net *net, const char *name);
 | |
| --- a/net/core/dev.c
 | |
| +++ b/net/core/dev.c
 | |
| @@ -847,6 +847,52 @@ int dev_fill_metadata_dst(struct net_dev
 | |
|  }
 | |
|  EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);
 | |
|  
 | |
| +static struct net_device_path *dev_fwd_path(struct net_device_path_stack *stack)
 | |
| +{
 | |
| +	int k = stack->num_paths++;
 | |
| +
 | |
| +	if (WARN_ON_ONCE(k >= NET_DEVICE_PATH_STACK_MAX))
 | |
| +		return NULL;
 | |
| +
 | |
| +	return &stack->path[k];
 | |
| +}
 | |
| +
 | |
| +int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
 | |
| +			  struct net_device_path_stack *stack)
 | |
| +{
 | |
| +	const struct net_device *last_dev;
 | |
| +	struct net_device_path_ctx ctx = {
 | |
| +		.dev	= dev,
 | |
| +		.daddr	= daddr,
 | |
| +	};
 | |
| +	struct net_device_path *path;
 | |
| +	int ret = 0;
 | |
| +
 | |
| +	stack->num_paths = 0;
 | |
| +	while (ctx.dev && ctx.dev->netdev_ops->ndo_fill_forward_path) {
 | |
| +		last_dev = ctx.dev;
 | |
| +		path = dev_fwd_path(stack);
 | |
| +		if (!path)
 | |
| +			return -1;
 | |
| +
 | |
| +		memset(path, 0, sizeof(struct net_device_path));
 | |
| +		ret = ctx.dev->netdev_ops->ndo_fill_forward_path(&ctx, path);
 | |
| +		if (ret < 0)
 | |
| +			return -1;
 | |
| +
 | |
| +		if (WARN_ON_ONCE(last_dev == ctx.dev))
 | |
| +			return -1;
 | |
| +	}
 | |
| +	path = dev_fwd_path(stack);
 | |
| +	if (!path)
 | |
| +		return -1;
 | |
| +	path->type = DEV_PATH_ETHERNET;
 | |
| +	path->dev = ctx.dev;
 | |
| +
 | |
| +	return ret;
 | |
| +}
 | |
| +EXPORT_SYMBOL_GPL(dev_fill_forward_path);
 | |
| +
 | |
|  /**
 | |
|   *	__dev_get_by_name	- find a device by its name
 | |
|   *	@net: the applicable net namespace
 | 
