ar71xx: add unaligned access hacks for VXLAN
Gives a ~5% performance gain. Signed-off-by: Matthias Schiffer <mschiffer@universe-factory.net>
This commit is contained in:
		| @@ -886,3 +886,95 @@ | |||||||
|  	}; |  	}; | ||||||
|  	if (skb->ip_summed != CHECKSUM_PARTIAL) { |  	if (skb->ip_summed != CHECKSUM_PARTIAL) { | ||||||
|  		*sum = csum_fold(csum_partial(diff, sizeof(diff), |  		*sum = csum_fold(csum_partial(diff, sizeof(diff), | ||||||
|  | --- a/drivers/net/vxlan.c | ||||||
|  | +++ b/drivers/net/vxlan.c | ||||||
|  | @@ -1811,15 +1811,15 @@ static int vxlan_build_skb(struct sk_buf | ||||||
|  |  		goto out_free; | ||||||
|  |   | ||||||
|  |  	vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); | ||||||
|  | -	vxh->vx_flags = VXLAN_HF_VNI; | ||||||
|  | -	vxh->vx_vni = vxlan_vni_field(vni); | ||||||
|  | +	net_hdr_word(&vxh->vx_flags) = VXLAN_HF_VNI; | ||||||
|  | +	net_hdr_word(&vxh->vx_vni) = vxlan_vni_field(vni); | ||||||
|  |   | ||||||
|  |  	if (type & SKB_GSO_TUNNEL_REMCSUM) { | ||||||
|  |  		unsigned int start; | ||||||
|  |   | ||||||
|  |  		start = skb_checksum_start_offset(skb) - sizeof(struct vxlanhdr); | ||||||
|  | -		vxh->vx_vni |= vxlan_compute_rco(start, skb->csum_offset); | ||||||
|  | -		vxh->vx_flags |= VXLAN_HF_RCO; | ||||||
|  | +		net_hdr_word(&vxh->vx_vni) |= vxlan_compute_rco(start, skb->csum_offset); | ||||||
|  | +		net_hdr_word(&vxh->vx_flags) |= VXLAN_HF_RCO; | ||||||
|  |   | ||||||
|  |  		if (!skb_is_gso(skb)) { | ||||||
|  |  			skb->ip_summed = CHECKSUM_NONE; | ||||||
|  | --- a/include/linux/etherdevice.h | ||||||
|  | +++ b/include/linux/etherdevice.h | ||||||
|  | @@ -435,7 +435,7 @@ static inline bool is_etherdev_addr(cons | ||||||
|  |   * @b: Pointer to Ethernet header | ||||||
|  |   * | ||||||
|  |   * Compare two Ethernet headers, returns 0 if equal. | ||||||
|  | - * This assumes that the network header (i.e., IP header) is 4-byte | ||||||
|  | + * This assumes that the network header (i.e., IP header) is 2-byte | ||||||
|  |   * aligned OR the platform can handle unaligned access.  This is the | ||||||
|  |   * case for all packets coming into netif_receive_skb or similar | ||||||
|  |   * entry points. | ||||||
|  | @@ -458,11 +458,12 @@ static inline unsigned long compare_ethe | ||||||
|  |  	fold |= *(unsigned long *)(a + 6) ^ *(unsigned long *)(b + 6); | ||||||
|  |  	return fold; | ||||||
|  |  #else | ||||||
|  | -	u32 *a32 = (u32 *)((u8 *)a + 2); | ||||||
|  | -	u32 *b32 = (u32 *)((u8 *)b + 2); | ||||||
|  | +	const u16 *a16 = a; | ||||||
|  | +	const u16 *b16 = b; | ||||||
|  |   | ||||||
|  | -	return (*(u16 *)a ^ *(u16 *)b) | (a32[0] ^ b32[0]) | | ||||||
|  | -	       (a32[1] ^ b32[1]) | (a32[2] ^ b32[2]); | ||||||
|  | +	return (a16[0] ^ b16[0]) | (a16[1] ^ b16[1]) | (a16[2] ^ b16[2]) | | ||||||
|  | +	       (a16[3] ^ b16[3]) | (a16[4] ^ b16[4]) | (a16[5] ^ b16[5]) | | ||||||
|  | +	       (a16[6] ^ b16[6]); | ||||||
|  |  #endif | ||||||
|  |  } | ||||||
|  |   | ||||||
|  | --- a/net/ipv4/tcp_offload.c | ||||||
|  | +++ b/net/ipv4/tcp_offload.c | ||||||
|  | @@ -215,7 +215,7 @@ struct sk_buff **tcp_gro_receive(struct | ||||||
|  |   | ||||||
|  |  		th2 = tcp_hdr(p); | ||||||
|  |   | ||||||
|  | -		if (*(u32 *)&th->source ^ *(u32 *)&th2->source) { | ||||||
|  | +		if (net_hdr_word(&th->source) ^ net_hdr_word(&th2->source)) { | ||||||
|  |  			NAPI_GRO_CB(p)->same_flow = 0; | ||||||
|  |  			continue; | ||||||
|  |  		} | ||||||
|  | @@ -233,8 +233,8 @@ found: | ||||||
|  |  		  ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH)); | ||||||
|  |  	flush |= (__force int)(th->ack_seq ^ th2->ack_seq); | ||||||
|  |  	for (i = sizeof(*th); i < thlen; i += 4) | ||||||
|  | -		flush |= *(u32 *)((u8 *)th + i) ^ | ||||||
|  | -			 *(u32 *)((u8 *)th2 + i); | ||||||
|  | +		flush |= net_hdr_word((u8 *)th + i) ^ | ||||||
|  | +			 net_hdr_word((u8 *)th2 + i); | ||||||
|  |   | ||||||
|  |  	/* When we receive our second frame we can made a decision on if we | ||||||
|  |  	 * continue this flow as an atomic flow with a fixed ID or if we use | ||||||
|  | --- a/net/ipv6/netfilter/ip6table_mangle.c | ||||||
|  | +++ b/net/ipv6/netfilter/ip6table_mangle.c | ||||||
|  | @@ -58,7 +58,7 @@ ip6t_mangle_out(struct sk_buff *skb, con | ||||||
|  |  	hop_limit = ipv6_hdr(skb)->hop_limit; | ||||||
|  |   | ||||||
|  |  	/* flowlabel and prio (includes version, which shouldn't change either */ | ||||||
|  | -	flowlabel = *((u_int32_t *)ipv6_hdr(skb)); | ||||||
|  | +	flowlabel = net_hdr_word(ipv6_hdr(skb)); | ||||||
|  |   | ||||||
|  |  	ret = ip6t_do_table(skb, state, state->net->ipv6.ip6table_mangle); | ||||||
|  |   | ||||||
|  | @@ -67,7 +67,7 @@ ip6t_mangle_out(struct sk_buff *skb, con | ||||||
|  |  	     !ipv6_addr_equal(&ipv6_hdr(skb)->daddr, &daddr) || | ||||||
|  |  	     skb->mark != mark || | ||||||
|  |  	     ipv6_hdr(skb)->hop_limit != hop_limit || | ||||||
|  | -	     flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) { | ||||||
|  | +	     flowlabel != net_hdr_word(ipv6_hdr(skb)))) { | ||||||
|  |  		err = ip6_route_me_harder(state->net, skb); | ||||||
|  |  		if (err < 0) | ||||||
|  |  			ret = NF_DROP_ERR(err); | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Matthias Schiffer
					Matthias Schiffer