kernel: backport GRO improvements
Improves network performance.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
This commit is contained in:
		| @@ -0,0 +1,78 @@ | |||||||
|  | From: Alexander Lobakin <alobakin@dlink.ru> | ||||||
|  | Date: Fri, 15 Nov 2019 12:11:35 +0300 | ||||||
|  | Subject: [PATCH] net: core: allow fast GRO for skbs with Ethernet header in | ||||||
|  |  head | ||||||
|  |  | ||||||
|  | Commit 78d3fd0b7de8 ("gro: Only use skb_gro_header for completely | ||||||
|  | non-linear packets") back in May'09 (v2.6.31-rc1) has changed the | ||||||
|  | original condition '!skb_headlen(skb)' to | ||||||
|  | 'skb->mac_header == skb->tail' in gro_reset_offset() saying: "Since | ||||||
|  | the drivers that need this optimisation all provide completely | ||||||
|  | non-linear packets" (note that this condition has become the current | ||||||
|  | 'skb_mac_header(skb) == skb_tail_pointer(skb)' later with commit | ||||||
|  | ced14f6804a9 ("net: Correct comparisons and calculations using | ||||||
|  | skb->tail and skb-transport_header") without any functional changes). | ||||||
|  |  | ||||||
|  | For now, we have the following rough statistics for v5.4-rc7: | ||||||
|  | 1) napi_gro_frags: 14 | ||||||
|  | 2) napi_gro_receive with skb->head containing (most of) payload: 83 | ||||||
|  | 3) napi_gro_receive with skb->head containing all the headers: 20 | ||||||
|  | 4) napi_gro_receive with skb->head containing only Ethernet header: 2 | ||||||
|  |  | ||||||
|  | With the current condition, fast GRO with the usage of | ||||||
|  | NAPI_GRO_CB(skb)->frag0 is available only in the [1] case. | ||||||
|  | Packets pushed by [2] and [3] go through the 'slow' path, but | ||||||
|  | it's not a problem for them as they already contain all the needed | ||||||
|  | headers in skb->head, so pskb_may_pull() only moves skb->data. | ||||||
|  |  | ||||||
|  | The layout of skbs in the fourth [4] case at the moment of | ||||||
|  | dev_gro_receive() is identical to skbs that have come through [1], | ||||||
|  | as napi_frags_skb() pulls Ethernet header to skb->head. The only | ||||||
|  | difference is that the mentioned condition is always false for them, | ||||||
|  | because skb_put() and friends irreversibly alter the tail pointer. | ||||||
|  | They also go through the 'slow' path, but now every single | ||||||
|  | pskb_may_pull() in every single .gro_receive() will call the *really* | ||||||
|  | slow __pskb_pull_tail() to pull headers to head. This significantly | ||||||
|  | decreases the overall performance for no visible reasons. | ||||||
|  |  | ||||||
|  | The only two users of method [4] are: | ||||||
|  | * drivers/staging/qlge | ||||||
|  | * drivers/net/wireless/iwlwifi (all three variants: dvm, mvm, mvm-mq) | ||||||
|  |  | ||||||
|  | Note that in case with wireless drivers we can't use [1] | ||||||
|  | (napi_gro_frags()) at least for now and mac80211 stack always | ||||||
|  | performs pushes and pulls anyways, so performance hit is unavoidable. | ||||||
|  |  | ||||||
|  | At the moment of v2.6.31 the mentioned change was necessary (that's | ||||||
|  | why I don't add the "Fixes:" tag), but it became obsolete since | ||||||
|  | skb_gro_mac_header() has gone in commit a50e233c50db ("net-gro: | ||||||
|  | restore frag0 optimization"), so we can simply revert the condition | ||||||
|  | in gro_reset_offset() to allow skbs from [4] go through the 'fast' | ||||||
|  | path just like in case [1]. | ||||||
|  |  | ||||||
|  | This was tested on a 600 MHz MIPS CPU and a custom driver and this | ||||||
|  | patch gave boosts up to 40 Mbps to method [4] in both directions | ||||||
|  | compared to net-next, which made overall performance relatively | ||||||
|  | close to [1] (without it, [4] is the slowest). | ||||||
|  |  | ||||||
|  | v2: | ||||||
|  | - Add more references and explanations to commit message | ||||||
|  | - Fix some typos ibid | ||||||
|  | - No functional changes | ||||||
|  |  | ||||||
|  | Signed-off-by: Alexander Lobakin <alobakin@dlink.ru> | ||||||
|  | Signed-off-by: David S. Miller <davem@davemloft.net> | ||||||
|  | --- | ||||||
|  |  | ||||||
|  | --- a/net/core/dev.c | ||||||
|  | +++ b/net/core/dev.c | ||||||
|  | @@ -5403,8 +5403,7 @@ static void skb_gro_reset_offset(struct | ||||||
|  |  	NAPI_GRO_CB(skb)->frag0 = NULL; | ||||||
|  |  	NAPI_GRO_CB(skb)->frag0_len = 0; | ||||||
|  |   | ||||||
|  | -	if (skb_mac_header(skb) == skb_tail_pointer(skb) && | ||||||
|  | -	    pinfo->nr_frags && | ||||||
|  | +	if (!skb_headlen(skb) && pinfo->nr_frags && | ||||||
|  |  	    !PageHighMem(skb_frag_page(frag0))) { | ||||||
|  |  		NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); | ||||||
|  |  		NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int, | ||||||
| @@ -0,0 +1,51 @@ | |||||||
|  | From: Alexander Lobakin <alobakin@dlink.ru> | ||||||
|  | Date: Mon, 14 Oct 2019 11:00:33 +0300 | ||||||
|  | Subject: [PATCH] net: core: use listified Rx for GRO_NORMAL in | ||||||
|  |  napi_gro_receive() | ||||||
|  |  | ||||||
|  | Commit 323ebb61e32b4 ("net: use listified RX for handling GRO_NORMAL | ||||||
|  | skbs") made use of listified skb processing for the users of | ||||||
|  | napi_gro_frags(). | ||||||
|  | The same technique can be used in a way more common napi_gro_receive() | ||||||
|  | to speed up non-merged (GRO_NORMAL) skbs for a wide range of drivers | ||||||
|  | including gro_cells and mac80211 users. | ||||||
|  | This slightly changes the return value in cases where skb is being | ||||||
|  | dropped by the core stack, but it seems to have no impact on related | ||||||
|  | drivers' functionality. | ||||||
|  | gro_normal_batch is left untouched as it's very individual for every | ||||||
|  | single system configuration and might be tuned in manual order to | ||||||
|  | achieve an optimal performance. | ||||||
|  |  | ||||||
|  | Signed-off-by: Alexander Lobakin <alobakin@dlink.ru> | ||||||
|  | Acked-by: Edward Cree <ecree@solarflare.com> | ||||||
|  | Signed-off-by: David S. Miller <davem@davemloft.net> | ||||||
|  | --- | ||||||
|  |  | ||||||
|  | --- a/net/core/dev.c | ||||||
|  | +++ b/net/core/dev.c | ||||||
|  | @@ -5601,12 +5601,13 @@ static void napi_skb_free_stolen_head(st | ||||||
|  |  	kmem_cache_free(skbuff_head_cache, skb); | ||||||
|  |  } | ||||||
|  |   | ||||||
|  | -static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) | ||||||
|  | +static gro_result_t napi_skb_finish(struct napi_struct *napi, | ||||||
|  | +				    struct sk_buff *skb, | ||||||
|  | +				    gro_result_t ret) | ||||||
|  |  { | ||||||
|  |  	switch (ret) { | ||||||
|  |  	case GRO_NORMAL: | ||||||
|  | -		if (netif_receive_skb_internal(skb)) | ||||||
|  | -			ret = GRO_DROP; | ||||||
|  | +		gro_normal_one(napi, skb); | ||||||
|  |  		break; | ||||||
|  |   | ||||||
|  |  	case GRO_DROP: | ||||||
|  | @@ -5638,7 +5639,7 @@ gro_result_t napi_gro_receive(struct nap | ||||||
|  |   | ||||||
|  |  	skb_gro_reset_offset(skb); | ||||||
|  |   | ||||||
|  | -	ret = napi_skb_finish(dev_gro_receive(napi, skb), skb); | ||||||
|  | +	ret = napi_skb_finish(napi, skb, dev_gro_receive(napi, skb)); | ||||||
|  |  	trace_napi_gro_receive_exit(ret); | ||||||
|  |   | ||||||
|  |  	return ret; | ||||||
| @@ -32,7 +32,7 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name> | |||||||
|  	__u16			tc_index;	/* traffic control index */ |  	__u16			tc_index;	/* traffic control index */ | ||||||
| --- a/net/core/dev.c | --- a/net/core/dev.c | ||||||
| +++ b/net/core/dev.c | +++ b/net/core/dev.c | ||||||
| @@ -5469,6 +5469,9 @@ static enum gro_result dev_gro_receive(s | @@ -5468,6 +5468,9 @@ static enum gro_result dev_gro_receive(s | ||||||
|  	int same_flow; |  	int same_flow; | ||||||
|  	int grow; |  	int grow; | ||||||
|   |   | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Felix Fietkau
					Felix Fietkau