mac80211: merge performance improvement patches
Fix fq_codel performance issues. Add a new rx function for batch processing. Signed-off-by: Felix Fietkau <nbd@nbd.name>
This commit is contained in:
		| @@ -0,0 +1,186 @@ | |||||||
|  | From: Felix Fietkau <nbd@nbd.name> | ||||||
|  | Date: Sat, 25 Jul 2020 20:53:23 +0200 | ||||||
|  | Subject: [PATCH] mac80211: add a function for running rx without passing skbs | ||||||
|  |  to the stack | ||||||
|  |  | ||||||
|  | This can be used to run mac80211 rx processing on a batch of frames in NAPI | ||||||
|  | poll before passing them to the network stack in a large batch. | ||||||
|  | This can improve icache footprint, or it can be used to pass frames via | ||||||
|  | netif_receive_skb_list. | ||||||
|  |  | ||||||
|  | Signed-off-by: Felix Fietkau <nbd@nbd.name> | ||||||
|  | --- | ||||||
|  |  | ||||||
|  | --- a/include/net/mac80211.h | ||||||
|  | +++ b/include/net/mac80211.h | ||||||
|  | @@ -4358,6 +4358,31 @@ void ieee80211_free_hw(struct ieee80211_ | ||||||
|  |  void ieee80211_restart_hw(struct ieee80211_hw *hw); | ||||||
|  |   | ||||||
|  |  /** | ||||||
|  | + * ieee80211_rx_list - receive frame and store processed skbs in a list | ||||||
|  | + * | ||||||
|  | + * Use this function to hand received frames to mac80211. The receive | ||||||
|  | + * buffer in @skb must start with an IEEE 802.11 header. In case a | ||||||
|  | + * paged @skb is used, the driver is recommended to put the ieee80211 | ||||||
|  | + * header of the frame on the linear part of the @skb to avoid memory | ||||||
|  | + * allocation and/or memcpy by the stack. | ||||||
|  | + * | ||||||
|  | + * This function may not be called in IRQ context. Calls to this function | ||||||
|  | + * for a single hardware must be synchronized against each other. Calls to | ||||||
|  | + * this function, ieee80211_rx_ni() and ieee80211_rx_irqsafe() may not be | ||||||
|  | + * mixed for a single hardware. Must not run concurrently with | ||||||
|  | + * ieee80211_tx_status() or ieee80211_tx_status_ni(). | ||||||
|  | + * | ||||||
|  | + * This function must be called with BHs disabled and the RCU read lock held. | ||||||
|  | + * | ||||||
|  | + * @hw: the hardware this frame came in on | ||||||
|  | + * @sta: the station the frame was received from, or %NULL | ||||||
|  | + * @skb: the buffer to receive, owned by mac80211 after this call | ||||||
|  | + * @list: the destination list | ||||||
|  | + */ | ||||||
|  | +void ieee80211_rx_list(struct ieee80211_hw *hw, struct ieee80211_sta *sta, | ||||||
|  | +		       struct sk_buff *skb, struct list_head *list); | ||||||
|  | + | ||||||
|  | +/** | ||||||
|  |   * ieee80211_rx_napi - receive frame from NAPI context | ||||||
|  |   * | ||||||
|  |   * Use this function to hand received frames to mac80211. The receive | ||||||
|  | --- a/net/mac80211/ieee80211_i.h | ||||||
|  | +++ b/net/mac80211/ieee80211_i.h | ||||||
|  | @@ -218,7 +218,7 @@ enum ieee80211_rx_flags { | ||||||
|  |  }; | ||||||
|  |   | ||||||
|  |  struct ieee80211_rx_data { | ||||||
|  | -	struct napi_struct *napi; | ||||||
|  | +	struct list_head *list; | ||||||
|  |  	struct sk_buff *skb; | ||||||
|  |  	struct ieee80211_local *local; | ||||||
|  |  	struct ieee80211_sub_if_data *sdata; | ||||||
|  | --- a/net/mac80211/rx.c | ||||||
|  | +++ b/net/mac80211/rx.c | ||||||
|  | @@ -2552,8 +2552,8 @@ static void ieee80211_deliver_skb_to_loc | ||||||
|  |  		memset(skb->cb, 0, sizeof(skb->cb)); | ||||||
|  |   | ||||||
|  |  		/* deliver to local stack */ | ||||||
|  | -		if (rx->napi) | ||||||
|  | -			napi_gro_receive(rx->napi, skb); | ||||||
|  | +		if (rx->list) | ||||||
|  | +			list_add_tail(&skb->list, rx->list); | ||||||
|  |  		else | ||||||
|  |  			netif_receive_skb(skb); | ||||||
|  |  	} | ||||||
|  | @@ -3843,7 +3843,6 @@ void ieee80211_release_reorder_timeout(s | ||||||
|  |  		/* This is OK -- must be QoS data frame */ | ||||||
|  |  		.security_idx = tid, | ||||||
|  |  		.seqno_idx = tid, | ||||||
|  | -		.napi = NULL, /* must be NULL to not have races */ | ||||||
|  |  	}; | ||||||
|  |  	struct tid_ampdu_rx *tid_agg_rx; | ||||||
|  |   | ||||||
|  | @@ -4453,8 +4452,8 @@ static bool ieee80211_invoke_fast_rx(str | ||||||
|  |  	/* deliver to local stack */ | ||||||
|  |  	skb->protocol = eth_type_trans(skb, fast_rx->dev); | ||||||
|  |  	memset(skb->cb, 0, sizeof(skb->cb)); | ||||||
|  | -	if (rx->napi) | ||||||
|  | -		napi_gro_receive(rx->napi, skb); | ||||||
|  | +	if (rx->list) | ||||||
|  | +		list_add_tail(&skb->list, rx->list); | ||||||
|  |  	else | ||||||
|  |  		netif_receive_skb(skb); | ||||||
|  |   | ||||||
|  | @@ -4521,7 +4520,7 @@ static bool ieee80211_prepare_and_rx_han | ||||||
|  |  static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, | ||||||
|  |  					 struct ieee80211_sta *pubsta, | ||||||
|  |  					 struct sk_buff *skb, | ||||||
|  | -					 struct napi_struct *napi) | ||||||
|  | +					 struct list_head *list) | ||||||
|  |  { | ||||||
|  |  	struct ieee80211_local *local = hw_to_local(hw); | ||||||
|  |  	struct ieee80211_sub_if_data *sdata; | ||||||
|  | @@ -4536,7 +4535,7 @@ static void __ieee80211_rx_handle_packet | ||||||
|  |  	memset(&rx, 0, sizeof(rx)); | ||||||
|  |  	rx.skb = skb; | ||||||
|  |  	rx.local = local; | ||||||
|  | -	rx.napi = napi; | ||||||
|  | +	rx.list = list; | ||||||
|  |   | ||||||
|  |  	if (ieee80211_is_data(fc) || ieee80211_is_mgmt(fc)) | ||||||
|  |  		I802_DEBUG_INC(local->dot11ReceivedFragmentCount); | ||||||
|  | @@ -4644,8 +4643,8 @@ static void __ieee80211_rx_handle_packet | ||||||
|  |   * This is the receive path handler. It is called by a low level driver when an | ||||||
|  |   * 802.11 MPDU is received from the hardware. | ||||||
|  |   */ | ||||||
|  | -void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta, | ||||||
|  | -		       struct sk_buff *skb, struct napi_struct *napi) | ||||||
|  | +void ieee80211_rx_list(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta, | ||||||
|  | +		       struct sk_buff *skb, struct list_head *list) | ||||||
|  |  { | ||||||
|  |  	struct ieee80211_local *local = hw_to_local(hw); | ||||||
|  |  	struct ieee80211_rate *rate = NULL; | ||||||
|  | @@ -4737,36 +4736,53 @@ void ieee80211_rx_napi(struct ieee80211_ | ||||||
|  |  	status->rx_flags = 0; | ||||||
|  |   | ||||||
|  |  	/* | ||||||
|  | -	 * key references and virtual interfaces are protected using RCU | ||||||
|  | -	 * and this requires that we are in a read-side RCU section during | ||||||
|  | -	 * receive processing | ||||||
|  | -	 */ | ||||||
|  | -	rcu_read_lock(); | ||||||
|  | - | ||||||
|  | -	/* | ||||||
|  |  	 * Frames with failed FCS/PLCP checksum are not returned, | ||||||
|  |  	 * all other frames are returned without radiotap header | ||||||
|  |  	 * if it was previously present. | ||||||
|  |  	 * Also, frames with less than 16 bytes are dropped. | ||||||
|  |  	 */ | ||||||
|  |  	skb = ieee80211_rx_monitor(local, skb, rate); | ||||||
|  | -	if (!skb) { | ||||||
|  | -		rcu_read_unlock(); | ||||||
|  | +	if (!skb) | ||||||
|  |  		return; | ||||||
|  | -	} | ||||||
|  |   | ||||||
|  |  	ieee80211_tpt_led_trig_rx(local, | ||||||
|  |  			((struct ieee80211_hdr *)skb->data)->frame_control, | ||||||
|  |  			skb->len); | ||||||
|  |   | ||||||
|  | -	__ieee80211_rx_handle_packet(hw, pubsta, skb, napi); | ||||||
|  | - | ||||||
|  | -	rcu_read_unlock(); | ||||||
|  | +	__ieee80211_rx_handle_packet(hw, pubsta, skb, list); | ||||||
|  |   | ||||||
|  |  	return; | ||||||
|  |   drop: | ||||||
|  |  	kfree_skb(skb); | ||||||
|  |  } | ||||||
|  | +EXPORT_SYMBOL(ieee80211_rx_list); | ||||||
|  | + | ||||||
|  | +void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta, | ||||||
|  | +		       struct sk_buff *skb, struct napi_struct *napi) | ||||||
|  | +{ | ||||||
|  | +	struct sk_buff *tmp; | ||||||
|  | +	LIST_HEAD(list); | ||||||
|  | + | ||||||
|  | + | ||||||
|  | +	/* | ||||||
|  | +	 * key references and virtual interfaces are protected using RCU | ||||||
|  | +	 * and this requires that we are in a read-side RCU section during | ||||||
|  | +	 * receive processing | ||||||
|  | +	 */ | ||||||
|  | +	rcu_read_lock(); | ||||||
|  | +	ieee80211_rx_list(hw, pubsta, skb, &list); | ||||||
|  | +	rcu_read_unlock(); | ||||||
|  | + | ||||||
|  | +	if (!napi) { | ||||||
|  | +		netif_receive_skb_list(&list); | ||||||
|  | +		return; | ||||||
|  | +	} | ||||||
|  | + | ||||||
|  | +	list_for_each_entry_safe(skb, tmp, &list, list) { | ||||||
|  | +		skb_list_del_init(skb); | ||||||
|  | +		napi_gro_receive(napi, skb); | ||||||
|  | +	} | ||||||
|  | +} | ||||||
|  |  EXPORT_SYMBOL(ieee80211_rx_napi); | ||||||
|  |   | ||||||
|  |  /* This is a version of the rx handler that can be called from hard irq | ||||||
| @@ -0,0 +1,55 @@ | |||||||
|  | From: Felix Fietkau <nbd@nbd.name> | ||||||
|  | Date: Sun, 26 Jul 2020 14:37:02 +0200 | ||||||
|  | Subject: [PATCH] net/fq_impl: use skb_get_hash instead of | ||||||
|  |  skb_get_hash_perturb | ||||||
|  |  | ||||||
|  | This avoids unnecessarily regenerating the skb flow hash. | ||||||
|  |  | ||||||
|  | Signed-off-by: Felix Fietkau <nbd@nbd.name> | ||||||
|  | --- | ||||||
|  |  | ||||||
|  | --- a/include/net/fq.h | ||||||
|  | +++ b/include/net/fq.h | ||||||
|  | @@ -69,15 +69,6 @@ struct fq { | ||||||
|  |  	struct list_head backlogs; | ||||||
|  |  	spinlock_t lock; | ||||||
|  |  	u32 flows_cnt; | ||||||
|  | -#if LINUX_VERSION_IS_GEQ(5,3,10) || \ | ||||||
|  | -    LINUX_VERSION_IN_RANGE(4,19,83, 4,20,0) || \ | ||||||
|  | -    LINUX_VERSION_IN_RANGE(4,14,153, 4,15,0) || \ | ||||||
|  | -    LINUX_VERSION_IN_RANGE(4,9,200, 4,10,0) || \ | ||||||
|  | -    LINUX_VERSION_IN_RANGE(4,4,200, 4,5,0) | ||||||
|  | -	siphash_key_t	perturbation; | ||||||
|  | -#else | ||||||
|  | -	u32 perturbation; | ||||||
|  | -#endif | ||||||
|  |  	u32 limit; | ||||||
|  |  	u32 memory_limit; | ||||||
|  |  	u32 memory_usage; | ||||||
|  | --- a/include/net/fq_impl.h | ||||||
|  | +++ b/include/net/fq_impl.h | ||||||
|  | @@ -108,15 +108,7 @@ begin: | ||||||
|  |   | ||||||
|  |  static u32 fq_flow_idx(struct fq *fq, struct sk_buff *skb) | ||||||
|  |  { | ||||||
|  | -#if LINUX_VERSION_IS_GEQ(5,3,10) || \ | ||||||
|  | -    LINUX_VERSION_IN_RANGE(4,19,83, 4,20,0) || \ | ||||||
|  | -    LINUX_VERSION_IN_RANGE(4,14,153, 4,15,0) || \ | ||||||
|  | -    LINUX_VERSION_IN_RANGE(4,9,200, 4,10,0) || \ | ||||||
|  | -    LINUX_VERSION_IN_RANGE(4,4,200, 4,5,0) | ||||||
|  | -	u32 hash = skb_get_hash_perturb(skb, &fq->perturbation); | ||||||
|  | -#else | ||||||
|  | -	u32 hash = skb_get_hash_perturb(skb, fq->perturbation); | ||||||
|  | -#endif | ||||||
|  | +	u32 hash = skb_get_hash(skb); | ||||||
|  |   | ||||||
|  |  	return reciprocal_scale(hash, fq->flows_cnt); | ||||||
|  |  } | ||||||
|  | @@ -316,7 +308,6 @@ static int fq_init(struct fq *fq, int fl | ||||||
|  |  	INIT_LIST_HEAD(&fq->backlogs); | ||||||
|  |  	spin_lock_init(&fq->lock); | ||||||
|  |  	fq->flows_cnt = max_t(u32, flows_cnt, 1); | ||||||
|  | -	get_random_bytes(&fq->perturbation, sizeof(fq->perturbation)); | ||||||
|  |  	fq->quantum = 300; | ||||||
|  |  	fq->limit = 8192; | ||||||
|  |  	fq->memory_limit = 16 << 20; /* 16 MBytes */ | ||||||
| @@ -0,0 +1,19 @@ | |||||||
|  | From: Felix Fietkau <nbd@nbd.name> | ||||||
|  | Date: Sun, 26 Jul 2020 14:42:58 +0200 | ||||||
|  | Subject: [PATCH] mac80211: calculate skb hash early when using itxq | ||||||
|  |  | ||||||
|  | This avoids flow separation issues when using software encryption | ||||||
|  |  | ||||||
|  | Signed-off-by: Felix Fietkau <nbd@nbd.name> | ||||||
|  | --- | ||||||
|  |  | ||||||
|  | --- a/net/mac80211/tx.c | ||||||
|  | +++ b/net/mac80211/tx.c | ||||||
|  | @@ -3937,6 +3937,7 @@ void __ieee80211_subif_start_xmit(struct | ||||||
|  |  	if (local->ops->wake_tx_queue) { | ||||||
|  |  		u16 queue = __ieee80211_select_queue(sdata, sta, skb); | ||||||
|  |  		skb_set_queue_mapping(skb, queue); | ||||||
|  | +		skb_get_hash(skb); | ||||||
|  |  	} | ||||||
|  |   | ||||||
|  |  	if (sta) { | ||||||
		Reference in New Issue
	
	Block a user
	 Felix Fietkau
					Felix Fietkau