75aeb7ed62
Changelog: https://cdn.kernel.org/pub/linux/kernel/v5.x/ChangeLog-5.15.137

All patches automatically rebased.

Build system: x86_64
Build-tested: ramips/tplink_archer-a6-v3
Run-tested: ramips/tplink_archer-a6-v3

Signed-off-by: John Audia <therealgraysky@proton.me>
From: Felix Fietkau <nbd@nbd.name>
Date: Thu, 16 Feb 2023 18:39:04 +0100
Subject: [PATCH] net/core: add optional threading for backlog processing

When dealing with few flows or an imbalance on CPU utilization, static RPS
CPU assignment can be too inflexible. Add support for enabling threaded NAPI
for backlog processing in order to allow the scheduler to better balance
processing. This helps better spread the load across idle CPUs.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
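
For reference, the new behaviour is toggled through the net.core.backlog_threaded
sysctl registered in the last hunk below. A minimal userspace sketch for flipping
it at runtime (not part of the patch itself, and assuming a kernel built with this
patch) is simply the C equivalent of running sysctl -w net.core.backlog_threaded=1:

/*
 * Illustration only: enable threaded backlog processing via the new sysctl.
 * The proc path follows from the "backlog_threaded" ctl_table entry below.
 */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/net/core/backlog_threaded", "w");

	if (!f) {
		/* kernel without this patch, or insufficient privileges */
		perror("open backlog_threaded");
		return EXIT_FAILURE;
	}
	if (fputs("1\n", f) == EOF)	/* writing "0" switches back to softirq processing */
		perror("write backlog_threaded");
	fclose(f);
	return EXIT_SUCCESS;
}

Writing 0 clears NAPIF_STATE_THREADED again, so per-CPU backlog processing falls
back to the softirq path; the napi/backlog kthreads that were already started stay
around but should no longer be woken.
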
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -502,6 +502,7 @@ static inline bool napi_complete(struct
 }
 
 int dev_set_threaded(struct net_device *dev, bool threaded);
+int backlog_set_threaded(bool threaded);
 
 /**
  *	napi_disable - prevent NAPI from scheduling
@@ -3363,6 +3364,7 @@ struct softnet_data {
 	unsigned int		processed;
 	unsigned int		time_squeeze;
 	unsigned int		received_rps;
+	unsigned int		process_queue_empty;
 #ifdef CONFIG_RPS
 	struct softnet_data	*rps_ipi_list;
 #endif
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4583,7 +4583,7 @@ static int rps_ipi_queued(struct softnet
 #ifdef CONFIG_RPS
 	struct softnet_data *mysd = this_cpu_ptr(&softnet_data);
 
-	if (sd != mysd) {
+	if (sd != mysd && !test_bit(NAPI_STATE_THREADED, &sd->backlog.state)) {
 		sd->rps_ipi_next = mysd->rps_ipi_list;
 		mysd->rps_ipi_list = sd;
 
@@ -5764,6 +5764,8 @@ static DEFINE_PER_CPU(struct work_struct
 /* Network device is going away, flush any packets still pending */
 static void flush_backlog(struct work_struct *work)
 {
+	unsigned int process_queue_empty;
+	bool threaded, flush_processq;
 	struct sk_buff *skb, *tmp;
 	struct softnet_data *sd;
 
@@ -5779,9 +5781,18 @@ static void flush_backlog(struct work_st
 			input_queue_head_incr(sd);
 		}
 	}
+
+	threaded = test_bit(NAPI_STATE_THREADED, &sd->backlog.state);
+	flush_processq = threaded &&
+			 !skb_queue_empty_lockless(&sd->process_queue);
+	if (flush_processq)
+		process_queue_empty = sd->process_queue_empty;
 	rps_unlock(sd);
 	local_irq_enable();
 
+	if (threaded)
+		goto out;
+
 	skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
 		if (skb->dev->reg_state == NETREG_UNREGISTERING) {
 			__skb_unlink(skb, &sd->process_queue);
@@ -5789,7 +5800,18 @@ static void flush_backlog(struct work_st
 			input_queue_head_incr(sd);
 		}
 	}
+
+out:
 	local_bh_enable();
+
+	while (flush_processq) {
+		msleep(1);
+		local_irq_disable();
+		rps_lock(sd);
+		flush_processq = process_queue_empty == sd->process_queue_empty;
+		rps_unlock(sd);
+		local_irq_enable();
+	}
 }
 
 static bool flush_required(int cpu)
@@ -6472,6 +6494,7 @@ static int process_backlog(struct napi_s
 
 		local_irq_disable();
 		rps_lock(sd);
+		sd->process_queue_empty++;
 		if (skb_queue_empty(&sd->input_pkt_queue)) {
 			/*
 			 * Inline a custom version of __napi_complete().
@@ -6481,7 +6504,8 @@ static int process_backlog(struct napi_s
 			 * We can use a plain write instead of clear_bit(),
 			 * and we dont need an smp_mb() memory barrier.
 			 */
-			napi->state = 0;
+			napi->state &= ~(NAPIF_STATE_SCHED |
+					 NAPIF_STATE_SCHED_THREADED);
 			again = false;
 		} else {
 			skb_queue_splice_tail_init(&sd->input_pkt_queue,
@@ -6898,6 +6922,57 @@ int dev_set_threaded(struct net_device *
 }
 EXPORT_SYMBOL(dev_set_threaded);
 
+int backlog_set_threaded(bool threaded)
+{
+	static bool backlog_threaded;
+	int err = 0;
+	int i;
+
+	if (backlog_threaded == threaded)
+		return 0;
+
+	for_each_possible_cpu(i) {
+		struct softnet_data *sd = &per_cpu(softnet_data, i);
+		struct napi_struct *n = &sd->backlog;
+
+		if (n->thread)
+			continue;
+		n->thread = kthread_run(napi_threaded_poll, n, "napi/backlog-%d", i);
+		if (IS_ERR(n->thread)) {
+			err = PTR_ERR(n->thread);
+			pr_err("kthread_run failed with err %d\n", err);
+			n->thread = NULL;
+			threaded = false;
+			break;
+		}
+
+	}
+
+	backlog_threaded = threaded;
+
+	/* Make sure kthread is created before THREADED bit
+	 * is set.
+	 */
+	smp_mb__before_atomic();
+
+	for_each_possible_cpu(i) {
+		struct softnet_data *sd = &per_cpu(softnet_data, i);
+		struct napi_struct *n = &sd->backlog;
+		unsigned long flags;
+
+		local_irq_save(flags);
+		rps_lock(sd);
+		if (threaded)
+			n->state |= NAPIF_STATE_THREADED;
+		else
+			n->state &= ~NAPIF_STATE_THREADED;
+		rps_unlock(sd);
+		local_irq_restore(flags);
+	}
+
+	return err;
+}
+
 void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
 		    int (*poll)(struct napi_struct *, int), int weight)
 {
@@ -11378,6 +11453,9 @@ static int dev_cpu_dead(unsigned int old
 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
 	local_irq_enable();
 
+	if (test_bit(NAPI_STATE_THREADED, &oldsd->backlog.state))
+		return 0;
+
 #ifdef CONFIG_RPS
 	remsd = oldsd->rps_ipi_list;
 	oldsd->rps_ipi_list = NULL;
@@ -11717,6 +11795,7 @@ static int __init net_dev_init(void)
 		sd->cpu = i;
 #endif
 
+		INIT_LIST_HEAD(&sd->backlog.poll_list);
 		init_gro_hash(&sd->backlog);
 		sd->backlog.poll = process_backlog;
 		sd->backlog.weight = weight_p;
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -28,6 +28,7 @@ static int int_3600 = 3600;
 static int min_sndbuf = SOCK_MIN_SNDBUF;
 static int min_rcvbuf = SOCK_MIN_RCVBUF;
 static int max_skb_frags = MAX_SKB_FRAGS;
+static int backlog_threaded;
 static long long_one __maybe_unused = 1;
 static long long_max __maybe_unused = LONG_MAX;
 
@@ -114,6 +115,23 @@ static int rps_sock_flow_sysctl(struct c
 }
 #endif /* CONFIG_RPS */
 
+static int backlog_threaded_sysctl(struct ctl_table *table, int write,
+			       void *buffer, size_t *lenp, loff_t *ppos)
+{
+	static DEFINE_MUTEX(backlog_threaded_mutex);
+	int ret;
+
+	mutex_lock(&backlog_threaded_mutex);
+
+	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+	if (write && !ret)
+		ret = backlog_set_threaded(backlog_threaded);
+
+	mutex_unlock(&backlog_threaded_mutex);
+
+	return ret;
+}
+
 #ifdef CONFIG_NET_FLOW_LIMIT
 static DEFINE_MUTEX(flow_limit_update_mutex);
 
@@ -470,6 +488,15 @@ static struct ctl_table net_core_table[]
 		.proc_handler	= rps_sock_flow_sysctl
 	},
 #endif
+	{
+		.procname	= "backlog_threaded",
+		.data		= &backlog_threaded,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= backlog_threaded_sysctl,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE
+	},
 #ifdef CONFIG_NET_FLOW_LIMIT
 	{
 		.procname	= "flow_limit_cpu_bitmap",