All patches automatically rebased.
Build system: x86_64
Build-tested: ramips/tplink_archer-a6-v3, filogic/xiaomi_redmi-router-ax6000-ubootmod
Run-tested: ramips/tplink_archer-a6-v3, filogic/xiaomi_redmi-router-ax6000-ubootmod
Signed-off-by: John Audia <therealgraysky@proton.me>
(cherry picked from commit 1f5fce27c1)

From: Felix Fietkau <nbd@nbd.name>
Date: Thu, 16 Feb 2023 18:39:04 +0100
Subject: [PATCH] net/core: add optional threading for backlog processing

When dealing with few flows or an imbalance in CPU utilization, static RPS
CPU assignment can be too inflexible. Add support for enabling threaded NAPI
for backlog processing in order to allow the scheduler to better balance
processing. This helps spread the load across otherwise idle CPUs.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
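[Usage note, not part of the upstream change: the new behaviour stays off by
default and is toggled through the net.core.backlog_threaded sysctl added
below, which moves backlog NAPI processing into per-CPU "napi/backlog-<cpu>"
kthreads. A minimal userspace sketch, assuming the table is registered under
the usual /proc/sys/net/core/ path:

/* toggle_backlog_threaded.c - hypothetical helper, not part of this patch.
 * Writes "1" to the backlog_threaded sysctl introduced below, so that
 * backlog NAPI is handled by the napi/backlog-<cpu> kthreads.
 */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/net/core/backlog_threaded", "w");

	if (!f) {
		perror("net.core.backlog_threaded");
		return 1;
	}
	fputs("1\n", f);	/* 0 = default softirq backlog, 1 = threaded */
	return fclose(f) ? 1 : 0;
}

Writing 0 switches back to the softirq path; judging from
backlog_set_threaded() below, disabling only clears NAPIF_STATE_THREADED,
and kthreads that were already created are kept around.]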
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -502,6 +502,7 @@ static inline bool napi_complete(struct
 }
 
 int dev_set_threaded(struct net_device *dev, bool threaded);
+int backlog_set_threaded(bool threaded);
 
 /**
  *	napi_disable - prevent NAPI from scheduling
@@ -3364,6 +3365,7 @@ struct softnet_data {
 	unsigned int		processed;
 	unsigned int		time_squeeze;
 	unsigned int		received_rps;
+	unsigned int		process_queue_empty;
 #ifdef CONFIG_RPS
 	struct softnet_data	*rps_ipi_list;
 #endif
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4578,7 +4578,7 @@ static int rps_ipi_queued(struct softnet
 #ifdef CONFIG_RPS
 	struct softnet_data *mysd = this_cpu_ptr(&softnet_data);
 
-	if (sd != mysd) {
+	if (sd != mysd && !test_bit(NAPI_STATE_THREADED, &sd->backlog.state)) {
 		sd->rps_ipi_next = mysd->rps_ipi_list;
 		mysd->rps_ipi_list = sd;
 
@@ -5759,6 +5759,8 @@ static DEFINE_PER_CPU(struct work_struct
 /* Network device is going away, flush any packets still pending */
 static void flush_backlog(struct work_struct *work)
 {
+	unsigned int process_queue_empty;
+	bool threaded, flush_processq;
 	struct sk_buff *skb, *tmp;
 	struct softnet_data *sd;
 
@@ -5774,9 +5776,18 @@ static void flush_backlog(struct work_st
 			input_queue_head_incr(sd);
 		}
 	}
+
+	threaded = test_bit(NAPI_STATE_THREADED, &sd->backlog.state);
+	flush_processq = threaded &&
+			 !skb_queue_empty_lockless(&sd->process_queue);
+	if (flush_processq)
+		process_queue_empty = sd->process_queue_empty;
 	rps_unlock(sd);
 	local_irq_enable();
 
+	if (threaded)
+		goto out;
+
 	skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
 		if (skb->dev->reg_state == NETREG_UNREGISTERING) {
 			__skb_unlink(skb, &sd->process_queue);
@@ -5784,7 +5795,18 @@ static void flush_backlog(struct work_st
 			input_queue_head_incr(sd);
 		}
 	}
+
+out:
 	local_bh_enable();
+
+	while (flush_processq) {
+		msleep(1);
+		local_irq_disable();
+		rps_lock(sd);
+		flush_processq = process_queue_empty == sd->process_queue_empty;
+		rps_unlock(sd);
+		local_irq_enable();
+	}
 }
 
 static bool flush_required(int cpu)
@@ -6467,6 +6489,7 @@ static int process_backlog(struct napi_s
 
 		local_irq_disable();
 		rps_lock(sd);
+		sd->process_queue_empty++;
 		if (skb_queue_empty(&sd->input_pkt_queue)) {
 			/*
 			 * Inline a custom version of __napi_complete().
@@ -6476,7 +6499,8 @@ static int process_backlog(struct napi_s
 			 * We can use a plain write instead of clear_bit(),
 			 * and we dont need an smp_mb() memory barrier.
 			 */
-			napi->state = 0;
+			napi->state &= ~(NAPIF_STATE_SCHED |
+					 NAPIF_STATE_SCHED_THREADED);
 			again = false;
 		} else {
 			skb_queue_splice_tail_init(&sd->input_pkt_queue,
@@ -6893,6 +6917,57 @@ int dev_set_threaded(struct net_device *
 }
 EXPORT_SYMBOL(dev_set_threaded);
 
+int backlog_set_threaded(bool threaded)
+{
+	static bool backlog_threaded;
+	int err = 0;
+	int i;
+
+	if (backlog_threaded == threaded)
+		return 0;
+
+	for_each_possible_cpu(i) {
+		struct softnet_data *sd = &per_cpu(softnet_data, i);
+		struct napi_struct *n = &sd->backlog;
+
+		if (n->thread)
+			continue;
+		n->thread = kthread_run(napi_threaded_poll, n, "napi/backlog-%d", i);
+		if (IS_ERR(n->thread)) {
+			err = PTR_ERR(n->thread);
+			pr_err("kthread_run failed with err %d\n", err);
+			n->thread = NULL;
+			threaded = false;
+			break;
+		}
+
+	}
+
+	backlog_threaded = threaded;
+
+	/* Make sure kthread is created before THREADED bit
+	 * is set.
+	 */
+	smp_mb__before_atomic();
+
+	for_each_possible_cpu(i) {
+		struct softnet_data *sd = &per_cpu(softnet_data, i);
+		struct napi_struct *n = &sd->backlog;
+		unsigned long flags;
+
+		local_irq_save(flags);
+		rps_lock(sd);
+		if (threaded)
+			n->state |= NAPIF_STATE_THREADED;
+		else
+			n->state &= ~NAPIF_STATE_THREADED;
+		rps_unlock(sd);
+		local_irq_restore(flags);
+	}
+
+	return err;
+}
+
 void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
 		    int (*poll)(struct napi_struct *, int), int weight)
 {
@@ -11369,6 +11444,9 @@ static int dev_cpu_dead(unsigned int old
 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
 	local_irq_enable();
 
+	if (test_bit(NAPI_STATE_THREADED, &oldsd->backlog.state))
+		return 0;
+
 #ifdef CONFIG_RPS
 	remsd = oldsd->rps_ipi_list;
 	oldsd->rps_ipi_list = NULL;
@@ -11708,6 +11786,7 @@ static int __init net_dev_init(void)
 		sd->cpu = i;
 #endif
 
+		INIT_LIST_HEAD(&sd->backlog.poll_list);
 		init_gro_hash(&sd->backlog);
 		sd->backlog.poll = process_backlog;
 		sd->backlog.weight = weight_p;
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -28,6 +28,7 @@ static int int_3600 = 3600;
 static int min_sndbuf = SOCK_MIN_SNDBUF;
 static int min_rcvbuf = SOCK_MIN_RCVBUF;
 static int max_skb_frags = MAX_SKB_FRAGS;
+static int backlog_threaded;
 static long long_one __maybe_unused = 1;
 static long long_max __maybe_unused = LONG_MAX;
 
@@ -114,6 +115,23 @@ static int rps_sock_flow_sysctl(struct c
 }
 #endif /* CONFIG_RPS */
 
+static int backlog_threaded_sysctl(struct ctl_table *table, int write,
+			       void *buffer, size_t *lenp, loff_t *ppos)
+{
+	static DEFINE_MUTEX(backlog_threaded_mutex);
+	int ret;
+
+	mutex_lock(&backlog_threaded_mutex);
+
+	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+	if (write && !ret)
+		ret = backlog_set_threaded(backlog_threaded);
+
+	mutex_unlock(&backlog_threaded_mutex);
+
+	return ret;
+}
+
 #ifdef CONFIG_NET_FLOW_LIMIT
 static DEFINE_MUTEX(flow_limit_update_mutex);
 
@@ -470,6 +488,15 @@ static struct ctl_table net_core_table[]
 		.proc_handler	= rps_sock_flow_sysctl
 	},
 #endif
+	{
+		.procname	= "backlog_threaded",
+		.data		= &backlog_threaded,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= backlog_threaded_sysctl,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE
+	},
 #ifdef CONFIG_NET_FLOW_LIMIT
 	{
 		.procname	= "flow_limit_cpu_bitmap",