The patches were generated from the RPi repo with the following command:

git format-patch v6.6.34..rpi-6.1.y

Some patches needed rebasing and, as usual, the applied and reverted patches, along with the wireless drivers, GitHub workflows, READMEs and defconfigs patches, were removed.

Signed-off-by: Álvaro Fernández Rojas <noltari@gmail.com>

From f0b0156b38d07a45e1b5309181efc645e6fe8393 Mon Sep 17 00:00:00 2001
From: Jose Maria Casanova Crespo <jmcasanova@igalia.com>
Date: Tue, 7 Feb 2023 13:54:02 +0100
Subject: [PATCH 0464/1085] drm/v3d: New debugfs end-points to query GPU usage
 stats.

Two new debugfs interfaces are implemented:

- gpu_usage: exposes the total runtime since boot of each of the 5
scheduling queues available on V3D (BIN, RENDER, CSD, TFU, CACHE_CLEAN).
If the interface is queried at two different points in time, the usage
percentage of each queue over that interval can be calculated.

- gpu_pid_usage: exposes the same information, but broken down per
process using the V3D driver. The runtime of each such process is
stored, so usage percentages per PID can be calculated from
measurements taken at different timestamps.

gpu_pid_usage stats are only collected if the debugfs interface has
been polled within the last 70 seconds. If a process has not submitted
a GPU job in the last 70 seconds, its stats are also purged.
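As an editorial illustration (not part of the original patch), a minimal
userspace sketch of this sampling scheme follows. It assumes debugfs is
mounted at /sys/kernel/debug and that the V3D device is dri/0; the
sampling interval and helper names are arbitrary. Both the timestamp
line and the per-queue runtimes are reported in nanoseconds taken from
local_clock(), so a queue's busy percentage is simply delta-runtime
divided by delta-timestamp.

/*
 * Hypothetical example: sample gpu_usage twice and derive per-queue
 * busy percentages. The debugfs mount point and DRM node number are
 * assumptions; adjust them for the target system.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#define GPU_USAGE "/sys/kernel/debug/dri/0/gpu_usage"
#define MAX_QUEUES 8

struct sample {
	unsigned long long timestamp;		/* ns, from local_clock() */
	char name[MAX_QUEUES][32];
	unsigned long long runtime[MAX_QUEUES];	/* accumulated ns per queue */
	int count;
};

static int read_sample(struct sample *s)
{
	char line[256];
	FILE *f = fopen(GPU_USAGE, "r");

	if (!f)
		return -1;
	s->count = 0;
	while (fgets(line, sizeof(line), f)) {
		unsigned long long runtime;
		unsigned int jobs;
		char name[32];

		if (sscanf(line, "timestamp;%llu;", &s->timestamp) == 1)
			continue;
		/* Data rows look like: v3d_render;<jobs>;<runtime>;<active>; */
		if (sscanf(line, "%31[^;];%u;%llu;", name, &jobs, &runtime) == 3 &&
		    s->count < MAX_QUEUES) {
			strcpy(s->name[s->count], name);
			s->runtime[s->count] = runtime;
			s->count++;
		}
	}
	fclose(f);
	return 0;
}

int main(void)
{
	struct sample a, b;
	int i;

	if (read_sample(&a))
		return 1;
	sleep(1);			/* arbitrary sampling interval */
	if (read_sample(&b))
		return 1;

	/* Assumes both samples list the same queues in the same order. */
	for (i = 0; i < a.count && i < b.count; i++) {
		double busy = 100.0 * (b.runtime[i] - a.runtime[i]) /
			      (b.timestamp - a.timestamp);
		printf("%-16s %6.2f%%\n", b.name[i], busy);
	}
	return 0;
}

Running this while a GL or compute workload is active should show the
render and CSD queues approaching 100%.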
Signed-off-by: Jose Maria Casanova Crespo <jmcasanova@igalia.com>
---
 drivers/gpu/drm/v3d/v3d_debugfs.c |  79 +++++++++++++++++
 drivers/gpu/drm/v3d/v3d_drv.h     |  59 +++++++++++++
 drivers/gpu/drm/v3d/v3d_gem.c     |   1 +
 drivers/gpu/drm/v3d/v3d_irq.c     |   5 ++
 drivers/gpu/drm/v3d/v3d_sched.c   | 139 +++++++++++++++++++++++++++++-
 5 files changed, 282 insertions(+), 1 deletion(-)

--- a/drivers/gpu/drm/v3d/v3d_debugfs.c
+++ b/drivers/gpu/drm/v3d/v3d_debugfs.c
@@ -6,6 +6,7 @@
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
 #include <linux/string_helpers.h>
+#include <linux/sched/clock.h>
 
 #include <drm/drm_debugfs.h>
 
@@ -202,6 +203,82 @@ static int v3d_debugfs_bo_stats(struct s
 	return 0;
 }
 
+static int v3d_debugfs_gpu_usage(struct seq_file *m, void *unused)
+{
+	struct drm_debugfs_entry *entry = m->private;
+	struct drm_device *dev = entry->dev;
+	struct v3d_dev *v3d = to_v3d_dev(dev);
+	struct v3d_queue_stats *queue_stats;
+	enum v3d_queue queue;
+	u64 timestamp = local_clock();
+	u64 active_runtime;
+
+	seq_printf(m, "timestamp;%llu;\n", local_clock());
+	seq_printf(m, "\"QUEUE\";\"JOBS\";\"RUNTIME\";\"ACTIVE\";\n");
+	for (queue = 0; queue < V3D_MAX_QUEUES; queue++) {
+		if (!v3d->queue[queue].sched.ready)
+			continue;
+
+		queue_stats = &v3d->gpu_queue_stats[queue];
+		mutex_lock(&queue_stats->lock);
+		v3d_sched_stats_update(queue_stats);
+		if (queue_stats->last_pid)
+			active_runtime = timestamp - queue_stats->last_exec_start;
+		else
+			active_runtime = 0;
+
+		seq_printf(m, "%s;%d;%llu;%c;\n",
+			   v3d_queue_to_string(queue),
+			   queue_stats->jobs_sent,
+			   queue_stats->runtime + active_runtime,
+			   queue_stats->last_pid?'1':'0');
+		mutex_unlock(&queue_stats->lock);
+	}
+
+	return 0;
+}
+
+static int v3d_debugfs_gpu_pid_usage(struct seq_file *m, void *unused)
+{
+	struct drm_debugfs_entry *entry = m->private;
+	struct drm_device *dev = entry->dev;
+	struct v3d_dev *v3d = to_v3d_dev(dev);
+	struct v3d_queue_stats *queue_stats;
+	struct v3d_queue_pid_stats *cur;
+	enum v3d_queue queue;
+	u64 active_runtime;
+	u64 timestamp = local_clock();
+
+	seq_printf(m, "timestamp;%llu;\n", timestamp);
+	seq_printf(m, "\"QUEUE\";\"PID\",\"JOBS\";\"RUNTIME\";\"ACTIVE\";\n");
+	for (queue = 0; queue < V3D_MAX_QUEUES; queue++) {
+
+		if (!v3d->queue[queue].sched.ready)
+			continue;
+
+		queue_stats = &v3d->gpu_queue_stats[queue];
+		mutex_lock(&queue_stats->lock);
+		queue_stats->gpu_pid_stats_timeout = jiffies + V3D_QUEUE_STATS_TIMEOUT;
+		v3d_sched_stats_update(queue_stats);
+		list_for_each_entry(cur, &queue_stats->pid_stats_list, list) {
+
+			if (cur->pid == queue_stats->last_pid)
+				active_runtime = timestamp - queue_stats->last_exec_start;
+			else
+				active_runtime = 0;
+
+			seq_printf(m, "%s;%d;%d;%llu;%c;\n",
+				   v3d_queue_to_string(queue),
+				   cur->pid, cur->jobs_sent,
+				   cur->runtime + active_runtime,
+				   cur->pid == queue_stats->last_pid ? '1' : '0');
+		}
+		mutex_unlock(&queue_stats->lock);
+	}
+
+	return 0;
+}
+
 static int v3d_measure_clock(struct seq_file *m, void *unused)
 {
 	struct drm_debugfs_entry *entry = m->private;
@@ -241,6 +318,8 @@ static const struct drm_debugfs_info v3d
 	{"v3d_regs", v3d_v3d_debugfs_regs, 0},
 	{"measure_clock", v3d_measure_clock, 0},
 	{"bo_stats", v3d_debugfs_bo_stats, 0},
+	{"gpu_usage", v3d_debugfs_gpu_usage, 0},
+	{"gpu_pid_usage", v3d_debugfs_gpu_pid_usage, 0},
 };
 
 void
--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -21,6 +21,19 @@ struct reset_control;
 
 #define V3D_MAX_QUEUES (V3D_CACHE_CLEAN + 1)
 
+static inline char *
+v3d_queue_to_string(enum v3d_queue queue)
+{
+	switch (queue) {
+	case V3D_BIN: return "v3d_bin";
+	case V3D_RENDER: return "v3d_render";
+	case V3D_TFU: return "v3d_tfu";
+	case V3D_CSD: return "v3d_csd";
+	case V3D_CACHE_CLEAN: return "v3d_cache_clean";
+	}
+	return "UNKNOWN";
+}
+
 struct v3d_queue_state {
 	struct drm_gpu_scheduler sched;
 
@@ -28,6 +41,44 @@ struct v3d_queue_state {
 	u64 emit_seqno;
 };
 
+struct v3d_queue_pid_stats {
+	struct	list_head list;
+	u64	runtime;
+	/* Time in jiffies to purge the stats of this process. Every time a
+	 * process sends a new job to the queue, this timeout is delayed by
+	 * V3D_QUEUE_STATS_TIMEOUT while the gpu_pid_stats_timeout of the
+	 * queue is not reached.
+	 */
+	unsigned long timeout_purge;
+	u32	jobs_sent;
+	pid_t	pid;
+};
+
+struct v3d_queue_stats {
+	struct mutex lock;
+	u64	last_exec_start;
+	u64	last_exec_end;
+	u64	runtime;
+	u32	jobs_sent;
+	/* Time in jiffies to stop collecting gpu stats by process. This is
+	 * increased by every access to the debugfs interface gpu_pid_usage.
+	 * If the debugfs is not used stats are not collected.
+	 */
+	unsigned long gpu_pid_stats_timeout;
+	pid_t	last_pid;
+	struct list_head pid_stats_list;
+};
+
+/* pid_stats by process (v3d_queue_pid_stats) are recorded if there is an
+ * access to the gpu_pid_usage debugfs interface for the last
+ * V3D_QUEUE_STATS_TIMEOUT (70s).
+ *
+ * The same timeout is used to purge the stats by process for those processes
+ * that have not sent jobs this period.
+ */
+#define V3D_QUEUE_STATS_TIMEOUT (70 * HZ)
+
+
 /* Performance monitor object. The perform lifetime is controlled by userspace
  * using perfmon related ioctls. A perfmon can be attached to a submit_cl
  * request, and when this is the case, HW perf counters will be activated just
@@ -147,6 +198,8 @@ struct v3d_dev {
 		u32 num_allocated;
 		u32 pages_allocated;
 	} bo_stats;
+
+	struct v3d_queue_stats gpu_queue_stats[V3D_MAX_QUEUES];
 };
 
 static inline struct v3d_dev *
@@ -244,6 +297,11 @@ struct v3d_job {
 	 */
 	struct v3d_perfmon *perfmon;
 
+	/* PID of the process that submitted the job that could be used
+	 * for collecting stats by process of gpu usage.
+	 */
+	pid_t client_pid;
+
	/* Callback for the freeing of the job on refcount going to 0. */
 	void (*free)(struct kref *ref);
 };
@@ -408,6 +466,7 @@ void v3d_mmu_remove_ptes(struct v3d_bo *
 /* v3d_sched.c */
 int v3d_sched_init(struct v3d_dev *v3d);
 void v3d_sched_fini(struct v3d_dev *v3d);
+void v3d_sched_stats_update(struct v3d_queue_stats *queue_stats);
 
 /* v3d_perfmon.c */
 void v3d_perfmon_get(struct v3d_perfmon *perfmon);
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -460,6 +460,7 @@ v3d_job_init(struct v3d_dev *v3d, struct
 	job = *container;
 	job->v3d = v3d;
 	job->free = free;
+	job->client_pid = current->pid;
 
 	ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue],
 				 v3d_priv);
--- a/drivers/gpu/drm/v3d/v3d_irq.c
+++ b/drivers/gpu/drm/v3d/v3d_irq.c
@@ -14,6 +14,7 @@
  */
 
 #include <linux/platform_device.h>
+#include <linux/sched/clock.h>
 
 #include "v3d_drv.h"
 #include "v3d_regs.h"
@@ -100,6 +101,7 @@ v3d_irq(int irq, void *arg)
 	if (intsts & V3D_INT_FLDONE) {
 		struct v3d_fence *fence =
 			to_v3d_fence(v3d->bin_job->base.irq_fence);
+		v3d->gpu_queue_stats[V3D_BIN].last_exec_end = local_clock();
 
 		trace_v3d_bcl_irq(&v3d->drm, fence->seqno);
 		dma_fence_signal(&fence->base);
@@ -109,6 +111,7 @@ v3d_irq(int irq, void *arg)
 	if (intsts & V3D_INT_FRDONE) {
 		struct v3d_fence *fence =
 			to_v3d_fence(v3d->render_job->base.irq_fence);
+		v3d->gpu_queue_stats[V3D_RENDER].last_exec_end = local_clock();
 
 		trace_v3d_rcl_irq(&v3d->drm, fence->seqno);
 		dma_fence_signal(&fence->base);
@@ -118,6 +121,7 @@ v3d_irq(int irq, void *arg)
 	if (intsts & V3D_INT_CSDDONE) {
 		struct v3d_fence *fence =
 			to_v3d_fence(v3d->csd_job->base.irq_fence);
+		v3d->gpu_queue_stats[V3D_CSD].last_exec_end = local_clock();
 
 		trace_v3d_csd_irq(&v3d->drm, fence->seqno);
 		dma_fence_signal(&fence->base);
@@ -154,6 +158,7 @@ v3d_hub_irq(int irq, void *arg)
 	if (intsts & V3D_HUB_INT_TFUC) {
 		struct v3d_fence *fence =
 			to_v3d_fence(v3d->tfu_job->base.irq_fence);
+		v3d->gpu_queue_stats[V3D_TFU].last_exec_end = local_clock();
 
 		trace_v3d_tfu_irq(&v3d->drm, fence->seqno);
 		dma_fence_signal(&fence->base);
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -19,6 +19,7 @@
  */
 
 #include <linux/kthread.h>
+#include <linux/sched/clock.h>
 
 #include "v3d_drv.h"
 #include "v3d_regs.h"
@@ -72,6 +73,114 @@ v3d_switch_perfmon(struct v3d_dev *v3d,
 		v3d_perfmon_start(v3d, job->perfmon);
 }
 
+/*
+ * Updates the scheduling stats of the gpu queues runtime for completed jobs.
+ *
+ * It should be called before any new job submission to the queue or before
+ * accessing the stats from the debugfs interface.
+ *
+ * It is expected that calls to this function are done with queue_stats->lock
+ * locked.
+ */
+void
+v3d_sched_stats_update(struct v3d_queue_stats *queue_stats)
+{
+	struct list_head *pid_stats_list = &queue_stats->pid_stats_list;
+	struct v3d_queue_pid_stats *cur, *tmp;
+	u64 runtime = 0;
+	bool store_pid_stats =
+		time_is_after_jiffies(queue_stats->gpu_pid_stats_timeout);
+
+	/* If debugfs stats gpu_pid_usage has not been polled for a period,
+	 * the pid stats collection is stopped and we purge any existing
+	 * pid_stats.
+	 *
+	 * pid_stats are also purged for clients that have reached the
+	 * timeout_purge because the process probably does not exist anymore.
+	 */
+	list_for_each_entry_safe_reverse(cur, tmp, pid_stats_list, list) {
+		if (!store_pid_stats || time_is_before_jiffies(cur->timeout_purge)) {
+			list_del(&cur->list);
+			kfree(cur);
+		} else {
+			break;
+		}
+	}
+	/* If a job has finished its stats are updated. */
+	if (queue_stats->last_pid && queue_stats->last_exec_end) {
+		runtime = queue_stats->last_exec_end -
+			  queue_stats->last_exec_start;
+		queue_stats->runtime += runtime;
+
+		if (store_pid_stats) {
+			struct v3d_queue_pid_stats *pid_stats;
+			/* Last job info is always at the head of the list */
+			pid_stats = list_first_entry_or_null(pid_stats_list,
+				struct v3d_queue_pid_stats, list);
+			if (pid_stats &&
+			    pid_stats->pid == queue_stats->last_pid) {
+				pid_stats->runtime += runtime;
+			}
+		}
+		queue_stats->last_pid = 0;
+	}
+}
+
+/*
+ * Updates the queue usage adding the information of a new job that is
+ * about to be sent to the GPU to be executed.
+ */
+int
+v3d_sched_stats_add_job(struct v3d_queue_stats *queue_stats,
+			struct drm_sched_job *sched_job)
+{
+
+	struct v3d_queue_pid_stats *pid_stats = NULL;
+	struct v3d_job *job = sched_job?to_v3d_job(sched_job):NULL;
+	struct v3d_queue_pid_stats *cur;
+	struct list_head *pid_stats_list = &queue_stats->pid_stats_list;
+	int ret = 0;
+
+	mutex_lock(&queue_stats->lock);
+
+	/* Completion of previous job requires an update of its runtime stats */
+	v3d_sched_stats_update(queue_stats);
+
+	queue_stats->last_exec_start = local_clock();
+	queue_stats->last_exec_end = 0;
+	queue_stats->jobs_sent++;
+	queue_stats->last_pid = job->client_pid;
+
+	/* gpu usage stats by process are being collected */
+	if (time_is_after_jiffies(queue_stats->gpu_pid_stats_timeout)) {
+		list_for_each_entry(cur, pid_stats_list, list) {
+			if (cur->pid == job->client_pid) {
+				pid_stats = cur;
+				break;
+			}
+		}
+		/* pid_stats of this client is moved to the head of the list. */
+		if (pid_stats) {
+			list_move(&pid_stats->list, pid_stats_list);
+		} else {
+			pid_stats = kzalloc(sizeof(struct v3d_queue_pid_stats),
+					    GFP_KERNEL);
+			if (!pid_stats) {
+				ret = -ENOMEM;
+				goto err_mem;
+			}
+			pid_stats->pid = job->client_pid;
+			list_add(&pid_stats->list, pid_stats_list);
+		}
+		pid_stats->jobs_sent++;
+		pid_stats->timeout_purge = jiffies + V3D_QUEUE_STATS_TIMEOUT;
+	}
+
+err_mem:
+	mutex_unlock(&queue_stats->lock);
+	return ret;
+}
+
 static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job)
 {
 	struct v3d_bin_job *job = to_bin_job(sched_job);
@@ -107,6 +216,7 @@ static struct dma_fence *v3d_bin_job_run
 	trace_v3d_submit_cl(dev, false, to_v3d_fence(fence)->seqno,
 			    job->start, job->end);
 
+	v3d_sched_stats_add_job(&v3d->gpu_queue_stats[V3D_BIN], sched_job);
 	v3d_switch_perfmon(v3d, &job->base);
 
 	/* Set the current and end address of the control list.
@@ -158,6 +268,7 @@ static struct dma_fence *v3d_render_job_
 	trace_v3d_submit_cl(dev, true, to_v3d_fence(fence)->seqno,
 			    job->start, job->end);
 
+	v3d_sched_stats_add_job(&v3d->gpu_queue_stats[V3D_RENDER], sched_job);
 	v3d_switch_perfmon(v3d, &job->base);
 
 	/* XXX: Set the QCFG */
@@ -190,6 +301,7 @@ v3d_tfu_job_run(struct drm_sched_job *sc
 
 	trace_v3d_submit_tfu(dev, to_v3d_fence(fence)->seqno);
 
+	v3d_sched_stats_add_job(&v3d->gpu_queue_stats[V3D_TFU], sched_job);
 	V3D_WRITE(V3D_TFU_IIA, job->args.iia);
 	V3D_WRITE(V3D_TFU_IIS, job->args.iis);
 	V3D_WRITE(V3D_TFU_ICA, job->args.ica);
@@ -231,6 +343,7 @@ v3d_csd_job_run(struct drm_sched_job *sc
 
 	trace_v3d_submit_csd(dev, to_v3d_fence(fence)->seqno);
 
+	v3d_sched_stats_add_job(&v3d->gpu_queue_stats[V3D_CSD], sched_job);
 	v3d_switch_perfmon(v3d, &job->base);
 
 	for (i = 1; i <= 6; i++)
@@ -247,7 +360,10 @@ v3d_cache_clean_job_run(struct drm_sched
 	struct v3d_job *job = to_v3d_job(sched_job);
 	struct v3d_dev *v3d = job->v3d;
 
+	v3d_sched_stats_add_job(&v3d->gpu_queue_stats[V3D_CACHE_CLEAN],
+				sched_job);
 	v3d_clean_caches(v3d);
+	v3d->gpu_queue_stats[V3D_CACHE_CLEAN].last_exec_end = local_clock();
 
 	return NULL;
 }
@@ -385,8 +501,18 @@ v3d_sched_init(struct v3d_dev *v3d)
 	int hw_jobs_limit = 1;
 	int job_hang_limit = 0;
 	int hang_limit_ms = 500;
+	enum v3d_queue q;
 	int ret;
 
+	for (q = 0; q < V3D_MAX_QUEUES; q++) {
+		INIT_LIST_HEAD(&v3d->gpu_queue_stats[q].pid_stats_list);
+		/* Setting timeout before current jiffies disables collecting
+		 * pid_stats on scheduling init.
+		 */
+		v3d->gpu_queue_stats[q].gpu_pid_stats_timeout = jiffies - 1;
+		mutex_init(&v3d->gpu_queue_stats[q].lock);
+	}
+
 	ret = drm_sched_init(&v3d->queue[V3D_BIN].sched,
 			     &v3d_bin_sched_ops,
 			     hw_jobs_limit, job_hang_limit,
@@ -440,9 +566,20 @@ void
 v3d_sched_fini(struct v3d_dev *v3d)
 {
 	enum v3d_queue q;
+	struct v3d_queue_stats *queue_stats;
 
 	for (q = 0; q < V3D_MAX_QUEUES; q++) {
-		if (v3d->queue[q].sched.ready)
+		if (v3d->queue[q].sched.ready) {
+			queue_stats = &v3d->gpu_queue_stats[q];
+			mutex_lock(&queue_stats->lock);
+			/* Setting gpu_pid_stats_timeout to jiffies-1 will
+			 * make v3d_sched_stats_update to purge all
+			 * allocated pid_stats.
+			 */
+			queue_stats->gpu_pid_stats_timeout = jiffies - 1;
+			v3d_sched_stats_update(queue_stats);
+			mutex_unlock(&queue_stats->lock);
 			drm_sched_fini(&v3d->queue[q].sched);
+		}
 	}
 }
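As a companion illustration (again not part of the patch), the
per-process endpoint is consumed the same way; the important difference
is that per-PID collection only stays enabled while gpu_pid_usage keeps
being polled within the 70-second V3D_QUEUE_STATS_TIMEOUT window. A
minimal polling sketch, with the same assumed debugfs path, could look
like this:

/*
 * Hypothetical sketch for the per-process endpoint: keep polling
 * gpu_pid_usage at an interval shorter than the 70 s timeout so the
 * driver keeps collecting per-PID stats, and dump each sample.
 * The debugfs path is an assumption, not part of the patch.
 */
#include <stdio.h>
#include <unistd.h>

#define PID_USAGE "/sys/kernel/debug/dri/0/gpu_pid_usage"

int main(void)
{
	char line[256];

	for (;;) {
		FILE *f = fopen(PID_USAGE, "r");

		if (!f) {
			perror(PID_USAGE);
			return 1;
		}
		/* Rows: queue;pid;jobs;runtime;active; with runtime in ns.
		 * The difference in runtime between two samples of the same
		 * queue/pid pair gives that process's busy time on the queue.
		 */
		while (fgets(line, sizeof(line), f))
			fputs(line, stdout);
		fclose(f);
		putchar('\n');
		sleep(10);	/* well under the 70 s purge timeout */
	}
	return 0;
}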