gpu: nvgpu: introduce internal runlist domain

The current runlist code assumes a single runlist buffer to hold all TSG
and channel entries. Create separate RL domain and domain memory types
to hold data that is related to only a scheduling domain and not
directly to the runlist hardware; in the future, more than one domain
may exist and one of them is enabled at a time.

The domain is used only internally by the runlist code at this point and
is functionally equivalent to the current runlist memory that houses the
round robin entries.

The double buffering is still kept, although more domains might benefit
from some cleverness. Although any number of created domains may be
edited at runtime, only one runlist memory is accessed by the hardware at
a time. To spare some contiguous memory, this should be considered an
opportunity for optimization in the future.

Jira NVGPU-6425

Change-Id: Id99c55f058ad56daa48b732240f05b3195debfb1
Signed-off-by: Konsta Hölttä <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2618386
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Konsta Hölttä
2021-10-28 22:10:34 +03:00
committed by mobile promotions
parent e616b2ba4d
commit 1d23b8f13a
13 changed files with 195 additions and 120 deletions

View File

@@ -32,6 +32,7 @@
*/
struct gk20a;
struct nvgpu_channel;
struct nvgpu_runlist;
/**
* Runlist HAL operations.
@@ -89,8 +90,7 @@ struct gops_runlist {
void (*get_tsg_entry)(struct nvgpu_tsg *tsg,
u32 *runlist, u32 timeslice);
void (*get_ch_entry)(struct nvgpu_channel *ch, u32 *runlist);
void (*hw_submit)(struct gk20a *g, u32 runlist_id,
u32 count, u32 buffer_index);
void (*hw_submit)(struct gk20a *g, struct nvgpu_runlist *runlist);
int (*wait_pending)(struct gk20a *g, u32 runlist_id);
void (*write_state)(struct gk20a *g, u32 runlists_mask,
u32 runlist_state);

View File

@@ -75,31 +75,50 @@ struct nvgpu_pbdma_info;
/** Enable runlist. */
#define RUNLIST_ENABLED 1U
/** Double buffering is used to build runlists */
#define MAX_RUNLIST_BUFFERS 2U
/** Runlist identifier is invalid. */
#define NVGPU_INVALID_RUNLIST_ID U32_MAX
/*
* Updates to this memory are still serialized by the runlist lock.
*
* TODO: add a mutex when domain updates get more fine-grained. The buffers in
* nvgpu_runlist_domain are pointer members for a reason to make this easier in
* the future; the buffers may get juggled around.
*/
/*
 * A single runlist buffer: the HW-format entry stream rendered by SW,
 * together with the number of entries currently written into it.
 */
struct nvgpu_runlist_mem {
/** Rendered runlist memory suitable for HW. */
struct nvgpu_mem mem;
/** Number of entries written in the buffer. */
u32 count;
};
/*
 * Per-scheduling-domain runlist state, double-buffered: one buffer is
 * free for SW edits while the other is what HW was last given.
 * NOTE(review): updates are serialized by the runlist lock (see the
 * comment above); the members are pointers so buffers can be swapped.
 */
struct nvgpu_runlist_domain {
/** Runlist buffer free to use in sw. Swapped with another mem on next load. */
struct nvgpu_runlist_mem *mem;
/** Currently active buffer submitted for hardware. */
struct nvgpu_runlist_mem *mem_hw;
};
struct nvgpu_runlist {
/** Runlist identifier. */
/** The HW has some designated RL IDs that are bound to engines. */
u32 id;
/** Bitmap of active channels in the runlist. One bit per chid. */
unsigned long *active_channels;
/** Bitmap of active TSGs in the runlist. One bit per tsgid. */
unsigned long *active_tsgs;
/** Runlist buffers. Double buffering is used for each engine. */
struct nvgpu_mem mem[MAX_RUNLIST_BUFFERS];
/** Indicates current runlist buffer used by HW. */
u32 cur_buffer;
/* The default domain is the only one that currently exists. */
struct nvgpu_runlist_domain *domain;
/** Bitmask of PBDMAs supported for this runlist. */
u32 pbdma_bitmask;
/** Bitmask of engines using this runlist. */
u32 eng_bitmask;
/** Bitmask of engines to be reset during recovery. */
u32 reset_eng_bitmask;
/** Cached hw_submit parameter. */
u32 count;
/** Protect ch/tsg/runlist preempt & runlist update. */
struct nvgpu_mutex runlist_lock;
@@ -139,7 +158,8 @@ struct nvgpu_runlist {
*/
u32 nvgpu_runlist_construct_locked(struct nvgpu_fifo *f,
struct nvgpu_runlist *runlist,
u32 buf_id, u32 max_entries);
struct nvgpu_runlist_domain *domain,
u32 max_entries);
/**
* @brief Add/remove channel to/from runlist (locked)