gpu: nvgpu: introduce internal runlist domain

The current runlist code assumes a single runlist buffer to hold all TSG
and channel entries. Create separate RL domain and domain memory types
to hold data that is related to only a scheduling domain and not
directly to the runlist hardware; in the future, more than one domain
may exist and one of them is enabled at a time.

The domain is used only internally by the runlist code at this point and
is functionally equivalent to the current runlist memory that houses the
round robin entries.

The double buffering is still kept, although more domains might benefit
from some cleverness. Although any number of created domains may be
edited at runtime, only one runlist memory is accessed by the hardware at
a time. To spare some contiguous memory, this should be considered an
opportunity for optimization in the future.

Jira NVGPU-6425

Change-Id: Id99c55f058ad56daa48b732240f05b3195debfb1
Signed-off-by: Konsta Hölttä <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2618386
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Konsta Hölttä
2021-10-28 22:10:34 +03:00
committed by mobile promotions
parent e616b2ba4d
commit 1d23b8f13a
13 changed files with 195 additions and 120 deletions

View File

@@ -32,6 +32,7 @@
*/
struct gk20a;
struct nvgpu_channel;
struct nvgpu_runlist;
/**
* Runlist HAL operations.
@@ -89,8 +90,7 @@ struct gops_runlist {
void (*get_tsg_entry)(struct nvgpu_tsg *tsg,
u32 *runlist, u32 timeslice);
void (*get_ch_entry)(struct nvgpu_channel *ch, u32 *runlist);
void (*hw_submit)(struct gk20a *g, u32 runlist_id,
u32 count, u32 buffer_index);
void (*hw_submit)(struct gk20a *g, struct nvgpu_runlist *runlist);
int (*wait_pending)(struct gk20a *g, u32 runlist_id);
void (*write_state)(struct gk20a *g, u32 runlists_mask,
u32 runlist_state);

View File

@@ -75,31 +75,50 @@ struct nvgpu_pbdma_info;
/** Enable runlist. */
#define RUNLIST_ENABLED 1U
/** Double buffering is used to build runlists */
#define MAX_RUNLIST_BUFFERS 2U
/** Runlist identifier is invalid. */
#define NVGPU_INVALID_RUNLIST_ID U32_MAX
/*
* Updates to this memory are still serialized by the runlist lock.
*
* TODO: add a mutex when domain updates get more fine-grained. The buffers in
* nvgpu_runlist_domain are pointer members for a reason to make this easier in
* the future; the buffers may get juggled around.
*/
/*
 * A single runlist buffer: the HW-format entry stream rendered by SW,
 * together with the number of entries currently written into it.
 */
struct nvgpu_runlist_mem {
/** Rendered runlist memory suitable for HW. */
struct nvgpu_mem mem;
/** Number of entries written in the buffer. */
u32 count;
};
/*
 * Per-scheduling-domain runlist state, double-buffered: one buffer is
 * free for SW edits while the other is what HW was last given.
 * NOTE(review): updates are serialized by the runlist lock (see the
 * comment above); the members are pointers so buffers can be swapped.
 */
struct nvgpu_runlist_domain {
/** Runlist buffer free to use in sw. Swapped with another mem on next load. */
struct nvgpu_runlist_mem *mem;
/** Currently active buffer submitted for hardware. */
struct nvgpu_runlist_mem *mem_hw;
};
struct nvgpu_runlist {
/** Runlist identifier. */
/** The HW has some designated RL IDs that are bound to engines. */
u32 id;
/** Bitmap of active channels in the runlist. One bit per chid. */
unsigned long *active_channels;
/** Bitmap of active TSGs in the runlist. One bit per tsgid. */
unsigned long *active_tsgs;
/** Runlist buffers. Double buffering is used for each engine. */
struct nvgpu_mem mem[MAX_RUNLIST_BUFFERS];
/** Indicates current runlist buffer used by HW. */
u32 cur_buffer;
/* The default domain is the only one that currently exists. */
struct nvgpu_runlist_domain *domain;
/** Bitmask of PBDMAs supported for this runlist. */
u32 pbdma_bitmask;
/** Bitmask of engines using this runlist. */
u32 eng_bitmask;
/** Bitmask of engines to be reset during recovery. */
u32 reset_eng_bitmask;
/** Cached hw_submit parameter. */
u32 count;
/** Protect ch/tsg/runlist preempt & runlist update. */
struct nvgpu_mutex runlist_lock;
@@ -139,7 +158,8 @@ struct nvgpu_runlist {
*/
u32 nvgpu_runlist_construct_locked(struct nvgpu_fifo *f,
struct nvgpu_runlist *runlist,
u32 buf_id, u32 max_entries);
struct nvgpu_runlist_domain *domain,
u32 max_entries);
/**
* @brief Add/remove channel to/from runlist (locked)