mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-24 10:34:43 +03:00
Open source GPL/LGPL release
This commit is contained in:
1195
drivers/gpu/nvgpu/common/gr/ctx.c
Normal file
1195
drivers/gpu/nvgpu/common/gr/ctx.c
Normal file
File diff suppressed because it is too large
Load Diff
183
drivers/gpu/nvgpu/common/gr/ctx_priv.h
Normal file
183
drivers/gpu/nvgpu/common/gr/ctx_priv.h
Normal file
@@ -0,0 +1,183 @@
|
||||
/*
|
||||
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef NVGPU_GR_CTX_PRIV_H
|
||||
#define NVGPU_GR_CTX_PRIV_H
|
||||
|
||||
struct nvgpu_mem;
|
||||
|
||||
/**
|
||||
* Patch context buffer descriptor structure.
|
||||
*
|
||||
* Pointer to this structure is maintained in #nvgpu_gr_ctx structure.
|
||||
*/
|
||||
struct patch_desc {
|
||||
/**
|
||||
* Memory to hold patch context buffer.
|
||||
*/
|
||||
struct nvgpu_mem mem;
|
||||
|
||||
/**
|
||||
* Count of entries written into patch context buffer.
|
||||
*/
|
||||
u32 data_count;
|
||||
};
|
||||
|
||||
#ifdef CONFIG_NVGPU_GRAPHICS
|
||||
struct zcull_ctx_desc {
|
||||
u64 gpu_va;
|
||||
u32 ctx_sw_mode;
|
||||
};
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_NVGPU_DEBUGGER
|
||||
struct pm_ctx_desc {
|
||||
struct nvgpu_mem mem;
|
||||
u64 gpu_va;
|
||||
u32 pm_mode;
|
||||
};
|
||||
#endif
|
||||
|
||||
/**
|
||||
* GR context descriptor structure.
|
||||
*
|
||||
* This structure stores various properties of all GR context buffers.
|
||||
*/
|
||||
struct nvgpu_gr_ctx_desc {
|
||||
/**
|
||||
* Array to store all GR context buffer sizes.
|
||||
*/
|
||||
u32 size[NVGPU_GR_CTX_COUNT];
|
||||
|
||||
#ifdef CONFIG_NVGPU_GRAPHICS
|
||||
bool force_preemption_gfxp;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_NVGPU_CILP
|
||||
bool force_preemption_cilp;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
bool dump_ctxsw_stats_on_channel_close;
|
||||
#endif
|
||||
};
|
||||
|
||||
/**
|
||||
* Graphics context buffer structure.
|
||||
*
|
||||
* This structure stores all the properties of a graphics context
|
||||
* buffer. One graphics context is allocated per GPU Time Slice
|
||||
* Group (TSG).
|
||||
*/
|
||||
struct nvgpu_gr_ctx {
|
||||
/**
|
||||
* Context ID read from graphics context buffer.
|
||||
*/
|
||||
u32 ctx_id;
|
||||
|
||||
/**
|
||||
* Flag to indicate if above context ID is valid or not.
|
||||
*/
|
||||
bool ctx_id_valid;
|
||||
|
||||
/**
|
||||
* Memory to hold graphics context buffer.
|
||||
*/
|
||||
struct nvgpu_mem mem;
|
||||
|
||||
#ifdef CONFIG_NVGPU_GFXP
|
||||
struct nvgpu_mem preempt_ctxsw_buffer;
|
||||
struct nvgpu_mem spill_ctxsw_buffer;
|
||||
struct nvgpu_mem betacb_ctxsw_buffer;
|
||||
struct nvgpu_mem pagepool_ctxsw_buffer;
|
||||
struct nvgpu_mem gfxp_rtvcb_ctxsw_buffer;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Patch context buffer descriptor struct.
|
||||
*/
|
||||
struct patch_desc patch_ctx;
|
||||
|
||||
#ifdef CONFIG_NVGPU_GRAPHICS
|
||||
struct zcull_ctx_desc zcull_ctx;
|
||||
#endif
|
||||
#ifdef CONFIG_NVGPU_DEBUGGER
|
||||
struct pm_ctx_desc pm_ctx;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Graphics preemption mode of the graphics context.
|
||||
*/
|
||||
u32 graphics_preempt_mode;
|
||||
|
||||
/**
|
||||
* Compute preemption mode of the graphics context.
|
||||
*/
|
||||
u32 compute_preempt_mode;
|
||||
|
||||
#ifdef CONFIG_NVGPU_NON_FUSA
|
||||
bool golden_img_loaded;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_NVGPU_CILP
|
||||
bool cilp_preempt_pending;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_NVGPU_DEBUGGER
|
||||
bool boosted_ctx;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Array to store GPU virtual addresses of all global context
|
||||
* buffers.
|
||||
*/
|
||||
u64 global_ctx_buffer_va[NVGPU_GR_CTX_VA_COUNT];
|
||||
|
||||
/**
|
||||
* Array to store indexes of global context buffers
|
||||
* corresponding to GPU virtual addresses above.
|
||||
*/
|
||||
u32 global_ctx_buffer_index[NVGPU_GR_CTX_VA_COUNT];
|
||||
|
||||
/**
|
||||
* Flag to indicate if global context buffers are mapped and
|
||||
* #global_ctx_buffer_va array is populated.
|
||||
*/
|
||||
bool global_ctx_buffer_mapped;
|
||||
|
||||
/**
|
||||
* TSG identifier corresponding to the graphics context.
|
||||
*/
|
||||
u32 tsgid;
|
||||
|
||||
#ifdef CONFIG_NVGPU_SM_DIVERSITY
|
||||
/** SM diversity configuration offset.
|
||||
* It is valid only if NVGPU_SUPPORT_SM_DIVERSITY support is true.
|
||||
* else input param is just ignored.
|
||||
* A valid offset starts from 0 to
|
||||
* (#gk20a.max_sm_diversity_config_count - 1).
|
||||
*/
|
||||
u32 sm_diversity_config;
|
||||
#endif
|
||||
};
|
||||
|
||||
#endif /* NVGPU_GR_CTX_PRIV_H */
|
||||
700
drivers/gpu/nvgpu/common/gr/fecs_trace.c
Normal file
700
drivers/gpu/nvgpu/common/gr/fecs_trace.c
Normal file
@@ -0,0 +1,700 @@
|
||||
/*
|
||||
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/nvgpu_init.h>
|
||||
#include <nvgpu/list.h>
|
||||
#include <nvgpu/log.h>
|
||||
#include <nvgpu/log2.h>
|
||||
#include <nvgpu/mm.h>
|
||||
#include <nvgpu/circ_buf.h>
|
||||
#include <nvgpu/timers.h>
|
||||
#include <nvgpu/enabled.h>
|
||||
#include <nvgpu/gr/global_ctx.h>
|
||||
#include <nvgpu/gr/ctx.h>
|
||||
#include <nvgpu/gr/subctx.h>
|
||||
#include <nvgpu/gr/fecs_trace.h>
|
||||
#include <nvgpu/gr/gr_utils.h>
|
||||
|
||||
static int nvgpu_gr_fecs_trace_periodic_polling(void *arg);
|
||||
|
||||
int nvgpu_gr_fecs_trace_add_context(struct gk20a *g, u32 context_ptr,
|
||||
pid_t pid, u32 vmid, struct nvgpu_list_node *list)
|
||||
{
|
||||
struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
|
||||
struct nvgpu_fecs_trace_context_entry *entry;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
|
||||
"adding hash entry context_ptr=%x -> pid=%d, vmid=%d",
|
||||
context_ptr, pid, vmid);
|
||||
|
||||
entry = nvgpu_kzalloc(g, sizeof(*entry));
|
||||
if (entry == NULL) {
|
||||
nvgpu_err(g,
|
||||
"can't alloc new entry for context_ptr=%x pid=%d vmid=%d",
|
||||
context_ptr, pid, vmid);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
nvgpu_init_list_node(&entry->entry);
|
||||
entry->context_ptr = context_ptr;
|
||||
entry->pid = pid;
|
||||
entry->vmid = vmid;
|
||||
|
||||
nvgpu_mutex_acquire(&trace->list_lock);
|
||||
nvgpu_list_add_tail(&entry->entry, list);
|
||||
nvgpu_mutex_release(&trace->list_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void nvgpu_gr_fecs_trace_remove_context(struct gk20a *g, u32 context_ptr,
|
||||
struct nvgpu_list_node *list)
|
||||
{
|
||||
struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
|
||||
struct nvgpu_fecs_trace_context_entry *entry, *tmp;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
|
||||
"freeing entry context_ptr=%x", context_ptr);
|
||||
|
||||
nvgpu_mutex_acquire(&trace->list_lock);
|
||||
nvgpu_list_for_each_entry_safe(entry, tmp, list,
|
||||
nvgpu_fecs_trace_context_entry, entry) {
|
||||
if (entry->context_ptr == context_ptr) {
|
||||
nvgpu_list_del(&entry->entry);
|
||||
nvgpu_log(g, gpu_dbg_ctxsw,
|
||||
"freed entry=%p context_ptr=%x", entry,
|
||||
entry->context_ptr);
|
||||
nvgpu_kfree(g, entry);
|
||||
break;
|
||||
}
|
||||
}
|
||||
nvgpu_mutex_release(&trace->list_lock);
|
||||
}
|
||||
|
||||
void nvgpu_gr_fecs_trace_remove_contexts(struct gk20a *g,
|
||||
struct nvgpu_list_node *list)
|
||||
{
|
||||
struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
|
||||
struct nvgpu_fecs_trace_context_entry *entry, *tmp;
|
||||
|
||||
nvgpu_mutex_acquire(&trace->list_lock);
|
||||
nvgpu_list_for_each_entry_safe(entry, tmp, list,
|
||||
nvgpu_fecs_trace_context_entry, entry) {
|
||||
nvgpu_list_del(&entry->entry);
|
||||
nvgpu_kfree(g, entry);
|
||||
}
|
||||
nvgpu_mutex_release(&trace->list_lock);
|
||||
}
|
||||
|
||||
void nvgpu_gr_fecs_trace_find_pid(struct gk20a *g, u32 context_ptr,
|
||||
struct nvgpu_list_node *list, pid_t *pid, u32 *vmid)
|
||||
{
|
||||
struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
|
||||
struct nvgpu_fecs_trace_context_entry *entry;
|
||||
|
||||
nvgpu_mutex_acquire(&trace->list_lock);
|
||||
nvgpu_list_for_each_entry(entry, list, nvgpu_fecs_trace_context_entry,
|
||||
entry) {
|
||||
if (entry->context_ptr == context_ptr) {
|
||||
nvgpu_log(g, gpu_dbg_ctxsw,
|
||||
"found context_ptr=%x -> pid=%d, vmid=%d",
|
||||
entry->context_ptr, entry->pid, entry->vmid);
|
||||
*pid = entry->pid;
|
||||
*vmid = entry->vmid;
|
||||
nvgpu_mutex_release(&trace->list_lock);
|
||||
return;
|
||||
}
|
||||
}
|
||||
nvgpu_mutex_release(&trace->list_lock);
|
||||
|
||||
*pid = 0;
|
||||
*vmid = 0xffffffffU;
|
||||
}
|
||||
|
||||
int nvgpu_gr_fecs_trace_init(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_gr_fecs_trace *trace;
|
||||
|
||||
if (!is_power_of_2(GK20A_FECS_TRACE_NUM_RECORDS)) {
|
||||
nvgpu_err(g, "invalid NUM_RECORDS chosen");
|
||||
nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, false);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
trace = nvgpu_kzalloc(g, sizeof(struct nvgpu_gr_fecs_trace));
|
||||
if (trace == NULL) {
|
||||
nvgpu_err(g, "failed to allocate fecs_trace");
|
||||
nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, false);
|
||||
return -ENOMEM;
|
||||
}
|
||||
g->fecs_trace = trace;
|
||||
|
||||
nvgpu_mutex_init(&trace->poll_lock);
|
||||
nvgpu_mutex_init(&trace->list_lock);
|
||||
nvgpu_mutex_init(&trace->enable_lock);
|
||||
|
||||
nvgpu_init_list_node(&trace->context_list);
|
||||
|
||||
trace->enable_count = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int nvgpu_gr_fecs_trace_deinit(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
|
||||
|
||||
if (trace == NULL) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if tracer was enabled before attempting to stop the
|
||||
* tracer thread.
|
||||
*/
|
||||
if (trace->enable_count > 0) {
|
||||
nvgpu_thread_stop(&trace->poll_task);
|
||||
}
|
||||
|
||||
nvgpu_gr_fecs_trace_remove_contexts(g, &trace->context_list);
|
||||
|
||||
nvgpu_mutex_destroy(&g->fecs_trace->list_lock);
|
||||
nvgpu_mutex_destroy(&g->fecs_trace->poll_lock);
|
||||
nvgpu_mutex_destroy(&g->fecs_trace->enable_lock);
|
||||
|
||||
nvgpu_kfree(g, g->fecs_trace);
|
||||
g->fecs_trace = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int nvgpu_gr_fecs_trace_num_ts(struct gk20a *g)
|
||||
{
|
||||
return (g->ops.gr.ctxsw_prog.hw_get_ts_record_size_in_bytes()
|
||||
- sizeof(struct nvgpu_fecs_trace_record)) / sizeof(u64);
|
||||
}
|
||||
|
||||
struct nvgpu_fecs_trace_record *nvgpu_gr_fecs_trace_get_record(
|
||||
struct gk20a *g, int idx)
|
||||
{
|
||||
struct nvgpu_gr_global_ctx_buffer_desc *gr_global_ctx_buffer =
|
||||
nvgpu_gr_get_global_ctx_buffer_ptr(g);
|
||||
struct nvgpu_mem *mem = nvgpu_gr_global_ctx_buffer_get_mem(
|
||||
gr_global_ctx_buffer,
|
||||
NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER);
|
||||
if (mem == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return (struct nvgpu_fecs_trace_record *)
|
||||
((u8 *) mem->cpu_va +
|
||||
(idx * g->ops.gr.ctxsw_prog.hw_get_ts_record_size_in_bytes()));
|
||||
}
|
||||
|
||||
bool nvgpu_gr_fecs_trace_is_valid_record(struct gk20a *g,
|
||||
struct nvgpu_fecs_trace_record *r)
|
||||
{
|
||||
/*
|
||||
* testing magic_hi should suffice. magic_lo is sometimes used
|
||||
* as a sequence number in experimental ucode.
|
||||
*/
|
||||
return g->ops.gr.ctxsw_prog.is_ts_valid_record(r->magic_hi);
|
||||
}
|
||||
|
||||
size_t nvgpu_gr_fecs_trace_buffer_size(struct gk20a *g)
|
||||
{
|
||||
return GK20A_FECS_TRACE_NUM_RECORDS
|
||||
* g->ops.gr.ctxsw_prog.hw_get_ts_record_size_in_bytes();
|
||||
}
|
||||
|
||||
int nvgpu_gr_fecs_trace_max_entries(struct gk20a *g,
|
||||
struct nvgpu_gpu_ctxsw_trace_filter *filter)
|
||||
{
|
||||
int n;
|
||||
int tag;
|
||||
|
||||
/* Compute number of entries per record, with given filter */
|
||||
for (n = 0, tag = 0; tag < nvgpu_gr_fecs_trace_num_ts(g); tag++)
|
||||
n += (NVGPU_GPU_CTXSW_FILTER_ISSET(tag, filter) != 0);
|
||||
|
||||
/* Return max number of entries generated for the whole ring */
|
||||
return n * GK20A_FECS_TRACE_NUM_RECORDS;
|
||||
}
|
||||
|
||||
int nvgpu_gr_fecs_trace_enable(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
|
||||
int write;
|
||||
int err = 0;
|
||||
|
||||
nvgpu_mutex_acquire(&trace->enable_lock);
|
||||
trace->enable_count++;
|
||||
|
||||
if (trace->enable_count == 1U) {
|
||||
/* drop data in hw buffer */
|
||||
if (g->ops.gr.fecs_trace.flush)
|
||||
g->ops.gr.fecs_trace.flush(g);
|
||||
|
||||
write = g->ops.gr.fecs_trace.get_write_index(g);
|
||||
|
||||
if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_FEATURE_CONTROL)) {
|
||||
/*
|
||||
* For enabling FECS trace support, MAILBOX1's MSB
|
||||
* (Bit 31:31) should be set to 1. Bits 30:0 represents
|
||||
* actual pointer value.
|
||||
*/
|
||||
write = write |
|
||||
(BIT32(NVGPU_FECS_TRACE_FEATURE_CONTROL_BIT));
|
||||
}
|
||||
|
||||
g->ops.gr.fecs_trace.set_read_index(g, write);
|
||||
|
||||
/*
|
||||
* FECS ucode does a priv holdoff around the assertion of
|
||||
* context reset. So, pri transactions (e.g. mailbox1 register
|
||||
* write) might fail due to this. Hence, do write with ack
|
||||
* i.e. write and read it back to make sure write happened for
|
||||
* mailbox1.
|
||||
*/
|
||||
while (g->ops.gr.fecs_trace.get_read_index(g) != write) {
|
||||
nvgpu_log(g, gpu_dbg_ctxsw, "mailbox1 update failed");
|
||||
g->ops.gr.fecs_trace.set_read_index(g, write);
|
||||
}
|
||||
|
||||
err = nvgpu_thread_create(&trace->poll_task, g,
|
||||
nvgpu_gr_fecs_trace_periodic_polling, __func__);
|
||||
if (err != 0) {
|
||||
nvgpu_warn(g, "failed to create FECS polling task");
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
|
||||
done:
|
||||
nvgpu_mutex_release(&trace->enable_lock);
|
||||
return err;
|
||||
}
|
||||
|
||||
int nvgpu_gr_fecs_trace_disable(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
|
||||
int read = 0;
|
||||
|
||||
if (trace == NULL) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
nvgpu_mutex_acquire(&trace->enable_lock);
|
||||
if (trace->enable_count <= 0U) {
|
||||
nvgpu_mutex_release(&trace->enable_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
trace->enable_count--;
|
||||
if (trace->enable_count == 0U) {
|
||||
if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_FEATURE_CONTROL)) {
|
||||
/*
|
||||
* For disabling FECS trace support, MAILBOX1's MSB
|
||||
* (Bit 31:31) should be set to 0.
|
||||
*/
|
||||
read = g->ops.gr.fecs_trace.get_read_index(g) &
|
||||
(~(BIT32(NVGPU_FECS_TRACE_FEATURE_CONTROL_BIT)));
|
||||
|
||||
g->ops.gr.fecs_trace.set_read_index(g, read);
|
||||
|
||||
/*
|
||||
* FECS ucode does a priv holdoff around the assertion
|
||||
* of context reset. So, pri transactions (e.g.
|
||||
* mailbox1 register write) might fail due to this.
|
||||
* Hence, do write with ack i.e. write and read it back
|
||||
* to make sure write happened for mailbox1.
|
||||
*/
|
||||
while (g->ops.gr.fecs_trace.get_read_index(g) != read) {
|
||||
nvgpu_log(g, gpu_dbg_ctxsw,
|
||||
"mailbox1 update failed");
|
||||
g->ops.gr.fecs_trace.set_read_index(g, read);
|
||||
}
|
||||
}
|
||||
nvgpu_thread_stop(&trace->poll_task);
|
||||
}
|
||||
nvgpu_mutex_release(&trace->enable_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool nvgpu_gr_fecs_trace_is_enabled(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
|
||||
|
||||
return (trace && (trace->enable_count > 0));
|
||||
}
|
||||
|
||||
void nvgpu_gr_fecs_trace_reset_buffer(struct gk20a *g)
|
||||
{
|
||||
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " ");
|
||||
|
||||
g->ops.gr.fecs_trace.set_read_index(g,
|
||||
g->ops.gr.fecs_trace.get_write_index(g));
|
||||
}
|
||||
|
||||
/*
|
||||
* Converts HW entry format to userspace-facing format and pushes it to the
|
||||
* queue.
|
||||
*/
|
||||
int nvgpu_gr_fecs_trace_ring_read(struct gk20a *g, int index,
|
||||
u32 *vm_update_mask)
|
||||
{
|
||||
int i;
|
||||
struct nvgpu_gpu_ctxsw_trace_entry entry = { };
|
||||
struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
|
||||
pid_t cur_pid = 0, new_pid = 0;
|
||||
u32 cur_vmid = 0U, new_vmid = 0U;
|
||||
u32 vmid = 0U;
|
||||
int count = 0;
|
||||
|
||||
struct nvgpu_fecs_trace_record *r =
|
||||
nvgpu_gr_fecs_trace_get_record(g, index);
|
||||
if (r == NULL) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
|
||||
"consuming record trace=%p read=%d record=%p", trace, index, r);
|
||||
|
||||
if (!nvgpu_gr_fecs_trace_is_valid_record(g, r)) {
|
||||
nvgpu_warn(g,
|
||||
"trace=%p read=%d record=%p magic_lo=%08x magic_hi=%08x (invalid)",
|
||||
trace, index, r, r->magic_lo, r->magic_hi);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Clear magic_hi to detect cases where CPU could read write index
|
||||
* before FECS record is actually written to DRAM. This should not
|
||||
* as we force FECS writes to SYSMEM by reading through PRAMIN.
|
||||
*/
|
||||
r->magic_hi = 0;
|
||||
|
||||
if ((r->context_ptr != 0U) && (r->context_id != 0U)) {
|
||||
nvgpu_gr_fecs_trace_find_pid(g, r->context_ptr,
|
||||
&trace->context_list, &cur_pid, &cur_vmid);
|
||||
} else {
|
||||
cur_vmid = 0xffffffffU;
|
||||
cur_pid = 0;
|
||||
}
|
||||
|
||||
if (r->new_context_ptr != 0U) {
|
||||
nvgpu_gr_fecs_trace_find_pid(g, r->new_context_ptr,
|
||||
&trace->context_list, &new_pid, &new_vmid);
|
||||
} else {
|
||||
new_vmid = 0xffffffffU;
|
||||
new_pid = 0;
|
||||
}
|
||||
|
||||
nvgpu_log(g, gpu_dbg_ctxsw,
|
||||
"context_ptr=%x (vmid=%u pid=%d)",
|
||||
r->context_ptr, cur_vmid, cur_pid);
|
||||
nvgpu_log(g, gpu_dbg_ctxsw,
|
||||
"new_context_ptr=%x (vmid=%u pid=%d)",
|
||||
r->new_context_ptr, new_vmid, new_pid);
|
||||
|
||||
entry.context_id = r->context_id;
|
||||
|
||||
/* break out FECS record into trace events */
|
||||
for (i = 0; i < nvgpu_gr_fecs_trace_num_ts(g); i++) {
|
||||
|
||||
entry.tag = g->ops.gr.ctxsw_prog.hw_get_ts_tag(r->ts[i]);
|
||||
entry.timestamp =
|
||||
g->ops.gr.ctxsw_prog.hw_record_ts_timestamp(r->ts[i]);
|
||||
entry.timestamp <<= GK20A_FECS_TRACE_PTIMER_SHIFT;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_ctxsw,
|
||||
"tag=%x timestamp=%llx context_id=%08x new_context_id=%08x",
|
||||
entry.tag, entry.timestamp, r->context_id,
|
||||
r->new_context_id);
|
||||
|
||||
switch (nvgpu_gpu_ctxsw_tags_to_common_tags(entry.tag)) {
|
||||
case NVGPU_GPU_CTXSW_TAG_RESTORE_START:
|
||||
case NVGPU_GPU_CTXSW_TAG_CONTEXT_START:
|
||||
entry.context_id = r->new_context_id;
|
||||
entry.pid = new_pid;
|
||||
entry.vmid = new_vmid;
|
||||
break;
|
||||
|
||||
case NVGPU_GPU_CTXSW_TAG_CTXSW_REQ_BY_HOST:
|
||||
case NVGPU_GPU_CTXSW_TAG_FE_ACK:
|
||||
case NVGPU_GPU_CTXSW_TAG_FE_ACK_WFI:
|
||||
case NVGPU_GPU_CTXSW_TAG_FE_ACK_GFXP:
|
||||
case NVGPU_GPU_CTXSW_TAG_FE_ACK_CTAP:
|
||||
case NVGPU_GPU_CTXSW_TAG_FE_ACK_CILP:
|
||||
case NVGPU_GPU_CTXSW_TAG_SAVE_END:
|
||||
entry.context_id = r->context_id;
|
||||
entry.pid = cur_pid;
|
||||
entry.vmid = cur_vmid;
|
||||
break;
|
||||
|
||||
default:
|
||||
/* tags are not guaranteed to start at the beginning */
|
||||
if ((entry.tag != 0) && (entry.tag !=
|
||||
NVGPU_GPU_CTXSW_TAG_INVALID_TIMESTAMP)) {
|
||||
nvgpu_warn(g, "TAG not found");
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
nvgpu_log(g, gpu_dbg_ctxsw, "tag=%x context_id=%x pid=%lld",
|
||||
entry.tag, entry.context_id, entry.pid);
|
||||
|
||||
if (!entry.context_id)
|
||||
continue;
|
||||
|
||||
if (g->ops.gr.fecs_trace.vm_dev_write != NULL) {
|
||||
g->ops.gr.fecs_trace.vm_dev_write(g, entry.vmid,
|
||||
vm_update_mask, &entry);
|
||||
} else {
|
||||
nvgpu_gr_fecs_trace_write_entry(g, &entry);
|
||||
}
|
||||
count++;
|
||||
}
|
||||
|
||||
nvgpu_gr_fecs_trace_wake_up(g, vmid);
|
||||
return count;
|
||||
}
|
||||
|
||||
int nvgpu_gr_fecs_trace_poll(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
|
||||
u32 vm_update_mask = 0U;
|
||||
int read = 0;
|
||||
int write = 0;
|
||||
int cnt;
|
||||
int err = 0;
|
||||
|
||||
nvgpu_mutex_acquire(&trace->poll_lock);
|
||||
if (trace->enable_count == 0) {
|
||||
goto done_unlock;
|
||||
}
|
||||
|
||||
err = gk20a_busy(g);
|
||||
if (err) {
|
||||
goto done_unlock;
|
||||
}
|
||||
|
||||
write = g->ops.gr.fecs_trace.get_write_index(g);
|
||||
if ((write < 0) || (write >= GK20A_FECS_TRACE_NUM_RECORDS)) {
|
||||
nvgpu_err(g,
|
||||
"failed to acquire write index, write=%d", write);
|
||||
err = write;
|
||||
goto done;
|
||||
}
|
||||
|
||||
read = g->ops.gr.fecs_trace.get_read_index(g);
|
||||
|
||||
cnt = CIRC_CNT(write, read, GK20A_FECS_TRACE_NUM_RECORDS);
|
||||
if (!cnt)
|
||||
goto done;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_ctxsw,
|
||||
"circular buffer: read=%d (mailbox=%d) write=%d cnt=%d",
|
||||
read, g->ops.gr.fecs_trace.get_read_index(g), write, cnt);
|
||||
|
||||
/* Ensure all FECS writes have made it to SYSMEM */
|
||||
err = g->ops.mm.cache.fb_flush(g);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "mm.cache.fb_flush() failed err=%d", err);
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_FEATURE_CONTROL)) {
|
||||
/* Bits 30:0 of MAILBOX1 represents actual read pointer value */
|
||||
read = read & (~(BIT32(NVGPU_FECS_TRACE_FEATURE_CONTROL_BIT)));
|
||||
}
|
||||
|
||||
while (read != write) {
|
||||
cnt = nvgpu_gr_fecs_trace_ring_read(g, read, &vm_update_mask);
|
||||
if (cnt <= 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
/* Get to next record. */
|
||||
read = (read + 1) & (GK20A_FECS_TRACE_NUM_RECORDS - 1);
|
||||
}
|
||||
|
||||
if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_FEATURE_CONTROL)) {
|
||||
/*
|
||||
* In the next step, read pointer is going to be updated.
|
||||
* So, MSB of read pointer should be set back to 1. This will
|
||||
* keep FECS trace enabled.
|
||||
*/
|
||||
read = read | (BIT32(NVGPU_FECS_TRACE_FEATURE_CONTROL_BIT));
|
||||
}
|
||||
|
||||
/* ensure FECS records has been updated before incrementing read index */
|
||||
nvgpu_wmb();
|
||||
g->ops.gr.fecs_trace.set_read_index(g, read);
|
||||
|
||||
/*
|
||||
* FECS ucode does a priv holdoff around the assertion of context
|
||||
* reset. So, pri transactions (e.g. mailbox1 register write) might
|
||||
* fail due to this. Hence, do write with ack i.e. write and read
|
||||
* it back to make sure write happened for mailbox1.
|
||||
*/
|
||||
while (g->ops.gr.fecs_trace.get_read_index(g) != read) {
|
||||
nvgpu_log(g, gpu_dbg_ctxsw, "mailbox1 update failed");
|
||||
g->ops.gr.fecs_trace.set_read_index(g, read);
|
||||
}
|
||||
|
||||
if (g->ops.gr.fecs_trace.vm_dev_update) {
|
||||
g->ops.gr.fecs_trace.vm_dev_update(g, vm_update_mask);
|
||||
}
|
||||
|
||||
done:
|
||||
gk20a_idle(g);
|
||||
done_unlock:
|
||||
nvgpu_mutex_release(&trace->poll_lock);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int nvgpu_gr_fecs_trace_periodic_polling(void *arg)
|
||||
{
|
||||
struct gk20a *g = (struct gk20a *)arg;
|
||||
struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_ctxsw, "thread running");
|
||||
|
||||
while (!nvgpu_thread_should_stop(&trace->poll_task) &&
|
||||
trace->enable_count > 0U) {
|
||||
|
||||
nvgpu_usleep_range(GK20A_FECS_TRACE_FRAME_PERIOD_US,
|
||||
GK20A_FECS_TRACE_FRAME_PERIOD_US * 2U);
|
||||
|
||||
nvgpu_gr_fecs_trace_poll(g);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int nvgpu_gr_fecs_trace_reset(struct gk20a *g)
|
||||
{
|
||||
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " ");
|
||||
|
||||
if (!g->ops.gr.fecs_trace.is_enabled(g))
|
||||
return 0;
|
||||
|
||||
nvgpu_gr_fecs_trace_poll(g);
|
||||
return g->ops.gr.fecs_trace.set_read_index(g, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* map global circ_buf to the context space and store the GPU VA
|
||||
* in the context header.
|
||||
*/
|
||||
int nvgpu_gr_fecs_trace_bind_channel(struct gk20a *g,
|
||||
struct nvgpu_mem *inst_block, struct nvgpu_gr_subctx *subctx,
|
||||
struct nvgpu_gr_ctx *gr_ctx, pid_t pid, u32 vmid)
|
||||
{
|
||||
u64 addr = 0ULL;
|
||||
struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
|
||||
struct nvgpu_mem *mem;
|
||||
struct nvgpu_gr_global_ctx_buffer_desc *gr_global_ctx_buffer =
|
||||
nvgpu_gr_get_global_ctx_buffer_ptr(g);
|
||||
u32 context_ptr;
|
||||
u32 aperture_mask;
|
||||
int ret;
|
||||
|
||||
if (trace == NULL) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
context_ptr = nvgpu_inst_block_ptr(g, inst_block);
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw,
|
||||
"pid=%d context_ptr=%x inst_block=%llx",
|
||||
pid, context_ptr,
|
||||
nvgpu_inst_block_addr(g, inst_block));
|
||||
|
||||
mem = nvgpu_gr_global_ctx_buffer_get_mem(gr_global_ctx_buffer,
|
||||
NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER);
|
||||
if (mem == NULL) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA)) {
|
||||
addr = nvgpu_gr_ctx_get_global_ctx_va(gr_ctx,
|
||||
NVGPU_GR_CTX_FECS_TRACE_BUFFER_VA);
|
||||
nvgpu_log(g, gpu_dbg_ctxsw, "gpu_va=%llx", addr);
|
||||
aperture_mask = 0;
|
||||
} else {
|
||||
addr = nvgpu_inst_block_addr(g, mem);
|
||||
nvgpu_log(g, gpu_dbg_ctxsw, "pa=%llx", addr);
|
||||
aperture_mask =
|
||||
g->ops.gr.ctxsw_prog.get_ts_buffer_aperture_mask(g, mem);
|
||||
}
|
||||
if (addr == 0ULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx);
|
||||
|
||||
nvgpu_log(g, gpu_dbg_ctxsw, "addr=%llx count=%d", addr,
|
||||
GK20A_FECS_TRACE_NUM_RECORDS);
|
||||
|
||||
g->ops.gr.ctxsw_prog.set_ts_num_records(g, mem,
|
||||
GK20A_FECS_TRACE_NUM_RECORDS);
|
||||
|
||||
if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA) && subctx != NULL) {
|
||||
mem = nvgpu_gr_subctx_get_ctx_header(subctx);
|
||||
}
|
||||
|
||||
g->ops.gr.ctxsw_prog.set_ts_buffer_ptr(g, mem, addr, aperture_mask);
|
||||
|
||||
ret = nvgpu_gr_fecs_trace_add_context(g, context_ptr, pid, vmid,
|
||||
&trace->context_list);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int nvgpu_gr_fecs_trace_unbind_channel(struct gk20a *g,
|
||||
struct nvgpu_mem *inst_block)
|
||||
{
|
||||
struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
|
||||
u32 context_ptr;
|
||||
|
||||
if (trace == NULL) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
context_ptr = nvgpu_inst_block_ptr(g, inst_block);
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw,
|
||||
"context_ptr=%x", context_ptr);
|
||||
|
||||
if (g->ops.gr.fecs_trace.is_enabled(g)) {
|
||||
if (g->ops.gr.fecs_trace.flush) {
|
||||
g->ops.gr.fecs_trace.flush(g);
|
||||
}
|
||||
nvgpu_gr_fecs_trace_poll(g);
|
||||
}
|
||||
|
||||
nvgpu_gr_fecs_trace_remove_context(g, context_ptr,
|
||||
&trace->context_list);
|
||||
|
||||
return 0;
|
||||
}
|
||||
196
drivers/gpu/nvgpu/common/gr/fs_state.c
Normal file
196
drivers/gpu/nvgpu/common/gr/fs_state.c
Normal file
@@ -0,0 +1,196 @@
|
||||
/*
|
||||
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/static_analysis.h>
|
||||
|
||||
#include <nvgpu/gr/config.h>
|
||||
#include <nvgpu/gr/fs_state.h>
|
||||
#include <nvgpu/gr/gr_instances.h>
|
||||
#include <nvgpu/grmgr.h>
|
||||
|
||||
static int gr_load_sm_id_config(struct gk20a *g, struct nvgpu_gr_config *config)
|
||||
{
|
||||
int err;
|
||||
u32 *tpc_sm_id;
|
||||
u32 sm_id_size = g->ops.gr.init.get_sm_id_size();
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " ");
|
||||
|
||||
tpc_sm_id = nvgpu_kcalloc(g, sm_id_size, sizeof(u32));
|
||||
if (tpc_sm_id == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
err = g->ops.gr.init.sm_id_config(g, tpc_sm_id, config, NULL, false);
|
||||
|
||||
nvgpu_kfree(g, tpc_sm_id);
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
|
||||
return err;
|
||||
}
|
||||
|
||||
static void gr_load_tpc_mask(struct gk20a *g, struct nvgpu_gr_config *config)
|
||||
{
|
||||
u32 pes_tpc_mask = 0;
|
||||
u32 gpc, pes;
|
||||
u32 num_tpc_per_gpc = nvgpu_get_litter_value(g,
|
||||
GPU_LIT_NUM_TPC_PER_GPC);
|
||||
#ifdef CONFIG_NVGPU_NON_FUSA
|
||||
u32 max_tpc_count = nvgpu_gr_config_get_max_tpc_count(config);
|
||||
u32 fuse_tpc_mask;
|
||||
u32 val;
|
||||
u32 cur_gr_instance = nvgpu_gr_get_cur_instance_id(g);
|
||||
u32 gpc_phys_id;
|
||||
#endif
|
||||
|
||||
/* gv11b has 1 GPC and 4 TPC/GPC, so mask will not overflow u32 */
|
||||
for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(config); gpc++) {
|
||||
for (pes = 0;
|
||||
pes < nvgpu_gr_config_get_pe_count_per_gpc(config);
|
||||
pes++) {
|
||||
pes_tpc_mask |= nvgpu_gr_config_get_pes_tpc_mask(
|
||||
config, gpc, pes) <<
|
||||
nvgpu_safe_mult_u32(num_tpc_per_gpc, gpc);
|
||||
}
|
||||
}
|
||||
|
||||
nvgpu_log_info(g, "pes_tpc_mask %u\n", pes_tpc_mask);
|
||||
|
||||
#ifdef CONFIG_NVGPU_NON_FUSA
|
||||
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
|
||||
/*
|
||||
* Fuse registers must be queried with physical gpc-id and not
|
||||
* the logical ones. For tu104 and before chips logical gpc-id
|
||||
* is same as physical gpc-id for non-floorswept config but for
|
||||
* chips after tu104 it may not be true.
|
||||
*/
|
||||
gpc_phys_id = nvgpu_grmgr_get_gr_gpc_phys_id(g,
|
||||
cur_gr_instance, 0U);
|
||||
fuse_tpc_mask = g->ops.gr.config.get_gpc_tpc_mask(g, config, gpc_phys_id);
|
||||
if ((g->tpc_fs_mask_user != 0U) &&
|
||||
(g->tpc_fs_mask_user != fuse_tpc_mask)) {
|
||||
if (fuse_tpc_mask == nvgpu_safe_sub_u32(BIT32(max_tpc_count),
|
||||
U32(1))) {
|
||||
val = g->tpc_fs_mask_user;
|
||||
val &= nvgpu_safe_sub_u32(BIT32(max_tpc_count), U32(1));
|
||||
/*
|
||||
* skip tpc to disable the other tpc cause channel
|
||||
* timeout
|
||||
*/
|
||||
val = nvgpu_safe_sub_u32(BIT32(hweight32(val)), U32(1));
|
||||
pes_tpc_mask = val;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
g->ops.gr.init.tpc_mask(g, 0, pes_tpc_mask);
|
||||
}
|
||||
|
||||
int nvgpu_gr_fs_state_init(struct gk20a *g, struct nvgpu_gr_config *config)
|
||||
{
|
||||
u32 tpc_index, gpc_index;
|
||||
u32 sm_id = 0;
|
||||
#ifdef CONFIG_NVGPU_NON_FUSA
|
||||
u32 fuse_tpc_mask;
|
||||
u32 max_tpc_cnt;
|
||||
u32 cur_gr_instance = nvgpu_gr_get_cur_instance_id(g);
|
||||
u32 gpc_phys_id;
|
||||
#endif
|
||||
u32 gpc_cnt, tpc_cnt;
|
||||
u32 num_sm;
|
||||
int err = 0;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " ");
|
||||
|
||||
g->ops.gr.init.fs_state(g);
|
||||
|
||||
err = g->ops.gr.config.init_sm_id_table(g, config);
|
||||
if (err != 0) {
|
||||
return err;
|
||||
}
|
||||
|
||||
num_sm = nvgpu_gr_config_get_no_of_sm(config);
|
||||
nvgpu_assert(num_sm > 0U);
|
||||
|
||||
for (sm_id = 0; sm_id < num_sm; sm_id++) {
|
||||
struct nvgpu_sm_info *sm_info =
|
||||
nvgpu_gr_config_get_sm_info(config, sm_id);
|
||||
tpc_index = nvgpu_gr_config_get_sm_info_tpc_index(sm_info);
|
||||
gpc_index = nvgpu_gr_config_get_sm_info_gpc_index(sm_info);
|
||||
|
||||
g->ops.gr.init.sm_id_numbering(g, gpc_index, tpc_index, sm_id,
|
||||
config, NULL, false);
|
||||
}
|
||||
|
||||
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
|
||||
g->ops.gr.init.pd_tpc_per_gpc(g, config);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_GRAPHICS
|
||||
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
|
||||
/* gr__setup_pd_mapping */
|
||||
g->ops.gr.init.rop_mapping(g, config);
|
||||
g->ops.gr.init.pd_skip_table_gpc(g, config);
|
||||
}
|
||||
#endif
|
||||
|
||||
gpc_cnt = nvgpu_gr_config_get_gpc_count(config);
|
||||
tpc_cnt = nvgpu_gr_config_get_tpc_count(config);
|
||||
|
||||
#ifdef CONFIG_NVGPU_NON_FUSA
|
||||
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
|
||||
/*
|
||||
* Fuse registers must be queried with physical gpc-id and not
|
||||
* the logical ones. For tu104 and before chips logical gpc-id
|
||||
* is same as physical gpc-id for non-floorswept config but for
|
||||
* chips after tu104 it may not be true.
|
||||
*/
|
||||
gpc_phys_id = nvgpu_grmgr_get_gr_gpc_phys_id(g,
|
||||
cur_gr_instance, 0U);
|
||||
fuse_tpc_mask = g->ops.gr.config.get_gpc_tpc_mask(g, config, gpc_phys_id);
|
||||
max_tpc_cnt = nvgpu_gr_config_get_max_tpc_count(config);
|
||||
|
||||
if ((g->tpc_fs_mask_user != 0U) &&
|
||||
(fuse_tpc_mask ==
|
||||
nvgpu_safe_sub_u32(BIT32(max_tpc_cnt), U32(1)))) {
|
||||
u32 val = g->tpc_fs_mask_user;
|
||||
val &= nvgpu_safe_sub_u32(BIT32(max_tpc_cnt), U32(1));
|
||||
tpc_cnt = (u32)hweight32(val);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
g->ops.gr.init.cwd_gpcs_tpcs_num(g, gpc_cnt, tpc_cnt);
|
||||
|
||||
gr_load_tpc_mask(g, config);
|
||||
|
||||
err = gr_load_sm_id_config(g, config);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "load_smid_config failed err=%d", err);
|
||||
}
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
|
||||
return err;
|
||||
}
|
||||
|
||||
477
drivers/gpu/nvgpu/common/gr/global_ctx.c
Normal file
477
drivers/gpu/nvgpu/common/gr/global_ctx.c
Normal file
@@ -0,0 +1,477 @@
|
||||
/*
|
||||
* Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/log.h>
|
||||
#include <nvgpu/nvgpu_mem.h>
|
||||
#include <nvgpu/kmem.h>
|
||||
#include <nvgpu/bug.h>
|
||||
#include <nvgpu/dma.h>
|
||||
#ifdef CONFIG_NVGPU_GR_GOLDEN_CTX_VERIFICATION
|
||||
#include <nvgpu/static_analysis.h>
|
||||
#include <nvgpu/string.h>
|
||||
#endif
|
||||
|
||||
#include <nvgpu/gr/global_ctx.h>
|
||||
|
||||
#include "global_ctx_priv.h"
|
||||
|
||||
#ifdef NVGPU_UNITTEST_FAULT_INJECTION_ENABLEMENT
|
||||
#include <nvgpu/posix/posix-fault-injection.h>
|
||||
|
||||
struct nvgpu_posix_fault_inj *nvgpu_golden_ctx_verif_get_fault_injection(void)
|
||||
{
|
||||
struct nvgpu_posix_fault_inj_container *c =
|
||||
nvgpu_posix_fault_injection_get_container();
|
||||
|
||||
return &c->golden_ctx_verif_fi;
|
||||
}
|
||||
|
||||
struct nvgpu_posix_fault_inj *nvgpu_local_golden_image_get_fault_injection(void)
|
||||
{
|
||||
struct nvgpu_posix_fault_inj_container *c =
|
||||
nvgpu_posix_fault_injection_get_container();
|
||||
|
||||
return &c->local_golden_image_fi;
|
||||
}
|
||||
#endif
|
||||
|
||||
struct nvgpu_gr_global_ctx_buffer_desc *
|
||||
nvgpu_gr_global_ctx_desc_alloc(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_gr_global_ctx_buffer_desc *desc =
|
||||
nvgpu_kzalloc(g, sizeof(*desc) *
|
||||
U64(NVGPU_GR_GLOBAL_CTX_COUNT));
|
||||
return desc;
|
||||
}
|
||||
|
||||
void nvgpu_gr_global_ctx_desc_free(struct gk20a *g,
|
||||
struct nvgpu_gr_global_ctx_buffer_desc *desc)
|
||||
{
|
||||
nvgpu_kfree(g, desc);
|
||||
}
|
||||
|
||||
|
||||
void nvgpu_gr_global_ctx_set_size(struct nvgpu_gr_global_ctx_buffer_desc *desc,
|
||||
u32 index, size_t size)
|
||||
{
|
||||
nvgpu_assert(index < NVGPU_GR_GLOBAL_CTX_COUNT);
|
||||
desc[index].size = size;
|
||||
}
|
||||
|
||||
size_t nvgpu_gr_global_ctx_get_size(struct nvgpu_gr_global_ctx_buffer_desc *desc,
|
||||
u32 index)
|
||||
{
|
||||
return desc[index].size;
|
||||
}
|
||||
|
||||
static void nvgpu_gr_global_ctx_buffer_destroy(struct gk20a *g,
|
||||
struct nvgpu_mem *mem)
|
||||
{
|
||||
nvgpu_dma_free(g, mem);
|
||||
}
|
||||
|
||||
void nvgpu_gr_global_ctx_buffer_free(struct gk20a *g,
|
||||
struct nvgpu_gr_global_ctx_buffer_desc *desc)
|
||||
{
|
||||
u32 i;
|
||||
|
||||
if (desc == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = 0; i < NVGPU_GR_GLOBAL_CTX_COUNT; i++) {
|
||||
if (desc[i].destroy != NULL) {
|
||||
desc[i].destroy(g, &desc[i].mem);
|
||||
desc[i].destroy = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
nvgpu_log_fn(g, "done");
|
||||
}
|
||||
|
||||
static int nvgpu_gr_global_ctx_buffer_alloc_sys(struct gk20a *g,
|
||||
struct nvgpu_gr_global_ctx_buffer_desc *desc,
|
||||
u32 index)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
if (nvgpu_mem_is_valid(&desc[index].mem)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
err = nvgpu_dma_alloc_sys(g, desc[index].size,
|
||||
&desc[index].mem);
|
||||
if (err != 0) {
|
||||
return err;
|
||||
}
|
||||
|
||||
desc[index].destroy = nvgpu_gr_global_ctx_buffer_destroy;
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_VPR
|
||||
static int nvgpu_gr_global_ctx_buffer_alloc_vpr(struct gk20a *g,
|
||||
struct nvgpu_gr_global_ctx_buffer_desc *desc,
|
||||
u32 index)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
if (nvgpu_mem_is_valid(&desc[index].mem)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (g->ops.secure_alloc != NULL) {
|
||||
err = g->ops.secure_alloc(g,
|
||||
&desc[index].mem, desc[index].size,
|
||||
&desc[index].destroy);
|
||||
if (err != 0) {
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
#endif
|
||||
|
||||
static bool nvgpu_gr_global_ctx_buffer_sizes_are_valid(struct gk20a *g,
|
||||
struct nvgpu_gr_global_ctx_buffer_desc *desc)
|
||||
{
|
||||
|
||||
if (desc[NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP].size == 0U) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
|
||||
if ((desc[NVGPU_GR_GLOBAL_CTX_CIRCULAR].size == 0U) ||
|
||||
(desc[NVGPU_GR_GLOBAL_CTX_PAGEPOOL].size == 0U) ||
|
||||
(desc[NVGPU_GR_GLOBAL_CTX_ATTRIBUTE].size == 0U)) {
|
||||
return false;
|
||||
}
|
||||
#ifdef CONFIG_NVGPU_VPR
|
||||
if ((desc[NVGPU_GR_GLOBAL_CTX_CIRCULAR_VPR].size == 0U) ||
|
||||
(desc[NVGPU_GR_GLOBAL_CTX_PAGEPOOL_VPR].size == 0U) ||
|
||||
(desc[NVGPU_GR_GLOBAL_CTX_ATTRIBUTE_VPR].size == 0U)) {
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_VPR
|
||||
static int nvgpu_gr_global_ctx_buffer_vpr_alloc(struct gk20a *g,
|
||||
struct nvgpu_gr_global_ctx_buffer_desc *desc)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
/*
|
||||
* MIG supports only compute class.
|
||||
* Allocate BUNDLE_CB, PAGEPOOL, ATTRIBUTE_CB and RTV_CB
|
||||
* if 2D/3D/I2M classes(graphics) are supported.
|
||||
*/
|
||||
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
|
||||
nvgpu_log(g, gpu_dbg_gr | gpu_dbg_mig,
|
||||
"2D class is not supported "
|
||||
"skip BUNDLE_CB, PAGEPOOL, ATTRIBUTE_CB "
|
||||
"and RTV_CB");
|
||||
return 0;
|
||||
}
|
||||
|
||||
err = nvgpu_gr_global_ctx_buffer_alloc_vpr(g, desc,
|
||||
NVGPU_GR_GLOBAL_CTX_CIRCULAR_VPR);
|
||||
if (err != 0) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
err = nvgpu_gr_global_ctx_buffer_alloc_vpr(g, desc,
|
||||
NVGPU_GR_GLOBAL_CTX_PAGEPOOL_VPR);
|
||||
if (err != 0) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
err = nvgpu_gr_global_ctx_buffer_alloc_vpr(g, desc,
|
||||
NVGPU_GR_GLOBAL_CTX_ATTRIBUTE_VPR);
|
||||
if (err != 0) {
|
||||
goto fail;
|
||||
}
|
||||
fail:
|
||||
return err;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int nvgpu_gr_global_ctx_buffer_sys_alloc(struct gk20a *g,
|
||||
struct nvgpu_gr_global_ctx_buffer_desc *desc)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
/*
|
||||
* MIG supports only compute class.
|
||||
* Allocate BUNDLE_CB, PAGEPOOL, ATTRIBUTE_CB and RTV_CB
|
||||
* if 2D/3D/I2M classes(graphics) are supported.
|
||||
*/
|
||||
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
|
||||
err = nvgpu_gr_global_ctx_buffer_alloc_sys(g, desc,
|
||||
NVGPU_GR_GLOBAL_CTX_CIRCULAR);
|
||||
if (err != 0) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
err = nvgpu_gr_global_ctx_buffer_alloc_sys(g, desc,
|
||||
NVGPU_GR_GLOBAL_CTX_PAGEPOOL);
|
||||
if (err != 0) {
|
||||
goto fail;
|
||||
}
|
||||
|
||||
err = nvgpu_gr_global_ctx_buffer_alloc_sys(g, desc,
|
||||
NVGPU_GR_GLOBAL_CTX_ATTRIBUTE);
|
||||
if (err != 0) {
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
|
||||
err = nvgpu_gr_global_ctx_buffer_alloc_sys(g, desc,
|
||||
NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP);
|
||||
if (err != 0) {
|
||||
goto fail;
|
||||
}
|
||||
fail:
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
int nvgpu_gr_global_ctx_buffer_alloc(struct gk20a *g,
|
||||
struct nvgpu_gr_global_ctx_buffer_desc *desc)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
if (nvgpu_gr_global_ctx_buffer_sizes_are_valid(g, desc) != true) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
err = nvgpu_gr_global_ctx_buffer_sys_alloc(g, desc);
|
||||
if (err != 0) {
|
||||
goto clean_up;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_FECS_TRACE
|
||||
if (desc[NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER].size != 0U) {
|
||||
err = nvgpu_gr_global_ctx_buffer_alloc_sys(g, desc,
|
||||
NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER);
|
||||
if (err != 0) {
|
||||
goto clean_up;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_NVGPU_GRAPHICS
|
||||
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
|
||||
if (desc[NVGPU_GR_GLOBAL_CTX_RTV_CIRCULAR_BUFFER].size != 0U) {
|
||||
err = nvgpu_gr_global_ctx_buffer_alloc_sys(g, desc,
|
||||
NVGPU_GR_GLOBAL_CTX_RTV_CIRCULAR_BUFFER);
|
||||
if (err != 0) {
|
||||
goto clean_up;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_NVGPU_VPR
|
||||
if (nvgpu_gr_global_ctx_buffer_vpr_alloc(g, desc) != 0) {
|
||||
goto clean_up;
|
||||
}
|
||||
#endif
|
||||
|
||||
return err;
|
||||
|
||||
clean_up:
|
||||
nvgpu_gr_global_ctx_buffer_free(g, desc);
|
||||
return err;
|
||||
}
|
||||
|
||||
u64 nvgpu_gr_global_ctx_buffer_map(struct nvgpu_gr_global_ctx_buffer_desc *desc,
|
||||
u32 index,
|
||||
struct vm_gk20a *vm, u32 flags, bool priv)
|
||||
{
|
||||
u64 gpu_va;
|
||||
|
||||
if (!nvgpu_mem_is_valid(&desc[index].mem)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
gpu_va = nvgpu_gmmu_map(vm, &desc[index].mem, desc[index].mem.size,
|
||||
flags, gk20a_mem_flag_none, priv,
|
||||
desc[index].mem.aperture);
|
||||
return gpu_va;
|
||||
}
|
||||
|
||||
void nvgpu_gr_global_ctx_buffer_unmap(
|
||||
struct nvgpu_gr_global_ctx_buffer_desc *desc,
|
||||
u32 index,
|
||||
struct vm_gk20a *vm, u64 gpu_va)
|
||||
{
|
||||
if (nvgpu_mem_is_valid(&desc[index].mem)) {
|
||||
nvgpu_gmmu_unmap(vm, &desc[index].mem, gpu_va);
|
||||
}
|
||||
}
|
||||
|
||||
struct nvgpu_mem *nvgpu_gr_global_ctx_buffer_get_mem(
|
||||
struct nvgpu_gr_global_ctx_buffer_desc *desc,
|
||||
u32 index)
|
||||
{
|
||||
if (nvgpu_mem_is_valid(&desc[index].mem)) {
|
||||
return &desc[index].mem;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
bool nvgpu_gr_global_ctx_buffer_ready(
|
||||
struct nvgpu_gr_global_ctx_buffer_desc *desc,
|
||||
u32 index)
|
||||
{
|
||||
if (nvgpu_mem_is_valid(&desc[index].mem)) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
struct nvgpu_gr_global_ctx_local_golden_image *
|
||||
nvgpu_gr_global_ctx_init_local_golden_image(struct gk20a *g,
|
||||
struct nvgpu_mem *source_mem, size_t size)
|
||||
{
|
||||
struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image;
|
||||
|
||||
#ifdef NVGPU_UNITTEST_FAULT_INJECTION_ENABLEMENT
|
||||
if (nvgpu_posix_fault_injection_handle_call(
|
||||
nvgpu_local_golden_image_get_fault_injection())) {
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
local_golden_image = nvgpu_kzalloc(g, sizeof(*local_golden_image));
|
||||
if (local_golden_image == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
local_golden_image->context = nvgpu_vzalloc(g, size);
|
||||
if (local_golden_image->context == NULL) {
|
||||
nvgpu_kfree(g, local_golden_image);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
local_golden_image->size = size;
|
||||
|
||||
nvgpu_mem_rd_n(g, source_mem, 0, local_golden_image->context,
|
||||
nvgpu_safe_cast_u64_to_u32(size));
|
||||
|
||||
return local_golden_image;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_GR_GOLDEN_CTX_VERIFICATION
|
||||
bool nvgpu_gr_global_ctx_compare_golden_images(struct gk20a *g,
|
||||
bool is_sysmem,
|
||||
struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image1,
|
||||
struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image2,
|
||||
size_t size)
|
||||
{
|
||||
bool is_identical = true;
|
||||
u32 *data1 = local_golden_image1->context;
|
||||
u32 *data2 = local_golden_image2->context;
|
||||
#ifdef CONFIG_NVGPU_DGPU
|
||||
u32 i;
|
||||
#endif
|
||||
|
||||
#ifdef NVGPU_UNITTEST_FAULT_INJECTION_ENABLEMENT
|
||||
if (nvgpu_posix_fault_injection_handle_call(
|
||||
nvgpu_golden_ctx_verif_get_fault_injection())) {
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* In case of sysmem, direct mem compare can be used.
|
||||
* For vidmem, word by word comparison only works and
|
||||
* it is too early to use ce engine for read operations.
|
||||
*/
|
||||
if (is_sysmem) {
|
||||
if (nvgpu_memcmp((u8 *)data1, (u8 *)data2, size) != 0) {
|
||||
is_identical = false;
|
||||
}
|
||||
}
|
||||
else {
|
||||
#ifdef CONFIG_NVGPU_DGPU
|
||||
for( i = 0U; i < nvgpu_safe_cast_u64_to_u32(size/sizeof(u32));
|
||||
i = nvgpu_safe_add_u32(i, 1U)) {
|
||||
if (*(data1 + i) != *(data2 + i)) {
|
||||
is_identical = false;
|
||||
nvgpu_log_info(g,
|
||||
"mismatch i = %u golden1: %u golden2 %u",
|
||||
i, *(data1 + i), *(data2 + i));
|
||||
break;
|
||||
}
|
||||
}
|
||||
#else
|
||||
is_identical = false;
|
||||
#endif
|
||||
}
|
||||
|
||||
nvgpu_log_info(g, "%s result %u", __func__, is_identical);
|
||||
return is_identical;
|
||||
}
|
||||
#endif
|
||||
|
||||
void nvgpu_gr_global_ctx_load_local_golden_image(struct gk20a *g,
|
||||
struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image,
|
||||
struct nvgpu_mem *target_mem)
|
||||
{
|
||||
/* Channel gr_ctx buffer is gpu cacheable.
|
||||
Flush and invalidate before cpu update. */
|
||||
if (g->ops.mm.cache.l2_flush(g, true) != 0) {
|
||||
nvgpu_err(g, "l2_flush failed");
|
||||
}
|
||||
|
||||
nvgpu_mem_wr_n(g, target_mem, 0, local_golden_image->context,
|
||||
nvgpu_safe_cast_u64_to_u32(local_golden_image->size));
|
||||
|
||||
nvgpu_log(g, gpu_dbg_gr, "loaded saved golden image into gr_ctx");
|
||||
}
|
||||
|
||||
void nvgpu_gr_global_ctx_deinit_local_golden_image(struct gk20a *g,
|
||||
struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image)
|
||||
{
|
||||
nvgpu_vfree(g, local_golden_image->context);
|
||||
nvgpu_kfree(g, local_golden_image);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_DEBUGGER
|
||||
u32 *nvgpu_gr_global_ctx_get_local_golden_image_ptr(
|
||||
struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image)
|
||||
{
|
||||
return local_golden_image->context;
|
||||
}
|
||||
#endif
|
||||
68
drivers/gpu/nvgpu/common/gr/global_ctx_priv.h
Normal file
68
drivers/gpu/nvgpu/common/gr/global_ctx_priv.h
Normal file
@@ -0,0 +1,68 @@
|
||||
/*
|
||||
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef NVGPU_GR_GLOBAL_CTX_PRIV_H
|
||||
#define NVGPU_GR_GLOBAL_CTX_PRIV_H
|
||||
|
||||
/**
|
||||
* Global context buffer descriptor structure.
|
||||
*
|
||||
* This structure stores properties applicable to each global
|
||||
* context buffer.
|
||||
*/
|
||||
struct nvgpu_gr_global_ctx_buffer_desc {
|
||||
/**
|
||||
* Memory to hold global context buffer.
|
||||
*/
|
||||
struct nvgpu_mem mem;
|
||||
|
||||
/**
|
||||
* Size of global context buffer.
|
||||
*/
|
||||
size_t size;
|
||||
|
||||
/**
|
||||
* Function pointer to free global context buffer.
|
||||
*/
|
||||
global_ctx_mem_destroy_fn destroy;
|
||||
};
|
||||
|
||||
/**
|
||||
* Local Golden context image descriptor structure.
|
||||
*
|
||||
* This structure stores details of a local Golden context image.
|
||||
* Pointer to this struct is maintained in
|
||||
* #nvgpu_gr_obj_ctx_golden_image structure.
|
||||
*/
|
||||
struct nvgpu_gr_global_ctx_local_golden_image {
|
||||
/**
|
||||
* Pointer to local Golden context image memory.
|
||||
*/
|
||||
u32 *context;
|
||||
|
||||
/**
|
||||
* Size of local Golden context image.
|
||||
*/
|
||||
size_t size;
|
||||
};
|
||||
|
||||
#endif /* NVGPU_GR_GLOBAL_CTX_PRIV_H */
|
||||
1204
drivers/gpu/nvgpu/common/gr/gr.c
Normal file
1204
drivers/gpu/nvgpu/common/gr/gr.c
Normal file
File diff suppressed because it is too large
Load Diff
864
drivers/gpu/nvgpu/common/gr/gr_config.c
Normal file
864
drivers/gpu/nvgpu/common/gr/gr_config.c
Normal file
@@ -0,0 +1,864 @@
|
||||
/*
|
||||
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/io.h>
|
||||
#include <nvgpu/static_analysis.h>
|
||||
#include <nvgpu/gr/config.h>
|
||||
#include <nvgpu/gr/gr_instances.h>
|
||||
#include <nvgpu/grmgr.h>
|
||||
|
||||
#include "gr_config_priv.h"
|
||||
|
||||
static void gr_config_init_pes_tpc(struct gk20a *g,
|
||||
struct nvgpu_gr_config *config,
|
||||
u32 gpc_index)
|
||||
{
|
||||
u32 pes_index;
|
||||
u32 pes_tpc_mask;
|
||||
u32 pes_tpc_count;
|
||||
|
||||
for (pes_index = 0; pes_index < config->pe_count_per_gpc;
|
||||
pes_index++) {
|
||||
pes_tpc_mask = g->ops.gr.config.get_pes_tpc_mask(g,
|
||||
config, gpc_index, pes_index);
|
||||
pes_tpc_count = hweight32(pes_tpc_mask);
|
||||
|
||||
/* detect PES presence by seeing if there are
|
||||
* TPCs connected to it.
|
||||
*/
|
||||
if (pes_tpc_count != 0U) {
|
||||
config->gpc_ppc_count[gpc_index] = nvgpu_safe_add_u32(
|
||||
config->gpc_ppc_count[gpc_index], 1U);
|
||||
}
|
||||
|
||||
config->pes_tpc_count[pes_index][gpc_index] = pes_tpc_count;
|
||||
config->pes_tpc_mask[pes_index][gpc_index] = pes_tpc_mask;
|
||||
}
|
||||
}
|
||||
|
||||
static void gr_config_init_gpc_skip_mask(struct nvgpu_gr_config *config,
|
||||
u32 gpc_index)
|
||||
{
|
||||
u32 pes_heavy_index;
|
||||
u32 gpc_new_skip_mask = 0U;
|
||||
u32 pes_tpc_cnt = 0U, pes_tpc_mask = 0U;
|
||||
|
||||
if (config->pe_count_per_gpc <= 1U) {
|
||||
goto skip_mask_end;
|
||||
}
|
||||
|
||||
pes_tpc_cnt = nvgpu_safe_add_u32(
|
||||
config->pes_tpc_count[0][gpc_index],
|
||||
config->pes_tpc_count[1][gpc_index]);
|
||||
|
||||
pes_heavy_index =
|
||||
(config->pes_tpc_count[0][gpc_index] >
|
||||
config->pes_tpc_count[1][gpc_index]) ? 0U : 1U;
|
||||
|
||||
if ((pes_tpc_cnt == 5U) || ((pes_tpc_cnt == 4U) &&
|
||||
(config->pes_tpc_count[0][gpc_index] !=
|
||||
config->pes_tpc_count[1][gpc_index]))) {
|
||||
pes_tpc_mask = nvgpu_safe_sub_u32(
|
||||
config->pes_tpc_mask[pes_heavy_index][gpc_index], 1U);
|
||||
gpc_new_skip_mask =
|
||||
config->pes_tpc_mask[pes_heavy_index][gpc_index] ^
|
||||
(config->pes_tpc_mask[pes_heavy_index][gpc_index] &
|
||||
pes_tpc_mask);
|
||||
}
|
||||
|
||||
skip_mask_end:
|
||||
config->gpc_skip_mask[gpc_index] = gpc_new_skip_mask;
|
||||
}
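/*
 * gr_config_init_gpc_skip_mask() isolates the lowest set bit of the
 * heavier PES's TPC mask: mask & (mask - 1) clears the lowest set bit,
 * and the XOR with the original mask recovers exactly that bit. Worked
 * example (hypothetical mask): pes_tpc_mask = 0x0D (TPCs 0, 2 and 3)
 * gives 0x0D & 0x0C = 0x0C and 0x0D ^ 0x0C = 0x01, so TPC 0 of that
 * PES lands in gpc_skip_mask. This only happens for a GPC carrying
 * 5 TPCs, or 4 TPCs split unevenly across its two PES units.
 */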
|
||||
|
||||
static void gr_config_log_info(struct gk20a *g,
|
||||
struct nvgpu_gr_config *config)
|
||||
{
|
||||
u32 gpc_index, pes_index;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "max_gpc_count: %d", config->max_gpc_count);
|
||||
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "gpc_count: %d", config->gpc_count);
|
||||
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "gpc_mask: 0x%x", config->gpc_mask);
|
||||
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "max_tpc_per_gpc_count: %d", config->max_tpc_per_gpc_count);
|
||||
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "max_tpc_count: %d", config->max_tpc_count);
|
||||
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "tpc_count: %d", config->tpc_count);
|
||||
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "sm_count_per_tpc: %d", config->sm_count_per_tpc);
|
||||
#ifdef CONFIG_NVGPU_GRAPHICS
|
||||
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "max_zcull_per_gpc_count: %d", config->max_zcull_per_gpc_count);
|
||||
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "zcb_count: %d", config->zcb_count);
|
||||
#endif
|
||||
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "pe_count_per_gpc: %d", config->pe_count_per_gpc);
|
||||
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "ppc_count: %d", config->ppc_count);
|
||||
|
||||
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
|
||||
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "gpc_tpc_count[%d] : %d",
|
||||
gpc_index, config->gpc_tpc_count[gpc_index]);
|
||||
}
|
||||
for (gpc_index = 0; gpc_index < config->max_gpc_count; gpc_index++) {
|
||||
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "gpc_tpc_mask[%d] : 0x%x",
|
||||
gpc_index, config->gpc_tpc_mask[gpc_index]);
|
||||
}
|
||||
#ifdef CONFIG_NVGPU_GRAPHICS
|
||||
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
|
||||
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "gpc_zcb_count[%d] : %d",
|
||||
gpc_index, config->gpc_zcb_count != NULL ?
|
||||
config->gpc_zcb_count[gpc_index] : 0U);
|
||||
}
|
||||
#endif
|
||||
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
|
||||
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "gpc_ppc_count[%d] : %d",
|
||||
gpc_index, config->gpc_ppc_count[gpc_index]);
|
||||
}
|
||||
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
|
||||
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "gpc_skip_mask[%d] : 0x%x",
|
||||
gpc_index, config->gpc_skip_mask[gpc_index]);
|
||||
}
|
||||
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
|
||||
for (pes_index = 0;
|
||||
pes_index < config->pe_count_per_gpc;
|
||||
pes_index++) {
|
||||
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "pes_tpc_count[%d][%d] : %d",
|
||||
pes_index, gpc_index,
|
||||
config->pes_tpc_count[pes_index][gpc_index]);
|
||||
}
|
||||
}
|
||||
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
|
||||
for (pes_index = 0;
|
||||
pes_index < config->pe_count_per_gpc;
|
||||
pes_index++) {
|
||||
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "pes_tpc_mask[%d][%d] : 0x%x",
|
||||
pes_index, gpc_index,
|
||||
config->pes_tpc_mask[pes_index][gpc_index]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void gr_config_set_gpc_mask(struct gk20a *g,
|
||||
struct nvgpu_gr_config *config)
|
||||
{
|
||||
#ifdef CONFIG_NVGPU_DGPU
|
||||
if (g->ops.gr.config.get_gpc_mask != NULL) {
|
||||
config->gpc_mask = g->ops.gr.config.get_gpc_mask(g);
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
config->gpc_mask = nvgpu_safe_sub_u32(BIT32(config->gpc_count),
|
||||
1U);
|
||||
}
|
||||
}
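/*
 * Fallback arithmetic in gr_config_set_gpc_mask() when no chip-specific
 * get_gpc_mask() hook is present: BIT32(gpc_count) - 1 builds a dense
 * mask with one bit per present GPC, e.g. gpc_count = 4 yields
 * gpc_mask = 0xF.
 */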
|
||||
|
||||
static bool gr_config_alloc_valid(struct nvgpu_gr_config *config)
|
||||
{
|
||||
if ((config->gpc_tpc_count == NULL) || (config->gpc_tpc_mask == NULL) ||
|
||||
(config->gpc_ppc_count == NULL) ||
|
||||
(config->gpc_skip_mask == NULL)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_GRAPHICS
|
||||
if (!nvgpu_is_enabled(config->g, NVGPU_SUPPORT_MIG) &&
|
||||
(config->gpc_zcb_count == NULL)) {
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void gr_config_free_mem(struct gk20a *g,
|
||||
struct nvgpu_gr_config *config)
|
||||
{
|
||||
u32 pes_index;
|
||||
|
||||
for (pes_index = 0U; pes_index < config->pe_count_per_gpc; pes_index++) {
|
||||
nvgpu_kfree(g, config->pes_tpc_count[pes_index]);
|
||||
nvgpu_kfree(g, config->pes_tpc_mask[pes_index]);
|
||||
}
|
||||
|
||||
nvgpu_kfree(g, config->gpc_skip_mask);
|
||||
nvgpu_kfree(g, config->gpc_ppc_count);
|
||||
#ifdef CONFIG_NVGPU_GRAPHICS
|
||||
nvgpu_kfree(g, config->gpc_zcb_count);
|
||||
#endif
|
||||
nvgpu_kfree(g, config->gpc_tpc_mask);
|
||||
nvgpu_kfree(g, config->gpc_tpc_count);
|
||||
}
|
||||
|
||||
static bool gr_config_alloc_struct_mem(struct gk20a *g,
|
||||
struct nvgpu_gr_config *config)
|
||||
{
|
||||
u32 pes_index;
|
||||
u32 total_tpc_cnt;
|
||||
size_t sm_info_size;
|
||||
size_t gpc_size, sm_size, max_gpc_cnt;
|
||||
size_t pd_tbl_size;
|
||||
|
||||
total_tpc_cnt = nvgpu_safe_mult_u32(config->gpc_count,
|
||||
config->max_tpc_per_gpc_count);
|
||||
sm_size = nvgpu_safe_mult_u64((size_t)config->sm_count_per_tpc,
|
||||
sizeof(struct nvgpu_sm_info));
|
||||
/* allocate for max tpc per gpc */
|
||||
sm_info_size = nvgpu_safe_mult_u64((size_t)total_tpc_cnt, sm_size);
|
||||
|
||||
config->sm_to_cluster = nvgpu_kzalloc(g, sm_info_size);
|
||||
if (config->sm_to_cluster == NULL) {
|
||||
nvgpu_err(g, "sm_to_cluster == NULL");
|
||||
goto alloc_err;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_SM_DIVERSITY
|
||||
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SM_DIVERSITY)) {
|
||||
config->sm_to_cluster_redex_config =
|
||||
nvgpu_kzalloc(g, sm_info_size);
|
||||
if (config->sm_to_cluster_redex_config == NULL) {
|
||||
nvgpu_err(g, "sm_to_cluster_redex_config == NULL");
|
||||
goto clean_alloc_mem;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
config->no_of_sm = 0;
|
||||
|
||||
gpc_size = nvgpu_safe_mult_u64((size_t)config->gpc_count, sizeof(u32));
|
||||
max_gpc_cnt = nvgpu_safe_mult_u64((size_t)config->max_gpc_count, sizeof(u32));
|
||||
config->gpc_tpc_count = nvgpu_kzalloc(g, gpc_size);
|
||||
config->gpc_tpc_mask = nvgpu_kzalloc(g, max_gpc_cnt);
|
||||
#ifdef CONFIG_NVGPU_GRAPHICS
|
||||
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
|
||||
config->max_zcull_per_gpc_count = nvgpu_get_litter_value(g,
|
||||
GPU_LIT_NUM_ZCULL_BANKS);
|
||||
|
||||
config->gpc_zcb_count = nvgpu_kzalloc(g, gpc_size);
|
||||
}
|
||||
#endif
|
||||
config->gpc_ppc_count = nvgpu_kzalloc(g, gpc_size);
|
||||
|
||||
pd_tbl_size = nvgpu_safe_mult_u64(
|
||||
(size_t)g->ops.gr.config.get_pd_dist_skip_table_size(),
|
||||
sizeof(u32));
|
||||
pd_tbl_size = nvgpu_safe_mult_u64(pd_tbl_size, 4UL);
|
||||
config->gpc_skip_mask = nvgpu_kzalloc(g, pd_tbl_size);
|
||||
|
||||
if (gr_config_alloc_valid(config) == false) {
|
||||
goto clean_alloc_mem;
|
||||
}
|
||||
|
||||
for (pes_index = 0U; pes_index < config->pe_count_per_gpc; pes_index++) {
|
||||
config->pes_tpc_count[pes_index] = nvgpu_kzalloc(g, gpc_size);
|
||||
config->pes_tpc_mask[pes_index] = nvgpu_kzalloc(g, gpc_size);
|
||||
if ((config->pes_tpc_count[pes_index] == NULL) ||
|
||||
(config->pes_tpc_mask[pes_index] == NULL)) {
|
||||
goto clean_alloc_mem;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
|
||||
clean_alloc_mem:
|
||||
nvgpu_kfree(g, config->sm_to_cluster);
|
||||
config->sm_to_cluster = NULL;
|
||||
#ifdef CONFIG_NVGPU_SM_DIVERSITY
|
||||
if (config->sm_to_cluster_redex_config != NULL) {
|
||||
nvgpu_kfree(g, config->sm_to_cluster_redex_config);
|
||||
config->sm_to_cluster_redex_config = NULL;
|
||||
}
|
||||
#endif
|
||||
gr_config_free_mem(g, config);
|
||||
|
||||
alloc_err:
|
||||
return false;
|
||||
}
|
||||
|
||||
static int gr_config_init_mig_gpcs(struct nvgpu_gr_config *config)
|
||||
{
|
||||
struct gk20a *g = config->g;
|
||||
u32 cur_gr_instance = nvgpu_gr_get_cur_instance_id(g);
|
||||
|
||||
config->max_gpc_count = nvgpu_grmgr_get_max_gpc_count(g);
|
||||
config->gpc_count = nvgpu_grmgr_get_gr_num_gpcs(g, cur_gr_instance);
|
||||
if (config->gpc_count == 0U) {
|
||||
nvgpu_err(g, "gpc_count==0!");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
config->gpc_mask = nvgpu_grmgr_get_gr_logical_gpc_mask(
|
||||
g, cur_gr_instance);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int gr_config_init_gpcs(struct nvgpu_gr_config *config)
|
||||
{
|
||||
struct gk20a *g = config->g;
|
||||
|
||||
config->max_gpc_count = g->ops.top.get_max_gpc_count(g);
|
||||
config->gpc_count = g->ops.priv_ring.get_gpc_count(g);
|
||||
if (config->gpc_count == 0U) {
|
||||
nvgpu_err(g, "gpc_count==0!");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
gr_config_set_gpc_mask(g, config);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct nvgpu_gr_config *nvgpu_gr_config_init(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_gr_config *config;
|
||||
u32 cur_gr_instance = nvgpu_gr_get_cur_instance_id(g);
|
||||
u32 gpc_index;
|
||||
u32 gpc_phys_id;
|
||||
int err;
|
||||
|
||||
config = nvgpu_kzalloc(g, sizeof(*config));
|
||||
if (config == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
config->g = g;
|
||||
|
||||
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
|
||||
err = gr_config_init_mig_gpcs(config);
|
||||
if (err < 0) {
|
||||
nvgpu_err(g, "MIG GPC config init failed");
|
||||
nvgpu_kfree(g, config);
|
||||
return NULL;
|
||||
}
|
||||
} else {
|
||||
err = gr_config_init_gpcs(config);
|
||||
if (err < 0) {
|
||||
nvgpu_err(g, "GPC config init failed");
|
||||
nvgpu_kfree(g, config);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* Required to read gpc_tpc_mask below */
|
||||
config->max_tpc_per_gpc_count = g->ops.top.get_max_tpc_per_gpc_count(g);
|
||||
|
||||
config->max_tpc_count = nvgpu_safe_mult_u32(config->max_gpc_count,
|
||||
config->max_tpc_per_gpc_count);
|
||||
|
||||
config->pe_count_per_gpc = nvgpu_get_litter_value(g,
|
||||
GPU_LIT_NUM_PES_PER_GPC);
|
||||
if (config->pe_count_per_gpc > GK20A_GR_MAX_PES_PER_GPC) {
|
||||
nvgpu_err(g, "too many pes per gpc");
|
||||
goto clean_up_init;
|
||||
}
|
||||
|
||||
config->sm_count_per_tpc =
|
||||
nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
|
||||
if (config->sm_count_per_tpc == 0U) {
|
||||
nvgpu_err(g, "sm_count_per_tpc==0!");
|
||||
goto clean_up_init;
|
||||
}
|
||||
|
||||
if (gr_config_alloc_struct_mem(g, config) == false) {
|
||||
goto clean_up_init;
|
||||
}
|
||||
|
||||
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
|
||||
/*
 * Fuse registers must be queried with the physical gpc-id and not the
 * logical one. For tu104 and earlier chips the logical gpc-id is the
 * same as the physical gpc-id in non-floorswept configurations, but
 * for chips after tu104 that may no longer be true.
 */
|
||||
gpc_phys_id = nvgpu_grmgr_get_gr_gpc_phys_id(g,
|
||||
cur_gr_instance, gpc_index);
|
||||
config->gpc_tpc_mask[gpc_index] =
|
||||
g->ops.gr.config.get_gpc_tpc_mask(g, config, gpc_phys_id);
|
||||
}
|
||||
|
||||
config->ppc_count = 0;
|
||||
config->tpc_count = 0;
|
||||
#ifdef CONFIG_NVGPU_GRAPHICS
|
||||
config->zcb_count = 0;
|
||||
#endif
|
||||
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
|
||||
config->gpc_tpc_count[gpc_index] =
|
||||
g->ops.gr.config.get_tpc_count_in_gpc(g, config,
|
||||
gpc_index);
|
||||
config->tpc_count = nvgpu_safe_add_u32(config->tpc_count,
|
||||
config->gpc_tpc_count[gpc_index]);
|
||||
|
||||
#ifdef CONFIG_NVGPU_GRAPHICS
|
||||
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
|
||||
config->gpc_zcb_count[gpc_index] =
|
||||
g->ops.gr.config.get_zcull_count_in_gpc(g, config,
|
||||
gpc_index);
|
||||
config->zcb_count = nvgpu_safe_add_u32(config->zcb_count,
|
||||
config->gpc_zcb_count[gpc_index]);
|
||||
}
|
||||
#endif
|
||||
|
||||
gr_config_init_pes_tpc(g, config, gpc_index);
|
||||
|
||||
config->ppc_count = nvgpu_safe_add_u32(config->ppc_count,
|
||||
config->gpc_ppc_count[gpc_index]);
|
||||
|
||||
gr_config_init_gpc_skip_mask(config, gpc_index);
|
||||
}
|
||||
|
||||
gr_config_log_info(g, config);
|
||||
return config;
|
||||
|
||||
clean_up_init:
|
||||
nvgpu_kfree(g, config);
|
||||
return NULL;
|
||||
}
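/*
 * Rough usage sketch for nvgpu_gr_config_init() (error handling trimmed;
 * the surrounding call site is an assumption, not taken from this file):
 *
 *   struct nvgpu_gr_config *config = nvgpu_gr_config_init(g);
 *   if (config == NULL)
 *           return -ENOMEM;
 *   tpc_count = nvgpu_gr_config_get_tpc_count(config);
 *   ...
 *   nvgpu_gr_config_deinit(g, config);
 *
 * Note that nvgpu_gr_config_deinit() only releases the per-GPC arrays;
 * the struct itself is presumably freed by the caller with nvgpu_kfree().
 */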
|
||||
|
||||
#ifdef CONFIG_NVGPU_GRAPHICS
|
||||
static u32 prime_set[18] = {
|
||||
2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61 };
|
||||
|
||||
/*
|
||||
* Return map tiles count for given index
|
||||
* Return 0 if index is out-of-bounds
|
||||
*/
|
||||
u32 nvgpu_gr_config_get_map_tile_count(struct nvgpu_gr_config *config, u32 index)
|
||||
{
|
||||
if (index >= config->map_tile_count) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return config->map_tiles[index];
|
||||
}
|
||||
|
||||
u8 *nvgpu_gr_config_get_map_tiles(struct nvgpu_gr_config *config)
|
||||
{
|
||||
return config->map_tiles;
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_config_get_map_row_offset(struct nvgpu_gr_config *config)
|
||||
{
|
||||
return config->map_row_offset;
|
||||
}
|
||||
|
||||
int nvgpu_gr_config_init_map_tiles(struct gk20a *g,
|
||||
struct nvgpu_gr_config *config)
|
||||
{
|
||||
s32 comm_denom;
|
||||
s32 mul_factor;
|
||||
s32 *init_frac = NULL;
|
||||
s32 *init_err = NULL;
|
||||
s32 *run_err = NULL;
|
||||
u32 *sorted_num_tpcs = NULL;
|
||||
u32 *sorted_to_unsorted_gpc_map = NULL;
|
||||
u32 gpc_index;
|
||||
u32 gpc_mark = 0;
|
||||
u32 num_tpc;
|
||||
u32 max_tpc_count = 0;
|
||||
u32 swap;
|
||||
u32 tile_count;
|
||||
u32 index;
|
||||
bool delete_map = false;
|
||||
bool gpc_sorted;
|
||||
int ret = 0;
|
||||
u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
|
||||
u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC);
|
||||
u32 map_tile_count = num_gpcs * num_tpc_per_gpc;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_gr, " ");
|
||||
|
||||
init_frac = nvgpu_kzalloc(g, num_gpcs * sizeof(s32));
|
||||
init_err = nvgpu_kzalloc(g, num_gpcs * sizeof(s32));
|
||||
run_err = nvgpu_kzalloc(g, num_gpcs * sizeof(s32));
|
||||
sorted_num_tpcs =
|
||||
nvgpu_kzalloc(g, (size_t)num_gpcs *
|
||||
(size_t)num_tpc_per_gpc *
|
||||
sizeof(s32));
|
||||
sorted_to_unsorted_gpc_map =
|
||||
nvgpu_kzalloc(g, (size_t)num_gpcs * sizeof(s32));
|
||||
|
||||
if (!((init_frac != NULL) &&
|
||||
(init_err != NULL) &&
|
||||
(run_err != NULL) &&
|
||||
(sorted_num_tpcs != NULL) &&
|
||||
(sorted_to_unsorted_gpc_map != NULL))) {
|
||||
ret = -ENOMEM;
|
||||
goto clean_up;
|
||||
}
|
||||
|
||||
config->map_row_offset = 0xFFFFFFFFU;
|
||||
|
||||
if (config->tpc_count == 3U) {
|
||||
config->map_row_offset = 2;
|
||||
} else if (config->tpc_count < 3U) {
|
||||
config->map_row_offset = 1;
|
||||
} else {
|
||||
config->map_row_offset = 3;
|
||||
|
||||
for (index = 1U; index < 18U; index++) {
|
||||
u32 prime = prime_set[index];
|
||||
if ((config->tpc_count % prime) != 0U) {
|
||||
config->map_row_offset = prime;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
switch (config->tpc_count) {
|
||||
case 15:
|
||||
config->map_row_offset = 6;
|
||||
break;
|
||||
case 14:
|
||||
config->map_row_offset = 5;
|
||||
break;
|
||||
case 13:
|
||||
config->map_row_offset = 2;
|
||||
break;
|
||||
case 11:
|
||||
config->map_row_offset = 7;
|
||||
break;
|
||||
case 10:
|
||||
config->map_row_offset = 6;
|
||||
break;
|
||||
case 7:
|
||||
case 5:
|
||||
config->map_row_offset = 1;
|
||||
break;
|
||||
default:
|
||||
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "unsupported tpc count = %u",
|
||||
config->tpc_count);
|
||||
break;
|
||||
}
|
||||
|
||||
if (config->map_tiles != NULL) {
|
||||
if (config->map_tile_count != config->tpc_count) {
|
||||
delete_map = true;
|
||||
}
|
||||
|
||||
for (tile_count = 0; tile_count < config->map_tile_count; tile_count++) {
|
||||
if (nvgpu_gr_config_get_map_tile_count(config, tile_count)
|
||||
>= config->tpc_count) {
|
||||
delete_map = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (delete_map) {
|
||||
nvgpu_kfree(g, config->map_tiles);
|
||||
config->map_tiles = NULL;
|
||||
config->map_tile_count = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (config->map_tiles == NULL) {
|
||||
config->map_tiles = nvgpu_kzalloc(g, map_tile_count * sizeof(u8));
|
||||
if (config->map_tiles == NULL) {
|
||||
ret = -ENOMEM;
|
||||
goto clean_up;
|
||||
}
|
||||
config->map_tile_count = map_tile_count;
|
||||
|
||||
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
|
||||
sorted_num_tpcs[gpc_index] = config->gpc_tpc_count[gpc_index];
|
||||
sorted_to_unsorted_gpc_map[gpc_index] = gpc_index;
|
||||
}
|
||||
|
||||
gpc_sorted = false;
|
||||
while (!gpc_sorted) {
|
||||
gpc_sorted = true;
|
||||
for (gpc_index = 0U; gpc_index < config->gpc_count - 1U; gpc_index++) {
|
||||
if (sorted_num_tpcs[gpc_index + 1U] > sorted_num_tpcs[gpc_index]) {
|
||||
gpc_sorted = false;
|
||||
swap = sorted_num_tpcs[gpc_index];
|
||||
sorted_num_tpcs[gpc_index] = sorted_num_tpcs[gpc_index + 1U];
|
||||
sorted_num_tpcs[gpc_index + 1U] = swap;
|
||||
swap = sorted_to_unsorted_gpc_map[gpc_index];
|
||||
sorted_to_unsorted_gpc_map[gpc_index] =
|
||||
sorted_to_unsorted_gpc_map[gpc_index + 1U];
|
||||
sorted_to_unsorted_gpc_map[gpc_index + 1U] = swap;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
|
||||
if (config->gpc_tpc_count[gpc_index] > max_tpc_count) {
|
||||
max_tpc_count = config->gpc_tpc_count[gpc_index];
|
||||
}
|
||||
}
|
||||
|
||||
mul_factor = S32(config->gpc_count) * S32(max_tpc_count);
|
||||
if ((U32(mul_factor) & 0x1U) != 0U) {
|
||||
mul_factor = 2;
|
||||
} else {
|
||||
mul_factor = 1;
|
||||
}
|
||||
|
||||
comm_denom = S32(config->gpc_count) * S32(max_tpc_count) * mul_factor;
|
||||
|
||||
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
|
||||
num_tpc = sorted_num_tpcs[gpc_index];
|
||||
|
||||
init_frac[gpc_index] = S32(num_tpc) * S32(config->gpc_count) * mul_factor;
|
||||
|
||||
if (num_tpc != 0U) {
|
||||
init_err[gpc_index] = S32(gpc_index) * S32(max_tpc_count) * mul_factor - comm_denom/2;
|
||||
} else {
|
||||
init_err[gpc_index] = 0;
|
||||
}
|
||||
|
||||
run_err[gpc_index] = init_frac[gpc_index] + init_err[gpc_index];
|
||||
}
|
||||
|
||||
while (gpc_mark < config->tpc_count) {
|
||||
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
|
||||
if ((run_err[gpc_index] * 2) >= comm_denom) {
|
||||
config->map_tiles[gpc_mark++] = (u8)sorted_to_unsorted_gpc_map[gpc_index];
|
||||
run_err[gpc_index] += init_frac[gpc_index] - comm_denom;
|
||||
} else {
|
||||
run_err[gpc_index] += init_frac[gpc_index];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
clean_up:
|
||||
nvgpu_kfree(g, init_frac);
|
||||
nvgpu_kfree(g, init_err);
|
||||
nvgpu_kfree(g, run_err);
|
||||
nvgpu_kfree(g, sorted_num_tpcs);
|
||||
nvgpu_kfree(g, sorted_to_unsorted_gpc_map);
|
||||
|
||||
if (ret != 0) {
|
||||
nvgpu_err(g, "fail");
|
||||
} else {
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
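/*
 * Two things happen in nvgpu_gr_config_init_map_tiles(). First,
 * map_row_offset is picked as the smallest prime (starting from 3) that
 * does not divide tpc_count, with hard-coded overrides for a few counts.
 * Second, map_tiles[] is filled by an error-diffusion (Bresenham-style)
 * scheduler over the GPCs sorted by descending TPC count, so each GPC
 * receives map entries at a rate proportional to its TPC count. Small
 * worked example (hypothetical counts): two GPCs with 3 and 1 TPCs end
 * up interleaved roughly as { 0, 0, 1, 0 } rather than { 0, 0, 0, 1 }.
 */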
|
||||
|
||||
u32 nvgpu_gr_config_get_max_zcull_per_gpc_count(struct nvgpu_gr_config *config)
|
||||
{
|
||||
return config->max_zcull_per_gpc_count;
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_config_get_zcb_count(struct nvgpu_gr_config *config)
|
||||
{
|
||||
return config->zcb_count;
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_config_get_gpc_zcb_count(struct nvgpu_gr_config *config,
|
||||
u32 gpc_index)
|
||||
{
|
||||
return config->gpc_zcb_count[gpc_index];
|
||||
}
|
||||
#endif
|
||||
|
||||
void nvgpu_gr_config_deinit(struct gk20a *g, struct nvgpu_gr_config *config)
|
||||
{
|
||||
if (config == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
gr_config_free_mem(g, config);
|
||||
#ifdef CONFIG_NVGPU_GRAPHICS
|
||||
nvgpu_kfree(g, config->map_tiles);
|
||||
#endif
|
||||
nvgpu_kfree(g, config->sm_to_cluster);
|
||||
#ifdef CONFIG_NVGPU_SM_DIVERSITY
|
||||
if (config->sm_to_cluster_redex_config != NULL) {
|
||||
nvgpu_kfree(g, config->sm_to_cluster_redex_config);
|
||||
config->sm_to_cluster_redex_config = NULL;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_config_get_max_gpc_count(struct nvgpu_gr_config *config)
|
||||
{
|
||||
return config->max_gpc_count;
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_config_get_max_tpc_per_gpc_count(struct nvgpu_gr_config *config)
|
||||
{
|
||||
return config->max_tpc_per_gpc_count;
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_config_get_max_tpc_count(struct nvgpu_gr_config *config)
|
||||
{
|
||||
return config->max_tpc_count;
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_config_get_gpc_count(struct nvgpu_gr_config *config)
|
||||
{
|
||||
return config->gpc_count;
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_config_get_tpc_count(struct nvgpu_gr_config *config)
|
||||
{
|
||||
return config->tpc_count;
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_config_get_ppc_count(struct nvgpu_gr_config *config)
|
||||
{
|
||||
return config->ppc_count;
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_config_get_pe_count_per_gpc(struct nvgpu_gr_config *config)
|
||||
{
|
||||
return config->pe_count_per_gpc;
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_config_get_sm_count_per_tpc(struct nvgpu_gr_config *config)
|
||||
{
|
||||
return config->sm_count_per_tpc;
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_config_get_gpc_ppc_count(struct nvgpu_gr_config *config,
|
||||
u32 gpc_index)
|
||||
{
|
||||
nvgpu_assert(gpc_index < nvgpu_gr_config_get_gpc_count(config));
|
||||
return config->gpc_ppc_count[gpc_index];
|
||||
}
|
||||
|
||||
u32 *nvgpu_gr_config_get_gpc_tpc_count_base(struct nvgpu_gr_config *config)
|
||||
{
|
||||
return config->gpc_tpc_count;
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_config_get_gpc_tpc_count(struct nvgpu_gr_config *config,
|
||||
u32 gpc_index)
|
||||
{
|
||||
if (gpc_index >= config->gpc_count) {
|
||||
return 0;
|
||||
}
|
||||
return config->gpc_tpc_count[gpc_index];
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_config_get_pes_tpc_count(struct nvgpu_gr_config *config,
|
||||
u32 gpc_index, u32 pes_index)
|
||||
{
|
||||
nvgpu_assert(gpc_index < nvgpu_gr_config_get_gpc_count(config));
|
||||
nvgpu_assert(pes_index < nvgpu_gr_config_get_pe_count_per_gpc(config));
|
||||
return config->pes_tpc_count[pes_index][gpc_index];
|
||||
}
|
||||
|
||||
u32 *nvgpu_gr_config_get_gpc_tpc_mask_base(struct nvgpu_gr_config *config)
|
||||
{
|
||||
return config->gpc_tpc_mask;
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_config_get_gpc_tpc_mask(struct nvgpu_gr_config *config,
|
||||
u32 gpc_index)
|
||||
{
|
||||
nvgpu_assert(gpc_index < nvgpu_gr_config_get_gpc_count(config));
|
||||
return config->gpc_tpc_mask[gpc_index];
|
||||
}
|
||||
|
||||
void nvgpu_gr_config_set_gpc_tpc_mask(struct nvgpu_gr_config *config,
|
||||
u32 gpc_index, u32 val)
|
||||
{
|
||||
nvgpu_assert(gpc_index < nvgpu_gr_config_get_gpc_count(config));
|
||||
config->gpc_tpc_mask[gpc_index] = val;
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_config_get_gpc_skip_mask(struct nvgpu_gr_config *config,
|
||||
u32 gpc_index)
|
||||
{
|
||||
if (gpc_index >= config->gpc_count) {
|
||||
return 0;
|
||||
}
|
||||
return config->gpc_skip_mask[gpc_index];
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_config_get_pes_tpc_mask(struct nvgpu_gr_config *config,
|
||||
u32 gpc_index, u32 pes_index)
|
||||
{
|
||||
nvgpu_assert(gpc_index < nvgpu_gr_config_get_gpc_count(config));
|
||||
nvgpu_assert(pes_index < nvgpu_gr_config_get_pe_count_per_gpc(config));
|
||||
return config->pes_tpc_mask[pes_index][gpc_index];
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_config_get_gpc_mask(struct nvgpu_gr_config *config)
|
||||
{
|
||||
return config->gpc_mask;
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_config_get_no_of_sm(struct nvgpu_gr_config *config)
|
||||
{
|
||||
return config->no_of_sm;
|
||||
}
|
||||
|
||||
void nvgpu_gr_config_set_no_of_sm(struct nvgpu_gr_config *config, u32 no_of_sm)
|
||||
{
|
||||
config->no_of_sm = no_of_sm;
|
||||
}
|
||||
|
||||
struct nvgpu_sm_info *nvgpu_gr_config_get_sm_info(struct nvgpu_gr_config *config,
|
||||
u32 sm_id)
|
||||
{
|
||||
return &config->sm_to_cluster[sm_id];
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_SM_DIVERSITY
|
||||
struct nvgpu_sm_info *nvgpu_gr_config_get_redex_sm_info(
|
||||
struct nvgpu_gr_config *config, u32 sm_id)
|
||||
{
|
||||
return &config->sm_to_cluster_redex_config[sm_id];
|
||||
}
|
||||
#endif
|
||||
|
||||
u32 nvgpu_gr_config_get_sm_info_gpc_index(struct nvgpu_sm_info *sm_info)
|
||||
{
|
||||
return sm_info->gpc_index;
|
||||
}
|
||||
|
||||
void nvgpu_gr_config_set_sm_info_gpc_index(struct nvgpu_sm_info *sm_info,
|
||||
u32 gpc_index)
|
||||
{
|
||||
sm_info->gpc_index = gpc_index;
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_config_get_sm_info_tpc_index(struct nvgpu_sm_info *sm_info)
|
||||
{
|
||||
return sm_info->tpc_index;
|
||||
}
|
||||
|
||||
void nvgpu_gr_config_set_sm_info_tpc_index(struct nvgpu_sm_info *sm_info,
|
||||
u32 tpc_index)
|
||||
{
|
||||
sm_info->tpc_index = tpc_index;
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_config_get_sm_info_global_tpc_index(struct nvgpu_sm_info *sm_info)
|
||||
{
|
||||
return sm_info->global_tpc_index;
|
||||
}
|
||||
|
||||
void nvgpu_gr_config_set_sm_info_global_tpc_index(struct nvgpu_sm_info *sm_info,
|
||||
u32 global_tpc_index)
|
||||
{
|
||||
sm_info->global_tpc_index = global_tpc_index;
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_config_get_sm_info_sm_index(struct nvgpu_sm_info *sm_info)
|
||||
{
|
||||
return sm_info->sm_index;
|
||||
}
|
||||
|
||||
void nvgpu_gr_config_set_sm_info_sm_index(struct nvgpu_sm_info *sm_info,
|
||||
u32 sm_index)
|
||||
{
|
||||
sm_info->sm_index = sm_index;
|
||||
}
|
||||
172
drivers/gpu/nvgpu/common/gr/gr_config_priv.h
Normal file
@@ -0,0 +1,172 @@
|
||||
/*
|
||||
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef NVGPU_GR_CONFIG_PRIV_H
|
||||
#define NVGPU_GR_CONFIG_PRIV_H
|
||||
|
||||
#include <nvgpu/types.h>
|
||||
|
||||
/**
|
||||
* Max possible PES count per GPC.
|
||||
*/
|
||||
#define GK20A_GR_MAX_PES_PER_GPC 3U
|
||||
|
||||
struct gk20a;
|
||||
|
||||
/**
|
||||
* Detailed information of SM indexes in GR engine.
|
||||
*/
|
||||
struct nvgpu_sm_info {
|
||||
/**
|
||||
* Index of GPC for SM.
|
||||
*/
|
||||
u32 gpc_index;
|
||||
|
||||
/**
|
||||
* Index of TPC for SM.
|
||||
*/
|
||||
u32 tpc_index;
|
||||
|
||||
/**
|
||||
* Index of SM within TPC.
|
||||
*/
|
||||
u32 sm_index;
|
||||
|
||||
/**
|
||||
* Global TPC index for SM.
|
||||
*/
|
||||
u32 global_tpc_index;
|
||||
};
|
||||
|
||||
/**
|
||||
* GR engine configuration data.
|
||||
*
|
||||
* This data is populated during GR initialization and referred across
|
||||
* GPU driver through public APIs.
|
||||
*/
|
||||
struct nvgpu_gr_config {
|
||||
/**
|
||||
* Pointer to GPU driver struct.
|
||||
*/
|
||||
struct gk20a *g;
|
||||
|
||||
/**
|
||||
* Max possible number of GPCs in GR engine.
|
||||
*/
|
||||
u32 max_gpc_count;
|
||||
/**
|
||||
* Max possible number of TPCs per GPC in GR engine.
|
||||
*/
|
||||
u32 max_tpc_per_gpc_count;
|
||||
/**
|
||||
* Max possible number of TPCs in GR engine.
|
||||
*/
|
||||
u32 max_tpc_count;
|
||||
|
||||
/**
|
||||
* Number of GPCs in GR engine.
|
||||
*/
|
||||
u32 gpc_count;
|
||||
/**
|
||||
* Number of TPCs in GR engine.
|
||||
*/
|
||||
u32 tpc_count;
|
||||
/**
|
||||
* Number of PPCs in GR engine.
|
||||
*/
|
||||
u32 ppc_count;
|
||||
|
||||
/**
|
||||
* Number of PES per GPC in GR engine.
|
||||
*/
|
||||
u32 pe_count_per_gpc;
|
||||
/**
|
||||
* Number of SMs per TPC in GR engine.
|
||||
*/
|
||||
u32 sm_count_per_tpc;
|
||||
|
||||
/**
|
||||
* Array to hold number of PPC units per GPC.
|
||||
* Array is indexed by GPC index.
|
||||
*/
|
||||
u32 *gpc_ppc_count;
|
||||
/**
|
||||
* Array to hold number of TPCs per GPC.
|
||||
* Array is indexed by GPC index.
|
||||
*/
|
||||
u32 *gpc_tpc_count;
|
||||
/**
|
||||
* 2-D array to hold number of TPCs attached to a PES unit
|
||||
* in a GPC.
|
||||
*/
|
||||
u32 *pes_tpc_count[GK20A_GR_MAX_PES_PER_GPC];
|
||||
|
||||
/**
|
||||
* Mask of GPCs. A set bit indicates GPC is available, otherwise
|
||||
* it is not available.
|
||||
*/
|
||||
u32 gpc_mask;
|
||||
|
||||
/**
|
||||
* Array to hold mask of TPCs per GPC.
|
||||
* Array is indexed by GPC index.
|
||||
*/
|
||||
u32 *gpc_tpc_mask;
|
||||
/**
|
||||
* 2-D array to hold mask of TPCs attached to a PES unit
|
||||
* in a GPC.
|
||||
*/
|
||||
u32 *pes_tpc_mask[GK20A_GR_MAX_PES_PER_GPC];
|
||||
/**
|
||||
* Array to hold skip mask of TPCs per GPC.
|
||||
* Array is indexed by GPC index.
|
||||
*/
|
||||
u32 *gpc_skip_mask;
|
||||
|
||||
/**
|
||||
* Number of SMs in GR engine.
|
||||
*/
|
||||
u32 no_of_sm;
|
||||
/**
|
||||
* Pointer to SM information struct.
|
||||
*/
|
||||
struct nvgpu_sm_info *sm_to_cluster;
|
||||
#ifdef CONFIG_NVGPU_SM_DIVERSITY
|
||||
/**
|
||||
* Pointer to redundant execution config SM information struct.
|
||||
* It is valid only if NVGPU_SUPPORT_SM_DIVERSITY support is true.
|
||||
*/
|
||||
struct nvgpu_sm_info *sm_to_cluster_redex_config;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_NVGPU_GRAPHICS
|
||||
u32 max_zcull_per_gpc_count;
|
||||
u32 zcb_count;
|
||||
u32 *gpc_zcb_count;
|
||||
|
||||
u8 *map_tiles;
|
||||
u32 map_tile_count;
|
||||
u32 map_row_offset;
|
||||
#endif
|
||||
};
|
||||
|
||||
#endif /* NVGPU_GR_CONFIG_PRIV_H */
|
||||
269
drivers/gpu/nvgpu/common/gr/gr_ecc.c
Normal file
@@ -0,0 +1,269 @@
|
||||
/*
|
||||
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gr/gr_ecc.h>
|
||||
#include <nvgpu/gr/gr_utils.h>
|
||||
#include <nvgpu/gr/config.h>
|
||||
#include <nvgpu/string.h>
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/kmem.h>
|
||||
#include <nvgpu/ecc.h>
|
||||
|
||||
int nvgpu_ecc_counter_init_per_gr(struct gk20a *g,
|
||||
struct nvgpu_ecc_stat **stat, const char *name)
|
||||
{
|
||||
struct nvgpu_ecc_stat *stats;
|
||||
u32 i;
|
||||
char gr_str[10] = {0};
|
||||
|
||||
stats = nvgpu_kzalloc(g, nvgpu_safe_mult_u64(sizeof(*stats),
|
||||
g->num_gr_instances));
|
||||
if (stats == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
for (i = 0; i < g->num_gr_instances; i++) {
|
||||
/**
|
||||
* Store stats name as below:
|
||||
* gr<gr_index>_<name_string>
|
||||
*/
|
||||
(void)strcpy(stats[i].name, "gr");
|
||||
(void)nvgpu_strnadd_u32(gr_str, i, sizeof(gr_str), 10U);
|
||||
(void)strncat(stats[i].name, gr_str,
|
||||
NVGPU_ECC_STAT_NAME_MAX_SIZE -
|
||||
strlen(stats[i].name));
|
||||
(void)strncat(stats[i].name, "_",
|
||||
NVGPU_ECC_STAT_NAME_MAX_SIZE -
|
||||
strlen(stats[i].name));
|
||||
(void)strncat(stats[i].name, name,
|
||||
NVGPU_ECC_STAT_NAME_MAX_SIZE -
|
||||
strlen(stats[i].name));
|
||||
|
||||
nvgpu_ecc_stat_add(g, &stats[i]);
|
||||
}
|
||||
|
||||
*stat = stats;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int nvgpu_ecc_counter_init_per_tpc(struct gk20a *g,
|
||||
struct nvgpu_ecc_stat ***stat, const char *name)
|
||||
{
|
||||
struct nvgpu_ecc_stat **stats;
|
||||
struct nvgpu_gr_config *gr_config = nvgpu_gr_get_config_ptr(g);
|
||||
u32 gpc_count = nvgpu_gr_config_get_gpc_count(gr_config);
|
||||
u32 gpc, tpc;
|
||||
char gpc_str[10] = {0}, tpc_str[10] = {0};
|
||||
int err = 0;
|
||||
|
||||
stats = nvgpu_kzalloc(g, nvgpu_safe_mult_u64(sizeof(*stats),
|
||||
gpc_count));
|
||||
if (stats == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
for (gpc = 0; gpc < gpc_count; gpc++) {
|
||||
stats[gpc] = nvgpu_kzalloc(g,
|
||||
nvgpu_safe_mult_u64(sizeof(*stats[gpc]),
|
||||
nvgpu_gr_config_get_gpc_tpc_count(gr_config,
|
||||
gpc)));
|
||||
if (stats[gpc] == NULL) {
|
||||
err = -ENOMEM;
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
|
||||
for (gpc = 0; gpc < gpc_count; gpc++) {
|
||||
for (tpc = 0;
|
||||
tpc < nvgpu_gr_config_get_gpc_tpc_count(gr_config, gpc);
|
||||
tpc++) {
|
||||
/**
|
||||
* Store stats name as below:
|
||||
* gpc<gpc_value>_tpc<tpc_value>_<name_string>
|
||||
*/
|
||||
(void)strcpy(stats[gpc][tpc].name, "gpc");
|
||||
(void)nvgpu_strnadd_u32(gpc_str, gpc,
|
||||
sizeof(gpc_str), 10U);
|
||||
(void)strncat(stats[gpc][tpc].name, gpc_str,
|
||||
NVGPU_ECC_STAT_NAME_MAX_SIZE -
|
||||
strlen(stats[gpc][tpc].name));
|
||||
(void)strncat(stats[gpc][tpc].name, "_tpc",
|
||||
NVGPU_ECC_STAT_NAME_MAX_SIZE -
|
||||
strlen(stats[gpc][tpc].name));
|
||||
(void)nvgpu_strnadd_u32(tpc_str, tpc,
|
||||
sizeof(tpc_str), 10U);
|
||||
(void)strncat(stats[gpc][tpc].name, tpc_str,
|
||||
NVGPU_ECC_STAT_NAME_MAX_SIZE -
|
||||
strlen(stats[gpc][tpc].name));
|
||||
(void)strncat(stats[gpc][tpc].name, "_",
|
||||
NVGPU_ECC_STAT_NAME_MAX_SIZE -
|
||||
strlen(stats[gpc][tpc].name));
|
||||
(void)strncat(stats[gpc][tpc].name, name,
|
||||
NVGPU_ECC_STAT_NAME_MAX_SIZE -
|
||||
strlen(stats[gpc][tpc].name));
|
||||
|
||||
nvgpu_ecc_stat_add(g, &stats[gpc][tpc]);
|
||||
}
|
||||
}
|
||||
|
||||
*stat = stats;
|
||||
|
||||
fail:
|
||||
if (err != 0) {
|
||||
while (gpc-- != 0u) {
|
||||
nvgpu_kfree(g, stats[gpc]);
|
||||
}
|
||||
|
||||
nvgpu_kfree(g, stats);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
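/*
 * Illustrative result of the name construction above (the counter name
 * is hypothetical): for name = "sm_lrf_ecc_single_err_count", the stat
 * for GPC 1 / TPC 0 is registered as
 * "gpc1_tpc0_sm_lrf_ecc_single_err_count".
 */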
|
||||
|
||||
int nvgpu_ecc_counter_init_per_gpc(struct gk20a *g,
|
||||
struct nvgpu_ecc_stat **stat, const char *name)
|
||||
{
|
||||
struct nvgpu_ecc_stat *stats;
|
||||
struct nvgpu_gr_config *gr_config = nvgpu_gr_get_config_ptr(g);
|
||||
u32 gpc_count = nvgpu_gr_config_get_gpc_count(gr_config);
|
||||
u32 gpc;
|
||||
char gpc_str[10] = {0};
|
||||
|
||||
stats = nvgpu_kzalloc(g, nvgpu_safe_mult_u64(sizeof(*stats),
|
||||
gpc_count));
|
||||
if (stats == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
for (gpc = 0; gpc < gpc_count; gpc++) {
|
||||
/**
|
||||
* Store stats name as below:
|
||||
* gpc<gpc_value>_<name_string>
|
||||
*/
|
||||
(void)strcpy(stats[gpc].name, "gpc");
|
||||
(void)nvgpu_strnadd_u32(gpc_str, gpc, sizeof(gpc_str), 10U);
|
||||
(void)strncat(stats[gpc].name, gpc_str,
|
||||
NVGPU_ECC_STAT_NAME_MAX_SIZE -
|
||||
strlen(stats[gpc].name));
|
||||
(void)strncat(stats[gpc].name, "_",
|
||||
NVGPU_ECC_STAT_NAME_MAX_SIZE -
|
||||
strlen(stats[gpc].name));
|
||||
(void)strncat(stats[gpc].name, name,
|
||||
NVGPU_ECC_STAT_NAME_MAX_SIZE -
|
||||
strlen(stats[gpc].name));
|
||||
|
||||
nvgpu_ecc_stat_add(g, &stats[gpc]);
|
||||
}
|
||||
|
||||
*stat = stats;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void nvgpu_ecc_counter_deinit_per_gr(struct gk20a *g,
|
||||
struct nvgpu_ecc_stat **stats_p)
|
||||
{
|
||||
struct nvgpu_ecc_stat *stats = NULL;
|
||||
u32 i;
|
||||
|
||||
if (*stats_p != NULL) {
|
||||
stats = *stats_p;
|
||||
|
||||
for (i = 0; i < g->num_gr_instances; i++) {
|
||||
nvgpu_ecc_stat_del(g, &stats[i]);
|
||||
}
|
||||
|
||||
nvgpu_kfree(g, stats);
|
||||
*stats_p = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
void nvgpu_ecc_counter_deinit_per_tpc(struct gk20a *g,
|
||||
struct nvgpu_ecc_stat ***stats_p)
|
||||
{
|
||||
struct nvgpu_gr_config *gr_config = nvgpu_gr_get_config_ptr(g);
|
||||
struct nvgpu_ecc_stat **stats = NULL;
|
||||
u32 gpc_count;
|
||||
u32 gpc, tpc;
|
||||
|
||||
if (*stats_p != NULL) {
|
||||
gpc_count = nvgpu_gr_config_get_gpc_count(gr_config);
|
||||
stats = *stats_p;
|
||||
|
||||
for (gpc = 0; gpc < gpc_count; gpc++) {
|
||||
if (stats[gpc] == NULL) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (tpc = 0;
|
||||
tpc < nvgpu_gr_config_get_gpc_tpc_count(gr_config, gpc);
|
||||
tpc++) {
|
||||
nvgpu_ecc_stat_del(g, &stats[gpc][tpc]);
|
||||
}
|
||||
|
||||
nvgpu_kfree(g, stats[gpc]);
|
||||
stats[gpc] = NULL;
|
||||
}
|
||||
|
||||
nvgpu_kfree(g, stats);
|
||||
*stats_p = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
void nvgpu_ecc_counter_deinit_per_gpc(struct gk20a *g,
|
||||
struct nvgpu_ecc_stat **stats_p)
|
||||
{
|
||||
struct nvgpu_gr_config *gr_config = nvgpu_gr_get_config_ptr(g);
|
||||
struct nvgpu_ecc_stat *stats = NULL;
|
||||
u32 gpc_count;
|
||||
u32 gpc;
|
||||
|
||||
if (*stats_p != NULL) {
|
||||
gpc_count = nvgpu_gr_config_get_gpc_count(gr_config);
|
||||
stats = *stats_p;
|
||||
|
||||
for (gpc = 0; gpc < gpc_count; gpc++) {
|
||||
nvgpu_ecc_stat_del(g, &stats[gpc]);
|
||||
}
|
||||
|
||||
nvgpu_kfree(g, stats);
|
||||
*stats_p = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
void nvgpu_gr_ecc_free(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_gr_config *gr_config = nvgpu_gr_get_config_ptr(g);
|
||||
|
||||
nvgpu_log(g, gpu_dbg_gr, " ");
|
||||
|
||||
if (gr_config == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (g->ops.gr.ecc.fecs_ecc_deinit != NULL) {
|
||||
g->ops.gr.ecc.fecs_ecc_deinit(g);
|
||||
}
|
||||
|
||||
if (g->ops.gr.ecc.gpc_tpc_ecc_deinit != NULL) {
|
||||
g->ops.gr.ecc.gpc_tpc_ecc_deinit(g);
|
||||
}
|
||||
}
|
||||
755
drivers/gpu/nvgpu/common/gr/gr_falcon.c
Normal file
@@ -0,0 +1,755 @@
|
||||
/*
|
||||
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/netlist.h>
|
||||
#include <nvgpu/gr/gr_falcon.h>
|
||||
#include <nvgpu/enabled.h>
|
||||
#include <nvgpu/debug.h>
|
||||
#include <nvgpu/gr/hwpm_map.h>
|
||||
#include <nvgpu/firmware.h>
|
||||
#include <nvgpu/sizes.h>
|
||||
#include <nvgpu/mm.h>
|
||||
#include <nvgpu/acr.h>
|
||||
#include <nvgpu/gr/gr_utils.h>
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
#include <nvgpu/pmu/lsfm.h>
|
||||
#include <nvgpu/pmu/pmu_pg.h>
|
||||
#endif
|
||||
#ifdef CONFIG_NVGPU_DGPU
|
||||
#include <nvgpu/sec2/lsfm.h>
|
||||
#endif
|
||||
#include <nvgpu/dma.h>
|
||||
#include <nvgpu/static_analysis.h>
|
||||
|
||||
#include "gr_falcon_priv.h"
|
||||
|
||||
#define NVGPU_FECS_UCODE_IMAGE "fecs.bin"
|
||||
#define NVGPU_GPCCS_UCODE_IMAGE "gpccs.bin"
|
||||
|
||||
struct nvgpu_gr_falcon *nvgpu_gr_falcon_init_support(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_gr_falcon *falcon;
|
||||
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
falcon = nvgpu_kzalloc(g, sizeof(*falcon));
|
||||
if (falcon == NULL) {
|
||||
return falcon;
|
||||
}
|
||||
|
||||
nvgpu_mutex_init(&falcon->fecs_mutex);
|
||||
falcon->coldboot_bootstrap_done = false;
|
||||
|
||||
return falcon;
|
||||
}
|
||||
|
||||
void nvgpu_gr_falcon_remove_support(struct gk20a *g,
|
||||
struct nvgpu_gr_falcon *falcon)
|
||||
{
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
if (falcon == NULL) {
|
||||
return;
|
||||
}
|
||||
nvgpu_kfree(g, falcon);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_POWER_PG
|
||||
int nvgpu_gr_falcon_bind_fecs_elpg(struct gk20a *g)
|
||||
{
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
struct nvgpu_pmu *pmu = g->pmu;
|
||||
struct mm_gk20a *mm = &g->mm;
|
||||
struct vm_gk20a *vm = mm->pmu.vm;
|
||||
int err = 0;
|
||||
u32 size;
|
||||
u32 data;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " ");
|
||||
|
||||
size = 0;
|
||||
|
||||
err = g->ops.gr.falcon.ctrl_ctxsw(g,
|
||||
NVGPU_GR_FALCON_METHOD_REGLIST_DISCOVER_IMAGE_SIZE, 0U, &size);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g,
|
||||
"fail to query fecs pg buffer size");
|
||||
return err;
|
||||
}
|
||||
|
||||
nvgpu_log(g, gpu_dbg_gr, "FECS PG buffer size = %u", size);
|
||||
|
||||
if (nvgpu_pmu_pg_buf_get_cpu_va(g, pmu) == NULL) {
|
||||
err = nvgpu_dma_alloc_map_sys(vm, size, nvgpu_pmu_pg_buf(g, pmu));
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "failed to allocate memory");
|
||||
return -ENOMEM;
|
||||
}
|
||||
}
|
||||
|
||||
data = g->ops.gr.falcon.get_fecs_current_ctx_data(g,
|
||||
&mm->pmu.inst_block);
|
||||
err = g->ops.gr.falcon.ctrl_ctxsw(g,
|
||||
NVGPU_GR_FALCON_METHOD_REGLIST_BIND_INSTANCE, data, NULL);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g,
|
||||
"fail to bind pmu inst to gr");
|
||||
return err;
|
||||
}
|
||||
|
||||
data = u64_lo32(nvgpu_pmu_pg_buf_get_gpu_va(g, pmu) >> 8);
|
||||
err = g->ops.gr.falcon.ctrl_ctxsw(g,
|
||||
NVGPU_GR_FALCON_METHOD_REGLIST_SET_VIRTUAL_ADDRESS, data, NULL);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g,
|
||||
"fail to set pg buffer pmu va");
|
||||
return err;
|
||||
}
|
||||
|
||||
nvgpu_log(g, gpu_dbg_gr, "done");
|
||||
return err;
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
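/*
 * Summary of the ELPG handshake above: FECS is first asked how large the
 * power-gating register-list buffer must be, the buffer is allocated and
 * mapped in the PMU VM if not already present, FECS is then bound to the
 * PMU instance block, and finally the buffer's PMU virtual address
 * (right-shifted by 8, i.e. in 256-byte units) is handed to FECS.
 */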
|
||||
#endif
|
||||
|
||||
int nvgpu_gr_falcon_init_ctxsw(struct gk20a *g, struct nvgpu_gr_falcon *falcon)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " ");
|
||||
|
||||
err = g->ops.gr.falcon.load_ctxsw_ucode(g, falcon);
|
||||
if (err != 0) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
err = g->ops.gr.falcon.wait_ctxsw_ready(g);
|
||||
|
||||
out:
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "fail");
|
||||
} else {
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
int nvgpu_gr_falcon_init_ctx_state(struct gk20a *g,
|
||||
struct nvgpu_gr_falcon *falcon)
|
||||
{
|
||||
struct nvgpu_gr_falcon_query_sizes *sizes = &falcon->sizes;
|
||||
int err = 0;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " ");
|
||||
|
||||
/* fecs init ramchain */
|
||||
err = g->ops.gr.falcon.init_ctx_state(g, sizes);
|
||||
if (err != 0) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
out:
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "fail");
|
||||
} else {
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_falcon_get_golden_image_size(struct nvgpu_gr_falcon *falcon)
|
||||
{
|
||||
return falcon->sizes.golden_image_size;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_DEBUGGER
|
||||
u32 nvgpu_gr_falcon_get_pm_ctxsw_image_size(struct nvgpu_gr_falcon *falcon)
|
||||
{
|
||||
return falcon->sizes.pm_ctxsw_image_size;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_NVGPU_GFXP
|
||||
u32 nvgpu_gr_falcon_get_preempt_image_size(struct nvgpu_gr_falcon *falcon)
|
||||
{
|
||||
return falcon->sizes.preempt_image_size;
|
||||
}
|
||||
#endif /* CONFIG_NVGPU_GFXP */
|
||||
|
||||
#ifdef CONFIG_NVGPU_GRAPHICS
|
||||
u32 nvgpu_gr_falcon_get_zcull_image_size(struct nvgpu_gr_falcon *falcon)
|
||||
{
|
||||
return falcon->sizes.zcull_image_size;
|
||||
}
|
||||
#endif /* CONFIG_NVGPU_GRAPHICS */
|
||||
|
||||
static int nvgpu_gr_falcon_init_ctxsw_ucode_vaspace(struct gk20a *g,
|
||||
struct nvgpu_gr_falcon *falcon)
|
||||
{
|
||||
struct mm_gk20a *mm = &g->mm;
|
||||
struct vm_gk20a *vm = mm->pmu.vm;
|
||||
struct nvgpu_ctxsw_ucode_info *ucode_info = &falcon->ctxsw_ucode_info;
|
||||
int err;
|
||||
|
||||
err = nvgpu_alloc_inst_block(g, &ucode_info->inst_blk_desc);
|
||||
if (err != 0) {
|
||||
return err;
|
||||
}
|
||||
|
||||
g->ops.mm.init_inst_block(&ucode_info->inst_blk_desc, vm, 0);
|
||||
|
||||
/* Map ucode surface to GMMU */
|
||||
ucode_info->surface_desc.gpu_va = nvgpu_gmmu_map(vm,
|
||||
&ucode_info->surface_desc,
|
||||
ucode_info->surface_desc.size,
|
||||
0, /* flags */
|
||||
gk20a_mem_flag_read_only,
|
||||
false,
|
||||
ucode_info->surface_desc.aperture);
|
||||
if (ucode_info->surface_desc.gpu_va == 0ULL) {
|
||||
nvgpu_err(g, "failed to update gmmu ptes");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void nvgpu_gr_falcon_init_ctxsw_ucode_segment(
|
||||
struct nvgpu_ctxsw_ucode_segment *p_seg, u32 *offset, u32 size)
|
||||
{
|
||||
u32 ucode_offset;
|
||||
|
||||
p_seg->offset = *offset;
|
||||
p_seg->size = size;
|
||||
ucode_offset = nvgpu_safe_add_u32(*offset, size);
|
||||
*offset = NVGPU_ALIGN(ucode_offset, 256U);
|
||||
}
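/*
 * Layout sketch for nvgpu_gr_falcon_init_ctxsw_ucode_segment()
 * (hypothetical sizes): starting at *offset = 0, a 0x120-byte boot
 * segment records (offset 0x000, size 0x120) and advances *offset to
 * 0x200 due to the 256-byte alignment; a following 0x340-byte code
 * segment records (0x200, 0x340) and advances *offset to 0x600, and
 * so on for the data segment.
 */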
|
||||
|
||||
static void nvgpu_gr_falcon_init_ctxsw_ucode_segments(
|
||||
struct nvgpu_ctxsw_ucode_segments *segments, u32 *offset,
|
||||
struct nvgpu_ctxsw_bootloader_desc *bootdesc,
|
||||
u32 code_size, u32 data_size)
|
||||
{
|
||||
u32 boot_size = NVGPU_ALIGN(bootdesc->size, sizeof(u32));
|
||||
|
||||
segments->boot_entry = bootdesc->entry_point;
|
||||
segments->boot_imem_offset = bootdesc->imem_offset;
|
||||
nvgpu_gr_falcon_init_ctxsw_ucode_segment(&segments->boot,
|
||||
offset, boot_size);
|
||||
nvgpu_gr_falcon_init_ctxsw_ucode_segment(&segments->code,
|
||||
offset, code_size);
|
||||
nvgpu_gr_falcon_init_ctxsw_ucode_segment(&segments->data,
|
||||
offset, data_size);
|
||||
}
|
||||
|
||||
static void nvgpu_gr_falcon_copy_ctxsw_ucode_segments(
|
||||
struct gk20a *g,
|
||||
struct nvgpu_mem *dst,
|
||||
struct nvgpu_ctxsw_ucode_segments *segments,
|
||||
u32 *bootimage,
|
||||
u32 *code, u32 *data)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
nvgpu_mem_wr_n(g, dst, segments->boot.offset, bootimage,
|
||||
segments->boot.size);
|
||||
nvgpu_mem_wr_n(g, dst, segments->code.offset, code,
|
||||
segments->code.size);
|
||||
nvgpu_mem_wr_n(g, dst, segments->data.offset, data,
|
||||
segments->data.size);
|
||||
|
||||
/* compute a "checksum" for the boot binary to detect its version */
|
||||
segments->boot_signature = 0;
|
||||
for (i = 0; i < (segments->boot.size / sizeof(u32)); i++) {
|
||||
segments->boot_signature = nvgpu_gr_checksum_u32(
|
||||
segments->boot_signature, bootimage[i]);
|
||||
}
|
||||
}
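/*
 * The boot_signature computed above is not a cryptographic signature:
 * it simply folds every 32-bit word of the boot image into a running
 * value via nvgpu_gr_checksum_u32(), and is later passed down through
 * the ucode header load so the bootloader version can be recognized.
 */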
|
||||
|
||||
int nvgpu_gr_falcon_init_ctxsw_ucode(struct gk20a *g,
|
||||
struct nvgpu_gr_falcon *falcon)
|
||||
{
|
||||
struct nvgpu_ctxsw_bootloader_desc *fecs_boot_desc;
|
||||
struct nvgpu_ctxsw_bootloader_desc *gpccs_boot_desc;
|
||||
struct nvgpu_firmware *fecs_fw;
|
||||
struct nvgpu_firmware *gpccs_fw;
|
||||
u32 *fecs_boot_image;
|
||||
u32 *gpccs_boot_image;
|
||||
struct nvgpu_ctxsw_ucode_info *ucode_info = &falcon->ctxsw_ucode_info;
|
||||
u32 ucode_size;
|
||||
int err = 0;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_gr, "Request and copy FECS/GPCCS firmware images");
|
||||
|
||||
fecs_fw = nvgpu_request_firmware(g, NVGPU_FECS_UCODE_IMAGE, 0);
|
||||
if (fecs_fw == NULL) {
|
||||
nvgpu_err(g, "failed to load fecs ucode!!");
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
fecs_boot_desc = (void *)fecs_fw->data;
|
||||
fecs_boot_image = (void *)(fecs_fw->data +
|
||||
sizeof(struct nvgpu_ctxsw_bootloader_desc));
|
||||
|
||||
gpccs_fw = nvgpu_request_firmware(g, NVGPU_GPCCS_UCODE_IMAGE, 0);
|
||||
if (gpccs_fw == NULL) {
|
||||
nvgpu_release_firmware(g, fecs_fw);
|
||||
nvgpu_err(g, "failed to load gpccs ucode!!");
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
gpccs_boot_desc = (void *)gpccs_fw->data;
|
||||
gpccs_boot_image = (void *)(gpccs_fw->data +
|
||||
sizeof(struct nvgpu_ctxsw_bootloader_desc));
|
||||
|
||||
ucode_size = 0;
|
||||
nvgpu_gr_falcon_init_ctxsw_ucode_segments(&ucode_info->fecs,
|
||||
&ucode_size, fecs_boot_desc,
|
||||
nvgpu_safe_mult_u32(
|
||||
nvgpu_netlist_get_fecs_inst_count(g), (u32)sizeof(u32)),
|
||||
nvgpu_safe_mult_u32(
|
||||
nvgpu_netlist_get_fecs_data_count(g), (u32)sizeof(u32)));
|
||||
nvgpu_gr_falcon_init_ctxsw_ucode_segments(&ucode_info->gpccs,
|
||||
&ucode_size, gpccs_boot_desc,
|
||||
nvgpu_safe_mult_u32(
|
||||
nvgpu_netlist_get_gpccs_inst_count(g), (u32)sizeof(u32)),
|
||||
nvgpu_safe_mult_u32(
|
||||
nvgpu_netlist_get_gpccs_data_count(g), (u32)sizeof(u32)));
|
||||
|
||||
err = nvgpu_dma_alloc_sys(g, ucode_size, &ucode_info->surface_desc);
|
||||
if (err != 0) {
|
||||
goto clean_up;
|
||||
}
|
||||
|
||||
nvgpu_gr_falcon_copy_ctxsw_ucode_segments(g,
|
||||
&ucode_info->surface_desc,
|
||||
&ucode_info->fecs,
|
||||
fecs_boot_image,
|
||||
nvgpu_netlist_get_fecs_inst_list(g),
|
||||
nvgpu_netlist_get_fecs_data_list(g));
|
||||
|
||||
nvgpu_release_firmware(g, fecs_fw);
|
||||
fecs_fw = NULL;
|
||||
|
||||
nvgpu_gr_falcon_copy_ctxsw_ucode_segments(g,
|
||||
&ucode_info->surface_desc,
|
||||
&ucode_info->gpccs,
|
||||
gpccs_boot_image,
|
||||
nvgpu_netlist_get_gpccs_inst_list(g),
|
||||
nvgpu_netlist_get_gpccs_data_list(g));
|
||||
|
||||
nvgpu_release_firmware(g, gpccs_fw);
|
||||
gpccs_fw = NULL;
|
||||
|
||||
err = nvgpu_gr_falcon_init_ctxsw_ucode_vaspace(g, falcon);
|
||||
if (err != 0) {
|
||||
goto clean_up;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
clean_up:
|
||||
nvgpu_dma_free(g, &ucode_info->surface_desc);
|
||||
|
||||
if (gpccs_fw != NULL) {
|
||||
nvgpu_release_firmware(g, gpccs_fw);
|
||||
gpccs_fw = NULL;
|
||||
}
|
||||
if (fecs_fw != NULL) {
|
||||
nvgpu_release_firmware(g, fecs_fw);
|
||||
fecs_fw = NULL;
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static void nvgpu_gr_falcon_bind_instblk(struct gk20a *g,
|
||||
struct nvgpu_gr_falcon *falcon)
|
||||
{
|
||||
struct nvgpu_ctxsw_ucode_info *ucode_info =
|
||||
&falcon->ctxsw_ucode_info;
|
||||
u64 inst_ptr;
|
||||
|
||||
if (g->ops.gr.falcon.bind_instblk == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
inst_ptr = nvgpu_inst_block_addr(g, &ucode_info->inst_blk_desc);
|
||||
|
||||
g->ops.gr.falcon.bind_instblk(g, &ucode_info->inst_blk_desc,
|
||||
inst_ptr);
|
||||
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_GR_FALCON_NON_SECURE_BOOT
|
||||
static void nvgpu_gr_falcon_load_dmem(struct gk20a *g)
|
||||
{
|
||||
u32 ucode_u32_size;
|
||||
const u32 *ucode_u32_data;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " ");
|
||||
|
||||
ucode_u32_size = nvgpu_netlist_get_gpccs_data_count(g);
|
||||
ucode_u32_data = (const u32 *)nvgpu_netlist_get_gpccs_data_list(g);
|
||||
g->ops.gr.falcon.load_gpccs_dmem(g, ucode_u32_data, ucode_u32_size);
|
||||
|
||||
ucode_u32_size = nvgpu_netlist_get_fecs_data_count(g);
|
||||
ucode_u32_data = (const u32 *)nvgpu_netlist_get_fecs_data_list(g);
|
||||
g->ops.gr.falcon.load_fecs_dmem(g, ucode_u32_data, ucode_u32_size);
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
|
||||
}
|
||||
|
||||
static void nvgpu_gr_falcon_load_imem(struct gk20a *g)
|
||||
{
|
||||
u32 ucode_u32_size;
|
||||
const u32 *ucode_u32_data;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " ");
|
||||
|
||||
ucode_u32_size = nvgpu_netlist_get_gpccs_inst_count(g);
|
||||
ucode_u32_data = (const u32 *)nvgpu_netlist_get_gpccs_inst_list(g);
|
||||
g->ops.gr.falcon.load_gpccs_imem(g, ucode_u32_data, ucode_u32_size);
|
||||
|
||||
|
||||
ucode_u32_size = nvgpu_netlist_get_fecs_inst_count(g);
|
||||
ucode_u32_data = (const u32 *)nvgpu_netlist_get_fecs_inst_list(g);
|
||||
g->ops.gr.falcon.load_fecs_imem(g, ucode_u32_data, ucode_u32_size);
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
|
||||
}
|
||||
|
||||
static void nvgpu_gr_falcon_load_ctxsw_ucode_header(struct gk20a *g,
|
||||
u64 addr_base, struct nvgpu_ctxsw_ucode_segments *segments,
|
||||
u32 reg_offset)
|
||||
{
|
||||
u32 addr_code32 = u64_lo32(nvgpu_safe_add_u64(addr_base,
|
||||
segments->code.offset) >> 8);
|
||||
u32 addr_data32 = u64_lo32(nvgpu_safe_add_u64(addr_base,
|
||||
segments->data.offset) >> 8);
|
||||
|
||||
g->ops.gr.falcon.load_ctxsw_ucode_header(g, reg_offset,
|
||||
segments->boot_signature, addr_code32, addr_data32,
|
||||
segments->code.size, segments->data.size);
|
||||
}
|
||||
|
||||
static void nvgpu_gr_falcon_load_ctxsw_ucode_boot(struct gk20a *g,
|
||||
u64 addr_base, struct nvgpu_ctxsw_ucode_segments *segments,
|
||||
u32 reg_offset)
|
||||
{
|
||||
u32 addr_load32 = u64_lo32(nvgpu_safe_add_u64(addr_base,
|
||||
segments->boot.offset) >> 8);
|
||||
u32 blocks = (nvgpu_safe_add_u32(segments->boot.size, 0xFFU)
|
||||
& ~0xFFU) >> 8;
|
||||
u32 dst = segments->boot_imem_offset;
|
||||
|
||||
g->ops.gr.falcon.load_ctxsw_ucode_boot(g, reg_offset,
|
||||
segments->boot_entry, addr_load32, blocks, dst);
|
||||
|
||||
}
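/*
 * The values programmed above are in 256-byte units: addr_load32 is the
 * bootloader's GPU VA right-shifted by 8, and blocks rounds the
 * bootloader size up to whole 256-byte blocks. Worked example
 * (hypothetical size): segments->boot.size = 0x1A0 gives
 * (0x1A0 + 0xFF) & ~0xFF = 0x200, i.e. blocks = 2.
 */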

static void nvgpu_gr_falcon_load_ctxsw_ucode_segments(
	struct gk20a *g, u64 addr_base,
	struct nvgpu_ctxsw_ucode_segments *segments, u32 reg_offset)
{

	/* Copy falcon bootloader into dmem */
	nvgpu_gr_falcon_load_ctxsw_ucode_header(g, addr_base,
		segments, reg_offset);
	nvgpu_gr_falcon_load_ctxsw_ucode_boot(g,
		addr_base, segments, reg_offset);
}

static void nvgpu_gr_falcon_load_with_bootloader(struct gk20a *g,
		struct nvgpu_gr_falcon *falcon)
{
	struct nvgpu_ctxsw_ucode_info *ucode_info =
		&falcon->ctxsw_ucode_info;
	u64 addr_base = ucode_info->surface_desc.gpu_va;

	nvgpu_log(g, gpu_dbg_gr, " ");

	nvgpu_gr_falcon_bind_instblk(g, falcon);

	nvgpu_gr_falcon_load_ctxsw_ucode_segments(g, addr_base,
		&falcon->ctxsw_ucode_info.fecs, 0);

	nvgpu_gr_falcon_load_ctxsw_ucode_segments(g, addr_base,
		&falcon->ctxsw_ucode_info.gpccs,
		g->ops.gr.falcon.get_gpccs_start_reg_offset());
}

int nvgpu_gr_falcon_load_ctxsw_ucode(struct gk20a *g,
		struct nvgpu_gr_falcon *falcon)
{
	int err;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " ");

#ifdef CONFIG_NVGPU_SIM
	if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
		g->ops.gr.falcon.configure_fmodel(g);
	}
#endif

	/*
	 * In case bootloader is not supported, revert to the old way of
	 * loading gr ucode, without the faster bootstrap routine.
	 */
	if (!nvgpu_is_enabled(g, NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP)) {
		nvgpu_gr_falcon_load_dmem(g);
		nvgpu_gr_falcon_load_imem(g);
		g->ops.gr.falcon.start_ucode(g);
	} else {
		if (!falcon->skip_ucode_init) {
			err = nvgpu_gr_falcon_init_ctxsw_ucode(g, falcon);
			if (err != 0) {
				return err;
			}
		}
		nvgpu_gr_falcon_load_with_bootloader(g, falcon);
		falcon->skip_ucode_init = true;
	}

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
	return 0;
}
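/*
 * Illustrative sketch (assumption, not part of the original file): on the
 * DMA bootstrap path the ucode surface is set up only once, because
 * skip_ucode_init is latched after the first successful load, e.g.
 *
 *	err = nvgpu_gr_falcon_load_ctxsw_ucode(g, falcon);  (init + load)
 *	...
 *	err = nvgpu_gr_falcon_load_ctxsw_ucode(g, falcon);  (load only)
 */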

static void nvgpu_gr_falcon_load_gpccs_with_bootloader(struct gk20a *g,
		struct nvgpu_gr_falcon *falcon)
{
	struct nvgpu_ctxsw_ucode_info *ucode_info =
		&falcon->ctxsw_ucode_info;
	u64 addr_base = ucode_info->surface_desc.gpu_va;

	nvgpu_gr_falcon_bind_instblk(g, falcon);

	nvgpu_gr_falcon_load_ctxsw_ucode_segments(g, addr_base,
		&falcon->ctxsw_ucode_info.gpccs,
		g->ops.gr.falcon.get_gpccs_start_reg_offset());
}
#endif

#if defined(CONFIG_NVGPU_DGPU) || defined(CONFIG_NVGPU_LS_PMU)
static int gr_falcon_sec2_or_ls_pmu_bootstrap(struct gk20a *g,
		bool *bootstrap, u32 falcon_id_mask)
{
	int err = 0;
	bool bootstrap_set = false;

#ifdef CONFIG_NVGPU_DGPU
	if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEC2_RTOS)) {
		bootstrap_set = true;
		nvgpu_log(g, gpu_dbg_gr, "bootstrap by SEC2");

		err = nvgpu_sec2_bootstrap_ls_falcons(g,
			&g->sec2, FALCON_ID_FECS);
		if (err == 0) {
			err = nvgpu_sec2_bootstrap_ls_falcons(g,
				&g->sec2, FALCON_ID_GPCCS);
		}
	} else
#endif
#ifdef CONFIG_NVGPU_LS_PMU
	if (g->support_ls_pmu) {
		bootstrap_set = true;
		nvgpu_log(g, gpu_dbg_gr, "bootstrap by LS PMU");

		err = nvgpu_pmu_lsfm_bootstrap_ls_falcon(g,
			g->pmu, g->pmu->lsfm,
			falcon_id_mask);
	}
#endif

	*bootstrap = bootstrap_set;
	return err;
}

static int gr_falcon_sec2_or_ls_pmu_recovery_bootstrap(struct gk20a *g)
{
	int err = 0;
	bool bootstrap = false;
	u32 falcon_idmask = BIT32(FALCON_ID_FECS) | BIT32(FALCON_ID_GPCCS);

	err = gr_falcon_sec2_or_ls_pmu_bootstrap(g,
			&bootstrap,
			falcon_idmask);
	if ((err == 0) && (!bootstrap)) {
		err = nvgpu_acr_bootstrap_hs_acr(g, g->acr);
		if (err != 0) {
			nvgpu_err(g,
				"ACR GR LSF bootstrap failed");
		}
	}

	return err;
}

static int gr_falcon_sec2_or_ls_pmu_coldboot_bootstrap(struct gk20a *g)
{
	int err = 0;
	u8 falcon_id_mask = 0;
	bool bootstrap = false;

	if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) {
		return err;
	}

	if (nvgpu_acr_is_lsf_lazy_bootstrap(g, g->acr,
			FALCON_ID_FECS)) {
		falcon_id_mask |= BIT8(FALCON_ID_FECS);
	}
	if (nvgpu_acr_is_lsf_lazy_bootstrap(g, g->acr,
			FALCON_ID_GPCCS)) {
		falcon_id_mask |= BIT8(FALCON_ID_GPCCS);
	}

	err = gr_falcon_sec2_or_ls_pmu_bootstrap(g,
			&bootstrap,
			(u32)falcon_id_mask);
	if ((err == 0) && (!bootstrap)) {
		/* GR falcons bootstrapped by ACR */
		nvgpu_log(g, gpu_dbg_gr, "bootstrap by ACR");
		err = 0;
	}

	return err;
}
#endif

static int gr_falcon_recovery_bootstrap(struct gk20a *g,
		struct nvgpu_gr_falcon *falcon)
{
	int err = 0;

#ifdef CONFIG_NVGPU_GR_FALCON_NON_SECURE_BOOT
	if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) {
		nvgpu_gr_falcon_load_gpccs_with_bootloader(g, falcon);
#ifdef CONFIG_NVGPU_LS_PMU
		err = nvgpu_pmu_lsfm_bootstrap_ls_falcon(g, g->pmu,
				g->pmu->lsfm, BIT32(FALCON_ID_FECS));
#endif
	} else
#endif
	{
		/* bind WPR VA inst block */
		nvgpu_gr_falcon_bind_instblk(g, falcon);
#if defined(CONFIG_NVGPU_DGPU) || defined(CONFIG_NVGPU_LS_PMU)
		err = gr_falcon_sec2_or_ls_pmu_recovery_bootstrap(g);
#else
		err = nvgpu_acr_bootstrap_hs_acr(g, g->acr);
		if (err != 0) {
			nvgpu_err(g,
				"ACR GR LSF bootstrap failed");
		}
#endif
	}

	return err;
}

static void gr_falcon_coldboot_bootstrap(struct gk20a *g,
		struct nvgpu_gr_falcon *falcon)
{
#ifdef CONFIG_NVGPU_GR_FALCON_NON_SECURE_BOOT
	if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) {
		nvgpu_gr_falcon_load_gpccs_with_bootloader(g, falcon);
	} else
#endif
	{
		/* bind WPR VA inst block */
		nvgpu_gr_falcon_bind_instblk(g, falcon);
	}
}

int nvgpu_gr_falcon_load_secure_ctxsw_ucode(struct gk20a *g,
		struct nvgpu_gr_falcon *falcon)
{
	int err = 0;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " ");

#ifdef CONFIG_NVGPU_SIM
	if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
		g->ops.gr.falcon.configure_fmodel(g);
	}
#endif

	if (falcon->coldboot_bootstrap_done) {
		nvgpu_log(g, gpu_dbg_gr, "recovery bootstrap");

		/* this must be recovery so bootstrap fecs and gpccs */
		err = gr_falcon_recovery_bootstrap(g, falcon);
		if (err != 0) {
			nvgpu_err(g, "Unable to recover GR falcon");
			return err;
		}

	} else {
		nvgpu_log(g, gpu_dbg_gr, "coldboot bootstrap");

		/* cold boot or rg exit */
		falcon->coldboot_bootstrap_done = true;
		gr_falcon_coldboot_bootstrap(g, falcon);
#if defined(CONFIG_NVGPU_DGPU) || defined(CONFIG_NVGPU_LS_PMU)
		err = gr_falcon_sec2_or_ls_pmu_coldboot_bootstrap(g);
		if (err != 0) {
			nvgpu_err(g, "Unable to boot GPCCS");
			return err;
		}
#endif
	}

	g->ops.gr.falcon.start_gpccs(g);
	g->ops.gr.falcon.start_fecs(g);

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");

	return 0;
}
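/*
 * Illustrative note (added for clarity, not part of the original file):
 * the first call to nvgpu_gr_falcon_load_secure_ctxsw_ucode() takes the
 * coldboot path and latches coldboot_bootstrap_done; any later call, for
 * example after a GR engine reset, takes the recovery path and
 * re-bootstraps FECS and GPCCS before restarting them.
 */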

struct nvgpu_ctxsw_ucode_segments *nvgpu_gr_falcon_get_fecs_ucode_segments(
		struct nvgpu_gr_falcon *falcon)
{
	return &falcon->ctxsw_ucode_info.fecs;
}

struct nvgpu_ctxsw_ucode_segments *nvgpu_gr_falcon_get_gpccs_ucode_segments(
		struct nvgpu_gr_falcon *falcon)
{
	return &falcon->ctxsw_ucode_info.gpccs;
}

void *nvgpu_gr_falcon_get_surface_desc_cpu_va(struct nvgpu_gr_falcon *falcon)
{
	return falcon->ctxsw_ucode_info.surface_desc.cpu_va;
}
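/*
 * Illustrative sketch (assumption, not part of the original file): callers
 * are expected to reach the ucode segment descriptors through these
 * accessors rather than through the private structure, e.g.
 *
 *	struct nvgpu_ctxsw_ucode_segments *fecs =
 *		nvgpu_gr_falcon_get_fecs_ucode_segments(falcon);
 *	u32 sig = fecs->boot_signature;
 */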

#ifdef CONFIG_NVGPU_ENGINE_RESET
struct nvgpu_mutex *nvgpu_gr_falcon_get_fecs_mutex(
		struct nvgpu_gr_falcon *falcon)
{
	return &falcon->fecs_mutex;
}
#endif
213
drivers/gpu/nvgpu/common/gr/gr_falcon_priv.h
Normal file
@@ -0,0 +1,213 @@
/*
 * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef NVGPU_GR_FALCON_PRIV_H
#define NVGPU_GR_FALCON_PRIV_H

#include <nvgpu/types.h>
#include <nvgpu/nvgpu_mem.h>

struct nvgpu_ctxsw_ucode_segments;

/** GPCCS boot signature for T18X chip, type: with reserved. */
#define FALCON_UCODE_SIG_T18X_GPCCS_WITH_RESERVED	0x68edab34U

/** FECS boot signature for T21X chip, type: with DMEM size. */
#define FALCON_UCODE_SIG_T21X_FECS_WITH_DMEM_SIZE	0x9121ab5cU
/** FECS boot signature for T21X chip, type: with reserved. */
#define FALCON_UCODE_SIG_T21X_FECS_WITH_RESERVED	0x9125ab5cU
/** FECS boot signature for T21X chip, type: without reserved. */
#define FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED	0x93671b7dU
/** FECS boot signature for T21X chip, type: without reserved2. */
#define FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED2	0x4d6cbc10U
/** GPCCS boot signature for T21X chip, type: with reserved. */
#define FALCON_UCODE_SIG_T21X_GPCCS_WITH_RESERVED	0x3d3d65e2U
/** GPCCS boot signature for T21X chip, type: without reserved. */
#define FALCON_UCODE_SIG_T21X_GPCCS_WITHOUT_RESERVED	0x393161daU

/** FECS boot signature for T12X chip, type: with reserved. */
#define FALCON_UCODE_SIG_T12X_FECS_WITH_RESERVED	0x8a621f78U
/** FECS boot signature for T12X chip, type: without reserved. */
#define FALCON_UCODE_SIG_T12X_FECS_WITHOUT_RESERVED	0x67e5344bU
/** FECS boot signature for T12X chip, type: older. */
#define FALCON_UCODE_SIG_T12X_FECS_OLDER		0x56da09fU

/** GPCCS boot signature for T12X chip, type: with reserved. */
#define FALCON_UCODE_SIG_T12X_GPCCS_WITH_RESERVED	0x303465d5U
/** GPCCS boot signature for T12X chip, type: without reserved. */
#define FALCON_UCODE_SIG_T12X_GPCCS_WITHOUT_RESERVED	0x3fdd33d3U
/** GPCCS boot signature for T12X chip, type: older. */
#define FALCON_UCODE_SIG_T12X_GPCCS_OLDER		0x53d7877U

enum wait_ucode_status {
	/** Status of ucode wait operation : LOOP. */
	WAIT_UCODE_LOOP,
	/** Status of ucode wait operation : timed out. */
	WAIT_UCODE_TIMEOUT,
	/** Status of ucode wait operation : error. */
	WAIT_UCODE_ERROR,
	/** Status of ucode wait operation : success. */
	WAIT_UCODE_OK
};

/** Falcon operation condition : EQUAL. */
#define GR_IS_UCODE_OP_EQUAL		0U
/** Falcon operation condition : NOT_EQUAL. */
#define GR_IS_UCODE_OP_NOT_EQUAL	1U
/** Falcon operation condition : AND. */
#define GR_IS_UCODE_OP_AND		2U
/** Falcon operation condition : LESSER. */
#define GR_IS_UCODE_OP_LESSER		3U
/** Falcon operation condition : LESSER_EQUAL. */
#define GR_IS_UCODE_OP_LESSER_EQUAL	4U
/** Falcon operation condition : SKIP. */
#define GR_IS_UCODE_OP_SKIP		5U

/** Mailbox value in case of successful operation. */
#define FALCON_UCODE_HANDSHAKE_INIT_COMPLETE	1U

struct fecs_mthd_op_method {
	/** Method address to send to FECS microcontroller. */
	u32 addr;
	/** Method data to send to FECS microcontroller. */
	u32 data;
};

struct fecs_mthd_op_mailbox {
	/** Mailbox ID to perform operation. */
	u32 id;
	/** Mailbox data to be written. */
	u32 data;
	/** Mailbox clear value. */
	u32 clr;
	/** Last read mailbox value. */
	u32 *ret;
	/** Mailbox value in case of operation success. */
	u32 ok;
	/** Mailbox value in case of operation failure. */
	u32 fail;
};

struct fecs_mthd_op_cond {
	/** Operation success condition. */
	u32 ok;
	/** Operation fail condition. */
	u32 fail;
};

/**
 * FECS method operation structure.
 *
 * This structure defines the protocol for communication with FECS
 * microcontroller.
 */
struct nvgpu_fecs_method_op {
	/** Method struct */
	struct fecs_mthd_op_method method;
	/** Mailbox struct */
	struct fecs_mthd_op_mailbox mailbox;
	/** Condition struct */
	struct fecs_mthd_op_cond cond;
};
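/*
 * Illustrative sketch (hypothetical values, not part of the original
 * file): a caller builds one nvgpu_fecs_method_op per FECS request,
 * pairing the method with the mailbox to poll and the pass/fail
 * conditions chosen from the GR_IS_UCODE_OP_* values above, e.g.
 *
 *	struct nvgpu_fecs_method_op op = {
 *		.method  = { .addr = method_addr, .data = method_data },
 *		.mailbox = { .id = 0U, .data = 0U, .clr = ~U32(0U),
 *			     .ret = NULL, .ok = 1U, .fail = 0U },
 *		.cond    = { .ok = GR_IS_UCODE_OP_EQUAL,
 *			     .fail = GR_IS_UCODE_OP_SKIP },
 *	};
 *
 * Here method_addr and method_data are placeholders for a HW-specific
 * method.
 */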

/**
 * CTXSW falcon bootloader descriptor structure.
 */
struct nvgpu_ctxsw_bootloader_desc {
	/** Start offset, unused. */
	u32 start_offset;
	/** Size, unused. */
	u32 size;
	/** IMEM offset. */
	u32 imem_offset;
	/** Falcon boot vector. */
	u32 entry_point;
};

/**
 * CTXSW ucode information structure.
 */
struct nvgpu_ctxsw_ucode_info {
	/** Memory to store ucode instance block. */
	struct nvgpu_mem inst_blk_desc;
	/** Memory to store ucode contents locally. */
	struct nvgpu_mem surface_desc;
	/** Ucode segments for FECS. */
	struct nvgpu_ctxsw_ucode_segments fecs;
	/** Ucode segments for GPCCS. */
	struct nvgpu_ctxsw_ucode_segments gpccs;
};

/**
 * Structure to store various sizes queried from FECS
 */
struct nvgpu_gr_falcon_query_sizes {
	/** Size of golden context image. */
	u32 golden_image_size;

#ifdef CONFIG_NVGPU_DEBUGGER
	u32 pm_ctxsw_image_size;
#endif
#ifdef CONFIG_NVGPU_GFXP
	u32 preempt_image_size;
#endif
#ifdef CONFIG_NVGPU_GRAPHICS
	u32 zcull_image_size;
#endif
};

/**
 * GR falcon data structure.
 *
 * This structure stores all data required to load and boot CTXSW ucode,
 * and also to communicate with FECS microcontroller.
 */
struct nvgpu_gr_falcon {
	/**
	 * CTXSW ucode information structure.
	 */
	struct nvgpu_ctxsw_ucode_info ctxsw_ucode_info;

	/**
	 * Mutex to protect all FECS methods.
	 */
	struct nvgpu_mutex fecs_mutex;

	/**
	 * Flag to skip ucode initialization if it is already done.
	 */
	bool skip_ucode_init;

	/**
	 * Flag to trigger recovery bootstrap in case coldboot bootstrap
	 * was already done.
	 */
	bool coldboot_bootstrap_done;

	/**
	 * Structure to hold various sizes that are queried from FECS
	 * microcontroller.
	 */
	struct nvgpu_gr_falcon_query_sizes sizes;
};

#endif /* NVGPU_GR_FALCON_PRIV_H */
1138
drivers/gpu/nvgpu/common/gr/gr_intr.c
Normal file
File diff suppressed because it is too large
Load Diff
224
drivers/gpu/nvgpu/common/gr/gr_intr_priv.h
Normal file
@@ -0,0 +1,224 @@
|
||||
/*
|
||||
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef NVGPU_GR_INTR_PRIV_H
|
||||
#define NVGPU_GR_INTR_PRIV_H
|
||||
|
||||
#include <nvgpu/types.h>
|
||||
#include <nvgpu/lock.h>
|
||||
#include <include/nvgpu/gr/gr_falcon.h>
|
||||
|
||||
struct nvgpu_channel;
|
||||
|
||||
/**
|
||||
* Size of lookup buffer used for context translation to GPU channel
|
||||
* and TSG identifiers.
|
||||
* This value must be a power of 2.
|
||||
*/
|
||||
#define GR_CHANNEL_MAP_TLB_SIZE 2U
|
||||
|
||||
/**
|
||||
* GR interrupt information struct.
|
||||
*
|
||||
* This structure maintains information on pending GR engine interrupts.
|
||||
*/
|
||||
struct nvgpu_gr_intr_info {
|
||||
/**
|
||||
* This value is set in case notification interrupt is pending.
|
||||
* Same value is used to clear the interrupt.
|
||||
*/
|
||||
u32 notify;
|
||||
/**
|
||||
* This value is set in case semaphore interrupt is pending.
|
||||
* Same value is used to clear the interrupt.
|
||||
*/
|
||||
u32 semaphore;
|
||||
/**
|
||||
* This value is set in case illegal notify interrupt is pending.
|
||||
* Same value is used to clear the interrupt.
|
||||
*/
|
||||
u32 illegal_notify;
|
||||
/**
|
||||
* This value is set in case illegal method interrupt is pending.
|
||||
* Same value is used to clear the interrupt.
|
||||
*/
|
||||
u32 illegal_method;
|
||||
/**
|
||||
* This value is set in case illegal class interrupt is pending.
|
||||
* Same value is used to clear the interrupt.
|
||||
*/
|
||||
u32 illegal_class;
|
||||
/**
|
||||
* This value is set in case FECS error interrupt is pending.
|
||||
* Same value is used to clear the interrupt.
|
||||
*/
|
||||
u32 fecs_error;
|
||||
/**
|
||||
* This value is set in case class error interrupt is pending.
|
||||
* Same value is used to clear the interrupt.
|
||||
*/
|
||||
u32 class_error;
|
||||
/**
|
||||
* This value is set in case firmware method interrupt is pending.
|
||||
* Same value is used to clear the interrupt.
|
||||
*/
|
||||
u32 fw_method;
|
||||
/**
|
||||
* This value is set in case exception is pending in graphics pipe.
|
||||
* Same value is used to clear the interrupt.
|
||||
*/
|
||||
u32 exception;
|
||||
/*
|
||||
* This value is set when the FE receives a valid method and it
|
||||
* matches with the value configured in PRI_FE_DEBUG_METHOD_* pri
|
||||
* registers; In case of a match, FE proceeds to drop that method.
|
||||
* This provides a way to the SW to turn off HW decoding of this
|
||||
* method and convert it to a SW method.
|
||||
*/
|
||||
u32 debug_method;
|
||||
/*
|
||||
* This value is set on the completion of a LaunchDma method with
|
||||
* InterruptType field configured to INTERRUPT.
|
||||
*/
|
||||
u32 buffer_notify;
|
||||
};
|
||||
|
||||
/**
|
||||
* TPC exception data structure.
|
||||
*
|
||||
* TPC exceptions can be decomposed into exceptions triggered by its
|
||||
* subunits. This structure keeps track of which subunits have
|
||||
* triggered exception.
|
||||
*/
|
||||
struct nvgpu_gr_tpc_exception {
|
||||
/**
|
||||
* This flag is set in case TEX exception is pending.
|
||||
*/
|
||||
bool tex_exception;
|
||||
/**
|
||||
* This flag is set in case SM exception is pending.
|
||||
*/
|
||||
bool sm_exception;
|
||||
/**
|
||||
* This flag is set in case MPC exception is pending.
|
||||
*/
|
||||
bool mpc_exception;
|
||||
/**
|
||||
* This flag is set in case PE exception is pending.
|
||||
*/
|
||||
bool pe_exception;
|
||||
};
|
||||
|
||||
/**
|
||||
* GR ISR data structure.
|
||||
*
|
||||
* This structure holds all necessary information to handle all GR engine
|
||||
* error/exception interrupts.
|
||||
*/
|
||||
struct nvgpu_gr_isr_data {
|
||||
/**
|
||||
* Contents of TRAPPED_ADDR register used to decode below
|
||||
* fields.
|
||||
*/
|
||||
u32 addr;
|
||||
/**
|
||||
* Low word of the trapped method data.
|
||||
*/
|
||||
u32 data_lo;
|
||||
/**
|
||||
* High word of the trapped method data.
|
||||
*/
|
||||
u32 data_hi;
|
||||
/**
|
||||
* Information of current context.
|
||||
*/
|
||||
u32 curr_ctx;
|
||||
/**
|
||||
* Pointer to faulted GPU channel.
|
||||
*/
|
||||
struct nvgpu_channel *ch;
|
||||
/**
|
||||
* Address of the trapped method.
|
||||
*/
|
||||
u32 offset;
|
||||
/**
|
||||
* Subchannel ID of the trapped method.
|
||||
*/
|
||||
u32 sub_chan;
|
||||
/**
|
||||
* Class ID corresponding to above subchannel.
|
||||
*/
|
||||
u32 class_num;
|
||||
/**
|
||||
* Value read from fecs_host_int_status h/w reg.
|
||||
*/
|
||||
u32 fecs_intr;
|
||||
/**
|
||||
* S/W defined status for fecs_host_int_status.
|
||||
*/
|
||||
struct nvgpu_fecs_host_intr_status fecs_host_intr_status;
|
||||
};
|
||||
|
||||
/**
|
||||
* Details of lookup buffer used to translate context to GPU
|
||||
* channel/TSG identifiers.
|
||||
*/
|
||||
struct gr_channel_map_tlb_entry {
|
||||
/**
|
||||
* Information of context.
|
||||
*/
|
||||
u32 curr_ctx;
|
||||
/**
|
||||
* GPU channel ID.
|
||||
*/
|
||||
u32 chid;
|
||||
/**
|
||||
* GPU Time Slice Group ID.
|
||||
*/
|
||||
u32 tsgid;
|
||||
};
|
||||
|
||||
/**
|
||||
* GR interrupt management data structure.
|
||||
*
|
||||
* This structure holds various fields to manage GR engine interrupt
|
||||
* handling.
|
||||
*/
|
||||
struct nvgpu_gr_intr {
|
||||
/**
|
||||
* Lookup buffer structure used to translate context to GPU
|
||||
* channel and TSG identifiers.
|
||||
*/
|
||||
struct gr_channel_map_tlb_entry chid_tlb[GR_CHANNEL_MAP_TLB_SIZE];
|
||||
/**
|
||||
* Entry in lookup buffer that should be overwritten if there is
|
||||
* no remaining free entry.
|
||||
*/
|
||||
u32 channel_tlb_flush_index;
|
||||
/**
|
||||
* Spinlock for all lookup buffer accesses.
|
||||
*/
|
||||
struct nvgpu_spinlock ch_tlb_lock;
|
||||
};
|
||||
|
||||
#endif /* NVGPU_GR_INTR_PRIV_H */
|
||||
|
||||
143
drivers/gpu/nvgpu/common/gr/gr_priv.h
Normal file
@@ -0,0 +1,143 @@
|
||||
/*
|
||||
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef NVGPU_GR_PRIV_H
|
||||
#define NVGPU_GR_PRIV_H
|
||||
|
||||
#include <nvgpu/types.h>
|
||||
#include <nvgpu/cond.h>
|
||||
|
||||
struct nvgpu_gr_ctx_desc;
|
||||
struct nvgpu_gr_global_ctx_buffer_desc;
|
||||
struct nvgpu_gr_obj_ctx_golden_image;
|
||||
struct nvgpu_gr_config;
|
||||
#ifdef CONFIG_NVGPU_GRAPHICS
|
||||
struct nvgpu_gr_zbc;
|
||||
struct nvgpu_gr_zcull;
|
||||
#endif
|
||||
#ifdef CONFIG_NVGPU_DEBUGGER
|
||||
struct nvgpu_gr_hwpm_map;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* GR engine data structure.
|
||||
*
|
||||
* This is the parent structure to all other GR engine data structures,
|
||||
* and holds a pointer to all of them. This structure also stores
|
||||
* various fields to track GR engine initialization state.
|
||||
*
|
||||
* Pointer to this structure is maintained in GPU driver structure.
|
||||
*/
|
||||
struct nvgpu_gr {
|
||||
/**
|
||||
* Pointer to GPU driver struct.
|
||||
*/
|
||||
struct gk20a *g;
|
||||
|
||||
/**
|
||||
* Instance ID of GR engine.
|
||||
*/
|
||||
u32 instance_id;
|
||||
|
||||
/**
|
||||
* Condition variable for GR initialization.
|
||||
* Waiters shall wait on this condition to ensure GR engine
|
||||
* is initialized.
|
||||
*/
|
||||
struct nvgpu_cond init_wq;
|
||||
|
||||
/**
|
||||
* Flag to indicate if GR engine is initialized.
|
||||
*/
|
||||
bool initialized;
|
||||
|
||||
/**
|
||||
* Syspipe ID of the GR instance.
|
||||
*/
|
||||
u32 syspipe_id;
|
||||
|
||||
/**
|
||||
* Pointer to global context buffer descriptor structure.
|
||||
*/
|
||||
struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer;
|
||||
|
||||
/**
|
||||
* Pointer to Golden context image structure.
|
||||
*/
|
||||
struct nvgpu_gr_obj_ctx_golden_image *golden_image;
|
||||
|
||||
/**
|
||||
* Pointer to GR context descriptor structure.
|
||||
*/
|
||||
struct nvgpu_gr_ctx_desc *gr_ctx_desc;
|
||||
|
||||
/**
|
||||
* Pointer to GR configuration structure.
|
||||
*/
|
||||
struct nvgpu_gr_config *config;
|
||||
|
||||
/**
|
||||
* Pointer to GR falcon data structure.
|
||||
*/
|
||||
struct nvgpu_gr_falcon *falcon;
|
||||
|
||||
/**
|
||||
* Pointer to GR interrupt data structure.
|
||||
*/
|
||||
struct nvgpu_gr_intr *intr;
|
||||
|
||||
/**
|
||||
* Function pointer to remove GR s/w support.
|
||||
*/
|
||||
void (*remove_support)(struct gk20a *g);
|
||||
|
||||
/**
|
||||
* Flag to indicate GR s/w has been initialized.
|
||||
*/
|
||||
bool sw_ready;
|
||||
|
||||
#ifdef CONFIG_NVGPU_DEBUGGER
|
||||
struct nvgpu_gr_hwpm_map *hwpm_map;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_NVGPU_GRAPHICS
|
||||
struct nvgpu_gr_zcull *zcull;
|
||||
|
||||
struct nvgpu_gr_zbc *zbc;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_NVGPU_NON_FUSA
|
||||
u32 fecs_feature_override_ecc_val;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_NVGPU_CILP
|
||||
u32 cilp_preempt_pending_chid;
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_NVGPU_RECOVERY) || defined(CONFIG_NVGPU_DEBUGGER)
|
||||
struct nvgpu_mutex ctxsw_disable_mutex;
|
||||
int ctxsw_disable_count;
|
||||
#endif
|
||||
};
|
||||
|
||||
#endif /* NVGPU_GR_PRIV_H */
|
||||
|
||||
396
drivers/gpu/nvgpu/common/gr/gr_setup.c
Normal file
@@ -0,0 +1,396 @@
|
||||
/*
|
||||
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/log.h>
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/gr/ctx.h>
|
||||
#include <nvgpu/gr/subctx.h>
|
||||
#include <nvgpu/gr/obj_ctx.h>
|
||||
#ifdef CONFIG_NVGPU_GRAPHICS
|
||||
#include <nvgpu/gr/zcull.h>
|
||||
#endif
|
||||
#include <nvgpu/gr/setup.h>
|
||||
#include <nvgpu/gr/gr_instances.h>
|
||||
#include <nvgpu/channel.h>
|
||||
#include <nvgpu/preempt.h>
|
||||
|
||||
#include "gr_priv.h"
|
||||
|
||||
#ifdef CONFIG_NVGPU_GRAPHICS
|
||||
static int nvgpu_gr_setup_zcull(struct gk20a *g, struct nvgpu_channel *c,
|
||||
struct nvgpu_gr_ctx *gr_ctx)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
ret = nvgpu_channel_disable_tsg(g, c);
|
||||
if (ret != 0) {
|
||||
nvgpu_err(g, "failed to disable channel/TSG");
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = nvgpu_preempt_channel(g, c);
|
||||
if (ret != 0) {
|
||||
nvgpu_err(g, "failed to preempt channel/TSG");
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = nvgpu_gr_zcull_ctx_setup(g, c->subctx, gr_ctx);
|
||||
if (ret != 0) {
|
||||
nvgpu_err(g, "failed to setup zcull");
|
||||
goto out;
|
||||
}
|
||||
/* no error at this point */
|
||||
ret = nvgpu_channel_enable_tsg(g, c);
|
||||
if (ret != 0) {
|
||||
nvgpu_err(g, "failed to re-enable channel/TSG");
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
||||
out:
|
||||
/*
|
||||
* control reaches here if preempt failed or nvgpu_gr_zcull_ctx_setup
|
||||
* failed. Propagate preempt failure err or err for
|
||||
* nvgpu_gr_zcull_ctx_setup
|
||||
*/
|
||||
if (nvgpu_channel_enable_tsg(g, c) != 0) {
|
||||
/* ch might not be bound to tsg */
|
||||
nvgpu_err(g, "failed to enable channel/TSG");
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int nvgpu_gr_setup_bind_ctxsw_zcull(struct gk20a *g, struct nvgpu_channel *c,
|
||||
u64 zcull_va, u32 mode)
|
||||
{
|
||||
struct nvgpu_tsg *tsg;
|
||||
struct nvgpu_gr_ctx *gr_ctx;
|
||||
|
||||
tsg = nvgpu_tsg_from_ch(c);
|
||||
if (tsg == NULL) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
gr_ctx = tsg->gr_ctx;
|
||||
nvgpu_gr_ctx_set_zcull_ctx(g, gr_ctx, mode, zcull_va);
|
||||
|
||||
return nvgpu_gr_setup_zcull(g, c, gr_ctx);
|
||||
}
|
||||
#endif
|
||||
|
||||
static int nvgpu_gr_setup_validate_channel_and_class(struct gk20a *g,
|
||||
struct nvgpu_channel *c, u32 class_num)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
/* an address space needs to have been bound at this point.*/
|
||||
if (!nvgpu_channel_as_bound(c)) {
|
||||
nvgpu_err(g,
|
||||
"not bound to address space at time"
|
||||
" of grctx allocation");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (!g->ops.gpu_class.is_valid(class_num)) {
|
||||
nvgpu_err(g,
|
||||
"invalid obj class 0x%x", class_num);
|
||||
err = -EINVAL;
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int nvgpu_gr_setup_alloc_subctx(struct gk20a *g, struct nvgpu_channel *c)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) {
|
||||
if (c->subctx == NULL) {
|
||||
c->subctx = nvgpu_gr_subctx_alloc(g, c->vm);
|
||||
if (c->subctx == NULL) {
|
||||
err = -ENOMEM;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
int nvgpu_gr_setup_alloc_obj_ctx(struct nvgpu_channel *c, u32 class_num,
|
||||
u32 flags)
|
||||
{
|
||||
struct gk20a *g = c->g;
|
||||
struct nvgpu_gr_ctx *gr_ctx;
|
||||
struct nvgpu_tsg *tsg = NULL;
|
||||
int err = 0;
|
||||
struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr,
|
||||
"GR%u: allocate object context for channel %u",
|
||||
gr->instance_id, c->chid);
|
||||
|
||||
err = nvgpu_gr_setup_validate_channel_and_class(g, c, class_num);
|
||||
if (err != 0) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
c->obj_class = class_num;
|
||||
|
||||
#ifndef CONFIG_NVGPU_HAL_NON_FUSA
|
||||
/*
|
||||
* Only compute and graphics classes need object context.
|
||||
* Return success for valid non-compute and non-graphics classes.
|
||||
* Invalid classes are already captured in
|
||||
* nvgpu_gr_setup_validate_channel_and_class() function.
|
||||
*/
|
||||
if (!g->ops.gpu_class.is_valid_compute(class_num) &&
|
||||
!g->ops.gpu_class.is_valid_gfx(class_num)) {
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
tsg = nvgpu_tsg_from_ch(c);
|
||||
if (tsg == NULL) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
err = nvgpu_gr_setup_alloc_subctx(g, c);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "failed to allocate gr subctx buffer");
|
||||
goto out;
|
||||
}
|
||||
|
||||
nvgpu_mutex_acquire(&tsg->ctx_init_lock);
|
||||
|
||||
gr_ctx = tsg->gr_ctx;
|
||||
|
||||
if (!nvgpu_mem_is_valid(nvgpu_gr_ctx_get_ctx_mem(gr_ctx))) {
|
||||
tsg->vm = c->vm;
|
||||
nvgpu_vm_get(tsg->vm);
|
||||
|
||||
err = nvgpu_gr_obj_ctx_alloc(g, gr->golden_image,
|
||||
gr->global_ctx_buffer, gr->gr_ctx_desc,
|
||||
gr->config, gr_ctx, c->subctx,
|
||||
tsg->vm, &c->inst_block, class_num, flags,
|
||||
c->cde, c->vpr);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g,
|
||||
"failed to allocate gr ctx buffer");
|
||||
nvgpu_mutex_release(&tsg->ctx_init_lock);
|
||||
nvgpu_vm_put(tsg->vm);
|
||||
tsg->vm = NULL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
nvgpu_gr_ctx_set_tsgid(gr_ctx, tsg->tsgid);
|
||||
} else {
|
||||
/* commit gr ctx buffer */
|
||||
nvgpu_gr_obj_ctx_commit_inst(g, &c->inst_block, gr_ctx,
|
||||
c->subctx, nvgpu_gr_ctx_get_ctx_mem(gr_ctx)->gpu_va);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_FECS_TRACE
|
||||
if (g->ops.gr.fecs_trace.bind_channel && !c->vpr) {
|
||||
err = g->ops.gr.fecs_trace.bind_channel(g, &c->inst_block,
|
||||
c->subctx, gr_ctx, tsg->tgid, 0);
|
||||
if (err != 0) {
|
||||
nvgpu_warn(g,
|
||||
"fail to bind channel for ctxsw trace");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_NVGPU_DEBUGGER
|
||||
if ((g->num_sys_perfmon == 0U) &&
|
||||
(g->ops.perf.get_num_hwpm_perfmon != NULL) &&
|
||||
(err == 0)) {
|
||||
g->ops.perf.get_num_hwpm_perfmon(g, &g->num_sys_perfmon,
|
||||
&g->num_fbp_perfmon, &g->num_gpc_perfmon);
|
||||
nvgpu_log(g, gpu_dbg_gr | gpu_dbg_gpu_dbg,
|
||||
"num_sys_perfmon[%u] num_fbp_perfmon[%u] "
|
||||
"num_gpc_perfmon[%u] ",
|
||||
g->num_sys_perfmon, g->num_fbp_perfmon,
|
||||
g->num_gpc_perfmon);
|
||||
nvgpu_assert((g->num_sys_perfmon != 0U) &&
|
||||
(g->num_fbp_perfmon != 0U) &&
|
||||
(g->num_gpc_perfmon != 0U));
|
||||
}
|
||||
#endif
|
||||
|
||||
nvgpu_mutex_release(&tsg->ctx_init_lock);
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
|
||||
return 0;
|
||||
out:
|
||||
if (c->subctx != NULL) {
|
||||
nvgpu_gr_subctx_free(g, c->subctx, c->vm);
|
||||
c->subctx = NULL;
|
||||
}
|
||||
|
||||
/* 1. gr_ctx, patch_ctx and global ctx buffer mapping
|
||||
can be reused so no need to release them.
|
||||
2. golden image init and load is a one time thing so if
|
||||
they pass, no need to undo. */
|
||||
nvgpu_err(g, "fail");
|
||||
return err;
|
||||
}
|
||||
|
||||
void nvgpu_gr_setup_free_gr_ctx(struct gk20a *g,
|
||||
struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx)
|
||||
{
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
if (gr_ctx != NULL) {
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
if ((g->ops.gr.ctxsw_prog.dump_ctxsw_stats != NULL) &&
|
||||
nvgpu_gr_ctx_desc_dump_ctxsw_stats_on_channel_close(
|
||||
g->gr->gr_ctx_desc)) {
|
||||
g->ops.gr.ctxsw_prog.dump_ctxsw_stats(g,
|
||||
nvgpu_gr_ctx_get_ctx_mem(gr_ctx));
|
||||
}
|
||||
#endif
|
||||
|
||||
nvgpu_gr_ctx_free(g, gr_ctx, g->gr->global_ctx_buffer, vm);
|
||||
}
|
||||
}
|
||||
|
||||
void nvgpu_gr_setup_free_subctx(struct nvgpu_channel *c)
|
||||
{
|
||||
nvgpu_log_fn(c->g, " ");
|
||||
|
||||
if (!nvgpu_is_enabled(c->g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (c->subctx != NULL) {
|
||||
nvgpu_gr_subctx_free(c->g, c->subctx, c->vm);
|
||||
c->subctx = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static bool nvgpu_gr_setup_validate_preemption_mode(u32 *graphics_preempt_mode,
|
||||
u32 *compute_preempt_mode,
|
||||
struct nvgpu_gr_ctx *gr_ctx)
|
||||
{
|
||||
#ifdef CONFIG_NVGPU_GRAPHICS
|
||||
/* skip setting anything if both modes are already set */
|
||||
if ((*graphics_preempt_mode != 0U) &&
|
||||
(*graphics_preempt_mode ==
|
||||
nvgpu_gr_ctx_get_graphics_preemption_mode(gr_ctx))) {
|
||||
*graphics_preempt_mode = 0;
|
||||
}
|
||||
#endif /* CONFIG_NVGPU_GRAPHICS */
|
||||
|
||||
if ((*compute_preempt_mode != 0U) &&
|
||||
(*compute_preempt_mode ==
|
||||
nvgpu_gr_ctx_get_compute_preemption_mode(gr_ctx))) {
|
||||
*compute_preempt_mode = 0;
|
||||
}
|
||||
|
||||
if ((*graphics_preempt_mode == 0U) && (*compute_preempt_mode == 0U)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
int nvgpu_gr_setup_set_preemption_mode(struct nvgpu_channel *ch,
|
||||
u32 graphics_preempt_mode, u32 compute_preempt_mode,
|
||||
u32 gr_instance_id)
|
||||
{
|
||||
struct nvgpu_gr_ctx *gr_ctx;
|
||||
struct gk20a *g = ch->g;
|
||||
struct nvgpu_tsg *tsg;
|
||||
struct vm_gk20a *vm;
|
||||
struct nvgpu_gr *gr;
|
||||
u32 class_num;
|
||||
int err = 0;
|
||||
|
||||
gr = &g->gr[gr_instance_id];
|
||||
|
||||
class_num = ch->obj_class;
|
||||
if (class_num == 0U) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (!g->ops.gpu_class.is_valid(class_num)) {
|
||||
nvgpu_err(g, "invalid obj class 0x%x", class_num);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
tsg = nvgpu_tsg_from_ch(ch);
|
||||
if (tsg == NULL) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
vm = tsg->vm;
|
||||
gr_ctx = tsg->gr_ctx;
|
||||
|
||||
if (nvgpu_gr_setup_validate_preemption_mode(&graphics_preempt_mode,
|
||||
&compute_preempt_mode, gr_ctx) == false) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
nvgpu_log(g, gpu_dbg_gr | gpu_dbg_sched, "chid=%d tsgid=%d pid=%d "
|
||||
"graphics_preempt_mode=%u compute_preempt_mode=%u",
|
||||
ch->chid, ch->tsgid, ch->tgid,
|
||||
graphics_preempt_mode, compute_preempt_mode);
|
||||
|
||||
err = nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(g, gr->config,
|
||||
gr->gr_ctx_desc, gr_ctx, vm, class_num,
|
||||
graphics_preempt_mode, compute_preempt_mode);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "set_ctxsw_preemption_mode failed");
|
||||
return err;
|
||||
}
|
||||
|
||||
g->ops.tsg.disable(tsg);
|
||||
|
||||
err = nvgpu_preempt_channel(g, ch);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "failed to preempt channel/TSG");
|
||||
goto enable_ch;
|
||||
}
|
||||
|
||||
nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(g, gr->config, gr_ctx,
|
||||
ch->subctx);
|
||||
|
||||
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
|
||||
nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, true);
|
||||
g->ops.gr.init.commit_global_cb_manager(g, gr->config, gr_ctx,
|
||||
true);
|
||||
nvgpu_gr_ctx_patch_write_end(g, gr_ctx, true);
|
||||
}
|
||||
|
||||
g->ops.tsg.enable(tsg);
|
||||
|
||||
return err;
|
||||
|
||||
enable_ch:
|
||||
g->ops.tsg.enable(tsg);
|
||||
return err;
|
||||
}
|
||||
143
drivers/gpu/nvgpu/common/gr/gr_utils.c
Normal file
@@ -0,0 +1,143 @@
/*
 * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <nvgpu/gk20a.h>
#include <nvgpu/types.h>
#include <nvgpu/gr/gr_utils.h>
#include <nvgpu/gr/gr_instances.h>

#include <nvgpu/gr/config.h>

#include "gr_priv.h"

u32 nvgpu_gr_checksum_u32(u32 a, u32 b)
{
	return nvgpu_safe_cast_u64_to_u32(((u64)a + (u64)b) & (U32_MAX));
}
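/*
 * Illustrative note (added for clarity, not part of the original file):
 * the u64 intermediate keeps the addition in 32-bit modular arithmetic,
 * e.g. nvgpu_gr_checksum_u32(U32_MAX, 2U) evaluates to 1U instead of
 * tripping a 32-bit overflow check.
 */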

struct nvgpu_gr_falcon *nvgpu_gr_get_falcon_ptr(struct gk20a *g)
{
	struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
	return gr->falcon;
}

struct nvgpu_gr_config *nvgpu_gr_get_config_ptr(struct gk20a *g)
{
	struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
	return gr->config;
}

struct nvgpu_gr_config *nvgpu_gr_get_gr_instance_config_ptr(struct gk20a *g,
		u32 gr_instance_id)
{
	return g->gr[gr_instance_id].config;
}
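/*
 * Illustrative sketch (assumption, not part of the original file):
 * nvgpu_gr_get_config_ptr() resolves the currently selected GR instance,
 * while this variant lets a caller address a specific instance directly,
 * e.g.
 *
 *	struct nvgpu_gr_config *cfg =
 *		nvgpu_gr_get_gr_instance_config_ptr(g, 0U);
 */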

struct nvgpu_gr_intr *nvgpu_gr_get_intr_ptr(struct gk20a *g)
{
	struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
	return gr->intr;
}

#ifdef CONFIG_NVGPU_NON_FUSA
u32 nvgpu_gr_get_override_ecc_val(struct gk20a *g)
{
	struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
	return gr->fecs_feature_override_ecc_val;
}

void nvgpu_gr_override_ecc_val(struct nvgpu_gr *gr, u32 ecc_val)
{
	gr->fecs_feature_override_ecc_val = ecc_val;
}
#endif

#ifdef CONFIG_NVGPU_GRAPHICS
struct nvgpu_gr_zcull *nvgpu_gr_get_zcull_ptr(struct gk20a *g)
{
	struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
	return gr->zcull;
}

struct nvgpu_gr_zbc *nvgpu_gr_get_zbc_ptr(struct gk20a *g)
{
	struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
	return gr->zbc;
}
#endif

#ifdef CONFIG_NVGPU_FECS_TRACE
struct nvgpu_gr_global_ctx_buffer_desc *nvgpu_gr_get_global_ctx_buffer_ptr(
		struct gk20a *g)
{
	struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
	return gr->global_ctx_buffer;
}
#endif

#ifdef CONFIG_NVGPU_CILP
u32 nvgpu_gr_get_cilp_preempt_pending_chid(struct gk20a *g)
{
	struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);

	return gr->cilp_preempt_pending_chid;
}

void nvgpu_gr_clear_cilp_preempt_pending_chid(struct gk20a *g)
{
	struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);

	gr->cilp_preempt_pending_chid =
			NVGPU_INVALID_CHANNEL_ID;
}
#endif

#ifdef CONFIG_NVGPU_DEBUGGER
struct nvgpu_gr_obj_ctx_golden_image *nvgpu_gr_get_golden_image_ptr(
		struct gk20a *g)
{
	struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);

	return gr->golden_image;
}

struct nvgpu_gr_hwpm_map *nvgpu_gr_get_hwpm_map_ptr(struct gk20a *g)
{
	struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);

	return gr->hwpm_map;
}

void nvgpu_gr_reset_falcon_ptr(struct gk20a *g)
{
	struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);

	gr->falcon = NULL;
}

void nvgpu_gr_reset_golden_image_ptr(struct gk20a *g)
{
	struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);

	gr->golden_image = NULL;
}
#endif
615
drivers/gpu/nvgpu/common/gr/hwpm_map.c
Normal file
@@ -0,0 +1,615 @@
|
||||
/*
|
||||
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/netlist.h>
|
||||
#include <nvgpu/log.h>
|
||||
#include <nvgpu/sort.h>
|
||||
#include <nvgpu/kmem.h>
|
||||
#include <nvgpu/bsearch.h>
|
||||
#include <nvgpu/fbp.h>
|
||||
#include <nvgpu/gr/config.h>
|
||||
#include <nvgpu/gr/hwpm_map.h>
|
||||
|
||||
/* needed for pri_is_ppc_addr_shared */
|
||||
#include "hal/gr/gr/gr_pri_gk20a.h"
|
||||
|
||||
#define NV_PCFG_BASE 0x00088000U
|
||||
#define NV_PERF_PMM_FBP_ROUTER_STRIDE 0x0200U
|
||||
#define NV_PERF_PMMGPCROUTER_STRIDE 0x0200U
|
||||
#define NV_XBAR_MXBAR_PRI_GPC_GNIC_STRIDE 0x0020U
|
||||
|
||||
/* Dummy address for ctxsw'ed pri reg checksum. */
|
||||
#define CTXSW_PRI_CHECKSUM_DUMMY_REG 0x00ffffffU
|
||||
|
||||
int nvgpu_gr_hwpm_map_init(struct gk20a *g, struct nvgpu_gr_hwpm_map **hwpm_map,
|
||||
u32 size)
|
||||
{
|
||||
struct nvgpu_gr_hwpm_map *tmp_map;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_gr, "size = %u", size);
|
||||
|
||||
if (size == 0U) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
tmp_map = nvgpu_kzalloc(g, sizeof(*tmp_map));
|
||||
if (tmp_map == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
tmp_map->pm_ctxsw_image_size = size;
|
||||
tmp_map->init = false;
|
||||
|
||||
*hwpm_map = tmp_map;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void nvgpu_gr_hwpm_map_deinit(struct gk20a *g,
|
||||
struct nvgpu_gr_hwpm_map *hwpm_map)
|
||||
{
|
||||
if (hwpm_map->init) {
|
||||
nvgpu_big_free(g, hwpm_map->map);
|
||||
}
|
||||
|
||||
nvgpu_kfree(g, hwpm_map);
|
||||
}
|
||||
|
||||
u32 nvgpu_gr_hwpm_map_get_size(struct nvgpu_gr_hwpm_map *hwpm_map)
|
||||
{
|
||||
return hwpm_map->pm_ctxsw_image_size;
|
||||
}
|
||||
|
||||
static int map_cmp(const void *a, const void *b)
|
||||
{
|
||||
const struct ctxsw_buf_offset_map_entry *e1;
|
||||
const struct ctxsw_buf_offset_map_entry *e2;
|
||||
|
||||
e1 = (const struct ctxsw_buf_offset_map_entry *)a;
|
||||
e2 = (const struct ctxsw_buf_offset_map_entry *)b;
|
||||
|
||||
if (e1->addr < e2->addr) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (e1->addr > e2->addr) {
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int add_ctxsw_buffer_map_entries_pmsys(
|
||||
struct ctxsw_buf_offset_map_entry *map,
|
||||
struct netlist_aiv_list *regs, u32 *count, u32 *offset,
|
||||
u32 max_cnt, u32 base, u32 mask)
|
||||
{
|
||||
u32 idx;
|
||||
u32 cnt = *count;
|
||||
u32 off = *offset;
|
||||
|
||||
if ((cnt + regs->count) > max_cnt) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
for (idx = 0; idx < regs->count; idx++) {
|
||||
if ((base + (regs->l[idx].addr & mask)) < 0xFFFU) {
|
||||
map[cnt].addr = base + (regs->l[idx].addr & mask)
|
||||
+ NV_PCFG_BASE;
|
||||
} else {
|
||||
map[cnt].addr = base + (regs->l[idx].addr & mask);
|
||||
}
|
||||
map[cnt++].offset = off;
|
||||
off += 4U;
|
||||
}
|
||||
*count = cnt;
|
||||
*offset = off;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int add_ctxsw_buffer_map_entries_pmgpc(struct gk20a *g,
|
||||
struct ctxsw_buf_offset_map_entry *map,
|
||||
struct netlist_aiv_list *regs,
|
||||
u32 *count, u32 *offset,
|
||||
u32 max_cnt, u32 base, u32 mask)
|
||||
{
|
||||
u32 idx;
|
||||
u32 cnt = *count;
|
||||
u32 off = *offset;
|
||||
|
||||
if ((cnt + regs->count) > max_cnt) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* NOTE: The PPC offsets get added to the pm_gpc list if numPpc <= 1
|
||||
* To handle the case of PPC registers getting added into GPC, the below
|
||||
* code specifically checks for any PPC offsets and adds them using
|
||||
* proper mask
|
||||
*/
|
||||
for (idx = 0; idx < regs->count; idx++) {
|
||||
/* Check if the address is PPC address */
|
||||
if (pri_is_ppc_addr_shared(g, regs->l[idx].addr & mask)) {
|
||||
u32 ppc_in_gpc_base = nvgpu_get_litter_value(g,
|
||||
GPU_LIT_PPC_IN_GPC_BASE);
|
||||
u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g,
|
||||
GPU_LIT_PPC_IN_GPC_STRIDE);
|
||||
/* Use PPC mask instead of the GPC mask provided */
|
||||
u32 ppcmask = ppc_in_gpc_stride - 1U;
|
||||
|
||||
map[cnt].addr = base + ppc_in_gpc_base
|
||||
+ (regs->l[idx].addr & ppcmask);
|
||||
} else {
|
||||
map[cnt].addr = base + (regs->l[idx].addr & mask);
|
||||
}
|
||||
map[cnt++].offset = off;
|
||||
off += 4U;
|
||||
}
|
||||
*count = cnt;
|
||||
*offset = off;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int add_ctxsw_buffer_map_entries(struct ctxsw_buf_offset_map_entry *map,
|
||||
struct netlist_aiv_list *regs,
|
||||
u32 *count, u32 *offset,
|
||||
u32 max_cnt, u32 base, u32 mask)
|
||||
{
|
||||
u32 idx;
|
||||
u32 cnt = *count;
|
||||
u32 off = *offset;
|
||||
|
||||
if ((cnt + regs->count) > max_cnt) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
for (idx = 0; idx < regs->count; idx++) {
|
||||
map[cnt].addr = base + (regs->l[idx].addr & mask);
|
||||
map[cnt++].offset = off;
|
||||
off += 4U;
|
||||
}
|
||||
*count = cnt;
|
||||
*offset = off;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Helper function to add register entries to the register map for all
|
||||
* subunits
|
||||
*/
|
||||
static int add_ctxsw_buffer_map_entries_subunits(
|
||||
struct ctxsw_buf_offset_map_entry *map,
|
||||
struct netlist_aiv_list *regs,
|
||||
u32 *count, u32 *offset,
|
||||
u32 max_cnt, u32 base, u32 num_units,
|
||||
u32 active_unit_mask, u32 stride, u32 mask)
|
||||
{
|
||||
u32 unit;
|
||||
u32 idx;
|
||||
u32 cnt = *count;
|
||||
u32 off = *offset;
|
||||
|
||||
if ((cnt + (regs->count * num_units)) > max_cnt) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Data is interleaved for units in ctxsw buffer */
|
||||
for (idx = 0; idx < regs->count; idx++) {
|
||||
for (unit = 0; unit < num_units; unit++) {
|
||||
if ((active_unit_mask & BIT32(unit)) != 0U) {
|
||||
map[cnt].addr = base +
|
||||
(regs->l[idx].addr & mask) +
|
||||
(unit * stride);
|
||||
map[cnt++].offset = off;
|
||||
off += 4U;
|
||||
|
||||
/*
|
||||
* The ucode computes and saves the checksum of
|
||||
* all ctxsw'ed register values within a list.
|
||||
* Entries with addr=0x00ffffff are placeholder
|
||||
* for these checksums.
|
||||
*
|
||||
* There is only one checksum for a list
|
||||
* even if it contains multiple subunits. Hence,
|
||||
* skip iterating over all subunits for this
|
||||
* entry.
|
||||
*/
|
||||
if (regs->l[idx].addr ==
|
||||
CTXSW_PRI_CHECKSUM_DUMMY_REG) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
*count = cnt;
|
||||
*offset = off;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g,
|
||||
struct ctxsw_buf_offset_map_entry *map,
|
||||
u32 *count, u32 *offset, u32 max_cnt,
|
||||
struct nvgpu_gr_config *config)
|
||||
{
|
||||
u32 num_gpcs = nvgpu_gr_config_get_gpc_count(config);
|
||||
u32 num_ppcs, num_tpcs, gpc_num, base;
|
||||
u32 gpc_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_BASE);
|
||||
u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
|
||||
u32 ppc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_BASE);
|
||||
u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
|
||||
u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE);
|
||||
u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
|
||||
|
||||
for (gpc_num = 0; gpc_num < num_gpcs; gpc_num++) {
|
||||
num_tpcs = nvgpu_gr_config_get_gpc_tpc_count(config, gpc_num);
|
||||
base = gpc_base + (gpc_stride * gpc_num) + tpc_in_gpc_base;
|
||||
if (add_ctxsw_buffer_map_entries_subunits(map,
|
||||
nvgpu_netlist_get_pm_tpc_ctxsw_regs(g),
|
||||
count, offset, max_cnt, base,
|
||||
num_tpcs, ~U32(0U), tpc_in_gpc_stride,
|
||||
(tpc_in_gpc_stride - 1U)) != 0) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
num_ppcs = nvgpu_gr_config_get_gpc_ppc_count(config, gpc_num);
|
||||
base = gpc_base + (gpc_stride * gpc_num) + ppc_in_gpc_base;
|
||||
if (add_ctxsw_buffer_map_entries_subunits(map,
|
||||
nvgpu_netlist_get_pm_ppc_ctxsw_regs(g),
|
||||
count, offset, max_cnt, base, num_ppcs,
|
||||
~U32(0U), ppc_in_gpc_stride,
|
||||
(ppc_in_gpc_stride - 1U)) != 0) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
base = gpc_base + (gpc_stride * gpc_num);
|
||||
if (add_ctxsw_buffer_map_entries_pmgpc(g, map,
|
||||
nvgpu_netlist_get_pm_gpc_ctxsw_regs(g),
|
||||
count, offset, max_cnt, base,
|
||||
(gpc_stride - 1U)) != 0) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
base = NV_XBAR_MXBAR_PRI_GPC_GNIC_STRIDE * gpc_num;
|
||||
if (add_ctxsw_buffer_map_entries(map,
|
||||
nvgpu_netlist_get_pm_ucgpc_ctxsw_regs(g),
|
||||
count, offset, max_cnt, base, ~U32(0U)) != 0) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
base = (g->ops.perf.get_pmmgpc_per_chiplet_offset() * gpc_num);
|
||||
if (add_ctxsw_buffer_map_entries(map,
|
||||
nvgpu_netlist_get_perf_gpc_ctxsw_regs(g),
|
||||
count, offset, max_cnt, base, ~U32(0U)) != 0) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
base = (NV_PERF_PMMGPCROUTER_STRIDE * gpc_num);
|
||||
if (add_ctxsw_buffer_map_entries(map,
|
||||
nvgpu_netlist_get_gpc_router_ctxsw_regs(g),
|
||||
count, offset, max_cnt, base, ~U32(0U)) != 0) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Counter Aggregation Unit, if available */
|
||||
if (nvgpu_netlist_get_pm_cau_ctxsw_regs(g)->count != 0U) {
|
||||
base = gpc_base + (gpc_stride * gpc_num)
|
||||
+ tpc_in_gpc_base;
|
||||
if (add_ctxsw_buffer_map_entries_subunits(map,
|
||||
nvgpu_netlist_get_pm_cau_ctxsw_regs(g),
|
||||
count, offset, max_cnt, base, num_tpcs,
|
||||
~U32(0U), tpc_in_gpc_stride,
|
||||
(tpc_in_gpc_stride - 1U)) != 0) {
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
*offset = NVGPU_ALIGN(*offset, 256U);
|
||||
|
||||
base = (g->ops.perf.get_pmmgpc_per_chiplet_offset() * gpc_num);
|
||||
if (add_ctxsw_buffer_map_entries(map,
|
||||
nvgpu_netlist_get_perf_gpc_control_ctxsw_regs(g),
|
||||
count, offset, max_cnt, base, ~U32(0U)) != 0) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
*offset = NVGPU_ALIGN(*offset, 256U);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * PM CTXSW BUFFER LAYOUT:
 *|=============================================|0x00 <----PM CTXSW BUFFER BASE
 *| LIST_compressed_pm_ctx_reg_SYS |Space allocated: numRegs words
 *| LIST_compressed_nv_perf_ctx_reg_SYS |Space allocated: numRegs words
 *| LIST_compressed_nv_perf_ctx_reg_sysrouter |Space allocated: numRegs words
 *| PADDING for 256 byte alignment on Maxwell+ |
 *|=============================================|<----256 byte aligned on Maxwell and later
 *| LIST_compressed_nv_perf_sys_control_ctx_regs|Space allocated: numRegs words (+ padding)
 *| PADDING for 256 byte alignment |(If reg list is empty, 0 bytes allocated.)
 *|=============================================|<----256 byte aligned
 *| LIST_compressed_nv_perf_ctx_reg_PMA |Space allocated: numRegs words (+ padding)
 *| PADDING for 256 byte alignment |
 *|=============================================|<----256 byte aligned (if prev segment exists)
 *| LIST_compressed_nv_perf_pma_control_ctx_regs|Space allocated: numRegs words (+ padding)
 *| PADDING for 256 byte alignment |(If reg list is empty, 0 bytes allocated.)
 *|=============================================|<----256 byte aligned
 *| LIST_compressed_nv_perf_fbp_ctx_regs |Space allocated: numRegs * n words (for n FB units)
 *| LIST_compressed_nv_perf_fbprouter_ctx_regs |Space allocated: numRegs * n words (for n FB units)
 *| LIST_compressed_pm_fbpa_ctx_regs |Space allocated: numRegs * n words (for n FB units)
 *| LIST_compressed_pm_rop_ctx_regs |Space allocated: numRegs * n words (for n FB units)
 *| LIST_compressed_pm_ltc_ctx_regs |
 *| LTC0 LTS0 |
 *| LTC1 LTS0 |Space allocated: numRegs * n words (for n LTC units)
 *| LTCn LTS0 |
 *| LTC0 LTS1 |
 *| LTC1 LTS1 |
 *| LTCn LTS1 |
 *| LTC0 LTSn |
 *| LTC1 LTSn |
 *| LTCn LTSn |
 *| PADDING for 256 byte alignment |
 *|=============================================|<----256 byte aligned on Maxwell and later
 *| LIST_compressed_nv_perf_fbp_control_ctx_regs|Space allocated: numRegs words + padding
 *| PADDING for 256 byte alignment |(If reg list is empty, 0 bytes allocated.)
 *|=============================================|<----256 byte aligned on Maxwell and later
 *
 * Each "GPCn PRI register" segment above has this layout:
 *|=============================================|<----256 byte aligned
 *| GPC0 REG0 TPC0 |Each GPC has space allocated to accommodate
 *| REG0 TPC1 | all the GPC/TPC register lists
 *| Lists in each GPC region: REG0 TPCn |Per GPC allocated space is always 256 byte aligned
 *| LIST_pm_ctx_reg_TPC REG1 TPC0 |
 *| * numTpcs REG1 TPC1 |
 *| LIST_pm_ctx_reg_PPC REG1 TPCn |
 *| * numPpcs REGn TPC0 |
 *| LIST_pm_ctx_reg_GPC REGn TPC1 |
 *| List_pm_ctx_reg_uc_GPC REGn TPCn |
 *| LIST_nv_perf_ctx_reg_GPC |
 *| LIST_nv_perf_gpcrouter_ctx_reg |
 *| LIST_nv_perf_ctx_reg_CAU (Tur) |
 *|=============================================|
 *| LIST_compressed_nv_perf_gpc_control_ctx_regs|Space allocated: numRegs words + padding
 *| PADDING for 256 byte alignment |(If reg list is empty, 0 bytes allocated.)
 *|=============================================|<----256 byte aligned on Maxwell and later
 */
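A minimal sketch of the layout rules above, assuming a simplified entry struct and an align_up() helper standing in for the driver's ctxsw_buf_offset_map_entry and NVGPU_ALIGN: each register in a list consumes one 4-byte slot, and the running offset is padded to the next 256-byte boundary before an aligned segment starts.

#include <stdint.h>
#include <stddef.h>

struct entry {                 /* simplified stand-in for ctxsw_buf_offset_map_entry */
        uint32_t addr;         /* priv register address */
        uint32_t offset;       /* byte offset inside the PM ctxsw buffer */
};

static uint32_t align_up(uint32_t v, uint32_t a)
{
        return (v + (a - 1U)) & ~(a - 1U);     /* same idea as NVGPU_ALIGN(v, 256U) */
}

/* Append one register list as 4-byte entries, then pad for the next segment. */
static size_t add_segment(struct entry *map, size_t count,
                const uint32_t *regs, size_t n, uint32_t *offset)
{
        size_t i;

        for (i = 0; i < n; i++) {
                map[count + i].addr = regs[i];
                map[count + i].offset = *offset;
                *offset += 4U;
        }
        *offset = align_up(*offset, 256U);     /* aligned segments start on 256-byte boundaries */
        return count + n;
}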
|
||||
|
||||
static int nvgpu_gr_hwpm_map_create(struct gk20a *g,
|
||||
struct nvgpu_gr_hwpm_map *hwpm_map, struct nvgpu_gr_config *config)
|
||||
{
|
||||
u32 hwpm_ctxsw_buffer_size = hwpm_map->pm_ctxsw_image_size;
|
||||
struct ctxsw_buf_offset_map_entry *map;
|
||||
u32 hwpm_ctxsw_reg_count_max;
|
||||
u32 map_size;
|
||||
u32 i, count = 0;
|
||||
u32 offset = 0;
|
||||
int ret;
|
||||
u32 active_fbpa_mask;
|
||||
u32 num_fbps = nvgpu_fbp_get_num_fbps(g->fbp);
|
||||
u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
|
||||
u32 num_fbpas = nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPAS);
|
||||
u32 fbpa_stride = nvgpu_get_litter_value(g, GPU_LIT_FBPA_STRIDE);
|
||||
u32 num_ltc = g->ops.top.get_max_ltc_per_fbp(g) *
|
||||
g->ops.priv_ring.get_fbp_count(g);
|
||||
|
||||
if (hwpm_ctxsw_buffer_size == 0U) {
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
|
||||
"no PM Ctxsw buffer memory in context buffer");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
hwpm_ctxsw_reg_count_max = hwpm_ctxsw_buffer_size >> 2;
|
||||
map_size = hwpm_ctxsw_reg_count_max * (u32)sizeof(*map);
|
||||
|
||||
map = nvgpu_big_zalloc(g, map_size);
|
||||
if (map == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/* Add entries from _LIST_pm_ctx_reg_SYS */
|
||||
if (add_ctxsw_buffer_map_entries_pmsys(map,
|
||||
nvgpu_netlist_get_pm_sys_ctxsw_regs(g),
|
||||
&count, &offset, hwpm_ctxsw_reg_count_max, 0, ~U32(0U)) != 0) {
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* Add entries from _LIST_nv_perf_ctx_reg_SYS */
|
||||
if (add_ctxsw_buffer_map_entries(map,
|
||||
nvgpu_netlist_get_perf_sys_ctxsw_regs(g),
|
||||
&count, &offset, hwpm_ctxsw_reg_count_max, 0, ~U32(0U)) != 0) {
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* Add entries from _LIST_nv_perf_sysrouter_ctx_reg*/
|
||||
if (add_ctxsw_buffer_map_entries(map,
|
||||
nvgpu_netlist_get_perf_sys_router_ctxsw_regs(g),
|
||||
&count, &offset, hwpm_ctxsw_reg_count_max, 0, ~U32(0U)) != 0) {
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* Add entries from _LIST_nv_perf_sys_control_ctx_reg*/
|
||||
if (nvgpu_netlist_get_perf_sys_control_ctxsw_regs(g)->count > 0U) {
|
||||
offset = NVGPU_ALIGN(offset, 256U);
|
||||
|
||||
ret = add_ctxsw_buffer_map_entries(map,
|
||||
nvgpu_netlist_get_perf_sys_control_ctxsw_regs(g),
|
||||
&count, &offset,
|
||||
hwpm_ctxsw_reg_count_max, 0, ~U32(0U));
|
||||
if (ret != 0) {
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
|
||||
if (g->ops.gr.hwpm_map.align_regs_perf_pma) {
|
||||
g->ops.gr.hwpm_map.align_regs_perf_pma(&offset);
|
||||
}
|
||||
|
||||
/* Add entries from _LIST_nv_perf_pma_ctx_reg*/
|
||||
ret = add_ctxsw_buffer_map_entries(map,
|
||||
nvgpu_netlist_get_perf_pma_ctxsw_regs(g), &count, &offset,
|
||||
hwpm_ctxsw_reg_count_max, 0, ~U32(0U));
|
||||
if (ret != 0) {
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
offset = NVGPU_ALIGN(offset, 256U);
|
||||
|
||||
/* Add entries from _LIST_nv_perf_pma_control_ctx_reg*/
|
||||
ret = add_ctxsw_buffer_map_entries(map,
|
||||
nvgpu_netlist_get_perf_pma_control_ctxsw_regs(g), &count, &offset,
|
||||
hwpm_ctxsw_reg_count_max, 0, ~U32(0U));
|
||||
if (ret != 0) {
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
offset = NVGPU_ALIGN(offset, 256U);
|
||||
|
||||
/* Add entries from _LIST_nv_perf_fbp_ctx_regs */
|
||||
if (add_ctxsw_buffer_map_entries_subunits(map,
|
||||
nvgpu_netlist_get_fbp_ctxsw_regs(g), &count, &offset,
|
||||
hwpm_ctxsw_reg_count_max, 0, num_fbps, ~U32(0U),
|
||||
g->ops.perf.get_pmmfbp_per_chiplet_offset(),
|
||||
~U32(0U)) != 0) {
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* Add entries from _LIST_nv_perf_fbprouter_ctx_regs */
|
||||
if (add_ctxsw_buffer_map_entries_subunits(map,
|
||||
nvgpu_netlist_get_fbp_router_ctxsw_regs(g),
|
||||
&count, &offset, hwpm_ctxsw_reg_count_max, 0,
|
||||
num_fbps, ~U32(0U), NV_PERF_PMM_FBP_ROUTER_STRIDE,
|
||||
~U32(0U)) != 0) {
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (g->ops.gr.hwpm_map.get_active_fbpa_mask) {
|
||||
active_fbpa_mask = g->ops.gr.hwpm_map.get_active_fbpa_mask(g);
|
||||
} else {
|
||||
active_fbpa_mask = ~U32(0U);
|
||||
}
|
||||
|
||||
/* Add entries from _LIST_nv_pm_fbpa_ctx_regs */
|
||||
if (add_ctxsw_buffer_map_entries_subunits(map,
|
||||
nvgpu_netlist_get_pm_fbpa_ctxsw_regs(g),
|
||||
&count, &offset, hwpm_ctxsw_reg_count_max, 0,
|
||||
num_fbpas, active_fbpa_mask, fbpa_stride, ~U32(0U))
|
||||
!= 0) {
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* Add entries from _LIST_nv_pm_rop_ctx_regs */
|
||||
if (add_ctxsw_buffer_map_entries(map,
|
||||
nvgpu_netlist_get_pm_rop_ctxsw_regs(g), &count, &offset,
|
||||
hwpm_ctxsw_reg_count_max, 0, ~U32(0U)) != 0) {
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* Add entries from _LIST_compressed_nv_pm_ltc_ctx_regs */
|
||||
if (add_ctxsw_buffer_map_entries_subunits(map,
|
||||
nvgpu_netlist_get_pm_ltc_ctxsw_regs(g), &count, &offset,
|
||||
hwpm_ctxsw_reg_count_max, 0, num_ltc, ~U32(0U),
|
||||
ltc_stride, ~U32(0U)) != 0) {
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
offset = NVGPU_ALIGN(offset, 256U);
|
||||
|
||||
/* Add entries from _LIST_nv_perf_fbp_control_ctx_regs */
|
||||
if (add_ctxsw_buffer_map_entries_subunits(map,
|
||||
nvgpu_netlist_get_perf_fbp_control_ctxsw_regs(g),
|
||||
&count, &offset, hwpm_ctxsw_reg_count_max, 0,
|
||||
num_fbps, ~U32(0U),
|
||||
g->ops.perf.get_pmmfbp_per_chiplet_offset(),
|
||||
~U32(0U)) != 0) {
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
offset = NVGPU_ALIGN(offset, 256U);
|
||||
|
||||
/* Add GPC entries */
|
||||
if (add_ctxsw_buffer_map_entries_gpcs(g, map, &count, &offset,
|
||||
hwpm_ctxsw_reg_count_max, config) != 0) {
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (offset > hwpm_ctxsw_buffer_size) {
|
||||
nvgpu_err(g, "offset > buffer size");
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
sort(map, count, sizeof(*map), map_cmp, NULL);
|
||||
|
||||
hwpm_map->map = map;
|
||||
hwpm_map->count = count;
|
||||
hwpm_map->init = true;
|
||||
|
||||
nvgpu_log_info(g, "Reg Addr => HWPM Ctxt switch buffer offset");
|
||||
|
||||
for (i = 0; i < count; i++) {
|
||||
nvgpu_log_info(g, "%08x => %08x", map[i].addr, map[i].offset);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
cleanup:
|
||||
nvgpu_err(g, "Failed to create HWPM buffer offset map");
|
||||
nvgpu_big_free(g, map);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/*
 * This function will return the 32-bit offset for a priv register if it is
 * present in the PM context buffer.
 */
int nvgpu_gr_hwmp_map_find_priv_offset(struct gk20a *g,
struct nvgpu_gr_hwpm_map *hwpm_map,
u32 addr, u32 *priv_offset, struct nvgpu_gr_config *config)
{
struct ctxsw_buf_offset_map_entry *map, *result, map_key;
int err = 0;
u32 count;

nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);

/* Create map of pri address and pm offset if necessary */
if (!hwpm_map->init) {
err = nvgpu_gr_hwpm_map_create(g, hwpm_map, config);
if (err != 0) {
return err;
}
}

*priv_offset = 0;

map = hwpm_map->map;
count = hwpm_map->count;

map_key.addr = addr;
result = nvgpu_bsearch(&map_key, map, count, sizeof(*map), map_cmp);

if (result != NULL) {
*priv_offset = result->offset;
} else {
nvgpu_err(g, "Lookup failed for address 0x%x", addr);
err = -EINVAL;
}

return err;
}
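The lookup above depends on the map having been sorted by register address so that a binary search (nvgpu_bsearch with map_cmp) can find the entry. A standalone sketch of the same pattern using the standard C qsort/bsearch and made-up register addresses, rather than the driver's wrappers:

#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>

struct map_entry {
        uint32_t addr;
        uint32_t offset;
};

/* Order entries by register address, analogous to the driver's map_cmp. */
static int entry_cmp(const void *a, const void *b)
{
        const struct map_entry *x = a;
        const struct map_entry *y = b;

        if (x->addr < y->addr)
                return -1;
        if (x->addr > y->addr)
                return 1;
        return 0;
}

int main(void)
{
        struct map_entry map[] = {          /* addresses here are made up */
                { 0x00419e44U, 0x200U }, { 0x00418810U, 0x100U },
        };
        struct map_entry key = { .addr = 0x00419e44U, .offset = 0U };
        struct map_entry *hit;

        qsort(map, 2, sizeof(map[0]), entry_cmp);
        hit = bsearch(&key, map, 2, sizeof(map[0]), entry_cmp);
        if (hit != NULL)
                printf("offset = 0x%x\n", (unsigned int)hit->offset);
        return 0;
}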
|
||||
982
drivers/gpu/nvgpu/common/gr/obj_ctx.c
Normal file
@@ -0,0 +1,982 @@
|
||||
/*
|
||||
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/log.h>
|
||||
#include <nvgpu/io.h>
|
||||
#include <nvgpu/mm.h>
|
||||
#ifdef CONFIG_NVGPU_POWER_PG
|
||||
#include <nvgpu/pmu/pmu_pg.h>
|
||||
#include <nvgpu/power_features/pg.h>
|
||||
#endif
|
||||
#include <nvgpu/gr/ctx.h>
|
||||
#include <nvgpu/gr/subctx.h>
|
||||
#include <nvgpu/gr/global_ctx.h>
|
||||
#include <nvgpu/gr/obj_ctx.h>
|
||||
#include <nvgpu/gr/config.h>
|
||||
#include <nvgpu/netlist.h>
|
||||
#include <nvgpu/gr/gr_falcon.h>
|
||||
#include <nvgpu/gr/fs_state.h>
|
||||
#include <nvgpu/power_features/cg.h>
|
||||
#include <nvgpu/static_analysis.h>
|
||||
|
||||
#include "obj_ctx_priv.h"
|
||||
|
||||
void nvgpu_gr_obj_ctx_commit_inst_gpu_va(struct gk20a *g,
|
||||
struct nvgpu_mem *inst_block, u64 gpu_va)
|
||||
{
|
||||
g->ops.ramin.set_gr_ptr(g, inst_block, gpu_va);
|
||||
}
|
||||
|
||||
void nvgpu_gr_obj_ctx_commit_inst(struct gk20a *g, struct nvgpu_mem *inst_block,
|
||||
struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_subctx *subctx, u64 gpu_va)
|
||||
{
|
||||
struct nvgpu_mem *ctxheader;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " ");
|
||||
|
||||
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) {
|
||||
nvgpu_gr_subctx_load_ctx_header(g, subctx, gr_ctx, gpu_va);
|
||||
|
||||
ctxheader = nvgpu_gr_subctx_get_ctx_header(subctx);
|
||||
nvgpu_gr_obj_ctx_commit_inst_gpu_va(g, inst_block,
|
||||
ctxheader->gpu_va);
|
||||
} else {
|
||||
nvgpu_gr_obj_ctx_commit_inst_gpu_va(g, inst_block, gpu_va);
|
||||
}
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
|
||||
}
|
||||
|
||||
#if defined(CONFIG_NVGPU_GFXP) || defined(CONFIG_NVGPU_CILP)
|
||||
static int nvgpu_gr_obj_ctx_init_ctxsw_preemption_mode(struct gk20a *g,
|
||||
struct nvgpu_gr_config *config, struct nvgpu_gr_ctx_desc *gr_ctx_desc,
|
||||
struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm,
|
||||
u32 class_num, u32 flags)
|
||||
{
|
||||
int err;
|
||||
u32 graphics_preempt_mode = 0U;
|
||||
u32 compute_preempt_mode = 0U;
|
||||
u32 default_graphics_preempt_mode = 0U;
|
||||
u32 default_compute_preempt_mode = 0U;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " ");
|
||||
|
||||
/* Skip for engines other than GR */
|
||||
if (!g->ops.gpu_class.is_valid_compute(class_num) &&
|
||||
!g->ops.gpu_class.is_valid_gfx(class_num)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
g->ops.gr.init.get_default_preemption_modes(
|
||||
&default_graphics_preempt_mode,
|
||||
&default_compute_preempt_mode);
|
||||
|
||||
#ifdef CONFIG_NVGPU_GFXP
|
||||
if ((flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP) != 0U) {
|
||||
graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP;
|
||||
}
|
||||
|
||||
if (g->ops.gpu_class.is_valid_gfx(class_num) &&
|
||||
nvgpu_gr_ctx_desc_force_preemption_gfxp(gr_ctx_desc)) {
|
||||
graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_NVGPU_CILP
|
||||
if ((flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP) != 0U) {
|
||||
compute_preempt_mode = NVGPU_PREEMPTION_MODE_COMPUTE_CILP;
|
||||
}
|
||||
|
||||
if (g->ops.gpu_class.is_valid_compute(class_num) &&
|
||||
nvgpu_gr_ctx_desc_force_preemption_cilp(gr_ctx_desc)) {
|
||||
compute_preempt_mode = NVGPU_PREEMPTION_MODE_COMPUTE_CILP;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (compute_preempt_mode == 0U) {
|
||||
compute_preempt_mode = default_compute_preempt_mode;
|
||||
}
|
||||
|
||||
if (graphics_preempt_mode == 0U) {
|
||||
graphics_preempt_mode = default_graphics_preempt_mode;
|
||||
}
|
||||
|
||||
err = nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(g, config,
|
||||
gr_ctx_desc, gr_ctx, vm, class_num, graphics_preempt_mode,
|
||||
compute_preempt_mode);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "set_ctxsw_preemption_mode failed");
|
||||
return err;
|
||||
}
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_NVGPU_GRAPHICS
|
||||
static int nvgpu_gr_obj_ctx_set_graphics_preemption_mode(struct gk20a *g,
|
||||
struct nvgpu_gr_config *config, struct nvgpu_gr_ctx_desc *gr_ctx_desc,
|
||||
struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm,
|
||||
u32 graphics_preempt_mode)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
/* set preemption modes */
|
||||
switch (graphics_preempt_mode) {
|
||||
#ifdef CONFIG_NVGPU_GFXP
|
||||
case NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP:
|
||||
{
|
||||
u32 rtv_cb_size;
|
||||
u32 spill_size = g->ops.gr.init.get_ctx_spill_size(g);
|
||||
u32 pagepool_size = g->ops.gr.init.get_ctx_pagepool_size(g);
|
||||
u32 betacb_size = g->ops.gr.init.get_ctx_betacb_size(g);
|
||||
u32 attrib_cb_size =
|
||||
g->ops.gr.init.get_ctx_attrib_cb_size(g, betacb_size,
|
||||
nvgpu_gr_config_get_tpc_count(config),
|
||||
nvgpu_gr_config_get_max_tpc_count(config));
|
||||
|
||||
nvgpu_log_info(g, "gfxp context spill_size=%d", spill_size);
|
||||
nvgpu_log_info(g, "gfxp context pagepool_size=%d", pagepool_size);
|
||||
nvgpu_log_info(g, "gfxp context attrib_cb_size=%d",
|
||||
attrib_cb_size);
|
||||
|
||||
nvgpu_gr_ctx_set_size(gr_ctx_desc,
|
||||
NVGPU_GR_CTX_SPILL_CTXSW, spill_size);
|
||||
nvgpu_gr_ctx_set_size(gr_ctx_desc,
|
||||
NVGPU_GR_CTX_BETACB_CTXSW, attrib_cb_size);
|
||||
nvgpu_gr_ctx_set_size(gr_ctx_desc,
|
||||
NVGPU_GR_CTX_PAGEPOOL_CTXSW, pagepool_size);
|
||||
|
||||
if (g->ops.gr.init.get_gfxp_rtv_cb_size != NULL) {
|
||||
rtv_cb_size = g->ops.gr.init.get_gfxp_rtv_cb_size(g);
|
||||
nvgpu_gr_ctx_set_size(gr_ctx_desc,
|
||||
NVGPU_GR_CTX_GFXP_RTVCB_CTXSW, rtv_cb_size);
|
||||
}
|
||||
|
||||
err = nvgpu_gr_ctx_alloc_ctxsw_buffers(g, gr_ctx,
|
||||
gr_ctx_desc, vm);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "cannot allocate ctxsw buffers");
|
||||
return err;
|
||||
}
|
||||
|
||||
nvgpu_gr_ctx_init_graphics_preemption_mode(gr_ctx,
|
||||
graphics_preempt_mode);
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
case NVGPU_PREEMPTION_MODE_GRAPHICS_WFI:
|
||||
nvgpu_gr_ctx_init_graphics_preemption_mode(gr_ctx,
|
||||
graphics_preempt_mode);
|
||||
break;
|
||||
|
||||
default:
|
||||
nvgpu_log_info(g, "graphics_preempt_mode=%u",
|
||||
graphics_preempt_mode);
|
||||
break;
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int nvgpu_gr_obj_ctx_set_compute_preemption_mode(struct gk20a *g,
|
||||
struct nvgpu_gr_ctx *gr_ctx, u32 class_num, u32 compute_preempt_mode)
|
||||
{
|
||||
|
||||
if (g->ops.gpu_class.is_valid_compute(class_num)
|
||||
#ifdef CONFIG_NVGPU_GRAPHICS
|
||||
|| g->ops.gpu_class.is_valid_gfx(class_num)
|
||||
#endif
|
||||
) {
|
||||
nvgpu_gr_ctx_init_compute_preemption_mode(gr_ctx,
|
||||
compute_preempt_mode);
|
||||
return 0;
|
||||
} else {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
int nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(struct gk20a *g,
|
||||
struct nvgpu_gr_config *config, struct nvgpu_gr_ctx_desc *gr_ctx_desc,
|
||||
struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, u32 class_num,
|
||||
u32 graphics_preempt_mode, u32 compute_preempt_mode)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
/* check for invalid combinations */
|
||||
if (nvgpu_gr_ctx_check_valid_preemption_mode(g, gr_ctx,
|
||||
graphics_preempt_mode, compute_preempt_mode) == false) {
|
||||
err = -EINVAL;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
nvgpu_log(g, gpu_dbg_gr, "graphics_preempt_mode=%u compute_preempt_mode=%u",
|
||||
graphics_preempt_mode, compute_preempt_mode);
|
||||
|
||||
#ifdef CONFIG_NVGPU_GRAPHICS
|
||||
err = nvgpu_gr_obj_ctx_set_graphics_preemption_mode(g, config,
|
||||
gr_ctx_desc, gr_ctx, vm, graphics_preempt_mode);
|
||||
|
||||
if (err != 0) {
|
||||
goto fail;
|
||||
}
|
||||
#endif
|
||||
|
||||
err = nvgpu_gr_obj_ctx_set_compute_preemption_mode(g, gr_ctx,
|
||||
class_num, compute_preempt_mode);
|
||||
|
||||
fail:
|
||||
return err;
|
||||
}
|
||||
|
||||
void nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(struct gk20a *g,
|
||||
struct nvgpu_gr_config *config,
|
||||
struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_subctx *subctx)
|
||||
{
|
||||
#ifdef CONFIG_NVGPU_GFXP
|
||||
u64 addr;
|
||||
u32 size;
|
||||
struct nvgpu_mem *mem;
|
||||
#endif
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " ");
|
||||
|
||||
nvgpu_gr_ctx_set_preemption_modes(g, gr_ctx);
|
||||
|
||||
#ifdef CONFIG_NVGPU_GFXP
|
||||
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_PREEMPTION_GFXP)) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (!nvgpu_mem_is_valid(
|
||||
nvgpu_gr_ctx_get_preempt_ctxsw_buffer(gr_ctx))) {
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (subctx != NULL) {
|
||||
nvgpu_gr_subctx_set_preemption_buffer_va(g, subctx,
|
||||
gr_ctx);
|
||||
} else {
|
||||
nvgpu_gr_ctx_set_preemption_buffer_va(g, gr_ctx);
|
||||
}
|
||||
|
||||
nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, true);
|
||||
|
||||
addr = nvgpu_gr_ctx_get_betacb_ctxsw_buffer(gr_ctx)->gpu_va;
|
||||
g->ops.gr.init.commit_global_attrib_cb(g, gr_ctx,
|
||||
nvgpu_gr_config_get_tpc_count(config),
|
||||
nvgpu_gr_config_get_max_tpc_count(config), addr,
|
||||
true);
|
||||
|
||||
mem = nvgpu_gr_ctx_get_pagepool_ctxsw_buffer(gr_ctx);
|
||||
addr = mem->gpu_va;
|
||||
nvgpu_assert(mem->size <= U32_MAX);
|
||||
size = (u32)mem->size;
|
||||
|
||||
g->ops.gr.init.commit_global_pagepool(g, gr_ctx, addr, size,
|
||||
true, false);
|
||||
|
||||
mem = nvgpu_gr_ctx_get_spill_ctxsw_buffer(gr_ctx);
|
||||
addr = mem->gpu_va;
|
||||
nvgpu_assert(mem->size <= U32_MAX);
|
||||
size = (u32)mem->size;
|
||||
|
||||
g->ops.gr.init.commit_ctxsw_spill(g, gr_ctx, addr, size, true);
|
||||
|
||||
g->ops.gr.init.commit_cbes_reserve(g, gr_ctx, true);
|
||||
|
||||
if (g->ops.gr.init.gfxp_wfi_timeout != NULL) {
|
||||
g->ops.gr.init.gfxp_wfi_timeout(g, gr_ctx, true);
|
||||
}
|
||||
|
||||
if (g->ops.gr.init.commit_gfxp_rtv_cb != NULL) {
|
||||
g->ops.gr.init.commit_gfxp_rtv_cb(g, gr_ctx, true);
|
||||
}
|
||||
|
||||
nvgpu_gr_ctx_patch_write_end(g, gr_ctx, true);
|
||||
|
||||
done:
|
||||
#endif
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
|
||||
}
|
||||
|
||||
void nvgpu_gr_obj_ctx_commit_global_ctx_buffers(struct gk20a *g,
|
||||
struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
|
||||
struct nvgpu_gr_config *config, struct nvgpu_gr_ctx *gr_ctx, bool patch)
|
||||
{
|
||||
u64 addr;
|
||||
u32 size;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " ");
|
||||
|
||||
if (patch) {
|
||||
nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, false);
|
||||
}
|
||||
|
||||
/*
 * MIG supports only the compute class.
 * Skip BUNDLE_CB, PAGEPOOL, ATTRIBUTE_CB and RTV_CB
 * if the 2D/3D/I2M (graphics) classes are not supported.
 */
|
||||
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
|
||||
/* global pagepool buffer */
|
||||
addr = nvgpu_gr_ctx_get_global_ctx_va(gr_ctx,
|
||||
NVGPU_GR_CTX_PAGEPOOL_VA);
|
||||
size = nvgpu_safe_cast_u64_to_u32(nvgpu_gr_global_ctx_get_size(
|
||||
global_ctx_buffer,
|
||||
NVGPU_GR_GLOBAL_CTX_PAGEPOOL));
|
||||
|
||||
g->ops.gr.init.commit_global_pagepool(g, gr_ctx, addr, size,
|
||||
patch, true);
|
||||
|
||||
/* global bundle cb */
|
||||
addr = nvgpu_gr_ctx_get_global_ctx_va(gr_ctx,
|
||||
NVGPU_GR_CTX_CIRCULAR_VA);
|
||||
size = nvgpu_safe_cast_u64_to_u32(
|
||||
g->ops.gr.init.get_bundle_cb_default_size(g));
|
||||
|
||||
g->ops.gr.init.commit_global_bundle_cb(g, gr_ctx, addr, size,
|
||||
patch);
|
||||
|
||||
/* global attrib cb */
|
||||
addr = nvgpu_gr_ctx_get_global_ctx_va(gr_ctx,
|
||||
NVGPU_GR_CTX_ATTRIBUTE_VA);
|
||||
|
||||
g->ops.gr.init.commit_global_attrib_cb(g, gr_ctx,
|
||||
nvgpu_gr_config_get_tpc_count(config),
|
||||
nvgpu_gr_config_get_max_tpc_count(config), addr, patch);
|
||||
|
||||
g->ops.gr.init.commit_global_cb_manager(g, config, gr_ctx,
|
||||
patch);
|
||||
|
||||
#ifdef CONFIG_NVGPU_GRAPHICS
|
||||
if (g->ops.gr.init.commit_rtv_cb != NULL) {
|
||||
/* RTV circular buffer */
|
||||
addr = nvgpu_gr_ctx_get_global_ctx_va(gr_ctx,
|
||||
NVGPU_GR_CTX_RTV_CIRCULAR_BUFFER_VA);
|
||||
|
||||
g->ops.gr.init.commit_rtv_cb(g, addr, gr_ctx, patch);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_SM_DIVERSITY
|
||||
if ((nvgpu_is_enabled(g, NVGPU_SUPPORT_SM_DIVERSITY)) &&
|
||||
(nvgpu_gr_ctx_get_sm_diversity_config(gr_ctx) !=
|
||||
NVGPU_DEFAULT_SM_DIVERSITY_CONFIG) &&
|
||||
(g->ops.gr.init.commit_sm_id_programming != NULL)) {
|
||||
int err;
|
||||
|
||||
err = g->ops.gr.init.commit_sm_id_programming(
|
||||
g, config, gr_ctx, patch);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g,
|
||||
"commit_sm_id_programming failed err=%d", err);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (patch) {
|
||||
nvgpu_gr_ctx_patch_write_end(g, gr_ctx, false);
|
||||
}
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
|
||||
}
|
||||
|
||||
static int nvgpu_gr_obj_ctx_alloc_sw_bundle(struct gk20a *g)
|
||||
{
|
||||
int err = 0;
|
||||
struct netlist_av_list *sw_bundle_init =
|
||||
nvgpu_netlist_get_sw_bundle_init_av_list(g);
|
||||
struct netlist_av_list *sw_veid_bundle_init =
|
||||
nvgpu_netlist_get_sw_veid_bundle_init_av_list(g);
|
||||
#ifdef CONFIG_NVGPU_DGPU
|
||||
struct netlist_av64_list *sw_bundle64_init =
|
||||
nvgpu_netlist_get_sw_bundle64_init_av64_list(g);
|
||||
#endif
|
||||
|
||||
/* enable pipe mode override */
|
||||
g->ops.gr.init.pipe_mode_override(g, true);
|
||||
|
||||
/* load bundle init */
|
||||
err = g->ops.gr.init.load_sw_bundle_init(g, sw_bundle_init);
|
||||
if (err != 0) {
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (g->ops.gr.init.load_sw_veid_bundle != NULL) {
|
||||
err = g->ops.gr.init.load_sw_veid_bundle(g,
|
||||
sw_veid_bundle_init);
|
||||
if (err != 0) {
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_DGPU
|
||||
if (g->ops.gr.init.load_sw_bundle64 != NULL) {
|
||||
err = g->ops.gr.init.load_sw_bundle64(g, sw_bundle64_init);
|
||||
if (err != 0) {
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* disable pipe mode override */
|
||||
g->ops.gr.init.pipe_mode_override(g, false);
|
||||
|
||||
err = g->ops.gr.init.wait_idle(g);
|
||||
|
||||
return err;
|
||||
|
||||
error:
|
||||
/* in case of error skip waiting for GR idle - just restore state */
|
||||
g->ops.gr.init.pipe_mode_override(g, false);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int nvgpu_gr_obj_ctx_init_hw_state(struct gk20a *g,
|
||||
struct nvgpu_mem *inst_block)
|
||||
{
|
||||
int err = 0;
|
||||
u32 data;
|
||||
u32 i;
|
||||
struct netlist_aiv_list *sw_ctx_load =
|
||||
nvgpu_netlist_get_sw_ctx_load_aiv_list(g);
|
||||
|
||||
nvgpu_log(g, gpu_dbg_gr, " ");
|
||||
|
||||
err = g->ops.gr.init.fe_pwr_mode_force_on(g, true);
|
||||
if (err != 0) {
|
||||
goto clean_up;
|
||||
}
|
||||
|
||||
g->ops.gr.init.override_context_reset(g);
|
||||
|
||||
err = g->ops.gr.init.fe_pwr_mode_force_on(g, false);
|
||||
if (err != 0) {
|
||||
goto clean_up;
|
||||
}
|
||||
|
||||
data = g->ops.gr.falcon.get_fecs_current_ctx_data(g, inst_block);
|
||||
err = g->ops.gr.falcon.ctrl_ctxsw(g,
|
||||
NVGPU_GR_FALCON_METHOD_ADDRESS_BIND_PTR, data, NULL);
|
||||
if (err != 0) {
|
||||
goto clean_up;
|
||||
}
|
||||
|
||||
err = g->ops.gr.init.wait_idle(g);
|
||||
|
||||
/* load ctx init */
|
||||
nvgpu_log_info(g, "begin: netlist: sw_ctx_load: register writes");
|
||||
for (i = 0U; i < sw_ctx_load->count; i++) {
|
||||
nvgpu_writel(g, sw_ctx_load->l[i].addr,
|
||||
sw_ctx_load->l[i].value);
|
||||
}
|
||||
nvgpu_log_info(g, "end: netlist: sw_ctx_load: register writes");
|
||||
|
||||
nvgpu_log_info(g, "configure sm_hww_esr_report mask after sw_ctx_load");
|
||||
g->ops.gr.intr.set_hww_esr_report_mask(g);
|
||||
|
||||
#ifdef CONFIG_NVGPU_GFXP
|
||||
if (g->ops.gr.init.preemption_state != NULL) {
|
||||
err = g->ops.gr.init.preemption_state(g);
|
||||
if (err != 0) {
|
||||
goto clean_up;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
nvgpu_cg_blcg_gr_load_enable(g);
|
||||
|
||||
err = g->ops.gr.init.wait_idle(g);
|
||||
|
||||
clean_up:
|
||||
if (err == 0) {
|
||||
nvgpu_log(g, gpu_dbg_gr, "done");
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static int nvgpu_gr_obj_ctx_commit_hw_state(struct gk20a *g,
|
||||
struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
|
||||
struct nvgpu_gr_config *config, struct nvgpu_gr_ctx *gr_ctx)
|
||||
{
|
||||
int err = 0;
|
||||
struct netlist_av_list *sw_method_init =
|
||||
nvgpu_netlist_get_sw_method_init_av_list(g);
|
||||
#ifdef CONFIG_NVGPU_GR_GOLDEN_CTX_VERIFICATION
|
||||
struct netlist_av_list *sw_bundle_init =
|
||||
nvgpu_netlist_get_sw_bundle_init_av_list(g);
|
||||
#endif
|
||||
|
||||
nvgpu_log(g, gpu_dbg_gr, " ");
|
||||
|
||||
/* disable fe_go_idle */
|
||||
g->ops.gr.init.fe_go_idle_timeout(g, false);
|
||||
|
||||
nvgpu_gr_obj_ctx_commit_global_ctx_buffers(g, global_ctx_buffer,
|
||||
config, gr_ctx, false);
|
||||
|
||||
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
|
||||
/* override a few ctx state registers */
|
||||
g->ops.gr.init.commit_global_timeslice(g);
|
||||
}
|
||||
|
||||
/* floorsweep anything left */
|
||||
err = nvgpu_gr_fs_state_init(g, config);
|
||||
if (err != 0) {
|
||||
goto restore_fe_go_idle;
|
||||
}
|
||||
|
||||
err = g->ops.gr.init.wait_idle(g);
|
||||
if (err != 0) {
|
||||
goto restore_fe_go_idle;
|
||||
}
|
||||
#if defined(CONFIG_NVGPU_HAL_NON_FUSA) && defined(CONFIG_NVGPU_NEXT)
|
||||
if (g->ops.gr.init.auto_go_idle != NULL) {
|
||||
g->ops.gr.init.auto_go_idle(g, false);
|
||||
}
|
||||
#endif
|
||||
err = nvgpu_gr_obj_ctx_alloc_sw_bundle(g);
|
||||
if (err != 0) {
|
||||
goto restore_fe_go_idle;
|
||||
}
|
||||
|
||||
#if defined(CONFIG_NVGPU_HAL_NON_FUSA) && defined(CONFIG_NVGPU_NEXT)
|
||||
if (g->ops.gr.init.auto_go_idle != NULL) {
|
||||
g->ops.gr.init.auto_go_idle(g, true);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* restore fe_go_idle */
|
||||
g->ops.gr.init.fe_go_idle_timeout(g, true);
|
||||
|
||||
/* load method init */
|
||||
g->ops.gr.init.load_method_init(g, sw_method_init);
|
||||
|
||||
#ifdef CONFIG_NVGPU_GR_GOLDEN_CTX_VERIFICATION
|
||||
/* restore stats bundle data through mme shadow methods */
|
||||
if (g->ops.gr.init.restore_stats_counter_bundle_data != NULL) {
|
||||
g->ops.gr.init.restore_stats_counter_bundle_data(g,
|
||||
sw_bundle_init);
|
||||
}
|
||||
#endif
|
||||
|
||||
err = g->ops.gr.init.wait_idle(g);
|
||||
if (err != 0) {
|
||||
goto clean_up;
|
||||
}
|
||||
|
||||
nvgpu_log(g, gpu_dbg_gr, "done");
|
||||
return 0;
|
||||
|
||||
restore_fe_go_idle:
|
||||
/* restore fe_go_idle */
|
||||
g->ops.gr.init.fe_go_idle_timeout(g, true);
|
||||
#if defined(CONFIG_NVGPU_HAL_NON_FUSA) && defined(CONFIG_NVGPU_NEXT)
|
||||
if (g->ops.gr.init.auto_go_idle != NULL) {
|
||||
g->ops.gr.init.auto_go_idle(g, true);
|
||||
}
|
||||
#endif
|
||||
|
||||
clean_up:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int nvgpu_gr_obj_ctx_save_golden_ctx(struct gk20a *g,
|
||||
struct nvgpu_gr_obj_ctx_golden_image *golden_image,
|
||||
struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_mem *inst_block)
|
||||
{
|
||||
int err = 0;
|
||||
struct nvgpu_mem *gr_mem;
|
||||
u64 size;
|
||||
u32 data;
|
||||
#ifdef CONFIG_NVGPU_GR_GOLDEN_CTX_VERIFICATION
|
||||
struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image_temp =
|
||||
NULL;
|
||||
#endif
|
||||
|
||||
nvgpu_log(g, gpu_dbg_gr, " ");
|
||||
|
||||
gr_mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx);
|
||||
|
||||
#ifdef CONFIG_NVGPU_GR_GOLDEN_CTX_VERIFICATION
|
||||
/*
 * Save ctx data before the first golden context save. Restore the same
 * data before the second golden context save. This temporary copy is
 * kept in local_golden_image_temp.
 */
|
||||
|
||||
size = nvgpu_gr_obj_ctx_get_golden_image_size(golden_image);
|
||||
|
||||
local_golden_image_temp =
|
||||
nvgpu_gr_global_ctx_init_local_golden_image(g, gr_mem, size);
|
||||
if (local_golden_image_temp == NULL) {
|
||||
err = -ENOMEM;
|
||||
goto clean_up;
|
||||
}
|
||||
#endif
|
||||
|
||||
data = g->ops.gr.falcon.get_fecs_current_ctx_data(g, inst_block);
|
||||
err = g->ops.gr.falcon.ctrl_ctxsw(g,
|
||||
NVGPU_GR_FALCON_METHOD_GOLDEN_IMAGE_SAVE, data, NULL);
|
||||
if (err != 0) {
|
||||
goto clean_up;
|
||||
}
|
||||
|
||||
size = nvgpu_gr_obj_ctx_get_golden_image_size(golden_image);
|
||||
|
||||
golden_image->local_golden_image =
|
||||
nvgpu_gr_global_ctx_init_local_golden_image(g, gr_mem, size);
|
||||
if (golden_image->local_golden_image == NULL) {
|
||||
err = -ENOMEM;
|
||||
goto clean_up;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_GR_GOLDEN_CTX_VERIFICATION
|
||||
/* Before the second golden context save, restore the previously saved state */
|
||||
nvgpu_gr_global_ctx_load_local_golden_image(g,
|
||||
local_golden_image_temp, gr_mem);
|
||||
/* free local copy now */
|
||||
nvgpu_gr_global_ctx_deinit_local_golden_image(g,
|
||||
local_golden_image_temp);
|
||||
local_golden_image_temp = NULL;
|
||||
|
||||
/* Initiate second golden context save */
|
||||
data = g->ops.gr.falcon.get_fecs_current_ctx_data(g, inst_block);
|
||||
err = g->ops.gr.falcon.ctrl_ctxsw(g,
|
||||
NVGPU_GR_FALCON_METHOD_GOLDEN_IMAGE_SAVE, data, NULL);
|
||||
if (err != 0) {
|
||||
goto clean_up;
|
||||
}
|
||||
|
||||
/* Copy the data to local buffer */
|
||||
local_golden_image_temp =
|
||||
nvgpu_gr_global_ctx_init_local_golden_image(g, gr_mem, size);
|
||||
if (local_golden_image_temp == NULL) {
|
||||
err = -ENOMEM;
|
||||
goto clean_up;
|
||||
}
|
||||
|
||||
/* Compare two golden context images */
|
||||
if (!nvgpu_gr_global_ctx_compare_golden_images(g,
|
||||
nvgpu_mem_is_sysmem(gr_mem),
|
||||
golden_image->local_golden_image,
|
||||
local_golden_image_temp,
|
||||
size)) {
|
||||
nvgpu_err(g, "golden context mismatch");
|
||||
err = -ENOMEM;
|
||||
}
|
||||
#endif
|
||||
|
||||
clean_up:
|
||||
#ifdef CONFIG_NVGPU_GR_GOLDEN_CTX_VERIFICATION
|
||||
if (local_golden_image_temp != NULL) {
|
||||
nvgpu_gr_global_ctx_deinit_local_golden_image(g,
|
||||
local_golden_image_temp);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (err == 0) {
|
||||
nvgpu_log(g, gpu_dbg_gr, "golden image saved with size = %llu", size);
|
||||
}
|
||||
return err;
|
||||
}
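Under CONFIG_NVGPU_GR_GOLDEN_CTX_VERIFICATION the function above effectively saves the golden image twice and compares the results: copy the context, run the first FECS golden save, keep that image, restore the pre-save copy, run a second save, then compare. A condensed sketch of that flow, with hypothetical img_* and fecs_golden_save() helpers standing in for the nvgpu_gr_global_ctx_*() and falcon calls:

#include <stddef.h>
#include <errno.h>

/* Hypothetical helpers; the driver uses nvgpu_gr_global_ctx_*() and ctrl_ctxsw(). */
struct img;
struct img *img_copy(const void *ctx, size_t size);            /* snapshot ctx memory */
void img_restore(void *ctx, const struct img *i);              /* write snapshot back */
void img_free(struct img *i);
int img_equal(const struct img *a, const struct img *b, size_t size);
int fecs_golden_save(void *ctx);                               /* GOLDEN_IMAGE_SAVE */

static int save_and_verify_golden(void *ctx, size_t size, struct img **golden)
{
        struct img *pre, *first, *second;
        int err = -EINVAL;

        pre = img_copy(ctx, size);            /* state before any golden save */

        if (fecs_golden_save(ctx) != 0)
                goto out;
        first = img_copy(ctx, size);          /* candidate golden image */

        img_restore(ctx, pre);                /* rewind to the pre-save state */
        if (fecs_golden_save(ctx) != 0)
                goto out_first;
        second = img_copy(ctx, size);

        if (img_equal(first, second, size)) {
                *golden = first;              /* keep the verified image */
                err = 0;
        }
        img_free(second);
out_first:
        if (err != 0)
                img_free(first);
out:
        img_free(pre);
        return err;
}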
|
||||
|
||||
/*
|
||||
* init global golden image from a fresh gr_ctx in channel ctx.
|
||||
* save a copy in local_golden_image.
|
||||
*/
|
||||
int nvgpu_gr_obj_ctx_alloc_golden_ctx_image(struct gk20a *g,
|
||||
struct nvgpu_gr_obj_ctx_golden_image *golden_image,
|
||||
struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
|
||||
struct nvgpu_gr_config *config,
|
||||
struct nvgpu_gr_ctx *gr_ctx,
|
||||
struct nvgpu_mem *inst_block)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " ");
|
||||
|
||||
/*
 * The golden ctx is global to all channels. Although only the first
 * channel initializes the golden image, the driver needs to prevent
 * multiple channels from initializing the golden ctx at the same time.
 */
|
||||
nvgpu_mutex_acquire(&golden_image->ctx_mutex);
|
||||
|
||||
if (golden_image->ready) {
|
||||
nvgpu_log(g, gpu_dbg_gr, "golden image already saved");
|
||||
goto clean_up;
|
||||
}
|
||||
|
||||
err = nvgpu_gr_obj_ctx_init_hw_state(g, inst_block);
|
||||
if (err != 0) {
|
||||
goto clean_up;
|
||||
}
|
||||
|
||||
err = nvgpu_gr_obj_ctx_commit_hw_state(g, global_ctx_buffer,
|
||||
config, gr_ctx);
|
||||
if (err != 0) {
|
||||
goto clean_up;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_GRAPHICS
|
||||
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
|
||||
err = nvgpu_gr_ctx_init_zcull(g, gr_ctx);
|
||||
if (err != 0) {
|
||||
goto clean_up;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
err = nvgpu_gr_obj_ctx_save_golden_ctx(g, golden_image,
|
||||
gr_ctx, inst_block);
|
||||
if (err != 0) {
|
||||
goto clean_up;
|
||||
}
|
||||
|
||||
golden_image->ready = true;
|
||||
#ifdef CONFIG_NVGPU_POWER_PG
|
||||
nvgpu_pmu_set_golden_image_initialized(g, true);
|
||||
#endif
|
||||
g->ops.gr.falcon.set_current_ctx_invalid(g);
|
||||
|
||||
clean_up:
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "fail");
|
||||
} else {
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
|
||||
}
|
||||
|
||||
nvgpu_mutex_release(&golden_image->ctx_mutex);
|
||||
return err;
|
||||
}
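The ctx_mutex/ready check above is the usual initialize-once pattern: every caller takes the lock, but only the first caller that finds the image not ready performs the initialization. A generic pthread-based sketch of that pattern (not driver code):

#include <pthread.h>
#include <stdbool.h>

struct once_state {
        pthread_mutex_t lock;   /* e.g. initialized with PTHREAD_MUTEX_INITIALIZER */
        bool ready;
};

/* Run init() exactly once even if many threads race into this function. */
static int init_once(struct once_state *s, int (*init)(void))
{
        int err = 0;

        pthread_mutex_lock(&s->lock);
        if (!s->ready) {
                err = init();
                if (err == 0)
                        s->ready = true;
        }
        pthread_mutex_unlock(&s->lock);
        return err;
}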
|
||||
|
||||
static int nvgpu_gr_obj_ctx_gr_ctx_alloc(struct gk20a *g,
|
||||
struct nvgpu_gr_obj_ctx_golden_image *golden_image,
|
||||
struct nvgpu_gr_ctx_desc *gr_ctx_desc, struct nvgpu_gr_ctx *gr_ctx,
|
||||
struct vm_gk20a *vm)
|
||||
{
|
||||
u64 size;
|
||||
int err = 0;
|
||||
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
size = nvgpu_gr_obj_ctx_get_golden_image_size(golden_image);
|
||||
nvgpu_gr_ctx_set_size(gr_ctx_desc, NVGPU_GR_CTX_CTX,
|
||||
nvgpu_safe_cast_u64_to_u32(size));
|
||||
|
||||
nvgpu_log(g, gpu_dbg_gr, "gr_ctx size = %llu", size);
|
||||
err = nvgpu_gr_ctx_alloc(g, gr_ctx, gr_ctx_desc, vm);
|
||||
if (err != 0) {
|
||||
return err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int nvgpu_gr_obj_ctx_alloc(struct gk20a *g,
|
||||
struct nvgpu_gr_obj_ctx_golden_image *golden_image,
|
||||
struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
|
||||
struct nvgpu_gr_ctx_desc *gr_ctx_desc,
|
||||
struct nvgpu_gr_config *config,
|
||||
struct nvgpu_gr_ctx *gr_ctx,
|
||||
struct nvgpu_gr_subctx *subctx,
|
||||
struct vm_gk20a *vm,
|
||||
struct nvgpu_mem *inst_block,
|
||||
u32 class_num, u32 flags,
|
||||
bool cde, bool vpr)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " ");
|
||||
|
||||
err = nvgpu_gr_obj_ctx_gr_ctx_alloc(g, golden_image, gr_ctx_desc,
|
||||
gr_ctx, vm);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "fail to allocate TSG gr ctx buffer");
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* allocate patch buffer */
|
||||
if (!nvgpu_mem_is_valid(nvgpu_gr_ctx_get_patch_ctx_mem(gr_ctx))) {
|
||||
nvgpu_gr_ctx_set_patch_ctx_data_count(gr_ctx, 0);
|
||||
|
||||
nvgpu_gr_ctx_set_size(gr_ctx_desc,
|
||||
NVGPU_GR_CTX_PATCH_CTX,
|
||||
nvgpu_safe_mult_u32(
|
||||
g->ops.gr.init.get_patch_slots(g, config),
|
||||
PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY));
|
||||
|
||||
err = nvgpu_gr_ctx_alloc_patch_ctx(g, gr_ctx, gr_ctx_desc, vm);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "fail to allocate patch buffer");
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(CONFIG_NVGPU_GFXP) || defined(CONFIG_NVGPU_CILP)
|
||||
err = nvgpu_gr_obj_ctx_init_ctxsw_preemption_mode(g, config,
|
||||
gr_ctx_desc, gr_ctx, vm, class_num, flags);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "fail to init preemption mode");
|
||||
goto out;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* map global buffer to channel gpu_va and commit */
|
||||
err = nvgpu_gr_ctx_map_global_ctx_buffers(g, gr_ctx,
|
||||
global_ctx_buffer, vm, vpr);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "fail to map global ctx buffer");
|
||||
goto out;
|
||||
}
|
||||
|
||||
nvgpu_gr_obj_ctx_commit_global_ctx_buffers(g, global_ctx_buffer,
|
||||
config, gr_ctx, true);
|
||||
|
||||
/* commit gr ctx buffer */
|
||||
nvgpu_gr_obj_ctx_commit_inst(g, inst_block, gr_ctx, subctx,
|
||||
nvgpu_gr_ctx_get_ctx_mem(gr_ctx)->gpu_va);
|
||||
|
||||
/* init golden image */
|
||||
err = nvgpu_gr_obj_ctx_alloc_golden_ctx_image(g, golden_image,
|
||||
global_ctx_buffer, config, gr_ctx, inst_block);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "fail to init golden ctx image");
|
||||
goto out;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_POWER_PG
|
||||
/* Re-enable ELPG now that golden image has been initialized.
|
||||
* The PMU PG init code may already have tried to enable elpg, but
|
||||
* would not have been able to complete this action since the golden
|
||||
* image hadn't been initialized yet, so do this now.
|
||||
*/
|
||||
err = nvgpu_pmu_reenable_elpg(g);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "fail to re-enable elpg");
|
||||
goto out;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* load golden image */
|
||||
nvgpu_gr_ctx_load_golden_ctx_image(g, gr_ctx,
|
||||
golden_image->local_golden_image, cde);
|
||||
|
||||
nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(g, config, gr_ctx,
|
||||
subctx);
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
|
||||
return 0;
|
||||
out:
|
||||
/*
 * 1. gr_ctx, patch_ctx and the global ctx buffer mapping
 *    can be reused, so there is no need to release them.
 * 2. Golden image init and load is a one-time operation, so if
 *    they pass, there is no need to undo them.
 */
|
||||
nvgpu_err(g, "fail");
|
||||
return err;
|
||||
}
|
||||
|
||||
void nvgpu_gr_obj_ctx_set_golden_image_size(
|
||||
struct nvgpu_gr_obj_ctx_golden_image *golden_image,
|
||||
size_t size)
|
||||
{
|
||||
golden_image->size = size;
|
||||
}
|
||||
|
||||
size_t nvgpu_gr_obj_ctx_get_golden_image_size(
|
||||
struct nvgpu_gr_obj_ctx_golden_image *golden_image)
|
||||
{
|
||||
return golden_image->size;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_DEBUGGER
|
||||
u32 *nvgpu_gr_obj_ctx_get_local_golden_image_ptr(
|
||||
struct nvgpu_gr_obj_ctx_golden_image *golden_image)
|
||||
{
|
||||
return nvgpu_gr_global_ctx_get_local_golden_image_ptr(
|
||||
golden_image->local_golden_image);
|
||||
}
|
||||
#endif
|
||||
|
||||
bool nvgpu_gr_obj_ctx_is_golden_image_ready(
|
||||
struct nvgpu_gr_obj_ctx_golden_image *golden_image)
|
||||
{
|
||||
bool ready;
|
||||
|
||||
nvgpu_mutex_acquire(&golden_image->ctx_mutex);
|
||||
ready = golden_image->ready;
|
||||
nvgpu_mutex_release(&golden_image->ctx_mutex);
|
||||
|
||||
return ready;
|
||||
}
|
||||
|
||||
int nvgpu_gr_obj_ctx_init(struct gk20a *g,
|
||||
struct nvgpu_gr_obj_ctx_golden_image **gr_golden_image, u32 size)
|
||||
{
|
||||
struct nvgpu_gr_obj_ctx_golden_image *golden_image;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_gr, "size = %u", size);
|
||||
|
||||
golden_image = nvgpu_kzalloc(g, sizeof(*golden_image));
|
||||
if (golden_image == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
nvgpu_gr_obj_ctx_set_golden_image_size(golden_image, size);
|
||||
|
||||
nvgpu_mutex_init(&golden_image->ctx_mutex);
|
||||
|
||||
*gr_golden_image = golden_image;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void nvgpu_gr_obj_ctx_deinit(struct gk20a *g,
|
||||
struct nvgpu_gr_obj_ctx_golden_image *golden_image)
|
||||
{
|
||||
if (golden_image == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (golden_image->local_golden_image != NULL) {
|
||||
nvgpu_gr_global_ctx_deinit_local_golden_image(g,
|
||||
golden_image->local_golden_image);
|
||||
golden_image->local_golden_image = NULL;
|
||||
}
|
||||
#ifdef CONFIG_NVGPU_POWER_PG
|
||||
nvgpu_pmu_set_golden_image_initialized(g, false);
|
||||
#endif
|
||||
golden_image->ready = false;
|
||||
nvgpu_kfree(g, golden_image);
|
||||
}
|
||||
|
||||
58
drivers/gpu/nvgpu/common/gr/obj_ctx_priv.h
Normal file
@@ -0,0 +1,58 @@
|
||||
/*
|
||||
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef NVGPU_GR_OBJ_CTX_PRIV_H
|
||||
#define NVGPU_GR_OBJ_CTX_PRIV_H
|
||||
|
||||
#include <nvgpu/types.h>
|
||||
#include <nvgpu/lock.h>
|
||||
|
||||
struct nvgpu_gr_global_ctx_local_golden_image;
|
||||
|
||||
/**
|
||||
* Golden context image descriptor structure.
|
||||
*
|
||||
* This structure stores details of the Golden context image.
|
||||
*/
|
||||
struct nvgpu_gr_obj_ctx_golden_image {
|
||||
/**
|
||||
* Flag to indicate if Golden context image is ready or not.
|
||||
*/
|
||||
bool ready;
|
||||
|
||||
/**
|
||||
* Mutex to hold for accesses to Golden context image.
|
||||
*/
|
||||
struct nvgpu_mutex ctx_mutex;
|
||||
|
||||
/**
|
||||
* Size of Golden context image.
|
||||
*/
|
||||
size_t size;
|
||||
|
||||
/**
|
||||
* Pointer to local Golden context image struct.
|
||||
*/
|
||||
struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image;
|
||||
};
|
||||
|
||||
#endif /* NVGPU_GR_OBJ_CTX_PRIV_H */
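A rough lifecycle sketch for this descriptor, pieced together from nvgpu_gr_obj_ctx_init(), nvgpu_gr_obj_ctx_is_golden_image_ready() and nvgpu_gr_obj_ctx_deinit() in obj_ctx.c above; the function name is hypothetical and the surrounding gr setup and error handling are omitted:

static int example_golden_image_lifecycle(struct gk20a *g, u32 size)
{
        struct nvgpu_gr_obj_ctx_golden_image *gi = NULL;
        int err;

        err = nvgpu_gr_obj_ctx_init(g, &gi, size);      /* allocates descriptor + mutex */
        if (err != 0)
                return err;

        /* nvgpu_gr_obj_ctx_alloc() populates the golden image on first use. */

        if (!nvgpu_gr_obj_ctx_is_golden_image_ready(gi)) {
                /* no channel has created an object context yet */
        }

        nvgpu_gr_obj_ctx_deinit(g, gi);                 /* frees image copy + descriptor */
        return 0;
}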
|
||||
167
drivers/gpu/nvgpu/common/gr/subctx.c
Normal file
@@ -0,0 +1,167 @@
|
||||
/*
|
||||
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/gr/subctx.h>
|
||||
#include <nvgpu/gr/ctx.h>
|
||||
#include <nvgpu/gmmu.h>
|
||||
#include <nvgpu/dma.h>
|
||||
|
||||
#include "common/gr/subctx_priv.h"
|
||||
|
||||
struct nvgpu_gr_subctx *nvgpu_gr_subctx_alloc(struct gk20a *g,
|
||||
struct vm_gk20a *vm)
|
||||
{
|
||||
struct nvgpu_gr_subctx *subctx;
|
||||
int err = 0;
|
||||
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
subctx = nvgpu_kzalloc(g, sizeof(*subctx));
|
||||
if (subctx == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
err = nvgpu_dma_alloc_sys(g,
|
||||
g->ops.gr.ctxsw_prog.hw_get_fecs_header_size(),
|
||||
&subctx->ctx_header);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "failed to allocate sub ctx header");
|
||||
goto err_free_subctx;
|
||||
}
|
||||
|
||||
subctx->ctx_header.gpu_va = nvgpu_gmmu_map(vm,
|
||||
&subctx->ctx_header,
|
||||
subctx->ctx_header.size,
|
||||
0, /* not GPU-cacheable */
|
||||
gk20a_mem_flag_none, true,
|
||||
subctx->ctx_header.aperture);
|
||||
if (subctx->ctx_header.gpu_va == 0ULL) {
|
||||
nvgpu_err(g, "failed to map ctx header");
|
||||
goto err_free_ctx_header;
|
||||
}
|
||||
|
||||
return subctx;
|
||||
|
||||
err_free_ctx_header:
|
||||
nvgpu_dma_free(g, &subctx->ctx_header);
|
||||
err_free_subctx:
|
||||
nvgpu_kfree(g, subctx);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void nvgpu_gr_subctx_free(struct gk20a *g,
|
||||
struct nvgpu_gr_subctx *subctx,
|
||||
struct vm_gk20a *vm)
|
||||
{
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
nvgpu_gmmu_unmap(vm, &subctx->ctx_header,
|
||||
subctx->ctx_header.gpu_va);
|
||||
nvgpu_dma_free(g, &subctx->ctx_header);
|
||||
nvgpu_kfree(g, subctx);
|
||||
}
|
||||
|
||||
void nvgpu_gr_subctx_load_ctx_header(struct gk20a *g,
|
||||
struct nvgpu_gr_subctx *subctx,
|
||||
struct nvgpu_gr_ctx *gr_ctx, u64 gpu_va)
|
||||
{
|
||||
struct nvgpu_mem *ctxheader = &subctx->ctx_header;
|
||||
int err = 0;
|
||||
|
||||
err = g->ops.mm.cache.l2_flush(g, true);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "l2_flush failed");
|
||||
}
|
||||
|
||||
/* set priv access map */
|
||||
g->ops.gr.ctxsw_prog.set_priv_access_map_addr(g, ctxheader,
|
||||
nvgpu_gr_ctx_get_global_ctx_va(gr_ctx,
|
||||
NVGPU_GR_CTX_PRIV_ACCESS_MAP_VA));
|
||||
|
||||
g->ops.gr.ctxsw_prog.set_patch_addr(g, ctxheader,
|
||||
nvgpu_gr_ctx_get_patch_ctx_mem(gr_ctx)->gpu_va);
|
||||
|
||||
#ifdef CONFIG_NVGPU_DEBUGGER
|
||||
g->ops.gr.ctxsw_prog.set_pm_ptr(g, ctxheader,
|
||||
nvgpu_gr_ctx_get_pm_ctx_mem(gr_ctx)->gpu_va);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_NVGPU_GRAPHICS
|
||||
g->ops.gr.ctxsw_prog.set_zcull_ptr(g, ctxheader,
|
||||
nvgpu_gr_ctx_get_zcull_ctx_va(gr_ctx));
|
||||
#endif
|
||||
|
||||
g->ops.gr.ctxsw_prog.set_context_buffer_ptr(g, ctxheader, gpu_va);
|
||||
|
||||
g->ops.gr.ctxsw_prog.set_type_per_veid_header(g, ctxheader);
|
||||
}
|
||||
|
||||
struct nvgpu_mem *nvgpu_gr_subctx_get_ctx_header(struct nvgpu_gr_subctx *subctx)
|
||||
{
|
||||
return &subctx->ctx_header;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_HAL_NON_FUSA
|
||||
void nvgpu_gr_subctx_set_patch_ctx(struct gk20a *g,
|
||||
struct nvgpu_gr_subctx *subctx, struct nvgpu_gr_ctx *gr_ctx)
|
||||
{
|
||||
g->ops.gr.ctxsw_prog.set_patch_addr(g, &subctx->ctx_header,
|
||||
nvgpu_gr_ctx_get_patch_ctx_mem(gr_ctx)->gpu_va);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_NVGPU_GRAPHICS
|
||||
void nvgpu_gr_subctx_zcull_setup(struct gk20a *g, struct nvgpu_gr_subctx *subctx,
|
||||
struct nvgpu_gr_ctx *gr_ctx)
|
||||
{
|
||||
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
g->ops.gr.ctxsw_prog.set_zcull_ptr(g, &subctx->ctx_header,
|
||||
nvgpu_gr_ctx_get_zcull_ctx_va(gr_ctx));
|
||||
}
|
||||
#endif /* CONFIG_NVGPU_GRAPHICS */
|
||||
|
||||
#ifdef CONFIG_NVGPU_GFXP
|
||||
void nvgpu_gr_subctx_set_preemption_buffer_va(struct gk20a *g,
|
||||
struct nvgpu_gr_subctx *subctx, struct nvgpu_gr_ctx *gr_ctx)
|
||||
{
|
||||
g->ops.gr.ctxsw_prog.set_full_preemption_ptr(g, &subctx->ctx_header,
|
||||
nvgpu_gr_ctx_get_preempt_ctxsw_buffer(gr_ctx)->gpu_va);
|
||||
|
||||
if (g->ops.gr.ctxsw_prog.set_full_preemption_ptr_veid0 != NULL) {
|
||||
g->ops.gr.ctxsw_prog.set_full_preemption_ptr_veid0(g,
|
||||
&subctx->ctx_header,
|
||||
nvgpu_gr_ctx_get_preempt_ctxsw_buffer(gr_ctx)->gpu_va);
|
||||
}
|
||||
}
|
||||
#endif /* CONFIG_NVGPU_GFXP */
|
||||
|
||||
#ifdef CONFIG_NVGPU_DEBUGGER
|
||||
void nvgpu_gr_subctx_set_hwpm_mode(struct gk20a *g,
|
||||
struct nvgpu_gr_subctx *subctx, struct nvgpu_gr_ctx *gr_ctx)
|
||||
{
|
||||
g->ops.gr.ctxsw_prog.set_pm_ptr(g, &subctx->ctx_header,
|
||||
nvgpu_gr_ctx_get_pm_ctx_mem(gr_ctx)->gpu_va);
|
||||
}
|
||||
#endif
|
||||
40
drivers/gpu/nvgpu/common/gr/subctx_priv.h
Normal file
@@ -0,0 +1,40 @@
|
||||
/*
|
||||
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef NVGPU_GR_SUBCTX_PRIV_H
|
||||
#define NVGPU_GR_SUBCTX_PRIV_H
|
||||
|
||||
struct nvgpu_mem;
|
||||
|
||||
/**
|
||||
* GR subcontext data structure.
|
||||
*
|
||||
* One subcontext is allocated per GPU channel.
|
||||
*/
|
||||
struct nvgpu_gr_subctx {
|
||||
/**
|
||||
* Memory to hold subcontext header image.
|
||||
*/
|
||||
struct nvgpu_mem ctx_header;
|
||||
};
|
||||
|
||||
#endif /* NVGPU_GR_SUBCTX_PRIV_H */
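A rough per-channel usage sketch built from the subctx.c functions above (allocate, point the header at the channel's gr ctx image, free); the call-site name and the surrounding channel setup are hypothetical:

static int example_channel_subctx_setup(struct gk20a *g, struct vm_gk20a *vm,
                struct nvgpu_gr_ctx *gr_ctx, u64 gr_ctx_gpu_va)
{
        struct nvgpu_gr_subctx *subctx;

        subctx = nvgpu_gr_subctx_alloc(g, vm);          /* DMA-allocs and maps the header */
        if (subctx == NULL)
                return -ENOMEM;

        /* Point the per-VEID context header at this channel's gr ctx image. */
        nvgpu_gr_subctx_load_ctx_header(g, subctx, gr_ctx, gr_ctx_gpu_va);

        /* ... channel runs ... */

        nvgpu_gr_subctx_free(g, subctx, vm);
        return 0;
}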
|
||||
690
drivers/gpu/nvgpu/common/gr/zbc.c
Normal file
@@ -0,0 +1,690 @@
|
||||
/*
|
||||
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/io.h>
|
||||
#include <nvgpu/bug.h>
|
||||
#include <nvgpu/string.h>
|
||||
#include <nvgpu/power_features/pg.h>
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
#include <nvgpu/pmu/pmu_pg.h>
|
||||
#endif
|
||||
|
||||
#include "zbc_priv.h"
|
||||
|
||||
#define ZBC_ENTRY_UPDATED 1
|
||||
#define ZBC_ENTRY_ADDED 2
|
||||
|
||||
static void nvgpu_gr_zbc_update_stencil_reg(struct gk20a *g,
|
||||
struct nvgpu_gr_zbc_entry *stencil_val, u32 index)
|
||||
{
|
||||
/* update l2 table */
|
||||
if (g->ops.ltc.set_zbc_s_entry != NULL) {
|
||||
g->ops.ltc.set_zbc_s_entry(g, stencil_val->stencil, index);
|
||||
}
|
||||
|
||||
/* update zbc stencil registers */
|
||||
g->ops.gr.zbc.add_stencil(g, stencil_val, index);
|
||||
}
|
||||
|
||||
static int nvgpu_gr_zbc_add_stencil(struct gk20a *g, struct nvgpu_gr_zbc *zbc,
|
||||
struct nvgpu_gr_zbc_entry *stencil_val)
|
||||
{
|
||||
struct zbc_stencil_table *s_tbl;
|
||||
u32 i;
|
||||
int entry_added = -ENOSPC;
|
||||
bool entry_exist = false;
|
||||
|
||||
/* search existing tables */
|
||||
for (i = zbc->min_stencil_index; i <= zbc->max_used_stencil_index;
|
||||
i++) {
|
||||
|
||||
s_tbl = &zbc->zbc_s_tbl[i];
|
||||
|
||||
if ((s_tbl->ref_cnt != 0U) &&
|
||||
(s_tbl->stencil == stencil_val->stencil) &&
|
||||
(s_tbl->format == stencil_val->format)) {
|
||||
s_tbl->ref_cnt = nvgpu_safe_add_u32(s_tbl->ref_cnt, 1U);
|
||||
entry_exist = true;
|
||||
entry_added = ZBC_ENTRY_UPDATED;
|
||||
break;
|
||||
}
|
||||
}
|
||||
/* add new table */
|
||||
if (!entry_exist &&
|
||||
(zbc->max_used_stencil_index < zbc->max_stencil_index)) {
|
||||
|
||||
/* Increment used index and add new entry at that index */
|
||||
zbc->max_used_stencil_index =
|
||||
nvgpu_safe_add_u32(zbc->max_used_stencil_index, 1U);
|
||||
|
||||
s_tbl = &zbc->zbc_s_tbl[zbc->max_used_stencil_index];
|
||||
WARN_ON(s_tbl->ref_cnt != 0U);
|
||||
|
||||
/* update sw copy */
|
||||
s_tbl->stencil = stencil_val->stencil;
|
||||
s_tbl->format = stencil_val->format;
|
||||
s_tbl->ref_cnt = nvgpu_safe_add_u32(s_tbl->ref_cnt, 1U);
|
||||
|
||||
nvgpu_gr_zbc_update_stencil_reg(g, stencil_val,
|
||||
zbc->max_used_stencil_index);
|
||||
|
||||
entry_added = ZBC_ENTRY_ADDED;
|
||||
}
|
||||
return entry_added;
|
||||
}
|
||||
|
||||
static void nvgpu_gr_zbc_update_depth_reg(struct gk20a *g,
|
||||
struct nvgpu_gr_zbc_entry *depth_val, u32 index)
|
||||
{
|
||||
/* update l2 table */
|
||||
g->ops.ltc.set_zbc_depth_entry(g, depth_val->depth, index);
|
||||
|
||||
/* update zbc registers */
|
||||
g->ops.gr.zbc.add_depth(g, depth_val, index);
|
||||
}
|
||||
|
||||
static int nvgpu_gr_zbc_add_depth(struct gk20a *g, struct nvgpu_gr_zbc *zbc,
|
||||
struct nvgpu_gr_zbc_entry *depth_val)
|
||||
{
|
||||
struct zbc_depth_table *d_tbl;
|
||||
u32 i;
|
||||
int entry_added = -ENOSPC;
|
||||
bool entry_exist = false;
|
||||
|
||||
/* search existing tables */
|
||||
for (i = zbc->min_depth_index; i <= zbc->max_used_depth_index; i++) {
|
||||
|
||||
d_tbl = &zbc->zbc_dep_tbl[i];
|
||||
|
||||
if ((d_tbl->ref_cnt != 0U) &&
|
||||
(d_tbl->depth == depth_val->depth) &&
|
||||
(d_tbl->format == depth_val->format)) {
|
||||
d_tbl->ref_cnt = nvgpu_safe_add_u32(d_tbl->ref_cnt, 1U);
|
||||
entry_exist = true;
|
||||
entry_added = ZBC_ENTRY_UPDATED;
|
||||
break;
|
||||
}
|
||||
}
|
||||
/* add new table */
|
||||
if (!entry_exist &&
|
||||
(zbc->max_used_depth_index < zbc->max_depth_index)) {
|
||||
|
||||
/* Increment used index and add new entry at that index */
|
||||
zbc->max_used_depth_index =
|
||||
nvgpu_safe_add_u32(zbc->max_used_depth_index, 1U);
|
||||
|
||||
d_tbl = &zbc->zbc_dep_tbl[zbc->max_used_depth_index];
|
||||
WARN_ON(d_tbl->ref_cnt != 0U);
|
||||
|
||||
/* update sw copy */
|
||||
d_tbl->depth = depth_val->depth;
|
||||
d_tbl->format = depth_val->format;
|
||||
d_tbl->ref_cnt = nvgpu_safe_add_u32(d_tbl->ref_cnt, 1U);
|
||||
|
||||
nvgpu_gr_zbc_update_depth_reg(g, depth_val,
|
||||
zbc->max_used_depth_index);
|
||||
|
||||
entry_added = ZBC_ENTRY_ADDED;
|
||||
}
|
||||
|
||||
return entry_added;
|
||||
}

static void nvgpu_gr_zbc_update_color_reg(struct gk20a *g,
		struct nvgpu_gr_zbc_entry *color_val, u32 index)
{
	/* update l2 table */
	g->ops.ltc.set_zbc_color_entry(g, color_val->color_l2, index);

	/* update zbc registers */
	g->ops.gr.zbc.add_color(g, color_val, index);
}

static int nvgpu_gr_zbc_add_color(struct gk20a *g, struct nvgpu_gr_zbc *zbc,
		struct nvgpu_gr_zbc_entry *color_val)
{
	struct zbc_color_table *c_tbl;
	u32 i;
	int entry_added = -ENOSPC;
	bool entry_exist = false;

	/* search existing table */
	for (i = zbc->min_color_index; i <= zbc->max_used_color_index; i++) {

		c_tbl = &zbc->zbc_col_tbl[i];

		if ((c_tbl->ref_cnt != 0U) &&
		    (c_tbl->format == color_val->format) &&
		    (nvgpu_memcmp((u8 *)c_tbl->color_ds,
			(u8 *)color_val->color_ds,
			sizeof(color_val->color_ds)) == 0) &&
		    (nvgpu_memcmp((u8 *)c_tbl->color_l2,
			(u8 *)color_val->color_l2,
			sizeof(color_val->color_l2)) == 0)) {

			c_tbl->ref_cnt = nvgpu_safe_add_u32(c_tbl->ref_cnt, 1U);
			entry_exist = true;
			entry_added = ZBC_ENTRY_UPDATED;
			break;
		}
	}

	/* add new entry */
	if (!entry_exist &&
	    (zbc->max_used_color_index < zbc->max_color_index)) {

		/* Increment used index and add new entry at that index */
		zbc->max_used_color_index =
			nvgpu_safe_add_u32(zbc->max_used_color_index, 1U);

		c_tbl = &zbc->zbc_col_tbl[zbc->max_used_color_index];
		WARN_ON(c_tbl->ref_cnt != 0U);

		/* update local copy */
		for (i = 0; i < NVGPU_GR_ZBC_COLOR_VALUE_SIZE; i++) {
			c_tbl->color_l2[i] = color_val->color_l2[i];
			c_tbl->color_ds[i] = color_val->color_ds[i];
		}
		c_tbl->format = color_val->format;
		c_tbl->ref_cnt = nvgpu_safe_add_u32(c_tbl->ref_cnt, 1U);

		nvgpu_gr_zbc_update_color_reg(g, color_val,
				zbc->max_used_color_index);

		entry_added = ZBC_ENTRY_ADDED;
	}

	return entry_added;
}

static int nvgpu_gr_zbc_add(struct gk20a *g, struct nvgpu_gr_zbc *zbc,
		struct nvgpu_gr_zbc_entry *zbc_val)
{
	int added = 0;
#if defined(CONFIG_NVGPU_LS_PMU) && defined(CONFIG_NVGPU_POWER_PG)
	u32 entries;
#endif

	/* no endian swap ? */
	nvgpu_mutex_acquire(&zbc->zbc_lock);
	nvgpu_speculation_barrier();
	switch (zbc_val->type) {
	case NVGPU_GR_ZBC_TYPE_COLOR:
		added = nvgpu_gr_zbc_add_color(g, zbc, zbc_val);
		break;
	case NVGPU_GR_ZBC_TYPE_DEPTH:
		added = nvgpu_gr_zbc_add_depth(g, zbc, zbc_val);
		break;
	case NVGPU_GR_ZBC_TYPE_STENCIL:
		if (nvgpu_is_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL)) {
			added = nvgpu_gr_zbc_add_stencil(g, zbc, zbc_val);
		} else {
			nvgpu_err(g,
				"zbc stencil table is not supported");
			added = -EINVAL;
			goto err_mutex;
		}
		break;
	default:
		nvgpu_err(g,
			"invalid zbc table type %d", zbc_val->type);
		added = -EINVAL;
		goto err_mutex;
	}

#if defined(CONFIG_NVGPU_LS_PMU) && defined(CONFIG_NVGPU_POWER_PG)
	if (added == ZBC_ENTRY_ADDED) {
		/* update zbc for elpg only when new entry is added */
		entries = max(
			nvgpu_safe_sub_u32(zbc->max_used_color_index,
				zbc->min_color_index),
			nvgpu_safe_sub_u32(zbc->max_used_depth_index,
				zbc->min_depth_index));
		if (g->elpg_enabled) {
			nvgpu_pmu_save_zbc(g, entries);
		}
	}
#endif

err_mutex:
	nvgpu_mutex_release(&zbc->zbc_lock);
	if (added < 0) {
		return added;
	}
	return 0;
}

int nvgpu_gr_zbc_set_table(struct gk20a *g, struct nvgpu_gr_zbc *zbc,
		struct nvgpu_gr_zbc_entry *zbc_val)
{
	nvgpu_log(g, gpu_dbg_zbc, " zbc_val->type %u", zbc_val->type);

	return nvgpu_pg_elpg_protected_call(g,
		nvgpu_gr_zbc_add(g, zbc, zbc_val));
}
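
/*
 * Illustrative sketch (not part of the original source, kept disabled): one
 * possible way a caller could program a depth ZBC clear value of 1.0f through
 * the public helpers in this file. The wrapper name
 * gr_zbc_example_add_depth_one() and the way the nvgpu_gr_zbc pointer is
 * obtained are assumptions made for the example only.
 */
#if 0
static int gr_zbc_example_add_depth_one(struct gk20a *g,
		struct nvgpu_gr_zbc *zbc)
{
	struct nvgpu_gr_zbc_entry *entry = nvgpu_gr_zbc_entry_alloc(g);
	int err;

	if (entry == NULL) {
		return -ENOMEM;
	}

	/*
	 * 0x3f800000 is the IEEE-754 bit pattern of 1.0f, the same value used
	 * by the default SW depth table in this file.
	 */
	nvgpu_gr_zbc_set_entry_type(entry, NVGPU_GR_ZBC_TYPE_DEPTH);
	nvgpu_gr_zbc_set_entry_format(entry, GR_ZBC_Z_FMT_VAL_FP32);
	nvgpu_gr_zbc_set_entry_depth(entry, 0x3f800000U);

	err = nvgpu_gr_zbc_set_table(g, zbc, entry);

	nvgpu_gr_zbc_entry_free(g, entry);
	return err;
}
#endif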

/*
 * Get a zbc table entry specified by index.
 * When the query type is NVGPU_GR_ZBC_TYPE_INVALID, the color table size is
 * returned in index_size instead.
 */
int nvgpu_gr_zbc_query_table(struct gk20a *g, struct nvgpu_gr_zbc *zbc,
		struct nvgpu_gr_zbc_query_params *query_params)
{
	u32 index = query_params->index_size;
	u32 i;

	nvgpu_speculation_barrier();
	switch (query_params->type) {
	case NVGPU_GR_ZBC_TYPE_INVALID:
		nvgpu_log(g, gpu_dbg_zbc, "Query zbc size");
		query_params->index_size = nvgpu_safe_add_u32(
			nvgpu_safe_sub_u32(zbc->max_color_index,
				zbc->min_color_index), 1U);
		break;
	case NVGPU_GR_ZBC_TYPE_COLOR:
		if ((index < zbc->min_color_index) ||
		    (index > zbc->max_color_index)) {
			nvgpu_err(g, "invalid zbc color table index %u", index);
			return -EINVAL;
		}
		nvgpu_log(g, gpu_dbg_zbc, "Query zbc color at index %u", index);

		nvgpu_speculation_barrier();
		for (i = 0; i < NVGPU_GR_ZBC_COLOR_VALUE_SIZE; i++) {
			query_params->color_l2[i] =
				zbc->zbc_col_tbl[index].color_l2[i];
			query_params->color_ds[i] =
				zbc->zbc_col_tbl[index].color_ds[i];
		}
		query_params->format = zbc->zbc_col_tbl[index].format;
		query_params->ref_cnt = zbc->zbc_col_tbl[index].ref_cnt;

		break;
	case NVGPU_GR_ZBC_TYPE_DEPTH:
		if ((index < zbc->min_depth_index) ||
		    (index > zbc->max_depth_index)) {
			nvgpu_err(g, "invalid zbc depth table index %u", index);
			return -EINVAL;
		}
		nvgpu_log(g, gpu_dbg_zbc, "Query zbc depth at index %u", index);

		nvgpu_speculation_barrier();
		query_params->depth = zbc->zbc_dep_tbl[index].depth;
		query_params->format = zbc->zbc_dep_tbl[index].format;
		query_params->ref_cnt = zbc->zbc_dep_tbl[index].ref_cnt;
		break;
	case NVGPU_GR_ZBC_TYPE_STENCIL:
		if (nvgpu_is_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL)) {
			if ((index < zbc->min_stencil_index) ||
			    (index > zbc->max_stencil_index)) {
				nvgpu_err(g,
					"invalid zbc stencil table index %u",
					index);
				return -EINVAL;
			}
			nvgpu_log(g, gpu_dbg_zbc,
				"Query zbc stencil at index %u", index);

			nvgpu_speculation_barrier();
			query_params->stencil = zbc->zbc_s_tbl[index].stencil;
			query_params->format = zbc->zbc_s_tbl[index].format;
			query_params->ref_cnt = zbc->zbc_s_tbl[index].ref_cnt;
		} else {
			nvgpu_err(g, "zbc stencil table is not supported");
			return -EINVAL;
		}
		break;
	default:
		nvgpu_err(g, "invalid zbc table type");
		return -EINVAL;
	}

	return 0;
}
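
/*
 * Illustrative sketch (not part of the original source, kept disabled):
 * querying the color table size and then the entry at the minimum valid
 * index. The wrapper name gr_zbc_example_query() is an assumption made for
 * the example only.
 */
#if 0
static int gr_zbc_example_query(struct gk20a *g, struct nvgpu_gr_zbc *zbc)
{
	struct nvgpu_gr_zbc_query_params params = { };
	int err;

	/* first ask how many color entries the table holds */
	params.type = NVGPU_GR_ZBC_TYPE_INVALID;
	err = nvgpu_gr_zbc_query_table(g, zbc, &params);
	if (err != 0) {
		return err;
	}
	nvgpu_log(g, gpu_dbg_zbc, "color table size %u", params.index_size);

	/* then read back the first valid color entry */
	params.type = NVGPU_GR_ZBC_TYPE_COLOR;
	params.index_size = zbc->min_color_index;
	return nvgpu_gr_zbc_query_table(g, zbc, &params);
}
#endif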

/*
 * Update zbc table registers as per sw copy of zbc tables
 */
void nvgpu_gr_zbc_load_table(struct gk20a *g, struct nvgpu_gr_zbc *zbc)
{
	unsigned int i;

	for (i = zbc->min_color_index; i <= zbc->max_used_color_index; i++) {
		struct zbc_color_table *c_tbl = &zbc->zbc_col_tbl[i];
		struct nvgpu_gr_zbc_entry zbc_val;

		zbc_val.type = NVGPU_GR_ZBC_TYPE_COLOR;
		nvgpu_memcpy((u8 *)zbc_val.color_ds,
			(u8 *)c_tbl->color_ds, sizeof(zbc_val.color_ds));
		nvgpu_memcpy((u8 *)zbc_val.color_l2,
			(u8 *)c_tbl->color_l2, sizeof(zbc_val.color_l2));
		zbc_val.format = c_tbl->format;

		nvgpu_gr_zbc_update_color_reg(g, &zbc_val, i);
	}

	for (i = zbc->min_depth_index; i <= zbc->max_used_depth_index; i++) {
		struct zbc_depth_table *d_tbl = &zbc->zbc_dep_tbl[i];
		struct nvgpu_gr_zbc_entry zbc_val;

		zbc_val.type = NVGPU_GR_ZBC_TYPE_DEPTH;
		zbc_val.depth = d_tbl->depth;
		zbc_val.format = d_tbl->format;

		nvgpu_gr_zbc_update_depth_reg(g, &zbc_val, i);
	}

	if (nvgpu_is_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL)) {
		for (i = zbc->min_stencil_index;
		     i <= zbc->max_used_stencil_index; i++) {
			struct zbc_stencil_table *s_tbl = &zbc->zbc_s_tbl[i];
			struct nvgpu_gr_zbc_entry zbc_val;

			zbc_val.type = NVGPU_GR_ZBC_TYPE_STENCIL;
			zbc_val.stencil = s_tbl->stencil;
			zbc_val.format = s_tbl->format;

			nvgpu_gr_zbc_update_stencil_reg(g, &zbc_val, i);
		}
	}
}

static void nvgpu_gr_zbc_load_default_sw_stencil_table(struct gk20a *g,
		struct nvgpu_gr_zbc *zbc)
{
	u32 index = zbc->min_stencil_index;

	zbc->zbc_s_tbl[index].stencil = 0x0;
	zbc->zbc_s_tbl[index].format = GR_ZBC_STENCIL_CLEAR_FMT_U8;
	zbc->zbc_s_tbl[index].ref_cnt =
		nvgpu_safe_add_u32(zbc->zbc_s_tbl[index].ref_cnt, 1U);
	index = nvgpu_safe_add_u32(index, 1U);

	zbc->zbc_s_tbl[index].stencil = 0x1;
	zbc->zbc_s_tbl[index].format = GR_ZBC_STENCIL_CLEAR_FMT_U8;
	zbc->zbc_s_tbl[index].ref_cnt =
		nvgpu_safe_add_u32(zbc->zbc_s_tbl[index].ref_cnt, 1U);
	index = nvgpu_safe_add_u32(index, 1U);

	zbc->zbc_s_tbl[index].stencil = 0xff;
	zbc->zbc_s_tbl[index].format = GR_ZBC_STENCIL_CLEAR_FMT_U8;
	zbc->zbc_s_tbl[index].ref_cnt =
		nvgpu_safe_add_u32(zbc->zbc_s_tbl[index].ref_cnt, 1U);

	zbc->max_used_stencil_index = index;
}
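
/*
 * The default depth table below seeds two reference-counted entries:
 * depth 0x3f800000 (the IEEE-754 bit pattern of 1.0f) and depth 0x0,
 * both in the FP32 clear format.
 */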

static void nvgpu_gr_zbc_load_default_sw_depth_table(struct gk20a *g,
		struct nvgpu_gr_zbc *zbc)
{
	u32 index = zbc->min_depth_index;

	zbc->zbc_dep_tbl[index].format = GR_ZBC_Z_FMT_VAL_FP32;
	zbc->zbc_dep_tbl[index].depth = 0x3f800000;
	zbc->zbc_dep_tbl[index].ref_cnt =
		nvgpu_safe_add_u32(zbc->zbc_dep_tbl[index].ref_cnt, 1U);
	index = nvgpu_safe_add_u32(index, 1U);

	zbc->zbc_dep_tbl[index].format = GR_ZBC_Z_FMT_VAL_FP32;
	zbc->zbc_dep_tbl[index].depth = 0;
	zbc->zbc_dep_tbl[index].ref_cnt =
		nvgpu_safe_add_u32(zbc->zbc_dep_tbl[index].ref_cnt, 1U);

	zbc->max_used_depth_index = index;
}
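
/*
 * The default color table below seeds three reference-counted entries:
 * opaque black, transparent black and opaque white. The color_ds values are
 * FP32 per-channel components (0x3f800000 being 1.0f), while the color_l2
 * values are the raw words handed to g->ops.ltc.set_zbc_color_entry() for
 * the L2 ZBC table.
 */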

static void nvgpu_gr_zbc_load_default_sw_color_table(struct gk20a *g,
		struct nvgpu_gr_zbc *zbc)
{
	u32 i;
	u32 index = zbc->min_color_index;

	/* Opaque black (i.e. solid black, fmt 0x28 = A8B8G8R8) */
	zbc->zbc_col_tbl[index].format = GR_ZBC_SOLID_BLACK_COLOR_FMT;
	for (i = 0U; i < NVGPU_GR_ZBC_COLOR_VALUE_SIZE; i++) {
		zbc->zbc_col_tbl[index].color_ds[i] = 0U;
		zbc->zbc_col_tbl[index].color_l2[i] = 0xff000000U;
	}
	zbc->zbc_col_tbl[index].color_ds[3] = 0x3f800000U;
	zbc->zbc_col_tbl[index].ref_cnt =
		nvgpu_safe_add_u32(zbc->zbc_col_tbl[index].ref_cnt, 1U);
	index = nvgpu_safe_add_u32(index, 1U);

	/* Transparent black = (fmt 1 = zero) */
	zbc->zbc_col_tbl[index].format = GR_ZBC_TRANSPARENT_BLACK_COLOR_FMT;
	for (i = 0; i < NVGPU_GR_ZBC_COLOR_VALUE_SIZE; i++) {
		zbc->zbc_col_tbl[index].color_ds[i] = 0U;
		zbc->zbc_col_tbl[index].color_l2[i] = 0U;
	}
	zbc->zbc_col_tbl[index].ref_cnt =
		nvgpu_safe_add_u32(zbc->zbc_col_tbl[index].ref_cnt, 1U);
	index = nvgpu_safe_add_u32(index, 1U);

	/* Opaque white (i.e. solid white) = (fmt 2 = uniform 1) */
	zbc->zbc_col_tbl[index].format = GR_ZBC_SOLID_WHITE_COLOR_FMT;
	for (i = 0; i < NVGPU_GR_ZBC_COLOR_VALUE_SIZE; i++) {
		zbc->zbc_col_tbl[index].color_ds[i] = 0x3f800000U;
		zbc->zbc_col_tbl[index].color_l2[i] = 0xffffffffU;
	}
	zbc->zbc_col_tbl[index].ref_cnt =
		nvgpu_safe_add_u32(zbc->zbc_col_tbl[index].ref_cnt, 1U);

	zbc->max_used_color_index = index;
}

static void nvgpu_gr_zbc_init_indices(struct gk20a *g, struct nvgpu_gr_zbc *zbc)
{
	struct nvgpu_gr_zbc_table_indices zbc_indices;

	g->ops.gr.zbc.init_table_indices(g, &zbc_indices);

	zbc->min_color_index = zbc_indices.min_color_index;
	zbc->max_color_index = zbc_indices.max_color_index;
	zbc->min_depth_index = zbc_indices.min_depth_index;
	zbc->max_depth_index = zbc_indices.max_depth_index;
	zbc->min_stencil_index = zbc_indices.min_stencil_index;
	zbc->max_stencil_index = zbc_indices.max_stencil_index;

	nvgpu_log(g, gpu_dbg_zbc, "zbc->min_color_index %u",
		zbc->min_color_index);
	nvgpu_log(g, gpu_dbg_zbc, "zbc->max_color_index %u",
		zbc->max_color_index);
	nvgpu_log(g, gpu_dbg_zbc, "zbc->min_depth_index %u",
		zbc->min_depth_index);
	nvgpu_log(g, gpu_dbg_zbc, "zbc->max_depth_index %u",
		zbc->max_depth_index);
	nvgpu_log(g, gpu_dbg_zbc, "zbc->min_stencil_index %u",
		zbc->min_stencil_index);
	nvgpu_log(g, gpu_dbg_zbc, "zbc->max_stencil_index %u",
		zbc->max_stencil_index);
}

static void nvgpu_gr_zbc_load_default_sw_table(struct gk20a *g,
		struct nvgpu_gr_zbc *zbc)
{
	nvgpu_mutex_init(&zbc->zbc_lock);

	nvgpu_gr_zbc_load_default_sw_color_table(g, zbc);

	nvgpu_gr_zbc_load_default_sw_depth_table(g, zbc);

	if (nvgpu_is_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL)) {
		nvgpu_gr_zbc_load_default_sw_stencil_table(g, zbc);
	}
}
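
/*
 * The SW tables below are sized as (max index + min index) entries. With a
 * minimum valid index of 1 (index 0 is reserved, see zbc_priv.h) this leaves
 * one slot for every index from 0 up to the corresponding maximum index.
 */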

static int gr_zbc_allocate_local_tbls(struct gk20a *g, struct nvgpu_gr_zbc *zbc)
{
	u32 zbc_col_size = nvgpu_safe_add_u32(zbc->max_color_index,
					zbc->min_color_index);
	u32 zbc_dep_size = nvgpu_safe_add_u32(zbc->max_depth_index,
					zbc->min_depth_index);
	u32 zbc_s_size = nvgpu_safe_add_u32(zbc->max_stencil_index,
					zbc->min_stencil_index);

	zbc->zbc_col_tbl = nvgpu_kzalloc(g,
			sizeof(struct zbc_color_table) * zbc_col_size);
	if (zbc->zbc_col_tbl == NULL) {
		goto alloc_col_tbl_err;
	}

	zbc->zbc_dep_tbl = nvgpu_kzalloc(g,
			sizeof(struct zbc_depth_table) * zbc_dep_size);

	if (zbc->zbc_dep_tbl == NULL) {
		goto alloc_dep_tbl_err;
	}

	zbc->zbc_s_tbl = nvgpu_kzalloc(g,
			sizeof(struct zbc_stencil_table) * zbc_s_size);
	if (zbc->zbc_s_tbl == NULL) {
		goto alloc_s_tbl_err;
	}

	return 0;

alloc_s_tbl_err:
	nvgpu_kfree(g, zbc->zbc_dep_tbl);
alloc_dep_tbl_err:
	nvgpu_kfree(g, zbc->zbc_col_tbl);
alloc_col_tbl_err:
	return -ENOMEM;
}

/* allocate the struct and load the table */
int nvgpu_gr_zbc_init(struct gk20a *g, struct nvgpu_gr_zbc **zbc)
{
	int ret = -ENOMEM;
	struct nvgpu_gr_zbc *gr_zbc = NULL;

	*zbc = NULL;

	gr_zbc = nvgpu_kzalloc(g, sizeof(*gr_zbc));
	if (gr_zbc == NULL) {
		return ret;
	}

	nvgpu_gr_zbc_init_indices(g, gr_zbc);

	ret = gr_zbc_allocate_local_tbls(g, gr_zbc);
	if (ret != 0) {
		goto alloc_err;
	}

	nvgpu_gr_zbc_load_default_sw_table(g, gr_zbc);

	*zbc = gr_zbc;
	return ret;

alloc_err:
	nvgpu_kfree(g, gr_zbc);
	return ret;
}

/* deallocate the memory for the struct */
void nvgpu_gr_zbc_deinit(struct gk20a *g, struct nvgpu_gr_zbc *zbc)
{
	if (zbc == NULL) {
		return;
	}

	nvgpu_kfree(g, zbc->zbc_col_tbl);
	nvgpu_kfree(g, zbc->zbc_dep_tbl);
	nvgpu_kfree(g, zbc->zbc_s_tbl);
	nvgpu_kfree(g, zbc);
}
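
/*
 * Illustrative lifecycle sketch (not part of the original source, kept
 * disabled): the typical ordering of the public entry points in this file.
 * Where the nvgpu_gr_zbc pointer is stored between calls is an assumption
 * made for the example only.
 */
#if 0
static int gr_zbc_example_lifecycle(struct gk20a *g)
{
	struct nvgpu_gr_zbc *zbc = NULL;
	int err = nvgpu_gr_zbc_init(g, &zbc);

	if (err != 0) {
		return err;
	}

	/* replay the SW default tables into the L2 and GR ZBC registers */
	nvgpu_gr_zbc_load_table(g, zbc);

	/* ... add and query entries at runtime ... */

	nvgpu_gr_zbc_deinit(g, zbc);
	return 0;
}
#endif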

struct nvgpu_gr_zbc_entry *nvgpu_gr_zbc_entry_alloc(struct gk20a *g)
{
	return nvgpu_kzalloc(g, sizeof(struct nvgpu_gr_zbc_entry));
}
void nvgpu_gr_zbc_entry_free(struct gk20a *g, struct nvgpu_gr_zbc_entry *entry)
{
	nvgpu_kfree(g, entry);
}

u32 nvgpu_gr_zbc_get_entry_color_ds(struct nvgpu_gr_zbc_entry *entry,
		int idx)
{
	return entry->color_ds[idx];
}

void nvgpu_gr_zbc_set_entry_color_ds(struct nvgpu_gr_zbc_entry *entry,
		int idx, u32 ds)
{
	entry->color_ds[idx] = ds;
}

u32 nvgpu_gr_zbc_get_entry_color_l2(struct nvgpu_gr_zbc_entry *entry,
		int idx)
{
	return entry->color_l2[idx];
}

void nvgpu_gr_zbc_set_entry_color_l2(struct nvgpu_gr_zbc_entry *entry,
		int idx, u32 l2)
{
	entry->color_l2[idx] = l2;
}

u32 nvgpu_gr_zbc_get_entry_depth(struct nvgpu_gr_zbc_entry *entry)
{
	return entry->depth;
}

void nvgpu_gr_zbc_set_entry_depth(struct nvgpu_gr_zbc_entry *entry,
		u32 depth)
{
	entry->depth = depth;
}

u32 nvgpu_gr_zbc_get_entry_stencil(struct nvgpu_gr_zbc_entry *entry)
{
	return entry->stencil;
}

void nvgpu_gr_zbc_set_entry_stencil(struct nvgpu_gr_zbc_entry *entry,
		u32 stencil)
{
	entry->stencil = stencil;
}

u32 nvgpu_gr_zbc_get_entry_type(struct nvgpu_gr_zbc_entry *entry)
{
	return entry->type;
}

void nvgpu_gr_zbc_set_entry_type(struct nvgpu_gr_zbc_entry *entry,
		u32 type)
{
	entry->type = type;
}

u32 nvgpu_gr_zbc_get_entry_format(struct nvgpu_gr_zbc_entry *entry)
{
	return entry->format;
}

void nvgpu_gr_zbc_set_entry_format(struct nvgpu_gr_zbc_entry *entry,
		u32 format)
{
	entry->format = format;
}
89
drivers/gpu/nvgpu/common/gr/zbc_priv.h
Normal file
@@ -0,0 +1,89 @@
/*
 * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef NVGPU_GR_ZBC_PRIV_H
#define NVGPU_GR_ZBC_PRIV_H

#include <nvgpu/gr/zbc.h>

/* Opaque black (i.e. solid black, fmt 0x28 = A8B8G8R8) */
#define GR_ZBC_SOLID_BLACK_COLOR_FMT		0x28
/* Transparent black = (fmt 1 = zero) */
#define GR_ZBC_TRANSPARENT_BLACK_COLOR_FMT	0x1
/* Opaque white (i.e. solid white) = (fmt 2 = uniform 1) */
#define GR_ZBC_SOLID_WHITE_COLOR_FMT		0x2
/* z format with fp32 */
#define GR_ZBC_Z_FMT_VAL_FP32			0x1

#define GR_ZBC_STENCIL_CLEAR_FMT_INVAILD	0U
#define GR_ZBC_STENCIL_CLEAR_FMT_U8		1U

struct zbc_color_table {
	u32 color_ds[NVGPU_GR_ZBC_COLOR_VALUE_SIZE];
	u32 color_l2[NVGPU_GR_ZBC_COLOR_VALUE_SIZE];
	u32 format;
	u32 ref_cnt;
};

struct zbc_depth_table {
	u32 depth;
	u32 format;
	u32 ref_cnt;
};

struct zbc_stencil_table {
	u32 stencil;
	u32 format;
	u32 ref_cnt;
};

struct nvgpu_gr_zbc_entry {
	u32 color_ds[NVGPU_GR_ZBC_COLOR_VALUE_SIZE];
	u32 color_l2[NVGPU_GR_ZBC_COLOR_VALUE_SIZE];
	u32 depth;
	u32 stencil;
	u32 type;
	u32 format;
};
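
/*
 * As used by common/gr/zbc.c, the indices in struct nvgpu_gr_zbc below keep
 * min_*_index <= max_used_*_index <= max_*_index: entries from min_*_index
 * up to max_used_*_index are populated, and new entries are appended until
 * max_*_index is reached.
 */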

/*
 * HW ZBC table valid entries start at index 1.
 * Entry 0 is reserved to mean "no matching entry found, do not use ZBC"
 */
struct nvgpu_gr_zbc {
	struct nvgpu_mutex zbc_lock;	/* Lock to access zbc table */
	struct zbc_color_table *zbc_col_tbl;	/* SW zbc color table pointer */
	struct zbc_depth_table *zbc_dep_tbl;	/* SW zbc depth table pointer */
	struct zbc_stencil_table *zbc_s_tbl;	/* SW zbc stencil table pointer */
	u32 min_color_index;	/* Minimum valid color table index */
	u32 min_depth_index;	/* Minimum valid depth table index */
	u32 min_stencil_index;	/* Minimum valid stencil table index */
	u32 max_color_index;	/* Maximum valid color table index */
	u32 max_depth_index;	/* Maximum valid depth table index */
	u32 max_stencil_index;	/* Maximum valid stencil table index */
	u32 max_used_color_index;	/* Max used color table index */
	u32 max_used_depth_index;	/* Max used depth table index */
	u32 max_used_stencil_index;	/* Max used stencil table index */
};

#endif /* NVGPU_GR_ZBC_PRIV_H */
176
drivers/gpu/nvgpu/common/gr/zcull.c
Normal file
@@ -0,0 +1,176 @@
/*
 * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <nvgpu/log.h>
#include <nvgpu/io.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/gr/subctx.h>
#include <nvgpu/gr/ctx.h>
#include <nvgpu/gr/zcull.h>
#include <nvgpu/gr/config.h>

#include "zcull_priv.h"

int nvgpu_gr_zcull_init(struct gk20a *g, struct nvgpu_gr_zcull **gr_zcull,
			u32 size, struct nvgpu_gr_config *config)
{
	struct nvgpu_gr_zcull *zcull;
	int err = 0;

	nvgpu_log(g, gpu_dbg_gr, "size = %u", size);

	zcull = nvgpu_kzalloc(g, sizeof(*zcull));
	if (zcull == NULL) {
		err = -ENOMEM;
		goto exit;
	}

	zcull->g = g;

	zcull->zcull_ctxsw_image_size = size;

	zcull->aliquot_width = nvgpu_gr_config_get_tpc_count(config) * 16U;
	zcull->aliquot_height = 16;

	zcull->width_align_pixels =
		nvgpu_gr_config_get_tpc_count(config) * 16U;
	zcull->height_align_pixels = 32;

	zcull->aliquot_size =
		zcull->aliquot_width * zcull->aliquot_height;

	/* assume no floor sweeping since we only have 1 tpc in 1 gpc */
	zcull->pixel_squares_by_aliquots =
		nvgpu_gr_config_get_zcb_count(config) * 16U * 16U *
		nvgpu_gr_config_get_tpc_count(config) /
		(nvgpu_gr_config_get_gpc_count(config) *
		 nvgpu_gr_config_get_gpc_tpc_count(config, 0U));

exit:
	*gr_zcull = zcull;
	return err;
}
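
/*
 * For example, with a TPC count of 4 the computation above gives
 * aliquot_width = 64 pixels, aliquot_height = 16 pixels and therefore
 * aliquot_size = 1024 pixels per aliquot.
 */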

void nvgpu_gr_zcull_deinit(struct gk20a *g, struct nvgpu_gr_zcull *gr_zcull)
{
	if (gr_zcull == NULL) {
		return;
	}

	nvgpu_kfree(g, gr_zcull);
}

u32 nvgpu_gr_get_ctxsw_zcull_size(struct gk20a *g,
		struct nvgpu_gr_zcull *gr_zcull)
{
	/* assuming zcull has already been initialized */
	return gr_zcull->zcull_ctxsw_image_size;
}
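
/*
 * In nvgpu_gr_zcull_init_hw() below, zcull_map_tiles[] is built so that each
 * TPC gets a running per-tile sequence number: zcull_bank_counters[] counts
 * how many earlier TPCs were mapped to the same tile, and that count becomes
 * the TPC's entry before the counter is incremented.
 */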

int nvgpu_gr_zcull_init_hw(struct gk20a *g,
		struct nvgpu_gr_zcull *gr_zcull,
		struct nvgpu_gr_config *gr_config)
{
	u32 *zcull_map_tiles, *zcull_bank_counters;
	u32 map_counter;
	u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
	u32 num_tpc_per_gpc = nvgpu_get_litter_value(g,
			GPU_LIT_NUM_TPC_PER_GPC);
	u32 zcull_alloc_num = num_gpcs * num_tpc_per_gpc;
	u32 map_tile_count;
	int ret = 0;

	nvgpu_log(g, gpu_dbg_gr, " ");

	if (nvgpu_gr_config_get_map_tiles(gr_config) == NULL) {
		return -1;
	}

	if (zcull_alloc_num % 8U != 0U) {
		/*
		 * Total 8 fields per map reg i.e. tile_0 to tile_7, so round
		 * the allocation up to the next multiple of 8.
		 */
		zcull_alloc_num += 8U - (zcull_alloc_num % 8U);
	}
	zcull_map_tiles = nvgpu_kzalloc(g, zcull_alloc_num * sizeof(u32));

	if (zcull_map_tiles == NULL) {
		nvgpu_err(g,
			"failed to allocate zcull map tiles");
		return -ENOMEM;
	}

	zcull_bank_counters = nvgpu_kzalloc(g, zcull_alloc_num * sizeof(u32));

	if (zcull_bank_counters == NULL) {
		nvgpu_err(g,
			"failed to allocate zcull bank counters");
		nvgpu_kfree(g, zcull_map_tiles);
		return -ENOMEM;
	}

	for (map_counter = 0;
	     map_counter < nvgpu_gr_config_get_tpc_count(gr_config);
	     map_counter++) {
		map_tile_count =
			nvgpu_gr_config_get_map_tile_count(gr_config,
				map_counter);
		zcull_map_tiles[map_counter] =
			zcull_bank_counters[map_tile_count];
		zcull_bank_counters[map_tile_count]++;
	}

	if (g->ops.gr.zcull.program_zcull_mapping != NULL) {
		g->ops.gr.zcull.program_zcull_mapping(g, zcull_alloc_num,
				zcull_map_tiles);
	}

	nvgpu_kfree(g, zcull_map_tiles);
	nvgpu_kfree(g, zcull_bank_counters);

	if (g->ops.gr.zcull.init_zcull_hw != NULL) {
		ret = g->ops.gr.zcull.init_zcull_hw(g, gr_zcull, gr_config);
		if (ret != 0) {
			nvgpu_err(g, "failed to init zcull hw. err:%d", ret);
			return ret;
		}
	}

	nvgpu_log(g, gpu_dbg_gr, "done");
	return 0;
}

int nvgpu_gr_zcull_ctx_setup(struct gk20a *g, struct nvgpu_gr_subctx *subctx,
		struct nvgpu_gr_ctx *gr_ctx)
{
	int ret = 0;

	if (subctx != NULL) {
		ret = nvgpu_gr_ctx_zcull_setup(g, gr_ctx, false);
		if (ret == 0) {
			nvgpu_gr_subctx_zcull_setup(g, subctx, gr_ctx);
		}
	} else {
		ret = nvgpu_gr_ctx_zcull_setup(g, gr_ctx, true);
	}

	return ret;
}
45
drivers/gpu/nvgpu/common/gr/zcull_priv.h
Normal file
@@ -0,0 +1,45 @@
/*
 * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef NVGPU_GR_ZCULL_PRIV_H
#define NVGPU_GR_ZCULL_PRIV_H

#include <nvgpu/types.h>

struct gk20a;

struct nvgpu_gr_zcull {
	struct gk20a *g;

	u32 aliquot_width;
	u32 aliquot_height;
	u32 aliquot_size;
	u32 total_aliquots;

	u32 width_align_pixels;
	u32 height_align_pixels;
	u32 pixel_squares_by_aliquots;

	u32 zcull_ctxsw_image_size;
};

#endif /* NVGPU_GR_ZCULL_PRIV_H */