Open source GPL/LGPL release

This commit is contained in:
svcmobrel-release
2022-07-21 16:03:29 -07:00
commit f338182221
2260 changed files with 576813 additions and 0 deletions

File diff suppressed because it is too large.

@@ -0,0 +1,183 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NVGPU_GR_CTX_PRIV_H
#define NVGPU_GR_CTX_PRIV_H
struct nvgpu_mem;
/**
* Patch context buffer descriptor structure.
*
* Pointer to this structure is maintained in #nvgpu_gr_ctx structure.
*/
struct patch_desc {
/**
* Memory to hold patch context buffer.
*/
struct nvgpu_mem mem;
/**
* Count of entries written into patch context buffer.
*/
u32 data_count;
};
#ifdef CONFIG_NVGPU_GRAPHICS
struct zcull_ctx_desc {
u64 gpu_va;
u32 ctx_sw_mode;
};
#endif
#ifdef CONFIG_NVGPU_DEBUGGER
struct pm_ctx_desc {
struct nvgpu_mem mem;
u64 gpu_va;
u32 pm_mode;
};
#endif
/**
* GR context descriptor structure.
*
* This structure stores various properties of all GR context buffers.
*/
struct nvgpu_gr_ctx_desc {
/**
* Array to store all GR context buffer sizes.
*/
u32 size[NVGPU_GR_CTX_COUNT];
#ifdef CONFIG_NVGPU_GRAPHICS
bool force_preemption_gfxp;
#endif
#ifdef CONFIG_NVGPU_CILP
bool force_preemption_cilp;
#endif
#ifdef CONFIG_DEBUG_FS
bool dump_ctxsw_stats_on_channel_close;
#endif
};
/**
* Graphics context buffer structure.
*
* This structure stores all the properties of a graphics context
* buffer. One graphics context is allocated per GPU Time Slice
* Group (TSG).
*/
struct nvgpu_gr_ctx {
/**
* Context ID read from graphics context buffer.
*/
u32 ctx_id;
/**
* Flag to indicate if above context ID is valid or not.
*/
bool ctx_id_valid;
/**
* Memory to hold graphics context buffer.
*/
struct nvgpu_mem mem;
#ifdef CONFIG_NVGPU_GFXP
struct nvgpu_mem preempt_ctxsw_buffer;
struct nvgpu_mem spill_ctxsw_buffer;
struct nvgpu_mem betacb_ctxsw_buffer;
struct nvgpu_mem pagepool_ctxsw_buffer;
struct nvgpu_mem gfxp_rtvcb_ctxsw_buffer;
#endif
/**
* Patch context buffer descriptor struct.
*/
struct patch_desc patch_ctx;
#ifdef CONFIG_NVGPU_GRAPHICS
struct zcull_ctx_desc zcull_ctx;
#endif
#ifdef CONFIG_NVGPU_DEBUGGER
struct pm_ctx_desc pm_ctx;
#endif
/**
* Graphics preemption mode of the graphics context.
*/
u32 graphics_preempt_mode;
/**
* Compute preemption mode of the graphics context.
*/
u32 compute_preempt_mode;
#ifdef CONFIG_NVGPU_NON_FUSA
bool golden_img_loaded;
#endif
#ifdef CONFIG_NVGPU_CILP
bool cilp_preempt_pending;
#endif
#ifdef CONFIG_NVGPU_DEBUGGER
bool boosted_ctx;
#endif
/**
* Array to store GPU virtual addresses of all global context
* buffers.
*/
u64 global_ctx_buffer_va[NVGPU_GR_CTX_VA_COUNT];
/**
* Array to store indexes of global context buffers
* corresponding to GPU virtual addresses above.
*/
u32 global_ctx_buffer_index[NVGPU_GR_CTX_VA_COUNT];
/**
* Flag to indicate if global context buffers are mapped and
* #global_ctx_buffer_va array is populated.
*/
bool global_ctx_buffer_mapped;
/**
* TSG identifier corresponding to the graphics context.
*/
u32 tsgid;
#ifdef CONFIG_NVGPU_SM_DIVERSITY
/** SM diversity configuration offset.
* This field is valid only if NVGPU_SUPPORT_SM_DIVERSITY is enabled;
* otherwise the input parameter is ignored.
* A valid offset ranges from 0 to
* (#gk20a.max_sm_diversity_config_count - 1).
*/
u32 sm_diversity_config;
#endif
};
#endif /* NVGPU_GR_CTX_PRIV_H */

@@ -0,0 +1,700 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/nvgpu_init.h>
#include <nvgpu/list.h>
#include <nvgpu/log.h>
#include <nvgpu/log2.h>
#include <nvgpu/mm.h>
#include <nvgpu/circ_buf.h>
#include <nvgpu/timers.h>
#include <nvgpu/enabled.h>
#include <nvgpu/gr/global_ctx.h>
#include <nvgpu/gr/ctx.h>
#include <nvgpu/gr/subctx.h>
#include <nvgpu/gr/fecs_trace.h>
#include <nvgpu/gr/gr_utils.h>
static int nvgpu_gr_fecs_trace_periodic_polling(void *arg);
int nvgpu_gr_fecs_trace_add_context(struct gk20a *g, u32 context_ptr,
pid_t pid, u32 vmid, struct nvgpu_list_node *list)
{
struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
struct nvgpu_fecs_trace_context_entry *entry;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
"adding hash entry context_ptr=%x -> pid=%d, vmid=%d",
context_ptr, pid, vmid);
entry = nvgpu_kzalloc(g, sizeof(*entry));
if (entry == NULL) {
nvgpu_err(g,
"can't alloc new entry for context_ptr=%x pid=%d vmid=%d",
context_ptr, pid, vmid);
return -ENOMEM;
}
nvgpu_init_list_node(&entry->entry);
entry->context_ptr = context_ptr;
entry->pid = pid;
entry->vmid = vmid;
nvgpu_mutex_acquire(&trace->list_lock);
nvgpu_list_add_tail(&entry->entry, list);
nvgpu_mutex_release(&trace->list_lock);
return 0;
}
void nvgpu_gr_fecs_trace_remove_context(struct gk20a *g, u32 context_ptr,
struct nvgpu_list_node *list)
{
struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
struct nvgpu_fecs_trace_context_entry *entry, *tmp;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
"freeing entry context_ptr=%x", context_ptr);
nvgpu_mutex_acquire(&trace->list_lock);
nvgpu_list_for_each_entry_safe(entry, tmp, list,
nvgpu_fecs_trace_context_entry, entry) {
if (entry->context_ptr == context_ptr) {
nvgpu_list_del(&entry->entry);
nvgpu_log(g, gpu_dbg_ctxsw,
"freed entry=%p context_ptr=%x", entry,
entry->context_ptr);
nvgpu_kfree(g, entry);
break;
}
}
nvgpu_mutex_release(&trace->list_lock);
}
void nvgpu_gr_fecs_trace_remove_contexts(struct gk20a *g,
struct nvgpu_list_node *list)
{
struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
struct nvgpu_fecs_trace_context_entry *entry, *tmp;
nvgpu_mutex_acquire(&trace->list_lock);
nvgpu_list_for_each_entry_safe(entry, tmp, list,
nvgpu_fecs_trace_context_entry, entry) {
nvgpu_list_del(&entry->entry);
nvgpu_kfree(g, entry);
}
nvgpu_mutex_release(&trace->list_lock);
}
void nvgpu_gr_fecs_trace_find_pid(struct gk20a *g, u32 context_ptr,
struct nvgpu_list_node *list, pid_t *pid, u32 *vmid)
{
struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
struct nvgpu_fecs_trace_context_entry *entry;
nvgpu_mutex_acquire(&trace->list_lock);
nvgpu_list_for_each_entry(entry, list, nvgpu_fecs_trace_context_entry,
entry) {
if (entry->context_ptr == context_ptr) {
nvgpu_log(g, gpu_dbg_ctxsw,
"found context_ptr=%x -> pid=%d, vmid=%d",
entry->context_ptr, entry->pid, entry->vmid);
*pid = entry->pid;
*vmid = entry->vmid;
nvgpu_mutex_release(&trace->list_lock);
return;
}
}
nvgpu_mutex_release(&trace->list_lock);
*pid = 0;
*vmid = 0xffffffffU;
}
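/*
 * Usage sketch (illustration only, not part of the driver): the three list
 * helpers above form a small context_ptr -> (pid, vmid) lookup table. A
 * hypothetical caller would pair them as below; the literal ids and the
 * "list" argument are placeholders.
 */
static int fecs_trace_context_list_sketch(struct gk20a *g,
		struct nvgpu_list_node *list)
{
	pid_t pid;
	u32 vmid;
	int err;

	/* remember which pid/vmid owns the instance block at 0x1234 */
	err = nvgpu_gr_fecs_trace_add_context(g, 0x1234U, 42, 0U, list);
	if (err != 0) {
		return err;
	}

	/* later, resolve the owner of a hardware trace record */
	nvgpu_gr_fecs_trace_find_pid(g, 0x1234U, list, &pid, &vmid);

	/* drop the mapping when the channel is unbound */
	nvgpu_gr_fecs_trace_remove_context(g, 0x1234U, list);

	return 0;
}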
int nvgpu_gr_fecs_trace_init(struct gk20a *g)
{
struct nvgpu_gr_fecs_trace *trace;
if (!is_power_of_2(GK20A_FECS_TRACE_NUM_RECORDS)) {
nvgpu_err(g, "invalid NUM_RECORDS chosen");
nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, false);
return -EINVAL;
}
trace = nvgpu_kzalloc(g, sizeof(struct nvgpu_gr_fecs_trace));
if (trace == NULL) {
nvgpu_err(g, "failed to allocate fecs_trace");
nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, false);
return -ENOMEM;
}
g->fecs_trace = trace;
nvgpu_mutex_init(&trace->poll_lock);
nvgpu_mutex_init(&trace->list_lock);
nvgpu_mutex_init(&trace->enable_lock);
nvgpu_init_list_node(&trace->context_list);
trace->enable_count = 0;
return 0;
}
int nvgpu_gr_fecs_trace_deinit(struct gk20a *g)
{
struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
if (trace == NULL) {
return 0;
}
/*
* Check if tracer was enabled before attempting to stop the
* tracer thread.
*/
if (trace->enable_count > 0) {
nvgpu_thread_stop(&trace->poll_task);
}
nvgpu_gr_fecs_trace_remove_contexts(g, &trace->context_list);
nvgpu_mutex_destroy(&g->fecs_trace->list_lock);
nvgpu_mutex_destroy(&g->fecs_trace->poll_lock);
nvgpu_mutex_destroy(&g->fecs_trace->enable_lock);
nvgpu_kfree(g, g->fecs_trace);
g->fecs_trace = NULL;
return 0;
}
int nvgpu_gr_fecs_trace_num_ts(struct gk20a *g)
{
return (g->ops.gr.ctxsw_prog.hw_get_ts_record_size_in_bytes()
- sizeof(struct nvgpu_fecs_trace_record)) / sizeof(u64);
}
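/*
 * Worked example (illustration only, sizes are hypothetical): if the
 * ctxsw_prog HAL reported a 128-byte timestamp record and the
 * nvgpu_fecs_trace_record header occupied 32 bytes, the function above
 * would return (128 - 32) / sizeof(u64) = 12 timestamp entries per record.
 */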
struct nvgpu_fecs_trace_record *nvgpu_gr_fecs_trace_get_record(
struct gk20a *g, int idx)
{
struct nvgpu_gr_global_ctx_buffer_desc *gr_global_ctx_buffer =
nvgpu_gr_get_global_ctx_buffer_ptr(g);
struct nvgpu_mem *mem = nvgpu_gr_global_ctx_buffer_get_mem(
gr_global_ctx_buffer,
NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER);
if (mem == NULL) {
return NULL;
}
return (struct nvgpu_fecs_trace_record *)
((u8 *) mem->cpu_va +
(idx * g->ops.gr.ctxsw_prog.hw_get_ts_record_size_in_bytes()));
}
bool nvgpu_gr_fecs_trace_is_valid_record(struct gk20a *g,
struct nvgpu_fecs_trace_record *r)
{
/*
* testing magic_hi should suffice. magic_lo is sometimes used
* as a sequence number in experimental ucode.
*/
return g->ops.gr.ctxsw_prog.is_ts_valid_record(r->magic_hi);
}
size_t nvgpu_gr_fecs_trace_buffer_size(struct gk20a *g)
{
return GK20A_FECS_TRACE_NUM_RECORDS
* g->ops.gr.ctxsw_prog.hw_get_ts_record_size_in_bytes();
}
int nvgpu_gr_fecs_trace_max_entries(struct gk20a *g,
struct nvgpu_gpu_ctxsw_trace_filter *filter)
{
int n;
int tag;
/* Compute number of entries per record, with given filter */
for (n = 0, tag = 0; tag < nvgpu_gr_fecs_trace_num_ts(g); tag++)
n += (NVGPU_GPU_CTXSW_FILTER_ISSET(tag, filter) != 0);
/* Return max number of entries generated for the whole ring */
return n * GK20A_FECS_TRACE_NUM_RECORDS;
}
int nvgpu_gr_fecs_trace_enable(struct gk20a *g)
{
struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
int write;
int err = 0;
nvgpu_mutex_acquire(&trace->enable_lock);
trace->enable_count++;
if (trace->enable_count == 1U) {
/* drop data in hw buffer */
if (g->ops.gr.fecs_trace.flush)
g->ops.gr.fecs_trace.flush(g);
write = g->ops.gr.fecs_trace.get_write_index(g);
if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_FEATURE_CONTROL)) {
/*
* For enabling FECS trace support, MAILBOX1's MSB
* (Bit 31:31) should be set to 1. Bits 30:0 represent the
* actual pointer value.
*/
write = write |
(BIT32(NVGPU_FECS_TRACE_FEATURE_CONTROL_BIT));
}
g->ops.gr.fecs_trace.set_read_index(g, write);
/*
* FECS ucode does a priv holdoff around the assertion of
* context reset. So, pri transactions (e.g. mailbox1 register
* write) might fail due to this. Hence, do write with ack
* i.e. write and read it back to make sure write happened for
* mailbox1.
*/
while (g->ops.gr.fecs_trace.get_read_index(g) != write) {
nvgpu_log(g, gpu_dbg_ctxsw, "mailbox1 update failed");
g->ops.gr.fecs_trace.set_read_index(g, write);
}
err = nvgpu_thread_create(&trace->poll_task, g,
nvgpu_gr_fecs_trace_periodic_polling, __func__);
if (err != 0) {
nvgpu_warn(g, "failed to create FECS polling task");
goto done;
}
}
done:
nvgpu_mutex_release(&trace->enable_lock);
return err;
}
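/*
 * Sketch (illustration only, these helpers are hypothetical): the value
 * programmed into MAILBOX1 above packs the trace-enable flag into bit 31
 * and keeps the record index in bits 30:0 when
 * NVGPU_FECS_TRACE_FEATURE_CONTROL is enabled. The helpers below merely
 * restate that encoding.
 */
static inline u32 fecs_trace_mailbox1_pack(u32 index, bool tracing_on)
{
	u32 val = index & ~BIT32(NVGPU_FECS_TRACE_FEATURE_CONTROL_BIT);

	if (tracing_on) {
		val |= BIT32(NVGPU_FECS_TRACE_FEATURE_CONTROL_BIT);
	}
	return val;
}

static inline u32 fecs_trace_mailbox1_index(u32 val)
{
	return val & ~BIT32(NVGPU_FECS_TRACE_FEATURE_CONTROL_BIT);
}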
int nvgpu_gr_fecs_trace_disable(struct gk20a *g)
{
struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
int read = 0;
if (trace == NULL) {
return -EINVAL;
}
nvgpu_mutex_acquire(&trace->enable_lock);
if (trace->enable_count <= 0U) {
nvgpu_mutex_release(&trace->enable_lock);
return 0;
}
trace->enable_count--;
if (trace->enable_count == 0U) {
if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_FEATURE_CONTROL)) {
/*
* For disabling FECS trace support, MAILBOX1's MSB
* (Bit 31:31) should be set to 0.
*/
read = g->ops.gr.fecs_trace.get_read_index(g) &
(~(BIT32(NVGPU_FECS_TRACE_FEATURE_CONTROL_BIT)));
g->ops.gr.fecs_trace.set_read_index(g, read);
/*
* FECS ucode does a priv holdoff around the assertion
* of context reset. So, pri transactions (e.g.
* mailbox1 register write) might fail due to this.
* Hence, do write with ack i.e. write and read it back
* to make sure write happened for mailbox1.
*/
while (g->ops.gr.fecs_trace.get_read_index(g) != read) {
nvgpu_log(g, gpu_dbg_ctxsw,
"mailbox1 update failed");
g->ops.gr.fecs_trace.set_read_index(g, read);
}
}
nvgpu_thread_stop(&trace->poll_task);
}
nvgpu_mutex_release(&trace->enable_lock);
return 0;
}
bool nvgpu_gr_fecs_trace_is_enabled(struct gk20a *g)
{
struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
return (trace && (trace->enable_count > 0));
}
void nvgpu_gr_fecs_trace_reset_buffer(struct gk20a *g)
{
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " ");
g->ops.gr.fecs_trace.set_read_index(g,
g->ops.gr.fecs_trace.get_write_index(g));
}
/*
* Converts HW entry format to userspace-facing format and pushes it to the
* queue.
*/
int nvgpu_gr_fecs_trace_ring_read(struct gk20a *g, int index,
u32 *vm_update_mask)
{
int i;
struct nvgpu_gpu_ctxsw_trace_entry entry = { };
struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
pid_t cur_pid = 0, new_pid = 0;
u32 cur_vmid = 0U, new_vmid = 0U;
u32 vmid = 0U;
int count = 0;
struct nvgpu_fecs_trace_record *r =
nvgpu_gr_fecs_trace_get_record(g, index);
if (r == NULL) {
return -EINVAL;
}
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
"consuming record trace=%p read=%d record=%p", trace, index, r);
if (!nvgpu_gr_fecs_trace_is_valid_record(g, r)) {
nvgpu_warn(g,
"trace=%p read=%d record=%p magic_lo=%08x magic_hi=%08x (invalid)",
trace, index, r, r->magic_lo, r->magic_hi);
return -EINVAL;
}
/* Clear magic_hi to detect cases where the CPU could read the write index
* before the FECS record is actually written to DRAM. This should not
* happen as we force FECS writes to SYSMEM by reading through PRAMIN.
*/
r->magic_hi = 0;
if ((r->context_ptr != 0U) && (r->context_id != 0U)) {
nvgpu_gr_fecs_trace_find_pid(g, r->context_ptr,
&trace->context_list, &cur_pid, &cur_vmid);
} else {
cur_vmid = 0xffffffffU;
cur_pid = 0;
}
if (r->new_context_ptr != 0U) {
nvgpu_gr_fecs_trace_find_pid(g, r->new_context_ptr,
&trace->context_list, &new_pid, &new_vmid);
} else {
new_vmid = 0xffffffffU;
new_pid = 0;
}
nvgpu_log(g, gpu_dbg_ctxsw,
"context_ptr=%x (vmid=%u pid=%d)",
r->context_ptr, cur_vmid, cur_pid);
nvgpu_log(g, gpu_dbg_ctxsw,
"new_context_ptr=%x (vmid=%u pid=%d)",
r->new_context_ptr, new_vmid, new_pid);
entry.context_id = r->context_id;
/* break out FECS record into trace events */
for (i = 0; i < nvgpu_gr_fecs_trace_num_ts(g); i++) {
entry.tag = g->ops.gr.ctxsw_prog.hw_get_ts_tag(r->ts[i]);
entry.timestamp =
g->ops.gr.ctxsw_prog.hw_record_ts_timestamp(r->ts[i]);
entry.timestamp <<= GK20A_FECS_TRACE_PTIMER_SHIFT;
nvgpu_log(g, gpu_dbg_ctxsw,
"tag=%x timestamp=%llx context_id=%08x new_context_id=%08x",
entry.tag, entry.timestamp, r->context_id,
r->new_context_id);
switch (nvgpu_gpu_ctxsw_tags_to_common_tags(entry.tag)) {
case NVGPU_GPU_CTXSW_TAG_RESTORE_START:
case NVGPU_GPU_CTXSW_TAG_CONTEXT_START:
entry.context_id = r->new_context_id;
entry.pid = new_pid;
entry.vmid = new_vmid;
break;
case NVGPU_GPU_CTXSW_TAG_CTXSW_REQ_BY_HOST:
case NVGPU_GPU_CTXSW_TAG_FE_ACK:
case NVGPU_GPU_CTXSW_TAG_FE_ACK_WFI:
case NVGPU_GPU_CTXSW_TAG_FE_ACK_GFXP:
case NVGPU_GPU_CTXSW_TAG_FE_ACK_CTAP:
case NVGPU_GPU_CTXSW_TAG_FE_ACK_CILP:
case NVGPU_GPU_CTXSW_TAG_SAVE_END:
entry.context_id = r->context_id;
entry.pid = cur_pid;
entry.vmid = cur_vmid;
break;
default:
/* tags are not guaranteed to start at the beginning */
if ((entry.tag != 0) && (entry.tag !=
NVGPU_GPU_CTXSW_TAG_INVALID_TIMESTAMP)) {
nvgpu_warn(g, "TAG not found");
}
continue;
}
nvgpu_log(g, gpu_dbg_ctxsw, "tag=%x context_id=%x pid=%lld",
entry.tag, entry.context_id, entry.pid);
if (!entry.context_id)
continue;
if (g->ops.gr.fecs_trace.vm_dev_write != NULL) {
g->ops.gr.fecs_trace.vm_dev_write(g, entry.vmid,
vm_update_mask, &entry);
} else {
nvgpu_gr_fecs_trace_write_entry(g, &entry);
}
count++;
}
nvgpu_gr_fecs_trace_wake_up(g, vmid);
return count;
}
int nvgpu_gr_fecs_trace_poll(struct gk20a *g)
{
struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
u32 vm_update_mask = 0U;
int read = 0;
int write = 0;
int cnt;
int err = 0;
nvgpu_mutex_acquire(&trace->poll_lock);
if (trace->enable_count == 0) {
goto done_unlock;
}
err = gk20a_busy(g);
if (err) {
goto done_unlock;
}
write = g->ops.gr.fecs_trace.get_write_index(g);
if ((write < 0) || (write >= GK20A_FECS_TRACE_NUM_RECORDS)) {
nvgpu_err(g,
"failed to acquire write index, write=%d", write);
err = write;
goto done;
}
read = g->ops.gr.fecs_trace.get_read_index(g);
cnt = CIRC_CNT(write, read, GK20A_FECS_TRACE_NUM_RECORDS);
if (!cnt)
goto done;
nvgpu_log(g, gpu_dbg_ctxsw,
"circular buffer: read=%d (mailbox=%d) write=%d cnt=%d",
read, g->ops.gr.fecs_trace.get_read_index(g), write, cnt);
/* Ensure all FECS writes have made it to SYSMEM */
err = g->ops.mm.cache.fb_flush(g);
if (err != 0) {
nvgpu_err(g, "mm.cache.fb_flush() failed err=%d", err);
goto done;
}
if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_FEATURE_CONTROL)) {
/* Bits 30:0 of MAILBOX1 represent the actual read pointer value */
read = read & (~(BIT32(NVGPU_FECS_TRACE_FEATURE_CONTROL_BIT)));
}
while (read != write) {
cnt = nvgpu_gr_fecs_trace_ring_read(g, read, &vm_update_mask);
if (cnt <= 0) {
break;
}
/* Get to next record. */
read = (read + 1) & (GK20A_FECS_TRACE_NUM_RECORDS - 1);
}
if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_FEATURE_CONTROL)) {
/*
* In the next step, read pointer is going to be updated.
* So, MSB of read pointer should be set back to 1. This will
* keep FECS trace enabled.
*/
read = read | (BIT32(NVGPU_FECS_TRACE_FEATURE_CONTROL_BIT));
}
/* ensure FECS records have been updated before incrementing read index */
nvgpu_wmb();
g->ops.gr.fecs_trace.set_read_index(g, read);
/*
* FECS ucode does a priv holdoff around the assertion of context
* reset. So, pri transactions (e.g. mailbox1 register write) might
* fail due to this. Hence, do write with ack i.e. write and read
* it back to make sure write happened for mailbox1.
*/
while (g->ops.gr.fecs_trace.get_read_index(g) != read) {
nvgpu_log(g, gpu_dbg_ctxsw, "mailbox1 update failed");
g->ops.gr.fecs_trace.set_read_index(g, read);
}
if (g->ops.gr.fecs_trace.vm_dev_update) {
g->ops.gr.fecs_trace.vm_dev_update(g, vm_update_mask);
}
done:
gk20a_idle(g);
done_unlock:
nvgpu_mutex_release(&trace->poll_lock);
return err;
}
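/*
 * Sketch (illustration only, helper is hypothetical): since
 * GK20A_FECS_TRACE_NUM_RECORDS is required to be a power of two in
 * nvgpu_gr_fecs_trace_init(), the "get to next record" step in the poll
 * loop above is a simple mask rather than a modulo:
 */
static inline int fecs_trace_next_record_idx(int idx)
{
	return (idx + 1) & (GK20A_FECS_TRACE_NUM_RECORDS - 1);
}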
static int nvgpu_gr_fecs_trace_periodic_polling(void *arg)
{
struct gk20a *g = (struct gk20a *)arg;
struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
nvgpu_log(g, gpu_dbg_ctxsw, "thread running");
while (!nvgpu_thread_should_stop(&trace->poll_task) &&
trace->enable_count > 0U) {
nvgpu_usleep_range(GK20A_FECS_TRACE_FRAME_PERIOD_US,
GK20A_FECS_TRACE_FRAME_PERIOD_US * 2U);
nvgpu_gr_fecs_trace_poll(g);
}
return 0;
}
int nvgpu_gr_fecs_trace_reset(struct gk20a *g)
{
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " ");
if (!g->ops.gr.fecs_trace.is_enabled(g))
return 0;
nvgpu_gr_fecs_trace_poll(g);
return g->ops.gr.fecs_trace.set_read_index(g, 0);
}
/*
* map global circ_buf to the context space and store the GPU VA
* in the context header.
*/
int nvgpu_gr_fecs_trace_bind_channel(struct gk20a *g,
struct nvgpu_mem *inst_block, struct nvgpu_gr_subctx *subctx,
struct nvgpu_gr_ctx *gr_ctx, pid_t pid, u32 vmid)
{
u64 addr = 0ULL;
struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
struct nvgpu_mem *mem;
struct nvgpu_gr_global_ctx_buffer_desc *gr_global_ctx_buffer =
nvgpu_gr_get_global_ctx_buffer_ptr(g);
u32 context_ptr;
u32 aperture_mask;
int ret;
if (trace == NULL) {
return -EINVAL;
}
context_ptr = nvgpu_inst_block_ptr(g, inst_block);
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw,
"pid=%d context_ptr=%x inst_block=%llx",
pid, context_ptr,
nvgpu_inst_block_addr(g, inst_block));
mem = nvgpu_gr_global_ctx_buffer_get_mem(gr_global_ctx_buffer,
NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER);
if (mem == NULL) {
return -EINVAL;
}
if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA)) {
addr = nvgpu_gr_ctx_get_global_ctx_va(gr_ctx,
NVGPU_GR_CTX_FECS_TRACE_BUFFER_VA);
nvgpu_log(g, gpu_dbg_ctxsw, "gpu_va=%llx", addr);
aperture_mask = 0;
} else {
addr = nvgpu_inst_block_addr(g, mem);
nvgpu_log(g, gpu_dbg_ctxsw, "pa=%llx", addr);
aperture_mask =
g->ops.gr.ctxsw_prog.get_ts_buffer_aperture_mask(g, mem);
}
if (addr == 0ULL) {
return -ENOMEM;
}
mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx);
nvgpu_log(g, gpu_dbg_ctxsw, "addr=%llx count=%d", addr,
GK20A_FECS_TRACE_NUM_RECORDS);
g->ops.gr.ctxsw_prog.set_ts_num_records(g, mem,
GK20A_FECS_TRACE_NUM_RECORDS);
if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA) && subctx != NULL) {
mem = nvgpu_gr_subctx_get_ctx_header(subctx);
}
g->ops.gr.ctxsw_prog.set_ts_buffer_ptr(g, mem, addr, aperture_mask);
ret = nvgpu_gr_fecs_trace_add_context(g, context_ptr, pid, vmid,
&trace->context_list);
return ret;
}
int nvgpu_gr_fecs_trace_unbind_channel(struct gk20a *g,
struct nvgpu_mem *inst_block)
{
struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
u32 context_ptr;
if (trace == NULL) {
return -EINVAL;
}
context_ptr = nvgpu_inst_block_ptr(g, inst_block);
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw,
"context_ptr=%x", context_ptr);
if (g->ops.gr.fecs_trace.is_enabled(g)) {
if (g->ops.gr.fecs_trace.flush) {
g->ops.gr.fecs_trace.flush(g);
}
nvgpu_gr_fecs_trace_poll(g);
}
nvgpu_gr_fecs_trace_remove_context(g, context_ptr,
&trace->context_list);
return 0;
}

@@ -0,0 +1,196 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/static_analysis.h>
#include <nvgpu/gr/config.h>
#include <nvgpu/gr/fs_state.h>
#include <nvgpu/gr/gr_instances.h>
#include <nvgpu/grmgr.h>
static int gr_load_sm_id_config(struct gk20a *g, struct nvgpu_gr_config *config)
{
int err;
u32 *tpc_sm_id;
u32 sm_id_size = g->ops.gr.init.get_sm_id_size();
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " ");
tpc_sm_id = nvgpu_kcalloc(g, sm_id_size, sizeof(u32));
if (tpc_sm_id == NULL) {
return -ENOMEM;
}
err = g->ops.gr.init.sm_id_config(g, tpc_sm_id, config, NULL, false);
nvgpu_kfree(g, tpc_sm_id);
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
return err;
}
static void gr_load_tpc_mask(struct gk20a *g, struct nvgpu_gr_config *config)
{
u32 pes_tpc_mask = 0;
u32 gpc, pes;
u32 num_tpc_per_gpc = nvgpu_get_litter_value(g,
GPU_LIT_NUM_TPC_PER_GPC);
#ifdef CONFIG_NVGPU_NON_FUSA
u32 max_tpc_count = nvgpu_gr_config_get_max_tpc_count(config);
u32 fuse_tpc_mask;
u32 val;
u32 cur_gr_instance = nvgpu_gr_get_cur_instance_id(g);
u32 gpc_phys_id;
#endif
/* gv11b has 1 GPC and 4 TPC/GPC, so mask will not overflow u32 */
for (gpc = 0; gpc < nvgpu_gr_config_get_gpc_count(config); gpc++) {
for (pes = 0;
pes < nvgpu_gr_config_get_pe_count_per_gpc(config);
pes++) {
pes_tpc_mask |= nvgpu_gr_config_get_pes_tpc_mask(
config, gpc, pes) <<
nvgpu_safe_mult_u32(num_tpc_per_gpc, gpc);
}
}
nvgpu_log_info(g, "pes_tpc_mask %u\n", pes_tpc_mask);
#ifdef CONFIG_NVGPU_NON_FUSA
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
/*
* Fuse registers must be queried with physical gpc-id and not
* the logical ones. For tu104 and earlier chips, the logical gpc-id
* is the same as the physical gpc-id in a non-floorswept config, but
* for chips after tu104 this may not hold.
*/
gpc_phys_id = nvgpu_grmgr_get_gr_gpc_phys_id(g,
cur_gr_instance, 0U);
fuse_tpc_mask = g->ops.gr.config.get_gpc_tpc_mask(g, config, gpc_phys_id);
if ((g->tpc_fs_mask_user != 0U) &&
(g->tpc_fs_mask_user != fuse_tpc_mask)) {
if (fuse_tpc_mask == nvgpu_safe_sub_u32(BIT32(max_tpc_count),
U32(1))) {
val = g->tpc_fs_mask_user;
val &= nvgpu_safe_sub_u32(BIT32(max_tpc_count), U32(1));
/*
* skip the TPC to be disabled; disabling the other TPC causes
* channel timeouts
*/
val = nvgpu_safe_sub_u32(BIT32(hweight32(val)), U32(1));
pes_tpc_mask = val;
}
}
}
#endif
g->ops.gr.init.tpc_mask(g, 0, pes_tpc_mask);
}
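/*
 * Worked example (illustration only, numbers are hypothetical): on a part
 * with one GPC, four TPCs per GPC and two PES units whose TPC masks are
 * 0x3 and 0xC, the loop in gr_load_tpc_mask() accumulates
 *
 *   pes_tpc_mask = (0x3 << (4 * 0)) | (0xC << (4 * 0)) = 0xF
 *
 * i.e. all four TPCs of GPC0 enabled. A second GPC would contribute its
 * PES masks shifted left by another num_tpc_per_gpc bits.
 */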
int nvgpu_gr_fs_state_init(struct gk20a *g, struct nvgpu_gr_config *config)
{
u32 tpc_index, gpc_index;
u32 sm_id = 0;
#ifdef CONFIG_NVGPU_NON_FUSA
u32 fuse_tpc_mask;
u32 max_tpc_cnt;
u32 cur_gr_instance = nvgpu_gr_get_cur_instance_id(g);
u32 gpc_phys_id;
#endif
u32 gpc_cnt, tpc_cnt;
u32 num_sm;
int err = 0;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " ");
g->ops.gr.init.fs_state(g);
err = g->ops.gr.config.init_sm_id_table(g, config);
if (err != 0) {
return err;
}
num_sm = nvgpu_gr_config_get_no_of_sm(config);
nvgpu_assert(num_sm > 0U);
for (sm_id = 0; sm_id < num_sm; sm_id++) {
struct nvgpu_sm_info *sm_info =
nvgpu_gr_config_get_sm_info(config, sm_id);
tpc_index = nvgpu_gr_config_get_sm_info_tpc_index(sm_info);
gpc_index = nvgpu_gr_config_get_sm_info_gpc_index(sm_info);
g->ops.gr.init.sm_id_numbering(g, gpc_index, tpc_index, sm_id,
config, NULL, false);
}
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
g->ops.gr.init.pd_tpc_per_gpc(g, config);
}
#ifdef CONFIG_NVGPU_GRAPHICS
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
/* gr__setup_pd_mapping */
g->ops.gr.init.rop_mapping(g, config);
g->ops.gr.init.pd_skip_table_gpc(g, config);
}
#endif
gpc_cnt = nvgpu_gr_config_get_gpc_count(config);
tpc_cnt = nvgpu_gr_config_get_tpc_count(config);
#ifdef CONFIG_NVGPU_NON_FUSA
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
/*
* Fuse registers must be queried with physical gpc-id and not
* the logical ones. For tu104 and earlier chips, the logical gpc-id
* is the same as the physical gpc-id in a non-floorswept config, but
* for chips after tu104 this may not hold.
*/
gpc_phys_id = nvgpu_grmgr_get_gr_gpc_phys_id(g,
cur_gr_instance, 0U);
fuse_tpc_mask = g->ops.gr.config.get_gpc_tpc_mask(g, config, gpc_phys_id);
max_tpc_cnt = nvgpu_gr_config_get_max_tpc_count(config);
if ((g->tpc_fs_mask_user != 0U) &&
(fuse_tpc_mask ==
nvgpu_safe_sub_u32(BIT32(max_tpc_cnt), U32(1)))) {
u32 val = g->tpc_fs_mask_user;
val &= nvgpu_safe_sub_u32(BIT32(max_tpc_cnt), U32(1));
tpc_cnt = (u32)hweight32(val);
}
}
#endif
g->ops.gr.init.cwd_gpcs_tpcs_num(g, gpc_cnt, tpc_cnt);
gr_load_tpc_mask(g, config);
err = gr_load_sm_id_config(g, config);
if (err != 0) {
nvgpu_err(g, "load_smid_config failed err=%d", err);
}
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
return err;
}
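/*
 * Worked example (illustration only, values are hypothetical): with a fuse
 * TPC mask of 0xF (four TPCs present, max_tpc_cnt = 4) and
 * tpc_fs_mask_user = 0x7, the CONFIG_NVGPU_NON_FUSA block above reduces
 * the reported count to
 *
 *   tpc_cnt = hweight32(0x7 & 0xF) = 3
 *
 * before programming CWD via cwd_gpcs_tpcs_num().
 */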

@@ -0,0 +1,477 @@
/*
* Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/log.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/kmem.h>
#include <nvgpu/bug.h>
#include <nvgpu/dma.h>
#ifdef CONFIG_NVGPU_GR_GOLDEN_CTX_VERIFICATION
#include <nvgpu/static_analysis.h>
#include <nvgpu/string.h>
#endif
#include <nvgpu/gr/global_ctx.h>
#include "global_ctx_priv.h"
#ifdef NVGPU_UNITTEST_FAULT_INJECTION_ENABLEMENT
#include <nvgpu/posix/posix-fault-injection.h>
struct nvgpu_posix_fault_inj *nvgpu_golden_ctx_verif_get_fault_injection(void)
{
struct nvgpu_posix_fault_inj_container *c =
nvgpu_posix_fault_injection_get_container();
return &c->golden_ctx_verif_fi;
}
struct nvgpu_posix_fault_inj *nvgpu_local_golden_image_get_fault_injection(void)
{
struct nvgpu_posix_fault_inj_container *c =
nvgpu_posix_fault_injection_get_container();
return &c->local_golden_image_fi;
}
#endif
struct nvgpu_gr_global_ctx_buffer_desc *
nvgpu_gr_global_ctx_desc_alloc(struct gk20a *g)
{
struct nvgpu_gr_global_ctx_buffer_desc *desc =
nvgpu_kzalloc(g, sizeof(*desc) *
U64(NVGPU_GR_GLOBAL_CTX_COUNT));
return desc;
}
void nvgpu_gr_global_ctx_desc_free(struct gk20a *g,
struct nvgpu_gr_global_ctx_buffer_desc *desc)
{
nvgpu_kfree(g, desc);
}
void nvgpu_gr_global_ctx_set_size(struct nvgpu_gr_global_ctx_buffer_desc *desc,
u32 index, size_t size)
{
nvgpu_assert(index < NVGPU_GR_GLOBAL_CTX_COUNT);
desc[index].size = size;
}
size_t nvgpu_gr_global_ctx_get_size(struct nvgpu_gr_global_ctx_buffer_desc *desc,
u32 index)
{
return desc[index].size;
}
static void nvgpu_gr_global_ctx_buffer_destroy(struct gk20a *g,
struct nvgpu_mem *mem)
{
nvgpu_dma_free(g, mem);
}
void nvgpu_gr_global_ctx_buffer_free(struct gk20a *g,
struct nvgpu_gr_global_ctx_buffer_desc *desc)
{
u32 i;
if (desc == NULL) {
return;
}
for (i = 0; i < NVGPU_GR_GLOBAL_CTX_COUNT; i++) {
if (desc[i].destroy != NULL) {
desc[i].destroy(g, &desc[i].mem);
desc[i].destroy = NULL;
}
}
nvgpu_log_fn(g, "done");
}
static int nvgpu_gr_global_ctx_buffer_alloc_sys(struct gk20a *g,
struct nvgpu_gr_global_ctx_buffer_desc *desc,
u32 index)
{
int err = 0;
nvgpu_log_fn(g, " ");
if (nvgpu_mem_is_valid(&desc[index].mem)) {
return 0;
}
err = nvgpu_dma_alloc_sys(g, desc[index].size,
&desc[index].mem);
if (err != 0) {
return err;
}
desc[index].destroy = nvgpu_gr_global_ctx_buffer_destroy;
return err;
}
#ifdef CONFIG_NVGPU_VPR
static int nvgpu_gr_global_ctx_buffer_alloc_vpr(struct gk20a *g,
struct nvgpu_gr_global_ctx_buffer_desc *desc,
u32 index)
{
int err = 0;
nvgpu_log_fn(g, " ");
if (nvgpu_mem_is_valid(&desc[index].mem)) {
return 0;
}
if (g->ops.secure_alloc != NULL) {
err = g->ops.secure_alloc(g,
&desc[index].mem, desc[index].size,
&desc[index].destroy);
if (err != 0) {
return err;
}
}
return err;
}
#endif
static bool nvgpu_gr_global_ctx_buffer_sizes_are_valid(struct gk20a *g,
struct nvgpu_gr_global_ctx_buffer_desc *desc)
{
if (desc[NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP].size == 0U) {
return false;
}
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
if ((desc[NVGPU_GR_GLOBAL_CTX_CIRCULAR].size == 0U) ||
(desc[NVGPU_GR_GLOBAL_CTX_PAGEPOOL].size == 0U) ||
(desc[NVGPU_GR_GLOBAL_CTX_ATTRIBUTE].size == 0U)) {
return false;
}
#ifdef CONFIG_NVGPU_VPR
if ((desc[NVGPU_GR_GLOBAL_CTX_CIRCULAR_VPR].size == 0U) ||
(desc[NVGPU_GR_GLOBAL_CTX_PAGEPOOL_VPR].size == 0U) ||
(desc[NVGPU_GR_GLOBAL_CTX_ATTRIBUTE_VPR].size == 0U)) {
return false;
}
#endif
}
return true;
}
#ifdef CONFIG_NVGPU_VPR
static int nvgpu_gr_global_ctx_buffer_vpr_alloc(struct gk20a *g,
struct nvgpu_gr_global_ctx_buffer_desc *desc)
{
int err = 0;
/*
* MIG supports only compute class.
* Allocate BUNDLE_CB, PAGEPOOL, ATTRIBUTE_CB and RTV_CB
* if 2D/3D/I2M classes (graphics) are supported.
*/
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
nvgpu_log(g, gpu_dbg_gr | gpu_dbg_mig,
"2D class is not supported "
"skip BUNDLE_CB, PAGEPOOL, ATTRIBUTE_CB "
"and RTV_CB");
return 0;
}
err = nvgpu_gr_global_ctx_buffer_alloc_vpr(g, desc,
NVGPU_GR_GLOBAL_CTX_CIRCULAR_VPR);
if (err != 0) {
goto fail;
}
err = nvgpu_gr_global_ctx_buffer_alloc_vpr(g, desc,
NVGPU_GR_GLOBAL_CTX_PAGEPOOL_VPR);
if (err != 0) {
goto fail;
}
err = nvgpu_gr_global_ctx_buffer_alloc_vpr(g, desc,
NVGPU_GR_GLOBAL_CTX_ATTRIBUTE_VPR);
if (err != 0) {
goto fail;
}
fail:
return err;
}
#endif
static int nvgpu_gr_global_ctx_buffer_sys_alloc(struct gk20a *g,
struct nvgpu_gr_global_ctx_buffer_desc *desc)
{
int err = 0;
/*
* MIG supports only compute class.
* Allocate BUNDLE_CB, PAGEPOOL, ATTRIBUTE_CB and RTV_CB
* if 2D/3D/I2M classes (graphics) are supported.
*/
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
err = nvgpu_gr_global_ctx_buffer_alloc_sys(g, desc,
NVGPU_GR_GLOBAL_CTX_CIRCULAR);
if (err != 0) {
goto fail;
}
err = nvgpu_gr_global_ctx_buffer_alloc_sys(g, desc,
NVGPU_GR_GLOBAL_CTX_PAGEPOOL);
if (err != 0) {
goto fail;
}
err = nvgpu_gr_global_ctx_buffer_alloc_sys(g, desc,
NVGPU_GR_GLOBAL_CTX_ATTRIBUTE);
if (err != 0) {
goto fail;
}
}
err = nvgpu_gr_global_ctx_buffer_alloc_sys(g, desc,
NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP);
if (err != 0) {
goto fail;
}
fail:
return err;
}
int nvgpu_gr_global_ctx_buffer_alloc(struct gk20a *g,
struct nvgpu_gr_global_ctx_buffer_desc *desc)
{
int err = 0;
if (nvgpu_gr_global_ctx_buffer_sizes_are_valid(g, desc) != true) {
return -EINVAL;
}
err = nvgpu_gr_global_ctx_buffer_sys_alloc(g, desc);
if (err != 0) {
goto clean_up;
}
#ifdef CONFIG_NVGPU_FECS_TRACE
if (desc[NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER].size != 0U) {
err = nvgpu_gr_global_ctx_buffer_alloc_sys(g, desc,
NVGPU_GR_GLOBAL_CTX_FECS_TRACE_BUFFER);
if (err != 0) {
goto clean_up;
}
}
#endif
#ifdef CONFIG_NVGPU_GRAPHICS
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
if (desc[NVGPU_GR_GLOBAL_CTX_RTV_CIRCULAR_BUFFER].size != 0U) {
err = nvgpu_gr_global_ctx_buffer_alloc_sys(g, desc,
NVGPU_GR_GLOBAL_CTX_RTV_CIRCULAR_BUFFER);
if (err != 0) {
goto clean_up;
}
}
}
#endif
#ifdef CONFIG_NVGPU_VPR
if (nvgpu_gr_global_ctx_buffer_vpr_alloc(g, desc) != 0) {
goto clean_up;
}
#endif
return err;
clean_up:
nvgpu_gr_global_ctx_buffer_free(g, desc);
return err;
}
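/*
 * Usage sketch (illustration only, not part of the driver): a hypothetical
 * caller of the descriptor API above, assuming a non-MIG, non-VPR build.
 * The literal sizes and the zero mapping flags are placeholders; real
 * callers query per-chip HALs for every buffer size before allocating.
 */
static int global_ctx_buffer_usage_sketch(struct gk20a *g, struct vm_gk20a *vm)
{
	struct nvgpu_gr_global_ctx_buffer_desc *desc;
	u64 gpu_va;
	int err;

	desc = nvgpu_gr_global_ctx_desc_alloc(g);
	if (desc == NULL) {
		return -ENOMEM;
	}

	/* placeholder sizes; all of these must be non-zero to pass validation */
	nvgpu_gr_global_ctx_set_size(desc, NVGPU_GR_GLOBAL_CTX_CIRCULAR, 0x10000U);
	nvgpu_gr_global_ctx_set_size(desc, NVGPU_GR_GLOBAL_CTX_PAGEPOOL, 0x20000U);
	nvgpu_gr_global_ctx_set_size(desc, NVGPU_GR_GLOBAL_CTX_ATTRIBUTE, 0x30000U);
	nvgpu_gr_global_ctx_set_size(desc, NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP, 0x1000U);

	err = nvgpu_gr_global_ctx_buffer_alloc(g, desc);
	if (err != 0) {
		goto free_desc;
	}

	gpu_va = nvgpu_gr_global_ctx_buffer_map(desc,
			NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP, vm, 0U, true);
	if (gpu_va == 0ULL) {
		err = -ENOMEM;
		goto free_buffers;
	}

	/* ... program gpu_va into the context image, use the buffers ... */

	nvgpu_gr_global_ctx_buffer_unmap(desc,
			NVGPU_GR_GLOBAL_CTX_PRIV_ACCESS_MAP, vm, gpu_va);
free_buffers:
	nvgpu_gr_global_ctx_buffer_free(g, desc);
free_desc:
	nvgpu_gr_global_ctx_desc_free(g, desc);
	return err;
}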
u64 nvgpu_gr_global_ctx_buffer_map(struct nvgpu_gr_global_ctx_buffer_desc *desc,
u32 index,
struct vm_gk20a *vm, u32 flags, bool priv)
{
u64 gpu_va;
if (!nvgpu_mem_is_valid(&desc[index].mem)) {
return 0;
}
gpu_va = nvgpu_gmmu_map(vm, &desc[index].mem, desc[index].mem.size,
flags, gk20a_mem_flag_none, priv,
desc[index].mem.aperture);
return gpu_va;
}
void nvgpu_gr_global_ctx_buffer_unmap(
struct nvgpu_gr_global_ctx_buffer_desc *desc,
u32 index,
struct vm_gk20a *vm, u64 gpu_va)
{
if (nvgpu_mem_is_valid(&desc[index].mem)) {
nvgpu_gmmu_unmap(vm, &desc[index].mem, gpu_va);
}
}
struct nvgpu_mem *nvgpu_gr_global_ctx_buffer_get_mem(
struct nvgpu_gr_global_ctx_buffer_desc *desc,
u32 index)
{
if (nvgpu_mem_is_valid(&desc[index].mem)) {
return &desc[index].mem;
}
return NULL;
}
bool nvgpu_gr_global_ctx_buffer_ready(
struct nvgpu_gr_global_ctx_buffer_desc *desc,
u32 index)
{
if (nvgpu_mem_is_valid(&desc[index].mem)) {
return true;
}
return false;
}
struct nvgpu_gr_global_ctx_local_golden_image *
nvgpu_gr_global_ctx_init_local_golden_image(struct gk20a *g,
struct nvgpu_mem *source_mem, size_t size)
{
struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image;
#ifdef NVGPU_UNITTEST_FAULT_INJECTION_ENABLEMENT
if (nvgpu_posix_fault_injection_handle_call(
nvgpu_local_golden_image_get_fault_injection())) {
return NULL;
}
#endif
local_golden_image = nvgpu_kzalloc(g, sizeof(*local_golden_image));
if (local_golden_image == NULL) {
return NULL;
}
local_golden_image->context = nvgpu_vzalloc(g, size);
if (local_golden_image->context == NULL) {
nvgpu_kfree(g, local_golden_image);
return NULL;
}
local_golden_image->size = size;
nvgpu_mem_rd_n(g, source_mem, 0, local_golden_image->context,
nvgpu_safe_cast_u64_to_u32(size));
return local_golden_image;
}
#ifdef CONFIG_NVGPU_GR_GOLDEN_CTX_VERIFICATION
bool nvgpu_gr_global_ctx_compare_golden_images(struct gk20a *g,
bool is_sysmem,
struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image1,
struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image2,
size_t size)
{
bool is_identical = true;
u32 *data1 = local_golden_image1->context;
u32 *data2 = local_golden_image2->context;
#ifdef CONFIG_NVGPU_DGPU
u32 i;
#endif
#ifdef NVGPU_UNITTEST_FAULT_INJECTION_ENABLEMENT
if (nvgpu_posix_fault_injection_handle_call(
nvgpu_golden_ctx_verif_get_fault_injection())) {
return false;
}
#endif
/*
* In case of sysmem, direct mem compare can be used.
* For vidmem, word by word comparison only works and
* it is too early to use ce engine for read operations.
*/
if (is_sysmem) {
if (nvgpu_memcmp((u8 *)data1, (u8 *)data2, size) != 0) {
is_identical = false;
}
} else {
#ifdef CONFIG_NVGPU_DGPU
for (i = 0U; i < nvgpu_safe_cast_u64_to_u32(size / sizeof(u32));
i = nvgpu_safe_add_u32(i, 1U)) {
if (*(data1 + i) != *(data2 + i)) {
is_identical = false;
nvgpu_log_info(g,
"mismatch i = %u golden1: %u golden2 %u",
i, *(data1 + i), *(data2 + i));
break;
}
}
#else
is_identical = false;
#endif
}
nvgpu_log_info(g, "%s result %u", __func__, is_identical);
return is_identical;
}
#endif
void nvgpu_gr_global_ctx_load_local_golden_image(struct gk20a *g,
struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image,
struct nvgpu_mem *target_mem)
{
/* Channel gr_ctx buffer is gpu cacheable.
* Flush and invalidate before cpu update. */
if (g->ops.mm.cache.l2_flush(g, true) != 0) {
nvgpu_err(g, "l2_flush failed");
}
nvgpu_mem_wr_n(g, target_mem, 0, local_golden_image->context,
nvgpu_safe_cast_u64_to_u32(local_golden_image->size));
nvgpu_log(g, gpu_dbg_gr, "loaded saved golden image into gr_ctx");
}
void nvgpu_gr_global_ctx_deinit_local_golden_image(struct gk20a *g,
struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image)
{
nvgpu_vfree(g, local_golden_image->context);
nvgpu_kfree(g, local_golden_image);
}
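/*
 * Usage sketch (illustration only, not part of the driver): a minimal,
 * hypothetical lifecycle for the local golden image helpers above.
 * "golden_mem", "gr_ctx_mem" and "golden_size" are placeholders supplied
 * by the caller.
 */
static int golden_image_usage_sketch(struct gk20a *g,
		struct nvgpu_mem *golden_mem, struct nvgpu_mem *gr_ctx_mem,
		size_t golden_size)
{
	struct nvgpu_gr_global_ctx_local_golden_image *img;

	/* snapshot the golden context from its source memory */
	img = nvgpu_gr_global_ctx_init_local_golden_image(g, golden_mem,
			golden_size);
	if (img == NULL) {
		return -ENOMEM;
	}

	/* replay the snapshot into a freshly allocated gr_ctx buffer */
	nvgpu_gr_global_ctx_load_local_golden_image(g, img, gr_ctx_mem);

	/* release the CPU-side copy once it is no longer needed */
	nvgpu_gr_global_ctx_deinit_local_golden_image(g, img);
	return 0;
}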
#ifdef CONFIG_NVGPU_DEBUGGER
u32 *nvgpu_gr_global_ctx_get_local_golden_image_ptr(
struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image)
{
return local_golden_image->context;
}
#endif

@@ -0,0 +1,68 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NVGPU_GR_GLOBAL_CTX_PRIV_H
#define NVGPU_GR_GLOBAL_CTX_PRIV_H
/**
* Global context buffer descriptor structure.
*
* This structure stores properties applicable to each global
* context buffer.
*/
struct nvgpu_gr_global_ctx_buffer_desc {
/**
* Memory to hold global context buffer.
*/
struct nvgpu_mem mem;
/**
* Size of global context buffer.
*/
size_t size;
/**
* Function pointer to free global context buffer.
*/
global_ctx_mem_destroy_fn destroy;
};
/**
* Local Golden context image descriptor structure.
*
* This structure stores details of a local Golden context image.
* Pointer to this struct is maintained in
* #nvgpu_gr_obj_ctx_golden_image structure.
*/
struct nvgpu_gr_global_ctx_local_golden_image {
/**
* Pointer to local Golden context image memory.
*/
u32 *context;
/**
* Size of local Golden context image.
*/
size_t size;
};
#endif /* NVGPU_GR_GLOBAL_CTX_PRIV_H */

File diff suppressed because it is too large.

@@ -0,0 +1,864 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/io.h>
#include <nvgpu/static_analysis.h>
#include <nvgpu/gr/config.h>
#include <nvgpu/gr/gr_instances.h>
#include <nvgpu/grmgr.h>
#include "gr_config_priv.h"
static void gr_config_init_pes_tpc(struct gk20a *g,
struct nvgpu_gr_config *config,
u32 gpc_index)
{
u32 pes_index;
u32 pes_tpc_mask;
u32 pes_tpc_count;
for (pes_index = 0; pes_index < config->pe_count_per_gpc;
pes_index++) {
pes_tpc_mask = g->ops.gr.config.get_pes_tpc_mask(g,
config, gpc_index, pes_index);
pes_tpc_count = hweight32(pes_tpc_mask);
/* detect PES presence by seeing if there are
* TPCs connected to it.
*/
if (pes_tpc_count != 0U) {
config->gpc_ppc_count[gpc_index] = nvgpu_safe_add_u32(
config->gpc_ppc_count[gpc_index], 1U);
}
config->pes_tpc_count[pes_index][gpc_index] = pes_tpc_count;
config->pes_tpc_mask[pes_index][gpc_index] = pes_tpc_mask;
}
}
static void gr_config_init_gpc_skip_mask(struct nvgpu_gr_config *config,
u32 gpc_index)
{
u32 pes_heavy_index;
u32 gpc_new_skip_mask = 0U;
u32 pes_tpc_cnt = 0U, pes_tpc_mask = 0U;
if (config->pe_count_per_gpc <= 1U) {
goto skip_mask_end;
}
pes_tpc_cnt = nvgpu_safe_add_u32(
config->pes_tpc_count[0][gpc_index],
config->pes_tpc_count[1][gpc_index]);
pes_heavy_index =
(config->pes_tpc_count[0][gpc_index] >
config->pes_tpc_count[1][gpc_index]) ? 0U : 1U;
if ((pes_tpc_cnt == 5U) || ((pes_tpc_cnt == 4U) &&
(config->pes_tpc_count[0][gpc_index] !=
config->pes_tpc_count[1][gpc_index]))) {
pes_tpc_mask = nvgpu_safe_sub_u32(
config->pes_tpc_mask[pes_heavy_index][gpc_index], 1U);
gpc_new_skip_mask =
config->pes_tpc_mask[pes_heavy_index][gpc_index] ^
(config->pes_tpc_mask[pes_heavy_index][gpc_index] &
pes_tpc_mask);
}
skip_mask_end:
config->gpc_skip_mask[gpc_index] = gpc_new_skip_mask;
}
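/*
 * Worked example (illustration only, numbers are hypothetical): for a GPC
 * with two PES units where pes_tpc_count = {3, 2} and the heavier PES has
 * pes_tpc_mask = 0x7, the branch above computes
 *
 *   0x7 ^ (0x7 & (0x7 - 1)) = 0x7 ^ 0x6 = 0x1
 *
 * i.e. the skip mask isolates the lowest TPC of the heavier PES so that
 * work is balanced 2/2 across the two PES units.
 */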
static void gr_config_log_info(struct gk20a *g,
struct nvgpu_gr_config *config)
{
u32 gpc_index, pes_index;
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "max_gpc_count: %d", config->max_gpc_count);
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "gpc_count: %d", config->gpc_count);
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "gpc_mask: 0x%x", config->gpc_mask);
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "max_tpc_per_gpc_count: %d", config->max_tpc_per_gpc_count);
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "max_tpc_count: %d", config->max_tpc_count);
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "tpc_count: %d", config->tpc_count);
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "sm_count_per_tpc: %d", config->sm_count_per_tpc);
#ifdef CONFIG_NVGPU_GRAPHICS
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "max_zcull_per_gpc_count: %d", config->max_zcull_per_gpc_count);
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "zcb_count: %d", config->zcb_count);
#endif
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "pe_count_per_gpc: %d", config->pe_count_per_gpc);
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "ppc_count: %d", config->ppc_count);
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "gpc_tpc_count[%d] : %d",
gpc_index, config->gpc_tpc_count[gpc_index]);
}
for (gpc_index = 0; gpc_index < config->max_gpc_count; gpc_index++) {
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "gpc_tpc_mask[%d] : 0x%x",
gpc_index, config->gpc_tpc_mask[gpc_index]);
}
#ifdef CONFIG_NVGPU_GRAPHICS
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "gpc_zcb_count[%d] : %d",
gpc_index, config->gpc_zcb_count != NULL ?
config->gpc_zcb_count[gpc_index] : 0U);
}
#endif
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "gpc_ppc_count[%d] : %d",
gpc_index, config->gpc_ppc_count[gpc_index]);
}
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "gpc_skip_mask[%d] : 0x%x",
gpc_index, config->gpc_skip_mask[gpc_index]);
}
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
for (pes_index = 0;
pes_index < config->pe_count_per_gpc;
pes_index++) {
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "pes_tpc_count[%d][%d] : %d",
pes_index, gpc_index,
config->pes_tpc_count[pes_index][gpc_index]);
}
}
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
for (pes_index = 0;
pes_index < config->pe_count_per_gpc;
pes_index++) {
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "pes_tpc_mask[%d][%d] : 0x%x",
pes_index, gpc_index,
config->pes_tpc_mask[pes_index][gpc_index]);
}
}
}
static void gr_config_set_gpc_mask(struct gk20a *g,
struct nvgpu_gr_config *config)
{
#ifdef CONFIG_NVGPU_DGPU
if (g->ops.gr.config.get_gpc_mask != NULL) {
config->gpc_mask = g->ops.gr.config.get_gpc_mask(g);
} else
#endif
{
config->gpc_mask = nvgpu_safe_sub_u32(BIT32(config->gpc_count),
1U);
}
}
static bool gr_config_alloc_valid(struct nvgpu_gr_config *config)
{
if ((config->gpc_tpc_count == NULL) || (config->gpc_tpc_mask == NULL) ||
(config->gpc_ppc_count == NULL) ||
(config->gpc_skip_mask == NULL)) {
return false;
}
#ifdef CONFIG_NVGPU_GRAPHICS
if (!nvgpu_is_enabled(config->g, NVGPU_SUPPORT_MIG) &&
(config->gpc_zcb_count == NULL)) {
return false;
}
#endif
return true;
}
static void gr_config_free_mem(struct gk20a *g,
struct nvgpu_gr_config *config)
{
u32 pes_index;
for (pes_index = 0U; pes_index < config->pe_count_per_gpc; pes_index++) {
nvgpu_kfree(g, config->pes_tpc_count[pes_index]);
nvgpu_kfree(g, config->pes_tpc_mask[pes_index]);
}
nvgpu_kfree(g, config->gpc_skip_mask);
nvgpu_kfree(g, config->gpc_ppc_count);
#ifdef CONFIG_NVGPU_GRAPHICS
nvgpu_kfree(g, config->gpc_zcb_count);
#endif
nvgpu_kfree(g, config->gpc_tpc_mask);
nvgpu_kfree(g, config->gpc_tpc_count);
}
static bool gr_config_alloc_struct_mem(struct gk20a *g,
struct nvgpu_gr_config *config)
{
u32 pes_index;
u32 total_tpc_cnt;
size_t sm_info_size;
size_t gpc_size, sm_size, max_gpc_cnt;
size_t pd_tbl_size;
total_tpc_cnt = nvgpu_safe_mult_u32(config->gpc_count,
config->max_tpc_per_gpc_count);
sm_size = nvgpu_safe_mult_u64((size_t)config->sm_count_per_tpc,
sizeof(struct nvgpu_sm_info));
/* allocate for max tpc per gpc */
sm_info_size = nvgpu_safe_mult_u64((size_t)total_tpc_cnt, sm_size);
config->sm_to_cluster = nvgpu_kzalloc(g, sm_info_size);
if (config->sm_to_cluster == NULL) {
nvgpu_err(g, "sm_to_cluster == NULL");
goto alloc_err;
}
#ifdef CONFIG_NVGPU_SM_DIVERSITY
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SM_DIVERSITY)) {
config->sm_to_cluster_redex_config =
nvgpu_kzalloc(g, sm_info_size);
if (config->sm_to_cluster_redex_config == NULL) {
nvgpu_err(g, "sm_to_cluster_redex_config == NULL");
goto clean_alloc_mem;
}
}
#endif
config->no_of_sm = 0;
gpc_size = nvgpu_safe_mult_u64((size_t)config->gpc_count, sizeof(u32));
max_gpc_cnt = nvgpu_safe_mult_u64((size_t)config->max_gpc_count, sizeof(u32));
config->gpc_tpc_count = nvgpu_kzalloc(g, gpc_size);
config->gpc_tpc_mask = nvgpu_kzalloc(g, max_gpc_cnt);
#ifdef CONFIG_NVGPU_GRAPHICS
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
config->max_zcull_per_gpc_count = nvgpu_get_litter_value(g,
GPU_LIT_NUM_ZCULL_BANKS);
config->gpc_zcb_count = nvgpu_kzalloc(g, gpc_size);
}
#endif
config->gpc_ppc_count = nvgpu_kzalloc(g, gpc_size);
pd_tbl_size = nvgpu_safe_mult_u64(
(size_t)g->ops.gr.config.get_pd_dist_skip_table_size(),
sizeof(u32));
pd_tbl_size = nvgpu_safe_mult_u64(pd_tbl_size, 4UL);
config->gpc_skip_mask = nvgpu_kzalloc(g, pd_tbl_size);
if (gr_config_alloc_valid(config) == false) {
goto clean_alloc_mem;
}
for (pes_index = 0U; pes_index < config->pe_count_per_gpc; pes_index++) {
config->pes_tpc_count[pes_index] = nvgpu_kzalloc(g, gpc_size);
config->pes_tpc_mask[pes_index] = nvgpu_kzalloc(g, gpc_size);
if ((config->pes_tpc_count[pes_index] == NULL) ||
(config->pes_tpc_mask[pes_index] == NULL)) {
goto clean_alloc_mem;
}
}
return true;
clean_alloc_mem:
nvgpu_kfree(g, config->sm_to_cluster);
config->sm_to_cluster = NULL;
#ifdef CONFIG_NVGPU_SM_DIVERSITY
if (config->sm_to_cluster_redex_config != NULL) {
nvgpu_kfree(g, config->sm_to_cluster_redex_config);
config->sm_to_cluster_redex_config = NULL;
}
#endif
gr_config_free_mem(g, config);
alloc_err:
return false;
}
static int gr_config_init_mig_gpcs(struct nvgpu_gr_config *config)
{
struct gk20a *g = config->g;
u32 cur_gr_instance = nvgpu_gr_get_cur_instance_id(g);
config->max_gpc_count = nvgpu_grmgr_get_max_gpc_count(g);
config->gpc_count = nvgpu_grmgr_get_gr_num_gpcs(g, cur_gr_instance);
if (config->gpc_count == 0U) {
nvgpu_err(g, "gpc_count==0!");
return -EINVAL;
}
config->gpc_mask = nvgpu_grmgr_get_gr_logical_gpc_mask(
g, cur_gr_instance);
return 0;
}
static int gr_config_init_gpcs(struct nvgpu_gr_config *config)
{
struct gk20a *g = config->g;
config->max_gpc_count = g->ops.top.get_max_gpc_count(g);
config->gpc_count = g->ops.priv_ring.get_gpc_count(g);
if (config->gpc_count == 0U) {
nvgpu_err(g, "gpc_count==0!");
return -EINVAL;
}
gr_config_set_gpc_mask(g, config);
return 0;
}
struct nvgpu_gr_config *nvgpu_gr_config_init(struct gk20a *g)
{
struct nvgpu_gr_config *config;
u32 cur_gr_instance = nvgpu_gr_get_cur_instance_id(g);
u32 gpc_index;
u32 gpc_phys_id;
int err;
config = nvgpu_kzalloc(g, sizeof(*config));
if (config == NULL) {
return NULL;
}
config->g = g;
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
err = gr_config_init_mig_gpcs(config);
if (err < 0) {
nvgpu_err(g, "MIG GPC config init failed");
nvgpu_kfree(g, config);
return NULL;
}
} else {
err = gr_config_init_gpcs(config);
if (err < 0) {
nvgpu_err(g, "GPC config init failed");
nvgpu_kfree(g, config);
return NULL;
}
}
/* Required to read gpc_tpc_mask below */
config->max_tpc_per_gpc_count = g->ops.top.get_max_tpc_per_gpc_count(g);
config->max_tpc_count = nvgpu_safe_mult_u32(config->max_gpc_count,
config->max_tpc_per_gpc_count);
config->pe_count_per_gpc = nvgpu_get_litter_value(g,
GPU_LIT_NUM_PES_PER_GPC);
if (config->pe_count_per_gpc > GK20A_GR_MAX_PES_PER_GPC) {
nvgpu_err(g, "too many pes per gpc");
goto clean_up_init;
}
config->sm_count_per_tpc =
nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
if (config->sm_count_per_tpc == 0U) {
nvgpu_err(g, "sm_count_per_tpc==0!");
goto clean_up_init;
}
if (gr_config_alloc_struct_mem(g, config) == false) {
goto clean_up_init;
}
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
/*
* Fuse registers must be queried with physical gpc-id and not
* the logical ones. For tu104 and earlier chips, the logical gpc-id
* is the same as the physical gpc-id in a non-floorswept config, but
* for chips after tu104 this may not hold.
*/
gpc_phys_id = nvgpu_grmgr_get_gr_gpc_phys_id(g,
cur_gr_instance, gpc_index);
config->gpc_tpc_mask[gpc_index] =
g->ops.gr.config.get_gpc_tpc_mask(g, config, gpc_phys_id);
}
config->ppc_count = 0;
config->tpc_count = 0;
#ifdef CONFIG_NVGPU_GRAPHICS
config->zcb_count = 0;
#endif
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
config->gpc_tpc_count[gpc_index] =
g->ops.gr.config.get_tpc_count_in_gpc(g, config,
gpc_index);
config->tpc_count = nvgpu_safe_add_u32(config->tpc_count,
config->gpc_tpc_count[gpc_index]);
#ifdef CONFIG_NVGPU_GRAPHICS
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
config->gpc_zcb_count[gpc_index] =
g->ops.gr.config.get_zcull_count_in_gpc(g, config,
gpc_index);
config->zcb_count = nvgpu_safe_add_u32(config->zcb_count,
config->gpc_zcb_count[gpc_index]);
}
#endif
gr_config_init_pes_tpc(g, config, gpc_index);
config->ppc_count = nvgpu_safe_add_u32(config->ppc_count,
config->gpc_ppc_count[gpc_index]);
gr_config_init_gpc_skip_mask(config, gpc_index);
}
gr_config_log_info(g, config);
return config;
clean_up_init:
nvgpu_kfree(g, config);
return NULL;
}
#ifdef CONFIG_NVGPU_GRAPHICS
static u32 prime_set[18] = {
2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61 };
/*
* Return map tiles count for given index
* Return 0 if index is out-of-bounds
*/
u32 nvgpu_gr_config_get_map_tile_count(struct nvgpu_gr_config *config, u32 index)
{
if (index >= config->map_tile_count) {
return 0;
}
return config->map_tiles[index];
}
u8 *nvgpu_gr_config_get_map_tiles(struct nvgpu_gr_config *config)
{
return config->map_tiles;
}
u32 nvgpu_gr_config_get_map_row_offset(struct nvgpu_gr_config *config)
{
return config->map_row_offset;
}
int nvgpu_gr_config_init_map_tiles(struct gk20a *g,
struct nvgpu_gr_config *config)
{
s32 comm_denom;
s32 mul_factor;
s32 *init_frac = NULL;
s32 *init_err = NULL;
s32 *run_err = NULL;
u32 *sorted_num_tpcs = NULL;
u32 *sorted_to_unsorted_gpc_map = NULL;
u32 gpc_index;
u32 gpc_mark = 0;
u32 num_tpc;
u32 max_tpc_count = 0;
u32 swap;
u32 tile_count;
u32 index;
bool delete_map = false;
bool gpc_sorted;
int ret = 0;
u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
u32 num_tpc_per_gpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_TPC_PER_GPC);
u32 map_tile_count = num_gpcs * num_tpc_per_gpc;
nvgpu_log(g, gpu_dbg_gr, " ");
init_frac = nvgpu_kzalloc(g, num_gpcs * sizeof(s32));
init_err = nvgpu_kzalloc(g, num_gpcs * sizeof(s32));
run_err = nvgpu_kzalloc(g, num_gpcs * sizeof(s32));
sorted_num_tpcs =
nvgpu_kzalloc(g, (size_t)num_gpcs *
(size_t)num_tpc_per_gpc *
sizeof(s32));
sorted_to_unsorted_gpc_map =
nvgpu_kzalloc(g, (size_t)num_gpcs * sizeof(s32));
if (!((init_frac != NULL) &&
(init_err != NULL) &&
(run_err != NULL) &&
(sorted_num_tpcs != NULL) &&
(sorted_to_unsorted_gpc_map != NULL))) {
ret = -ENOMEM;
goto clean_up;
}
config->map_row_offset = 0xFFFFFFFFU;
if (config->tpc_count == 3U) {
config->map_row_offset = 2;
} else if (config->tpc_count < 3U) {
config->map_row_offset = 1;
} else {
config->map_row_offset = 3;
for (index = 1U; index < 18U; index++) {
u32 prime = prime_set[index];
if ((config->tpc_count % prime) != 0U) {
config->map_row_offset = prime;
break;
}
}
}
switch (config->tpc_count) {
case 15:
config->map_row_offset = 6;
break;
case 14:
config->map_row_offset = 5;
break;
case 13:
config->map_row_offset = 2;
break;
case 11:
config->map_row_offset = 7;
break;
case 10:
config->map_row_offset = 6;
break;
case 7:
case 5:
config->map_row_offset = 1;
break;
default:
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gr, "unsupported tpc count = %u",
config->tpc_count);
break;
}
if (config->map_tiles != NULL) {
if (config->map_tile_count != config->tpc_count) {
delete_map = true;
}
for (tile_count = 0; tile_count < config->map_tile_count; tile_count++) {
if (nvgpu_gr_config_get_map_tile_count(config, tile_count)
>= config->tpc_count) {
delete_map = true;
}
}
if (delete_map) {
nvgpu_kfree(g, config->map_tiles);
config->map_tiles = NULL;
config->map_tile_count = 0;
}
}
if (config->map_tiles == NULL) {
config->map_tiles = nvgpu_kzalloc(g, map_tile_count * sizeof(u8));
if (config->map_tiles == NULL) {
ret = -ENOMEM;
goto clean_up;
}
config->map_tile_count = map_tile_count;
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
sorted_num_tpcs[gpc_index] = config->gpc_tpc_count[gpc_index];
sorted_to_unsorted_gpc_map[gpc_index] = gpc_index;
}
gpc_sorted = false;
while (!gpc_sorted) {
gpc_sorted = true;
for (gpc_index = 0U; gpc_index < config->gpc_count - 1U; gpc_index++) {
if (sorted_num_tpcs[gpc_index + 1U] > sorted_num_tpcs[gpc_index]) {
gpc_sorted = false;
swap = sorted_num_tpcs[gpc_index];
sorted_num_tpcs[gpc_index] = sorted_num_tpcs[gpc_index + 1U];
sorted_num_tpcs[gpc_index + 1U] = swap;
swap = sorted_to_unsorted_gpc_map[gpc_index];
sorted_to_unsorted_gpc_map[gpc_index] =
sorted_to_unsorted_gpc_map[gpc_index + 1U];
sorted_to_unsorted_gpc_map[gpc_index + 1U] = swap;
}
}
}
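		/*
		 * At this point sorted_num_tpcs[] holds the per-GPC TPC counts
		 * in descending order (simple bubble sort above), and
		 * sorted_to_unsorted_gpc_map[] maps each sorted slot back to
		 * its original GPC index.
		 */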
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
if (config->gpc_tpc_count[gpc_index] > max_tpc_count) {
max_tpc_count = config->gpc_tpc_count[gpc_index];
}
}
mul_factor = S32(config->gpc_count) * S32(max_tpc_count);
if ((U32(mul_factor) & 0x1U) != 0U) {
mul_factor = 2;
} else {
mul_factor = 1;
}
comm_denom = S32(config->gpc_count) * S32(max_tpc_count) * mul_factor;
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
num_tpc = sorted_num_tpcs[gpc_index];
init_frac[gpc_index] = S32(num_tpc) * S32(config->gpc_count) * mul_factor;
if (num_tpc != 0U) {
init_err[gpc_index] = S32(gpc_index) * S32(max_tpc_count) * mul_factor - comm_denom/2;
} else {
init_err[gpc_index] = 0;
}
run_err[gpc_index] = init_frac[gpc_index] + init_err[gpc_index];
}
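		/*
		 * Distribute GPC indices across map_tiles[] roughly in
		 * proportion to each GPC's TPC count. This is effectively an
		 * error-diffusion (Bresenham-style) interleave: a GPC emits a
		 * tile whenever its accumulated error crosses half the common
		 * denominator.
		 */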
while (gpc_mark < config->tpc_count) {
for (gpc_index = 0; gpc_index < config->gpc_count; gpc_index++) {
if ((run_err[gpc_index] * 2) >= comm_denom) {
config->map_tiles[gpc_mark++] = (u8)sorted_to_unsorted_gpc_map[gpc_index];
run_err[gpc_index] += init_frac[gpc_index] - comm_denom;
} else {
run_err[gpc_index] += init_frac[gpc_index];
}
}
}
}
clean_up:
nvgpu_kfree(g, init_frac);
nvgpu_kfree(g, init_err);
nvgpu_kfree(g, run_err);
nvgpu_kfree(g, sorted_num_tpcs);
nvgpu_kfree(g, sorted_to_unsorted_gpc_map);
if (ret != 0) {
nvgpu_err(g, "fail");
} else {
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
}
return ret;
}
u32 nvgpu_gr_config_get_max_zcull_per_gpc_count(struct nvgpu_gr_config *config)
{
return config->max_zcull_per_gpc_count;
}
u32 nvgpu_gr_config_get_zcb_count(struct nvgpu_gr_config *config)
{
return config->zcb_count;
}
u32 nvgpu_gr_config_get_gpc_zcb_count(struct nvgpu_gr_config *config,
u32 gpc_index)
{
return config->gpc_zcb_count[gpc_index];
}
#endif
void nvgpu_gr_config_deinit(struct gk20a *g, struct nvgpu_gr_config *config)
{
if (config == NULL) {
return;
}
gr_config_free_mem(g, config);
#ifdef CONFIG_NVGPU_GRAPHICS
nvgpu_kfree(g, config->map_tiles);
#endif
nvgpu_kfree(g, config->sm_to_cluster);
#ifdef CONFIG_NVGPU_SM_DIVERSITY
if (config->sm_to_cluster_redex_config != NULL) {
nvgpu_kfree(g, config->sm_to_cluster_redex_config);
config->sm_to_cluster_redex_config = NULL;
}
#endif
}
u32 nvgpu_gr_config_get_max_gpc_count(struct nvgpu_gr_config *config)
{
return config->max_gpc_count;
}
u32 nvgpu_gr_config_get_max_tpc_per_gpc_count(struct nvgpu_gr_config *config)
{
return config->max_tpc_per_gpc_count;
}
u32 nvgpu_gr_config_get_max_tpc_count(struct nvgpu_gr_config *config)
{
return config->max_tpc_count;
}
u32 nvgpu_gr_config_get_gpc_count(struct nvgpu_gr_config *config)
{
return config->gpc_count;
}
u32 nvgpu_gr_config_get_tpc_count(struct nvgpu_gr_config *config)
{
return config->tpc_count;
}
u32 nvgpu_gr_config_get_ppc_count(struct nvgpu_gr_config *config)
{
return config->ppc_count;
}
u32 nvgpu_gr_config_get_pe_count_per_gpc(struct nvgpu_gr_config *config)
{
return config->pe_count_per_gpc;
}
u32 nvgpu_gr_config_get_sm_count_per_tpc(struct nvgpu_gr_config *config)
{
return config->sm_count_per_tpc;
}
u32 nvgpu_gr_config_get_gpc_ppc_count(struct nvgpu_gr_config *config,
u32 gpc_index)
{
nvgpu_assert(gpc_index < nvgpu_gr_config_get_gpc_count(config));
return config->gpc_ppc_count[gpc_index];
}
u32 *nvgpu_gr_config_get_gpc_tpc_count_base(struct nvgpu_gr_config *config)
{
return config->gpc_tpc_count;
}
u32 nvgpu_gr_config_get_gpc_tpc_count(struct nvgpu_gr_config *config,
u32 gpc_index)
{
if (gpc_index >= config->gpc_count) {
return 0;
}
return config->gpc_tpc_count[gpc_index];
}
u32 nvgpu_gr_config_get_pes_tpc_count(struct nvgpu_gr_config *config,
u32 gpc_index, u32 pes_index)
{
nvgpu_assert(gpc_index < nvgpu_gr_config_get_gpc_count(config));
nvgpu_assert(pes_index < nvgpu_gr_config_get_pe_count_per_gpc(config));
return config->pes_tpc_count[pes_index][gpc_index];
}
u32 *nvgpu_gr_config_get_gpc_tpc_mask_base(struct nvgpu_gr_config *config)
{
return config->gpc_tpc_mask;
}
u32 nvgpu_gr_config_get_gpc_tpc_mask(struct nvgpu_gr_config *config,
u32 gpc_index)
{
nvgpu_assert(gpc_index < nvgpu_gr_config_get_gpc_count(config));
return config->gpc_tpc_mask[gpc_index];
}
void nvgpu_gr_config_set_gpc_tpc_mask(struct nvgpu_gr_config *config,
u32 gpc_index, u32 val)
{
nvgpu_assert(gpc_index < nvgpu_gr_config_get_gpc_count(config));
config->gpc_tpc_mask[gpc_index] = val;
}
u32 nvgpu_gr_config_get_gpc_skip_mask(struct nvgpu_gr_config *config,
u32 gpc_index)
{
if (gpc_index >= config->gpc_count) {
return 0;
}
return config->gpc_skip_mask[gpc_index];
}
u32 nvgpu_gr_config_get_pes_tpc_mask(struct nvgpu_gr_config *config,
u32 gpc_index, u32 pes_index)
{
nvgpu_assert(gpc_index < nvgpu_gr_config_get_gpc_count(config));
nvgpu_assert(pes_index < nvgpu_gr_config_get_pe_count_per_gpc(config));
return config->pes_tpc_mask[pes_index][gpc_index];
}
u32 nvgpu_gr_config_get_gpc_mask(struct nvgpu_gr_config *config)
{
return config->gpc_mask;
}
u32 nvgpu_gr_config_get_no_of_sm(struct nvgpu_gr_config *config)
{
return config->no_of_sm;
}
void nvgpu_gr_config_set_no_of_sm(struct nvgpu_gr_config *config, u32 no_of_sm)
{
config->no_of_sm = no_of_sm;
}
struct nvgpu_sm_info *nvgpu_gr_config_get_sm_info(struct nvgpu_gr_config *config,
u32 sm_id)
{
return &config->sm_to_cluster[sm_id];
}
#ifdef CONFIG_NVGPU_SM_DIVERSITY
struct nvgpu_sm_info *nvgpu_gr_config_get_redex_sm_info(
struct nvgpu_gr_config *config, u32 sm_id)
{
return &config->sm_to_cluster_redex_config[sm_id];
}
#endif
u32 nvgpu_gr_config_get_sm_info_gpc_index(struct nvgpu_sm_info *sm_info)
{
return sm_info->gpc_index;
}
void nvgpu_gr_config_set_sm_info_gpc_index(struct nvgpu_sm_info *sm_info,
u32 gpc_index)
{
sm_info->gpc_index = gpc_index;
}
u32 nvgpu_gr_config_get_sm_info_tpc_index(struct nvgpu_sm_info *sm_info)
{
return sm_info->tpc_index;
}
void nvgpu_gr_config_set_sm_info_tpc_index(struct nvgpu_sm_info *sm_info,
u32 tpc_index)
{
sm_info->tpc_index = tpc_index;
}
u32 nvgpu_gr_config_get_sm_info_global_tpc_index(struct nvgpu_sm_info *sm_info)
{
return sm_info->global_tpc_index;
}
void nvgpu_gr_config_set_sm_info_global_tpc_index(struct nvgpu_sm_info *sm_info,
u32 global_tpc_index)
{
sm_info->global_tpc_index = global_tpc_index;
}
u32 nvgpu_gr_config_get_sm_info_sm_index(struct nvgpu_sm_info *sm_info)
{
return sm_info->sm_index;
}
void nvgpu_gr_config_set_sm_info_sm_index(struct nvgpu_sm_info *sm_info,
u32 sm_index)
{
sm_info->sm_index = sm_index;
}

View File

@@ -0,0 +1,172 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NVGPU_GR_CONFIG_PRIV_H
#define NVGPU_GR_CONFIG_PRIV_H
#include <nvgpu/types.h>
/**
* Max possible PES count per GPC.
*/
#define GK20A_GR_MAX_PES_PER_GPC 3U
struct gk20a;
/**
* Detailed information of SM indexes in GR engine.
*/
struct nvgpu_sm_info {
/**
* Index of GPC for SM.
*/
u32 gpc_index;
/**
* Index of TPC for SM.
*/
u32 tpc_index;
/**
* Index of SM within TPC.
*/
u32 sm_index;
/**
* Global TPC index for SM.
*/
u32 global_tpc_index;
};
/**
* GR engine configuration data.
*
 * This data is populated during GR initialization and is referenced
 * across the GPU driver through public APIs.
*/
struct nvgpu_gr_config {
/**
* Pointer to GPU driver struct.
*/
struct gk20a *g;
/**
* Max possible number of GPCs in GR engine.
*/
u32 max_gpc_count;
/**
* Max possible number of TPCs per GPC in GR engine.
*/
u32 max_tpc_per_gpc_count;
/**
* Max possible number of TPCs in GR engine.
*/
u32 max_tpc_count;
/**
* Number of GPCs in GR engine.
*/
u32 gpc_count;
/**
* Number of TPCs in GR engine.
*/
u32 tpc_count;
/**
* Number of PPCs in GR engine.
*/
u32 ppc_count;
/**
* Number of PES per GPC in GR engine.
*/
u32 pe_count_per_gpc;
/**
* Number of SMs per TPC in GR engine.
*/
u32 sm_count_per_tpc;
/**
* Array to hold number of PPC units per GPC.
* Array is indexed by GPC index.
*/
u32 *gpc_ppc_count;
/**
* Array to hold number of TPCs per GPC.
* Array is indexed by GPC index.
*/
u32 *gpc_tpc_count;
/**
* 2-D array to hold number of TPCs attached to a PES unit
* in a GPC.
*/
u32 *pes_tpc_count[GK20A_GR_MAX_PES_PER_GPC];
/**
* Mask of GPCs. A set bit indicates GPC is available, otherwise
* it is not available.
*/
u32 gpc_mask;
/**
* Array to hold mask of TPCs per GPC.
* Array is indexed by GPC index.
*/
u32 *gpc_tpc_mask;
/**
* 2-D array to hold mask of TPCs attached to a PES unit
* in a GPC.
*/
u32 *pes_tpc_mask[GK20A_GR_MAX_PES_PER_GPC];
/**
* Array to hold skip mask of TPCs per GPC.
* Array is indexed by GPC index.
*/
u32 *gpc_skip_mask;
/**
* Number of SMs in GR engine.
*/
u32 no_of_sm;
/**
* Pointer to SM information struct.
*/
struct nvgpu_sm_info *sm_to_cluster;
#ifdef CONFIG_NVGPU_SM_DIVERSITY
/**
* Pointer to redundant execution config SM information struct.
	 * It is valid only if NVGPU_SUPPORT_SM_DIVERSITY is enabled.
*/
struct nvgpu_sm_info *sm_to_cluster_redex_config;
#endif
#ifdef CONFIG_NVGPU_GRAPHICS
u32 max_zcull_per_gpc_count;
u32 zcb_count;
u32 *gpc_zcb_count;
u8 *map_tiles;
u32 map_tile_count;
u32 map_row_offset;
#endif
};
#endif /* NVGPU_GR_CONFIG_PRIV_H */

View File

@@ -0,0 +1,269 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/gr/gr_ecc.h>
#include <nvgpu/gr/gr_utils.h>
#include <nvgpu/gr/config.h>
#include <nvgpu/string.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/kmem.h>
#include <nvgpu/ecc.h>
int nvgpu_ecc_counter_init_per_gr(struct gk20a *g,
struct nvgpu_ecc_stat **stat, const char *name)
{
struct nvgpu_ecc_stat *stats;
u32 i;
char gr_str[10] = {0};
stats = nvgpu_kzalloc(g, nvgpu_safe_mult_u64(sizeof(*stats),
g->num_gr_instances));
if (stats == NULL) {
return -ENOMEM;
}
for (i = 0; i < g->num_gr_instances; i++) {
/**
* Store stats name as below:
* gr<gr_index>_<name_string>
*/
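		/* e.g. "gr0_<name>" for GR instance 0 (illustrative only). */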
(void)strcpy(stats[i].name, "gr");
(void)nvgpu_strnadd_u32(gr_str, i, sizeof(gr_str), 10U);
(void)strncat(stats[i].name, gr_str,
NVGPU_ECC_STAT_NAME_MAX_SIZE -
strlen(stats[i].name));
(void)strncat(stats[i].name, "_",
NVGPU_ECC_STAT_NAME_MAX_SIZE -
strlen(stats[i].name));
(void)strncat(stats[i].name, name,
NVGPU_ECC_STAT_NAME_MAX_SIZE -
strlen(stats[i].name));
nvgpu_ecc_stat_add(g, &stats[i]);
}
*stat = stats;
return 0;
}
int nvgpu_ecc_counter_init_per_tpc(struct gk20a *g,
struct nvgpu_ecc_stat ***stat, const char *name)
{
struct nvgpu_ecc_stat **stats;
struct nvgpu_gr_config *gr_config = nvgpu_gr_get_config_ptr(g);
u32 gpc_count = nvgpu_gr_config_get_gpc_count(gr_config);
u32 gpc, tpc;
char gpc_str[10] = {0}, tpc_str[10] = {0};
int err = 0;
stats = nvgpu_kzalloc(g, nvgpu_safe_mult_u64(sizeof(*stats),
gpc_count));
if (stats == NULL) {
return -ENOMEM;
}
for (gpc = 0; gpc < gpc_count; gpc++) {
stats[gpc] = nvgpu_kzalloc(g,
nvgpu_safe_mult_u64(sizeof(*stats[gpc]),
nvgpu_gr_config_get_gpc_tpc_count(gr_config,
gpc)));
if (stats[gpc] == NULL) {
err = -ENOMEM;
goto fail;
}
}
for (gpc = 0; gpc < gpc_count; gpc++) {
for (tpc = 0;
tpc < nvgpu_gr_config_get_gpc_tpc_count(gr_config, gpc);
tpc++) {
/**
* Store stats name as below:
* gpc<gpc_value>_tpc<tpc_value>_<name_string>
*/
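			/* e.g. "gpc0_tpc1_<name>" (illustrative only). */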
(void)strcpy(stats[gpc][tpc].name, "gpc");
(void)nvgpu_strnadd_u32(gpc_str, gpc,
sizeof(gpc_str), 10U);
(void)strncat(stats[gpc][tpc].name, gpc_str,
NVGPU_ECC_STAT_NAME_MAX_SIZE -
strlen(stats[gpc][tpc].name));
(void)strncat(stats[gpc][tpc].name, "_tpc",
NVGPU_ECC_STAT_NAME_MAX_SIZE -
strlen(stats[gpc][tpc].name));
(void)nvgpu_strnadd_u32(tpc_str, tpc,
sizeof(tpc_str), 10U);
(void)strncat(stats[gpc][tpc].name, tpc_str,
NVGPU_ECC_STAT_NAME_MAX_SIZE -
strlen(stats[gpc][tpc].name));
(void)strncat(stats[gpc][tpc].name, "_",
NVGPU_ECC_STAT_NAME_MAX_SIZE -
strlen(stats[gpc][tpc].name));
(void)strncat(stats[gpc][tpc].name, name,
NVGPU_ECC_STAT_NAME_MAX_SIZE -
strlen(stats[gpc][tpc].name));
nvgpu_ecc_stat_add(g, &stats[gpc][tpc]);
}
}
*stat = stats;
fail:
if (err != 0) {
while (gpc-- != 0u) {
nvgpu_kfree(g, stats[gpc]);
}
nvgpu_kfree(g, stats);
}
return err;
}
int nvgpu_ecc_counter_init_per_gpc(struct gk20a *g,
struct nvgpu_ecc_stat **stat, const char *name)
{
struct nvgpu_ecc_stat *stats;
struct nvgpu_gr_config *gr_config = nvgpu_gr_get_config_ptr(g);
u32 gpc_count = nvgpu_gr_config_get_gpc_count(gr_config);
u32 gpc;
char gpc_str[10] = {0};
stats = nvgpu_kzalloc(g, nvgpu_safe_mult_u64(sizeof(*stats),
gpc_count));
if (stats == NULL) {
return -ENOMEM;
}
for (gpc = 0; gpc < gpc_count; gpc++) {
/**
* Store stats name as below:
* gpc<gpc_value>_<name_string>
*/
(void)strcpy(stats[gpc].name, "gpc");
(void)nvgpu_strnadd_u32(gpc_str, gpc, sizeof(gpc_str), 10U);
(void)strncat(stats[gpc].name, gpc_str,
NVGPU_ECC_STAT_NAME_MAX_SIZE -
strlen(stats[gpc].name));
(void)strncat(stats[gpc].name, "_",
NVGPU_ECC_STAT_NAME_MAX_SIZE -
strlen(stats[gpc].name));
(void)strncat(stats[gpc].name, name,
NVGPU_ECC_STAT_NAME_MAX_SIZE -
strlen(stats[gpc].name));
nvgpu_ecc_stat_add(g, &stats[gpc]);
}
*stat = stats;
return 0;
}
void nvgpu_ecc_counter_deinit_per_gr(struct gk20a *g,
struct nvgpu_ecc_stat **stats_p)
{
struct nvgpu_ecc_stat *stats = NULL;
u32 i;
if (*stats_p != NULL) {
stats = *stats_p;
for (i = 0; i < g->num_gr_instances; i++) {
nvgpu_ecc_stat_del(g, &stats[i]);
}
nvgpu_kfree(g, stats);
*stats_p = NULL;
}
}
void nvgpu_ecc_counter_deinit_per_tpc(struct gk20a *g,
struct nvgpu_ecc_stat ***stats_p)
{
struct nvgpu_gr_config *gr_config = nvgpu_gr_get_config_ptr(g);
struct nvgpu_ecc_stat **stats = NULL;
u32 gpc_count;
u32 gpc, tpc;
if (*stats_p != NULL) {
gpc_count = nvgpu_gr_config_get_gpc_count(gr_config);
stats = *stats_p;
for (gpc = 0; gpc < gpc_count; gpc++) {
if (stats[gpc] == NULL) {
continue;
}
for (tpc = 0;
tpc < nvgpu_gr_config_get_gpc_tpc_count(gr_config, gpc);
tpc++) {
nvgpu_ecc_stat_del(g, &stats[gpc][tpc]);
}
nvgpu_kfree(g, stats[gpc]);
stats[gpc] = NULL;
}
nvgpu_kfree(g, stats);
*stats_p = NULL;
}
}
void nvgpu_ecc_counter_deinit_per_gpc(struct gk20a *g,
struct nvgpu_ecc_stat **stats_p)
{
struct nvgpu_gr_config *gr_config = nvgpu_gr_get_config_ptr(g);
struct nvgpu_ecc_stat *stats = NULL;
u32 gpc_count;
u32 gpc;
if (*stats_p != NULL) {
gpc_count = nvgpu_gr_config_get_gpc_count(gr_config);
stats = *stats_p;
for (gpc = 0; gpc < gpc_count; gpc++) {
nvgpu_ecc_stat_del(g, &stats[gpc]);
}
nvgpu_kfree(g, stats);
*stats_p = NULL;
}
}
void nvgpu_gr_ecc_free(struct gk20a *g)
{
struct nvgpu_gr_config *gr_config = nvgpu_gr_get_config_ptr(g);
nvgpu_log(g, gpu_dbg_gr, " ");
if (gr_config == NULL) {
return;
}
if (g->ops.gr.ecc.fecs_ecc_deinit != NULL) {
g->ops.gr.ecc.fecs_ecc_deinit(g);
}
if (g->ops.gr.ecc.gpc_tpc_ecc_deinit != NULL) {
g->ops.gr.ecc.gpc_tpc_ecc_deinit(g);
}
}

View File

@@ -0,0 +1,755 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/netlist.h>
#include <nvgpu/gr/gr_falcon.h>
#include <nvgpu/enabled.h>
#include <nvgpu/debug.h>
#include <nvgpu/gr/hwpm_map.h>
#include <nvgpu/firmware.h>
#include <nvgpu/sizes.h>
#include <nvgpu/mm.h>
#include <nvgpu/acr.h>
#include <nvgpu/gr/gr_utils.h>
#ifdef CONFIG_NVGPU_LS_PMU
#include <nvgpu/pmu/lsfm.h>
#include <nvgpu/pmu/pmu_pg.h>
#endif
#ifdef CONFIG_NVGPU_DGPU
#include <nvgpu/sec2/lsfm.h>
#endif
#include <nvgpu/dma.h>
#include <nvgpu/static_analysis.h>
#include "gr_falcon_priv.h"
#define NVGPU_FECS_UCODE_IMAGE "fecs.bin"
#define NVGPU_GPCCS_UCODE_IMAGE "gpccs.bin"
struct nvgpu_gr_falcon *nvgpu_gr_falcon_init_support(struct gk20a *g)
{
struct nvgpu_gr_falcon *falcon;
nvgpu_log_fn(g, " ");
falcon = nvgpu_kzalloc(g, sizeof(*falcon));
if (falcon == NULL) {
return falcon;
}
nvgpu_mutex_init(&falcon->fecs_mutex);
falcon->coldboot_bootstrap_done = false;
return falcon;
}
void nvgpu_gr_falcon_remove_support(struct gk20a *g,
struct nvgpu_gr_falcon *falcon)
{
nvgpu_log_fn(g, " ");
if (falcon == NULL) {
return;
}
nvgpu_kfree(g, falcon);
}
#ifdef CONFIG_NVGPU_POWER_PG
int nvgpu_gr_falcon_bind_fecs_elpg(struct gk20a *g)
{
#ifdef CONFIG_NVGPU_LS_PMU
struct nvgpu_pmu *pmu = g->pmu;
struct mm_gk20a *mm = &g->mm;
struct vm_gk20a *vm = mm->pmu.vm;
int err = 0;
u32 size;
u32 data;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " ");
size = 0;
err = g->ops.gr.falcon.ctrl_ctxsw(g,
NVGPU_GR_FALCON_METHOD_REGLIST_DISCOVER_IMAGE_SIZE, 0U, &size);
if (err != 0) {
nvgpu_err(g,
"fail to query fecs pg buffer size");
return err;
}
nvgpu_log(g, gpu_dbg_gr, "FECS PG buffer size = %u", size);
if (nvgpu_pmu_pg_buf_get_cpu_va(g, pmu) == NULL) {
err = nvgpu_dma_alloc_map_sys(vm, size, nvgpu_pmu_pg_buf(g, pmu));
if (err != 0) {
nvgpu_err(g, "failed to allocate memory");
return -ENOMEM;
}
}
data = g->ops.gr.falcon.get_fecs_current_ctx_data(g,
&mm->pmu.inst_block);
err = g->ops.gr.falcon.ctrl_ctxsw(g,
NVGPU_GR_FALCON_METHOD_REGLIST_BIND_INSTANCE, data, NULL);
if (err != 0) {
nvgpu_err(g,
"fail to bind pmu inst to gr");
return err;
}
data = u64_lo32(nvgpu_pmu_pg_buf_get_gpu_va(g, pmu) >> 8);
err = g->ops.gr.falcon.ctrl_ctxsw(g,
NVGPU_GR_FALCON_METHOD_REGLIST_SET_VIRTUAL_ADDRESS, data, NULL);
if (err != 0) {
nvgpu_err(g,
"fail to set pg buffer pmu va");
return err;
}
nvgpu_log(g, gpu_dbg_gr, "done");
return err;
#else
return 0;
#endif
}
#endif
int nvgpu_gr_falcon_init_ctxsw(struct gk20a *g, struct nvgpu_gr_falcon *falcon)
{
int err = 0;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " ");
err = g->ops.gr.falcon.load_ctxsw_ucode(g, falcon);
if (err != 0) {
goto out;
}
err = g->ops.gr.falcon.wait_ctxsw_ready(g);
out:
if (err != 0) {
nvgpu_err(g, "fail");
} else {
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
}
return err;
}
int nvgpu_gr_falcon_init_ctx_state(struct gk20a *g,
struct nvgpu_gr_falcon *falcon)
{
struct nvgpu_gr_falcon_query_sizes *sizes = &falcon->sizes;
int err = 0;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " ");
/* fecs init ramchain */
err = g->ops.gr.falcon.init_ctx_state(g, sizes);
if (err != 0) {
goto out;
}
out:
if (err != 0) {
nvgpu_err(g, "fail");
} else {
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
}
return err;
}
u32 nvgpu_gr_falcon_get_golden_image_size(struct nvgpu_gr_falcon *falcon)
{
return falcon->sizes.golden_image_size;
}
#ifdef CONFIG_NVGPU_DEBUGGER
u32 nvgpu_gr_falcon_get_pm_ctxsw_image_size(struct nvgpu_gr_falcon *falcon)
{
return falcon->sizes.pm_ctxsw_image_size;
}
#endif
#ifdef CONFIG_NVGPU_GFXP
u32 nvgpu_gr_falcon_get_preempt_image_size(struct nvgpu_gr_falcon *falcon)
{
return falcon->sizes.preempt_image_size;
}
#endif /* CONFIG_NVGPU_GFXP */
#ifdef CONFIG_NVGPU_GRAPHICS
u32 nvgpu_gr_falcon_get_zcull_image_size(struct nvgpu_gr_falcon *falcon)
{
return falcon->sizes.zcull_image_size;
}
#endif /* CONFIG_NVGPU_GRAPHICS */
static int nvgpu_gr_falcon_init_ctxsw_ucode_vaspace(struct gk20a *g,
struct nvgpu_gr_falcon *falcon)
{
struct mm_gk20a *mm = &g->mm;
struct vm_gk20a *vm = mm->pmu.vm;
struct nvgpu_ctxsw_ucode_info *ucode_info = &falcon->ctxsw_ucode_info;
int err;
err = nvgpu_alloc_inst_block(g, &ucode_info->inst_blk_desc);
if (err != 0) {
return err;
}
g->ops.mm.init_inst_block(&ucode_info->inst_blk_desc, vm, 0);
/* Map ucode surface to GMMU */
ucode_info->surface_desc.gpu_va = nvgpu_gmmu_map(vm,
&ucode_info->surface_desc,
ucode_info->surface_desc.size,
0, /* flags */
gk20a_mem_flag_read_only,
false,
ucode_info->surface_desc.aperture);
if (ucode_info->surface_desc.gpu_va == 0ULL) {
nvgpu_err(g, "failed to update gmmu ptes");
return -ENOMEM;
}
return 0;
}
static void nvgpu_gr_falcon_init_ctxsw_ucode_segment(
struct nvgpu_ctxsw_ucode_segment *p_seg, u32 *offset, u32 size)
{
u32 ucode_offset;
p_seg->offset = *offset;
p_seg->size = size;
ucode_offset = nvgpu_safe_add_u32(*offset, size);
*offset = NVGPU_ALIGN(ucode_offset, 256U);
}
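/*
 * Resulting ucode surface layout (each segment aligned to 256 bytes):
 * [FECS boot | FECS code | FECS data | GPCCS boot | GPCCS code | GPCCS data]
 */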
static void nvgpu_gr_falcon_init_ctxsw_ucode_segments(
struct nvgpu_ctxsw_ucode_segments *segments, u32 *offset,
struct nvgpu_ctxsw_bootloader_desc *bootdesc,
u32 code_size, u32 data_size)
{
u32 boot_size = NVGPU_ALIGN(bootdesc->size, sizeof(u32));
segments->boot_entry = bootdesc->entry_point;
segments->boot_imem_offset = bootdesc->imem_offset;
nvgpu_gr_falcon_init_ctxsw_ucode_segment(&segments->boot,
offset, boot_size);
nvgpu_gr_falcon_init_ctxsw_ucode_segment(&segments->code,
offset, code_size);
nvgpu_gr_falcon_init_ctxsw_ucode_segment(&segments->data,
offset, data_size);
}
static void nvgpu_gr_falcon_copy_ctxsw_ucode_segments(
struct gk20a *g,
struct nvgpu_mem *dst,
struct nvgpu_ctxsw_ucode_segments *segments,
u32 *bootimage,
u32 *code, u32 *data)
{
unsigned int i;
nvgpu_mem_wr_n(g, dst, segments->boot.offset, bootimage,
segments->boot.size);
nvgpu_mem_wr_n(g, dst, segments->code.offset, code,
segments->code.size);
nvgpu_mem_wr_n(g, dst, segments->data.offset, data,
segments->data.size);
/* compute a "checksum" for the boot binary to detect its version */
segments->boot_signature = 0;
for (i = 0; i < (segments->boot.size / sizeof(u32)); i++) {
segments->boot_signature = nvgpu_gr_checksum_u32(
segments->boot_signature, bootimage[i]);
}
}
int nvgpu_gr_falcon_init_ctxsw_ucode(struct gk20a *g,
struct nvgpu_gr_falcon *falcon)
{
struct nvgpu_ctxsw_bootloader_desc *fecs_boot_desc;
struct nvgpu_ctxsw_bootloader_desc *gpccs_boot_desc;
struct nvgpu_firmware *fecs_fw;
struct nvgpu_firmware *gpccs_fw;
u32 *fecs_boot_image;
u32 *gpccs_boot_image;
struct nvgpu_ctxsw_ucode_info *ucode_info = &falcon->ctxsw_ucode_info;
u32 ucode_size;
int err = 0;
nvgpu_log(g, gpu_dbg_gr, "Requst and copy FECS/GPCCS firmwares");
fecs_fw = nvgpu_request_firmware(g, NVGPU_FECS_UCODE_IMAGE, 0);
if (fecs_fw == NULL) {
nvgpu_err(g, "failed to load fecs ucode!!");
return -ENOENT;
}
fecs_boot_desc = (void *)fecs_fw->data;
fecs_boot_image = (void *)(fecs_fw->data +
sizeof(struct nvgpu_ctxsw_bootloader_desc));
gpccs_fw = nvgpu_request_firmware(g, NVGPU_GPCCS_UCODE_IMAGE, 0);
if (gpccs_fw == NULL) {
nvgpu_release_firmware(g, fecs_fw);
nvgpu_err(g, "failed to load gpccs ucode!!");
return -ENOENT;
}
gpccs_boot_desc = (void *)gpccs_fw->data;
gpccs_boot_image = (void *)(gpccs_fw->data +
sizeof(struct nvgpu_ctxsw_bootloader_desc));
ucode_size = 0;
nvgpu_gr_falcon_init_ctxsw_ucode_segments(&ucode_info->fecs,
&ucode_size, fecs_boot_desc,
nvgpu_safe_mult_u32(
nvgpu_netlist_get_fecs_inst_count(g), (u32)sizeof(u32)),
nvgpu_safe_mult_u32(
nvgpu_netlist_get_fecs_data_count(g), (u32)sizeof(u32)));
nvgpu_gr_falcon_init_ctxsw_ucode_segments(&ucode_info->gpccs,
&ucode_size, gpccs_boot_desc,
nvgpu_safe_mult_u32(
nvgpu_netlist_get_gpccs_inst_count(g), (u32)sizeof(u32)),
nvgpu_safe_mult_u32(
nvgpu_netlist_get_gpccs_data_count(g), (u32)sizeof(u32)));
err = nvgpu_dma_alloc_sys(g, ucode_size, &ucode_info->surface_desc);
if (err != 0) {
goto clean_up;
}
nvgpu_gr_falcon_copy_ctxsw_ucode_segments(g,
&ucode_info->surface_desc,
&ucode_info->fecs,
fecs_boot_image,
nvgpu_netlist_get_fecs_inst_list(g),
nvgpu_netlist_get_fecs_data_list(g));
nvgpu_release_firmware(g, fecs_fw);
fecs_fw = NULL;
nvgpu_gr_falcon_copy_ctxsw_ucode_segments(g,
&ucode_info->surface_desc,
&ucode_info->gpccs,
gpccs_boot_image,
nvgpu_netlist_get_gpccs_inst_list(g),
nvgpu_netlist_get_gpccs_data_list(g));
nvgpu_release_firmware(g, gpccs_fw);
gpccs_fw = NULL;
err = nvgpu_gr_falcon_init_ctxsw_ucode_vaspace(g, falcon);
if (err != 0) {
goto clean_up;
}
return 0;
clean_up:
nvgpu_dma_free(g, &ucode_info->surface_desc);
if (gpccs_fw != NULL) {
nvgpu_release_firmware(g, gpccs_fw);
gpccs_fw = NULL;
}
if (fecs_fw != NULL) {
nvgpu_release_firmware(g, fecs_fw);
fecs_fw = NULL;
}
return err;
}
static void nvgpu_gr_falcon_bind_instblk(struct gk20a *g,
struct nvgpu_gr_falcon *falcon)
{
struct nvgpu_ctxsw_ucode_info *ucode_info =
&falcon->ctxsw_ucode_info;
u64 inst_ptr;
if (g->ops.gr.falcon.bind_instblk == NULL) {
return;
}
inst_ptr = nvgpu_inst_block_addr(g, &ucode_info->inst_blk_desc);
g->ops.gr.falcon.bind_instblk(g, &ucode_info->inst_blk_desc,
inst_ptr);
}
#ifdef CONFIG_NVGPU_GR_FALCON_NON_SECURE_BOOT
static void nvgpu_gr_falcon_load_dmem(struct gk20a *g)
{
u32 ucode_u32_size;
const u32 *ucode_u32_data;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " ");
ucode_u32_size = nvgpu_netlist_get_gpccs_data_count(g);
ucode_u32_data = (const u32 *)nvgpu_netlist_get_gpccs_data_list(g);
g->ops.gr.falcon.load_gpccs_dmem(g, ucode_u32_data, ucode_u32_size);
ucode_u32_size = nvgpu_netlist_get_fecs_data_count(g);
ucode_u32_data = (const u32 *)nvgpu_netlist_get_fecs_data_list(g);
g->ops.gr.falcon.load_fecs_dmem(g, ucode_u32_data, ucode_u32_size);
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
}
static void nvgpu_gr_falcon_load_imem(struct gk20a *g)
{
u32 ucode_u32_size;
const u32 *ucode_u32_data;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " ");
ucode_u32_size = nvgpu_netlist_get_gpccs_inst_count(g);
ucode_u32_data = (const u32 *)nvgpu_netlist_get_gpccs_inst_list(g);
g->ops.gr.falcon.load_gpccs_imem(g, ucode_u32_data, ucode_u32_size);
ucode_u32_size = nvgpu_netlist_get_fecs_inst_count(g);
ucode_u32_data = (const u32 *)nvgpu_netlist_get_fecs_inst_list(g);
g->ops.gr.falcon.load_fecs_imem(g, ucode_u32_data, ucode_u32_size);
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
}
static void nvgpu_gr_falcon_load_ctxsw_ucode_header(struct gk20a *g,
u64 addr_base, struct nvgpu_ctxsw_ucode_segments *segments,
u32 reg_offset)
{
u32 addr_code32 = u64_lo32(nvgpu_safe_add_u64(addr_base,
segments->code.offset) >> 8);
u32 addr_data32 = u64_lo32(nvgpu_safe_add_u64(addr_base,
segments->data.offset) >> 8);
g->ops.gr.falcon.load_ctxsw_ucode_header(g, reg_offset,
segments->boot_signature, addr_code32, addr_data32,
segments->code.size, segments->data.size);
}
static void nvgpu_gr_falcon_load_ctxsw_ucode_boot(struct gk20a *g,
u64 addr_base, struct nvgpu_ctxsw_ucode_segments *segments,
u32 reg_offset)
{
u32 addr_load32 = u64_lo32(nvgpu_safe_add_u64(addr_base,
segments->boot.offset) >> 8);
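	/* Boot image size rounded up to whole 256-byte IMEM blocks. */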
u32 blocks = (nvgpu_safe_add_u32(segments->boot.size, 0xFFU)
& ~0xFFU) >> 8;
u32 dst = segments->boot_imem_offset;
g->ops.gr.falcon.load_ctxsw_ucode_boot(g, reg_offset,
segments->boot_entry, addr_load32, blocks, dst);
}
static void nvgpu_gr_falcon_load_ctxsw_ucode_segments(
struct gk20a *g, u64 addr_base,
struct nvgpu_ctxsw_ucode_segments *segments, u32 reg_offset)
{
/* Copy falcon bootloader into dmem */
nvgpu_gr_falcon_load_ctxsw_ucode_header(g, addr_base,
segments, reg_offset);
nvgpu_gr_falcon_load_ctxsw_ucode_boot(g,
addr_base, segments, reg_offset);
}
static void nvgpu_gr_falcon_load_with_bootloader(struct gk20a *g,
struct nvgpu_gr_falcon *falcon)
{
struct nvgpu_ctxsw_ucode_info *ucode_info =
&falcon->ctxsw_ucode_info;
u64 addr_base = ucode_info->surface_desc.gpu_va;
nvgpu_log(g, gpu_dbg_gr, " ");
nvgpu_gr_falcon_bind_instblk(g, falcon);
nvgpu_gr_falcon_load_ctxsw_ucode_segments(g, addr_base,
&falcon->ctxsw_ucode_info.fecs, 0);
nvgpu_gr_falcon_load_ctxsw_ucode_segments(g, addr_base,
&falcon->ctxsw_ucode_info.gpccs,
g->ops.gr.falcon.get_gpccs_start_reg_offset());
}
int nvgpu_gr_falcon_load_ctxsw_ucode(struct gk20a *g,
struct nvgpu_gr_falcon *falcon)
{
int err;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " ");
#ifdef CONFIG_NVGPU_SIM
if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
g->ops.gr.falcon.configure_fmodel(g);
}
#endif
/*
* In case bootloader is not supported, revert to the old way of
* loading gr ucode, without the faster bootstrap routine.
*/
if (!nvgpu_is_enabled(g, NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP)) {
nvgpu_gr_falcon_load_dmem(g);
nvgpu_gr_falcon_load_imem(g);
g->ops.gr.falcon.start_ucode(g);
} else {
if (!falcon->skip_ucode_init) {
err = nvgpu_gr_falcon_init_ctxsw_ucode(g, falcon);
if (err != 0) {
return err;
}
}
nvgpu_gr_falcon_load_with_bootloader(g, falcon);
falcon->skip_ucode_init = true;
}
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
return 0;
}
static void nvgpu_gr_falcon_load_gpccs_with_bootloader(struct gk20a *g,
struct nvgpu_gr_falcon *falcon)
{
struct nvgpu_ctxsw_ucode_info *ucode_info =
&falcon->ctxsw_ucode_info;
u64 addr_base = ucode_info->surface_desc.gpu_va;
nvgpu_gr_falcon_bind_instblk(g, falcon);
nvgpu_gr_falcon_load_ctxsw_ucode_segments(g, addr_base,
&falcon->ctxsw_ucode_info.gpccs,
g->ops.gr.falcon.get_gpccs_start_reg_offset());
}
#endif
#if defined(CONFIG_NVGPU_DGPU) || defined(CONFIG_NVGPU_LS_PMU)
static int gr_falcon_sec2_or_ls_pmu_bootstrap(struct gk20a *g,
bool *bootstrap, u32 falcon_id_mask)
{
int err = 0;
bool bootstrap_set = false;
#ifdef CONFIG_NVGPU_DGPU
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEC2_RTOS)) {
bootstrap_set = true;
nvgpu_log(g, gpu_dbg_gr, "bootstrap by SEC2");
err = nvgpu_sec2_bootstrap_ls_falcons(g,
&g->sec2, FALCON_ID_FECS);
if (err == 0) {
err = nvgpu_sec2_bootstrap_ls_falcons(g,
&g->sec2, FALCON_ID_GPCCS);
}
} else
#endif
#ifdef CONFIG_NVGPU_LS_PMU
if (g->support_ls_pmu) {
bootstrap_set = true;
nvgpu_log(g, gpu_dbg_gr, "bootstrap by LS PMU");
err = nvgpu_pmu_lsfm_bootstrap_ls_falcon(g,
g->pmu, g->pmu->lsfm,
falcon_id_mask);
}
#endif
*bootstrap = bootstrap_set;
return err;
}
static int gr_falcon_sec2_or_ls_pmu_recovery_bootstrap(struct gk20a *g)
{
int err = 0;
bool bootstrap = false;
u32 falcon_idmask = BIT32(FALCON_ID_FECS) | BIT32(FALCON_ID_GPCCS);
err = gr_falcon_sec2_or_ls_pmu_bootstrap(g,
&bootstrap,
falcon_idmask);
if ((err == 0) && (!bootstrap)) {
err = nvgpu_acr_bootstrap_hs_acr(g, g->acr);
if (err != 0) {
nvgpu_err(g,
"ACR GR LSF bootstrap failed");
}
}
return err;
}
static int gr_falcon_sec2_or_ls_pmu_coldboot_bootstrap(struct gk20a *g)
{
int err = 0;
u8 falcon_id_mask = 0;
bool bootstrap = false;
if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) {
return err;
}
if (nvgpu_acr_is_lsf_lazy_bootstrap(g, g->acr,
FALCON_ID_FECS)) {
falcon_id_mask |= BIT8(FALCON_ID_FECS);
}
if (nvgpu_acr_is_lsf_lazy_bootstrap(g, g->acr,
FALCON_ID_GPCCS)) {
falcon_id_mask |= BIT8(FALCON_ID_GPCCS);
}
err = gr_falcon_sec2_or_ls_pmu_bootstrap(g,
&bootstrap,
(u32)falcon_id_mask);
if ((err == 0) && (!bootstrap)) {
/* GR falcons bootstrapped by ACR */
nvgpu_log(g, gpu_dbg_gr, "bootstrap by ACR");
err = 0;
}
return err;
}
#endif
static int gr_falcon_recovery_bootstrap(struct gk20a *g,
struct nvgpu_gr_falcon *falcon)
{
int err = 0;
#ifdef CONFIG_NVGPU_GR_FALCON_NON_SECURE_BOOT
if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) {
nvgpu_gr_falcon_load_gpccs_with_bootloader(g, falcon);
#ifdef CONFIG_NVGPU_LS_PMU
err = nvgpu_pmu_lsfm_bootstrap_ls_falcon(g, g->pmu,
g->pmu->lsfm, BIT32(FALCON_ID_FECS));
#endif
} else
#endif
{
/* bind WPR VA inst block */
nvgpu_gr_falcon_bind_instblk(g, falcon);
#if defined(CONFIG_NVGPU_DGPU) || defined(CONFIG_NVGPU_LS_PMU)
err = gr_falcon_sec2_or_ls_pmu_recovery_bootstrap(g);
#else
err = nvgpu_acr_bootstrap_hs_acr(g, g->acr);
if (err != 0) {
nvgpu_err(g,
"ACR GR LSF bootstrap failed");
}
#endif
}
return err;
}
static void gr_falcon_coldboot_bootstrap(struct gk20a *g,
struct nvgpu_gr_falcon *falcon)
{
#ifdef CONFIG_NVGPU_GR_FALCON_NON_SECURE_BOOT
if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) {
nvgpu_gr_falcon_load_gpccs_with_bootloader(g, falcon);
} else
#endif
{
/* bind WPR VA inst block */
nvgpu_gr_falcon_bind_instblk(g, falcon);
}
}
int nvgpu_gr_falcon_load_secure_ctxsw_ucode(struct gk20a *g,
struct nvgpu_gr_falcon *falcon)
{
int err = 0;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " ");
#ifdef CONFIG_NVGPU_SIM
if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
g->ops.gr.falcon.configure_fmodel(g);
}
#endif
if (falcon->coldboot_bootstrap_done) {
nvgpu_log(g, gpu_dbg_gr, "recovery bootstrap");
/* this must be recovery so bootstrap fecs and gpccs */
err = gr_falcon_recovery_bootstrap(g, falcon);
if (err != 0) {
nvgpu_err(g, "Unable to recover GR falcon");
return err;
}
} else {
nvgpu_log(g, gpu_dbg_gr, "coldboot bootstrap");
		/* cold boot or rail-gate (RG) exit */
falcon->coldboot_bootstrap_done = true;
gr_falcon_coldboot_bootstrap(g, falcon);
#if defined(CONFIG_NVGPU_DGPU) || defined(CONFIG_NVGPU_LS_PMU)
err = gr_falcon_sec2_or_ls_pmu_coldboot_bootstrap(g);
if (err != 0) {
nvgpu_err(g, "Unable to boot GPCCS");
return err;
}
#endif
}
g->ops.gr.falcon.start_gpccs(g);
g->ops.gr.falcon.start_fecs(g);
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
return 0;
}
struct nvgpu_ctxsw_ucode_segments *nvgpu_gr_falcon_get_fecs_ucode_segments(
struct nvgpu_gr_falcon *falcon)
{
return &falcon->ctxsw_ucode_info.fecs;
}
struct nvgpu_ctxsw_ucode_segments *nvgpu_gr_falcon_get_gpccs_ucode_segments(
struct nvgpu_gr_falcon *falcon)
{
return &falcon->ctxsw_ucode_info.gpccs;
}
void *nvgpu_gr_falcon_get_surface_desc_cpu_va(struct nvgpu_gr_falcon *falcon)
{
return falcon->ctxsw_ucode_info.surface_desc.cpu_va;
}
#ifdef CONFIG_NVGPU_ENGINE_RESET
struct nvgpu_mutex *nvgpu_gr_falcon_get_fecs_mutex(
struct nvgpu_gr_falcon *falcon)
{
return &falcon->fecs_mutex;
}
#endif

View File

@@ -0,0 +1,213 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NVGPU_GR_FALCON_PRIV_H
#define NVGPU_GR_FALCON_PRIV_H
#include <nvgpu/types.h>
#include <nvgpu/nvgpu_mem.h>
struct nvgpu_ctxsw_ucode_segments;
/** GPCCS boot signature for T18X chip, type: with reserved. */
#define FALCON_UCODE_SIG_T18X_GPCCS_WITH_RESERVED 0x68edab34U
/** FECS boot signature for T21X chip, type: with DMEM size. */
#define FALCON_UCODE_SIG_T21X_FECS_WITH_DMEM_SIZE 0x9121ab5cU
/** FECS boot signature for T21X chip, type: with reserved. */
#define FALCON_UCODE_SIG_T21X_FECS_WITH_RESERVED 0x9125ab5cU
/** FECS boot signature for T21X chip, type: without reserved. */
#define FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED 0x93671b7dU
/** FECS boot signature for T21X chip, type: without reserved2. */
#define FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED2 0x4d6cbc10U
/** GPCCS boot signature for T21X chip, type: with reserved. */
#define FALCON_UCODE_SIG_T21X_GPCCS_WITH_RESERVED 0x3d3d65e2U
/** GPCCS boot signature for T21X chip, type: without reserved. */
#define FALCON_UCODE_SIG_T21X_GPCCS_WITHOUT_RESERVED 0x393161daU
/** FECS boot signature for T12X chip, type: with reserved. */
#define FALCON_UCODE_SIG_T12X_FECS_WITH_RESERVED 0x8a621f78U
/** FECS boot signature for T12X chip, type: without reserved. */
#define FALCON_UCODE_SIG_T12X_FECS_WITHOUT_RESERVED 0x67e5344bU
/** FECS boot signature for T12X chip, type: older. */
#define FALCON_UCODE_SIG_T12X_FECS_OLDER 0x56da09fU
/** GPCCS boot signature for T12X chip, type: with reserved. */
#define FALCON_UCODE_SIG_T12X_GPCCS_WITH_RESERVED 0x303465d5U
/** GPCCS boot signature for T12X chip, type: without reserved. */
#define FALCON_UCODE_SIG_T12X_GPCCS_WITHOUT_RESERVED 0x3fdd33d3U
/** GPCCS boot signature for T12X chip, type: older. */
#define FALCON_UCODE_SIG_T12X_GPCCS_OLDER 0x53d7877U
enum wait_ucode_status {
/** Status of ucode wait operation : LOOP. */
WAIT_UCODE_LOOP,
/** Status of ucode wait operation : timedout. */
WAIT_UCODE_TIMEOUT,
/** Status of ucode wait operation : error. */
WAIT_UCODE_ERROR,
/** Status of ucode wait operation : success. */
WAIT_UCODE_OK
};
/** Falcon operation condition : EQUAL. */
#define GR_IS_UCODE_OP_EQUAL 0U
/** Falcon operation condition : NOT_EQUAL. */
#define GR_IS_UCODE_OP_NOT_EQUAL 1U
/** Falcon operation condition : AND. */
#define GR_IS_UCODE_OP_AND 2U
/** Falcon operation condition : LESSER. */
#define GR_IS_UCODE_OP_LESSER 3U
/** Falcon operation condition : LESSER_EQUAL. */
#define GR_IS_UCODE_OP_LESSER_EQUAL 4U
/** Falcon operation condition : SKIP. */
#define GR_IS_UCODE_OP_SKIP 5U
/** Mailbox value in case of successful operation. */
#define FALCON_UCODE_HANDSHAKE_INIT_COMPLETE 1U
struct fecs_mthd_op_method {
/** Method address to send to FECS microcontroller. */
u32 addr;
/** Method data to send to FECS microcontroller. */
u32 data;
};
struct fecs_mthd_op_mailbox {
/** Mailbox ID to perform operation. */
u32 id;
/** Mailbox data to be written. */
u32 data;
/** Mailbox clear value. */
u32 clr;
/** Last read mailbox value. */
u32 *ret;
/** Mailbox value in case of operation success. */
u32 ok;
/** Mailbox value in case of operation failure. */
u32 fail;
};
struct fecs_mthd_op_cond {
/** Operation success condition. */
u32 ok;
/** Operation fail condition. */
u32 fail;
};
/**
* FECS method operation structure.
*
* This structure defines the protocol for communication with FECS
* microcontroller.
*/
struct nvgpu_fecs_method_op {
/** Method struct */
struct fecs_mthd_op_method method;
/** Mailbox struct */
struct fecs_mthd_op_mailbox mailbox;
/** Condition struct */
struct fecs_mthd_op_cond cond;
};
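/*
 * Illustrative example (field values are hypothetical): a caller that
 * submits a method and polls mailbox 0 for the init handshake might fill
 * this roughly as:
 *
 *   struct nvgpu_fecs_method_op op = {
 *       .method  = { .addr = mthd_addr, .data = mthd_data },
 *       .mailbox = { .id = 0U, .data = 0U, .clr = ~U32(0U), .ret = NULL,
 *                    .ok = FALCON_UCODE_HANDSHAKE_INIT_COMPLETE, .fail = 0U },
 *       .cond    = { .ok = GR_IS_UCODE_OP_EQUAL, .fail = GR_IS_UCODE_OP_SKIP },
 *   };
 */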
/**
* CTXSW falcon bootloader descriptor structure.
*/
struct nvgpu_ctxsw_bootloader_desc {
/** Start offset, unused. */
u32 start_offset;
/** Size, unused. */
u32 size;
/** IMEM offset. */
u32 imem_offset;
/** Falcon boot vector. */
u32 entry_point;
};
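/*
 * Note: this descriptor is laid out at the start of the fecs.bin/gpccs.bin
 * firmware images; the boot image data immediately follows it (see
 * nvgpu_gr_falcon_init_ctxsw_ucode()).
 */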
/**
* CTXSW ucode information structure.
*/
struct nvgpu_ctxsw_ucode_info {
/** Memory to store ucode instance block. */
struct nvgpu_mem inst_blk_desc;
/** Memory to store ucode contents locally. */
struct nvgpu_mem surface_desc;
/** Ucode segments for FECS. */
struct nvgpu_ctxsw_ucode_segments fecs;
/** Ucode segments for GPCCS. */
struct nvgpu_ctxsw_ucode_segments gpccs;
};
/**
* Structure to store various sizes queried from FECS
*/
struct nvgpu_gr_falcon_query_sizes {
/** Size of golden context image. */
u32 golden_image_size;
#ifdef CONFIG_NVGPU_DEBUGGER
u32 pm_ctxsw_image_size;
#endif
#ifdef CONFIG_NVGPU_GFXP
u32 preempt_image_size;
#endif
#ifdef CONFIG_NVGPU_GRAPHICS
u32 zcull_image_size;
#endif
};
/**
* GR falcon data structure.
*
* This structure stores all data required to load and boot CTXSW ucode,
* and also to communicate with FECS microcontroller.
*/
struct nvgpu_gr_falcon {
/**
* CTXSW ucode information structure.
*/
struct nvgpu_ctxsw_ucode_info ctxsw_ucode_info;
/**
* Mutex to protect all FECS methods.
*/
struct nvgpu_mutex fecs_mutex;
/**
* Flag to skip ucode initialization if it is already done.
*/
bool skip_ucode_init;
/**
* Flag to trigger recovery bootstrap in case coldboot bootstrap
* was already done.
*/
bool coldboot_bootstrap_done;
/**
* Structure to hold various sizes that are queried from FECS
* microcontroller.
*/
struct nvgpu_gr_falcon_query_sizes sizes;
};
#endif /* NVGPU_GR_FALCON_PRIV_H */

View File

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,224 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NVGPU_GR_INTR_PRIV_H
#define NVGPU_GR_INTR_PRIV_H
#include <nvgpu/types.h>
#include <nvgpu/lock.h>
#include <include/nvgpu/gr/gr_falcon.h>
struct nvgpu_channel;
/**
* Size of lookup buffer used for context translation to GPU channel
* and TSG identifiers.
* This value must be a power of 2.
*/
#define GR_CHANNEL_MAP_TLB_SIZE 2U
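/*
 * A power-of-2 size presumably lets the replacement index wrap with a
 * simple mask, e.g. (idx + 1U) & (GR_CHANNEL_MAP_TLB_SIZE - 1U), rather
 * than a modulo (see channel_tlb_flush_index below).
 */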
/**
* GR interrupt information struct.
*
* This structure maintains information on pending GR engine interrupts.
*/
struct nvgpu_gr_intr_info {
/**
* This value is set in case notification interrupt is pending.
* Same value is used to clear the interrupt.
*/
u32 notify;
/**
* This value is set in case semaphore interrupt is pending.
* Same value is used to clear the interrupt.
*/
u32 semaphore;
/**
* This value is set in case illegal notify interrupt is pending.
* Same value is used to clear the interrupt.
*/
u32 illegal_notify;
/**
* This value is set in case illegal method interrupt is pending.
* Same value is used to clear the interrupt.
*/
u32 illegal_method;
/**
* This value is set in case illegal class interrupt is pending.
* Same value is used to clear the interrupt.
*/
u32 illegal_class;
/**
* This value is set in case FECS error interrupt is pending.
* Same value is used to clear the interrupt.
*/
u32 fecs_error;
/**
	 * This value is set in case class error interrupt is pending.
* Same value is used to clear the interrupt.
*/
u32 class_error;
/**
* This value is set in case firmware method interrupt is pending.
* Same value is used to clear the interrupt.
*/
u32 fw_method;
/**
* This value is set in case exception is pending in graphics pipe.
* Same value is used to clear the interrupt.
*/
u32 exception;
/*
	 * This value is set when the FE receives a valid method that
	 * matches the value configured in the PRI_FE_DEBUG_METHOD_* pri
	 * registers; on a match, the FE drops that method.
	 * This gives SW a way to turn off HW decoding of a method and
	 * handle it as a SW method instead.
*/
u32 debug_method;
/*
* This value is set on the completion of a LaunchDma method with
* InterruptType field configured to INTERRUPT.
*/
u32 buffer_notify;
};
/**
* TPC exception data structure.
*
 * A TPC exception can be decomposed into exceptions triggered by its
 * subunits. This structure keeps track of which subunits have
 * triggered an exception.
*/
struct nvgpu_gr_tpc_exception {
/**
* This flag is set in case TEX exception is pending.
*/
bool tex_exception;
/**
* This flag is set in case SM exception is pending.
*/
bool sm_exception;
/**
* This flag is set in case MPC exception is pending.
*/
bool mpc_exception;
/**
* This flag is set in case PE exception is pending.
*/
bool pe_exception;
};
/**
* GR ISR data structure.
*
* This structure holds all necessary information to handle all GR engine
* error/exception interrupts.
*/
struct nvgpu_gr_isr_data {
/**
* Contents of TRAPPED_ADDR register used to decode below
* fields.
*/
u32 addr;
/**
* Low word of the trapped method data.
*/
u32 data_lo;
/**
* High word of the trapped method data.
*/
u32 data_hi;
/**
* Information of current context.
*/
u32 curr_ctx;
/**
* Pointer to faulted GPU channel.
*/
struct nvgpu_channel *ch;
/**
* Address of the trapped method.
*/
u32 offset;
/**
* Subchannel ID of the trapped method.
*/
u32 sub_chan;
/**
* Class ID corresponding to above subchannel.
*/
u32 class_num;
/**
* Value read from fecs_host_int_status h/w reg.
*/
u32 fecs_intr;
/**
* S/W defined status for fecs_host_int_status.
*/
struct nvgpu_fecs_host_intr_status fecs_host_intr_status;
};
/**
* Details of lookup buffer used to translate context to GPU
* channel/TSG identifiers.
*/
struct gr_channel_map_tlb_entry {
/**
* Information of context.
*/
u32 curr_ctx;
/**
* GPU channel ID.
*/
u32 chid;
/**
* GPU Time Slice Group ID.
*/
u32 tsgid;
};
/**
* GR interrupt management data structure.
*
* This structure holds various fields to manage GR engine interrupt
* handling.
*/
struct nvgpu_gr_intr {
/**
* Lookup buffer structure used to translate context to GPU
* channel and TSG identifiers.
*/
struct gr_channel_map_tlb_entry chid_tlb[GR_CHANNEL_MAP_TLB_SIZE];
/**
* Entry in lookup buffer that should be overwritten if there is
* no remaining free entry.
*/
u32 channel_tlb_flush_index;
/**
* Spinlock for all lookup buffer accesses.
*/
struct nvgpu_spinlock ch_tlb_lock;
};
#endif /* NVGPU_GR_INTR_PRIV_H */

View File

@@ -0,0 +1,143 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NVGPU_GR_PRIV_H
#define NVGPU_GR_PRIV_H
#include <nvgpu/types.h>
#include <nvgpu/cond.h>
struct nvgpu_gr_ctx_desc;
struct nvgpu_gr_global_ctx_buffer_desc;
struct nvgpu_gr_obj_ctx_golden_image;
struct nvgpu_gr_config;
#ifdef CONFIG_NVGPU_GRAPHICS
struct nvgpu_gr_zbc;
struct nvgpu_gr_zcull;
#endif
#ifdef CONFIG_NVGPU_DEBUGGER
struct nvgpu_gr_hwpm_map;
#endif
/**
* GR engine data structure.
*
* This is the parent structure to all other GR engine data structures,
 * and holds pointers to all of them. This structure also stores
* various fields to track GR engine initialization state.
*
* Pointer to this structure is maintained in GPU driver structure.
*/
struct nvgpu_gr {
/**
* Pointer to GPU driver struct.
*/
struct gk20a *g;
/**
* Instance ID of GR engine.
*/
u32 instance_id;
/**
* Condition variable for GR initialization.
* Waiters shall wait on this condition to ensure GR engine
* is initialized.
*/
struct nvgpu_cond init_wq;
/**
* Flag to indicate if GR engine is initialized.
*/
bool initialized;
/**
* Syspipe ID of the GR instance.
*/
u32 syspipe_id;
/**
* Pointer to global context buffer descriptor structure.
*/
struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer;
/**
* Pointer to Golden context image structure.
*/
struct nvgpu_gr_obj_ctx_golden_image *golden_image;
/**
* Pointer to GR context descriptor structure.
*/
struct nvgpu_gr_ctx_desc *gr_ctx_desc;
/**
* Pointer to GR configuration structure.
*/
struct nvgpu_gr_config *config;
/**
* Pointer to GR falcon data structure.
*/
struct nvgpu_gr_falcon *falcon;
/**
* Pointer to GR interrupt data structure.
*/
struct nvgpu_gr_intr *intr;
/**
* Function pointer to remove GR s/w support.
*/
void (*remove_support)(struct gk20a *g);
/**
* Flag to indicate GR s/w has been initialized.
*/
bool sw_ready;
#ifdef CONFIG_NVGPU_DEBUGGER
struct nvgpu_gr_hwpm_map *hwpm_map;
#endif
#ifdef CONFIG_NVGPU_GRAPHICS
struct nvgpu_gr_zcull *zcull;
struct nvgpu_gr_zbc *zbc;
#endif
#ifdef CONFIG_NVGPU_NON_FUSA
u32 fecs_feature_override_ecc_val;
#endif
#ifdef CONFIG_NVGPU_CILP
u32 cilp_preempt_pending_chid;
#endif
#if defined(CONFIG_NVGPU_RECOVERY) || defined(CONFIG_NVGPU_DEBUGGER)
struct nvgpu_mutex ctxsw_disable_mutex;
int ctxsw_disable_count;
#endif
};
#endif /* NVGPU_GR_PRIV_H */

View File

@@ -0,0 +1,396 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/log.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/gr/ctx.h>
#include <nvgpu/gr/subctx.h>
#include <nvgpu/gr/obj_ctx.h>
#ifdef CONFIG_NVGPU_GRAPHICS
#include <nvgpu/gr/zcull.h>
#endif
#include <nvgpu/gr/setup.h>
#include <nvgpu/gr/gr_instances.h>
#include <nvgpu/channel.h>
#include <nvgpu/preempt.h>
#include "gr_priv.h"
#ifdef CONFIG_NVGPU_GRAPHICS
static int nvgpu_gr_setup_zcull(struct gk20a *g, struct nvgpu_channel *c,
struct nvgpu_gr_ctx *gr_ctx)
{
int ret = 0;
nvgpu_log_fn(g, " ");
ret = nvgpu_channel_disable_tsg(g, c);
if (ret != 0) {
nvgpu_err(g, "failed to disable channel/TSG");
return ret;
}
ret = nvgpu_preempt_channel(g, c);
if (ret != 0) {
nvgpu_err(g, "failed to preempt channel/TSG");
goto out;
}
ret = nvgpu_gr_zcull_ctx_setup(g, c->subctx, gr_ctx);
if (ret != 0) {
nvgpu_err(g, "failed to setup zcull");
goto out;
}
/* no error at this point */
ret = nvgpu_channel_enable_tsg(g, c);
if (ret != 0) {
nvgpu_err(g, "failed to re-enable channel/TSG");
}
return ret;
out:
/*
	 * Control reaches here if the preempt or the nvgpu_gr_zcull_ctx_setup
	 * call failed. Propagate the corresponding error.
*/
if (nvgpu_channel_enable_tsg(g, c) != 0) {
/* ch might not be bound to tsg */
nvgpu_err(g, "failed to enable channel/TSG");
}
return ret;
}
int nvgpu_gr_setup_bind_ctxsw_zcull(struct gk20a *g, struct nvgpu_channel *c,
u64 zcull_va, u32 mode)
{
struct nvgpu_tsg *tsg;
struct nvgpu_gr_ctx *gr_ctx;
tsg = nvgpu_tsg_from_ch(c);
if (tsg == NULL) {
return -EINVAL;
}
gr_ctx = tsg->gr_ctx;
nvgpu_gr_ctx_set_zcull_ctx(g, gr_ctx, mode, zcull_va);
return nvgpu_gr_setup_zcull(g, c, gr_ctx);
}
#endif
static int nvgpu_gr_setup_validate_channel_and_class(struct gk20a *g,
struct nvgpu_channel *c, u32 class_num)
{
int err = 0;
/* an address space needs to have been bound at this point.*/
if (!nvgpu_channel_as_bound(c)) {
nvgpu_err(g,
"not bound to address space at time"
" of grctx allocation");
return -EINVAL;
}
if (!g->ops.gpu_class.is_valid(class_num)) {
nvgpu_err(g,
"invalid obj class 0x%x", class_num);
err = -EINVAL;
}
return err;
}
static int nvgpu_gr_setup_alloc_subctx(struct gk20a *g, struct nvgpu_channel *c)
{
int err = 0;
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) {
if (c->subctx == NULL) {
c->subctx = nvgpu_gr_subctx_alloc(g, c->vm);
if (c->subctx == NULL) {
err = -ENOMEM;
}
}
}
return err;
}
int nvgpu_gr_setup_alloc_obj_ctx(struct nvgpu_channel *c, u32 class_num,
u32 flags)
{
struct gk20a *g = c->g;
struct nvgpu_gr_ctx *gr_ctx;
struct nvgpu_tsg *tsg = NULL;
int err = 0;
struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr,
"GR%u: allocate object context for channel %u",
gr->instance_id, c->chid);
err = nvgpu_gr_setup_validate_channel_and_class(g, c, class_num);
if (err != 0) {
goto out;
}
c->obj_class = class_num;
#ifndef CONFIG_NVGPU_HAL_NON_FUSA
/*
* Only compute and graphics classes need object context.
* Return success for valid non-compute and non-graphics classes.
* Invalid classes are already captured in
* nvgpu_gr_setup_validate_channel_and_class() function.
*/
if (!g->ops.gpu_class.is_valid_compute(class_num) &&
!g->ops.gpu_class.is_valid_gfx(class_num)) {
return 0;
}
#endif
tsg = nvgpu_tsg_from_ch(c);
if (tsg == NULL) {
return -EINVAL;
}
err = nvgpu_gr_setup_alloc_subctx(g, c);
if (err != 0) {
nvgpu_err(g, "failed to allocate gr subctx buffer");
goto out;
}
nvgpu_mutex_acquire(&tsg->ctx_init_lock);
gr_ctx = tsg->gr_ctx;
if (!nvgpu_mem_is_valid(nvgpu_gr_ctx_get_ctx_mem(gr_ctx))) {
tsg->vm = c->vm;
nvgpu_vm_get(tsg->vm);
err = nvgpu_gr_obj_ctx_alloc(g, gr->golden_image,
gr->global_ctx_buffer, gr->gr_ctx_desc,
gr->config, gr_ctx, c->subctx,
tsg->vm, &c->inst_block, class_num, flags,
c->cde, c->vpr);
if (err != 0) {
nvgpu_err(g,
"failed to allocate gr ctx buffer");
nvgpu_mutex_release(&tsg->ctx_init_lock);
nvgpu_vm_put(tsg->vm);
tsg->vm = NULL;
goto out;
}
nvgpu_gr_ctx_set_tsgid(gr_ctx, tsg->tsgid);
} else {
/* commit gr ctx buffer */
nvgpu_gr_obj_ctx_commit_inst(g, &c->inst_block, gr_ctx,
c->subctx, nvgpu_gr_ctx_get_ctx_mem(gr_ctx)->gpu_va);
}
#ifdef CONFIG_NVGPU_FECS_TRACE
if (g->ops.gr.fecs_trace.bind_channel && !c->vpr) {
err = g->ops.gr.fecs_trace.bind_channel(g, &c->inst_block,
c->subctx, gr_ctx, tsg->tgid, 0);
if (err != 0) {
nvgpu_warn(g,
"fail to bind channel for ctxsw trace");
}
}
#endif
#ifdef CONFIG_NVGPU_DEBUGGER
if ((g->num_sys_perfmon == 0U) &&
(g->ops.perf.get_num_hwpm_perfmon != NULL) &&
(err == 0)) {
g->ops.perf.get_num_hwpm_perfmon(g, &g->num_sys_perfmon,
&g->num_fbp_perfmon, &g->num_gpc_perfmon);
nvgpu_log(g, gpu_dbg_gr | gpu_dbg_gpu_dbg,
"num_sys_perfmon[%u] num_fbp_perfmon[%u] "
"num_gpc_perfmon[%u] ",
g->num_sys_perfmon, g->num_fbp_perfmon,
g->num_gpc_perfmon);
nvgpu_assert((g->num_sys_perfmon != 0U) &&
(g->num_fbp_perfmon != 0U) &&
(g->num_gpc_perfmon != 0U));
}
#endif
nvgpu_mutex_release(&tsg->ctx_init_lock);
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
return 0;
out:
if (c->subctx != NULL) {
nvgpu_gr_subctx_free(g, c->subctx, c->vm);
c->subctx = NULL;
}
/*
 * 1. gr_ctx, patch_ctx and global ctx buffer mapping
 *    can be reused so no need to release them.
 * 2. golden image init and load is a one time thing so if
 *    they pass, no need to undo.
 */
nvgpu_err(g, "fail");
return err;
}
void nvgpu_gr_setup_free_gr_ctx(struct gk20a *g,
struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx)
{
nvgpu_log_fn(g, " ");
if (gr_ctx != NULL) {
#ifdef CONFIG_DEBUG_FS
if ((g->ops.gr.ctxsw_prog.dump_ctxsw_stats != NULL) &&
nvgpu_gr_ctx_desc_dump_ctxsw_stats_on_channel_close(
g->gr->gr_ctx_desc)) {
g->ops.gr.ctxsw_prog.dump_ctxsw_stats(g,
nvgpu_gr_ctx_get_ctx_mem(gr_ctx));
}
#endif
nvgpu_gr_ctx_free(g, gr_ctx, g->gr->global_ctx_buffer, vm);
}
}
void nvgpu_gr_setup_free_subctx(struct nvgpu_channel *c)
{
nvgpu_log_fn(c->g, " ");
if (!nvgpu_is_enabled(c->g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) {
return;
}
if (c->subctx != NULL) {
nvgpu_gr_subctx_free(c->g, c->subctx, c->vm);
c->subctx = NULL;
}
}
static bool nvgpu_gr_setup_validate_preemption_mode(u32 *graphics_preempt_mode,
u32 *compute_preempt_mode,
struct nvgpu_gr_ctx *gr_ctx)
{
#ifdef CONFIG_NVGPU_GRAPHICS
/* Clear a requested mode that is already set; nothing to do if none remain. */
if ((*graphics_preempt_mode != 0U) &&
(*graphics_preempt_mode ==
nvgpu_gr_ctx_get_graphics_preemption_mode(gr_ctx))) {
*graphics_preempt_mode = 0;
}
#endif /* CONFIG_NVGPU_GRAPHICS */
if ((*compute_preempt_mode != 0U) &&
(*compute_preempt_mode ==
nvgpu_gr_ctx_get_compute_preemption_mode(gr_ctx))) {
*compute_preempt_mode = 0;
}
if ((*graphics_preempt_mode == 0U) && (*compute_preempt_mode == 0U)) {
return false;
}
return true;
}
int nvgpu_gr_setup_set_preemption_mode(struct nvgpu_channel *ch,
u32 graphics_preempt_mode, u32 compute_preempt_mode,
u32 gr_instance_id)
{
struct nvgpu_gr_ctx *gr_ctx;
struct gk20a *g = ch->g;
struct nvgpu_tsg *tsg;
struct vm_gk20a *vm;
struct nvgpu_gr *gr;
u32 class_num;
int err = 0;
gr = &g->gr[gr_instance_id];
class_num = ch->obj_class;
if (class_num == 0U) {
return -EINVAL;
}
if (!g->ops.gpu_class.is_valid(class_num)) {
nvgpu_err(g, "invalid obj class 0x%x", class_num);
return -EINVAL;
}
tsg = nvgpu_tsg_from_ch(ch);
if (tsg == NULL) {
return -EINVAL;
}
vm = tsg->vm;
gr_ctx = tsg->gr_ctx;
if (nvgpu_gr_setup_validate_preemption_mode(&graphics_preempt_mode,
&compute_preempt_mode, gr_ctx) == false) {
return 0;
}
nvgpu_log(g, gpu_dbg_gr | gpu_dbg_sched, "chid=%d tsgid=%d pid=%d "
"graphics_preempt_mode=%u compute_preempt_mode=%u",
ch->chid, ch->tsgid, ch->tgid,
graphics_preempt_mode, compute_preempt_mode);
err = nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(g, gr->config,
gr->gr_ctx_desc, gr_ctx, vm, class_num,
graphics_preempt_mode, compute_preempt_mode);
if (err != 0) {
nvgpu_err(g, "set_ctxsw_preemption_mode failed");
return err;
}
g->ops.tsg.disable(tsg);
err = nvgpu_preempt_channel(g, ch);
if (err != 0) {
nvgpu_err(g, "failed to preempt channel/TSG");
goto enable_ch;
}
nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(g, gr->config, gr_ctx,
ch->subctx);
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, true);
g->ops.gr.init.commit_global_cb_manager(g, gr->config, gr_ctx,
true);
nvgpu_gr_ctx_patch_write_end(g, gr_ctx, true);
}
g->ops.tsg.enable(tsg);
return err;
enable_ch:
g->ops.tsg.enable(tsg);
return err;
}
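/*
* Hedged usage sketch (not part of the original file): the typical ordering
* of the setup entry points above for a hypothetical channel bring-up path.
* The example_* name, the zero class/flag/instance values and the calling
* context are assumptions for illustration only; the block is kept under
* "#if 0" so it does not affect the build.
*/
#if 0
static int example_channel_gr_setup(struct nvgpu_channel *c, u32 class_num,
		u32 obj_flags)
{
	int err;

	/* Allocate (or reuse) the TSG gr ctx and commit it to this channel. */
	err = nvgpu_gr_setup_alloc_obj_ctx(c, class_num, obj_flags);
	if (err != 0) {
		return err;
	}

	/*
	 * Optionally adjust preemption modes afterwards; passing 0 for a mode
	 * leaves it unchanged, as the validation above then skips it.
	 */
	return nvgpu_gr_setup_set_preemption_mode(c, 0U, 0U,
			0U /* gr_instance_id, assumed */);
}
#endif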

View File

@@ -0,0 +1,143 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/types.h>
#include <nvgpu/gr/gr_utils.h>
#include <nvgpu/gr/gr_instances.h>
#include <nvgpu/gr/config.h>
#include "gr_priv.h"
u32 nvgpu_gr_checksum_u32(u32 a, u32 b)
{
return nvgpu_safe_cast_u64_to_u32(((u64)a + (u64)b) & (U32_MAX));
}
struct nvgpu_gr_falcon *nvgpu_gr_get_falcon_ptr(struct gk20a *g)
{
struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
return gr->falcon;
}
struct nvgpu_gr_config *nvgpu_gr_get_config_ptr(struct gk20a *g)
{
struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
return gr->config;
}
struct nvgpu_gr_config *nvgpu_gr_get_gr_instance_config_ptr(struct gk20a *g,
u32 gr_instance_id)
{
return g->gr[gr_instance_id].config;
}
struct nvgpu_gr_intr *nvgpu_gr_get_intr_ptr(struct gk20a *g)
{
struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
return gr->intr;
}
#ifdef CONFIG_NVGPU_NON_FUSA
u32 nvgpu_gr_get_override_ecc_val(struct gk20a *g)
{
struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
return gr->fecs_feature_override_ecc_val;
}
void nvgpu_gr_override_ecc_val(struct nvgpu_gr *gr, u32 ecc_val)
{
gr->fecs_feature_override_ecc_val = ecc_val;
}
#endif
#ifdef CONFIG_NVGPU_GRAPHICS
struct nvgpu_gr_zcull *nvgpu_gr_get_zcull_ptr(struct gk20a *g)
{
struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
return gr->zcull;
}
struct nvgpu_gr_zbc *nvgpu_gr_get_zbc_ptr(struct gk20a *g)
{
struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
return gr->zbc;
}
#endif
#ifdef CONFIG_NVGPU_FECS_TRACE
struct nvgpu_gr_global_ctx_buffer_desc *nvgpu_gr_get_global_ctx_buffer_ptr(
struct gk20a *g)
{
struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
return gr->global_ctx_buffer;
}
#endif
#ifdef CONFIG_NVGPU_CILP
u32 nvgpu_gr_get_cilp_preempt_pending_chid(struct gk20a *g)
{
struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
return gr->cilp_preempt_pending_chid;
}
void nvgpu_gr_clear_cilp_preempt_pending_chid(struct gk20a *g)
{
struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
gr->cilp_preempt_pending_chid =
NVGPU_INVALID_CHANNEL_ID;
}
#endif
#ifdef CONFIG_NVGPU_DEBUGGER
struct nvgpu_gr_obj_ctx_golden_image *nvgpu_gr_get_golden_image_ptr(
struct gk20a *g)
{
struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
return gr->golden_image;
}
struct nvgpu_gr_hwpm_map *nvgpu_gr_get_hwpm_map_ptr(struct gk20a *g)
{
struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
return gr->hwpm_map;
}
void nvgpu_gr_reset_falcon_ptr(struct gk20a *g)
{
struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
gr->falcon = NULL;
}
void nvgpu_gr_reset_golden_image_ptr(struct gk20a *g)
{
struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
gr->golden_image = NULL;
}
#endif

View File

@@ -0,0 +1,615 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/netlist.h>
#include <nvgpu/log.h>
#include <nvgpu/sort.h>
#include <nvgpu/kmem.h>
#include <nvgpu/bsearch.h>
#include <nvgpu/fbp.h>
#include <nvgpu/gr/config.h>
#include <nvgpu/gr/hwpm_map.h>
/* needed for pri_is_ppc_addr_shared */
#include "hal/gr/gr/gr_pri_gk20a.h"
#define NV_PCFG_BASE 0x00088000U
#define NV_PERF_PMM_FBP_ROUTER_STRIDE 0x0200U
#define NV_PERF_PMMGPCROUTER_STRIDE 0x0200U
#define NV_XBAR_MXBAR_PRI_GPC_GNIC_STRIDE 0x0020U
/* Dummy address for ctxsw'ed pri reg checksum. */
#define CTXSW_PRI_CHECKSUM_DUMMY_REG 0x00ffffffU
int nvgpu_gr_hwpm_map_init(struct gk20a *g, struct nvgpu_gr_hwpm_map **hwpm_map,
u32 size)
{
struct nvgpu_gr_hwpm_map *tmp_map;
nvgpu_log(g, gpu_dbg_gr, "size = %u", size);
if (size == 0U) {
return -EINVAL;
}
tmp_map = nvgpu_kzalloc(g, sizeof(*tmp_map));
if (tmp_map == NULL) {
return -ENOMEM;
}
tmp_map->pm_ctxsw_image_size = size;
tmp_map->init = false;
*hwpm_map = tmp_map;
return 0;
}
void nvgpu_gr_hwpm_map_deinit(struct gk20a *g,
struct nvgpu_gr_hwpm_map *hwpm_map)
{
if (hwpm_map->init) {
nvgpu_big_free(g, hwpm_map->map);
}
nvgpu_kfree(g, hwpm_map);
}
u32 nvgpu_gr_hwpm_map_get_size(struct nvgpu_gr_hwpm_map *hwpm_map)
{
return hwpm_map->pm_ctxsw_image_size;
}
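/*
* Illustrative lifecycle sketch (not part of the original file): how a
* hypothetical caller might create, query and tear down an HWPM map with the
* helpers above. The example_* name and the calling context are assumptions;
* the block is kept under "#if 0" so it does not affect the build.
*/
#if 0
static int example_hwpm_map_lifecycle(struct gk20a *g, u32 pm_ctxsw_size)
{
	struct nvgpu_gr_hwpm_map *hwpm_map = NULL;
	int err;

	/* Allocate the map descriptor; the address map itself is built lazily. */
	err = nvgpu_gr_hwpm_map_init(g, &hwpm_map, pm_ctxsw_size);
	if (err != 0) {
		return err;
	}

	nvgpu_log_info(g, "PM ctxsw image size = %u",
			nvgpu_gr_hwpm_map_get_size(hwpm_map));

	/* Frees the lazily built map (if any) and the descriptor itself. */
	nvgpu_gr_hwpm_map_deinit(g, hwpm_map);
	return 0;
}
#endif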
static int map_cmp(const void *a, const void *b)
{
const struct ctxsw_buf_offset_map_entry *e1;
const struct ctxsw_buf_offset_map_entry *e2;
e1 = (const struct ctxsw_buf_offset_map_entry *)a;
e2 = (const struct ctxsw_buf_offset_map_entry *)b;
if (e1->addr < e2->addr) {
return -1;
}
if (e1->addr > e2->addr) {
return 1;
}
return 0;
}
static int add_ctxsw_buffer_map_entries_pmsys(
struct ctxsw_buf_offset_map_entry *map,
struct netlist_aiv_list *regs, u32 *count, u32 *offset,
u32 max_cnt, u32 base, u32 mask)
{
u32 idx;
u32 cnt = *count;
u32 off = *offset;
if ((cnt + regs->count) > max_cnt) {
return -EINVAL;
}
for (idx = 0; idx < regs->count; idx++) {
if ((base + (regs->l[idx].addr & mask)) < 0xFFFU) {
map[cnt].addr = base + (regs->l[idx].addr & mask)
+ NV_PCFG_BASE;
} else {
map[cnt].addr = base + (regs->l[idx].addr & mask);
}
map[cnt++].offset = off;
off += 4U;
}
*count = cnt;
*offset = off;
return 0;
}
static int add_ctxsw_buffer_map_entries_pmgpc(struct gk20a *g,
struct ctxsw_buf_offset_map_entry *map,
struct netlist_aiv_list *regs,
u32 *count, u32 *offset,
u32 max_cnt, u32 base, u32 mask)
{
u32 idx;
u32 cnt = *count;
u32 off = *offset;
if ((cnt + regs->count) > max_cnt) {
return -EINVAL;
}
/*
* NOTE: The PPC offsets get added to the pm_gpc list if numPpc <= 1.
* To handle the case of PPC registers getting added into the GPC list,
* the code below specifically checks for any PPC offsets and adds them
* using the proper PPC mask.
*/
for (idx = 0; idx < regs->count; idx++) {
/* Check if the address is PPC address */
if (pri_is_ppc_addr_shared(g, regs->l[idx].addr & mask)) {
u32 ppc_in_gpc_base = nvgpu_get_litter_value(g,
GPU_LIT_PPC_IN_GPC_BASE);
u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g,
GPU_LIT_PPC_IN_GPC_STRIDE);
/* Use PPC mask instead of the GPC mask provided */
u32 ppcmask = ppc_in_gpc_stride - 1U;
map[cnt].addr = base + ppc_in_gpc_base
+ (regs->l[idx].addr & ppcmask);
} else {
map[cnt].addr = base + (regs->l[idx].addr & mask);
}
map[cnt++].offset = off;
off += 4U;
}
*count = cnt;
*offset = off;
return 0;
}
static int add_ctxsw_buffer_map_entries(struct ctxsw_buf_offset_map_entry *map,
struct netlist_aiv_list *regs,
u32 *count, u32 *offset,
u32 max_cnt, u32 base, u32 mask)
{
u32 idx;
u32 cnt = *count;
u32 off = *offset;
if ((cnt + regs->count) > max_cnt) {
return -EINVAL;
}
for (idx = 0; idx < regs->count; idx++) {
map[cnt].addr = base + (regs->l[idx].addr & mask);
map[cnt++].offset = off;
off += 4U;
}
*count = cnt;
*offset = off;
return 0;
}
/* Helper function to add register entries to the register map for all
* subunits
*/
static int add_ctxsw_buffer_map_entries_subunits(
struct ctxsw_buf_offset_map_entry *map,
struct netlist_aiv_list *regs,
u32 *count, u32 *offset,
u32 max_cnt, u32 base, u32 num_units,
u32 active_unit_mask, u32 stride, u32 mask)
{
u32 unit;
u32 idx;
u32 cnt = *count;
u32 off = *offset;
if ((cnt + (regs->count * num_units)) > max_cnt) {
return -EINVAL;
}
/* Data is interleaved for units in ctxsw buffer */
for (idx = 0; idx < regs->count; idx++) {
for (unit = 0; unit < num_units; unit++) {
if ((active_unit_mask & BIT32(unit)) != 0U) {
map[cnt].addr = base +
(regs->l[idx].addr & mask) +
(unit * stride);
map[cnt++].offset = off;
off += 4U;
/*
* The ucode computes and saves the checksum of
* all ctxsw'ed register values within a list.
* Entries with addr=0x00ffffff are placeholder
* for these checksums.
*
* There is only one checksum for a list
* even if it contains multiple subunits. Hence,
* skip iterating over all subunits for this
* entry.
*/
if (regs->l[idx].addr ==
CTXSW_PRI_CHECKSUM_DUMMY_REG) {
break;
}
}
}
}
*count = cnt;
*offset = off;
return 0;
}
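/*
* Worked example (illustrative values only, not from a real netlist): with
* two registers R0/R1 and three active units U0..U2, the loops above emit
* map entries in register-major, unit-minor order, 4 bytes apart:
*
*   R0@U0, R0@U1, R0@U2, R1@U0, R1@U1, R1@U2
*
* i.e. at offsets +0x0, +0x4, +0x8, +0xc, +0x10 and +0x14 relative to the
* incoming *offset value.
*/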
static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g,
struct ctxsw_buf_offset_map_entry *map,
u32 *count, u32 *offset, u32 max_cnt,
struct nvgpu_gr_config *config)
{
u32 num_gpcs = nvgpu_gr_config_get_gpc_count(config);
u32 num_ppcs, num_tpcs, gpc_num, base;
u32 gpc_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_BASE);
u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
u32 ppc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_BASE);
u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE);
u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
for (gpc_num = 0; gpc_num < num_gpcs; gpc_num++) {
num_tpcs = nvgpu_gr_config_get_gpc_tpc_count(config, gpc_num);
base = gpc_base + (gpc_stride * gpc_num) + tpc_in_gpc_base;
if (add_ctxsw_buffer_map_entries_subunits(map,
nvgpu_netlist_get_pm_tpc_ctxsw_regs(g),
count, offset, max_cnt, base,
num_tpcs, ~U32(0U), tpc_in_gpc_stride,
(tpc_in_gpc_stride - 1U)) != 0) {
return -EINVAL;
}
num_ppcs = nvgpu_gr_config_get_gpc_ppc_count(config, gpc_num);
base = gpc_base + (gpc_stride * gpc_num) + ppc_in_gpc_base;
if (add_ctxsw_buffer_map_entries_subunits(map,
nvgpu_netlist_get_pm_ppc_ctxsw_regs(g),
count, offset, max_cnt, base, num_ppcs,
~U32(0U), ppc_in_gpc_stride,
(ppc_in_gpc_stride - 1U)) != 0) {
return -EINVAL;
}
base = gpc_base + (gpc_stride * gpc_num);
if (add_ctxsw_buffer_map_entries_pmgpc(g, map,
nvgpu_netlist_get_pm_gpc_ctxsw_regs(g),
count, offset, max_cnt, base,
(gpc_stride - 1U)) != 0) {
return -EINVAL;
}
base = NV_XBAR_MXBAR_PRI_GPC_GNIC_STRIDE * gpc_num;
if (add_ctxsw_buffer_map_entries(map,
nvgpu_netlist_get_pm_ucgpc_ctxsw_regs(g),
count, offset, max_cnt, base, ~U32(0U)) != 0) {
return -EINVAL;
}
base = (g->ops.perf.get_pmmgpc_per_chiplet_offset() * gpc_num);
if (add_ctxsw_buffer_map_entries(map,
nvgpu_netlist_get_perf_gpc_ctxsw_regs(g),
count, offset, max_cnt, base, ~U32(0U)) != 0) {
return -EINVAL;
}
base = (NV_PERF_PMMGPCROUTER_STRIDE * gpc_num);
if (add_ctxsw_buffer_map_entries(map,
nvgpu_netlist_get_gpc_router_ctxsw_regs(g),
count, offset, max_cnt, base, ~U32(0U)) != 0) {
return -EINVAL;
}
/* Counter Aggregation Unit, if available */
if (nvgpu_netlist_get_pm_cau_ctxsw_regs(g)->count != 0U) {
base = gpc_base + (gpc_stride * gpc_num)
+ tpc_in_gpc_base;
if (add_ctxsw_buffer_map_entries_subunits(map,
nvgpu_netlist_get_pm_cau_ctxsw_regs(g),
count, offset, max_cnt, base, num_tpcs,
~U32(0U), tpc_in_gpc_stride,
(tpc_in_gpc_stride - 1U)) != 0) {
return -EINVAL;
}
}
*offset = NVGPU_ALIGN(*offset, 256U);
base = (g->ops.perf.get_pmmgpc_per_chiplet_offset() * gpc_num);
if (add_ctxsw_buffer_map_entries(map,
nvgpu_netlist_get_perf_gpc_control_ctxsw_regs(g),
count, offset, max_cnt, base, ~U32(0U)) != 0) {
return -EINVAL;
}
*offset = NVGPU_ALIGN(*offset, 256U);
}
return 0;
}
/*
* PM CTXSW BUFFER LAYOUT:
*|=============================================|0x00 <----PM CTXSW BUFFER BASE
*| LIST_compressed_pm_ctx_reg_SYS |Space allocated: numRegs words
*| LIST_compressed_nv_perf_ctx_reg_SYS |Space allocated: numRegs words
*| LIST_compressed_nv_perf_ctx_reg_sysrouter |Space allocated: numRegs words
*| PADDING for 256 byte alignment on Maxwell+ |
*|=============================================|<----256 byte aligned on Maxwell and later
*| LIST_compressed_nv_perf_sys_control_ctx_regs|Space allocated: numRegs words (+ padding)
*| PADDING for 256 byte alignment |(If reg list is empty, 0 bytes allocated.)
*|=============================================|<----256 byte aligned
*| LIST_compressed_nv_perf_ctx_reg_PMA |Space allocated: numRegs words (+ padding)
*| PADDING for 256 byte alignment |
*|=============================================|<----256 byte aligned (if prev segment exists)
*| LIST_compressed_nv_perf_pma_control_ctx_regs|Space allocated: numRegs words (+ padding)
*| PADDING for 256 byte alignment |(If reg list is empty, 0 bytes allocated.)
*|=============================================|<----256 byte aligned
*| LIST_compressed_nv_perf_fbp_ctx_regs |Space allocated: numRegs * n words (for n FB units)
*| LIST_compressed_nv_perf_fbprouter_ctx_regs |Space allocated: numRegs * n words (for n FB units)
*| LIST_compressed_pm_fbpa_ctx_regs |Space allocated: numRegs * n words (for n FB units)
*| LIST_compressed_pm_rop_ctx_regs |Space allocated: numRegs * n words (for n FB units)
*| LIST_compressed_pm_ltc_ctx_regs |
*| LTC0 LTS0 |
*| LTC1 LTS0 |Space allocated: numRegs * n words (for n LTC units)
*| LTCn LTS0 |
*| LTC0 LTS1 |
*| LTC1 LTS1 |
*| LTCn LTS1 |
*| LTC0 LTSn |
*| LTC1 LTSn |
*| LTCn LTSn |
*| PADDING for 256 byte alignment |
*|=============================================|<----256 byte aligned on Maxwell and later
*| LIST_compressed_nv_perf_fbp_control_ctx_regs|Space allocated: numRegs words + padding
*| PADDING for 256 byte alignment |(If reg list is empty, 0 bytes allocated.)
*|=============================================|<----256 byte aligned on Maxwell and later
*
* Each "GPCn PRI register" segment above has this layout:
*|=============================================|<----256 byte aligned
*| GPC0 REG0 TPC0 |Each GPC has space allocated to accommodate
*| REG0 TPC1 | all the GPC/TPC register lists
*| Lists in each GPC region: REG0 TPCn |Per GPC allocated space is always 256 byte aligned
*| LIST_pm_ctx_reg_TPC REG1 TPC0 |
*| * numTpcs REG1 TPC1 |
*| LIST_pm_ctx_reg_PPC REG1 TPCn |
*| * numPpcs REGn TPC0 |
*| LIST_pm_ctx_reg_GPC REGn TPC1 |
*| List_pm_ctx_reg_uc_GPC REGn TPCn |
*| LIST_nv_perf_ctx_reg_GPC |
*| LIST_nv_perf_gpcrouter_ctx_reg |
*| LIST_nv_perf_ctx_reg_CAU (Tur) |
*|=============================================|
*| LIST_compressed_nv_perf_gpc_control_ctx_regs|Space allocated: numRegs words + padding
*| PADDING for 256 byte alignment |(If reg list is empty, 0 bytes allocated.)
*|=============================================|<----256 byte aligned on Maxwell and later
*/
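/*
* Worked alignment example (illustrative numbers only): if the SYS register
* segments end at byte offset 0x104, the next 256-byte aligned segment in
* the layout above starts at NVGPU_ALIGN(0x104, 256U) = 0x200, and the
* 0xfc bytes in between are the padding called out in the diagram.
*/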
static int nvgpu_gr_hwpm_map_create(struct gk20a *g,
struct nvgpu_gr_hwpm_map *hwpm_map, struct nvgpu_gr_config *config)
{
u32 hwpm_ctxsw_buffer_size = hwpm_map->pm_ctxsw_image_size;
struct ctxsw_buf_offset_map_entry *map;
u32 hwpm_ctxsw_reg_count_max;
u32 map_size;
u32 i, count = 0;
u32 offset = 0;
int ret;
u32 active_fbpa_mask;
u32 num_fbps = nvgpu_fbp_get_num_fbps(g->fbp);
u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
u32 num_fbpas = nvgpu_get_litter_value(g, GPU_LIT_NUM_FBPAS);
u32 fbpa_stride = nvgpu_get_litter_value(g, GPU_LIT_FBPA_STRIDE);
u32 num_ltc = g->ops.top.get_max_ltc_per_fbp(g) *
g->ops.priv_ring.get_fbp_count(g);
if (hwpm_ctxsw_buffer_size == 0U) {
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
"no PM Ctxsw buffer memory in context buffer");
return -EINVAL;
}
hwpm_ctxsw_reg_count_max = hwpm_ctxsw_buffer_size >> 2;
map_size = hwpm_ctxsw_reg_count_max * (u32)sizeof(*map);
map = nvgpu_big_zalloc(g, map_size);
if (map == NULL) {
return -ENOMEM;
}
/* Add entries from _LIST_pm_ctx_reg_SYS */
if (add_ctxsw_buffer_map_entries_pmsys(map,
nvgpu_netlist_get_pm_sys_ctxsw_regs(g),
&count, &offset, hwpm_ctxsw_reg_count_max, 0, ~U32(0U)) != 0) {
goto cleanup;
}
/* Add entries from _LIST_nv_perf_ctx_reg_SYS */
if (add_ctxsw_buffer_map_entries(map,
nvgpu_netlist_get_perf_sys_ctxsw_regs(g),
&count, &offset, hwpm_ctxsw_reg_count_max, 0, ~U32(0U)) != 0) {
goto cleanup;
}
/* Add entries from _LIST_nv_perf_sysrouter_ctx_reg*/
if (add_ctxsw_buffer_map_entries(map,
nvgpu_netlist_get_perf_sys_router_ctxsw_regs(g),
&count, &offset, hwpm_ctxsw_reg_count_max, 0, ~U32(0U)) != 0) {
goto cleanup;
}
/* Add entries from _LIST_nv_perf_sys_control_ctx_reg*/
if (nvgpu_netlist_get_perf_sys_control_ctxsw_regs(g)->count > 0U) {
offset = NVGPU_ALIGN(offset, 256U);
ret = add_ctxsw_buffer_map_entries(map,
nvgpu_netlist_get_perf_sys_control_ctxsw_regs(g),
&count, &offset,
hwpm_ctxsw_reg_count_max, 0, ~U32(0U));
if (ret != 0) {
goto cleanup;
}
}
if (g->ops.gr.hwpm_map.align_regs_perf_pma) {
g->ops.gr.hwpm_map.align_regs_perf_pma(&offset);
}
/* Add entries from _LIST_nv_perf_pma_ctx_reg*/
ret = add_ctxsw_buffer_map_entries(map,
nvgpu_netlist_get_perf_pma_ctxsw_regs(g), &count, &offset,
hwpm_ctxsw_reg_count_max, 0, ~U32(0U));
if (ret != 0) {
goto cleanup;
}
offset = NVGPU_ALIGN(offset, 256U);
/* Add entries from _LIST_nv_perf_pma_control_ctx_reg*/
ret = add_ctxsw_buffer_map_entries(map,
nvgpu_netlist_get_perf_pma_control_ctxsw_regs(g), &count, &offset,
hwpm_ctxsw_reg_count_max, 0, ~U32(0U));
if (ret != 0) {
goto cleanup;
}
offset = NVGPU_ALIGN(offset, 256U);
/* Add entries from _LIST_nv_perf_fbp_ctx_regs */
if (add_ctxsw_buffer_map_entries_subunits(map,
nvgpu_netlist_get_fbp_ctxsw_regs(g), &count, &offset,
hwpm_ctxsw_reg_count_max, 0, num_fbps, ~U32(0U),
g->ops.perf.get_pmmfbp_per_chiplet_offset(),
~U32(0U)) != 0) {
goto cleanup;
}
/* Add entries from _LIST_nv_perf_fbprouter_ctx_regs */
if (add_ctxsw_buffer_map_entries_subunits(map,
nvgpu_netlist_get_fbp_router_ctxsw_regs(g),
&count, &offset, hwpm_ctxsw_reg_count_max, 0,
num_fbps, ~U32(0U), NV_PERF_PMM_FBP_ROUTER_STRIDE,
~U32(0U)) != 0) {
goto cleanup;
}
if (g->ops.gr.hwpm_map.get_active_fbpa_mask) {
active_fbpa_mask = g->ops.gr.hwpm_map.get_active_fbpa_mask(g);
} else {
active_fbpa_mask = ~U32(0U);
}
/* Add entries from _LIST_nv_pm_fbpa_ctx_regs */
if (add_ctxsw_buffer_map_entries_subunits(map,
nvgpu_netlist_get_pm_fbpa_ctxsw_regs(g),
&count, &offset, hwpm_ctxsw_reg_count_max, 0,
num_fbpas, active_fbpa_mask, fbpa_stride, ~U32(0U))
!= 0) {
goto cleanup;
}
/* Add entries from _LIST_nv_pm_rop_ctx_regs */
if (add_ctxsw_buffer_map_entries(map,
nvgpu_netlist_get_pm_rop_ctxsw_regs(g), &count, &offset,
hwpm_ctxsw_reg_count_max, 0, ~U32(0U)) != 0) {
goto cleanup;
}
/* Add entries from _LIST_compressed_nv_pm_ltc_ctx_regs */
if (add_ctxsw_buffer_map_entries_subunits(map,
nvgpu_netlist_get_pm_ltc_ctxsw_regs(g), &count, &offset,
hwpm_ctxsw_reg_count_max, 0, num_ltc, ~U32(0U),
ltc_stride, ~U32(0U)) != 0) {
goto cleanup;
}
offset = NVGPU_ALIGN(offset, 256U);
/* Add entries from _LIST_nv_perf_fbp_control_ctx_regs */
if (add_ctxsw_buffer_map_entries_subunits(map,
nvgpu_netlist_get_perf_fbp_control_ctxsw_regs(g),
&count, &offset, hwpm_ctxsw_reg_count_max, 0,
num_fbps, ~U32(0U),
g->ops.perf.get_pmmfbp_per_chiplet_offset(),
~U32(0U)) != 0) {
goto cleanup;
}
offset = NVGPU_ALIGN(offset, 256U);
/* Add GPC entries */
if (add_ctxsw_buffer_map_entries_gpcs(g, map, &count, &offset,
hwpm_ctxsw_reg_count_max, config) != 0) {
goto cleanup;
}
if (offset > hwpm_ctxsw_buffer_size) {
nvgpu_err(g, "offset > buffer size");
goto cleanup;
}
sort(map, count, sizeof(*map), map_cmp, NULL);
hwpm_map->map = map;
hwpm_map->count = count;
hwpm_map->init = true;
nvgpu_log_info(g, "Reg Addr => HWPM Ctxt switch buffer offset");
for (i = 0; i < count; i++) {
nvgpu_log_info(g, "%08x => %08x", map[i].addr, map[i].offset);
}
return 0;
cleanup:
nvgpu_err(g, "Failed to create HWPM buffer offset map");
nvgpu_big_free(g, map);
return -EINVAL;
}
/*
* Return the 32-bit offset for a priv register if it is present in the
* PM context buffer.
*/
int nvgpu_gr_hwmp_map_find_priv_offset(struct gk20a *g,
struct nvgpu_gr_hwpm_map *hwpm_map,
u32 addr, u32 *priv_offset, struct nvgpu_gr_config *config)
{
struct ctxsw_buf_offset_map_entry *map, *result, map_key;
int err = 0;
u32 count;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);
/* Create map of pri address and pm offset if necessary */
if (!hwpm_map->init) {
err = nvgpu_gr_hwpm_map_create(g, hwpm_map, config);
if (err != 0) {
return err;
}
}
*priv_offset = 0;
map = hwpm_map->map;
count = hwpm_map->count;
map_key.addr = addr;
result = nvgpu_bsearch(&map_key, map, count, sizeof(*map), map_cmp);
if (result != NULL) {
*priv_offset = result->offset;
} else {
nvgpu_err(g, "Lookup failed for address 0x%x", addr);
err = -EINVAL;
}
return err;
}
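/*
* Hedged usage sketch (not part of the original file): a hypothetical caller
* translating a priv register address into its PM context buffer offset.
* The example_* name and the calling context are assumptions for
* illustration only; the block is kept under "#if 0" so it does not affect
* the build.
*/
#if 0
static int example_lookup_pm_offset(struct gk20a *g,
		struct nvgpu_gr_hwpm_map *hwpm_map,
		struct nvgpu_gr_config *config, u32 reg_addr)
{
	u32 pm_offset = 0U;
	int err;

	/* Builds the sorted addr->offset map on first use, then bsearches it. */
	err = nvgpu_gr_hwmp_map_find_priv_offset(g, hwpm_map, reg_addr,
			&pm_offset, config);
	if (err != 0) {
		return err;
	}

	nvgpu_log_info(g, "reg 0x%08x -> PM buffer offset 0x%08x",
			reg_addr, pm_offset);
	return 0;
}
#endif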

View File

@@ -0,0 +1,982 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/log.h>
#include <nvgpu/io.h>
#include <nvgpu/mm.h>
#ifdef CONFIG_NVGPU_POWER_PG
#include <nvgpu/pmu/pmu_pg.h>
#include <nvgpu/power_features/pg.h>
#endif
#include <nvgpu/gr/ctx.h>
#include <nvgpu/gr/subctx.h>
#include <nvgpu/gr/global_ctx.h>
#include <nvgpu/gr/obj_ctx.h>
#include <nvgpu/gr/config.h>
#include <nvgpu/netlist.h>
#include <nvgpu/gr/gr_falcon.h>
#include <nvgpu/gr/fs_state.h>
#include <nvgpu/power_features/cg.h>
#include <nvgpu/static_analysis.h>
#include "obj_ctx_priv.h"
void nvgpu_gr_obj_ctx_commit_inst_gpu_va(struct gk20a *g,
struct nvgpu_mem *inst_block, u64 gpu_va)
{
g->ops.ramin.set_gr_ptr(g, inst_block, gpu_va);
}
void nvgpu_gr_obj_ctx_commit_inst(struct gk20a *g, struct nvgpu_mem *inst_block,
struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_subctx *subctx, u64 gpu_va)
{
struct nvgpu_mem *ctxheader;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " ");
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_TSG_SUBCONTEXTS)) {
nvgpu_gr_subctx_load_ctx_header(g, subctx, gr_ctx, gpu_va);
ctxheader = nvgpu_gr_subctx_get_ctx_header(subctx);
nvgpu_gr_obj_ctx_commit_inst_gpu_va(g, inst_block,
ctxheader->gpu_va);
} else {
nvgpu_gr_obj_ctx_commit_inst_gpu_va(g, inst_block, gpu_va);
}
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
}
#if defined(CONFIG_NVGPU_GFXP) || defined(CONFIG_NVGPU_CILP)
static int nvgpu_gr_obj_ctx_init_ctxsw_preemption_mode(struct gk20a *g,
struct nvgpu_gr_config *config, struct nvgpu_gr_ctx_desc *gr_ctx_desc,
struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm,
u32 class_num, u32 flags)
{
int err;
u32 graphics_preempt_mode = 0U;
u32 compute_preempt_mode = 0U;
u32 default_graphics_preempt_mode = 0U;
u32 default_compute_preempt_mode = 0U;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " ");
/* Skip for engines other than GR */
if (!g->ops.gpu_class.is_valid_compute(class_num) &&
!g->ops.gpu_class.is_valid_gfx(class_num)) {
return 0;
}
g->ops.gr.init.get_default_preemption_modes(
&default_graphics_preempt_mode,
&default_compute_preempt_mode);
#ifdef CONFIG_NVGPU_GFXP
if ((flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP) != 0U) {
graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP;
}
if (g->ops.gpu_class.is_valid_gfx(class_num) &&
nvgpu_gr_ctx_desc_force_preemption_gfxp(gr_ctx_desc)) {
graphics_preempt_mode = NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP;
}
#endif
#ifdef CONFIG_NVGPU_CILP
if ((flags & NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP) != 0U) {
compute_preempt_mode = NVGPU_PREEMPTION_MODE_COMPUTE_CILP;
}
if (g->ops.gpu_class.is_valid_compute(class_num) &&
nvgpu_gr_ctx_desc_force_preemption_cilp(gr_ctx_desc)) {
compute_preempt_mode = NVGPU_PREEMPTION_MODE_COMPUTE_CILP;
}
#endif
if (compute_preempt_mode == 0U) {
compute_preempt_mode = default_compute_preempt_mode;
}
if (graphics_preempt_mode == 0U) {
graphics_preempt_mode = default_graphics_preempt_mode;
}
err = nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(g, config,
gr_ctx_desc, gr_ctx, vm, class_num, graphics_preempt_mode,
compute_preempt_mode);
if (err != 0) {
nvgpu_err(g, "set_ctxsw_preemption_mode failed");
return err;
}
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
return 0;
}
#endif
#ifdef CONFIG_NVGPU_GRAPHICS
static int nvgpu_gr_obj_ctx_set_graphics_preemption_mode(struct gk20a *g,
struct nvgpu_gr_config *config, struct nvgpu_gr_ctx_desc *gr_ctx_desc,
struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm,
u32 graphics_preempt_mode)
{
int err = 0;
/* set preemption modes */
switch (graphics_preempt_mode) {
#ifdef CONFIG_NVGPU_GFXP
case NVGPU_PREEMPTION_MODE_GRAPHICS_GFXP:
{
u32 rtv_cb_size;
u32 spill_size = g->ops.gr.init.get_ctx_spill_size(g);
u32 pagepool_size = g->ops.gr.init.get_ctx_pagepool_size(g);
u32 betacb_size = g->ops.gr.init.get_ctx_betacb_size(g);
u32 attrib_cb_size =
g->ops.gr.init.get_ctx_attrib_cb_size(g, betacb_size,
nvgpu_gr_config_get_tpc_count(config),
nvgpu_gr_config_get_max_tpc_count(config));
nvgpu_log_info(g, "gfxp context spill_size=%d", spill_size);
nvgpu_log_info(g, "gfxp context pagepool_size=%d", pagepool_size);
nvgpu_log_info(g, "gfxp context attrib_cb_size=%d",
attrib_cb_size);
nvgpu_gr_ctx_set_size(gr_ctx_desc,
NVGPU_GR_CTX_SPILL_CTXSW, spill_size);
nvgpu_gr_ctx_set_size(gr_ctx_desc,
NVGPU_GR_CTX_BETACB_CTXSW, attrib_cb_size);
nvgpu_gr_ctx_set_size(gr_ctx_desc,
NVGPU_GR_CTX_PAGEPOOL_CTXSW, pagepool_size);
if (g->ops.gr.init.get_gfxp_rtv_cb_size != NULL) {
rtv_cb_size = g->ops.gr.init.get_gfxp_rtv_cb_size(g);
nvgpu_gr_ctx_set_size(gr_ctx_desc,
NVGPU_GR_CTX_GFXP_RTVCB_CTXSW, rtv_cb_size);
}
err = nvgpu_gr_ctx_alloc_ctxsw_buffers(g, gr_ctx,
gr_ctx_desc, vm);
if (err != 0) {
nvgpu_err(g, "cannot allocate ctxsw buffers");
return err;
}
nvgpu_gr_ctx_init_graphics_preemption_mode(gr_ctx,
graphics_preempt_mode);
break;
}
#endif
case NVGPU_PREEMPTION_MODE_GRAPHICS_WFI:
nvgpu_gr_ctx_init_graphics_preemption_mode(gr_ctx,
graphics_preempt_mode);
break;
default:
nvgpu_log_info(g, "graphics_preempt_mode=%u",
graphics_preempt_mode);
break;
}
return err;
}
#endif
static int nvgpu_gr_obj_ctx_set_compute_preemption_mode(struct gk20a *g,
struct nvgpu_gr_ctx *gr_ctx, u32 class_num, u32 compute_preempt_mode)
{
if (g->ops.gpu_class.is_valid_compute(class_num)
#ifdef CONFIG_NVGPU_GRAPHICS
|| g->ops.gpu_class.is_valid_gfx(class_num)
#endif
) {
nvgpu_gr_ctx_init_compute_preemption_mode(gr_ctx,
compute_preempt_mode);
return 0;
} else {
return -EINVAL;
}
}
int nvgpu_gr_obj_ctx_set_ctxsw_preemption_mode(struct gk20a *g,
struct nvgpu_gr_config *config, struct nvgpu_gr_ctx_desc *gr_ctx_desc,
struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, u32 class_num,
u32 graphics_preempt_mode, u32 compute_preempt_mode)
{
int err = 0;
/* check for invalid combinations */
if (nvgpu_gr_ctx_check_valid_preemption_mode(g, gr_ctx,
graphics_preempt_mode, compute_preempt_mode) == false) {
err = -EINVAL;
goto fail;
}
nvgpu_log(g, gpu_dbg_gr, "graphics_preempt_mode=%u compute_preempt_mode=%u",
graphics_preempt_mode, compute_preempt_mode);
#ifdef CONFIG_NVGPU_GRAPHICS
err = nvgpu_gr_obj_ctx_set_graphics_preemption_mode(g, config,
gr_ctx_desc, gr_ctx, vm, graphics_preempt_mode);
if (err != 0) {
goto fail;
}
#endif
err = nvgpu_gr_obj_ctx_set_compute_preemption_mode(g, gr_ctx,
class_num, compute_preempt_mode);
fail:
return err;
}
void nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(struct gk20a *g,
struct nvgpu_gr_config *config,
struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_gr_subctx *subctx)
{
#ifdef CONFIG_NVGPU_GFXP
u64 addr;
u32 size;
struct nvgpu_mem *mem;
#endif
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " ");
nvgpu_gr_ctx_set_preemption_modes(g, gr_ctx);
#ifdef CONFIG_NVGPU_GFXP
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_PREEMPTION_GFXP)) {
goto done;
}
if (!nvgpu_mem_is_valid(
nvgpu_gr_ctx_get_preempt_ctxsw_buffer(gr_ctx))) {
goto done;
}
if (subctx != NULL) {
nvgpu_gr_subctx_set_preemption_buffer_va(g, subctx,
gr_ctx);
} else {
nvgpu_gr_ctx_set_preemption_buffer_va(g, gr_ctx);
}
nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, true);
addr = nvgpu_gr_ctx_get_betacb_ctxsw_buffer(gr_ctx)->gpu_va;
g->ops.gr.init.commit_global_attrib_cb(g, gr_ctx,
nvgpu_gr_config_get_tpc_count(config),
nvgpu_gr_config_get_max_tpc_count(config), addr,
true);
mem = nvgpu_gr_ctx_get_pagepool_ctxsw_buffer(gr_ctx);
addr = mem->gpu_va;
nvgpu_assert(mem->size <= U32_MAX);
size = (u32)mem->size;
g->ops.gr.init.commit_global_pagepool(g, gr_ctx, addr, size,
true, false);
mem = nvgpu_gr_ctx_get_spill_ctxsw_buffer(gr_ctx);
addr = mem->gpu_va;
nvgpu_assert(mem->size <= U32_MAX);
size = (u32)mem->size;
g->ops.gr.init.commit_ctxsw_spill(g, gr_ctx, addr, size, true);
g->ops.gr.init.commit_cbes_reserve(g, gr_ctx, true);
if (g->ops.gr.init.gfxp_wfi_timeout != NULL) {
g->ops.gr.init.gfxp_wfi_timeout(g, gr_ctx, true);
}
if (g->ops.gr.init.commit_gfxp_rtv_cb != NULL) {
g->ops.gr.init.commit_gfxp_rtv_cb(g, gr_ctx, true);
}
nvgpu_gr_ctx_patch_write_end(g, gr_ctx, true);
done:
#endif
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
}
void nvgpu_gr_obj_ctx_commit_global_ctx_buffers(struct gk20a *g,
struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
struct nvgpu_gr_config *config, struct nvgpu_gr_ctx *gr_ctx, bool patch)
{
u64 addr;
u32 size;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " ");
if (patch) {
nvgpu_gr_ctx_patch_write_begin(g, gr_ctx, false);
}
/*
* MIG supports only compute class.
* Skip BUNDLE_CB, PAGEPOOL, ATTRIBUTE_CB and RTV_CB
* if 2D/3D/I2M (graphics) classes are not supported.
*/
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
/* global pagepool buffer */
addr = nvgpu_gr_ctx_get_global_ctx_va(gr_ctx,
NVGPU_GR_CTX_PAGEPOOL_VA);
size = nvgpu_safe_cast_u64_to_u32(nvgpu_gr_global_ctx_get_size(
global_ctx_buffer,
NVGPU_GR_GLOBAL_CTX_PAGEPOOL));
g->ops.gr.init.commit_global_pagepool(g, gr_ctx, addr, size,
patch, true);
/* global bundle cb */
addr = nvgpu_gr_ctx_get_global_ctx_va(gr_ctx,
NVGPU_GR_CTX_CIRCULAR_VA);
size = nvgpu_safe_cast_u64_to_u32(
g->ops.gr.init.get_bundle_cb_default_size(g));
g->ops.gr.init.commit_global_bundle_cb(g, gr_ctx, addr, size,
patch);
/* global attrib cb */
addr = nvgpu_gr_ctx_get_global_ctx_va(gr_ctx,
NVGPU_GR_CTX_ATTRIBUTE_VA);
g->ops.gr.init.commit_global_attrib_cb(g, gr_ctx,
nvgpu_gr_config_get_tpc_count(config),
nvgpu_gr_config_get_max_tpc_count(config), addr, patch);
g->ops.gr.init.commit_global_cb_manager(g, config, gr_ctx,
patch);
#ifdef CONFIG_NVGPU_GRAPHICS
if (g->ops.gr.init.commit_rtv_cb != NULL) {
/* RTV circular buffer */
addr = nvgpu_gr_ctx_get_global_ctx_va(gr_ctx,
NVGPU_GR_CTX_RTV_CIRCULAR_BUFFER_VA);
g->ops.gr.init.commit_rtv_cb(g, addr, gr_ctx, patch);
}
#endif
}
#ifdef CONFIG_NVGPU_SM_DIVERSITY
if ((nvgpu_is_enabled(g, NVGPU_SUPPORT_SM_DIVERSITY)) &&
(nvgpu_gr_ctx_get_sm_diversity_config(gr_ctx) !=
NVGPU_DEFAULT_SM_DIVERSITY_CONFIG) &&
(g->ops.gr.init.commit_sm_id_programming != NULL)) {
int err;
err = g->ops.gr.init.commit_sm_id_programming(
g, config, gr_ctx, patch);
if (err != 0) {
nvgpu_err(g,
"commit_sm_id_programming failed err=%d", err);
}
}
#endif
if (patch) {
nvgpu_gr_ctx_patch_write_end(g, gr_ctx, false);
}
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
}
static int nvgpu_gr_obj_ctx_alloc_sw_bundle(struct gk20a *g)
{
int err = 0;
struct netlist_av_list *sw_bundle_init =
nvgpu_netlist_get_sw_bundle_init_av_list(g);
struct netlist_av_list *sw_veid_bundle_init =
nvgpu_netlist_get_sw_veid_bundle_init_av_list(g);
#ifdef CONFIG_NVGPU_DGPU
struct netlist_av64_list *sw_bundle64_init =
nvgpu_netlist_get_sw_bundle64_init_av64_list(g);
#endif
/* enable pipe mode override */
g->ops.gr.init.pipe_mode_override(g, true);
/* load bundle init */
err = g->ops.gr.init.load_sw_bundle_init(g, sw_bundle_init);
if (err != 0) {
goto error;
}
if (g->ops.gr.init.load_sw_veid_bundle != NULL) {
err = g->ops.gr.init.load_sw_veid_bundle(g,
sw_veid_bundle_init);
if (err != 0) {
goto error;
}
}
#ifdef CONFIG_NVGPU_DGPU
if (g->ops.gr.init.load_sw_bundle64 != NULL) {
err = g->ops.gr.init.load_sw_bundle64(g, sw_bundle64_init);
if (err != 0) {
goto error;
}
}
#endif
/* disable pipe mode override */
g->ops.gr.init.pipe_mode_override(g, false);
err = g->ops.gr.init.wait_idle(g);
return err;
error:
/* in case of error skip waiting for GR idle - just restore state */
g->ops.gr.init.pipe_mode_override(g, false);
return err;
}
static int nvgpu_gr_obj_ctx_init_hw_state(struct gk20a *g,
struct nvgpu_mem *inst_block)
{
int err = 0;
u32 data;
u32 i;
struct netlist_aiv_list *sw_ctx_load =
nvgpu_netlist_get_sw_ctx_load_aiv_list(g);
nvgpu_log(g, gpu_dbg_gr, " ");
err = g->ops.gr.init.fe_pwr_mode_force_on(g, true);
if (err != 0) {
goto clean_up;
}
g->ops.gr.init.override_context_reset(g);
err = g->ops.gr.init.fe_pwr_mode_force_on(g, false);
if (err != 0) {
goto clean_up;
}
data = g->ops.gr.falcon.get_fecs_current_ctx_data(g, inst_block);
err = g->ops.gr.falcon.ctrl_ctxsw(g,
NVGPU_GR_FALCON_METHOD_ADDRESS_BIND_PTR, data, NULL);
if (err != 0) {
goto clean_up;
}
err = g->ops.gr.init.wait_idle(g);
/* load ctx init */
nvgpu_log_info(g, "begin: netlist: sw_ctx_load: register writes");
for (i = 0U; i < sw_ctx_load->count; i++) {
nvgpu_writel(g, sw_ctx_load->l[i].addr,
sw_ctx_load->l[i].value);
}
nvgpu_log_info(g, "end: netlist: sw_ctx_load: register writes");
nvgpu_log_info(g, "configure sm_hww_esr_report mask after sw_ctx_load");
g->ops.gr.intr.set_hww_esr_report_mask(g);
#ifdef CONFIG_NVGPU_GFXP
if (g->ops.gr.init.preemption_state != NULL) {
err = g->ops.gr.init.preemption_state(g);
if (err != 0) {
goto clean_up;
}
}
#endif
nvgpu_cg_blcg_gr_load_enable(g);
err = g->ops.gr.init.wait_idle(g);
clean_up:
if (err == 0) {
nvgpu_log(g, gpu_dbg_gr, "done");
}
return err;
}
static int nvgpu_gr_obj_ctx_commit_hw_state(struct gk20a *g,
struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
struct nvgpu_gr_config *config, struct nvgpu_gr_ctx *gr_ctx)
{
int err = 0;
struct netlist_av_list *sw_method_init =
nvgpu_netlist_get_sw_method_init_av_list(g);
#ifdef CONFIG_NVGPU_GR_GOLDEN_CTX_VERIFICATION
struct netlist_av_list *sw_bundle_init =
nvgpu_netlist_get_sw_bundle_init_av_list(g);
#endif
nvgpu_log(g, gpu_dbg_gr, " ");
/* disable fe_go_idle */
g->ops.gr.init.fe_go_idle_timeout(g, false);
nvgpu_gr_obj_ctx_commit_global_ctx_buffers(g, global_ctx_buffer,
config, gr_ctx, false);
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
/* override a few ctx state registers */
g->ops.gr.init.commit_global_timeslice(g);
}
/* floorsweep anything left */
err = nvgpu_gr_fs_state_init(g, config);
if (err != 0) {
goto restore_fe_go_idle;
}
err = g->ops.gr.init.wait_idle(g);
if (err != 0) {
goto restore_fe_go_idle;
}
#if defined(CONFIG_NVGPU_HAL_NON_FUSA) && defined(CONFIG_NVGPU_NEXT)
if (g->ops.gr.init.auto_go_idle != NULL) {
g->ops.gr.init.auto_go_idle(g, false);
}
#endif
err = nvgpu_gr_obj_ctx_alloc_sw_bundle(g);
if (err != 0) {
goto restore_fe_go_idle;
}
#if defined(CONFIG_NVGPU_HAL_NON_FUSA) && defined(CONFIG_NVGPU_NEXT)
if (g->ops.gr.init.auto_go_idle != NULL) {
g->ops.gr.init.auto_go_idle(g, true);
}
#endif
/* restore fe_go_idle */
g->ops.gr.init.fe_go_idle_timeout(g, true);
/* load method init */
g->ops.gr.init.load_method_init(g, sw_method_init);
#ifdef CONFIG_NVGPU_GR_GOLDEN_CTX_VERIFICATION
/* restore stats bundle data through mme shadow methods */
if (g->ops.gr.init.restore_stats_counter_bundle_data != NULL) {
g->ops.gr.init.restore_stats_counter_bundle_data(g,
sw_bundle_init);
}
#endif
err = g->ops.gr.init.wait_idle(g);
if (err != 0) {
goto clean_up;
}
nvgpu_log(g, gpu_dbg_gr, "done");
return 0;
restore_fe_go_idle:
/* restore fe_go_idle */
g->ops.gr.init.fe_go_idle_timeout(g, true);
#if defined(CONFIG_NVGPU_HAL_NON_FUSA) && defined(CONFIG_NVGPU_NEXT)
if (g->ops.gr.init.auto_go_idle != NULL) {
g->ops.gr.init.auto_go_idle(g, true);
}
#endif
clean_up:
return err;
}
static int nvgpu_gr_obj_ctx_save_golden_ctx(struct gk20a *g,
struct nvgpu_gr_obj_ctx_golden_image *golden_image,
struct nvgpu_gr_ctx *gr_ctx, struct nvgpu_mem *inst_block)
{
int err = 0;
struct nvgpu_mem *gr_mem;
u64 size;
u32 data;
#ifdef CONFIG_NVGPU_GR_GOLDEN_CTX_VERIFICATION
struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image_temp =
NULL;
#endif
nvgpu_log(g, gpu_dbg_gr, " ");
gr_mem = nvgpu_gr_ctx_get_ctx_mem(gr_ctx);
#ifdef CONFIG_NVGPU_GR_GOLDEN_CTX_VERIFICATION
/*
* Save ctx data before first golden context save. Restore same data
* before second golden context save. This temporary copy is
* saved in local_golden_image_temp.
*/
size = nvgpu_gr_obj_ctx_get_golden_image_size(golden_image);
local_golden_image_temp =
nvgpu_gr_global_ctx_init_local_golden_image(g, gr_mem, size);
if (local_golden_image_temp == NULL) {
err = -ENOMEM;
goto clean_up;
}
#endif
data = g->ops.gr.falcon.get_fecs_current_ctx_data(g, inst_block);
err = g->ops.gr.falcon.ctrl_ctxsw(g,
NVGPU_GR_FALCON_METHOD_GOLDEN_IMAGE_SAVE, data, NULL);
if (err != 0) {
goto clean_up;
}
size = nvgpu_gr_obj_ctx_get_golden_image_size(golden_image);
golden_image->local_golden_image =
nvgpu_gr_global_ctx_init_local_golden_image(g, gr_mem, size);
if (golden_image->local_golden_image == NULL) {
err = -ENOMEM;
goto clean_up;
}
#ifdef CONFIG_NVGPU_GR_GOLDEN_CTX_VERIFICATION
/* Before the second golden context save, restore the earlier saved state */
nvgpu_gr_global_ctx_load_local_golden_image(g,
local_golden_image_temp, gr_mem);
/* free local copy now */
nvgpu_gr_global_ctx_deinit_local_golden_image(g,
local_golden_image_temp);
local_golden_image_temp = NULL;
/* Initiate second golden context save */
data = g->ops.gr.falcon.get_fecs_current_ctx_data(g, inst_block);
err = g->ops.gr.falcon.ctrl_ctxsw(g,
NVGPU_GR_FALCON_METHOD_GOLDEN_IMAGE_SAVE, data, NULL);
if (err != 0) {
goto clean_up;
}
/* Copy the data to local buffer */
local_golden_image_temp =
nvgpu_gr_global_ctx_init_local_golden_image(g, gr_mem, size);
if (local_golden_image_temp == NULL) {
err = -ENOMEM;
goto clean_up;
}
/* Compare two golden context images */
if (!nvgpu_gr_global_ctx_compare_golden_images(g,
nvgpu_mem_is_sysmem(gr_mem),
golden_image->local_golden_image,
local_golden_image_temp,
size)) {
nvgpu_err(g, "golden context mismatch");
err = -ENOMEM;
}
#endif
clean_up:
#ifdef CONFIG_NVGPU_GR_GOLDEN_CTX_VERIFICATION
if (local_golden_image_temp != NULL) {
nvgpu_gr_global_ctx_deinit_local_golden_image(g,
local_golden_image_temp);
}
#endif
if (err == 0) {
nvgpu_log(g, gpu_dbg_gr, "golden image saved with size = %llu", size);
}
return err;
}
/*
* init global golden image from a fresh gr_ctx in channel ctx.
* save a copy in local_golden_image.
*/
int nvgpu_gr_obj_ctx_alloc_golden_ctx_image(struct gk20a *g,
struct nvgpu_gr_obj_ctx_golden_image *golden_image,
struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
struct nvgpu_gr_config *config,
struct nvgpu_gr_ctx *gr_ctx,
struct nvgpu_mem *inst_block)
{
int err = 0;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " ");
/*
* The golden ctx is global to all channels. Although only the first
* channel initializes the golden image, the driver needs to prevent
* multiple channels from initializing the golden ctx at the same time.
*/
nvgpu_mutex_acquire(&golden_image->ctx_mutex);
if (golden_image->ready) {
nvgpu_log(g, gpu_dbg_gr, "golden image already saved");
goto clean_up;
}
err = nvgpu_gr_obj_ctx_init_hw_state(g, inst_block);
if (err != 0) {
goto clean_up;
}
err = nvgpu_gr_obj_ctx_commit_hw_state(g, global_ctx_buffer,
config, gr_ctx);
if (err != 0) {
goto clean_up;
}
#ifdef CONFIG_NVGPU_GRAPHICS
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
err = nvgpu_gr_ctx_init_zcull(g, gr_ctx);
if (err != 0) {
goto clean_up;
}
}
#endif
err = nvgpu_gr_obj_ctx_save_golden_ctx(g, golden_image,
gr_ctx, inst_block);
if (err != 0) {
goto clean_up;
}
golden_image->ready = true;
#ifdef CONFIG_NVGPU_POWER_PG
nvgpu_pmu_set_golden_image_initialized(g, true);
#endif
g->ops.gr.falcon.set_current_ctx_invalid(g);
clean_up:
if (err != 0) {
nvgpu_err(g, "fail");
} else {
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
}
nvgpu_mutex_release(&golden_image->ctx_mutex);
return err;
}
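/*
* Illustrative sketch (not part of the original file): the one-time golden
* image creation above would typically be gated on a readiness check such as
* the one below. The example_* name is an assumption for illustration only;
* the block is kept under "#if 0" so it does not affect the build.
*/
#if 0
static bool example_golden_image_needed(
		struct nvgpu_gr_obj_ctx_golden_image *golden_image)
{
	/* Takes ctx_mutex internally, so it is safe against concurrent init. */
	return !nvgpu_gr_obj_ctx_is_golden_image_ready(golden_image);
}
#endif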
static int nvgpu_gr_obj_ctx_gr_ctx_alloc(struct gk20a *g,
struct nvgpu_gr_obj_ctx_golden_image *golden_image,
struct nvgpu_gr_ctx_desc *gr_ctx_desc, struct nvgpu_gr_ctx *gr_ctx,
struct vm_gk20a *vm)
{
u64 size;
int err = 0;
nvgpu_log_fn(g, " ");
size = nvgpu_gr_obj_ctx_get_golden_image_size(golden_image);
nvgpu_gr_ctx_set_size(gr_ctx_desc, NVGPU_GR_CTX_CTX,
nvgpu_safe_cast_u64_to_u32(size));
nvgpu_log(g, gpu_dbg_gr, "gr_ctx size = %llu", size);
err = nvgpu_gr_ctx_alloc(g, gr_ctx, gr_ctx_desc, vm);
if (err != 0) {
return err;
}
return 0;
}
int nvgpu_gr_obj_ctx_alloc(struct gk20a *g,
struct nvgpu_gr_obj_ctx_golden_image *golden_image,
struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer,
struct nvgpu_gr_ctx_desc *gr_ctx_desc,
struct nvgpu_gr_config *config,
struct nvgpu_gr_ctx *gr_ctx,
struct nvgpu_gr_subctx *subctx,
struct vm_gk20a *vm,
struct nvgpu_mem *inst_block,
u32 class_num, u32 flags,
bool cde, bool vpr)
{
int err = 0;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, " ");
err = nvgpu_gr_obj_ctx_gr_ctx_alloc(g, golden_image, gr_ctx_desc,
gr_ctx, vm);
if (err != 0) {
nvgpu_err(g, "fail to allocate TSG gr ctx buffer");
goto out;
}
/* allocate patch buffer */
if (!nvgpu_mem_is_valid(nvgpu_gr_ctx_get_patch_ctx_mem(gr_ctx))) {
nvgpu_gr_ctx_set_patch_ctx_data_count(gr_ctx, 0);
nvgpu_gr_ctx_set_size(gr_ctx_desc,
NVGPU_GR_CTX_PATCH_CTX,
nvgpu_safe_mult_u32(
g->ops.gr.init.get_patch_slots(g, config),
PATCH_CTX_SLOTS_REQUIRED_PER_ENTRY));
err = nvgpu_gr_ctx_alloc_patch_ctx(g, gr_ctx, gr_ctx_desc, vm);
if (err != 0) {
nvgpu_err(g, "fail to allocate patch buffer");
goto out;
}
}
#if defined(CONFIG_NVGPU_GFXP) || defined(CONFIG_NVGPU_CILP)
err = nvgpu_gr_obj_ctx_init_ctxsw_preemption_mode(g, config,
gr_ctx_desc, gr_ctx, vm, class_num, flags);
if (err != 0) {
nvgpu_err(g, "fail to init preemption mode");
goto out;
}
#endif
/* map global buffer to channel gpu_va and commit */
err = nvgpu_gr_ctx_map_global_ctx_buffers(g, gr_ctx,
global_ctx_buffer, vm, vpr);
if (err != 0) {
nvgpu_err(g, "fail to map global ctx buffer");
goto out;
}
nvgpu_gr_obj_ctx_commit_global_ctx_buffers(g, global_ctx_buffer,
config, gr_ctx, true);
/* commit gr ctx buffer */
nvgpu_gr_obj_ctx_commit_inst(g, inst_block, gr_ctx, subctx,
nvgpu_gr_ctx_get_ctx_mem(gr_ctx)->gpu_va);
/* init golden image */
err = nvgpu_gr_obj_ctx_alloc_golden_ctx_image(g, golden_image,
global_ctx_buffer, config, gr_ctx, inst_block);
if (err != 0) {
nvgpu_err(g, "fail to init golden ctx image");
goto out;
}
#ifdef CONFIG_NVGPU_POWER_PG
/*
* Re-enable ELPG now that the golden image has been initialized.
* The PMU PG init code may already have tried to enable ELPG, but
* would not have been able to complete this action since the golden
* image hadn't been initialized yet, so do this now.
*/
err = nvgpu_pmu_reenable_elpg(g);
if (err != 0) {
nvgpu_err(g, "fail to re-enable elpg");
goto out;
}
#endif
/* load golden image */
nvgpu_gr_ctx_load_golden_ctx_image(g, gr_ctx,
golden_image->local_golden_image, cde);
nvgpu_gr_obj_ctx_update_ctxsw_preemption_mode(g, config, gr_ctx,
subctx);
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gr, "done");
return 0;
out:
/*
* 1. gr_ctx, patch_ctx and global ctx buffer mapping
* can be reused so no need to release them.
* 2. golden image init and load is a one time thing so if
* they pass, no need to undo.
*/
nvgpu_err(g, "fail");
return err;
}
void nvgpu_gr_obj_ctx_set_golden_image_size(
struct nvgpu_gr_obj_ctx_golden_image *golden_image,
size_t size)
{
golden_image->size = size;
}
size_t nvgpu_gr_obj_ctx_get_golden_image_size(
struct nvgpu_gr_obj_ctx_golden_image *golden_image)
{
return golden_image->size;
}
#ifdef CONFIG_NVGPU_DEBUGGER
u32 *nvgpu_gr_obj_ctx_get_local_golden_image_ptr(
struct nvgpu_gr_obj_ctx_golden_image *golden_image)
{
return nvgpu_gr_global_ctx_get_local_golden_image_ptr(
golden_image->local_golden_image);
}
#endif
bool nvgpu_gr_obj_ctx_is_golden_image_ready(
struct nvgpu_gr_obj_ctx_golden_image *golden_image)
{
bool ready;
nvgpu_mutex_acquire(&golden_image->ctx_mutex);
ready = golden_image->ready;
nvgpu_mutex_release(&golden_image->ctx_mutex);
return ready;
}
int nvgpu_gr_obj_ctx_init(struct gk20a *g,
struct nvgpu_gr_obj_ctx_golden_image **gr_golden_image, u32 size)
{
struct nvgpu_gr_obj_ctx_golden_image *golden_image;
nvgpu_log(g, gpu_dbg_gr, "size = %u", size);
golden_image = nvgpu_kzalloc(g, sizeof(*golden_image));
if (golden_image == NULL) {
return -ENOMEM;
}
nvgpu_gr_obj_ctx_set_golden_image_size(golden_image, size);
nvgpu_mutex_init(&golden_image->ctx_mutex);
*gr_golden_image = golden_image;
return 0;
}
void nvgpu_gr_obj_ctx_deinit(struct gk20a *g,
struct nvgpu_gr_obj_ctx_golden_image *golden_image)
{
if (golden_image == NULL) {
return;
}
if (golden_image->local_golden_image != NULL) {
nvgpu_gr_global_ctx_deinit_local_golden_image(g,
golden_image->local_golden_image);
golden_image->local_golden_image = NULL;
}
#ifdef CONFIG_NVGPU_POWER_PG
nvgpu_pmu_set_golden_image_initialized(g, false);
#endif
golden_image->ready = false;
nvgpu_kfree(g, golden_image);
}

View File

@@ -0,0 +1,58 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NVGPU_GR_OBJ_CTX_PRIV_H
#define NVGPU_GR_OBJ_CTX_PRIV_H
#include <nvgpu/types.h>
#include <nvgpu/lock.h>
struct nvgpu_gr_global_ctx_local_golden_image;
/**
* Golden context image descriptor structure.
*
* This structure stores details of the Golden context image.
*/
struct nvgpu_gr_obj_ctx_golden_image {
/**
* Flag to indicate if Golden context image is ready or not.
*/
bool ready;
/**
* Mutex to hold for accesses to Golden context image.
*/
struct nvgpu_mutex ctx_mutex;
/**
* Size of Golden context image.
*/
size_t size;
/**
* Pointer to local Golden context image struct.
*/
struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image;
};
#endif /* NVGPU_GR_OBJ_CTX_PRIV_H */

View File

@@ -0,0 +1,167 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/gr/subctx.h>
#include <nvgpu/gr/ctx.h>
#include <nvgpu/gmmu.h>
#include <nvgpu/dma.h>
#include "common/gr/subctx_priv.h"
struct nvgpu_gr_subctx *nvgpu_gr_subctx_alloc(struct gk20a *g,
struct vm_gk20a *vm)
{
struct nvgpu_gr_subctx *subctx;
int err = 0;
nvgpu_log_fn(g, " ");
subctx = nvgpu_kzalloc(g, sizeof(*subctx));
if (subctx == NULL) {
return NULL;
}
err = nvgpu_dma_alloc_sys(g,
g->ops.gr.ctxsw_prog.hw_get_fecs_header_size(),
&subctx->ctx_header);
if (err != 0) {
nvgpu_err(g, "failed to allocate sub ctx header");
goto err_free_subctx;
}
subctx->ctx_header.gpu_va = nvgpu_gmmu_map(vm,
&subctx->ctx_header,
subctx->ctx_header.size,
0, /* not GPU-cacheable */
gk20a_mem_flag_none, true,
subctx->ctx_header.aperture);
if (subctx->ctx_header.gpu_va == 0ULL) {
nvgpu_err(g, "failed to map ctx header");
goto err_free_ctx_header;
}
return subctx;
err_free_ctx_header:
nvgpu_dma_free(g, &subctx->ctx_header);
err_free_subctx:
nvgpu_kfree(g, subctx);
return NULL;
}
void nvgpu_gr_subctx_free(struct gk20a *g,
struct nvgpu_gr_subctx *subctx,
struct vm_gk20a *vm)
{
nvgpu_log_fn(g, " ");
nvgpu_gmmu_unmap(vm, &subctx->ctx_header,
subctx->ctx_header.gpu_va);
nvgpu_dma_free(g, &subctx->ctx_header);
nvgpu_kfree(g, subctx);
}
void nvgpu_gr_subctx_load_ctx_header(struct gk20a *g,
struct nvgpu_gr_subctx *subctx,
struct nvgpu_gr_ctx *gr_ctx, u64 gpu_va)
{
struct nvgpu_mem *ctxheader = &subctx->ctx_header;
int err = 0;
err = g->ops.mm.cache.l2_flush(g, true);
if (err != 0) {
nvgpu_err(g, "l2_flush failed");
}
/* set priv access map */
g->ops.gr.ctxsw_prog.set_priv_access_map_addr(g, ctxheader,
nvgpu_gr_ctx_get_global_ctx_va(gr_ctx,
NVGPU_GR_CTX_PRIV_ACCESS_MAP_VA));
g->ops.gr.ctxsw_prog.set_patch_addr(g, ctxheader,
nvgpu_gr_ctx_get_patch_ctx_mem(gr_ctx)->gpu_va);
#ifdef CONFIG_NVGPU_DEBUGGER
g->ops.gr.ctxsw_prog.set_pm_ptr(g, ctxheader,
nvgpu_gr_ctx_get_pm_ctx_mem(gr_ctx)->gpu_va);
#endif
#ifdef CONFIG_NVGPU_GRAPHICS
g->ops.gr.ctxsw_prog.set_zcull_ptr(g, ctxheader,
nvgpu_gr_ctx_get_zcull_ctx_va(gr_ctx));
#endif
g->ops.gr.ctxsw_prog.set_context_buffer_ptr(g, ctxheader, gpu_va);
g->ops.gr.ctxsw_prog.set_type_per_veid_header(g, ctxheader);
}
struct nvgpu_mem *nvgpu_gr_subctx_get_ctx_header(struct nvgpu_gr_subctx *subctx)
{
return &subctx->ctx_header;
}
#ifdef CONFIG_NVGPU_HAL_NON_FUSA
void nvgpu_gr_subctx_set_patch_ctx(struct gk20a *g,
struct nvgpu_gr_subctx *subctx, struct nvgpu_gr_ctx *gr_ctx)
{
g->ops.gr.ctxsw_prog.set_patch_addr(g, &subctx->ctx_header,
nvgpu_gr_ctx_get_patch_ctx_mem(gr_ctx)->gpu_va);
}
#endif
#ifdef CONFIG_NVGPU_GRAPHICS
void nvgpu_gr_subctx_zcull_setup(struct gk20a *g, struct nvgpu_gr_subctx *subctx,
struct nvgpu_gr_ctx *gr_ctx)
{
nvgpu_log_fn(g, " ");
g->ops.gr.ctxsw_prog.set_zcull_ptr(g, &subctx->ctx_header,
nvgpu_gr_ctx_get_zcull_ctx_va(gr_ctx));
}
#endif /* CONFIG_NVGPU_GRAPHICS */
#ifdef CONFIG_NVGPU_GFXP
void nvgpu_gr_subctx_set_preemption_buffer_va(struct gk20a *g,
struct nvgpu_gr_subctx *subctx, struct nvgpu_gr_ctx *gr_ctx)
{
g->ops.gr.ctxsw_prog.set_full_preemption_ptr(g, &subctx->ctx_header,
nvgpu_gr_ctx_get_preempt_ctxsw_buffer(gr_ctx)->gpu_va);
if (g->ops.gr.ctxsw_prog.set_full_preemption_ptr_veid0 != NULL) {
g->ops.gr.ctxsw_prog.set_full_preemption_ptr_veid0(g,
&subctx->ctx_header,
nvgpu_gr_ctx_get_preempt_ctxsw_buffer(gr_ctx)->gpu_va);
}
}
#endif /* CONFIG_NVGPU_GFXP */
#ifdef CONFIG_NVGPU_DEBUGGER
void nvgpu_gr_subctx_set_hwpm_mode(struct gk20a *g,
struct nvgpu_gr_subctx *subctx, struct nvgpu_gr_ctx *gr_ctx)
{
g->ops.gr.ctxsw_prog.set_pm_ptr(g, &subctx->ctx_header,
nvgpu_gr_ctx_get_pm_ctx_mem(gr_ctx)->gpu_va);
}
#endif
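
The functions above form the whole per-channel subcontext lifecycle, so a brief usage sketch may help tie them together. This is an illustrative caller only, assuming the channel's vm_gk20a, the TSG's nvgpu_gr_ctx and the GPU VA of its graphics context buffer are already available; example_bind_gr_subctx is a hypothetical name, not part of this file.

/* Hypothetical caller; assumes vm, gr_ctx and gr_ctx_gpu_va are valid. */
static int example_bind_gr_subctx(struct gk20a *g, struct vm_gk20a *vm,
		struct nvgpu_gr_ctx *gr_ctx, u64 gr_ctx_gpu_va)
{
	struct nvgpu_gr_subctx *subctx;

	/* Allocate and map the per-channel context header in vm. */
	subctx = nvgpu_gr_subctx_alloc(g, vm);
	if (subctx == NULL) {
		return -ENOMEM;
	}

	/* Point the header at the TSG's graphics context buffers. */
	nvgpu_gr_subctx_load_ctx_header(g, subctx, gr_ctx, gr_ctx_gpu_va);

	/* ... channel runs ... */

	/* Unmap and free the header on channel teardown. */
	nvgpu_gr_subctx_free(g, subctx, vm);
	return 0;
}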

View File

@@ -0,0 +1,40 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NVGPU_GR_SUBCTX_PRIV_H
#define NVGPU_GR_SUBCTX_PRIV_H
struct nvgpu_mem;
/**
* GR subcontext data structure.
*
* One subcontext is allocated per GPU channel.
*/
struct nvgpu_gr_subctx {
/**
* Memory to hold subcontext header image.
*/
struct nvgpu_mem ctx_header;
};
#endif /* NVGPU_GR_SUBCTX_PRIV_H */

View File

@@ -0,0 +1,690 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/io.h>
#include <nvgpu/bug.h>
#include <nvgpu/string.h>
#include <nvgpu/power_features/pg.h>
#ifdef CONFIG_NVGPU_LS_PMU
#include <nvgpu/pmu/pmu_pg.h>
#endif
#include "zbc_priv.h"
#define ZBC_ENTRY_UPDATED 1
#define ZBC_ENTRY_ADDED 2
static void nvgpu_gr_zbc_update_stencil_reg(struct gk20a *g,
struct nvgpu_gr_zbc_entry *stencil_val, u32 index)
{
/* update l2 table */
if (g->ops.ltc.set_zbc_s_entry != NULL) {
g->ops.ltc.set_zbc_s_entry(g, stencil_val->stencil, index);
}
/* update zbc stencil registers */
g->ops.gr.zbc.add_stencil(g, stencil_val, index);
}
static int nvgpu_gr_zbc_add_stencil(struct gk20a *g, struct nvgpu_gr_zbc *zbc,
struct nvgpu_gr_zbc_entry *stencil_val)
{
struct zbc_stencil_table *s_tbl;
u32 i;
int entry_added = -ENOSPC;
bool entry_exist = false;
/* search existing tables */
for (i = zbc->min_stencil_index; i <= zbc->max_used_stencil_index;
i++) {
s_tbl = &zbc->zbc_s_tbl[i];
if ((s_tbl->ref_cnt != 0U) &&
(s_tbl->stencil == stencil_val->stencil) &&
(s_tbl->format == stencil_val->format)) {
s_tbl->ref_cnt = nvgpu_safe_add_u32(s_tbl->ref_cnt, 1U);
entry_exist = true;
entry_added = ZBC_ENTRY_UPDATED;
break;
}
}
/* add new table */
if (!entry_exist &&
(zbc->max_used_stencil_index < zbc->max_stencil_index)) {
/* Increment used index and add new entry at that index */
zbc->max_used_stencil_index =
nvgpu_safe_add_u32(zbc->max_used_stencil_index, 1U);
s_tbl = &zbc->zbc_s_tbl[zbc->max_used_stencil_index];
WARN_ON(s_tbl->ref_cnt != 0U);
/* update sw copy */
s_tbl->stencil = stencil_val->stencil;
s_tbl->format = stencil_val->format;
s_tbl->ref_cnt = nvgpu_safe_add_u32(s_tbl->ref_cnt, 1U);
nvgpu_gr_zbc_update_stencil_reg(g, stencil_val,
zbc->max_used_stencil_index);
entry_added = ZBC_ENTRY_ADDED;
}
return entry_added;
}
static void nvgpu_gr_zbc_update_depth_reg(struct gk20a *g,
struct nvgpu_gr_zbc_entry *depth_val, u32 index)
{
/* update l2 table */
g->ops.ltc.set_zbc_depth_entry(g, depth_val->depth, index);
/* update zbc registers */
g->ops.gr.zbc.add_depth(g, depth_val, index);
}
static int nvgpu_gr_zbc_add_depth(struct gk20a *g, struct nvgpu_gr_zbc *zbc,
struct nvgpu_gr_zbc_entry *depth_val)
{
struct zbc_depth_table *d_tbl;
u32 i;
int entry_added = -ENOSPC;
bool entry_exist = false;
/* search existing tables */
for (i = zbc->min_depth_index; i <= zbc->max_used_depth_index; i++) {
d_tbl = &zbc->zbc_dep_tbl[i];
if ((d_tbl->ref_cnt != 0U) &&
(d_tbl->depth == depth_val->depth) &&
(d_tbl->format == depth_val->format)) {
d_tbl->ref_cnt = nvgpu_safe_add_u32(d_tbl->ref_cnt, 1U);
entry_exist = true;
entry_added = ZBC_ENTRY_UPDATED;
break;
}
}
/* add new table */
if (!entry_exist &&
(zbc->max_used_depth_index < zbc->max_depth_index)) {
/* Increment used index and add new entry at that index */
zbc->max_used_depth_index =
nvgpu_safe_add_u32(zbc->max_used_depth_index, 1U);
d_tbl = &zbc->zbc_dep_tbl[zbc->max_used_depth_index];
WARN_ON(d_tbl->ref_cnt != 0U);
/* update sw copy */
d_tbl->depth = depth_val->depth;
d_tbl->format = depth_val->format;
d_tbl->ref_cnt = nvgpu_safe_add_u32(d_tbl->ref_cnt, 1U);
nvgpu_gr_zbc_update_depth_reg(g, depth_val,
zbc->max_used_depth_index);
entry_added = ZBC_ENTRY_ADDED;
}
return entry_added;
}
static void nvgpu_gr_zbc_update_color_reg(struct gk20a *g,
struct nvgpu_gr_zbc_entry *color_val, u32 index)
{
/* update l2 table */
g->ops.ltc.set_zbc_color_entry(g, color_val->color_l2, index);
/* update zbc registers */
g->ops.gr.zbc.add_color(g, color_val, index);
}
static int nvgpu_gr_zbc_add_color(struct gk20a *g, struct nvgpu_gr_zbc *zbc,
struct nvgpu_gr_zbc_entry *color_val)
{
struct zbc_color_table *c_tbl;
u32 i;
int entry_added = -ENOSPC;
bool entry_exist = false;
/* search existing table */
for (i = zbc->min_color_index; i <= zbc->max_used_color_index; i++) {
c_tbl = &zbc->zbc_col_tbl[i];
if ((c_tbl->ref_cnt != 0U) &&
(c_tbl->format == color_val->format) &&
(nvgpu_memcmp((u8 *)c_tbl->color_ds,
(u8 *)color_val->color_ds,
sizeof(color_val->color_ds)) == 0) &&
(nvgpu_memcmp((u8 *)c_tbl->color_l2,
(u8 *)color_val->color_l2,
sizeof(color_val->color_l2)) == 0)) {
c_tbl->ref_cnt = nvgpu_safe_add_u32(c_tbl->ref_cnt, 1U);
entry_exist = true;
entry_added = ZBC_ENTRY_UPDATED;
break;
}
}
/* add new entry */
if (!entry_exist &&
(zbc->max_used_color_index < zbc->max_color_index)) {
/* Increment used index and add new entry at that index */
zbc->max_used_color_index =
nvgpu_safe_add_u32(zbc->max_used_color_index, 1U);
c_tbl = &zbc->zbc_col_tbl[zbc->max_used_color_index];
WARN_ON(c_tbl->ref_cnt != 0U);
/* update local copy */
for (i = 0; i < NVGPU_GR_ZBC_COLOR_VALUE_SIZE; i++) {
c_tbl->color_l2[i] = color_val->color_l2[i];
c_tbl->color_ds[i] = color_val->color_ds[i];
}
c_tbl->format = color_val->format;
c_tbl->ref_cnt = nvgpu_safe_add_u32(c_tbl->ref_cnt, 1U);
nvgpu_gr_zbc_update_color_reg(g, color_val,
zbc->max_used_color_index);
entry_added = ZBC_ENTRY_ADDED;
}
return entry_added;
}
static int nvgpu_gr_zbc_add(struct gk20a *g, struct nvgpu_gr_zbc *zbc,
struct nvgpu_gr_zbc_entry *zbc_val)
{
	int added = 0;
#if defined(CONFIG_NVGPU_LS_PMU) && defined(CONFIG_NVGPU_POWER_PG)
u32 entries;
#endif
/* no endian swap ? */
nvgpu_mutex_acquire(&zbc->zbc_lock);
nvgpu_speculation_barrier();
switch (zbc_val->type) {
case NVGPU_GR_ZBC_TYPE_COLOR:
added = nvgpu_gr_zbc_add_color(g, zbc, zbc_val);
break;
case NVGPU_GR_ZBC_TYPE_DEPTH:
added = nvgpu_gr_zbc_add_depth(g, zbc, zbc_val);
break;
case NVGPU_GR_ZBC_TYPE_STENCIL:
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL)) {
added = nvgpu_gr_zbc_add_stencil(g, zbc, zbc_val);
		} else {
			nvgpu_err(g,
				"zbc stencil table not supported");
			added = -EINVAL;
			goto err_mutex;
		}
break;
default:
nvgpu_err(g,
"invalid zbc table type %d", zbc_val->type);
added = -EINVAL;
goto err_mutex;
}
#if defined(CONFIG_NVGPU_LS_PMU) && defined(CONFIG_NVGPU_POWER_PG)
if (added == ZBC_ENTRY_ADDED) {
/* update zbc for elpg only when new entry is added */
entries = max(
nvgpu_safe_sub_u32(zbc->max_used_color_index,
zbc->min_color_index),
nvgpu_safe_sub_u32(zbc->max_used_depth_index,
zbc->min_depth_index));
if (g->elpg_enabled) {
nvgpu_pmu_save_zbc(g, entries);
}
}
#endif
err_mutex:
nvgpu_mutex_release(&zbc->zbc_lock);
if (added < 0) {
return added;
}
return 0;
}
int nvgpu_gr_zbc_set_table(struct gk20a *g, struct nvgpu_gr_zbc *zbc,
struct nvgpu_gr_zbc_entry *zbc_val)
{
nvgpu_log(g, gpu_dbg_zbc, " zbc_val->type %u", zbc_val->type);
return nvgpu_pg_elpg_protected_call(g,
nvgpu_gr_zbc_add(g, zbc, zbc_val));
}
/*
 * Get a zbc table entry specified by index.
 * When the type is NVGPU_GR_ZBC_TYPE_INVALID, return the color table
 * size in index_size instead.
 */
int nvgpu_gr_zbc_query_table(struct gk20a *g, struct nvgpu_gr_zbc *zbc,
struct nvgpu_gr_zbc_query_params *query_params)
{
u32 index = query_params->index_size;
u32 i;
nvgpu_speculation_barrier();
switch (query_params->type) {
case NVGPU_GR_ZBC_TYPE_INVALID:
nvgpu_log(g, gpu_dbg_zbc, "Query zbc size");
query_params->index_size = nvgpu_safe_add_u32(
nvgpu_safe_sub_u32(zbc->max_color_index,
zbc->min_color_index), 1U);
break;
case NVGPU_GR_ZBC_TYPE_COLOR:
if ((index < zbc->min_color_index) ||
(index > zbc->max_color_index)) {
nvgpu_err(g, "invalid zbc color table index %u", index);
return -EINVAL;
}
nvgpu_log(g, gpu_dbg_zbc, "Query zbc color at index %u", index);
nvgpu_speculation_barrier();
for (i = 0; i < NVGPU_GR_ZBC_COLOR_VALUE_SIZE; i++) {
query_params->color_l2[i] =
zbc->zbc_col_tbl[index].color_l2[i];
query_params->color_ds[i] =
zbc->zbc_col_tbl[index].color_ds[i];
}
query_params->format = zbc->zbc_col_tbl[index].format;
query_params->ref_cnt = zbc->zbc_col_tbl[index].ref_cnt;
break;
case NVGPU_GR_ZBC_TYPE_DEPTH:
if ((index < zbc->min_depth_index) ||
(index > zbc->max_depth_index)) {
nvgpu_err(g, "invalid zbc depth table index %u", index);
return -EINVAL;
}
nvgpu_log(g, gpu_dbg_zbc, "Query zbc depth at index %u", index);
nvgpu_speculation_barrier();
query_params->depth = zbc->zbc_dep_tbl[index].depth;
query_params->format = zbc->zbc_dep_tbl[index].format;
query_params->ref_cnt = zbc->zbc_dep_tbl[index].ref_cnt;
break;
case NVGPU_GR_ZBC_TYPE_STENCIL:
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL)) {
if ((index < zbc->min_stencil_index) ||
(index > zbc->max_stencil_index)) {
nvgpu_err(g,
"invalid zbc stencil table index %u",
index);
return -EINVAL;
}
nvgpu_log(g, gpu_dbg_zbc,
"Query zbc stencil at index %u", index);
nvgpu_speculation_barrier();
query_params->stencil = zbc->zbc_s_tbl[index].stencil;
query_params->format = zbc->zbc_s_tbl[index].format;
query_params->ref_cnt = zbc->zbc_s_tbl[index].ref_cnt;
} else {
nvgpu_err(g, "invalid zbc table type");
return -EINVAL;
}
break;
default:
nvgpu_err(g, "invalid zbc table type");
return -EINVAL;
}
return 0;
}
/*
* Update zbc table registers as per sw copy of zbc tables
*/
void nvgpu_gr_zbc_load_table(struct gk20a *g, struct nvgpu_gr_zbc *zbc)
{
unsigned int i;
for (i = zbc->min_color_index; i <= zbc->max_used_color_index; i++) {
struct zbc_color_table *c_tbl = &zbc->zbc_col_tbl[i];
struct nvgpu_gr_zbc_entry zbc_val;
zbc_val.type = NVGPU_GR_ZBC_TYPE_COLOR;
nvgpu_memcpy((u8 *)zbc_val.color_ds,
(u8 *)c_tbl->color_ds, sizeof(zbc_val.color_ds));
nvgpu_memcpy((u8 *)zbc_val.color_l2,
(u8 *)c_tbl->color_l2, sizeof(zbc_val.color_l2));
zbc_val.format = c_tbl->format;
nvgpu_gr_zbc_update_color_reg(g, &zbc_val, i);
}
for (i = zbc->min_depth_index; i <= zbc->max_used_depth_index; i++) {
struct zbc_depth_table *d_tbl = &zbc->zbc_dep_tbl[i];
struct nvgpu_gr_zbc_entry zbc_val;
zbc_val.type = NVGPU_GR_ZBC_TYPE_DEPTH;
zbc_val.depth = d_tbl->depth;
zbc_val.format = d_tbl->format;
nvgpu_gr_zbc_update_depth_reg(g, &zbc_val, i);
}
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL)) {
for (i = zbc->min_stencil_index;
i <= zbc->max_used_stencil_index; i++) {
struct zbc_stencil_table *s_tbl = &zbc->zbc_s_tbl[i];
struct nvgpu_gr_zbc_entry zbc_val;
zbc_val.type = NVGPU_GR_ZBC_TYPE_STENCIL;
zbc_val.stencil = s_tbl->stencil;
zbc_val.format = s_tbl->format;
nvgpu_gr_zbc_update_stencil_reg(g, &zbc_val, i);
}
}
}
static void nvgpu_gr_zbc_load_default_sw_stencil_table(struct gk20a *g,
struct nvgpu_gr_zbc *zbc)
{
u32 index = zbc->min_stencil_index;
zbc->zbc_s_tbl[index].stencil = 0x0;
zbc->zbc_s_tbl[index].format = GR_ZBC_STENCIL_CLEAR_FMT_U8;
zbc->zbc_s_tbl[index].ref_cnt =
nvgpu_safe_add_u32(zbc->zbc_s_tbl[index].ref_cnt, 1U);
index = nvgpu_safe_add_u32(index, 1U);
zbc->zbc_s_tbl[index].stencil = 0x1;
zbc->zbc_s_tbl[index].format = GR_ZBC_STENCIL_CLEAR_FMT_U8;
zbc->zbc_s_tbl[index].ref_cnt =
nvgpu_safe_add_u32(zbc->zbc_s_tbl[index].ref_cnt, 1U);
index = nvgpu_safe_add_u32(index, 1U);
zbc->zbc_s_tbl[index].stencil = 0xff;
zbc->zbc_s_tbl[index].format = GR_ZBC_STENCIL_CLEAR_FMT_U8;
zbc->zbc_s_tbl[index].ref_cnt =
nvgpu_safe_add_u32(zbc->zbc_s_tbl[index].ref_cnt, 1U);
zbc->max_used_stencil_index = index;
}
static void nvgpu_gr_zbc_load_default_sw_depth_table(struct gk20a *g,
struct nvgpu_gr_zbc *zbc)
{
u32 index = zbc->min_depth_index;
zbc->zbc_dep_tbl[index].format = GR_ZBC_Z_FMT_VAL_FP32;
zbc->zbc_dep_tbl[index].depth = 0x3f800000;
zbc->zbc_dep_tbl[index].ref_cnt =
nvgpu_safe_add_u32(zbc->zbc_dep_tbl[index].ref_cnt, 1U);
index = nvgpu_safe_add_u32(index, 1U);
zbc->zbc_dep_tbl[index].format = GR_ZBC_Z_FMT_VAL_FP32;
zbc->zbc_dep_tbl[index].depth = 0;
zbc->zbc_dep_tbl[index].ref_cnt =
nvgpu_safe_add_u32(zbc->zbc_dep_tbl[index].ref_cnt, 1U);
zbc->max_used_depth_index = index;
}
static void nvgpu_gr_zbc_load_default_sw_color_table(struct gk20a *g,
struct nvgpu_gr_zbc *zbc)
{
u32 i;
u32 index = zbc->min_color_index;
/* Opaque black (i.e. solid black, fmt 0x28 = A8B8G8R8) */
zbc->zbc_col_tbl[index].format = GR_ZBC_SOLID_BLACK_COLOR_FMT;
for (i = 0U; i < NVGPU_GR_ZBC_COLOR_VALUE_SIZE; i++) {
zbc->zbc_col_tbl[index].color_ds[i] = 0U;
zbc->zbc_col_tbl[index].color_l2[i] = 0xff000000U;
}
zbc->zbc_col_tbl[index].color_ds[3] = 0x3f800000U;
zbc->zbc_col_tbl[index].ref_cnt =
nvgpu_safe_add_u32(zbc->zbc_col_tbl[index].ref_cnt, 1U);
index = nvgpu_safe_add_u32(index, 1U);
/* Transparent black = (fmt 1 = zero) */
zbc->zbc_col_tbl[index].format = GR_ZBC_TRANSPARENT_BLACK_COLOR_FMT;
for (i = 0; i < NVGPU_GR_ZBC_COLOR_VALUE_SIZE; i++) {
zbc->zbc_col_tbl[index].color_ds[i] = 0U;
zbc->zbc_col_tbl[index].color_l2[i] = 0U;
}
zbc->zbc_col_tbl[index].ref_cnt =
nvgpu_safe_add_u32(zbc->zbc_col_tbl[index].ref_cnt, 1U);
index = nvgpu_safe_add_u32(index, 1U);
/* Opaque white (i.e. solid white) = (fmt 2 = uniform 1) */
zbc->zbc_col_tbl[index].format = GR_ZBC_SOLID_WHITE_COLOR_FMT;
for (i = 0; i < NVGPU_GR_ZBC_COLOR_VALUE_SIZE; i++) {
zbc->zbc_col_tbl[index].color_ds[i] = 0x3f800000U;
zbc->zbc_col_tbl[index].color_l2[i] = 0xffffffffU;
}
zbc->zbc_col_tbl[index].ref_cnt =
nvgpu_safe_add_u32(zbc->zbc_col_tbl[index].ref_cnt, 1U);
zbc->max_used_color_index = index;
}
static void nvgpu_gr_zbc_init_indices(struct gk20a *g, struct nvgpu_gr_zbc *zbc)
{
struct nvgpu_gr_zbc_table_indices zbc_indices;
g->ops.gr.zbc.init_table_indices(g, &zbc_indices);
zbc->min_color_index = zbc_indices.min_color_index;
zbc->max_color_index = zbc_indices.max_color_index;
zbc->min_depth_index = zbc_indices.min_depth_index;
zbc->max_depth_index = zbc_indices.max_depth_index;
zbc->min_stencil_index = zbc_indices.min_stencil_index;
zbc->max_stencil_index = zbc_indices.max_stencil_index;
nvgpu_log(g, gpu_dbg_zbc, "zbc->min_color_index %u",
zbc->min_color_index);
nvgpu_log(g, gpu_dbg_zbc, "zbc->max_color_index %u",
zbc->max_color_index);
nvgpu_log(g, gpu_dbg_zbc, "zbc->min_depth_index %u",
zbc->min_depth_index);
nvgpu_log(g, gpu_dbg_zbc, "zbc->max_depth_index %u",
zbc->max_depth_index);
nvgpu_log(g, gpu_dbg_zbc, "zbc->min_stencil_index %u",
zbc->min_stencil_index);
nvgpu_log(g, gpu_dbg_zbc, "zbc->max_stencil_index %u",
zbc->max_stencil_index);
}
static void nvgpu_gr_zbc_load_default_sw_table(struct gk20a *g,
struct nvgpu_gr_zbc *zbc)
{
nvgpu_mutex_init(&zbc->zbc_lock);
nvgpu_gr_zbc_load_default_sw_color_table(g, zbc);
nvgpu_gr_zbc_load_default_sw_depth_table(g, zbc);
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL)) {
nvgpu_gr_zbc_load_default_sw_stencil_table(g, zbc);
}
}
static int gr_zbc_allocate_local_tbls(struct gk20a *g, struct nvgpu_gr_zbc *zbc)
{
u32 zbc_col_size = nvgpu_safe_add_u32(zbc->max_color_index,
zbc->min_color_index);
u32 zbc_dep_size = nvgpu_safe_add_u32(zbc->max_depth_index,
zbc->min_depth_index);
u32 zbc_s_size = nvgpu_safe_add_u32(zbc->max_stencil_index,
zbc->min_stencil_index);
zbc->zbc_col_tbl = nvgpu_kzalloc(g,
sizeof(struct zbc_color_table) * zbc_col_size);
if (zbc->zbc_col_tbl == NULL) {
goto alloc_col_tbl_err;
}
zbc->zbc_dep_tbl = nvgpu_kzalloc(g,
sizeof(struct zbc_depth_table) * zbc_dep_size);
if (zbc->zbc_dep_tbl == NULL) {
goto alloc_dep_tbl_err;
}
zbc->zbc_s_tbl = nvgpu_kzalloc(g,
sizeof(struct zbc_stencil_table) * zbc_s_size);
if (zbc->zbc_s_tbl == NULL) {
goto alloc_s_tbl_err;
}
return 0;
alloc_s_tbl_err:
nvgpu_kfree(g, zbc->zbc_dep_tbl);
alloc_dep_tbl_err:
nvgpu_kfree(g, zbc->zbc_col_tbl);
alloc_col_tbl_err:
return -ENOMEM;
}
/* allocate the struct and load the table */
int nvgpu_gr_zbc_init(struct gk20a *g, struct nvgpu_gr_zbc **zbc)
{
int ret = -ENOMEM;
struct nvgpu_gr_zbc *gr_zbc = NULL;
*zbc = NULL;
gr_zbc = nvgpu_kzalloc(g, sizeof(*gr_zbc));
if (gr_zbc == NULL) {
return ret;
}
nvgpu_gr_zbc_init_indices(g, gr_zbc);
ret = gr_zbc_allocate_local_tbls(g, gr_zbc);
if (ret != 0) {
goto alloc_err;
}
nvgpu_gr_zbc_load_default_sw_table(g, gr_zbc);
*zbc = gr_zbc;
return ret;
alloc_err:
nvgpu_kfree(g, gr_zbc);
return ret;
}
/* deallocate the memory for the struct */
void nvgpu_gr_zbc_deinit(struct gk20a *g, struct nvgpu_gr_zbc *zbc)
{
if (zbc == NULL) {
return;
}
nvgpu_kfree(g, zbc->zbc_col_tbl);
nvgpu_kfree(g, zbc->zbc_dep_tbl);
nvgpu_kfree(g, zbc->zbc_s_tbl);
nvgpu_kfree(g, zbc);
}
struct nvgpu_gr_zbc_entry *nvgpu_gr_zbc_entry_alloc(struct gk20a *g)
{
return nvgpu_kzalloc(g, sizeof(struct nvgpu_gr_zbc_entry));
}
void nvgpu_gr_zbc_entry_free(struct gk20a *g, struct nvgpu_gr_zbc_entry *entry)
{
nvgpu_kfree(g, entry);
}
u32 nvgpu_gr_zbc_get_entry_color_ds(struct nvgpu_gr_zbc_entry *entry,
int idx)
{
return entry->color_ds[idx];
}
void nvgpu_gr_zbc_set_entry_color_ds(struct nvgpu_gr_zbc_entry *entry,
int idx, u32 ds)
{
entry->color_ds[idx] = ds;
}
u32 nvgpu_gr_zbc_get_entry_color_l2(struct nvgpu_gr_zbc_entry *entry,
int idx)
{
return entry->color_l2[idx];
}
void nvgpu_gr_zbc_set_entry_color_l2(struct nvgpu_gr_zbc_entry *entry,
int idx, u32 l2)
{
entry->color_l2[idx] = l2;
}
u32 nvgpu_gr_zbc_get_entry_depth(struct nvgpu_gr_zbc_entry *entry)
{
return entry->depth;
}
void nvgpu_gr_zbc_set_entry_depth(struct nvgpu_gr_zbc_entry *entry,
u32 depth)
{
entry->depth = depth;
}
u32 nvgpu_gr_zbc_get_entry_stencil(struct nvgpu_gr_zbc_entry *entry)
{
return entry->stencil;
}
void nvgpu_gr_zbc_set_entry_stencil(struct nvgpu_gr_zbc_entry *entry,
u32 stencil)
{
entry->stencil = stencil;
}
u32 nvgpu_gr_zbc_get_entry_type(struct nvgpu_gr_zbc_entry *entry)
{
return entry->type;
}
void nvgpu_gr_zbc_set_entry_type(struct nvgpu_gr_zbc_entry *entry,
u32 type)
{
entry->type = type;
}
u32 nvgpu_gr_zbc_get_entry_format(struct nvgpu_gr_zbc_entry *entry)
{
return entry->format;
}
void nvgpu_gr_zbc_set_entry_format(struct nvgpu_gr_zbc_entry *entry,
u32 format)
{
entry->format = format;
}
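
Since the entry accessors above are the only way callers populate an nvgpu_gr_zbc_entry, a short sketch of the intended flow follows. It is illustrative only: the 0x28 format and the colour words are assumptions mirroring the default colour table, and example_add_solid_red is a hypothetical caller, not driver code.

/* Hypothetical caller: register a solid-red A8B8G8R8 clear value. */
static int example_add_solid_red(struct gk20a *g, struct nvgpu_gr_zbc *zbc)
{
	struct nvgpu_gr_zbc_entry *entry;
	u32 i;
	int err;

	entry = nvgpu_gr_zbc_entry_alloc(g);
	if (entry == NULL) {
		return -ENOMEM;
	}

	nvgpu_gr_zbc_set_entry_type(entry, NVGPU_GR_ZBC_TYPE_COLOR);
	nvgpu_gr_zbc_set_entry_format(entry, 0x28U); /* A8B8G8R8 (assumed) */
	for (i = 0U; i < NVGPU_GR_ZBC_COLOR_VALUE_SIZE; i++) {
		nvgpu_gr_zbc_set_entry_color_ds(entry, (int)i, 0U);
		nvgpu_gr_zbc_set_entry_color_l2(entry, (int)i, 0xff0000ffU);
	}
	nvgpu_gr_zbc_set_entry_color_ds(entry, 0, 0x3f800000U); /* R = 1.0f */
	nvgpu_gr_zbc_set_entry_color_ds(entry, 3, 0x3f800000U); /* A = 1.0f */

	/* Ref-counts an existing match or programs a new L2/GR entry. */
	err = nvgpu_gr_zbc_set_table(g, zbc, entry);

	nvgpu_gr_zbc_entry_free(g, entry);
	return err;
}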

View File

@@ -0,0 +1,89 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NVGPU_GR_ZBC_PRIV_H
#define NVGPU_GR_ZBC_PRIV_H
#include <nvgpu/gr/zbc.h>
/* Opaque black (i.e. solid black, fmt 0x28 = A8B8G8R8) */
#define GR_ZBC_SOLID_BLACK_COLOR_FMT 0x28
/* Transparent black = (fmt 1 = zero) */
#define GR_ZBC_TRANSPARENT_BLACK_COLOR_FMT 0x1
/* Opaque white (i.e. solid white) = (fmt 2 = uniform 1) */
#define GR_ZBC_SOLID_WHITE_COLOR_FMT 0x2
/* z format with fp32 */
#define GR_ZBC_Z_FMT_VAL_FP32 0x1
#define GR_ZBC_STENCIL_CLEAR_FMT_INVAILD 0U
#define GR_ZBC_STENCIL_CLEAR_FMT_U8 1U
struct zbc_color_table {
u32 color_ds[NVGPU_GR_ZBC_COLOR_VALUE_SIZE];
u32 color_l2[NVGPU_GR_ZBC_COLOR_VALUE_SIZE];
u32 format;
u32 ref_cnt;
};
struct zbc_depth_table {
u32 depth;
u32 format;
u32 ref_cnt;
};
struct zbc_stencil_table {
u32 stencil;
u32 format;
u32 ref_cnt;
};
struct nvgpu_gr_zbc_entry {
u32 color_ds[NVGPU_GR_ZBC_COLOR_VALUE_SIZE];
u32 color_l2[NVGPU_GR_ZBC_COLOR_VALUE_SIZE];
u32 depth;
u32 stencil;
u32 type;
u32 format;
};
/*
* HW ZBC table valid entries start at index 1.
* Entry 0 is reserved to mean "no matching entry found, do not use ZBC"
*/
struct nvgpu_gr_zbc {
struct nvgpu_mutex zbc_lock; /* Lock to access zbc table */
struct zbc_color_table *zbc_col_tbl; /* SW zbc color table pointer */
struct zbc_depth_table *zbc_dep_tbl; /* SW zbc depth table pointer */
struct zbc_stencil_table *zbc_s_tbl; /* SW zbc stencil table pointer */
u32 min_color_index; /* Minimum valid color table index */
u32 min_depth_index; /* Minimum valid depth table index */
u32 min_stencil_index; /* Minimum valid stencil table index */
u32 max_color_index; /* Maximum valid color table index */
u32 max_depth_index; /* Maximum valid depth table index */
u32 max_stencil_index; /* Maximum valid stencil table index */
u32 max_used_color_index; /* Max used color table index */
u32 max_used_depth_index; /* Max used depth table index */
u32 max_used_stencil_index; /* Max used stencil table index */
};
#endif /* NVGPU_GR_ZBC_PRIV_H */
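
A small worked illustration of the index scheme noted above; the bounds are assumed values for the sketch, not read from any particular chip's HAL, and example_zbc_sw_color_slots is a hypothetical helper.

/*
 * Illustration only: with min_color_index = 1 and max_color_index = 15,
 * HW entries 1..15 are usable, index 0 is the reserved "no ZBC" slot,
 * and a SW copy of max + min = 16 slots can be indexed directly by the
 * HW index (this mirrors the sizing done in gr_zbc_allocate_local_tbls).
 */
static inline u32 example_zbc_sw_color_slots(const struct nvgpu_gr_zbc *zbc)
{
	return zbc->max_color_index + zbc->min_color_index;
}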

View File

@@ -0,0 +1,176 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/log.h>
#include <nvgpu/io.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/gr/subctx.h>
#include <nvgpu/gr/ctx.h>
#include <nvgpu/gr/zcull.h>
#include <nvgpu/gr/config.h>
#include "zcull_priv.h"
int nvgpu_gr_zcull_init(struct gk20a *g, struct nvgpu_gr_zcull **gr_zcull,
u32 size, struct nvgpu_gr_config *config)
{
struct nvgpu_gr_zcull *zcull;
int err = 0;
nvgpu_log(g, gpu_dbg_gr, "size = %u", size);
zcull = nvgpu_kzalloc(g, sizeof(*zcull));
if (zcull == NULL) {
err = -ENOMEM;
goto exit;
}
zcull->g = g;
zcull->zcull_ctxsw_image_size = size;
zcull->aliquot_width = nvgpu_gr_config_get_tpc_count(config) * 16U;
zcull->aliquot_height = 16;
zcull->width_align_pixels =
nvgpu_gr_config_get_tpc_count(config) * 16U;
zcull->height_align_pixels = 32;
zcull->aliquot_size =
zcull->aliquot_width * zcull->aliquot_height;
/* assume no floor sweeping since we only have 1 tpc in 1 gpc */
zcull->pixel_squares_by_aliquots =
nvgpu_gr_config_get_zcb_count(config) * 16U * 16U *
nvgpu_gr_config_get_tpc_count(config) /
(nvgpu_gr_config_get_gpc_count(config) *
nvgpu_gr_config_get_gpc_tpc_count(config, 0U));
exit:
*gr_zcull = zcull;
return err;
}
void nvgpu_gr_zcull_deinit(struct gk20a *g, struct nvgpu_gr_zcull *gr_zcull)
{
if (gr_zcull == NULL) {
return;
}
nvgpu_kfree(g, gr_zcull);
}
u32 nvgpu_gr_get_ctxsw_zcull_size(struct gk20a *g,
struct nvgpu_gr_zcull *gr_zcull)
{
/* assuming zcull has already been initialized */
return gr_zcull->zcull_ctxsw_image_size;
}
int nvgpu_gr_zcull_init_hw(struct gk20a *g,
struct nvgpu_gr_zcull *gr_zcull,
struct nvgpu_gr_config *gr_config)
{
u32 *zcull_map_tiles, *zcull_bank_counters;
u32 map_counter;
u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS);
u32 num_tpc_per_gpc = nvgpu_get_litter_value(g,
GPU_LIT_NUM_TPC_PER_GPC);
u32 zcull_alloc_num = num_gpcs * num_tpc_per_gpc;
u32 map_tile_count;
int ret = 0;
nvgpu_log(g, gpu_dbg_gr, " ");
if (nvgpu_gr_config_get_map_tiles(gr_config) == NULL) {
return -1;
}
	if (zcull_alloc_num % 8U != 0U) {
		/* Total 8 fields per map reg i.e. tile_0 to tile_7 */
		zcull_alloc_num += 8U - (zcull_alloc_num % 8U);
	}
zcull_map_tiles = nvgpu_kzalloc(g, zcull_alloc_num * sizeof(u32));
if (zcull_map_tiles == NULL) {
		nvgpu_err(g,
			"failed to allocate zcull map tiles");
return -ENOMEM;
}
zcull_bank_counters = nvgpu_kzalloc(g, zcull_alloc_num * sizeof(u32));
if (zcull_bank_counters == NULL) {
nvgpu_err(g,
"failed to allocate zcull bank counters");
nvgpu_kfree(g, zcull_map_tiles);
return -ENOMEM;
}
for (map_counter = 0;
map_counter < nvgpu_gr_config_get_tpc_count(gr_config);
map_counter++) {
map_tile_count =
nvgpu_gr_config_get_map_tile_count(gr_config,
map_counter);
zcull_map_tiles[map_counter] =
zcull_bank_counters[map_tile_count];
zcull_bank_counters[map_tile_count]++;
}
if (g->ops.gr.zcull.program_zcull_mapping != NULL) {
g->ops.gr.zcull.program_zcull_mapping(g, zcull_alloc_num,
zcull_map_tiles);
}
nvgpu_kfree(g, zcull_map_tiles);
nvgpu_kfree(g, zcull_bank_counters);
if (g->ops.gr.zcull.init_zcull_hw != NULL) {
ret = g->ops.gr.zcull.init_zcull_hw(g, gr_zcull, gr_config);
if (ret != 0) {
nvgpu_err(g, "failed to init zcull hw. err:%d", ret);
return ret;
}
}
nvgpu_log(g, gpu_dbg_gr, "done");
return 0;
}
int nvgpu_gr_zcull_ctx_setup(struct gk20a *g, struct nvgpu_gr_subctx *subctx,
struct nvgpu_gr_ctx *gr_ctx)
{
int ret = 0;
if (subctx != NULL) {
ret = nvgpu_gr_ctx_zcull_setup(g, gr_ctx, false);
if (ret == 0) {
nvgpu_gr_subctx_zcull_setup(g, subctx, gr_ctx);
}
} else {
ret = nvgpu_gr_ctx_zcull_setup(g, gr_ctx, true);
}
return ret;
}
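
The 8-fields-per-map-register padding in nvgpu_gr_zcull_init_hw is easy to misread, so a minimal sketch of the intended round-up follows; example_zcull_padded_alloc is a hypothetical helper, not driver code.

/* Illustration: pad the tile count up to the 8 tile fields per map reg. */
static inline u32 example_zcull_padded_alloc(u32 num_gpcs, u32 num_tpc_per_gpc)
{
	u32 n = num_gpcs * num_tpc_per_gpc;

	if ((n % 8U) != 0U) {
		n += 8U - (n % 8U);
	}
	return n;
}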

View File

@@ -0,0 +1,45 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NVGPU_GR_ZCULL_PRIV_H
#define NVGPU_GR_ZCULL_PRIV_H
#include <nvgpu/types.h>
struct gk20a;
struct nvgpu_gr_zcull {
struct gk20a *g;
u32 aliquot_width;
u32 aliquot_height;
u32 aliquot_size;
u32 total_aliquots;
u32 width_align_pixels;
u32 height_align_pixels;
u32 pixel_squares_by_aliquots;
u32 zcull_ctxsw_image_size;
};
#endif /* NVGPU_GR_ZCULL_PRIV_H */