mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-24 10:34:43 +03:00
gpu: nvgpu: add support for FECS VA
- On t186, the ucode expects a physical address to be programmed for the FECS trace buffer.
- On t194, the ucode expects a GPU VA to be programmed for the FECS trace buffer. This patch adds the extra support needed to handle this change for Linux native.
- Increase the size of the FECS trace buffer (a few entries were being dropped because the FECS trace buffer overflowed).
- Move FECS trace buffer handling into the global context buffers.
- Add an extra check when updating the mailbox1 register.

(Bug 200417403) EVLR-2077

Change-Id: I7c3324ce9341976a1375e0afe6c53c424a053723
Signed-off-by: Vaibhav Kachore <vkachore@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1536028
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Nirav Patel <nipatel@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
committed by
Tejal Kudav
parent
97d697a848
commit
ca3215c6b2
@@ -28,6 +28,7 @@
|
||||
|
||||
#include <nvgpu/kmem.h>
|
||||
#include <nvgpu/dma.h>
|
||||
#include <nvgpu/enabled.h>
|
||||
#include <nvgpu/bug.h>
|
||||
#include <nvgpu/hashtable.h>
|
||||
#include <nvgpu/circ_buf.h>
|
||||
@@ -51,7 +52,7 @@
|
||||
* If HW circular buffer is getting too many "buffer full" conditions,
|
||||
* increasing this constant should help (it drives Linux' internal buffer size).
|
||||
*/
|
||||
#define GK20A_FECS_TRACE_NUM_RECORDS (1 << 6)
|
||||
#define GK20A_FECS_TRACE_NUM_RECORDS (1 << 10)
|
||||
#define GK20A_FECS_TRACE_HASH_BITS 8 /* 2^8 */
|
||||
#define GK20A_FECS_TRACE_FRAME_PERIOD_US (1000000ULL/60ULL)
|
||||
#define GK20A_FECS_TRACE_PTIMER_SHIFT 5
|
||||
@@ -74,7 +75,6 @@ struct gk20a_fecs_trace_hash_ent {
|
||||
|
||||
struct gk20a_fecs_trace {
|
||||
|
||||
struct nvgpu_mem trace_buf;
|
||||
DECLARE_HASHTABLE(pid_hash_table, GK20A_FECS_TRACE_HASH_BITS);
|
||||
struct nvgpu_mutex hash_lock;
|
||||
struct nvgpu_mutex poll_lock;
|
||||
@@ -106,10 +106,12 @@ static inline int gk20a_fecs_trace_num_ts(void)
|
||||
}
|
||||
|
||||
static struct gk20a_fecs_trace_record *gk20a_fecs_trace_get_record(
|
||||
struct gk20a_fecs_trace *trace, int idx)
|
||||
struct gk20a *g, int idx)
|
||||
{
|
||||
struct nvgpu_mem *mem = &g->gr.global_ctx_buffer[FECS_TRACE_BUFFER].mem;
|
||||
|
||||
return (struct gk20a_fecs_trace_record *)
|
||||
((u8 *) trace->trace_buf.cpu_va
|
||||
((u8 *) mem->cpu_va
|
||||
+ (idx * ctxsw_prog_record_timestamp_record_size_in_bytes_v()));
|
||||
}
|
||||
|
||||
@@ -258,12 +260,13 @@ static int gk20a_fecs_trace_ring_read(struct gk20a *g, int index)
|
||||
struct gk20a_fecs_trace *trace = g->fecs_trace;
|
||||
pid_t cur_pid;
|
||||
pid_t new_pid;
|
||||
int count = 0;
|
||||
|
||||
/* for now, only one VM */
|
||||
const int vmid = 0;
|
||||
|
||||
struct gk20a_fecs_trace_record *r = gk20a_fecs_trace_get_record(
|
||||
trace, index);
|
||||
struct gk20a_fecs_trace_record *r =
|
||||
gk20a_fecs_trace_get_record(g, index);
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
|
||||
"consuming record trace=%p read=%d record=%p", trace, index, r);
|
||||
@@ -334,10 +337,11 @@ static int gk20a_fecs_trace_ring_read(struct gk20a *g, int index)
|
||||
continue;
|
||||
|
||||
gk20a_ctxsw_trace_write(g, &entry);
|
||||
count++;
|
||||
}
|
||||
|
||||
gk20a_ctxsw_trace_wake_up(g, vmid);
|
||||
return 0;
|
||||
return count;
|
||||
}
|
||||
|
||||
int gk20a_fecs_trace_poll(struct gk20a *g)
|
||||
@@ -376,15 +380,16 @@ int gk20a_fecs_trace_poll(struct gk20a *g)
|
||||
g->ops.mm.fb_flush(g);
|
||||
|
||||
while (read != write) {
|
||||
/* Ignore error code, as we want to consume all records */
|
||||
(void)gk20a_fecs_trace_ring_read(g, read);
|
||||
cnt = gk20a_fecs_trace_ring_read(g, read);
|
||||
if (cnt <= 0)
|
||||
break;
|
||||
|
||||
/* Get to next record. */
|
||||
read = (read + 1) & (GK20A_FECS_TRACE_NUM_RECORDS - 1);
|
||||
}
|
||||
|
||||
/* ensure FECS records have been updated before incrementing read index */
|
||||
nvgpu_smp_wmb();
|
||||
nvgpu_wmb();
|
||||
gk20a_fecs_trace_set_read_index(g, read);
|
||||
|
||||
done:
|
||||
@@ -411,20 +416,10 @@ static int gk20a_fecs_trace_periodic_polling(void *arg)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int gk20a_fecs_trace_alloc_ring(struct gk20a *g)
|
||||
size_t gk20a_fecs_trace_buffer_size(struct gk20a *g)
|
||||
{
|
||||
struct gk20a_fecs_trace *trace = g->fecs_trace;
|
||||
|
||||
return nvgpu_dma_alloc_sys(g, GK20A_FECS_TRACE_NUM_RECORDS
|
||||
* ctxsw_prog_record_timestamp_record_size_in_bytes_v(),
|
||||
&trace->trace_buf);
|
||||
}
|
||||
|
||||
static void gk20a_fecs_trace_free_ring(struct gk20a *g)
|
||||
{
|
||||
struct gk20a_fecs_trace *trace = g->fecs_trace;
|
||||
|
||||
nvgpu_dma_free(g, &trace->trace_buf);
|
||||
return GK20A_FECS_TRACE_NUM_RECORDS
|
||||
* ctxsw_prog_record_timestamp_record_size_in_bytes_v();
|
||||
}
|
||||
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
@@ -460,8 +455,8 @@ static int gk20a_fecs_trace_debugfs_ring_seq_show(
|
||||
{
|
||||
loff_t *pos = (loff_t *) v;
|
||||
struct gk20a *g = *(struct gk20a **)s->private;
|
||||
struct gk20a_fecs_trace *trace = g->fecs_trace;
|
||||
struct gk20a_fecs_trace_record *r = gk20a_fecs_trace_get_record(trace, *pos);
|
||||
struct gk20a_fecs_trace_record *r =
|
||||
gk20a_fecs_trace_get_record(g, *pos);
|
||||
int i;
|
||||
const u32 invalid_tag =
|
||||
ctxsw_prog_record_timestamp_timestamp_hi_tag_invalid_timestamp_v();
|
||||
@@ -588,12 +583,6 @@ int gk20a_fecs_trace_init(struct gk20a *g)
|
||||
goto clean_poll_lock;
|
||||
|
||||
BUG_ON(!is_power_of_2(GK20A_FECS_TRACE_NUM_RECORDS));
|
||||
err = gk20a_fecs_trace_alloc_ring(g);
|
||||
if (err) {
|
||||
nvgpu_warn(g, "failed to allocate FECS ring");
|
||||
goto clean_hash_lock;
|
||||
}
|
||||
|
||||
hash_init(trace->pid_hash_table);
|
||||
|
||||
__nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, true);
|
||||
@@ -604,8 +593,6 @@ int gk20a_fecs_trace_init(struct gk20a *g)
|
||||
|
||||
return 0;
|
||||
|
||||
clean_hash_lock:
|
||||
nvgpu_mutex_destroy(&trace->hash_lock);
|
||||
clean_poll_lock:
|
||||
nvgpu_mutex_destroy(&trace->poll_lock);
|
||||
clean:
|
||||
@@ -624,14 +611,14 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g,
|
||||
|
||||
u32 lo;
|
||||
u32 hi;
|
||||
u64 pa;
|
||||
u64 addr;
|
||||
struct tsg_gk20a *tsg;
|
||||
struct nvgpu_gr_ctx *ch_ctx;
|
||||
struct gk20a_fecs_trace *trace = g->fecs_trace;
|
||||
struct nvgpu_mem *mem;
|
||||
u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(g, ch);
|
||||
pid_t pid;
|
||||
u32 aperture;
|
||||
u32 aperture_mask;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw,
|
||||
"chid=%d context_ptr=%x inst_block=%llx",
|
||||
@@ -648,34 +635,54 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g,
|
||||
if (!trace)
|
||||
return -ENOMEM;
|
||||
|
||||
pa = nvgpu_inst_block_addr(g, &trace->trace_buf);
|
||||
if (!pa)
|
||||
return -ENOMEM;
|
||||
aperture = nvgpu_aperture_mask(g, &trace->trace_buf,
|
||||
mem = &g->gr.global_ctx_buffer[FECS_TRACE_BUFFER].mem;
|
||||
|
||||
if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA)) {
|
||||
addr = ch_ctx->global_ctx_buffer_va[FECS_TRACE_BUFFER_VA];
|
||||
nvgpu_log(g, gpu_dbg_ctxsw, "gpu_va=%llx", addr);
|
||||
aperture_mask = 0;
|
||||
} else {
|
||||
addr = nvgpu_inst_block_addr(g, mem);
|
||||
nvgpu_log(g, gpu_dbg_ctxsw, "pa=%llx", addr);
|
||||
aperture_mask = nvgpu_aperture_mask(g, mem,
|
||||
ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(),
|
||||
ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_coherent_f(),
|
||||
ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f());
|
||||
}
|
||||
if (!addr)
|
||||
return -ENOMEM;
|
||||
|
||||
lo = u64_lo32(addr);
|
||||
hi = u64_hi32(addr);
|
||||
|
||||
mem = &ch_ctx->mem;
|
||||
|
||||
if (nvgpu_mem_begin(g, mem))
|
||||
return -ENOMEM;
|
||||
|
||||
lo = u64_lo32(pa);
|
||||
hi = u64_hi32(pa);
|
||||
|
||||
nvgpu_log(g, gpu_dbg_ctxsw, "addr_hi=%x addr_lo=%x count=%d", hi,
|
||||
lo, GK20A_FECS_TRACE_NUM_RECORDS);
|
||||
|
||||
nvgpu_mem_wr(g, mem,
|
||||
ctxsw_prog_main_image_context_timestamp_buffer_control_o(),
|
||||
ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f(
|
||||
GK20A_FECS_TRACE_NUM_RECORDS));
|
||||
|
||||
nvgpu_mem_end(g, mem);
|
||||
|
||||
if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA))
|
||||
mem = &ch->ctx_header.mem;
|
||||
|
||||
if (nvgpu_mem_begin(g, mem))
|
||||
return -ENOMEM;
|
||||
|
||||
nvgpu_mem_wr(g, mem,
|
||||
ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(),
|
||||
lo);
|
||||
nvgpu_mem_wr(g, mem,
|
||||
ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(),
|
||||
ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(hi) |
|
||||
aperture);
|
||||
nvgpu_mem_wr(g, mem,
|
||||
ctxsw_prog_main_image_context_timestamp_buffer_control_o(),
|
||||
ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f(
|
||||
GK20A_FECS_TRACE_NUM_RECORDS));
|
||||
aperture_mask);
|
||||
|
||||
nvgpu_mem_end(g, mem);
|
||||
|
||||
@@ -728,7 +735,6 @@ int gk20a_fecs_trace_deinit(struct gk20a *g)
|
||||
return 0;
|
||||
|
||||
nvgpu_thread_stop(&trace->poll_task);
|
||||
gk20a_fecs_trace_free_ring(g);
|
||||
gk20a_fecs_trace_free_hash_table(g);
|
||||
|
||||
nvgpu_mutex_destroy(&g->fecs_trace->hash_lock);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -39,5 +39,6 @@ int gk20a_gr_max_entries(struct gk20a *g,
|
||||
int gk20a_fecs_trace_enable(struct gk20a *g);
|
||||
int gk20a_fecs_trace_disable(struct gk20a *g);
|
||||
bool gk20a_fecs_trace_is_enabled(struct gk20a *g);
|
||||
size_t gk20a_fecs_trace_buffer_size(struct gk20a *g);
|
||||
|
||||
#endif /* __FECS_TRACE_GK20A_H */
|
||||
|
||||
@@ -41,6 +41,7 @@
|
||||
|
||||
#include "gk20a.h"
|
||||
#include "gr_gk20a.h"
|
||||
#include "gk20a/fecs_trace_gk20a.h"
|
||||
#include "gr_ctx_gk20a.h"
|
||||
#include "gr_pri_gk20a.h"
|
||||
#include "regops_gk20a.h"
|
||||
@@ -2499,6 +2500,10 @@ int gr_gk20a_init_ctx_state(struct gk20a *g)
|
||||
return ret;
|
||||
}
|
||||
g->gr.ctx_vars.priv_access_map_size = 512 * 1024;
|
||||
#ifdef CONFIG_GK20A_CTXSW_TRACE
|
||||
g->gr.ctx_vars.fecs_trace_buffer_size =
|
||||
gk20a_fecs_trace_buffer_size(g);
|
||||
#endif
|
||||
}
|
||||
|
||||
nvgpu_log_fn(g, "done");
|
||||
@@ -2630,6 +2635,20 @@ int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g)
|
||||
if (err)
|
||||
goto clean_up;
|
||||
|
||||
#ifdef CONFIG_GK20A_CTXSW_TRACE
|
||||
nvgpu_log_info(g, "fecs_trace_buffer_size : %d",
|
||||
gr->ctx_vars.fecs_trace_buffer_size);
|
||||
|
||||
err = nvgpu_dma_alloc_sys(g,
|
||||
gr->ctx_vars.fecs_trace_buffer_size,
|
||||
&gr->global_ctx_buffer[FECS_TRACE_BUFFER].mem);
|
||||
if (err)
|
||||
goto clean_up;
|
||||
|
||||
gr->global_ctx_buffer[FECS_TRACE_BUFFER].destroy =
|
||||
gk20a_gr_destroy_ctx_buffer;
|
||||
#endif
|
||||
|
||||
nvgpu_log_fn(g, "done");
|
||||
return 0;
|
||||
|
||||
@@ -2769,6 +2788,21 @@ int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
|
||||
g_bfr_index[PRIV_ACCESS_MAP_VA] = PRIV_ACCESS_MAP;
|
||||
|
||||
tsg->gr_ctx.global_ctx_buffer_mapped = true;
|
||||
|
||||
#ifdef CONFIG_GK20A_CTXSW_TRACE
|
||||
/* FECS trace buffer */
|
||||
if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA)) {
|
||||
mem = &gr->global_ctx_buffer[FECS_TRACE_BUFFER].mem;
|
||||
gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size, 0,
|
||||
gk20a_mem_flag_none, true, mem->aperture);
|
||||
if (!gpu_va)
|
||||
goto clean_up;
|
||||
g_bfr_va[FECS_TRACE_BUFFER_VA] = gpu_va;
|
||||
g_bfr_size[FECS_TRACE_BUFFER_VA] = mem->size;
|
||||
g_bfr_index[FECS_TRACE_BUFFER_VA] = FECS_TRACE_BUFFER;
|
||||
}
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
|
||||
clean_up:
|
||||
@@ -3050,6 +3084,14 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags)
|
||||
"fail to commit gr ctx buffer");
|
||||
goto out;
|
||||
}
|
||||
#ifdef CONFIG_GK20A_CTXSW_TRACE
|
||||
if (g->ops.fecs_trace.bind_channel && !c->vpr) {
|
||||
err = g->ops.fecs_trace.bind_channel(g, c);
|
||||
if (err)
|
||||
nvgpu_warn(g,
|
||||
"fail to bind channel for ctxsw trace");
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
nvgpu_log_fn(g, "done");
|
||||
|
||||
@@ -79,6 +79,7 @@ enum /* global_ctx_buffer */ {
|
||||
ATTRIBUTE_VPR = 5,
|
||||
GOLDEN_CTX = 6,
|
||||
PRIV_ACCESS_MAP = 7,
|
||||
FECS_TRACE_BUFFER = 8,
|
||||
NR_GLOBAL_CTX_BUF = 9
|
||||
};
|
||||
|
||||
@@ -89,6 +90,7 @@ enum /*global_ctx_buffer_va */ {
|
||||
ATTRIBUTE_VA = 2,
|
||||
GOLDEN_CTX_VA = 3,
|
||||
PRIV_ACCESS_MAP_VA = 4,
|
||||
FECS_TRACE_BUFFER_VA = 5,
|
||||
NR_GLOBAL_CTX_BUF_VA = 6
|
||||
};
|
||||
|
||||
@@ -290,6 +292,8 @@ struct gr_gk20a {
|
||||
|
||||
u32 priv_access_map_size;
|
||||
|
||||
u32 fecs_trace_buffer_size;
|
||||
|
||||
struct gr_ucode_gk20a ucode;
|
||||
|
||||
struct av_list_gk20a sw_bundle_init;
|
||||
|
||||
@@ -834,6 +834,7 @@ int gp106_init_hal(struct gk20a *g)
|
||||
__nvgpu_set_enabled(g, NVGPU_PMU_PSTATE, true);
|
||||
__nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false);
|
||||
__nvgpu_set_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR, false);
|
||||
__nvgpu_set_enabled(g, NVGPU_FECS_TRACE_VA, false);
|
||||
|
||||
/* Read fuses to check if gpu needs to boot in secure/non-secure mode */
|
||||
if (gops->fuse.check_priv_security(g))
|
||||
|
||||
@@ -732,6 +732,7 @@ int gp10b_init_hal(struct gk20a *g)
|
||||
|
||||
__nvgpu_set_enabled(g, NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP, true);
|
||||
__nvgpu_set_enabled(g, NVGPU_PMU_PSTATE, false);
|
||||
__nvgpu_set_enabled(g, NVGPU_FECS_TRACE_VA, false);
|
||||
|
||||
/* Read fuses to check if gpu needs to boot in secure/non-secure mode */
|
||||
if (gops->fuse.check_priv_security(g))
|
||||
|
||||
@@ -586,20 +586,20 @@ static const struct gpu_ops gv11b_ops = {
|
||||
},
|
||||
#ifdef CONFIG_GK20A_CTXSW_TRACE
|
||||
.fecs_trace = {
|
||||
.alloc_user_buffer = NULL,
|
||||
.free_user_buffer = NULL,
|
||||
.mmap_user_buffer = NULL,
|
||||
.init = NULL,
|
||||
.deinit = NULL,
|
||||
.enable = NULL,
|
||||
.disable = NULL,
|
||||
.is_enabled = NULL,
|
||||
.reset = NULL,
|
||||
.alloc_user_buffer = gk20a_ctxsw_dev_ring_alloc,
|
||||
.free_user_buffer = gk20a_ctxsw_dev_ring_free,
|
||||
.mmap_user_buffer = gk20a_ctxsw_dev_mmap_buffer,
|
||||
.init = gk20a_fecs_trace_init,
|
||||
.deinit = gk20a_fecs_trace_deinit,
|
||||
.enable = gk20a_fecs_trace_enable,
|
||||
.disable = gk20a_fecs_trace_disable,
|
||||
.is_enabled = gk20a_fecs_trace_is_enabled,
|
||||
.reset = gk20a_fecs_trace_reset,
|
||||
.flush = NULL,
|
||||
.poll = NULL,
|
||||
.bind_channel = NULL,
|
||||
.unbind_channel = NULL,
|
||||
.max_entries = NULL,
|
||||
.poll = gk20a_fecs_trace_poll,
|
||||
.bind_channel = gk20a_fecs_trace_bind_channel,
|
||||
.unbind_channel = gk20a_fecs_trace_unbind_channel,
|
||||
.max_entries = gk20a_gr_max_entries,
|
||||
},
|
||||
#endif /* CONFIG_GK20A_CTXSW_TRACE */
|
||||
.mm = {
|
||||
@@ -843,6 +843,7 @@ int gv11b_init_hal(struct gk20a *g)
|
||||
}
|
||||
|
||||
__nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false);
|
||||
__nvgpu_set_enabled(g, NVGPU_FECS_TRACE_VA, true);
|
||||
g->bootstrap_owner = LSF_BOOTSTRAP_OWNER_DEFAULT;
|
||||
|
||||
__nvgpu_set_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR, false);
|
||||
|
||||
@@ -34,6 +34,7 @@ struct gk20a;
|
||||
#define NVGPU_IS_FMODEL 1
|
||||
#define NVGPU_DRIVER_IS_DYING 2
|
||||
#define NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP 3
|
||||
#define NVGPU_FECS_TRACE_VA 4
|
||||
|
||||
/*
|
||||
* ECC flags
|
||||
|
||||
Reference in New Issue
Block a user