gpu: nvgpu: add support for FECS VA

- On t186, the ucode expects a physical address to be
  programmed for the FECS trace buffer.
- On t194, the ucode expects a GPU VA to be programmed
  for the FECS trace buffer. This patch adds the support
  to handle this on native Linux (sketched below).
- Increase the size of the FECS trace buffer, as a few
  entries were getting dropped due to overflow of the
  FECS trace buffer.
- Move FECS trace buffer handling into the global
  context buffer.
- Add an extra check for the update of the mailbox1
  register (Bug 200417403).
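
A condensed sketch of the new address selection in
gk20a_fecs_trace_bind_channel() (full change in the diff below); the
helper name fecs_trace_buffer_addr() is illustrative, not part of the
source:

    /* Sketch only: condenses the VA-vs-PA branch added by this patch.
     * The helper name is hypothetical; all other identifiers are taken
     * from the diff below. */
    static u64 fecs_trace_buffer_addr(struct gk20a *g,
                    struct nvgpu_gr_ctx *ch_ctx, u32 *aperture_mask)
    {
            struct nvgpu_mem *mem =
                    &g->gr.global_ctx_buffer[FECS_TRACE_BUFFER].mem;

            if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA)) {
                    /* t194: ucode consumes a GPU VA; no aperture bits. */
                    *aperture_mask = 0;
                    return ch_ctx->global_ctx_buffer_va[FECS_TRACE_BUFFER_VA];
            }

            /* t186: ucode consumes a physical address plus aperture. */
            *aperture_mask = nvgpu_aperture_mask(g, mem,
                    ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(),
                    ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_coherent_f(),
                    ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f());
            return nvgpu_inst_block_addr(g, mem);
    }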

EVLR-2077

Change-Id: I7c3324ce9341976a1375e0afe6c53c424a053723
Signed-off-by: Vaibhav Kachore <vkachore@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1536028
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Nirav Patel <nipatel@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Author:    Vaibhav Kachore <vkachore@nvidia.com>
Date:      2018-02-22 16:45:30 +05:30
Committer: Tejal Kudav
Parent:    97d697a848
Commit:    ca3215c6b2
8 changed files with 119 additions and 62 deletions

@@ -28,6 +28,7 @@
#include <nvgpu/kmem.h>
#include <nvgpu/dma.h>
#include <nvgpu/enabled.h>
#include <nvgpu/bug.h>
#include <nvgpu/hashtable.h>
#include <nvgpu/circ_buf.h>
@@ -51,7 +52,7 @@
* If HW circular buffer is getting too many "buffer full" conditions,
* increasing this constant should help (it drives Linux' internal buffer size).
*/
#define GK20A_FECS_TRACE_NUM_RECORDS (1 << 6)
#define GK20A_FECS_TRACE_NUM_RECORDS (1 << 10)
#define GK20A_FECS_TRACE_HASH_BITS 8 /* 2^8 */
#define GK20A_FECS_TRACE_FRAME_PERIOD_US (1000000ULL/60ULL)
#define GK20A_FECS_TRACE_PTIMER_SHIFT 5
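
For scale, a back-of-envelope sizing sketch; the 128-byte record size
is an assumption for illustration (the real value comes from
ctxsw_prog_record_timestamp_record_size_in_bytes_v()):

    /* Assumed record size, for illustration only. */
    #define FECS_TRACE_RECORD_SIZE_GUESS 128u

    /* (1 << 6) * 128 = 8 KiB before this change;
     * (1 << 10) * 128 = 128 KiB after. */
    static inline size_t fecs_trace_ring_bytes(void)
    {
            return GK20A_FECS_TRACE_NUM_RECORDS * FECS_TRACE_RECORD_SIZE_GUESS;
    }
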
@@ -74,7 +75,6 @@ struct gk20a_fecs_trace_hash_ent {
struct gk20a_fecs_trace {
struct nvgpu_mem trace_buf;
DECLARE_HASHTABLE(pid_hash_table, GK20A_FECS_TRACE_HASH_BITS);
struct nvgpu_mutex hash_lock;
struct nvgpu_mutex poll_lock;
@@ -106,10 +106,12 @@ static inline int gk20a_fecs_trace_num_ts(void)
}
static struct gk20a_fecs_trace_record *gk20a_fecs_trace_get_record(
struct gk20a_fecs_trace *trace, int idx)
struct gk20a *g, int idx)
{
struct nvgpu_mem *mem = &g->gr.global_ctx_buffer[FECS_TRACE_BUFFER].mem;
return (struct gk20a_fecs_trace_record *)
((u8 *) trace->trace_buf.cpu_va
((u8 *) mem->cpu_va
+ (idx * ctxsw_prog_record_timestamp_record_size_in_bytes_v()));
}
@@ -258,12 +260,13 @@ static int gk20a_fecs_trace_ring_read(struct gk20a *g, int index)
struct gk20a_fecs_trace *trace = g->fecs_trace;
pid_t cur_pid;
pid_t new_pid;
int count = 0;
/* for now, only one VM */
const int vmid = 0;
struct gk20a_fecs_trace_record *r = gk20a_fecs_trace_get_record(
trace, index);
struct gk20a_fecs_trace_record *r =
gk20a_fecs_trace_get_record(g, index);
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
"consuming record trace=%p read=%d record=%p", trace, index, r);
@@ -334,10 +337,11 @@ static int gk20a_fecs_trace_ring_read(struct gk20a *g, int index)
continue;
gk20a_ctxsw_trace_write(g, &entry);
count++;
}
gk20a_ctxsw_trace_wake_up(g, vmid);
return 0;
return count;
}
int gk20a_fecs_trace_poll(struct gk20a *g)
@@ -376,15 +380,16 @@ int gk20a_fecs_trace_poll(struct gk20a *g)
g->ops.mm.fb_flush(g);
while (read != write) {
/* Ignore error code, as we want to consume all records */
(void)gk20a_fecs_trace_ring_read(g, read);
cnt = gk20a_fecs_trace_ring_read(g, read);
if (cnt <= 0)
break;
/* Get to next record. */
read = (read + 1) & (GK20A_FECS_TRACE_NUM_RECORDS - 1);
}
/* ensure FECS records has been updated before incrementing read index */
nvgpu_smp_wmb();
nvgpu_wmb();
gk20a_fecs_trace_set_read_index(g, read);
done:
@@ -411,20 +416,10 @@ static int gk20a_fecs_trace_periodic_polling(void *arg)
return 0;
}
static int gk20a_fecs_trace_alloc_ring(struct gk20a *g)
size_t gk20a_fecs_trace_buffer_size(struct gk20a *g)
{
struct gk20a_fecs_trace *trace = g->fecs_trace;
return nvgpu_dma_alloc_sys(g, GK20A_FECS_TRACE_NUM_RECORDS
* ctxsw_prog_record_timestamp_record_size_in_bytes_v(),
&trace->trace_buf);
}
static void gk20a_fecs_trace_free_ring(struct gk20a *g)
{
struct gk20a_fecs_trace *trace = g->fecs_trace;
nvgpu_dma_free(g, &trace->trace_buf);
return GK20A_FECS_TRACE_NUM_RECORDS
* ctxsw_prog_record_timestamp_record_size_in_bytes_v();
}
#ifdef CONFIG_DEBUG_FS
@@ -460,8 +455,8 @@ static int gk20a_fecs_trace_debugfs_ring_seq_show(
{
loff_t *pos = (loff_t *) v;
struct gk20a *g = *(struct gk20a **)s->private;
struct gk20a_fecs_trace *trace = g->fecs_trace;
struct gk20a_fecs_trace_record *r = gk20a_fecs_trace_get_record(trace, *pos);
struct gk20a_fecs_trace_record *r =
gk20a_fecs_trace_get_record(g, *pos);
int i;
const u32 invalid_tag =
ctxsw_prog_record_timestamp_timestamp_hi_tag_invalid_timestamp_v();
@@ -588,12 +583,6 @@ int gk20a_fecs_trace_init(struct gk20a *g)
goto clean_poll_lock;
BUG_ON(!is_power_of_2(GK20A_FECS_TRACE_NUM_RECORDS));
err = gk20a_fecs_trace_alloc_ring(g);
if (err) {
nvgpu_warn(g, "failed to allocate FECS ring");
goto clean_hash_lock;
}
hash_init(trace->pid_hash_table);
__nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, true);
@@ -604,8 +593,6 @@ int gk20a_fecs_trace_init(struct gk20a *g)
return 0;
clean_hash_lock:
nvgpu_mutex_destroy(&trace->hash_lock);
clean_poll_lock:
nvgpu_mutex_destroy(&trace->poll_lock);
clean:
@@ -624,14 +611,14 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g,
u32 lo;
u32 hi;
u64 pa;
u64 addr;
struct tsg_gk20a *tsg;
struct nvgpu_gr_ctx *ch_ctx;
struct gk20a_fecs_trace *trace = g->fecs_trace;
struct nvgpu_mem *mem;
u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(g, ch);
pid_t pid;
u32 aperture;
u32 aperture_mask;
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw,
"chid=%d context_ptr=%x inst_block=%llx",
@@ -648,34 +635,54 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g,
if (!trace)
return -ENOMEM;
pa = nvgpu_inst_block_addr(g, &trace->trace_buf);
if (!pa)
return -ENOMEM;
aperture = nvgpu_aperture_mask(g, &trace->trace_buf,
mem = &g->gr.global_ctx_buffer[FECS_TRACE_BUFFER].mem;
if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA)) {
addr = ch_ctx->global_ctx_buffer_va[FECS_TRACE_BUFFER_VA];
nvgpu_log(g, gpu_dbg_ctxsw, "gpu_va=%llx", addr);
aperture_mask = 0;
} else {
addr = nvgpu_inst_block_addr(g, mem);
nvgpu_log(g, gpu_dbg_ctxsw, "pa=%llx", addr);
aperture_mask = nvgpu_aperture_mask(g, mem,
ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(),
ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_coherent_f(),
ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f());
}
if (!addr)
return -ENOMEM;
lo = u64_lo32(addr);
hi = u64_hi32(addr);
mem = &ch_ctx->mem;
if (nvgpu_mem_begin(g, mem))
return -ENOMEM;
lo = u64_lo32(pa);
hi = u64_hi32(pa);
nvgpu_log(g, gpu_dbg_ctxsw, "addr_hi=%x addr_lo=%x count=%d", hi,
lo, GK20A_FECS_TRACE_NUM_RECORDS);
nvgpu_mem_wr(g, mem,
ctxsw_prog_main_image_context_timestamp_buffer_control_o(),
ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f(
GK20A_FECS_TRACE_NUM_RECORDS));
nvgpu_mem_end(g, mem);
if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA))
mem = &ch->ctx_header.mem;
if (nvgpu_mem_begin(g, mem))
return -ENOMEM;
nvgpu_mem_wr(g, mem,
ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(),
lo);
nvgpu_mem_wr(g, mem,
ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(),
ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(hi) |
aperture);
nvgpu_mem_wr(g, mem,
ctxsw_prog_main_image_context_timestamp_buffer_control_o(),
ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f(
GK20A_FECS_TRACE_NUM_RECORDS));
aperture_mask);
nvgpu_mem_end(g, mem);
@@ -728,7 +735,6 @@ int gk20a_fecs_trace_deinit(struct gk20a *g)
return 0;
nvgpu_thread_stop(&trace->poll_task);
gk20a_fecs_trace_free_ring(g);
gk20a_fecs_trace_free_hash_table(g);
nvgpu_mutex_destroy(&g->fecs_trace->hash_lock);

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -39,5 +39,6 @@ int gk20a_gr_max_entries(struct gk20a *g,
int gk20a_fecs_trace_enable(struct gk20a *g);
int gk20a_fecs_trace_disable(struct gk20a *g);
bool gk20a_fecs_trace_is_enabled(struct gk20a *g);
size_t gk20a_fecs_trace_buffer_size(struct gk20a *g);
#endif /* __FECS_TRACE_GK20A_H */

@@ -41,6 +41,7 @@
#include "gk20a.h"
#include "gr_gk20a.h"
#include "gk20a/fecs_trace_gk20a.h"
#include "gr_ctx_gk20a.h"
#include "gr_pri_gk20a.h"
#include "regops_gk20a.h"
@@ -2499,6 +2500,10 @@ int gr_gk20a_init_ctx_state(struct gk20a *g)
return ret;
}
g->gr.ctx_vars.priv_access_map_size = 512 * 1024;
#ifdef CONFIG_GK20A_CTXSW_TRACE
g->gr.ctx_vars.fecs_trace_buffer_size =
gk20a_fecs_trace_buffer_size(g);
#endif
}
nvgpu_log_fn(g, "done");
@@ -2630,6 +2635,20 @@ int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g)
if (err)
goto clean_up;
#ifdef CONFIG_GK20A_CTXSW_TRACE
nvgpu_log_info(g, "fecs_trace_buffer_size : %d",
gr->ctx_vars.fecs_trace_buffer_size);
err = nvgpu_dma_alloc_sys(g,
gr->ctx_vars.fecs_trace_buffer_size,
&gr->global_ctx_buffer[FECS_TRACE_BUFFER].mem);
if (err)
goto clean_up;
gr->global_ctx_buffer[FECS_TRACE_BUFFER].destroy =
gk20a_gr_destroy_ctx_buffer;
#endif
nvgpu_log_fn(g, "done");
return 0;
@@ -2769,6 +2788,21 @@ int gr_gk20a_map_global_ctx_buffers(struct gk20a *g,
g_bfr_index[PRIV_ACCESS_MAP_VA] = PRIV_ACCESS_MAP;
tsg->gr_ctx.global_ctx_buffer_mapped = true;
#ifdef CONFIG_GK20A_CTXSW_TRACE
/* FECS trace buffer */
if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA)) {
mem = &gr->global_ctx_buffer[FECS_TRACE_BUFFER].mem;
gpu_va = nvgpu_gmmu_map(ch_vm, mem, mem->size, 0,
gk20a_mem_flag_none, true, mem->aperture);
if (!gpu_va)
goto clean_up;
g_bfr_va[FECS_TRACE_BUFFER_VA] = gpu_va;
g_bfr_size[FECS_TRACE_BUFFER_VA] = mem->size;
g_bfr_index[FECS_TRACE_BUFFER_VA] = FECS_TRACE_BUFFER;
}
#endif
return 0;
clean_up:
@@ -3050,6 +3084,14 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, u32 class_num, u32 flags)
"fail to commit gr ctx buffer");
goto out;
}
#ifdef CONFIG_GK20A_CTXSW_TRACE
if (g->ops.fecs_trace.bind_channel && !c->vpr) {
err = g->ops.fecs_trace.bind_channel(g, c);
if (err)
nvgpu_warn(g,
"fail to bind channel for ctxsw trace");
}
#endif
}
nvgpu_log_fn(g, "done");

@@ -79,6 +79,7 @@ enum /* global_ctx_buffer */ {
ATTRIBUTE_VPR = 5,
GOLDEN_CTX = 6,
PRIV_ACCESS_MAP = 7,
FECS_TRACE_BUFFER = 8,
NR_GLOBAL_CTX_BUF = 9
};
@@ -89,6 +90,7 @@ enum /*global_ctx_buffer_va */ {
ATTRIBUTE_VA = 2,
GOLDEN_CTX_VA = 3,
PRIV_ACCESS_MAP_VA = 4,
FECS_TRACE_BUFFER_VA = 5,
NR_GLOBAL_CTX_BUF_VA = 6
};
@@ -290,6 +292,8 @@ struct gr_gk20a {
u32 priv_access_map_size;
u32 fecs_trace_buffer_size;
struct gr_ucode_gk20a ucode;
struct av_list_gk20a sw_bundle_init;

@@ -834,6 +834,7 @@ int gp106_init_hal(struct gk20a *g)
__nvgpu_set_enabled(g, NVGPU_PMU_PSTATE, true);
__nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false);
__nvgpu_set_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR, false);
__nvgpu_set_enabled(g, NVGPU_FECS_TRACE_VA, false);
/* Read fuses to check if gpu needs to boot in secure/non-secure mode */
if (gops->fuse.check_priv_security(g))

@@ -732,6 +732,7 @@ int gp10b_init_hal(struct gk20a *g)
__nvgpu_set_enabled(g, NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP, true);
__nvgpu_set_enabled(g, NVGPU_PMU_PSTATE, false);
__nvgpu_set_enabled(g, NVGPU_FECS_TRACE_VA, false);
/* Read fuses to check if gpu needs to boot in secure/non-secure mode */
if (gops->fuse.check_priv_security(g))

@@ -586,20 +586,20 @@ static const struct gpu_ops gv11b_ops = {
},
#ifdef CONFIG_GK20A_CTXSW_TRACE
.fecs_trace = {
.alloc_user_buffer = NULL,
.free_user_buffer = NULL,
.mmap_user_buffer = NULL,
.init = NULL,
.deinit = NULL,
.enable = NULL,
.disable = NULL,
.is_enabled = NULL,
.reset = NULL,
.alloc_user_buffer = gk20a_ctxsw_dev_ring_alloc,
.free_user_buffer = gk20a_ctxsw_dev_ring_free,
.mmap_user_buffer = gk20a_ctxsw_dev_mmap_buffer,
.init = gk20a_fecs_trace_init,
.deinit = gk20a_fecs_trace_deinit,
.enable = gk20a_fecs_trace_enable,
.disable = gk20a_fecs_trace_disable,
.is_enabled = gk20a_fecs_trace_is_enabled,
.reset = gk20a_fecs_trace_reset,
.flush = NULL,
.poll = NULL,
.bind_channel = NULL,
.unbind_channel = NULL,
.max_entries = NULL,
.poll = gk20a_fecs_trace_poll,
.bind_channel = gk20a_fecs_trace_bind_channel,
.unbind_channel = gk20a_fecs_trace_unbind_channel,
.max_entries = gk20a_gr_max_entries,
},
#endif /* CONFIG_GK20A_CTXSW_TRACE */
.mm = {
@@ -843,6 +843,7 @@ int gv11b_init_hal(struct gk20a *g)
}
__nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false);
__nvgpu_set_enabled(g, NVGPU_FECS_TRACE_VA, true);
g->bootstrap_owner = LSF_BOOTSTRAP_OWNER_DEFAULT;
__nvgpu_set_enabled(g, NVGPU_SUPPORT_MULTIPLE_WPR, false);

@@ -34,6 +34,7 @@ struct gk20a;
#define NVGPU_IS_FMODEL 1
#define NVGPU_DRIVER_IS_DYING 2
#define NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP 3
#define NVGPU_FECS_TRACE_VA 4
/*
* ECC flags