diff --git a/drivers/gpu/nvgpu/common/gr/fecs_trace/fecs_trace.c b/drivers/gpu/nvgpu/common/gr/fecs_trace/fecs_trace.c index fb8cc15e5..270eda140 100644 --- a/drivers/gpu/nvgpu/common/gr/fecs_trace/fecs_trace.c +++ b/drivers/gpu/nvgpu/common/gr/fecs_trace/fecs_trace.c @@ -24,18 +24,17 @@ #include #include #include +#include +#include #include #include #include - -/* - * TODO: This include is only needed for transition phase to new unit - * Remove as soon as transition is complete - */ -#include "gk20a/fecs_trace_gk20a.h" +#include #ifdef CONFIG_GK20A_CTXSW_TRACE +static int nvgpu_gr_fecs_trace_periodic_polling(void *arg); + int nvgpu_gr_fecs_trace_add_context(struct gk20a *g, u32 context_ptr, pid_t pid, u32 vmid, struct nvgpu_list_node *list) { @@ -266,7 +265,7 @@ int nvgpu_gr_fecs_trace_enable(struct gk20a *g) g->ops.fecs_trace.set_read_index(g, write); err = nvgpu_thread_create(&trace->poll_task, g, - gk20a_fecs_trace_periodic_polling, __func__); + nvgpu_gr_fecs_trace_periodic_polling, __func__); if (err != 0) { nvgpu_warn(g, "failed to create FECS polling task"); goto done; @@ -311,4 +310,233 @@ void nvgpu_gr_fecs_trace_reset_buffer(struct gk20a *g) g->ops.fecs_trace.get_write_index(g)); } +/* + * Converts HW entry format to userspace-facing format and pushes it to the + * queue. + */ +int nvgpu_gr_fecs_trace_ring_read(struct gk20a *g, int index, + u32 *vm_update_mask) +{ + int i; + struct nvgpu_gpu_ctxsw_trace_entry entry = { }; + struct nvgpu_gr_fecs_trace *trace = g->fecs_trace; + pid_t cur_pid = 0, new_pid = 0; + u32 cur_vmid = 0U, new_vmid = 0U; + u32 vmid = 0U; + int count = 0; + + struct nvgpu_fecs_trace_record *r = + nvgpu_gr_fecs_trace_get_record(g, index); + if (r == NULL) { + return -EINVAL; + } + + nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw, + "consuming record trace=%p read=%d record=%p", trace, index, r); + + if (!nvgpu_gr_fecs_trace_is_valid_record(g, r)) { + nvgpu_warn(g, + "trace=%p read=%d record=%p magic_lo=%08x magic_hi=%08x (invalid)", + trace, index, r, r->magic_lo, r->magic_hi); + return -EINVAL; + } + + /* Clear magic_hi to detect cases where CPU could read write index + * before FECS record is actually written to DRAM. This should not + * as we force FECS writes to SYSMEM by reading through PRAMIN. + */ + r->magic_hi = 0; + + if ((r->context_ptr != 0U) && (r->context_id != 0U)) { + nvgpu_gr_fecs_trace_find_pid(g, r->context_ptr, + &trace->context_list, &cur_pid, &cur_vmid); + } else { + cur_vmid = 0xffffffffU; + cur_pid = 0; + } + + if (r->new_context_ptr != 0U) { + nvgpu_gr_fecs_trace_find_pid(g, r->new_context_ptr, + &trace->context_list, &new_pid, &new_vmid); + } else { + new_vmid = 0xffffffffU; + new_pid = 0; + } + + nvgpu_log(g, gpu_dbg_ctxsw, + "context_ptr=%x (vmid=%u pid=%d)", + r->context_ptr, cur_vmid, cur_pid); + nvgpu_log(g, gpu_dbg_ctxsw, + "new_context_ptr=%x (vmid=%u pid=%d)", + r->new_context_ptr, new_vmid, new_pid); + + entry.context_id = r->context_id; + + /* break out FECS record into trace events */ + for (i = 0; i < nvgpu_gr_fecs_trace_num_ts(g); i++) { + + entry.tag = g->ops.gr.ctxsw_prog.hw_get_ts_tag(r->ts[i]); + entry.timestamp = + g->ops.gr.ctxsw_prog.hw_record_ts_timestamp(r->ts[i]); + entry.timestamp <<= GK20A_FECS_TRACE_PTIMER_SHIFT; + + nvgpu_log(g, gpu_dbg_ctxsw, + "tag=%x timestamp=%llx context_id=%08x new_context_id=%08x", + entry.tag, entry.timestamp, r->context_id, + r->new_context_id); + + switch (nvgpu_gpu_ctxsw_tags_to_common_tags(entry.tag)) { + case NVGPU_GPU_CTXSW_TAG_RESTORE_START: + case NVGPU_GPU_CTXSW_TAG_CONTEXT_START: + entry.context_id = r->new_context_id; + entry.pid = new_pid; + entry.vmid = new_vmid; + break; + + case NVGPU_GPU_CTXSW_TAG_CTXSW_REQ_BY_HOST: + case NVGPU_GPU_CTXSW_TAG_FE_ACK: + case NVGPU_GPU_CTXSW_TAG_FE_ACK_WFI: + case NVGPU_GPU_CTXSW_TAG_FE_ACK_GFXP: + case NVGPU_GPU_CTXSW_TAG_FE_ACK_CTAP: + case NVGPU_GPU_CTXSW_TAG_FE_ACK_CILP: + case NVGPU_GPU_CTXSW_TAG_SAVE_END: + entry.context_id = r->context_id; + entry.pid = cur_pid; + entry.vmid = cur_vmid; + break; + + default: + /* tags are not guaranteed to start at the beginning */ + if ((entry.tag != 0) && (entry.tag != + NVGPU_GPU_CTXSW_TAG_INVALID_TIMESTAMP)) { + nvgpu_warn(g, "TAG not found"); + } + continue; + } + + nvgpu_log(g, gpu_dbg_ctxsw, "tag=%x context_id=%x pid=%lld", + entry.tag, entry.context_id, entry.pid); + + if (!entry.context_id) + continue; + + if (g->ops.fecs_trace.vm_dev_write != NULL) { + g->ops.fecs_trace.vm_dev_write(g, entry.vmid, + vm_update_mask, &entry); + } else { + gk20a_ctxsw_trace_write(g, &entry); + } + count++; + } + + gk20a_ctxsw_trace_wake_up(g, vmid); + return count; +} + +int nvgpu_gr_fecs_trace_poll(struct gk20a *g) +{ + struct nvgpu_gr_fecs_trace *trace = g->fecs_trace; + u32 vm_update_mask = 0U; + int read = 0; + int write = 0; + int cnt; + int err = 0; + + nvgpu_mutex_acquire(&trace->poll_lock); + if (trace->enable_count == 0) { + goto done_unlock; + } + + err = gk20a_busy(g); + if (err) { + goto done_unlock; + } + + write = g->ops.fecs_trace.get_write_index(g); + if ((write < 0) || (write >= GK20A_FECS_TRACE_NUM_RECORDS)) { + nvgpu_err(g, + "failed to acquire write index, write=%d", write); + err = write; + goto done; + } + + read = g->ops.fecs_trace.get_read_index(g); + + cnt = CIRC_CNT(write, read, GK20A_FECS_TRACE_NUM_RECORDS); + if (!cnt) + goto done; + + nvgpu_log(g, gpu_dbg_ctxsw, + "circular buffer: read=%d (mailbox=%d) write=%d cnt=%d", + read, g->ops.fecs_trace.get_read_index(g), write, cnt); + + /* Ensure all FECS writes have made it to SYSMEM */ + g->ops.mm.fb_flush(g); + + while (read != write) { + cnt = nvgpu_gr_fecs_trace_ring_read(g, read, &vm_update_mask); + if (cnt <= 0) { + break; + } + + /* Get to next record. */ + read = (read + 1) & (GK20A_FECS_TRACE_NUM_RECORDS - 1); + } + + /* ensure FECS records has been updated before incrementing read index */ + nvgpu_wmb(); + g->ops.fecs_trace.set_read_index(g, read); + + /* + * FECS ucode does a priv holdoff around the assertion of context + * reset. So, pri transactions (e.g. mailbox1 register write) might + * fail due to this. Hence, do write with ack i.e. write and read + * it back to make sure write happened for mailbox1. + */ + while (g->ops.fecs_trace.get_read_index(g) != read) { + nvgpu_log(g, gpu_dbg_ctxsw, "mailbox1 update failed"); + g->ops.fecs_trace.set_read_index(g, read); + } + + if (g->ops.fecs_trace.vm_dev_update) { + g->ops.fecs_trace.vm_dev_update(g, vm_update_mask); + } + +done: + gk20a_idle(g); +done_unlock: + nvgpu_mutex_release(&trace->poll_lock); + return err; +} + +static int nvgpu_gr_fecs_trace_periodic_polling(void *arg) +{ + struct gk20a *g = (struct gk20a *)arg; + struct nvgpu_gr_fecs_trace *trace = g->fecs_trace; + + nvgpu_log(g, gpu_dbg_ctxsw, "thread running"); + + while (!nvgpu_thread_should_stop(&trace->poll_task) && + trace->enable_count > 0U) { + + nvgpu_usleep_range(GK20A_FECS_TRACE_FRAME_PERIOD_US, + GK20A_FECS_TRACE_FRAME_PERIOD_US * 2U); + + nvgpu_gr_fecs_trace_poll(g); + } + + return 0; +} + +int nvgpu_gr_fecs_trace_reset(struct gk20a *g) +{ + nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " "); + + if (!g->ops.fecs_trace.is_enabled(g)) + return 0; + + nvgpu_gr_fecs_trace_poll(g); + return g->ops.fecs_trace.set_read_index(g, 0); +} + #endif /* CONFIG_GK20A_CTXSW_TRACE */ diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c index 36087a8c0..d20b1a126 100644 --- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include @@ -51,193 +50,6 @@ static u32 gk20a_fecs_trace_fecs_context_ptr(struct gk20a *g, struct channel_gk2 return (u32) (nvgpu_inst_block_addr(g, &ch->inst_block) >> 12LL); } -/* - * Converts HW entry format to userspace-facing format and pushes it to the - * queue. - */ -static int gk20a_fecs_trace_ring_read(struct gk20a *g, int index) -{ - int i; - struct nvgpu_gpu_ctxsw_trace_entry entry = { }; - struct nvgpu_gr_fecs_trace *trace = g->fecs_trace; - pid_t cur_pid; - pid_t new_pid; - u32 cur_vmid, new_vmid; - int count = 0; - - /* for now, only one VM */ - const int vmid = 0; - - struct nvgpu_fecs_trace_record *r = - nvgpu_gr_fecs_trace_get_record(g, index); - if (r == NULL) { - return -EINVAL; - } - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw, - "consuming record trace=%p read=%d record=%p", trace, index, r); - - if (unlikely(!nvgpu_gr_fecs_trace_is_valid_record(g, r))) { - nvgpu_warn(g, - "trace=%p read=%d record=%p magic_lo=%08x magic_hi=%08x (invalid)", - trace, index, r, r->magic_lo, r->magic_hi); - return -EINVAL; - } - - /* Clear magic_hi to detect cases where CPU could read write index - * before FECS record is actually written to DRAM. This should not - * as we force FECS writes to SYSMEM by reading through PRAMIN. - */ - r->magic_hi = 0; - - nvgpu_gr_fecs_trace_find_pid(g, r->context_ptr, &trace->context_list, - &cur_pid, &cur_vmid); - nvgpu_gr_fecs_trace_find_pid(g, r->new_context_ptr, &trace->context_list, - &new_pid, &new_vmid); - - nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw, - "context_ptr=%x (pid=%d) new_context_ptr=%x (pid=%d)", - r->context_ptr, cur_pid, r->new_context_ptr, new_pid); - - entry.context_id = r->context_id; - entry.vmid = vmid; - - /* break out FECS record into trace events */ - for (i = 0; i < nvgpu_gr_fecs_trace_num_ts(g); i++) { - - entry.tag = g->ops.gr.ctxsw_prog.hw_get_ts_tag(r->ts[i]); - entry.timestamp = - g->ops.gr.ctxsw_prog.hw_record_ts_timestamp(r->ts[i]); - entry.timestamp <<= GK20A_FECS_TRACE_PTIMER_SHIFT; - - nvgpu_log(g, gpu_dbg_ctxsw, - "tag=%x timestamp=%llx context_id=%08x new_context_id=%08x", - entry.tag, entry.timestamp, r->context_id, - r->new_context_id); - - switch (nvgpu_gpu_ctxsw_tags_to_common_tags(entry.tag)) { - case NVGPU_GPU_CTXSW_TAG_RESTORE_START: - case NVGPU_GPU_CTXSW_TAG_CONTEXT_START: - entry.context_id = r->new_context_id; - entry.pid = new_pid; - break; - - case NVGPU_GPU_CTXSW_TAG_CTXSW_REQ_BY_HOST: - case NVGPU_GPU_CTXSW_TAG_FE_ACK: - case NVGPU_GPU_CTXSW_TAG_FE_ACK_WFI: - case NVGPU_GPU_CTXSW_TAG_FE_ACK_GFXP: - case NVGPU_GPU_CTXSW_TAG_FE_ACK_CTAP: - case NVGPU_GPU_CTXSW_TAG_FE_ACK_CILP: - case NVGPU_GPU_CTXSW_TAG_SAVE_END: - entry.context_id = r->context_id; - entry.pid = cur_pid; - break; - - default: - /* tags are not guaranteed to start at the beginning */ - WARN_ON(entry.tag && (entry.tag != NVGPU_GPU_CTXSW_TAG_INVALID_TIMESTAMP)); - continue; - } - - nvgpu_log(g, gpu_dbg_ctxsw, "tag=%x context_id=%x pid=%lld", - entry.tag, entry.context_id, entry.pid); - - if (!entry.context_id) - continue; - - gk20a_ctxsw_trace_write(g, &entry); - count++; - } - - gk20a_ctxsw_trace_wake_up(g, vmid); - return count; -} - -int gk20a_fecs_trace_poll(struct gk20a *g) -{ - struct nvgpu_gr_fecs_trace *trace = g->fecs_trace; - - int read = 0; - int write = 0; - int cnt; - int err; - - err = gk20a_busy(g); - if (unlikely(err)) - return err; - - nvgpu_mutex_acquire(&trace->poll_lock); - write = g->ops.fecs_trace.get_write_index(g); - if (unlikely((write < 0) || (write >= GK20A_FECS_TRACE_NUM_RECORDS))) { - nvgpu_err(g, - "failed to acquire write index, write=%d", write); - err = write; - goto done; - } - - read = g->ops.fecs_trace.get_read_index(g); - - cnt = CIRC_CNT(write, read, GK20A_FECS_TRACE_NUM_RECORDS); - if (!cnt) - goto done; - - nvgpu_log(g, gpu_dbg_ctxsw, - "circular buffer: read=%d (mailbox=%d) write=%d cnt=%d", - read, g->ops.fecs_trace.get_read_index(g), write, cnt); - - /* Ensure all FECS writes have made it to SYSMEM */ - g->ops.mm.fb_flush(g); - - while (read != write) { - cnt = gk20a_fecs_trace_ring_read(g, read); - if (cnt > 0) { - nvgpu_log(g, gpu_dbg_ctxsw, - "number of trace entries added: %d", cnt); - } - - /* Get to next record. */ - read = (read + 1) & (GK20A_FECS_TRACE_NUM_RECORDS - 1); - } - - /* ensure FECS records has been updated before incrementing read index */ - nvgpu_wmb(); - g->ops.fecs_trace.set_read_index(g, read); - - /* - * FECS ucode does a priv holdoff around the assertion of context - * reset. So, pri transactions (e.g. mailbox1 register write) might - * fail due to this. Hence, do write with ack i.e. write and read - * it back to make sure write happened for mailbox1. - */ - while (g->ops.fecs_trace.get_read_index(g) != read) { - nvgpu_log(g, gpu_dbg_ctxsw, "mailbox1 update failed"); - g->ops.fecs_trace.set_read_index(g, read); - } - -done: - nvgpu_mutex_release(&trace->poll_lock); - gk20a_idle(g); - return err; -} - -int gk20a_fecs_trace_periodic_polling(void *arg) -{ - struct gk20a *g = (struct gk20a *)arg; - struct nvgpu_gr_fecs_trace *trace = g->fecs_trace; - - pr_info("%s: running\n", __func__); - - while (!nvgpu_thread_should_stop(&trace->poll_task)) { - - nvgpu_usleep_range(GK20A_FECS_TRACE_FRAME_PERIOD_US, - GK20A_FECS_TRACE_FRAME_PERIOD_US * 2); - - gk20a_fecs_trace_poll(g); - } - - return 0; -} - int gk20a_fecs_trace_bind_channel(struct gk20a *g, struct channel_gk20a *ch, u32 vmid, struct nvgpu_gr_ctx *gr_ctx) { @@ -322,7 +134,7 @@ int gk20a_fecs_trace_unbind_channel(struct gk20a *g, struct channel_gk20a *ch) if (g->ops.fecs_trace.is_enabled(g)) { if (g->ops.fecs_trace.flush) g->ops.fecs_trace.flush(g); - gk20a_fecs_trace_poll(g); + nvgpu_gr_fecs_trace_poll(g); } nvgpu_gr_fecs_trace_remove_context(g, context_ptr, @@ -331,17 +143,6 @@ int gk20a_fecs_trace_unbind_channel(struct gk20a *g, struct channel_gk20a *ch) return 0; } -int gk20a_fecs_trace_reset(struct gk20a *g) -{ - nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " "); - - if (!g->ops.fecs_trace.is_enabled(g)) - return 0; - - gk20a_fecs_trace_poll(g); - return g->ops.fecs_trace.set_read_index(g, 0); -} - u32 gk20a_fecs_trace_get_buffer_full_mailbox_val(void) { return 0x26; diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.h b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.h index d400a243c..9978700f0 100644 --- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.h @@ -28,13 +28,10 @@ struct channel_gk20a; struct nvgpu_gpu_ctxsw_trace_filter; struct nvgpu_gr_ctx; -int gk20a_fecs_trace_poll(struct gk20a *g); -int gk20a_fecs_trace_periodic_polling(void *arg); int gk20a_fecs_trace_bind_channel(struct gk20a *g, struct channel_gk20a *ch, u32 vmid, struct nvgpu_gr_ctx *gr_ctx); int gk20a_fecs_trace_unbind_channel(struct gk20a *g, struct channel_gk20a *ch); -int gk20a_fecs_trace_reset(struct gk20a *g); u32 gk20a_fecs_trace_get_buffer_full_mailbox_val(void); #endif /* NVGPU_GK20A_FECS_TRACE_GK20A_H */ diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index 75f24fd0d..afdb7377a 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c @@ -672,9 +672,9 @@ static const struct gpu_ops gp10b_ops = { .enable = nvgpu_gr_fecs_trace_enable, .disable = nvgpu_gr_fecs_trace_disable, .is_enabled = nvgpu_gr_fecs_trace_is_enabled, - .reset = gk20a_fecs_trace_reset, + .reset = nvgpu_gr_fecs_trace_reset, .flush = gp10b_fecs_trace_flush, - .poll = gk20a_fecs_trace_poll, + .poll = nvgpu_gr_fecs_trace_poll, .bind_channel = gk20a_fecs_trace_bind_channel, .unbind_channel = gk20a_fecs_trace_unbind_channel, .max_entries = nvgpu_gr_fecs_trace_max_entries, diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index 89b23a776..81f3fd57b 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c @@ -839,9 +839,9 @@ static const struct gpu_ops gv100_ops = { .enable = nvgpu_gr_fecs_trace_enable, .disable = nvgpu_gr_fecs_trace_disable, .is_enabled = nvgpu_gr_fecs_trace_is_enabled, - .reset = gk20a_fecs_trace_reset, + .reset = nvgpu_gr_fecs_trace_reset, .flush = NULL, - .poll = gk20a_fecs_trace_poll, + .poll = nvgpu_gr_fecs_trace_poll, .bind_channel = gk20a_fecs_trace_bind_channel, .unbind_channel = gk20a_fecs_trace_unbind_channel, .max_entries = nvgpu_gr_fecs_trace_max_entries, diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index 346f0f656..3f95f6db8 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c @@ -797,9 +797,9 @@ static const struct gpu_ops gv11b_ops = { .enable = nvgpu_gr_fecs_trace_enable, .disable = nvgpu_gr_fecs_trace_disable, .is_enabled = nvgpu_gr_fecs_trace_is_enabled, - .reset = gk20a_fecs_trace_reset, + .reset = nvgpu_gr_fecs_trace_reset, .flush = NULL, - .poll = gk20a_fecs_trace_poll, + .poll = nvgpu_gr_fecs_trace_poll, .bind_channel = gk20a_fecs_trace_bind_channel, .unbind_channel = gk20a_fecs_trace_unbind_channel, .max_entries = nvgpu_gr_fecs_trace_max_entries, diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index 702078bee..98a909173 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -42,6 +42,7 @@ struct nvgpu_nvhost_dev; struct nvgpu_netlist_vars; struct nvgpu_gr_global_ctx_buffer_desc; struct nvgpu_gr_fecs_trace; +struct nvgpu_gpu_ctxsw_trace_entry; struct nvgpu_cpu_time_correlation_sample; struct nvgpu_mem_sgt; struct nvgpu_warpstate; @@ -1116,6 +1117,10 @@ struct gpu_ops { int (*get_read_index)(struct gk20a *g); int (*get_write_index)(struct gk20a *g); int (*set_read_index)(struct gk20a *g, int index); + void (*vm_dev_write)(struct gk20a *g, u8 vmid, + u32 *vm_update_mask, + struct nvgpu_gpu_ctxsw_trace_entry *entry); + void (*vm_dev_update)(struct gk20a *g, u32 vm_update_mask); } fecs_trace; #endif struct { diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/fecs_trace.h b/drivers/gpu/nvgpu/include/nvgpu/gr/fecs_trace.h index 1e2099976..b6a103197 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gr/fecs_trace.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gr/fecs_trace.h @@ -127,4 +127,9 @@ int nvgpu_gr_fecs_trace_disable(struct gk20a *g); bool nvgpu_gr_fecs_trace_is_enabled(struct gk20a *g); void nvgpu_gr_fecs_trace_reset_buffer(struct gk20a *g); +int nvgpu_gr_fecs_trace_ring_read(struct gk20a *g, int index, + u32 *vm_update_mask); +int nvgpu_gr_fecs_trace_poll(struct gk20a *g); +int nvgpu_gr_fecs_trace_reset(struct gk20a *g); + #endif /* NVGPU_GR_FECS_TRACE_H */ diff --git a/drivers/gpu/nvgpu/os/linux/ctxsw_trace.c b/drivers/gpu/nvgpu/os/linux/ctxsw_trace.c index 2da58bb0a..6a3ef51ed 100644 --- a/drivers/gpu/nvgpu/os/linux/ctxsw_trace.c +++ b/drivers/gpu/nvgpu/os/linux/ctxsw_trace.c @@ -392,10 +392,11 @@ int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp) nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "dev: %p", dev); - g->ops.fecs_trace.disable(g); - nvgpu_mutex_acquire(&dev->write_lock); - dev->write_enabled = false; + if (dev->write_enabled) { + dev->write_enabled = false; + g->ops.fecs_trace.disable(g); + } nvgpu_mutex_release(&dev->write_lock); if (dev->hdr) { diff --git a/drivers/gpu/nvgpu/tu104/hal_tu104.c b/drivers/gpu/nvgpu/tu104/hal_tu104.c index af59bc1d7..1064c4970 100644 --- a/drivers/gpu/nvgpu/tu104/hal_tu104.c +++ b/drivers/gpu/nvgpu/tu104/hal_tu104.c @@ -868,9 +868,9 @@ static const struct gpu_ops tu104_ops = { .enable = nvgpu_gr_fecs_trace_enable, .disable = nvgpu_gr_fecs_trace_disable, .is_enabled = nvgpu_gr_fecs_trace_is_enabled, - .reset = gk20a_fecs_trace_reset, + .reset = nvgpu_gr_fecs_trace_reset, .flush = NULL, - .poll = gk20a_fecs_trace_poll, + .poll = nvgpu_gr_fecs_trace_poll, .bind_channel = gk20a_fecs_trace_bind_channel, .unbind_channel = gk20a_fecs_trace_unbind_channel, .max_entries = nvgpu_gr_fecs_trace_max_entries,