diff --git a/drivers/gpu/nvgpu/common/gr/fecs_trace/fecs_trace.c b/drivers/gpu/nvgpu/common/gr/fecs_trace/fecs_trace.c index 03f8959c3..fb8cc15e5 100644 --- a/drivers/gpu/nvgpu/common/gr/fecs_trace/fecs_trace.c +++ b/drivers/gpu/nvgpu/common/gr/fecs_trace/fecs_trace.c @@ -28,6 +28,14 @@ #include #include +/* + * TODO: This include is only needed for transition phase to new unit + * Remove as soon as transition is complete + */ +#include "gk20a/fecs_trace_gk20a.h" + +#ifdef CONFIG_GK20A_CTXSW_TRACE + int nvgpu_gr_fecs_trace_add_context(struct gk20a *g, u32 context_ptr, pid_t pid, u32 vmid, struct nvgpu_list_node *list) { @@ -219,3 +227,90 @@ bool nvgpu_gr_fecs_trace_is_valid_record(struct gk20a *g, */ return g->ops.gr.ctxsw_prog.is_ts_valid_record(r->magic_hi); } + +size_t nvgpu_gr_fecs_trace_buffer_size(struct gk20a *g) +{ + return GK20A_FECS_TRACE_NUM_RECORDS + * g->ops.gr.ctxsw_prog.hw_get_ts_record_size_in_bytes(); +} + +int nvgpu_gr_fecs_trace_max_entries(struct gk20a *g, + struct nvgpu_gpu_ctxsw_trace_filter *filter) +{ + int n; + int tag; + + /* Compute number of entries per record, with given filter */ + for (n = 0, tag = 0; tag < nvgpu_gr_fecs_trace_num_ts(g); tag++) + n += (NVGPU_GPU_CTXSW_FILTER_ISSET(tag, filter) != 0); + + /* Return max number of entries generated for the whole ring */ + return n * GK20A_FECS_TRACE_NUM_RECORDS; +} + +int nvgpu_gr_fecs_trace_enable(struct gk20a *g) +{ + struct nvgpu_gr_fecs_trace *trace = g->fecs_trace; + int write; + int err = 0; + + nvgpu_mutex_acquire(&trace->enable_lock); + trace->enable_count++; + + if (trace->enable_count == 1U) { + /* drop data in hw buffer */ + if (g->ops.fecs_trace.flush) + g->ops.fecs_trace.flush(g); + + write = g->ops.fecs_trace.get_write_index(g); + g->ops.fecs_trace.set_read_index(g, write); + + err = nvgpu_thread_create(&trace->poll_task, g, + gk20a_fecs_trace_periodic_polling, __func__); + if (err != 0) { + nvgpu_warn(g, "failed to create FECS polling task"); + /* undo the count so a later enable retries thread creation */ + trace->enable_count--; + goto done; + } + } + +done: 
+ nvgpu_mutex_release(&trace->enable_lock); + return err; +} + +int nvgpu_gr_fecs_trace_disable(struct gk20a *g) +{ + struct nvgpu_gr_fecs_trace *trace = g->fecs_trace; + + if (trace == NULL) { + return -EINVAL; + } + + nvgpu_mutex_acquire(&trace->enable_lock); + trace->enable_count--; + if (trace->enable_count == 0U) { + nvgpu_thread_stop(&trace->poll_task); + } + nvgpu_mutex_release(&trace->enable_lock); + + return 0; +} + +bool nvgpu_gr_fecs_trace_is_enabled(struct gk20a *g) +{ + struct nvgpu_gr_fecs_trace *trace = g->fecs_trace; + + return (trace && (trace->enable_count > 0)); +} + +void nvgpu_gr_fecs_trace_reset_buffer(struct gk20a *g) +{ + nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " "); + + g->ops.fecs_trace.set_read_index(g, + g->ops.fecs_trace.get_write_index(g)); +} + +#endif /* CONFIG_GK20A_CTXSW_TRACE */ diff --git a/drivers/gpu/nvgpu/common/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/gr_vgpu.c index f8da03946..2983a881d 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gr_vgpu.c +++ b/drivers/gpu/nvgpu/common/vgpu/gr_vgpu.c @@ -38,6 +38,7 @@ #include #include #include +#include #include "gr_vgpu.h" #include "gk20a/fecs_trace_gk20a.h" @@ -127,7 +128,7 @@ int vgpu_gr_init_ctx_state(struct gk20a *g) g->gr.ctx_vars.priv_access_map_size = 512 * 1024; #ifdef CONFIG_GK20A_CTXSW_TRACE - g->gr.ctx_vars.fecs_trace_buffer_size = gk20a_fecs_trace_buffer_size(g); + g->gr.ctx_vars.fecs_trace_buffer_size = nvgpu_gr_fecs_trace_buffer_size(g); #endif return 0; } diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c index c4d5b1bba..36087a8c0 100644 --- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c @@ -220,7 +220,7 @@ done: return err; } -static int gk20a_fecs_trace_periodic_polling(void *arg) +int gk20a_fecs_trace_periodic_polling(void *arg) { struct gk20a *g = (struct gk20a *)arg; struct nvgpu_gr_fecs_trace *trace = g->fecs_trace; @@ -238,12 +238,6 @@ static int 
gk20a_fecs_trace_periodic_polling(void *arg) return 0; } -size_t gk20a_fecs_trace_buffer_size(struct gk20a *g) -{ - return GK20A_FECS_TRACE_NUM_RECORDS - * g->ops.gr.ctxsw_prog.hw_get_ts_record_size_in_bytes(); -} - int gk20a_fecs_trace_bind_channel(struct gk20a *g, struct channel_gk20a *ch, u32 vmid, struct nvgpu_gr_ctx *gr_ctx) { @@ -348,79 +342,6 @@ int gk20a_fecs_trace_reset(struct gk20a *g) return g->ops.fecs_trace.set_read_index(g, 0); } -int gk20a_gr_max_entries(struct gk20a *g, - struct nvgpu_gpu_ctxsw_trace_filter *filter) -{ - int n; - int tag; - - /* Compute number of entries per record, with given filter */ - for (n = 0, tag = 0; tag < nvgpu_gr_fecs_trace_num_ts(g); tag++) - n += (NVGPU_GPU_CTXSW_FILTER_ISSET(tag, filter) != 0); - - /* Return max number of entries generated for the whole ring */ - return n * GK20A_FECS_TRACE_NUM_RECORDS; -} - -int gk20a_fecs_trace_enable(struct gk20a *g) -{ - struct nvgpu_gr_fecs_trace *trace = g->fecs_trace; - int write; - int err = 0; - - nvgpu_mutex_acquire(&trace->enable_lock); - trace->enable_count++; - - if (trace->enable_count == 1U) { - /* drop data in hw buffer */ - if (g->ops.fecs_trace.flush) - g->ops.fecs_trace.flush(g); - - write = g->ops.fecs_trace.get_write_index(g); - g->ops.fecs_trace.set_read_index(g, write); - - err = nvgpu_thread_create(&trace->poll_task, g, - gk20a_fecs_trace_periodic_polling, __func__); - if (err != 0) { - nvgpu_warn(g, "failed to create FECS polling task"); - goto done; - } - } - -done: - nvgpu_mutex_release(&trace->enable_lock); - return err; -} - -int gk20a_fecs_trace_disable(struct gk20a *g) -{ - struct nvgpu_gr_fecs_trace *trace = g->fecs_trace; - - nvgpu_mutex_acquire(&trace->enable_lock); - trace->enable_count--; - if (trace->enable_count == 0U) { - nvgpu_thread_stop(&trace->poll_task); - } - nvgpu_mutex_release(&trace->enable_lock); - - return 0; -} - -bool gk20a_fecs_trace_is_enabled(struct gk20a *g) -{ - struct nvgpu_gr_fecs_trace *trace = g->fecs_trace; - - return (trace 
&& (trace->enable_count > 0)); -} - -void gk20a_fecs_trace_reset_buffer(struct gk20a *g) -{ - nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " "); - - g->ops.fecs_trace.set_read_index(g, - g->ops.fecs_trace.get_write_index(g)); -} - u32 gk20a_fecs_trace_get_buffer_full_mailbox_val(void) { return 0x26; diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.h b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.h index 6546d0337..d400a243c 100644 --- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.h @@ -29,18 +29,12 @@ struct nvgpu_gpu_ctxsw_trace_filter; struct nvgpu_gr_ctx; int gk20a_fecs_trace_poll(struct gk20a *g); +int gk20a_fecs_trace_periodic_polling(void *arg); int gk20a_fecs_trace_bind_channel(struct gk20a *g, struct channel_gk20a *ch, u32 vmid, struct nvgpu_gr_ctx *gr_ctx); int gk20a_fecs_trace_unbind_channel(struct gk20a *g, struct channel_gk20a *ch); int gk20a_fecs_trace_reset(struct gk20a *g); -int gk20a_gr_max_entries(struct gk20a *g, - struct nvgpu_gpu_ctxsw_trace_filter *filter); -int gk20a_fecs_trace_enable(struct gk20a *g); -int gk20a_fecs_trace_disable(struct gk20a *g); -bool gk20a_fecs_trace_is_enabled(struct gk20a *g); -size_t gk20a_fecs_trace_buffer_size(struct gk20a *g); -void gk20a_fecs_trace_reset_buffer(struct gk20a *g); u32 gk20a_fecs_trace_get_buffer_full_mailbox_val(void); #endif /* NVGPU_GK20A_FECS_TRACE_GK20A_H */ diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index be0e378f3..16afbd41e 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -54,6 +54,7 @@ #include #include #include +#include #include #include @@ -2012,7 +2013,7 @@ int gr_gk20a_init_ctx_state(struct gk20a *g) g->gr.ctx_vars.priv_access_map_size = 512 * 1024; #ifdef CONFIG_GK20A_CTXSW_TRACE g->gr.ctx_vars.fecs_trace_buffer_size = - gk20a_fecs_trace_buffer_size(g); + nvgpu_gr_fecs_trace_buffer_size(g); #endif } @@ -3591,7 +3592,7 @@ int gk20a_gr_handle_fecs_error(struct 
gk20a *g, struct channel_gk20a *ch, g->ops.fecs_trace.get_buffer_full_mailbox_val()) { nvgpu_info(g, "ctxsw intr0 set by ucode, " "timestamp buffer full"); - gk20a_fecs_trace_reset_buffer(g); + nvgpu_gr_fecs_trace_reset_buffer(g); } else { nvgpu_err(g, "ctxsw intr0 set by ucode, error_code: 0x%08x", diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index 902be74cf..e4045707d 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c @@ -659,15 +659,15 @@ static const struct gpu_ops gp10b_ops = { .mmap_user_buffer = gk20a_ctxsw_dev_mmap_buffer, .init = nvgpu_gr_fecs_trace_init, .deinit = nvgpu_gr_fecs_trace_deinit, - .enable = gk20a_fecs_trace_enable, - .disable = gk20a_fecs_trace_disable, - .is_enabled = gk20a_fecs_trace_is_enabled, + .enable = nvgpu_gr_fecs_trace_enable, + .disable = nvgpu_gr_fecs_trace_disable, + .is_enabled = nvgpu_gr_fecs_trace_is_enabled, .reset = gk20a_fecs_trace_reset, .flush = gp10b_fecs_trace_flush, .poll = gk20a_fecs_trace_poll, .bind_channel = gk20a_fecs_trace_bind_channel, .unbind_channel = gk20a_fecs_trace_unbind_channel, - .max_entries = gk20a_gr_max_entries, + .max_entries = nvgpu_gr_fecs_trace_max_entries, .get_buffer_full_mailbox_val = gk20a_fecs_trace_get_buffer_full_mailbox_val, .get_read_index = gm20b_fecs_trace_get_read_index, diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index 0c0405c8a..d6df4f6fb 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c @@ -825,15 +825,15 @@ static const struct gpu_ops gv100_ops = { .mmap_user_buffer = gk20a_ctxsw_dev_mmap_buffer, .init = nvgpu_gr_fecs_trace_init, .deinit = nvgpu_gr_fecs_trace_deinit, - .enable = gk20a_fecs_trace_enable, - .disable = gk20a_fecs_trace_disable, - .is_enabled = gk20a_fecs_trace_is_enabled, + .enable = nvgpu_gr_fecs_trace_enable, + .disable = nvgpu_gr_fecs_trace_disable, + .is_enabled = nvgpu_gr_fecs_trace_is_enabled, 
.reset = gk20a_fecs_trace_reset, .flush = NULL, .poll = gk20a_fecs_trace_poll, .bind_channel = gk20a_fecs_trace_bind_channel, .unbind_channel = gk20a_fecs_trace_unbind_channel, - .max_entries = gk20a_gr_max_entries, + .max_entries = nvgpu_gr_fecs_trace_max_entries, .get_buffer_full_mailbox_val = gk20a_fecs_trace_get_buffer_full_mailbox_val, .get_read_index = gm20b_fecs_trace_get_read_index, diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index ce866a513..1a98e5dbe 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c @@ -783,15 +783,15 @@ static const struct gpu_ops gv11b_ops = { .mmap_user_buffer = gk20a_ctxsw_dev_mmap_buffer, .init = nvgpu_gr_fecs_trace_init, .deinit = nvgpu_gr_fecs_trace_deinit, - .enable = gk20a_fecs_trace_enable, - .disable = gk20a_fecs_trace_disable, - .is_enabled = gk20a_fecs_trace_is_enabled, + .enable = nvgpu_gr_fecs_trace_enable, + .disable = nvgpu_gr_fecs_trace_disable, + .is_enabled = nvgpu_gr_fecs_trace_is_enabled, .reset = gk20a_fecs_trace_reset, .flush = NULL, .poll = gk20a_fecs_trace_poll, .bind_channel = gk20a_fecs_trace_bind_channel, .unbind_channel = gk20a_fecs_trace_unbind_channel, - .max_entries = gk20a_gr_max_entries, + .max_entries = nvgpu_gr_fecs_trace_max_entries, .get_buffer_full_mailbox_val = gk20a_fecs_trace_get_buffer_full_mailbox_val, .get_read_index = gm20b_fecs_trace_get_read_index, diff --git a/drivers/gpu/nvgpu/include/nvgpu/ctxsw_trace.h b/drivers/gpu/nvgpu/include/nvgpu/ctxsw_trace.h index 8706be796..e740f1aa4 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/ctxsw_trace.h +++ b/drivers/gpu/nvgpu/include/nvgpu/ctxsw_trace.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -29,30 +29,6 @@ struct gk20a; struct tsg_gk20a; struct channel_gk20a; -#define NVGPU_GPU_CTXSW_TAG_SOF 0x00 -#define NVGPU_GPU_CTXSW_TAG_CTXSW_REQ_BY_HOST 0x01 -#define NVGPU_GPU_CTXSW_TAG_FE_ACK 0x02 -#define NVGPU_GPU_CTXSW_TAG_FE_ACK_WFI 0x0a -#define NVGPU_GPU_CTXSW_TAG_FE_ACK_GFXP 0x0b -#define NVGPU_GPU_CTXSW_TAG_FE_ACK_CTAP 0x0c -#define NVGPU_GPU_CTXSW_TAG_FE_ACK_CILP 0x0d -#define NVGPU_GPU_CTXSW_TAG_SAVE_END 0x03 -#define NVGPU_GPU_CTXSW_TAG_RESTORE_START 0x04 -#define NVGPU_GPU_CTXSW_TAG_CONTEXT_START 0x05 -#define NVGPU_GPU_CTXSW_TAG_ENGINE_RESET 0xfe -#define NVGPU_GPU_CTXSW_TAG_INVALID_TIMESTAMP 0xff -#define NVGPU_GPU_CTXSW_TAG_LAST \ - NVGPU_GPU_CTXSW_TAG_INVALID_TIMESTAMP - -#define NVGPU_GPU_CTXSW_FILTER_ISSET(n, p) \ - ((p)->tag_bits[(n) / 64] & (1 << ((n) & 63))) - -#define NVGPU_GPU_CTXSW_FILTER_SIZE (NVGPU_GPU_CTXSW_TAG_LAST + 1) - -struct nvgpu_gpu_ctxsw_trace_filter { - u64 tag_bits[(NVGPU_GPU_CTXSW_FILTER_SIZE + 63) / 64]; -}; - /* must be consistent with nvgpu_ctxsw_ring_header */ struct nvgpu_ctxsw_ring_header_internal { u32 magic; diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/fecs_trace.h b/drivers/gpu/nvgpu/include/nvgpu/gr/fecs_trace.h index c0650a375..1e2099976 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gr/fecs_trace.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gr/fecs_trace.h @@ -36,6 +36,26 @@ #define GK20A_FECS_TRACE_FRAME_PERIOD_US (1000000ULL/60ULL) #define GK20A_FECS_TRACE_PTIMER_SHIFT 5 +#define NVGPU_GPU_CTXSW_TAG_SOF 0x00 +#define NVGPU_GPU_CTXSW_TAG_CTXSW_REQ_BY_HOST 0x01 +#define NVGPU_GPU_CTXSW_TAG_FE_ACK 0x02 +#define NVGPU_GPU_CTXSW_TAG_FE_ACK_WFI 0x0a +#define NVGPU_GPU_CTXSW_TAG_FE_ACK_GFXP 0x0b +#define NVGPU_GPU_CTXSW_TAG_FE_ACK_CTAP 0x0c +#define NVGPU_GPU_CTXSW_TAG_FE_ACK_CILP 0x0d +#define NVGPU_GPU_CTXSW_TAG_SAVE_END 0x03 +#define 
NVGPU_GPU_CTXSW_TAG_RESTORE_START 0x04 +#define NVGPU_GPU_CTXSW_TAG_CONTEXT_START 0x05 +#define NVGPU_GPU_CTXSW_TAG_ENGINE_RESET 0xfe +#define NVGPU_GPU_CTXSW_TAG_INVALID_TIMESTAMP 0xff +#define NVGPU_GPU_CTXSW_TAG_LAST \ + NVGPU_GPU_CTXSW_TAG_INVALID_TIMESTAMP + +#define NVGPU_GPU_CTXSW_FILTER_ISSET(n, p) \ + ((p)->tag_bits[(n) / 64] & (1ULL << ((n) & 63))) /* 64-bit mask: (n) & 63 can reach 63 */ + +#define NVGPU_GPU_CTXSW_FILTER_SIZE (NVGPU_GPU_CTXSW_TAG_LAST + 1) + struct gk20a; struct nvgpu_gr_fecs_trace { @@ -59,6 +79,10 @@ struct nvgpu_fecs_trace_record { u64 ts[]; }; +struct nvgpu_gpu_ctxsw_trace_filter { + u64 tag_bits[(NVGPU_GPU_CTXSW_FILTER_SIZE + 63) / 64]; +}; + struct nvgpu_fecs_trace_context_entry { u32 context_ptr; @@ -94,4 +118,13 @@ void nvgpu_gr_fecs_trace_remove_contexts(struct gk20a *g, void nvgpu_gr_fecs_trace_find_pid(struct gk20a *g, u32 context_ptr, struct nvgpu_list_node *list, pid_t *pid, u32 *vmid); +size_t nvgpu_gr_fecs_trace_buffer_size(struct gk20a *g); +int nvgpu_gr_fecs_trace_max_entries(struct gk20a *g, + struct nvgpu_gpu_ctxsw_trace_filter *filter); + +int nvgpu_gr_fecs_trace_enable(struct gk20a *g); +int nvgpu_gr_fecs_trace_disable(struct gk20a *g); +bool nvgpu_gr_fecs_trace_is_enabled(struct gk20a *g); +void nvgpu_gr_fecs_trace_reset_buffer(struct gk20a *g); + #endif /* NVGPU_GR_FECS_TRACE_H */ diff --git a/drivers/gpu/nvgpu/os/linux/ctxsw_trace.c b/drivers/gpu/nvgpu/os/linux/ctxsw_trace.c index 537e20a47..2da58bb0a 100644 --- a/drivers/gpu/nvgpu/os/linux/ctxsw_trace.c +++ b/drivers/gpu/nvgpu/os/linux/ctxsw_trace.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include "gk20a/gr_gk20a.h" diff --git a/drivers/gpu/nvgpu/tu104/hal_tu104.c b/drivers/gpu/nvgpu/tu104/hal_tu104.c index b15c76a5c..1c469a423 100644 --- a/drivers/gpu/nvgpu/tu104/hal_tu104.c +++ b/drivers/gpu/nvgpu/tu104/hal_tu104.c @@ -855,15 +855,15 @@ static const struct gpu_ops tu104_ops = { .mmap_user_buffer = gk20a_ctxsw_dev_mmap_buffer, .init = nvgpu_gr_fecs_trace_init, .deinit = 
nvgpu_gr_fecs_trace_deinit, - .enable = gk20a_fecs_trace_enable, - .disable = gk20a_fecs_trace_disable, - .is_enabled = gk20a_fecs_trace_is_enabled, + .enable = nvgpu_gr_fecs_trace_enable, + .disable = nvgpu_gr_fecs_trace_disable, + .is_enabled = nvgpu_gr_fecs_trace_is_enabled, .reset = gk20a_fecs_trace_reset, .flush = NULL, .poll = gk20a_fecs_trace_poll, .bind_channel = gk20a_fecs_trace_bind_channel, .unbind_channel = gk20a_fecs_trace_unbind_channel, - .max_entries = gk20a_gr_max_entries, + .max_entries = nvgpu_gr_fecs_trace_max_entries, .get_buffer_full_mailbox_val = tu104_fecs_trace_get_buffer_full_mailbox_val, .get_read_index = gm20b_fecs_trace_get_read_index,