From 4d2d890c01b94d10ad55643a4c2c159a98419efe Mon Sep 17 00:00:00 2001 From: Alex Waterman Date: Wed, 25 Oct 2017 09:56:09 -0700 Subject: [PATCH] gpu: nvgpu: Move ctxsw_trace_gk20a.c to common/linux Migrate ctxsw_trace_gk20a.c to common/linux/ctxsw_trace.c. This has been done because the ctxsw tracing code is currently too tightly tied to the Linux OS due to usage of a couple system calls: - poll() - mmap() And general Linux driver framework code. As a result pulling the logic out of the FECS tracing code is simply too large a scope for the time being. Instead the code was just copied as much as possible. The HAL ops for the FECS code were hidden behind the FECS tracing config so that the vm_area_struct is not used when QNX does not define said config. All other non-HAL functions called by the FECS ctxsw tracing code have now also been hidden by this config. This is not pretty but for the time being it seems like the way to go. JIRA NVGPU-287 Change-Id: Ib880ab237f4abd330dc66998692c86c4507149c2 Signed-off-by: Alex Waterman Reviewed-on: https://git-master.nvidia.com/r/1586547 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/Makefile | 4 ++- .../linux/ctxsw_trace.c} | 14 +++++----- drivers/gpu/nvgpu/common/linux/module.c | 2 ++ drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 2 ++ drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h | 18 ++++++++----- drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 27 +++++++++++++------ drivers/gpu/nvgpu/gk20a/gk20a.c | 2 ++ drivers/gpu/nvgpu/gk20a/gk20a.h | 14 +++++++--- drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 5 ++-- drivers/gpu/nvgpu/gp106/hal_gp106.c | 2 ++ drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 2 ++ 11 files changed, 64 insertions(+), 28 deletions(-) rename drivers/gpu/nvgpu/{gk20a/ctxsw_trace_gk20a.c => common/linux/ctxsw_trace.c} (99%) diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index af7a8af53..f1a6f267f 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile @@ -105,7 +105,6 @@ 
nvgpu-y := \ gk20a/fb_gk20a.o \ gk20a/hal.o \ gk20a/tsg_gk20a.o \ - gk20a/ctxsw_trace_gk20a.o \ gk20a/fecs_trace_gk20a.o \ gk20a/mc_gk20a.o \ gk20a/sim_gk20a.o \ @@ -152,6 +151,9 @@ nvgpu-$(CONFIG_DEBUG_FS) += \ common/linux/debug_kmem.o endif +nvgpu-$(CONFIG_GK20A_CTXSW_TRACE) += \ + common/linux/ctxsw_trace.o + nvgpu-$(CONFIG_TEGRA_GK20A) += common/linux/platform_gk20a_tegra.o nvgpu-$(CONFIG_SYNC) += gk20a/sync_gk20a.o nvgpu-$(CONFIG_GK20A_PCI) += common/linux/pci.o diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c b/drivers/gpu/nvgpu/common/linux/ctxsw_trace.c similarity index 99% rename from drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c rename to drivers/gpu/nvgpu/common/linux/ctxsw_trace.c index fb33de236..81a54b7e4 100644 --- a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c +++ b/drivers/gpu/nvgpu/common/linux/ctxsw_trace.c @@ -28,18 +28,18 @@ #include #include +#include "gk20a/gk20a.h" +#include "gk20a/gr_gk20a.h" +#include "gk20a/ctxsw_trace_gk20a.h" +#include "gk20a/platform_gk20a.h" + #include - -#include "ctxsw_trace_gk20a.h" -#include "gk20a.h" -#include "platform_gk20a.h" -#include "gr_gk20a.h" -#include "common/linux/os_linux.h" - #include #include #include +#include "os_linux.h" + #include #include diff --git a/drivers/gpu/nvgpu/common/linux/module.c b/drivers/gpu/nvgpu/common/linux/module.c index 68ae1a661..fb5d36142 100644 --- a/drivers/gpu/nvgpu/common/linux/module.c +++ b/drivers/gpu/nvgpu/common/linux/module.c @@ -1104,7 +1104,9 @@ int nvgpu_remove(struct device *dev, struct class *class) if (platform->has_cde) gk20a_cde_destroy(l); +#ifdef CONFIG_GK20A_CTXSW_TRACE gk20a_ctxsw_trace_cleanup(g); +#endif gk20a_sched_ctrl_cleanup(g); diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 0d011b069..546f41649 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -553,8 +553,10 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force) 
gk20a_dbg_info("freeing bound channel context, timeout=%ld", timeout); +#ifdef CONFIG_GK20A_CTXSW_TRACE if (g->ops.fecs_trace.unbind_channel && !ch->vpr) g->ops.fecs_trace.unbind_channel(g, ch); +#endif /* release channel ctx */ g->ops.gr.free_channel_ctx(ch, was_tsg); diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h index b270581bf..dddb8603b 100644 --- a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.h @@ -23,6 +23,8 @@ #ifndef __CTXSW_TRACE_GK20A_H #define __CTXSW_TRACE_GK20A_H +#include + #define GK20A_CTXSW_TRACE_NUM_DEVS 1 struct file; @@ -41,20 +43,22 @@ int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp); int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp); long gk20a_ctxsw_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); -ssize_t gk20a_ctxsw_dev_read(struct file *, char __user *, size_t, loff_t *); -unsigned int gk20a_ctxsw_dev_poll(struct file *, struct poll_table_struct *); -int gk20a_ctxsw_dev_mmap(struct file *, struct vm_area_struct *); +ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, + size_t size, loff_t *offs); +unsigned int gk20a_ctxsw_dev_poll(struct file *filp, + struct poll_table_struct *pts); +int gk20a_ctxsw_dev_mmap(struct file *filp, struct vm_area_struct *vma); int gk20a_ctxsw_dev_ring_alloc(struct gk20a *g, void **buf, size_t *size); int gk20a_ctxsw_dev_ring_free(struct gk20a *g); int gk20a_ctxsw_dev_mmap_buffer(struct gk20a *g, struct vm_area_struct *vma); -int gk20a_ctxsw_trace_init(struct gk20a *); -void gk20a_ctxsw_trace_cleanup(struct gk20a *); -int gk20a_ctxsw_trace_write(struct gk20a *, struct nvgpu_ctxsw_trace_entry *); +int gk20a_ctxsw_trace_init(struct gk20a *g); +void gk20a_ctxsw_trace_cleanup(struct gk20a *g); +int gk20a_ctxsw_trace_write(struct gk20a *g, + struct nvgpu_ctxsw_trace_entry *entry); void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid); void 
gk20a_ctxsw_trace_channel_reset(struct gk20a *g, struct channel_gk20a *ch); void gk20a_ctxsw_trace_tsg_reset(struct gk20a *g, struct tsg_gk20a *tsg); - #endif /* __CTXSW_TRACE_GK20A_H */ diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index d10af9e91..17ae626b5 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c @@ -1228,16 +1228,24 @@ void gk20a_fifo_reset_engine(struct gk20a *g, u32 engine_id) if (nvgpu_pmu_disable_elpg(g)) nvgpu_err(g, "failed to set disable elpg"); } - /* resetting engine will alter read/write index. - * need to flush circular buffer before re-enabling FECS. + +#ifdef CONFIG_GK20A_CTXSW_TRACE + /* + * Resetting engine will alter read/write index. Need to flush + * circular buffer before re-enabling FECS. */ if (g->ops.fecs_trace.reset) g->ops.fecs_trace.reset(g); - /*HALT_PIPELINE method, halt GR engine*/ +#endif + + /* HALT_PIPELINE method, halt GR engine. */ if (gr_gk20a_halt_pipe(g)) nvgpu_err(g, "failed to HALT gr pipe"); - /* resetting engine using mc_enable_r() is not - enough, we do full init sequence */ + + /* + * Resetting engine using mc_enable_r() is not enough; we must + * do full init sequence. + */ gk20a_gr_reset(g); if (g->support_pmu && g->can_elpg) nvgpu_pmu_enable_elpg(g); @@ -1618,6 +1626,8 @@ static bool gk20a_fifo_handle_mmu_fault( } } } + +#ifdef CONFIG_GK20A_CTXSW_TRACE /* * For non fake mmu fault, both tsg and ch pointers * could be valid. Check tsg first. @@ -1626,10 +1636,11 @@ static bool gk20a_fifo_handle_mmu_fault( gk20a_ctxsw_trace_tsg_reset(g, tsg); else if (ch) gk20a_ctxsw_trace_channel_reset(g, ch); +#endif - /* disable the channel/TSG from hw and increment - * syncpoints */ - + /* + * Disable the channel/TSG from hw and increment syncpoints. 
+ */ if (tsg) { if (!g->fifo.deferred_reset_pending) { if (!fake_fault) diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index 47f6c56c2..703a7c0c2 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c @@ -295,9 +295,11 @@ int gk20a_finalize_poweron(struct gk20a *g) goto done; } +#ifdef CONFIG_GK20A_CTXSW_TRACE err = gk20a_ctxsw_trace_init(g); if (err) nvgpu_warn(g, "could not initialize ctxsw tracing"); +#endif err = gk20a_sched_ctrl_init(g); if (err) { diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index d7fdffb02..a34f06b25 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -689,18 +689,25 @@ struct gpu_ops { int (*get_netlist_name)(struct gk20a *g, int index, char *name); bool (*is_fw_defined)(void); } gr_ctx; +#ifdef CONFIG_GK20A_CTXSW_TRACE + /* + * Currently only supported on Linux due to the extremely tight + * integration with Linux device driver structure (in particular + * mmap). 
+ */ struct { int (*init)(struct gk20a *g); int (*max_entries)(struct gk20a *, - struct nvgpu_ctxsw_trace_filter *); + struct nvgpu_ctxsw_trace_filter *filter); int (*flush)(struct gk20a *g); int (*poll)(struct gk20a *g); int (*enable)(struct gk20a *g); int (*disable)(struct gk20a *g); bool (*is_enabled)(struct gk20a *g); int (*reset)(struct gk20a *g); - int (*bind_channel)(struct gk20a *, struct channel_gk20a *); - int (*unbind_channel)(struct gk20a *, struct channel_gk20a *); + int (*bind_channel)(struct gk20a *g, struct channel_gk20a *ch); + int (*unbind_channel)(struct gk20a *g, + struct channel_gk20a *ch); int (*deinit)(struct gk20a *g); int (*alloc_user_buffer)(struct gk20a *g, void **buf, size_t *size); @@ -710,6 +717,7 @@ struct gpu_ops { int (*set_filter)(struct gk20a *g, struct nvgpu_ctxsw_trace_filter *filter); } fecs_trace; +#endif struct { bool (*support_sparse)(struct gk20a *g); u64 (*gmmu_map)(struct vm_gk20a *vm, diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 1ea59a9db..f78d862cf 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -3070,13 +3070,14 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c, "fail to load golden ctx image"); goto out; } +#ifdef CONFIG_GK20A_CTXSW_TRACE if (g->ops.fecs_trace.bind_channel && !c->vpr) { err = g->ops.fecs_trace.bind_channel(g, c); - if (err) { + if (err) nvgpu_warn(g, "fail to bind channel for ctxsw trace"); - } } +#endif c->first_init = true; } diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c index f576278d2..c5b662016 100644 --- a/drivers/gpu/nvgpu/gp106/hal_gp106.c +++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c @@ -714,7 +714,9 @@ int gp106_init_hal(struct gk20a *g) gops->clock_gating = gp106_ops.clock_gating; gops->fifo = gp106_ops.fifo; gops->gr_ctx = gp106_ops.gr_ctx; +#ifdef CONFIG_GK20A_CTXSW_TRACE gops->fecs_trace = gp106_ops.fecs_trace; +#endif gops->mm = gp106_ops.mm; gops->pramin = 
gp106_ops.pramin; gops->therm = gp106_ops.therm; diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index cbec89bce..bb95f6dbe 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c @@ -619,7 +619,9 @@ int gp10b_init_hal(struct gk20a *g) gops->clock_gating = gp10b_ops.clock_gating; gops->fifo = gp10b_ops.fifo; gops->gr_ctx = gp10b_ops.gr_ctx; +#ifdef CONFIG_GK20A_CTXSW_TRACE gops->fecs_trace = gp10b_ops.fecs_trace; +#endif gops->mm = gp10b_ops.mm; gops->pramin = gp10b_ops.pramin; gops->therm = gp10b_ops.therm;