gpu: nvgpu: Move ctxsw_trace_gk20a.c to common/linux

Migrate ctxsw_trace_gk20a.c to common/linux/ctxsw_trace.c. This has
been done because the ctxsw tracing code is currently too tightly
tied to the Linux OS due to its usage of a couple of system calls:

  - poll()
  - mmap()

And general Linux driver framework code. As a result, pulling the
logic out of the FECS tracing code is simply too large a scope for
the time being.

Instead, the code was just copied as much as possible. The HAL ops
for the FECS code were hidden behind the FECS tracing config so
that the vm_area_struct is not used when QNX does not define said
config. All other non-HAL functions called by the FECS ctxsw
tracing code have now also been hidden by this config. This is not
pretty, but for the time being it seems like the way to go.

JIRA NVGPU-287

Change-Id: Ib880ab237f4abd330dc66998692c86c4507149c2
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1586547
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Alex Waterman
2017-10-25 09:56:09 -07:00
committed by mobile promotions
parent a681c505c9
commit 4d2d890c01
11 changed files with 64 additions and 28 deletions

View File

@@ -105,7 +105,6 @@ nvgpu-y := \
gk20a/fb_gk20a.o \ gk20a/fb_gk20a.o \
gk20a/hal.o \ gk20a/hal.o \
gk20a/tsg_gk20a.o \ gk20a/tsg_gk20a.o \
gk20a/ctxsw_trace_gk20a.o \
gk20a/fecs_trace_gk20a.o \ gk20a/fecs_trace_gk20a.o \
gk20a/mc_gk20a.o \ gk20a/mc_gk20a.o \
gk20a/sim_gk20a.o \ gk20a/sim_gk20a.o \
@@ -152,6 +151,9 @@ nvgpu-$(CONFIG_DEBUG_FS) += \
common/linux/debug_kmem.o common/linux/debug_kmem.o
endif endif
nvgpu-$(CONFIG_GK20A_CTXSW_TRACE) += \
common/linux/ctxsw_trace.o
nvgpu-$(CONFIG_TEGRA_GK20A) += common/linux/platform_gk20a_tegra.o nvgpu-$(CONFIG_TEGRA_GK20A) += common/linux/platform_gk20a_tegra.o
nvgpu-$(CONFIG_SYNC) += gk20a/sync_gk20a.o nvgpu-$(CONFIG_SYNC) += gk20a/sync_gk20a.o
nvgpu-$(CONFIG_GK20A_PCI) += common/linux/pci.o nvgpu-$(CONFIG_GK20A_PCI) += common/linux/pci.o

View File

@@ -28,18 +28,18 @@
#include <trace/events/gk20a.h> #include <trace/events/gk20a.h>
#include <uapi/linux/nvgpu.h> #include <uapi/linux/nvgpu.h>
#include "gk20a/gk20a.h"
#include "gk20a/gr_gk20a.h"
#include "gk20a/ctxsw_trace_gk20a.h"
#include "gk20a/platform_gk20a.h"
#include <nvgpu/kmem.h> #include <nvgpu/kmem.h>
#include "ctxsw_trace_gk20a.h"
#include "gk20a.h"
#include "platform_gk20a.h"
#include "gr_gk20a.h"
#include "common/linux/os_linux.h"
#include <nvgpu/log.h> #include <nvgpu/log.h>
#include <nvgpu/atomic.h> #include <nvgpu/atomic.h>
#include <nvgpu/barrier.h> #include <nvgpu/barrier.h>
#include "os_linux.h"
#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h> #include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
#include <nvgpu/hw/gk20a/hw_gr_gk20a.h> #include <nvgpu/hw/gk20a/hw_gr_gk20a.h>

View File

@@ -1104,7 +1104,9 @@ int nvgpu_remove(struct device *dev, struct class *class)
if (platform->has_cde) if (platform->has_cde)
gk20a_cde_destroy(l); gk20a_cde_destroy(l);
#ifdef CONFIG_GK20A_CTXSW_TRACE
gk20a_ctxsw_trace_cleanup(g); gk20a_ctxsw_trace_cleanup(g);
#endif
gk20a_sched_ctrl_cleanup(g); gk20a_sched_ctrl_cleanup(g);

View File

@@ -553,8 +553,10 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force)
gk20a_dbg_info("freeing bound channel context, timeout=%ld", gk20a_dbg_info("freeing bound channel context, timeout=%ld",
timeout); timeout);
#ifdef CONFIG_GK20A_CTXSW_TRACE
if (g->ops.fecs_trace.unbind_channel && !ch->vpr) if (g->ops.fecs_trace.unbind_channel && !ch->vpr)
g->ops.fecs_trace.unbind_channel(g, ch); g->ops.fecs_trace.unbind_channel(g, ch);
#endif
/* release channel ctx */ /* release channel ctx */
g->ops.gr.free_channel_ctx(ch, was_tsg); g->ops.gr.free_channel_ctx(ch, was_tsg);

View File

@@ -23,6 +23,8 @@
#ifndef __CTXSW_TRACE_GK20A_H #ifndef __CTXSW_TRACE_GK20A_H
#define __CTXSW_TRACE_GK20A_H #define __CTXSW_TRACE_GK20A_H
#include <nvgpu/types.h>
#define GK20A_CTXSW_TRACE_NUM_DEVS 1 #define GK20A_CTXSW_TRACE_NUM_DEVS 1
struct file; struct file;
@@ -41,20 +43,22 @@ int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp);
int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp); int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp);
long gk20a_ctxsw_dev_ioctl(struct file *filp, long gk20a_ctxsw_dev_ioctl(struct file *filp,
unsigned int cmd, unsigned long arg); unsigned int cmd, unsigned long arg);
ssize_t gk20a_ctxsw_dev_read(struct file *, char __user *, size_t, loff_t *); ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf,
unsigned int gk20a_ctxsw_dev_poll(struct file *, struct poll_table_struct *); size_t size, loff_t *offs);
int gk20a_ctxsw_dev_mmap(struct file *, struct vm_area_struct *); unsigned int gk20a_ctxsw_dev_poll(struct file *filp,
struct poll_table_struct *pts);
int gk20a_ctxsw_dev_mmap(struct file *filp, struct vm_area_struct *vma);
int gk20a_ctxsw_dev_ring_alloc(struct gk20a *g, void **buf, size_t *size); int gk20a_ctxsw_dev_ring_alloc(struct gk20a *g, void **buf, size_t *size);
int gk20a_ctxsw_dev_ring_free(struct gk20a *g); int gk20a_ctxsw_dev_ring_free(struct gk20a *g);
int gk20a_ctxsw_dev_mmap_buffer(struct gk20a *g, struct vm_area_struct *vma); int gk20a_ctxsw_dev_mmap_buffer(struct gk20a *g, struct vm_area_struct *vma);
int gk20a_ctxsw_trace_init(struct gk20a *); int gk20a_ctxsw_trace_init(struct gk20a *g);
void gk20a_ctxsw_trace_cleanup(struct gk20a *); void gk20a_ctxsw_trace_cleanup(struct gk20a *g);
int gk20a_ctxsw_trace_write(struct gk20a *, struct nvgpu_ctxsw_trace_entry *); int gk20a_ctxsw_trace_write(struct gk20a *g,
struct nvgpu_ctxsw_trace_entry *entry);
void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid); void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid);
void gk20a_ctxsw_trace_channel_reset(struct gk20a *g, struct channel_gk20a *ch); void gk20a_ctxsw_trace_channel_reset(struct gk20a *g, struct channel_gk20a *ch);
void gk20a_ctxsw_trace_tsg_reset(struct gk20a *g, struct tsg_gk20a *tsg); void gk20a_ctxsw_trace_tsg_reset(struct gk20a *g, struct tsg_gk20a *tsg);
#endif /* __CTXSW_TRACE_GK20A_H */ #endif /* __CTXSW_TRACE_GK20A_H */

View File

@@ -1228,16 +1228,24 @@ void gk20a_fifo_reset_engine(struct gk20a *g, u32 engine_id)
if (nvgpu_pmu_disable_elpg(g)) if (nvgpu_pmu_disable_elpg(g))
nvgpu_err(g, "failed to set disable elpg"); nvgpu_err(g, "failed to set disable elpg");
} }
/* resetting engine will alter read/write index.
* need to flush circular buffer before re-enabling FECS. #ifdef CONFIG_GK20A_CTXSW_TRACE
/*
* Resetting engine will alter read/write index. Need to flush
* circular buffer before re-enabling FECS.
*/ */
if (g->ops.fecs_trace.reset) if (g->ops.fecs_trace.reset)
g->ops.fecs_trace.reset(g); g->ops.fecs_trace.reset(g);
/*HALT_PIPELINE method, halt GR engine*/ #endif
/* HALT_PIPELINE method, halt GR engine. */
if (gr_gk20a_halt_pipe(g)) if (gr_gk20a_halt_pipe(g))
nvgpu_err(g, "failed to HALT gr pipe"); nvgpu_err(g, "failed to HALT gr pipe");
/* resetting engine using mc_enable_r() is not
enough, we do full init sequence */ /*
* Resetting engine using mc_enable_r() is not enough; we must
* do full init sequence.
*/
gk20a_gr_reset(g); gk20a_gr_reset(g);
if (g->support_pmu && g->can_elpg) if (g->support_pmu && g->can_elpg)
nvgpu_pmu_enable_elpg(g); nvgpu_pmu_enable_elpg(g);
@@ -1618,6 +1626,8 @@ static bool gk20a_fifo_handle_mmu_fault(
} }
} }
} }
#ifdef CONFIG_GK20A_CTXSW_TRACE
/* /*
* For non fake mmu fault, both tsg and ch pointers * For non fake mmu fault, both tsg and ch pointers
* could be valid. Check tsg first. * could be valid. Check tsg first.
@@ -1626,10 +1636,11 @@ static bool gk20a_fifo_handle_mmu_fault(
gk20a_ctxsw_trace_tsg_reset(g, tsg); gk20a_ctxsw_trace_tsg_reset(g, tsg);
else if (ch) else if (ch)
gk20a_ctxsw_trace_channel_reset(g, ch); gk20a_ctxsw_trace_channel_reset(g, ch);
#endif
/* disable the channel/TSG from hw and increment /*
* syncpoints */ * Disable the channel/TSG from hw and increment syncpoints.
*/
if (tsg) { if (tsg) {
if (!g->fifo.deferred_reset_pending) { if (!g->fifo.deferred_reset_pending) {
if (!fake_fault) if (!fake_fault)

View File

@@ -295,9 +295,11 @@ int gk20a_finalize_poweron(struct gk20a *g)
goto done; goto done;
} }
#ifdef CONFIG_GK20A_CTXSW_TRACE
err = gk20a_ctxsw_trace_init(g); err = gk20a_ctxsw_trace_init(g);
if (err) if (err)
nvgpu_warn(g, "could not initialize ctxsw tracing"); nvgpu_warn(g, "could not initialize ctxsw tracing");
#endif
err = gk20a_sched_ctrl_init(g); err = gk20a_sched_ctrl_init(g);
if (err) { if (err) {

View File

@@ -689,18 +689,25 @@ struct gpu_ops {
int (*get_netlist_name)(struct gk20a *g, int index, char *name); int (*get_netlist_name)(struct gk20a *g, int index, char *name);
bool (*is_fw_defined)(void); bool (*is_fw_defined)(void);
} gr_ctx; } gr_ctx;
#ifdef CONFIG_GK20A_CTXSW_TRACE
/*
* Currently only supported on Linux due to the extremely tight
* integration with Linux device driver structure (in particular
* mmap).
*/
struct { struct {
int (*init)(struct gk20a *g); int (*init)(struct gk20a *g);
int (*max_entries)(struct gk20a *, int (*max_entries)(struct gk20a *,
struct nvgpu_ctxsw_trace_filter *); struct nvgpu_ctxsw_trace_filter *filter);
int (*flush)(struct gk20a *g); int (*flush)(struct gk20a *g);
int (*poll)(struct gk20a *g); int (*poll)(struct gk20a *g);
int (*enable)(struct gk20a *g); int (*enable)(struct gk20a *g);
int (*disable)(struct gk20a *g); int (*disable)(struct gk20a *g);
bool (*is_enabled)(struct gk20a *g); bool (*is_enabled)(struct gk20a *g);
int (*reset)(struct gk20a *g); int (*reset)(struct gk20a *g);
int (*bind_channel)(struct gk20a *, struct channel_gk20a *); int (*bind_channel)(struct gk20a *g, struct channel_gk20a *ch);
int (*unbind_channel)(struct gk20a *, struct channel_gk20a *); int (*unbind_channel)(struct gk20a *g,
struct channel_gk20a *ch);
int (*deinit)(struct gk20a *g); int (*deinit)(struct gk20a *g);
int (*alloc_user_buffer)(struct gk20a *g, int (*alloc_user_buffer)(struct gk20a *g,
void **buf, size_t *size); void **buf, size_t *size);
@@ -710,6 +717,7 @@ struct gpu_ops {
int (*set_filter)(struct gk20a *g, int (*set_filter)(struct gk20a *g,
struct nvgpu_ctxsw_trace_filter *filter); struct nvgpu_ctxsw_trace_filter *filter);
} fecs_trace; } fecs_trace;
#endif
struct { struct {
bool (*support_sparse)(struct gk20a *g); bool (*support_sparse)(struct gk20a *g);
u64 (*gmmu_map)(struct vm_gk20a *vm, u64 (*gmmu_map)(struct vm_gk20a *vm,

View File

@@ -3070,13 +3070,14 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
"fail to load golden ctx image"); "fail to load golden ctx image");
goto out; goto out;
} }
#ifdef CONFIG_GK20A_CTXSW_TRACE
if (g->ops.fecs_trace.bind_channel && !c->vpr) { if (g->ops.fecs_trace.bind_channel && !c->vpr) {
err = g->ops.fecs_trace.bind_channel(g, c); err = g->ops.fecs_trace.bind_channel(g, c);
if (err) { if (err)
nvgpu_warn(g, nvgpu_warn(g,
"fail to bind channel for ctxsw trace"); "fail to bind channel for ctxsw trace");
}
} }
#endif
c->first_init = true; c->first_init = true;
} }

View File

@@ -714,7 +714,9 @@ int gp106_init_hal(struct gk20a *g)
gops->clock_gating = gp106_ops.clock_gating; gops->clock_gating = gp106_ops.clock_gating;
gops->fifo = gp106_ops.fifo; gops->fifo = gp106_ops.fifo;
gops->gr_ctx = gp106_ops.gr_ctx; gops->gr_ctx = gp106_ops.gr_ctx;
#ifdef CONFIG_GK20A_CTXSW_TRACE
gops->fecs_trace = gp106_ops.fecs_trace; gops->fecs_trace = gp106_ops.fecs_trace;
#endif
gops->mm = gp106_ops.mm; gops->mm = gp106_ops.mm;
gops->pramin = gp106_ops.pramin; gops->pramin = gp106_ops.pramin;
gops->therm = gp106_ops.therm; gops->therm = gp106_ops.therm;

View File

@@ -619,7 +619,9 @@ int gp10b_init_hal(struct gk20a *g)
gops->clock_gating = gp10b_ops.clock_gating; gops->clock_gating = gp10b_ops.clock_gating;
gops->fifo = gp10b_ops.fifo; gops->fifo = gp10b_ops.fifo;
gops->gr_ctx = gp10b_ops.gr_ctx; gops->gr_ctx = gp10b_ops.gr_ctx;
#ifdef CONFIG_GK20A_CTXSW_TRACE
gops->fecs_trace = gp10b_ops.fecs_trace; gops->fecs_trace = gp10b_ops.fecs_trace;
#endif
gops->mm = gp10b_ops.mm; gops->mm = gp10b_ops.mm;
gops->pramin = gp10b_ops.pramin; gops->pramin = gp10b_ops.pramin;
gops->therm = gp10b_ops.therm; gops->therm = gp10b_ops.therm;