gpu: nvgpu: move fecs_trace_enable/disable APIs to gr/fecs_trace

Move below APIs from gk20a/fecs_trace_gk20a.c
gk20a_fecs_trace_enable()
gk20a_fecs_trace_disable()
gk20a_fecs_trace_is_enabled()
gk20a_fecs_trace_reset_buffer()
gk20a_fecs_trace_buffer_size()
gk20a_gr_max_entries()

and move them to new gr/fecs_trace unit with below renames
nvgpu_gr_fecs_trace_enable()
nvgpu_gr_fecs_trace_disable()
nvgpu_gr_fecs_trace_is_enabled()
nvgpu_gr_fecs_trace_reset_buffer()
nvgpu_gr_fecs_trace_buffer_size()
nvgpu_gr_fecs_trace_max_entries()

Use new functions in the driver instead of old ones

Export gk20a_fecs_trace_periodic_polling() in fecs_trace_gk20a.h
header since it is needed in gr/fecs_trace for transition
This include and the function itself will be later moved to
gr/fecs_trace unit

Move struct nvgpu_gpu_ctxsw_trace_filter and all filter TSG
macros in the form NVGPU_GPU_CTXSW_TAG_* to gr/fecs_trace.h

Jira NVGPU-1880

Change-Id: Ic95b99554e626033a111452f311bbc026ec604e2
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2027530
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Deepak Nibade
2019-02-25 17:37:28 +05:30
committed by mobile promotions
parent ec513e4a1a
commit 73d62c0c52
12 changed files with 151 additions and 131 deletions

View File

@@ -28,6 +28,14 @@
#include <nvgpu/gr/global_ctx.h>
#include <nvgpu/gr/fecs_trace.h>
/*
* TODO: This include is only needed for transition phase to new unit
* Remove as soon as transition is complete
*/
#include "gk20a/fecs_trace_gk20a.h"
#ifdef CONFIG_GK20A_CTXSW_TRACE
int nvgpu_gr_fecs_trace_add_context(struct gk20a *g, u32 context_ptr,
pid_t pid, u32 vmid, struct nvgpu_list_node *list)
{
@@ -219,3 +227,88 @@ bool nvgpu_gr_fecs_trace_is_valid_record(struct gk20a *g,
*/
return g->ops.gr.ctxsw_prog.is_ts_valid_record(r->magic_hi);
}
size_t nvgpu_gr_fecs_trace_buffer_size(struct gk20a *g)
{
return GK20A_FECS_TRACE_NUM_RECORDS
* g->ops.gr.ctxsw_prog.hw_get_ts_record_size_in_bytes();
}
int nvgpu_gr_fecs_trace_max_entries(struct gk20a *g,
struct nvgpu_gpu_ctxsw_trace_filter *filter)
{
int n;
int tag;
/* Compute number of entries per record, with given filter */
for (n = 0, tag = 0; tag < nvgpu_gr_fecs_trace_num_ts(g); tag++)
n += (NVGPU_GPU_CTXSW_FILTER_ISSET(tag, filter) != 0);
/* Return max number of entries generated for the whole ring */
return n * GK20A_FECS_TRACE_NUM_RECORDS;
}
int nvgpu_gr_fecs_trace_enable(struct gk20a *g)
{
struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
int write;
int err = 0;
nvgpu_mutex_acquire(&trace->enable_lock);
trace->enable_count++;
if (trace->enable_count == 1U) {
/* drop data in hw buffer */
if (g->ops.fecs_trace.flush)
g->ops.fecs_trace.flush(g);
write = g->ops.fecs_trace.get_write_index(g);
g->ops.fecs_trace.set_read_index(g, write);
err = nvgpu_thread_create(&trace->poll_task, g,
gk20a_fecs_trace_periodic_polling, __func__);
if (err != 0) {
nvgpu_warn(g, "failed to create FECS polling task");
goto done;
}
}
done:
nvgpu_mutex_release(&trace->enable_lock);
return err;
}
int nvgpu_gr_fecs_trace_disable(struct gk20a *g)
{
struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
if (trace == NULL) {
return -EINVAL;
}
nvgpu_mutex_acquire(&trace->enable_lock);
trace->enable_count--;
if (trace->enable_count == 0U) {
nvgpu_thread_stop(&trace->poll_task);
}
nvgpu_mutex_release(&trace->enable_lock);
return 0;
}
bool nvgpu_gr_fecs_trace_is_enabled(struct gk20a *g)
{
struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
return (trace && (trace->enable_count > 0));
}
void nvgpu_gr_fecs_trace_reset_buffer(struct gk20a *g)
{
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " ");
g->ops.fecs_trace.set_read_index(g,
g->ops.fecs_trace.get_write_index(g));
}
#endif /* CONFIG_GK20A_CTXSW_TRACE */

View File

@@ -38,6 +38,7 @@
#include <nvgpu/gr/ctx.h>
#include <nvgpu/gr/config.h>
#include <nvgpu/gr/zbc.h>
#include <nvgpu/gr/fecs_trace.h>
#include "gr_vgpu.h"
#include "gk20a/fecs_trace_gk20a.h"
@@ -127,7 +128,7 @@ int vgpu_gr_init_ctx_state(struct gk20a *g)
g->gr.ctx_vars.priv_access_map_size = 512 * 1024;
#ifdef CONFIG_GK20A_CTXSW_TRACE
g->gr.ctx_vars.fecs_trace_buffer_size = gk20a_fecs_trace_buffer_size(g);
g->gr.ctx_vars.fecs_trace_buffer_size = nvgpu_gr_fecs_trace_buffer_size(g);
#endif
return 0;
}

View File

@@ -220,7 +220,7 @@ done:
return err;
}
static int gk20a_fecs_trace_periodic_polling(void *arg)
int gk20a_fecs_trace_periodic_polling(void *arg)
{
struct gk20a *g = (struct gk20a *)arg;
struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
@@ -238,12 +238,6 @@ static int gk20a_fecs_trace_periodic_polling(void *arg)
return 0;
}
size_t gk20a_fecs_trace_buffer_size(struct gk20a *g)
{
return GK20A_FECS_TRACE_NUM_RECORDS
* g->ops.gr.ctxsw_prog.hw_get_ts_record_size_in_bytes();
}
int gk20a_fecs_trace_bind_channel(struct gk20a *g,
struct channel_gk20a *ch, u32 vmid, struct nvgpu_gr_ctx *gr_ctx)
{
@@ -348,79 +342,6 @@ int gk20a_fecs_trace_reset(struct gk20a *g)
return g->ops.fecs_trace.set_read_index(g, 0);
}
int gk20a_gr_max_entries(struct gk20a *g,
struct nvgpu_gpu_ctxsw_trace_filter *filter)
{
int n;
int tag;
/* Compute number of entries per record, with given filter */
for (n = 0, tag = 0; tag < nvgpu_gr_fecs_trace_num_ts(g); tag++)
n += (NVGPU_GPU_CTXSW_FILTER_ISSET(tag, filter) != 0);
/* Return max number of entries generated for the whole ring */
return n * GK20A_FECS_TRACE_NUM_RECORDS;
}
int gk20a_fecs_trace_enable(struct gk20a *g)
{
struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
int write;
int err = 0;
nvgpu_mutex_acquire(&trace->enable_lock);
trace->enable_count++;
if (trace->enable_count == 1U) {
/* drop data in hw buffer */
if (g->ops.fecs_trace.flush)
g->ops.fecs_trace.flush(g);
write = g->ops.fecs_trace.get_write_index(g);
g->ops.fecs_trace.set_read_index(g, write);
err = nvgpu_thread_create(&trace->poll_task, g,
gk20a_fecs_trace_periodic_polling, __func__);
if (err != 0) {
nvgpu_warn(g, "failed to create FECS polling task");
goto done;
}
}
done:
nvgpu_mutex_release(&trace->enable_lock);
return err;
}
int gk20a_fecs_trace_disable(struct gk20a *g)
{
struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
nvgpu_mutex_acquire(&trace->enable_lock);
trace->enable_count--;
if (trace->enable_count == 0U) {
nvgpu_thread_stop(&trace->poll_task);
}
nvgpu_mutex_release(&trace->enable_lock);
return 0;
}
bool gk20a_fecs_trace_is_enabled(struct gk20a *g)
{
struct nvgpu_gr_fecs_trace *trace = g->fecs_trace;
return (trace && (trace->enable_count > 0));
}
void gk20a_fecs_trace_reset_buffer(struct gk20a *g)
{
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " ");
g->ops.fecs_trace.set_read_index(g,
g->ops.fecs_trace.get_write_index(g));
}
u32 gk20a_fecs_trace_get_buffer_full_mailbox_val(void)
{
return 0x26;

View File

@@ -29,18 +29,12 @@ struct nvgpu_gpu_ctxsw_trace_filter;
struct nvgpu_gr_ctx;
int gk20a_fecs_trace_poll(struct gk20a *g);
int gk20a_fecs_trace_periodic_polling(void *arg);
int gk20a_fecs_trace_bind_channel(struct gk20a *g,
struct channel_gk20a *ch, u32 vmid,
struct nvgpu_gr_ctx *gr_ctx);
int gk20a_fecs_trace_unbind_channel(struct gk20a *g, struct channel_gk20a *ch);
int gk20a_fecs_trace_reset(struct gk20a *g);
int gk20a_gr_max_entries(struct gk20a *g,
struct nvgpu_gpu_ctxsw_trace_filter *filter);
int gk20a_fecs_trace_enable(struct gk20a *g);
int gk20a_fecs_trace_disable(struct gk20a *g);
bool gk20a_fecs_trace_is_enabled(struct gk20a *g);
size_t gk20a_fecs_trace_buffer_size(struct gk20a *g);
void gk20a_fecs_trace_reset_buffer(struct gk20a *g);
u32 gk20a_fecs_trace_get_buffer_full_mailbox_val(void);
#endif /* NVGPU_GK20A_FECS_TRACE_GK20A_H */

View File

@@ -54,6 +54,7 @@
#include <nvgpu/gr/ctx.h>
#include <nvgpu/gr/zbc.h>
#include <nvgpu/gr/config.h>
#include <nvgpu/gr/fecs_trace.h>
#include <nvgpu/engines.h>
#include <nvgpu/engine_status.h>
@@ -2012,7 +2013,7 @@ int gr_gk20a_init_ctx_state(struct gk20a *g)
g->gr.ctx_vars.priv_access_map_size = 512 * 1024;
#ifdef CONFIG_GK20A_CTXSW_TRACE
g->gr.ctx_vars.fecs_trace_buffer_size =
gk20a_fecs_trace_buffer_size(g);
nvgpu_gr_fecs_trace_buffer_size(g);
#endif
}
@@ -3591,7 +3592,7 @@ int gk20a_gr_handle_fecs_error(struct gk20a *g, struct channel_gk20a *ch,
g->ops.fecs_trace.get_buffer_full_mailbox_val()) {
nvgpu_info(g, "ctxsw intr0 set by ucode, "
"timestamp buffer full");
gk20a_fecs_trace_reset_buffer(g);
nvgpu_gr_fecs_trace_reset_buffer(g);
} else {
nvgpu_err(g,
"ctxsw intr0 set by ucode, error_code: 0x%08x",

View File

@@ -659,15 +659,15 @@ static const struct gpu_ops gp10b_ops = {
.mmap_user_buffer = gk20a_ctxsw_dev_mmap_buffer,
.init = nvgpu_gr_fecs_trace_init,
.deinit = nvgpu_gr_fecs_trace_deinit,
.enable = gk20a_fecs_trace_enable,
.disable = gk20a_fecs_trace_disable,
.is_enabled = gk20a_fecs_trace_is_enabled,
.enable = nvgpu_gr_fecs_trace_enable,
.disable = nvgpu_gr_fecs_trace_disable,
.is_enabled = nvgpu_gr_fecs_trace_is_enabled,
.reset = gk20a_fecs_trace_reset,
.flush = gp10b_fecs_trace_flush,
.poll = gk20a_fecs_trace_poll,
.bind_channel = gk20a_fecs_trace_bind_channel,
.unbind_channel = gk20a_fecs_trace_unbind_channel,
.max_entries = gk20a_gr_max_entries,
.max_entries = nvgpu_gr_fecs_trace_max_entries,
.get_buffer_full_mailbox_val =
gk20a_fecs_trace_get_buffer_full_mailbox_val,
.get_read_index = gm20b_fecs_trace_get_read_index,

View File

@@ -825,15 +825,15 @@ static const struct gpu_ops gv100_ops = {
.mmap_user_buffer = gk20a_ctxsw_dev_mmap_buffer,
.init = nvgpu_gr_fecs_trace_init,
.deinit = nvgpu_gr_fecs_trace_deinit,
.enable = gk20a_fecs_trace_enable,
.disable = gk20a_fecs_trace_disable,
.is_enabled = gk20a_fecs_trace_is_enabled,
.enable = nvgpu_gr_fecs_trace_enable,
.disable = nvgpu_gr_fecs_trace_disable,
.is_enabled = nvgpu_gr_fecs_trace_is_enabled,
.reset = gk20a_fecs_trace_reset,
.flush = NULL,
.poll = gk20a_fecs_trace_poll,
.bind_channel = gk20a_fecs_trace_bind_channel,
.unbind_channel = gk20a_fecs_trace_unbind_channel,
.max_entries = gk20a_gr_max_entries,
.max_entries = nvgpu_gr_fecs_trace_max_entries,
.get_buffer_full_mailbox_val =
gk20a_fecs_trace_get_buffer_full_mailbox_val,
.get_read_index = gm20b_fecs_trace_get_read_index,

View File

@@ -783,15 +783,15 @@ static const struct gpu_ops gv11b_ops = {
.mmap_user_buffer = gk20a_ctxsw_dev_mmap_buffer,
.init = nvgpu_gr_fecs_trace_init,
.deinit = nvgpu_gr_fecs_trace_deinit,
.enable = gk20a_fecs_trace_enable,
.disable = gk20a_fecs_trace_disable,
.is_enabled = gk20a_fecs_trace_is_enabled,
.enable = nvgpu_gr_fecs_trace_enable,
.disable = nvgpu_gr_fecs_trace_disable,
.is_enabled = nvgpu_gr_fecs_trace_is_enabled,
.reset = gk20a_fecs_trace_reset,
.flush = NULL,
.poll = gk20a_fecs_trace_poll,
.bind_channel = gk20a_fecs_trace_bind_channel,
.unbind_channel = gk20a_fecs_trace_unbind_channel,
.max_entries = gk20a_gr_max_entries,
.max_entries = nvgpu_gr_fecs_trace_max_entries,
.get_buffer_full_mailbox_val =
gk20a_fecs_trace_get_buffer_full_mailbox_val,
.get_read_index = gm20b_fecs_trace_get_read_index,

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -29,30 +29,6 @@ struct gk20a;
struct tsg_gk20a;
struct channel_gk20a;
#define NVGPU_GPU_CTXSW_TAG_SOF 0x00
#define NVGPU_GPU_CTXSW_TAG_CTXSW_REQ_BY_HOST 0x01
#define NVGPU_GPU_CTXSW_TAG_FE_ACK 0x02
#define NVGPU_GPU_CTXSW_TAG_FE_ACK_WFI 0x0a
#define NVGPU_GPU_CTXSW_TAG_FE_ACK_GFXP 0x0b
#define NVGPU_GPU_CTXSW_TAG_FE_ACK_CTAP 0x0c
#define NVGPU_GPU_CTXSW_TAG_FE_ACK_CILP 0x0d
#define NVGPU_GPU_CTXSW_TAG_SAVE_END 0x03
#define NVGPU_GPU_CTXSW_TAG_RESTORE_START 0x04
#define NVGPU_GPU_CTXSW_TAG_CONTEXT_START 0x05
#define NVGPU_GPU_CTXSW_TAG_ENGINE_RESET 0xfe
#define NVGPU_GPU_CTXSW_TAG_INVALID_TIMESTAMP 0xff
#define NVGPU_GPU_CTXSW_TAG_LAST \
NVGPU_GPU_CTXSW_TAG_INVALID_TIMESTAMP
#define NVGPU_GPU_CTXSW_FILTER_ISSET(n, p) \
((p)->tag_bits[(n) / 64] & (1 << ((n) & 63)))
#define NVGPU_GPU_CTXSW_FILTER_SIZE (NVGPU_GPU_CTXSW_TAG_LAST + 1)
struct nvgpu_gpu_ctxsw_trace_filter {
u64 tag_bits[(NVGPU_GPU_CTXSW_FILTER_SIZE + 63) / 64];
};
/* must be consistent with nvgpu_ctxsw_ring_header */
struct nvgpu_ctxsw_ring_header_internal {
u32 magic;

View File

@@ -36,6 +36,26 @@
#define GK20A_FECS_TRACE_FRAME_PERIOD_US (1000000ULL/60ULL)
#define GK20A_FECS_TRACE_PTIMER_SHIFT 5
#define NVGPU_GPU_CTXSW_TAG_SOF 0x00
#define NVGPU_GPU_CTXSW_TAG_CTXSW_REQ_BY_HOST 0x01
#define NVGPU_GPU_CTXSW_TAG_FE_ACK 0x02
#define NVGPU_GPU_CTXSW_TAG_FE_ACK_WFI 0x0a
#define NVGPU_GPU_CTXSW_TAG_FE_ACK_GFXP 0x0b
#define NVGPU_GPU_CTXSW_TAG_FE_ACK_CTAP 0x0c
#define NVGPU_GPU_CTXSW_TAG_FE_ACK_CILP 0x0d
#define NVGPU_GPU_CTXSW_TAG_SAVE_END 0x03
#define NVGPU_GPU_CTXSW_TAG_RESTORE_START 0x04
#define NVGPU_GPU_CTXSW_TAG_CONTEXT_START 0x05
#define NVGPU_GPU_CTXSW_TAG_ENGINE_RESET 0xfe
#define NVGPU_GPU_CTXSW_TAG_INVALID_TIMESTAMP 0xff
#define NVGPU_GPU_CTXSW_TAG_LAST \
NVGPU_GPU_CTXSW_TAG_INVALID_TIMESTAMP
#define NVGPU_GPU_CTXSW_FILTER_ISSET(n, p) \
((p)->tag_bits[(n) / 64] & (1 << ((n) & 63)))
#define NVGPU_GPU_CTXSW_FILTER_SIZE (NVGPU_GPU_CTXSW_TAG_LAST + 1)
struct gk20a;
struct nvgpu_gr_fecs_trace {
@@ -59,6 +79,10 @@ struct nvgpu_fecs_trace_record {
u64 ts[];
};
struct nvgpu_gpu_ctxsw_trace_filter {
u64 tag_bits[(NVGPU_GPU_CTXSW_FILTER_SIZE + 63) / 64];
};
struct nvgpu_fecs_trace_context_entry {
u32 context_ptr;
@@ -94,4 +118,13 @@ void nvgpu_gr_fecs_trace_remove_contexts(struct gk20a *g,
void nvgpu_gr_fecs_trace_find_pid(struct gk20a *g, u32 context_ptr,
struct nvgpu_list_node *list, pid_t *pid, u32 *vmid);
size_t nvgpu_gr_fecs_trace_buffer_size(struct gk20a *g);
int nvgpu_gr_fecs_trace_max_entries(struct gk20a *g,
struct nvgpu_gpu_ctxsw_trace_filter *filter);
int nvgpu_gr_fecs_trace_enable(struct gk20a *g);
int nvgpu_gr_fecs_trace_disable(struct gk20a *g);
bool nvgpu_gr_fecs_trace_is_enabled(struct gk20a *g);
void nvgpu_gr_fecs_trace_reset_buffer(struct gk20a *g);
#endif /* NVGPU_GR_FECS_TRACE_H */

View File

@@ -27,6 +27,7 @@
#include <nvgpu/barrier.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/channel.h>
#include <nvgpu/gr/fecs_trace.h>
#include <nvgpu/string.h>
#include "gk20a/gr_gk20a.h"

View File

@@ -855,15 +855,15 @@ static const struct gpu_ops tu104_ops = {
.mmap_user_buffer = gk20a_ctxsw_dev_mmap_buffer,
.init = nvgpu_gr_fecs_trace_init,
.deinit = nvgpu_gr_fecs_trace_deinit,
.enable = gk20a_fecs_trace_enable,
.disable = gk20a_fecs_trace_disable,
.is_enabled = gk20a_fecs_trace_is_enabled,
.enable = nvgpu_gr_fecs_trace_enable,
.disable = nvgpu_gr_fecs_trace_disable,
.is_enabled = nvgpu_gr_fecs_trace_is_enabled,
.reset = gk20a_fecs_trace_reset,
.flush = NULL,
.poll = gk20a_fecs_trace_poll,
.bind_channel = gk20a_fecs_trace_bind_channel,
.unbind_channel = gk20a_fecs_trace_unbind_channel,
.max_entries = gk20a_gr_max_entries,
.max_entries = nvgpu_gr_fecs_trace_max_entries,
.get_buffer_full_mailbox_val =
tu104_fecs_trace_get_buffer_full_mailbox_val,
.get_read_index = gm20b_fecs_trace_get_read_index,