mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 17:36:20 +03:00
Remove a dependency to a graphics type in tsg header by adding a pointer
indirection.

Jira NVGPU-967
Jira NVGPU-1149

Change-Id: I9177e6eedf08bfe4a3b981b67fa8d4d734f9e50f
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1822023
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
638 lines
16 KiB
C
/*
 * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <nvgpu/kmem.h>
#include <nvgpu/dma.h>
#include <nvgpu/enabled.h>
#include <nvgpu/bug.h>
#include <nvgpu/hashtable.h>
#include <nvgpu/circ_buf.h>
#include <nvgpu/thread.h>
#include <nvgpu/barrier.h>
#include <nvgpu/mm.h>
#include <nvgpu/ctxsw_trace.h>
#include <nvgpu/io.h>
#include <nvgpu/utils.h>
#include <nvgpu/timers.h>
#include <nvgpu/channel.h>
#include <nvgpu/gk20a.h>

#include "fecs_trace_gk20a.h"
#include "gr_gk20a.h"

#include <nvgpu/log.h>
#include <nvgpu/fecs_trace.h>

#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
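
/*
 * FECS firmware writes context-switch timestamp records into a circular
 * buffer; the code below polls that buffer, maps FECS context pointers
 * back to process ids via a small hash table, and pushes the decoded
 * entries to the userspace-facing ctxsw trace queue.
 */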

struct gk20a_fecs_trace_hash_ent {
	u32 context_ptr;
	pid_t pid;
	struct hlist_node node;
};

struct gk20a_fecs_trace {
	DECLARE_HASHTABLE(pid_hash_table, GK20A_FECS_TRACE_HASH_BITS);
	struct nvgpu_mutex hash_lock;
	struct nvgpu_mutex poll_lock;
	struct nvgpu_thread poll_task;
	bool init;
};

#ifdef CONFIG_GK20A_CTXSW_TRACE
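
/*
 * Each 64-bit timestamp entry in a FECS record carries an event tag in
 * its upper bits; the helpers below split an entry into its tag and
 * timestamp parts.
 */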
u32 gk20a_fecs_trace_record_ts_tag_invalid_ts_v(void)
{
	return ctxsw_prog_record_timestamp_timestamp_hi_tag_invalid_timestamp_v();
}

u32 gk20a_fecs_trace_record_ts_tag_v(u64 ts)
{
	return ctxsw_prog_record_timestamp_timestamp_hi_tag_v((u32) (ts >> 32));
}

u64 gk20a_fecs_trace_record_ts_timestamp_v(u64 ts)
{
	return ts & ~(((u64)ctxsw_prog_record_timestamp_timestamp_hi_tag_m()) << 32);
}
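
/*
 * FECS identifies a channel context by the upper bits of its instance
 * block address; this matches the context_ptr fields in trace records.
 */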
static u32 gk20a_fecs_trace_fecs_context_ptr(struct gk20a *g, struct channel_gk20a *ch)
{
	return (u32) (nvgpu_inst_block_addr(g, &ch->inst_block) >> 12LL);
}

int gk20a_fecs_trace_num_ts(void)
{
	return (ctxsw_prog_record_timestamp_record_size_in_bytes_v()
		- sizeof(struct gk20a_fecs_trace_record)) / sizeof(u64);
}
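
/*
 * Records live in the FECS_TRACE_BUFFER global context buffer as
 * fixed-size slots, indexed straight from the CPU mapping.
 */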
struct gk20a_fecs_trace_record *gk20a_fecs_trace_get_record(
	struct gk20a *g, int idx)
{
	struct nvgpu_mem *mem = &g->gr.global_ctx_buffer[FECS_TRACE_BUFFER].mem;

	return (struct gk20a_fecs_trace_record *)
		((u8 *) mem->cpu_va
		+ (idx * ctxsw_prog_record_timestamp_record_size_in_bytes_v()));
}

bool gk20a_fecs_trace_is_valid_record(struct gk20a_fecs_trace_record *r)
{
	/*
	 * testing magic_hi should suffice. magic_lo is sometimes used
	 * as a sequence number in experimental ucode.
	 */
	return (r->magic_hi
		== ctxsw_prog_record_timestamp_magic_value_hi_v_value_v());
}
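
/*
 * FECS exports the ring indices through mailbox registers: mailbox1
 * holds the read index and mailbox0 the write index. Accesses are
 * wrapped in gr_gk20a_elpg_protected_call() so the engine is powered
 * while the registers are touched.
 */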
int gk20a_fecs_trace_get_read_index(struct gk20a *g)
{
	return gr_gk20a_elpg_protected_call(g,
			gk20a_readl(g, gr_fecs_mailbox1_r()));
}

int gk20a_fecs_trace_get_write_index(struct gk20a *g)
{
	return gr_gk20a_elpg_protected_call(g,
			gk20a_readl(g, gr_fecs_mailbox0_r()));
}

static int gk20a_fecs_trace_set_read_index(struct gk20a *g, int index)
{
	nvgpu_log(g, gpu_dbg_ctxsw, "set read=%d", index);
	return gr_gk20a_elpg_protected_call(g,
			(gk20a_writel(g, gr_fecs_mailbox1_r(), index), 0));
}

void gk20a_fecs_trace_hash_dump(struct gk20a *g)
{
	u32 bkt;
	struct gk20a_fecs_trace_hash_ent *ent;
	struct gk20a_fecs_trace *trace = g->fecs_trace;

	nvgpu_log(g, gpu_dbg_ctxsw, "dumping hash table");

	nvgpu_mutex_acquire(&trace->hash_lock);
	hash_for_each(trace->pid_hash_table, bkt, ent, node) {
		nvgpu_log(g, gpu_dbg_ctxsw, " ent=%p bkt=%x context_ptr=%x pid=%d",
			ent, bkt, ent->context_ptr, ent->pid);
	}
	nvgpu_mutex_release(&trace->hash_lock);
}
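
/*
 * pid_hash_table bookkeeping: entries are added at channel bind time,
 * looked up while decoding records, and removed again at unbind.
 */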
static int gk20a_fecs_trace_hash_add(struct gk20a *g, u32 context_ptr, pid_t pid)
{
	struct gk20a_fecs_trace_hash_ent *he;
	struct gk20a_fecs_trace *trace = g->fecs_trace;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
		"adding hash entry context_ptr=%x -> pid=%d", context_ptr, pid);

	he = nvgpu_kzalloc(g, sizeof(*he));
	if (unlikely(!he)) {
		nvgpu_warn(g,
			"can't alloc new hash entry for context_ptr=%x pid=%d",
			context_ptr, pid);
		return -ENOMEM;
	}

	he->context_ptr = context_ptr;
	he->pid = pid;
	nvgpu_mutex_acquire(&trace->hash_lock);
	hash_add(trace->pid_hash_table, &he->node, context_ptr);
	nvgpu_mutex_release(&trace->hash_lock);
	return 0;
}

static void gk20a_fecs_trace_hash_del(struct gk20a *g, u32 context_ptr)
{
	struct hlist_node *tmp;
	struct gk20a_fecs_trace_hash_ent *ent;
	struct gk20a_fecs_trace *trace = g->fecs_trace;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
		"freeing hash entry context_ptr=%x", context_ptr);

	nvgpu_mutex_acquire(&trace->hash_lock);
	hash_for_each_possible_safe(trace->pid_hash_table, ent, tmp, node,
		context_ptr) {
		if (ent->context_ptr == context_ptr) {
			hash_del(&ent->node);
			nvgpu_log(g, gpu_dbg_ctxsw,
				"freed hash entry=%p context_ptr=%x", ent,
				ent->context_ptr);
			nvgpu_kfree(g, ent);
			break;
		}
	}
	nvgpu_mutex_release(&trace->hash_lock);
}

static void gk20a_fecs_trace_free_hash_table(struct gk20a *g)
{
	u32 bkt;
	struct hlist_node *tmp;
	struct gk20a_fecs_trace_hash_ent *ent;
	struct gk20a_fecs_trace *trace = g->fecs_trace;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw, "trace=%p", trace);

	nvgpu_mutex_acquire(&trace->hash_lock);
	hash_for_each_safe(trace->pid_hash_table, bkt, tmp, ent, node) {
		hash_del(&ent->node);
		nvgpu_kfree(g, ent);
	}
	nvgpu_mutex_release(&trace->hash_lock);
}

static pid_t gk20a_fecs_trace_find_pid(struct gk20a *g, u32 context_ptr)
{
	struct gk20a_fecs_trace_hash_ent *ent;
	struct gk20a_fecs_trace *trace = g->fecs_trace;
	pid_t pid = 0;

	nvgpu_mutex_acquire(&trace->hash_lock);
	hash_for_each_possible(trace->pid_hash_table, ent, node, context_ptr) {
		if (ent->context_ptr == context_ptr) {
			nvgpu_log(g, gpu_dbg_ctxsw,
				"found context_ptr=%x -> pid=%d",
				ent->context_ptr, ent->pid);
			pid = ent->pid;
			break;
		}
	}
	nvgpu_mutex_release(&trace->hash_lock);

	return pid;
}

/*
 * Converts HW entry format to userspace-facing format and pushes it
 * to the queue.
 */
static int gk20a_fecs_trace_ring_read(struct gk20a *g, int index)
{
	int i;
	struct nvgpu_gpu_ctxsw_trace_entry entry = { };
	struct gk20a_fecs_trace *trace = g->fecs_trace;
	pid_t cur_pid;
	pid_t new_pid;
	int count = 0;

	/* for now, only one VM */
	const int vmid = 0;

	struct gk20a_fecs_trace_record *r =
		gk20a_fecs_trace_get_record(g, index);

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
		"consuming record trace=%p read=%d record=%p", trace, index, r);

	if (unlikely(!gk20a_fecs_trace_is_valid_record(r))) {
		nvgpu_warn(g,
			"trace=%p read=%d record=%p magic_lo=%08x magic_hi=%08x (invalid)",
			trace, index, r, r->magic_lo, r->magic_hi);
		return -EINVAL;
	}

	/* Clear magic_hi to detect cases where the CPU could read the write
	 * index before the FECS record is actually written to DRAM. This
	 * should not happen, as we force FECS writes to SYSMEM by reading
	 * through PRAMIN.
	 */
	r->magic_hi = 0;

	cur_pid = gk20a_fecs_trace_find_pid(g, r->context_ptr);
	new_pid = gk20a_fecs_trace_find_pid(g, r->new_context_ptr);

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
		"context_ptr=%x (pid=%d) new_context_ptr=%x (pid=%d)",
		r->context_ptr, cur_pid, r->new_context_ptr, new_pid);

	entry.context_id = r->context_id;
	entry.vmid = vmid;

	/* break out FECS record into trace events */
	for (i = 0; i < gk20a_fecs_trace_num_ts(); i++) {

		entry.tag = gk20a_fecs_trace_record_ts_tag_v(r->ts[i]);
		entry.timestamp = gk20a_fecs_trace_record_ts_timestamp_v(r->ts[i]);
		entry.timestamp <<= GK20A_FECS_TRACE_PTIMER_SHIFT;

		nvgpu_log(g, gpu_dbg_ctxsw,
			"tag=%x timestamp=%llx context_id=%08x new_context_id=%08x",
			entry.tag, entry.timestamp, r->context_id,
			r->new_context_id);

		switch (nvgpu_gpu_ctxsw_tags_to_common_tags(entry.tag)) {
		case NVGPU_GPU_CTXSW_TAG_RESTORE_START:
		case NVGPU_GPU_CTXSW_TAG_CONTEXT_START:
			entry.context_id = r->new_context_id;
			entry.pid = new_pid;
			break;

		case NVGPU_GPU_CTXSW_TAG_CTXSW_REQ_BY_HOST:
		case NVGPU_GPU_CTXSW_TAG_FE_ACK:
		case NVGPU_GPU_CTXSW_TAG_FE_ACK_WFI:
		case NVGPU_GPU_CTXSW_TAG_FE_ACK_GFXP:
		case NVGPU_GPU_CTXSW_TAG_FE_ACK_CTAP:
		case NVGPU_GPU_CTXSW_TAG_FE_ACK_CILP:
		case NVGPU_GPU_CTXSW_TAG_SAVE_END:
			entry.context_id = r->context_id;
			entry.pid = cur_pid;
			break;

		default:
			/* tags are not guaranteed to start at the beginning */
			WARN_ON(entry.tag && (entry.tag != NVGPU_GPU_CTXSW_TAG_INVALID_TIMESTAMP));
			continue;
		}

		nvgpu_log(g, gpu_dbg_ctxsw, "tag=%x context_id=%x pid=%lld",
			entry.tag, entry.context_id, entry.pid);

		if (!entry.context_id)
			continue;

		gk20a_ctxsw_trace_write(g, &entry);
		count++;
	}

	gk20a_ctxsw_trace_wake_up(g, vmid);
	return count;
}
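
/*
 * Drain the hardware ring: fetch the write index from FECS, consume the
 * records up to it, then publish the new read index so the ucode can
 * reuse the slots.
 */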
int gk20a_fecs_trace_poll(struct gk20a *g)
{
	struct gk20a_fecs_trace *trace = g->fecs_trace;

	int read = 0;
	int write = 0;
	int cnt;
	int err;

	err = gk20a_busy(g);
	if (unlikely(err))
		return err;

	nvgpu_mutex_acquire(&trace->poll_lock);
	write = gk20a_fecs_trace_get_write_index(g);
	if (unlikely((write < 0) || (write >= GK20A_FECS_TRACE_NUM_RECORDS))) {
		nvgpu_err(g,
			"failed to acquire write index, write=%d", write);
		err = write;
		goto done;
	}

	read = gk20a_fecs_trace_get_read_index(g);

	cnt = CIRC_CNT(write, read, GK20A_FECS_TRACE_NUM_RECORDS);
	if (!cnt)
		goto done;

	nvgpu_log(g, gpu_dbg_ctxsw,
		"circular buffer: read=%d (mailbox=%d) write=%d cnt=%d",
		read, gk20a_fecs_trace_get_read_index(g), write, cnt);

	/* Ensure all FECS writes have made it to SYSMEM */
	g->ops.mm.fb_flush(g);

	while (read != write) {
		cnt = gk20a_fecs_trace_ring_read(g, read);
		if (cnt <= 0)
			break;

		/* Get to next record. */
		read = (read + 1) & (GK20A_FECS_TRACE_NUM_RECORDS - 1);
	}

	/* ensure FECS records have been updated before incrementing read index */
	nvgpu_wmb();
	gk20a_fecs_trace_set_read_index(g, read);

done:
	nvgpu_mutex_release(&trace->poll_lock);
	gk20a_idle(g);
	return err;
}
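
/* Polling thread body: sleep roughly one frame period between drains. */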
static int gk20a_fecs_trace_periodic_polling(void *arg)
{
	struct gk20a *g = (struct gk20a *)arg;
	struct gk20a_fecs_trace *trace = g->fecs_trace;

	pr_info("%s: running\n", __func__);

	while (!nvgpu_thread_should_stop(&trace->poll_task)) {

		nvgpu_usleep_range(GK20A_FECS_TRACE_FRAME_PERIOD_US,
				GK20A_FECS_TRACE_FRAME_PERIOD_US * 2);

		gk20a_fecs_trace_poll(g);
	}

	return 0;
}

size_t gk20a_fecs_trace_buffer_size(struct gk20a *g)
{
	return GK20A_FECS_TRACE_NUM_RECORDS
		* ctxsw_prog_record_timestamp_record_size_in_bytes_v();
}
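
/*
 * One-time setup: allocate the trace context, initialize the locks and
 * the pid hash table, and advertise FECS ctxsw trace support.
 */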
int gk20a_fecs_trace_init(struct gk20a *g)
{
	struct gk20a_fecs_trace *trace;
	int err;

	trace = nvgpu_kzalloc(g, sizeof(struct gk20a_fecs_trace));
	if (!trace) {
		nvgpu_warn(g, "failed to allocate fecs_trace");
		return -ENOMEM;
	}
	g->fecs_trace = trace;

	err = nvgpu_mutex_init(&trace->poll_lock);
	if (err != 0)
		goto clean;
	err = nvgpu_mutex_init(&trace->hash_lock);
	if (err != 0)
		goto clean_poll_lock;

	BUG_ON(!is_power_of_2(GK20A_FECS_TRACE_NUM_RECORDS));
	hash_init(trace->pid_hash_table);

	__nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, true);

	trace->init = true;

	return 0;

clean_poll_lock:
	nvgpu_mutex_destroy(&trace->poll_lock);
clean:
	nvgpu_kfree(g, trace);
	g->fecs_trace = NULL;
	return err;
}

int gk20a_fecs_trace_bind_channel(struct gk20a *g,
		struct channel_gk20a *ch)
{
	/*
	 * Map our circ_buf to the context space and store the GPU VA
	 * in the context header.
	 */

	u32 lo;
	u32 hi;
	u64 addr;
	struct tsg_gk20a *tsg;
	struct nvgpu_gr_ctx *ch_ctx;
	struct gk20a_fecs_trace *trace = g->fecs_trace;
	struct nvgpu_mem *mem;
	u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(g, ch);
	pid_t pid;
	u32 aperture_mask;

	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw,
			"chid=%d context_ptr=%x inst_block=%llx",
			ch->chid, context_ptr,
			nvgpu_inst_block_addr(g, &ch->inst_block));

	tsg = tsg_gk20a_from_ch(ch);
	if (!tsg)
		return -EINVAL;

	ch_ctx = tsg->gr_ctx;
	mem = &ch_ctx->mem;

	if (!trace)
		return -ENOMEM;

	mem = &g->gr.global_ctx_buffer[FECS_TRACE_BUFFER].mem;

	if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA)) {
		addr = ch_ctx->global_ctx_buffer_va[FECS_TRACE_BUFFER_VA];
		nvgpu_log(g, gpu_dbg_ctxsw, "gpu_va=%llx", addr);
		aperture_mask = 0;
	} else {
		addr = nvgpu_inst_block_addr(g, mem);
		nvgpu_log(g, gpu_dbg_ctxsw, "pa=%llx", addr);
		aperture_mask = nvgpu_aperture_mask(g, mem,
			ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(),
			ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_coherent_f(),
			ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f());
	}
	if (!addr)
		return -ENOMEM;

	lo = u64_lo32(addr);
	hi = u64_hi32(addr);

	mem = &ch_ctx->mem;

	nvgpu_log(g, gpu_dbg_ctxsw, "addr_hi=%x addr_lo=%x count=%d", hi,
		lo, GK20A_FECS_TRACE_NUM_RECORDS);

	nvgpu_mem_wr(g, mem,
		ctxsw_prog_main_image_context_timestamp_buffer_control_o(),
		ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f(
			GK20A_FECS_TRACE_NUM_RECORDS));

	if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA))
		mem = &ch->ctx_header;

	nvgpu_mem_wr(g, mem,
		ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(),
		lo);
	nvgpu_mem_wr(g, mem,
		ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_o(),
		ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(hi) |
		aperture_mask);

	/* pid (process identifier) in user space corresponds to tgid (thread
	 * group id) in kernel space.
	 */
	if (gk20a_is_channel_marked_as_tsg(ch))
		pid = tsg_gk20a_from_ch(ch)->tgid;
	else
		pid = ch->tgid;
	gk20a_fecs_trace_hash_add(g, context_ptr, pid);

	return 0;
}

int gk20a_fecs_trace_unbind_channel(struct gk20a *g, struct channel_gk20a *ch)
{
	u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(g, ch);

	if (g->fecs_trace) {
		nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw,
			"ch=%p context_ptr=%x", ch, context_ptr);

		if (g->ops.fecs_trace.is_enabled(g)) {
			if (g->ops.fecs_trace.flush)
				g->ops.fecs_trace.flush(g);
			gk20a_fecs_trace_poll(g);
		}
		gk20a_fecs_trace_hash_del(g, context_ptr);
	}
	return 0;
}

int gk20a_fecs_trace_reset(struct gk20a *g)
{
	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " ");

	if (!g->ops.fecs_trace.is_enabled(g))
		return 0;

	gk20a_fecs_trace_poll(g);
	return gk20a_fecs_trace_set_read_index(g, 0);
}

int gk20a_fecs_trace_deinit(struct gk20a *g)
{
	struct gk20a_fecs_trace *trace = g->fecs_trace;

	/* guard against deinit without (or after failed) init */
	if (!trace || !trace->init)
		return 0;

	nvgpu_thread_stop(&trace->poll_task);
	gk20a_fecs_trace_free_hash_table(g);

	nvgpu_mutex_destroy(&trace->hash_lock);
	nvgpu_mutex_destroy(&trace->poll_lock);

	nvgpu_kfree(g, trace);
	g->fecs_trace = NULL;
	return 0;
}

int gk20a_gr_max_entries(struct gk20a *g,
		struct nvgpu_gpu_ctxsw_trace_filter *filter)
{
	int n;
	int tag;

	/* Compute number of entries per record, with given filter */
	for (n = 0, tag = 0; tag < gk20a_fecs_trace_num_ts(); tag++)
		n += (NVGPU_GPU_CTXSW_FILTER_ISSET(tag, filter) != 0);

	/* Return max number of entries generated for the whole ring */
	return n * GK20A_FECS_TRACE_NUM_RECORDS;
}
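
/*
 * Start tracing: drop whatever already sits in the hardware ring by
 * snapping the read index to the current write index, then spawn the
 * polling thread.
 */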
int gk20a_fecs_trace_enable(struct gk20a *g)
{
	struct gk20a_fecs_trace *trace = g->fecs_trace;
	int write;
	int err = 0;

	if (!trace)
		return -EINVAL;

	if (nvgpu_thread_is_running(&trace->poll_task))
		return 0;

	/* drop data in hw buffer */
	if (g->ops.fecs_trace.flush)
		g->ops.fecs_trace.flush(g);
	write = gk20a_fecs_trace_get_write_index(g);
	gk20a_fecs_trace_set_read_index(g, write);

	err = nvgpu_thread_create(&trace->poll_task, g,
			gk20a_fecs_trace_periodic_polling, __func__);
	if (err != 0) {
		nvgpu_warn(g,
			"failed to create FECS polling task");
		return err;
	}

	return 0;
}

int gk20a_fecs_trace_disable(struct gk20a *g)
{
	struct gk20a_fecs_trace *trace = g->fecs_trace;

	if (nvgpu_thread_is_running(&trace->poll_task))
		nvgpu_thread_stop(&trace->poll_task);

	return -EPERM;
}

bool gk20a_fecs_trace_is_enabled(struct gk20a *g)
{
	struct gk20a_fecs_trace *trace = g->fecs_trace;

	return (trace && nvgpu_thread_is_running(&trace->poll_task));
}
#endif /* CONFIG_GK20A_CTXSW_TRACE */