gpu: nvgpu: create new gr/fecs_trace unit

Create a new gr/fecs_trace unit with common/gr/fecs_trace/fecs_trace.c
as the common source file and include/nvgpu/gr/fecs_trace.h as the
common header file.
This unit will be shared between Linux and QNX.
Corresponding HAL files will be added under common/gr/fecs_trace/
as more functionality is moved to the new unit.

For now, move struct gk20a_fecs_trace_hash_ent to the new unit and
rename it as struct nvgpu_fecs_trace_context_entry.
Add a vmid field to this struct since it is required for QNX.

Remove the use of hashtables and simply use a linked list to simplify
the code. FECS tracing is not a performance-sensitive use case,
so the performance hit can be ignored.

Rename hash_lock mutex to list_lock

struct gk20a_fecs_trace and mutex list_lock are still declared in
gk20a/fecs_trace_gk20a.c, hence they cannot be used in new unit yet

Rename and update all gk20a_fecs_trace_hash_*() APIs to appropriate
nvgpu_gr_fecs_trace_*() APIs

Remove gk20a_fecs_trace_hash_dump() since it is not being used

Jira NVGPU-1880

Change-Id: I89c2715baa770dbbd864ea70ab43d83d98ba693c
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2022903
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Deepak Nibade
2019-02-19 17:47:41 +05:30
committed by mobile promotions
parent decbbf3504
commit 556041f425
6 changed files with 197 additions and 127 deletions

View File

@@ -90,6 +90,7 @@ nvgpu-y += common/bus/bus_gk20a.o \
common/gr/subctx.o \
common/gr/config/gr_config.o \
common/gr/config/gr_config_gm20b.o \
common/gr/fecs_trace/fecs_trace.o \
common/netlist/netlist.o \
common/netlist/netlist_sim.o \
common/netlist/netlist_gm20b.o \

View File

@@ -135,6 +135,7 @@ srcs += common/sim.c \
common/gr/ctx.c \
common/gr/config/gr_config.c \
common/gr/config/gr_config_gm20b.c \
common/gr/fecs_trace/fecs_trace.c \
common/netlist/netlist.c \
common/netlist/netlist_sim.c \
common/netlist/netlist_gm20b.c \

View File

@@ -0,0 +1,107 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/list.h>
#include <nvgpu/log.h>
#include <nvgpu/gr/fecs_trace.h>
/*
 * Record a (context_ptr -> pid/vmid) mapping in the FECS trace context list.
 *
 * @g           - GPU device pointer (used for allocation and logging).
 * @context_ptr - FECS context pointer to track.
 * @pid         - owning process id to associate with the context.
 * @vmid        - VM id to associate with the context (needed on QNX).
 * @list        - head of the context list the entry is appended to.
 *
 * The caller is responsible for serializing access to @list (in the gk20a
 * unit this is done with the list_lock mutex).
 *
 * Returns 0 on success, -ENOMEM if the entry could not be allocated.
 */
int nvgpu_gr_fecs_trace_add_context(struct gk20a *g, u32 context_ptr,
	pid_t pid, u32 vmid, struct nvgpu_list_node *list)
{
	struct nvgpu_fecs_trace_context_entry *entry;

	/*
	 * Hashtables were replaced by a plain list in this unit, so log
	 * this as a context entry, not a "hash entry".
	 */
	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
		"adding context entry context_ptr=%x -> pid=%d, vmid=%d",
		context_ptr, pid, vmid);

	entry = nvgpu_kzalloc(g, sizeof(*entry));
	if (unlikely(!entry)) {
		nvgpu_err(g,
			"can't alloc new entry for context_ptr=%x pid=%d vmid=%d",
			context_ptr, pid, vmid);
		return -ENOMEM;
	}

	nvgpu_init_list_node(&entry->entry);
	entry->context_ptr = context_ptr;
	entry->pid = pid;
	entry->vmid = vmid;

	nvgpu_list_add_tail(&entry->entry, list);

	return 0;
}
/*
 * Remove and free the first list entry matching @context_ptr, if any.
 * Uses the _safe list walk since the matching node is deleted in place.
 * Caller must serialize access to @list.
 */
void nvgpu_gr_fecs_trace_remove_context(struct gk20a *g, u32 context_ptr,
	struct nvgpu_list_node *list)
{
	struct nvgpu_fecs_trace_context_entry *ce;
	struct nvgpu_fecs_trace_context_entry *ce_tmp;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
		"freeing entry context_ptr=%x", context_ptr);

	nvgpu_list_for_each_entry_safe(ce, ce_tmp, list,
			nvgpu_fecs_trace_context_entry, entry) {
		if (ce->context_ptr != context_ptr) {
			continue;
		}

		nvgpu_list_del(&ce->entry);
		nvgpu_log(g, gpu_dbg_ctxsw,
			"freed entry=%p context_ptr=%x", ce,
			ce->context_ptr);
		nvgpu_kfree(g, ce);
		break;
	}
}
void nvgpu_gr_fecs_trace_remove_contexts(struct gk20a *g,
struct nvgpu_list_node *list)
{
struct nvgpu_fecs_trace_context_entry *entry, *tmp;
nvgpu_list_for_each_entry_safe(entry, tmp, list,
nvgpu_fecs_trace_context_entry, entry) {
nvgpu_list_del(&entry->entry);
nvgpu_kfree(g, entry);
}
}
/*
 * Look up the pid/vmid recorded for @context_ptr in @list.
 * On a match, *pid and *vmid are set from the entry and the function
 * returns early. If no entry matches, *pid is set to 0 and *vmid to
 * 0xffffffffU as "not found" sentinels. Caller must serialize access
 * to @list.
 */
void nvgpu_gr_fecs_trace_find_pid(struct gk20a *g, u32 context_ptr,
	struct nvgpu_list_node *list, pid_t *pid, u32 *vmid)
{
	struct nvgpu_fecs_trace_context_entry *ce;

	nvgpu_list_for_each_entry(ce, list, nvgpu_fecs_trace_context_entry,
			entry) {
		if (ce->context_ptr != context_ptr) {
			continue;
		}

		nvgpu_log(g, gpu_dbg_ctxsw,
			"found context_ptr=%x -> pid=%d, vmid=%d",
			ce->context_ptr, ce->pid, ce->vmid);
		*pid = ce->pid;
		*vmid = ce->vmid;
		return;
	}

	/* Context is not tracked: report sentinel values. */
	*pid = 0;
	*vmid = 0xffffffffU;
}

View File

@@ -24,7 +24,6 @@
#include <nvgpu/dma.h>
#include <nvgpu/enabled.h>
#include <nvgpu/bug.h>
#include <nvgpu/hashtable.h>
#include <nvgpu/circ_buf.h>
#include <nvgpu/thread.h>
#include <nvgpu/barrier.h>
@@ -39,6 +38,7 @@
#include <nvgpu/gr/global_ctx.h>
#include <nvgpu/gr/subctx.h>
#include <nvgpu/gr/ctx.h>
#include <nvgpu/gr/fecs_trace.h>
#include "fecs_trace_gk20a.h"
#include "gr_gk20a.h"
@@ -48,16 +48,9 @@
#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
struct gk20a_fecs_trace_hash_ent {
u32 context_ptr;
pid_t pid;
struct hlist_node node;
};
struct gk20a_fecs_trace {
DECLARE_HASHTABLE(pid_hash_table, GK20A_FECS_TRACE_HASH_BITS);
struct nvgpu_mutex hash_lock;
struct nvgpu_list_node context_list;
struct nvgpu_mutex list_lock;
struct nvgpu_mutex poll_lock;
struct nvgpu_thread poll_task;
bool init;
@@ -119,111 +112,6 @@ static int gk20a_fecs_trace_set_read_index(struct gk20a *g, int index)
(gk20a_writel(g, gr_fecs_mailbox1_r(), index), 0));
}
/*
 * Debug helper: log every (context_ptr -> pid) mapping in the pid hash
 * table. Holds hash_lock for the whole walk. (Removed by this commit;
 * the commit message notes it was unused.)
 */
void gk20a_fecs_trace_hash_dump(struct gk20a *g)
{
u32 bkt;
struct gk20a_fecs_trace_hash_ent *ent;
struct gk20a_fecs_trace *trace = g->fecs_trace;
nvgpu_log(g, gpu_dbg_ctxsw, "dumping hash table");
nvgpu_mutex_acquire(&trace->hash_lock);
/* bkt is the current bucket index supplied by hash_for_each */
hash_for_each(trace->pid_hash_table, bkt, ent, node)
{
nvgpu_log(g, gpu_dbg_ctxsw, " ent=%p bkt=%x context_ptr=%x pid=%d",
ent, bkt, ent->context_ptr, ent->pid);
}
nvgpu_mutex_release(&trace->hash_lock);
}
/*
 * Insert a (context_ptr -> pid) mapping into the pid hash table, keyed
 * by context_ptr. Returns 0 on success, -ENOMEM if the entry cannot be
 * allocated. (Replaced in this commit by nvgpu_gr_fecs_trace_add_context.)
 */
static int gk20a_fecs_trace_hash_add(struct gk20a *g, u32 context_ptr, pid_t pid)
{
struct gk20a_fecs_trace_hash_ent *he;
struct gk20a_fecs_trace *trace = g->fecs_trace;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
"adding hash entry context_ptr=%x -> pid=%d", context_ptr, pid);
he = nvgpu_kzalloc(g, sizeof(*he));
if (unlikely(!he)) {
nvgpu_warn(g,
"can't alloc new hash entry for context_ptr=%x pid=%d",
context_ptr, pid);
return -ENOMEM;
}
he->context_ptr = context_ptr;
he->pid = pid;
/* hash_lock protects pid_hash_table against concurrent updates */
nvgpu_mutex_acquire(&trace->hash_lock);
hash_add(trace->pid_hash_table, &he->node, context_ptr);
nvgpu_mutex_release(&trace->hash_lock);
return 0;
}
/*
 * Remove and free the hash entry matching context_ptr, if present.
 * Uses the _safe bucket walk since the node is deleted inside the loop.
 * (Replaced in this commit by nvgpu_gr_fecs_trace_remove_context.)
 */
static void gk20a_fecs_trace_hash_del(struct gk20a *g, u32 context_ptr)
{
struct hlist_node *tmp;
struct gk20a_fecs_trace_hash_ent *ent;
struct gk20a_fecs_trace *trace = g->fecs_trace;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
"freeing hash entry context_ptr=%x", context_ptr);
nvgpu_mutex_acquire(&trace->hash_lock);
/* walk only the bucket context_ptr hashes to */
hash_for_each_possible_safe(trace->pid_hash_table, ent, tmp, node,
context_ptr) {
if (ent->context_ptr == context_ptr) {
hash_del(&ent->node);
nvgpu_log(g, gpu_dbg_ctxsw,
"freed hash entry=%p context_ptr=%x", ent,
ent->context_ptr);
nvgpu_kfree(g, ent);
break;
}
}
nvgpu_mutex_release(&trace->hash_lock);
}
/*
 * Empty the entire pid hash table, freeing every entry in every bucket.
 * (Replaced in this commit by nvgpu_gr_fecs_trace_remove_contexts.)
 */
static void gk20a_fecs_trace_free_hash_table(struct gk20a *g)
{
u32 bkt;
struct hlist_node *tmp;
struct gk20a_fecs_trace_hash_ent *ent;
struct gk20a_fecs_trace *trace = g->fecs_trace;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw, "trace=%p", trace);
nvgpu_mutex_acquire(&trace->hash_lock);
hash_for_each_safe(trace->pid_hash_table, bkt, tmp, ent, node) {
hash_del(&ent->node);
nvgpu_kfree(g, ent);
}
nvgpu_mutex_release(&trace->hash_lock);
}
/*
 * Look up the pid recorded for context_ptr in the pid hash table.
 * Returns the pid on a match, 0 if the context is not tracked.
 * (Replaced in this commit by nvgpu_gr_fecs_trace_find_pid, which also
 * reports a vmid.)
 */
static pid_t gk20a_fecs_trace_find_pid(struct gk20a *g, u32 context_ptr)
{
struct gk20a_fecs_trace_hash_ent *ent;
struct gk20a_fecs_trace *trace = g->fecs_trace;
pid_t pid = 0;
nvgpu_mutex_acquire(&trace->hash_lock);
hash_for_each_possible(trace->pid_hash_table, ent, node, context_ptr) {
if (ent->context_ptr == context_ptr) {
nvgpu_log(g, gpu_dbg_ctxsw,
"found context_ptr=%x -> pid=%d",
ent->context_ptr, ent->pid);
pid = ent->pid;
break;
}
}
nvgpu_mutex_release(&trace->hash_lock);
return pid;
}
/*
* Converts HW entry format to userspace-facing format and pushes it to the
* queue.
@@ -235,6 +123,7 @@ static int gk20a_fecs_trace_ring_read(struct gk20a *g, int index)
struct gk20a_fecs_trace *trace = g->fecs_trace;
pid_t cur_pid;
pid_t new_pid;
u32 cur_vmid, new_vmid;
int count = 0;
/* for now, only one VM */
@@ -262,8 +151,12 @@ static int gk20a_fecs_trace_ring_read(struct gk20a *g, int index)
*/
r->magic_hi = 0;
cur_pid = gk20a_fecs_trace_find_pid(g, r->context_ptr);
new_pid = gk20a_fecs_trace_find_pid(g, r->new_context_ptr);
nvgpu_mutex_acquire(&trace->list_lock);
nvgpu_gr_fecs_trace_find_pid(g, r->context_ptr, &trace->context_list,
&cur_pid, &cur_vmid);
nvgpu_gr_fecs_trace_find_pid(g, r->new_context_ptr, &trace->context_list,
&new_pid, &new_vmid);
nvgpu_mutex_release(&trace->list_lock);
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
"context_ptr=%x (pid=%d) new_context_ptr=%x (pid=%d)",
@@ -418,12 +311,12 @@ int gk20a_fecs_trace_init(struct gk20a *g)
err = nvgpu_mutex_init(&trace->poll_lock);
if (err != 0)
goto clean;
err = nvgpu_mutex_init(&trace->hash_lock);
err = nvgpu_mutex_init(&trace->list_lock);
if (err != 0)
goto clean_poll_lock;
BUG_ON(!is_power_of_2(GK20A_FECS_TRACE_NUM_RECORDS));
hash_init(trace->pid_hash_table);
nvgpu_init_list_node(&trace->context_list);
nvgpu_set_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE, true);
@@ -453,6 +346,7 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g,
u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(g, ch);
u32 aperture_mask;
struct tsg_gk20a *tsg;
int ret;
tsg = tsg_gk20a_from_ch(ch);
if (tsg == NULL) {
@@ -504,16 +398,20 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g,
/* pid (process identifier) in user space, corresponds to tgid (thread
* group id) in kernel space.
*/
gk20a_fecs_trace_hash_add(g, context_ptr, tsg->tgid);
nvgpu_mutex_acquire(&trace->list_lock);
ret = nvgpu_gr_fecs_trace_add_context(g, context_ptr, tsg->tgid, 0,
&trace->context_list);
nvgpu_mutex_release(&trace->list_lock);
return 0;
return ret;
}
int gk20a_fecs_trace_unbind_channel(struct gk20a *g, struct channel_gk20a *ch)
{
u32 context_ptr = gk20a_fecs_trace_fecs_context_ptr(g, ch);
struct gk20a_fecs_trace *trace = g->fecs_trace;
if (g->fecs_trace) {
if (trace) {
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw,
"ch=%p context_ptr=%x", ch, context_ptr);
@@ -522,7 +420,11 @@ int gk20a_fecs_trace_unbind_channel(struct gk20a *g, struct channel_gk20a *ch)
g->ops.fecs_trace.flush(g);
gk20a_fecs_trace_poll(g);
}
gk20a_fecs_trace_hash_del(g, context_ptr);
nvgpu_mutex_acquire(&trace->list_lock);
nvgpu_gr_fecs_trace_remove_context(g, context_ptr,
&trace->context_list);
nvgpu_mutex_release(&trace->list_lock);
}
return 0;
}
@@ -546,9 +448,12 @@ int gk20a_fecs_trace_deinit(struct gk20a *g)
return 0;
nvgpu_thread_stop(&trace->poll_task);
gk20a_fecs_trace_free_hash_table(g);
nvgpu_mutex_destroy(&g->fecs_trace->hash_lock);
nvgpu_mutex_acquire(&trace->list_lock);
nvgpu_gr_fecs_trace_remove_contexts(g, &trace->context_list);
nvgpu_mutex_release(&trace->list_lock);
nvgpu_mutex_destroy(&g->fecs_trace->list_lock);
nvgpu_mutex_destroy(&g->fecs_trace->poll_lock);
nvgpu_kfree(g, g->fecs_trace);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -30,7 +30,6 @@ struct gk20a;
* increasing this constant should help (it drives Linux' internal buffer size).
*/
#define GK20A_FECS_TRACE_NUM_RECORDS (1 << 10)
#define GK20A_FECS_TRACE_HASH_BITS 8 /* 2^8 */
#define GK20A_FECS_TRACE_FRAME_PERIOD_US (1000000ULL/60ULL)
#define GK20A_FECS_TRACE_PTIMER_SHIFT 5

View File

@@ -0,0 +1,57 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NVGPU_GR_FECS_TRACE_H
#define NVGPU_GR_FECS_TRACE_H

#include <nvgpu/types.h>
#include <nvgpu/list.h>

struct gk20a;

/*
 * One tracked FECS context: maps a FECS context pointer to the owning
 * process (pid) and, on virtualized platforms such as QNX, a vmid.
 * Entries are linked into a per-trace context list via @entry.
 */
struct nvgpu_fecs_trace_context_entry {
	u32 context_ptr;

	pid_t pid;
	u32 vmid;

	struct nvgpu_list_node entry;
};

/*
 * Recover the containing entry from its embedded list node; required by
 * the nvgpu list iteration macros.
 */
static inline struct nvgpu_fecs_trace_context_entry *
nvgpu_fecs_trace_context_entry_from_entry(struct nvgpu_list_node *node)
{
	return (struct nvgpu_fecs_trace_context_entry *)
		((uintptr_t)node -
		offsetof(struct nvgpu_fecs_trace_context_entry, entry));
}
/* Note: removed the stray ';' after the brace above — an empty
 * file-scope declaration is not valid ISO C. */

/* Callers of all list APIs below must serialize access to @list. */
int nvgpu_gr_fecs_trace_add_context(struct gk20a *g, u32 context_ptr,
	pid_t pid, u32 vmid, struct nvgpu_list_node *list);
void nvgpu_gr_fecs_trace_remove_context(struct gk20a *g, u32 context_ptr,
	struct nvgpu_list_node *list);
void nvgpu_gr_fecs_trace_remove_contexts(struct gk20a *g,
	struct nvgpu_list_node *list);
void nvgpu_gr_fecs_trace_find_pid(struct gk20a *g, u32 context_ptr,
	struct nvgpu_list_node *list, pid_t *pid, u32 *vmid);

#endif /* NVGPU_GR_FECS_TRACE_H */