mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 17:36:20 +03:00
gpu: nvgpu: Capture thread name for every channel created
This change ensures that, when the GPU enters a bad state because of work submitted by a misbehaving thread, the name of that thread is captured as part of the first set of failure logs. Changes for the QNX environment are pending. JIRA NVGPU-7783 Change-Id: I65d55a6ade749ff91739458e0642ed2dafaae5cc Signed-off-by: Kishan <kpalankar@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2879197 Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com> Reviewed-by: svc-mobile-misra <svc-mobile-misra@nvidia.com> Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com> Reviewed-by: Tejal Kudav <tkudav@nvidia.com> Reviewed-by: Ankur Kishore <ankkishore@nvidia.com> GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
af48120169
commit
c6d5fb348c
@@ -1260,6 +1260,7 @@ struct nvgpu_channel *nvgpu_channel_open_new(struct gk20a *g,
|
||||
|
||||
ch->pid = tid;
|
||||
ch->tgid = pid; /* process granularity for FECS traces */
|
||||
nvgpu_get_thread_name(ch->thread_name);
|
||||
|
||||
#ifdef CONFIG_NVGPU_USERD
|
||||
if (nvgpu_userd_init_channel(g, ch) != 0) {
|
||||
@@ -2125,11 +2126,12 @@ static void nvgpu_channel_info_debug_dump(struct gk20a *g,
|
||||
*/
|
||||
u32 ver = nvgpu_safe_add_u32(g->params.gpu_arch, g->params.gpu_impl);
|
||||
|
||||
gk20a_debug_output(o, "%d-%s, TSG: %u, pid %d, refs: %d, deterministic: %s, domain name: %s",
|
||||
gk20a_debug_output(o, "%d-%s, TSG: %u, pid %d, thread name %s, refs: %d, deterministic: %s, domain name: %s",
|
||||
info->chid,
|
||||
g->name,
|
||||
info->tsgid,
|
||||
info->pid,
|
||||
info->thread_name,
|
||||
info->refs,
|
||||
info->deterministic ? "yes" : "no",
|
||||
info->nvs_domain_name);
|
||||
@@ -2229,6 +2231,7 @@ void nvgpu_channel_debug_dump_all(struct gk20a *g,
|
||||
info->chid = ch->chid;
|
||||
info->tsgid = ch->tsgid;
|
||||
info->pid = ch->pid;
|
||||
(void)memcpy(info->thread_name, ch->thread_name, sizeof(info->thread_name));
|
||||
info->refs = nvgpu_atomic_read(&ch->ref_count);
|
||||
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
|
||||
info->deterministic = nvgpu_channel_is_deterministic(ch);
|
||||
|
||||
@@ -51,6 +51,10 @@ struct nvgpu_channel_wdt;
|
||||
struct nvgpu_user_fence;
|
||||
struct nvgpu_runlist;
|
||||
|
||||
/**
|
||||
* Size in bytes of the task name buffer, including the terminating NUL. Must be exactly equal to the Linux kernel's TASK_COMM_LEN.
|
||||
*/
|
||||
#define TASK_NAME_LEN (16U)
|
||||
/**
|
||||
* S/W defined invalid channel identifier.
|
||||
*/
|
||||
@@ -187,6 +191,11 @@ struct nvgpu_channel_dump_info {
|
||||
u32 tsgid;
|
||||
/** Id of the thread that created this channel (copied from nvgpu_channel.pid, which is set to the tid). */
|
||||
int pid;
|
||||
/**
|
||||
* Name of the thread that created the channel.
|
||||
* Same size as task_struct.comm[] on linux.
|
||||
*/
|
||||
char thread_name[TASK_NAME_LEN];
|
||||
/** Number of references to this channel. */
|
||||
int refs;
|
||||
/** Channel uses deterministic submit (kernel submit only). */
|
||||
@@ -356,6 +365,11 @@ struct nvgpu_channel {
|
||||
* Confusingly, at userspace level, this is what is seen as the "pid".
|
||||
*/
|
||||
pid_t tgid;
|
||||
/**
|
||||
* Name of the thread that created the channel.
|
||||
* Same size as task_struct.comm[] on linux.
|
||||
*/
|
||||
char thread_name[TASK_NAME_LEN];
|
||||
/** Lock to serialize ioctls for this channel. */
|
||||
struct nvgpu_mutex ioctl_lock;
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2023, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -53,6 +53,13 @@ int nvgpu_current_tid(struct gk20a *g);
|
||||
*/
|
||||
int nvgpu_current_pid(struct gk20a *g);
|
||||
|
||||
/**
|
||||
* @brief API to get the name of current thread.
|
||||
*
|
||||
* @param dest [out] Caller-provided buffer that receives the thread name; on Linux it must hold at least TASK_COMM_LEN bytes.
|
||||
*/
|
||||
void nvgpu_get_thread_name(char *dest);
|
||||
|
||||
/**
|
||||
* @brief Print the name of current thread.
|
||||
*
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2023, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
@@ -18,6 +18,7 @@
|
||||
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/dma-buf.h>
|
||||
#include <linux/sched.h>
|
||||
|
||||
#include <nvgpu/types.h>
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2022, NVIDIA Corporation. All rights reserved.
|
||||
* Copyright (c) 2017-2023, NVIDIA Corporation. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
@@ -150,12 +150,12 @@ void nvgpu_set_err_notifier_locked(struct nvgpu_channel *ch, u32 error)
|
||||
|
||||
if (error == NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR) {
|
||||
nvgpu_log_info(ch->g,
|
||||
"error notifier set to %d for ch %d",
|
||||
error, ch->chid);
|
||||
"error notifier set to %d for ch %d owned by %s",
|
||||
error, ch->chid, ch->thread_name);
|
||||
} else {
|
||||
nvgpu_err(ch->g,
|
||||
"error notifier set to %d for ch %d",
|
||||
error, ch->chid);
|
||||
"error notifier set to %d for ch %d owned by %s",
|
||||
error, ch->chid, ch->thread_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2023, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
@@ -30,3 +30,13 @@ void nvgpu_print_current_impl(struct gk20a *g, const char *func_name, int line,
|
||||
{
|
||||
nvgpu_log_msg_impl(g, func_name, line, type, current->comm);
|
||||
}
|
||||
|
||||
void nvgpu_get_thread_name(char *dest)
|
||||
{
|
||||
char buf[TASK_COMM_LEN];
|
||||
|
||||
get_task_comm(buf, current);
|
||||
strncpy(dest, buf, TASK_COMM_LEN);
|
||||
/* Ensure buffer is null terminated */
|
||||
dest[TASK_COMM_LEN-1] = '\0';
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2018-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -53,6 +53,11 @@ int nvgpu_current_tid(struct gk20a *g)
|
||||
return (int)pthread_self();
|
||||
}
|
||||
|
||||
/*
 * POSIX placeholder for nvgpu_get_thread_name().
 *
 * No thread-name lookup is performed here. Instead of leaving the caller's
 * buffer untouched, an empty string is stored so that code which later
 * formats the name with "%s" always sees a valid, terminated string.
 *
 * @param dest [out] Buffer that receives the (empty) name; a NULL pointer
 *                   is tolerated and ignored.
 */
void nvgpu_get_thread_name(char *dest)
{
	if (dest != NULL) {
		dest[0] = '\0';
	}
}
|
||||
|
||||
void nvgpu_print_current_impl(struct gk20a *g, const char *func_name, int line,
|
||||
void *ctx, enum nvgpu_log_type type)
|
||||
{
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
# Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved.
|
||||
|
||||
bitmap_find_next_zero_area
|
||||
fb_gv11b_write_mmu_fault_buffer_get
|
||||
@@ -342,6 +342,7 @@ nvgpu_cond_timedwait
|
||||
nvgpu_cond_unlock
|
||||
nvgpu_current_pid
|
||||
nvgpu_current_tid
|
||||
nvgpu_get_thread_name
|
||||
nvgpu_current_time_ms
|
||||
nvgpu_current_time_ns
|
||||
nvgpu_current_time_us
|
||||
|
||||
Reference in New Issue
Block a user