mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 17:36:20 +03:00
gpu: nvgpu: Capture thread name for every channel created
This change ensures that when the GPU enters a bad state because of work submitted by a misbehaving thread, the name of that thread is captured as part of the first set of failure logs. Changes for the QNX environment are pending. JIRA NVGPU-7783 Change-Id: I65d55a6ade749ff91739458e0642ed2dafaae5cc Signed-off-by: Kishan <kpalankar@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2879197 Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com> Reviewed-by: svc-mobile-misra <svc-mobile-misra@nvidia.com> Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com> Reviewed-by: Tejal Kudav <tkudav@nvidia.com> Reviewed-by: Ankur Kishore <ankkishore@nvidia.com> GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
af48120169
commit
c6d5fb348c
@@ -1260,6 +1260,7 @@ struct nvgpu_channel *nvgpu_channel_open_new(struct gk20a *g,
|
|||||||
|
|
||||||
ch->pid = tid;
|
ch->pid = tid;
|
||||||
ch->tgid = pid; /* process granularity for FECS traces */
|
ch->tgid = pid; /* process granularity for FECS traces */
|
||||||
|
nvgpu_get_thread_name(ch->thread_name);
|
||||||
|
|
||||||
#ifdef CONFIG_NVGPU_USERD
|
#ifdef CONFIG_NVGPU_USERD
|
||||||
if (nvgpu_userd_init_channel(g, ch) != 0) {
|
if (nvgpu_userd_init_channel(g, ch) != 0) {
|
||||||
@@ -2125,11 +2126,12 @@ static void nvgpu_channel_info_debug_dump(struct gk20a *g,
|
|||||||
*/
|
*/
|
||||||
u32 ver = nvgpu_safe_add_u32(g->params.gpu_arch, g->params.gpu_impl);
|
u32 ver = nvgpu_safe_add_u32(g->params.gpu_arch, g->params.gpu_impl);
|
||||||
|
|
||||||
gk20a_debug_output(o, "%d-%s, TSG: %u, pid %d, refs: %d, deterministic: %s, domain name: %s",
|
gk20a_debug_output(o, "%d-%s, TSG: %u, pid %d, thread name %s, refs: %d, deterministic: %s, domain name: %s",
|
||||||
info->chid,
|
info->chid,
|
||||||
g->name,
|
g->name,
|
||||||
info->tsgid,
|
info->tsgid,
|
||||||
info->pid,
|
info->pid,
|
||||||
|
info->thread_name,
|
||||||
info->refs,
|
info->refs,
|
||||||
info->deterministic ? "yes" : "no",
|
info->deterministic ? "yes" : "no",
|
||||||
info->nvs_domain_name);
|
info->nvs_domain_name);
|
||||||
@@ -2229,6 +2231,7 @@ void nvgpu_channel_debug_dump_all(struct gk20a *g,
|
|||||||
info->chid = ch->chid;
|
info->chid = ch->chid;
|
||||||
info->tsgid = ch->tsgid;
|
info->tsgid = ch->tsgid;
|
||||||
info->pid = ch->pid;
|
info->pid = ch->pid;
|
||||||
|
(void)memcpy(info->thread_name, ch->thread_name, sizeof(info->thread_name));
|
||||||
info->refs = nvgpu_atomic_read(&ch->ref_count);
|
info->refs = nvgpu_atomic_read(&ch->ref_count);
|
||||||
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
|
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
|
||||||
info->deterministic = nvgpu_channel_is_deterministic(ch);
|
info->deterministic = nvgpu_channel_is_deterministic(ch);
|
||||||
|
|||||||
@@ -51,6 +51,10 @@ struct nvgpu_channel_wdt;
|
|||||||
struct nvgpu_user_fence;
|
struct nvgpu_user_fence;
|
||||||
struct nvgpu_runlist;
|
struct nvgpu_runlist;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Size of task name. Should strictly be equal to TASK_COMM_LEN
|
||||||
|
*/
|
||||||
|
#define TASK_NAME_LEN (16U)
|
||||||
/**
|
/**
|
||||||
* S/W defined invalid channel identifier.
|
* S/W defined invalid channel identifier.
|
||||||
*/
|
*/
|
||||||
@@ -187,6 +191,11 @@ struct nvgpu_channel_dump_info {
|
|||||||
u32 tsgid;
|
u32 tsgid;
|
||||||
/** Pid of the process that created this channel. */
|
/** Pid of the process that created this channel. */
|
||||||
int pid;
|
int pid;
|
||||||
|
/**
|
||||||
|
* Name of the thread that created the channel.
|
||||||
|
* Same size as task_struct.comm[] on linux.
|
||||||
|
*/
|
||||||
|
char thread_name[TASK_NAME_LEN];
|
||||||
/** Number of references to this channel. */
|
/** Number of references to this channel. */
|
||||||
int refs;
|
int refs;
|
||||||
/** Channel uses deterministic submit (kernel submit only). */
|
/** Channel uses deterministic submit (kernel submit only). */
|
||||||
@@ -356,6 +365,11 @@ struct nvgpu_channel {
|
|||||||
* Confusingly, at userspace level, this is what is seen as the "pid".
|
* Confusingly, at userspace level, this is what is seen as the "pid".
|
||||||
*/
|
*/
|
||||||
pid_t tgid;
|
pid_t tgid;
|
||||||
|
/**
|
||||||
|
* Name of the thread that created the channel.
|
||||||
|
* Same size as task_struct.comm[] on linux.
|
||||||
|
*/
|
||||||
|
char thread_name[TASK_NAME_LEN];
|
||||||
/** Lock to serialize ioctls for this channel. */
|
/** Lock to serialize ioctls for this channel. */
|
||||||
struct nvgpu_mutex ioctl_lock;
|
struct nvgpu_mutex ioctl_lock;
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
* Copyright (c) 2017-2023, NVIDIA CORPORATION. All rights reserved.
|
||||||
*
|
*
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
* copy of this software and associated documentation files (the "Software"),
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
@@ -53,6 +53,13 @@ int nvgpu_current_tid(struct gk20a *g);
|
|||||||
*/
|
*/
|
||||||
int nvgpu_current_pid(struct gk20a *g);
|
int nvgpu_current_pid(struct gk20a *g);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief API to get the name of current thread.
|
||||||
|
*
|
||||||
|
* @param dest [out] Pointer to the caller's string buffer; it must be at least TASK_NAME_LEN bytes long.
|
||||||
|
*/
|
||||||
|
void nvgpu_get_thread_name(char *dest);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Print the name of current thread.
|
* @brief Print the name of current thread.
|
||||||
*
|
*
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
|
* Copyright (c) 2017-2023, NVIDIA CORPORATION. All rights reserved.
|
||||||
*
|
*
|
||||||
* This program is free software; you can redistribute it and/or modify it
|
* This program is free software; you can redistribute it and/or modify it
|
||||||
* under the terms and conditions of the GNU General Public License,
|
* under the terms and conditions of the GNU General Public License,
|
||||||
@@ -18,6 +18,7 @@
|
|||||||
|
|
||||||
#include <linux/workqueue.h>
|
#include <linux/workqueue.h>
|
||||||
#include <linux/dma-buf.h>
|
#include <linux/dma-buf.h>
|
||||||
|
#include <linux/sched.h>
|
||||||
|
|
||||||
#include <nvgpu/types.h>
|
#include <nvgpu/types.h>
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2017-2022, NVIDIA Corporation. All rights reserved.
|
* Copyright (c) 2017-2023, NVIDIA Corporation. All rights reserved.
|
||||||
*
|
*
|
||||||
* This program is free software; you can redistribute it and/or modify it
|
* This program is free software; you can redistribute it and/or modify it
|
||||||
* under the terms and conditions of the GNU General Public License,
|
* under the terms and conditions of the GNU General Public License,
|
||||||
@@ -150,12 +150,12 @@ void nvgpu_set_err_notifier_locked(struct nvgpu_channel *ch, u32 error)
|
|||||||
|
|
||||||
if (error == NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR) {
|
if (error == NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR) {
|
||||||
nvgpu_log_info(ch->g,
|
nvgpu_log_info(ch->g,
|
||||||
"error notifier set to %d for ch %d",
|
"error notifier set to %d for ch %d owned by %s",
|
||||||
error, ch->chid);
|
error, ch->chid, ch->thread_name);
|
||||||
} else {
|
} else {
|
||||||
nvgpu_err(ch->g,
|
nvgpu_err(ch->g,
|
||||||
"error notifier set to %d for ch %d",
|
"error notifier set to %d for ch %d owned by %s",
|
||||||
error, ch->chid);
|
error, ch->chid, ch->thread_name);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
|
* Copyright (c) 2017-2023, NVIDIA CORPORATION. All rights reserved.
|
||||||
*
|
*
|
||||||
* This program is free software; you can redistribute it and/or modify it
|
* This program is free software; you can redistribute it and/or modify it
|
||||||
* under the terms and conditions of the GNU General Public License,
|
* under the terms and conditions of the GNU General Public License,
|
||||||
@@ -30,3 +30,13 @@ void nvgpu_print_current_impl(struct gk20a *g, const char *func_name, int line,
|
|||||||
{
|
{
|
||||||
nvgpu_log_msg_impl(g, func_name, line, type, current->comm);
|
nvgpu_log_msg_impl(g, func_name, line, type, current->comm);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void nvgpu_get_thread_name(char *dest)
|
||||||
|
{
|
||||||
|
char buf[TASK_COMM_LEN];
|
||||||
|
|
||||||
|
get_task_comm(buf, current);
|
||||||
|
strncpy(dest, buf, TASK_COMM_LEN);
|
||||||
|
/* Ensure buffer is null terminated */
|
||||||
|
dest[TASK_COMM_LEN-1] = '\0';
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2018-2022, NVIDIA CORPORATION. All rights reserved.
|
* Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved.
|
||||||
*
|
*
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
* copy of this software and associated documentation files (the "Software"),
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
@@ -53,6 +53,11 @@ int nvgpu_current_tid(struct gk20a *g)
|
|||||||
return (int)pthread_self();
|
return (int)pthread_self();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * POSIX build of the thread-name query.
 *
 * Looking up the thread name is not implemented for this build, so an empty
 * string is reported. Writing the terminator (instead of leaving the buffer
 * untouched) guarantees that callers which later print the name with "%s"
 * always read a valid, NUL-terminated string.
 *
 * @param dest [out] Destination buffer; must be able to hold at least one byte.
 */
void nvgpu_get_thread_name(char *dest)
{
	dest[0] = '\0';
}
|
||||||
|
|
||||||
void nvgpu_print_current_impl(struct gk20a *g, const char *func_name, int line,
|
void nvgpu_print_current_impl(struct gk20a *g, const char *func_name, int line,
|
||||||
void *ctx, enum nvgpu_log_type type)
|
void *ctx, enum nvgpu_log_type type)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
# Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
|
# Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved.
|
||||||
|
|
||||||
bitmap_find_next_zero_area
|
bitmap_find_next_zero_area
|
||||||
fb_gv11b_write_mmu_fault_buffer_get
|
fb_gv11b_write_mmu_fault_buffer_get
|
||||||
@@ -342,6 +342,7 @@ nvgpu_cond_timedwait
|
|||||||
nvgpu_cond_unlock
|
nvgpu_cond_unlock
|
||||||
nvgpu_current_pid
|
nvgpu_current_pid
|
||||||
nvgpu_current_tid
|
nvgpu_current_tid
|
||||||
|
nvgpu_get_thread_name
|
||||||
nvgpu_current_time_ms
|
nvgpu_current_time_ms
|
||||||
nvgpu_current_time_ns
|
nvgpu_current_time_ns
|
||||||
nvgpu_current_time_us
|
nvgpu_current_time_us
|
||||||
|
|||||||
Reference in New Issue
Block a user