Mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git (synced 2025-12-22 17:36:20 +03:00)
gpu: nvgpu: Read sm error ioctl support for tsg
Add READ_SM_ERROR IOCTL support at the TSG level.

The struct that saves the sm_error details is moved from gr to tsg,
since SM error support is context based, not global. A MISRA 21.1
violation in the header file is also corrected.

nvgpu_dbg_gpu_ioctl_write_single_sm_error_state and
nvgpu_dbg_gpu_ioctl_read_single_sm_error_state are modified to use
the tsg struct nvgpu_tsg_sm_error_state.

Bug 200412642

Change-Id: I9e334b059078a4bb0e360b945444cc4bf1cc56ec
Signed-off-by: Vinod G <vinodg@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1794856
Reviewed-by: svc-misra-checker <svc-misra-checker@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
commit bfe65407bd (parent 3bd47da095), committed by mobile promotions
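For orientation, a hypothetical user-space sketch of how the new TSG-level ioctl could be driven follows. The ioctl number, the nvgpu_tsg_read_single_sm_error_state_args layout and the nvgpu_tsg_sm_error_state_record fields are taken from the uapi additions in this change; the header path, the helper function, and the way the TSG file descriptor is obtained are assumptions for illustration only, not part of the commit.

    /*
     * Hypothetical sketch (not part of this commit): query the SM error
     * state for one SM through the new TSG-level ioctl, on an already-open
     * TSG file descriptor.  Header path is an assumption.
     */
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/nvgpu.h>   /* assumed location of the nvgpu uapi header */

    static int read_sm_error_state(int tsg_fd, uint32_t sm_id)
    {
            struct nvgpu_tsg_sm_error_state_record rec;
            struct nvgpu_tsg_read_single_sm_error_state_args args;

            memset(&rec, 0, sizeof(rec));
            memset(&args, 0, sizeof(args));

            args.sm_id = sm_id;                 /* SM to query; must be below no_of_sm */
            args.record_mem = (uintptr_t)&rec;  /* user buffer that receives the record */
            args.record_size = sizeof(rec);     /* kernel copies at most this many bytes */

            if (ioctl(tsg_fd, NVGPU_TSG_IOCTL_READ_SINGLE_SM_ERROR_STATE, &args) != 0)
                    return -1;

            printf("SM %u: global_esr=0x%x warp_esr=0x%x warp_esr_pc=0x%llx\n",
                   sm_id, rec.global_esr, rec.warp_esr,
                   (unsigned long long)rec.warp_esr_pc);
            return 0;
    }

In the kernel, such a request is routed by nvgpu_ioctl_tsg_dev_ioctl to the new gk20a_tsg_ioctl_read_single_sm_error_state, which validates sm_id against gr->no_of_sm and copies at most record_size bytes of the record back to record_mem, as shown in the diff below.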
@@ -396,7 +396,7 @@ struct gpu_ops {
 			u32 sm, struct channel_gk20a *fault_ch);
 		int (*update_sm_error_state)(struct gk20a *g,
 				struct channel_gk20a *ch, u32 sm_id,
-				struct nvgpu_gr_sm_error_state *sm_error_state);
+				struct nvgpu_tsg_sm_error_state *sm_error_state);
 		int (*clear_sm_error_state)(struct gk20a *g,
 				struct channel_gk20a *ch, u32 sm_id);
 		int (*suspend_contexts)(struct gk20a *g,
@@ -1561,19 +1561,6 @@ restore_fe_go_idle:
 	if (err)
 		goto clean_up;

-	nvgpu_kfree(g, gr->sm_error_states);
-
-	/* we need to allocate this after g->ops.gr.init_fs_state() since
-	 * we initialize gr->no_of_sm in this function
-	 */
-	gr->sm_error_states = nvgpu_kzalloc(g,
-			sizeof(struct nvgpu_gr_sm_error_state)
-			* gr->no_of_sm);
-	if (!gr->sm_error_states) {
-		err = -ENOMEM;
-		goto restore_fe_go_idle;
-	}
-
 	ctx_header_words = roundup(ctx_header_bytes, sizeof(u32));
 	ctx_header_words >>= 2;

@@ -3072,7 +3059,6 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr)

 	memset(&gr->compbit_store, 0, sizeof(struct compbit_store_desc));

-	nvgpu_kfree(g, gr->sm_error_states);
 	nvgpu_kfree(g, gr->gpc_tpc_count);
 	nvgpu_kfree(g, gr->gpc_zcb_count);
 	nvgpu_kfree(g, gr->gpc_ppc_count);
@@ -4545,22 +4531,6 @@ restore_fe_go_idle:

 	err = gr_gk20a_wait_idle(g, gk20a_get_gr_idle_timeout(g),
 			GR_IDLE_CHECK_DEFAULT);
-	if (err)
-		goto out;
-
-	nvgpu_kfree(g, gr->sm_error_states);
-
-	/* we need to allocate this after g->ops.gr.init_fs_state() since
-	 * we initialize gr->no_of_sm in this function
-	 */
-	gr->sm_error_states = nvgpu_kzalloc(g,
-			sizeof(struct nvgpu_gr_sm_error_state) *
-			gr->no_of_sm);
-	if (!gr->sm_error_states) {
-		err = -ENOMEM;
-		goto restore_fe_go_idle;
-	}
-
 out:
 	nvgpu_log_fn(g, "done");
 	return err;
@@ -254,14 +254,6 @@ struct nvgpu_preemption_modes_rec {
 	u32 default_compute_preempt_mode; /* default mode */
 };

-struct nvgpu_gr_sm_error_state {
-	u32 hww_global_esr;
-	u32 hww_warp_esr;
-	u64 hww_warp_esr_pc;
-	u32 hww_global_esr_report_mask;
-	u32 hww_warp_esr_report_mask;
-};
-
 struct gr_gk20a {
 	struct gk20a *g;
 	struct {
@@ -427,7 +419,6 @@ struct gr_gk20a {
 	u32 *fbp_rop_l2_en_mask;
 	u32 no_of_sm;
 	struct sm_info *sm_to_cluster;
-	struct nvgpu_gr_sm_error_state *sm_error_states;

 #define NVGPU_SM_EXCEPTION_TYPE_MASK_NONE (0x0U)
 #define NVGPU_SM_EXCEPTION_TYPE_MASK_FATAL (0x1U << 0)
@@ -275,8 +275,23 @@ struct tsg_gk20a *gk20a_tsg_open(struct gk20a *g, pid_t pid)
 	int err;

 	tsg = gk20a_tsg_acquire_unused_tsg(&g->fifo);
-	if (!tsg)
+	if (tsg == NULL) {
 		return NULL;
+	}
+
+	/* we need to allocate this after g->ops.gr.init_fs_state() since
+	 * we initialize gr->no_of_sm in this function
+	 */
+	if (g->gr.no_of_sm == 0U) {
+		nvgpu_err(g, "no_of_sm %d not set, failed allocation",
+			g->gr.no_of_sm);
+		return NULL;
+	}
+
+	err = gk20a_tsg_alloc_sm_error_states_mem(g, tsg, g->gr.no_of_sm);
+	if (err != 0) {
+		return NULL;
+	}

 	tsg->g = g;
 	tsg->num_active_channels = 0;
@@ -295,7 +310,7 @@ struct tsg_gk20a *gk20a_tsg_open(struct gk20a *g, pid_t pid)

 	if (g->ops.fifo.tsg_open) {
 		err = g->ops.fifo.tsg_open(tsg);
-		if (err) {
+		if (err != 0) {
 			nvgpu_err(g, "tsg %d fifo open failed %d",
 				tsg->tsgid, err);
 			goto clean_up;
@@ -307,6 +322,12 @@ struct tsg_gk20a *gk20a_tsg_open(struct gk20a *g, pid_t pid)
 	return tsg;

 clean_up:
+
+	if(tsg->sm_error_states != NULL) {
+		nvgpu_kfree(g, tsg->sm_error_states);
+		tsg->sm_error_states = NULL;
+	}
+
 	nvgpu_ref_put(&tsg->refcount, gk20a_tsg_release);
 	return NULL;
 }
@@ -317,20 +338,28 @@ void gk20a_tsg_release(struct nvgpu_ref *ref)
 	struct gk20a *g = tsg->g;
 	struct gk20a_event_id_data *event_id_data, *event_id_data_temp;

-	if (g->ops.fifo.tsg_release)
+	if (g->ops.fifo.tsg_release != NULL) {
 		g->ops.fifo.tsg_release(tsg);
+	}

-	if (nvgpu_mem_is_valid(&tsg->gr_ctx.mem))
+	if (nvgpu_mem_is_valid(&tsg->gr_ctx.mem)) {
 		gr_gk20a_free_tsg_gr_ctx(tsg);
+	}

-	if (g->ops.fifo.deinit_eng_method_buffers)
+	if (g->ops.fifo.deinit_eng_method_buffers != NULL) {
 		g->ops.fifo.deinit_eng_method_buffers(g, tsg);
+	}

-	if (tsg->vm) {
+	if (tsg->vm != NULL) {
 		nvgpu_vm_put(tsg->vm);
 		tsg->vm = NULL;
 	}

+	if(tsg->sm_error_states != NULL) {
+		nvgpu_kfree(g, tsg->sm_error_states);
+		tsg->sm_error_states = NULL;
+	}
+
 	/* unhook all events created on this TSG */
 	nvgpu_mutex_acquire(&tsg->event_id_list_lock);
 	nvgpu_list_for_each_entry_safe(event_id_data, event_id_data_temp,
@@ -360,3 +389,44 @@ struct tsg_gk20a *tsg_gk20a_from_ch(struct channel_gk20a *ch)

 	return tsg;
 }
+
+int gk20a_tsg_alloc_sm_error_states_mem(struct gk20a *g,
+					struct tsg_gk20a *tsg,
+					u32 num_sm)
+{
+	int err = 0;
+
+	if (tsg->sm_error_states != NULL) {
+		return err;
+	}
+
+	tsg->sm_error_states = nvgpu_kzalloc(g,
+			sizeof(struct nvgpu_tsg_sm_error_state)
+			* num_sm);
+	if (tsg->sm_error_states == NULL) {
+		nvgpu_err(g, "sm_error_states mem allocation failed");
+		err = -ENOMEM;
+	}
+
+	return err;
+}
+
+void gk20a_tsg_update_sm_error_state_locked(struct tsg_gk20a *tsg,
+			u32 sm_id,
+			struct nvgpu_tsg_sm_error_state *sm_error_state)
+{
+	struct nvgpu_tsg_sm_error_state *tsg_sm_error_states;
+
+	tsg_sm_error_states = tsg->sm_error_states + sm_id;
+
+	tsg_sm_error_states->hww_global_esr =
+		sm_error_state->hww_global_esr;
+	tsg_sm_error_states->hww_warp_esr =
+		sm_error_state->hww_warp_esr;
+	tsg_sm_error_states->hww_warp_esr_pc =
+		sm_error_state->hww_warp_esr_pc;
+	tsg_sm_error_states->hww_global_esr_report_mask =
+		sm_error_state->hww_global_esr_report_mask;
+	tsg_sm_error_states->hww_warp_esr_report_mask =
+		sm_error_state->hww_warp_esr_report_mask;
+}
@@ -19,8 +19,8 @@
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  * DEALINGS IN THE SOFTWARE.
  */
-#ifndef __TSG_GK20A_H_
-#define __TSG_GK20A_H_
+#ifndef TSG_GK20A_H
+#define TSG_GK20A_H

 #include <nvgpu/lock.h>
 #include <nvgpu/kref.h>
@@ -39,6 +39,14 @@ void gk20a_tsg_release(struct nvgpu_ref *ref);
 int gk20a_init_tsg_support(struct gk20a *g, u32 tsgid);
 struct tsg_gk20a *tsg_gk20a_from_ch(struct channel_gk20a *ch);

+struct nvgpu_tsg_sm_error_state {
+	u32 hww_global_esr;
+	u32 hww_warp_esr;
+	u64 hww_warp_esr_pc;
+	u32 hww_global_esr_report_mask;
+	u32 hww_warp_esr_report_mask;
+};
+
 struct tsg_gk20a {
 	struct gk20a *g;

@@ -69,6 +77,7 @@ struct tsg_gk20a {
 	bool tpc_num_initialized;
 	bool in_use;

+	struct nvgpu_tsg_sm_error_state *sm_error_states;
 };

 int gk20a_enable_tsg(struct tsg_gk20a *tsg);
@@ -84,6 +93,12 @@ int gk20a_tsg_set_timeslice(struct tsg_gk20a *tsg, u32 timeslice);
 u32 gk20a_tsg_get_timeslice(struct tsg_gk20a *tsg);
 int gk20a_tsg_set_priority(struct gk20a *g, struct tsg_gk20a *tsg,
 				u32 priority);
+int gk20a_tsg_alloc_sm_error_states_mem(struct gk20a *g,
+					struct tsg_gk20a *tsg,
+					u32 num_sm);
+void gk20a_tsg_update_sm_error_state_locked(struct tsg_gk20a *tsg,
+			u32 sm_id,
+			struct nvgpu_tsg_sm_error_state *sm_error_state);

 struct gk20a_event_id_data {
 	struct gk20a *g;
@@ -106,4 +121,4 @@ gk20a_event_id_data_from_event_id_node(struct nvgpu_list_node *node)
 		((uintptr_t)node - offsetof(struct gk20a_event_id_data, event_id_node));
 };

-#endif /* __TSG_GK20A_H_ */
+#endif /* TSG_GK20A_H */
@@ -1268,32 +1268,68 @@ void gr_gm20b_get_access_map(struct gk20a *g,
 	*num_entries = ARRAY_SIZE(wl_addr_gm20b);
 }

+static void gm20b_gr_read_sm_error_state(struct gk20a *g,
+			u32 offset,
+			struct nvgpu_tsg_sm_error_state *sm_error_states)
+{
+	sm_error_states->hww_global_esr = gk20a_readl(g,
+		gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
+	sm_error_states->hww_warp_esr = gk20a_readl(g,
+		gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset);
+	sm_error_states->hww_warp_esr_pc = (u64)(gk20a_readl(g,
+		gr_gpc0_tpc0_sm_hww_warp_esr_pc_r() + offset));
+	sm_error_states->hww_global_esr_report_mask = gk20a_readl(g,
+		gr_gpc0_tpc0_sm_hww_global_esr_report_mask_r() + offset);
+	sm_error_states->hww_warp_esr_report_mask = gk20a_readl(g,
+		gr_gpc0_tpc0_sm_hww_warp_esr_report_mask_r() + offset);
+
+}
+
+static void gm20b_gr_write_sm_error_state(struct gk20a *g,
+			u32 offset,
+			struct nvgpu_tsg_sm_error_state *sm_error_states)
+{
+	gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset,
+			sm_error_states->hww_global_esr);
+	gk20a_writel(g, gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset,
+			sm_error_states->hww_warp_esr);
+	gk20a_writel(g, gr_gpc0_tpc0_sm_hww_warp_esr_pc_r() + offset,
+			u64_lo32(sm_error_states->hww_warp_esr_pc));
+	gk20a_writel(g, gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r() + offset,
+			sm_error_states->hww_global_esr_report_mask);
+	gk20a_writel(g, gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r() + offset,
+			sm_error_states->hww_warp_esr_report_mask);
+}
+
 int gm20b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
 			struct channel_gk20a *fault_ch)
 {
 	int sm_id;
-	struct gr_gk20a *gr = &g->gr;
 	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
 	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g,
 					GPU_LIT_TPC_IN_GPC_STRIDE);
 	u32 offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;
+	struct nvgpu_tsg_sm_error_state *sm_error_states = NULL;
+	struct tsg_gk20a *tsg = NULL;

 	nvgpu_mutex_acquire(&g->dbg_sessions_lock);

 	sm_id = gr_gpc0_tpc0_sm_cfg_sm_id_v(gk20a_readl(g,
 			gr_gpc0_tpc0_sm_cfg_r() + offset));

-	gr->sm_error_states[sm_id].hww_global_esr = gk20a_readl(g,
-			gr_gpc0_tpc0_sm_hww_global_esr_r() + offset);
-	gr->sm_error_states[sm_id].hww_warp_esr = gk20a_readl(g,
-			gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset);
-	gr->sm_error_states[sm_id].hww_warp_esr_pc = gk20a_readl(g,
-			gr_gpc0_tpc0_sm_hww_warp_esr_pc_r() + offset);
-	gr->sm_error_states[sm_id].hww_global_esr_report_mask = gk20a_readl(g,
-			gr_gpc0_tpc0_sm_hww_global_esr_report_mask_r() + offset);
-	gr->sm_error_states[sm_id].hww_warp_esr_report_mask = gk20a_readl(g,
-			gr_gpc0_tpc0_sm_hww_warp_esr_report_mask_r() + offset);
+	if (fault_ch != NULL) {
+		tsg = tsg_gk20a_from_ch(fault_ch);
+	}

+	if (tsg == NULL) {
+		nvgpu_err(g, "no valid tsg");
+		goto record_fail;
+	}
+
+	sm_error_states = tsg->sm_error_states + sm_id;
+	gm20b_gr_read_sm_error_state(g, offset, sm_error_states);
+
+record_fail:
 	nvgpu_mutex_release(&g->dbg_sessions_lock);

 	return sm_id;
@@ -1301,12 +1337,12 @@ int gm20b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,

 int gm20b_gr_update_sm_error_state(struct gk20a *g,
 		struct channel_gk20a *ch, u32 sm_id,
-		struct nvgpu_gr_sm_error_state *sm_error_state)
+		struct nvgpu_tsg_sm_error_state *sm_error_state)
 {
 	u32 gpc, tpc, offset;
-	struct gr_gk20a *gr = &g->gr;
 	struct tsg_gk20a *tsg;
 	struct nvgpu_gr_ctx *ch_ctx;
+	struct nvgpu_tsg_sm_error_state *tsg_sm_error_states;
 	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
 	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g,
 					GPU_LIT_TPC_IN_GPC_STRIDE);
@@ -1320,16 +1356,8 @@ int gm20b_gr_update_sm_error_state(struct gk20a *g,

 	nvgpu_mutex_acquire(&g->dbg_sessions_lock);

-	gr->sm_error_states[sm_id].hww_global_esr =
-			sm_error_state->hww_global_esr;
-	gr->sm_error_states[sm_id].hww_warp_esr =
-			sm_error_state->hww_warp_esr;
-	gr->sm_error_states[sm_id].hww_warp_esr_pc =
-			sm_error_state->hww_warp_esr_pc;
-	gr->sm_error_states[sm_id].hww_global_esr_report_mask =
-			sm_error_state->hww_global_esr_report_mask;
-	gr->sm_error_states[sm_id].hww_warp_esr_report_mask =
-			sm_error_state->hww_warp_esr_report_mask;
+	tsg_sm_error_states = tsg->sm_error_states + sm_id;
+	gk20a_tsg_update_sm_error_state_locked(tsg, sm_id, sm_error_state);

 	err = gr_gk20a_disable_ctxsw(g);
 	if (err) {
@@ -1343,16 +1371,7 @@ int gm20b_gr_update_sm_error_state(struct gk20a *g,
 	offset = gpc_stride * gpc + tpc_in_gpc_stride * tpc;

 	if (gk20a_is_channel_ctx_resident(ch)) {
-		gk20a_writel(g, gr_gpc0_tpc0_sm_hww_global_esr_r() + offset,
-				gr->sm_error_states[sm_id].hww_global_esr);
-		gk20a_writel(g, gr_gpc0_tpc0_sm_hww_warp_esr_r() + offset,
-				gr->sm_error_states[sm_id].hww_warp_esr);
-		gk20a_writel(g, gr_gpc0_tpc0_sm_hww_warp_esr_pc_r() + offset,
-				gr->sm_error_states[sm_id].hww_warp_esr_pc);
-		gk20a_writel(g, gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r() + offset,
-				gr->sm_error_states[sm_id].hww_global_esr_report_mask);
-		gk20a_writel(g, gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r() + offset,
-				gr->sm_error_states[sm_id].hww_warp_esr_report_mask);
+		gm20b_gr_write_sm_error_state(g, offset, tsg_sm_error_states);
 	} else {
 		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, false);
 		if (err)
@@ -1360,11 +1379,11 @@ int gm20b_gr_update_sm_error_state(struct gk20a *g,

 		gr_gk20a_ctx_patch_write(g, ch_ctx,
 				gr_gpcs_tpcs_sm_hww_global_esr_report_mask_r() + offset,
-				gr->sm_error_states[sm_id].hww_global_esr_report_mask,
+				tsg_sm_error_states->hww_global_esr_report_mask,
 				true);
 		gr_gk20a_ctx_patch_write(g, ch_ctx,
 				gr_gpcs_tpcs_sm_hww_warp_esr_report_mask_r() + offset,
-				gr->sm_error_states[sm_id].hww_warp_esr_report_mask,
+				tsg_sm_error_states->hww_warp_esr_report_mask,
 				true);

 		gr_gk20a_ctx_patch_write_end(g, ch_ctx, false);
@@ -1383,15 +1402,20 @@ int gm20b_gr_clear_sm_error_state(struct gk20a *g,
 {
 	u32 gpc, tpc, offset;
 	u32 val;
-	struct gr_gk20a *gr = &g->gr;
+	struct tsg_gk20a *tsg;
 	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
 	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g,
 					GPU_LIT_TPC_IN_GPC_STRIDE);
 	int err = 0;

+	tsg = tsg_gk20a_from_ch(ch);
+	if (tsg == NULL) {
+		return -EINVAL;
+	}
+
 	nvgpu_mutex_acquire(&g->dbg_sessions_lock);

-	memset(&gr->sm_error_states[sm_id], 0, sizeof(*gr->sm_error_states));
+	memset(&tsg->sm_error_states[sm_id], 0, sizeof(*tsg->sm_error_states));

 	err = gr_gk20a_disable_ctxsw(g);
 	if (err) {
@@ -119,7 +119,7 @@ int gm20b_gr_record_sm_error_state(struct gk20a *g, u32 gpc,
 			u32 tpc, u32 sm, struct channel_gk20a *fault_ch);
 int gm20b_gr_update_sm_error_state(struct gk20a *g,
 		struct channel_gk20a *ch, u32 sm_id,
-		struct nvgpu_gr_sm_error_state *sm_error_state);
+		struct nvgpu_tsg_sm_error_state *sm_error_state);
 int gm20b_gr_clear_sm_error_state(struct gk20a *g,
 		struct channel_gk20a *ch, u32 sm_id);
 int gr_gm20b_get_preemption_mode_flags(struct gk20a *g,
@@ -3212,18 +3212,42 @@ void gv11b_gr_bpt_reg_info(struct gk20a *g, struct nvgpu_warpstate *w_state)
 	}
 }

+static void gv11b_gr_write_sm_error_state(struct gk20a *g,
+			u32 offset,
+			struct nvgpu_tsg_sm_error_state *sm_error_states)
+{
+	nvgpu_writel(g,
+		gr_gpc0_tpc0_sm0_hww_global_esr_r() + offset,
+		sm_error_states->hww_global_esr);
+	nvgpu_writel(g,
+		gr_gpc0_tpc0_sm0_hww_warp_esr_r() + offset,
+		sm_error_states->hww_warp_esr);
+	nvgpu_writel(g,
+		gr_gpc0_tpc0_sm0_hww_warp_esr_pc_r() + offset,
+		u64_lo32(sm_error_states->hww_warp_esr_pc));
+	nvgpu_writel(g,
+		gr_gpc0_tpc0_sm0_hww_warp_esr_pc_hi_r() + offset,
+		u64_hi32(sm_error_states->hww_warp_esr_pc));
+	nvgpu_writel(g,
+		gr_gpc0_tpc0_sm0_hww_global_esr_report_mask_r() + offset,
+		sm_error_states->hww_global_esr_report_mask);
+	nvgpu_writel(g,
+		gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_r() + offset,
+		sm_error_states->hww_warp_esr_report_mask);
+}
+
 int gv11b_gr_update_sm_error_state(struct gk20a *g,
 		struct channel_gk20a *ch, u32 sm_id,
-		struct nvgpu_gr_sm_error_state *sm_error_state)
+		struct nvgpu_tsg_sm_error_state *sm_error_state)
 {
 	struct tsg_gk20a *tsg;
 	u32 gpc, tpc, sm, offset;
-	struct gr_gk20a *gr = &g->gr;
 	struct nvgpu_gr_ctx *ch_ctx;
 	int err = 0;
+	struct nvgpu_tsg_sm_error_state *tsg_sm_error_states;

 	tsg = tsg_gk20a_from_ch(ch);
-	if (!tsg) {
+	if (tsg == NULL) {
 		return -EINVAL;
 	}

@@ -3231,16 +3255,8 @@ int gv11b_gr_update_sm_error_state(struct gk20a *g,

 	nvgpu_mutex_acquire(&g->dbg_sessions_lock);

-	gr->sm_error_states[sm_id].hww_global_esr =
-			sm_error_state->hww_global_esr;
-	gr->sm_error_states[sm_id].hww_warp_esr =
-			sm_error_state->hww_warp_esr;
-	gr->sm_error_states[sm_id].hww_warp_esr_pc =
-			sm_error_state->hww_warp_esr_pc;
-	gr->sm_error_states[sm_id].hww_global_esr_report_mask =
-			sm_error_state->hww_global_esr_report_mask;
-	gr->sm_error_states[sm_id].hww_warp_esr_report_mask =
-			sm_error_state->hww_warp_esr_report_mask;
+	tsg_sm_error_states = tsg->sm_error_states + sm_id;
+	gk20a_tsg_update_sm_error_state_locked(tsg, sm_id, sm_error_state);

 	err = gr_gk20a_disable_ctxsw(g);
 	if (err) {
@@ -3257,21 +3273,7 @@ int gv11b_gr_update_sm_error_state(struct gk20a *g,
 			gv11b_gr_sm_offset(g, sm);

 	if (gk20a_is_channel_ctx_resident(ch)) {
-		gk20a_writel(g,
-			gr_gpc0_tpc0_sm0_hww_global_esr_r() + offset,
-			gr->sm_error_states[sm_id].hww_global_esr);
-		gk20a_writel(g,
-			gr_gpc0_tpc0_sm0_hww_warp_esr_r() + offset,
-			gr->sm_error_states[sm_id].hww_warp_esr);
-		gk20a_writel(g,
-			gr_gpc0_tpc0_sm0_hww_warp_esr_pc_r() + offset,
-			gr->sm_error_states[sm_id].hww_warp_esr_pc);
-		gk20a_writel(g,
-			gr_gpc0_tpc0_sm0_hww_global_esr_report_mask_r() + offset,
-			gr->sm_error_states[sm_id].hww_global_esr_report_mask);
-		gk20a_writel(g,
-			gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_r() + offset,
-			gr->sm_error_states[sm_id].hww_warp_esr_report_mask);
+		gv11b_gr_write_sm_error_state(g, offset, tsg_sm_error_states);
 	} else {
 		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx, false);
 		if (err) {
@@ -3281,12 +3283,12 @@ int gv11b_gr_update_sm_error_state(struct gk20a *g,
 		gr_gk20a_ctx_patch_write(g, ch_ctx,
 				gr_gpcs_tpcs_sms_hww_global_esr_report_mask_r() +
 				offset,
-				gr->sm_error_states[sm_id].hww_global_esr_report_mask,
+				tsg_sm_error_states->hww_global_esr_report_mask,
 				true);
 		gr_gk20a_ctx_patch_write(g, ch_ctx,
 				gr_gpcs_tpcs_sms_hww_warp_esr_report_mask_r() +
 				offset,
-				gr->sm_error_states[sm_id].hww_warp_esr_report_mask,
+				tsg_sm_error_states->hww_warp_esr_report_mask,
 				true);

 		gr_gk20a_ctx_patch_write_end(g, ch_ctx, false);
@@ -3362,13 +3364,36 @@ int gv11b_gr_set_sm_debug_mode(struct gk20a *g,
 	return err;
 }

+static void gv11b_gr_read_sm_error_state(struct gk20a *g,
+			u32 offset,
+			struct nvgpu_tsg_sm_error_state *sm_error_states)
+{
+	sm_error_states->hww_global_esr = nvgpu_readl(g,
+		gr_gpc0_tpc0_sm0_hww_global_esr_r() + offset);
+
+	sm_error_states->hww_warp_esr = nvgpu_readl(g,
+		gr_gpc0_tpc0_sm0_hww_warp_esr_r() + offset);
+
+	sm_error_states->hww_warp_esr_pc = hi32_lo32_to_u64((nvgpu_readl(g,
+		gr_gpc0_tpc0_sm0_hww_warp_esr_pc_hi_r() + offset)),
+		(nvgpu_readl(g,
+			gr_gpc0_tpc0_sm0_hww_warp_esr_pc_r() + offset)));
+
+	sm_error_states->hww_global_esr_report_mask = nvgpu_readl(g,
+		gr_gpc0_tpc0_sm0_hww_global_esr_report_mask_r() + offset);
+
+	sm_error_states->hww_warp_esr_report_mask = nvgpu_readl(g,
+		gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_r() + offset);
+}
+
 int gv11b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
 			struct channel_gk20a *fault_ch)
 {
 	int sm_id;
-	struct gr_gk20a *gr = &g->gr;
 	u32 offset, sm_per_tpc, tpc_id;
 	u32 gpc_offset, gpc_tpc_offset;
+	struct nvgpu_tsg_sm_error_state *sm_error_states = NULL;
+	struct tsg_gk20a *tsg = NULL;

 	nvgpu_mutex_acquire(&g->dbg_sessions_lock);

@@ -3381,21 +3406,19 @@ int gv11b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,

 	offset = gpc_tpc_offset + gv11b_gr_sm_offset(g, sm);

-	gr->sm_error_states[sm_id].hww_global_esr = gk20a_readl(g,
-		gr_gpc0_tpc0_sm0_hww_global_esr_r() + offset);
-
-	gr->sm_error_states[sm_id].hww_warp_esr = gk20a_readl(g,
-		gr_gpc0_tpc0_sm0_hww_warp_esr_r() + offset);
-
-	gr->sm_error_states[sm_id].hww_warp_esr_pc = gk20a_readl(g,
-		gr_gpc0_tpc0_sm0_hww_warp_esr_pc_r() + offset);
-
-	gr->sm_error_states[sm_id].hww_global_esr_report_mask = gk20a_readl(g,
-		gr_gpc0_tpc0_sm0_hww_global_esr_report_mask_r() + offset);
-
-	gr->sm_error_states[sm_id].hww_warp_esr_report_mask = gk20a_readl(g,
-		gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_r() + offset);
+	if (fault_ch != NULL) {
+		tsg = tsg_gk20a_from_ch(fault_ch);
+	}
+
+	if (tsg == NULL) {
+		nvgpu_err(g, "no valid tsg");
+		goto record_fail;
+	}
+
+	sm_error_states = tsg->sm_error_states + sm_id;
+	gv11b_gr_read_sm_error_state(g, offset, sm_error_states);

+record_fail:
 	nvgpu_mutex_release(&g->dbg_sessions_lock);

 	return sm_id;
@@ -43,7 +43,7 @@ struct zbc_entry;
 struct zbc_query_params;
 struct nvgpu_gr_ctx;
 struct nvgpu_warpstate;
-struct nvgpu_gr_sm_error_state;
+struct nvgpu_tsg_sm_error_state;
 struct gr_ctx_desc;
 struct gr_gk20a_isr_data;
 struct gk20a_debug_output;
@@ -168,7 +168,7 @@ int gv11b_gr_sm_trigger_suspend(struct gk20a *g);
 void gv11b_gr_bpt_reg_info(struct gk20a *g, struct nvgpu_warpstate *w_state);
 int gv11b_gr_update_sm_error_state(struct gk20a *g,
 		struct channel_gk20a *ch, u32 sm_id,
-		struct nvgpu_gr_sm_error_state *sm_error_state);
+		struct nvgpu_tsg_sm_error_state *sm_error_state);
 int gv11b_gr_set_sm_debug_mode(struct gk20a *g,
 		struct channel_gk20a *ch, u64 sms, bool enable);
 int gv11b_gr_record_sm_error_state(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
@@ -22,8 +22,8 @@
  * DEALINGS IN THE SOFTWARE.
  */

-#ifndef __TEGRA_VGPU_H
-#define __TEGRA_VGPU_H
+#ifndef TEGRA_VGPU_H
+#define TEGRA_VGPU_H

 #include <nvgpu/types.h>
 #include <nvgpu/ecc.h> /* For NVGPU_ECC_STAT_NAME_MAX_SIZE */
@@ -737,6 +737,7 @@ struct tegra_vgpu_channel_event_info {
 };

 struct tegra_vgpu_sm_esr_info {
+	u32 tsg_id;
 	u32 sm_id;
 	u32 hww_global_esr;
 	u32 hww_warp_esr;
@@ -1567,56 +1567,6 @@ out:
 	return err;
 }

-static int nvgpu_gpu_read_single_sm_error_state(struct gk20a *g,
-		struct nvgpu_gpu_read_single_sm_error_state_args *args)
-{
-	struct gr_gk20a *gr = &g->gr;
-	struct nvgpu_gr_sm_error_state *sm_error_state;
-	struct nvgpu_gpu_sm_error_state_record sm_error_state_record;
-	u32 sm_id;
-	int err = 0;
-
-	sm_id = args->sm_id;
-	if (sm_id >= gr->no_of_sm)
-		return -EINVAL;
-
-	nvgpu_speculation_barrier();
-
-	sm_error_state = gr->sm_error_states + sm_id;
-	sm_error_state_record.global_esr =
-		sm_error_state->hww_global_esr;
-	sm_error_state_record.warp_esr =
-		sm_error_state->hww_warp_esr;
-	sm_error_state_record.warp_esr_pc =
-		sm_error_state->hww_warp_esr_pc;
-	sm_error_state_record.global_esr_report_mask =
-		sm_error_state->hww_global_esr_report_mask;
-	sm_error_state_record.warp_esr_report_mask =
-		sm_error_state->hww_warp_esr_report_mask;
-
-	if (args->record_size > 0) {
-		size_t write_size = sizeof(*sm_error_state);
-
-		if (write_size > args->record_size)
-			write_size = args->record_size;
-
-		nvgpu_mutex_acquire(&g->dbg_sessions_lock);
-		err = copy_to_user((void __user *)(uintptr_t)
-						args->record_mem,
-				&sm_error_state_record,
-				write_size);
-		nvgpu_mutex_release(&g->dbg_sessions_lock);
-		if (err) {
-			nvgpu_err(g, "copy_to_user failed!");
-			return err;
-		}
-
-		args->record_size = write_size;
-	}
-
-	return 0;
-}
-
 long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	struct gk20a_ctrl_priv *priv = filp->private_data;
@@ -1925,11 +1875,6 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
 			(struct nvgpu_gpu_set_deterministic_opts_args *)buf);
 		break;

-	case NVGPU_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE:
-		err = nvgpu_gpu_read_single_sm_error_state(g,
-			(struct nvgpu_gpu_read_single_sm_error_state_args *)buf);
-		break;
-
 	default:
 		nvgpu_log_info(g, "unrecognized gpu ioctl cmd: 0x%x", cmd);
 		err = -ENOTTY;
@@ -35,6 +35,7 @@

 #include "gk20a/gk20a.h"
 #include "gk20a/gr_gk20a.h"
+#include "gk20a/tsg_gk20a.h"
 #include "gk20a/regops_gk20a.h"
 #include "gk20a/dbg_gpu_gk20a.h"
 #include "os_linux.h"
@@ -271,20 +272,23 @@ static int nvgpu_dbg_gpu_ioctl_write_single_sm_error_state(
 	u32 sm_id;
 	struct channel_gk20a *ch;
 	struct nvgpu_dbg_gpu_sm_error_state_record sm_error_state_record;
-	struct nvgpu_gr_sm_error_state sm_error_state;
+	struct nvgpu_tsg_sm_error_state sm_error_state;
 	int err = 0;

 	/* Not currently supported in the virtual case */
-	if (g->is_virtual)
+	if (g->is_virtual) {
 		return -ENOSYS;
+	}

 	ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
-	if (!ch)
+	if (ch == NULL) {
 		return -EINVAL;
+	}

 	sm_id = args->sm_id;
-	if (sm_id >= gr->no_of_sm)
+	if (sm_id >= gr->no_of_sm) {
 		return -EINVAL;
+	}

 	nvgpu_speculation_barrier();

@@ -300,13 +304,15 @@ static int nvgpu_dbg_gpu_ioctl_write_single_sm_error_state(
 				args->sm_error_state_record_mem,
 				read_size);
 		nvgpu_mutex_release(&g->dbg_sessions_lock);
-		if (err)
+		if (err != 0) {
 			return -ENOMEM;
 		}
+	}

 	err = gk20a_busy(g);
-	if (err)
+	if (err != 0) {
 		return err;
+	}

 	sm_error_state.hww_global_esr =
 		sm_error_state_record.hww_global_esr;
@@ -335,18 +341,36 @@ static int nvgpu_dbg_gpu_ioctl_read_single_sm_error_state(
 {
 	struct gk20a *g = dbg_s->g;
 	struct gr_gk20a *gr = &g->gr;
-	struct nvgpu_gr_sm_error_state *sm_error_state;
+	struct nvgpu_tsg_sm_error_state *sm_error_state;
 	struct nvgpu_dbg_gpu_sm_error_state_record sm_error_state_record;
+	struct channel_gk20a *ch;
+	struct tsg_gk20a *tsg;
 	u32 sm_id;
 	int err = 0;

-	sm_id = args->sm_id;
-	if (sm_id >= gr->no_of_sm)
+	ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
+	if (ch == NULL) {
 		return -EINVAL;
+	}
+
+	tsg = tsg_gk20a_from_ch(ch);
+	if (tsg == NULL) {
+		nvgpu_err(g, "no valid tsg from ch");
+		return -EINVAL;
+	}
+
+	sm_id = args->sm_id;
+	if (sm_id >= gr->no_of_sm) {
+		return -EINVAL;
+	}
+
+	if (tsg->sm_error_states == NULL) {
+		return -EINVAL;
+	}

 	nvgpu_speculation_barrier();

-	sm_error_state = gr->sm_error_states + sm_id;
+	sm_error_state = tsg->sm_error_states + sm_id;
 	sm_error_state_record.hww_global_esr =
 		sm_error_state->hww_global_esr;
 	sm_error_state_record.hww_warp_esr =
@@ -370,7 +394,7 @@ static int nvgpu_dbg_gpu_ioctl_read_single_sm_error_state(
 				&sm_error_state_record,
 				write_size);
 		nvgpu_mutex_release(&g->dbg_sessions_lock);
-		if (err) {
+		if (err != 0) {
 			nvgpu_err(g, "copy_to_user failed!");
 			return err;
 		}
@@ -1500,8 +1524,9 @@ static int nvgpu_dbg_gpu_ioctl_clear_single_sm_error_state(
 	int err = 0;

 	ch = nvgpu_dbg_gpu_get_session_channel(dbg_s);
-	if (!ch)
+	if (ch == NULL) {
 		return -EINVAL;
+	}

 	sm_id = args->sm_id;
 	if (sm_id >= gr->no_of_sm)
@@ -1510,8 +1535,9 @@ static int nvgpu_dbg_gpu_ioctl_clear_single_sm_error_state(
 	nvgpu_speculation_barrier();

 	err = gk20a_busy(g);
-	if (err)
+	if (err != 0) {
 		return err;
+	}

 	err = gr_gk20a_elpg_protected_call(g,
 			g->ops.gr.clear_sm_error_state(g, ch, sm_id));
@@ -536,6 +536,57 @@ static int gk20a_tsg_ioctl_get_timeslice(struct gk20a *g,
 	return 0;
 }

+static int gk20a_tsg_ioctl_read_single_sm_error_state(struct gk20a *g,
+		struct tsg_gk20a *tsg,
+		struct nvgpu_tsg_read_single_sm_error_state_args *args)
+{
+	struct gr_gk20a *gr = &g->gr;
+	struct nvgpu_tsg_sm_error_state *sm_error_state;
+	struct nvgpu_tsg_sm_error_state_record sm_error_state_record;
+	u32 sm_id;
+	int err = 0;
+
+	sm_id = args->sm_id;
+	if (sm_id >= gr->no_of_sm)
+		return -EINVAL;
+
+	nvgpu_speculation_barrier();
+
+	sm_error_state = tsg->sm_error_states + sm_id;
+	sm_error_state_record.global_esr =
+		sm_error_state->hww_global_esr;
+	sm_error_state_record.warp_esr =
+		sm_error_state->hww_warp_esr;
+	sm_error_state_record.warp_esr_pc =
+		sm_error_state->hww_warp_esr_pc;
+	sm_error_state_record.global_esr_report_mask =
+		sm_error_state->hww_global_esr_report_mask;
+	sm_error_state_record.warp_esr_report_mask =
+		sm_error_state->hww_warp_esr_report_mask;
+
+	if (args->record_size > 0) {
+		size_t write_size = sizeof(*sm_error_state);
+
+		if (write_size > args->record_size)
+			write_size = args->record_size;
+
+		nvgpu_mutex_acquire(&g->dbg_sessions_lock);
+		err = copy_to_user((void __user *)(uintptr_t)
+						args->record_mem,
+				&sm_error_state_record,
+				write_size);
+		nvgpu_mutex_release(&g->dbg_sessions_lock);
+		if (err) {
+			nvgpu_err(g, "copy_to_user failed!");
+			return err;
+		}
+
+		args->record_size = write_size;
+	}
+
+	return 0;
+}
+
 long nvgpu_ioctl_tsg_dev_ioctl(struct file *filp, unsigned int cmd,
 			unsigned long arg)
 {
@@ -670,6 +721,13 @@ long nvgpu_ioctl_tsg_dev_ioctl(struct file *filp, unsigned int cmd,
 		break;
 		}

+	case NVGPU_TSG_IOCTL_READ_SINGLE_SM_ERROR_STATE:
+		{
+		err = gk20a_tsg_ioctl_read_single_sm_error_state(g, tsg,
+			(struct nvgpu_tsg_read_single_sm_error_state_args *)buf);
+		break;
+		}
+
 	default:
 		nvgpu_err(g, "unrecognized tsg gpu ioctl cmd: 0x%x",
 			cmd);
@@ -882,9 +882,6 @@ static void vgpu_remove_gr_support(struct gr_gk20a *gr)

 	gk20a_comptag_allocator_destroy(gr->g, &gr->comp_tags);

-	nvgpu_kfree(gr->g, gr->sm_error_states);
-	gr->sm_error_states = NULL;
-
 	nvgpu_kfree(gr->g, gr->gpc_tpc_mask);
 	gr->gpc_tpc_mask = NULL;

@@ -935,14 +932,6 @@ static int vgpu_gr_init_gr_setup_sw(struct gk20a *g)
 	nvgpu_mutex_init(&gr->ctx_mutex);
 	nvgpu_spinlock_init(&gr->ch_tlb_lock);

-	gr->sm_error_states = nvgpu_kzalloc(g,
-			sizeof(struct nvgpu_gr_sm_error_state) *
-			gr->no_of_sm);
-	if (!gr->sm_error_states) {
-		err = -ENOMEM;
-		goto clean_up;
-	}
-
 	gr->remove_support = vgpu_remove_gr_support;
 	gr->sw_ready = true;

@@ -1152,12 +1141,17 @@ int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g,
 int vgpu_gr_clear_sm_error_state(struct gk20a *g,
 		struct channel_gk20a *ch, u32 sm_id)
 {
-	struct gr_gk20a *gr = &g->gr;
 	struct tegra_vgpu_cmd_msg msg;
 	struct tegra_vgpu_clear_sm_error_state *p =
 			&msg.params.clear_sm_error_state;
+	struct tsg_gk20a *tsg;
 	int err;

+	tsg = tsg_gk20a_from_ch(ch);
+	if (!tsg) {
+		return -EINVAL;
+	}
+
 	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
 	msg.cmd = TEGRA_VGPU_CMD_CLEAR_SM_ERROR_STATE;
 	msg.handle = vgpu_get_handle(g);
@@ -1167,7 +1161,7 @@ int vgpu_gr_clear_sm_error_state(struct gk20a *g,
 	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
 	WARN_ON(err || msg.ret);

-	memset(&gr->sm_error_states[sm_id], 0, sizeof(*gr->sm_error_states));
+	memset(&tsg->sm_error_states[sm_id], 0, sizeof(*tsg->sm_error_states));
 	nvgpu_mutex_release(&g->dbg_sessions_lock);

 	return err ? err : msg.ret;
@@ -1264,7 +1258,8 @@ int vgpu_gr_resume_contexts(struct gk20a *g,
 void vgpu_gr_handle_sm_esr_event(struct gk20a *g,
 			struct tegra_vgpu_sm_esr_info *info)
 {
-	struct nvgpu_gr_sm_error_state *sm_error_states;
+	struct nvgpu_tsg_sm_error_state *sm_error_states;
+	struct tsg_gk20a *tsg;

 	if (info->sm_id >= g->gr.no_of_sm) {
 		nvgpu_err(g, "invalid smd_id %d / %d",
@@ -1272,9 +1267,20 @@ void vgpu_gr_handle_sm_esr_event(struct gk20a *g,
 		return;
 	}

+	if (info->tsg_id >= g->fifo.num_channels) {
+		nvgpu_err(g, "invalid tsg_id in sm esr event");
+		return;
+	}
+
+	tsg = &g->fifo.tsg[info->tsg_id];
+	if (tsg == NULL) {
+		nvgpu_err(g, "invalid tsg");
+		return;
+	}
+
 	nvgpu_mutex_acquire(&g->dbg_sessions_lock);

-	sm_error_states = &g->gr.sm_error_states[info->sm_id];
+	sm_error_states = &tsg->sm_error_states[info->sm_id];

 	sm_error_states->hww_global_esr = info->hww_global_esr;
 	sm_error_states->hww_warp_esr = info->hww_warp_esr;
@@ -861,38 +861,6 @@ struct nvgpu_gpu_set_deterministic_opts_args {
 	__u64 channels; /* in */
 };

-/*
- * This struct helps to report the SM error state of a single SM.
- * This acts upon the currently resident GR context.
- * Global Error status register
- * Warp Error status register
- * Warp Error status register PC
- * Global Error status register Report Mask
- * Warp Error status register Report Mask
- */
-struct nvgpu_gpu_sm_error_state_record {
-	__u32 global_esr;
-	__u32 warp_esr;
-	__u64 warp_esr_pc;
-	__u32 global_esr_report_mask;
-	__u32 warp_esr_report_mask;
-};
-
-/*
- * This struct helps to read the SM error state.
- */
-struct nvgpu_gpu_read_single_sm_error_state_args {
-	/* Valid SM ID */
-	__u32 sm_id;
-	__u32 reserved;
-	/*
-	 * This is pointer to the struct nvgpu_gpu_sm_error_state_record
-	 */
-	__u64 record_mem;
-	/* size of the record size to read */
-	__u64 record_size;
-};
-
 #define NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE \
 	_IOR(NVGPU_GPU_IOCTL_MAGIC, 1, struct nvgpu_gpu_zcull_get_ctx_size_args)
 #define NVGPU_GPU_IOCTL_ZCULL_GET_INFO \
@@ -976,11 +944,8 @@ struct nvgpu_gpu_read_single_sm_error_state_args {
 #define NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS \
 	_IOWR(NVGPU_GPU_IOCTL_MAGIC, 40, \
 			struct nvgpu_gpu_set_deterministic_opts_args)
-#define NVGPU_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE \
-	_IOWR(NVGPU_GPU_IOCTL_MAGIC, 41, \
-			struct nvgpu_gpu_read_single_sm_error_state_args)
 #define NVGPU_GPU_IOCTL_LAST \
-	_IOC_NR(NVGPU_GPU_IOCTL_READ_SINGLE_SM_ERROR_STATE)
+	_IOC_NR(NVGPU_GPU_IOCTL_SET_DETERMINISTIC_OPTS)
 #define NVGPU_GPU_IOCTL_MAX_ARG_SIZE \
 	sizeof(struct nvgpu_gpu_get_cpu_time_correlation_info_args)

@@ -1063,6 +1028,38 @@ struct nvgpu_tsg_bind_channel_ex_args {
 	__u8 reserved[11];
 };

+/*
+ * This struct helps to report the SM error state of a single SM.
+ * This acts upon the currently resident TSG context.
+ * Global Error status register
+ * Warp Error status register
+ * Warp Error status register PC
+ * Global Error status register Report Mask
+ * Warp Error status register Report Mask
+ */
+struct nvgpu_tsg_sm_error_state_record {
+	__u32 global_esr;
+	__u32 warp_esr;
+	__u64 warp_esr_pc;
+	__u32 global_esr_report_mask;
+	__u32 warp_esr_report_mask;
+};
+
+/*
+ * This struct helps to read the SM error state.
+ */
+struct nvgpu_tsg_read_single_sm_error_state_args {
+	/* Valid SM ID */
+	__u32 sm_id;
+	__u32 reserved;
+	/*
+	 * This is pointer to the struct nvgpu_gpu_sm_error_state_record
+	 */
+	__u64 record_mem;
+	/* size of the record size to read */
+	__u64 record_size;
+};
+
 #define NVGPU_TSG_IOCTL_BIND_CHANNEL \
 	_IOW(NVGPU_TSG_IOCTL_MAGIC, 1, int)
 #define NVGPU_TSG_IOCTL_UNBIND_CHANNEL \
@@ -1083,10 +1080,13 @@ struct nvgpu_tsg_bind_channel_ex_args {
 	_IOR(NVGPU_TSG_IOCTL_MAGIC, 10, struct nvgpu_timeslice_args)
 #define NVGPU_TSG_IOCTL_BIND_CHANNEL_EX \
 	_IOWR(NVGPU_TSG_IOCTL_MAGIC, 11, struct nvgpu_tsg_bind_channel_ex_args)
+#define NVGPU_TSG_IOCTL_READ_SINGLE_SM_ERROR_STATE \
+	_IOR(NVGPU_TSG_IOCTL_MAGIC, 12, \
+			struct nvgpu_tsg_read_single_sm_error_state_args)
 #define NVGPU_TSG_IOCTL_MAX_ARG_SIZE \
 	sizeof(struct nvgpu_tsg_bind_channel_ex_args)
 #define NVGPU_TSG_IOCTL_LAST \
-	_IOC_NR(NVGPU_TSG_IOCTL_BIND_CHANNEL_EX)
+	_IOC_NR(NVGPU_TSG_IOCTL_READ_SINGLE_SM_ERROR_STATE)

 /*
  * /dev/nvhost-dbg-gpu device