From da9dee85e2c7fc47df093349efc6c70722f8dcf7 Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Wed, 10 Apr 2019 00:03:40 -0700 Subject: [PATCH] gpu: nvgpu: move mmu fault handling to hal/fifo Move chip specific mmu fault handling from fifo_gk20a.c to hal/fifo/mmu_fault_gk20a.c Move gk20a_teardown_ch_tsg to hal/rc/rc_gk20a.c JIRA NVGPU-1314 Change-Id: Idf88b1c312bc9f46c2508f2c63e948d71d622297 Signed-off-by: Seema Khowala Reviewed-on: https://git-master.nvidia.com/r/2094051 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/Makefile | 1 + drivers/gpu/nvgpu/Makefile.sources | 1 + drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 295 ------------------- drivers/gpu/nvgpu/gk20a/fifo_gk20a.h | 10 - drivers/gpu/nvgpu/gm20b/hal_gm20b.c | 1 + drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 1 + drivers/gpu/nvgpu/hal/fifo/mmu_fault_gk20a.c | 226 +++++++++++++- drivers/gpu/nvgpu/hal/fifo/mmu_fault_gk20a.h | 6 + drivers/gpu/nvgpu/hal/rc/rc_gk20a.c | 131 ++++++++ drivers/gpu/nvgpu/hal/rc/rc_gk20a.h | 33 +++ 10 files changed, 399 insertions(+), 306 deletions(-) create mode 100644 drivers/gpu/nvgpu/hal/rc/rc_gk20a.c create mode 100644 drivers/gpu/nvgpu/hal/rc/rc_gk20a.h diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index f131803fa..bdc50ab1b 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile @@ -237,6 +237,7 @@ nvgpu-y += \ hal/fuse/fuse_gm20b.o \ hal/fuse/fuse_gp10b.o \ hal/fuse/fuse_gp106.o \ + hal/rc/rc_gk20a.o \ hal/fifo/usermode_gv11b.o \ hal/fifo/usermode_tu104.o \ hal/fifo/engines_gm20b.o \ diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources index 886222958..732ebd758 100644 --- a/drivers/gpu/nvgpu/Makefile.sources +++ b/drivers/gpu/nvgpu/Makefile.sources @@ -358,6 +358,7 @@ srcs += common/sim.c \ hal/fuse/fuse_gm20b.c \ hal/fuse/fuse_gp10b.c \ hal/fuse/fuse_gp106.c \ + hal/rc/rc_gk20a.c \ hal/fifo/usermode_gv11b.c \ hal/fifo/usermode_tu104.c \ hal/fifo/engines_gm20b.c \ diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index df2014973..1f0a9bed6 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c @@ -119,301 +119,6 @@ int gk20a_init_fifo_setup_hw(struct gk20a *g) return 0; } -static bool gk20a_fifo_handle_mmu_fault_locked( - struct gk20a *g, - u32 mmu_fault_engines, /* queried from HW if 0 */ - u32 hw_id, /* queried from HW if ~(u32)0 OR mmu_fault_engines == 0*/ - bool id_is_tsg) -{ - bool fake_fault; - unsigned long fault_id; - unsigned long engine_mmu_fault_id; - bool verbose = true; - struct nvgpu_engine_status_info engine_status; - bool deferred_reset_pending = false; - struct fifo_gk20a *f = &g->fifo; - - nvgpu_log_fn(g, " "); - - if (nvgpu_cg_pg_disable(g) != 0) { - nvgpu_warn(g, "fail to disable power mgmt"); - } - - /* Disable fifo access */ - g->ops.gr.init.fifo_access(g, false); - - if (mmu_fault_engines != 0U) { - fault_id = mmu_fault_engines; - fake_fault = true; - } else { - fault_id = gk20a_readl(g, fifo_intr_mmu_fault_id_r()); - fake_fault = false; - } - nvgpu_mutex_acquire(&f->deferred_reset_mutex); - g->fifo.deferred_reset_pending = false; - nvgpu_mutex_release(&f->deferred_reset_mutex); - - /* go through all faulted engines */ - for_each_set_bit(engine_mmu_fault_id, &fault_id, 32U) { - /* bits in fifo_intr_mmu_fault_id_r do not correspond 1:1 to - * engines. 
Convert engine_mmu_id to engine_id */ - u32 engine_id = nvgpu_engine_mmu_fault_id_to_engine_id(g, - (u32)engine_mmu_fault_id); - struct mmu_fault_info mmfault_info; - struct channel_gk20a *ch = NULL; - struct tsg_gk20a *tsg = NULL; - struct channel_gk20a *refch = NULL; - bool ctxsw; - /* read and parse engine status */ - g->ops.engine_status.read_engine_status_info(g, engine_id, - &engine_status); - - ctxsw = nvgpu_engine_status_is_ctxsw(&engine_status); - - gk20a_fifo_mmu_fault_info_dump(g, engine_id, - (u32)engine_mmu_fault_id, - fake_fault, &mmfault_info); - - if (ctxsw) { - g->ops.gr.falcon.dump_stats(g); - nvgpu_err(g, " gr_status_r: 0x%x", - gk20a_readl(g, gr_status_r())); - } - - /* get the channel/TSG */ - if (fake_fault) { - /* use next_id if context load is failing */ - u32 id, type; - - if (hw_id == ~(u32)0) { - if (nvgpu_engine_status_is_ctxsw_load( - &engine_status)) { - nvgpu_engine_status_get_next_ctx_id_type( - &engine_status, &id, &type); - } else { - nvgpu_engine_status_get_ctx_id_type( - &engine_status, &id, &type); - } - } else { - id = hw_id; - type = id_is_tsg ? - ENGINE_STATUS_CTX_ID_TYPE_TSGID : - ENGINE_STATUS_CTX_ID_TYPE_CHID; - } - - if (type == ENGINE_STATUS_CTX_ID_TYPE_TSGID) { - tsg = &g->fifo.tsg[id]; - } else if (type == ENGINE_STATUS_CTX_ID_TYPE_CHID) { - ch = &g->fifo.channel[id]; - refch = gk20a_channel_get(ch); - if (refch != NULL) { - tsg = tsg_gk20a_from_ch(refch); - } - } - } else { - /* Look up channel from the inst block pointer. */ - ch = nvgpu_channel_refch_from_inst_ptr(g, - mmfault_info.inst_ptr); - refch = ch; - if (refch != NULL) { - tsg = tsg_gk20a_from_ch(refch); - } - } - - /* check if engine reset should be deferred */ - if (engine_id != FIFO_INVAL_ENGINE_ID) { - bool defer = nvgpu_engine_should_defer_reset(g, - engine_id, mmfault_info.client_type, - fake_fault); - if (((ch != NULL) || (tsg != NULL)) && defer) { - g->fifo.deferred_fault_engines |= BIT(engine_id); - - /* handled during channel free */ - nvgpu_mutex_acquire(&f->deferred_reset_mutex); - g->fifo.deferred_reset_pending = true; - nvgpu_mutex_release(&f->deferred_reset_mutex); - - deferred_reset_pending = true; - - nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, - "sm debugger attached," - " deferring channel recovery to channel free"); - } else { - nvgpu_engine_reset(g, engine_id); - } - } - -#ifdef CONFIG_GK20A_CTXSW_TRACE - if (tsg != NULL) { - nvgpu_gr_fecs_trace_add_tsg_reset(g, tsg); - } -#endif - /* - * Disable the channel/TSG from hw and increment syncpoints. 
- */ - if (tsg != NULL) { - if (deferred_reset_pending) { - g->ops.tsg.disable(tsg); - } else { - if (!fake_fault) { - nvgpu_tsg_set_ctx_mmu_error(g, tsg); - } - verbose = nvgpu_tsg_mark_error(g, tsg); - nvgpu_tsg_abort(g, tsg, false); - } - - /* put back the ref taken early above */ - if (refch != NULL) { - gk20a_channel_put(ch); - } - } else if (refch != NULL) { - nvgpu_err(g, "mmu error in unbound channel %d", - ch->chid); - gk20a_channel_put(ch); - } else if (mmfault_info.inst_ptr == - nvgpu_inst_block_addr(g, - &g->mm.bar1.inst_block)) { - nvgpu_err(g, "mmu fault from bar1"); - } else if (mmfault_info.inst_ptr == - nvgpu_inst_block_addr(g, - &g->mm.pmu.inst_block)) { - nvgpu_err(g, "mmu fault from pmu"); - } else { - nvgpu_err(g, "couldn't locate channel for mmu fault"); - } - } - - if (!fake_fault) { - gk20a_debug_dump(g); - } - - /* clear interrupt */ - gk20a_writel(g, fifo_intr_mmu_fault_id_r(), (u32)fault_id); - - /* resume scheduler */ - gk20a_writel(g, fifo_error_sched_disable_r(), - gk20a_readl(g, fifo_error_sched_disable_r())); - - /* Re-enable fifo access */ - g->ops.gr.init.fifo_access(g, true); - - if (nvgpu_cg_pg_enable(g) != 0) { - nvgpu_warn(g, "fail to enable power mgmt"); - } - return verbose; -} - -bool gk20a_fifo_handle_mmu_fault( - struct gk20a *g, - u32 mmu_fault_engines, /* queried from HW if 0 */ - u32 hw_id, /* queried from HW if ~(u32)0 OR mmu_fault_engines == 0*/ - bool id_is_tsg) -{ - bool verbose; - - nvgpu_log_fn(g, " "); - - nvgpu_log_info(g, "acquire engines_reset_mutex"); - nvgpu_mutex_acquire(&g->fifo.engines_reset_mutex); - - nvgpu_fifo_lock_active_runlists(g); - - verbose = gk20a_fifo_handle_mmu_fault_locked(g, mmu_fault_engines, - hw_id, id_is_tsg); - - nvgpu_fifo_unlock_active_runlists(g); - - nvgpu_log_info(g, "release engines_reset_mutex"); - nvgpu_mutex_release(&g->fifo.engines_reset_mutex); - - return verbose; -} - -void gk20a_fifo_teardown_ch_tsg(struct gk20a *g, u32 __engine_ids, - u32 hw_id, unsigned int id_type, unsigned int rc_type, - struct mmu_fault_info *mmfault) -{ - unsigned long engine_id, i; - unsigned long _engine_ids = __engine_ids; - unsigned long engine_ids = 0; - u32 mmu_fault_engines = 0; - u32 ref_type; - u32 ref_id; - bool ref_id_is_tsg = false; - bool id_is_known = (id_type != ID_TYPE_UNKNOWN) ? true : false; - bool id_is_tsg = (id_type == ID_TYPE_TSG) ? true : false; - - nvgpu_log_info(g, "acquire engines_reset_mutex"); - nvgpu_mutex_acquire(&g->fifo.engines_reset_mutex); - - nvgpu_fifo_lock_active_runlists(g); - - if (id_is_known) { - engine_ids = g->ops.engine.get_mask_on_id(g, - hw_id, id_is_tsg); - ref_id = hw_id; - ref_type = id_is_tsg ? 
- fifo_engine_status_id_type_tsgid_v() : - fifo_engine_status_id_type_chid_v(); - ref_id_is_tsg = id_is_tsg; - /* atleast one engine will get passed during sched err*/ - engine_ids |= __engine_ids; - for_each_set_bit(engine_id, &engine_ids, 32U) { - u32 mmu_id = nvgpu_engine_id_to_mmu_fault_id(g, - (u32)engine_id); - - if (mmu_id != FIFO_INVAL_ENGINE_ID) { - mmu_fault_engines |= BIT(mmu_id); - } - } - } else { - /* store faulted engines in advance */ - for_each_set_bit(engine_id, &_engine_ids, 32U) { - nvgpu_engine_get_id_and_type(g, (u32)engine_id, - &ref_id, &ref_type); - if (ref_type == fifo_engine_status_id_type_tsgid_v()) { - ref_id_is_tsg = true; - } else { - ref_id_is_tsg = false; - } - /* Reset *all* engines that use the - * same channel as faulty engine */ - for (i = 0; i < g->fifo.num_engines; i++) { - u32 active_engine_id = g->fifo.active_engines_list[i]; - u32 type; - u32 id; - - nvgpu_engine_get_id_and_type(g, - active_engine_id, &id, &type); - if (ref_type == type && ref_id == id) { - u32 mmu_id = nvgpu_engine_id_to_mmu_fault_id(g, - active_engine_id); - - engine_ids |= BIT(active_engine_id); - if (mmu_id != FIFO_INVAL_ENGINE_ID) { - mmu_fault_engines |= BIT(mmu_id); - } - } - } - } - } - - if (mmu_fault_engines != 0U) { - g->ops.fifo.intr_set_recover_mask(g); - - g->ops.fifo.trigger_mmu_fault(g, engine_ids); - gk20a_fifo_handle_mmu_fault_locked(g, mmu_fault_engines, ref_id, - ref_id_is_tsg); - - g->ops.fifo.intr_unset_recover_mask(g); - } - - nvgpu_fifo_unlock_active_runlists(g); - - nvgpu_log_info(g, "release engines_reset_mutex"); - nvgpu_mutex_release(&g->fifo.engines_reset_mutex); -} - void gk20a_fifo_issue_preempt(struct gk20a *g, u32 id, bool is_tsg) { if (is_tsg) { diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h index 41a75ba0c..b88f579c9 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h @@ -29,7 +29,6 @@ #include struct gk20a_debug_output; -struct mmu_fault_info; struct nvgpu_semaphore; struct channel_gk20a; struct tsg_gk20a; @@ -211,9 +210,6 @@ struct fifo_gk20a { int gk20a_init_fifo_setup_hw(struct gk20a *g); -void gk20a_fifo_isr(struct gk20a *g); -u32 gk20a_fifo_nonstall_isr(struct gk20a *g); - int gk20a_fifo_preempt_channel(struct gk20a *g, struct channel_gk20a *ch); int gk20a_fifo_preempt_tsg(struct gk20a *g, struct tsg_gk20a *tsg); int gk20a_fifo_preempt(struct gk20a *g, struct channel_gk20a *ch); @@ -255,14 +251,8 @@ int gk20a_fifo_is_preempt_pending(struct gk20a *g, u32 id, unsigned int id_type); int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg); -void gk20a_fifo_teardown_ch_tsg(struct gk20a *g, u32 __engine_ids, - u32 hw_id, unsigned int id_type, unsigned int rc_type, - struct mmu_fault_info *mmfault); - u32 gk20a_fifo_default_timeslice_us(struct gk20a *g); int gk20a_fifo_init_pbdma_map(struct gk20a *g, u32 *pbdma_map, u32 num_pbdma); -bool gk20a_fifo_handle_mmu_fault(struct gk20a *g, - u32 mmu_fault_engines, u32 hw_id, bool id_is_tsg); #endif /* FIFO_GK20A_H */ diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c index 6640b7776..436465ebe 100644 --- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c @@ -75,6 +75,7 @@ #include "hal/fifo/ctxsw_timeout_gk20a.h" #include "hal/fifo/mmu_fault_gk20a.h" #include "hal/fifo/mmu_fault_gm20b.h" +#include "hal/rc/rc_gk20a.h" #include "hal/gr/zbc/zbc_gm20b.h" #include "hal/gr/zcull/zcull_gm20b.h" #include "hal/gr/falcon/gr_falcon_gm20b.h" diff --git 
a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index 994e49ba0..557251ff9 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c @@ -88,6 +88,7 @@ #include "hal/fifo/mmu_fault_gm20b.h" #include "hal/fifo/mmu_fault_gp10b.h" #include "hal/fifo/ctxsw_timeout_gk20a.h" +#include "hal/rc/rc_gk20a.h" #include "hal/gr/ecc/ecc_gp10b.h" #include "hal/gr/fecs_trace/fecs_trace_gm20b.h" #include "hal/gr/fecs_trace/fecs_trace_gp10b.h" diff --git a/drivers/gpu/nvgpu/hal/fifo/mmu_fault_gk20a.c b/drivers/gpu/nvgpu/hal/fifo/mmu_fault_gk20a.c index d45bb3b86..8ee0a84a5 100644 --- a/drivers/gpu/nvgpu/hal/fifo/mmu_fault_gk20a.c +++ b/drivers/gpu/nvgpu/hal/fifo/mmu_fault_gk20a.c @@ -25,12 +25,23 @@ #include #include #include +#include #include +#include #include +#include +#include +#include +#include +#include +#include +#include #include #include +/* TODO: remove gr_status_r */ +#include /* fault info/descriptions */ @@ -213,7 +224,220 @@ void gk20a_fifo_mmu_fault_info_dump(struct gk20a *g, u32 engine_id, void gk20a_fifo_handle_dropped_mmu_fault(struct gk20a *g) { - u32 fault_id = gk20a_readl(g, fifo_intr_mmu_fault_id_r()); + u32 fault_id = nvgpu_readl(g, fifo_intr_mmu_fault_id_r()); nvgpu_err(g, "dropped mmu fault (0x%08x)", fault_id); } + +bool gk20a_fifo_handle_mmu_fault_locked( + struct gk20a *g, + u32 mmu_fault_engines, /* queried from HW if 0 */ + u32 hw_id, /* queried from HW if ~(u32)0 OR mmu_fault_engines == 0*/ + bool id_is_tsg) +{ + bool fake_fault; + unsigned long fault_id; + unsigned long engine_mmu_fault_id; + bool debug_dump = true; + struct nvgpu_engine_status_info engine_status; + bool deferred_reset_pending = false; + struct fifo_gk20a *f = &g->fifo; + + nvgpu_log_fn(g, " "); + + if (nvgpu_cg_pg_disable(g) != 0) { + nvgpu_warn(g, "fail to disable power mgmt"); + } + + /* Disable fifo access */ + g->ops.gr.init.fifo_access(g, false); + + if (mmu_fault_engines != 0U) { + fault_id = mmu_fault_engines; + fake_fault = true; + } else { + fault_id = nvgpu_readl(g, fifo_intr_mmu_fault_id_r()); + fake_fault = false; + } + nvgpu_mutex_acquire(&f->deferred_reset_mutex); + g->fifo.deferred_reset_pending = false; + nvgpu_mutex_release(&f->deferred_reset_mutex); + + /* go through all faulted engines */ + for_each_set_bit(engine_mmu_fault_id, &fault_id, 32U) { + /* + * bits in fifo_intr_mmu_fault_id_r do not correspond 1:1 to + * engines. Convert engine_mmu_id to engine_id + */ + u32 engine_id = nvgpu_engine_mmu_fault_id_to_engine_id(g, + (u32)engine_mmu_fault_id); + struct mmu_fault_info mmfault_info; + struct channel_gk20a *ch = NULL; + struct tsg_gk20a *tsg = NULL; + struct channel_gk20a *refch = NULL; + bool ctxsw; + + /* read and parse engine status */ + g->ops.engine_status.read_engine_status_info(g, engine_id, + &engine_status); + + ctxsw = nvgpu_engine_status_is_ctxsw(&engine_status); + + gk20a_fifo_mmu_fault_info_dump(g, engine_id, + (u32)engine_mmu_fault_id, + fake_fault, &mmfault_info); + + if (ctxsw) { + g->ops.gr.falcon.dump_stats(g); + nvgpu_err(g, " gr_status_r: 0x%x", + nvgpu_readl(g, gr_status_r())); + } + + /* get the channel/TSG */ + if (fake_fault) { + /* use next_id if context load is failing */ + u32 id, type; + + if (hw_id == ~(u32)0) { + if (nvgpu_engine_status_is_ctxsw_load( + &engine_status)) { + nvgpu_engine_status_get_next_ctx_id_type( + &engine_status, &id, &type); + } else { + nvgpu_engine_status_get_ctx_id_type( + &engine_status, &id, &type); + } + } else { + id = hw_id; + type = id_is_tsg ? 
+ ENGINE_STATUS_CTX_ID_TYPE_TSGID : + ENGINE_STATUS_CTX_ID_TYPE_CHID; + } + + if (type == ENGINE_STATUS_CTX_ID_TYPE_TSGID) { + tsg = &g->fifo.tsg[id]; + } else if (type == ENGINE_STATUS_CTX_ID_TYPE_CHID) { + ch = &g->fifo.channel[id]; + refch = gk20a_channel_get(ch); + if (refch != NULL) { + tsg = tsg_gk20a_from_ch(refch); + } + } + } else { + /* Look up channel from the inst block pointer. */ + ch = nvgpu_channel_refch_from_inst_ptr(g, + mmfault_info.inst_ptr); + refch = ch; + if (refch != NULL) { + tsg = tsg_gk20a_from_ch(refch); + } + } + + /* check if engine reset should be deferred */ + if (engine_id != FIFO_INVAL_ENGINE_ID) { + bool defer = nvgpu_engine_should_defer_reset(g, + engine_id, mmfault_info.client_type, + fake_fault); + if (((ch != NULL) || (tsg != NULL)) && defer) { + g->fifo.deferred_fault_engines |= BIT(engine_id); + + /* handled during channel free */ + nvgpu_mutex_acquire(&f->deferred_reset_mutex); + g->fifo.deferred_reset_pending = true; + nvgpu_mutex_release(&f->deferred_reset_mutex); + + deferred_reset_pending = true; + + nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, + "sm debugger attached," + " deferring channel recovery to channel free"); + } else { + nvgpu_engine_reset(g, engine_id); + } + } + +#ifdef CONFIG_GK20A_CTXSW_TRACE + if (tsg != NULL) { + nvgpu_gr_fecs_trace_add_tsg_reset(g, tsg); + } +#endif + /* + * Disable the channel/TSG from hw and increment syncpoints. + */ + if (tsg != NULL) { + if (deferred_reset_pending) { + g->ops.tsg.disable(tsg); + } else { + if (!fake_fault) { + nvgpu_tsg_set_ctx_mmu_error(g, tsg); + } + debug_dump = nvgpu_tsg_mark_error(g, tsg); + nvgpu_tsg_abort(g, tsg, false); + } + + /* put back the ref taken early above */ + if (refch != NULL) { + gk20a_channel_put(ch); + } + } else if (refch != NULL) { + nvgpu_err(g, "mmu error in unbound channel %d", + ch->chid); + gk20a_channel_put(ch); + } else if (mmfault_info.inst_ptr == + nvgpu_inst_block_addr(g, + &g->mm.bar1.inst_block)) { + nvgpu_err(g, "mmu fault from bar1"); + } else if (mmfault_info.inst_ptr == + nvgpu_inst_block_addr(g, + &g->mm.pmu.inst_block)) { + nvgpu_err(g, "mmu fault from pmu"); + } else { + nvgpu_err(g, "couldn't locate channel for mmu fault"); + } + } + + if (!fake_fault) { + gk20a_debug_dump(g); + } + + /* clear interrupt */ + nvgpu_writel(g, fifo_intr_mmu_fault_id_r(), (u32)fault_id); + + /* resume scheduler */ + nvgpu_writel(g, fifo_error_sched_disable_r(), + nvgpu_readl(g, fifo_error_sched_disable_r())); + + /* Re-enable fifo access */ + g->ops.gr.init.fifo_access(g, true); + + if (nvgpu_cg_pg_enable(g) != 0) { + nvgpu_warn(g, "fail to enable power mgmt"); + } + return debug_dump; +} + +bool gk20a_fifo_handle_mmu_fault( + struct gk20a *g, + u32 mmu_fault_engines, /* queried from HW if 0 */ + u32 hw_id, /* queried from HW if ~(u32)0 OR mmu_fault_engines == 0*/ + bool id_is_tsg) +{ + bool debug_dump; + + nvgpu_log_fn(g, " "); + + nvgpu_log_info(g, "acquire engines_reset_mutex"); + nvgpu_mutex_acquire(&g->fifo.engines_reset_mutex); + + nvgpu_fifo_lock_active_runlists(g); + + debug_dump = gk20a_fifo_handle_mmu_fault_locked(g, mmu_fault_engines, + hw_id, id_is_tsg); + + nvgpu_fifo_unlock_active_runlists(g); + + nvgpu_log_info(g, "release engines_reset_mutex"); + nvgpu_mutex_release(&g->fifo.engines_reset_mutex); + + return debug_dump; +} diff --git a/drivers/gpu/nvgpu/hal/fifo/mmu_fault_gk20a.h b/drivers/gpu/nvgpu/hal/fifo/mmu_fault_gk20a.h index 4b5a10b6e..6813a5fe4 100644 --- a/drivers/gpu/nvgpu/hal/fifo/mmu_fault_gk20a.h +++ 
b/drivers/gpu/nvgpu/hal/fifo/mmu_fault_gk20a.h @@ -39,4 +39,10 @@ void gk20a_fifo_mmu_fault_info_dump(struct gk20a *g, u32 engine_id, void gk20a_fifo_handle_dropped_mmu_fault(struct gk20a *g); +bool gk20a_fifo_handle_mmu_fault(struct gk20a *g, u32 mmu_fault_engines, + u32 hw_id, bool id_is_tsg); + +bool gk20a_fifo_handle_mmu_fault_locked(struct gk20a *g, u32 mmu_fault_engines, + u32 hw_id, bool id_is_tsg); + #endif /* NVGPU_FIFO_MMU_FAULT_GK20A_H */ diff --git a/drivers/gpu/nvgpu/hal/rc/rc_gk20a.c b/drivers/gpu/nvgpu/hal/rc/rc_gk20a.c new file mode 100644 index 000000000..74a60d289 --- /dev/null +++ b/drivers/gpu/nvgpu/hal/rc/rc_gk20a.c @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2011-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + + +void gk20a_fifo_teardown_ch_tsg(struct gk20a *g, u32 eng_bitmask, + u32 hw_id, unsigned int id_type, unsigned int rc_type, + struct mmu_fault_info *mmufault) +{ + unsigned long engine_id, i; + unsigned long _engine_ids = eng_bitmask; + unsigned long engine_ids = 0UL; + u32 mmu_fault_engines = 0U; + u32 ref_type; + u32 ref_id; + bool ref_id_is_tsg = false; + bool id_is_known = (id_type != ID_TYPE_UNKNOWN) ? true : false; + bool id_is_tsg = (id_type == ID_TYPE_TSG) ? true : false; + + nvgpu_log_info(g, "acquire engines_reset_mutex"); + nvgpu_mutex_acquire(&g->fifo.engines_reset_mutex); + + nvgpu_fifo_lock_active_runlists(g); + + if (id_is_known) { + engine_ids = g->ops.engine.get_mask_on_id(g, + hw_id, id_is_tsg); + ref_id = hw_id; + ref_type = id_is_tsg ? 
+ fifo_engine_status_id_type_tsgid_v() : + fifo_engine_status_id_type_chid_v(); + ref_id_is_tsg = id_is_tsg; + /* atleast one engine will get passed during sched err*/ + engine_ids |= eng_bitmask; + for_each_set_bit(engine_id, &engine_ids, 32U) { + u32 mmu_id = nvgpu_engine_id_to_mmu_fault_id(g, + (u32)engine_id); + + if (mmu_id != FIFO_INVAL_ENGINE_ID) { + mmu_fault_engines |= BIT(mmu_id); + } + } + } else { + /* store faulted engines in advance */ + for_each_set_bit(engine_id, &_engine_ids, 32U) { + nvgpu_engine_get_id_and_type(g, (u32)engine_id, + &ref_id, &ref_type); + if (ref_type == fifo_engine_status_id_type_tsgid_v()) { + ref_id_is_tsg = true; + } else { + ref_id_is_tsg = false; + } + /* + * Reset *all* engines that use the + * same channel as faulty engine + */ + for (i = 0; i < g->fifo.num_engines; i++) { + u32 active_engine_id = g->fifo.active_engines_list[i]; + u32 type; + u32 id; + + nvgpu_engine_get_id_and_type(g, + active_engine_id, &id, &type); + if (ref_type == type && ref_id == id) { + u32 mmu_id = nvgpu_engine_id_to_mmu_fault_id(g, + active_engine_id); + + engine_ids |= BIT(active_engine_id); + if (mmu_id != FIFO_INVAL_ENGINE_ID) { + mmu_fault_engines |= BIT(mmu_id); + } + } + } + } + } + + if (mmu_fault_engines != 0U) { + g->ops.fifo.intr_set_recover_mask(g); + + g->ops.fifo.trigger_mmu_fault(g, engine_ids); + gk20a_fifo_handle_mmu_fault_locked(g, mmu_fault_engines, ref_id, + ref_id_is_tsg); + + g->ops.fifo.intr_unset_recover_mask(g); + } + + nvgpu_fifo_unlock_active_runlists(g); + + nvgpu_log_info(g, "release engines_reset_mutex"); + nvgpu_mutex_release(&g->fifo.engines_reset_mutex); +} diff --git a/drivers/gpu/nvgpu/hal/rc/rc_gk20a.h b/drivers/gpu/nvgpu/hal/rc/rc_gk20a.h new file mode 100644 index 000000000..2648dcc68 --- /dev/null +++ b/drivers/gpu/nvgpu/hal/rc/rc_gk20a.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2011-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#ifndef RC_GK20A_H +#define RC_GK20A_H + +#include + +struct mmu_fault_info; + +void gk20a_fifo_teardown_ch_tsg(struct gk20a *g, u32 eng_bitmask, + u32 hw_id, unsigned int id_type, unsigned int rc_type, + struct mmu_fault_info *mmufault); + +#endif /* RC_GK20A_H */
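
The hal/rc/rc_gk20a.h include added to hal_gm20b.c and hal_gp10b.c above exists because those chip ops tables reference the relocated entry points directly. A minimal sketch of that wiring follows, assuming gpu_ops members named .fifo.teardown_ch_tsg and .fifo.handle_mmu_fault; the member names and the ops-table excerpt are illustrative only and are not part of this diff:

#include <nvgpu/gk20a.h>                  /* struct gpu_ops (assumed location) */
#include "hal/fifo/mmu_fault_gk20a.h"     /* gk20a_fifo_handle_mmu_fault() */
#include "hal/rc/rc_gk20a.h"              /* gk20a_fifo_teardown_ch_tsg() */

/*
 * Illustrative excerpt only: the gpu_ops member names are assumed, not
 * taken from this patch. The point is that the chip HALs keep binding the
 * same symbols; only the files that define them have moved under hal/.
 */
static const struct gpu_ops gm20b_ops = {
	.fifo = {
		/* recovery/teardown, now built from hal/rc/rc_gk20a.c */
		.teardown_ch_tsg = gk20a_fifo_teardown_ch_tsg,
		/* gk20a-style MMU fault handling, now in hal/fifo/mmu_fault_gk20a.c */
		.handle_mmu_fault = gk20a_fifo_handle_mmu_fault,
	},
};

Because the functions keep their names and signatures, existing callers (such as the sched-error recovery path referenced by the "atleast one engine will get passed during sched err" comment in the moved code) need no changes; the patch is a file-level reorganization with no intended functional change.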