NOTE(review): this patch was recovered from a whitespace-collapsed copy.
Hunk contents and the @@ line counts are as recorded. Tab indentation,
the position of blank context lines and the wrap point of the
get_mmu_fault_info prototype were reconstructed to fit those counts.

NOTE(review): the three angle-bracket include operands in
gv100/fifo_gv100.c were missing from the recovered text. The header
names used below are a best guess based on the symbols the file uses
(scale_ptimer, fifo_eng_timeout_r) - TODO confirm against the tree.

NOTE(review): in gv100/fifo_gv100.h, "struct gk20a;" is assumed to be
the hunk heading followed by a blank context line. It may instead be
the first context line - TODO confirm.

NOTE(review): not changed here, for the author to consider:
  - fifo_gk20a.c: the new if/else has braces on the else arm only.
  - fifo_gv100.c: "g->ch_wdt_timeout_ms*1000" is stored in a u32 with no
    range check, and the scaled value is OR-ed with the enable flag;
    presumably large timeouts could spill into that flag - verify
    against the fifo_eng_timeout register layout.

diff --git a/drivers/gpu/nvgpu/common/linux/pci.c b/drivers/gpu/nvgpu/common/linux/pci.c
index 928d8354d..236046337 100644
--- a/drivers/gpu/nvgpu/common/linux/pci.c
+++ b/drivers/gpu/nvgpu/common/linux/pci.c
@@ -340,7 +340,10 @@ static struct gk20a_platform nvgpu_pci_device[] = {
 		.is_railgated = nvgpu_pci_tegra_is_railgated,
 		.clk_round_rate = nvgpu_pci_clk_round_rate,
 
-		.ch_wdt_timeout_ms = 7000,
+		/*
+		 * WAR: PCIE X1 is very slow, set to very high value till nvlink is up
+		 */
+		.ch_wdt_timeout_ms = 30000,
 
 		.honors_aperture = true,
 		.vbios_min_version = 0x1,
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 194d5e3cd..b14b2a27f 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -815,11 +815,15 @@ int gk20a_init_fifo_reset_enable_hw(struct gk20a *g)
 	if (g->ops.fifo.apply_pb_timeout)
 		g->ops.fifo.apply_pb_timeout(g);
 
-	timeout = GRFIFO_TIMEOUT_CHECK_PERIOD_US;
-	timeout = scale_ptimer(timeout,
-		ptimer_scalingfactor10x(g->ptimer_src_freq));
-	timeout |= fifo_eng_timeout_detection_enabled_f();
-	gk20a_writel(g, fifo_eng_timeout_r(), timeout);
+	if (g->ops.fifo.apply_ctxsw_timeout_intr)
+		g->ops.fifo.apply_ctxsw_timeout_intr(g);
+	else {
+		timeout = GRFIFO_TIMEOUT_CHECK_PERIOD_US;
+		timeout = scale_ptimer(timeout,
+			ptimer_scalingfactor10x(g->ptimer_src_freq));
+		timeout |= fifo_eng_timeout_detection_enabled_f();
+		gk20a_writel(g, fifo_eng_timeout_r(), timeout);
+	}
 
 	/* clear and enable pbdma interrupt */
 	for (i = 0; i < host_num_pbdma; i++) {
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 9d51be594..d9a8396f7 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -531,6 +531,7 @@ struct gpu_ops {
 		void (*get_mmu_fault_info)(struct gk20a *g, u32 mmu_fault_id,
 			struct mmu_fault_info *mmfault);
 		void (*apply_pb_timeout)(struct gk20a *g);
+		void (*apply_ctxsw_timeout_intr)(struct gk20a *g);
 		int (*wait_engine_idle)(struct gk20a *g);
 		u32 (*get_num_fifos)(struct gk20a *g);
 		u32 (*get_pbdma_signature)(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/gv100/fifo_gv100.c b/drivers/gpu/nvgpu/gv100/fifo_gv100.c
index 79862f6b1..0227720de 100644
--- a/drivers/gpu/nvgpu/gv100/fifo_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/fifo_gv100.c
@@ -22,9 +22,12 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
+#include "gk20a/gk20a.h"
 #include "fifo_gv100.h"
 
+#include <nvgpu/ptimer.h>
 #include <nvgpu/hw/gv100/hw_ccsr_gv100.h>
+#include <nvgpu/hw/gv100/hw_fifo_gv100.h>
 
 #define DEFAULT_FIFO_PREEMPT_TIMEOUT 0x3FFFFFUL
 
@@ -38,3 +41,14 @@ u32 gv100_fifo_get_preempt_timeout(struct gk20a *g)
 	return DEFAULT_FIFO_PREEMPT_TIMEOUT;
 }
 
+void gv100_apply_ctxsw_timeout_intr(struct gk20a *g)
+{
+	u32 timeout;
+
+	timeout = g->ch_wdt_timeout_ms*1000;
+	timeout = scale_ptimer(timeout,
+		ptimer_scalingfactor10x(g->ptimer_src_freq));
+	timeout |= fifo_eng_timeout_detection_enabled_f();
+	gk20a_writel(g, fifo_eng_timeout_r(), timeout);
+}
+
diff --git a/drivers/gpu/nvgpu/gv100/fifo_gv100.h b/drivers/gpu/nvgpu/gv100/fifo_gv100.h
index af6ad030d..0af3fcced 100644
--- a/drivers/gpu/nvgpu/gv100/fifo_gv100.h
+++ b/drivers/gpu/nvgpu/gv100/fifo_gv100.h
@@ -30,4 +30,5 @@ struct gk20a;
 
 u32 gv100_fifo_get_num_fifos(struct gk20a *g);
 u32 gv100_fifo_get_preempt_timeout(struct gk20a *g);
+void gv100_apply_ctxsw_timeout_intr(struct gk20a *g);
 #endif
diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c
index d21364660..a03ae03e4 100644
--- a/drivers/gpu/nvgpu/gv100/hal_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c
@@ -509,6 +509,7 @@ static const struct gpu_ops gv100_ops = {
 		.free_channel_ctx_header = gv11b_free_subctx_header,
 		.preempt_ch_tsg = gv11b_fifo_preempt_ch_tsg,
 		.handle_ctxsw_timeout = gv11b_fifo_handle_ctxsw_timeout,
+		.apply_ctxsw_timeout_intr = gv100_apply_ctxsw_timeout_intr,
 	},
 	.gr_ctx = {
 		.get_netlist_name = gr_gv100_get_netlist_name,
diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
index d90c622dd..4fd90c0de 100644
--- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
@@ -1287,7 +1287,7 @@ static const char *const gv11b_sched_error_str[] = {
 	"rl_ack_extra",
 	"rl_rdat_timeout",
 	"rl_rdat_extra",
-	"xxx-a",
+	"eng_ctxsw_timeout",
 	"xxx-b",
 	"rl_req_timeout",
 	"new_runlist",