DNI: gpu: nvgpu: Increase GV100 ctxsw timeouts

During bringup, before nvlink is up, GV100 on the DDPX platform operates
with a very, very slow sysmem link. In order to get the sysmem tests to pass
it is necessary to significantly increase most timeouts by an order of
magnitude.

Bug 2040544

Change-Id: I26858afde4ae80c70f86b47cfff674b6b00b5bf8
Signed-off-by: David Nieto <dmartineznie@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1627417
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
David Nieto
2017-12-27 14:28:29 -08:00
committed by mobile promotions
parent 83096b7ffc
commit 1f71f475e2
7 changed files with 31 additions and 7 deletions

View File

@@ -340,7 +340,10 @@ static struct gk20a_platform nvgpu_pci_device[] = {
.is_railgated = nvgpu_pci_tegra_is_railgated,
.clk_round_rate = nvgpu_pci_clk_round_rate,
.ch_wdt_timeout_ms = 7000,
/*
* WAR: PCIE X1 is very slow, set to very high value till nvlink is up
*/
.ch_wdt_timeout_ms = 30000,
.honors_aperture = true,
.vbios_min_version = 0x1,

View File

@@ -815,11 +815,15 @@ int gk20a_init_fifo_reset_enable_hw(struct gk20a *g)
if (g->ops.fifo.apply_pb_timeout)
g->ops.fifo.apply_pb_timeout(g);
timeout = GRFIFO_TIMEOUT_CHECK_PERIOD_US;
timeout = scale_ptimer(timeout,
ptimer_scalingfactor10x(g->ptimer_src_freq));
timeout |= fifo_eng_timeout_detection_enabled_f();
gk20a_writel(g, fifo_eng_timeout_r(), timeout);
if (g->ops.fifo.apply_ctxsw_timeout_intr)
g->ops.fifo.apply_ctxsw_timeout_intr(g);
else {
timeout = GRFIFO_TIMEOUT_CHECK_PERIOD_US;
timeout = scale_ptimer(timeout,
ptimer_scalingfactor10x(g->ptimer_src_freq));
timeout |= fifo_eng_timeout_detection_enabled_f();
gk20a_writel(g, fifo_eng_timeout_r(), timeout);
}
/* clear and enable pbdma interrupt */
for (i = 0; i < host_num_pbdma; i++) {

View File

@@ -531,6 +531,7 @@ struct gpu_ops {
void (*get_mmu_fault_info)(struct gk20a *g, u32 mmu_fault_id,
struct mmu_fault_info *mmfault);
void (*apply_pb_timeout)(struct gk20a *g);
void (*apply_ctxsw_timeout_intr)(struct gk20a *g);
int (*wait_engine_idle)(struct gk20a *g);
u32 (*get_num_fifos)(struct gk20a *g);
u32 (*get_pbdma_signature)(struct gk20a *g);

View File

@@ -22,9 +22,12 @@
* DEALINGS IN THE SOFTWARE.
*/
#include "gk20a/gk20a.h"
#include "fifo_gv100.h"
#include <nvgpu/timers.h>
#include <nvgpu/hw/gv100/hw_ccsr_gv100.h>
#include <nvgpu/hw/gk20a/hw_fifo_gk20a.h>
#define DEFAULT_FIFO_PREEMPT_TIMEOUT 0x3FFFFFUL
@@ -38,3 +41,14 @@ u32 gv100_fifo_get_preempt_timeout(struct gk20a *g)
return DEFAULT_FIFO_PREEMPT_TIMEOUT;
}
/*
 * Program the FIFO engine timeout register from the channel watchdog
 * timeout instead of a fixed check period.
 *
 * The value is built from g->ch_wdt_timeout_ms multiplied by 1000
 * (presumably ms -> us; confirm against the register's expected unit),
 * rescaled with scale_ptimer() using the factor derived from
 * g->ptimer_src_freq, and then combined with the "detection enabled"
 * field before being written to fifo_eng_timeout_r().
 *
 * NOTE(review): the multiplication by 1000 is not range-checked here;
 * verify that the configured watchdog values cannot wrap or spill into
 * the detection-enable field.
 */
void gv100_apply_ctxsw_timeout_intr(struct gk20a *g)
{
	u32 period = g->ch_wdt_timeout_ms * 1000;
	u32 reg_val;

	/* Convert the period to ptimer ticks for the current source clock. */
	reg_val = scale_ptimer(period,
			ptimer_scalingfactor10x(g->ptimer_src_freq));

	/* Turn on timeout detection alongside the period value. */
	reg_val = reg_val | fifo_eng_timeout_detection_enabled_f();

	gk20a_writel(g, fifo_eng_timeout_r(), reg_val);
}

View File

@@ -30,4 +30,5 @@ struct gk20a;
u32 gv100_fifo_get_num_fifos(struct gk20a *g);
u32 gv100_fifo_get_preempt_timeout(struct gk20a *g);
void gv100_apply_ctxsw_timeout_intr(struct gk20a *g);
#endif

View File

@@ -509,6 +509,7 @@ static const struct gpu_ops gv100_ops = {
.free_channel_ctx_header = gv11b_free_subctx_header,
.preempt_ch_tsg = gv11b_fifo_preempt_ch_tsg,
.handle_ctxsw_timeout = gv11b_fifo_handle_ctxsw_timeout,
.apply_ctxsw_timeout_intr = gv100_apply_ctxsw_timeout_intr,
},
.gr_ctx = {
.get_netlist_name = gr_gv100_get_netlist_name,

View File

@@ -1287,7 +1287,7 @@ static const char *const gv11b_sched_error_str[] = {
"rl_ack_extra",
"rl_rdat_timeout",
"rl_rdat_extra",
"xxx-a",
"eng_ctxsw_timeout",
"xxx-b",
"rl_req_timeout",
"new_runlist",