gpu: nvgpu: increase tu104 flush retries

nvrm_gpu_tests uses the dGPU detach/attach APIs to test the
GCOFF sequence. This results in a timeout on tu104 while
detaching the dGPU.

...snip...
[start: NvRmGpuTest_Lib_AttachDevice]
Detaching dGPU at index 2001...
[   89.933604] -- nvrm_gpu_tests: Ended subtest
 NvRmGpuTest_Device_ChooseKind_Color_Incompressible
[   91.473919] nvgpu: 0001:01:00.0 __nvgpu_timeout_expired_msg_retry:118  [ERR]
  No more retries @ gk20a_mm_fb_flush+0xd4/0x328 [nvgpu]
[   91.474280] nvgpu: 0001:01:00.0            gm20b_fb_dump_wpr_info:240  [ERR]
  WPR: 08000080 08000081 01c00002 01c01e03 1ffffe04 00000005
...snip...

Increase the FB flush retries from 2000 to 2500, which seems to be sufficient.

Bug 200491474

Change-Id: I7bab4e39c677361d3642e034f5bdb9ba75d4c450
Signed-off-by: Nitin Kumbhar <nkumbhar@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2019432
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Nitin Kumbhar
2019-02-14 23:10:18 +05:30
committed by mobile promotions
parent 09d5059369
commit d4f7c90020
5 changed files with 84 additions and 2 deletions

View File

@@ -478,6 +478,7 @@ nvgpu-y += \
gv100/hal_gv100.o \
gv100/gsp_gv100.o \
gv100/clk_gv100.o \
tu104/mm_tu104.o \
tu104/hal_tu104.o \
tu104/fifo_tu104.o \
tu104/gr_tu104.o \

View File

@@ -291,6 +291,7 @@ srcs += common/sim.c \
tu104/fbpa_tu104.c \
tu104/fifo_tu104.c \
tu104/gr_tu104.c \
tu104/mm_tu104.c \
tu104/hal_tu104.c \
tu104/sec2_tu104.c \
tu104/func_tu104.c \

View File

@@ -137,8 +137,8 @@
#include "gv100/bios_gv100.h"
#include "gv100/fifo_gv100.h"
#include "gv100/gr_gv100.h"
#include "gv100/mm_gv100.h"
#include "tu104/mm_tu104.h"
#include "tu104/fifo_tu104.h"
#include "tu104/gr_tu104.h"
#include "tu104/bios_tu104.h"
@@ -929,7 +929,7 @@ static const struct gpu_ops tu104_ops = {
.remove_bar2_vm = gp10b_remove_bar2_vm,
.fault_info_mem_destroy = gv11b_mm_fault_info_mem_destroy,
.mmu_fault_disable_hw = gv11b_mm_mmu_fault_disable_hw,
.get_flush_retries = gv100_mm_get_flush_retries,
.get_flush_retries = tu104_mm_get_flush_retries,
.bar1_map_userd = NULL,
},
.pramin = {

View File

@@ -0,0 +1,47 @@
/*
* TU104 memory management
*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/mm.h>
#include "tu104/mm_tu104.h"
/*
 * Return the retry count to use for a flush operation on TU104.
 *
 * @g:  GPU instance; not used by this function.
 * @op: flush operation the retry count is requested for.
 *
 * Returns 2500 for NVGPU_FLUSH_FB, 2000 for NVGPU_FLUSH_L2_FLUSH and
 * 200 for any other operation.
 */
u32 tu104_mm_get_flush_retries(struct gk20a *g, enum nvgpu_flush_op op)
{
	/* TU104 has a large FB so it needs larger timeouts */
	if (op == NVGPU_FLUSH_FB) {
		return 2500;
	}

	if (op == NVGPU_FLUSH_L2_FLUSH) {
		return 2000;
	}

	/* Default retry timer */
	return 200;
}

View File

@@ -0,0 +1,33 @@
/*
* TU104 memory management
*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef MM_TU104_H
#define MM_TU104_H
/*
 * Forward declarations: the prototype below only names these types, so
 * their full definitions are not pulled in here.
 *
 * NOTE(review): u32 is not declared by this header and nothing is included
 * here; it presumably relies on the includer having the nvgpu basic types
 * in scope already - confirm.
 */
struct gk20a;
enum nvgpu_flush_op;
/*
 * Return the retry count to use for flush operation @op on TU104.
 */
u32 tu104_mm_get_flush_retries(struct gk20a *g, enum nvgpu_flush_op op);
#endif /* MM_TU104_H */