gpu: nvgpu: add and use sw-only timers

The nvgpu timeout API has an internal override for presilicon mode by
default: in presi simulation environments the timeouts never trigger.
This behaviour is intended for the original use case of the timer unit,
namely hardware polling loops. In pure software logic, though, the timer
must trigger after the specified timeout even in presi mode, so add a new
init function to produce a timer for software logic. Use this new kind
of timer in channel and scheduling worker threads.

The channel worker currently times out for just the purpose of the
channel watchdog timer which has its own internal timer. Although that's
just software, the general expectation is that the watchdog does not
trigger in presilicon tests that run slower than usual. The internal
watchdog timer thus keeps the non-sw mode.

Bug 3521828

Change-Id: I48ae8522c7ce2346a930e766528d8b64195f81d8
Signed-off-by: Konsta Hölttä <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2662541
Reviewed-by: svcacv <svcacv@nvidia.com>
Reviewed-by: Sagar Kamble <skamble@nvidia.com>
Reviewed-by: Alex Waterman <alexw@nvidia.com>
GVS: Gerrit_Virtual_Submit
This commit is contained in:
Konsta Hölttä
2022-02-01 15:59:42 +02:00
committed by mobile promotions
parent 621417bc73
commit 8736c0d467
7 changed files with 53 additions and 11 deletions

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2015-2021, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2015-2022, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -142,7 +142,7 @@ void nvgpu_channel_worker_poll_init(struct nvgpu_worker *worker)
ch_worker->watchdog_interval = 100U;
nvgpu_timeout_init_cpu_timer(worker->g, &ch_worker->timeout,
nvgpu_timeout_init_cpu_timer_sw(worker->g, &ch_worker->timeout,
ch_worker->watchdog_interval);
}
@@ -175,7 +175,7 @@ void nvgpu_channel_worker_poll_wakeup_post_process_item(
if (nvgpu_timeout_peek_expired(&ch_worker->timeout)) {
nvgpu_channel_poll_wdt(g);
nvgpu_timeout_init_cpu_timer(g, &ch_worker->timeout,
nvgpu_timeout_init_cpu_timer_sw(g, &ch_worker->timeout,
ch_worker->watchdog_interval);
}
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2011-2022, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -92,6 +92,10 @@ static void nvgpu_channel_wdt_init(struct nvgpu_channel_wdt *wdt,
{
struct gk20a *g = wdt->g;
/*
* Note: this is intentionally not the sw kind of timer to avoid false
* triggers in pre-si environments that tend to run slow.
*/
nvgpu_timeout_init_cpu_timer(g, &wdt->timer, wdt->limit_ms);
wdt->ch_state = *state;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -67,7 +67,7 @@ static void nvgpu_nvs_worker_poll_init(struct nvgpu_worker *worker)
/* 100 ms is a nice arbitrary timeout for default status */
nvs_worker->current_timeout = 100;
nvgpu_timeout_init_cpu_timer(worker->g, &nvs_worker->timeout,
nvgpu_timeout_init_cpu_timer_sw(worker->g, &nvs_worker->timeout,
nvs_worker->current_timeout);
}
@@ -135,7 +135,7 @@ static void nvgpu_nvs_worker_wakeup_post_process(struct nvgpu_worker *worker)
(next_timeout_ns + NSEC_PER_MSEC - 1) / NSEC_PER_MSEC;
}
nvgpu_timeout_init_cpu_timer(g, &nvs_worker->timeout,
nvgpu_timeout_init_cpu_timer_sw(g, &nvs_worker->timeout,
nvs_worker->current_timeout);
}
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2016-2022, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -166,6 +166,24 @@ int nvgpu_timeout_init_flags(struct gk20a *g, struct nvgpu_timeout *timeout,
void nvgpu_timeout_init_cpu_timer(struct gk20a *g, struct nvgpu_timeout *timeout,
u32 duration_ms);
/**
* @brief Initialize a pure software timeout.
*
* Init a cpu clock based timeout with no pre-si override. See
* nvgpu_timeout_init_flags() and NVGPU_TIMER_CPU_TIMER for full explanation.
*
* This builds a timer that has the NVGPU_TIMER_NO_PRE_SI flag. Most often,
* hardware polling related loops are preferred to be infinite in presilicon
* simulation mode. That's not the case for some timers that serve only
* software logic, which is what this function is for.
*
* @param g [in] GPU driver structure.
* @param timeout [in] Timeout object to initialize.
* @param duration_ms [in] Timeout duration in milliseconds.
*/
void nvgpu_timeout_init_cpu_timer_sw(struct gk20a *g, struct nvgpu_timeout *timeout,
u32 duration_ms);
/**
* @brief Initialize a timeout.
*

View File

@@ -1,7 +1,7 @@
/*
* Color decompression engine support
*
* Copyright (c) 2014-2021, NVIDIA Corporation. All rights reserved.
* Copyright (c) 2014-2022, NVIDIA Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -936,7 +936,7 @@ __acquires(&cde_app->mutex)
struct gk20a_cde_ctx *cde_ctx = NULL;
struct nvgpu_timeout timeout;
nvgpu_timeout_init_cpu_timer(g, &timeout, MAX_CTX_RETRY_TIME);
nvgpu_timeout_init_cpu_timer_sw(g, &timeout, MAX_CTX_RETRY_TIME);
do {
cde_ctx = gk20a_cde_do_get_context(l);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2016-2022, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -45,6 +45,16 @@ void nvgpu_timeout_init_cpu_timer(struct gk20a *g, struct nvgpu_timeout *timeout
nvgpu_assert(err == 0);
}
/*
 * Initialize a CPU-timer based timeout for software-only logic.
 *
 * The timeout is set up through nvgpu_timeout_init_flags() with both
 * NVGPU_TIMER_CPU_TIMER and NVGPU_TIMER_NO_PRE_SI set. The init result is
 * not returned to the caller; it is only checked with nvgpu_assert().
 */
void nvgpu_timeout_init_cpu_timer_sw(struct gk20a *g, struct nvgpu_timeout *timeout,
		u32 duration_ms)
{
	int err;

	err = nvgpu_timeout_init_flags(g, timeout, duration_ms,
			NVGPU_TIMER_CPU_TIMER | NVGPU_TIMER_NO_PRE_SI);

	/* Initialization is expected to succeed with these flags. */
	nvgpu_assert(err == 0);
}
void nvgpu_timeout_init_retry(struct gk20a *g, struct nvgpu_timeout *timeout,
u32 duration_count)
{

View File

@@ -132,6 +132,16 @@ void nvgpu_timeout_init_cpu_timer(struct gk20a *g, struct nvgpu_timeout *timeout
nvgpu_assert(err == 0);
}
/*
 * Initialize a CPU-timer based timeout for software-only logic.
 *
 * Delegates to nvgpu_timeout_init_flags() with the NVGPU_TIMER_CPU_TIMER and
 * NVGPU_TIMER_NO_PRE_SI flags combined. The function returns nothing; the
 * init status is only verified through nvgpu_assert().
 */
void nvgpu_timeout_init_cpu_timer_sw(struct gk20a *g, struct nvgpu_timeout *timeout,
		u32 duration_ms)
{
	int err;

	err = nvgpu_timeout_init_flags(g, timeout, duration_ms,
			NVGPU_TIMER_CPU_TIMER | NVGPU_TIMER_NO_PRE_SI);

	/* Initialization is expected to succeed with these flags. */
	nvgpu_assert(err == 0);
}
void nvgpu_timeout_init_retry(struct gk20a *g, struct nvgpu_timeout *timeout,
u32 duration_count)
{