Mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git (synced 2025-12-24 10:34:43 +03:00)

Commit: Open source GPL/LGPL release
drivers/gpu/nvgpu/common/fifo/channel.c (new file, 2304 lines)
File diff suppressed because it is too large.
drivers/gpu/nvgpu/common/fifo/channel_wdt.c (new file, 199 lines)
@@ -0,0 +1,199 @@
/*
 * Copyright (c) 2015-2020, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "channel_wdt.h"
#include "channel_worker.h"

#include <nvgpu/watchdog.h>
#include <nvgpu/channel.h>
#include <nvgpu/error_notifier.h>
#include <nvgpu/gk20a.h>

void nvgpu_channel_set_wdt_debug_dump(struct nvgpu_channel *ch, bool dump)
{
	ch->wdt_debug_dump = dump;
}

static struct nvgpu_channel_wdt_state nvgpu_channel_collect_wdt_state(
		struct nvgpu_channel *ch)
{
	struct gk20a *g = ch->g;
	struct nvgpu_channel_wdt_state state = { 0, 0 };

	/*
	 * Note: just checking for nvgpu_channel_wdt_enabled() is not enough at
	 * the moment because system suspend puts g->regs away but doesn't stop
	 * the worker thread that runs the watchdog. This might need to be
	 * cleared up in the future.
	 */
	if (nvgpu_channel_wdt_running(ch->wdt)) {
		/*
		 * Read the state only if the wdt is on to avoid unnecessary
		 * accesses. The kernel mem for userd may not even exist; this
		 * channel could be in usermode submit mode.
		 */
		state.gp_get = g->ops.userd.gp_get(g, ch);
		state.pb_get = g->ops.userd.pb_get(g, ch);
	}

	return state;
}

void nvgpu_channel_launch_wdt(struct nvgpu_channel *ch)
{
	struct nvgpu_channel_wdt_state state = nvgpu_channel_collect_wdt_state(ch);

	/*
	 * FIXME: channel recovery can race the submit path and can start even
	 * after this, but this check is the best we can do for now.
	 */
	if (!nvgpu_channel_check_unserviceable(ch)) {
		nvgpu_channel_wdt_start(ch->wdt, &state);
	}
}

void nvgpu_channel_restart_all_wdts(struct gk20a *g)
{
	struct nvgpu_fifo *f = &g->fifo;
	u32 chid;

	for (chid = 0; chid < f->num_channels; chid++) {
		struct nvgpu_channel *ch = nvgpu_channel_from_id(g, chid);

		if (ch != NULL) {
			if ((ch->wdt != NULL) &&
					!nvgpu_channel_check_unserviceable(ch)) {
				struct nvgpu_channel_wdt_state state =
					nvgpu_channel_collect_wdt_state(ch);

				nvgpu_channel_wdt_rewind(ch->wdt, &state);
			}
			nvgpu_channel_put(ch);
		}
	}
}

static void nvgpu_channel_recover_from_wdt(struct nvgpu_channel *ch)
{
	struct gk20a *g = ch->g;

	nvgpu_log_fn(g, " ");

	if (nvgpu_channel_check_unserviceable(ch)) {
		/* channel is already recovered */
		nvgpu_info(g, "chid: %d unserviceable but wdt was ON", ch->chid);
		return;
	}

	nvgpu_err(g, "Job on channel %d timed out", ch->chid);

	/* force reset calls gk20a_debug_dump but not this */
	if (ch->wdt_debug_dump) {
		gk20a_gr_debug_dump(g);
	}

#ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL
	if (g->ops.tsg.force_reset(ch,
			NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT,
			ch->wdt_debug_dump) != 0) {
		nvgpu_err(g, "failed tsg force reset for chid: %d", ch->chid);
	}
#endif
}

/*
 * Test the watchdog progress. If the channel is stuck, reset it.
 *
 * The gpu is implicitly on at this point because the watchdog can only run on
 * channels that have submitted jobs pending for cleanup.
 */
static void nvgpu_channel_check_wdt(struct nvgpu_channel *ch)
{
	struct nvgpu_channel_wdt_state state = nvgpu_channel_collect_wdt_state(ch);

	if (nvgpu_channel_wdt_check(ch->wdt, &state)) {
		nvgpu_channel_recover_from_wdt(ch);
	}
}

void nvgpu_channel_worker_poll_init(struct nvgpu_worker *worker)
{
	struct nvgpu_channel_worker *ch_worker =
		nvgpu_channel_worker_from_worker(worker);
	int ret;

	ch_worker->watchdog_interval = 100U;

	ret = nvgpu_timeout_init(worker->g, &ch_worker->timeout,
			ch_worker->watchdog_interval, NVGPU_TIMER_CPU_TIMER);
	if (ret != 0) {
		nvgpu_err(worker->g, "timeout_init failed: %d", ret);
	}
}

/**
 * Loop every living channel, check timeouts and handle stuck channels.
 */
static void nvgpu_channel_poll_wdt(struct gk20a *g)
{
	unsigned int chid;

	for (chid = 0; chid < g->fifo.num_channels; chid++) {
		struct nvgpu_channel *ch = nvgpu_channel_from_id(g, chid);

		if (ch != NULL) {
			if (!nvgpu_channel_check_unserviceable(ch)) {
				nvgpu_channel_check_wdt(ch);
			}
			nvgpu_channel_put(ch);
		}
	}
}

void nvgpu_channel_worker_poll_wakeup_post_process_item(
		struct nvgpu_worker *worker)
{
	struct gk20a *g = worker->g;

	struct nvgpu_channel_worker *ch_worker =
		nvgpu_channel_worker_from_worker(worker);
	int ret;

	if (nvgpu_timeout_peek_expired(&ch_worker->timeout)) {
		nvgpu_channel_poll_wdt(g);
		ret = nvgpu_timeout_init(g, &ch_worker->timeout,
				ch_worker->watchdog_interval,
				NVGPU_TIMER_CPU_TIMER);
		if (ret != 0) {
			nvgpu_err(g, "timeout_init failed: %d", ret);
		}
	}
}

u32 nvgpu_channel_worker_poll_wakeup_condition_get_timeout(
		struct nvgpu_worker *worker)
{
	struct nvgpu_channel_worker *ch_worker =
		nvgpu_channel_worker_from_worker(worker);

	return ch_worker->watchdog_interval;
}
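Illustrative sketch (not part of this commit): the worker's post-process hook above follows a "peek, then re-arm" cadence, so the per-channel scan in nvgpu_channel_poll_wdt() runs at most once per watchdog_interval (100 ms) regardless of how often the worker wakes up. The following standalone restatement of that cadence uses hypothetical names and the POSIX monotonic clock instead of nvgpu's timer API:

#include <stdint.h>
#include <time.h>

static int64_t now_ms(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (int64_t)ts.tv_sec * 1000 + ts.tv_nsec / 1000000;
}

struct poll_timer {
	int64_t deadline_ms;
	int64_t interval_ms;
};

static void poll_timer_arm(struct poll_timer *t)
{
	t->deadline_ms = now_ms() + t->interval_ms;
}

/* Called on every worker wakeup; runs the expensive scan at most once per interval. */
static void poll_wakeup_post_process(struct poll_timer *t,
				     void (*scan_all_channels)(void))
{
	if (now_ms() >= t->deadline_ms) {
		scan_all_channels();	/* analogue of nvgpu_channel_poll_wdt() */
		poll_timer_arm(t);	/* re-arm for the next 100 ms window */
	}
}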
drivers/gpu/nvgpu/common/fifo/channel_wdt.h (new file, 42 lines)
@@ -0,0 +1,42 @@
/*
 * Copyright (c) 2015-2020, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
#ifndef NVGPU_COMMON_FIFO_CHANNEL_WDT_H
#define NVGPU_COMMON_FIFO_CHANNEL_WDT_H

#include <nvgpu/types.h>

struct nvgpu_channel;

#ifdef CONFIG_NVGPU_CHANNEL_WDT
struct nvgpu_worker;

void nvgpu_channel_launch_wdt(struct nvgpu_channel *ch);
void nvgpu_channel_worker_poll_init(struct nvgpu_worker *worker);
void nvgpu_channel_worker_poll_wakeup_post_process_item(
		struct nvgpu_worker *worker);
u32 nvgpu_channel_worker_poll_wakeup_condition_get_timeout(
		struct nvgpu_worker *worker);
#else
static inline void nvgpu_channel_launch_wdt(struct nvgpu_channel *ch) {}
#endif /* CONFIG_NVGPU_CHANNEL_WDT */

#endif /* NVGPU_COMMON_FIFO_CHANNEL_WDT_H */
drivers/gpu/nvgpu/common/fifo/channel_worker.c (new file, 118 lines)
@@ -0,0 +1,118 @@
/*
 * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "channel_worker.h"
#include "channel_wdt.h"

#include <nvgpu/worker.h>
#include <nvgpu/channel.h>

static inline struct nvgpu_channel *
nvgpu_channel_from_worker_item(struct nvgpu_list_node *node)
{
	return (struct nvgpu_channel *)
		((uintptr_t)node - offsetof(struct nvgpu_channel, worker_item));
};

static void nvgpu_channel_worker_poll_wakeup_process_item(
		struct nvgpu_list_node *work_item)
{
	struct nvgpu_channel *ch = nvgpu_channel_from_worker_item(work_item);

	nvgpu_assert(ch != NULL);

	nvgpu_log_fn(ch->g, " ");

	nvgpu_channel_clean_up_jobs(ch);

	/* ref taken when enqueued */
	nvgpu_channel_put(ch);
}

static const struct nvgpu_worker_ops channel_worker_ops = {
#ifdef CONFIG_NVGPU_CHANNEL_WDT
	.pre_process = nvgpu_channel_worker_poll_init,
	.wakeup_post_process =
		nvgpu_channel_worker_poll_wakeup_post_process_item,
	.wakeup_timeout =
		nvgpu_channel_worker_poll_wakeup_condition_get_timeout,
#endif
	.wakeup_early_exit = NULL,
	.wakeup_process_item =
		nvgpu_channel_worker_poll_wakeup_process_item,
	.wakeup_condition = NULL,
};

/**
 * Initialize the channel worker's metadata and start the background thread.
 */
int nvgpu_channel_worker_init(struct gk20a *g)
{
	struct nvgpu_worker *worker = &g->channel_worker.worker;

	nvgpu_worker_init_name(worker, "nvgpu_channel_poll", g->name);

	return nvgpu_worker_init(g, worker, &channel_worker_ops);
}

void nvgpu_channel_worker_deinit(struct gk20a *g)
{
	struct nvgpu_worker *worker = &g->channel_worker.worker;

	nvgpu_worker_deinit(worker);
}

/**
 * Append a channel to the worker's list, if not there already.
 *
 * The worker thread processes work items (channels in its work list) and polls
 * for other things. This adds @ch to the end of the list and wakes the worker
 * up immediately. If the channel already existed in the list, it's not added,
 * because in that case it has been scheduled already but has not yet been
 * processed.
 */
void nvgpu_channel_worker_enqueue(struct nvgpu_channel *ch)
{
	struct gk20a *g = ch->g;
	int ret;

	nvgpu_log_fn(g, " ");

	/*
	 * Ref released when this item gets processed. The caller should hold
	 * one ref already, so normally shouldn't fail, but the channel could
	 * end up being freed between the time the caller got its reference and
	 * the time we end up here (e.g., if the client got killed); if so, just
	 * return.
	 */
	if (nvgpu_channel_get(ch) == NULL) {
		nvgpu_info(g, "cannot get ch ref for worker!");
		return;
	}

	ret = nvgpu_worker_enqueue(&g->channel_worker.worker,
			&ch->worker_item);
	if (ret != 0) {
		nvgpu_channel_put(ch);
		return;
	}
}
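Illustrative sketch (not part of this commit): nvgpu_channel_from_worker_item() above is the usual intrusive-list "container_of" idiom — the list node is embedded inside the channel, so subtracting the member offset from the node pointer recovers the enclosing object. A minimal, self-contained restatement with hypothetical types:

#include <stddef.h>
#include <stdint.h>

struct list_node {
	struct list_node *next;
};

struct widget {
	int id;
	struct list_node work_item;	/* embedded in the containing object */
};

/* Recover the widget from a pointer to its embedded work_item node. */
static inline struct widget *widget_from_work_item(struct list_node *node)
{
	return (struct widget *)
		((uintptr_t)node - offsetof(struct widget, work_item));
}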
drivers/gpu/nvgpu/common/fifo/channel_worker.h (new file, 37 lines)
@@ -0,0 +1,37 @@
/*
 * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef NVGPU_COMMON_FIFO_CHANNEL_WORKER_H
#define NVGPU_COMMON_FIFO_CHANNEL_WORKER_H

#include <nvgpu/gk20a.h>

void nvgpu_channel_worker_enqueue(struct nvgpu_channel *ch);

static inline struct nvgpu_channel_worker *
nvgpu_channel_worker_from_worker(struct nvgpu_worker *worker)
{
	return (struct nvgpu_channel_worker *)
		((uintptr_t)worker - offsetof(struct nvgpu_channel_worker, worker));
};

#endif /* NVGPU_COMMON_FIFO_CHANNEL_WORKER_H */
drivers/gpu/nvgpu/common/fifo/engine_status.c (new file, 88 lines)
@@ -0,0 +1,88 @@
/*
 * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <nvgpu/io.h>
#include <nvgpu/engine_status.h>

bool nvgpu_engine_status_is_ctxsw_switch(struct nvgpu_engine_status_info
		*engine_status)
{
	return engine_status->ctxsw_status == NVGPU_CTX_STATUS_CTXSW_SWITCH;
}

bool nvgpu_engine_status_is_ctxsw_load(struct nvgpu_engine_status_info
		*engine_status)
{
	return engine_status->ctxsw_status == NVGPU_CTX_STATUS_CTXSW_LOAD;
}

bool nvgpu_engine_status_is_ctxsw_save(struct nvgpu_engine_status_info
		*engine_status)
{
	return engine_status->ctxsw_status == NVGPU_CTX_STATUS_CTXSW_SAVE;
}

bool nvgpu_engine_status_is_ctxsw(struct nvgpu_engine_status_info
		*engine_status)
{
	return (nvgpu_engine_status_is_ctxsw_switch(engine_status) ||
		nvgpu_engine_status_is_ctxsw_load(engine_status) ||
		nvgpu_engine_status_is_ctxsw_save(engine_status));
}

bool nvgpu_engine_status_is_ctxsw_invalid(struct nvgpu_engine_status_info
		*engine_status)
{
	return engine_status->ctxsw_status == NVGPU_CTX_STATUS_INVALID;
}

bool nvgpu_engine_status_is_ctxsw_valid(struct nvgpu_engine_status_info
		*engine_status)
{
	return engine_status->ctxsw_status == NVGPU_CTX_STATUS_VALID;
}
bool nvgpu_engine_status_is_ctx_type_tsg(struct nvgpu_engine_status_info
		*engine_status)
{
	return engine_status->ctx_id_type == ENGINE_STATUS_CTX_ID_TYPE_TSGID;
}
bool nvgpu_engine_status_is_next_ctx_type_tsg(struct nvgpu_engine_status_info
		*engine_status)
{
	return engine_status->ctx_next_id_type ==
		ENGINE_STATUS_CTX_NEXT_ID_TYPE_TSGID;
}

void nvgpu_engine_status_get_ctx_id_type(struct nvgpu_engine_status_info
		*engine_status, u32 *ctx_id, u32 *ctx_type)
{
	*ctx_id = engine_status->ctx_id;
	*ctx_type = engine_status->ctx_id_type;
}

void nvgpu_engine_status_get_next_ctx_id_type(struct nvgpu_engine_status_info
		*engine_status, u32 *ctx_next_id,
		u32 *ctx_next_type)
{
	*ctx_next_id = engine_status->ctx_next_id;
	*ctx_next_type = engine_status->ctx_next_id_type;
}
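Illustrative usage sketch (not part of this commit): callers typically combine the predicates above to decide whether the resident or the incoming context is the interesting one; during a ctxsw load the "next" context is what will own the engine. This mirrors how nvgpu_engine_get_id_and_type() in engines.c (later in this commit) uses these helpers; example_pick_ctx is a hypothetical name and the snippet assumes <nvgpu/engine_status.h> is available:

/* Attribute work to the incoming context during a load, else to the resident one. */
static void example_pick_ctx(struct nvgpu_engine_status_info *status,
			     u32 *id, u32 *type)
{
	if (nvgpu_engine_status_is_ctxsw_load(status)) {
		nvgpu_engine_status_get_next_ctx_id_type(status, id, type);
	} else {
		nvgpu_engine_status_get_ctx_id_type(status, id, type);
	}
}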
drivers/gpu/nvgpu/common/fifo/engines.c (new file, 960 lines)
@@ -0,0 +1,960 @@
/*
 * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */


#include <nvgpu/log.h>
#include <nvgpu/errno.h>
#include <nvgpu/timers.h>
#include <nvgpu/bitops.h>
#ifdef CONFIG_NVGPU_LS_PMU
#include <nvgpu/pmu.h>
#include <nvgpu/pmu/mutex.h>
#endif
#include <nvgpu/runlist.h>
#include <nvgpu/engines.h>
#include <nvgpu/engine_status.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/pbdma_status.h>
#include <nvgpu/power_features/pg.h>
#include <nvgpu/channel.h>
#include <nvgpu/soc.h>
#include <nvgpu/device.h>
#include <nvgpu/gr/gr_falcon.h>
#include <nvgpu/gr/gr.h>
#include <nvgpu/gr/gr_instances.h>
#include <nvgpu/fifo.h>
#include <nvgpu/static_analysis.h>
#include <nvgpu/swprofile.h>

#include <nvgpu/fifo/swprofile.h>

#define FECS_METHOD_WFI_RESTORE 0x80000U

enum nvgpu_fifo_engine nvgpu_engine_enum_from_dev(struct gk20a *g,
		const struct nvgpu_device *dev)
{
	enum nvgpu_fifo_engine ret = NVGPU_ENGINE_INVAL;

	if (nvgpu_device_is_graphics(g, dev)) {
		ret = NVGPU_ENGINE_GR;
	} else if (nvgpu_device_is_ce(g, dev)) {
		/* For now, all CE engines have separate runlists. We can
		 * identify the NVGPU_ENGINE_GRCE type CE using runlist_id
		 * comparsion logic with GR runlist_id in init_info()
		 */
		ret = NVGPU_ENGINE_ASYNC_CE;
	} else {
		ret = NVGPU_ENGINE_INVAL;
	}

	return ret;
}

const struct nvgpu_device *nvgpu_engine_get_active_eng_info(
		struct gk20a *g, u32 engine_id)
{
	struct nvgpu_fifo *f = &g->fifo;

	if (engine_id >= f->max_engines) {
		return NULL;
	}

	return f->host_engines[engine_id];
}

bool nvgpu_engine_check_valid_id(struct gk20a *g, u32 engine_id)
{
	struct nvgpu_fifo *f = &g->fifo;

	if (engine_id >= f->max_engines) {
		return false;
	}

	return f->host_engines[engine_id] != NULL;
}

u32 nvgpu_engine_get_gr_id_for_inst(struct gk20a *g, u32 inst_id)
{
	const struct nvgpu_device *dev;

	dev = nvgpu_device_get(g, NVGPU_DEVTYPE_GRAPHICS, inst_id);
	if (dev == NULL) {
		nvgpu_warn(g, "No GR devices on this GPU for inst[%u]?!",
			inst_id);
		return NVGPU_INVALID_ENG_ID;
	}

	return dev->engine_id;
}

u32 nvgpu_engine_get_gr_id(struct gk20a *g)
{
	/* Consider 1st available GR engine */
	return nvgpu_engine_get_gr_id_for_inst(g, 0U);
}

u32 nvgpu_engine_act_interrupt_mask(struct gk20a *g, u32 engine_id)
{
	const struct nvgpu_device *dev = NULL;

	dev = nvgpu_engine_get_active_eng_info(g, engine_id);
	if (dev == NULL) {
		return 0;
	}

	return BIT32(dev->intr_id);
}

u32 nvgpu_gr_engine_interrupt_mask(struct gk20a *g)
{
	const struct nvgpu_device *dev;
	u32 intr_mask = 0U;
	u32 i;

	for (i = 0U; i < g->num_gr_instances; i++) {
		dev = nvgpu_device_get(g, NVGPU_DEVTYPE_GRAPHICS,
				nvgpu_gr_get_syspipe_id(g, i));
		if (dev == NULL) {
			continue;
		}

		intr_mask |= BIT32(dev->intr_id);
	}

	return intr_mask;
}

u32 nvgpu_ce_engine_interrupt_mask(struct gk20a *g)
{
	const struct nvgpu_device *dev;
	u32 i;
	u32 mask = 0U;

	/*
	 * For old chips - pre-Pascal - we have COPY[0-2], for new chips we
	 * have some number of LCE instances. For the purpose of this code we
	 * imagine a system that could have both; in reality that'll never be
	 * the case.
	 *
	 * This can be cleaned up in the future by defining a SW type for CE and
	 * hiding this ugliness in the device management code.
	 */
	for (i = NVGPU_DEVTYPE_COPY0; i <= NVGPU_DEVTYPE_COPY2; i++) {
		dev = nvgpu_device_get(g, i, i - NVGPU_DEVTYPE_COPY0);
		if (dev == NULL) {
			continue;
		}

		mask |= BIT32(dev->intr_id);
	}

	/*
	 * Now take care of LCEs.
	 */
	for (i = 0U; i < nvgpu_device_count(g, NVGPU_DEVTYPE_LCE); i++) {
		dev = nvgpu_device_get(g, NVGPU_DEVTYPE_LCE, i);
		nvgpu_assert(dev != NULL);

		mask |= BIT32(dev->intr_id);
	}

	return mask;
}

#ifdef CONFIG_NVGPU_FIFO_ENGINE_ACTIVITY

static void nvgpu_engine_enable_activity(struct gk20a *g,
		const struct nvgpu_device *dev)
{
	nvgpu_runlist_set_state(g, BIT32(dev->runlist_id), RUNLIST_ENABLED);
}

void nvgpu_engine_enable_activity_all(struct gk20a *g)
{
	u32 i;

	for (i = 0; i < g->fifo.num_engines; i++) {
		nvgpu_engine_enable_activity(g, g->fifo.active_engines[i]);
	}
}

int nvgpu_engine_disable_activity(struct gk20a *g,
		const struct nvgpu_device *dev,
		bool wait_for_idle)
{
	u32 pbdma_chid = NVGPU_INVALID_CHANNEL_ID;
	u32 engine_chid = NVGPU_INVALID_CHANNEL_ID;
#ifdef CONFIG_NVGPU_LS_PMU
	u32 token = PMU_INVALID_MUTEX_OWNER_ID;
	int mutex_ret = -EINVAL;
#endif
	int err = 0;
	struct nvgpu_channel *ch = NULL;
	struct nvgpu_engine_status_info engine_status;
	struct nvgpu_pbdma_status_info pbdma_status;
	unsigned long runlist_served_pbdmas;
	unsigned long bit;
	u32 pbdma_id;
	struct nvgpu_fifo *f = &g->fifo;

	nvgpu_log_fn(g, " ");

	g->ops.engine_status.read_engine_status_info(g, dev->engine_id,
		&engine_status);
	if (engine_status.is_busy && !wait_for_idle) {
		return -EBUSY;
	}

#ifdef CONFIG_NVGPU_LS_PMU
	if (g->ops.pmu.is_pmu_supported(g)) {
		mutex_ret = nvgpu_pmu_lock_acquire(g, g->pmu,
				PMU_MUTEX_ID_FIFO, &token);
	}
#endif

	nvgpu_runlist_set_state(g, BIT32(dev->runlist_id),
			RUNLIST_DISABLED);

	runlist_served_pbdmas = f->runlists[dev->runlist_id]->pbdma_bitmask;

	for_each_set_bit(bit, &runlist_served_pbdmas,
			nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA)) {
		pbdma_id = U32(bit);
		/* chid from pbdma status */
		g->ops.pbdma_status.read_pbdma_status_info(g,
			pbdma_id,
			&pbdma_status);
		if (nvgpu_pbdma_status_is_chsw_valid(&pbdma_status) ||
				nvgpu_pbdma_status_is_chsw_save(&pbdma_status)) {
			pbdma_chid = pbdma_status.id;
		} else if (nvgpu_pbdma_status_is_chsw_load(&pbdma_status) ||
				nvgpu_pbdma_status_is_chsw_switch(&pbdma_status)) {
			pbdma_chid = pbdma_status.next_id;
		} else {
			/* Nothing to do here */
		}

		if (pbdma_chid != NVGPU_INVALID_CHANNEL_ID) {
			ch = nvgpu_channel_from_id(g, pbdma_chid);
			if (ch != NULL) {
				err = g->ops.fifo.preempt_channel(g, ch);
				nvgpu_channel_put(ch);
			}
			if (err != 0) {
				goto clean_up;
			}
		}
	}

	/* chid from engine status */
	g->ops.engine_status.read_engine_status_info(g, dev->engine_id,
		&engine_status);
	if (nvgpu_engine_status_is_ctxsw_valid(&engine_status) ||
			nvgpu_engine_status_is_ctxsw_save(&engine_status)) {
		engine_chid = engine_status.ctx_id;
	} else if (nvgpu_engine_status_is_ctxsw_switch(&engine_status) ||
			nvgpu_engine_status_is_ctxsw_load(&engine_status)) {
		engine_chid = engine_status.ctx_next_id;
	} else {
		/* Nothing to do here */
	}

	if (engine_chid != NVGPU_INVALID_ENG_ID && engine_chid != pbdma_chid) {
		ch = nvgpu_channel_from_id(g, engine_chid);
		if (ch != NULL) {
			err = g->ops.fifo.preempt_channel(g, ch);
			nvgpu_channel_put(ch);
		}
		if (err != 0) {
			goto clean_up;
		}
	}

clean_up:
#ifdef CONFIG_NVGPU_LS_PMU
	if (mutex_ret == 0) {
		if (nvgpu_pmu_lock_release(g, g->pmu,
				PMU_MUTEX_ID_FIFO, &token) != 0) {
			nvgpu_err(g, "failed to release PMU lock");
		}
	}
#endif
	if (err != 0) {
		nvgpu_log_fn(g, "failed");
		nvgpu_engine_enable_activity(g, dev);
	} else {
		nvgpu_log_fn(g, "done");
	}
	return err;
}

int nvgpu_engine_disable_activity_all(struct gk20a *g,
		bool wait_for_idle)
{
	unsigned int i;
	int err = 0, ret = 0;

	for (i = 0; i < g->fifo.num_engines; i++) {
		err = nvgpu_engine_disable_activity(g,
				g->fifo.active_engines[i],
				wait_for_idle);
		if (err != 0) {
			nvgpu_err(g, "failed to disable engine %d activity",
				g->fifo.active_engines[i]->engine_id);
			ret = err;
			break;
		}
	}

	if (err != 0) {
		while (i-- != 0U) {
			nvgpu_engine_enable_activity(g,
					g->fifo.active_engines[i]);
		}
	}

	return ret;
}

int nvgpu_engine_wait_for_idle(struct gk20a *g)
{
	struct nvgpu_timeout timeout;
	u32 delay = POLL_DELAY_MIN_US;
	int ret = 0, err = 0;
	u32 i, host_num_engines;
	struct nvgpu_engine_status_info engine_status;

	nvgpu_log_fn(g, " ");

	host_num_engines =
		nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES);

	err = nvgpu_timeout_init(g, &timeout, nvgpu_get_poll_timeout(g),
			NVGPU_TIMER_CPU_TIMER);
	if (err != 0) {
		return -EINVAL;
	}

	for (i = 0; i < host_num_engines; i++) {
		if (!nvgpu_engine_check_valid_id(g, i)) {
			continue;
		}

		ret = -ETIMEDOUT;
		do {
			g->ops.engine_status.read_engine_status_info(g, i,
				&engine_status);
			if (!engine_status.is_busy) {
				ret = 0;
				break;
			}

			nvgpu_usleep_range(delay, delay * 2U);
			delay = min_t(u32,
					delay << 1U, POLL_DELAY_MAX_US);
		} while (nvgpu_timeout_expired(&timeout) == 0);

		if (ret != 0) {
			/* possible causes:
			 * check register settings programmed in hal set by
			 * elcg_init_idle_filters and init_therm_setup_hw
			 */
			nvgpu_err(g, "cannot idle engine: %u "
					"engine_status: 0x%08x", i,
					engine_status.reg_data);
			break;
		}
	}

	nvgpu_log_fn(g, "done");

	return ret;
}

#endif /* CONFIG_NVGPU_FIFO_ENGINE_ACTIVITY */

int nvgpu_engine_setup_sw(struct gk20a *g)
{
	struct nvgpu_fifo *f = &g->fifo;
	int err = 0;
	size_t size;

	f->max_engines = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES);
	size = nvgpu_safe_mult_u64(f->max_engines,
			sizeof(struct nvgpu_device *));

	/*
	 * Allocate the two device lists for host devices.
	 */
	f->host_engines = nvgpu_kzalloc(g, size);
	if (f->host_engines == NULL) {
		nvgpu_err(g, "OOM allocating host engine list");
		return -ENOMEM;
	}
	f->active_engines = nvgpu_kzalloc(g, size);
	if (f->active_engines == NULL) {
		nvgpu_err(g, "no mem for active engine list");
		err = -ENOMEM;
		goto clean_up_engine_info;
	}

	err = nvgpu_engine_init_info(f);
	if (err != 0) {
		nvgpu_err(g, "init engine info failed");
		goto clean_up;
	}

	return 0;

clean_up:
	nvgpu_kfree(g, f->active_engines);
	f->active_engines = NULL;

clean_up_engine_info:
	nvgpu_kfree(g, f->host_engines);
	f->host_engines = NULL;

	return err;
}

void nvgpu_engine_cleanup_sw(struct gk20a *g)
{
	struct nvgpu_fifo *f = &g->fifo;

	f->num_engines = 0;
	nvgpu_kfree(g, f->host_engines);
	f->host_engines = NULL;
	nvgpu_kfree(g, f->active_engines);
	f->active_engines = NULL;
}

#ifdef CONFIG_NVGPU_ENGINE_RESET
static void nvgpu_engine_gr_reset(struct gk20a *g)
{
	struct nvgpu_swprofiler *prof = &g->fifo.eng_reset_profiler;
	int err = 0;

	nvgpu_swprofile_snapshot(prof, PROF_ENG_RESET_PREAMBLE);

#ifdef CONFIG_NVGPU_POWER_PG
	if (nvgpu_pg_elpg_disable(g) != 0) {
		nvgpu_err(g, "failed to set disable elpg");
	}
#endif
	nvgpu_swprofile_snapshot(prof, PROF_ENG_RESET_ELPG_DISABLE);

#ifdef CONFIG_NVGPU_FECS_TRACE
	/*
	 * Resetting engine will alter read/write index. Need to flush
	 * circular buffer before re-enabling FECS.
	 */
	if (g->ops.gr.fecs_trace.reset != NULL) {
		if (g->ops.gr.fecs_trace.reset(g) != 0) {
			nvgpu_warn(g, "failed to reset fecs traces");
		}
	}
#endif

	nvgpu_swprofile_snapshot(prof, PROF_ENG_RESET_FECS_TRACE_RESET);

	/*
	 * HALT_PIPELINE method and gr reset during recovery is supported
	 * starting nvgpu-next simulation.
	 */
	err = g->ops.gr.falcon.ctrl_ctxsw(g,
			NVGPU_GR_FALCON_METHOD_HALT_PIPELINE, 0U, NULL);
	if (err != 0) {
		nvgpu_err(g, "failed to halt gr pipe");
	}

	nvgpu_swprofile_snapshot(prof, PROF_ENG_RESET_HALT_PIPELINE);

	/*
	 * resetting only engine is not
	 * enough, we do full init sequence
	 */
	nvgpu_log(g, gpu_dbg_rec, "resetting gr engine");

	err = nvgpu_gr_reset(g);
	if (err != 0) {
		nvgpu_err(g, "failed to reset gr engine");
	}

#ifdef CONFIG_NVGPU_POWER_PG
	if (nvgpu_pg_elpg_enable(g) != 0) {
		nvgpu_err(g, "failed to set enable elpg");
	}
	nvgpu_swprofile_snapshot(prof, PROF_ENG_RESET_ELPG_REENABLE);
#endif
}

void nvgpu_engine_reset(struct gk20a *g, u32 engine_id)
{
	struct nvgpu_swprofiler *prof = &g->fifo.eng_reset_profiler;
	const struct nvgpu_device *dev;
	int err = 0;
	u32 gr_instance_id;

	nvgpu_log_fn(g, " ");

	if (g == NULL) {
		return;
	}

	nvgpu_swprofile_begin_sample(prof);

	dev = nvgpu_engine_get_active_eng_info(g, engine_id);
	if (dev == NULL) {
		nvgpu_err(g, "unsupported engine_id %d", engine_id);
		return;
	}

	if (!nvgpu_device_is_ce(g, dev) &&
			!nvgpu_device_is_graphics(g, dev)) {
		nvgpu_warn(g, "Ignoring reset for non-host engine.");
		return;
	}

	/*
	 * Simple case first: reset a copy engine.
	 */
	if (nvgpu_device_is_ce(g, dev)) {
		err = nvgpu_mc_reset_dev(g, dev);
		if (err != 0) {
			nvgpu_log_info(g, "CE engine [id:%u] reset failed",
				dev->engine_id);
		}
		return;
	}

	/*
	 * Now reset a GR engine.
	 */
	gr_instance_id =
		nvgpu_grmgr_get_gr_instance_id_for_syspipe(
			g, dev->inst_id);

	nvgpu_gr_exec_for_instance(g,
		gr_instance_id, nvgpu_engine_gr_reset(g));
}
#endif

u32 nvgpu_engine_get_fast_ce_runlist_id(struct gk20a *g)
{
	const struct nvgpu_device *dev;
	u32 nr_lces;
	u32 i;

	/*
	 * Obtain a runlist ID for the fastest available CE. The priority order
	 * is:
	 *
	 *   1. Last available LCE
	 *   2. Last available COPY[0-2]
	 *   3. GRAPHICS runlist as a last resort.
	 */
	nr_lces = nvgpu_device_count(g, NVGPU_DEVTYPE_LCE);
	if (nr_lces > 0U) {
		dev = nvgpu_device_get(g,
				NVGPU_DEVTYPE_LCE,
				nr_lces - 1U);
		nvgpu_assert(dev != NULL);

		return dev->runlist_id;
	}

	/*
	 * Note: this only works since NVGPU_DEVTYPE_GRAPHICS is 0 and the COPYx
	 * are all > 0.
	 */
	for (i = NVGPU_DEVTYPE_COPY2; i >= NVGPU_DEVTYPE_COPY0; i--) {
		dev = nvgpu_device_get(g, i, i - NVGPU_DEVTYPE_COPY0);
		if (dev != NULL) {
			return dev->runlist_id;
		}
	}

	/*
	 * Fall back to GR.
	 */
	dev = nvgpu_device_get(g, NVGPU_DEVTYPE_GRAPHICS, 0);
	nvgpu_assert(dev != NULL);

	return dev->runlist_id;
}

u32 nvgpu_engine_get_gr_runlist_id(struct gk20a *g)
{
	const struct nvgpu_device *dev;

	dev = nvgpu_device_get(g, NVGPU_DEVTYPE_GRAPHICS, 0);
	if (dev == NULL) {
		nvgpu_warn(g, "No GR device on this GPU?!");
		return NVGPU_INVALID_RUNLIST_ID;
	}

	return dev->runlist_id;
}

bool nvgpu_engine_is_valid_runlist_id(struct gk20a *g, u32 runlist_id)
{
	u32 i;
	struct nvgpu_fifo *f = &g->fifo;

	for (i = 0U; i < f->num_engines; i++) {
		const struct nvgpu_device *dev = f->active_engines[i];

		if (dev->runlist_id == runlist_id) {
			return true;
		}
	}

	return false;
}

/*
 * Link engine IDs to MMU IDs and vice versa.
 */
u32 nvgpu_engine_id_to_mmu_fault_id(struct gk20a *g, u32 engine_id)
{
	const struct nvgpu_device *dev;

	dev = nvgpu_engine_get_active_eng_info(g, engine_id);

	if (dev == NULL) {
		nvgpu_err(g,
			"engine_id: %u is not in active list",
			engine_id);
		return NVGPU_INVALID_ENG_ID;
	}

	return dev->fault_id;
}

u32 nvgpu_engine_mmu_fault_id_to_engine_id(struct gk20a *g, u32 fault_id)
{
	u32 i;
	const struct nvgpu_device *dev;
	struct nvgpu_fifo *f = &g->fifo;

	for (i = 0U; i < f->num_engines; i++) {
		dev = f->active_engines[i];

		if (dev->fault_id == fault_id) {
			return dev->engine_id;
		}
	}

	return NVGPU_INVALID_ENG_ID;
}

u32 nvgpu_engine_get_mask_on_id(struct gk20a *g, u32 id, bool is_tsg)
{
	unsigned int i;
	u32 engines = 0;
	struct nvgpu_engine_status_info engine_status;
	u32 ctx_id;
	u32 type;
	bool busy;

	for (i = 0; i < g->fifo.num_engines; i++) {
		const struct nvgpu_device *dev = g->fifo.active_engines[i];

		g->ops.engine_status.read_engine_status_info(g,
			dev->engine_id, &engine_status);

		if (nvgpu_engine_status_is_ctxsw_load(
			&engine_status)) {
			nvgpu_engine_status_get_next_ctx_id_type(
				&engine_status, &ctx_id, &type);
		} else {
			nvgpu_engine_status_get_ctx_id_type(
				&engine_status, &ctx_id, &type);
		}

		busy = engine_status.is_busy;

		if (!busy || !(ctx_id == id)) {
			continue;
		}

		if ((is_tsg && (type == ENGINE_STATUS_CTX_ID_TYPE_TSGID)) ||
				(!is_tsg && (type == ENGINE_STATUS_CTX_ID_TYPE_CHID))) {
			engines |= BIT32(dev->engine_id);
		}
	}

	return engines;
}

static int nvgpu_engine_init_one_dev(struct nvgpu_fifo *f,
		const struct nvgpu_device *dev)
{
	bool found;
	struct nvgpu_device *dev_rw;
	struct gk20a *g = f->g;

	dev_rw = (struct nvgpu_device *)dev;

	/*
	 * Populate the PBDMA info for this device; ideally it'd be done
	 * during device init, but the FIFO unit is not out of reset that
	 * early in the nvgpu_finalize_poweron() sequence.
	 *
	 * We only need to do this for native; vGPU already has pbdma_id
	 * populated during device initialization.
	 */
	if (g->ops.fifo.find_pbdma_for_runlist != NULL) {
		found = g->ops.fifo.find_pbdma_for_runlist(g,
				dev->runlist_id,
				&dev_rw->pbdma_id);
		if (!found) {
			nvgpu_err(g, "busted pbdma map");
			return -EINVAL;
		}
	}

#if defined(CONFIG_NVGPU_NEXT)
	{
		int err = nvgpu_next_engine_init_one_dev(g, dev);

		if (err != 0) {
			return err;
		}
	}
#endif

	f->host_engines[dev->engine_id] = dev;
	f->active_engines[f->num_engines] = dev;
	++f->num_engines;

	return 0;
}

int nvgpu_engine_init_info(struct nvgpu_fifo *f)
{
	int err;
	struct gk20a *g = f->g;
	const struct nvgpu_device *dev;

	f->num_engines = 0;

	nvgpu_log(g, gpu_dbg_device, "Loading host engines from device list");
	nvgpu_log(g, gpu_dbg_device, "  GFX devices: %u",
		nvgpu_device_count(g, NVGPU_DEVTYPE_GRAPHICS));

	nvgpu_device_for_each(g, dev, NVGPU_DEVTYPE_GRAPHICS) {
		err = nvgpu_engine_init_one_dev(f, dev);
		if (err != 0) {
			return err;
		}
	}

	return g->ops.engine.init_ce_info(f);
}

void nvgpu_engine_get_id_and_type(struct gk20a *g, u32 engine_id,
		u32 *id, u32 *type)
{
	struct nvgpu_engine_status_info engine_status;

	g->ops.engine_status.read_engine_status_info(g, engine_id,
		&engine_status);

	/* use next_id if context load is failing */
	if (nvgpu_engine_status_is_ctxsw_load(
		&engine_status)) {
		nvgpu_engine_status_get_next_ctx_id_type(
			&engine_status, id, type);
	} else {
		nvgpu_engine_status_get_ctx_id_type(
			&engine_status, id, type);
	}
}

u32 nvgpu_engine_find_busy_doing_ctxsw(struct gk20a *g,
		u32 *id_ptr, bool *is_tsg_ptr)
{
	u32 i;
	u32 id = U32_MAX;
	bool is_tsg = false;
	u32 mailbox2;
	struct nvgpu_engine_status_info engine_status;
	const struct nvgpu_device *dev = NULL;

	for (i = 0U; i < g->fifo.num_engines; i++) {
		dev = g->fifo.active_engines[i];

		g->ops.engine_status.read_engine_status_info(g, dev->engine_id,
			&engine_status);

		/*
		 * we are interested in busy engines that
		 * are doing context switch
		 */
		if (!engine_status.is_busy ||
				!nvgpu_engine_status_is_ctxsw(&engine_status)) {
			continue;
		}

		if (nvgpu_engine_status_is_ctxsw_load(&engine_status)) {
			id = engine_status.ctx_next_id;
			is_tsg = nvgpu_engine_status_is_next_ctx_type_tsg(
					&engine_status);
		} else if (nvgpu_engine_status_is_ctxsw_switch(&engine_status)) {
			mailbox2 = g->ops.gr.falcon.read_fecs_ctxsw_mailbox(g,
					NVGPU_GR_FALCON_FECS_CTXSW_MAILBOX2);
			if ((mailbox2 & FECS_METHOD_WFI_RESTORE) != 0U) {
				id = engine_status.ctx_next_id;
				is_tsg = nvgpu_engine_status_is_next_ctx_type_tsg(
						&engine_status);
			} else {
				id = engine_status.ctx_id;
				is_tsg = nvgpu_engine_status_is_ctx_type_tsg(
						&engine_status);
			}
		} else {
			id = engine_status.ctx_id;
			is_tsg = nvgpu_engine_status_is_ctx_type_tsg(
					&engine_status);
		}
		break;
	}

	*id_ptr = id;
	*is_tsg_ptr = is_tsg;

	return dev->engine_id;
}

u32 nvgpu_engine_get_runlist_busy_engines(struct gk20a *g, u32 runlist_id)
{
	struct nvgpu_fifo *f = &g->fifo;
	u32 i, eng_bitmask = 0U;
	struct nvgpu_engine_status_info engine_status;

	for (i = 0U; i < f->num_engines; i++) {
		const struct nvgpu_device *dev = f->active_engines[i];

		g->ops.engine_status.read_engine_status_info(g, dev->engine_id,
			&engine_status);

		if (engine_status.is_busy && (dev->runlist_id == runlist_id)) {
			eng_bitmask |= BIT32(dev->engine_id);
		}
	}

	return eng_bitmask;
}

#ifdef CONFIG_NVGPU_DEBUGGER
bool nvgpu_engine_should_defer_reset(struct gk20a *g, u32 engine_id,
		u32 engine_subid, bool fake_fault)
{
	const struct nvgpu_device *dev;

	dev = nvgpu_engine_get_active_eng_info(g, engine_id);
	if (dev == NULL) {
		return false;
	}

	/*
	 * channel recovery is only deferred if an sm debugger
	 * is attached and has MMU debug mode is enabled
	 */
	if (!g->ops.gr.sm_debugger_attached(g) ||
			!g->ops.fb.is_debug_mode_enabled(g)) {
		return false;
	}

	/* if this fault is fake (due to RC recovery), don't defer recovery */
	if (fake_fault) {
		return false;
	}

	if (dev->type != NVGPU_DEVTYPE_GRAPHICS) {
		return false;
	}

	return g->ops.engine.is_fault_engine_subid_gpc(g, engine_subid);
}
#endif

u32 nvgpu_engine_mmu_fault_id_to_veid(struct gk20a *g, u32 mmu_fault_id,
		u32 gr_eng_fault_id)
{
	struct nvgpu_fifo *f = &g->fifo;
	u32 num_subctx;
	u32 veid = INVAL_ID;

	num_subctx = f->max_subctx_count;

	if ((mmu_fault_id >= gr_eng_fault_id) &&
			(mmu_fault_id < nvgpu_safe_add_u32(gr_eng_fault_id,
				num_subctx))) {
		veid = mmu_fault_id - gr_eng_fault_id;
	}

	return veid;
}

static u32 nvgpu_engine_mmu_fault_id_to_eng_id_and_veid(struct gk20a *g,
		u32 mmu_fault_id, u32 *veid)
{
	u32 i;
	u32 engine_id = INVAL_ID;
	const struct nvgpu_device *dev;
	struct nvgpu_fifo *f = &g->fifo;

	for (i = 0U; i < f->num_engines; i++) {
		dev = f->active_engines[i];

		if (dev->type == NVGPU_DEVTYPE_GRAPHICS) {
			*veid = nvgpu_engine_mmu_fault_id_to_veid(g,
					mmu_fault_id, dev->fault_id);
			if (*veid != INVAL_ID) {
				engine_id = dev->engine_id;
				break;
			}
		} else {
			if (dev->fault_id == mmu_fault_id) {
				engine_id = dev->engine_id;
				*veid = INVAL_ID;
				break;
			}
		}
	}
	return engine_id;
}

void nvgpu_engine_mmu_fault_id_to_eng_ve_pbdma_id(struct gk20a *g,
	u32 mmu_fault_id, u32 *engine_id, u32 *veid, u32 *pbdma_id)
{
	*engine_id = nvgpu_engine_mmu_fault_id_to_eng_id_and_veid(g,
			mmu_fault_id, veid);

	if (*engine_id == INVAL_ID) {
		*pbdma_id = g->ops.fifo.mmu_fault_id_to_pbdma_id(g,
				mmu_fault_id);
	} else {
		*pbdma_id = INVAL_ID;
	}
}
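Illustrative sketch (not part of this commit): the VEID lookup above treats a GR MMU fault id as an offset into a contiguous block of subcontext fault ids. A hypothetical, self-contained restatement of that range check, with example names standing in for the nvgpu types and INVAL_ID:

#include <stdint.h>

#define EXAMPLE_INVAL_ID 0xffffffffU

/* A GR fault id in [gr_fault_id, gr_fault_id + num_subctx) maps to a VEID. */
static uint32_t example_fault_id_to_veid(uint32_t mmu_fault_id,
					 uint32_t gr_fault_id,
					 uint32_t num_subctx)
{
	if (mmu_fault_id >= gr_fault_id &&
	    mmu_fault_id < gr_fault_id + num_subctx) {
		return mmu_fault_id - gr_fault_id;
	}
	return EXAMPLE_INVAL_ID;
}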
315
drivers/gpu/nvgpu/common/fifo/fifo.c
Normal file
315
drivers/gpu/nvgpu/common/fifo/fifo.c
Normal file
@@ -0,0 +1,315 @@
|
||||
/*
|
||||
* FIFO
|
||||
*
|
||||
* Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/trace.h>
|
||||
#include <nvgpu/dma.h>
|
||||
#include <nvgpu/fifo.h>
|
||||
#include <nvgpu/engines.h>
|
||||
#include <nvgpu/runlist.h>
|
||||
#include <nvgpu/preempt.h>
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/channel.h>
|
||||
#include <nvgpu/pbdma.h>
|
||||
#include <nvgpu/tsg.h>
|
||||
#include <nvgpu/vm_area.h>
|
||||
#include <nvgpu/nvgpu_err.h>
|
||||
#include <nvgpu/cic.h>
|
||||
#include <nvgpu/mc.h>
|
||||
#include <nvgpu/swprofile.h>
|
||||
#include <nvgpu/fifo/swprofile.h>
|
||||
|
||||
static const char *nvgpu_fifo_kickoff_profile_events[] = {
|
||||
NVGPU_FIFO_KICKOFF_PROFILE_EVENTS,
|
||||
};
|
||||
|
||||
static const char *nvgpu_fifo_recovery_profile_events[] = {
|
||||
NVGPU_FIFO_RECOVERY_PROFILE_EVENTS,
|
||||
};
|
||||
|
||||
static const char *nvgpu_fifo_engine_reset_events[] = {
|
||||
NVGPU_FIFO_ENGINE_RESET_EVENTS,
|
||||
};
|
||||
|
||||
void nvgpu_fifo_cleanup_sw_common(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_fifo *f = &g->fifo;
|
||||
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
#ifdef CONFIG_NVGPU_USERD
|
||||
g->ops.userd.cleanup_sw(g);
|
||||
#endif
|
||||
nvgpu_channel_cleanup_sw(g);
|
||||
nvgpu_tsg_cleanup_sw(g);
|
||||
nvgpu_runlist_cleanup_sw(g);
|
||||
nvgpu_engine_cleanup_sw(g);
|
||||
if (g->ops.pbdma.cleanup_sw != NULL) {
|
||||
g->ops.pbdma.cleanup_sw(g);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_DEBUGGER
|
||||
f->deferred_reset_pending = false;
|
||||
nvgpu_mutex_destroy(&f->deferred_reset_mutex);
|
||||
#endif
|
||||
nvgpu_mutex_destroy(&f->engines_reset_mutex);
|
||||
nvgpu_mutex_destroy(&f->intr.isr.mutex);
|
||||
|
||||
f->sw_ready = false;
|
||||
}
|
||||
|
||||
void nvgpu_fifo_cleanup_sw(struct gk20a *g)
|
||||
{
|
||||
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
|
||||
nvgpu_channel_worker_deinit(g);
|
||||
#endif
|
||||
nvgpu_fifo_cleanup_sw_common(g);
|
||||
}
|
||||
|
||||
static void nvgpu_fifo_remove_support(struct nvgpu_fifo *f)
|
||||
{
|
||||
struct gk20a *g = f->g;
|
||||
|
||||
g->ops.fifo.cleanup_sw(g);
|
||||
}
|
||||
|
||||
int nvgpu_fifo_setup_sw_common(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_fifo *f = &g->fifo;
|
||||
int err = 0;
|
||||
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
f->g = g;
|
||||
|
||||
nvgpu_mutex_init(&f->intr.isr.mutex);
|
||||
nvgpu_mutex_init(&f->engines_reset_mutex);
|
||||
#ifdef CONFIG_NVGPU_DEBUGGER
|
||||
nvgpu_mutex_init(&f->deferred_reset_mutex);
|
||||
#endif
|
||||
|
||||
nvgpu_swprofile_initialize(g, &f->kickoff_profiler,
|
||||
nvgpu_fifo_kickoff_profile_events);
|
||||
nvgpu_swprofile_initialize(g, &f->recovery_profiler,
|
||||
nvgpu_fifo_recovery_profile_events);
|
||||
nvgpu_swprofile_initialize(g, &f->eng_reset_profiler,
|
||||
nvgpu_fifo_engine_reset_events);
|
||||
|
||||
|
||||
err = nvgpu_channel_setup_sw(g);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "failed to init channel support");
|
||||
goto clean_up;
|
||||
}
|
||||
|
||||
err = nvgpu_tsg_setup_sw(g);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "failed to init tsg support");
|
||||
goto clean_up_channel;
|
||||
}
|
||||
|
||||
if (g->ops.pbdma.setup_sw != NULL) {
|
||||
err = g->ops.pbdma.setup_sw(g);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "failed to init pbdma support");
|
||||
goto clean_up_tsg;
|
||||
}
|
||||
}
|
||||
|
||||
err = nvgpu_engine_setup_sw(g);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "failed to init engine support");
|
||||
goto clean_up_pbdma;
|
||||
}
|
||||
|
||||
err = nvgpu_runlist_setup_sw(g);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "failed to init runlist support");
|
||||
goto clean_up_engine;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_USERD
|
||||
err = g->ops.userd.setup_sw(g);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "failed to init userd support");
|
||||
goto clean_up_runlist;
|
||||
}
|
||||
#endif
|
||||
|
||||
f->remove_support = nvgpu_fifo_remove_support;
|
||||
|
||||
nvgpu_log_fn(g, "done");
|
||||
return 0;
|
||||
|
||||
#ifdef CONFIG_NVGPU_USERD
|
||||
clean_up_runlist:
|
||||
nvgpu_runlist_cleanup_sw(g);
|
||||
#endif
|
||||
|
||||
clean_up_engine:
|
||||
nvgpu_engine_cleanup_sw(g);
|
||||
|
||||
clean_up_pbdma:
|
||||
if (g->ops.pbdma.cleanup_sw != NULL) {
|
||||
g->ops.pbdma.cleanup_sw(g);
|
||||
}
|
||||
|
||||
clean_up_tsg:
|
||||
nvgpu_tsg_cleanup_sw(g);
|
||||
|
||||
clean_up_channel:
|
||||
nvgpu_channel_cleanup_sw(g);
|
||||
|
||||
clean_up:
|
||||
nvgpu_err(g, "init fifo support failed");
|
||||
return err;
|
||||
}
|
||||
|
||||
int nvgpu_fifo_setup_sw(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_fifo *f = &g->fifo;
|
||||
int err = 0;
|
||||
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
if (f->sw_ready) {
|
||||
nvgpu_log_fn(g, "skip init");
|
||||
return 0;
|
||||
}
|
||||
|
||||
err = nvgpu_fifo_setup_sw_common(g);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "fifo common sw setup failed, err=%d", err);
|
||||
return err;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
|
||||
err = nvgpu_channel_worker_init(g);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "worker init fail, err=%d", err);
|
||||
goto clean_up;
|
||||
}
|
||||
#endif
|
||||
|
||||
f->sw_ready = true;
|
||||
|
||||
nvgpu_log_fn(g, "done");
|
||||
return 0;
|
||||
|
||||
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
|
||||
clean_up:
|
||||
nvgpu_fifo_cleanup_sw_common(g);
|
||||
|
||||
return err;
|
||||
#endif
|
||||
}
|
||||
|
||||
int nvgpu_fifo_init_support(struct gk20a *g)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = g->ops.fifo.setup_sw(g);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "fifo sw setup failed, err=%d", err);
|
||||
return err;
|
||||
}
|
||||
|
||||
if (g->ops.fifo.init_fifo_setup_hw != NULL) {
|
||||
err = g->ops.fifo.init_fifo_setup_hw(g);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "fifo hw setup failed, err=%d", err);
|
||||
goto clean_up;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
clean_up:
|
||||
nvgpu_fifo_cleanup_sw_common(g);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static const char * const pbdma_ch_eng_status_str[] = {
|
||||
"invalid",
|
||||
"valid",
|
||||
"NA",
|
||||
"NA",
|
||||
"NA",
|
||||
"load",
|
||||
"save",
|
||||
"switch",
|
||||
};
|
||||
|
||||
static const char * const not_found_str[] = {
|
||||
"NOT FOUND"
|
||||
};
|
||||
|
||||
const char *nvgpu_fifo_decode_pbdma_ch_eng_status(u32 index)
|
||||
{
|
||||
if (index >= ARRAY_SIZE(pbdma_ch_eng_status_str)) {
|
||||
return not_found_str[0];
|
||||
} else {
|
||||
return pbdma_ch_eng_status_str[index];
|
||||
}
|
||||
}
|
||||
|
||||
static void disable_fifo_interrupts(struct gk20a *g)
|
||||
{
|
||||
/** Disable fifo intr */
|
||||
g->ops.fifo.intr_0_enable(g, false);
|
||||
g->ops.fifo.intr_1_enable(g, false);
|
||||
|
||||
if (g->ops.fifo.intr_top_enable == NULL) {
|
||||
nvgpu_cic_intr_stall_unit_config(g, NVGPU_CIC_INTR_UNIT_FIFO,
|
||||
NVGPU_CIC_INTR_DISABLE);
|
||||
nvgpu_cic_intr_nonstall_unit_config(g, NVGPU_CIC_INTR_UNIT_FIFO,
|
||||
NVGPU_CIC_INTR_DISABLE);
|
||||
} else {
|
||||
g->ops.fifo.intr_top_enable(g, NVGPU_CIC_INTR_DISABLE);
|
||||
}
|
||||
}
|
||||
|
||||
int nvgpu_fifo_suspend(struct gk20a *g)
|
||||
{
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
if (g->ops.mm.is_bar1_supported(g)) {
|
||||
g->ops.fifo.bar1_snooping_disable(g);
|
||||
}
|
||||
|
||||
disable_fifo_interrupts(g);
|
||||
|
||||
nvgpu_log_fn(g, "done");
|
||||
return 0;
|
||||
}
|
||||
|
||||
void nvgpu_fifo_sw_quiesce(struct gk20a *g)
|
||||
{
|
||||
u32 runlist_mask = U32_MAX;
|
||||
|
||||
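/* Disable all runlists so that no new work gets scheduled */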
g->ops.runlist.write_state(g, runlist_mask, RUNLIST_DISABLED);
|
||||
|
||||
/* Preempt all runlists */
|
||||
nvgpu_fifo_preempt_runlists_for_rc(g, runlist_mask);
|
||||
}
|
||||
149
drivers/gpu/nvgpu/common/fifo/job.c
Normal file
@@ -0,0 +1,149 @@
|
||||
/*
|
||||
* Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/log.h>
|
||||
#include <nvgpu/lock.h>
|
||||
#include <nvgpu/kmem.h>
|
||||
#include <nvgpu/barrier.h>
|
||||
#include <nvgpu/circ_buf.h>
|
||||
#include <nvgpu/channel.h>
|
||||
#include <nvgpu/job.h>
|
||||
#include <nvgpu/priv_cmdbuf.h>
|
||||
#include <nvgpu/fence.h>
|
||||
|
||||
static inline struct nvgpu_channel_job *
|
||||
channel_gk20a_job_from_list(struct nvgpu_list_node *node)
|
||||
{
|
||||
return (struct nvgpu_channel_job *)
|
||||
((uintptr_t)node - offsetof(struct nvgpu_channel_job, list));
|
||||
}
|
||||
|
||||
int nvgpu_channel_alloc_job(struct nvgpu_channel *c,
|
||||
struct nvgpu_channel_job **job_out)
|
||||
{
|
||||
unsigned int put = c->joblist.pre_alloc.put;
|
||||
unsigned int get = c->joblist.pre_alloc.get;
|
||||
unsigned int next = (put + 1) % c->joblist.pre_alloc.length;
|
||||
bool full = next == get;
|
||||
|
||||
if (full) {
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
*job_out = &c->joblist.pre_alloc.jobs[put];
|
||||
(void) memset(*job_out, 0, sizeof(**job_out));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void nvgpu_channel_free_job(struct nvgpu_channel *c,
|
||||
struct nvgpu_channel_job *job)
|
||||
{
|
||||
/*
|
||||
* Nothing needed for now. The job contents are preallocated. The
|
||||
* completion fence may briefly outlive the job, but the job memory is
|
||||
* reclaimed only when a new submit comes in and the ringbuffer has run
|
||||
* out of space.
|
||||
*/
|
||||
}
|
||||
|
||||
void nvgpu_channel_joblist_lock(struct nvgpu_channel *c)
|
||||
{
|
||||
nvgpu_mutex_acquire(&c->joblist.pre_alloc.read_lock);
|
||||
}
|
||||
|
||||
void nvgpu_channel_joblist_unlock(struct nvgpu_channel *c)
|
||||
{
|
||||
nvgpu_mutex_release(&c->joblist.pre_alloc.read_lock);
|
||||
}
|
||||
|
||||
struct nvgpu_channel_job *nvgpu_channel_joblist_peek(struct nvgpu_channel *c)
|
||||
{
|
||||
unsigned int get = c->joblist.pre_alloc.get;
|
||||
unsigned int put = c->joblist.pre_alloc.put;
|
||||
bool empty = get == put;
|
||||
|
||||
return empty ? NULL : &c->joblist.pre_alloc.jobs[get];
|
||||
}
|
||||
|
||||
void nvgpu_channel_joblist_add(struct nvgpu_channel *c,
|
||||
struct nvgpu_channel_job *job)
|
||||
{
|
||||
c->joblist.pre_alloc.put = (c->joblist.pre_alloc.put + 1U) %
|
||||
(c->joblist.pre_alloc.length);
|
||||
}
|
||||
|
||||
void nvgpu_channel_joblist_delete(struct nvgpu_channel *c,
|
||||
struct nvgpu_channel_job *job)
|
||||
{
|
||||
c->joblist.pre_alloc.get = (c->joblist.pre_alloc.get + 1U) %
|
||||
(c->joblist.pre_alloc.length);
|
||||
}
|
||||
|
||||
int nvgpu_channel_joblist_init(struct nvgpu_channel *c, u32 num_jobs)
|
||||
{
|
||||
int err;
|
||||
u32 size;
|
||||
|
||||
size = (u32)sizeof(struct nvgpu_channel_job);
|
||||
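/* Guard the (num_jobs + 1) * size allocation below against u32 overflow */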
if (num_jobs > nvgpu_safe_sub_u32(U32_MAX / size, 1U)) {
|
||||
err = -ERANGE;
|
||||
goto clean_up;
|
||||
}
|
||||
|
||||
/*
|
||||
* The max capacity of this ring buffer is the alloc size minus one (in
|
||||
* units of item slot), so allocate a size of (num_jobs + 1) * size
|
||||
* bytes.
|
||||
*/
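/*
 * For example, num_jobs = 3 allocates four job slots; the ring is full
 * once three jobs are in flight, i.e. when the next put index would
 * equal get in nvgpu_channel_alloc_job().
 */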
|
||||
c->joblist.pre_alloc.jobs = nvgpu_vzalloc(c->g,
|
||||
nvgpu_safe_mult_u32(
|
||||
nvgpu_safe_add_u32(num_jobs, 1U),
|
||||
size));
|
||||
if (c->joblist.pre_alloc.jobs == NULL) {
|
||||
err = -ENOMEM;
|
||||
goto clean_up;
|
||||
}
|
||||
|
||||
/*
|
||||
* length is the allocation size of the ringbuffer; the number of jobs
|
||||
* that fit is one less.
|
||||
*/
|
||||
c->joblist.pre_alloc.length = nvgpu_safe_add_u32(num_jobs, 1U);
|
||||
c->joblist.pre_alloc.put = 0;
|
||||
c->joblist.pre_alloc.get = 0;
|
||||
|
||||
return 0;
|
||||
|
||||
clean_up:
|
||||
nvgpu_vfree(c->g, c->joblist.pre_alloc.jobs);
|
||||
(void) memset(&c->joblist.pre_alloc, 0, sizeof(c->joblist.pre_alloc));
|
||||
return err;
|
||||
}
|
||||
|
||||
void nvgpu_channel_joblist_deinit(struct nvgpu_channel *c)
|
||||
{
|
||||
if (c->joblist.pre_alloc.jobs != NULL) {
|
||||
nvgpu_vfree(c->g, c->joblist.pre_alloc.jobs);
|
||||
c->joblist.pre_alloc.jobs = NULL;
|
||||
}
|
||||
}
|
||||
55
drivers/gpu/nvgpu/common/fifo/pbdma.c
Normal file
@@ -0,0 +1,55 @@
|
||||
/*
|
||||
* Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/pbdma.h>
|
||||
|
||||
static void nvgpu_pbdma_init_intr_descs(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_fifo *f = &g->fifo;
|
||||
|
||||
if (g->ops.pbdma.device_fatal_0_intr_descs != NULL) {
|
||||
f->intr.pbdma.device_fatal_0 =
|
||||
g->ops.pbdma.device_fatal_0_intr_descs();
|
||||
}
|
||||
|
||||
if (g->ops.pbdma.channel_fatal_0_intr_descs != NULL) {
|
||||
f->intr.pbdma.channel_fatal_0 =
|
||||
g->ops.pbdma.channel_fatal_0_intr_descs();
|
||||
}
|
||||
if (g->ops.pbdma.restartable_0_intr_descs != NULL) {
|
||||
f->intr.pbdma.restartable_0 =
|
||||
g->ops.pbdma.restartable_0_intr_descs();
|
||||
}
|
||||
}
|
||||
|
||||
int nvgpu_pbdma_setup_sw(struct gk20a *g)
|
||||
{
|
||||
nvgpu_pbdma_init_intr_descs(g);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void nvgpu_pbdma_cleanup_sw(struct gk20a *g)
|
||||
{
|
||||
return;
|
||||
}
|
||||
54
drivers/gpu/nvgpu/common/fifo/pbdma_status.c
Normal file
@@ -0,0 +1,54 @@
|
||||
/*
|
||||
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/pbdma_status.h>
|
||||
|
||||
bool nvgpu_pbdma_status_is_chsw_switch(struct nvgpu_pbdma_status_info
|
||||
*pbdma_status)
|
||||
{
|
||||
return pbdma_status->chsw_status == NVGPU_PBDMA_CHSW_STATUS_SWITCH;
|
||||
}
|
||||
bool nvgpu_pbdma_status_is_chsw_load(struct nvgpu_pbdma_status_info
|
||||
*pbdma_status)
|
||||
{
|
||||
return pbdma_status->chsw_status == NVGPU_PBDMA_CHSW_STATUS_LOAD;
|
||||
}
|
||||
bool nvgpu_pbdma_status_is_chsw_save(struct nvgpu_pbdma_status_info
|
||||
*pbdma_status)
|
||||
{
|
||||
return pbdma_status->chsw_status == NVGPU_PBDMA_CHSW_STATUS_SAVE;
|
||||
}
|
||||
bool nvgpu_pbdma_status_is_chsw_valid(struct nvgpu_pbdma_status_info
|
||||
*pbdma_status)
|
||||
{
|
||||
return pbdma_status->chsw_status == NVGPU_PBDMA_CHSW_STATUS_VALID;
|
||||
}
|
||||
bool nvgpu_pbdma_status_is_id_type_tsg(struct nvgpu_pbdma_status_info
|
||||
*pbdma_status)
|
||||
{
|
||||
return pbdma_status->id_type == PBDMA_STATUS_ID_TYPE_TSGID;
|
||||
}
|
||||
bool nvgpu_pbdma_status_is_next_id_type_tsg(struct nvgpu_pbdma_status_info
|
||||
*pbdma_status)
|
||||
{
|
||||
return pbdma_status->next_id_type == PBDMA_STATUS_NEXT_ID_TYPE_TSGID;
|
||||
}
|
||||
220
drivers/gpu/nvgpu/common/fifo/preempt.c
Normal file
@@ -0,0 +1,220 @@
|
||||
/*
|
||||
* Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/soc.h>
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/errata.h>
|
||||
#include <nvgpu/runlist.h>
|
||||
#include <nvgpu/types.h>
|
||||
#include <nvgpu/channel.h>
|
||||
#include <nvgpu/tsg.h>
|
||||
#include <nvgpu/preempt.h>
|
||||
#include <nvgpu/nvgpu_err.h>
|
||||
#include <nvgpu/cic.h>
|
||||
#include <nvgpu/rc.h>
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
#include <nvgpu/pmu/mutex.h>
|
||||
#endif
|
||||
|
||||
u32 nvgpu_preempt_get_timeout(struct gk20a *g)
|
||||
{
|
||||
return g->ctxsw_timeout_period_ms;
|
||||
}
|
||||
|
||||
int nvgpu_fifo_preempt_tsg(struct gk20a *g, struct nvgpu_tsg *tsg)
|
||||
{
|
||||
int ret = 0;
|
||||
u32 preempt_retry_count = 10U;
|
||||
u32 preempt_retry_timeout =
|
||||
nvgpu_preempt_get_timeout(g) / preempt_retry_count;
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
u32 token = PMU_INVALID_MUTEX_OWNER_ID;
|
||||
int mutex_ret = 0;
|
||||
#endif
|
||||
|
||||
nvgpu_log_fn(g, "tsgid: %d", tsg->tsgid);
|
||||
|
||||
if (tsg->runlist == NULL) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
do {
|
||||
nvgpu_mutex_acquire(&tsg->runlist->runlist_lock);
|
||||
|
||||
if (nvgpu_is_errata_present(g, NVGPU_ERRATA_2016608)) {
|
||||
nvgpu_runlist_set_state(g, BIT32(tsg->runlist->id),
|
||||
RUNLIST_DISABLED);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
mutex_ret = nvgpu_pmu_lock_acquire(g, g->pmu,
|
||||
PMU_MUTEX_ID_FIFO, &token);
|
||||
#endif
|
||||
g->ops.fifo.preempt_trigger(g, tsg->tsgid, ID_TYPE_TSG);
|
||||
|
||||
/*
|
||||
* Poll for preempt done. If stalling interrupts are pending
|
||||
* while preempt is in progress we poll for stalling interrupts
|
||||
* to finish based on return value from this function and
|
||||
* retry preempt again.
|
||||
* If HW is hung, on the last retry instance we try to identify
|
||||
* the hung engines and set the runlist reset_eng_bitmask
|
||||
* and mark preemption completion.
|
||||
*/
|
||||
ret = g->ops.fifo.is_preempt_pending(g, tsg->tsgid,
|
||||
ID_TYPE_TSG, preempt_retry_count > 1U);
|
||||
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
if (mutex_ret == 0) {
|
||||
int err = nvgpu_pmu_lock_release(g, g->pmu,
|
||||
PMU_MUTEX_ID_FIFO, &token);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "PMU_MUTEX_ID_FIFO not released err=%d", err);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if (nvgpu_is_errata_present(g, NVGPU_ERRATA_2016608)) {
|
||||
nvgpu_runlist_set_state(g, BIT32(tsg->runlist->id),
|
||||
RUNLIST_ENABLED);
|
||||
}
|
||||
|
||||
nvgpu_mutex_release(&tsg->runlist->runlist_lock);
|
||||
|
||||
if (ret != -EAGAIN) {
|
||||
break;
|
||||
}
|
||||
|
||||
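/*
 * Let pending stalling interrupts get serviced within a slice of the
 * overall preempt timeout before retrying the preempt.
 */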
ret = nvgpu_cic_wait_for_stall_interrupts(g, preempt_retry_timeout);
|
||||
if (ret != 0) {
|
||||
nvgpu_log_info(g, "wait for stall interrupts failed %d", ret);
|
||||
}
|
||||
} while (--preempt_retry_count != 0U);
|
||||
|
||||
if (ret != 0) {
|
||||
if (nvgpu_platform_is_silicon(g)) {
|
||||
nvgpu_err(g, "preempt timed out for tsgid: %u, "
|
||||
"ctxsw timeout will trigger recovery if needed",
|
||||
tsg->tsgid);
|
||||
} else {
|
||||
nvgpu_rc_preempt_timeout(g, tsg);
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int nvgpu_preempt_channel(struct gk20a *g, struct nvgpu_channel *ch)
|
||||
{
|
||||
int err;
|
||||
struct nvgpu_tsg *tsg = nvgpu_tsg_from_ch(ch);
|
||||
|
||||
if (tsg != NULL) {
|
||||
err = g->ops.fifo.preempt_tsg(ch->g, tsg);
|
||||
} else {
|
||||
err = g->ops.fifo.preempt_channel(ch->g, ch);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
/* called from rc */
|
||||
int nvgpu_preempt_poll_tsg_on_pbdma(struct gk20a *g,
|
||||
struct nvgpu_tsg *tsg)
|
||||
{
|
||||
unsigned long runlist_served_pbdmas;
|
||||
unsigned long pbdma_id_bit;
|
||||
u32 tsgid, pbdma_id;
|
||||
|
||||
if (g->ops.fifo.preempt_poll_pbdma == NULL) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
tsgid = tsg->tsgid;
|
||||
runlist_served_pbdmas = tsg->runlist->pbdma_bitmask;
|
||||
|
||||
for_each_set_bit(pbdma_id_bit, &runlist_served_pbdmas,
|
||||
nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA)) {
|
||||
pbdma_id = U32(pbdma_id_bit);
|
||||
/*
|
||||
* If pbdma preempt fails the only option is to reset
|
||||
* GPU. Any sort of hang indicates the entire GPU's
|
||||
* memory system would be blocked.
|
||||
*/
|
||||
if (g->ops.fifo.preempt_poll_pbdma(g, tsgid, pbdma_id) != 0) {
|
||||
nvgpu_err(g, "PBDMA preempt failed");
|
||||
return -EBUSY;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* This should be called with runlist_lock held for all the
|
||||
* runlists set in runlists_mask
|
||||
*/
|
||||
void nvgpu_fifo_preempt_runlists_for_rc(struct gk20a *g, u32 runlists_bitmask)
|
||||
{
|
||||
struct nvgpu_fifo *f = &g->fifo;
|
||||
u32 i;
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
u32 token = PMU_INVALID_MUTEX_OWNER_ID;
|
||||
int mutex_ret = 0;
|
||||
#endif
|
||||
|
||||
/* runlist_locks are held by teardown and scheduling is disabled too */
|
||||
nvgpu_log_fn(g, "preempt runlists_bitmask:0x%08x", runlists_bitmask);
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
mutex_ret = nvgpu_pmu_lock_acquire(g, g->pmu,
|
||||
PMU_MUTEX_ID_FIFO, &token);
|
||||
#endif
|
||||
|
||||
for (i = 0U; i < f->num_runlists; i++) {
|
||||
struct nvgpu_runlist *runlist;
|
||||
|
||||
runlist = &f->active_runlists[i];
|
||||
|
||||
if ((BIT32(runlist->id) & runlists_bitmask) == 0U) {
|
||||
continue;
|
||||
}
|
||||
/* issue runlist preempt */
|
||||
g->ops.fifo.preempt_trigger(g, runlist->id,
|
||||
ID_TYPE_RUNLIST);
|
||||
#ifdef CONFIG_NVGPU_RECOVERY
|
||||
/*
|
||||
* Preemption will never complete in RC due to some
|
||||
* fatal condition. Do not poll for preemption to
|
||||
* complete. Reset engines served by runlists.
|
||||
*/
|
||||
runlist->reset_eng_bitmask = runlist->eng_bitmask;
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
if (mutex_ret == 0) {
|
||||
int err = nvgpu_pmu_lock_release(g, g->pmu, PMU_MUTEX_ID_FIFO,
|
||||
&token);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "PMU_MUTEX_ID_FIFO not released err=%d",
|
||||
err);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
333
drivers/gpu/nvgpu/common/fifo/priv_cmdbuf.c
Normal file
@@ -0,0 +1,333 @@
|
||||
/*
|
||||
* Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/log.h>
|
||||
#include <nvgpu/utils.h>
|
||||
#include <nvgpu/log2.h>
|
||||
#include <nvgpu/barrier.h>
|
||||
#include <nvgpu/dma.h>
|
||||
#include <nvgpu/nvgpu_mem.h>
|
||||
#include <nvgpu/kmem.h>
|
||||
#include <nvgpu/vm.h>
|
||||
#include <nvgpu/priv_cmdbuf.h>
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/trace.h>
|
||||
#include <nvgpu/circ_buf.h>
|
||||
|
||||
struct priv_cmd_entry {
|
||||
struct nvgpu_mem *mem;
|
||||
u32 off; /* offset in mem, in u32 entries */
|
||||
u32 fill_off; /* write offset from off, in u32 entries */
|
||||
u32 size; /* in words */
|
||||
u32 alloc_size;
|
||||
};
|
||||
|
||||
struct priv_cmd_queue {
|
||||
struct vm_gk20a *vm;
|
||||
struct nvgpu_mem mem; /* pushbuf */
|
||||
u32 size; /* allocated length in words */
|
||||
u32 put; /* next entry will begin here */
|
||||
u32 get; /* next entry to free begins here */
|
||||
|
||||
/* an entry is a fragment of the pushbuf memory */
|
||||
struct priv_cmd_entry *entries;
|
||||
u32 entries_len; /* allocated length */
|
||||
u32 entry_put;
|
||||
u32 entry_get;
|
||||
};
|
||||
|
||||
/* allocate private cmd buffer queue.
|
||||
used for inserting commands before/after user submitted buffers. */
|
||||
int nvgpu_priv_cmdbuf_queue_alloc(struct vm_gk20a *vm,
|
||||
u32 job_count, struct priv_cmd_queue **queue)
|
||||
{
|
||||
struct gk20a *g = vm->mm->g;
|
||||
struct priv_cmd_queue *q;
|
||||
u64 size, tmp_size;
|
||||
int err = 0;
|
||||
u32 wait_size, incr_size;
|
||||
u32 mem_per_job;
|
||||
|
||||
/*
|
||||
* sema size is at least as much as syncpt size, but semas may not be
|
||||
* enabled in the build. If neither semas nor syncpts are enabled, priv
|
||||
* cmdbufs and as such kernel mode submits with job tracking won't be
|
||||
* supported.
|
||||
*/
|
||||
#ifdef CONFIG_NVGPU_SW_SEMAPHORE
|
||||
wait_size = g->ops.sync.sema.get_wait_cmd_size();
|
||||
incr_size = g->ops.sync.sema.get_incr_cmd_size();
|
||||
#else
|
||||
wait_size = g->ops.sync.syncpt.get_wait_cmd_size();
|
||||
incr_size = g->ops.sync.syncpt.get_incr_cmd_size(true);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Compute the amount of priv_cmdbuf space we need. In general the
|
||||
* worst case is the kernel inserts both a semaphore pre-fence and
|
||||
* post-fence. Any sync-pt fences will take less memory so we can
|
||||
* ignore them unless they're the only supported type. Jobs can also
|
||||
* have more than one pre-fence but that's abnormal and we'll -EAGAIN
|
||||
* if such jobs would fill the queue.
|
||||
*
|
||||
* A semaphore ACQ (fence-wait) is 8 words: semaphore_a, semaphore_b,
|
||||
* semaphore_c, and semaphore_d. A semaphore INCR (fence-get) will be
|
||||
* 10 words: all the same as an ACQ plus a non-stalling intr which is
|
||||
* another 2 words. In reality these numbers vary by chip but we'll use
|
||||
* 8 and 10 as examples.
|
||||
*
|
||||
* Given the job count, cmdbuf space is allocated such that each job
|
||||
* can get one wait command and one increment command:
|
||||
*
|
||||
* job_count * (8 + 10) * 4 bytes
|
||||
*
|
||||
* These cmdbufs are inserted as gpfifo entries right before and after
|
||||
* the user submitted gpfifo entries per submit.
|
||||
*
|
||||
* One extra slot is added to the queue length so that the requested
|
||||
* job count can actually be allocated. This ring buffer implementation
|
||||
* is full when the number of consumed entries is one less than the
|
||||
* allocation size:
|
||||
*
|
||||
* alloc bytes = job_count * (wait + incr + 1) * slot in bytes
|
||||
*/
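/*
 * For example, with the wait and incr sizes of 8 and 10 words above and,
 * say, 128 jobs: mem_per_job = (8 + 10 + 1) * 4 = 76 bytes and the raw
 * size is 128 * 76 = 9728 bytes, which is then rounded up to the next
 * power of two and page aligned below (16 KiB here).
 */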
|
||||
mem_per_job = nvgpu_safe_mult_u32(
|
||||
nvgpu_safe_add_u32(
|
||||
nvgpu_safe_add_u32(wait_size, incr_size),
|
||||
1U),
|
||||
(u32)sizeof(u32));
|
||||
/* both 32 bit and mem_per_job is small */
|
||||
size = nvgpu_safe_mult_u64((u64)job_count, (u64)mem_per_job);
|
||||
|
||||
tmp_size = PAGE_ALIGN(roundup_pow_of_two(size));
|
||||
if (tmp_size > U32_MAX) {
|
||||
return -ERANGE;
|
||||
}
|
||||
size = (u32)tmp_size;
|
||||
|
||||
q = nvgpu_kzalloc(g, sizeof(*q));
|
||||
if (q == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
q->vm = vm;
|
||||
|
||||
if (job_count > U32_MAX / 2U - 1U) {
|
||||
err = -ERANGE;
|
||||
goto err_free_queue;
|
||||
}
|
||||
|
||||
/* One extra to account for the full condition: 2 * job_count + 1 */
|
||||
q->entries_len = nvgpu_safe_mult_u32(2U,
|
||||
nvgpu_safe_add_u32(job_count, 1U));
|
||||
q->entries = nvgpu_vzalloc(g,
|
||||
nvgpu_safe_mult_u64((u64)q->entries_len,
|
||||
sizeof(*q->entries)));
|
||||
if (q->entries == NULL) {
|
||||
err = -ENOMEM;
|
||||
goto err_free_queue;
|
||||
}
|
||||
|
||||
err = nvgpu_dma_alloc_map_sys(vm, size, &q->mem);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "%s: memory allocation failed", __func__);
|
||||
goto err_free_entries;
|
||||
}
|
||||
|
||||
tmp_size = q->mem.size / sizeof(u32);
|
||||
nvgpu_assert(tmp_size <= U32_MAX);
|
||||
q->size = (u32)tmp_size;
|
||||
|
||||
*queue = q;
|
||||
return 0;
|
||||
err_free_entries:
|
||||
nvgpu_vfree(g, q->entries);
|
||||
err_free_queue:
|
||||
nvgpu_kfree(g, q);
|
||||
return err;
|
||||
}
|
||||
|
||||
void nvgpu_priv_cmdbuf_queue_free(struct priv_cmd_queue *q)
|
||||
{
|
||||
struct vm_gk20a *vm = q->vm;
|
||||
struct gk20a *g = vm->mm->g;
|
||||
|
||||
nvgpu_dma_unmap_free(vm, &q->mem);
|
||||
nvgpu_vfree(g, q->entries);
|
||||
nvgpu_kfree(g, q);
|
||||
}
|
||||
|
||||
/* allocate a cmd buffer with given size. size is number of u32 entries */
|
||||
static int nvgpu_priv_cmdbuf_alloc_buf(struct priv_cmd_queue *q, u32 orig_size,
|
||||
struct priv_cmd_entry *e)
|
||||
{
|
||||
struct gk20a *g = q->vm->mm->g;
|
||||
u32 size = orig_size;
|
||||
u32 free_count;
|
||||
|
||||
nvgpu_log_fn(g, "size %d", orig_size);
|
||||
|
||||
/*
|
||||
* If free space in the end is less than requested, increase the size
|
||||
* to make the real allocated space start from beginning. The hardware
|
||||
* expects each cmdbuf to be contiguous in the dma space.
|
||||
*
|
||||
* Such a too-small leftover space at the end can occur because the
|
||||
* requested wait and incr command buffers do not necessarily align
|
||||
* with the whole buffer capacity. They don't always align because the
|
||||
* buffer size is rounded to the next power of two and because not all
|
||||
* jobs necessarily use exactly one wait command.
|
||||
*/
|
||||
if (nvgpu_safe_add_u32(q->put, size) > q->size) {
|
||||
size = orig_size + (q->size - q->put);
|
||||
}
|
||||
|
||||
nvgpu_log_info(g, "priv cmd queue get:put %d:%d",
|
||||
q->get, q->put);
|
||||
|
||||
nvgpu_assert(q->put < q->size);
|
||||
nvgpu_assert(q->get < q->size);
|
||||
nvgpu_assert(q->size > 0U);
|
||||
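/* q->size is a power of two, so masking with (q->size - 1) wraps around */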
free_count = (q->size - q->put + q->get - 1U) & (q->size - 1U);
|
||||
|
||||
if (size > free_count) {
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
e->fill_off = 0;
|
||||
e->size = orig_size;
|
||||
e->alloc_size = size;
|
||||
e->mem = &q->mem;
|
||||
|
||||
/*
|
||||
* if we have increased size to skip free space in the end, set put
|
||||
* to beginning of cmd buffer + size, as if the prev put was at
|
||||
* position 0.
|
||||
*/
|
||||
if (size != orig_size) {
|
||||
e->off = 0;
|
||||
q->put = orig_size;
|
||||
} else {
|
||||
e->off = q->put;
|
||||
q->put = (q->put + orig_size) & (q->size - 1U);
|
||||
}
|
||||
|
||||
/* we already handled q->put + size > q->size so BUG_ON this */
|
||||
BUG_ON(q->put > q->size);
|
||||
|
||||
nvgpu_log_fn(g, "done");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int nvgpu_priv_cmdbuf_alloc(struct priv_cmd_queue *q, u32 size,
|
||||
struct priv_cmd_entry **e)
|
||||
{
|
||||
u32 next_put = nvgpu_safe_add_u32(q->entry_put, 1U) % q->entries_len;
|
||||
struct priv_cmd_entry *entry;
|
||||
int err;
|
||||
|
||||
if (next_put == q->entry_get) {
|
||||
return -EAGAIN;
|
||||
}
|
||||
entry = &q->entries[q->entry_put];
|
||||
|
||||
err = nvgpu_priv_cmdbuf_alloc_buf(q, size, entry);
|
||||
if (err != 0) {
|
||||
return err;
|
||||
}
|
||||
|
||||
q->entry_put = next_put;
|
||||
*e = entry;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void nvgpu_priv_cmdbuf_rollback(struct priv_cmd_queue *q,
|
||||
struct priv_cmd_entry *e)
|
||||
{
|
||||
nvgpu_assert(q->put < q->size);
|
||||
nvgpu_assert(q->size > 0U);
|
||||
nvgpu_assert(e->alloc_size <= q->size);
|
||||
q->put = (q->put + q->size - e->alloc_size) & (q->size - 1U);
|
||||
|
||||
(void)memset(e, 0, sizeof(*e));
|
||||
|
||||
nvgpu_assert(q->entry_put < q->entries_len);
|
||||
nvgpu_assert(q->entries_len > 0U);
|
||||
q->entry_put = (q->entry_put + q->entries_len - 1U)
|
||||
% q->entries_len;
|
||||
}
|
||||
|
||||
void nvgpu_priv_cmdbuf_free(struct priv_cmd_queue *q, struct priv_cmd_entry *e)
|
||||
{
|
||||
struct gk20a *g = q->vm->mm->g;
|
||||
|
||||
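/*
 * A zero offset can also mean the entry was wrapped to the start of the
 * buffer in nvgpu_priv_cmdbuf_alloc_buf(), so only a nonzero mismatch is
 * reported as an out-of-order free.
 */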
if ((q->get != e->off) && e->off != 0U) {
|
||||
nvgpu_err(g, "priv cmdbuf requests out-of-order");
|
||||
}
|
||||
nvgpu_assert(q->size > 0U);
|
||||
q->get = nvgpu_safe_add_u32(e->off, e->size) & (q->size - 1U);
|
||||
q->entry_get = nvgpu_safe_add_u32(q->entry_get, 1U) % q->entries_len;
|
||||
|
||||
(void)memset(e, 0, sizeof(*e));
|
||||
}
|
||||
|
||||
void nvgpu_priv_cmdbuf_append(struct gk20a *g, struct priv_cmd_entry *e,
|
||||
u32 *data, u32 entries)
|
||||
{
|
||||
nvgpu_assert(e->fill_off + entries <= e->size);
|
||||
nvgpu_mem_wr_n(g, e->mem, (e->off + e->fill_off) * sizeof(u32),
|
||||
data, entries * sizeof(u32));
|
||||
e->fill_off += entries;
|
||||
}
|
||||
|
||||
void nvgpu_priv_cmdbuf_append_zeros(struct gk20a *g, struct priv_cmd_entry *e,
|
||||
u32 entries)
|
||||
{
|
||||
nvgpu_assert(e->fill_off + entries <= e->size);
|
||||
nvgpu_memset(g, e->mem, (e->off + e->fill_off) * sizeof(u32),
|
||||
0, entries * sizeof(u32));
|
||||
e->fill_off += entries;
|
||||
}
|
||||
|
||||
void nvgpu_priv_cmdbuf_finish(struct gk20a *g, struct priv_cmd_entry *e,
|
||||
u64 *gva, u32 *size)
|
||||
{
|
||||
/*
|
||||
* The size is written to the pushbuf entry, so make sure this buffer
|
||||
* is complete at this point. The responsibility of the channel sync is
|
||||
* to be consistent in allocation and usage, and the matching size and
|
||||
* add gops (e.g., get_wait_cmd_size, add_wait_cmd) help there.
|
||||
*/
|
||||
nvgpu_assert(e->fill_off == e->size);
|
||||
|
||||
#ifdef CONFIG_NVGPU_TRACE
|
||||
if (e->mem->aperture == APERTURE_SYSMEM) {
|
||||
trace_gk20a_push_cmdbuf(g->name, 0, e->size, 0,
|
||||
(u32 *)e->mem->cpu_va + e->off);
|
||||
}
|
||||
#endif
|
||||
*gva = nvgpu_safe_add_u64(e->mem->gpu_va,
|
||||
nvgpu_safe_mult_u64((u64)e->off, sizeof(u32)));
|
||||
*size = e->size;
|
||||
}
|
||||
914
drivers/gpu/nvgpu/common/fifo/runlist.c
Normal file
@@ -0,0 +1,914 @@
|
||||
/*
|
||||
* Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/channel.h>
|
||||
#include <nvgpu/fifo.h>
|
||||
#include <nvgpu/engines.h>
|
||||
#include <nvgpu/device.h>
|
||||
#include <nvgpu/runlist.h>
|
||||
#include <nvgpu/ptimer.h>
|
||||
#include <nvgpu/bug.h>
|
||||
#include <nvgpu/dma.h>
|
||||
#include <nvgpu/rc.h>
|
||||
#include <nvgpu/static_analysis.h>
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
#include <nvgpu/pmu/mutex.h>
|
||||
#endif
|
||||
|
||||
void nvgpu_runlist_lock_active_runlists(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_fifo *f = &g->fifo;
|
||||
struct nvgpu_runlist *runlist;
|
||||
u32 i;
|
||||
|
||||
nvgpu_log_info(g, "acquire runlist_lock for active runlists");
|
||||
for (i = 0; i < g->fifo.num_runlists; i++) {
|
||||
runlist = &f->active_runlists[i];
|
||||
nvgpu_mutex_acquire(&runlist->runlist_lock);
|
||||
}
|
||||
}
|
||||
|
||||
void nvgpu_runlist_unlock_active_runlists(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_fifo *f = &g->fifo;
|
||||
struct nvgpu_runlist *runlist;
|
||||
u32 i;
|
||||
|
||||
nvgpu_log_info(g, "release runlist_lock for active runlists");
|
||||
for (i = 0; i < g->fifo.num_runlists; i++) {
|
||||
runlist = &f->active_runlists[i];
|
||||
nvgpu_mutex_release(&runlist->runlist_lock);
|
||||
}
|
||||
}
|
||||
|
||||
static u32 nvgpu_runlist_append_tsg(struct gk20a *g,
|
||||
struct nvgpu_runlist *runlist,
|
||||
u32 **runlist_entry,
|
||||
u32 *entries_left,
|
||||
struct nvgpu_tsg *tsg)
|
||||
{
|
||||
struct nvgpu_fifo *f = &g->fifo;
|
||||
u32 runlist_entry_words = f->runlist_entry_size / (u32)sizeof(u32);
|
||||
struct nvgpu_channel *ch;
|
||||
u32 count = 0;
|
||||
u32 timeslice;
|
||||
|
||||
nvgpu_log_fn(f->g, " ");
|
||||
|
||||
if (*entries_left == 0U) {
|
||||
return RUNLIST_APPEND_FAILURE;
|
||||
}
|
||||
|
||||
/* add TSG entry */
|
||||
nvgpu_log_info(g, "add TSG %d to runlist", tsg->tsgid);
|
||||
|
||||
/*
|
||||
* timeslice is measured with PTIMER.
|
||||
* On some platforms, PTIMER is lower than 1GHz.
|
||||
*/
|
||||
timeslice = scale_ptimer(tsg->timeslice_us,
|
||||
ptimer_scalingfactor10x(g->ptimer_src_freq));
|
||||
|
||||
g->ops.runlist.get_tsg_entry(tsg, *runlist_entry, timeslice);
|
||||
|
||||
nvgpu_log_info(g, "tsg rl entries left %d runlist [0] %x [1] %x",
|
||||
*entries_left,
|
||||
(*runlist_entry)[0], (*runlist_entry)[1]);
|
||||
*runlist_entry += runlist_entry_words;
|
||||
count++;
|
||||
(*entries_left)--;
|
||||
|
||||
nvgpu_rwsem_down_read(&tsg->ch_list_lock);
|
||||
/* add runnable channels bound to this TSG */
|
||||
nvgpu_list_for_each_entry(ch, &tsg->ch_list,
|
||||
nvgpu_channel, ch_entry) {
|
||||
if (!nvgpu_test_bit(ch->chid,
|
||||
runlist->active_channels)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (*entries_left == 0U) {
|
||||
nvgpu_rwsem_up_read(&tsg->ch_list_lock);
|
||||
return RUNLIST_APPEND_FAILURE;
|
||||
}
|
||||
|
||||
nvgpu_log_info(g, "add channel %d to runlist",
|
||||
ch->chid);
|
||||
g->ops.runlist.get_ch_entry(ch, *runlist_entry);
|
||||
nvgpu_log_info(g, "rl entries left %d runlist [0] %x [1] %x",
|
||||
*entries_left,
|
||||
(*runlist_entry)[0], (*runlist_entry)[1]);
|
||||
count = nvgpu_safe_add_u32(count, 1U);
|
||||
*runlist_entry += runlist_entry_words;
|
||||
(*entries_left)--;
|
||||
}
|
||||
nvgpu_rwsem_up_read(&tsg->ch_list_lock);
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
|
||||
static u32 nvgpu_runlist_append_prio(struct nvgpu_fifo *f,
|
||||
struct nvgpu_runlist *runlist,
|
||||
u32 **runlist_entry,
|
||||
u32 *entries_left,
|
||||
u32 interleave_level)
|
||||
{
|
||||
u32 count = 0;
|
||||
unsigned long tsgid;
|
||||
|
||||
nvgpu_log_fn(f->g, " ");
|
||||
|
||||
for_each_set_bit(tsgid, runlist->active_tsgs, f->num_channels) {
|
||||
struct nvgpu_tsg *tsg = nvgpu_tsg_get_from_id(f->g, (u32)tsgid);
|
||||
u32 entries;
|
||||
|
||||
if (tsg->interleave_level == interleave_level) {
|
||||
entries = nvgpu_runlist_append_tsg(f->g, runlist,
|
||||
runlist_entry, entries_left, tsg);
|
||||
if (entries == RUNLIST_APPEND_FAILURE) {
|
||||
return RUNLIST_APPEND_FAILURE;
|
||||
}
|
||||
count += entries;
|
||||
}
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
static u32 nvgpu_runlist_append_hi(struct nvgpu_fifo *f,
|
||||
struct nvgpu_runlist *runlist,
|
||||
u32 **runlist_entry,
|
||||
u32 *entries_left)
|
||||
{
|
||||
nvgpu_log_fn(f->g, " ");
|
||||
|
||||
/*
|
||||
* No higher levels - this is where the "recursion" ends; just add all
|
||||
* active TSGs at this level.
|
||||
*/
|
||||
return nvgpu_runlist_append_prio(f, runlist, runlist_entry,
|
||||
entries_left,
|
||||
NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_HIGH);
|
||||
}
|
||||
|
||||
static u32 nvgpu_runlist_append_med(struct nvgpu_fifo *f,
|
||||
struct nvgpu_runlist *runlist,
|
||||
u32 **runlist_entry,
|
||||
u32 *entries_left)
|
||||
{
|
||||
u32 count = 0;
|
||||
unsigned long tsgid;
|
||||
|
||||
nvgpu_log_fn(f->g, " ");
|
||||
|
||||
for_each_set_bit(tsgid, runlist->active_tsgs, f->num_channels) {
|
||||
struct nvgpu_tsg *tsg = nvgpu_tsg_get_from_id(f->g, (u32)tsgid);
|
||||
u32 entries;
|
||||
|
||||
if (tsg->interleave_level !=
|
||||
NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_MEDIUM) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* LEVEL_MEDIUM list starts with a LEVEL_HIGH, if any */
|
||||
|
||||
entries = nvgpu_runlist_append_hi(f, runlist,
|
||||
runlist_entry, entries_left);
|
||||
if (entries == RUNLIST_APPEND_FAILURE) {
|
||||
return RUNLIST_APPEND_FAILURE;
|
||||
}
|
||||
count += entries;
|
||||
|
||||
entries = nvgpu_runlist_append_tsg(f->g, runlist,
|
||||
runlist_entry, entries_left, tsg);
|
||||
if (entries == RUNLIST_APPEND_FAILURE) {
|
||||
return RUNLIST_APPEND_FAILURE;
|
||||
}
|
||||
count += entries;
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
static u32 nvgpu_runlist_append_low(struct nvgpu_fifo *f,
|
||||
struct nvgpu_runlist *runlist,
|
||||
u32 **runlist_entry,
|
||||
u32 *entries_left)
|
||||
{
|
||||
u32 count = 0;
|
||||
unsigned long tsgid;
|
||||
|
||||
nvgpu_log_fn(f->g, " ");
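/*
 * For example, with one active TSG at each interleave level (H, M, L),
 * the constructed order is H M H L: each LOW entry is preceded by the
 * MEDIUM list (itself prefixed with the HIGH list) and then the HIGH
 * list again.
 */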
|
||||
|
||||
for_each_set_bit(tsgid, runlist->active_tsgs, f->num_channels) {
|
||||
struct nvgpu_tsg *tsg = nvgpu_tsg_get_from_id(f->g, (u32)tsgid);
|
||||
u32 entries;
|
||||
|
||||
if (tsg->interleave_level !=
|
||||
NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* The medium level starts with the highs, if any. */
|
||||
|
||||
entries = nvgpu_runlist_append_med(f, runlist,
|
||||
runlist_entry, entries_left);
|
||||
if (entries == RUNLIST_APPEND_FAILURE) {
|
||||
return RUNLIST_APPEND_FAILURE;
|
||||
}
|
||||
count += entries;
|
||||
|
||||
entries = nvgpu_runlist_append_hi(f, runlist,
|
||||
runlist_entry, entries_left);
|
||||
if (entries == RUNLIST_APPEND_FAILURE) {
|
||||
return RUNLIST_APPEND_FAILURE;
|
||||
}
|
||||
count += entries;
|
||||
|
||||
entries = nvgpu_runlist_append_tsg(f->g, runlist,
|
||||
runlist_entry, entries_left, tsg);
|
||||
if (entries == RUNLIST_APPEND_FAILURE) {
|
||||
return RUNLIST_APPEND_FAILURE;
|
||||
}
|
||||
count += entries;
|
||||
}
|
||||
|
||||
if (count == 0U) {
|
||||
/*
|
||||
* No transitions to fill with higher levels, so add
|
||||
* the next level once. If that's empty too, we have only
|
||||
* LEVEL_HIGH jobs.
|
||||
*/
|
||||
count = nvgpu_runlist_append_med(f, runlist,
|
||||
runlist_entry, entries_left);
|
||||
if (count == 0U) {
|
||||
count = nvgpu_runlist_append_hi(f, runlist,
|
||||
runlist_entry, entries_left);
|
||||
}
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
static u32 nvgpu_runlist_append_flat(struct nvgpu_fifo *f,
|
||||
struct nvgpu_runlist *runlist,
|
||||
u32 **runlist_entry,
|
||||
u32 *entries_left)
|
||||
{
|
||||
u32 count = 0, entries, i;
|
||||
|
||||
nvgpu_log_fn(f->g, " ");
|
||||
|
||||
/* Group by priority but don't interleave. High comes first. */
|
||||
|
||||
for (i = 0; i < NVGPU_FIFO_RUNLIST_INTERLEAVE_NUM_LEVELS; i++) {
|
||||
u32 level = NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_HIGH - i;
|
||||
|
||||
entries = nvgpu_runlist_append_prio(f, runlist, runlist_entry,
|
||||
entries_left, level);
|
||||
if (entries == RUNLIST_APPEND_FAILURE) {
|
||||
return RUNLIST_APPEND_FAILURE;
|
||||
}
|
||||
count += entries;
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
u32 nvgpu_runlist_construct_locked(struct nvgpu_fifo *f,
|
||||
struct nvgpu_runlist *runlist,
|
||||
u32 buf_id,
|
||||
u32 max_entries)
|
||||
{
|
||||
u32 *runlist_entry_base = runlist->mem[buf_id].cpu_va;
|
||||
|
||||
/*
|
||||
* The entry pointer and capacity counter that live on the stack here
|
||||
* keep track of the current position and the remaining space when tsg
|
||||
* and channel entries are ultimately appended.
|
||||
*/
|
||||
if (f->g->runlist_interleave) {
|
||||
return nvgpu_runlist_append_low(f, runlist,
|
||||
&runlist_entry_base, &max_entries);
|
||||
} else {
|
||||
return nvgpu_runlist_append_flat(f, runlist,
|
||||
&runlist_entry_base, &max_entries);
|
||||
}
|
||||
}
|
||||
|
||||
static bool nvgpu_runlist_modify_active_locked(struct gk20a *g,
|
||||
struct nvgpu_runlist *runlist,
|
||||
struct nvgpu_channel *ch, bool add)
|
||||
{
|
||||
struct nvgpu_tsg *tsg = NULL;
|
||||
|
||||
tsg = nvgpu_tsg_from_ch(ch);
|
||||
|
||||
if (tsg == NULL) {
|
||||
/*
|
||||
* Unsupported condition, but shouldn't break anything. Warn
|
||||
* and tell the caller that nothing has changed.
|
||||
*/
|
||||
nvgpu_warn(g, "Bare channel in runlist update");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (add) {
|
||||
if (nvgpu_test_and_set_bit(ch->chid,
|
||||
runlist->active_channels)) {
|
||||
/* was already there */
|
||||
return false;
|
||||
} else {
|
||||
/* new, and belongs to a tsg */
|
||||
nvgpu_set_bit(tsg->tsgid, runlist->active_tsgs);
|
||||
tsg->num_active_channels = nvgpu_safe_add_u32(
|
||||
tsg->num_active_channels, 1U);
|
||||
}
|
||||
} else {
|
||||
if (!nvgpu_test_and_clear_bit(ch->chid,
|
||||
runlist->active_channels)) {
|
||||
/* wasn't there */
|
||||
return false;
|
||||
} else {
|
||||
tsg->num_active_channels = nvgpu_safe_sub_u32(
|
||||
tsg->num_active_channels, 1U);
|
||||
if (tsg->num_active_channels == 0U) {
|
||||
/* was the only member of this tsg */
|
||||
nvgpu_clear_bit(tsg->tsgid,
|
||||
runlist->active_tsgs);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static int nvgpu_runlist_reconstruct_locked(struct gk20a *g,
|
||||
struct nvgpu_runlist *runlist,
|
||||
u32 buf_id, bool add_entries)
|
||||
{
|
||||
u32 num_entries;
|
||||
struct nvgpu_fifo *f = &g->fifo;
|
||||
|
||||
rl_dbg(g, "[%u] switch to new buffer 0x%16llx",
|
||||
runlist->id, (u64)nvgpu_mem_get_addr(g, &runlist->mem[buf_id]));
|
||||
|
||||
if (!add_entries) {
|
||||
runlist->count = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
num_entries = nvgpu_runlist_construct_locked(f, runlist, buf_id,
|
||||
f->num_runlist_entries);
|
||||
if (num_entries == RUNLIST_APPEND_FAILURE) {
|
||||
return -E2BIG;
|
||||
}
|
||||
runlist->count = num_entries;
|
||||
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 10_3), "Bug 2277532")
|
||||
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 14_4), "Bug 2277532")
|
||||
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 15_6), "Bug 2277532")
|
||||
WARN_ON(runlist->count > f->num_runlist_entries);
|
||||
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 10_3))
|
||||
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 14_4))
|
||||
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 15_6))
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int nvgpu_runlist_update_locked(struct gk20a *g, struct nvgpu_runlist *rl,
|
||||
struct nvgpu_channel *ch, bool add,
|
||||
bool wait_for_finish)
|
||||
{
|
||||
int ret = 0;
|
||||
u32 buf_id;
|
||||
bool add_entries;
|
||||
|
||||
if (ch != NULL) {
|
||||
bool update = nvgpu_runlist_modify_active_locked(g, rl, ch, add);
|
||||
if (!update) {
|
||||
/* no change in runlist contents */
|
||||
return 0;
|
||||
}
|
||||
/* had a channel to update, so reconstruct */
|
||||
add_entries = true;
|
||||
} else {
|
||||
/* no channel; add means update all, !add means clear all */
|
||||
add_entries = add;
|
||||
}
|
||||
|
||||
/* double buffering, swap to next */
|
||||
buf_id = (rl->cur_buffer == 0U) ? 1U : 0U;
|
||||
|
||||
ret = nvgpu_runlist_reconstruct_locked(g, rl, buf_id, add_entries);
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
g->ops.runlist.hw_submit(g, rl->id, rl->count, buf_id);
|
||||
|
||||
if (wait_for_finish) {
|
||||
ret = g->ops.runlist.wait_pending(g, rl->id);
|
||||
|
||||
if (ret == -ETIMEDOUT) {
|
||||
nvgpu_err(g, "runlist %d update timeout", rl->id);
|
||||
/* trigger runlist update timeout recovery */
|
||||
return ret;
|
||||
|
||||
} else {
|
||||
if (ret == -EINTR) {
|
||||
nvgpu_err(g, "runlist update interrupted");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
rl->cur_buffer = buf_id;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_CHANNEL_TSG_SCHEDULING
|
||||
/* trigger host to expire current timeslice and reschedule runlist from front */
|
||||
int nvgpu_runlist_reschedule(struct nvgpu_channel *ch, bool preempt_next,
|
||||
bool wait_preempt)
|
||||
{
|
||||
struct gk20a *g = ch->g;
|
||||
struct nvgpu_runlist *runlist;
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
u32 token = PMU_INVALID_MUTEX_OWNER_ID;
|
||||
int mutex_ret = 0;
|
||||
#endif
|
||||
int ret = 0;
|
||||
|
||||
runlist = ch->runlist;
|
||||
if (nvgpu_mutex_tryacquire(&runlist->runlist_lock) == 0) {
|
||||
return -EBUSY;
|
||||
}
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
mutex_ret = nvgpu_pmu_lock_acquire(
|
||||
g, g->pmu, PMU_MUTEX_ID_FIFO, &token);
|
||||
#endif
|
||||
|
||||
g->ops.runlist.hw_submit(
|
||||
g, runlist->id, runlist->count, runlist->cur_buffer);
|
||||
|
||||
if (preempt_next) {
|
||||
if (g->ops.runlist.reschedule_preempt_next_locked(ch,
|
||||
wait_preempt) != 0) {
|
||||
nvgpu_err(g, "reschedule preempt next failed");
|
||||
}
|
||||
}
|
||||
|
||||
if (g->ops.runlist.wait_pending(g, runlist->id) != 0) {
|
||||
nvgpu_err(g, "wait pending failed for runlist %u",
|
||||
runlist->id);
|
||||
}
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
if (mutex_ret == 0) {
|
||||
if (nvgpu_pmu_lock_release(g, g->pmu,
|
||||
PMU_MUTEX_ID_FIFO, &token) != 0) {
|
||||
nvgpu_err(g, "failed to release PMU lock");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
nvgpu_mutex_release(&runlist->runlist_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* add/remove a channel from runlist
|
||||
special cases below: runlist->active_channels will NOT be changed.
|
||||
(ch == NULL && !add) means remove all active channels from runlist.
|
||||
(ch == NULL && add) means restore all active channels on runlist. */
|
||||
static int nvgpu_runlist_do_update(struct gk20a *g, struct nvgpu_runlist *rl,
|
||||
struct nvgpu_channel *ch,
|
||||
bool add, bool wait_for_finish)
|
||||
{
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
u32 token = PMU_INVALID_MUTEX_OWNER_ID;
|
||||
int mutex_ret = 0;
|
||||
#endif
|
||||
int ret = 0;
|
||||
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
nvgpu_mutex_acquire(&rl->runlist_lock);
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
mutex_ret = nvgpu_pmu_lock_acquire(g, g->pmu,
|
||||
PMU_MUTEX_ID_FIFO, &token);
|
||||
#endif
|
||||
ret = nvgpu_runlist_update_locked(g, rl, ch, add, wait_for_finish);
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
if (mutex_ret == 0) {
|
||||
if (nvgpu_pmu_lock_release(g, g->pmu,
|
||||
PMU_MUTEX_ID_FIFO, &token) != 0) {
|
||||
nvgpu_err(g, "failed to release PMU lock");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
nvgpu_mutex_release(&rl->runlist_lock);
|
||||
|
||||
if (ret == -ETIMEDOUT) {
|
||||
nvgpu_rc_runlist_update(g, rl->id);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int nvgpu_runlist_update(struct gk20a *g, struct nvgpu_runlist *rl,
|
||||
struct nvgpu_channel *ch,
|
||||
bool add, bool wait_for_finish)
|
||||
{
|
||||
nvgpu_assert(ch != NULL);
|
||||
|
||||
return nvgpu_runlist_do_update(g, rl, ch, add, wait_for_finish);
|
||||
}
|
||||
|
||||
int nvgpu_runlist_reload(struct gk20a *g, struct nvgpu_runlist *rl,
|
||||
bool add, bool wait_for_finish)
|
||||
{
|
||||
return nvgpu_runlist_do_update(g, rl, NULL, add, wait_for_finish);
|
||||
}
|
||||
|
||||
int nvgpu_runlist_reload_ids(struct gk20a *g, u32 runlist_ids, bool add)
|
||||
{
|
||||
struct nvgpu_fifo *f = &g->fifo;
|
||||
int ret = -EINVAL;
|
||||
unsigned long runlist_id = 0;
|
||||
int errcode;
|
||||
unsigned long ulong_runlist_ids = (unsigned long)runlist_ids;
|
||||
|
||||
if (g == NULL) {
|
||||
goto end;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
for_each_set_bit(runlist_id, &ulong_runlist_ids, 32U) {
|
||||
/* Capture the last failure error code */
|
||||
errcode = g->ops.runlist.reload(g,
|
||||
f->runlists[runlist_id], add, true);
|
||||
if (errcode != 0) {
|
||||
nvgpu_err(g,
|
||||
"failed to update_runlist %lu %d",
|
||||
runlist_id, errcode);
|
||||
ret = errcode;
|
||||
}
|
||||
}
|
||||
end:
|
||||
return ret;
|
||||
}
|
||||
|
||||
const char *nvgpu_runlist_interleave_level_name(u32 interleave_level)
|
||||
{
|
||||
const char *ret_string = NULL;
|
||||
|
||||
switch (interleave_level) {
|
||||
case NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW:
|
||||
ret_string = "LOW";
|
||||
break;
|
||||
|
||||
case NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_MEDIUM:
|
||||
ret_string = "MEDIUM";
|
||||
break;
|
||||
|
||||
case NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_HIGH:
|
||||
ret_string = "HIGH";
|
||||
break;
|
||||
|
||||
default:
|
||||
ret_string = "?";
|
||||
break;
|
||||
}
|
||||
|
||||
return ret_string;
|
||||
}
|
||||
|
||||
void nvgpu_runlist_set_state(struct gk20a *g, u32 runlists_mask,
|
||||
u32 runlist_state)
|
||||
{
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
u32 token = PMU_INVALID_MUTEX_OWNER_ID;
|
||||
int mutex_ret = 0;
|
||||
#endif
|
||||
nvgpu_log(g, gpu_dbg_info, "runlist mask = 0x%08x state = 0x%08x",
|
||||
runlists_mask, runlist_state);
|
||||
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
mutex_ret = nvgpu_pmu_lock_acquire(g, g->pmu,
|
||||
PMU_MUTEX_ID_FIFO, &token);
|
||||
#endif
|
||||
g->ops.runlist.write_state(g, runlists_mask, runlist_state);
|
||||
#ifdef CONFIG_NVGPU_LS_PMU
|
||||
if (mutex_ret == 0) {
|
||||
if (nvgpu_pmu_lock_release(g, g->pmu,
|
||||
PMU_MUTEX_ID_FIFO, &token) != 0) {
|
||||
nvgpu_err(g, "failed to release PMU lock");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void nvgpu_runlist_cleanup_sw(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_fifo *f = &g->fifo;
|
||||
u32 i, j;
|
||||
struct nvgpu_runlist *runlist;
|
||||
|
||||
if ((f->runlists == NULL) || (f->active_runlists == NULL)) {
|
||||
return;
|
||||
}
|
||||
|
||||
g = f->g;
|
||||
|
||||
for (i = 0; i < f->num_runlists; i++) {
|
||||
runlist = &f->active_runlists[i];
|
||||
for (j = 0; j < MAX_RUNLIST_BUFFERS; j++) {
|
||||
nvgpu_dma_free(g, &runlist->mem[j]);
|
||||
}
|
||||
|
||||
nvgpu_kfree(g, runlist->active_channels);
|
||||
runlist->active_channels = NULL;
|
||||
|
||||
nvgpu_kfree(g, runlist->active_tsgs);
|
||||
runlist->active_tsgs = NULL;
|
||||
|
||||
nvgpu_mutex_destroy(&runlist->runlist_lock);
|
||||
f->runlists[runlist->id] = NULL;
|
||||
}
|
||||
|
||||
nvgpu_kfree(g, f->active_runlists);
|
||||
f->active_runlists = NULL;
|
||||
f->num_runlists = 0;
|
||||
nvgpu_kfree(g, f->runlists);
|
||||
f->runlists = NULL;
|
||||
f->max_runlists = 0;
|
||||
}
|
||||
|
||||
void nvgpu_runlist_init_enginfo(struct gk20a *g, struct nvgpu_fifo *f)
|
||||
{
|
||||
struct nvgpu_runlist *runlist;
|
||||
const struct nvgpu_device *dev;
|
||||
u32 i, j;
|
||||
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
if (g->is_virtual) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = 0; i < f->num_runlists; i++) {
|
||||
runlist = &f->active_runlists[i];
|
||||
|
||||
(void) g->ops.fifo.find_pbdma_for_runlist(g,
|
||||
runlist->id,
|
||||
&runlist->pbdma_bitmask);
|
||||
nvgpu_log(g, gpu_dbg_info, "runlist %d: pbdma bitmask 0x%x",
|
||||
runlist->id, runlist->pbdma_bitmask);
|
||||
|
||||
for (j = 0; j < f->num_engines; j++) {
|
||||
dev = f->active_engines[j];
|
||||
|
||||
if (dev->runlist_id == runlist->id) {
|
||||
runlist->eng_bitmask |= BIT32(dev->engine_id);
|
||||
}
|
||||
}
|
||||
nvgpu_log(g, gpu_dbg_info, "runlist %d: act eng bitmask 0x%x",
|
||||
runlist->id, runlist->eng_bitmask);
|
||||
}
|
||||
|
||||
nvgpu_log_fn(g, "done");
|
||||
}
|
||||
|
||||
static int nvgpu_init_active_runlist_mapping(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_runlist *runlist;
|
||||
struct nvgpu_fifo *f = &g->fifo;
|
||||
unsigned int runlist_id;
|
||||
size_t runlist_size;
|
||||
u32 i, j;
|
||||
int err = 0;
|
||||
|
||||
rl_dbg(g, "Building active runlist map.");
|
||||
|
||||
/*
|
||||
* In most case we want to loop through active runlists only. Here
|
||||
* we need to loop through all possible runlists, to build the mapping
|
||||
* between runlists[runlist_id] and active_runlists[i].
|
||||
*/
|
||||
i = 0U;
|
||||
for (runlist_id = 0; runlist_id < f->max_runlists; runlist_id++) {
|
||||
if (!nvgpu_engine_is_valid_runlist_id(g, runlist_id)) {
|
||||
/* skip inactive runlist */
|
||||
rl_dbg(g, " Skipping invalid runlist: %d", runlist_id);
|
||||
continue;
|
||||
}
|
||||
|
||||
rl_dbg(g, " Configuring HW runlist: %u", runlist_id);
|
||||
rl_dbg(g, " SW runlist index to HW: %u -> %u", i, runlist_id);
|
||||
|
||||
runlist = &f->active_runlists[i];
|
||||
runlist->id = runlist_id;
|
||||
f->runlists[runlist_id] = runlist;
|
||||
i = nvgpu_safe_add_u32(i, 1U);
|
||||
|
||||
runlist->active_channels =
|
||||
nvgpu_kzalloc(g, DIV_ROUND_UP(f->num_channels,
|
||||
BITS_PER_BYTE));
|
||||
if (runlist->active_channels == NULL) {
|
||||
err = -ENOMEM;
|
||||
goto clean_up_runlist;
|
||||
}
|
||||
|
||||
runlist->active_tsgs =
|
||||
nvgpu_kzalloc(g, DIV_ROUND_UP(f->num_channels,
|
||||
BITS_PER_BYTE));
|
||||
if (runlist->active_tsgs == NULL) {
|
||||
err = -ENOMEM;
|
||||
goto clean_up_runlist;
|
||||
}
|
||||
|
||||
runlist_size = (size_t)f->runlist_entry_size *
|
||||
(size_t)f->num_runlist_entries;
|
||||
rl_dbg(g, " RL entries: %d", f->num_runlist_entries);
|
||||
rl_dbg(g, " RL size %zu", runlist_size);
|
||||
|
||||
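/*
 * Runlist memory is double buffered: one buffer can be in use by
 * hardware while the other is rebuilt by software (see
 * nvgpu_runlist_update_locked()).
 */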
for (j = 0; j < MAX_RUNLIST_BUFFERS; j++) {
|
||||
err = nvgpu_dma_alloc_flags_sys(g,
|
||||
g->is_virtual ?
|
||||
0ULL : NVGPU_DMA_PHYSICALLY_ADDRESSED,
|
||||
runlist_size,
|
||||
&runlist->mem[j]);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g, "memory allocation failed");
|
||||
err = -ENOMEM;
|
||||
goto clean_up_runlist;
|
||||
}
|
||||
}
|
||||
|
||||
nvgpu_mutex_init(&runlist->runlist_lock);
|
||||
|
||||
/*
|
||||
* None of the buffers is pinned if this value doesn't change.
|
||||
* Otherwise, one of them (cur_buffer) must have been pinned.
|
||||
*/
|
||||
runlist->cur_buffer = MAX_RUNLIST_BUFFERS;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
clean_up_runlist:
|
||||
return err;
|
||||
}
|
||||
|
||||
int nvgpu_runlist_setup_sw(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_fifo *f = &g->fifo;
|
||||
u32 num_runlists = 0U;
|
||||
unsigned int runlist_id;
|
||||
int err = 0;
|
||||
|
||||
rl_dbg(g, "Initializing Runlists");
|
||||
|
||||
nvgpu_spinlock_init(&f->runlist_submit_lock);
|
||||
|
||||
f->runlist_entry_size = g->ops.runlist.entry_size(g);
|
||||
f->num_runlist_entries = g->ops.runlist.length_max(g);
|
||||
f->max_runlists = g->ops.runlist.count_max(g);
|
||||
|
||||
f->runlists = nvgpu_kzalloc(g, nvgpu_safe_mult_u64(
|
||||
sizeof(*f->runlists), f->max_runlists));
|
||||
if (f->runlists == NULL) {
|
||||
err = -ENOMEM;
|
||||
goto clean_up_runlist;
|
||||
}
|
||||
|
||||
for (runlist_id = 0; runlist_id < f->max_runlists; runlist_id++) {
|
||||
if (nvgpu_engine_is_valid_runlist_id(g, runlist_id)) {
|
||||
num_runlists = nvgpu_safe_add_u32(num_runlists, 1U);
|
||||
}
|
||||
}
|
||||
f->num_runlists = num_runlists;
|
||||
|
||||
f->active_runlists = nvgpu_kzalloc(g, nvgpu_safe_mult_u64(
|
||||
sizeof(*f->active_runlists), num_runlists));
|
||||
if (f->active_runlists == NULL) {
|
||||
err = -ENOMEM;
|
||||
goto clean_up_runlist;
|
||||
}
|
||||
|
||||
|
||||
rl_dbg(g, " Max runlists: %u", f->max_runlists);
|
||||
rl_dbg(g, " Active runlists: %u", f->num_runlists);
|
||||
rl_dbg(g, " RL entry size: %u bytes", f->runlist_entry_size);
|
||||
rl_dbg(g, " Max RL entries: %u", f->num_runlist_entries);
|
||||
|
||||
err = nvgpu_init_active_runlist_mapping(g);
|
||||
if (err != 0) {
|
||||
goto clean_up_runlist;
|
||||
}
|
||||
|
||||
g->ops.runlist.init_enginfo(g, f);
|
||||
return 0;
|
||||
|
||||
clean_up_runlist:
|
||||
nvgpu_runlist_cleanup_sw(g);
|
||||
rl_dbg(g, "fail");
|
||||
return err;
|
||||
}

u32 nvgpu_runlist_get_runlists_mask(struct gk20a *g, u32 id,
	unsigned int id_type, u32 act_eng_bitmask, u32 pbdma_bitmask)
{
	u32 i, runlists_mask = 0;
	struct nvgpu_fifo *f = &g->fifo;
	struct nvgpu_runlist *runlist;

	bool bitmask_disabled = ((act_eng_bitmask == 0U) &&
			(pbdma_bitmask == 0U));

	/* engine and/or pbdma ids are known */
	if (!bitmask_disabled) {
		for (i = 0U; i < f->num_runlists; i++) {
			runlist = &f->active_runlists[i];

			if ((runlist->eng_bitmask & act_eng_bitmask) != 0U) {
				runlists_mask |= BIT32(runlist->id);
			}

			if ((runlist->pbdma_bitmask & pbdma_bitmask) != 0U) {
				runlists_mask |= BIT32(runlist->id);
			}
		}
	}

	if (id_type != ID_TYPE_UNKNOWN) {
		if (id_type == ID_TYPE_TSG) {
			runlist = f->tsg[id].runlist;
		} else {
			runlist = f->channel[id].runlist;
		}

		if (runlist == NULL) {
			/* Warning on Linux, real assert on QNX. */
			nvgpu_assert(runlist != NULL);
		} else {
			runlists_mask |= BIT32(runlist->id);
		}
	} else {
		if (bitmask_disabled) {
			nvgpu_log(g, gpu_dbg_info, "id_type_unknown, engine "
					"and pbdma ids are unknown");

			for (i = 0U; i < f->num_runlists; i++) {
				runlist = &f->active_runlists[i];

				runlists_mask |= BIT32(runlist->id);
			}
		} else {
			nvgpu_log(g, gpu_dbg_info, "id_type_unknown, engine "
					"and/or pbdma ids are known");
		}
	}

	nvgpu_log(g, gpu_dbg_info, "runlists_mask = 0x%08x", runlists_mask);
	return runlists_mask;
}
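
/*
 * Illustrative sketch (not part of the driver) of how a recovery path might
 * consume the mask computed above. It only relies on BIT32() and the
 * nvgpu_runlist fields already used in this file; the helper name
 * example_log_runlists_to_recover() is hypothetical.
 */
static void example_log_runlists_to_recover(struct gk20a *g, u32 runlists_mask)
{
	struct nvgpu_fifo *f = &g->fifo;
	u32 i;

	for (i = 0U; i < f->num_runlists; i++) {
		struct nvgpu_runlist *runlist = &f->active_runlists[i];

		if ((BIT32(runlist->id) & runlists_mask) != 0U) {
			nvgpu_log_info(g, "runlist %u selected for recovery",
					runlist->id);
		}
	}
}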

void nvgpu_runlist_unlock_runlists(struct gk20a *g, u32 runlists_mask)
{
	struct nvgpu_fifo *f = &g->fifo;
	struct nvgpu_runlist *runlist;
	u32 i;

	nvgpu_log_info(g, "release runlist_lock for runlists set in "
			"runlists_mask: 0x%08x", runlists_mask);

	for (i = 0U; i < f->num_runlists; i++) {
		runlist = &f->active_runlists[i];

		if ((BIT32(i) & runlists_mask) != 0U) {
			nvgpu_mutex_release(&runlist->runlist_lock);
		}
	}
}

837
drivers/gpu/nvgpu/common/fifo/submit.c
Normal file
@@ -0,0 +1,837 @@
/*
 * Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <nvgpu/gk20a.h>
#include <nvgpu/nvgpu_init.h>
#include <nvgpu/channel.h>
#include <nvgpu/ltc.h>
#include <nvgpu/os_sched.h>
#include <nvgpu/utils.h>
#include <nvgpu/channel_sync.h>
#include <nvgpu/channel_sync_syncpt.h>
#include <nvgpu/watchdog.h>
#include <nvgpu/job.h>
#include <nvgpu/priv_cmdbuf.h>
#include <nvgpu/bug.h>
#include <nvgpu/fence.h>
#include <nvgpu/swprofile.h>
#include <nvgpu/vpr.h>
#include <nvgpu/trace.h>
#include <nvgpu/nvhost.h>
#include <nvgpu/user_fence.h>

#include <nvgpu/fifo/swprofile.h>

/*
 * We might need two extra gpfifo entries per submit - one for pre fence and
 * one for post fence.
 */
#define EXTRA_GPFIFO_ENTRIES 2U

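/*
 * Worked example with illustrative numbers: for a 128-entry gpfifo, only 127
 * entries are usable at a time (the usual reason being that a completely full
 * ring would be indistinguishable from an empty one), and a tracked submit may
 * add one wait and one incr priv cmdbuf entry around the user's entries. A
 * single submit can therefore carry at most 127 - EXTRA_GPFIFO_ENTRIES = 125
 * user entries, which is what the size check in nvgpu_submit_channel_gpfifo()
 * below enforces.
 */
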
static int nvgpu_submit_create_wait_cmd(struct nvgpu_channel *c,
		struct nvgpu_channel_fence *fence,
		struct priv_cmd_entry **wait_cmd, bool flag_sync_fence)
{
	/*
	 * A single input sync fd may contain multiple fences. The preallocated
	 * priv cmdbuf space allows exactly one per submit in the worst case.
	 * Require at most one wait for consistent deterministic submits; if
	 * there are more and no space, we'll -EAGAIN in nondeterministic mode.
	 */
	u32 max_wait_cmds = nvgpu_channel_is_deterministic(c) ?
		1U : 0U;
	int err;

	if (flag_sync_fence) {
		nvgpu_assert(fence->id <= (u32)INT_MAX);
		err = nvgpu_channel_sync_wait_fence_fd(c->sync,
			(int)fence->id, wait_cmd, max_wait_cmds);
	} else {
		struct nvgpu_channel_sync_syncpt *sync_syncpt;

		sync_syncpt = nvgpu_channel_sync_to_syncpt(c->sync);
		if (sync_syncpt != NULL) {
			err = nvgpu_channel_sync_wait_syncpt(sync_syncpt,
				fence->id, fence->value, wait_cmd);
		} else {
			err = -EINVAL;
		}
	}

	return err;
}

static int nvgpu_submit_create_incr_cmd(struct nvgpu_channel *c,
		struct priv_cmd_entry **incr_cmd,
		struct nvgpu_fence_type *post_fence, bool flag_fence_get,
		bool need_wfi, bool need_sync_fence)
{
	int err;

	if (flag_fence_get) {
		err = nvgpu_channel_sync_incr_user(c->sync, incr_cmd,
			post_fence, need_wfi, need_sync_fence);
	} else {
		err = nvgpu_channel_sync_incr(c->sync, incr_cmd,
			post_fence, need_sync_fence);
	}

	return err;
}

/*
 * Handle the submit synchronization - pre-fences and post-fences.
 */
static int nvgpu_submit_prepare_syncs(struct nvgpu_channel *c,
		struct nvgpu_channel_fence *fence,
		struct nvgpu_channel_job *job,
		u32 flags)
{
	struct gk20a *g = c->g;
	bool need_sync_fence;
	bool new_sync_created = false;
	int err = 0;
	bool need_wfi = (flags & NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI) == 0U;
	bool flag_fence_get = (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) != 0U;
	bool flag_sync_fence = (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) != 0U;
	bool flag_fence_wait = (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) != 0U;

	if (g->aggressive_sync_destroy_thresh != 0U) {
		nvgpu_mutex_acquire(&c->sync_lock);
		if (c->sync == NULL) {
			c->sync = nvgpu_channel_sync_create(c);
			if (c->sync == NULL) {
				err = -ENOMEM;
				goto clean_up_unlock;
			}
			new_sync_created = true;
		}
		nvgpu_channel_sync_get_ref(c->sync);
	}

	if ((g->ops.channel.set_syncpt != NULL) && new_sync_created) {
		err = g->ops.channel.set_syncpt(c);
		if (err != 0) {
			goto clean_up_put_sync;
		}
	}

	/*
	 * Optionally insert a syncpt/semaphore wait at the beginning of the
	 * gpfifo submission when requested by the user.
	 */
	if (flag_fence_wait) {
		err = nvgpu_submit_create_wait_cmd(c, fence, &job->wait_cmd,
			flag_sync_fence);
		if (err != 0) {
			goto clean_up_put_sync;
		}
	}

	need_sync_fence = flag_fence_get && flag_sync_fence;

	/*
	 * Always generate an increment at the end of a GPFIFO submission. When
	 * we do job tracking, post fences are needed for various reasons even
	 * if not requested by the user.
	 */
	err = nvgpu_submit_create_incr_cmd(c, &job->incr_cmd, &job->post_fence,
		flag_fence_get, need_wfi, need_sync_fence);
	if (err != 0) {
		goto clean_up_wait_cmd;
	}

	if (g->aggressive_sync_destroy_thresh != 0U) {
		nvgpu_mutex_release(&c->sync_lock);
	}
	return 0;

clean_up_wait_cmd:
	if (job->wait_cmd != NULL) {
		nvgpu_priv_cmdbuf_rollback(c->priv_cmd_q, job->wait_cmd);
	}
	job->wait_cmd = NULL;
clean_up_put_sync:
	if (g->aggressive_sync_destroy_thresh != 0U) {
		if (nvgpu_channel_sync_put_ref_and_check(c->sync)
				&& g->aggressive_sync_destroy) {
			nvgpu_channel_sync_destroy(c->sync);
		}
	}
clean_up_unlock:
	if (g->aggressive_sync_destroy_thresh != 0U) {
		nvgpu_mutex_release(&c->sync_lock);
	}
	return err;
}
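
/*
 * Summary of how the submit flags map onto the priv cmdbuf entries built in
 * nvgpu_submit_prepare_syncs() above (derived from the code in this file):
 *
 *   FENCE_WAIT               -> job->wait_cmd waits on a raw syncpt id/value
 *   FENCE_WAIT | SYNC_FENCE  -> job->wait_cmd waits on a sync file descriptor
 *   FENCE_GET                -> job->incr_cmd built with
 *                               nvgpu_channel_sync_incr_user() (user fence,
 *                               WFI unless SUPPRESS_WFI is set)
 *   FENCE_GET | SYNC_FENCE   -> the post fence is additionally backed by a
 *                               sync framework fence (need_sync_fence)
 *   no FENCE_GET             -> job->incr_cmd built with
 *                               nvgpu_channel_sync_incr(), for kernel-side
 *                               job tracking only
 */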

static void nvgpu_submit_append_priv_cmdbuf(struct nvgpu_channel *c,
		struct priv_cmd_entry *cmd)
{
	struct gk20a *g = c->g;
	struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem;
	struct nvgpu_gpfifo_entry gpfifo_entry;
	u64 gva;
	u32 size;

	nvgpu_priv_cmdbuf_finish(g, cmd, &gva, &size);
	g->ops.pbdma.format_gpfifo_entry(g, &gpfifo_entry, gva, size);

	nvgpu_mem_wr_n(g, gpfifo_mem,
			c->gpfifo.put * (u32)sizeof(gpfifo_entry),
			&gpfifo_entry, (u32)sizeof(gpfifo_entry));

	c->gpfifo.put = (c->gpfifo.put + 1U) & (c->gpfifo.entry_num - 1U);
}

static int nvgpu_submit_append_gpfifo_user_direct(struct nvgpu_channel *c,
		struct nvgpu_gpfifo_userdata userdata,
		u32 num_entries)
{
	struct gk20a *g = c->g;
	struct nvgpu_gpfifo_entry *gpfifo_cpu = c->gpfifo.mem.cpu_va;
	u32 gpfifo_size = c->gpfifo.entry_num;
	u32 len = num_entries;
	u32 start = c->gpfifo.put;
	u32 end = start + len; /* exclusive */
	int err;

	nvgpu_speculation_barrier();
	if (end > gpfifo_size) {
		/* wrap-around */
		u32 length0 = gpfifo_size - start;
		u32 length1 = len - length0;

		err = g->os_channel.copy_user_gpfifo(
				&gpfifo_cpu[start], userdata,
				0, length0);
		if (err != 0) {
			return err;
		}

		err = g->os_channel.copy_user_gpfifo(
				gpfifo_cpu, userdata,
				length0, length1);
		if (err != 0) {
			return err;
		}
	} else {
		err = g->os_channel.copy_user_gpfifo(
				&gpfifo_cpu[start], userdata,
				0, len);
		if (err != 0) {
			return err;
		}
	}

	return 0;
}

static void nvgpu_submit_append_gpfifo_common(struct nvgpu_channel *c,
		struct nvgpu_gpfifo_entry *src, u32 num_entries)
{
	struct gk20a *g = c->g;
	struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem;
	/* in bytes */
	u32 gpfifo_size =
		c->gpfifo.entry_num * (u32)sizeof(struct nvgpu_gpfifo_entry);
	u32 len = num_entries * (u32)sizeof(struct nvgpu_gpfifo_entry);
	u32 start = c->gpfifo.put * (u32)sizeof(struct nvgpu_gpfifo_entry);
	u32 end = start + len; /* exclusive */

	if (end > gpfifo_size) {
		/* wrap-around */
		u32 length0 = gpfifo_size - start;
		u32 length1 = len - length0;
		/* length0 is in bytes, so advance the source by bytes too */
		struct nvgpu_gpfifo_entry *src2 =
			(struct nvgpu_gpfifo_entry *)((u8 *)src + length0);

		nvgpu_mem_wr_n(g, gpfifo_mem, start, src, length0);
		nvgpu_mem_wr_n(g, gpfifo_mem, 0, src2, length1);
	} else {
		nvgpu_mem_wr_n(g, gpfifo_mem, start, src, len);
	}
}
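
/*
 * Self-contained sketch (not part of the driver) of the wrap-around split
 * used above, written against a plain byte ring with memcpy() instead of
 * nvgpu_mem_wr_n(). The name demo_ring_write() is hypothetical; it assumes
 * start < ring_size and len <= ring_size, which mirrors the capacity checks
 * the real submit path performs before copying.
 */
#include <string.h>

static void demo_ring_write(unsigned char *ring, unsigned int ring_size,
		unsigned int start, const unsigned char *src, unsigned int len)
{
	if (start + len > ring_size) {
		/* wrap-around: fill the tail of the ring, then its start */
		unsigned int length0 = ring_size - start;

		memcpy(&ring[start], src, length0);
		memcpy(&ring[0], &src[length0], len - length0);
	} else {
		memcpy(&ring[start], src, len);
	}
}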

/*
 * Copy source gpfifo entries into the gpfifo ring buffer, potentially
 * splitting into two memcpys to handle wrap-around.
 */
static int nvgpu_submit_append_gpfifo(struct nvgpu_channel *c,
		struct nvgpu_gpfifo_entry *kern_gpfifo,
		struct nvgpu_gpfifo_userdata userdata,
		u32 num_entries)
{
	int err;

	if ((kern_gpfifo == NULL)
#ifdef CONFIG_NVGPU_DGPU
			&& (c->gpfifo.pipe == NULL)
#endif
	) {
		/*
		 * This path (from userspace to sysmem) is special in order to
		 * avoid two copies unnecessarily (from user to pipe, then from
		 * pipe to gpu sysmem buffer).
		 */
		err = nvgpu_submit_append_gpfifo_user_direct(c, userdata,
				num_entries);
		if (err != 0) {
			return err;
		}
	}
#ifdef CONFIG_NVGPU_DGPU
	else if (kern_gpfifo == NULL) {
		/* from userspace to vidmem, use the common path */
		err = c->g->os_channel.copy_user_gpfifo(c->gpfifo.pipe,
				userdata, 0, num_entries);
		if (err != 0) {
			return err;
		}

		nvgpu_submit_append_gpfifo_common(c, c->gpfifo.pipe,
				num_entries);
	}
#endif
	else {
		/*
		 * from kernel to either sysmem or vidmem, don't need
		 * copy_user_gpfifo so use the common path
		 */
		nvgpu_submit_append_gpfifo_common(c, kern_gpfifo, num_entries);
	}

	trace_write_pushbuffers(c, num_entries);

	c->gpfifo.put = (c->gpfifo.put + num_entries) &
		(c->gpfifo.entry_num - 1U);

	return 0;
}

static int nvgpu_submit_prepare_gpfifo_track(struct nvgpu_channel *c,
		struct nvgpu_gpfifo_entry *gpfifo,
		struct nvgpu_gpfifo_userdata userdata,
		u32 num_entries,
		u32 flags,
		struct nvgpu_channel_fence *fence,
		struct nvgpu_fence_type **fence_out,
		struct nvgpu_swprofiler *profiler,
		bool need_deferred_cleanup)
{
	bool skip_buffer_refcounting = (flags &
		NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING) != 0U;
	struct nvgpu_channel_job *job = NULL;
	int err;

	nvgpu_channel_joblist_lock(c);
	err = nvgpu_channel_alloc_job(c, &job);
	nvgpu_channel_joblist_unlock(c);
	if (err != 0) {
		return err;
	}

	err = nvgpu_submit_prepare_syncs(c, fence, job, flags);
	if (err != 0) {
		goto clean_up_job;
	}

	nvgpu_swprofile_snapshot(profiler, PROF_KICKOFF_JOB_TRACKING);

	/*
	 * wait_cmd can be unset even if flag_fence_wait exists; the
	 * android sync framework for example can provide entirely
	 * empty fences that act like trivially expired waits.
	 */
	if (job->wait_cmd != NULL) {
		nvgpu_submit_append_priv_cmdbuf(c, job->wait_cmd);
	}

	err = nvgpu_submit_append_gpfifo(c, gpfifo, userdata, num_entries);
	if (err != 0) {
		goto clean_up_gpfifo_wait;
	}

	nvgpu_submit_append_priv_cmdbuf(c, job->incr_cmd);

	err = nvgpu_channel_add_job(c, job, skip_buffer_refcounting);
	if (err != 0) {
		goto clean_up_gpfifo_incr;
	}

	nvgpu_channel_sync_mark_progress(c->sync, need_deferred_cleanup);

	if (fence_out != NULL) {
		/*
		 * This fence ref is going somewhere else but it's owned by the
		 * job; the caller is expected to release it promptly, so that
		 * a subsequent job cannot reclaim its memory.
		 */
		*fence_out = nvgpu_fence_get(&job->post_fence);
	}

	return 0;

clean_up_gpfifo_incr:
	/*
	 * undo the incr priv cmdbuf and the user entries:
	 * new gp.put =
	 *   (gp.put - (1 + num_entries)) & (gp.entry_num - 1) =
	 *   (gp.put + (gp.entry_num - (1 + num_entries))) & (gp.entry_num - 1)
	 * the + entry_num does not affect the result but avoids wrapping below
	 * zero for MISRA, although it would be well defined.
	 */
	c->gpfifo.put =
		(nvgpu_safe_add_u32(c->gpfifo.put,
			nvgpu_safe_sub_u32(c->gpfifo.entry_num,
				nvgpu_safe_add_u32(1U, num_entries)))) &
		nvgpu_safe_sub_u32(c->gpfifo.entry_num, 1U);
clean_up_gpfifo_wait:
	if (job->wait_cmd != NULL) {
		/*
		 * undo the wait priv cmdbuf entry:
		 * gp.put =
		 *   (gp.put - 1) & (gp.entry_num - 1) =
		 *   (gp.put + (gp.entry_num - 1)) & (gp.entry_num - 1)
		 * same as above with the gp.entry_num on the left side.
		 */
		c->gpfifo.put =
			nvgpu_safe_add_u32(c->gpfifo.put,
				nvgpu_safe_sub_u32(c->gpfifo.entry_num, 1U)) &
			nvgpu_safe_sub_u32(c->gpfifo.entry_num, 1U);
	}
	nvgpu_fence_put(&job->post_fence);
	nvgpu_priv_cmdbuf_rollback(c->priv_cmd_q, job->incr_cmd);
	if (job->wait_cmd != NULL) {
		nvgpu_priv_cmdbuf_rollback(c->priv_cmd_q, job->wait_cmd);
	}
clean_up_job:
	nvgpu_channel_free_job(c, job);
	return err;
}
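
/*
 * Worked example (illustrative only) of the rollback arithmetic in the
 * clean_up paths above, with gp.entry_num = 8 (mask 7): starting from
 * gp.put = 1, appending one wait entry, three user entries and one incr
 * entry leaves gp.put = 6. Undoing the incr and the three user entries is
 * (6 + (8 - 4)) & 7 = 10 & 7 = 2, i.e. the same as (6 - 4) mod 8 but without
 * an intermediate value below zero. Undoing the wait entry then gives
 * (2 + 7) & 7 = 1, restoring the original put pointer.
 */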

static int nvgpu_submit_prepare_gpfifo_notrack(struct nvgpu_channel *c,
		struct nvgpu_gpfifo_entry *gpfifo,
		struct nvgpu_gpfifo_userdata userdata,
		u32 num_entries,
		struct nvgpu_fence_type **fence_out,
		struct nvgpu_swprofiler *profiler)
{
	int err;

	nvgpu_swprofile_snapshot(profiler, PROF_KICKOFF_JOB_TRACKING);

	err = nvgpu_submit_append_gpfifo(c, gpfifo, userdata,
			num_entries);
	if (err != 0) {
		return err;
	}

	if (fence_out != NULL) {
		*fence_out = NULL;
	}

	return 0;
}

static int check_gpfifo_capacity(struct nvgpu_channel *c, u32 required)
{
	/*
	 * Make sure we have enough space for gpfifo entries. Check cached
	 * values first and then read from HW. If there is no space, return
	 * -EAGAIN and let userspace decide whether to retry the request.
	 */
	if (nvgpu_channel_get_gpfifo_free_count(c) < required) {
		if (nvgpu_channel_update_gpfifo_get_and_get_free_count(c) <
				required) {
			return -EAGAIN;
		}
	}

	return 0;
}
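
/*
 * Illustrative sketch of the free-space arithmetic behind the helpers used
 * above, assuming the power-of-two ring in this file with one slot always
 * left empty. demo_gpfifo_free_count() is a hypothetical name and the real
 * nvgpu_channel_get_gpfifo_free_count() may be implemented differently; this
 * only shows why a ring with get == put reports entry_num - 1 free entries.
 */
static inline u32 demo_gpfifo_free_count(u32 get, u32 put, u32 entry_num)
{
	return (get + entry_num - put - 1U) & (entry_num - 1U);
}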

static int nvgpu_do_submit(struct nvgpu_channel *c,
		struct nvgpu_gpfifo_entry *gpfifo,
		struct nvgpu_gpfifo_userdata userdata,
		u32 num_entries,
		u32 flags,
		struct nvgpu_channel_fence *fence,
		struct nvgpu_fence_type **fence_out,
		struct nvgpu_swprofiler *profiler,
		bool need_job_tracking,
		bool need_deferred_cleanup)
{
	struct gk20a *g = c->g;
	int err;

#ifdef CONFIG_NVGPU_TRACE
	trace_gk20a_channel_submit_gpfifo(g->name,
			c->chid,
			num_entries,
			flags,
			fence ? fence->id : 0,
			fence ? fence->value : 0);
#endif

	nvgpu_log_info(g, "pre-submit put %d, get %d, size %d",
		c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);

	err = check_gpfifo_capacity(c, num_entries + EXTRA_GPFIFO_ENTRIES);
	if (err != 0) {
		return err;
	}

	if (need_job_tracking) {
		err = nvgpu_submit_prepare_gpfifo_track(c, gpfifo,
				userdata, num_entries, flags, fence,
				fence_out, profiler, need_deferred_cleanup);
	} else {
		err = nvgpu_submit_prepare_gpfifo_notrack(c, gpfifo,
				userdata, num_entries, fence_out, profiler);
	}

	if (err != 0) {
		return err;
	}

	nvgpu_swprofile_snapshot(profiler, PROF_KICKOFF_APPEND);

	g->ops.userd.gp_put(g, c);

	return 0;
}

#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
static int nvgpu_submit_deterministic(struct nvgpu_channel *c,
		struct nvgpu_gpfifo_entry *gpfifo,
		struct nvgpu_gpfifo_userdata userdata,
		u32 num_entries,
		u32 flags,
		struct nvgpu_channel_fence *fence,
		struct nvgpu_fence_type **fence_out,
		struct nvgpu_swprofiler *profiler)
{
	bool skip_buffer_refcounting = (flags &
		NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING) != 0U;
	bool flag_fence_wait = (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) != 0U;
	bool flag_fence_get = (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) != 0U;
	bool flag_sync_fence = (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) != 0U;
	struct gk20a *g = c->g;
	bool need_job_tracking;
	int err = 0;

	nvgpu_assert(nvgpu_channel_is_deterministic(c));

	/* sync framework on post fences would not be deterministic */
	if (flag_fence_get && flag_sync_fence) {
		return -EINVAL;
	}

	/* this would be O(n) */
	if (!skip_buffer_refcounting) {
		return -EINVAL;
	}

	/* the watchdog needs periodic job cleanup */
	if (nvgpu_channel_wdt_enabled(c->wdt)) {
		return -EINVAL;
	}

	/*
	 * Job tracking is necessary on deterministic channels if and only if
	 * pre- or post-fence functionality is needed. If not, a fast submit
	 * can be done (i.e. only need to write out userspace GPFIFO entries
	 * and update GP_PUT).
	 */
	need_job_tracking = flag_fence_wait || flag_fence_get;

	if (need_job_tracking) {
		/* nvgpu_semaphore is dynamically allocated, not pooled */
		if (!nvgpu_has_syncpoints(g)) {
			return -EINVAL;
		}

		/* dynamic sync allocation wouldn't be deterministic */
		if (g->aggressive_sync_destroy_thresh != 0U) {
			return -EINVAL;
		}

		/*
		 * (Try to) clean up a single job, if available. Each job
		 * requires the same amount of metadata, so this is enough for
		 * the job list, fence pool, and private command buffers that
		 * this submit will need.
		 *
		 * This submit might still need more gpfifo space than what the
		 * previous one used. The job metadata doesn't look at that,
		 * though - the hw GP_GET pointer can be much further away than
		 * our metadata pointers; gpfifo space is "freed" by the HW.
		 */
		nvgpu_channel_clean_up_deterministic_job(c);
	}

	/* Grab access to HW to deal with do_idle */
	nvgpu_rwsem_down_read(&g->deterministic_busy);

	if (c->deterministic_railgate_allowed) {
		/*
		 * Nope - this channel has dropped its own power ref. As
		 * deterministic submits don't hold power on for each submitted
		 * job like normal ones do, the GPU might railgate any time now
		 * and thus submit is disallowed.
		 */
		err = -EINVAL;
		goto clean_up;
	}

	err = nvgpu_do_submit(c, gpfifo, userdata, num_entries, flags, fence,
			fence_out, profiler, need_job_tracking, false);
	if (err != 0) {
		goto clean_up;
	}

	/* No hw access beyond this point */
	nvgpu_rwsem_up_read(&g->deterministic_busy);

	return 0;

clean_up:
	nvgpu_log_fn(g, "fail %d", err);
	nvgpu_rwsem_up_read(&g->deterministic_busy);

	return err;
}
#endif

static int nvgpu_submit_nondeterministic(struct nvgpu_channel *c,
		struct nvgpu_gpfifo_entry *gpfifo,
		struct nvgpu_gpfifo_userdata userdata,
		u32 num_entries,
		u32 flags,
		struct nvgpu_channel_fence *fence,
		struct nvgpu_fence_type **fence_out,
		struct nvgpu_swprofiler *profiler)
{
	bool skip_buffer_refcounting = (flags &
		NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING) != 0U;
	bool flag_fence_wait = (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) != 0U;
	bool flag_fence_get = (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) != 0U;
	struct gk20a *g = c->g;
	bool need_job_tracking;
	int err = 0;

	nvgpu_assert(!nvgpu_channel_is_deterministic(c));

	/*
	 * Job tracking is necessary for any of the following conditions on
	 * non-deterministic channels:
	 * - pre- or post-fence functionality
	 * - GPU rail-gating
	 * - VPR resize enabled
	 * - buffer refcounting
	 * - channel watchdog
	 *
	 * If none of the conditions are met, then job tracking is not
	 * required and a fast submit can be done (i.e. only need to write
	 * out userspace GPFIFO entries and update GP_PUT).
	 */
	need_job_tracking = flag_fence_wait ||
			flag_fence_get ||
			nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE) ||
			nvgpu_is_vpr_resize_enabled() ||
			!skip_buffer_refcounting ||
			nvgpu_channel_wdt_enabled(c->wdt);

	if (need_job_tracking) {
		/*
		 * Get a power ref because this isn't a deterministic
		 * channel that holds them during the channel lifetime.
		 * This one is released by nvgpu_channel_clean_up_jobs,
		 * via syncpt or sema interrupt, whichever is used.
		 */
		err = gk20a_busy(g);
		if (err != 0) {
			nvgpu_err(g,
				"failed to host gk20a to submit gpfifo");
			nvgpu_print_current(g, NULL, NVGPU_ERROR);
			return err;
		}
	}

	err = nvgpu_do_submit(c, gpfifo, userdata, num_entries, flags, fence,
			fence_out, profiler, need_job_tracking, true);
	if (err != 0) {
		goto clean_up;
	}

	return 0;

clean_up:
	nvgpu_log_fn(g, "fail %d", err);
	/* drop the power ref only if one was taken for job tracking above */
	if (need_job_tracking) {
		gk20a_idle(g);
	}

	return err;
}

static int check_submit_allowed(struct nvgpu_channel *c)
{
	struct gk20a *g = c->g;

	if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) {
		return -ENODEV;
	}

	if (nvgpu_channel_check_unserviceable(c)) {
		return -ETIMEDOUT;
	}

	if (c->usermode_submit_enabled) {
		return -EINVAL;
	}

	if (!nvgpu_mem_is_valid(&c->gpfifo.mem)) {
		return -ENOMEM;
	}

	/* an address space needs to have been bound at this point. */
	if (!nvgpu_channel_as_bound(c)) {
		nvgpu_err(g,
			"not bound to an address space at time of gpfifo"
			" submission.");
		return -EINVAL;
	}

	return 0;
}

static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c,
		struct nvgpu_gpfifo_entry *gpfifo,
		struct nvgpu_gpfifo_userdata userdata,
		u32 num_entries,
		u32 flags,
		struct nvgpu_channel_fence *fence,
		struct nvgpu_fence_type **fence_out,
		struct nvgpu_swprofiler *profiler)
{
	struct gk20a *g = c->g;
	int err;

	err = check_submit_allowed(c);
	if (err != 0) {
		return err;
	}

	/*
	 * Fifo not large enough for the request. Return an error immediately.
	 * The kernel can insert gpfifo entries before and after the user
	 * gpfifos, so account for those extra entries on top of the user
	 * request. Also, HW with fifo size N can accept only N-1 entries.
	 */
	if (c->gpfifo.entry_num - 1U < num_entries + EXTRA_GPFIFO_ENTRIES) {
		nvgpu_err(g, "not enough gpfifo space allocated");
		return -ENOMEM;
	}

	nvgpu_swprofile_snapshot(profiler, PROF_KICKOFF_ENTRY);

	/* update debug settings */
	nvgpu_ltc_sync_enabled(g);

	nvgpu_log_info(g, "channel %d", c->chid);

#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
	if (c->deterministic) {
		err = nvgpu_submit_deterministic(c, gpfifo, userdata,
				num_entries, flags, fence, fence_out, profiler);
	} else
#endif
	{
		err = nvgpu_submit_nondeterministic(c, gpfifo, userdata,
				num_entries, flags, fence, fence_out, profiler);
	}

	if (err != 0) {
		return err;
	}

#ifdef CONFIG_NVGPU_TRACE
	if (fence_out != NULL && *fence_out != NULL) {
		/*
		 * This is not a good example of how to use the fence type.
		 * Don't touch the priv data. The debug trace is special.
		 */
#ifdef CONFIG_TEGRA_GK20A_NVHOST
		trace_gk20a_channel_submitted_gpfifo(g->name,
				c->chid, num_entries, flags,
				(*fence_out)->priv.syncpt_id,
				(*fence_out)->priv.syncpt_value);
#else
		trace_gk20a_channel_submitted_gpfifo(g->name,
				c->chid, num_entries, flags,
				0, 0);
#endif
	} else {
		trace_gk20a_channel_submitted_gpfifo(g->name,
				c->chid, num_entries, flags,
				0, 0);
	}
#endif

	nvgpu_log_info(g, "post-submit put %d, get %d, size %d",
		c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);

	nvgpu_swprofile_snapshot(profiler, PROF_KICKOFF_END);

	nvgpu_log_fn(g, "done");
	return err;
}

int nvgpu_submit_channel_gpfifo_user(struct nvgpu_channel *c,
		struct nvgpu_gpfifo_userdata userdata,
		u32 num_entries,
		u32 flags,
		struct nvgpu_channel_fence *fence,
		struct nvgpu_user_fence *fence_out,
		struct nvgpu_swprofiler *profiler)
{
	struct nvgpu_fence_type *fence_internal = NULL;
	int err;

	err = nvgpu_submit_channel_gpfifo(c, NULL, userdata, num_entries,
			flags, fence, &fence_internal, profiler);
	if (err == 0 && fence_internal != NULL) {
		*fence_out = nvgpu_fence_extract_user(fence_internal);
		nvgpu_fence_put(fence_internal);
	}
	return err;
}

int nvgpu_submit_channel_gpfifo_kernel(struct nvgpu_channel *c,
		struct nvgpu_gpfifo_entry *gpfifo,
		u32 num_entries,
		u32 flags,
		struct nvgpu_channel_fence *fence,
		struct nvgpu_fence_type **fence_out)
{
	struct nvgpu_gpfifo_userdata userdata = { NULL, NULL };

	return nvgpu_submit_channel_gpfifo(c, gpfifo, userdata, num_entries,
			flags, fence, fence_out, NULL);
}
1120
drivers/gpu/nvgpu/common/fifo/tsg.c
Normal file
File diff suppressed because it is too large
Load Diff
157
drivers/gpu/nvgpu/common/fifo/userd.c
Normal file
@@ -0,0 +1,157 @@
/*
 * USERD
 *
 * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <nvgpu/trace.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/channel.h>
#include <nvgpu/fifo.h>
#include <nvgpu/fifo/userd.h>
#include <nvgpu/vm_area.h>
#include <nvgpu/dma.h>

int nvgpu_userd_init_slabs(struct gk20a *g)
{
	struct nvgpu_fifo *f = &g->fifo;
	int err;

	nvgpu_mutex_init(&f->userd_mutex);

	f->num_channels_per_slab = NVGPU_CPU_PAGE_SIZE / g->ops.userd.entry_size(g);
	f->num_userd_slabs =
		DIV_ROUND_UP(f->num_channels, f->num_channels_per_slab);

	f->userd_slabs = nvgpu_big_zalloc(g, f->num_userd_slabs *
				sizeof(struct nvgpu_mem));
	if (f->userd_slabs == NULL) {
		nvgpu_err(g, "could not allocate userd slabs");
		err = -ENOMEM;
		goto clean_up;
	}

	return 0;

clean_up:
	nvgpu_mutex_destroy(&f->userd_mutex);

	return err;
}

void nvgpu_userd_free_slabs(struct gk20a *g)
{
	struct nvgpu_fifo *f = &g->fifo;
	u32 slab;

	for (slab = 0; slab < f->num_userd_slabs; slab++) {
		nvgpu_dma_free(g, &f->userd_slabs[slab]);
	}
	nvgpu_big_free(g, f->userd_slabs);
	f->userd_slabs = NULL;

	nvgpu_mutex_destroy(&f->userd_mutex);
}

int nvgpu_userd_init_channel(struct gk20a *g, struct nvgpu_channel *c)
{
	struct nvgpu_fifo *f = &g->fifo;
	struct nvgpu_mem *mem;
	u32 slab = c->chid / f->num_channels_per_slab;
	int err = 0;

	if (slab >= f->num_userd_slabs) {
		nvgpu_err(g, "chid %u, slab %u out of range (max=%u)",
			c->chid, slab, f->num_userd_slabs);
		return -EINVAL;
	}

	mem = &g->fifo.userd_slabs[slab];

	nvgpu_mutex_acquire(&f->userd_mutex);
	if (!nvgpu_mem_is_valid(mem)) {
		err = nvgpu_dma_alloc_sys(g, NVGPU_CPU_PAGE_SIZE, mem);
		if (err != 0) {
			nvgpu_err(g, "userd allocation failed, err=%d", err);
			goto done;
		}

		if (g->ops.mm.is_bar1_supported(g)) {
			mem->gpu_va = g->ops.mm.bar1_map_userd(g, mem,
					slab * NVGPU_CPU_PAGE_SIZE);
		}
	}
	c->userd_mem = mem;
	c->userd_offset = (c->chid % f->num_channels_per_slab) *
				g->ops.userd.entry_size(g);
	c->userd_iova = nvgpu_channel_userd_addr(c);

	nvgpu_log(g, gpu_dbg_info,
		"chid=%u slab=%u mem=%p offset=%u addr=%llx gpu_va=%llx",
		c->chid, slab, mem, c->userd_offset,
		nvgpu_channel_userd_addr(c),
		nvgpu_channel_userd_gpu_va(c));

done:
	nvgpu_mutex_release(&f->userd_mutex);
	return err;
}
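
/*
 * Worked example of the slab arithmetic above, with illustrative values (the
 * real ones come from g->ops.userd.entry_size() and NVGPU_CPU_PAGE_SIZE):
 * with a 512-byte USERD entry and a 4 KiB CPU page, each slab holds
 * 4096 / 512 = 8 channels, so chid 19 maps to slab 19 / 8 = 2 at byte offset
 * (19 % 8) * 512 = 1536 within that slab's page.
 */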

int nvgpu_userd_setup_sw(struct gk20a *g)
{
	struct nvgpu_fifo *f = &g->fifo;
	int err;
	u32 size, num_pages;

	err = nvgpu_userd_init_slabs(g);
	if (err != 0) {
		nvgpu_err(g, "failed to init userd support");
		return err;
	}

	size = f->num_channels * g->ops.userd.entry_size(g);
	num_pages = DIV_ROUND_UP(size, NVGPU_CPU_PAGE_SIZE);
	err = nvgpu_vm_area_alloc(g->mm.bar1.vm,
			num_pages, NVGPU_CPU_PAGE_SIZE, &f->userd_gpu_va, 0);
	if (err != 0) {
		nvgpu_err(g, "userd gpu va allocation failed, err=%d", err);
		goto clean_up;
	}

	return 0;

clean_up:
	nvgpu_userd_free_slabs(g);

	return err;
}
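
/*
 * Worked example of the BAR1 sizing above, again with illustrative values:
 * 512 channels with a 512-byte USERD entry need 256 KiB, i.e. 64 pages of
 * 4 KiB, so nvgpu_vm_area_alloc() reserves a 64-page BAR1 VA window. Judging
 * by the slab * NVGPU_CPU_PAGE_SIZE offset passed to bar1_map_userd() in
 * nvgpu_userd_init_channel(), that window appears to be populated lazily,
 * one page per slab.
 */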

void nvgpu_userd_cleanup_sw(struct gk20a *g)
{
	struct nvgpu_fifo *f = &g->fifo;

	if (f->userd_gpu_va != 0ULL) {
		(void) nvgpu_vm_area_free(g->mm.bar1.vm, f->userd_gpu_va);
		f->userd_gpu_va = 0ULL;
	}

	nvgpu_userd_free_slabs(g);
}
278
drivers/gpu/nvgpu/common/fifo/watchdog.c
Normal file
@@ -0,0 +1,278 @@
/*
 * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <nvgpu/gk20a.h>
#include <nvgpu/channel.h>
#include <nvgpu/watchdog.h>
#include <nvgpu/error_notifier.h>
#include <nvgpu/string.h>

struct nvgpu_channel_wdt {
	struct gk20a *g;

	/* lock protects the running timer state */
	struct nvgpu_spinlock lock;
	struct nvgpu_timeout timer;
	bool running;
	struct nvgpu_channel_wdt_state ch_state;

	/* lock not needed */
	u32 limit_ms;
	bool enabled;
};

struct nvgpu_channel_wdt *nvgpu_channel_wdt_alloc(struct gk20a *g)
{
	struct nvgpu_channel_wdt *wdt = nvgpu_kzalloc(g, sizeof(*wdt));

	if (wdt == NULL) {
		return NULL;
	}

	wdt->g = g;
	nvgpu_spinlock_init(&wdt->lock);
	wdt->enabled = true;
	wdt->limit_ms = g->ch_wdt_init_limit_ms;

	return wdt;
}

void nvgpu_channel_wdt_destroy(struct nvgpu_channel_wdt *wdt)
{
	nvgpu_kfree(wdt->g, wdt);
}

void nvgpu_channel_wdt_enable(struct nvgpu_channel_wdt *wdt)
{
	wdt->enabled = true;
}

void nvgpu_channel_wdt_disable(struct nvgpu_channel_wdt *wdt)
{
	wdt->enabled = false;
}

bool nvgpu_channel_wdt_enabled(struct nvgpu_channel_wdt *wdt)
{
	return wdt->enabled;
}

void nvgpu_channel_wdt_set_limit(struct nvgpu_channel_wdt *wdt, u32 limit_ms)
{
	wdt->limit_ms = limit_ms;
}

u32 nvgpu_channel_wdt_limit(struct nvgpu_channel_wdt *wdt)
{
	return wdt->limit_ms;
}
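
/*
 * Illustrative lifecycle sketch (not part of this file): a channel would
 * typically allocate its watchdog once, optionally override the default
 * limit, and destroy it on channel close. Error handling is minimal and the
 * 2000U limit is an arbitrary example value; example_wdt_setup() is a
 * hypothetical name.
 */
static struct nvgpu_channel_wdt *example_wdt_setup(struct gk20a *g)
{
	struct nvgpu_channel_wdt *wdt = nvgpu_channel_wdt_alloc(g);

	if (wdt != NULL) {
		nvgpu_channel_wdt_set_limit(wdt, 2000U);
		nvgpu_channel_wdt_enable(wdt);
	}
	return wdt;
}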

static void nvgpu_channel_wdt_init(struct nvgpu_channel_wdt *wdt,
		struct nvgpu_channel_wdt_state *state)
{
	struct gk20a *g = wdt->g;
	int ret;

	ret = nvgpu_timeout_init(g, &wdt->timer,
			wdt->limit_ms,
			NVGPU_TIMER_CPU_TIMER);
	if (ret != 0) {
		nvgpu_err(g, "timeout_init failed: %d", ret);
		return;
	}

	wdt->ch_state = *state;
	wdt->running = true;
}

/**
 * Start a timeout counter (watchdog) on this channel.
 *
 * Trigger a watchdog to recover the channel after the per-platform timeout
 * duration (but strictly no earlier) if the channel hasn't advanced within
 * that time.
 *
 * If the timeout is already running, do nothing. This should be called when
 * new jobs are submitted. The timeout will stop when the last tracked job
 * finishes, making the channel idle.
 */
void nvgpu_channel_wdt_start(struct nvgpu_channel_wdt *wdt,
		struct nvgpu_channel_wdt_state *state)
{
	if (!nvgpu_is_timeouts_enabled(wdt->g)) {
		return;
	}

	if (!wdt->enabled) {
		return;
	}

	nvgpu_spinlock_acquire(&wdt->lock);

	if (wdt->running) {
		nvgpu_spinlock_release(&wdt->lock);
		return;
	}
	nvgpu_channel_wdt_init(wdt, state);
	nvgpu_spinlock_release(&wdt->lock);
}

/**
 * Stop a running timeout counter (watchdog) on this channel.
 *
 * Make the watchdog consider the channel not running, so that it won't get
 * recovered even if no progress is detected. Progress is not tracked if the
 * watchdog is turned off.
 *
 * No guarantees are made about concurrent execution of the timeout handler.
 * (This should be called from an update handler running in the same thread
 * as the watchdog.)
 */
bool nvgpu_channel_wdt_stop(struct nvgpu_channel_wdt *wdt)
{
	bool was_running;

	nvgpu_spinlock_acquire(&wdt->lock);
	was_running = wdt->running;
	wdt->running = false;
	nvgpu_spinlock_release(&wdt->lock);
	return was_running;
}

/**
 * Continue a previously stopped timeout.
 *
 * Enable the timeout again but don't reinitialize its timer.
 *
 * No guarantees are made about concurrent execution of the timeout handler.
 * (This should be called from an update handler running in the same thread
 * as the watchdog.)
 */
void nvgpu_channel_wdt_continue(struct nvgpu_channel_wdt *wdt)
{
	nvgpu_spinlock_acquire(&wdt->lock);
	wdt->running = true;
	nvgpu_spinlock_release(&wdt->lock);
}

/**
 * Reset the counter of a timeout that is in effect.
 *
 * If this channel has an active timeout, act as if something happened on the
 * channel right now.
 *
 * Rewinding a stopped counter is irrelevant; this is a no-op for non-running
 * timeouts. Stopped timeouts can only be started (which is technically a
 * rewind too) or continued (where the stop is actually a pause).
 */
void nvgpu_channel_wdt_rewind(struct nvgpu_channel_wdt *wdt,
		struct nvgpu_channel_wdt_state *state)
{
	nvgpu_spinlock_acquire(&wdt->lock);
	if (wdt->running) {
		nvgpu_channel_wdt_init(wdt, state);
	}
	nvgpu_spinlock_release(&wdt->lock);
}

/**
 * Check if the watchdog is running.
 *
 * A running watchdog means one that is requested to run and expire in the
 * future. The state of a running watchdog has to be checked periodically to
 * see if it has expired.
 */
bool nvgpu_channel_wdt_running(struct nvgpu_channel_wdt *wdt)
{
	bool running;

	nvgpu_spinlock_acquire(&wdt->lock);
	running = wdt->running;
	nvgpu_spinlock_release(&wdt->lock);

	return running;
}

/**
 * Check if a channel has been stuck for the watchdog limit.
 *
 * Test if this channel has really got stuck at this point by checking if its
 * {gp,pb}_get have advanced or not. If progress was detected, start the timer
 * from zero again. If no {gp,pb}_get action happened in the watchdog time
 * limit, return true; else return false.
 */
static bool nvgpu_channel_wdt_handler(struct nvgpu_channel_wdt *wdt,
		struct nvgpu_channel_wdt_state *state)
{
	struct gk20a *g = wdt->g;
	struct nvgpu_channel_wdt_state previous_state;

	nvgpu_log_fn(g, " ");

	/* Get status but keep timer running */
	nvgpu_spinlock_acquire(&wdt->lock);
	previous_state = wdt->ch_state;
	nvgpu_spinlock_release(&wdt->lock);

	if (nvgpu_memcmp((const u8 *)state,
			(const u8 *)&previous_state,
			sizeof(*state)) != 0) {
		/* Channel has advanced, timer keeps going but resets */
		nvgpu_channel_wdt_rewind(wdt, state);
		return false;
	}

	if (!nvgpu_timeout_peek_expired(&wdt->timer)) {
		/* Seems stuck but still waiting to time out */
		return false;
	}

	return true;
}

/**
 * Test if the per-channel watchdog is on; check the timeout in that case.
 *
 * Each channel has an expiration-time-based watchdog. The timer is
 * (re)initialized in two situations: when a new job is submitted on an idle
 * channel and when the timeout is checked but progress is detected. The
 * watchdog timeout limit is therefore a coarse sliding window.
 *
 * The timeout is stopped (disabled) after the last job in a row finishes
 * and marks the channel idle.
 */
bool nvgpu_channel_wdt_check(struct nvgpu_channel_wdt *wdt,
		struct nvgpu_channel_wdt_state *state)
{
	bool running;

	nvgpu_spinlock_acquire(&wdt->lock);
	running = wdt->running;
	nvgpu_spinlock_release(&wdt->lock);

	if (running) {
		return nvgpu_channel_wdt_handler(wdt, state);
	} else {
		return false;
	}
}