Open source GPL/LGPL release

This commit is contained in:
svcmobrel-release
2022-07-21 16:03:29 -07:00
commit f338182221
2260 changed files with 576813 additions and 0 deletions

File diff suppressed because it is too large

@@ -0,0 +1,199 @@
/*
* Copyright (c) 2015-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "channel_wdt.h"
#include "channel_worker.h"
#include <nvgpu/watchdog.h>
#include <nvgpu/channel.h>
#include <nvgpu/error_notifier.h>
#include <nvgpu/gk20a.h>
void nvgpu_channel_set_wdt_debug_dump(struct nvgpu_channel *ch, bool dump)
{
ch->wdt_debug_dump = dump;
}
static struct nvgpu_channel_wdt_state nvgpu_channel_collect_wdt_state(
struct nvgpu_channel *ch)
{
struct gk20a *g = ch->g;
struct nvgpu_channel_wdt_state state = { 0, 0 };
/*
* Note: just checking for nvgpu_channel_wdt_enabled() is not enough at
* the moment because system suspend puts g->regs away but doesn't stop
* the worker thread that runs the watchdog. This might need to be
* cleared up in the future.
*/
if (nvgpu_channel_wdt_running(ch->wdt)) {
/*
* Read the state only if the wdt is on to avoid unnecessary
* accesses. The kernel mem for userd may not even exist; this
* channel could be in usermode submit mode.
*/
state.gp_get = g->ops.userd.gp_get(g, ch);
state.pb_get = g->ops.userd.pb_get(g, ch);
}
return state;
}
void nvgpu_channel_launch_wdt(struct nvgpu_channel *ch)
{
struct nvgpu_channel_wdt_state state = nvgpu_channel_collect_wdt_state(ch);
/*
* FIXME: channel recovery can race the submit path and can start even
* after this, but this check is the best we can do for now.
*/
if (!nvgpu_channel_check_unserviceable(ch)) {
nvgpu_channel_wdt_start(ch->wdt, &state);
}
}
void nvgpu_channel_restart_all_wdts(struct gk20a *g)
{
struct nvgpu_fifo *f = &g->fifo;
u32 chid;
for (chid = 0; chid < f->num_channels; chid++) {
struct nvgpu_channel *ch = nvgpu_channel_from_id(g, chid);
if (ch != NULL) {
if ((ch->wdt != NULL) &&
!nvgpu_channel_check_unserviceable(ch)) {
struct nvgpu_channel_wdt_state state =
nvgpu_channel_collect_wdt_state(ch);
nvgpu_channel_wdt_rewind(ch->wdt, &state);
}
nvgpu_channel_put(ch);
}
}
}
static void nvgpu_channel_recover_from_wdt(struct nvgpu_channel *ch)
{
struct gk20a *g = ch->g;
nvgpu_log_fn(g, " ");
if (nvgpu_channel_check_unserviceable(ch)) {
/* channel is already recovered */
nvgpu_info(g, "chid: %d unserviceable but wdt was ON", ch->chid);
return;
}
nvgpu_err(g, "Job on channel %d timed out", ch->chid);
/* force reset calls gk20a_debug_dump but not this */
if (ch->wdt_debug_dump) {
gk20a_gr_debug_dump(g);
}
#ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL
if (g->ops.tsg.force_reset(ch,
NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT,
ch->wdt_debug_dump) != 0) {
nvgpu_err(g, "failed tsg force reset for chid: %d", ch->chid);
}
#endif
}
/*
* Test the watchdog progress. If the channel is stuck, reset it.
*
* The gpu is implicitly on at this point because the watchdog can only run on
* channels that have submitted jobs pending for cleanup.
*/
static void nvgpu_channel_check_wdt(struct nvgpu_channel *ch)
{
struct nvgpu_channel_wdt_state state = nvgpu_channel_collect_wdt_state(ch);
if (nvgpu_channel_wdt_check(ch->wdt, &state)) {
nvgpu_channel_recover_from_wdt(ch);
}
}
void nvgpu_channel_worker_poll_init(struct nvgpu_worker *worker)
{
struct nvgpu_channel_worker *ch_worker =
nvgpu_channel_worker_from_worker(worker);
int ret;
ch_worker->watchdog_interval = 100U;
ret = nvgpu_timeout_init(worker->g, &ch_worker->timeout,
ch_worker->watchdog_interval, NVGPU_TIMER_CPU_TIMER);
if (ret != 0) {
nvgpu_err(worker->g, "timeout_init failed: %d", ret);
}
}
/**
* Loop over every living channel, check its timeout and handle stuck channels.
*/
static void nvgpu_channel_poll_wdt(struct gk20a *g)
{
unsigned int chid;
for (chid = 0; chid < g->fifo.num_channels; chid++) {
struct nvgpu_channel *ch = nvgpu_channel_from_id(g, chid);
if (ch != NULL) {
if (!nvgpu_channel_check_unserviceable(ch)) {
nvgpu_channel_check_wdt(ch);
}
nvgpu_channel_put(ch);
}
}
}
void nvgpu_channel_worker_poll_wakeup_post_process_item(
struct nvgpu_worker *worker)
{
struct gk20a *g = worker->g;
struct nvgpu_channel_worker *ch_worker =
nvgpu_channel_worker_from_worker(worker);
int ret;
if (nvgpu_timeout_peek_expired(&ch_worker->timeout)) {
nvgpu_channel_poll_wdt(g);
ret = nvgpu_timeout_init(g, &ch_worker->timeout,
ch_worker->watchdog_interval,
NVGPU_TIMER_CPU_TIMER);
if (ret != 0) {
nvgpu_err(g, "timeout_init failed: %d", ret);
}
}
}
u32 nvgpu_channel_worker_poll_wakeup_condition_get_timeout(
struct nvgpu_worker *worker)
{
struct nvgpu_channel_worker *ch_worker =
nvgpu_channel_worker_from_worker(worker);
return ch_worker->watchdog_interval;
}
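/*
 * Illustrative sketch, not part of the driver: the watchdog logic above
 * amounts to snapshotting GP_GET/PB_GET when a job is submitted and
 * comparing that snapshot against a fresh one when the poll timer fires.
 * The type and helper below are hypothetical stand-ins for
 * nvgpu_channel_wdt_state and the progress check, assuming that any
 * movement of either pointer counts as forward progress.
 */
struct wdt_state_sketch {
        unsigned long gp_get;
        unsigned long pb_get;
};

static bool wdt_sketch_made_progress(const struct wdt_state_sketch *prev,
                                     const struct wdt_state_sketch *now)
{
        /* Progress means the GP FIFO or pushbuffer get pointer moved. */
        return (prev->gp_get != now->gp_get) ||
               (prev->pb_get != now->pb_get);
}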

@@ -0,0 +1,42 @@
/*
* Copyright (c) 2015-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NVGPU_COMMON_FIFO_CHANNEL_WDT_H
#define NVGPU_COMMON_FIFO_CHANNEL_WDT_H
#include <nvgpu/types.h>
struct nvgpu_channel;
#ifdef CONFIG_NVGPU_CHANNEL_WDT
struct nvgpu_worker;
void nvgpu_channel_launch_wdt(struct nvgpu_channel *ch);
void nvgpu_channel_worker_poll_init(struct nvgpu_worker *worker);
void nvgpu_channel_worker_poll_wakeup_post_process_item(
struct nvgpu_worker *worker);
u32 nvgpu_channel_worker_poll_wakeup_condition_get_timeout(
struct nvgpu_worker *worker);
#else
static inline void nvgpu_channel_launch_wdt(struct nvgpu_channel *ch) {}
#endif /* CONFIG_NVGPU_CHANNEL_WDT */
#endif /* NVGPU_COMMON_FIFO_CHANNEL_WDT_H */

@@ -0,0 +1,118 @@
/*
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "channel_worker.h"
#include "channel_wdt.h"
#include <nvgpu/worker.h>
#include <nvgpu/channel.h>
static inline struct nvgpu_channel *
nvgpu_channel_from_worker_item(struct nvgpu_list_node *node)
{
return (struct nvgpu_channel *)
((uintptr_t)node - offsetof(struct nvgpu_channel, worker_item));
};
static void nvgpu_channel_worker_poll_wakeup_process_item(
struct nvgpu_list_node *work_item)
{
struct nvgpu_channel *ch = nvgpu_channel_from_worker_item(work_item);
nvgpu_assert(ch != NULL);
nvgpu_log_fn(ch->g, " ");
nvgpu_channel_clean_up_jobs(ch);
/* ref taken when enqueued */
nvgpu_channel_put(ch);
}
static const struct nvgpu_worker_ops channel_worker_ops = {
#ifdef CONFIG_NVGPU_CHANNEL_WDT
.pre_process = nvgpu_channel_worker_poll_init,
.wakeup_post_process =
nvgpu_channel_worker_poll_wakeup_post_process_item,
.wakeup_timeout =
nvgpu_channel_worker_poll_wakeup_condition_get_timeout,
#endif
.wakeup_early_exit = NULL,
.wakeup_process_item =
nvgpu_channel_worker_poll_wakeup_process_item,
.wakeup_condition = NULL,
};
/**
* Initialize the channel worker's metadata and start the background thread.
*/
int nvgpu_channel_worker_init(struct gk20a *g)
{
struct nvgpu_worker *worker = &g->channel_worker.worker;
nvgpu_worker_init_name(worker, "nvgpu_channel_poll", g->name);
return nvgpu_worker_init(g, worker, &channel_worker_ops);
}
void nvgpu_channel_worker_deinit(struct gk20a *g)
{
struct nvgpu_worker *worker = &g->channel_worker.worker;
nvgpu_worker_deinit(worker);
}
/**
* Append a channel to the worker's list, if not there already.
*
* The worker thread processes work items (channels in its work list) and polls
* for other things. This adds @ch to the end of the list and wakes the worker
* up immediately. If the channel already existed in the list, it's not added,
* because in that case it has been scheduled already but has not yet been
* processed.
*/
void nvgpu_channel_worker_enqueue(struct nvgpu_channel *ch)
{
struct gk20a *g = ch->g;
int ret;
nvgpu_log_fn(g, " ");
/*
* Ref released when this item gets processed. The caller should hold
* one ref already, so normally shouldn't fail, but the channel could
* end up being freed between the time the caller got its reference and
* the time we end up here (e.g., if the client got killed); if so, just
* return.
*/
if (nvgpu_channel_get(ch) == NULL) {
nvgpu_info(g, "cannot get ch ref for worker!");
return;
}
ret = nvgpu_worker_enqueue(&g->channel_worker.worker,
&ch->worker_item);
if (ret != 0) {
nvgpu_channel_put(ch);
return;
}
}
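/*
 * Hypothetical usage sketch, not part of the driver: a completion path
 * that already holds a channel reference defers job cleanup to the
 * worker. The function name is illustrative only; the real callers live
 * in the submit and interrupt paths.
 */
static void example_defer_job_cleanup(struct nvgpu_channel *ch)
{
        /*
         * The enqueue takes its own reference (or logs and bails out if
         * the channel is already gone), so the caller's reference can be
         * dropped independently afterwards.
         */
        nvgpu_channel_worker_enqueue(ch);
}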

@@ -0,0 +1,37 @@
/*
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NVGPU_COMMON_FIFO_CHANNEL_WORKER_H
#define NVGPU_COMMON_FIFO_CHANNEL_WORKER_H
#include <nvgpu/gk20a.h>
void nvgpu_channel_worker_enqueue(struct nvgpu_channel *ch);
static inline struct nvgpu_channel_worker *
nvgpu_channel_worker_from_worker(struct nvgpu_worker *worker)
{
return (struct nvgpu_channel_worker *)
((uintptr_t)worker - offsetof(struct nvgpu_channel_worker, worker));
};
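/*
 * Illustrative, self-contained sketch of the container-of pattern used
 * above, with hypothetical types: given a pointer to an embedded member,
 * subtracting the member's offset recovers the containing structure.
 * This assumes the member is always embedded by value, never allocated
 * separately (offsetof comes from <stddef.h>, uintptr_t from <stdint.h>).
 */
struct member_sketch { int dummy; };

struct container_sketch {
        int other_state;
        struct member_sketch member; /* embedded by value */
};

static inline struct container_sketch *
container_from_member(struct member_sketch *m)
{
        return (struct container_sketch *)
                ((uintptr_t)m - offsetof(struct container_sketch, member));
}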
#endif /* NVGPU_COMMON_FIFO_CHANNEL_WORKER_H */

@@ -0,0 +1,88 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/io.h>
#include <nvgpu/engine_status.h>
bool nvgpu_engine_status_is_ctxsw_switch(struct nvgpu_engine_status_info
*engine_status)
{
return engine_status->ctxsw_status == NVGPU_CTX_STATUS_CTXSW_SWITCH;
}
bool nvgpu_engine_status_is_ctxsw_load(struct nvgpu_engine_status_info
*engine_status)
{
return engine_status->ctxsw_status == NVGPU_CTX_STATUS_CTXSW_LOAD;
}
bool nvgpu_engine_status_is_ctxsw_save(struct nvgpu_engine_status_info
*engine_status)
{
return engine_status->ctxsw_status == NVGPU_CTX_STATUS_CTXSW_SAVE;
}
bool nvgpu_engine_status_is_ctxsw(struct nvgpu_engine_status_info
*engine_status)
{
return (nvgpu_engine_status_is_ctxsw_switch(engine_status) ||
nvgpu_engine_status_is_ctxsw_load(engine_status) ||
nvgpu_engine_status_is_ctxsw_save(engine_status));
}
bool nvgpu_engine_status_is_ctxsw_invalid(struct nvgpu_engine_status_info
*engine_status)
{
return engine_status->ctxsw_status == NVGPU_CTX_STATUS_INVALID;
}
bool nvgpu_engine_status_is_ctxsw_valid(struct nvgpu_engine_status_info
*engine_status)
{
return engine_status->ctxsw_status == NVGPU_CTX_STATUS_VALID;
}
bool nvgpu_engine_status_is_ctx_type_tsg(struct nvgpu_engine_status_info
*engine_status)
{
return engine_status->ctx_id_type == ENGINE_STATUS_CTX_ID_TYPE_TSGID;
}
bool nvgpu_engine_status_is_next_ctx_type_tsg(struct nvgpu_engine_status_info
*engine_status)
{
return engine_status->ctx_next_id_type ==
ENGINE_STATUS_CTX_NEXT_ID_TYPE_TSGID;
}
void nvgpu_engine_status_get_ctx_id_type(struct nvgpu_engine_status_info
*engine_status, u32 *ctx_id, u32 *ctx_type)
{
*ctx_id = engine_status->ctx_id;
*ctx_type = engine_status->ctx_id_type;
}
void nvgpu_engine_status_get_next_ctx_id_type(struct nvgpu_engine_status_info
*engine_status, u32 *ctx_next_id,
u32 *ctx_next_type)
{
*ctx_next_id = engine_status->ctx_next_id;
*ctx_next_type = engine_status->ctx_next_id_type;
}

@@ -0,0 +1,960 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/log.h>
#include <nvgpu/errno.h>
#include <nvgpu/timers.h>
#include <nvgpu/bitops.h>
#ifdef CONFIG_NVGPU_LS_PMU
#include <nvgpu/pmu.h>
#include <nvgpu/pmu/mutex.h>
#endif
#include <nvgpu/runlist.h>
#include <nvgpu/engines.h>
#include <nvgpu/engine_status.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/pbdma_status.h>
#include <nvgpu/power_features/pg.h>
#include <nvgpu/channel.h>
#include <nvgpu/soc.h>
#include <nvgpu/device.h>
#include <nvgpu/gr/gr_falcon.h>
#include <nvgpu/gr/gr.h>
#include <nvgpu/gr/gr_instances.h>
#include <nvgpu/fifo.h>
#include <nvgpu/static_analysis.h>
#include <nvgpu/swprofile.h>
#include <nvgpu/fifo/swprofile.h>
#define FECS_METHOD_WFI_RESTORE 0x80000U
enum nvgpu_fifo_engine nvgpu_engine_enum_from_dev(struct gk20a *g,
const struct nvgpu_device *dev)
{
enum nvgpu_fifo_engine ret = NVGPU_ENGINE_INVAL;
if (nvgpu_device_is_graphics(g, dev)) {
ret = NVGPU_ENGINE_GR;
} else if (nvgpu_device_is_ce(g, dev)) {
/* For now, all CE engines have separate runlists. We can
* identify the NVGPU_ENGINE_GRCE type CE by comparing its
* runlist_id with the GR runlist_id in init_info().
*/
ret = NVGPU_ENGINE_ASYNC_CE;
} else {
ret = NVGPU_ENGINE_INVAL;
}
return ret;
}
const struct nvgpu_device *nvgpu_engine_get_active_eng_info(
struct gk20a *g, u32 engine_id)
{
struct nvgpu_fifo *f = &g->fifo;
if (engine_id >= f->max_engines) {
return NULL;
}
return f->host_engines[engine_id];
}
bool nvgpu_engine_check_valid_id(struct gk20a *g, u32 engine_id)
{
struct nvgpu_fifo *f = &g->fifo;
if (engine_id >= f->max_engines) {
return false;
}
return f->host_engines[engine_id] != NULL;
}
u32 nvgpu_engine_get_gr_id_for_inst(struct gk20a *g, u32 inst_id)
{
const struct nvgpu_device *dev;
dev = nvgpu_device_get(g, NVGPU_DEVTYPE_GRAPHICS, inst_id);
if (dev == NULL) {
nvgpu_warn(g, "No GR devices on this GPU for inst[%u]?!",
inst_id);
return NVGPU_INVALID_ENG_ID;
}
return dev->engine_id;
}
u32 nvgpu_engine_get_gr_id(struct gk20a *g)
{
/* Consider 1st available GR engine */
return nvgpu_engine_get_gr_id_for_inst(g, 0U);
}
u32 nvgpu_engine_act_interrupt_mask(struct gk20a *g, u32 engine_id)
{
const struct nvgpu_device *dev = NULL;
dev = nvgpu_engine_get_active_eng_info(g, engine_id);
if (dev == NULL) {
return 0;
}
return BIT32(dev->intr_id);
}
u32 nvgpu_gr_engine_interrupt_mask(struct gk20a *g)
{
const struct nvgpu_device *dev;
u32 intr_mask = 0U;
u32 i;
for (i = 0U; i < g->num_gr_instances; i++) {
dev = nvgpu_device_get(g, NVGPU_DEVTYPE_GRAPHICS,
nvgpu_gr_get_syspipe_id(g, i));
if (dev == NULL) {
continue;
}
intr_mask |= BIT32(dev->intr_id);
}
return intr_mask;
}
u32 nvgpu_ce_engine_interrupt_mask(struct gk20a *g)
{
const struct nvgpu_device *dev;
u32 i;
u32 mask = 0U;
/*
* For old chips - pre-Pascal - we have COPY[0-2], for new chips we
* have some number of LCE instances. For the purpose of this code we
* imagine a system that could have both; in reality that'll never be
* the case.
*
* This can be cleaned up in the future by defining a SW type for CE and
* hiding this ugliness in the device management code.
*/
for (i = NVGPU_DEVTYPE_COPY0; i <= NVGPU_DEVTYPE_COPY2; i++) {
dev = nvgpu_device_get(g, i, i - NVGPU_DEVTYPE_COPY0);
if (dev == NULL) {
continue;
}
mask |= BIT32(dev->intr_id);
}
/*
* Now take care of LCEs.
*/
for (i = 0U; i < nvgpu_device_count(g, NVGPU_DEVTYPE_LCE); i++) {
dev = nvgpu_device_get(g, NVGPU_DEVTYPE_LCE, i);
nvgpu_assert(dev != NULL);
mask |= BIT32(dev->intr_id);
}
return mask;
}
#ifdef CONFIG_NVGPU_FIFO_ENGINE_ACTIVITY
static void nvgpu_engine_enable_activity(struct gk20a *g,
const struct nvgpu_device *dev)
{
nvgpu_runlist_set_state(g, BIT32(dev->runlist_id), RUNLIST_ENABLED);
}
void nvgpu_engine_enable_activity_all(struct gk20a *g)
{
u32 i;
for (i = 0; i < g->fifo.num_engines; i++) {
nvgpu_engine_enable_activity(g, g->fifo.active_engines[i]);
}
}
int nvgpu_engine_disable_activity(struct gk20a *g,
const struct nvgpu_device *dev,
bool wait_for_idle)
{
u32 pbdma_chid = NVGPU_INVALID_CHANNEL_ID;
u32 engine_chid = NVGPU_INVALID_CHANNEL_ID;
#ifdef CONFIG_NVGPU_LS_PMU
u32 token = PMU_INVALID_MUTEX_OWNER_ID;
int mutex_ret = -EINVAL;
#endif
int err = 0;
struct nvgpu_channel *ch = NULL;
struct nvgpu_engine_status_info engine_status;
struct nvgpu_pbdma_status_info pbdma_status;
unsigned long runlist_served_pbdmas;
unsigned long bit;
u32 pbdma_id;
struct nvgpu_fifo *f = &g->fifo;
nvgpu_log_fn(g, " ");
g->ops.engine_status.read_engine_status_info(g, dev->engine_id,
&engine_status);
if (engine_status.is_busy && !wait_for_idle) {
return -EBUSY;
}
#ifdef CONFIG_NVGPU_LS_PMU
if (g->ops.pmu.is_pmu_supported(g)) {
mutex_ret = nvgpu_pmu_lock_acquire(g, g->pmu,
PMU_MUTEX_ID_FIFO, &token);
}
#endif
nvgpu_runlist_set_state(g, BIT32(dev->runlist_id),
RUNLIST_DISABLED);
runlist_served_pbdmas = f->runlists[dev->runlist_id]->pbdma_bitmask;
for_each_set_bit(bit, &runlist_served_pbdmas,
nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA)) {
pbdma_id = U32(bit);
/* chid from pbdma status */
g->ops.pbdma_status.read_pbdma_status_info(g,
pbdma_id,
&pbdma_status);
if (nvgpu_pbdma_status_is_chsw_valid(&pbdma_status) ||
nvgpu_pbdma_status_is_chsw_save(&pbdma_status)) {
pbdma_chid = pbdma_status.id;
} else if (nvgpu_pbdma_status_is_chsw_load(&pbdma_status) ||
nvgpu_pbdma_status_is_chsw_switch(&pbdma_status)) {
pbdma_chid = pbdma_status.next_id;
} else {
/* Nothing to do here */
}
if (pbdma_chid != NVGPU_INVALID_CHANNEL_ID) {
ch = nvgpu_channel_from_id(g, pbdma_chid);
if (ch != NULL) {
err = g->ops.fifo.preempt_channel(g, ch);
nvgpu_channel_put(ch);
}
if (err != 0) {
goto clean_up;
}
}
}
/* chid from engine status */
g->ops.engine_status.read_engine_status_info(g, dev->engine_id,
&engine_status);
if (nvgpu_engine_status_is_ctxsw_valid(&engine_status) ||
nvgpu_engine_status_is_ctxsw_save(&engine_status)) {
engine_chid = engine_status.ctx_id;
} else if (nvgpu_engine_status_is_ctxsw_switch(&engine_status) ||
nvgpu_engine_status_is_ctxsw_load(&engine_status)) {
engine_chid = engine_status.ctx_next_id;
} else {
/* Nothing to do here */
}
if (engine_chid != NVGPU_INVALID_ENG_ID && engine_chid != pbdma_chid) {
ch = nvgpu_channel_from_id(g, engine_chid);
if (ch != NULL) {
err = g->ops.fifo.preempt_channel(g, ch);
nvgpu_channel_put(ch);
}
if (err != 0) {
goto clean_up;
}
}
clean_up:
#ifdef CONFIG_NVGPU_LS_PMU
if (mutex_ret == 0) {
if (nvgpu_pmu_lock_release(g, g->pmu,
PMU_MUTEX_ID_FIFO, &token) != 0){
nvgpu_err(g, "failed to release PMU lock");
}
}
#endif
if (err != 0) {
nvgpu_log_fn(g, "failed");
nvgpu_engine_enable_activity(g, dev);
} else {
nvgpu_log_fn(g, "done");
}
return err;
}
int nvgpu_engine_disable_activity_all(struct gk20a *g,
bool wait_for_idle)
{
unsigned int i;
int err = 0, ret = 0;
for (i = 0; i < g->fifo.num_engines; i++) {
err = nvgpu_engine_disable_activity(g,
g->fifo.active_engines[i],
wait_for_idle);
if (err != 0) {
nvgpu_err(g, "failed to disable engine %d activity",
g->fifo.active_engines[i]->engine_id);
ret = err;
break;
}
}
if (err != 0) {
while (i-- != 0U) {
nvgpu_engine_enable_activity(g,
g->fifo.active_engines[i]);
}
}
return ret;
}
int nvgpu_engine_wait_for_idle(struct gk20a *g)
{
struct nvgpu_timeout timeout;
u32 delay = POLL_DELAY_MIN_US;
int ret = 0, err = 0;
u32 i, host_num_engines;
struct nvgpu_engine_status_info engine_status;
nvgpu_log_fn(g, " ");
host_num_engines =
nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES);
err = nvgpu_timeout_init(g, &timeout, nvgpu_get_poll_timeout(g),
NVGPU_TIMER_CPU_TIMER);
if (err != 0) {
return -EINVAL;
}
for (i = 0; i < host_num_engines; i++) {
if (!nvgpu_engine_check_valid_id(g, i)) {
continue;
}
ret = -ETIMEDOUT;
do {
g->ops.engine_status.read_engine_status_info(g, i,
&engine_status);
if (!engine_status.is_busy) {
ret = 0;
break;
}
nvgpu_usleep_range(delay, delay * 2U);
delay = min_t(u32,
delay << 1U, POLL_DELAY_MAX_US);
} while (nvgpu_timeout_expired(&timeout) == 0);
if (ret != 0) {
/* Possible causes:
* check the register settings programmed in the HAL by
* elcg_init_idle_filters and init_therm_setup_hw.
*/
nvgpu_err(g, "cannot idle engine: %u "
"engine_status: 0x%08x", i,
engine_status.reg_data);
break;
}
}
nvgpu_log_fn(g, "done");
return ret;
}
#endif /* CONFIG_NVGPU_FIFO_ENGINE_ACTIVITY */
int nvgpu_engine_setup_sw(struct gk20a *g)
{
struct nvgpu_fifo *f = &g->fifo;
int err = 0;
size_t size;
f->max_engines = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES);
size = nvgpu_safe_mult_u64(f->max_engines,
sizeof(struct nvgpu_device *));
/*
* Allocate the two device lists for host devices.
*/
f->host_engines = nvgpu_kzalloc(g, size);
if (f->host_engines == NULL) {
nvgpu_err(g, "OOM allocating host engine list");
return -ENOMEM;
}
f->active_engines = nvgpu_kzalloc(g, size);
if (f->active_engines == NULL) {
nvgpu_err(g, "no mem for active engine list");
err = -ENOMEM;
goto clean_up_engine_info;
}
err = nvgpu_engine_init_info(f);
if (err != 0) {
nvgpu_err(g, "init engine info failed");
goto clean_up;
}
return 0;
clean_up:
nvgpu_kfree(g, f->active_engines);
f->active_engines = NULL;
clean_up_engine_info:
nvgpu_kfree(g, f->host_engines);
f->host_engines = NULL;
return err;
}
void nvgpu_engine_cleanup_sw(struct gk20a *g)
{
struct nvgpu_fifo *f = &g->fifo;
f->num_engines = 0;
nvgpu_kfree(g, f->host_engines);
f->host_engines = NULL;
nvgpu_kfree(g, f->active_engines);
f->active_engines = NULL;
}
#ifdef CONFIG_NVGPU_ENGINE_RESET
static void nvgpu_engine_gr_reset(struct gk20a *g)
{
struct nvgpu_swprofiler *prof = &g->fifo.eng_reset_profiler;
int err = 0;
nvgpu_swprofile_snapshot(prof, PROF_ENG_RESET_PREAMBLE);
#ifdef CONFIG_NVGPU_POWER_PG
if (nvgpu_pg_elpg_disable(g) != 0) {
nvgpu_err(g, "failed to disable elpg");
}
#endif
nvgpu_swprofile_snapshot(prof, PROF_ENG_RESET_ELPG_DISABLE);
#ifdef CONFIG_NVGPU_FECS_TRACE
/*
* Resetting engine will alter read/write index. Need to flush
* circular buffer before re-enabling FECS.
*/
if (g->ops.gr.fecs_trace.reset != NULL) {
if (g->ops.gr.fecs_trace.reset(g) != 0) {
nvgpu_warn(g, "failed to reset fecs traces");
}
}
#endif
nvgpu_swprofile_snapshot(prof, PROF_ENG_RESET_FECS_TRACE_RESET);
/*
* The HALT_PIPELINE method and GR reset during recovery are supported
* starting with nvgpu-next simulation.
*/
err = g->ops.gr.falcon.ctrl_ctxsw(g,
NVGPU_GR_FALCON_METHOD_HALT_PIPELINE, 0U, NULL);
if (err != 0) {
nvgpu_err(g, "failed to halt gr pipe");
}
nvgpu_swprofile_snapshot(prof, PROF_ENG_RESET_HALT_PIPELINE);
/*
* Resetting only the engine is not enough; do the full init
* sequence.
*/
nvgpu_log(g, gpu_dbg_rec, "resetting gr engine");
err = nvgpu_gr_reset(g);
if (err != 0) {
nvgpu_err(g, "failed to reset gr engine");
}
#ifdef CONFIG_NVGPU_POWER_PG
if (nvgpu_pg_elpg_enable(g) != 0) {
nvgpu_err(g, "failed to set enable elpg");
}
nvgpu_swprofile_snapshot(prof, PROF_ENG_RESET_ELPG_REENABLE);
#endif
}
void nvgpu_engine_reset(struct gk20a *g, u32 engine_id)
{
struct nvgpu_swprofiler *prof = &g->fifo.eng_reset_profiler;
const struct nvgpu_device *dev;
int err = 0;
u32 gr_instance_id;
nvgpu_log_fn(g, " ");
if (g == NULL) {
return;
}
nvgpu_swprofile_begin_sample(prof);
dev = nvgpu_engine_get_active_eng_info(g, engine_id);
if (dev == NULL) {
nvgpu_err(g, "unsupported engine_id %d", engine_id);
return;
}
if (!nvgpu_device_is_ce(g, dev) &&
!nvgpu_device_is_graphics(g, dev)) {
nvgpu_warn(g, "Ignoring reset for non-host engine.");
return;
}
/*
* Simple case first: reset a copy engine.
*/
if (nvgpu_device_is_ce(g, dev)) {
err = nvgpu_mc_reset_dev(g, dev);
if (err != 0) {
nvgpu_log_info(g, "CE engine [id:%u] reset failed",
dev->engine_id);
}
return;
}
/*
* Now reset a GR engine.
*/
gr_instance_id =
nvgpu_grmgr_get_gr_instance_id_for_syspipe(
g, dev->inst_id);
nvgpu_gr_exec_for_instance(g,
gr_instance_id, nvgpu_engine_gr_reset(g));
}
#endif
u32 nvgpu_engine_get_fast_ce_runlist_id(struct gk20a *g)
{
const struct nvgpu_device *dev;
u32 nr_lces;
u32 i;
/*
* Obtain a runlist ID for the fastest available CE. The priority order
* is:
*
* 1. Last available LCE
* 2. Last available COPY[0-2]
* 3. GRAPHICS runlist as a last resort.
*/
nr_lces = nvgpu_device_count(g, NVGPU_DEVTYPE_LCE);
if (nr_lces > 0U) {
dev = nvgpu_device_get(g,
NVGPU_DEVTYPE_LCE,
nr_lces - 1U);
nvgpu_assert(dev != NULL);
return dev->runlist_id;
}
/*
* Note: this only works since NVGPU_DEVTYPE_GRAPHICS is 0 and the COPYx
* are all > 0.
*/
for (i = NVGPU_DEVTYPE_COPY2; i >= NVGPU_DEVTYPE_COPY0; i--) {
dev = nvgpu_device_get(g, i, i - NVGPU_DEVTYPE_COPY0);
if (dev != NULL) {
return dev->runlist_id;
}
}
/*
* Fall back to GR.
*/
dev = nvgpu_device_get(g, NVGPU_DEVTYPE_GRAPHICS, 0);
nvgpu_assert(dev != NULL);
return dev->runlist_id;
}
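/*
 * Worked example, illustrative only: on a GPU that exposes LCE0..LCE8
 * the function above returns the runlist ID of LCE8; on an older chip
 * with only COPY0..COPY2 it returns COPY2's runlist; and a chip with
 * neither would fall back to the GR runlist. The engine counts here are
 * assumptions for illustration, not a statement about any particular
 * chip.
 */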
u32 nvgpu_engine_get_gr_runlist_id(struct gk20a *g)
{
const struct nvgpu_device *dev;
dev = nvgpu_device_get(g, NVGPU_DEVTYPE_GRAPHICS, 0);
if (dev == NULL) {
nvgpu_warn(g, "No GR device on this GPU?!");
return NVGPU_INVALID_RUNLIST_ID;
}
return dev->runlist_id;
}
bool nvgpu_engine_is_valid_runlist_id(struct gk20a *g, u32 runlist_id)
{
u32 i;
struct nvgpu_fifo *f = &g->fifo;
for (i = 0U; i < f->num_engines; i++) {
const struct nvgpu_device *dev = f->active_engines[i];
if (dev->runlist_id == runlist_id) {
return true;
}
}
return false;
}
/*
* Link engine IDs to MMU fault IDs and vice versa.
*/
u32 nvgpu_engine_id_to_mmu_fault_id(struct gk20a *g, u32 engine_id)
{
const struct nvgpu_device *dev;
dev = nvgpu_engine_get_active_eng_info(g, engine_id);
if (dev == NULL) {
nvgpu_err(g,
"engine_id: %u is not in active list",
engine_id);
return NVGPU_INVALID_ENG_ID;
}
return dev->fault_id;
}
u32 nvgpu_engine_mmu_fault_id_to_engine_id(struct gk20a *g, u32 fault_id)
{
u32 i;
const struct nvgpu_device *dev;
struct nvgpu_fifo *f = &g->fifo;
for (i = 0U; i < f->num_engines; i++) {
dev = f->active_engines[i];
if (dev->fault_id == fault_id) {
return dev->engine_id;
}
}
return NVGPU_INVALID_ENG_ID;
}
u32 nvgpu_engine_get_mask_on_id(struct gk20a *g, u32 id, bool is_tsg)
{
unsigned int i;
u32 engines = 0;
struct nvgpu_engine_status_info engine_status;
u32 ctx_id;
u32 type;
bool busy;
for (i = 0; i < g->fifo.num_engines; i++) {
const struct nvgpu_device *dev = g->fifo.active_engines[i];
g->ops.engine_status.read_engine_status_info(g,
dev->engine_id, &engine_status);
if (nvgpu_engine_status_is_ctxsw_load(
&engine_status)) {
nvgpu_engine_status_get_next_ctx_id_type(
&engine_status, &ctx_id, &type);
} else {
nvgpu_engine_status_get_ctx_id_type(
&engine_status, &ctx_id, &type);
}
busy = engine_status.is_busy;
if (!busy || !(ctx_id == id)) {
continue;
}
if ((is_tsg && (type == ENGINE_STATUS_CTX_ID_TYPE_TSGID)) ||
(!is_tsg && (type == ENGINE_STATUS_CTX_ID_TYPE_CHID))) {
engines |= BIT32(dev->engine_id);
}
}
return engines;
}
static int nvgpu_engine_init_one_dev(struct nvgpu_fifo *f,
const struct nvgpu_device *dev)
{
bool found;
struct nvgpu_device *dev_rw;
struct gk20a *g = f->g;
dev_rw = (struct nvgpu_device *)dev;
/*
* Populate the PBDMA info for this device; ideally it'd be done
* during device init, but the FIFO unit is not out of reset that
* early in the nvgpu_finalize_poweron() sequence.
*
* We only need to do this for native; vGPU already has pbdma_id
* populated during device initialization.
*/
if (g->ops.fifo.find_pbdma_for_runlist != NULL) {
found = g->ops.fifo.find_pbdma_for_runlist(g,
dev->runlist_id,
&dev_rw->pbdma_id);
if (!found) {
nvgpu_err(g, "busted pbdma map");
return -EINVAL;
}
}
#if defined(CONFIG_NVGPU_NEXT)
{
int err = nvgpu_next_engine_init_one_dev(g, dev);
if (err != 0) {
return err;
}
}
#endif
f->host_engines[dev->engine_id] = dev;
f->active_engines[f->num_engines] = dev;
++f->num_engines;
return 0;
}
int nvgpu_engine_init_info(struct nvgpu_fifo *f)
{
int err;
struct gk20a *g = f->g;
const struct nvgpu_device *dev;
f->num_engines = 0;
nvgpu_log(g, gpu_dbg_device, "Loading host engines from device list");
nvgpu_log(g, gpu_dbg_device, " GFX devices: %u",
nvgpu_device_count(g, NVGPU_DEVTYPE_GRAPHICS));
nvgpu_device_for_each(g, dev, NVGPU_DEVTYPE_GRAPHICS) {
err = nvgpu_engine_init_one_dev(f, dev);
if (err != 0) {
return err;
}
}
return g->ops.engine.init_ce_info(f);
}
void nvgpu_engine_get_id_and_type(struct gk20a *g, u32 engine_id,
u32 *id, u32 *type)
{
struct nvgpu_engine_status_info engine_status;
g->ops.engine_status.read_engine_status_info(g, engine_id,
&engine_status);
/* use next_id if context load is failing */
if (nvgpu_engine_status_is_ctxsw_load(
&engine_status)) {
nvgpu_engine_status_get_next_ctx_id_type(
&engine_status, id, type);
} else {
nvgpu_engine_status_get_ctx_id_type(
&engine_status, id, type);
}
}
u32 nvgpu_engine_find_busy_doing_ctxsw(struct gk20a *g,
u32 *id_ptr, bool *is_tsg_ptr)
{
u32 i;
u32 id = U32_MAX;
bool is_tsg = false;
u32 mailbox2;
struct nvgpu_engine_status_info engine_status;
const struct nvgpu_device *dev = NULL;
for (i = 0U; i < g->fifo.num_engines; i++) {
dev = g->fifo.active_engines[i];
g->ops.engine_status.read_engine_status_info(g, dev->engine_id,
&engine_status);
/*
* we are interested in busy engines that
* are doing context switch
*/
if (!engine_status.is_busy ||
!nvgpu_engine_status_is_ctxsw(&engine_status)) {
continue;
}
if (nvgpu_engine_status_is_ctxsw_load(&engine_status)) {
id = engine_status.ctx_next_id;
is_tsg = nvgpu_engine_status_is_next_ctx_type_tsg(
&engine_status);
} else if (nvgpu_engine_status_is_ctxsw_switch(&engine_status)) {
mailbox2 = g->ops.gr.falcon.read_fecs_ctxsw_mailbox(g,
NVGPU_GR_FALCON_FECS_CTXSW_MAILBOX2);
if ((mailbox2 & FECS_METHOD_WFI_RESTORE) != 0U) {
id = engine_status.ctx_next_id;
is_tsg = nvgpu_engine_status_is_next_ctx_type_tsg(
&engine_status);
} else {
id = engine_status.ctx_id;
is_tsg = nvgpu_engine_status_is_ctx_type_tsg(
&engine_status);
}
} else {
id = engine_status.ctx_id;
is_tsg = nvgpu_engine_status_is_ctx_type_tsg(
&engine_status);
}
break;
}
*id_ptr = id;
*is_tsg_ptr = is_tsg;
return dev->engine_id;
}
u32 nvgpu_engine_get_runlist_busy_engines(struct gk20a *g, u32 runlist_id)
{
struct nvgpu_fifo *f = &g->fifo;
u32 i, eng_bitmask = 0U;
struct nvgpu_engine_status_info engine_status;
for (i = 0U; i < f->num_engines; i++) {
const struct nvgpu_device *dev = f->active_engines[i];
g->ops.engine_status.read_engine_status_info(g, dev->engine_id,
&engine_status);
if (engine_status.is_busy && (dev->runlist_id == runlist_id)) {
eng_bitmask |= BIT32(dev->engine_id);
}
}
return eng_bitmask;
}
#ifdef CONFIG_NVGPU_DEBUGGER
bool nvgpu_engine_should_defer_reset(struct gk20a *g, u32 engine_id,
u32 engine_subid, bool fake_fault)
{
const struct nvgpu_device *dev;
dev = nvgpu_engine_get_active_eng_info(g, engine_id);
if (dev == NULL) {
return false;
}
/*
* Channel recovery is only deferred if an SM debugger
* is attached and MMU debug mode is enabled.
*/
if (!g->ops.gr.sm_debugger_attached(g) ||
!g->ops.fb.is_debug_mode_enabled(g)) {
return false;
}
/* if this fault is fake (due to RC recovery), don't defer recovery */
if (fake_fault) {
return false;
}
if (dev->type != NVGPU_DEVTYPE_GRAPHICS) {
return false;
}
return g->ops.engine.is_fault_engine_subid_gpc(g, engine_subid);
}
#endif
u32 nvgpu_engine_mmu_fault_id_to_veid(struct gk20a *g, u32 mmu_fault_id,
u32 gr_eng_fault_id)
{
struct nvgpu_fifo *f = &g->fifo;
u32 num_subctx;
u32 veid = INVAL_ID;
num_subctx = f->max_subctx_count;
if ((mmu_fault_id >= gr_eng_fault_id) &&
(mmu_fault_id < nvgpu_safe_add_u32(gr_eng_fault_id,
num_subctx))) {
veid = mmu_fault_id - gr_eng_fault_id;
}
return veid;
}
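/*
 * Worked example with made-up numbers: if the GR engine's base MMU fault
 * ID were 0x100 and max_subctx_count were 64, fault IDs 0x100..0x13f
 * would map to VEIDs 0..63, and anything outside that window yields
 * INVAL_ID. The helper below models the same range check with plain
 * arithmetic (no overflow-safe add); it is a sketch, not driver code.
 */
static inline u32 veid_sketch(u32 mmu_fault_id, u32 gr_base, u32 num_subctx)
{
        if ((mmu_fault_id >= gr_base) &&
            (mmu_fault_id < (gr_base + num_subctx))) {
                return mmu_fault_id - gr_base;
        }
        return INVAL_ID;
}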
static u32 nvgpu_engine_mmu_fault_id_to_eng_id_and_veid(struct gk20a *g,
u32 mmu_fault_id, u32 *veid)
{
u32 i;
u32 engine_id = INVAL_ID;
const struct nvgpu_device *dev;
struct nvgpu_fifo *f = &g->fifo;
for (i = 0U; i < f->num_engines; i++) {
dev = f->active_engines[i];
if (dev->type == NVGPU_DEVTYPE_GRAPHICS) {
*veid = nvgpu_engine_mmu_fault_id_to_veid(g,
mmu_fault_id, dev->fault_id);
if (*veid != INVAL_ID) {
engine_id = dev->engine_id;
break;
}
} else {
if (dev->fault_id == mmu_fault_id) {
engine_id = dev->engine_id;
*veid = INVAL_ID;
break;
}
}
}
return engine_id;
}
void nvgpu_engine_mmu_fault_id_to_eng_ve_pbdma_id(struct gk20a *g,
u32 mmu_fault_id, u32 *engine_id, u32 *veid, u32 *pbdma_id)
{
*engine_id = nvgpu_engine_mmu_fault_id_to_eng_id_and_veid(g,
mmu_fault_id, veid);
if (*engine_id == INVAL_ID) {
*pbdma_id = g->ops.fifo.mmu_fault_id_to_pbdma_id(g,
mmu_fault_id);
} else {
*pbdma_id = INVAL_ID;
}
}

@@ -0,0 +1,315 @@
/*
* FIFO
*
* Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/trace.h>
#include <nvgpu/dma.h>
#include <nvgpu/fifo.h>
#include <nvgpu/engines.h>
#include <nvgpu/runlist.h>
#include <nvgpu/preempt.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/channel.h>
#include <nvgpu/pbdma.h>
#include <nvgpu/tsg.h>
#include <nvgpu/vm_area.h>
#include <nvgpu/nvgpu_err.h>
#include <nvgpu/cic.h>
#include <nvgpu/mc.h>
#include <nvgpu/swprofile.h>
#include <nvgpu/fifo/swprofile.h>
static const char *nvgpu_fifo_kickoff_profile_events[] = {
NVGPU_FIFO_KICKOFF_PROFILE_EVENTS,
};
static const char *nvgpu_fifo_recovery_profile_events[] = {
NVGPU_FIFO_RECOVERY_PROFILE_EVENTS,
};
static const char *nvgpu_fifo_engine_reset_events[] = {
NVGPU_FIFO_ENGINE_RESET_EVENTS,
};
void nvgpu_fifo_cleanup_sw_common(struct gk20a *g)
{
struct nvgpu_fifo *f = &g->fifo;
nvgpu_log_fn(g, " ");
#ifdef CONFIG_NVGPU_USERD
g->ops.userd.cleanup_sw(g);
#endif
nvgpu_channel_cleanup_sw(g);
nvgpu_tsg_cleanup_sw(g);
nvgpu_runlist_cleanup_sw(g);
nvgpu_engine_cleanup_sw(g);
if (g->ops.pbdma.cleanup_sw != NULL) {
g->ops.pbdma.cleanup_sw(g);
}
#ifdef CONFIG_NVGPU_DEBUGGER
f->deferred_reset_pending = false;
nvgpu_mutex_destroy(&f->deferred_reset_mutex);
#endif
nvgpu_mutex_destroy(&f->engines_reset_mutex);
nvgpu_mutex_destroy(&f->intr.isr.mutex);
f->sw_ready = false;
}
void nvgpu_fifo_cleanup_sw(struct gk20a *g)
{
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
nvgpu_channel_worker_deinit(g);
#endif
nvgpu_fifo_cleanup_sw_common(g);
}
static void nvgpu_fifo_remove_support(struct nvgpu_fifo *f)
{
struct gk20a *g = f->g;
g->ops.fifo.cleanup_sw(g);
}
int nvgpu_fifo_setup_sw_common(struct gk20a *g)
{
struct nvgpu_fifo *f = &g->fifo;
int err = 0;
nvgpu_log_fn(g, " ");
f->g = g;
nvgpu_mutex_init(&f->intr.isr.mutex);
nvgpu_mutex_init(&f->engines_reset_mutex);
#ifdef CONFIG_NVGPU_DEBUGGER
nvgpu_mutex_init(&f->deferred_reset_mutex);
#endif
nvgpu_swprofile_initialize(g, &f->kickoff_profiler,
nvgpu_fifo_kickoff_profile_events);
nvgpu_swprofile_initialize(g, &f->recovery_profiler,
nvgpu_fifo_recovery_profile_events);
nvgpu_swprofile_initialize(g, &f->eng_reset_profiler,
nvgpu_fifo_engine_reset_events);
err = nvgpu_channel_setup_sw(g);
if (err != 0) {
nvgpu_err(g, "failed to init channel support");
goto clean_up;
}
err = nvgpu_tsg_setup_sw(g);
if (err != 0) {
nvgpu_err(g, "failed to init tsg support");
goto clean_up_channel;
}
if (g->ops.pbdma.setup_sw != NULL) {
err = g->ops.pbdma.setup_sw(g);
if (err != 0) {
nvgpu_err(g, "failed to init pbdma support");
goto clean_up_tsg;
}
}
err = nvgpu_engine_setup_sw(g);
if (err != 0) {
nvgpu_err(g, "failed to init engine support");
goto clean_up_pbdma;
}
err = nvgpu_runlist_setup_sw(g);
if (err != 0) {
nvgpu_err(g, "failed to init runlist support");
goto clean_up_engine;
}
#ifdef CONFIG_NVGPU_USERD
err = g->ops.userd.setup_sw(g);
if (err != 0) {
nvgpu_err(g, "failed to init userd support");
goto clean_up_runlist;
}
#endif
f->remove_support = nvgpu_fifo_remove_support;
nvgpu_log_fn(g, "done");
return 0;
#ifdef CONFIG_NVGPU_USERD
clean_up_runlist:
nvgpu_runlist_cleanup_sw(g);
#endif
clean_up_engine:
nvgpu_engine_cleanup_sw(g);
clean_up_pbdma:
if (g->ops.pbdma.cleanup_sw != NULL) {
g->ops.pbdma.cleanup_sw(g);
}
clean_up_tsg:
nvgpu_tsg_cleanup_sw(g);
clean_up_channel:
nvgpu_channel_cleanup_sw(g);
clean_up:
nvgpu_err(g, "init fifo support failed");
return err;
}
int nvgpu_fifo_setup_sw(struct gk20a *g)
{
struct nvgpu_fifo *f = &g->fifo;
int err = 0;
nvgpu_log_fn(g, " ");
if (f->sw_ready) {
nvgpu_log_fn(g, "skip init");
return 0;
}
err = nvgpu_fifo_setup_sw_common(g);
if (err != 0) {
nvgpu_err(g, "fifo common sw setup failed, err=%d", err);
return err;
}
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
err = nvgpu_channel_worker_init(g);
if (err != 0) {
nvgpu_err(g, "worker init fail, err=%d", err);
goto clean_up;
}
#endif
f->sw_ready = true;
nvgpu_log_fn(g, "done");
return 0;
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
clean_up:
nvgpu_fifo_cleanup_sw_common(g);
return err;
#endif
}
int nvgpu_fifo_init_support(struct gk20a *g)
{
int err;
err = g->ops.fifo.setup_sw(g);
if (err != 0) {
nvgpu_err(g, "fifo sw setup failed, err=%d", err);
return err;
}
if (g->ops.fifo.init_fifo_setup_hw != NULL) {
err = g->ops.fifo.init_fifo_setup_hw(g);
if (err != 0) {
nvgpu_err(g, "fifo hw setup failed, err=%d", err);
goto clean_up;
}
}
return 0;
clean_up:
nvgpu_fifo_cleanup_sw_common(g);
return err;
}
static const char * const pbdma_ch_eng_status_str[] = {
"invalid",
"valid",
"NA",
"NA",
"NA",
"load",
"save",
"switch",
};
static const char * const not_found_str[] = {
"NOT FOUND"
};
const char *nvgpu_fifo_decode_pbdma_ch_eng_status(u32 index)
{
if (index >= ARRAY_SIZE(pbdma_ch_eng_status_str)) {
return not_found_str[0];
} else {
return pbdma_ch_eng_status_str[index];
}
}
static void disable_fifo_interrupts(struct gk20a *g)
{
/* Disable fifo interrupts */
g->ops.fifo.intr_0_enable(g, false);
g->ops.fifo.intr_1_enable(g, false);
if (g->ops.fifo.intr_top_enable == NULL) {
nvgpu_cic_intr_stall_unit_config(g, NVGPU_CIC_INTR_UNIT_FIFO,
NVGPU_CIC_INTR_DISABLE);
nvgpu_cic_intr_nonstall_unit_config(g, NVGPU_CIC_INTR_UNIT_FIFO,
NVGPU_CIC_INTR_DISABLE);
} else {
g->ops.fifo.intr_top_enable(g, NVGPU_CIC_INTR_DISABLE);
}
}
int nvgpu_fifo_suspend(struct gk20a *g)
{
nvgpu_log_fn(g, " ");
if (g->ops.mm.is_bar1_supported(g)) {
g->ops.fifo.bar1_snooping_disable(g);
}
disable_fifo_interrupts(g);
nvgpu_log_fn(g, "done");
return 0;
}
void nvgpu_fifo_sw_quiesce(struct gk20a *g)
{
u32 runlist_mask = U32_MAX;
g->ops.runlist.write_state(g, runlist_mask, RUNLIST_DISABLED);
/* Preempt all runlists */
nvgpu_fifo_preempt_runlists_for_rc(g, runlist_mask);
}

@@ -0,0 +1,149 @@
/*
* Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/log.h>
#include <nvgpu/lock.h>
#include <nvgpu/kmem.h>
#include <nvgpu/barrier.h>
#include <nvgpu/circ_buf.h>
#include <nvgpu/channel.h>
#include <nvgpu/job.h>
#include <nvgpu/priv_cmdbuf.h>
#include <nvgpu/fence.h>
static inline struct nvgpu_channel_job *
channel_gk20a_job_from_list(struct nvgpu_list_node *node)
{
return (struct nvgpu_channel_job *)
((uintptr_t)node - offsetof(struct nvgpu_channel_job, list));
};
int nvgpu_channel_alloc_job(struct nvgpu_channel *c,
struct nvgpu_channel_job **job_out)
{
unsigned int put = c->joblist.pre_alloc.put;
unsigned int get = c->joblist.pre_alloc.get;
unsigned int next = (put + 1) % c->joblist.pre_alloc.length;
bool full = next == get;
if (full) {
return -EAGAIN;
}
*job_out = &c->joblist.pre_alloc.jobs[put];
(void) memset(*job_out, 0, sizeof(**job_out));
return 0;
}
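/*
 * Worked example, illustrative only: with pre_alloc.length == 5 the
 * queue holds at most 4 jobs. Starting from get == put == 0, four
 * successful allocations leave put == 4; the fifth computes
 * next == (4 + 1) % 5 == 0 == get, so the queue reports full and
 * -EAGAIN is returned until a completed job advances get.
 */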
void nvgpu_channel_free_job(struct nvgpu_channel *c,
struct nvgpu_channel_job *job)
{
/*
* Nothing needed for now. The job contents are preallocated. The
* completion fence may briefly outlive the job, but the job memory is
* reclaimed only when a new submit comes in and the ringbuffer has run
* out of space.
*/
}
void nvgpu_channel_joblist_lock(struct nvgpu_channel *c)
{
nvgpu_mutex_acquire(&c->joblist.pre_alloc.read_lock);
}
void nvgpu_channel_joblist_unlock(struct nvgpu_channel *c)
{
nvgpu_mutex_release(&c->joblist.pre_alloc.read_lock);
}
struct nvgpu_channel_job *nvgpu_channel_joblist_peek(struct nvgpu_channel *c)
{
unsigned int get = c->joblist.pre_alloc.get;
unsigned int put = c->joblist.pre_alloc.put;
bool empty = get == put;
return empty ? NULL : &c->joblist.pre_alloc.jobs[get];
}
void nvgpu_channel_joblist_add(struct nvgpu_channel *c,
struct nvgpu_channel_job *job)
{
c->joblist.pre_alloc.put = (c->joblist.pre_alloc.put + 1U) %
(c->joblist.pre_alloc.length);
}
void nvgpu_channel_joblist_delete(struct nvgpu_channel *c,
struct nvgpu_channel_job *job)
{
c->joblist.pre_alloc.get = (c->joblist.pre_alloc.get + 1U) %
(c->joblist.pre_alloc.length);
}
int nvgpu_channel_joblist_init(struct nvgpu_channel *c, u32 num_jobs)
{
int err;
u32 size;
size = (u32)sizeof(struct nvgpu_channel_job);
if (num_jobs > nvgpu_safe_sub_u32(U32_MAX / size, 1U)) {
err = -ERANGE;
goto clean_up;
}
/*
* The max capacity of this ring buffer is the alloc size minus one (in
* units of item slot), so allocate a size of (num_jobs + 1) * size
* bytes.
*/
c->joblist.pre_alloc.jobs = nvgpu_vzalloc(c->g,
nvgpu_safe_mult_u32(
nvgpu_safe_add_u32(num_jobs, 1U),
size));
if (c->joblist.pre_alloc.jobs == NULL) {
err = -ENOMEM;
goto clean_up;
}
/*
* length is the allocation size of the ringbuffer; the number of jobs
* that fit is one less.
*/
c->joblist.pre_alloc.length = nvgpu_safe_add_u32(num_jobs, 1U);
c->joblist.pre_alloc.put = 0;
c->joblist.pre_alloc.get = 0;
return 0;
clean_up:
nvgpu_vfree(c->g, c->joblist.pre_alloc.jobs);
(void) memset(&c->joblist.pre_alloc, 0, sizeof(c->joblist.pre_alloc));
return err;
}
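/*
 * Standalone sketch of the one-slot-empty ring buffer convention used
 * above, with plain integers: allocating num_jobs + 1 slots lets
 * put == get mean "empty" and (put + 1) % length == get mean "full",
 * so no separate element count is needed. This mirrors the logic in
 * nvgpu_channel_alloc_job() and joblist_peek() but is illustrative only.
 */
static inline bool joblist_sketch_empty(u32 get, u32 put)
{
        return get == put;
}

static inline bool joblist_sketch_full(u32 get, u32 put, u32 length)
{
        return ((put + 1U) % length) == get;
}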
void nvgpu_channel_joblist_deinit(struct nvgpu_channel *c)
{
if (c->joblist.pre_alloc.jobs != NULL) {
nvgpu_vfree(c->g, c->joblist.pre_alloc.jobs);
c->joblist.pre_alloc.jobs = NULL;
}
}

@@ -0,0 +1,55 @@
/*
* Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/pbdma.h>
static void nvgpu_pbdma_init_intr_descs(struct gk20a *g)
{
struct nvgpu_fifo *f = &g->fifo;
if (g->ops.pbdma.device_fatal_0_intr_descs != NULL) {
f->intr.pbdma.device_fatal_0 =
g->ops.pbdma.device_fatal_0_intr_descs();
}
if (g->ops.pbdma.channel_fatal_0_intr_descs != NULL) {
f->intr.pbdma.channel_fatal_0 =
g->ops.pbdma.channel_fatal_0_intr_descs();
}
if (g->ops.pbdma.restartable_0_intr_descs != NULL) {
f->intr.pbdma.restartable_0 =
g->ops.pbdma.restartable_0_intr_descs();
}
}
int nvgpu_pbdma_setup_sw(struct gk20a *g)
{
nvgpu_pbdma_init_intr_descs(g);
return 0;
}
void nvgpu_pbdma_cleanup_sw(struct gk20a *g)
{
return;
}

@@ -0,0 +1,54 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/pbdma_status.h>
bool nvgpu_pbdma_status_is_chsw_switch(struct nvgpu_pbdma_status_info
*pbdma_status)
{
return pbdma_status->chsw_status == NVGPU_PBDMA_CHSW_STATUS_SWITCH;
}
bool nvgpu_pbdma_status_is_chsw_load(struct nvgpu_pbdma_status_info
*pbdma_status)
{
return pbdma_status->chsw_status == NVGPU_PBDMA_CHSW_STATUS_LOAD;
}
bool nvgpu_pbdma_status_is_chsw_save(struct nvgpu_pbdma_status_info
*pbdma_status)
{
return pbdma_status->chsw_status == NVGPU_PBDMA_CHSW_STATUS_SAVE;
}
bool nvgpu_pbdma_status_is_chsw_valid(struct nvgpu_pbdma_status_info
*pbdma_status)
{
return pbdma_status->chsw_status == NVGPU_PBDMA_CHSW_STATUS_VALID;
}
bool nvgpu_pbdma_status_is_id_type_tsg(struct nvgpu_pbdma_status_info
*pbdma_status)
{
return pbdma_status->id_type == PBDMA_STATUS_ID_TYPE_TSGID;
}
bool nvgpu_pbdma_status_is_next_id_type_tsg(struct nvgpu_pbdma_status_info
*pbdma_status)
{
return pbdma_status->next_id_type == PBDMA_STATUS_NEXT_ID_TYPE_TSGID;
}

@@ -0,0 +1,220 @@
/*
* Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/soc.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/errata.h>
#include <nvgpu/runlist.h>
#include <nvgpu/types.h>
#include <nvgpu/channel.h>
#include <nvgpu/tsg.h>
#include <nvgpu/preempt.h>
#include <nvgpu/nvgpu_err.h>
#include <nvgpu/cic.h>
#include <nvgpu/rc.h>
#ifdef CONFIG_NVGPU_LS_PMU
#include <nvgpu/pmu/mutex.h>
#endif
u32 nvgpu_preempt_get_timeout(struct gk20a *g)
{
return g->ctxsw_timeout_period_ms;
}
int nvgpu_fifo_preempt_tsg(struct gk20a *g, struct nvgpu_tsg *tsg)
{
int ret = 0;
u32 preempt_retry_count = 10U;
u32 preempt_retry_timeout =
nvgpu_preempt_get_timeout(g) / preempt_retry_count;
#ifdef CONFIG_NVGPU_LS_PMU
u32 token = PMU_INVALID_MUTEX_OWNER_ID;
int mutex_ret = 0;
#endif
nvgpu_log_fn(g, "tsgid: %d", tsg->tsgid);
if (tsg->runlist == NULL) {
return 0;
}
do {
nvgpu_mutex_acquire(&tsg->runlist->runlist_lock);
if (nvgpu_is_errata_present(g, NVGPU_ERRATA_2016608)) {
nvgpu_runlist_set_state(g, BIT32(tsg->runlist->id),
RUNLIST_DISABLED);
}
#ifdef CONFIG_NVGPU_LS_PMU
mutex_ret = nvgpu_pmu_lock_acquire(g, g->pmu,
PMU_MUTEX_ID_FIFO, &token);
#endif
g->ops.fifo.preempt_trigger(g, tsg->tsgid, ID_TYPE_TSG);
/*
* Poll for preempt done. If stalling interrupts are pending
* while the preempt is in progress, poll for the stalling
* interrupts to finish (based on the return value of this
* function) and retry the preempt.
* If the HW is hung, the last retry attempts to identify the
* hung engines, sets the runlist reset_eng_bitmask and marks
* the preemption complete.
*/
ret = g->ops.fifo.is_preempt_pending(g, tsg->tsgid,
ID_TYPE_TSG, preempt_retry_count > 1U);
#ifdef CONFIG_NVGPU_LS_PMU
if (mutex_ret == 0) {
int err = nvgpu_pmu_lock_release(g, g->pmu,
PMU_MUTEX_ID_FIFO, &token);
if (err != 0) {
nvgpu_err(g, "PMU_MUTEX_ID_FIFO not released err=%d", err);
}
}
#endif
if (nvgpu_is_errata_present(g, NVGPU_ERRATA_2016608)) {
nvgpu_runlist_set_state(g, BIT32(tsg->runlist->id),
RUNLIST_ENABLED);
}
nvgpu_mutex_release(&tsg->runlist->runlist_lock);
if (ret != -EAGAIN) {
break;
}
ret = nvgpu_cic_wait_for_stall_interrupts(g, preempt_retry_timeout);
if (ret != 0) {
nvgpu_log_info(g, "wait for stall interrupts failed %d", ret);
}
} while (--preempt_retry_count != 0U);
if (ret != 0) {
if (nvgpu_platform_is_silicon(g)) {
nvgpu_err(g, "preempt timed out for tsgid: %u, "
"ctxsw timeout will trigger recovery if needed",
tsg->tsgid);
} else {
nvgpu_rc_preempt_timeout(g, tsg);
}
}
return ret;
}
int nvgpu_preempt_channel(struct gk20a *g, struct nvgpu_channel *ch)
{
int err;
struct nvgpu_tsg *tsg = nvgpu_tsg_from_ch(ch);
if (tsg != NULL) {
err = g->ops.fifo.preempt_tsg(ch->g, tsg);
} else {
err = g->ops.fifo.preempt_channel(ch->g, ch);
}
return err;
}
/* called from rc */
int nvgpu_preempt_poll_tsg_on_pbdma(struct gk20a *g,
struct nvgpu_tsg *tsg)
{
unsigned long runlist_served_pbdmas;
unsigned long pbdma_id_bit;
u32 tsgid, pbdma_id;
if (g->ops.fifo.preempt_poll_pbdma == NULL) {
return 0;
}
tsgid = tsg->tsgid;
runlist_served_pbdmas = tsg->runlist->pbdma_bitmask;
for_each_set_bit(pbdma_id_bit, &runlist_served_pbdmas,
nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA)) {
pbdma_id = U32(pbdma_id_bit);
/*
* If the PBDMA preempt fails, the only option is to reset the
* GPU. Any sort of hang here indicates that the entire GPU's
* memory system would be blocked.
*/
if (g->ops.fifo.preempt_poll_pbdma(g, tsgid, pbdma_id) != 0) {
nvgpu_err(g, "PBDMA preempt failed");
return -EBUSY;
}
}
return 0;
}
/*
* This should be called with the runlist_lock held for all the
* runlists set in runlists_bitmask.
*/
void nvgpu_fifo_preempt_runlists_for_rc(struct gk20a *g, u32 runlists_bitmask)
{
struct nvgpu_fifo *f = &g->fifo;
u32 i;
#ifdef CONFIG_NVGPU_LS_PMU
u32 token = PMU_INVALID_MUTEX_OWNER_ID;
int mutex_ret = 0;
#endif
/* runlist_locks are already held by teardown and scheduling is disabled too */
nvgpu_log_fn(g, "preempt runlists_bitmask:0x%08x", runlists_bitmask);
#ifdef CONFIG_NVGPU_LS_PMU
mutex_ret = nvgpu_pmu_lock_acquire(g, g->pmu,
PMU_MUTEX_ID_FIFO, &token);
#endif
for (i = 0U; i < f->num_runlists; i++) {
struct nvgpu_runlist *runlist;
runlist = &f->active_runlists[i];
if ((BIT32(runlist->id) & runlists_bitmask) == 0U) {
continue;
}
/* issue runlist preempt */
g->ops.fifo.preempt_trigger(g, runlist->id,
ID_TYPE_RUNLIST);
#ifdef CONFIG_NVGPU_RECOVERY
/*
* Preemption will never complete in RC due to some
* fatal condition. Do not poll for preemption to
* complete. Reset engines served by runlists.
*/
runlist->reset_eng_bitmask = runlist->eng_bitmask;
#endif
}
#ifdef CONFIG_NVGPU_LS_PMU
if (mutex_ret == 0) {
int err = nvgpu_pmu_lock_release(g, g->pmu, PMU_MUTEX_ID_FIFO,
&token);
if (err != 0) {
nvgpu_err(g, "PMU_MUTEX_ID_FIFO not released err=%d",
err);
}
}
#endif
}

View File

@@ -0,0 +1,333 @@
/*
* Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/log.h>
#include <nvgpu/utils.h>
#include <nvgpu/log2.h>
#include <nvgpu/barrier.h>
#include <nvgpu/dma.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/kmem.h>
#include <nvgpu/vm.h>
#include <nvgpu/priv_cmdbuf.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/trace.h>
#include <nvgpu/circ_buf.h>
struct priv_cmd_entry {
struct nvgpu_mem *mem;
u32 off; /* offset in mem, in u32 entries */
u32 fill_off; /* write offset from off, in u32 entries */
u32 size; /* in words */
u32 alloc_size;
};
struct priv_cmd_queue {
struct vm_gk20a *vm;
struct nvgpu_mem mem; /* pushbuf */
u32 size; /* allocated length in words */
u32 put; /* next entry will begin here */
u32 get; /* next entry to free begins here */
/* an entry is a fragment of the pushbuf memory */
struct priv_cmd_entry *entries;
u32 entries_len; /* allocated length */
u32 entry_put;
u32 entry_get;
};
/*
* Allocate the private command buffer queue, used for inserting commands
* before/after user submitted buffers.
*/
int nvgpu_priv_cmdbuf_queue_alloc(struct vm_gk20a *vm,
u32 job_count, struct priv_cmd_queue **queue)
{
struct gk20a *g = vm->mm->g;
struct priv_cmd_queue *q;
u64 size, tmp_size;
int err = 0;
u32 wait_size, incr_size;
u32 mem_per_job;
/*
* Sema size is at least as much as syncpt size, but semas may not be
* enabled in the build. If neither semas nor syncpts are enabled, priv
* cmdbufs, and thus kernel-mode submits with job tracking, are not
* supported.
*/
#ifdef CONFIG_NVGPU_SW_SEMAPHORE
wait_size = g->ops.sync.sema.get_wait_cmd_size();
incr_size = g->ops.sync.sema.get_incr_cmd_size();
#else
wait_size = g->ops.sync.syncpt.get_wait_cmd_size();
incr_size = g->ops.sync.syncpt.get_incr_cmd_size(true);
#endif
/*
* Compute the amount of priv_cmdbuf space we need. In general the
* worst case is the kernel inserts both a semaphore pre-fence and
* post-fence. Any sync-pt fences will take less memory so we can
* ignore them unless they're the only supported type. Jobs can also
* have more than one pre-fence, but that's abnormal and we'll return
* -EAGAIN if such jobs would fill the queue.
*
* A semaphore ACQ (fence-wait) is 8 words: semaphore_a, semaphore_b,
* semaphore_c, and semaphore_d. A semaphore INCR (fence-get) will be
* 10 words: all the same as an ACQ plus a non-stalling intr which is
* another 2 words. In reality these numbers vary by chip but we'll use
* 8 and 10 as examples.
*
* Given the job count, cmdbuf space is allocated such that each job
* can get one wait command and one increment command:
*
* job_count * (8 + 10) * 4 bytes
*
* These cmdbufs are inserted as gpfifo entries right before and after
* the user submitted gpfifo entries per submit.
*
* One extra slot is added to the queue length so that the requested
* job count can actually be allocated. This ring buffer implementation
* is full when the number of consumed entries is one less than the
* allocation size:
*
* alloc bytes = job_count * (wait + incr + 1) * slot in bytes
*/
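/*
* As a worked example (illustrative numbers only, not taken from any
* particular chip): with job_count = 128, wait_size = 8 and
* incr_size = 10, the request below is 128 * (8 + 10 + 1) * 4 = 9728
* bytes, which the power-of-two/page-align rounding turns into a
* 16 KiB allocation.
*/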
mem_per_job = nvgpu_safe_mult_u32(
nvgpu_safe_add_u32(
nvgpu_safe_add_u32(wait_size, incr_size),
1U),
(u32)sizeof(u32));
/* both operands are 32-bit values and mem_per_job is small, so the u64 product cannot overflow */
size = nvgpu_safe_mult_u64((u64)job_count, (u64)mem_per_job);
tmp_size = PAGE_ALIGN(roundup_pow_of_two(size));
if (tmp_size > U32_MAX) {
return -ERANGE;
}
size = (u32)tmp_size;
q = nvgpu_kzalloc(g, sizeof(*q));
if (q == NULL) {
return -ENOMEM;
}
q->vm = vm;
if (job_count > U32_MAX / 2U - 1U) {
err = -ERANGE;
goto err_free_queue;
}
/* One extra to account for the full condition: 2 * job_count + 1 */
q->entries_len = nvgpu_safe_mult_u32(2U,
nvgpu_safe_add_u32(job_count, 1U));
q->entries = nvgpu_vzalloc(g,
nvgpu_safe_mult_u64((u64)q->entries_len,
sizeof(*q->entries)));
if (q->entries == NULL) {
err = -ENOMEM;
goto err_free_queue;
}
err = nvgpu_dma_alloc_map_sys(vm, size, &q->mem);
if (err != 0) {
nvgpu_err(g, "%s: memory allocation failed", __func__);
goto err_free_entries;
}
tmp_size = q->mem.size / sizeof(u32);
nvgpu_assert(tmp_size <= U32_MAX);
q->size = (u32)tmp_size;
*queue = q;
return 0;
err_free_entries:
nvgpu_vfree(g, q->entries);
err_free_queue:
nvgpu_kfree(g, q);
return err;
}
void nvgpu_priv_cmdbuf_queue_free(struct priv_cmd_queue *q)
{
struct vm_gk20a *vm = q->vm;
struct gk20a *g = vm->mm->g;
nvgpu_dma_unmap_free(vm, &q->mem);
nvgpu_vfree(g, q->entries);
nvgpu_kfree(g, q);
}
/* allocate a cmd buffer with given size. size is number of u32 entries */
static int nvgpu_priv_cmdbuf_alloc_buf(struct priv_cmd_queue *q, u32 orig_size,
struct priv_cmd_entry *e)
{
struct gk20a *g = q->vm->mm->g;
u32 size = orig_size;
u32 free_count;
nvgpu_log_fn(g, "size %d", orig_size);
/*
* If free space in the end is less than requested, increase the size
* to make the real allocated space start from beginning. The hardware
* expects each cmdbuf to be contiguous in the dma space.
*
* This too-small leftover space at the end can happen because the
* requested wait and incr command buffers do not necessarily align
* with the whole buffer capacity. They don't always align because the
* buffer size is rounded up to the next power of two and because not
* all jobs necessarily use exactly one wait command.
*/
if (nvgpu_safe_add_u32(q->put, size) > q->size) {
size = orig_size + (q->size - q->put);
}
nvgpu_log_info(g, "priv cmd queue get:put %d:%d",
q->get, q->put);
nvgpu_assert(q->put < q->size);
nvgpu_assert(q->get < q->size);
nvgpu_assert(q->size > 0U);
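/*
* Standard power-of-two ring buffer accounting: q->size is assumed to
* be a power of two (see the rounding in queue alloc), so the mask
* yields the free space between put and get while keeping one word
* unused to distinguish a full queue from an empty one.
*/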
free_count = (q->size - q->put + q->get - 1U) & (q->size - 1U);
if (size > free_count) {
return -EAGAIN;
}
e->fill_off = 0;
e->size = orig_size;
e->alloc_size = size;
e->mem = &q->mem;
/*
* If we have increased the size to skip the free space at the end, set
* put to the beginning of the cmd buffer + orig_size, as if the
* previous put had been at position 0.
*/
if (size != orig_size) {
e->off = 0;
q->put = orig_size;
} else {
e->off = q->put;
q->put = (q->put + orig_size) & (q->size - 1U);
}
/* we already handled q->put + size > q->size so BUG_ON this */
BUG_ON(q->put > q->size);
nvgpu_log_fn(g, "done");
return 0;
}
int nvgpu_priv_cmdbuf_alloc(struct priv_cmd_queue *q, u32 size,
struct priv_cmd_entry **e)
{
u32 next_put = nvgpu_safe_add_u32(q->entry_put, 1U) % q->entries_len;
struct priv_cmd_entry *entry;
int err;
if (next_put == q->entry_get) {
return -EAGAIN;
}
entry = &q->entries[q->entry_put];
err = nvgpu_priv_cmdbuf_alloc_buf(q, size, entry);
if (err != 0) {
return err;
}
q->entry_put = next_put;
*e = entry;
return 0;
}
void nvgpu_priv_cmdbuf_rollback(struct priv_cmd_queue *q,
struct priv_cmd_entry *e)
{
nvgpu_assert(q->put < q->size);
nvgpu_assert(q->size > 0U);
nvgpu_assert(e->alloc_size <= q->size);
q->put = (q->put + q->size - e->alloc_size) & (q->size - 1U);
(void)memset(e, 0, sizeof(*e));
nvgpu_assert(q->entry_put < q->entries_len);
nvgpu_assert(q->entries_len > 0U);
q->entry_put = (q->entry_put + q->entries_len - 1U)
% q->entries_len;
}
void nvgpu_priv_cmdbuf_free(struct priv_cmd_queue *q, struct priv_cmd_entry *e)
{
struct gk20a *g = q->vm->mm->g;
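/*
* Entries are expected to be freed in allocation order. e->off == 0 is
* tolerated here because a wrapped allocation restarts at the beginning
* of the buffer before get has caught up.
*/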
if ((q->get != e->off) && e->off != 0U) {
nvgpu_err(g, "priv cmdbuf requests out-of-order");
}
nvgpu_assert(q->size > 0U);
q->get = nvgpu_safe_add_u32(e->off, e->size) & (q->size - 1U);
q->entry_get = nvgpu_safe_add_u32(q->entry_get, 1U) % q->entries_len;
(void)memset(e, 0, sizeof(*e));
}
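/*
* Copy 'entries' words of 'data' into this entry at its current fill
* offset. The caller must have allocated at least this much space in
* the entry; the assert below enforces that.
*/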
void nvgpu_priv_cmdbuf_append(struct gk20a *g, struct priv_cmd_entry *e,
u32 *data, u32 entries)
{
nvgpu_assert(e->fill_off + entries <= e->size);
nvgpu_mem_wr_n(g, e->mem, (e->off + e->fill_off) * sizeof(u32),
data, entries * sizeof(u32));
e->fill_off += entries;
}
void nvgpu_priv_cmdbuf_append_zeros(struct gk20a *g, struct priv_cmd_entry *e,
u32 entries)
{
nvgpu_assert(e->fill_off + entries <= e->size);
nvgpu_memset(g, e->mem, (e->off + e->fill_off) * sizeof(u32),
0, entries * sizeof(u32));
e->fill_off += entries;
}
void nvgpu_priv_cmdbuf_finish(struct gk20a *g, struct priv_cmd_entry *e,
u64 *gva, u32 *size)
{
/*
* The size is written to the pushbuf entry, so make sure this buffer
* is complete at this point. The responsibility of the channel sync is
* to be consistent in allocation and usage, and the matching size and
* add gops (e.g., get_wait_cmd_size, add_wait_cmd) help there.
*/
nvgpu_assert(e->fill_off == e->size);
#ifdef CONFIG_NVGPU_TRACE
if (e->mem->aperture == APERTURE_SYSMEM) {
trace_gk20a_push_cmdbuf(g->name, 0, e->size, 0,
(u32 *)e->mem->cpu_va + e->off);
}
#endif
*gva = nvgpu_safe_add_u64(e->mem->gpu_va,
nvgpu_safe_mult_u64((u64)e->off, sizeof(u32)));
*size = e->size;
}

View File

@@ -0,0 +1,914 @@
/*
* Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/channel.h>
#include <nvgpu/fifo.h>
#include <nvgpu/engines.h>
#include <nvgpu/device.h>
#include <nvgpu/runlist.h>
#include <nvgpu/ptimer.h>
#include <nvgpu/bug.h>
#include <nvgpu/dma.h>
#include <nvgpu/rc.h>
#include <nvgpu/static_analysis.h>
#ifdef CONFIG_NVGPU_LS_PMU
#include <nvgpu/pmu/mutex.h>
#endif
void nvgpu_runlist_lock_active_runlists(struct gk20a *g)
{
struct nvgpu_fifo *f = &g->fifo;
struct nvgpu_runlist *runlist;
u32 i;
nvgpu_log_info(g, "acquire runlist_lock for active runlists");
for (i = 0; i < g->fifo.num_runlists; i++) {
runlist = &f->active_runlists[i];
nvgpu_mutex_acquire(&runlist->runlist_lock);
}
}
void nvgpu_runlist_unlock_active_runlists(struct gk20a *g)
{
struct nvgpu_fifo *f = &g->fifo;
struct nvgpu_runlist *runlist;
u32 i;
nvgpu_log_info(g, "release runlist_lock for active runlists");
for (i = 0; i < g->fifo.num_runlists; i++) {
runlist = &f->active_runlists[i];
nvgpu_mutex_release(&runlist->runlist_lock);
}
}
static u32 nvgpu_runlist_append_tsg(struct gk20a *g,
struct nvgpu_runlist *runlist,
u32 **runlist_entry,
u32 *entries_left,
struct nvgpu_tsg *tsg)
{
struct nvgpu_fifo *f = &g->fifo;
u32 runlist_entry_words = f->runlist_entry_size / (u32)sizeof(u32);
struct nvgpu_channel *ch;
u32 count = 0;
u32 timeslice;
nvgpu_log_fn(f->g, " ");
if (*entries_left == 0U) {
return RUNLIST_APPEND_FAILURE;
}
/* add TSG entry */
nvgpu_log_info(g, "add TSG %d to runlist", tsg->tsgid);
/*
* timeslice is measured with PTIMER.
* On some platforms, PTIMER is lower than 1GHz.
*/
timeslice = scale_ptimer(tsg->timeslice_us,
ptimer_scalingfactor10x(g->ptimer_src_freq));
g->ops.runlist.get_tsg_entry(tsg, *runlist_entry, timeslice);
nvgpu_log_info(g, "tsg rl entries left %d runlist [0] %x [1] %x",
*entries_left,
(*runlist_entry)[0], (*runlist_entry)[1]);
*runlist_entry += runlist_entry_words;
count++;
(*entries_left)--;
nvgpu_rwsem_down_read(&tsg->ch_list_lock);
/* add runnable channels bound to this TSG */
nvgpu_list_for_each_entry(ch, &tsg->ch_list,
nvgpu_channel, ch_entry) {
if (!nvgpu_test_bit(ch->chid,
runlist->active_channels)) {
continue;
}
if (*entries_left == 0U) {
nvgpu_rwsem_up_read(&tsg->ch_list_lock);
return RUNLIST_APPEND_FAILURE;
}
nvgpu_log_info(g, "add channel %d to runlist",
ch->chid);
g->ops.runlist.get_ch_entry(ch, *runlist_entry);
nvgpu_log_info(g, "rl entries left %d runlist [0] %x [1] %x",
*entries_left,
(*runlist_entry)[0], (*runlist_entry)[1]);
count = nvgpu_safe_add_u32(count, 1U);
*runlist_entry += runlist_entry_words;
(*entries_left)--;
}
nvgpu_rwsem_up_read(&tsg->ch_list_lock);
return count;
}
static u32 nvgpu_runlist_append_prio(struct nvgpu_fifo *f,
struct nvgpu_runlist *runlist,
u32 **runlist_entry,
u32 *entries_left,
u32 interleave_level)
{
u32 count = 0;
unsigned long tsgid;
nvgpu_log_fn(f->g, " ");
for_each_set_bit(tsgid, runlist->active_tsgs, f->num_channels) {
struct nvgpu_tsg *tsg = nvgpu_tsg_get_from_id(f->g, (u32)tsgid);
u32 entries;
if (tsg->interleave_level == interleave_level) {
entries = nvgpu_runlist_append_tsg(f->g, runlist,
runlist_entry, entries_left, tsg);
if (entries == RUNLIST_APPEND_FAILURE) {
return RUNLIST_APPEND_FAILURE;
}
count += entries;
}
}
return count;
}
static u32 nvgpu_runlist_append_hi(struct nvgpu_fifo *f,
struct nvgpu_runlist *runlist,
u32 **runlist_entry,
u32 *entries_left)
{
nvgpu_log_fn(f->g, " ");
/*
* No higher levels - this is where the "recursion" ends; just add all
* active TSGs at this level.
*/
return nvgpu_runlist_append_prio(f, runlist, runlist_entry,
entries_left,
NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_HIGH);
}
static u32 nvgpu_runlist_append_med(struct nvgpu_fifo *f,
struct nvgpu_runlist *runlist,
u32 **runlist_entry,
u32 *entries_left)
{
u32 count = 0;
unsigned long tsgid;
nvgpu_log_fn(f->g, " ");
for_each_set_bit(tsgid, runlist->active_tsgs, f->num_channels) {
struct nvgpu_tsg *tsg = nvgpu_tsg_get_from_id(f->g, (u32)tsgid);
u32 entries;
if (tsg->interleave_level !=
NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_MEDIUM) {
continue;
}
/* LEVEL_MEDIUM list starts with a LEVEL_HIGH, if any */
entries = nvgpu_runlist_append_hi(f, runlist,
runlist_entry, entries_left);
if (entries == RUNLIST_APPEND_FAILURE) {
return RUNLIST_APPEND_FAILURE;
}
count += entries;
entries = nvgpu_runlist_append_tsg(f->g, runlist,
runlist_entry, entries_left, tsg);
if (entries == RUNLIST_APPEND_FAILURE) {
return RUNLIST_APPEND_FAILURE;
}
count += entries;
}
return count;
}
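/*
* Interleaved construction with the lowest priority level as the outer
* loop. As an illustration, with TSGs H1 and H2 (high), M1 (medium) and
* L1 (low) the emitted order is H1 H2 M1 H1 H2 L1, so higher-level TSGs
* reappear around every lower-level entry and get scheduled more often.
*/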
static u32 nvgpu_runlist_append_low(struct nvgpu_fifo *f,
struct nvgpu_runlist *runlist,
u32 **runlist_entry,
u32 *entries_left)
{
u32 count = 0;
unsigned long tsgid;
nvgpu_log_fn(f->g, " ");
for_each_set_bit(tsgid, runlist->active_tsgs, f->num_channels) {
struct nvgpu_tsg *tsg = nvgpu_tsg_get_from_id(f->g, (u32)tsgid);
u32 entries;
if (tsg->interleave_level !=
NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW) {
continue;
}
/* The medium level starts with the highs, if any. */
entries = nvgpu_runlist_append_med(f, runlist,
runlist_entry, entries_left);
if (entries == RUNLIST_APPEND_FAILURE) {
return RUNLIST_APPEND_FAILURE;
}
count += entries;
entries = nvgpu_runlist_append_hi(f, runlist,
runlist_entry, entries_left);
if (entries == RUNLIST_APPEND_FAILURE) {
return RUNLIST_APPEND_FAILURE;
}
count += entries;
entries = nvgpu_runlist_append_tsg(f->g, runlist,
runlist_entry, entries_left, tsg);
if (entries == RUNLIST_APPEND_FAILURE) {
return RUNLIST_APPEND_FAILURE;
}
count += entries;
}
if (count == 0U) {
/*
* No transitions to fill with higher levels, so add
* the next level once. If that's empty too, we have only
* LEVEL_HIGH jobs.
*/
count = nvgpu_runlist_append_med(f, runlist,
runlist_entry, entries_left);
if (count == 0U) {
count = nvgpu_runlist_append_hi(f, runlist,
runlist_entry, entries_left);
}
}
return count;
}
static u32 nvgpu_runlist_append_flat(struct nvgpu_fifo *f,
struct nvgpu_runlist *runlist,
u32 **runlist_entry,
u32 *entries_left)
{
u32 count = 0, entries, i;
nvgpu_log_fn(f->g, " ");
/* Group by priority but don't interleave. High comes first. */
for (i = 0; i < NVGPU_FIFO_RUNLIST_INTERLEAVE_NUM_LEVELS; i++) {
u32 level = NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_HIGH - i;
entries = nvgpu_runlist_append_prio(f, runlist, runlist_entry,
entries_left, level);
if (entries == RUNLIST_APPEND_FAILURE) {
return RUNLIST_APPEND_FAILURE;
}
count += entries;
}
return count;
}
u32 nvgpu_runlist_construct_locked(struct nvgpu_fifo *f,
struct nvgpu_runlist *runlist,
u32 buf_id,
u32 max_entries)
{
u32 *runlist_entry_base = runlist->mem[buf_id].cpu_va;
/*
* The entry pointer and capacity counter that live on the stack here
* keep track of the current position and the remaining space when tsg
* and channel entries are ultimately appended.
*/
if (f->g->runlist_interleave) {
return nvgpu_runlist_append_low(f, runlist,
&runlist_entry_base, &max_entries);
} else {
return nvgpu_runlist_append_flat(f, runlist,
&runlist_entry_base, &max_entries);
}
}
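/*
* Mark a channel active or inactive in the runlist bitmaps and keep the
* TSG bookkeeping in sync. Returns true if the runlist contents actually
* changed and thus need to be reconstructed.
*/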
static bool nvgpu_runlist_modify_active_locked(struct gk20a *g,
struct nvgpu_runlist *runlist,
struct nvgpu_channel *ch, bool add)
{
struct nvgpu_tsg *tsg = NULL;
tsg = nvgpu_tsg_from_ch(ch);
if (tsg == NULL) {
/*
* Unsupported condition, but shouldn't break anything. Warn
* and tell the caller that nothing has changed.
*/
nvgpu_warn(g, "Bare channel in runlist update");
return false;
}
if (add) {
if (nvgpu_test_and_set_bit(ch->chid,
runlist->active_channels)) {
/* was already there */
return false;
} else {
/* new, and belongs to a tsg */
nvgpu_set_bit(tsg->tsgid, runlist->active_tsgs);
tsg->num_active_channels = nvgpu_safe_add_u32(
tsg->num_active_channels, 1U);
}
} else {
if (!nvgpu_test_and_clear_bit(ch->chid,
runlist->active_channels)) {
/* wasn't there */
return false;
} else {
tsg->num_active_channels = nvgpu_safe_sub_u32(
tsg->num_active_channels, 1U);
if (tsg->num_active_channels == 0U) {
/* was the only member of this tsg */
nvgpu_clear_bit(tsg->tsgid,
runlist->active_tsgs);
}
}
}
return true;
}
static int nvgpu_runlist_reconstruct_locked(struct gk20a *g,
struct nvgpu_runlist *runlist,
u32 buf_id, bool add_entries)
{
u32 num_entries;
struct nvgpu_fifo *f = &g->fifo;
rl_dbg(g, "[%u] switch to new buffer 0x%16llx",
runlist->id, (u64)nvgpu_mem_get_addr(g, &runlist->mem[buf_id]));
if (!add_entries) {
runlist->count = 0;
return 0;
}
num_entries = nvgpu_runlist_construct_locked(f, runlist, buf_id,
f->num_runlist_entries);
if (num_entries == RUNLIST_APPEND_FAILURE) {
return -E2BIG;
}
runlist->count = num_entries;
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 10_3), "Bug 2277532")
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 14_4), "Bug 2277532")
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 15_6), "Bug 2277532")
WARN_ON(runlist->count > f->num_runlist_entries);
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 10_3))
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 14_4))
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 15_6))
return 0;
}
int nvgpu_runlist_update_locked(struct gk20a *g, struct nvgpu_runlist *rl,
struct nvgpu_channel *ch, bool add,
bool wait_for_finish)
{
int ret = 0;
u32 buf_id;
bool add_entries;
if (ch != NULL) {
bool update = nvgpu_runlist_modify_active_locked(g, rl, ch, add);
if (!update) {
/* no change in runlist contents */
return 0;
}
/* had a channel to update, so reconstruct */
add_entries = true;
} else {
/* no channel; add means update all, !add means clear all */
add_entries = add;
}
/* double buffering, swap to next */
buf_id = (rl->cur_buffer == 0U) ? 1U : 0U;
ret = nvgpu_runlist_reconstruct_locked(g, rl, buf_id, add_entries);
if (ret != 0) {
return ret;
}
g->ops.runlist.hw_submit(g, rl->id, rl->count, buf_id);
if (wait_for_finish) {
ret = g->ops.runlist.wait_pending(g, rl->id);
if (ret == -ETIMEDOUT) {
nvgpu_err(g, "runlist %d update timeout", rl->id);
/* let the caller trigger runlist update timeout recovery */
return ret;
} else {
if (ret == -EINTR) {
nvgpu_err(g, "runlist update interrupted");
}
}
}
rl->cur_buffer = buf_id;
return ret;
}
#ifdef CONFIG_NVGPU_CHANNEL_TSG_SCHEDULING
/* trigger host to expire current timeslice and reschedule runlist from front */
int nvgpu_runlist_reschedule(struct nvgpu_channel *ch, bool preempt_next,
bool wait_preempt)
{
struct gk20a *g = ch->g;
struct nvgpu_runlist *runlist;
#ifdef CONFIG_NVGPU_LS_PMU
u32 token = PMU_INVALID_MUTEX_OWNER_ID;
int mutex_ret = 0;
#endif
int ret = 0;
runlist = ch->runlist;
if (nvgpu_mutex_tryacquire(&runlist->runlist_lock) == 0) {
return -EBUSY;
}
#ifdef CONFIG_NVGPU_LS_PMU
mutex_ret = nvgpu_pmu_lock_acquire(
g, g->pmu, PMU_MUTEX_ID_FIFO, &token);
#endif
g->ops.runlist.hw_submit(
g, runlist->id, runlist->count, runlist->cur_buffer);
if (preempt_next) {
if (g->ops.runlist.reschedule_preempt_next_locked(ch,
wait_preempt) != 0) {
nvgpu_err(g, "reschedule preempt next failed");
}
}
if (g->ops.runlist.wait_pending(g, runlist->id) != 0) {
nvgpu_err(g, "wait pending failed for runlist %u",
runlist->id);
}
#ifdef CONFIG_NVGPU_LS_PMU
if (mutex_ret == 0) {
if (nvgpu_pmu_lock_release(g, g->pmu,
PMU_MUTEX_ID_FIFO, &token) != 0) {
nvgpu_err(g, "failed to release PMU lock");
}
}
#endif
nvgpu_mutex_release(&runlist->runlist_lock);
return ret;
}
#endif
/*
* Add/remove a channel from the runlist. Special cases below:
* runlist->active_channels will NOT be changed.
* (ch == NULL && !add) means remove all active channels from the runlist.
* (ch == NULL && add) means restore all active channels on the runlist.
*/
static int nvgpu_runlist_do_update(struct gk20a *g, struct nvgpu_runlist *rl,
struct nvgpu_channel *ch,
bool add, bool wait_for_finish)
{
#ifdef CONFIG_NVGPU_LS_PMU
u32 token = PMU_INVALID_MUTEX_OWNER_ID;
int mutex_ret = 0;
#endif
int ret = 0;
nvgpu_log_fn(g, " ");
nvgpu_mutex_acquire(&rl->runlist_lock);
#ifdef CONFIG_NVGPU_LS_PMU
mutex_ret = nvgpu_pmu_lock_acquire(g, g->pmu,
PMU_MUTEX_ID_FIFO, &token);
#endif
ret = nvgpu_runlist_update_locked(g, rl, ch, add, wait_for_finish);
#ifdef CONFIG_NVGPU_LS_PMU
if (mutex_ret == 0) {
if (nvgpu_pmu_lock_release(g, g->pmu,
PMU_MUTEX_ID_FIFO, &token) != 0) {
nvgpu_err(g, "failed to release PMU lock");
}
}
#endif
nvgpu_mutex_release(&rl->runlist_lock);
if (ret == -ETIMEDOUT) {
nvgpu_rc_runlist_update(g, rl->id);
}
return ret;
}
int nvgpu_runlist_update(struct gk20a *g, struct nvgpu_runlist *rl,
struct nvgpu_channel *ch,
bool add, bool wait_for_finish)
{
nvgpu_assert(ch != NULL);
return nvgpu_runlist_do_update(g, rl, ch, add, wait_for_finish);
}
int nvgpu_runlist_reload(struct gk20a *g, struct nvgpu_runlist *rl,
bool add, bool wait_for_finish)
{
return nvgpu_runlist_do_update(g, rl, NULL, add, wait_for_finish);
}
int nvgpu_runlist_reload_ids(struct gk20a *g, u32 runlist_ids, bool add)
{
struct nvgpu_fifo *f = &g->fifo;
int ret = -EINVAL;
unsigned long runlist_id = 0;
int errcode;
unsigned long ulong_runlist_ids = (unsigned long)runlist_ids;
if (g == NULL) {
goto end;
}
ret = 0;
for_each_set_bit(runlist_id, &ulong_runlist_ids, 32U) {
/* Capture the last failure error code */
errcode = g->ops.runlist.reload(g,
f->runlists[runlist_id], add, true);
if (errcode != 0) {
nvgpu_err(g,
"failed to update_runlist %lu %d",
runlist_id, errcode);
ret = errcode;
}
}
end:
return ret;
}
const char *nvgpu_runlist_interleave_level_name(u32 interleave_level)
{
const char *ret_string = NULL;
switch (interleave_level) {
case NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW:
ret_string = "LOW";
break;
case NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_MEDIUM:
ret_string = "MEDIUM";
break;
case NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_HIGH:
ret_string = "HIGH";
break;
default:
ret_string = "?";
break;
}
return ret_string;
}
void nvgpu_runlist_set_state(struct gk20a *g, u32 runlists_mask,
u32 runlist_state)
{
#ifdef CONFIG_NVGPU_LS_PMU
u32 token = PMU_INVALID_MUTEX_OWNER_ID;
int mutex_ret = 0;
#endif
nvgpu_log(g, gpu_dbg_info, "runlist mask = 0x%08x state = 0x%08x",
runlists_mask, runlist_state);
#ifdef CONFIG_NVGPU_LS_PMU
mutex_ret = nvgpu_pmu_lock_acquire(g, g->pmu,
PMU_MUTEX_ID_FIFO, &token);
#endif
g->ops.runlist.write_state(g, runlists_mask, runlist_state);
#ifdef CONFIG_NVGPU_LS_PMU
if (mutex_ret == 0) {
if (nvgpu_pmu_lock_release(g, g->pmu,
PMU_MUTEX_ID_FIFO, &token) != 0) {
nvgpu_err(g, "failed to release PMU lock");
}
}
#endif
}
void nvgpu_runlist_cleanup_sw(struct gk20a *g)
{
struct nvgpu_fifo *f = &g->fifo;
u32 i, j;
struct nvgpu_runlist *runlist;
if ((f->runlists == NULL) || (f->active_runlists == NULL)) {
return;
}
g = f->g;
for (i = 0; i < f->num_runlists; i++) {
runlist = &f->active_runlists[i];
for (j = 0; j < MAX_RUNLIST_BUFFERS; j++) {
nvgpu_dma_free(g, &runlist->mem[j]);
}
nvgpu_kfree(g, runlist->active_channels);
runlist->active_channels = NULL;
nvgpu_kfree(g, runlist->active_tsgs);
runlist->active_tsgs = NULL;
nvgpu_mutex_destroy(&runlist->runlist_lock);
f->runlists[runlist->id] = NULL;
}
nvgpu_kfree(g, f->active_runlists);
f->active_runlists = NULL;
f->num_runlists = 0;
nvgpu_kfree(g, f->runlists);
f->runlists = NULL;
f->max_runlists = 0;
}
void nvgpu_runlist_init_enginfo(struct gk20a *g, struct nvgpu_fifo *f)
{
struct nvgpu_runlist *runlist;
const struct nvgpu_device *dev;
u32 i, j;
nvgpu_log_fn(g, " ");
if (g->is_virtual) {
return;
}
for (i = 0; i < f->num_runlists; i++) {
runlist = &f->active_runlists[i];
(void) g->ops.fifo.find_pbdma_for_runlist(g,
runlist->id,
&runlist->pbdma_bitmask);
nvgpu_log(g, gpu_dbg_info, "runlist %d: pbdma bitmask 0x%x",
runlist->id, runlist->pbdma_bitmask);
for (j = 0; j < f->num_engines; j++) {
dev = f->active_engines[j];
if (dev->runlist_id == runlist->id) {
runlist->eng_bitmask |= BIT32(dev->engine_id);
}
}
nvgpu_log(g, gpu_dbg_info, "runlist %d: act eng bitmask 0x%x",
runlist->id, runlist->eng_bitmask);
}
nvgpu_log_fn(g, "done");
}
static int nvgpu_init_active_runlist_mapping(struct gk20a *g)
{
struct nvgpu_runlist *runlist;
struct nvgpu_fifo *f = &g->fifo;
unsigned int runlist_id;
size_t runlist_size;
u32 i, j;
int err = 0;
rl_dbg(g, "Building active runlist map.");
/*
* In most cases we want to loop through active runlists only. Here,
* however, we need to loop through all possible runlists to build the
* mapping between runlists[runlist_id] and active_runlists[i].
*/
i = 0U;
for (runlist_id = 0; runlist_id < f->max_runlists; runlist_id++) {
if (!nvgpu_engine_is_valid_runlist_id(g, runlist_id)) {
/* skip inactive runlist */
rl_dbg(g, " Skipping invalid runlist: %d", runlist_id);
continue;
}
rl_dbg(g, " Configuring HW runlist: %u", runlist_id);
rl_dbg(g, " SW runlist index to HW: %u -> %u", i, runlist_id);
runlist = &f->active_runlists[i];
runlist->id = runlist_id;
f->runlists[runlist_id] = runlist;
i = nvgpu_safe_add_u32(i, 1U);
runlist->active_channels =
nvgpu_kzalloc(g, DIV_ROUND_UP(f->num_channels,
BITS_PER_BYTE));
if (runlist->active_channels == NULL) {
err = -ENOMEM;
goto clean_up_runlist;
}
runlist->active_tsgs =
nvgpu_kzalloc(g, DIV_ROUND_UP(f->num_channels,
BITS_PER_BYTE));
if (runlist->active_tsgs == NULL) {
err = -ENOMEM;
goto clean_up_runlist;
}
runlist_size = (size_t)f->runlist_entry_size *
(size_t)f->num_runlist_entries;
rl_dbg(g, " RL entries: %d", f->num_runlist_entries);
rl_dbg(g, " RL size %zu", runlist_size);
for (j = 0; j < MAX_RUNLIST_BUFFERS; j++) {
err = nvgpu_dma_alloc_flags_sys(g,
g->is_virtual ?
0ULL : NVGPU_DMA_PHYSICALLY_ADDRESSED,
runlist_size,
&runlist->mem[j]);
if (err != 0) {
nvgpu_err(g, "memory allocation failed");
err = -ENOMEM;
goto clean_up_runlist;
}
}
nvgpu_mutex_init(&runlist->runlist_lock);
/*
* None of the buffers is pinned if this value doesn't change.
* Otherwise, one of them (cur_buffer) has been pinned.
*/
runlist->cur_buffer = MAX_RUNLIST_BUFFERS;
}
return 0;
clean_up_runlist:
return err;
}
int nvgpu_runlist_setup_sw(struct gk20a *g)
{
struct nvgpu_fifo *f = &g->fifo;
u32 num_runlists = 0U;
unsigned int runlist_id;
int err = 0;
rl_dbg(g, "Initializing Runlists");
nvgpu_spinlock_init(&f->runlist_submit_lock);
f->runlist_entry_size = g->ops.runlist.entry_size(g);
f->num_runlist_entries = g->ops.runlist.length_max(g);
f->max_runlists = g->ops.runlist.count_max(g);
f->runlists = nvgpu_kzalloc(g, nvgpu_safe_mult_u64(
sizeof(*f->runlists), f->max_runlists));
if (f->runlists == NULL) {
err = -ENOMEM;
goto clean_up_runlist;
}
for (runlist_id = 0; runlist_id < f->max_runlists; runlist_id++) {
if (nvgpu_engine_is_valid_runlist_id(g, runlist_id)) {
num_runlists = nvgpu_safe_add_u32(num_runlists, 1U);
}
}
f->num_runlists = num_runlists;
f->active_runlists = nvgpu_kzalloc(g, nvgpu_safe_mult_u64(
sizeof(*f->active_runlists), num_runlists));
if (f->active_runlists == NULL) {
err = -ENOMEM;
goto clean_up_runlist;
}
rl_dbg(g, " Max runlists: %u", f->max_runlists);
rl_dbg(g, " Active runlists: %u", f->num_runlists);
rl_dbg(g, " RL entry size: %u bytes", f->runlist_entry_size);
rl_dbg(g, " Max RL entries: %u", f->num_runlist_entries);
err = nvgpu_init_active_runlist_mapping(g);
if (err != 0) {
goto clean_up_runlist;
}
g->ops.runlist.init_enginfo(g, f);
return 0;
clean_up_runlist:
nvgpu_runlist_cleanup_sw(g);
rl_dbg(g, "fail");
return err;
}
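/*
* Build a bitmask of runlists affected by the given id (TSG or channel),
* engine bitmask and/or pbdma bitmask. If none of these are known, all
* active runlists are included in the mask.
*/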
u32 nvgpu_runlist_get_runlists_mask(struct gk20a *g, u32 id,
unsigned int id_type, u32 act_eng_bitmask, u32 pbdma_bitmask)
{
u32 i, runlists_mask = 0;
struct nvgpu_fifo *f = &g->fifo;
struct nvgpu_runlist *runlist;
bool bitmask_disabled = ((act_eng_bitmask == 0U) &&
(pbdma_bitmask == 0U));
/* engine and/or pbdma ids are known */
if (!bitmask_disabled) {
for (i = 0U; i < f->num_runlists; i++) {
runlist = &f->active_runlists[i];
if ((runlist->eng_bitmask & act_eng_bitmask) != 0U) {
runlists_mask |= BIT32(runlist->id);
}
if ((runlist->pbdma_bitmask & pbdma_bitmask) != 0U) {
runlists_mask |= BIT32(runlist->id);
}
}
}
if (id_type != ID_TYPE_UNKNOWN) {
if (id_type == ID_TYPE_TSG) {
runlist = f->tsg[id].runlist;
} else {
runlist = f->channel[id].runlist;
}
if (runlist == NULL) {
/* Warning on Linux, real assert on QNX. */
nvgpu_assert(runlist != NULL);
} else {
runlists_mask |= BIT32(runlist->id);
}
} else {
if (bitmask_disabled) {
nvgpu_log(g, gpu_dbg_info, "id_type_unknown, engine "
"and pbdma ids are unknown");
for (i = 0U; i < f->num_runlists; i++) {
runlist = &f->active_runlists[i];
runlists_mask |= BIT32(runlist->id);
}
} else {
nvgpu_log(g, gpu_dbg_info, "id_type_unknown, engine "
"and/or pbdma ids are known");
}
}
nvgpu_log(g, gpu_dbg_info, "runlists_mask = 0x%08x", runlists_mask);
return runlists_mask;
}
void nvgpu_runlist_unlock_runlists(struct gk20a *g, u32 runlists_mask)
{
struct nvgpu_fifo *f = &g->fifo;
struct nvgpu_runlist *runlist;
u32 i;
nvgpu_log_info(g, "release runlist_lock for runlists set in "
"runlists_mask: 0x%08x", runlists_mask);
for (i = 0U; i < f->num_runlists; i++) {
runlist = &f->active_runlists[i];
if ((BIT32(i) & runlists_mask) != 0U) {
nvgpu_mutex_release(&runlist->runlist_lock);
}
}
}

View File

@@ -0,0 +1,837 @@
/*
* Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/nvgpu_init.h>
#include <nvgpu/channel.h>
#include <nvgpu/ltc.h>
#include <nvgpu/os_sched.h>
#include <nvgpu/utils.h>
#include <nvgpu/channel.h>
#include <nvgpu/channel_sync.h>
#include <nvgpu/channel_sync_syncpt.h>
#include <nvgpu/watchdog.h>
#include <nvgpu/job.h>
#include <nvgpu/priv_cmdbuf.h>
#include <nvgpu/bug.h>
#include <nvgpu/fence.h>
#include <nvgpu/swprofile.h>
#include <nvgpu/vpr.h>
#include <nvgpu/trace.h>
#include <nvgpu/nvhost.h>
#include <nvgpu/user_fence.h>
#include <nvgpu/fifo/swprofile.h>
/*
* We might need two extra gpfifo entries per submit - one for pre fence and
* one for post fence.
*/
#define EXTRA_GPFIFO_ENTRIES 2U
static int nvgpu_submit_create_wait_cmd(struct nvgpu_channel *c,
struct nvgpu_channel_fence *fence,
struct priv_cmd_entry **wait_cmd, bool flag_sync_fence)
{
/*
* A single input sync fd may contain multiple fences. The preallocated
* priv cmdbuf space allows exactly one per submit in the worst case.
* Require at most one wait for consistent deterministic submits; if
* there are more and not enough space, the submit returns -EAGAIN in
* non-deterministic mode.
*/
u32 max_wait_cmds = nvgpu_channel_is_deterministic(c) ?
1U : 0U;
int err;
if (flag_sync_fence) {
nvgpu_assert(fence->id <= (u32)INT_MAX);
err = nvgpu_channel_sync_wait_fence_fd(c->sync,
(int)fence->id, wait_cmd, max_wait_cmds);
} else {
struct nvgpu_channel_sync_syncpt *sync_syncpt;
sync_syncpt = nvgpu_channel_sync_to_syncpt(c->sync);
if (sync_syncpt != NULL) {
err = nvgpu_channel_sync_wait_syncpt(sync_syncpt,
fence->id, fence->value, wait_cmd);
} else {
err = -EINVAL;
}
}
return err;
}
static int nvgpu_submit_create_incr_cmd(struct nvgpu_channel *c,
struct priv_cmd_entry **incr_cmd,
struct nvgpu_fence_type *post_fence, bool flag_fence_get,
bool need_wfi, bool need_sync_fence)
{
int err;
if (flag_fence_get) {
err = nvgpu_channel_sync_incr_user(c->sync, incr_cmd,
post_fence, need_wfi, need_sync_fence);
} else {
err = nvgpu_channel_sync_incr(c->sync, incr_cmd,
post_fence, need_sync_fence);
}
return err;
}
/*
* Handle the submit synchronization - pre-fences and post-fences.
*/
static int nvgpu_submit_prepare_syncs(struct nvgpu_channel *c,
struct nvgpu_channel_fence *fence,
struct nvgpu_channel_job *job,
u32 flags)
{
struct gk20a *g = c->g;
bool need_sync_fence;
bool new_sync_created = false;
int err = 0;
bool need_wfi = (flags & NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI) == 0U;
bool flag_fence_get = (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) != 0U;
bool flag_sync_fence = (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) != 0U;
bool flag_fence_wait = (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) != 0U;
if (g->aggressive_sync_destroy_thresh != 0U) {
nvgpu_mutex_acquire(&c->sync_lock);
if (c->sync == NULL) {
c->sync = nvgpu_channel_sync_create(c);
if (c->sync == NULL) {
err = -ENOMEM;
goto clean_up_unlock;
}
new_sync_created = true;
}
nvgpu_channel_sync_get_ref(c->sync);
}
if ((g->ops.channel.set_syncpt != NULL) && new_sync_created) {
err = g->ops.channel.set_syncpt(c);
if (err != 0) {
goto clean_up_put_sync;
}
}
/*
* Optionally insert syncpt/semaphore wait in the beginning of gpfifo
* submission when user requested.
*/
if (flag_fence_wait) {
err = nvgpu_submit_create_wait_cmd(c, fence, &job->wait_cmd,
flag_sync_fence);
if (err != 0) {
goto clean_up_put_sync;
}
}
need_sync_fence = flag_fence_get && flag_sync_fence;
/*
* Always generate an increment at the end of a GPFIFO submission. When
* we do job tracking, post fences are needed for various reasons even
* if not requested by user.
*/
err = nvgpu_submit_create_incr_cmd(c, &job->incr_cmd, &job->post_fence,
flag_fence_get, need_wfi, need_sync_fence);
if (err != 0) {
goto clean_up_wait_cmd;
}
if (g->aggressive_sync_destroy_thresh != 0U) {
nvgpu_mutex_release(&c->sync_lock);
}
return 0;
clean_up_wait_cmd:
if (job->wait_cmd != NULL) {
nvgpu_priv_cmdbuf_rollback(c->priv_cmd_q, job->wait_cmd);
}
job->wait_cmd = NULL;
clean_up_put_sync:
if (g->aggressive_sync_destroy_thresh != 0U) {
if (nvgpu_channel_sync_put_ref_and_check(c->sync)
&& g->aggressive_sync_destroy) {
nvgpu_channel_sync_destroy(c->sync);
}
}
clean_up_unlock:
if (g->aggressive_sync_destroy_thresh != 0U) {
nvgpu_mutex_release(&c->sync_lock);
}
return err;
}
static void nvgpu_submit_append_priv_cmdbuf(struct nvgpu_channel *c,
struct priv_cmd_entry *cmd)
{
struct gk20a *g = c->g;
struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem;
struct nvgpu_gpfifo_entry gpfifo_entry;
u64 gva;
u32 size;
nvgpu_priv_cmdbuf_finish(g, cmd, &gva, &size);
g->ops.pbdma.format_gpfifo_entry(g, &gpfifo_entry, gva, size);
nvgpu_mem_wr_n(g, gpfifo_mem,
c->gpfifo.put * (u32)sizeof(gpfifo_entry),
&gpfifo_entry, (u32)sizeof(gpfifo_entry));
c->gpfifo.put = (c->gpfifo.put + 1U) & (c->gpfifo.entry_num - 1U);
}
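/*
* Copy user gpfifo entries straight into the gpfifo ring buffer,
* splitting the copy in two when the destination wraps around the end
* of the ring.
*/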
static int nvgpu_submit_append_gpfifo_user_direct(struct nvgpu_channel *c,
struct nvgpu_gpfifo_userdata userdata,
u32 num_entries)
{
struct gk20a *g = c->g;
struct nvgpu_gpfifo_entry *gpfifo_cpu = c->gpfifo.mem.cpu_va;
u32 gpfifo_size = c->gpfifo.entry_num;
u32 len = num_entries;
u32 start = c->gpfifo.put;
u32 end = start + len; /* exclusive */
int err;
nvgpu_speculation_barrier();
if (end > gpfifo_size) {
/* wrap-around */
u32 length0 = gpfifo_size - start;
u32 length1 = len - length0;
err = g->os_channel.copy_user_gpfifo(
&gpfifo_cpu[start], userdata,
0, length0);
if (err != 0) {
return err;
}
err = g->os_channel.copy_user_gpfifo(
gpfifo_cpu, userdata,
length0, length1);
if (err != 0) {
return err;
}
} else {
err = g->os_channel.copy_user_gpfifo(
&gpfifo_cpu[start], userdata,
0, len);
if (err != 0) {
return err;
}
}
return 0;
}
static void nvgpu_submit_append_gpfifo_common(struct nvgpu_channel *c,
struct nvgpu_gpfifo_entry *src, u32 num_entries)
{
struct gk20a *g = c->g;
struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem;
/* in bytes */
u32 gpfifo_size =
c->gpfifo.entry_num * (u32)sizeof(struct nvgpu_gpfifo_entry);
u32 len = num_entries * (u32)sizeof(struct nvgpu_gpfifo_entry);
u32 start = c->gpfifo.put * (u32)sizeof(struct nvgpu_gpfifo_entry);
u32 end = start + len; /* exclusive */
if (end > gpfifo_size) {
/* wrap-around */
u32 length0 = gpfifo_size - start;
u32 length1 = len - length0;
struct nvgpu_gpfifo_entry *src2 = &src[length0];
nvgpu_mem_wr_n(g, gpfifo_mem, start, src, length0);
nvgpu_mem_wr_n(g, gpfifo_mem, 0, src2, length1);
} else {
nvgpu_mem_wr_n(g, gpfifo_mem, start, src, len);
}
}
/*
* Copy source gpfifo entries into the gpfifo ring buffer, potentially
* splitting into two memcpys to handle wrap-around.
*/
static int nvgpu_submit_append_gpfifo(struct nvgpu_channel *c,
struct nvgpu_gpfifo_entry *kern_gpfifo,
struct nvgpu_gpfifo_userdata userdata,
u32 num_entries)
{
int err;
if ((kern_gpfifo == NULL)
#ifdef CONFIG_NVGPU_DGPU
&& (c->gpfifo.pipe == NULL)
#endif
) {
/*
* This path (from userspace to sysmem) is special in order to
* avoid two copies unnecessarily (from user to pipe, then from
* pipe to gpu sysmem buffer).
*/
err = nvgpu_submit_append_gpfifo_user_direct(c, userdata,
num_entries);
if (err != 0) {
return err;
}
}
#ifdef CONFIG_NVGPU_DGPU
else if (kern_gpfifo == NULL) {
/* from userspace to vidmem, use the common path */
err = c->g->os_channel.copy_user_gpfifo(c->gpfifo.pipe,
userdata, 0, num_entries);
if (err != 0) {
return err;
}
nvgpu_submit_append_gpfifo_common(c, c->gpfifo.pipe,
num_entries);
}
#endif
else {
/*
* From kernel to either sysmem or vidmem; copy_user_gpfifo is not
* needed, so use the common path.
*/
nvgpu_submit_append_gpfifo_common(c, kern_gpfifo, num_entries);
}
trace_write_pushbuffers(c, num_entries);
c->gpfifo.put = (c->gpfifo.put + num_entries) &
(c->gpfifo.entry_num - 1U);
return 0;
}
static int nvgpu_submit_prepare_gpfifo_track(struct nvgpu_channel *c,
struct nvgpu_gpfifo_entry *gpfifo,
struct nvgpu_gpfifo_userdata userdata,
u32 num_entries,
u32 flags,
struct nvgpu_channel_fence *fence,
struct nvgpu_fence_type **fence_out,
struct nvgpu_swprofiler *profiler,
bool need_deferred_cleanup)
{
bool skip_buffer_refcounting = (flags &
NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING) != 0U;
struct nvgpu_channel_job *job = NULL;
int err;
nvgpu_channel_joblist_lock(c);
err = nvgpu_channel_alloc_job(c, &job);
nvgpu_channel_joblist_unlock(c);
if (err != 0) {
return err;
}
err = nvgpu_submit_prepare_syncs(c, fence, job, flags);
if (err != 0) {
goto clean_up_job;
}
nvgpu_swprofile_snapshot(profiler, PROF_KICKOFF_JOB_TRACKING);
/*
* wait_cmd can be unset even if flag_fence_wait exists; the
* android sync framework for example can provide entirely
* empty fences that act like trivially expired waits.
*/
if (job->wait_cmd != NULL) {
nvgpu_submit_append_priv_cmdbuf(c, job->wait_cmd);
}
err = nvgpu_submit_append_gpfifo(c, gpfifo, userdata, num_entries);
if (err != 0) {
goto clean_up_gpfifo_wait;
}
nvgpu_submit_append_priv_cmdbuf(c, job->incr_cmd);
err = nvgpu_channel_add_job(c, job, skip_buffer_refcounting);
if (err != 0) {
goto clean_up_gpfifo_incr;
}
nvgpu_channel_sync_mark_progress(c->sync, need_deferred_cleanup);
if (fence_out != NULL) {
/* This fence ref is going somewhere else but it's owned by the
* job; the caller is expected to release it promptly, so that
* a subsequent job cannot reclaim its memory.
*/
*fence_out = nvgpu_fence_get(&job->post_fence);
}
return 0;
clean_up_gpfifo_incr:
/*
* undo the incr priv cmdbuf and the user entries:
* new gp.put =
* (gp.put - (1 + num_entries)) & (gp.entry_num - 1) =
* (gp.put + (gp.entry_num - (1 + num_entries))) & (gp.entry_num - 1)
* the + entry_num does not affect the result but avoids wrapping below
* zero for MISRA, although it would be well defined.
*/
c->gpfifo.put =
(nvgpu_safe_add_u32(c->gpfifo.put,
nvgpu_safe_sub_u32(c->gpfifo.entry_num,
nvgpu_safe_add_u32(1U, num_entries)))) &
nvgpu_safe_sub_u32(c->gpfifo.entry_num, 1U);
clean_up_gpfifo_wait:
if (job->wait_cmd != NULL) {
/*
* undo the wait priv cmdbuf entry:
* gp.put =
* (gp.put - 1) & (gp.entry_num - 1) =
* (gp.put + (gp.entry_num - 1)) & (gp.entry_num - 1)
* same as above with the gp.entry_num on the left side.
*/
c->gpfifo.put =
nvgpu_safe_add_u32(c->gpfifo.put,
nvgpu_safe_sub_u32(c->gpfifo.entry_num, 1U)) &
nvgpu_safe_sub_u32(c->gpfifo.entry_num, 1U);
}
nvgpu_fence_put(&job->post_fence);
nvgpu_priv_cmdbuf_rollback(c->priv_cmd_q, job->incr_cmd);
if (job->wait_cmd != NULL) {
nvgpu_priv_cmdbuf_rollback(c->priv_cmd_q, job->wait_cmd);
}
clean_up_job:
nvgpu_channel_free_job(c, job);
return err;
}
static int nvgpu_submit_prepare_gpfifo_notrack(struct nvgpu_channel *c,
struct nvgpu_gpfifo_entry *gpfifo,
struct nvgpu_gpfifo_userdata userdata,
u32 num_entries,
struct nvgpu_fence_type **fence_out,
struct nvgpu_swprofiler *profiler)
{
int err;
nvgpu_swprofile_snapshot(profiler, PROF_KICKOFF_JOB_TRACKING);
err = nvgpu_submit_append_gpfifo(c, gpfifo, userdata,
num_entries);
if (err != 0) {
return err;
}
if (fence_out != NULL) {
*fence_out = NULL;
}
return 0;
}
static int check_gpfifo_capacity(struct nvgpu_channel *c, u32 required)
{
/*
* Make sure we have enough space for the gpfifo entries. Check the
* cached values first and then read from HW. If there is no space,
* return -EAGAIN and let userspace decide whether to retry the request.
*/
if (nvgpu_channel_get_gpfifo_free_count(c) < required) {
if (nvgpu_channel_update_gpfifo_get_and_get_free_count(c) <
required) {
return -EAGAIN;
}
}
return 0;
}
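/*
* Common submit path: verify gpfifo capacity (including the extra
* kernel entries), append the entries with or without job tracking, and
* finally publish the new GP_PUT to the hardware.
*/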
static int nvgpu_do_submit(struct nvgpu_channel *c,
struct nvgpu_gpfifo_entry *gpfifo,
struct nvgpu_gpfifo_userdata userdata,
u32 num_entries,
u32 flags,
struct nvgpu_channel_fence *fence,
struct nvgpu_fence_type **fence_out,
struct nvgpu_swprofiler *profiler,
bool need_job_tracking,
bool need_deferred_cleanup)
{
struct gk20a *g = c->g;
int err;
#ifdef CONFIG_NVGPU_TRACE
trace_gk20a_channel_submit_gpfifo(g->name,
c->chid,
num_entries,
flags,
fence ? fence->id : 0,
fence ? fence->value : 0);
#endif
nvgpu_log_info(g, "pre-submit put %d, get %d, size %d",
c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
err = check_gpfifo_capacity(c, num_entries + EXTRA_GPFIFO_ENTRIES);
if (err != 0) {
return err;
}
if (need_job_tracking) {
err = nvgpu_submit_prepare_gpfifo_track(c, gpfifo,
userdata, num_entries, flags, fence,
fence_out, profiler, need_deferred_cleanup);
} else {
err = nvgpu_submit_prepare_gpfifo_notrack(c, gpfifo,
userdata, num_entries, fence_out, profiler);
}
if (err != 0) {
return err;
}
nvgpu_swprofile_snapshot(profiler, PROF_KICKOFF_APPEND);
g->ops.userd.gp_put(g, c);
return 0;
}
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
static int nvgpu_submit_deterministic(struct nvgpu_channel *c,
struct nvgpu_gpfifo_entry *gpfifo,
struct nvgpu_gpfifo_userdata userdata,
u32 num_entries,
u32 flags,
struct nvgpu_channel_fence *fence,
struct nvgpu_fence_type **fence_out,
struct nvgpu_swprofiler *profiler)
{
bool skip_buffer_refcounting = (flags &
NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING) != 0U;
bool flag_fence_wait = (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) != 0U;
bool flag_fence_get = (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) != 0U;
bool flag_sync_fence = (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) != 0U;
struct gk20a *g = c->g;
bool need_job_tracking;
int err = 0;
nvgpu_assert(nvgpu_channel_is_deterministic(c));
/* sync framework on post fences would not be deterministic */
if (flag_fence_get && flag_sync_fence) {
return -EINVAL;
}
/* this would be O(n) */
if (!skip_buffer_refcounting) {
return -EINVAL;
}
/* the watchdog needs periodic job cleanup */
if (nvgpu_channel_wdt_enabled(c->wdt)) {
return -EINVAL;
}
/*
* Job tracking is necessary on deterministic channels if and only if
* pre- or post-fence functionality is needed. If not, a fast submit
* can be done (ie. only need to write out userspace GPFIFO entries and
* update GP_PUT).
*/
need_job_tracking = flag_fence_wait || flag_fence_get;
if (need_job_tracking) {
/* nvgpu_semaphore is dynamically allocated, not pooled */
if (!nvgpu_has_syncpoints(g)) {
return -EINVAL;
}
/* dynamic sync allocation wouldn't be deterministic */
if (g->aggressive_sync_destroy_thresh != 0U) {
return -EINVAL;
}
/*
* (Try to) clean up a single job, if available. Each job
* requires the same amount of metadata, so this is enough for
* the job list, fence pool, and private command buffers that
* this submit will need.
*
* This submit might still need more gpfifo space than what the
* previous has used. The job metadata doesn't look at it
* though - the hw GP_GET pointer can be much further away than
* our metadata pointers; gpfifo space is "freed" by the HW.
*/
nvgpu_channel_clean_up_deterministic_job(c);
}
/* Grab access to HW to deal with do_idle */
nvgpu_rwsem_down_read(&g->deterministic_busy);
if (c->deterministic_railgate_allowed) {
/*
* Nope - this channel has dropped its own power ref. As
* deterministic submits don't hold power on per each submitted
* job like normal ones do, the GPU might railgate any time now
* and thus submit is disallowed.
*/
err = -EINVAL;
goto clean_up;
}
err = nvgpu_do_submit(c, gpfifo, userdata, num_entries, flags, fence,
fence_out, profiler, need_job_tracking, false);
if (err != 0) {
goto clean_up;
}
/* No hw access beyond this point */
nvgpu_rwsem_up_read(&g->deterministic_busy);
return 0;
clean_up:
nvgpu_log_fn(g, "fail %d", err);
nvgpu_rwsem_up_read(&g->deterministic_busy);
return err;
}
#endif
static int nvgpu_submit_nondeterministic(struct nvgpu_channel *c,
struct nvgpu_gpfifo_entry *gpfifo,
struct nvgpu_gpfifo_userdata userdata,
u32 num_entries,
u32 flags,
struct nvgpu_channel_fence *fence,
struct nvgpu_fence_type **fence_out,
struct nvgpu_swprofiler *profiler)
{
bool skip_buffer_refcounting = (flags &
NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING) != 0U;
bool flag_fence_wait = (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) != 0U;
bool flag_fence_get = (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) != 0U;
struct gk20a *g = c->g;
bool need_job_tracking;
int err = 0;
nvgpu_assert(!nvgpu_channel_is_deterministic(c));
/*
* Job tracking is necessary for any of the following conditions on
* non-deterministic channels:
* - pre- or post-fence functionality
* - GPU rail-gating
* - VPR resize enabled
* - buffer refcounting
* - channel watchdog
*
* If none of the conditions are met, then job tracking is not
* required and a fast submit can be done (ie. only need to write
* out userspace GPFIFO entries and update GP_PUT).
*/
need_job_tracking = flag_fence_wait ||
flag_fence_get ||
nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE) ||
nvgpu_is_vpr_resize_enabled() ||
!skip_buffer_refcounting ||
nvgpu_channel_wdt_enabled(c->wdt);
if (need_job_tracking) {
/*
* Get a power ref because this isn't a deterministic
* channel that holds them during the channel lifetime.
* This one is released by nvgpu_channel_clean_up_jobs,
* via syncpt or sema interrupt, whichever is used.
*/
err = gk20a_busy(g);
if (err != 0) {
nvgpu_err(g,
"failed to host gk20a to submit gpfifo");
nvgpu_print_current(g, NULL, NVGPU_ERROR);
return err;
}
}
err = nvgpu_do_submit(c, gpfifo, userdata, num_entries, flags, fence,
fence_out, profiler, need_job_tracking, true);
if (err != 0) {
goto clean_up;
}
return 0;
clean_up:
nvgpu_log_fn(g, "fail %d", err);
gk20a_idle(g);
return err;
}
static int check_submit_allowed(struct nvgpu_channel *c)
{
struct gk20a *g = c->g;
if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) {
return -ENODEV;
}
if (nvgpu_channel_check_unserviceable(c)) {
return -ETIMEDOUT;
}
if (c->usermode_submit_enabled) {
return -EINVAL;
}
if (!nvgpu_mem_is_valid(&c->gpfifo.mem)) {
return -ENOMEM;
}
/* an address space needs to have been bound at this point. */
if (!nvgpu_channel_as_bound(c)) {
nvgpu_err(g,
"not bound to an address space at time of gpfifo"
" submission.");
return -EINVAL;
}
return 0;
}
static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c,
struct nvgpu_gpfifo_entry *gpfifo,
struct nvgpu_gpfifo_userdata userdata,
u32 num_entries,
u32 flags,
struct nvgpu_channel_fence *fence,
struct nvgpu_fence_type **fence_out,
struct nvgpu_swprofiler *profiler)
{
struct gk20a *g = c->g;
int err;
err = check_submit_allowed(c);
if (err != 0) {
return err;
}
/*
* The fifo is not large enough for the request; return an error
* immediately. The kernel can insert gpfifo entries before and after
* the user gpfifos, so account for those extra entries on top of the
* user request. Also, HW with a fifo of size N can accept only N-1
* entries.
*/
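/*
* For example (illustrative size only): with entry_num = 1024, at most
* 1024 - 1 - EXTRA_GPFIFO_ENTRIES = 1021 user entries fit in a single
* submit.
*/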
if (c->gpfifo.entry_num - 1U < num_entries + EXTRA_GPFIFO_ENTRIES) {
nvgpu_err(g, "not enough gpfifo space allocated");
return -ENOMEM;
}
nvgpu_swprofile_snapshot(profiler, PROF_KICKOFF_ENTRY);
/* update debug settings */
nvgpu_ltc_sync_enabled(g);
nvgpu_log_info(g, "channel %d", c->chid);
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
if (c->deterministic) {
err = nvgpu_submit_deterministic(c, gpfifo, userdata,
num_entries, flags, fence, fence_out, profiler);
} else
#endif
{
err = nvgpu_submit_nondeterministic(c, gpfifo, userdata,
num_entries, flags, fence, fence_out, profiler);
}
if (err != 0) {
return err;
}
#ifdef CONFIG_NVGPU_TRACE
if (fence_out != NULL && *fence_out != NULL) {
/*
* This is not a good example on how to use the fence type.
* Don't touch the priv data. The debug trace is special.
*/
#ifdef CONFIG_TEGRA_GK20A_NVHOST
trace_gk20a_channel_submitted_gpfifo(g->name,
c->chid, num_entries, flags,
(*fence_out)->priv.syncpt_id,
(*fence_out)->priv.syncpt_value);
#else
trace_gk20a_channel_submitted_gpfifo(g->name,
c->chid, num_entries, flags,
0, 0);
#endif
} else {
trace_gk20a_channel_submitted_gpfifo(g->name,
c->chid, num_entries, flags,
0, 0);
}
#endif
nvgpu_log_info(g, "post-submit put %d, get %d, size %d",
c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
nvgpu_swprofile_snapshot(profiler, PROF_KICKOFF_END);
nvgpu_log_fn(g, "done");
return err;
}
int nvgpu_submit_channel_gpfifo_user(struct nvgpu_channel *c,
struct nvgpu_gpfifo_userdata userdata,
u32 num_entries,
u32 flags,
struct nvgpu_channel_fence *fence,
struct nvgpu_user_fence *fence_out,
struct nvgpu_swprofiler *profiler)
{
struct nvgpu_fence_type *fence_internal = NULL;
int err;
err = nvgpu_submit_channel_gpfifo(c, NULL, userdata, num_entries,
flags, fence, &fence_internal, profiler);
if (err == 0 && fence_internal != NULL) {
*fence_out = nvgpu_fence_extract_user(fence_internal);
nvgpu_fence_put(fence_internal);
}
return err;
}
int nvgpu_submit_channel_gpfifo_kernel(struct nvgpu_channel *c,
struct nvgpu_gpfifo_entry *gpfifo,
u32 num_entries,
u32 flags,
struct nvgpu_channel_fence *fence,
struct nvgpu_fence_type **fence_out)
{
struct nvgpu_gpfifo_userdata userdata = { NULL, NULL };
return nvgpu_submit_channel_gpfifo(c, gpfifo, userdata, num_entries,
flags, fence, fence_out, NULL);
}

View File

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,157 @@
/*
* USERD
*
* Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/trace.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/channel.h>
#include <nvgpu/fifo.h>
#include <nvgpu/fifo/userd.h>
#include <nvgpu/vm_area.h>
#include <nvgpu/dma.h>
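/*
* USERD entries are grouped into page-sized slabs; each slab holds
* NVGPU_CPU_PAGE_SIZE / entry_size channels worth of USERD. The backing
* memory of a slab is allocated lazily in nvgpu_userd_init_channel().
*/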
int nvgpu_userd_init_slabs(struct gk20a *g)
{
struct nvgpu_fifo *f = &g->fifo;
int err;
nvgpu_mutex_init(&f->userd_mutex);
f->num_channels_per_slab = NVGPU_CPU_PAGE_SIZE / g->ops.userd.entry_size(g);
f->num_userd_slabs =
DIV_ROUND_UP(f->num_channels, f->num_channels_per_slab);
f->userd_slabs = nvgpu_big_zalloc(g, f->num_userd_slabs *
sizeof(struct nvgpu_mem));
if (f->userd_slabs == NULL) {
nvgpu_err(g, "could not allocate userd slabs");
err = -ENOMEM;
goto clean_up;
}
return 0;
clean_up:
nvgpu_mutex_destroy(&f->userd_mutex);
return err;
}
void nvgpu_userd_free_slabs(struct gk20a *g)
{
struct nvgpu_fifo *f = &g->fifo;
u32 slab;
for (slab = 0; slab < f->num_userd_slabs; slab++) {
nvgpu_dma_free(g, &f->userd_slabs[slab]);
}
nvgpu_big_free(g, f->userd_slabs);
f->userd_slabs = NULL;
nvgpu_mutex_destroy(&f->userd_mutex);
}
int nvgpu_userd_init_channel(struct gk20a *g, struct nvgpu_channel *c)
{
struct nvgpu_fifo *f = &g->fifo;
struct nvgpu_mem *mem;
u32 slab = c->chid / f->num_channels_per_slab;
int err = 0;
if (slab >= f->num_userd_slabs) {
nvgpu_err(g, "chid %u, slab %u out of range (max=%u)",
c->chid, slab, f->num_userd_slabs);
return -EINVAL;
}
mem = &g->fifo.userd_slabs[slab];
nvgpu_mutex_acquire(&f->userd_mutex);
if (!nvgpu_mem_is_valid(mem)) {
err = nvgpu_dma_alloc_sys(g, NVGPU_CPU_PAGE_SIZE, mem);
if (err != 0) {
nvgpu_err(g, "userd allocation failed, err=%d", err);
goto done;
}
if (g->ops.mm.is_bar1_supported(g)) {
mem->gpu_va = g->ops.mm.bar1_map_userd(g, mem,
slab * NVGPU_CPU_PAGE_SIZE);
}
}
c->userd_mem = mem;
c->userd_offset = (c->chid % f->num_channels_per_slab) *
g->ops.userd.entry_size(g);
c->userd_iova = nvgpu_channel_userd_addr(c);
nvgpu_log(g, gpu_dbg_info,
"chid=%u slab=%u mem=%p offset=%u addr=%llx gpu_va=%llx",
c->chid, slab, mem, c->userd_offset,
nvgpu_channel_userd_addr(c),
nvgpu_channel_userd_gpu_va(c));
done:
nvgpu_mutex_release(&f->userd_mutex);
return err;
}
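/*
 * One-time USERD software setup: initialize the slab bookkeeping and
 * reserve a BAR1 VA area large enough to map USERD for every channel.
 */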
int nvgpu_userd_setup_sw(struct gk20a *g)
{
struct nvgpu_fifo *f = &g->fifo;
int err;
u32 size, num_pages;
err = nvgpu_userd_init_slabs(g);
if (err != 0) {
nvgpu_err(g, "failed to init userd support");
return err;
}
size = f->num_channels * g->ops.userd.entry_size(g);
num_pages = DIV_ROUND_UP(size, NVGPU_CPU_PAGE_SIZE);
err = nvgpu_vm_area_alloc(g->mm.bar1.vm,
num_pages, NVGPU_CPU_PAGE_SIZE, &f->userd_gpu_va, 0);
if (err != 0) {
nvgpu_err(g, "userd gpu va allocation failed, err=%d", err);
goto clean_up;
}
return 0;
clean_up:
nvgpu_userd_free_slabs(g);
return err;
}
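/*
 * Undo nvgpu_userd_setup_sw(): release the BAR1 VA area, if one was
 * reserved, and free the slabs.
 */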
void nvgpu_userd_cleanup_sw(struct gk20a *g)
{
struct nvgpu_fifo *f = &g->fifo;
if (f->userd_gpu_va != 0ULL) {
(void) nvgpu_vm_area_free(g->mm.bar1.vm, f->userd_gpu_va);
f->userd_gpu_va = 0ULL;
}
nvgpu_userd_free_slabs(g);
}
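
For a concrete feel for the slab math above, assume (purely for illustration) a 4 KiB CPU page and a 512-byte USERD entry: each slab then covers 4096 / 512 = 8 channels, and a GPU with 512 channels needs DIV_ROUND_UP(512, 8) = 64 slabs, i.e. 64 pages of USERD backing rather than one page per channel.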

View File

@@ -0,0 +1,278 @@
/*
* Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/channel.h>
#include <nvgpu/watchdog.h>
#include <nvgpu/error_notifier.h>
#include <nvgpu/string.h>
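/*
 * Per-channel watchdog: a snapshot of the channel's progress state is
 * compared against the previous one on every check, and the channel is
 * reported as stuck once the time limit expires without any change.
 */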
struct nvgpu_channel_wdt {
struct gk20a *g;
/* lock protects the running timer state */
struct nvgpu_spinlock lock;
struct nvgpu_timeout timer;
bool running;
struct nvgpu_channel_wdt_state ch_state;
/* lock not needed */
u32 limit_ms;
bool enabled;
};
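/*
 * Allocate a watchdog instance, enabled by default and using the platform's
 * initial time limit. Free it with nvgpu_channel_wdt_destroy().
 */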
struct nvgpu_channel_wdt *nvgpu_channel_wdt_alloc(struct gk20a *g)
{
struct nvgpu_channel_wdt *wdt = nvgpu_kzalloc(g, sizeof(*wdt));
if (wdt == NULL) {
return NULL;
}
wdt->g = g;
nvgpu_spinlock_init(&wdt->lock);
wdt->enabled = true;
wdt->limit_ms = g->ch_wdt_init_limit_ms;
return wdt;
}
void nvgpu_channel_wdt_destroy(struct nvgpu_channel_wdt *wdt)
{
nvgpu_kfree(wdt->g, wdt);
}
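/*
 * The policy accessors below are not protected by the lock; see the
 * "lock not needed" note in struct nvgpu_channel_wdt.
 */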
void nvgpu_channel_wdt_enable(struct nvgpu_channel_wdt *wdt)
{
wdt->enabled = true;
}
void nvgpu_channel_wdt_disable(struct nvgpu_channel_wdt *wdt)
{
wdt->enabled = false;
}
bool nvgpu_channel_wdt_enabled(struct nvgpu_channel_wdt *wdt)
{
return wdt->enabled;
}
void nvgpu_channel_wdt_set_limit(struct nvgpu_channel_wdt *wdt, u32 limit_ms)
{
wdt->limit_ms = limit_ms;
}
u32 nvgpu_channel_wdt_limit(struct nvgpu_channel_wdt *wdt)
{
return wdt->limit_ms;
}
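/*
 * Arm the CPU timer with the current limit and snapshot the channel state
 * that later progress checks compare against. Called with wdt->lock held.
 */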
static void nvgpu_channel_wdt_init(struct nvgpu_channel_wdt *wdt,
struct nvgpu_channel_wdt_state *state)
{
struct gk20a *g = wdt->g;
int ret;
ret = nvgpu_timeout_init(g, &wdt->timer,
wdt->limit_ms,
NVGPU_TIMER_CPU_TIMER);
if (ret != 0) {
nvgpu_err(g, "timeout_init failed: %d", ret);
return;
}
wdt->ch_state = *state;
wdt->running = true;
}
/**
* Start a timeout counter (watchdog) on this channel.
*
* Trigger a watchdog to recover the channel after the per-platform timeout
* duration (but strictly no earlier) if the channel hasn't advanced within
* that time.
*
* If the timeout is already running, do nothing. This should be called when
* new jobs are submitted. The timeout will stop when the last tracked job
* finishes, making the channel idle.
*/
void nvgpu_channel_wdt_start(struct nvgpu_channel_wdt *wdt,
struct nvgpu_channel_wdt_state *state)
{
if (!nvgpu_is_timeouts_enabled(wdt->g)) {
return;
}
if (!wdt->enabled) {
return;
}
nvgpu_spinlock_acquire(&wdt->lock);
if (wdt->running) {
nvgpu_spinlock_release(&wdt->lock);
return;
}
nvgpu_channel_wdt_init(wdt, state);
nvgpu_spinlock_release(&wdt->lock);
}
/**
* Stop a running timeout counter (watchdog) on this channel.
*
* Make the watchdog consider the channel not running, so that it won't get
* recovered even if no progress is detected. Progress is not tracked if the
* watchdog is turned off.
*
 * No guarantees are made about concurrent execution of the timeout handler;
 * this should be called from an update handler running in the same thread
 * as the watchdog.
*/
bool nvgpu_channel_wdt_stop(struct nvgpu_channel_wdt *wdt)
{
bool was_running;
nvgpu_spinlock_acquire(&wdt->lock);
was_running = wdt->running;
wdt->running = false;
nvgpu_spinlock_release(&wdt->lock);
return was_running;
}
/**
* Continue a previously stopped timeout
*
* Enable the timeout again but don't reinitialize its timer.
*
 * No guarantees are made about concurrent execution of the timeout handler;
 * this should be called from an update handler running in the same thread
 * as the watchdog.
*/
void nvgpu_channel_wdt_continue(struct nvgpu_channel_wdt *wdt)
{
nvgpu_spinlock_acquire(&wdt->lock);
wdt->running = true;
nvgpu_spinlock_release(&wdt->lock);
}
/**
* Reset the counter of a timeout that is in effect.
*
* If this channel has an active timeout, act as if something happened on the
* channel right now.
*
 * Rewinding a stopped counter is a no-op. A stopped timeout can only be
 * started (which is effectively a rewind as well) or continued (in which
 * case the stop acted as a pause).
*/
void nvgpu_channel_wdt_rewind(struct nvgpu_channel_wdt *wdt,
struct nvgpu_channel_wdt_state *state)
{
nvgpu_spinlock_acquire(&wdt->lock);
if (wdt->running) {
nvgpu_channel_wdt_init(wdt, state);
}
nvgpu_spinlock_release(&wdt->lock);
}
/**
* Check if the watchdog is running.
*
* A running watchdog means one that is requested to run and expire in the
* future. The state of a running watchdog has to be checked periodically to
* see if it's expired.
*/
bool nvgpu_channel_wdt_running(struct nvgpu_channel_wdt *wdt)
{
bool running;
nvgpu_spinlock_acquire(&wdt->lock);
running = wdt->running;
nvgpu_spinlock_release(&wdt->lock);
return running;
}
/**
* Check if a channel has been stuck for the watchdog limit.
*
* Test if this channel has really got stuck at this point by checking if its
* {gp,pb}_get have advanced or not. If progress was detected, start the timer
* from zero again. If no {gp,pb}_get action happened in the watchdog time
* limit, return true. Else return false.
*/
static bool nvgpu_channel_wdt_handler(struct nvgpu_channel_wdt *wdt,
struct nvgpu_channel_wdt_state *state)
{
struct gk20a *g = wdt->g;
struct nvgpu_channel_wdt_state previous_state;
nvgpu_log_fn(g, " ");
/* Get status but keep timer running */
nvgpu_spinlock_acquire(&wdt->lock);
previous_state = wdt->ch_state;
nvgpu_spinlock_release(&wdt->lock);
if (nvgpu_memcmp((const u8 *)state,
(const u8 *)&previous_state,
sizeof(*state)) != 0) {
/* Channel has advanced, timer keeps going but resets */
nvgpu_channel_wdt_rewind(wdt, state);
return false;
}
if (!nvgpu_timeout_peek_expired(&wdt->timer)) {
/* Seems stuck but waiting to time out */
return false;
}
return true;
}
/**
* Test if the per-channel watchdog is on; check the timeout in that case.
*
* Each channel has an expiration time based watchdog. The timer is
* (re)initialized in two situations: when a new job is submitted on an idle
* channel and when the timeout is checked but progress is detected. The
* watchdog timeout limit is a coarse sliding window.
*
* The timeout is stopped (disabled) after the last job in a row finishes
* and marks the channel idle.
*/
bool nvgpu_channel_wdt_check(struct nvgpu_channel_wdt *wdt,
struct nvgpu_channel_wdt_state *state)
{
bool running;
nvgpu_spinlock_acquire(&wdt->lock);
running = wdt->running;
nvgpu_spinlock_release(&wdt->lock);
if (running) {
return nvgpu_channel_wdt_handler(wdt, state);
} else {
return false;
}
}
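
To show how this API is meant to be driven, here is a minimal polling sketch (illustration only, not part of the original file; collecting the current channel state and the actual recovery path are assumed to happen elsewhere):

/* Hypothetical polling step, for illustration only. */
static void example_wdt_poll(struct gk20a *g, struct nvgpu_channel_wdt *wdt,
		struct nvgpu_channel_wdt_state *current_state)
{
	/*
	 * nvgpu_channel_wdt_check() returns true only when the watchdog is
	 * running, no progress was seen since the last snapshot, and the
	 * time limit has expired; otherwise it rewinds the timer or keeps
	 * waiting.
	 */
	if (nvgpu_channel_wdt_check(wdt, current_state)) {
		nvgpu_err(g, "channel watchdog expired, triggering recovery");
		/* ...recovery of the stuck channel would start here... */
	}
}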