mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 17:36:20 +03:00
The channel code needs the watchdog code and vice versa. Cut this
circular dependency with a few simplifications so that the watchdog
wouldn't depend on so much.

When calling watchdog APIs that cause stores or comparisons of channel
progress, provide a snapshot of the current progress instead of a whole
channel pointer. struct nvgpu_channel_wdt_state is added as an
interface for this to track gp_get and pb_get.

When periodically checking the watchdog state, make the channel code
ask whether a hang has been detected and abort the channel from within
channel code instead of asking the watchdog to abort the channel. The
debug dump verbosity flag is also moved back to the channel data.

Move the functionality to restart all channels' watchdogs to channel
code from watchdog code. Looping over active channels is not a good
feature for the watchdog; it's better for the channel handling to just
use the watchdog as a tracking tool.

Move a few unserviceable checks up in the stack to the callers of the
wdt code. They're a kludge but this will do for now and demonstrates
what needs to be eventually fixed.

This does not leave much code in the watchdog unit. Now the purpose of
the watchdog is to only isolate the logic to couple a timer and
progress snapshots with careful locking to start and stop the tracking.

Jira NVGPU-5582

Change-Id: I7c728542ff30d88b1414500210be3fbaf61e6e8a
Signed-off-by: Konsta Hölttä <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2369820
Reviewed-by: automaticguardword <automaticguardword@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-misra <svc-mobile-misra@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
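
A minimal sketch of the snapshot interface described above, pieced together
from how this file uses it (the gp_get/pb_get fields and the start/check/
rewind calls appear in the code below; the field widths and exact layout are
assumptions):

    /* progress snapshot passed to the watchdog instead of a channel pointer */
    struct nvgpu_channel_wdt_state {
        u64 gp_get;
        u64 pb_get;
    };

    /* channel code couples the wdt timer with snapshots it collects itself */
    state = nvgpu_channel_collect_wdt_state(ch);
    nvgpu_channel_wdt_start(ch->wdt, &state);   /* when a job is submitted */
    nvgpu_channel_wdt_check(ch->wdt, &state);   /* on the periodic poll */
    nvgpu_channel_wdt_rewind(ch->wdt, &state);  /* on restart_all_wdts() */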
2587 lines
65 KiB
C
/*
 * GK20A Graphics channel
 *
 * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <nvgpu/trace.h>
#include <nvgpu/mm.h>
#include <nvgpu/semaphore.h>
#include <nvgpu/timers.h>
#include <nvgpu/kmem.h>
#include <nvgpu/dma.h>
#include <nvgpu/log.h>
#include <nvgpu/atomic.h>
#include <nvgpu/bug.h>
#include <nvgpu/list.h>
#include <nvgpu/circ_buf.h>
#include <nvgpu/cond.h>
#include <nvgpu/enabled.h>
#include <nvgpu/debug.h>
#include <nvgpu/debugger.h>
#include <nvgpu/ltc.h>
#include <nvgpu/barrier.h>
#include <nvgpu/error_notifier.h>
#include <nvgpu/os_sched.h>
#include <nvgpu/log2.h>
#include <nvgpu/ptimer.h>
#include <nvgpu/worker.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/mc.h>
#include <nvgpu/nvgpu_init.h>
#include <nvgpu/engines.h>
#include <nvgpu/channel.h>
#include <nvgpu/channel_sync.h>
#include <nvgpu/channel_sync_syncpt.h>
#include <nvgpu/channel_sync_semaphore.h>
#include <nvgpu/channel_user_syncpt.h>
#include <nvgpu/runlist.h>
#include <nvgpu/watchdog.h>
#include <nvgpu/fifo/userd.h>
#include <nvgpu/nvhost.h>
#include <nvgpu/fence.h>
#include <nvgpu/preempt.h>
#include <nvgpu/static_analysis.h>
#ifdef CONFIG_NVGPU_DEBUGGER
#include <nvgpu/gr/gr.h>
#endif
#include <nvgpu/job.h>
#include <nvgpu/priv_cmdbuf.h>

static void free_channel(struct nvgpu_fifo *f, struct nvgpu_channel *ch);
static void channel_dump_ref_actions(struct nvgpu_channel *ch);

#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
static const struct nvgpu_worker_ops channel_worker_ops;
#endif

static int channel_setup_ramfc(struct nvgpu_channel *c,
		struct nvgpu_setup_bind_args *args,
		u64 gpfifo_gpu_va, u32 gpfifo_size);

/* allocate GPU channel */
static struct nvgpu_channel *allocate_channel(struct nvgpu_fifo *f)
{
	struct nvgpu_channel *ch = NULL;
	struct gk20a *g = f->g;

	nvgpu_mutex_acquire(&f->free_chs_mutex);
	if (!nvgpu_list_empty(&f->free_chs)) {
		ch = nvgpu_list_first_entry(&f->free_chs, nvgpu_channel,
				free_chs);
		nvgpu_list_del(&ch->free_chs);
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 2, NVGPU_MISRA(Rule, 10_3), "Bug 2277532")
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 2, NVGPU_MISRA(Rule, 14_4), "Bug 2277532")
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 2, NVGPU_MISRA(Rule, 15_6), "Bug 2277532")
		WARN_ON(nvgpu_atomic_read(&ch->ref_count) != 0);
		WARN_ON(ch->referenceable);
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 10_3))
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 14_4))
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 15_6))
		f->used_channels = nvgpu_safe_add_u32(f->used_channels, 1U);
	}
	nvgpu_mutex_release(&f->free_chs_mutex);

	if ((g->aggressive_sync_destroy_thresh != 0U) &&
		(f->used_channels >
		 g->aggressive_sync_destroy_thresh)) {
		g->aggressive_sync_destroy = true;
	}

	return ch;
}

static void free_channel(struct nvgpu_fifo *f,
		struct nvgpu_channel *ch)
{
	struct gk20a *g = f->g;

#ifdef CONFIG_NVGPU_TRACE
	trace_gk20a_release_used_channel(ch->chid);
#endif
	/* refcount is zero here and channel is in a freed/dead state */
	nvgpu_mutex_acquire(&f->free_chs_mutex);
	/* add to head to increase visibility of timing-related bugs */
	nvgpu_list_add(&ch->free_chs, &f->free_chs);
	f->used_channels = nvgpu_safe_sub_u32(f->used_channels, 1U);
	nvgpu_mutex_release(&f->free_chs_mutex);

	/*
	 * On teardown it is not possible to dereference platform, but ignoring
	 * this is fine then because no new channels would be created.
	 */
	if (!nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) {
		if ((g->aggressive_sync_destroy_thresh != 0U) &&
			(f->used_channels <
			 g->aggressive_sync_destroy_thresh)) {
			g->aggressive_sync_destroy = false;
		}
	}
}

void nvgpu_channel_commit_va(struct nvgpu_channel *c)
{
	struct gk20a *g = c->g;

	nvgpu_log_fn(g, " ");

	g->ops.mm.init_inst_block(&c->inst_block, c->vm,
			c->vm->gmmu_page_sizes[GMMU_PAGE_SIZE_BIG]);
}

int nvgpu_channel_update_runlist(struct nvgpu_channel *c, bool add)
{
	return c->g->ops.runlist.update_for_channel(c->g, c->runlist_id,
			c, add, true);
}

int nvgpu_channel_enable_tsg(struct gk20a *g, struct nvgpu_channel *ch)
{
	struct nvgpu_tsg *tsg;

	tsg = nvgpu_tsg_from_ch(ch);
	if (tsg != NULL) {
		g->ops.tsg.enable(tsg);
		return 0;
	} else {
		nvgpu_err(ch->g, "chid: %d is not bound to tsg", ch->chid);
		return -EINVAL;
	}
}

int nvgpu_channel_disable_tsg(struct gk20a *g, struct nvgpu_channel *ch)
{
	struct nvgpu_tsg *tsg;

	tsg = nvgpu_tsg_from_ch(ch);
	if (tsg != NULL) {
		g->ops.tsg.disable(tsg);
		return 0;
	} else {
		nvgpu_err(ch->g, "chid: %d is not bound to tsg", ch->chid);
		return -EINVAL;
	}
}

#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
void nvgpu_channel_abort_clean_up(struct nvgpu_channel *ch)
{
	/* synchronize with actual job cleanup */
	nvgpu_mutex_acquire(&ch->joblist.cleanup_lock);

	/* ensure no fences are pending */
	nvgpu_mutex_acquire(&ch->sync_lock);
	if (ch->sync != NULL) {
		nvgpu_channel_sync_set_min_eq_max(ch->sync);
	}

#ifdef CONFIG_TEGRA_GK20A_NVHOST
	if (ch->user_sync != NULL) {
		nvgpu_channel_user_syncpt_set_safe_state(ch->user_sync);
	}
#endif
	nvgpu_mutex_release(&ch->sync_lock);

	nvgpu_mutex_release(&ch->joblist.cleanup_lock);

	/* The update to flush the job queue is only needed to process
	 * nondeterministic resources and ch wdt timeouts. Any others are
	 * either nonexistent or preallocated from pools that can be killed in
	 * one go on deterministic channels; take a look at what would happen
	 * in nvgpu_channel_clean_up_deterministic_job() and what
	 * nvgpu_submit_deterministic() requires.
	 */
	if (!nvgpu_channel_is_deterministic(ch)) {
		/*
		 * When closing the channel, this scheduled update holds one
		 * channel ref which is waited for before advancing with
		 * freeing.
		 */
		nvgpu_channel_update(ch);
	}
}

static void channel_kernelmode_deinit(struct nvgpu_channel *ch)
{
	struct vm_gk20a *ch_vm = ch->vm;

	nvgpu_dma_unmap_free(ch_vm, &ch->gpfifo.mem);
#ifdef CONFIG_NVGPU_DGPU
	nvgpu_big_free(ch->g, ch->gpfifo.pipe);
#endif
	(void) memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc));

	if (ch->priv_cmd_q != NULL) {
		nvgpu_priv_cmdbuf_queue_free(ch->priv_cmd_q);
		ch->priv_cmd_q = NULL;
	}

	nvgpu_channel_joblist_deinit(ch);

	/* sync must be destroyed before releasing channel vm */
	nvgpu_mutex_acquire(&ch->sync_lock);
	if (ch->sync != NULL) {
		nvgpu_channel_sync_destroy(ch->sync);
		ch->sync = NULL;
	}
	nvgpu_mutex_release(&ch->sync_lock);
}

#ifdef CONFIG_TEGRA_GK20A_NVHOST
int nvgpu_channel_set_syncpt(struct nvgpu_channel *ch)
{
	struct gk20a *g = ch->g;
	struct nvgpu_channel_sync_syncpt *sync_syncpt;
	u32 new_syncpt = 0U;
	u32 old_syncpt = g->ops.ramfc.get_syncpt(ch);
	int err = 0;

	if (ch->sync != NULL) {
		sync_syncpt = nvgpu_channel_sync_to_syncpt(ch->sync);
		if (sync_syncpt != NULL) {
			new_syncpt =
				nvgpu_channel_sync_get_syncpt_id(sync_syncpt);
		} else {
			new_syncpt = NVGPU_INVALID_SYNCPT_ID;
			/* the channel sync is not syncpoint-backed */
			return -EINVAL;
		}
	} else {
		return -EINVAL;
	}

	if ((new_syncpt != 0U) && (new_syncpt != old_syncpt)) {
		/* disable channel */
		err = nvgpu_channel_disable_tsg(g, ch);
		if (err != 0) {
			nvgpu_err(g, "failed to disable channel/TSG");
			return err;
		}

		/* preempt the channel */
		err = nvgpu_preempt_channel(g, ch);
		nvgpu_assert(err == 0);
		if (err != 0) {
			goto out;
		}
		/* no error at this point */
		g->ops.ramfc.set_syncpt(ch, new_syncpt);

		err = nvgpu_channel_enable_tsg(g, ch);
		if (err != 0) {
			nvgpu_err(g, "failed to enable channel/TSG");
		}
	}

	nvgpu_log_fn(g, "done");
	return err;
out:
	if (nvgpu_channel_enable_tsg(g, ch) != 0) {
		nvgpu_err(g, "failed to enable channel/TSG");
	}
	return err;
}
#endif

static int channel_setup_kernelmode(struct nvgpu_channel *c,
|
|
struct nvgpu_setup_bind_args *args)
|
|
{
|
|
u32 gpfifo_size, gpfifo_entry_size;
|
|
u64 gpfifo_gpu_va;
|
|
u32 job_count;
|
|
|
|
int err = 0;
|
|
struct gk20a *g = c->g;
|
|
|
|
gpfifo_size = args->num_gpfifo_entries;
|
|
gpfifo_entry_size = nvgpu_get_gpfifo_entry_size();
|
|
|
|
err = nvgpu_dma_alloc_map_sys(c->vm,
|
|
(size_t)gpfifo_size * (size_t)gpfifo_entry_size,
|
|
&c->gpfifo.mem);
|
|
if (err != 0) {
|
|
nvgpu_err(g, "memory allocation failed");
|
|
goto clean_up;
|
|
}
|
|
|
|
#ifdef CONFIG_NVGPU_DGPU
|
|
if (c->gpfifo.mem.aperture == APERTURE_VIDMEM) {
|
|
c->gpfifo.pipe = nvgpu_big_malloc(g,
|
|
(size_t)gpfifo_size *
|
|
(size_t)gpfifo_entry_size);
|
|
if (c->gpfifo.pipe == NULL) {
|
|
err = -ENOMEM;
|
|
goto clean_up_unmap;
|
|
}
|
|
}
|
|
#endif
|
|
gpfifo_gpu_va = c->gpfifo.mem.gpu_va;
|
|
|
|
c->gpfifo.entry_num = gpfifo_size;
|
|
c->gpfifo.get = 0;
|
|
c->gpfifo.put = 0;
|
|
|
|
nvgpu_log_info(g, "channel %d : gpfifo_base 0x%016llx, size %d",
|
|
c->chid, gpfifo_gpu_va, c->gpfifo.entry_num);
|
|
|
|
g->ops.userd.init_mem(g, c);
|
|
|
|
if (g->aggressive_sync_destroy_thresh == 0U) {
|
|
nvgpu_mutex_acquire(&c->sync_lock);
|
|
c->sync = nvgpu_channel_sync_create(c);
|
|
if (c->sync == NULL) {
|
|
err = -ENOMEM;
|
|
nvgpu_mutex_release(&c->sync_lock);
|
|
goto clean_up_unmap;
|
|
}
|
|
nvgpu_mutex_release(&c->sync_lock);
|
|
|
|
if (g->ops.channel.set_syncpt != NULL) {
|
|
err = g->ops.channel.set_syncpt(c);
|
|
if (err != 0) {
|
|
goto clean_up_sync;
|
|
}
|
|
}
|
|
}
|
|
|
|
err = channel_setup_ramfc(c, args, gpfifo_gpu_va,
|
|
c->gpfifo.entry_num);
|
|
|
|
if (err != 0) {
|
|
goto clean_up_sync;
|
|
}
|
|
|
|
/*
|
|
* Allocate priv cmdbuf space for pre and post fences. If the inflight
|
|
* job count isn't specified, we base it on the gpfifo count. We
|
|
* multiply by a factor of 1/3 because at most a third of the GPFIFO
|
|
* entries can be used for user-submitted jobs; another third goes to
|
|
* wait entries, and the final third to incr entries. There will be one
|
|
* pair of acq and incr commands for each job.
|
|
*/
|
|
job_count = args->num_inflight_jobs;
|
|
if (job_count == 0U) {
|
|
/*
|
|
* Round up so the allocation behaves nicely with a very small
|
|
* gpfifo, and to be able to use all slots when the entry count
|
|
* would be one too small for both wait and incr commands. An
|
|
* increment would then still just fit.
|
|
*
|
|
* gpfifo_size is required to be at most 2^31 earlier.
|
|
*/
|
|
job_count = nvgpu_safe_add_u32(gpfifo_size, 2U) / 3U;
|
|
}
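	/*
	 * Illustrative note (added): with the default sizing above, a ring of
	 * e.g. 64 gpfifo entries gives (64 + 2) / 3 = 22 tracked jobs, since
	 * each kernel-mode job consumes up to three entries: one wait, one
	 * user-submitted gpfifo entry, and one increment.
	 */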
|
|
|
|
err = nvgpu_channel_joblist_init(c, job_count);
|
|
if (err != 0) {
|
|
goto clean_up_sync;
|
|
}
|
|
|
|
err = nvgpu_priv_cmdbuf_queue_alloc(c->vm, job_count, &c->priv_cmd_q);
|
|
if (err != 0) {
|
|
goto clean_up_prealloc;
|
|
}
|
|
|
|
err = nvgpu_channel_update_runlist(c, true);
|
|
if (err != 0) {
|
|
goto clean_up_priv_cmd;
|
|
}
|
|
|
|
return 0;
|
|
|
|
clean_up_priv_cmd:
|
|
nvgpu_priv_cmdbuf_queue_free(c->priv_cmd_q);
|
|
c->priv_cmd_q = NULL;
|
|
clean_up_prealloc:
|
|
nvgpu_channel_joblist_deinit(c);
|
|
clean_up_sync:
|
|
if (c->sync != NULL) {
|
|
nvgpu_channel_sync_destroy(c->sync);
|
|
c->sync = NULL;
|
|
}
|
|
clean_up_unmap:
|
|
#ifdef CONFIG_NVGPU_DGPU
|
|
nvgpu_big_free(g, c->gpfifo.pipe);
|
|
#endif
|
|
nvgpu_dma_unmap_free(c->vm, &c->gpfifo.mem);
|
|
clean_up:
|
|
(void) memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc));
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
/* Update with this periodically to determine how the gpfifo is draining. */
static inline u32 channel_update_gpfifo_get(struct gk20a *g,
		struct nvgpu_channel *c)
{
	u32 new_get = g->ops.userd.gp_get(g, c);

	c->gpfifo.get = new_get;
	return new_get;
}

u32 nvgpu_channel_get_gpfifo_free_count(struct nvgpu_channel *ch)
{
	return (ch->gpfifo.entry_num - (ch->gpfifo.put - ch->gpfifo.get) - 1U) %
		ch->gpfifo.entry_num;
}
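
/*
 * Illustrative note (added): the free count above is standard ring-buffer
 * math with one entry reserved to tell "full" apart from "empty". For
 * example, with entry_num = 8, put = 5 and get = 2, three entries are in
 * flight and (8 - (5 - 2) - 1) % 8 = 4 slots remain free.
 */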

u32 nvgpu_channel_update_gpfifo_get_and_get_free_count(struct nvgpu_channel *ch)
{
	(void)channel_update_gpfifo_get(ch->g, ch);
	return nvgpu_channel_get_gpfifo_free_count(ch);
}

static inline struct nvgpu_channel_worker *
|
|
nvgpu_channel_worker_from_worker(struct nvgpu_worker *worker)
|
|
{
|
|
return (struct nvgpu_channel_worker *)
|
|
((uintptr_t)worker - offsetof(struct nvgpu_channel_worker, worker));
|
|
};
|
|
|
|
#ifdef CONFIG_NVGPU_CHANNEL_WDT
void nvgpu_channel_set_wdt_debug_dump(struct nvgpu_channel *ch, bool dump)
{
	ch->wdt_debug_dump = dump;
}

static struct nvgpu_channel_wdt_state nvgpu_channel_collect_wdt_state(
		struct nvgpu_channel *ch)
{
	struct gk20a *g = ch->g;
	struct nvgpu_channel_wdt_state state = { 0, 0 };

	/*
	 * Note: just checking for nvgpu_channel_wdt_enabled() is not enough at
	 * the moment because system suspend puts g->regs away but doesn't stop
	 * the worker thread that runs the watchdog. This might need to be
	 * cleared up in the future.
	 */
	if (nvgpu_channel_wdt_running(ch->wdt)) {
		/*
		 * Read the state only if the wdt is on to avoid unnecessary
		 * accesses. The kernel mem for userd may not even exist; this
		 * channel could be in usermode submit mode.
		 */
		state.gp_get = g->ops.userd.gp_get(g, ch);
		state.pb_get = g->ops.userd.pb_get(g, ch);
	}

	return state;
}
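
/*
 * Illustrative note (added): gp_get and pb_get are the GPFIFO and pushbuffer
 * read pointers reported via USERD. The watchdog only stores this snapshot;
 * a later snapshot in which neither value has advanced within the timeout is
 * what nvgpu_channel_wdt_check() reports as a hang.
 */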

static void nvgpu_channel_launch_wdt(struct nvgpu_channel *ch)
{
	struct nvgpu_channel_wdt_state state =
		nvgpu_channel_collect_wdt_state(ch);

	/*
	 * FIXME: channel recovery can race the submit path and can start even
	 * after this, but this check is the best we can do for now.
	 */
	if (!nvgpu_channel_check_unserviceable(ch)) {
		nvgpu_channel_wdt_start(ch->wdt, &state);
	}
}

void nvgpu_channel_restart_all_wdts(struct gk20a *g)
{
	struct nvgpu_fifo *f = &g->fifo;
	u32 chid;

	for (chid = 0; chid < f->num_channels; chid++) {
		struct nvgpu_channel *ch = nvgpu_channel_from_id(g, chid);

		if (ch != NULL) {
			if ((ch->wdt != NULL) &&
			    !nvgpu_channel_check_unserviceable(ch)) {
				struct nvgpu_channel_wdt_state state =
					nvgpu_channel_collect_wdt_state(ch);

				nvgpu_channel_wdt_rewind(ch->wdt, &state);
			}
			nvgpu_channel_put(ch);
		}
	}
}

static void nvgpu_channel_recover_from_wdt(struct nvgpu_channel *ch)
{
	struct gk20a *g = ch->g;

	nvgpu_log_fn(g, " ");

	if (nvgpu_channel_check_unserviceable(ch)) {
		/* channel is already recovered */
		nvgpu_info(g, "chid: %d unserviceable but wdt was ON", ch->chid);
		return;
	}

	nvgpu_err(g, "Job on channel %d timed out", ch->chid);

	/* force reset calls gk20a_debug_dump but not this */
	if (ch->wdt_debug_dump) {
		gk20a_gr_debug_dump(g);
	}

#ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL
	if (g->ops.tsg.force_reset(ch,
			NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT,
			ch->wdt_debug_dump) != 0) {
		nvgpu_err(g, "failed tsg force reset for chid: %d", ch->chid);
	}
#endif
}

/*
 * Test the watchdog progress. If the channel is stuck, reset it.
 *
 * The gpu is implicitly on at this point because the watchdog can only run on
 * channels that have submitted jobs pending for cleanup.
 */
static void nvgpu_channel_check_wdt(struct nvgpu_channel *ch)
{
	struct nvgpu_channel_wdt_state state =
		nvgpu_channel_collect_wdt_state(ch);

	if (nvgpu_channel_wdt_check(ch->wdt, &state)) {
		nvgpu_channel_recover_from_wdt(ch);
	}
}
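
/*
 * Illustrative summary (added): the periodic flow is to collect a fresh
 * gp_get/pb_get snapshot, let nvgpu_channel_wdt_check() compare it against
 * the snapshot stored at nvgpu_channel_wdt_start() time, and only if the
 * watchdog reports a hang does the channel code itself trigger recovery via
 * g->ops.tsg.force_reset() above.
 */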

static void nvgpu_channel_worker_poll_init(struct nvgpu_worker *worker)
{
	struct nvgpu_channel_worker *ch_worker =
		nvgpu_channel_worker_from_worker(worker);
	int ret;

	ch_worker->watchdog_interval = 100U;

	ret = nvgpu_timeout_init(worker->g, &ch_worker->timeout,
			ch_worker->watchdog_interval, NVGPU_TIMER_CPU_TIMER);
	if (ret != 0) {
		nvgpu_err(worker->g, "timeout_init failed: %d", ret);
	}
}
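
/*
 * Illustrative note (added, assuming the CPU-timer duration is in
 * milliseconds): watchdog_interval = 100 gives roughly a 100 ms polling
 * cadence; the timer is re-armed with the same interval in
 * nvgpu_channel_worker_poll_wakeup_post_process_item() below after each
 * round of nvgpu_channel_poll_wdt().
 */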
|
|
|
|
/**
|
|
* Loop every living channel, check timeouts and handle stuck channels.
|
|
*/
|
|
static void nvgpu_channel_poll_wdt(struct gk20a *g)
|
|
{
|
|
unsigned int chid;
|
|
|
|
for (chid = 0; chid < g->fifo.num_channels; chid++) {
|
|
struct nvgpu_channel *ch = nvgpu_channel_from_id(g, chid);
|
|
|
|
if (ch != NULL) {
|
|
if (!nvgpu_channel_check_unserviceable(ch)) {
|
|
nvgpu_channel_check_wdt(ch);
|
|
}
|
|
nvgpu_channel_put(ch);
|
|
}
|
|
}
|
|
}
|
|
|
|
static void nvgpu_channel_worker_poll_wakeup_post_process_item(
|
|
struct nvgpu_worker *worker)
|
|
{
|
|
struct gk20a *g = worker->g;
|
|
|
|
struct nvgpu_channel_worker *ch_worker =
|
|
nvgpu_channel_worker_from_worker(worker);
|
|
int ret;
|
|
|
|
if (nvgpu_timeout_peek_expired(&ch_worker->timeout)) {
|
|
nvgpu_channel_poll_wdt(g);
|
|
ret = nvgpu_timeout_init(g, &ch_worker->timeout,
|
|
ch_worker->watchdog_interval,
|
|
NVGPU_TIMER_CPU_TIMER);
|
|
if (ret != 0) {
|
|
nvgpu_err(g, "timeout_init failed: %d", ret);
|
|
}
|
|
}
|
|
}
|
|
|
|
static u32 nvgpu_channel_worker_poll_wakeup_condition_get_timeout(
|
|
struct nvgpu_worker *worker)
|
|
{
|
|
struct nvgpu_channel_worker *ch_worker =
|
|
nvgpu_channel_worker_from_worker(worker);
|
|
|
|
return ch_worker->watchdog_interval;
|
|
}
|
|
#else
|
|
static void nvgpu_channel_launch_wdt(struct nvgpu_channel *ch) {}
|
|
#endif /* CONFIG_NVGPU_CHANNEL_WDT */
|
|
|
|
static inline struct nvgpu_channel *
|
|
nvgpu_channel_from_worker_item(struct nvgpu_list_node *node)
|
|
{
|
|
return (struct nvgpu_channel *)
|
|
((uintptr_t)node - offsetof(struct nvgpu_channel, worker_item));
|
|
};
|
|
|
|
static void nvgpu_channel_worker_poll_wakeup_process_item(
|
|
struct nvgpu_list_node *work_item)
|
|
{
|
|
struct nvgpu_channel *ch = nvgpu_channel_from_worker_item(work_item);
|
|
|
|
nvgpu_assert(ch != NULL);
|
|
|
|
nvgpu_log_fn(ch->g, " ");
|
|
|
|
nvgpu_channel_clean_up_jobs(ch);
|
|
|
|
/* ref taken when enqueued */
|
|
nvgpu_channel_put(ch);
|
|
}
|
|
|
|
static const struct nvgpu_worker_ops channel_worker_ops = {
|
|
#ifdef CONFIG_NVGPU_CHANNEL_WDT
|
|
.pre_process = nvgpu_channel_worker_poll_init,
|
|
.wakeup_post_process =
|
|
nvgpu_channel_worker_poll_wakeup_post_process_item,
|
|
.wakeup_timeout =
|
|
nvgpu_channel_worker_poll_wakeup_condition_get_timeout,
|
|
#endif
|
|
.wakeup_early_exit = NULL,
|
|
.wakeup_process_item =
|
|
nvgpu_channel_worker_poll_wakeup_process_item,
|
|
.wakeup_condition = NULL,
|
|
};
|
|
|
|
/**
|
|
* Initialize the channel worker's metadata and start the background thread.
|
|
*/
|
|
int nvgpu_channel_worker_init(struct gk20a *g)
|
|
{
|
|
struct nvgpu_worker *worker = &g->channel_worker.worker;
|
|
|
|
nvgpu_worker_init_name(worker, "nvgpu_channel_poll", g->name);
|
|
|
|
return nvgpu_worker_init(g, worker, &channel_worker_ops);
|
|
}
|
|
|
|
void nvgpu_channel_worker_deinit(struct gk20a *g)
|
|
{
|
|
struct nvgpu_worker *worker = &g->channel_worker.worker;
|
|
|
|
nvgpu_worker_deinit(worker);
|
|
}
|
|
|
|
/**
|
|
* Append a channel to the worker's list, if not there already.
|
|
*
|
|
* The worker thread processes work items (channels in its work list) and polls
|
|
* for other things. This adds @ch to the end of the list and wakes the worker
|
|
* up immediately. If the channel already existed in the list, it's not added,
|
|
* because in that case it has been scheduled already but has not yet been
|
|
* processed.
|
|
*/
|
|
static void channel_worker_enqueue(struct nvgpu_channel *ch)
|
|
{
|
|
struct gk20a *g = ch->g;
|
|
int ret;
|
|
|
|
nvgpu_log_fn(g, " ");
|
|
|
|
/*
|
|
* Ref released when this item gets processed. The caller should hold
|
|
* one ref already, so normally shouldn't fail, but the channel could
|
|
* end up being freed between the time the caller got its reference and
|
|
* the time we end up here (e.g., if the client got killed); if so, just
|
|
* return.
|
|
*/
|
|
if (nvgpu_channel_get(ch) == NULL) {
|
|
nvgpu_info(g, "cannot get ch ref for worker!");
|
|
return;
|
|
}
|
|
|
|
ret = nvgpu_worker_enqueue(&g->channel_worker.worker,
|
|
&ch->worker_item);
|
|
if (ret != 0) {
|
|
nvgpu_channel_put(ch);
|
|
return;
|
|
}
|
|
}
|
|
|
|
int nvgpu_channel_add_job(struct nvgpu_channel *c,
|
|
struct nvgpu_channel_job *job,
|
|
bool skip_buffer_refcounting)
|
|
{
|
|
struct vm_gk20a *vm = c->vm;
|
|
struct nvgpu_mapped_buf **mapped_buffers = NULL;
|
|
int err = 0;
|
|
u32 num_mapped_buffers = 0;
|
|
|
|
if (!skip_buffer_refcounting) {
|
|
err = nvgpu_vm_get_buffers(vm, &mapped_buffers,
|
|
&num_mapped_buffers);
|
|
if (err != 0) {
|
|
return err;
|
|
}
|
|
}
|
|
|
|
if (c != NULL) {
|
|
job->num_mapped_buffers = num_mapped_buffers;
|
|
job->mapped_buffers = mapped_buffers;
|
|
|
|
nvgpu_channel_launch_wdt(c);
|
|
|
|
nvgpu_channel_joblist_lock(c);
|
|
nvgpu_channel_joblist_add(c, job);
|
|
nvgpu_channel_joblist_unlock(c);
|
|
} else {
|
|
err = -ETIMEDOUT;
|
|
goto err_put_buffers;
|
|
}
|
|
|
|
return 0;
|
|
|
|
err_put_buffers:
|
|
nvgpu_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers);
|
|
|
|
return err;
|
|
}
|
|
|
|
/**
|
|
* Clean up job resources for further jobs to use.
|
|
*
|
|
* Loop all jobs from the joblist until a pending job is found. Pending jobs
|
|
* are detected from the job's post fence, so this is only done for jobs that
|
|
* have job tracking resources. Free all per-job memory for completed jobs; in
|
|
* case of preallocated resources, this opens up slots for new jobs to be
|
|
* submitted.
|
|
*/
|
|
void nvgpu_channel_clean_up_jobs(struct nvgpu_channel *c)
|
|
{
|
|
struct vm_gk20a *vm;
|
|
struct nvgpu_channel_job *job;
|
|
struct gk20a *g;
|
|
bool job_finished = false;
|
|
bool watchdog_on = false;
|
|
|
|
if (nvgpu_is_powered_off(c->g)) { /* shutdown case */
|
|
return;
|
|
}
|
|
|
|
vm = c->vm;
|
|
g = c->g;
|
|
|
|
nvgpu_assert(!nvgpu_channel_is_deterministic(c));
|
|
|
|
watchdog_on = nvgpu_channel_wdt_stop(c->wdt);
|
|
|
|
/* Synchronize with abort cleanup that needs the jobs. */
|
|
nvgpu_mutex_acquire(&c->joblist.cleanup_lock);
|
|
|
|
while (true) {
|
|
bool completed;
|
|
|
|
nvgpu_channel_joblist_lock(c);
|
|
job = nvgpu_channel_joblist_peek(c);
|
|
nvgpu_channel_joblist_unlock(c);
|
|
|
|
if (job == NULL) {
|
|
/*
|
|
* No jobs in flight, timeout will remain stopped until
|
|
* new jobs are submitted.
|
|
*/
|
|
break;
|
|
}
|
|
|
|
completed = nvgpu_fence_is_expired(&job->post_fence);
|
|
if (!completed) {
|
|
/*
|
|
* The watchdog eventually sees an updated gp_get if
|
|
* something happened in this loop. A new job can have
|
|
* been submitted between the above call to stop and
|
|
* this - in that case, this is a no-op and the new
|
|
* later timeout is still used.
|
|
*/
|
|
if (watchdog_on) {
|
|
nvgpu_channel_wdt_continue(c->wdt);
|
|
}
|
|
break;
|
|
}
|
|
|
|
WARN_ON(c->sync == NULL);
|
|
|
|
if (c->sync != NULL) {
|
|
if (c->has_os_fence_framework_support &&
|
|
g->os_channel.os_fence_framework_inst_exists(c)) {
|
|
g->os_channel.signal_os_fence_framework(c,
|
|
&job->post_fence);
|
|
}
|
|
|
|
if (g->aggressive_sync_destroy_thresh != 0U) {
|
|
nvgpu_mutex_acquire(&c->sync_lock);
|
|
if (nvgpu_channel_sync_put_ref_and_check(c->sync)
|
|
&& g->aggressive_sync_destroy) {
|
|
nvgpu_channel_sync_destroy(c->sync);
|
|
c->sync = NULL;
|
|
}
|
|
nvgpu_mutex_release(&c->sync_lock);
|
|
}
|
|
}
|
|
|
|
if (job->num_mapped_buffers != 0U) {
|
|
nvgpu_vm_put_buffers(vm, job->mapped_buffers,
|
|
job->num_mapped_buffers);
|
|
}
|
|
|
|
nvgpu_fence_put(&job->post_fence);
|
|
|
|
/*
|
|
* Free the private command buffers (in order of allocation)
|
|
*/
|
|
if (job->wait_cmd != NULL) {
|
|
nvgpu_priv_cmdbuf_free(c->priv_cmd_q, job->wait_cmd);
|
|
}
|
|
nvgpu_priv_cmdbuf_free(c->priv_cmd_q, job->incr_cmd);
|
|
|
|
nvgpu_channel_free_job(c, job);
|
|
|
|
nvgpu_channel_joblist_lock(c);
|
|
nvgpu_channel_joblist_delete(c, job);
|
|
nvgpu_channel_joblist_unlock(c);
|
|
|
|
job_finished = true;
|
|
|
|
/* taken in nvgpu_submit_nondeterministic() */
|
|
gk20a_idle(g);
|
|
}
|
|
|
|
nvgpu_mutex_release(&c->joblist.cleanup_lock);
|
|
|
|
if ((job_finished) &&
|
|
(g->os_channel.work_completion_signal != NULL)) {
|
|
g->os_channel.work_completion_signal(c);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Clean up one job if any to provide space for a new submit.
|
|
*
|
|
* Deterministic channels do very little in the submit path, so the cleanup
|
|
* code does not do much either. This assumes the preconditions that
|
|
* deterministic channels are missing features such as timeouts and mapped
|
|
* buffers.
|
|
*/
|
|
void nvgpu_channel_clean_up_deterministic_job(struct nvgpu_channel *c)
|
|
{
|
|
struct nvgpu_channel_job *job;
|
|
|
|
nvgpu_assert(nvgpu_channel_is_deterministic(c));
|
|
|
|
/* Synchronize with abort cleanup that needs the jobs. */
|
|
nvgpu_mutex_acquire(&c->joblist.cleanup_lock);
|
|
|
|
nvgpu_channel_joblist_lock(c);
|
|
job = nvgpu_channel_joblist_peek(c);
|
|
nvgpu_channel_joblist_unlock(c);
|
|
|
|
if (job == NULL) {
|
|
goto out_unlock;
|
|
}
|
|
|
|
nvgpu_assert(job->num_mapped_buffers == 0U);
|
|
|
|
if (!nvgpu_fence_is_expired(&job->post_fence)) {
|
|
goto out_unlock;
|
|
}
|
|
|
|
/*
|
|
* This fence is syncpoint-based, so cleanup doesn't do anything. Put
|
|
* the ref back for consistency though.
|
|
*/
|
|
nvgpu_fence_put(&job->post_fence);
|
|
|
|
/*
|
|
* Free the private command buffers (in order of allocation)
|
|
*/
|
|
if (job->wait_cmd != NULL) {
|
|
nvgpu_priv_cmdbuf_free(c->priv_cmd_q, job->wait_cmd);
|
|
}
|
|
nvgpu_priv_cmdbuf_free(c->priv_cmd_q, job->incr_cmd);
|
|
|
|
nvgpu_channel_free_job(c, job);
|
|
|
|
nvgpu_channel_joblist_lock(c);
|
|
nvgpu_channel_joblist_delete(c, job);
|
|
nvgpu_channel_joblist_unlock(c);
|
|
|
|
out_unlock:
|
|
nvgpu_mutex_release(&c->joblist.cleanup_lock);
|
|
}
|
|
|
|
/**
|
|
* Schedule a job cleanup work on this channel to free resources and to signal
|
|
* about completion.
|
|
*
|
|
* Call this when there has been an interrupt about finished jobs, or when job
|
|
* cleanup needs to be performed, e.g., when closing a channel. This is always
|
|
* safe to call even if there is nothing to clean up. Any visible actions on
|
|
* jobs just before calling this are guaranteed to be processed.
|
|
*/
|
|
void nvgpu_channel_update(struct nvgpu_channel *c)
|
|
{
|
|
if (nvgpu_is_powered_off(c->g)) { /* shutdown case */
|
|
return;
|
|
}
|
|
#ifdef CONFIG_NVGPU_TRACE
|
|
trace_nvgpu_channel_update(c->chid);
|
|
#endif
|
|
/* A queued channel is always checked for job cleanup. */
|
|
channel_worker_enqueue(c);
|
|
}
|
|
|
|
bool nvgpu_channel_update_and_check_ctxsw_timeout(struct nvgpu_channel *ch,
|
|
u32 timeout_delta_ms, bool *progress)
|
|
{
|
|
u32 gpfifo_get;
|
|
|
|
if (ch->usermode_submit_enabled) {
|
|
ch->ctxsw_timeout_accumulated_ms += timeout_delta_ms;
|
|
*progress = false;
|
|
goto done;
|
|
}
|
|
|
|
gpfifo_get = channel_update_gpfifo_get(ch->g, ch);
|
|
|
|
if (gpfifo_get == ch->ctxsw_timeout_gpfifo_get) {
|
|
/* didn't advance since previous ctxsw timeout check */
|
|
ch->ctxsw_timeout_accumulated_ms += timeout_delta_ms;
|
|
*progress = false;
|
|
} else {
|
|
/* first ctxsw timeout isr encountered */
|
|
ch->ctxsw_timeout_accumulated_ms = timeout_delta_ms;
|
|
*progress = true;
|
|
}
|
|
|
|
ch->ctxsw_timeout_gpfifo_get = gpfifo_get;
|
|
|
|
done:
|
|
return nvgpu_is_timeouts_enabled(ch->g) &&
|
|
ch->ctxsw_timeout_accumulated_ms > ch->ctxsw_timeout_max_ms;
|
|
}
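
/*
 * Illustrative note (added): ctxsw_timeout_accumulated_ms only grows while
 * gpfifo get stays put across consecutive ctxsw-timeout interrupts. For
 * example, with timeout_delta_ms == 100 and ctxsw_timeout_max_ms == 3000,
 * about 30 back-to-back checks without progress are needed before this
 * function reports a timeout (and only when nvgpu_is_timeouts_enabled()).
 */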
|
|
|
|
#else
|
|
|
|
void nvgpu_channel_abort_clean_up(struct nvgpu_channel *ch)
|
|
{
|
|
/* ensure no fences are pending */
|
|
nvgpu_mutex_acquire(&ch->sync_lock);
|
|
if (ch->user_sync != NULL) {
|
|
nvgpu_channel_user_syncpt_set_safe_state(ch->user_sync);
|
|
}
|
|
nvgpu_mutex_release(&ch->sync_lock);
|
|
}
|
|
|
|
#endif /* CONFIG_NVGPU_KERNEL_MODE_SUBMIT */
|
|
|
|
void nvgpu_channel_set_unserviceable(struct nvgpu_channel *ch)
|
|
{
|
|
nvgpu_spinlock_acquire(&ch->unserviceable_lock);
|
|
ch->unserviceable = true;
|
|
nvgpu_spinlock_release(&ch->unserviceable_lock);
|
|
}
|
|
|
|
bool nvgpu_channel_check_unserviceable(struct nvgpu_channel *ch)
|
|
{
|
|
bool unserviceable_status;
|
|
|
|
nvgpu_spinlock_acquire(&ch->unserviceable_lock);
|
|
unserviceable_status = ch->unserviceable;
|
|
nvgpu_spinlock_release(&ch->unserviceable_lock);
|
|
|
|
return unserviceable_status;
|
|
}
|
|
|
|
void nvgpu_channel_abort(struct nvgpu_channel *ch, bool channel_preempt)
|
|
{
|
|
struct nvgpu_tsg *tsg = nvgpu_tsg_from_ch(ch);
|
|
|
|
nvgpu_log_fn(ch->g, " ");
|
|
|
|
if (tsg != NULL) {
|
|
return nvgpu_tsg_abort(ch->g, tsg, channel_preempt);
|
|
} else {
|
|
nvgpu_err(ch->g, "chid: %d is not bound to tsg", ch->chid);
|
|
}
|
|
}
|
|
|
|
void nvgpu_channel_wait_until_counter_is_N(
|
|
struct nvgpu_channel *ch, nvgpu_atomic_t *counter, int wait_value,
|
|
struct nvgpu_cond *c, const char *caller, const char *counter_name)
|
|
{
|
|
while (true) {
|
|
if (NVGPU_COND_WAIT(
|
|
c,
|
|
nvgpu_atomic_read(counter) == wait_value,
|
|
5000U) == 0) {
|
|
break;
|
|
}
|
|
|
|
nvgpu_warn(ch->g,
|
|
"%s: channel %d, still waiting, %s left: %d, waiting for: %d",
|
|
caller, ch->chid, counter_name,
|
|
nvgpu_atomic_read(counter), wait_value);
|
|
|
|
channel_dump_ref_actions(ch);
|
|
}
|
|
}
|
|
|
|
static void nvgpu_channel_usermode_deinit(struct nvgpu_channel *ch)
|
|
{
|
|
nvgpu_channel_free_usermode_buffers(ch);
|
|
#ifdef CONFIG_NVGPU_USERD
|
|
(void) nvgpu_userd_init_channel(ch->g, ch);
|
|
#endif
|
|
ch->usermode_submit_enabled = false;
|
|
}
|
|
|
|
static void channel_free_invoke_unbind(struct nvgpu_channel *ch)
|
|
{
|
|
int err = 0;
|
|
struct nvgpu_tsg *tsg;
|
|
struct gk20a *g = ch->g;
|
|
|
|
if (!nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) {
|
|
/* abort channel and remove from runlist */
|
|
tsg = nvgpu_tsg_from_ch(ch);
|
|
if (tsg != NULL) {
|
|
/* Between tsg is not null and unbind_channel call,
|
|
* ioctl cannot be called anymore because user doesn't
|
|
* have an open channel fd anymore to use for the unbind
|
|
* ioctl.
|
|
*/
|
|
err = nvgpu_tsg_unbind_channel(tsg, ch);
|
|
if (err != 0) {
|
|
nvgpu_err(g,
|
|
"failed to unbind channel %d from TSG",
|
|
ch->chid);
|
|
}
|
|
} else {
|
|
/*
|
|
* Channel is already unbound from TSG by User with
|
|
* explicit call
|
|
* Nothing to do here in that case
|
|
*/
|
|
}
|
|
}
|
|
}
|
|
|
|
static void channel_free_invoke_deferred_engine_reset(struct nvgpu_channel *ch)
|
|
{
|
|
#ifdef CONFIG_NVGPU_DEBUGGER
|
|
struct gk20a *g = ch->g;
|
|
struct nvgpu_fifo *f = &g->fifo;
|
|
bool deferred_reset_pending;
|
|
|
|
/* if engine reset was deferred, perform it now */
|
|
nvgpu_mutex_acquire(&f->deferred_reset_mutex);
|
|
deferred_reset_pending = g->fifo.deferred_reset_pending;
|
|
nvgpu_mutex_release(&f->deferred_reset_mutex);
|
|
|
|
if (deferred_reset_pending) {
|
|
nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was"
|
|
" deferred, running now");
|
|
nvgpu_mutex_acquire(&g->fifo.engines_reset_mutex);
|
|
|
|
nvgpu_assert(nvgpu_channel_deferred_reset_engines(g, ch) == 0);
|
|
|
|
nvgpu_mutex_release(&g->fifo.engines_reset_mutex);
|
|
}
|
|
#endif
|
|
}
|
|
|
|
static void channel_free_invoke_sync_destroy(struct nvgpu_channel *ch)
|
|
{
|
|
#ifdef CONFIG_TEGRA_GK20A_NVHOST
|
|
nvgpu_mutex_acquire(&ch->sync_lock);
|
|
if (ch->user_sync != NULL) {
|
|
/*
|
|
* Set user managed syncpoint to safe state
|
|
* But it's already done if channel is recovered
|
|
*/
|
|
if (!nvgpu_channel_check_unserviceable(ch)) {
|
|
nvgpu_channel_user_syncpt_set_safe_state(ch->user_sync);
|
|
}
|
|
nvgpu_channel_user_syncpt_destroy(ch->user_sync);
|
|
ch->user_sync = NULL;
|
|
}
|
|
nvgpu_mutex_release(&ch->sync_lock);
|
|
#endif
|
|
}
|
|
|
|
static void channel_free_unlink_debug_session(struct nvgpu_channel *ch)
|
|
{
|
|
#ifdef CONFIG_NVGPU_DEBUGGER
|
|
struct gk20a *g = ch->g;
|
|
struct dbg_session_gk20a *dbg_s;
|
|
struct dbg_session_data *session_data, *tmp_s;
|
|
struct dbg_session_channel_data *ch_data, *tmp;
|
|
|
|
/* unlink all debug sessions */
|
|
nvgpu_mutex_acquire(&g->dbg_sessions_lock);
|
|
|
|
nvgpu_list_for_each_entry_safe(session_data, tmp_s,
|
|
&ch->dbg_s_list, dbg_session_data, dbg_s_entry) {
|
|
dbg_s = session_data->dbg_s;
|
|
nvgpu_mutex_acquire(&dbg_s->ch_list_lock);
|
|
nvgpu_list_for_each_entry_safe(ch_data, tmp, &dbg_s->ch_list,
|
|
dbg_session_channel_data, ch_entry) {
|
|
if (ch_data->chid == ch->chid) {
|
|
if (ch_data->unbind_single_channel(dbg_s,
|
|
ch_data) != 0) {
|
|
nvgpu_err(g,
|
|
"unbind failed for chid: %d",
|
|
ch_data->chid);
|
|
}
|
|
}
|
|
}
|
|
nvgpu_mutex_release(&dbg_s->ch_list_lock);
|
|
}
|
|
|
|
nvgpu_mutex_release(&g->dbg_sessions_lock);
|
|
#endif
|
|
}
|
|
|
|
static void channel_free_wait_for_refs(struct nvgpu_channel *ch,
|
|
int wait_value, bool force)
|
|
{
|
|
/* wait until no more refs to the channel */
|
|
if (!force) {
|
|
nvgpu_channel_wait_until_counter_is_N(
|
|
ch, &ch->ref_count, wait_value, &ch->ref_count_dec_wq,
|
|
__func__, "references");
|
|
}
|
|
|
|
}
|
|
|
|
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
|
|
static void channel_free_put_deterministic_ref_from_init(
|
|
struct nvgpu_channel *ch)
|
|
{
|
|
struct gk20a *g = ch->g;
|
|
|
|
/* put back the channel-wide submit ref from init */
|
|
if (ch->deterministic) {
|
|
nvgpu_rwsem_down_read(&g->deterministic_busy);
|
|
ch->deterministic = false;
|
|
if (!ch->deterministic_railgate_allowed) {
|
|
gk20a_idle(g);
|
|
}
|
|
ch->deterministic_railgate_allowed = false;
|
|
|
|
nvgpu_rwsem_up_read(&g->deterministic_busy);
|
|
}
|
|
}
|
|
#endif
|
|
|
|
/* call ONLY when no references to the channel exist: after the last put */
|
|
static void channel_free(struct nvgpu_channel *ch, bool force)
|
|
{
|
|
struct gk20a *g = ch->g;
|
|
struct nvgpu_fifo *f = &g->fifo;
|
|
struct vm_gk20a *ch_vm = ch->vm;
|
|
unsigned long timeout;
|
|
|
|
if (g == NULL) {
|
|
nvgpu_do_assert_print(g, "ch already freed");
|
|
return;
|
|
}
|
|
|
|
nvgpu_log_fn(g, " ");
|
|
|
|
timeout = nvgpu_get_poll_timeout(g);
|
|
|
|
#ifdef CONFIG_NVGPU_TRACE
|
|
trace_gk20a_free_channel(ch->chid);
|
|
#endif
|
|
|
|
/*
|
|
* Disable channel/TSG and unbind here. This should not be executed if
|
|
* HW access is not available during shutdown/removal path as it will
|
|
* trigger a timeout
|
|
*/
|
|
channel_free_invoke_unbind(ch);
|
|
|
|
/*
|
|
* OS channel close may require that syncpoint should be set to some
|
|
* safe value before it is called. nvgpu_tsg_unbind_channel(above) is
|
|
* internally doing that by calling nvgpu_nvhost_syncpt_set_safe_state
|
|
* deep down in the stack. Otherwise os_channel close may block if the
|
|
* app is killed abruptly (which was going to do the syncpoint signal).
|
|
*/
|
|
if (g->os_channel.close != NULL) {
|
|
g->os_channel.close(ch, force);
|
|
}
|
|
|
|
/* wait until there's only our ref to the channel */
|
|
channel_free_wait_for_refs(ch, 1, force);
|
|
|
|
/* wait until all pending interrupts for recently completed
|
|
* jobs are handled */
|
|
nvgpu_wait_for_deferred_interrupts(g);
|
|
|
|
/* prevent new refs */
|
|
nvgpu_spinlock_acquire(&ch->ref_obtain_lock);
|
|
if (!ch->referenceable) {
|
|
nvgpu_spinlock_release(&ch->ref_obtain_lock);
|
|
nvgpu_err(ch->g,
|
|
"Extra %s() called to channel %u",
|
|
__func__, ch->chid);
|
|
return;
|
|
}
|
|
ch->referenceable = false;
|
|
nvgpu_spinlock_release(&ch->ref_obtain_lock);
|
|
|
|
/* matches with the initial reference in nvgpu_channel_open_new() */
|
|
nvgpu_atomic_dec(&ch->ref_count);
|
|
|
|
channel_free_wait_for_refs(ch, 0, force);
|
|
|
|
channel_free_invoke_deferred_engine_reset(ch);
|
|
|
|
if (!nvgpu_channel_as_bound(ch)) {
|
|
goto unbind;
|
|
}
|
|
|
|
nvgpu_log_info(g, "freeing bound channel context, timeout=%ld",
|
|
timeout);
|
|
|
|
#ifdef CONFIG_NVGPU_FECS_TRACE
|
|
if (g->ops.gr.fecs_trace.unbind_channel && !ch->vpr)
|
|
g->ops.gr.fecs_trace.unbind_channel(g, &ch->inst_block);
|
|
#endif
|
|
|
|
if (g->ops.gr.setup.free_subctx != NULL) {
|
|
g->ops.gr.setup.free_subctx(ch);
|
|
ch->subctx = NULL;
|
|
}
|
|
|
|
g->ops.gr.intr.flush_channel_tlb(g);
|
|
|
|
if (ch->usermode_submit_enabled) {
|
|
nvgpu_channel_usermode_deinit(ch);
|
|
} else {
|
|
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
|
|
channel_kernelmode_deinit(ch);
|
|
#endif
|
|
}
|
|
|
|
channel_free_invoke_sync_destroy(ch);
|
|
|
|
/*
|
|
* When releasing the channel we unbind the VM - so release the ref.
|
|
*/
|
|
nvgpu_vm_put(ch_vm);
|
|
|
|
/* make sure we don't have deferred interrupts pending that
|
|
* could still touch the channel */
|
|
nvgpu_wait_for_deferred_interrupts(g);
|
|
|
|
unbind:
|
|
g->ops.channel.unbind(ch);
|
|
g->ops.channel.free_inst(g, ch);
|
|
|
|
nvgpu_channel_wdt_destroy(ch->wdt);
|
|
ch->wdt = NULL;
|
|
|
|
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
|
|
channel_free_put_deterministic_ref_from_init(ch);
|
|
#endif
|
|
|
|
ch->vpr = false;
|
|
ch->vm = NULL;
|
|
|
|
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
|
|
WARN_ON(ch->sync != NULL);
|
|
#endif
|
|
|
|
channel_free_unlink_debug_session(ch);
|
|
|
|
#if GK20A_CHANNEL_REFCOUNT_TRACKING
|
|
(void) memset(ch->ref_actions, 0, sizeof(ch->ref_actions));
|
|
ch->ref_actions_put = 0;
|
|
#endif
|
|
|
|
/* make sure we catch accesses of unopened channels in case
|
|
* there's non-refcounted channel pointers hanging around */
|
|
ch->g = NULL;
|
|
nvgpu_smp_wmb();
|
|
|
|
/* ALWAYS last */
|
|
free_channel(f, ch);
|
|
}
|
|
|
|
static void channel_dump_ref_actions(struct nvgpu_channel *ch)
|
|
{
|
|
#if GK20A_CHANNEL_REFCOUNT_TRACKING
|
|
size_t i, get;
|
|
s64 now = nvgpu_current_time_ms();
|
|
s64 prev = 0;
|
|
struct gk20a *g = ch->g;
|
|
|
|
nvgpu_spinlock_acquire(&ch->ref_actions_lock);
|
|
|
|
nvgpu_info(g, "ch %d: refs %d. Actions, most recent last:",
|
|
ch->chid, nvgpu_atomic_read(&ch->ref_count));
|
|
|
|
/* start at the oldest possible entry. put is next insertion point */
|
|
get = ch->ref_actions_put;
|
|
|
|
/*
|
|
* If the buffer is not full, this will first loop to the oldest entry,
|
|
* skipping not-yet-initialized entries. There is no ref_actions_get.
|
|
*/
|
|
for (i = 0; i < GK20A_CHANNEL_REFCOUNT_TRACKING; i++) {
|
|
struct nvgpu_channel_ref_action *act = &ch->ref_actions[get];
|
|
|
|
if (act->trace.nr_entries) {
|
|
nvgpu_info(g,
|
|
"%s ref %zu steps ago (age %lld ms, diff %lld ms)",
|
|
act->type == channel_gk20a_ref_action_get
|
|
? "GET" : "PUT",
|
|
GK20A_CHANNEL_REFCOUNT_TRACKING - 1 - i,
|
|
now - act->timestamp_ms,
|
|
act->timestamp_ms - prev);
|
|
|
|
print_stack_trace(&act->trace, 0);
|
|
prev = act->timestamp_ms;
|
|
}
|
|
|
|
get = (get + 1) % GK20A_CHANNEL_REFCOUNT_TRACKING;
|
|
}
|
|
|
|
nvgpu_spinlock_release(&ch->ref_actions_lock);
|
|
#endif
|
|
}
|
|
|
|
#if GK20A_CHANNEL_REFCOUNT_TRACKING
|
|
static void channel_save_ref_source(struct nvgpu_channel *ch,
|
|
enum nvgpu_channel_ref_action_type type)
|
|
{
|
|
struct nvgpu_channel_ref_action *act;
|
|
|
|
nvgpu_spinlock_acquire(&ch->ref_actions_lock);
|
|
|
|
act = &ch->ref_actions[ch->ref_actions_put];
|
|
act->type = type;
|
|
act->trace.max_entries = GK20A_CHANNEL_REFCOUNT_TRACKING_STACKLEN;
|
|
act->trace.nr_entries = 0;
|
|
act->trace.skip = 3; /* onwards from the caller of this */
|
|
act->trace.entries = act->trace_entries;
|
|
save_stack_trace(&act->trace);
|
|
act->timestamp_ms = nvgpu_current_time_ms();
|
|
ch->ref_actions_put = (ch->ref_actions_put + 1) %
|
|
GK20A_CHANNEL_REFCOUNT_TRACKING;
|
|
|
|
nvgpu_spinlock_release(&ch->ref_actions_lock);
|
|
}
|
|
#endif
|
|
|
|
/* Try to get a reference to the channel. Return nonzero on success. If fails,
|
|
* the channel is dead or being freed elsewhere and you must not touch it.
|
|
*
|
|
* Always when a nvgpu_channel pointer is seen and about to be used, a
|
|
* reference must be held to it - either by you or the caller, which should be
|
|
* documented well or otherwise clearly seen. This usually boils down to the
|
|
* file from ioctls directly, or an explicit get in exception handlers when the
|
|
* channel is found by a chid.
|
|
*
|
|
* Most global functions in this file require a reference to be held by the
|
|
* caller.
|
|
*/
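
/*
 * Illustrative usage pattern (added), as seen throughout this file:
 *
 *	struct nvgpu_channel *ch = nvgpu_channel_from_id(g, chid);
 *
 *	if (ch != NULL) {
 *		... the reference keeps ch alive here ...
 *		nvgpu_channel_put(ch);
 *	}
 */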
|
|
struct nvgpu_channel *nvgpu_channel_get__func(struct nvgpu_channel *ch,
|
|
const char *caller)
|
|
{
|
|
struct nvgpu_channel *ret;
|
|
|
|
nvgpu_spinlock_acquire(&ch->ref_obtain_lock);
|
|
|
|
if (likely(ch->referenceable)) {
|
|
#if GK20A_CHANNEL_REFCOUNT_TRACKING
|
|
channel_save_ref_source(ch, channel_gk20a_ref_action_get);
|
|
#endif
|
|
nvgpu_atomic_inc(&ch->ref_count);
|
|
ret = ch;
|
|
} else {
|
|
ret = NULL;
|
|
}
|
|
|
|
nvgpu_spinlock_release(&ch->ref_obtain_lock);
|
|
|
|
#ifdef CONFIG_NVGPU_TRACE
|
|
if (ret != NULL) {
|
|
trace_nvgpu_channel_get(ch->chid, caller);
|
|
}
|
|
#endif
|
|
|
|
return ret;
|
|
}
|
|
|
|
void nvgpu_channel_put__func(struct nvgpu_channel *ch, const char *caller)
|
|
{
|
|
#if GK20A_CHANNEL_REFCOUNT_TRACKING
|
|
channel_save_ref_source(ch, channel_gk20a_ref_action_put);
|
|
#endif
|
|
#ifdef CONFIG_NVGPU_TRACE
|
|
trace_nvgpu_channel_put(ch->chid, caller);
|
|
#endif
|
|
nvgpu_atomic_dec(&ch->ref_count);
|
|
if (nvgpu_cond_broadcast(&ch->ref_count_dec_wq) != 0) {
|
|
nvgpu_warn(ch->g, "failed to broadcast");
|
|
}
|
|
|
|
/* More puts than gets. Channel is probably going to get
|
|
* stuck. */
|
|
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 2, NVGPU_MISRA(Rule, 10_3), "Bug 2277532")
|
|
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 2, NVGPU_MISRA(Rule, 14_4), "Bug 2277532")
|
|
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 2, NVGPU_MISRA(Rule, 15_6), "Bug 2277532")
|
|
WARN_ON(nvgpu_atomic_read(&ch->ref_count) < 0);
|
|
|
|
/* Also, more puts than gets. ref_count can go to 0 only if
|
|
* the channel is closing. Channel is probably going to get
|
|
* stuck. */
|
|
WARN_ON((nvgpu_atomic_read(&ch->ref_count) == 0) && ch->referenceable);
|
|
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 10_3))
|
|
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 14_4))
|
|
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 15_6))
|
|
}
|
|
|
|
struct nvgpu_channel *nvgpu_channel_from_id__func(struct gk20a *g,
|
|
u32 chid, const char *caller)
|
|
{
|
|
if (chid == NVGPU_INVALID_CHANNEL_ID) {
|
|
return NULL;
|
|
}
|
|
|
|
return nvgpu_channel_get__func(&g->fifo.channel[chid], caller);
|
|
}
|
|
|
|
void nvgpu_channel_close(struct nvgpu_channel *ch)
|
|
{
|
|
channel_free(ch, false);
|
|
}
|
|
|
|
/*
|
|
* Be careful with this - it is meant for terminating channels when we know the
|
|
* driver is otherwise dying. Ref counts and the like are ignored by this
|
|
* version of the cleanup.
|
|
*/
|
|
void nvgpu_channel_kill(struct nvgpu_channel *ch)
|
|
{
|
|
channel_free(ch, true);
|
|
}
|
|
|
|
struct nvgpu_channel *nvgpu_channel_open_new(struct gk20a *g,
|
|
u32 runlist_id,
|
|
bool is_privileged_channel,
|
|
pid_t pid, pid_t tid)
|
|
{
|
|
struct nvgpu_fifo *f = &g->fifo;
|
|
struct nvgpu_channel *ch;
|
|
|
|
/* compatibility with existing code */
|
|
if (!nvgpu_engine_is_valid_runlist_id(g, runlist_id)) {
|
|
runlist_id = nvgpu_engine_get_gr_runlist_id(g);
|
|
}
|
|
|
|
nvgpu_log_fn(g, " ");
|
|
|
|
ch = allocate_channel(f);
|
|
if (ch == NULL) {
|
|
/* TBD: we want to make this virtualizable */
|
|
nvgpu_err(g, "out of hw chids");
|
|
return NULL;
|
|
}
|
|
|
|
#ifdef CONFIG_NVGPU_TRACE
|
|
trace_nvgpu_channel_open_new(ch->chid);
|
|
#endif
|
|
|
|
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 14_4), "Bug 2277532")
|
|
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 15_6), "Bug 2277532")
|
|
BUG_ON(ch->g != NULL);
|
|
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 14_4))
|
|
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 15_6))
|
|
ch->g = g;
|
|
|
|
/* Runlist for the channel */
|
|
ch->runlist_id = runlist_id;
|
|
|
|
/* Channel privilege level */
|
|
ch->is_privileged_channel = is_privileged_channel;
|
|
|
|
ch->pid = tid;
|
|
ch->tgid = pid; /* process granularity for FECS traces */
|
|
|
|
#ifdef CONFIG_NVGPU_USERD
|
|
if (nvgpu_userd_init_channel(g, ch) != 0) {
|
|
nvgpu_err(g, "userd init failed");
|
|
goto clean_up;
|
|
}
|
|
#endif
|
|
|
|
if (g->ops.channel.alloc_inst(g, ch) != 0) {
|
|
nvgpu_err(g, "inst allocation failed");
|
|
goto clean_up;
|
|
}
|
|
|
|
/* now the channel is in a limbo out of the free list but not marked as
|
|
* alive and used (i.e. get-able) yet */
|
|
|
|
/* By default, channel is regular (non-TSG) channel */
|
|
ch->tsgid = NVGPU_INVALID_TSG_ID;
|
|
|
|
/* clear ctxsw timeout counter and update timestamp */
|
|
ch->ctxsw_timeout_accumulated_ms = 0;
|
|
ch->ctxsw_timeout_gpfifo_get = 0;
|
|
/* set gr host default timeout */
|
|
ch->ctxsw_timeout_max_ms = nvgpu_get_poll_timeout(g);
|
|
ch->ctxsw_timeout_debug_dump = true;
|
|
/* ch is unserviceable until it is bound to tsg */
|
|
ch->unserviceable = true;
|
|
|
|
#ifdef CONFIG_NVGPU_CHANNEL_WDT
|
|
ch->wdt = nvgpu_channel_wdt_alloc(g);
|
|
if (ch->wdt == NULL) {
|
|
nvgpu_err(g, "wdt alloc failed");
|
|
goto clean_up;
|
|
}
|
|
ch->wdt_debug_dump = true;
|
|
#endif
|
|
|
|
ch->obj_class = 0;
|
|
ch->subctx_id = 0;
|
|
ch->runqueue_sel = 0;
|
|
|
|
ch->mmu_nack_handled = false;
|
|
|
|
/* The channel is *not* runnable at this point. It still needs to have
|
|
* an address space bound and allocate a gpfifo and grctx. */
|
|
|
|
if (nvgpu_cond_init(&ch->notifier_wq) != 0) {
|
|
nvgpu_err(g, "cond init failed");
|
|
goto clean_up;
|
|
}
|
|
if (nvgpu_cond_init(&ch->semaphore_wq) != 0) {
|
|
nvgpu_err(g, "cond init failed");
|
|
goto clean_up;
|
|
}
|
|
|
|
/* Mark the channel alive, get-able, with 1 initial use
|
|
* references. The initial reference will be decreased in
|
|
* channel_free().
|
|
*
|
|
* Use the lock, since an asynchronous thread could
|
|
* try to access this channel while it's not fully
|
|
* initialized.
|
|
*/
|
|
nvgpu_spinlock_acquire(&ch->ref_obtain_lock);
|
|
ch->referenceable = true;
|
|
nvgpu_atomic_set(&ch->ref_count, 1);
|
|
nvgpu_spinlock_release(&ch->ref_obtain_lock);
|
|
|
|
return ch;
|
|
|
|
clean_up:
|
|
ch->g = NULL;
|
|
free_channel(f, ch);
|
|
return NULL;
|
|
}
|
|
|
|
static int channel_setup_ramfc(struct nvgpu_channel *c,
|
|
struct nvgpu_setup_bind_args *args,
|
|
u64 gpfifo_gpu_va, u32 gpfifo_size)
|
|
{
|
|
int err = 0;
|
|
u64 pbdma_acquire_timeout = 0ULL;
|
|
struct gk20a *g = c->g;
|
|
|
|
if (nvgpu_channel_wdt_enabled(c->wdt) &&
|
|
nvgpu_is_timeouts_enabled(c->g)) {
|
|
pbdma_acquire_timeout = nvgpu_channel_wdt_limit(c->wdt);
|
|
}
|
|
|
|
err = g->ops.ramfc.setup(c, gpfifo_gpu_va, gpfifo_size,
|
|
pbdma_acquire_timeout, args->flags);
|
|
|
|
return err;
|
|
}
|
|
|
|
static int nvgpu_channel_setup_usermode(struct nvgpu_channel *c,
|
|
struct nvgpu_setup_bind_args *args)
|
|
{
|
|
u32 gpfifo_size = args->num_gpfifo_entries;
|
|
int err = 0;
|
|
struct gk20a *g = c->g;
|
|
u64 gpfifo_gpu_va;
|
|
|
|
if (g->os_channel.alloc_usermode_buffers != NULL) {
|
|
err = g->os_channel.alloc_usermode_buffers(c, args);
|
|
if (err != 0) {
|
|
nvgpu_err(g, "Usermode buffer alloc failed");
|
|
goto clean_up;
|
|
}
|
|
c->userd_mem = &c->usermode_userd;
|
|
c->userd_offset = 0U;
|
|
c->userd_iova = nvgpu_mem_get_addr(g, c->userd_mem);
|
|
c->usermode_submit_enabled = true;
|
|
} else {
|
|
nvgpu_err(g, "Usermode submit not supported");
|
|
err = -EINVAL;
|
|
goto clean_up;
|
|
}
|
|
gpfifo_gpu_va = c->usermode_gpfifo.gpu_va;
|
|
|
|
nvgpu_log_info(g, "channel %d : gpfifo_base 0x%016llx, size %d",
|
|
c->chid, gpfifo_gpu_va, gpfifo_size);
|
|
|
|
err = channel_setup_ramfc(c, args, gpfifo_gpu_va, gpfifo_size);
|
|
|
|
if (err != 0) {
|
|
goto clean_up_unmap;
|
|
}
|
|
|
|
err = nvgpu_channel_update_runlist(c, true);
|
|
if (err != 0) {
|
|
goto clean_up_unmap;
|
|
}
|
|
|
|
return 0;
|
|
|
|
clean_up_unmap:
|
|
nvgpu_channel_free_usermode_buffers(c);
|
|
#ifdef CONFIG_NVGPU_USERD
|
|
(void) nvgpu_userd_init_channel(g, c);
|
|
#endif
|
|
c->usermode_submit_enabled = false;
|
|
clean_up:
|
|
return err;
|
|
}
|
|
|
|
static int channel_setup_bind_prechecks(struct nvgpu_channel *c,
|
|
struct nvgpu_setup_bind_args *args)
|
|
{
|
|
struct gk20a *g = c->g;
|
|
struct nvgpu_tsg *tsg;
|
|
int err = 0;
|
|
|
|
if (args->num_gpfifo_entries > 0x80000000U) {
|
|
nvgpu_err(g,
|
|
"num_gpfifo_entries exceeds max limit of 2^31");
|
|
err = -EINVAL;
|
|
goto fail;
|
|
}
|
|
|
|
/*
|
|
* The gpfifo ring buffer is empty when get == put and it's full when
|
|
* get == put + 1. Just one entry wouldn't make sense.
|
|
*/
|
|
if (args->num_gpfifo_entries < 2U) {
|
|
nvgpu_err(g, "gpfifo has no space for any jobs");
|
|
err = -EINVAL;
|
|
goto fail;
|
|
}
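
	/*
	 * Illustrative note (added): with the empty/full convention above
	 * (empty when get == put, full when get == put + 1), one slot is
	 * always sacrificed, so num_gpfifo_entries == 2 is the smallest ring
	 * that can hold even a single entry.
	 */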
|
|
|
|
/* an address space needs to have been bound at this point. */
|
|
if (!nvgpu_channel_as_bound(c)) {
|
|
nvgpu_err(g,
|
|
"not bound to an address space at time of setup_bind");
|
|
err = -EINVAL;
|
|
goto fail;
|
|
}
|
|
|
|
/* The channel needs to be bound to a tsg at this point */
|
|
tsg = nvgpu_tsg_from_ch(c);
|
|
if (tsg == NULL) {
|
|
nvgpu_err(g,
|
|
"not bound to tsg at time of setup_bind");
|
|
err = -EINVAL;
|
|
goto fail;
|
|
}
|
|
|
|
if (c->usermode_submit_enabled) {
|
|
nvgpu_err(g, "channel %d : "
|
|
"usermode buffers allocated", c->chid);
|
|
err = -EEXIST;
|
|
goto fail;
|
|
}
|
|
|
|
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
|
|
if (nvgpu_mem_is_valid(&c->gpfifo.mem)) {
|
|
nvgpu_err(g, "channel %d :"
|
|
"gpfifo already allocated", c->chid);
|
|
err = -EEXIST;
|
|
goto fail;
|
|
}
|
|
#endif
|
|
if ((args->flags & NVGPU_SETUP_BIND_FLAGS_SUPPORT_DETERMINISTIC) != 0U
|
|
&& nvgpu_channel_wdt_enabled(c->wdt)) {
|
|
/*
|
|
* The watchdog would need async job tracking, but that's not
|
|
* compatible with deterministic mode. We won't disable it
|
|
* implicitly; the user has to ask.
|
|
*/
|
|
nvgpu_err(g,
|
|
"deterministic is not compatible with watchdog");
|
|
err = -EINVAL;
|
|
goto fail;
|
|
}
|
|
|
|
/* FUSA build for now assumes that the deterministic flag is not useful */
|
|
#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA
|
|
if ((args->flags & NVGPU_SETUP_BIND_FLAGS_USERMODE_SUPPORT) != 0U &&
|
|
(args->flags & NVGPU_SETUP_BIND_FLAGS_SUPPORT_DETERMINISTIC) == 0U) {
|
|
/*
|
|
* Usermode submit shares various preconditions with
|
|
* deterministic mode. Require that it's explicitly set to
|
|
* avoid surprises.
|
|
*/
|
|
nvgpu_err(g, "need deterministic for usermode submit");
|
|
err = -EINVAL;
|
|
goto fail;
|
|
}
|
|
#endif
|
|
|
|
fail:
|
|
return err;
|
|
}
|
|
|
|
int nvgpu_channel_setup_bind(struct nvgpu_channel *c,
|
|
struct nvgpu_setup_bind_args *args)
|
|
{
|
|
struct gk20a *g = c->g;
|
|
int err = 0;
|
|
|
|
err = channel_setup_bind_prechecks(c, args);
|
|
if (err != 0) {
|
|
goto fail;
|
|
}
|
|
|
|
#ifdef CONFIG_NVGPU_VPR
|
|
if ((args->flags & NVGPU_SETUP_BIND_FLAGS_SUPPORT_VPR) != 0U) {
|
|
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_VPR)) {
|
|
err = -EINVAL;
|
|
goto fail;
|
|
}
|
|
|
|
c->vpr = true;
|
|
}
|
|
#else
|
|
c->vpr = false;
|
|
#endif
|
|
|
|
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
|
|
if ((args->flags & NVGPU_SETUP_BIND_FLAGS_SUPPORT_DETERMINISTIC) != 0U) {
|
|
nvgpu_rwsem_down_read(&g->deterministic_busy);
|
|
/*
|
|
* Railgating isn't deterministic; instead of disallowing
|
|
* railgating globally, take a power refcount for this
|
|
* channel's lifetime. The gk20a_idle() pair for this happens
|
|
* when the channel gets freed.
|
|
*
|
|
* Deterministic flag and this busy must be atomic within the
|
|
* busy lock.
|
|
*/
|
|
err = gk20a_busy(g);
|
|
if (err != 0) {
|
|
nvgpu_rwsem_up_read(&g->deterministic_busy);
|
|
return err;
|
|
}
|
|
|
|
c->deterministic = true;
|
|
nvgpu_rwsem_up_read(&g->deterministic_busy);
|
|
}
|
|
#endif
|
|
|
|
if ((args->flags & NVGPU_SETUP_BIND_FLAGS_USERMODE_SUPPORT) != 0U) {
|
|
err = nvgpu_channel_setup_usermode(c, args);
|
|
} else {
|
|
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
|
|
if (g->os_channel.open != NULL) {
|
|
g->os_channel.open(c);
|
|
}
|
|
err = channel_setup_kernelmode(c, args);
|
|
#else
|
|
err = -EINVAL;
|
|
#endif
|
|
}
|
|
|
|
if (err != 0) {
|
|
goto clean_up_idle;
|
|
}
|
|
|
|
g->ops.channel.bind(c);
|
|
|
|
nvgpu_log_fn(g, "done");
|
|
return 0;
|
|
|
|
clean_up_idle:
|
|
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
|
|
if (nvgpu_channel_is_deterministic(c)) {
|
|
nvgpu_rwsem_down_read(&g->deterministic_busy);
|
|
gk20a_idle(g);
|
|
c->deterministic = false;
|
|
nvgpu_rwsem_up_read(&g->deterministic_busy);
|
|
}
|
|
#endif
|
|
fail:
|
|
nvgpu_err(g, "fail");
|
|
return err;
|
|
}
|
|
|
|
void nvgpu_channel_free_usermode_buffers(struct nvgpu_channel *c)
{
        if (nvgpu_mem_is_valid(&c->usermode_userd)) {
                nvgpu_dma_free(c->g, &c->usermode_userd);
        }
        if (nvgpu_mem_is_valid(&c->usermode_gpfifo)) {
                nvgpu_dma_unmap_free(c->vm, &c->usermode_gpfifo);
        }
        if (c->g->os_channel.free_usermode_buffers != NULL) {
                c->g->os_channel.free_usermode_buffers(c);
        }
}

static bool nvgpu_channel_ctxsw_timeout_debug_dump_state(
                struct nvgpu_channel *ch)
{
        bool verbose = false;

        if (nvgpu_is_err_notifier_set(ch,
                        NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT)) {
                verbose = ch->ctxsw_timeout_debug_dump;
        }

        return verbose;
}

void nvgpu_channel_wakeup_wqs(struct gk20a *g,
                struct nvgpu_channel *ch)
{
        /* unblock pending waits */
        if (nvgpu_cond_broadcast_interruptible(&ch->semaphore_wq) != 0) {
                nvgpu_warn(g, "failed to broadcast");
        }
        if (nvgpu_cond_broadcast_interruptible(&ch->notifier_wq) != 0) {
                nvgpu_warn(g, "failed to broadcast");
        }
}

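/*
 * Mark a channel as no longer serviceable after a fault or ctxsw timeout and
 * wake anyone sleeping on its wait queues. Returns true when the error should
 * trigger a verbose debug dump, i.e. when the idle-timeout error notifier is
 * set and the channel has ctxsw_timeout_debug_dump enabled.
 */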
bool nvgpu_channel_mark_error(struct gk20a *g, struct nvgpu_channel *ch)
{
        bool verbose;

        verbose = nvgpu_channel_ctxsw_timeout_debug_dump_state(ch);

        /* mark channel as faulted */
        nvgpu_channel_set_unserviceable(ch);

        nvgpu_channel_wakeup_wqs(g, ch);

        return verbose;
}

void nvgpu_channel_set_error_notifier(struct gk20a *g, struct nvgpu_channel *ch,
                u32 error_notifier)
{
        g->ops.channel.set_error_notifier(ch, error_notifier);
}

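/*
 * Software quiesce: mark every existing channel unserviceable, post the
 * idle-timeout error notifier and wake all waiters, so that anyone blocked on
 * the channels can bail out. Typically used when the GPU is being taken down
 * and cannot be recovered through the normal paths.
 */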
void nvgpu_channel_sw_quiesce(struct gk20a *g)
{
        struct nvgpu_fifo *f = &g->fifo;
        struct nvgpu_channel *ch;
        u32 chid;

        for (chid = 0; chid < f->num_channels; chid++) {
                ch = nvgpu_channel_get(&f->channel[chid]);
                if (ch != NULL) {
                        nvgpu_channel_set_error_notifier(g, ch,
                                NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
                        nvgpu_channel_set_unserviceable(ch);
                        nvgpu_channel_wakeup_wqs(g, ch);
                        nvgpu_channel_put(ch);
                }
        }
}

#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
/*
 * Stop deterministic channel activity for do_idle() when power needs to go off
 * momentarily but deterministic channels keep power refs for potentially a
 * long time.
 *
 * Takes write access on g->deterministic_busy.
 *
 * Must be paired with nvgpu_channel_deterministic_unidle().
 */
void nvgpu_channel_deterministic_idle(struct gk20a *g)
{
        struct nvgpu_fifo *f = &g->fifo;
        u32 chid;

        /* Grab exclusive access to the hw to block new submits */
        nvgpu_rwsem_down_write(&g->deterministic_busy);

        for (chid = 0; chid < f->num_channels; chid++) {
                struct nvgpu_channel *ch = nvgpu_channel_from_id(g, chid);

                if (ch == NULL) {
                        continue;
                }

                if (ch->deterministic && !ch->deterministic_railgate_allowed) {
                        /*
                         * Drop the power ref taken when setting deterministic
                         * flag. deterministic_unidle will put this and the
                         * channel ref back. If railgate is allowed separately
                         * for this channel, the power ref has already been put
                         * away.
                         *
                         * Hold the channel ref: it must not get freed in
                         * between. A race could otherwise result in lost
                         * gk20a_busy() via unidle, and in unbalanced
                         * gk20a_idle() via closing the channel.
                         */
                        gk20a_idle(g);
                } else {
                        /* Not interesting, carry on. */
                        nvgpu_channel_put(ch);
                }
        }
}

/*
 * Allow deterministic channel activity again for do_unidle().
 *
 * This releases write access on g->deterministic_busy.
 */
void nvgpu_channel_deterministic_unidle(struct gk20a *g)
{
        struct nvgpu_fifo *f = &g->fifo;
        u32 chid;
        int err;

        for (chid = 0; chid < f->num_channels; chid++) {
                struct nvgpu_channel *ch = nvgpu_channel_from_id(g, chid);

                if (ch == NULL) {
                        continue;
                }

                /*
                 * Deterministic state changes inside deterministic_busy lock,
                 * which we took in deterministic_idle.
                 */
                if (ch->deterministic && !ch->deterministic_railgate_allowed) {
                        err = gk20a_busy(g);
                        if (err != 0) {
                                nvgpu_err(g, "cannot busy() again!");
                        }
                        /* Took this in idle() */
                        nvgpu_channel_put(ch);
                }

                nvgpu_channel_put(ch);
        }

        /* Release submits, new deterministic channels and frees */
        nvgpu_rwsem_up_write(&g->deterministic_busy);
}
#endif

static void nvgpu_channel_destroy(struct nvgpu_channel *c)
{
        nvgpu_mutex_destroy(&c->ioctl_lock);
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
        nvgpu_mutex_destroy(&c->joblist.cleanup_lock);
        nvgpu_mutex_destroy(&c->joblist.pre_alloc.read_lock);
#endif
        nvgpu_mutex_destroy(&c->sync_lock);
#if defined(CONFIG_NVGPU_CYCLESTATS)
        nvgpu_mutex_destroy(&c->cyclestate.cyclestate_buffer_mutex);
        nvgpu_mutex_destroy(&c->cs_client_mutex);
#endif
#if defined(CONFIG_NVGPU_DEBUGGER)
        nvgpu_mutex_destroy(&c->dbg_s_lock);
#endif
}

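/*
 * Tear down all channel bookkeeping on driver shutdown. Any channel that is
 * still referenceable is force-closed first, then its locks are destroyed and
 * the channel array itself is freed.
 */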
void nvgpu_channel_cleanup_sw(struct gk20a *g)
{
        struct nvgpu_fifo *f = &g->fifo;
        u32 chid;

        /*
         * Make sure all channels are closed before deleting them.
         */
        for (chid = 0; chid < f->num_channels; chid++) {
                struct nvgpu_channel *ch = &f->channel[chid];

                /*
                 * Could race but worst that happens is we get an error message
                 * from channel_free() complaining about multiple closes.
                 */
                if (ch->referenceable) {
                        nvgpu_channel_kill(ch);
                }

                nvgpu_channel_destroy(ch);
        }

        nvgpu_vfree(g, f->channel);
        f->channel = NULL;
        nvgpu_mutex_destroy(&f->free_chs_mutex);
}

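/*
 * Initialize the software state of one channel slot: reset its reference
 * counting, create its locks and condition variables, and add it to the
 * free-channel list. Called once per channel id from nvgpu_channel_setup_sw();
 * the channel stays unusable (g == NULL, not referenceable) until it is
 * actually opened.
 */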
int nvgpu_channel_init_support(struct gk20a *g, u32 chid)
{
        struct nvgpu_channel *c = &g->fifo.channel[chid];
        int err;

        c->g = NULL;
        c->chid = chid;
        nvgpu_atomic_set(&c->bound, 0);
        nvgpu_spinlock_init(&c->ref_obtain_lock);
        nvgpu_atomic_set(&c->ref_count, 0);
        c->referenceable = false;
        err = nvgpu_cond_init(&c->ref_count_dec_wq);
        if (err != 0) {
                nvgpu_err(g, "cond_init failed");
                return err;
        }

        nvgpu_spinlock_init(&c->unserviceable_lock);

#if GK20A_CHANNEL_REFCOUNT_TRACKING
        nvgpu_spinlock_init(&c->ref_actions_lock);
#endif
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
        nvgpu_init_list_node(&c->worker_item);

        nvgpu_mutex_init(&c->joblist.cleanup_lock);
        nvgpu_mutex_init(&c->joblist.pre_alloc.read_lock);

#endif /* CONFIG_NVGPU_KERNEL_MODE_SUBMIT */
        nvgpu_mutex_init(&c->ioctl_lock);
        nvgpu_mutex_init(&c->sync_lock);
#if defined(CONFIG_NVGPU_CYCLESTATS)
        nvgpu_mutex_init(&c->cyclestate.cyclestate_buffer_mutex);
        nvgpu_mutex_init(&c->cs_client_mutex);
#endif
#if defined(CONFIG_NVGPU_DEBUGGER)
        nvgpu_init_list_node(&c->dbg_s_list);
        nvgpu_mutex_init(&c->dbg_s_lock);
#endif
        nvgpu_init_list_node(&c->ch_entry);
        nvgpu_list_add(&c->free_chs, &g->fifo.free_chs);

        return 0;
}

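/*
 * Allocate and initialize the per-channel software state for the whole fifo.
 * On failure, the channels initialized so far are destroyed again so that no
 * partially constructed state is left behind.
 */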
int nvgpu_channel_setup_sw(struct gk20a *g)
{
        struct nvgpu_fifo *f = &g->fifo;
        u32 chid, i;
        int err;

        f->num_channels = g->ops.channel.count(g);

        nvgpu_mutex_init(&f->free_chs_mutex);

        f->channel = nvgpu_vzalloc(g, f->num_channels * sizeof(*f->channel));
        if (f->channel == NULL) {
                nvgpu_err(g, "no mem for channels");
                err = -ENOMEM;
                goto clean_up_mutex;
        }

        nvgpu_init_list_node(&f->free_chs);

        for (chid = 0; chid < f->num_channels; chid++) {
                err = nvgpu_channel_init_support(g, chid);
                if (err != 0) {
                        nvgpu_err(g, "channel init failed, chid=%u", chid);
                        goto clean_up;
                }
        }

        return 0;

clean_up:
        for (i = 0; i < chid; i++) {
                struct nvgpu_channel *ch = &f->channel[i];

                nvgpu_channel_destroy(ch);
        }
        nvgpu_vfree(g, f->channel);
        f->channel = NULL;

clean_up_mutex:
        nvgpu_mutex_destroy(&f->free_chs_mutex);

        return err;
}

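/*
 * Suspend all channels that are still serviceable: disable and preempt each
 * one, cancel pending completion work, unbind it from hardware, and reload
 * the affected runlists with the channels removed. Channels that have already
 * been recovered (unserviceable) are skipped.
 *
 * A rough pairing in a suspend/resume path (illustrative only; the real
 * callers live in the power management code):
 *
 *	err = nvgpu_channel_suspend_all_serviceable_ch(g);
 *	...
 *	err = nvgpu_channel_resume_all_serviceable_ch(g);
 */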
int nvgpu_channel_suspend_all_serviceable_ch(struct gk20a *g)
{
        struct nvgpu_fifo *f = &g->fifo;
        u32 chid;
        bool channels_in_use = false;
        u32 active_runlist_ids = 0;
        int err;

        nvgpu_log_fn(g, " ");

        for (chid = 0; chid < f->num_channels; chid++) {
                struct nvgpu_channel *ch = nvgpu_channel_from_id(g, chid);

                if (ch == NULL) {
                        continue;
                }
                if (nvgpu_channel_check_unserviceable(ch)) {
                        nvgpu_log_info(g, "do not suspend recovered "
                                        "channel %d", chid);
                } else {
                        nvgpu_log_info(g, "suspend channel %d", chid);
                        /* disable channel */
                        if (nvgpu_channel_disable_tsg(g, ch) != 0) {
                                nvgpu_err(g, "failed to disable channel/TSG");
                        }
                        /* preempt the channel */
                        err = nvgpu_preempt_channel(g, ch);
                        if (err != 0) {
                                nvgpu_err(g, "failed to preempt channel/TSG");
                        }
                        /* wait for channel update notifiers */
                        if (g->os_channel.work_completion_cancel_sync != NULL) {
                                g->os_channel.work_completion_cancel_sync(ch);
                        }

                        g->ops.channel.unbind(ch);

                        channels_in_use = true;

                        active_runlist_ids |= BIT32(ch->runlist_id);
                }

                nvgpu_channel_put(ch);
        }

        if (channels_in_use) {
                nvgpu_assert(nvgpu_runlist_reload_ids(g,
                                active_runlist_ids, false) == 0);
        }

        nvgpu_log_fn(g, "done");
        return 0;
}

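/*
 * Re-bind all channels that were suspended by
 * nvgpu_channel_suspend_all_serviceable_ch() and reload their runlists.
 * Unserviceable channels stay out of the runlists.
 */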
int nvgpu_channel_resume_all_serviceable_ch(struct gk20a *g)
{
        struct nvgpu_fifo *f = &g->fifo;
        u32 chid;
        bool channels_in_use = false;
        u32 active_runlist_ids = 0;

        nvgpu_log_fn(g, " ");

        for (chid = 0; chid < f->num_channels; chid++) {
                struct nvgpu_channel *ch = nvgpu_channel_from_id(g, chid);

                if (ch == NULL) {
                        continue;
                }
                if (nvgpu_channel_check_unserviceable(ch)) {
                        nvgpu_log_info(g, "do not resume recovered "
                                        "channel %d", chid);
                } else {
                        nvgpu_log_info(g, "resume channel %d", chid);
                        g->ops.channel.bind(ch);
                        channels_in_use = true;
                        active_runlist_ids |= BIT32(ch->runlist_id);
                }
                nvgpu_channel_put(ch);
        }

        if (channels_in_use) {
                nvgpu_assert(nvgpu_runlist_reload_ids(g,
                                active_runlist_ids, true) == 0);
        }

        nvgpu_log_fn(g, "done");

        return 0;
}

static void nvgpu_channel_semaphore_signal(struct nvgpu_channel *c,
                bool post_events)
{
        struct gk20a *g = c->g;

        if (nvgpu_cond_broadcast_interruptible(&c->semaphore_wq) != 0) {
                nvgpu_warn(g, "failed to broadcast");
        }

#ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL
        if (post_events) {
                struct nvgpu_tsg *tsg = nvgpu_tsg_from_ch(c);

                if (tsg != NULL) {
                        g->ops.tsg.post_event_id(tsg,
                                NVGPU_EVENT_ID_BLOCKING_SYNC);
                }
        }
#endif

#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
        /*
         * Only non-deterministic channels get the channel_update callback. We
         * don't allow semaphore-backed syncs for these channels anyways, since
         * they have a dependency on the sync framework. If deterministic
         * channels are receiving a semaphore wakeup, it must be for a
         * user-space managed semaphore.
         */
        if (!nvgpu_channel_is_deterministic(c)) {
                nvgpu_channel_update(c);
        }
#endif
}

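/*
 * Broadcast a semaphore wakeup to every bound channel, typically from the
 * non-stalling interrupt path after a semaphore release. The FB flush makes
 * sure the semaphore values are visible in memory before any waiter re-reads
 * them.
 */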
void nvgpu_channel_semaphore_wakeup(struct gk20a *g, bool post_events)
{
        struct nvgpu_fifo *f = &g->fifo;
        u32 chid;

        nvgpu_log_fn(g, " ");

        /*
         * Ensure that all pending writes are actually done before trying to
         * read semaphore values from DRAM.
         */
        nvgpu_assert(g->ops.mm.cache.fb_flush(g) == 0);

        for (chid = 0; chid < f->num_channels; chid++) {
                struct nvgpu_channel *c = &g->fifo.channel[chid];

                if (nvgpu_channel_get(c) != NULL) {
                        if (nvgpu_atomic_read(&c->bound) != 0) {
                                nvgpu_channel_semaphore_signal(c, post_events);
                        }
                        nvgpu_channel_put(c);
                }
        }
}

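/*
 * Look up a channel by the address of its instance block, e.g. when mapping
 * an engine or MMU fault back to the offending channel. This is a linear scan
 * over all channels, so it belongs on exceptional paths only.
 */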
/* return with a reference to the channel, caller must put it back */
struct nvgpu_channel *nvgpu_channel_refch_from_inst_ptr(struct gk20a *g,
                u64 inst_ptr)
{
        struct nvgpu_fifo *f = &g->fifo;
        unsigned int ci;

        if (unlikely(f->channel == NULL)) {
                return NULL;
        }
        for (ci = 0; ci < f->num_channels; ci++) {
                struct nvgpu_channel *ch;
                u64 ch_inst_ptr;

                ch = nvgpu_channel_from_id(g, ci);
                /* only alive channels are searched */
                if (ch == NULL) {
                        continue;
                }

                ch_inst_ptr = nvgpu_inst_block_addr(g, &ch->inst_block);
                if (inst_ptr == ch_inst_ptr) {
                        return ch;
                }

                nvgpu_channel_put(ch);
        }
        return NULL;
}

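/*
 * Allocate the instance block backing this channel. The instance block is
 * what the hardware uses to identify the channel, so its address is also what
 * nvgpu_channel_refch_from_inst_ptr() matches against.
 */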
int nvgpu_channel_alloc_inst(struct gk20a *g, struct nvgpu_channel *ch)
{
        int err;

        nvgpu_log_fn(g, " ");

        err = nvgpu_alloc_inst_block(g, &ch->inst_block);
        if (err != 0) {
                return err;
        }

        nvgpu_log_info(g, "channel %d inst block physical addr: 0x%016llx",
                ch->chid, nvgpu_inst_block_addr(g, &ch->inst_block));

        nvgpu_log_fn(g, "done");
        return 0;
}

void nvgpu_channel_free_inst(struct gk20a *g, struct nvgpu_channel *ch)
{
        nvgpu_free_inst_block(g, &ch->inst_block);
}

static void nvgpu_channel_sync_debug_dump(struct gk20a *g,
        struct nvgpu_debug_context *o, struct nvgpu_channel_dump_info *info)
{
#ifdef CONFIG_NVGPU_NON_FUSA
        gk20a_debug_output(o,
                "RAMFC: TOP: %012llx PUT: %012llx GET: %012llx "
                "FETCH: %012llx "
                "HEADER: %08x COUNT: %08x "
                "SYNCPOINT: %08x %08x "
                "SEMAPHORE: %08x %08x %08x %08x",
                info->inst.pb_top_level_get,
                info->inst.pb_put,
                info->inst.pb_get,
                info->inst.pb_fetch,
                info->inst.pb_header,
                info->inst.pb_count,
                info->inst.syncpointa,
                info->inst.syncpointb,
                info->inst.semaphorea,
                info->inst.semaphoreb,
                info->inst.semaphorec,
                info->inst.semaphored);

        g->ops.pbdma.syncpt_debug_dump(g, o, info);
#endif
}

static void nvgpu_channel_info_debug_dump(struct gk20a *g,
                struct nvgpu_debug_context *o,
                struct nvgpu_channel_dump_info *info)
{
        /*
         * Use the GPU HW version to decide which channel instance fields of
         * struct nvgpu_channel_dump_info get dumped: for HW versions before
         * gv11b, dump syncpoint a/b and semaphore a/b/c/d; for gv11b and
         * later, dump sem addr/payload/execute.
         */
        u32 ver = nvgpu_safe_add_u32(g->params.gpu_arch, g->params.gpu_impl);

        gk20a_debug_output(o, "%d-%s, TSG: %u, pid %d, refs: %d%s: ",
                        info->chid,
                        g->name,
                        info->tsgid,
                        info->pid,
                        info->refs,
                        info->deterministic ? ", deterministic" : "");
        gk20a_debug_output(o, "channel status: %s in use %s %s",
                        info->hw_state.enabled ? "" : "not",
                        info->hw_state.status_string,
                        info->hw_state.busy ? "busy" : "not busy");

        if (ver < NVGPU_GPUID_GV11B) {
                nvgpu_channel_sync_debug_dump(g, o, info);
        } else {
                gk20a_debug_output(o,
                        "RAMFC: TOP: %012llx PUT: %012llx GET: %012llx "
                        "FETCH: %012llx "
                        "HEADER: %08x COUNT: %08x "
                        "SEMAPHORE: addr %012llx "
                        "payload %016llx execute %08x",
                        info->inst.pb_top_level_get,
                        info->inst.pb_put,
                        info->inst.pb_get,
                        info->inst.pb_fetch,
                        info->inst.pb_header,
                        info->inst.pb_count,
                        info->inst.sem_addr,
                        info->inst.sem_payload,
                        info->inst.sem_execute);
        }

        if (info->sema.addr != 0ULL) {
                gk20a_debug_output(o, "SEMA STATE: value: 0x%08x "
                        "next_val: 0x%08x addr: 0x%010llx",
                        info->sema.value,
                        info->sema.next,
                        info->sema.addr);
        }

        gk20a_debug_output(o, " ");
}

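/*
 * Dump the state of every live channel to the given debug context. Per-channel
 * data is first snapshotted into nvgpu_channel_dump_info structs while holding
 * a channel reference, and only then printed, so the slow output path never
 * holds channel references.
 */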
void nvgpu_channel_debug_dump_all(struct gk20a *g,
                 struct nvgpu_debug_context *o)
{
        struct nvgpu_fifo *f = &g->fifo;
        u32 chid;
        struct nvgpu_channel_dump_info **infos;

        infos = nvgpu_kzalloc(g, sizeof(*infos) * f->num_channels);
        if (infos == NULL) {
                gk20a_debug_output(o, "cannot alloc memory for channels");
                return;
        }

        for (chid = 0U; chid < f->num_channels; chid++) {
                struct nvgpu_channel *ch = nvgpu_channel_from_id(g, chid);

                if (ch != NULL) {
                        struct nvgpu_channel_dump_info *info;

                        info = nvgpu_kzalloc(g, sizeof(*info));

                        /*
                         * ref taken stays to below loop with
                         * successful allocs
                         */
                        if (info == NULL) {
                                nvgpu_channel_put(ch);
                        } else {
                                infos[chid] = info;
                        }
                }
        }

        for (chid = 0U; chid < f->num_channels; chid++) {
                struct nvgpu_channel *ch = &f->channel[chid];
                struct nvgpu_channel_dump_info *info = infos[chid];
#ifdef CONFIG_NVGPU_SW_SEMAPHORE
                struct nvgpu_channel_sync_semaphore *sync_sema;
                struct nvgpu_hw_semaphore *hw_sema = NULL;

                if (ch->sync != NULL) {
                        sync_sema = nvgpu_channel_sync_to_semaphore(ch->sync);
                        if (sync_sema != NULL) {
                                hw_sema = nvgpu_channel_sync_semaphore_hw_sema(
                                                sync_sema);
                        }
                }
#endif

                /* if this info exists, the above loop took a channel ref */
                if (info == NULL) {
                        continue;
                }

                info->chid = ch->chid;
                info->tsgid = ch->tsgid;
                info->pid = ch->pid;
                info->refs = nvgpu_atomic_read(&ch->ref_count);
                info->deterministic = nvgpu_channel_is_deterministic(ch);

#ifdef CONFIG_NVGPU_SW_SEMAPHORE
                if (hw_sema != NULL) {
                        info->sema.value = nvgpu_hw_semaphore_read(hw_sema);
                        info->sema.next =
                                (u32)nvgpu_hw_semaphore_read_next(hw_sema);
                        info->sema.addr = nvgpu_hw_semaphore_addr(hw_sema);
                }
#endif

                g->ops.channel.read_state(g, ch, &info->hw_state);
                g->ops.ramfc.capture_ram_dump(g, ch, info);

                nvgpu_channel_put(ch);
        }

        gk20a_debug_output(o, "Channel Status - chip %-5s", g->name);
        gk20a_debug_output(o, "---------------------------");
        for (chid = 0U; chid < f->num_channels; chid++) {
                struct nvgpu_channel_dump_info *info = infos[chid];

                if (info != NULL) {
                        nvgpu_channel_info_debug_dump(g, o, info);
                        nvgpu_kfree(g, info);
                }
        }

        nvgpu_kfree(g, infos);
}

#ifdef CONFIG_NVGPU_DEBUGGER
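/*
 * Perform engine resets that were deferred during fault handling, typically
 * because a debug session was attached at the time. Only engines that both
 * have a deferred reset pending and are running this channel's TSG get reset;
 * ctxsw is disabled around the resets to keep context state consistent.
 */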
int nvgpu_channel_deferred_reset_engines(struct gk20a *g,
                struct nvgpu_channel *ch)
{
        unsigned long engine_id, engines = 0U;
        struct nvgpu_tsg *tsg;
        bool deferred_reset_pending;
        struct nvgpu_fifo *f = &g->fifo;
        int err = 0;

        nvgpu_mutex_acquire(&g->dbg_sessions_lock);

        nvgpu_mutex_acquire(&f->deferred_reset_mutex);
        deferred_reset_pending = g->fifo.deferred_reset_pending;
        nvgpu_mutex_release(&f->deferred_reset_mutex);

        if (!deferred_reset_pending) {
                nvgpu_mutex_release(&g->dbg_sessions_lock);
                return 0;
        }

        err = nvgpu_gr_disable_ctxsw(g);
        if (err != 0) {
                nvgpu_err(g, "failed to disable ctxsw");
                goto fail;
        }

        tsg = nvgpu_tsg_from_ch(ch);
        if (tsg != NULL) {
                engines = nvgpu_engine_get_mask_on_id(g, tsg->tsgid, true);
        } else {
                nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid);
        }

        if (engines == 0U) {
                goto clean_up;
        }

        /*
         * If deferred reset is set for an engine, and channel is running
         * on that engine, reset it
         */

        for_each_set_bit(engine_id, &g->fifo.deferred_fault_engines, 32UL) {
                if ((BIT64(engine_id) & engines) != 0ULL) {
                        nvgpu_engine_reset(g, (u32)engine_id);
                }
        }

        nvgpu_mutex_acquire(&f->deferred_reset_mutex);
        g->fifo.deferred_fault_engines = 0;
        g->fifo.deferred_reset_pending = false;
        nvgpu_mutex_release(&f->deferred_reset_mutex);

clean_up:
        err = nvgpu_gr_enable_ctxsw(g);
        if (err != 0) {
                nvgpu_err(g, "failed to enable ctxsw");
        }
fail:
        nvgpu_mutex_release(&g->dbg_sessions_lock);

        return err;
}
#endif