mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
Add an NVGPU_IOCTL_CHANNEL_PREEMPT_NEXT ioctl that checks host and FECS status and preempts a pending load of a context not belonging to the calling channel on the GR engine during context switch. This should be called after a submit with NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST to decrease worst-case submit-to-start latency for high-interleave channels. There is a less than 0.002% chance that the ioctl blocks for up to a couple of milliseconds due to a race condition where the FECS status changes while being read. Also fix a bug with host reschedule for multiple runlists, which requires writing both runlist registers. Bug 1987640 Bug 1924808 Change-Id: I0b7e2f91bd18b0b20928e5a3311b9426b1bf1848 Signed-off-by: David Li <davli@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1549598 GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
/*
 * GK20A Graphics channel
 *
 * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/nvhost.h>
#include <linux/list.h>
#include <linux/delay.h>
#include <linux/highmem.h> /* needed for nvmap.h */
#include <linux/kthread.h>
#include <trace/events/gk20a.h>
#include <linux/scatterlist.h>
#include <linux/file.h>
#include <linux/anon_inodes.h>
#include <linux/dma-buf.h>
#include <linux/vmalloc.h>
#include <linux/circ_buf.h>

#include <nvgpu/semaphore.h>
#include <nvgpu/timers.h>
#include <nvgpu/kmem.h>

#include "gk20a.h"
#include "debug_gk20a.h"
#include "ctxsw_trace_gk20a.h"
#include "dbg_gpu_gk20a.h"
#include "fence_gk20a.h"

#include <nvgpu/hw/gk20a/hw_ram_gk20a.h>
#include <nvgpu/hw/gk20a/hw_fifo_gk20a.h>
#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
#include <nvgpu/hw/gk20a/hw_ccsr_gk20a.h>
#include <nvgpu/hw/gk20a/hw_ltc_gk20a.h>

#define NVMAP_HANDLE_PARAM_SIZE 1

/*
 * Although channels do have pointers back to the gk20a struct that they were
 * created under, in cases where the driver is killed that pointer can be bad.
 * The channel memory can be freed before the release() function for a given
 * channel is called. This happens when the driver dies and userspace doesn't
 * get a chance to call release() until after the entire gk20a driver data is
 * unloaded and freed.
 */
struct channel_priv {
	struct gk20a *g;
	struct channel_gk20a *c;
};

static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f);
static void free_channel(struct fifo_gk20a *f, struct channel_gk20a *c);
static void gk20a_channel_dump_ref_actions(struct channel_gk20a *c);

static void free_priv_cmdbuf(struct channel_gk20a *c,
			     struct priv_cmd_entry *e);

static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c);
static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c);

static void channel_gk20a_free_prealloc_resources(struct channel_gk20a *c);

static void channel_gk20a_joblist_add(struct channel_gk20a *c,
				      struct channel_gk20a_job *job);
static void channel_gk20a_joblist_delete(struct channel_gk20a *c,
					 struct channel_gk20a_job *job);
static struct channel_gk20a_job *channel_gk20a_joblist_peek(
		struct channel_gk20a *c);

static int channel_gk20a_commit_userd(struct channel_gk20a *c);
static int channel_gk20a_setup_userd(struct channel_gk20a *c);

static void channel_gk20a_bind(struct channel_gk20a *ch_gk20a);

static int channel_gk20a_update_runlist(struct channel_gk20a *c,
					bool add);
static void gk20a_free_error_notifiers(struct channel_gk20a *ch);

static u32 gk20a_get_channel_watchdog_timeout(struct channel_gk20a *ch);

static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
					bool clean_all);
|
|
|
|
/* allocate GPU channel */
|
|
static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f)
|
|
{
|
|
struct channel_gk20a *ch = NULL;
|
|
struct gk20a_platform *platform;
|
|
|
|
platform = gk20a_get_platform(f->g->dev);
|
|
|
|
nvgpu_mutex_acquire(&f->free_chs_mutex);
|
|
if (!list_empty(&f->free_chs)) {
|
|
ch = list_first_entry(&f->free_chs, struct channel_gk20a,
|
|
free_chs);
|
|
list_del(&ch->free_chs);
|
|
WARN_ON(atomic_read(&ch->ref_count));
|
|
WARN_ON(ch->referenceable);
|
|
f->used_channels++;
|
|
}
|
|
nvgpu_mutex_release(&f->free_chs_mutex);
|
|
|
|
if (platform->aggressive_sync_destroy_thresh &&
|
|
(f->used_channels >
|
|
platform->aggressive_sync_destroy_thresh))
|
|
platform->aggressive_sync_destroy = true;
|
|
|
|
return ch;
|
|
}
|
|
|
|
static void free_channel(struct fifo_gk20a *f,
|
|
struct channel_gk20a *ch)
|
|
{
|
|
struct gk20a_platform *platform;
|
|
struct gk20a *g = f->g;
|
|
|
|
trace_gk20a_release_used_channel(ch->hw_chid);
|
|
/* refcount is zero here and channel is in a freed/dead state */
|
|
nvgpu_mutex_acquire(&f->free_chs_mutex);
|
|
/* add to head to increase visibility of timing-related bugs */
|
|
list_add(&ch->free_chs, &f->free_chs);
|
|
f->used_channels--;
|
|
nvgpu_mutex_release(&f->free_chs_mutex);
|
|
|
|
/*
|
|
* On teardown it is not possible to dereference platform, but ignoring
|
|
* this is fine then because no new channels would be created.
|
|
*/
|
|
if (!g->driver_is_dying) {
|
|
platform = gk20a_get_platform(g->dev);
|
|
|
|
if (platform->aggressive_sync_destroy_thresh &&
|
|
(f->used_channels <
|
|
platform->aggressive_sync_destroy_thresh))
|
|
platform->aggressive_sync_destroy = false;
|
|
}
|
|
}
|
|
|
|
int channel_gk20a_commit_va(struct channel_gk20a *c)
|
|
{
|
|
struct gk20a *g = c->g;
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
g->ops.mm.init_inst_block(&c->inst_block, c->vm,
|
|
c->vm->gmmu_page_sizes[gmmu_page_size_big]);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int channel_gk20a_commit_userd(struct channel_gk20a *c)
|
|
{
|
|
u32 addr_lo;
|
|
u32 addr_hi;
|
|
struct gk20a *g = c->g;
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
addr_lo = u64_lo32(c->userd_iova >> ram_userd_base_shift_v());
|
|
addr_hi = u64_hi32(c->userd_iova);
|
|
|
|
gk20a_dbg_info("channel %d : set ramfc userd 0x%16llx",
|
|
c->hw_chid, (u64)c->userd_iova);
|
|
|
|
gk20a_mem_wr32(g, &c->inst_block,
|
|
ram_in_ramfc_w() + ram_fc_userd_w(),
|
|
gk20a_aperture_mask(g, &g->fifo.userd,
|
|
pbdma_userd_target_sys_mem_ncoh_f(),
|
|
pbdma_userd_target_vid_mem_f()) |
|
|
pbdma_userd_addr_f(addr_lo));
|
|
|
|
gk20a_mem_wr32(g, &c->inst_block,
|
|
ram_in_ramfc_w() + ram_fc_userd_hi_w(),
|
|
pbdma_userd_hi_addr_f(addr_hi));
|
|
|
|
return 0;
|
|
}
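
/*
 * Illustrative example (not from the original source): with a USERD IOVA of
 * 0x1_2345_9000 and assuming ram_userd_base_shift_v() returns 9, the RAMFC
 * words above would be built from addr_lo = 0x1_2345_9000 >> 9 = 0x0091a2c8
 * and addr_hi = u64_hi32(0x1_2345_9000) = 0x1, plus the aperture target bits.
 */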
|
|
|
|
int gk20a_channel_get_timescale_from_timeslice(struct gk20a *g,
|
|
int timeslice_period,
|
|
int *__timeslice_timeout, int *__timeslice_scale)
|
|
{
|
|
struct gk20a_platform *platform = dev_get_drvdata(g->dev);
|
|
int value = scale_ptimer(timeslice_period,
|
|
ptimer_scalingfactor10x(platform->ptimer_src_freq));
|
|
int shift = 0;
|
|
|
|
/* value field is 8 bits long */
|
|
while (value >= 1 << 8) {
|
|
value >>= 1;
|
|
shift++;
|
|
}
|
|
|
|
/* time slice register is only 18 bits long */
|
|
if ((value << shift) >= 1<<19) {
|
|
pr_err("Requested timeslice value is clamped to 18 bits\n");
|
|
value = 255;
|
|
shift = 10;
|
|
}
|
|
|
|
*__timeslice_timeout = value;
|
|
*__timeslice_scale = shift;
|
|
|
|
return 0;
|
|
}
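
/*
 * Illustrative example (not from the original source): if the scaled
 * timeslice value comes out as 600, the loop above normalizes it into the
 * 8-bit field: 600 -> 300 (shift 1) -> 150 (shift 2), so
 * *__timeslice_timeout = 150 and *__timeslice_scale = 2, i.e. roughly
 * 150 << 2 = 600 ptimer units.
 */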
|
|
|
|
static int channel_gk20a_set_schedule_params(struct channel_gk20a *c)
|
|
{
|
|
int shift = 0, value = 0;
|
|
|
|
gk20a_channel_get_timescale_from_timeslice(c->g,
|
|
c->timeslice_us, &value, &shift);
|
|
|
|
/* disable channel */
|
|
c->g->ops.fifo.disable_channel(c);
|
|
|
|
/* preempt the channel */
|
|
WARN_ON(c->g->ops.fifo.preempt_channel(c->g, c->hw_chid));
|
|
|
|
/* set new timeslice */
|
|
gk20a_mem_wr32(c->g, &c->inst_block, ram_fc_runlist_timeslice_w(),
|
|
value | (shift << 12) |
|
|
fifo_runlist_timeslice_enable_true_f());
|
|
|
|
/* enable channel */
|
|
gk20a_writel(c->g, ccsr_channel_r(c->hw_chid),
|
|
gk20a_readl(c->g, ccsr_channel_r(c->hw_chid)) |
|
|
ccsr_channel_enable_set_true_f());
|
|
|
|
return 0;
|
|
}
|
|
|
|
u32 channel_gk20a_pbdma_acquire_val(struct channel_gk20a *c)
|
|
{
|
|
u32 val, exp, man;
|
|
u64 timeout;
|
|
unsigned int val_len;
|
|
|
|
val = pbdma_acquire_retry_man_2_f() |
|
|
pbdma_acquire_retry_exp_2_f();
|
|
|
|
if (!c->g->timeouts_enabled || !c->wdt_enabled)
|
|
return val;
|
|
|
|
timeout = gk20a_get_channel_watchdog_timeout(c);
|
|
timeout *= 80UL;
|
|
do_div(timeout, 100); /* set acquire timeout to 80% of channel wdt */
|
|
timeout *= 1000000UL; /* ms -> ns */
|
|
do_div(timeout, 1024); /* in unit of 1024ns */
|
|
val_len = fls(timeout >> 32) + 32;
|
|
if (val_len == 32)
|
|
val_len = fls(timeout);
|
|
if (val_len > 16U + pbdma_acquire_timeout_exp_max_v()) { /* man: 16bits */
|
|
exp = pbdma_acquire_timeout_exp_max_v();
|
|
man = pbdma_acquire_timeout_man_max_v();
|
|
} else if (val_len > 16) {
|
|
exp = val_len - 16;
|
|
man = timeout >> exp;
|
|
} else {
|
|
exp = 0;
|
|
man = timeout;
|
|
}
|
|
|
|
val |= pbdma_acquire_timeout_exp_f(exp) |
|
|
pbdma_acquire_timeout_man_f(man) |
|
|
pbdma_acquire_timeout_en_enable_f();
|
|
|
|
return val;
|
|
}
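
/*
 * Illustrative example (not from the original source): with a 5000 ms channel
 * watchdog, the acquire timeout above is 80% of that, i.e. 4000 ms = 4e9 ns,
 * or about 3906250 units of 1024 ns. fls() of that is 22 bits, so with the
 * 16-bit mantissa exp = 22 - 16 = 6 and man = 3906250 >> 6 = 61035.
 */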
|
|
|
|
void gk20a_channel_setup_ramfc_for_privileged_channel(struct channel_gk20a *c)
|
|
{
|
|
struct gk20a *g = c->g;
|
|
struct mem_desc *mem = &c->inst_block;
|
|
|
|
gk20a_dbg_info("channel %d : set ramfc privileged_channel", c->hw_chid);
|
|
|
|
/* Enable HCE priv mode for phys mode transfer */
|
|
gk20a_mem_wr32(g, mem, ram_fc_hce_ctrl_w(),
|
|
pbdma_hce_ctrl_hce_priv_mode_yes_f());
|
|
}
|
|
|
|
int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
|
|
u64 gpfifo_base, u32 gpfifo_entries, u32 flags)
|
|
{
|
|
struct gk20a *g = c->g;
|
|
struct mem_desc *mem = &c->inst_block;
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
gk20a_memset(g, mem, 0, 0, ram_fc_size_val_v());
|
|
|
|
gk20a_mem_wr32(g, mem, ram_fc_gp_base_w(),
|
|
pbdma_gp_base_offset_f(
|
|
u64_lo32(gpfifo_base >> pbdma_gp_base_rsvd_s())));
|
|
|
|
gk20a_mem_wr32(g, mem, ram_fc_gp_base_hi_w(),
|
|
pbdma_gp_base_hi_offset_f(u64_hi32(gpfifo_base)) |
|
|
pbdma_gp_base_hi_limit2_f(ilog2(gpfifo_entries)));
|
|
|
|
gk20a_mem_wr32(g, mem, ram_fc_signature_w(),
|
|
c->g->ops.fifo.get_pbdma_signature(c->g));
|
|
|
|
gk20a_mem_wr32(g, mem, ram_fc_formats_w(),
|
|
pbdma_formats_gp_fermi0_f() |
|
|
pbdma_formats_pb_fermi1_f() |
|
|
pbdma_formats_mp_fermi0_f());
|
|
|
|
gk20a_mem_wr32(g, mem, ram_fc_pb_header_w(),
|
|
pbdma_pb_header_priv_user_f() |
|
|
pbdma_pb_header_method_zero_f() |
|
|
pbdma_pb_header_subchannel_zero_f() |
|
|
pbdma_pb_header_level_main_f() |
|
|
pbdma_pb_header_first_true_f() |
|
|
pbdma_pb_header_type_inc_f());
|
|
|
|
gk20a_mem_wr32(g, mem, ram_fc_subdevice_w(),
|
|
pbdma_subdevice_id_f(1) |
|
|
pbdma_subdevice_status_active_f() |
|
|
pbdma_subdevice_channel_dma_enable_f());
|
|
|
|
gk20a_mem_wr32(g, mem, ram_fc_target_w(), pbdma_target_engine_sw_f());
|
|
|
|
gk20a_mem_wr32(g, mem, ram_fc_acquire_w(),
|
|
channel_gk20a_pbdma_acquire_val(c));
|
|
|
|
gk20a_mem_wr32(g, mem, ram_fc_runlist_timeslice_w(),
|
|
fifo_runlist_timeslice_timeout_128_f() |
|
|
fifo_runlist_timeslice_timescale_3_f() |
|
|
fifo_runlist_timeslice_enable_true_f());
|
|
|
|
gk20a_mem_wr32(g, mem, ram_fc_pb_timeslice_w(),
|
|
fifo_pb_timeslice_timeout_16_f() |
|
|
fifo_pb_timeslice_timescale_0_f() |
|
|
fifo_pb_timeslice_enable_true_f());
|
|
|
|
gk20a_mem_wr32(g, mem, ram_fc_chid_w(), ram_fc_chid_id_f(c->hw_chid));
|
|
|
|
if (c->is_privileged_channel)
|
|
gk20a_channel_setup_ramfc_for_privileged_channel(c);
|
|
|
|
return channel_gk20a_commit_userd(c);
|
|
}
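
/*
 * Illustrative example (not from the original source): for a GPFIFO of 1024
 * entries the limit2 field above is ilog2(1024) = 10, and the base address is
 * programmed with its low pbdma_gp_base_rsvd_s() bits dropped, so those low
 * bits of gpfifo_base are assumed to be zero.
 */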
|
|
|
|
static int channel_gk20a_setup_userd(struct channel_gk20a *c)
|
|
{
|
|
struct gk20a *g = c->g;
|
|
struct mem_desc *mem = &g->fifo.userd;
|
|
u32 offset = c->hw_chid * g->fifo.userd_entry_size / sizeof(u32);
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
gk20a_mem_wr32(g, mem, offset + ram_userd_put_w(), 0);
|
|
gk20a_mem_wr32(g, mem, offset + ram_userd_get_w(), 0);
|
|
gk20a_mem_wr32(g, mem, offset + ram_userd_ref_w(), 0);
|
|
gk20a_mem_wr32(g, mem, offset + ram_userd_put_hi_w(), 0);
|
|
gk20a_mem_wr32(g, mem, offset + ram_userd_ref_threshold_w(), 0);
|
|
gk20a_mem_wr32(g, mem, offset + ram_userd_gp_top_level_get_w(), 0);
|
|
gk20a_mem_wr32(g, mem, offset + ram_userd_gp_top_level_get_hi_w(), 0);
|
|
gk20a_mem_wr32(g, mem, offset + ram_userd_get_hi_w(), 0);
|
|
gk20a_mem_wr32(g, mem, offset + ram_userd_gp_get_w(), 0);
|
|
gk20a_mem_wr32(g, mem, offset + ram_userd_gp_put_w(), 0);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void channel_gk20a_bind(struct channel_gk20a *c)
|
|
{
|
|
struct gk20a *g = c->g;
|
|
u32 inst_ptr = gk20a_mm_inst_block_addr(g, &c->inst_block)
|
|
>> ram_in_base_shift_v();
|
|
|
|
gk20a_dbg_info("bind channel %d inst ptr 0x%08x",
|
|
c->hw_chid, inst_ptr);
|
|
|
|
|
|
gk20a_writel(g, ccsr_channel_r(c->hw_chid),
|
|
(gk20a_readl(g, ccsr_channel_r(c->hw_chid)) &
|
|
~ccsr_channel_runlist_f(~0)) |
|
|
ccsr_channel_runlist_f(c->runlist_id));
|
|
|
|
gk20a_writel(g, ccsr_channel_inst_r(c->hw_chid),
|
|
ccsr_channel_inst_ptr_f(inst_ptr) |
|
|
gk20a_aperture_mask(g, &c->inst_block,
|
|
ccsr_channel_inst_target_sys_mem_ncoh_f(),
|
|
ccsr_channel_inst_target_vid_mem_f()) |
|
|
ccsr_channel_inst_bind_true_f());
|
|
|
|
gk20a_writel(g, ccsr_channel_r(c->hw_chid),
|
|
(gk20a_readl(g, ccsr_channel_r(c->hw_chid)) &
|
|
~ccsr_channel_enable_set_f(~0)) |
|
|
ccsr_channel_enable_set_true_f());
|
|
|
|
wmb();
|
|
atomic_set(&c->bound, true);
|
|
|
|
}
|
|
|
|
void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a)
|
|
{
|
|
struct gk20a *g = ch_gk20a->g;
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
if (atomic_cmpxchg(&ch_gk20a->bound, true, false)) {
|
|
gk20a_writel(g, ccsr_channel_inst_r(ch_gk20a->hw_chid),
|
|
ccsr_channel_inst_ptr_f(0) |
|
|
ccsr_channel_inst_bind_false_f());
|
|
}
|
|
}
|
|
|
|
int channel_gk20a_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)
|
|
{
|
|
int err;
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
err = gk20a_alloc_inst_block(g, &ch->inst_block);
|
|
if (err)
|
|
return err;
|
|
|
|
gk20a_dbg_info("channel %d inst block physical addr: 0x%16llx",
|
|
ch->hw_chid, gk20a_mm_inst_block_addr(g, &ch->inst_block));
|
|
|
|
gk20a_dbg_fn("done");
|
|
return 0;
|
|
}
|
|
|
|
void channel_gk20a_free_inst(struct gk20a *g, struct channel_gk20a *ch)
|
|
{
|
|
gk20a_free_inst_block(g, &ch->inst_block);
|
|
}
|
|
|
|
static int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add)
|
|
{
|
|
return c->g->ops.fifo.update_runlist(c->g, c->runlist_id, c->hw_chid, add, true);
|
|
}
|
|
|
|
void channel_gk20a_enable(struct channel_gk20a *ch)
|
|
{
|
|
/* enable channel */
|
|
gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
|
|
gk20a_readl(ch->g, ccsr_channel_r(ch->hw_chid)) |
|
|
ccsr_channel_enable_set_true_f());
|
|
}
|
|
|
|
void channel_gk20a_disable(struct channel_gk20a *ch)
|
|
{
|
|
/* disable channel */
|
|
gk20a_writel(ch->g, ccsr_channel_r(ch->hw_chid),
|
|
gk20a_readl(ch->g,
|
|
ccsr_channel_r(ch->hw_chid)) |
|
|
ccsr_channel_enable_clr_true_f());
|
|
}
|
|
|
|
int gk20a_enable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch)
|
|
{
|
|
struct tsg_gk20a *tsg;
|
|
|
|
if (gk20a_is_channel_marked_as_tsg(ch)) {
|
|
tsg = &g->fifo.tsg[ch->tsgid];
|
|
gk20a_enable_tsg(tsg);
|
|
} else {
|
|
g->ops.fifo.enable_channel(ch);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int gk20a_disable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch)
|
|
{
|
|
struct tsg_gk20a *tsg;
|
|
|
|
if (gk20a_is_channel_marked_as_tsg(ch)) {
|
|
tsg = &g->fifo.tsg[ch->tsgid];
|
|
gk20a_disable_tsg(tsg);
|
|
} else {
|
|
g->ops.fifo.disable_channel(ch);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
|
|
{
|
|
struct channel_gk20a_job *job, *n;
|
|
bool released_job_semaphore = false;
|
|
bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(ch);
|
|
|
|
/* synchronize with actual job cleanup */
|
|
nvgpu_mutex_acquire(&ch->joblist.cleanup_lock);
|
|
|
|
/* ensure no fences are pending */
|
|
nvgpu_mutex_acquire(&ch->sync_lock);
|
|
if (ch->sync)
|
|
ch->sync->set_min_eq_max(ch->sync);
|
|
nvgpu_mutex_release(&ch->sync_lock);
|
|
|
|
/* release all job semaphores (applies only to jobs that use
|
|
semaphore synchronization) */
|
|
channel_gk20a_joblist_lock(ch);
|
|
if (pre_alloc_enabled) {
|
|
int tmp_get = ch->joblist.pre_alloc.get;
|
|
int put = ch->joblist.pre_alloc.put;
|
|
|
|
/*
|
|
* ensure put is read before any subsequent reads.
|
|
* see corresponding wmb in gk20a_channel_add_job()
|
|
*/
|
|
rmb();
|
|
|
|
while (tmp_get != put) {
|
|
job = &ch->joblist.pre_alloc.jobs[tmp_get];
|
|
if (job->post_fence->semaphore) {
|
|
__nvgpu_semaphore_release(
|
|
job->post_fence->semaphore, true);
|
|
released_job_semaphore = true;
|
|
}
|
|
tmp_get = (tmp_get + 1) % ch->joblist.pre_alloc.length;
|
|
}
|
|
} else {
|
|
list_for_each_entry_safe(job, n,
|
|
&ch->joblist.dynamic.jobs, list) {
|
|
if (job->post_fence->semaphore) {
|
|
__nvgpu_semaphore_release(
|
|
job->post_fence->semaphore, true);
|
|
released_job_semaphore = true;
|
|
}
|
|
}
|
|
}
|
|
channel_gk20a_joblist_unlock(ch);
|
|
|
|
nvgpu_mutex_release(&ch->joblist.cleanup_lock);
|
|
|
|
if (released_job_semaphore)
|
|
wake_up_interruptible_all(&ch->semaphore_wq);
|
|
|
|
/*
|
|
* When closing the channel, this scheduled update holds one ref which
|
|
* is waited for before advancing with freeing.
|
|
*/
|
|
gk20a_channel_update(ch);
|
|
}
|
|
|
|
void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt)
|
|
{
|
|
gk20a_dbg_fn("");
|
|
|
|
if (gk20a_is_channel_marked_as_tsg(ch))
|
|
return gk20a_fifo_abort_tsg(ch->g, ch->tsgid, channel_preempt);
|
|
|
|
/* make sure new kickoffs are prevented */
|
|
ch->has_timedout = true;
|
|
|
|
ch->g->ops.fifo.disable_channel(ch);
|
|
|
|
if (channel_preempt)
|
|
ch->g->ops.fifo.preempt_channel(ch->g, ch->hw_chid);
|
|
|
|
gk20a_channel_abort_clean_up(ch);
|
|
}
|
|
|
|
int gk20a_wait_channel_idle(struct channel_gk20a *ch)
|
|
{
|
|
bool channel_idle = false;
|
|
struct nvgpu_timeout timeout;
|
|
|
|
nvgpu_timeout_init(ch->g, &timeout, gk20a_get_gr_idle_timeout(ch->g),
|
|
NVGPU_TIMER_CPU_TIMER);
|
|
|
|
do {
|
|
channel_gk20a_joblist_lock(ch);
|
|
channel_idle = channel_gk20a_joblist_is_empty(ch);
|
|
channel_gk20a_joblist_unlock(ch);
|
|
if (channel_idle)
|
|
break;
|
|
|
|
usleep_range(1000, 3000);
|
|
} while (!nvgpu_timeout_expired(&timeout));
|
|
|
|
if (!channel_idle) {
|
|
gk20a_err(dev_from_gk20a(ch->g), "jobs not freed for channel %d\n",
|
|
ch->hw_chid);
|
|
return -EBUSY;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
void gk20a_disable_channel(struct channel_gk20a *ch)
|
|
{
|
|
gk20a_channel_abort(ch, true);
|
|
channel_gk20a_update_runlist(ch, false);
|
|
}
|
|
|
|
#if defined(CONFIG_GK20A_CYCLE_STATS)
|
|
|
|
static void gk20a_free_cycle_stats_buffer(struct channel_gk20a *ch)
|
|
{
|
|
/* disable existing cyclestats buffer */
|
|
nvgpu_mutex_acquire(&ch->cyclestate.cyclestate_buffer_mutex);
|
|
if (ch->cyclestate.cyclestate_buffer_handler) {
|
|
dma_buf_vunmap(ch->cyclestate.cyclestate_buffer_handler,
|
|
ch->cyclestate.cyclestate_buffer);
|
|
dma_buf_put(ch->cyclestate.cyclestate_buffer_handler);
|
|
ch->cyclestate.cyclestate_buffer_handler = NULL;
|
|
ch->cyclestate.cyclestate_buffer = NULL;
|
|
ch->cyclestate.cyclestate_buffer_size = 0;
|
|
}
|
|
nvgpu_mutex_release(&ch->cyclestate.cyclestate_buffer_mutex);
|
|
}
|
|
|
|
static int gk20a_channel_cycle_stats(struct channel_gk20a *ch,
|
|
struct nvgpu_cycle_stats_args *args)
|
|
{
|
|
struct dma_buf *dmabuf;
|
|
void *virtual_address;
|
|
|
|
/* is it allowed to handle calls for current GPU? */
|
|
if (0 == (ch->g->gpu_characteristics.flags &
|
|
NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS))
|
|
return -ENOSYS;
|
|
|
|
if (args->dmabuf_fd && !ch->cyclestate.cyclestate_buffer_handler) {
|
|
|
|
/* set up new cyclestats buffer */
|
|
dmabuf = dma_buf_get(args->dmabuf_fd);
|
|
if (IS_ERR(dmabuf))
|
|
return PTR_ERR(dmabuf);
|
|
virtual_address = dma_buf_vmap(dmabuf);
|
|
if (!virtual_address)
|
|
return -ENOMEM;
|
|
|
|
ch->cyclestate.cyclestate_buffer_handler = dmabuf;
|
|
ch->cyclestate.cyclestate_buffer = virtual_address;
|
|
ch->cyclestate.cyclestate_buffer_size = dmabuf->size;
|
|
return 0;
|
|
|
|
} else if (!args->dmabuf_fd &&
|
|
ch->cyclestate.cyclestate_buffer_handler) {
|
|
gk20a_free_cycle_stats_buffer(ch);
|
|
return 0;
|
|
|
|
} else if (!args->dmabuf_fd &&
|
|
!ch->cyclestate.cyclestate_buffer_handler) {
|
|
/* no request from GL */
|
|
return 0;
|
|
|
|
} else {
|
|
pr_err("channel already has cyclestats buffer\n");
|
|
return -EINVAL;
|
|
}
|
|
}
|
|
|
|
|
|
static int gk20a_flush_cycle_stats_snapshot(struct channel_gk20a *ch)
|
|
{
|
|
int ret;
|
|
|
|
nvgpu_mutex_acquire(&ch->cs_client_mutex);
|
|
if (ch->cs_client)
|
|
ret = gr_gk20a_css_flush(ch, ch->cs_client);
|
|
else
|
|
ret = -EBADF;
|
|
nvgpu_mutex_release(&ch->cs_client_mutex);
|
|
|
|
return ret;
|
|
}
|
|
|
|
static int gk20a_attach_cycle_stats_snapshot(struct channel_gk20a *ch,
|
|
u32 dmabuf_fd,
|
|
u32 perfmon_id_count,
|
|
u32 *perfmon_id_start)
|
|
{
|
|
int ret;
|
|
|
|
nvgpu_mutex_acquire(&ch->cs_client_mutex);
|
|
if (ch->cs_client) {
|
|
ret = -EEXIST;
|
|
} else {
|
|
ret = gr_gk20a_css_attach(ch,
|
|
dmabuf_fd,
|
|
perfmon_id_count,
|
|
perfmon_id_start,
|
|
&ch->cs_client);
|
|
}
|
|
nvgpu_mutex_release(&ch->cs_client_mutex);
|
|
|
|
return ret;
|
|
}
|
|
|
|
static int gk20a_free_cycle_stats_snapshot(struct channel_gk20a *ch)
|
|
{
|
|
int ret;
|
|
|
|
nvgpu_mutex_acquire(&ch->cs_client_mutex);
|
|
if (ch->cs_client) {
|
|
ret = gr_gk20a_css_detach(ch, ch->cs_client);
|
|
ch->cs_client = NULL;
|
|
} else {
|
|
ret = 0;
|
|
}
|
|
nvgpu_mutex_release(&ch->cs_client_mutex);
|
|
|
|
return ret;
|
|
}
|
|
|
|
static int gk20a_channel_cycle_stats_snapshot(struct channel_gk20a *ch,
|
|
struct nvgpu_cycle_stats_snapshot_args *args)
|
|
{
|
|
int ret;
|
|
|
|
/* is it allowed to handle calls for current GPU? */
|
|
if (0 == (ch->g->gpu_characteristics.flags &
|
|
NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS_SNAPSHOT))
|
|
return -ENOSYS;
|
|
|
|
if (!args->dmabuf_fd)
|
|
return -EINVAL;
|
|
|
|
/* handle the command (most frequent cases first) */
|
|
switch (args->cmd) {
|
|
case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_FLUSH:
|
|
ret = gk20a_flush_cycle_stats_snapshot(ch);
|
|
args->extra = 0;
|
|
break;
|
|
|
|
case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_ATTACH:
|
|
ret = gk20a_attach_cycle_stats_snapshot(ch,
|
|
args->dmabuf_fd,
|
|
args->extra,
|
|
&args->extra);
|
|
break;
|
|
|
|
case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_DETACH:
|
|
ret = gk20a_free_cycle_stats_snapshot(ch);
|
|
args->extra = 0;
|
|
break;
|
|
|
|
default:
|
|
pr_err("cyclestats: unknown command %u\n", args->cmd);
|
|
ret = -EINVAL;
|
|
break;
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
#endif
|
|
|
|
static int gk20a_channel_set_wdt_status(struct channel_gk20a *ch,
|
|
struct nvgpu_channel_wdt_args *args)
|
|
{
|
|
if (args->wdt_status == NVGPU_IOCTL_CHANNEL_DISABLE_WDT)
|
|
ch->wdt_enabled = false;
|
|
else if (args->wdt_status == NVGPU_IOCTL_CHANNEL_ENABLE_WDT)
|
|
ch->wdt_enabled = true;
|
|
|
|
return 0;
|
|
}
|
|
|
|
int gk20a_channel_set_runlist_interleave(struct channel_gk20a *ch,
|
|
u32 level)
|
|
{
|
|
struct gk20a *g = ch->g;
|
|
int ret;
|
|
|
|
if (gk20a_is_channel_marked_as_tsg(ch)) {
|
|
gk20a_err(dev_from_gk20a(g), "invalid operation for TSG!\n");
|
|
return -EINVAL;
|
|
}
|
|
|
|
switch (level) {
|
|
case NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW:
|
|
case NVGPU_RUNLIST_INTERLEAVE_LEVEL_MEDIUM:
|
|
case NVGPU_RUNLIST_INTERLEAVE_LEVEL_HIGH:
|
|
ret = g->ops.fifo.set_runlist_interleave(g, ch->hw_chid,
|
|
false, 0, level);
|
|
break;
|
|
default:
|
|
ret = -EINVAL;
|
|
break;
|
|
}
|
|
|
|
gk20a_dbg(gpu_dbg_sched, "chid=%u interleave=%u", ch->hw_chid, level);
|
|
|
|
return ret ? ret : g->ops.fifo.update_runlist(g, ch->runlist_id, ~0, true, true);
|
|
}
|
|
|
|
static int gk20a_init_error_notifier(struct channel_gk20a *ch,
|
|
struct nvgpu_set_error_notifier *args)
|
|
{
|
|
struct device *dev = dev_from_gk20a(ch->g);
|
|
struct dma_buf *dmabuf;
|
|
void *va;
|
|
u64 end = args->offset + sizeof(struct nvgpu_notification);
|
|
|
|
if (!args->mem) {
|
|
pr_err("gk20a_init_error_notifier: invalid memory handle\n");
|
|
return -EINVAL;
|
|
}
|
|
|
|
dmabuf = dma_buf_get(args->mem);
|
|
|
|
gk20a_free_error_notifiers(ch);
|
|
|
|
if (IS_ERR(dmabuf)) {
|
|
pr_err("Invalid handle: %d\n", args->mem);
|
|
return -EINVAL;
|
|
}
|
|
|
|
if (end > dmabuf->size || end < sizeof(struct nvgpu_notification)) {
|
|
dma_buf_put(dmabuf);
|
|
gk20a_err(dev, "gk20a_init_error_notifier: invalid offset\n");
|
|
return -EINVAL;
|
|
}
|
|
|
|
/* map handle */
|
|
va = dma_buf_vmap(dmabuf);
|
|
if (!va) {
|
|
dma_buf_put(dmabuf);
|
|
pr_err("Cannot map notifier handle\n");
|
|
return -ENOMEM;
|
|
}
|
|
|
|
ch->error_notifier = va + args->offset;
|
|
ch->error_notifier_va = va;
|
|
memset(ch->error_notifier, 0, sizeof(struct nvgpu_notification));
|
|
|
|
/* set channel notifiers pointer */
|
|
nvgpu_mutex_acquire(&ch->error_notifier_mutex);
|
|
ch->error_notifier_ref = dmabuf;
|
|
nvgpu_mutex_release(&ch->error_notifier_mutex);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* gk20a_set_error_notifier_locked()
|
|
* Should be called with ch->error_notifier_mutex held
|
|
*/
|
|
void gk20a_set_error_notifier_locked(struct channel_gk20a *ch, __u32 error)
|
|
{
|
|
if (ch->error_notifier_ref) {
|
|
struct timespec time_data;
|
|
u64 nsec;
|
|
getnstimeofday(&time_data);
|
|
nsec = ((u64)time_data.tv_sec) * 1000000000u +
|
|
(u64)time_data.tv_nsec;
|
|
ch->error_notifier->time_stamp.nanoseconds[0] =
|
|
(u32)nsec;
|
|
ch->error_notifier->time_stamp.nanoseconds[1] =
|
|
(u32)(nsec >> 32);
|
|
ch->error_notifier->info32 = error;
|
|
ch->error_notifier->status = 0xffff;
|
|
|
|
gk20a_err(dev_from_gk20a(ch->g),
|
|
"error notifier set to %d for ch %d", error, ch->hw_chid);
|
|
}
|
|
}
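
/*
 * Illustrative example (not from the original source): a timestamp of
 * nsec = 0x0000000123456789 is split across the two 32-bit notifier fields
 * as nanoseconds[0] = 0x23456789 (low word) and nanoseconds[1] = 0x1 (high
 * word), matching the (u32)nsec and (u32)(nsec >> 32) casts above.
 */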
|
|
|
|
void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error)
|
|
{
|
|
nvgpu_mutex_acquire(&ch->error_notifier_mutex);
|
|
gk20a_set_error_notifier_locked(ch, error);
|
|
nvgpu_mutex_release(&ch->error_notifier_mutex);
|
|
}
|
|
|
|
static void gk20a_free_error_notifiers(struct channel_gk20a *ch)
|
|
{
|
|
nvgpu_mutex_acquire(&ch->error_notifier_mutex);
|
|
if (ch->error_notifier_ref) {
|
|
dma_buf_vunmap(ch->error_notifier_ref, ch->error_notifier_va);
|
|
dma_buf_put(ch->error_notifier_ref);
|
|
ch->error_notifier_ref = NULL;
|
|
ch->error_notifier = NULL;
|
|
ch->error_notifier_va = NULL;
|
|
}
|
|
nvgpu_mutex_release(&ch->error_notifier_mutex);
|
|
}
|
|
|
|
static void gk20a_wait_until_counter_is_N(
|
|
struct channel_gk20a *ch, atomic_t *counter, int wait_value,
|
|
wait_queue_head_t *wq, const char *caller, const char *counter_name)
|
|
{
|
|
while (true) {
|
|
if (wait_event_timeout(
|
|
*wq,
|
|
atomic_read(counter) == wait_value,
|
|
msecs_to_jiffies(5000)) > 0)
|
|
break;
|
|
|
|
gk20a_warn(dev_from_gk20a(ch->g),
|
|
"%s: channel %d, still waiting, %s left: %d, waiting for: %d",
|
|
caller, ch->hw_chid, counter_name,
|
|
atomic_read(counter), wait_value);
|
|
|
|
gk20a_channel_dump_ref_actions(ch);
|
|
}
|
|
}
|
|
|
|
/* call ONLY when no references to the channel exist: after the last put */
|
|
static void gk20a_free_channel(struct channel_gk20a *ch, bool force)
|
|
{
|
|
struct gk20a *g = ch->g;
|
|
struct fifo_gk20a *f = &g->fifo;
|
|
struct gr_gk20a *gr = &g->gr;
|
|
struct vm_gk20a *ch_vm = ch->vm;
|
|
unsigned long timeout = gk20a_get_gr_idle_timeout(g);
|
|
struct dbg_session_gk20a *dbg_s;
|
|
struct dbg_session_data *session_data, *tmp_s;
|
|
struct dbg_session_channel_data *ch_data, *tmp;
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
WARN_ON(ch->g == NULL);
|
|
|
|
trace_gk20a_free_channel(ch->hw_chid);
|
|
|
|
/* abort channel and remove from runlist */
|
|
gk20a_disable_channel(ch);
|
|
|
|
/* wait until there's only our ref to the channel */
|
|
if (!force)
|
|
gk20a_wait_until_counter_is_N(
|
|
ch, &ch->ref_count, 1, &ch->ref_count_dec_wq,
|
|
__func__, "references");
|
|
|
|
/* wait until all pending interrupts for recently completed
|
|
* jobs are handled */
|
|
nvgpu_wait_for_deferred_interrupts(g);
|
|
|
|
/* prevent new refs */
|
|
nvgpu_spinlock_acquire(&ch->ref_obtain_lock);
|
|
if (!ch->referenceable) {
|
|
nvgpu_spinlock_release(&ch->ref_obtain_lock);
|
|
gk20a_err(dev_from_gk20a(ch->g),
|
|
"Extra %s() called to channel %u",
|
|
__func__, ch->hw_chid);
|
|
return;
|
|
}
|
|
ch->referenceable = false;
|
|
nvgpu_spinlock_release(&ch->ref_obtain_lock);
|
|
|
|
/* matches with the initial reference in gk20a_open_new_channel() */
|
|
atomic_dec(&ch->ref_count);
|
|
|
|
/* wait until no more refs to the channel */
|
|
if (!force)
|
|
gk20a_wait_until_counter_is_N(
|
|
ch, &ch->ref_count, 0, &ch->ref_count_dec_wq,
|
|
__func__, "references");
|
|
|
|
/* if engine reset was deferred, perform it now */
|
|
nvgpu_mutex_acquire(&f->deferred_reset_mutex);
|
|
if (g->fifo.deferred_reset_pending) {
|
|
gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was"
|
|
" deferred, running now");
|
|
/* if lock is already taken, a reset is taking place
|
|
so no need to repeat */
|
|
if (nvgpu_mutex_tryacquire(&g->fifo.gr_reset_mutex)) {
|
|
gk20a_fifo_deferred_reset(g, ch);
|
|
nvgpu_mutex_release(&g->fifo.gr_reset_mutex);
|
|
}
|
|
}
|
|
nvgpu_mutex_release(&f->deferred_reset_mutex);
|
|
|
|
if (!gk20a_channel_as_bound(ch))
|
|
goto unbind;
|
|
|
|
gk20a_dbg_info("freeing bound channel context, timeout=%ld",
|
|
timeout);
|
|
|
|
gk20a_free_error_notifiers(ch);
|
|
|
|
if (g->ops.fecs_trace.unbind_channel && !ch->vpr)
|
|
g->ops.fecs_trace.unbind_channel(g, ch);
|
|
|
|
/* release channel ctx */
|
|
g->ops.gr.free_channel_ctx(ch);
|
|
|
|
gk20a_gr_flush_channel_tlb(gr);
|
|
|
|
memset(&ch->ramfc, 0, sizeof(struct mem_desc_sub));
|
|
|
|
gk20a_gmmu_unmap_free(ch_vm, &ch->gpfifo.mem);
|
|
nvgpu_big_free(g, ch->gpfifo.pipe);
|
|
memset(&ch->gpfifo, 0, sizeof(struct gpfifo_desc));
|
|
|
|
#if defined(CONFIG_GK20A_CYCLE_STATS)
|
|
gk20a_free_cycle_stats_buffer(ch);
|
|
gk20a_free_cycle_stats_snapshot(ch);
|
|
#endif
|
|
|
|
channel_gk20a_free_priv_cmdbuf(ch);
|
|
|
|
/* sync must be destroyed before releasing channel vm */
|
|
nvgpu_mutex_acquire(&ch->sync_lock);
|
|
if (ch->sync) {
|
|
gk20a_channel_sync_destroy(ch->sync);
|
|
ch->sync = NULL;
|
|
}
|
|
nvgpu_mutex_release(&ch->sync_lock);
|
|
|
|
/*
|
|
* free the channel used semaphore index.
|
|
* we need to do this before releasing the address space,
|
|
* as the semaphore pool might get freed after that point.
|
|
*/
|
|
if (ch->hw_sema)
|
|
nvgpu_semaphore_free_hw_sema(ch);
|
|
|
|
/*
|
|
* When releasing the channel we unbind the VM - so release the ref.
|
|
*/
|
|
gk20a_vm_put(ch_vm);
|
|
|
|
nvgpu_spinlock_acquire(&ch->update_fn_lock);
|
|
ch->update_fn = NULL;
|
|
ch->update_fn_data = NULL;
|
|
nvgpu_spinlock_release(&ch->update_fn_lock);
|
|
cancel_work_sync(&ch->update_fn_work);
|
|
|
|
/* make sure we don't have deferred interrupts pending that
|
|
* could still touch the channel */
|
|
nvgpu_wait_for_deferred_interrupts(g);
|
|
|
|
unbind:
|
|
if (gk20a_is_channel_marked_as_tsg(ch))
|
|
g->ops.fifo.tsg_unbind_channel(ch);
|
|
|
|
g->ops.fifo.unbind_channel(ch);
|
|
g->ops.fifo.free_inst(g, ch);
|
|
|
|
ch->vpr = false;
|
|
ch->deterministic = false;
|
|
ch->vm = NULL;
|
|
|
|
WARN_ON(ch->sync);
|
|
|
|
/* unlink all debug sessions */
|
|
nvgpu_mutex_acquire(&g->dbg_sessions_lock);
|
|
|
|
list_for_each_entry_safe(session_data, tmp_s,
|
|
&ch->dbg_s_list, dbg_s_entry) {
|
|
dbg_s = session_data->dbg_s;
|
|
nvgpu_mutex_acquire(&dbg_s->ch_list_lock);
|
|
list_for_each_entry_safe(ch_data, tmp,
|
|
&dbg_s->ch_list, ch_entry) {
|
|
if (ch_data->chid == ch->hw_chid)
|
|
dbg_unbind_single_channel_gk20a(dbg_s, ch_data);
|
|
}
|
|
nvgpu_mutex_release(&dbg_s->ch_list_lock);
|
|
}
|
|
|
|
nvgpu_mutex_release(&g->dbg_sessions_lock);
|
|
|
|
/* free pre-allocated resources, if applicable */
|
|
if (channel_gk20a_is_prealloc_enabled(ch))
|
|
channel_gk20a_free_prealloc_resources(ch);
|
|
|
|
#if GK20A_CHANNEL_REFCOUNT_TRACKING
|
|
memset(ch->ref_actions, 0, sizeof(ch->ref_actions));
|
|
ch->ref_actions_put = 0;
|
|
#endif
|
|
|
|
/* make sure we catch accesses of unopened channels in case
 * there are non-refcounted channel pointers hanging around */
|
|
ch->g = NULL;
|
|
wmb();
|
|
|
|
/* ALWAYS last */
|
|
free_channel(f, ch);
|
|
}
|
|
|
|
static void gk20a_channel_dump_ref_actions(struct channel_gk20a *ch)
|
|
{
|
|
#if GK20A_CHANNEL_REFCOUNT_TRACKING
|
|
size_t i, get;
|
|
unsigned long now = jiffies;
|
|
unsigned long prev_jiffies = 0;
|
|
struct device *dev = dev_from_gk20a(ch->g);
|
|
|
|
nvgpu_spinlock_acquire(&ch->ref_actions_lock);
|
|
|
|
dev_info(dev, "ch %d: refs %d. Actions, most recent last:\n",
|
|
ch->hw_chid, atomic_read(&ch->ref_count));
|
|
|
|
/* start at the oldest possible entry. put is next insertion point */
|
|
get = ch->ref_actions_put;
|
|
|
|
/*
|
|
* If the buffer is not full, this will first loop to the oldest entry,
|
|
* skipping not-yet-initialized entries. There is no ref_actions_get.
|
|
*/
|
|
for (i = 0; i < GK20A_CHANNEL_REFCOUNT_TRACKING; i++) {
|
|
struct channel_gk20a_ref_action *act = &ch->ref_actions[get];
|
|
|
|
if (act->trace.nr_entries) {
|
|
dev_info(dev, "%s ref %zu steps ago (age %d ms, diff %d ms)\n",
|
|
act->type == channel_gk20a_ref_action_get
|
|
? "GET" : "PUT",
|
|
GK20A_CHANNEL_REFCOUNT_TRACKING - 1 - i,
|
|
jiffies_to_msecs(now - act->jiffies),
|
|
jiffies_to_msecs(act->jiffies - prev_jiffies));
|
|
|
|
print_stack_trace(&act->trace, 0);
|
|
prev_jiffies = act->jiffies;
|
|
}
|
|
|
|
get = (get + 1) % GK20A_CHANNEL_REFCOUNT_TRACKING;
|
|
}
|
|
|
|
nvgpu_spinlock_release(&ch->ref_actions_lock);
|
|
#endif
|
|
}
|
|
|
|
static void gk20a_channel_save_ref_source(struct channel_gk20a *ch,
|
|
enum channel_gk20a_ref_action_type type)
|
|
{
|
|
#if GK20A_CHANNEL_REFCOUNT_TRACKING
|
|
struct channel_gk20a_ref_action *act;
|
|
|
|
nvgpu_spinlock_acquire(&ch->ref_actions_lock);
|
|
|
|
act = &ch->ref_actions[ch->ref_actions_put];
|
|
act->type = type;
|
|
act->trace.max_entries = GK20A_CHANNEL_REFCOUNT_TRACKING_STACKLEN;
|
|
act->trace.nr_entries = 0;
|
|
act->trace.skip = 3; /* onwards from the caller of this */
|
|
act->trace.entries = act->trace_entries;
|
|
save_stack_trace(&act->trace);
|
|
act->jiffies = jiffies;
|
|
ch->ref_actions_put = (ch->ref_actions_put + 1) %
|
|
GK20A_CHANNEL_REFCOUNT_TRACKING;
|
|
|
|
nvgpu_spinlock_release(&ch->ref_actions_lock);
|
|
#endif
|
|
}
|
|
|
|
/* Try to get a reference to the channel. Return nonzero on success. If it
 * fails, the channel is dead or being freed elsewhere and you must not touch
 * it.
 *
 * Whenever a channel_gk20a pointer is seen and about to be used, a reference
 * must be held to it - either by you or by the caller, which should be
 * documented well or otherwise clearly visible. This usually boils down to
 * the file reference taken for ioctls directly, or an explicit get in
 * exception handlers when the channel is found by a hw_chid.
 *
 * Most global functions in this file require a reference to be held by the
 * caller.
 */
|
|
struct channel_gk20a *_gk20a_channel_get(struct channel_gk20a *ch,
|
|
const char *caller) {
|
|
struct channel_gk20a *ret;
|
|
|
|
nvgpu_spinlock_acquire(&ch->ref_obtain_lock);
|
|
|
|
if (likely(ch->referenceable)) {
|
|
gk20a_channel_save_ref_source(ch, channel_gk20a_ref_action_get);
|
|
atomic_inc(&ch->ref_count);
|
|
ret = ch;
|
|
} else
|
|
ret = NULL;
|
|
|
|
nvgpu_spinlock_release(&ch->ref_obtain_lock);
|
|
|
|
if (ret)
|
|
trace_gk20a_channel_get(ch->hw_chid, caller);
|
|
|
|
return ret;
|
|
}
|
|
|
|
void _gk20a_channel_put(struct channel_gk20a *ch, const char *caller)
|
|
{
|
|
gk20a_channel_save_ref_source(ch, channel_gk20a_ref_action_put);
|
|
trace_gk20a_channel_put(ch->hw_chid, caller);
|
|
atomic_dec(&ch->ref_count);
|
|
wake_up_all(&ch->ref_count_dec_wq);
|
|
|
|
/* More puts than gets. Channel is probably going to get
|
|
* stuck. */
|
|
WARN_ON(atomic_read(&ch->ref_count) < 0);
|
|
|
|
/* Also, more puts than gets. ref_count can go to 0 only if
|
|
* the channel is closing. Channel is probably going to get
|
|
* stuck. */
|
|
WARN_ON(atomic_read(&ch->ref_count) == 0 && ch->referenceable);
|
|
}
|
|
|
|
void gk20a_channel_close(struct channel_gk20a *ch)
|
|
{
|
|
gk20a_free_channel(ch, false);
|
|
}
|
|
|
|
/*
|
|
* Be careful with this - it is meant for terminating channels when we know the
|
|
* driver is otherwise dying. Ref counts and the like are ignored by this
|
|
* version of the cleanup.
|
|
*/
|
|
void __gk20a_channel_kill(struct channel_gk20a *ch)
|
|
{
|
|
gk20a_free_channel(ch, true);
|
|
}
|
|
|
|
struct channel_gk20a *gk20a_get_channel_from_file(int fd)
|
|
{
|
|
struct channel_priv *priv;
|
|
struct file *f = fget(fd);
|
|
|
|
if (!f)
|
|
return NULL;
|
|
|
|
if (f->f_op != &gk20a_channel_ops) {
|
|
fput(f);
|
|
return NULL;
|
|
}
|
|
|
|
priv = (struct channel_priv *)f->private_data;
|
|
fput(f);
|
|
return priv->c;
|
|
}
|
|
|
|
int gk20a_channel_release(struct inode *inode, struct file *filp)
|
|
{
|
|
struct channel_priv *priv = filp->private_data;
|
|
struct channel_gk20a *ch = priv->c;
|
|
struct gk20a *g = priv->g;
|
|
|
|
int err;
|
|
|
|
err = gk20a_busy(g);
|
|
if (err) {
|
|
gk20a_err(dev_from_gk20a(g), "failed to release a channel!");
|
|
goto channel_release;
|
|
}
|
|
|
|
trace_gk20a_channel_release(dev_name(g->dev));
|
|
|
|
gk20a_channel_close(ch);
|
|
gk20a_idle(g);
|
|
|
|
channel_release:
|
|
gk20a_put(g);
|
|
kfree(filp->private_data);
|
|
filp->private_data = NULL;
|
|
return 0;
|
|
}
|
|
|
|
static void gk20a_channel_update_runcb_fn(struct work_struct *work)
|
|
{
|
|
struct channel_gk20a *ch =
|
|
container_of(work, struct channel_gk20a, update_fn_work);
|
|
void (*update_fn)(struct channel_gk20a *, void *);
|
|
void *update_fn_data;
|
|
|
|
nvgpu_spinlock_acquire(&ch->update_fn_lock);
|
|
update_fn = ch->update_fn;
|
|
update_fn_data = ch->update_fn_data;
|
|
nvgpu_spinlock_release(&ch->update_fn_lock);
|
|
|
|
if (update_fn)
|
|
update_fn(ch, update_fn_data);
|
|
}
|
|
|
|
struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g,
|
|
void (*update_fn)(struct channel_gk20a *, void *),
|
|
void *update_fn_data,
|
|
int runlist_id,
|
|
bool is_privileged_channel)
|
|
{
|
|
struct channel_gk20a *ch = gk20a_open_new_channel(g, runlist_id, is_privileged_channel);
|
|
|
|
if (ch) {
|
|
nvgpu_spinlock_acquire(&ch->update_fn_lock);
|
|
ch->update_fn = update_fn;
|
|
ch->update_fn_data = update_fn_data;
|
|
nvgpu_spinlock_release(&ch->update_fn_lock);
|
|
}
|
|
|
|
return ch;
|
|
}
|
|
|
|
struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g,
|
|
s32 runlist_id,
|
|
bool is_privileged_channel)
|
|
{
|
|
struct fifo_gk20a *f = &g->fifo;
|
|
struct channel_gk20a *ch;
|
|
struct gk20a_event_id_data *event_id_data, *event_id_data_temp;
|
|
|
|
/* compatibility with existing code */
|
|
if (!gk20a_fifo_is_valid_runlist_id(g, runlist_id)) {
|
|
runlist_id = gk20a_fifo_get_gr_runlist_id(g);
|
|
}
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
ch = allocate_channel(f);
|
|
if (ch == NULL) {
|
|
/* TBD: we want to make this virtualizable */
|
|
gk20a_err(dev_from_gk20a(g), "out of hw chids");
|
|
return NULL;
|
|
}
|
|
|
|
trace_gk20a_open_new_channel(ch->hw_chid);
|
|
|
|
BUG_ON(ch->g);
|
|
ch->g = g;
|
|
|
|
/* Runlist for the channel */
|
|
ch->runlist_id = runlist_id;
|
|
|
|
/* Channel privilege level */
|
|
ch->is_privileged_channel = is_privileged_channel;
|
|
|
|
if (g->ops.fifo.alloc_inst(g, ch)) {
|
|
ch->g = NULL;
|
|
free_channel(f, ch);
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"failed to open gk20a channel, out of inst mem");
|
|
return NULL;
|
|
}
|
|
|
|
/* now the channel is in a limbo out of the free list but not marked as
|
|
* alive and used (i.e. get-able) yet */
|
|
|
|
ch->pid = current->pid;
|
|
ch->tgid = current->tgid; /* process granularity for FECS traces */
|
|
|
|
/* unhook all events created on this channel */
|
|
nvgpu_mutex_acquire(&ch->event_id_list_lock);
|
|
list_for_each_entry_safe(event_id_data, event_id_data_temp,
|
|
&ch->event_id_list,
|
|
event_id_node) {
|
|
list_del_init(&event_id_data->event_id_node);
|
|
}
|
|
nvgpu_mutex_release(&ch->event_id_list_lock);
|
|
|
|
/* By default, channel is regular (non-TSG) channel */
|
|
ch->tsgid = NVGPU_INVALID_TSG_ID;
|
|
|
|
/* reset timeout counter and update timestamp */
|
|
ch->timeout_accumulated_ms = 0;
|
|
ch->timeout_gpfifo_get = 0;
|
|
/* set gr host default timeout */
|
|
ch->timeout_ms_max = gk20a_get_gr_idle_timeout(g);
|
|
ch->timeout_debug_dump = true;
|
|
ch->has_timedout = false;
|
|
ch->wdt_enabled = true;
|
|
ch->obj_class = 0;
|
|
ch->interleave_level = NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW;
|
|
ch->timeslice_us = g->timeslice_low_priority_us;
|
|
|
|
/* The channel is *not* runnable at this point. It still needs to have
|
|
* an address space bound and allocate a gpfifo and grctx. */
|
|
|
|
init_waitqueue_head(&ch->notifier_wq);
|
|
init_waitqueue_head(&ch->semaphore_wq);
|
|
|
|
ch->update_fn = NULL;
|
|
ch->update_fn_data = NULL;
|
|
nvgpu_spinlock_init(&ch->update_fn_lock);
|
|
INIT_WORK(&ch->update_fn_work, gk20a_channel_update_runcb_fn);
|
|
|
|
/* Mark the channel alive, get-able, with 1 initial use
|
|
* references. The initial reference will be decreased in
|
|
* gk20a_free_channel() */
|
|
ch->referenceable = true;
|
|
atomic_set(&ch->ref_count, 1);
|
|
wmb();
|
|
|
|
return ch;
|
|
}
|
|
|
|
/* note: runlist_id -1 is synonym for the ENGINE_GR_GK20A runlist id */
|
|
static int __gk20a_channel_open(struct gk20a *g, struct file *filp, s32 runlist_id)
|
|
{
|
|
int err;
|
|
struct channel_gk20a *ch;
|
|
struct channel_priv *priv;
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
g = gk20a_get(g);
|
|
if (!g)
|
|
return -ENODEV;
|
|
|
|
trace_gk20a_channel_open(dev_name(g->dev));
|
|
|
|
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
|
|
if (!priv) {
|
|
err = -ENOMEM;
|
|
goto free_ref;
|
|
}
|
|
|
|
err = gk20a_busy(g);
|
|
if (err) {
|
|
gk20a_err(dev_from_gk20a(g), "failed to power on, %d", err);
|
|
goto fail_busy;
|
|
}
|
|
/* All user space channels should be non-privileged */
|
|
ch = gk20a_open_new_channel(g, runlist_id, false);
|
|
gk20a_idle(g);
|
|
if (!ch) {
|
|
gk20a_err(dev_from_gk20a(g),
|
|
"failed to get f");
|
|
err = -ENOMEM;
|
|
goto fail_busy;
|
|
}
|
|
|
|
gk20a_channel_trace_sched_param(
|
|
trace_gk20a_channel_sched_defaults, ch);
|
|
|
|
priv->g = g;
|
|
priv->c = ch;
|
|
|
|
filp->private_data = priv;
|
|
return 0;
|
|
|
|
fail_busy:
|
|
kfree(priv);
|
|
free_ref:
|
|
gk20a_put(g);
|
|
return err;
|
|
}
|
|
|
|
int gk20a_channel_open(struct inode *inode, struct file *filp)
|
|
{
|
|
struct gk20a *g = container_of(inode->i_cdev,
|
|
struct gk20a, channel.cdev);
|
|
int ret;
|
|
|
|
gk20a_dbg_fn("start");
|
|
ret = __gk20a_channel_open(g, filp, -1);
|
|
|
|
gk20a_dbg_fn("end");
|
|
return ret;
|
|
}
|
|
|
|
int gk20a_channel_open_ioctl(struct gk20a *g,
|
|
struct nvgpu_channel_open_args *args)
|
|
{
|
|
int err;
|
|
int fd;
|
|
struct file *file;
|
|
char *name;
|
|
s32 runlist_id = args->in.runlist_id;
|
|
|
|
err = get_unused_fd_flags(O_RDWR);
|
|
if (err < 0)
|
|
return err;
|
|
fd = err;
|
|
|
|
name = kasprintf(GFP_KERNEL, "nvhost-%s-fd%d",
|
|
dev_name(g->dev), fd);
|
|
if (!name) {
|
|
err = -ENOMEM;
|
|
goto clean_up;
|
|
}
|
|
|
|
file = anon_inode_getfile(name, g->channel.cdev.ops, NULL, O_RDWR);
|
|
kfree(name);
|
|
if (IS_ERR(file)) {
|
|
err = PTR_ERR(file);
|
|
goto clean_up;
|
|
}
|
|
|
|
err = __gk20a_channel_open(g, file, runlist_id);
|
|
if (err)
|
|
goto clean_up_file;
|
|
|
|
fd_install(fd, file);
|
|
args->out.channel_fd = fd;
|
|
return 0;
|
|
|
|
clean_up_file:
|
|
fput(file);
|
|
clean_up:
|
|
put_unused_fd(fd);
|
|
return err;
|
|
}
|
|
|
|
/* allocate private cmd buffer.
|
|
used for inserting commands before/after user submitted buffers. */
|
|
static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c)
|
|
{
|
|
struct device *d = dev_from_gk20a(c->g);
|
|
struct vm_gk20a *ch_vm = c->vm;
|
|
struct priv_cmd_queue *q = &c->priv_cmd_q;
|
|
u32 size;
|
|
int err = 0;
|
|
|
|
/*
|
|
* Compute the amount of priv_cmdbuf space we need. In general the worst
|
|
* case is the kernel inserts both a semaphore pre-fence and post-fence.
|
|
* Any sync-pt fences will take less memory so we can ignore them for
|
|
* now.
|
|
*
|
|
* A semaphore ACQ (fence-wait) is 8 dwords: semaphore_a, semaphore_b,
|
|
* semaphore_c, and semaphore_d. A semaphore INCR (fence-get) will be 10
|
|
* dwords: all the same as an ACQ plus a non-stalling intr which is
|
|
* another 2 dwords.
|
|
*
|
|
* Lastly the number of gpfifo entries per channel is fixed so at most
|
|
* we can use 2/3rds of the gpfifo entries (1 pre-fence entry, one
|
|
* userspace entry, and one post-fence entry). Thus the computation is:
|
|
*
|
|
* (gpfifo entry number * (2 / 3) * (8 + 10) * 4 bytes.
|
|
*/
|
|
size = roundup_pow_of_two(c->gpfifo.entry_num *
|
|
2 * 18 * sizeof(u32) / 3);
|
|
|
|
err = gk20a_gmmu_alloc_map_sys(ch_vm, size, &q->mem);
|
|
if (err) {
|
|
gk20a_err(d, "%s: memory allocation failed\n", __func__);
|
|
goto clean_up;
|
|
}
|
|
|
|
q->size = q->mem.size / sizeof (u32);
|
|
|
|
return 0;
|
|
|
|
clean_up:
|
|
channel_gk20a_free_priv_cmdbuf(c);
|
|
return err;
|
|
}
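
/*
 * Illustrative example (not from the original source): for a channel with
 * 1024 GPFIFO entries the computation above gives 1024 * 2 * 18 * 4 / 3 =
 * 49152 bytes, which roundup_pow_of_two() turns into a 64 KiB priv cmdbuf.
 */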
|
|
|
|
static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c)
|
|
{
|
|
struct vm_gk20a *ch_vm = c->vm;
|
|
struct priv_cmd_queue *q = &c->priv_cmd_q;
|
|
|
|
if (q->size == 0)
|
|
return;
|
|
|
|
gk20a_gmmu_unmap_free(ch_vm, &q->mem);
|
|
|
|
memset(q, 0, sizeof(struct priv_cmd_queue));
|
|
}
|
|
|
|
/* allocate a cmd buffer with given size. size is number of u32 entries */
|
|
int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
|
|
struct priv_cmd_entry *e)
|
|
{
|
|
struct priv_cmd_queue *q = &c->priv_cmd_q;
|
|
u32 free_count;
|
|
u32 size = orig_size;
|
|
|
|
gk20a_dbg_fn("size %d", orig_size);
|
|
|
|
if (!e) {
|
|
gk20a_err(dev_from_gk20a(c->g),
|
|
"ch %d: priv cmd entry is null",
|
|
c->hw_chid);
|
|
return -EINVAL;
|
|
}
|
|
|
|
/* if free space in the end is less than requested, increase the size
|
|
* to make the real allocated space start from beginning. */
|
|
if (q->put + size > q->size)
|
|
size = orig_size + (q->size - q->put);
|
|
|
|
gk20a_dbg_info("ch %d: priv cmd queue get:put %d:%d",
|
|
c->hw_chid, q->get, q->put);
|
|
|
|
free_count = (q->size - (q->put - q->get) - 1) % q->size;
|
|
|
|
if (size > free_count)
|
|
return -EAGAIN;
|
|
|
|
e->size = orig_size;
|
|
e->mem = &q->mem;
|
|
|
|
/* if we have increased size to skip free space in the end, set put
|
|
to beginning of cmd buffer (0) + size */
|
|
if (size != orig_size) {
|
|
e->off = 0;
|
|
e->gva = q->mem.gpu_va;
|
|
q->put = orig_size;
|
|
} else {
|
|
e->off = q->put;
|
|
e->gva = q->mem.gpu_va + q->put * sizeof(u32);
|
|
q->put = (q->put + orig_size) & (q->size - 1);
|
|
}
|
|
|
|
/* we already handled q->put + size > q->size so BUG_ON this */
|
|
BUG_ON(q->put > q->size);
|
|
|
|
/*
|
|
* commit the previous writes before making the entry valid.
|
|
* see the corresponding rmb() in gk20a_free_priv_cmdbuf().
|
|
*/
|
|
wmb();
|
|
|
|
e->valid = true;
|
|
gk20a_dbg_fn("done");
|
|
|
|
return 0;
|
|
}
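
/*
 * Illustrative example (not from the original source): with q->size = 1024,
 * q->put = 1000 and q->get = 100, a request for 64 words does not fit at the
 * end, so size grows to 64 + (1024 - 1000) = 88. free_count is
 * (1024 - 900 - 1) % 1024 = 123 >= 88, so the entry is placed at offset 0 and
 * q->put becomes 64.
 */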
|
|
|
|
/* Don't call this to free an explicit cmd entry.
 * It doesn't update priv_cmd_queue get/put */
|
|
static void free_priv_cmdbuf(struct channel_gk20a *c,
|
|
struct priv_cmd_entry *e)
|
|
{
|
|
if (channel_gk20a_is_prealloc_enabled(c))
|
|
memset(e, 0, sizeof(struct priv_cmd_entry));
|
|
else
|
|
kfree(e);
|
|
}
|
|
|
|
static int channel_gk20a_alloc_job(struct channel_gk20a *c,
|
|
struct channel_gk20a_job **job_out)
|
|
{
|
|
int err = 0;
|
|
|
|
if (channel_gk20a_is_prealloc_enabled(c)) {
|
|
int put = c->joblist.pre_alloc.put;
|
|
int get = c->joblist.pre_alloc.get;
|
|
|
|
/*
|
|
* ensure all subsequent reads happen after reading get.
|
|
* see corresponding wmb in gk20a_channel_clean_up_jobs()
|
|
*/
|
|
rmb();
|
|
|
|
if (CIRC_SPACE(put, get, c->joblist.pre_alloc.length))
|
|
*job_out = &c->joblist.pre_alloc.jobs[put];
|
|
else {
|
|
gk20a_warn(dev_from_gk20a(c->g),
|
|
"out of job ringbuffer space\n");
|
|
err = -EAGAIN;
|
|
}
|
|
} else {
|
|
*job_out = kzalloc(sizeof(struct channel_gk20a_job),
|
|
GFP_KERNEL);
|
|
if (!*job_out)
|
|
err = -ENOMEM;
|
|
}
|
|
|
|
return err;
|
|
}
|
|
|
|
static void channel_gk20a_free_job(struct channel_gk20a *c,
|
|
struct channel_gk20a_job *job)
|
|
{
|
|
/*
|
|
* In case of pre_allocated jobs, we need to clean out
|
|
* the job but maintain the pointers to the priv_cmd_entry,
|
|
* since they're inherently tied to the job node.
|
|
*/
|
|
if (channel_gk20a_is_prealloc_enabled(c)) {
|
|
struct priv_cmd_entry *wait_cmd = job->wait_cmd;
|
|
struct priv_cmd_entry *incr_cmd = job->incr_cmd;
|
|
memset(job, 0, sizeof(*job));
|
|
job->wait_cmd = wait_cmd;
|
|
job->incr_cmd = incr_cmd;
|
|
} else
|
|
kfree(job);
|
|
}
|
|
|
|
void channel_gk20a_joblist_lock(struct channel_gk20a *c)
|
|
{
|
|
if (channel_gk20a_is_prealloc_enabled(c))
|
|
nvgpu_mutex_acquire(&c->joblist.pre_alloc.read_lock);
|
|
else
|
|
nvgpu_spinlock_acquire(&c->joblist.dynamic.lock);
|
|
}
|
|
|
|
void channel_gk20a_joblist_unlock(struct channel_gk20a *c)
|
|
{
|
|
if (channel_gk20a_is_prealloc_enabled(c))
|
|
nvgpu_mutex_release(&c->joblist.pre_alloc.read_lock);
|
|
else
|
|
nvgpu_spinlock_release(&c->joblist.dynamic.lock);
|
|
}
|
|
|
|
static struct channel_gk20a_job *channel_gk20a_joblist_peek(
|
|
struct channel_gk20a *c)
|
|
{
|
|
int get;
|
|
struct channel_gk20a_job *job = NULL;
|
|
|
|
if (channel_gk20a_is_prealloc_enabled(c)) {
|
|
if (!channel_gk20a_joblist_is_empty(c)) {
|
|
get = c->joblist.pre_alloc.get;
|
|
job = &c->joblist.pre_alloc.jobs[get];
|
|
}
|
|
} else {
|
|
if (!list_empty(&c->joblist.dynamic.jobs))
|
|
job = list_first_entry(&c->joblist.dynamic.jobs,
|
|
struct channel_gk20a_job, list);
|
|
}
|
|
|
|
return job;
|
|
}
|
|
|
|
static void channel_gk20a_joblist_add(struct channel_gk20a *c,
|
|
struct channel_gk20a_job *job)
|
|
{
|
|
if (channel_gk20a_is_prealloc_enabled(c)) {
|
|
c->joblist.pre_alloc.put = (c->joblist.pre_alloc.put + 1) %
|
|
(c->joblist.pre_alloc.length);
|
|
} else {
|
|
list_add_tail(&job->list, &c->joblist.dynamic.jobs);
|
|
}
|
|
}
|
|
|
|
static void channel_gk20a_joblist_delete(struct channel_gk20a *c,
|
|
struct channel_gk20a_job *job)
|
|
{
|
|
if (channel_gk20a_is_prealloc_enabled(c)) {
|
|
c->joblist.pre_alloc.get = (c->joblist.pre_alloc.get + 1) %
|
|
(c->joblist.pre_alloc.length);
|
|
} else {
|
|
list_del_init(&job->list);
|
|
}
|
|
}
|
|
|
|
bool channel_gk20a_joblist_is_empty(struct channel_gk20a *c)
|
|
{
|
|
if (channel_gk20a_is_prealloc_enabled(c)) {
|
|
int get = c->joblist.pre_alloc.get;
|
|
int put = c->joblist.pre_alloc.put;
|
|
return !(CIRC_CNT(put, get, c->joblist.pre_alloc.length));
|
|
}
|
|
|
|
return list_empty(&c->joblist.dynamic.jobs);
|
|
}
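
/*
 * Illustrative example (not from the original source): for a pre-allocated
 * joblist of length 8 with put = 5 and get = 2, CIRC_CNT(5, 2, 8) = 3 jobs
 * are pending so the list is not empty, and CIRC_SPACE(5, 2, 8) = 4 more jobs
 * could still be allocated (one slot is always kept free).
 */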
|
|
|
|
bool channel_gk20a_is_prealloc_enabled(struct channel_gk20a *c)
|
|
{
|
|
bool pre_alloc_enabled = c->joblist.pre_alloc.enabled;
|
|
|
|
rmb();
|
|
return pre_alloc_enabled;
|
|
}
|
|
|
|
static int channel_gk20a_prealloc_resources(struct channel_gk20a *c,
|
|
unsigned int num_jobs)
|
|
{
|
|
unsigned int i;
|
|
int err;
|
|
size_t size;
|
|
struct priv_cmd_entry *entries = NULL;
|
|
|
|
if (channel_gk20a_is_prealloc_enabled(c) || !num_jobs)
|
|
return -EINVAL;
|
|
|
|
/*
|
|
* pre-allocate the job list.
|
|
 * since vmalloc takes in an unsigned long, we need
|
|
* to make sure we don't hit an overflow condition
|
|
*/
|
|
size = sizeof(struct channel_gk20a_job);
|
|
if (num_jobs <= ULONG_MAX / size)
|
|
c->joblist.pre_alloc.jobs = vzalloc(num_jobs * size);
|
|
if (!c->joblist.pre_alloc.jobs) {
|
|
err = -ENOMEM;
|
|
goto clean_up;
|
|
}
|
|
|
|
/*
|
|
* pre-allocate 2x priv_cmd_entry for each job up front.
|
|
 * since vmalloc takes in an unsigned long, we need
|
|
* to make sure we don't hit an overflow condition
|
|
*/
|
|
size = sizeof(struct priv_cmd_entry);
|
|
if (num_jobs <= ULONG_MAX / (size << 1))
|
|
entries = vzalloc((num_jobs << 1) * size);
|
|
if (!entries) {
|
|
err = -ENOMEM;
|
|
goto clean_up_joblist;
|
|
}
|
|
|
|
for (i = 0; i < num_jobs; i++) {
|
|
c->joblist.pre_alloc.jobs[i].wait_cmd = &entries[i];
|
|
c->joblist.pre_alloc.jobs[i].incr_cmd =
|
|
&entries[i + num_jobs];
|
|
}
|
|
|
|
/* pre-allocate a fence pool */
|
|
err = gk20a_alloc_fence_pool(c, num_jobs);
|
|
if (err)
|
|
goto clean_up_priv_cmd;
|
|
|
|
c->joblist.pre_alloc.length = num_jobs;
|
|
|
|
/*
|
|
* commit the previous writes before setting the flag.
|
|
* see corresponding rmb in channel_gk20a_is_prealloc_enabled()
|
|
*/
|
|
wmb();
|
|
c->joblist.pre_alloc.enabled = true;
|
|
|
|
return 0;
|
|
|
|
clean_up_priv_cmd:
|
|
vfree(entries);
|
|
clean_up_joblist:
|
|
vfree(c->joblist.pre_alloc.jobs);
|
|
clean_up:
|
|
memset(&c->joblist.pre_alloc, 0, sizeof(c->joblist.pre_alloc));
|
|
return err;
|
|
}
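
/*
 * Illustrative example (not from the original source): with num_jobs = 4 the
 * single entries[] allocation above holds 8 priv_cmd_entry structs; jobs[i]
 * gets entries[i] as its wait_cmd and entries[i + 4] as its incr_cmd, which
 * is why freeing jobs[0].wait_cmd later releases the whole array.
 */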
|
|
|
|
static void channel_gk20a_free_prealloc_resources(struct channel_gk20a *c)
|
|
{
|
|
vfree(c->joblist.pre_alloc.jobs[0].wait_cmd);
|
|
vfree(c->joblist.pre_alloc.jobs);
|
|
gk20a_free_fence_pool(c);
|
|
|
|
/*
|
|
* commit the previous writes before disabling the flag.
|
|
* see corresponding rmb in channel_gk20a_is_prealloc_enabled()
|
|
*/
|
|
wmb();
|
|
c->joblist.pre_alloc.enabled = false;
|
|
}
|
|
|
|
int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
|
|
struct nvgpu_alloc_gpfifo_ex_args *args)
|
|
{
|
|
struct gk20a *g = c->g;
|
|
struct device *d = dev_from_gk20a(g);
|
|
struct gk20a_platform *platform = gk20a_get_platform(d);
|
|
struct vm_gk20a *ch_vm;
|
|
u32 gpfifo_size;
|
|
int err = 0;
|
|
|
|
gpfifo_size = args->num_entries;
|
|
|
|
if (args->flags & NVGPU_ALLOC_GPFIFO_EX_FLAGS_VPR_ENABLED)
|
|
c->vpr = true;
|
|
|
|
if (args->flags & NVGPU_ALLOC_GPFIFO_EX_FLAGS_DETERMINISTIC)
|
|
c->deterministic = true;
|
|
|
|
/* an address space needs to have been bound at this point. */
|
|
if (!gk20a_channel_as_bound(c)) {
|
|
gk20a_err(d,
|
|
"not bound to an address space at time of gpfifo"
|
|
" allocation.");
|
|
return -EINVAL;
|
|
}
|
|
ch_vm = c->vm;
|
|
|
|
c->ramfc.offset = 0;
|
|
c->ramfc.size = ram_in_ramfc_s() / 8;
|
|
|
|
if (c->gpfifo.mem.size) {
|
|
gk20a_err(d, "channel %d :"
|
|
"gpfifo already allocated", c->hw_chid);
|
|
return -EEXIST;
|
|
}
|
|
|
|
err = gk20a_gmmu_alloc_map_sys(ch_vm,
|
|
gpfifo_size * sizeof(struct nvgpu_gpfifo),
|
|
&c->gpfifo.mem);
|
|
if (err) {
|
|
gk20a_err(d, "%s: memory allocation failed\n", __func__);
|
|
goto clean_up;
|
|
}
|
|
|
|
if (c->gpfifo.mem.aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
|
|
c->gpfifo.pipe = nvgpu_big_malloc(g,
|
|
gpfifo_size * sizeof(struct nvgpu_gpfifo));
|
|
if (!c->gpfifo.pipe) {
|
|
err = -ENOMEM;
|
|
goto clean_up_unmap;
|
|
}
|
|
}
|
|
|
|
c->gpfifo.entry_num = gpfifo_size;
|
|
c->gpfifo.get = c->gpfifo.put = 0;
|
|
|
|
gk20a_dbg_info("channel %d : gpfifo_base 0x%016llx, size %d",
|
|
c->hw_chid, c->gpfifo.mem.gpu_va, c->gpfifo.entry_num);
|
|
|
|
channel_gk20a_setup_userd(c);
|
|
|
|
if (!platform->aggressive_sync_destroy_thresh) {
|
|
nvgpu_mutex_acquire(&c->sync_lock);
|
|
c->sync = gk20a_channel_sync_create(c);
|
|
if (!c->sync) {
|
|
err = -ENOMEM;
|
|
nvgpu_mutex_release(&c->sync_lock);
|
|
goto clean_up_unmap;
|
|
}
|
|
nvgpu_mutex_release(&c->sync_lock);
|
|
|
|
if (g->ops.fifo.resetup_ramfc) {
|
|
err = g->ops.fifo.resetup_ramfc(c);
|
|
if (err)
|
|
goto clean_up_sync;
|
|
}
|
|
}
|
|
|
|
err = g->ops.fifo.setup_ramfc(c, c->gpfifo.mem.gpu_va,
|
|
c->gpfifo.entry_num, args->flags);
|
|
if (err)
|
|
goto clean_up_sync;
|
|
|
|
/* TBD: setup engine contexts */
|
|
|
|
if (args->num_inflight_jobs) {
|
|
err = channel_gk20a_prealloc_resources(c,
|
|
args->num_inflight_jobs);
|
|
if (err)
|
|
goto clean_up_sync;
|
|
}
|
|
|
|
err = channel_gk20a_alloc_priv_cmdbuf(c);
|
|
if (err)
|
|
goto clean_up_prealloc;
|
|
|
|
err = channel_gk20a_update_runlist(c, true);
|
|
if (err)
|
|
goto clean_up_priv_cmd;
|
|
|
|
g->ops.fifo.bind_channel(c);
|
|
|
|
gk20a_dbg_fn("done");
|
|
return 0;
|
|
|
|
clean_up_priv_cmd:
|
|
channel_gk20a_free_priv_cmdbuf(c);
|
|
clean_up_prealloc:
|
|
if (args->num_inflight_jobs)
|
|
channel_gk20a_free_prealloc_resources(c);
|
|
clean_up_sync:
|
|
if (c->sync) {
|
|
gk20a_channel_sync_destroy(c->sync);
|
|
c->sync = NULL;
|
|
}
|
|
clean_up_unmap:
|
|
nvgpu_big_free(g, c->gpfifo.pipe);
|
|
gk20a_gmmu_unmap_free(ch_vm, &c->gpfifo.mem);
|
|
clean_up:
|
|
memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc));
|
|
gk20a_err(d, "fail");
|
|
return err;
|
|
}
|
|
|
|
u32 gk20a_userd_gp_get(struct gk20a *g, struct channel_gk20a *c)
|
|
{
|
|
return gk20a_bar1_readl(g,
|
|
c->userd_gpu_va + sizeof(u32) * ram_userd_gp_get_w());
|
|
}
|
|
|
|
void gk20a_userd_gp_put(struct gk20a *g, struct channel_gk20a *c)
|
|
{
|
|
gk20a_bar1_writel(g,
|
|
c->userd_gpu_va + sizeof(u32) * ram_userd_gp_put_w(),
|
|
c->gpfifo.put);
|
|
}
|
|
|
|
/* Call this periodically to refresh gp_get and see how the gpfifo is draining. */
|
|
static inline u32 update_gp_get(struct gk20a *g,
|
|
struct channel_gk20a *c)
|
|
{
|
|
u32 new_get = g->ops.fifo.userd_gp_get(g, c);
|
|
|
|
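/* GP_GET moving backwards means the hardware wrapped around the ring. */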
if (new_get < c->gpfifo.get)
|
|
c->gpfifo.wrap = !c->gpfifo.wrap;
|
|
c->gpfifo.get = new_get;
|
|
return new_get;
|
|
}
|
|
|
|
static inline u32 gp_free_count(struct channel_gk20a *c)
|
|
{
|
|
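/* One entry is kept unused so that put == get unambiguously means an
 * empty ring. */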
return (c->gpfifo.entry_num - (c->gpfifo.put - c->gpfifo.get) - 1) %
|
|
c->gpfifo.entry_num;
|
|
}
|
|
|
|
bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
|
|
u32 timeout_delta_ms, bool *progress)
|
|
{
|
|
u32 gpfifo_get = update_gp_get(ch->g, ch);
|
|
|
|
/* Count consecutive timeout ISRs */
|
|
if (gpfifo_get == ch->timeout_gpfifo_get) {
|
|
/* we didn't advance since previous channel timeout check */
|
|
ch->timeout_accumulated_ms += timeout_delta_ms;
|
|
*progress = false;
|
|
} else {
|
|
/* first timeout isr encountered */
|
|
ch->timeout_accumulated_ms = timeout_delta_ms;
|
|
*progress = true;
|
|
}
|
|
|
|
ch->timeout_gpfifo_get = gpfifo_get;
|
|
|
|
return ch->g->timeouts_enabled &&
|
|
ch->timeout_accumulated_ms > ch->timeout_ms_max;
|
|
}
|
|
|
|
static u32 gk20a_get_channel_watchdog_timeout(struct channel_gk20a *ch)
|
|
{
|
|
struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
|
|
return platform->ch_wdt_timeout_ms;
|
|
}
|
|
|
|
static u32 get_gp_free_count(struct channel_gk20a *c)
|
|
{
|
|
update_gp_get(c->g, c);
|
|
return gp_free_count(c);
|
|
}
|
|
|
|
static void trace_write_pushbuffer(struct channel_gk20a *c,
|
|
struct nvgpu_gpfifo *g)
|
|
{
|
|
void *mem = NULL;
|
|
unsigned int words;
|
|
u64 offset;
|
|
struct dma_buf *dmabuf = NULL;
|
|
|
|
if (gk20a_debug_trace_cmdbuf) {
|
|
u64 gpu_va = (u64)g->entry0 |
|
|
(u64)((u64)pbdma_gp_entry1_get_hi_v(g->entry1) << 32);
|
|
int err;
|
|
|
|
words = pbdma_gp_entry1_length_v(g->entry1);
|
|
err = gk20a_vm_find_buffer(c->vm, gpu_va, &dmabuf, &offset);
|
|
if (!err)
|
|
mem = dma_buf_vmap(dmabuf);
|
|
}
|
|
|
|
if (mem) {
|
|
u32 i;
|
|
/*
|
|
* Write in batches of 128 as there seems to be a limit
|
|
* on how much you can output to ftrace at once.
|
|
*/
|
|
for (i = 0; i < words; i += 128U) {
|
|
trace_gk20a_push_cmdbuf(
|
|
dev_name(c->g->dev),
|
|
0,
|
|
min(words - i, 128U),
|
|
offset + i * sizeof(u32),
|
|
mem);
|
|
}
|
|
dma_buf_vunmap(dmabuf, mem);
|
|
}
|
|
}
|
|
|
|
static void trace_write_pushbuffer_range(struct channel_gk20a *c,
|
|
struct nvgpu_gpfifo *g,
|
|
struct nvgpu_gpfifo __user *user_gpfifo,
|
|
int offset,
|
|
int count)
|
|
{
|
|
u32 size;
|
|
int i;
|
|
struct nvgpu_gpfifo *gp;
|
|
bool gpfifo_allocated = false;
|
|
|
|
if (!gk20a_debug_trace_cmdbuf)
|
|
return;
|
|
|
|
if (!g && !user_gpfifo)
|
|
return;
|
|
|
|
if (!g) {
|
|
size = count * sizeof(struct nvgpu_gpfifo);
|
|
if (size) {
|
|
g = nvgpu_big_malloc(c->g, size);
|
|
if (!g)
|
|
return;
|
|
|
|
if (copy_from_user(g, user_gpfifo, size)) {
|
|
nvgpu_big_free(c->g, g);
|
|
return;
|
|
}
|
|
}
|
|
gpfifo_allocated = true;
|
|
}
|
|
|
|
gp = g + offset;
|
|
for (i = 0; i < count; i++, gp++)
|
|
trace_write_pushbuffer(c, gp);
|
|
|
|
if (gpfifo_allocated)
|
|
nvgpu_big_free(c->g, g);
|
|
}
|
|
|
|
static void __gk20a_channel_timeout_start(struct channel_gk20a *ch)
|
|
{
|
|
ch->timeout.gp_get = gk20a_userd_gp_get(ch->g, ch);
|
|
ch->timeout.running = true;
|
|
nvgpu_timeout_init(ch->g, &ch->timeout.timer,
|
|
gk20a_get_channel_watchdog_timeout(ch),
|
|
NVGPU_TIMER_CPU_TIMER);
|
|
}
|
|
|
|
/**
|
|
* Start a timeout counter (watchdog) on this channel.
|
|
*
|
|
* Trigger a watchdog to recover the channel after the per-platform timeout
|
|
* duration (but strictly no earlier) if the channel hasn't advanced within
|
|
* that time.
|
|
*
|
|
* If the timeout is already running, do nothing. This should be called when
|
|
* new jobs are submitted. The timeout will stop when the last tracked job
|
|
* finishes, making the channel idle.
|
|
*
|
|
* The channel's gpfifo read pointer will be used to determine whether the job
* has actually gotten stuck by then. After the timeout duration has expired, a
* worker thread will check the channel and recover it if it is stuck.
|
|
*/
|
|
static void gk20a_channel_timeout_start(struct channel_gk20a *ch)
|
|
{
|
|
struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
|
|
|
|
if (!ch->g->timeouts_enabled || !platform->ch_wdt_timeout_ms)
|
|
return;
|
|
|
|
if (!ch->wdt_enabled)
|
|
return;
|
|
|
|
nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
|
|
|
|
if (ch->timeout.running) {
|
|
nvgpu_raw_spinlock_release(&ch->timeout.lock);
|
|
return;
|
|
}
|
|
__gk20a_channel_timeout_start(ch);
|
|
nvgpu_raw_spinlock_release(&ch->timeout.lock);
|
|
}
|
|
|
|
/**
|
|
* Stop a running timeout counter (watchdog) on this channel.
|
|
*
|
|
* Make the watchdog consider the channel not running, so that it won't get
|
|
* recovered even if no progress is detected. Progress is not tracked if the
|
|
* watchdog is turned off.
|
|
*
|
|
* No guarantees are made about concurrent execution of the timeout handler.
|
|
* (This should be called from an update handler running in the same thread
|
|
* with the watchdog.)
|
|
*/
|
|
static bool gk20a_channel_timeout_stop(struct channel_gk20a *ch)
|
|
{
|
|
bool was_running;
|
|
|
|
nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
|
|
was_running = ch->timeout.running;
|
|
ch->timeout.running = false;
|
|
nvgpu_raw_spinlock_release(&ch->timeout.lock);
|
|
return was_running;
|
|
}
|
|
|
|
/**
|
|
* Continue a previously stopped timeout
|
|
*
|
|
* Enable the timeout again but don't reinitialize its timer.
|
|
*
|
|
* No guarantees are made about concurrent execution of the timeout handler.
|
|
* (This should be called from an update handler running in the same thread
|
|
* with the watchdog.)
|
|
*/
|
|
static void gk20a_channel_timeout_continue(struct channel_gk20a *ch)
|
|
{
|
|
nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
|
|
ch->timeout.running = true;
|
|
nvgpu_raw_spinlock_release(&ch->timeout.lock);
|
|
}
|
|
|
|
/**
|
|
* Rewind the timeout on each non-dormant channel.
|
|
*
|
|
* Reschedule the timeout of each active channel for which timeouts are running
|
|
* as if something had just happened on each channel right now. This should be
|
|
* called when a global hang is detected that could cause a false positive on
|
|
* other innocent channels.
|
|
*/
|
|
void gk20a_channel_timeout_restart_all_channels(struct gk20a *g)
|
|
{
|
|
struct fifo_gk20a *f = &g->fifo;
|
|
u32 chid;
|
|
|
|
for (chid = 0; chid < f->num_channels; chid++) {
|
|
struct channel_gk20a *ch = &f->channel[chid];
|
|
|
|
if (!gk20a_channel_get(ch))
|
|
continue;
|
|
|
|
nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
|
|
if (ch->timeout.running)
|
|
__gk20a_channel_timeout_start(ch);
|
|
nvgpu_raw_spinlock_release(&ch->timeout.lock);
|
|
|
|
gk20a_channel_put(ch);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Check if a timed out channel has hung and recover it if it has.
|
|
*
|
|
* Test if this channel has really got stuck at this point (should be called
|
|
* when the watchdog timer has expired) by checking if its gp_get has advanced
|
|
* or not. If gp_get has not advanced since the watchdog was started,
|
|
* force-reset the channel.
|
|
*
|
|
* The gpu is implicitly on at this point, because the watchdog can only run on
|
|
* channels that have submitted jobs pending for cleanup.
|
|
*/
|
|
static void gk20a_channel_timeout_handler(struct channel_gk20a *ch)
|
|
{
|
|
struct gk20a *g = ch->g;
|
|
u32 gp_get;
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
/* Get status and clear the timer */
|
|
nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
|
|
gp_get = ch->timeout.gp_get;
|
|
ch->timeout.running = false;
|
|
nvgpu_raw_spinlock_release(&ch->timeout.lock);
|
|
|
|
if (gk20a_userd_gp_get(ch->g, ch) != gp_get) {
|
|
/* Channel has advanced, reschedule */
|
|
gk20a_channel_timeout_start(ch);
|
|
return;
|
|
}
|
|
|
|
gk20a_err(dev_from_gk20a(g), "Job on channel %d timed out",
|
|
ch->hw_chid);
|
|
|
|
gk20a_debug_dump(g->dev);
|
|
gk20a_gr_debug_dump(g->dev);
|
|
|
|
g->ops.fifo.force_reset_ch(ch,
|
|
NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT, true);
|
|
}
|
|
|
|
/**
|
|
* Test if the per-channel timeout has expired, and handle it in that case.
|
|
*
|
|
* Each channel has an expiration time based watchdog. The timer is
|
|
* (re)initialized in two situations: when a new job is submitted on an idle
|
|
* channel and when the timeout is checked but progress is detected.
|
|
*
|
|
* A watchdog timeout does not necessarily mean a stuck channel, so this may
|
|
* or may not cause recovery.
|
|
*
|
|
* The timeout is stopped (disabled) after the last job in a row finishes
|
|
* making the channel idle.
|
|
*/
|
|
static void gk20a_channel_timeout_check(struct channel_gk20a *ch)
|
|
{
|
|
bool timed_out;
|
|
|
|
nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
|
|
timed_out = ch->timeout.running &&
|
|
nvgpu_timeout_peek_expired(&ch->timeout.timer);
|
|
nvgpu_raw_spinlock_release(&ch->timeout.lock);
|
|
|
|
if (timed_out)
|
|
gk20a_channel_timeout_handler(ch);
|
|
}
|
|
|
|
/**
|
|
* Loop over every living channel, check timeouts and handle stuck channels.
|
|
*/
|
|
static void gk20a_channel_poll_timeouts(struct gk20a *g)
|
|
{
|
|
unsigned int chid;
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
for (chid = 0; chid < g->fifo.num_channels; chid++) {
|
|
struct channel_gk20a *ch = &g->fifo.channel[chid];
|
|
|
|
if (gk20a_channel_get(ch)) {
|
|
gk20a_channel_timeout_check(ch);
|
|
gk20a_channel_put(ch);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Process one scheduled work item for this channel. Currently, the only thing
|
|
* the worker does is job cleanup handling.
|
|
*/
|
|
static void gk20a_channel_worker_process_ch(struct channel_gk20a *ch)
|
|
{
|
|
gk20a_dbg_fn("");
|
|
|
|
gk20a_channel_clean_up_jobs(ch, true);
|
|
|
|
/* ref taken when enqueued */
|
|
gk20a_channel_put(ch);
|
|
}
|
|
|
|
/**
|
|
* Tell the worker that one more work item needs to be done.
|
|
*
|
|
* Increase the work counter to synchronize the worker with the new work. Wake
|
|
* up the worker. If the worker was already running, it will handle this work
|
|
* before going to sleep.
|
|
*/
|
|
static int __gk20a_channel_worker_wakeup(struct gk20a *g)
|
|
{
|
|
int put;
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
/*
|
|
* Currently, the only work type is associated with a lock, which deals
|
|
* with any necessary barriers. If a work type with no locking were
|
|
* added, a wmb() would be needed here. See ..worker_pending() for a
|
|
* pair.
|
|
*/
|
|
|
|
put = atomic_inc_return(&g->channel_worker.put);
|
|
wake_up(&g->channel_worker.wq);
|
|
|
|
return put;
|
|
}
|
|
|
|
/**
|
|
* Test if there is some work pending.
|
|
*
|
|
* This is a pair for __gk20a_channel_worker_wakeup to be called from the
|
|
* worker. The worker has an internal work counter which is incremented once
|
|
* per finished work item. This is compared with the number of queued jobs,
|
|
* which may be channels on the items list or any other types of work.
|
|
*/
|
|
static bool __gk20a_channel_worker_pending(struct gk20a *g, int get)
|
|
{
|
|
bool pending = atomic_read(&g->channel_worker.put) != get;
|
|
|
|
/*
|
|
* This would be the place for a rmb() pairing a wmb() for a wakeup
|
|
* if we had any work with no implicit barriers caused by locking.
|
|
*/
|
|
|
|
return pending;
|
|
}
|
|
|
|
/**
|
|
* Process the queued works for the worker thread serially.
|
|
*
|
|
* Flush all the work items in the queue one by one. This may block timeout
|
|
* handling for a short while, as these are serialized.
|
|
*/
|
|
static void gk20a_channel_worker_process(struct gk20a *g, int *get)
|
|
{
|
|
gk20a_dbg_fn("");
|
|
|
|
while (__gk20a_channel_worker_pending(g, *get)) {
|
|
struct channel_gk20a *ch;
|
|
|
|
/*
|
|
* If a channel is on the list, it's guaranteed to be handled
|
|
* eventually just once. However, the opposite is not true: a
* channel may be under processing whether or not it's on the list.
|
|
*
|
|
* With this, processing channel works should be conservative
|
|
* as follows: it's always safe to look at a channel found in
|
|
* the list, and if someone enqueues the channel, it will be
|
|
* handled eventually, even if it's being handled at the same
|
|
* time. A channel is on the list only once; multiple calls to
|
|
* enqueue are harmless.
|
|
*/
|
|
nvgpu_spinlock_acquire(&g->channel_worker.items_lock);
|
|
ch = list_first_entry_or_null(&g->channel_worker.items,
|
|
struct channel_gk20a,
|
|
worker_item);
|
|
if (ch)
|
|
list_del_init(&ch->worker_item);
|
|
nvgpu_spinlock_release(&g->channel_worker.items_lock);
|
|
|
|
if (!ch) {
|
|
/*
|
|
* Woke up for some other reason, but currently the only
* expected reason is a channel being added to the items
* list, so warn and ack the message.
|
|
*/
|
|
gk20a_warn(g->dev, "Spurious worker event!");
|
|
++*get;
|
|
break;
|
|
}
|
|
|
|
gk20a_channel_worker_process_ch(ch);
|
|
++*get;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Look at channel states periodically, until canceled. Abort timed out
|
|
* channels serially. Process all work items found in the queue.
|
|
*/
|
|
static int gk20a_channel_poll_worker(void *arg)
|
|
{
|
|
struct gk20a *g = (struct gk20a *)arg;
|
|
struct gk20a_channel_worker *worker = &g->channel_worker;
|
|
unsigned long start_wait;
|
|
/* event timeout for also polling the watchdog */
|
|
unsigned long timeout = msecs_to_jiffies(100);
|
|
int get = 0;
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
start_wait = jiffies;
|
|
while (!kthread_should_stop()) {
|
|
bool got_events;
|
|
|
|
got_events = wait_event_timeout(
|
|
worker->wq,
|
|
__gk20a_channel_worker_pending(g, get),
|
|
timeout) > 0;
|
|
|
|
if (got_events)
|
|
gk20a_channel_worker_process(g, &get);
|
|
|
|
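/* Even if worker events keep arriving, poll the channel watchdogs
 * roughly every 100 ms. */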
if (jiffies - start_wait >= timeout) {
|
|
gk20a_channel_poll_timeouts(g);
|
|
start_wait = jiffies;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* Initialize the channel worker's metadata and start the background thread.
|
|
*/
|
|
int nvgpu_channel_worker_init(struct gk20a *g)
|
|
{
|
|
struct task_struct *task;
|
|
|
|
atomic_set(&g->channel_worker.put, 0);
|
|
init_waitqueue_head(&g->channel_worker.wq);
|
|
INIT_LIST_HEAD(&g->channel_worker.items);
|
|
nvgpu_spinlock_init(&g->channel_worker.items_lock);
|
|
task = kthread_run(gk20a_channel_poll_worker, g,
|
|
"nvgpu_channel_poll_%s", dev_name(g->dev));
|
|
if (IS_ERR(task)) {
|
|
gk20a_err(g->dev, "failed to start channel poller thread");
|
|
return PTR_ERR(task);
|
|
}
|
|
g->channel_worker.poll_task = task;
|
|
|
|
return 0;
|
|
}
|
|
|
|
void nvgpu_channel_worker_deinit(struct gk20a *g)
|
|
{
|
|
kthread_stop(g->channel_worker.poll_task);
|
|
}
|
|
|
|
/**
|
|
* Append a channel to the worker's list, if not there already.
|
|
*
|
|
* The worker thread processes work items (channels in its work list) and polls
|
|
* for other things. This adds @ch to the end of the list and wakes the worker
|
|
* up immediately. If the channel already existed in the list, it's not added,
|
|
* because in that case it has been scheduled already but has not yet been
|
|
* processed.
|
|
*/
|
|
void gk20a_channel_worker_enqueue(struct channel_gk20a *ch)
|
|
{
|
|
struct gk20a *g = ch->g;
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
/*
|
|
* Ref released when this item gets processed. The caller should hold
|
|
* one ref already, so can't fail.
|
|
*/
|
|
if (WARN_ON(!gk20a_channel_get(ch))) {
|
|
gk20a_warn(g->dev, "cannot get ch ref for worker!");
|
|
return;
|
|
}
|
|
|
|
nvgpu_spinlock_acquire(&g->channel_worker.items_lock);
|
|
if (!list_empty(&ch->worker_item)) {
|
|
/*
|
|
* Already queued, so will get processed eventually.
|
|
* The worker is probably awake already.
|
|
*/
|
|
nvgpu_spinlock_release(&g->channel_worker.items_lock);
|
|
gk20a_channel_put(ch);
|
|
return;
|
|
}
|
|
list_add_tail(&ch->worker_item, &g->channel_worker.items);
|
|
nvgpu_spinlock_release(&g->channel_worker.items_lock);
|
|
|
|
__gk20a_channel_worker_wakeup(g);
|
|
}
|
|
|
|
int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e)
|
|
{
|
|
struct priv_cmd_queue *q = &c->priv_cmd_q;
|
|
struct device *d = dev_from_gk20a(c->g);
|
|
|
|
if (!e)
|
|
return 0;
|
|
|
|
if (e->valid) {
|
|
/* read the entry's valid flag before reading its contents */
|
|
rmb();
|
|
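/* Priv cmdbuf entries are expected to be freed in allocation order. */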
if ((q->get != e->off) && e->off != 0)
|
|
gk20a_err(d, "requests out-of-order, ch=%d\n",
|
|
c->hw_chid);
|
|
q->get = e->off + e->size;
|
|
}
|
|
|
|
free_priv_cmdbuf(c, e);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int gk20a_channel_add_job(struct channel_gk20a *c,
|
|
struct channel_gk20a_job *job,
|
|
bool skip_buffer_refcounting)
|
|
{
|
|
struct vm_gk20a *vm = c->vm;
|
|
struct mapped_buffer_node **mapped_buffers = NULL;
|
|
int err = 0, num_mapped_buffers = 0;
|
|
bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(c);
|
|
|
|
if (!skip_buffer_refcounting) {
|
|
err = gk20a_vm_get_buffers(vm, &mapped_buffers,
|
|
&num_mapped_buffers);
|
|
if (err)
|
|
return err;
|
|
}
|
|
|
|
/*
|
|
* Ref to hold the channel open during the job lifetime. This is
|
|
* released by job cleanup launched via syncpt or sema interrupt.
|
|
*/
|
|
c = gk20a_channel_get(c);
|
|
|
|
if (c) {
|
|
job->num_mapped_buffers = num_mapped_buffers;
|
|
job->mapped_buffers = mapped_buffers;
|
|
|
|
gk20a_channel_timeout_start(c);
|
|
|
|
if (!pre_alloc_enabled)
|
|
channel_gk20a_joblist_lock(c);
|
|
|
|
/*
|
|
* ensure all pending writes complete before adding to the list.
|
|
* see corresponding rmb in gk20a_channel_clean_up_jobs() &
|
|
* gk20a_channel_abort_clean_up()
|
|
*/
|
|
wmb();
|
|
channel_gk20a_joblist_add(c, job);
|
|
|
|
if (!pre_alloc_enabled)
|
|
channel_gk20a_joblist_unlock(c);
|
|
} else {
|
|
err = -ETIMEDOUT;
|
|
goto err_put_buffers;
|
|
}
|
|
|
|
return 0;
|
|
|
|
err_put_buffers:
|
|
gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers);
|
|
|
|
return err;
|
|
}
|
|
|
|
/**
|
|
* Clean up job resources for further jobs to use.
|
|
* @clean_all: If true, process as many jobs as possible, otherwise just one.
|
|
*
|
|
* Loop all jobs from the joblist until a pending job is found, or just one if
|
|
* clean_all is not set. Pending jobs are detected from the job's post fence,
|
|
* so this is only done for jobs that have job tracking resources. Free all
|
|
* per-job memory for completed jobs; in case of preallocated resources, this
|
|
* opens up slots for new jobs to be submitted.
|
|
*/
|
|
static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
|
|
bool clean_all)
|
|
{
|
|
struct vm_gk20a *vm;
|
|
struct channel_gk20a_job *job;
|
|
struct gk20a_platform *platform;
|
|
struct gk20a *g;
|
|
int job_finished = 0;
|
|
bool watchdog_on = false;
|
|
|
|
c = gk20a_channel_get(c);
|
|
if (!c)
|
|
return;
|
|
|
|
if (!c->g->power_on) { /* shutdown case */
|
|
gk20a_channel_put(c);
|
|
return;
|
|
}
|
|
|
|
vm = c->vm;
|
|
g = c->g;
|
|
platform = gk20a_get_platform(g->dev);
|
|
|
|
/*
|
|
* If !clean_all, we're in a condition where watchdog isn't supported
|
|
* anyway (this would be a no-op).
|
|
*/
|
|
if (clean_all)
|
|
watchdog_on = gk20a_channel_timeout_stop(c);
|
|
|
|
/* Synchronize with abort cleanup that needs the jobs. */
|
|
nvgpu_mutex_acquire(&c->joblist.cleanup_lock);
|
|
|
|
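/* Pop jobs from the head of the list until a still-pending one (or the
 * end of the list) is found. */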
while (1) {
|
|
bool completed;
|
|
|
|
channel_gk20a_joblist_lock(c);
|
|
if (channel_gk20a_joblist_is_empty(c)) {
|
|
/*
|
|
* No jobs in flight, timeout will remain stopped until
|
|
* new jobs are submitted.
|
|
*/
|
|
channel_gk20a_joblist_unlock(c);
|
|
break;
|
|
}
|
|
|
|
/*
|
|
* ensure that all subsequent reads occur after checking
|
|
* that we have a valid node. see corresponding wmb in
|
|
* gk20a_channel_add_job().
|
|
*/
|
|
rmb();
|
|
job = channel_gk20a_joblist_peek(c);
|
|
channel_gk20a_joblist_unlock(c);
|
|
|
|
completed = gk20a_fence_is_expired(job->post_fence);
|
|
if (!completed) {
|
|
/*
|
|
* The watchdog eventually sees an updated gp_get if
|
|
* something happened in this loop. A new job can have
|
|
* been submitted between the above call to stop and
|
|
* this - in that case, this is a no-op and the new
|
|
* later timeout is still used.
|
|
*/
|
|
if (clean_all && watchdog_on)
|
|
gk20a_channel_timeout_continue(c);
|
|
break;
|
|
}
|
|
|
|
WARN_ON(!c->sync);
|
|
|
|
if (c->sync) {
|
|
c->sync->signal_timeline(c->sync);
|
|
|
|
if (platform->aggressive_sync_destroy_thresh) {
|
|
nvgpu_mutex_acquire(&c->sync_lock);
|
|
if (atomic_dec_and_test(&c->sync->refcount) &&
|
|
platform->aggressive_sync_destroy) {
|
|
gk20a_channel_sync_destroy(c->sync);
|
|
c->sync = NULL;
|
|
}
|
|
nvgpu_mutex_release(&c->sync_lock);
|
|
}
|
|
}
|
|
|
|
if (job->num_mapped_buffers)
|
|
gk20a_vm_put_buffers(vm, job->mapped_buffers,
|
|
job->num_mapped_buffers);
|
|
|
|
/* Remove job from channel's job list before we close the
|
|
* fences, to prevent other callers (gk20a_channel_abort) from
|
|
* trying to dereference post_fence when it no longer exists.
|
|
*/
|
|
channel_gk20a_joblist_lock(c);
|
|
channel_gk20a_joblist_delete(c, job);
|
|
channel_gk20a_joblist_unlock(c);
|
|
|
|
/* Close the fences (this will unref the semaphores and release
|
|
* them to the pool). */
|
|
gk20a_fence_put(job->pre_fence);
|
|
gk20a_fence_put(job->post_fence);
|
|
|
|
/* Free the private command buffers (wait_cmd first and
|
|
* then incr_cmd i.e. order of allocation) */
|
|
gk20a_free_priv_cmdbuf(c, job->wait_cmd);
|
|
gk20a_free_priv_cmdbuf(c, job->incr_cmd);
|
|
|
|
/* another reference was taken in add_job; the caller must hold a
* ref, so the channel won't get freed here. */
|
|
gk20a_channel_put(c);
|
|
|
|
/*
|
|
* ensure all pending writes complete before freeing up the job.
|
|
* see corresponding rmb in channel_gk20a_alloc_job().
|
|
*/
|
|
wmb();
|
|
|
|
channel_gk20a_free_job(c, job);
|
|
job_finished = 1;
|
|
gk20a_idle(g);
|
|
|
|
if (!clean_all) {
|
|
/* Timeout isn't supported here so don't touch it. */
|
|
break;
|
|
}
|
|
}
|
|
|
|
nvgpu_mutex_release(&c->joblist.cleanup_lock);
|
|
|
|
if (job_finished && c->update_fn)
|
|
schedule_work(&c->update_fn_work);
|
|
|
|
gk20a_channel_put(c);
|
|
}
|
|
|
|
/**
|
|
* Schedule a job cleanup work on this channel to free resources and to signal
|
|
* about completion.
|
|
*
|
|
* Call this when there has been an interrupt about finished jobs, or when job
|
|
* cleanup needs to be performed, e.g., when closing a channel. This is always
|
|
* safe to call even if there is nothing to clean up. Any visible actions on
|
|
* jobs just before calling this are guaranteed to be processed.
|
|
*/
|
|
void gk20a_channel_update(struct channel_gk20a *c)
|
|
{
|
|
if (!c->g->power_on) { /* shutdown case */
|
|
return;
|
|
}
|
|
|
|
trace_gk20a_channel_update(c->hw_chid);
|
|
/* A queued channel is always checked for job cleanup. */
|
|
gk20a_channel_worker_enqueue(c);
|
|
}
|
|
|
|
static void gk20a_submit_append_priv_cmdbuf(struct channel_gk20a *c,
|
|
struct priv_cmd_entry *cmd)
|
|
{
|
|
struct gk20a *g = c->g;
|
|
struct mem_desc *gpfifo_mem = &c->gpfifo.mem;
|
|
struct nvgpu_gpfifo x = {
|
|
.entry0 = u64_lo32(cmd->gva),
|
|
.entry1 = u64_hi32(cmd->gva) |
|
|
pbdma_gp_entry1_length_f(cmd->size)
|
|
};
|
|
|
|
gk20a_mem_wr_n(g, gpfifo_mem, c->gpfifo.put * sizeof(x),
|
|
&x, sizeof(x));
|
|
|
|
if (cmd->mem->aperture == APERTURE_SYSMEM)
|
|
trace_gk20a_push_cmdbuf(dev_name(g->dev), 0, cmd->size, 0,
|
|
cmd->mem->cpu_va + cmd->off * sizeof(u32));
|
|
|
|
c->gpfifo.put = (c->gpfifo.put + 1) & (c->gpfifo.entry_num - 1);
|
|
}
|
|
|
|
/*
|
|
* Copy source gpfifo entries into the gpfifo ring buffer, potentially
|
|
* splitting into two memcpys to handle wrap-around.
|
|
*/
|
|
static int gk20a_submit_append_gpfifo(struct channel_gk20a *c,
|
|
struct nvgpu_gpfifo *kern_gpfifo,
|
|
struct nvgpu_gpfifo __user *user_gpfifo,
|
|
u32 num_entries)
|
|
{
|
|
/* byte offsets */
|
|
u32 gpfifo_size = c->gpfifo.entry_num * sizeof(struct nvgpu_gpfifo);
|
|
u32 len = num_entries * sizeof(struct nvgpu_gpfifo);
|
|
u32 start = c->gpfifo.put * sizeof(struct nvgpu_gpfifo);
|
|
u32 end = start + len; /* exclusive */
|
|
struct mem_desc *gpfifo_mem = &c->gpfifo.mem;
|
|
struct nvgpu_gpfifo *cpu_src;
|
|
int err;
|
|
|
|
if (user_gpfifo && !c->gpfifo.pipe) {
|
|
/*
|
|
* This path (from userspace to sysmem) is special in order to
|
|
* avoid two copies unnecessarily (from user to pipe, then from
|
|
* pipe to gpu sysmem buffer).
|
|
*
|
|
* As a special case, the pipe buffer exists if PRAMIN writes
|
|
* are forced, although the buffers may not be in vidmem in
|
|
* that case.
|
|
*/
|
|
if (end > gpfifo_size) {
|
|
/* wrap-around */
|
|
int length0 = gpfifo_size - start;
|
|
int length1 = len - length0;
|
|
void __user *user2 = (u8 __user *)user_gpfifo + length0;
|
|
|
|
err = copy_from_user(gpfifo_mem->cpu_va + start,
|
|
user_gpfifo, length0);
|
|
if (err)
|
|
return err;
|
|
|
|
err = copy_from_user(gpfifo_mem->cpu_va,
|
|
user2, length1);
|
|
if (err)
|
|
return err;
|
|
} else {
|
|
err = copy_from_user(gpfifo_mem->cpu_va + start,
|
|
user_gpfifo, len);
|
|
if (err)
|
|
return err;
|
|
}
|
|
|
|
trace_write_pushbuffer_range(c, NULL, user_gpfifo,
|
|
0, num_entries);
|
|
goto out;
|
|
} else if (user_gpfifo) {
|
|
/* from userspace to vidmem or sysmem when pramin forced, use
|
|
* the common copy path below */
|
|
err = copy_from_user(c->gpfifo.pipe, user_gpfifo, len);
|
|
if (err)
|
|
return err;
|
|
|
|
cpu_src = c->gpfifo.pipe;
|
|
} else {
|
|
/* from kernel to either sysmem or vidmem, don't need
|
|
* copy_from_user so use the common path below */
|
|
cpu_src = kern_gpfifo;
|
|
}
|
|
|
|
if (end > gpfifo_size) {
|
|
/* wrap-around */
|
|
int length0 = gpfifo_size - start;
|
|
int length1 = len - length0;
|
|
void *src2 = (u8 *)cpu_src + length0;
|
|
|
|
gk20a_mem_wr_n(c->g, gpfifo_mem, start, cpu_src, length0);
|
|
gk20a_mem_wr_n(c->g, gpfifo_mem, 0, src2, length1);
|
|
} else {
|
|
gk20a_mem_wr_n(c->g, gpfifo_mem, start, cpu_src, len);
|
|
|
|
}
|
|
|
|
trace_write_pushbuffer_range(c, cpu_src, NULL, 0, num_entries);
|
|
|
|
out:
|
|
c->gpfifo.put = (c->gpfifo.put + num_entries) &
|
|
(c->gpfifo.entry_num - 1);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Handle the submit synchronization - pre-fences and post-fences.
|
|
*/
|
|
static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
|
|
struct nvgpu_fence *fence,
|
|
struct channel_gk20a_job *job,
|
|
struct priv_cmd_entry **wait_cmd,
|
|
struct priv_cmd_entry **incr_cmd,
|
|
struct gk20a_fence **pre_fence,
|
|
struct gk20a_fence **post_fence,
|
|
bool force_need_sync_fence,
|
|
bool register_irq,
|
|
u32 flags)
|
|
{
|
|
struct gk20a *g = c->g;
|
|
struct gk20a_platform *platform = gk20a_get_platform(g->dev);
|
|
bool need_sync_fence = false;
|
|
bool new_sync_created = false;
|
|
int wait_fence_fd = -1;
|
|
int err = 0;
|
|
bool need_wfi = !(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI);
|
|
bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(c);
|
|
|
|
/*
|
|
* If user wants to always allocate sync_fence_fds then respect that;
|
|
* otherwise, allocate sync_fence_fd based on user flags.
|
|
*/
|
|
if (force_need_sync_fence)
|
|
need_sync_fence = true;
|
|
|
|
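/* With aggressive sync destroy, the sync object may have been freed
 * after the previous jobs completed, so create it on demand here. */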
if (platform->aggressive_sync_destroy_thresh) {
|
|
nvgpu_mutex_acquire(&c->sync_lock);
|
|
if (!c->sync) {
|
|
c->sync = gk20a_channel_sync_create(c);
|
|
if (!c->sync) {
|
|
err = -ENOMEM;
|
|
nvgpu_mutex_release(&c->sync_lock);
|
|
goto fail;
|
|
}
|
|
new_sync_created = true;
|
|
}
|
|
atomic_inc(&c->sync->refcount);
|
|
nvgpu_mutex_release(&c->sync_lock);
|
|
}
|
|
|
|
if (g->ops.fifo.resetup_ramfc && new_sync_created) {
|
|
err = g->ops.fifo.resetup_ramfc(c);
|
|
if (err)
|
|
goto fail;
|
|
}
|
|
|
|
/*
|
|
* Optionally insert a syncpt wait at the beginning of the gpfifo submission
* when requested by the user and the wait hasn't expired. Validate that the id
|
|
* makes sense, elide if not. The only reason this isn't being
|
|
* unceremoniously killed is to keep running some tests which trigger
|
|
* this condition.
|
|
*/
|
|
if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) {
|
|
job->pre_fence = gk20a_alloc_fence(c);
|
|
if (!job->pre_fence) {
|
|
err = -ENOMEM;
|
|
goto fail;
|
|
}
|
|
|
|
if (!pre_alloc_enabled)
|
|
job->wait_cmd = kzalloc(sizeof(struct priv_cmd_entry),
|
|
GFP_KERNEL);
|
|
|
|
if (!job->wait_cmd) {
|
|
err = -ENOMEM;
|
|
goto clean_up_pre_fence;
|
|
}
|
|
|
|
if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
|
|
wait_fence_fd = fence->id;
|
|
err = c->sync->wait_fd(c->sync, wait_fence_fd,
|
|
job->wait_cmd, job->pre_fence);
|
|
} else {
|
|
err = c->sync->wait_syncpt(c->sync, fence->id,
|
|
fence->value, job->wait_cmd,
|
|
job->pre_fence);
|
|
}
|
|
|
|
if (!err) {
|
|
if (job->wait_cmd->valid)
|
|
*wait_cmd = job->wait_cmd;
|
|
*pre_fence = job->pre_fence;
|
|
} else
|
|
goto clean_up_wait_cmd;
|
|
}
|
|
|
|
if ((flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) &&
|
|
(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE))
|
|
need_sync_fence = true;
|
|
|
|
/*
|
|
* Always generate an increment at the end of a GPFIFO submission. This
|
|
* is used to keep track of method completion for idle railgating. The
|
|
* sync_pt/semaphore PB is added to the GPFIFO later on in submit.
|
|
*/
|
|
job->post_fence = gk20a_alloc_fence(c);
|
|
if (!job->post_fence) {
|
|
err = -ENOMEM;
|
|
goto clean_up_wait_cmd;
|
|
}
|
|
if (!pre_alloc_enabled)
|
|
job->incr_cmd = kzalloc(sizeof(struct priv_cmd_entry),
|
|
GFP_KERNEL);
|
|
|
|
if (!job->incr_cmd) {
|
|
err = -ENOMEM;
|
|
goto clean_up_post_fence;
|
|
}
|
|
|
|
if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
|
|
err = c->sync->incr_user(c->sync, wait_fence_fd, job->incr_cmd,
|
|
job->post_fence, need_wfi, need_sync_fence,
|
|
register_irq);
|
|
else
|
|
err = c->sync->incr(c->sync, job->incr_cmd,
|
|
job->post_fence, need_sync_fence,
|
|
register_irq);
|
|
if (!err) {
|
|
*incr_cmd = job->incr_cmd;
|
|
*post_fence = job->post_fence;
|
|
} else
|
|
goto clean_up_incr_cmd;
|
|
|
|
return 0;
|
|
|
|
clean_up_incr_cmd:
|
|
free_priv_cmdbuf(c, job->incr_cmd);
|
|
if (!pre_alloc_enabled)
|
|
job->incr_cmd = NULL;
|
|
clean_up_post_fence:
|
|
gk20a_fence_put(job->post_fence);
|
|
job->post_fence = NULL;
|
|
clean_up_wait_cmd:
|
|
free_priv_cmdbuf(c, job->wait_cmd);
|
|
if (!pre_alloc_enabled)
|
|
job->wait_cmd = NULL;
|
|
clean_up_pre_fence:
|
|
gk20a_fence_put(job->pre_fence);
|
|
job->pre_fence = NULL;
|
|
fail:
|
|
*wait_cmd = NULL;
|
|
*pre_fence = NULL;
|
|
return err;
|
|
}
|
|
|
|
int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
|
|
struct nvgpu_gpfifo *gpfifo,
|
|
struct nvgpu_submit_gpfifo_args *args,
|
|
u32 num_entries,
|
|
u32 flags,
|
|
struct nvgpu_fence *fence,
|
|
struct gk20a_fence **fence_out,
|
|
bool force_need_sync_fence,
|
|
struct fifo_profile_gk20a *profile)
|
|
{
|
|
struct gk20a *g = c->g;
|
|
struct device *d = dev_from_gk20a(g);
|
|
struct priv_cmd_entry *wait_cmd = NULL;
|
|
struct priv_cmd_entry *incr_cmd = NULL;
|
|
struct gk20a_fence *pre_fence = NULL;
|
|
struct gk20a_fence *post_fence = NULL;
|
|
struct channel_gk20a_job *job = NULL;
|
|
/* we might need two extra gpfifo entries - one for pre fence
|
|
* and one for post fence. */
|
|
const int extra_entries = 2;
|
|
bool skip_buffer_refcounting = (flags &
|
|
NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING);
|
|
int err = 0;
|
|
bool need_job_tracking;
|
|
bool need_deferred_cleanup = false;
|
|
struct nvgpu_gpfifo __user *user_gpfifo = args ?
|
|
(struct nvgpu_gpfifo __user *)(uintptr_t)args->gpfifo : NULL;
|
|
struct gk20a_platform *platform;
|
|
|
|
if (g->driver_is_dying)
|
|
return -ENODEV;
|
|
|
|
platform = gk20a_get_platform(d);
|
|
|
|
if (c->has_timedout)
|
|
return -ETIMEDOUT;
|
|
|
|
/* fifo not large enough for request. Return error immediately.
|
|
* Kernel can insert gpfifo entries before and after user gpfifos.
|
|
* So, add extra_entries to the user request. Also, HW with fifo size N
* can accept only N-1 entries, hence the condition below. */
|
|
if (c->gpfifo.entry_num - 1 < num_entries + extra_entries) {
|
|
gk20a_err(d, "not enough gpfifo space allocated");
|
|
return -ENOMEM;
|
|
}
|
|
|
|
if (!gpfifo && !args)
|
|
return -EINVAL;
|
|
|
|
if ((flags & (NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT |
|
|
NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)) &&
|
|
!fence)
|
|
return -EINVAL;
|
|
|
|
/* an address space needs to have been bound at this point. */
|
|
if (!gk20a_channel_as_bound(c)) {
|
|
gk20a_err(d,
|
|
"not bound to an address space at time of gpfifo"
|
|
" submission.");
|
|
return -EINVAL;
|
|
}
|
|
|
|
if (profile)
|
|
profile->timestamp[PROFILE_ENTRY] = sched_clock();
|
|
|
|
#ifdef CONFIG_DEBUG_FS
|
|
/* update debug settings */
|
|
if (g->ops.ltc.sync_debugfs)
|
|
g->ops.ltc.sync_debugfs(g);
|
|
#endif
|
|
|
|
gk20a_dbg_info("channel %d", c->hw_chid);
|
|
|
|
/*
|
|
* Job tracking is necessary for any of the following conditions:
|
|
* - pre- or post-fence functionality
|
|
* - channel wdt
|
|
* - GPU rail-gating
|
|
* - buffer refcounting
|
|
*
|
|
* If none of the conditions are met, then job tracking is not
|
|
* required and a fast submit can be done (ie. only need to write
|
|
* out userspace GPFIFO entries and update GP_PUT).
|
|
*/
|
|
need_job_tracking = (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) ||
|
|
(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) ||
|
|
c->wdt_enabled ||
|
|
platform->can_railgate ||
|
|
!skip_buffer_refcounting;
|
|
|
|
if (need_job_tracking) {
|
|
bool need_sync_framework = false;
|
|
|
|
/*
|
|
* If the channel is to have deterministic latency and
|
|
* job tracking is required, the channel must have
|
|
* pre-allocated resources. Otherwise, we fail the submit here
|
|
*/
|
|
if (c->deterministic && !channel_gk20a_is_prealloc_enabled(c))
|
|
return -EINVAL;
|
|
|
|
need_sync_framework = force_need_sync_fence ||
|
|
gk20a_channel_sync_needs_sync_framework(g) ||
|
|
(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE &&
|
|
(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT ||
|
|
flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET));
|
|
|
|
/*
|
|
* Deferred clean-up is necessary for any of the following
|
|
* conditions:
|
|
* - channel's deterministic flag is not set
|
|
* - dependency on sync framework, which could make the
|
|
* behavior of the clean-up operation non-deterministic
|
|
* (should not be performed in the submit path)
|
|
* - channel wdt
|
|
* - GPU rail-gating
|
|
* - buffer refcounting
|
|
*
|
|
* If none of the conditions are met, then deferred clean-up
|
|
* is not required, and we clean-up one job-tracking
|
|
* resource in the submit path.
|
|
*/
|
|
need_deferred_cleanup = !c->deterministic ||
|
|
need_sync_framework ||
|
|
c->wdt_enabled ||
|
|
platform->can_railgate ||
|
|
!skip_buffer_refcounting;
|
|
|
|
/*
|
|
* For deterministic channels, we don't allow deferred clean_up
|
|
* processing to occur. If we hit this case, we fail the submit.
|
|
*/
|
|
if (c->deterministic && need_deferred_cleanup)
|
|
return -EINVAL;
|
|
|
|
/* released by job cleanup via syncpt or sema interrupt */
|
|
err = gk20a_busy(g);
|
|
if (err) {
|
|
gk20a_err(d, "failed to host gk20a to submit gpfifo, process %s",
|
|
current->comm);
|
|
return err;
|
|
}
|
|
|
|
if (!need_deferred_cleanup) {
|
|
/* clean up a single job */
|
|
gk20a_channel_clean_up_jobs(c, false);
|
|
}
|
|
}
|
|
|
|
trace_gk20a_channel_submit_gpfifo(dev_name(c->g->dev),
|
|
c->hw_chid,
|
|
num_entries,
|
|
flags,
|
|
fence ? fence->id : 0,
|
|
fence ? fence->value : 0);
|
|
|
|
gk20a_dbg_info("pre-submit put %d, get %d, size %d",
|
|
c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
|
|
|
|
/*
|
|
* Make sure we have enough space for gpfifo entries. Check cached
|
|
* values first and then read from HW. If no space, return EAGAIN
|
|
* and let userspace decide whether to retry the request.
|
|
*/
|
|
if (gp_free_count(c) < num_entries + extra_entries) {
|
|
if (get_gp_free_count(c) < num_entries + extra_entries) {
|
|
err = -EAGAIN;
|
|
goto clean_up;
|
|
}
|
|
}
|
|
|
|
if (c->has_timedout) {
|
|
err = -ETIMEDOUT;
|
|
goto clean_up;
|
|
}
|
|
|
|
if (need_job_tracking) {
|
|
err = channel_gk20a_alloc_job(c, &job);
|
|
if (err)
|
|
goto clean_up;
|
|
|
|
err = gk20a_submit_prepare_syncs(c, fence, job,
|
|
&wait_cmd, &incr_cmd,
|
|
&pre_fence, &post_fence,
|
|
force_need_sync_fence,
|
|
need_deferred_cleanup,
|
|
flags);
|
|
if (err)
|
|
goto clean_up_job;
|
|
}
|
|
|
|
if (profile)
|
|
profile->timestamp[PROFILE_JOB_TRACKING] = sched_clock();
|
|
|
|
if (wait_cmd)
|
|
gk20a_submit_append_priv_cmdbuf(c, wait_cmd);
|
|
|
|
if (gpfifo || user_gpfifo)
|
|
err = gk20a_submit_append_gpfifo(c, gpfifo, user_gpfifo,
|
|
num_entries);
|
|
if (err)
|
|
goto clean_up_job;
|
|
|
|
/*
|
|
* And here's where we add the incr_cmd we generated earlier. It should
|
|
* always run!
|
|
*/
|
|
if (incr_cmd)
|
|
gk20a_submit_append_priv_cmdbuf(c, incr_cmd);
|
|
|
|
if (fence_out)
|
|
*fence_out = gk20a_fence_get(post_fence);
|
|
|
|
if (need_job_tracking)
|
|
/* TODO! Check for errors... */
|
|
gk20a_channel_add_job(c, job, skip_buffer_refcounting);
|
|
if (profile)
|
|
profile->timestamp[PROFILE_APPEND] = sched_clock();
|
|
|
|
g->ops.fifo.userd_gp_put(g, c);
|
|
|
|
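/* Reschedule the runlist if userspace requested it and the hardware op
 * is available. */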
if ((NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST & flags) &&
|
|
g->ops.fifo.reschedule_runlist)
|
|
g->ops.fifo.reschedule_runlist(g, c->runlist_id);
|
|
|
|
trace_gk20a_channel_submitted_gpfifo(dev_name(c->g->dev),
|
|
c->hw_chid,
|
|
num_entries,
|
|
flags,
|
|
post_fence ? post_fence->syncpt_id : 0,
|
|
post_fence ? post_fence->syncpt_value : 0);
|
|
|
|
gk20a_dbg_info("post-submit put %d, get %d, size %d",
|
|
c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
|
|
|
|
if (profile)
|
|
profile->timestamp[PROFILE_END] = sched_clock();
|
|
gk20a_dbg_fn("done");
|
|
return err;
|
|
|
|
clean_up_job:
|
|
channel_gk20a_free_job(c, job);
|
|
clean_up:
|
|
gk20a_dbg_fn("fail");
|
|
gk20a_fence_put(pre_fence);
|
|
gk20a_fence_put(post_fence);
|
|
if (need_deferred_cleanup)
|
|
gk20a_idle(g);
|
|
return err;
|
|
}
|
|
|
|
int gk20a_init_channel_support(struct gk20a *g, u32 chid)
|
|
{
|
|
struct channel_gk20a *c = g->fifo.channel+chid;
|
|
c->g = NULL;
|
|
c->hw_chid = chid;
|
|
atomic_set(&c->bound, false);
|
|
nvgpu_spinlock_init(&c->ref_obtain_lock);
|
|
atomic_set(&c->ref_count, 0);
|
|
c->referenceable = false;
|
|
init_waitqueue_head(&c->ref_count_dec_wq);
|
|
#if GK20A_CHANNEL_REFCOUNT_TRACKING
|
|
nvgpu_spinlock_init(&c->ref_actions_lock);
|
|
#endif
|
|
nvgpu_mutex_init(&c->ioctl_lock);
|
|
nvgpu_mutex_init(&c->error_notifier_mutex);
|
|
nvgpu_mutex_init(&c->joblist.cleanup_lock);
|
|
nvgpu_spinlock_init(&c->joblist.dynamic.lock);
|
|
nvgpu_mutex_init(&c->joblist.pre_alloc.read_lock);
|
|
nvgpu_raw_spinlock_init(&c->timeout.lock);
|
|
nvgpu_mutex_init(&c->sync_lock);
|
|
|
|
INIT_LIST_HEAD(&c->joblist.dynamic.jobs);
|
|
#if defined(CONFIG_GK20A_CYCLE_STATS)
|
|
nvgpu_mutex_init(&c->cyclestate.cyclestate_buffer_mutex);
|
|
nvgpu_mutex_init(&c->cs_client_mutex);
|
|
#endif
|
|
INIT_LIST_HEAD(&c->dbg_s_list);
|
|
INIT_LIST_HEAD(&c->event_id_list);
|
|
nvgpu_mutex_init(&c->event_id_list_lock);
|
|
nvgpu_mutex_init(&c->dbg_s_lock);
|
|
list_add(&c->free_chs, &g->fifo.free_chs);
|
|
|
|
INIT_LIST_HEAD(&c->worker_item);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int gk20a_channel_wait_semaphore(struct channel_gk20a *ch,
|
|
ulong id, u32 offset,
|
|
u32 payload, long timeout)
|
|
{
|
|
struct device *dev = ch->g->dev;
|
|
struct dma_buf *dmabuf;
|
|
void *data;
|
|
u32 *semaphore;
|
|
int ret = 0;
|
|
long remain;
|
|
|
|
/* do not wait if channel has timed out */
|
|
if (ch->has_timedout)
|
|
return -ETIMEDOUT;
|
|
|
|
dmabuf = dma_buf_get(id);
|
|
if (IS_ERR(dmabuf)) {
|
|
gk20a_err(dev, "invalid notifier nvmap handle 0x%lx", id);
|
|
return -EINVAL;
|
|
}
|
|
|
|
data = dma_buf_kmap(dmabuf, offset >> PAGE_SHIFT);
|
|
if (!data) {
|
|
gk20a_err(dev, "failed to map notifier memory");
|
|
ret = -EINVAL;
|
|
goto cleanup_put;
|
|
}
|
|
|
|
semaphore = data + (offset & ~PAGE_MASK);
|
|
|
|
remain = wait_event_interruptible_timeout(
|
|
ch->semaphore_wq,
|
|
*semaphore == payload || ch->has_timedout,
|
|
timeout);
|
|
|
|
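/* remain == 0 means the wait timed out; a negative value means it was
 * interrupted. */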
if (remain == 0 && *semaphore != payload)
|
|
ret = -ETIMEDOUT;
|
|
else if (remain < 0)
|
|
ret = remain;
|
|
|
|
dma_buf_kunmap(dmabuf, offset >> PAGE_SHIFT, data);
|
|
cleanup_put:
|
|
dma_buf_put(dmabuf);
|
|
return ret;
|
|
}
|
|
|
|
static int gk20a_channel_wait(struct channel_gk20a *ch,
|
|
struct nvgpu_wait_args *args)
|
|
{
|
|
struct device *d = dev_from_gk20a(ch->g);
|
|
struct dma_buf *dmabuf;
|
|
struct notification *notif;
|
|
struct timespec tv;
|
|
u64 jiffies;
|
|
ulong id;
|
|
u32 offset;
|
|
unsigned long timeout;
|
|
int remain, ret = 0;
|
|
u64 end;
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
if (ch->has_timedout)
|
|
return -ETIMEDOUT;
|
|
|
|
if (args->timeout == NVGPU_NO_TIMEOUT)
|
|
timeout = MAX_SCHEDULE_TIMEOUT;
|
|
else
|
|
timeout = (u32)msecs_to_jiffies(args->timeout);
|
|
|
|
switch (args->type) {
|
|
case NVGPU_WAIT_TYPE_NOTIFIER:
|
|
id = args->condition.notifier.dmabuf_fd;
|
|
offset = args->condition.notifier.offset;
|
|
end = offset + sizeof(struct notification);
|
|
|
|
dmabuf = dma_buf_get(id);
|
|
if (IS_ERR(dmabuf)) {
|
|
gk20a_err(d, "invalid notifier nvmap handle 0x%lx",
|
|
id);
|
|
return -EINVAL;
|
|
}
|
|
|
|
if (end > dmabuf->size || end < sizeof(struct notification)) {
|
|
dma_buf_put(dmabuf);
|
|
gk20a_err(d, "invalid notifier offset\n");
|
|
return -EINVAL;
|
|
}
|
|
|
|
notif = dma_buf_vmap(dmabuf);
|
|
if (!notif) {
|
|
gk20a_err(d, "failed to map notifier memory");
|
|
return -ENOMEM;
|
|
}
|
|
|
|
notif = (struct notification *)((uintptr_t)notif + offset);
|
|
|
|
/* user should set status pending before
|
|
* calling this ioctl */
|
|
remain = wait_event_interruptible_timeout(
|
|
ch->notifier_wq,
|
|
notif->status == 0 || ch->has_timedout,
|
|
timeout);
|
|
|
|
if (remain == 0 && notif->status != 0) {
|
|
ret = -ETIMEDOUT;
|
|
goto notif_clean_up;
|
|
} else if (remain < 0) {
|
|
ret = -EINTR;
|
|
goto notif_clean_up;
|
|
}
|
|
|
|
/* TBD: fill in correct information */
|
|
jiffies = get_jiffies_64();
|
|
jiffies_to_timespec(jiffies, &tv);
|
|
notif->timestamp.nanoseconds[0] = tv.tv_nsec;
|
|
notif->timestamp.nanoseconds[1] = tv.tv_sec;
|
|
notif->info32 = 0xDEADBEEF; /* should be object name */
|
|
notif->info16 = ch->hw_chid; /* should be method offset */
|
|
|
|
notif_clean_up:
|
|
dma_buf_vunmap(dmabuf, notif);
|
|
return ret;
|
|
|
|
case NVGPU_WAIT_TYPE_SEMAPHORE:
|
|
ret = gk20a_channel_wait_semaphore(ch,
|
|
args->condition.semaphore.dmabuf_fd,
|
|
args->condition.semaphore.offset,
|
|
args->condition.semaphore.payload,
|
|
timeout);
|
|
|
|
break;
|
|
|
|
default:
|
|
ret = -EINVAL;
|
|
break;
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
static unsigned int gk20a_event_id_poll(struct file *filep, poll_table *wait)
|
|
{
|
|
unsigned int mask = 0;
|
|
struct gk20a_event_id_data *event_id_data = filep->private_data;
|
|
struct gk20a *g = event_id_data->g;
|
|
u32 event_id = event_id_data->event_id;
|
|
|
|
gk20a_dbg(gpu_dbg_fn | gpu_dbg_info, "");
|
|
|
|
poll_wait(filep, &event_id_data->event_id_wq, wait);
|
|
|
|
nvgpu_mutex_acquire(&event_id_data->lock);
|
|
|
|
if (event_id_data->is_tsg) {
|
|
struct tsg_gk20a *tsg = g->fifo.tsg + event_id_data->id;
|
|
|
|
if (event_id_data->event_posted) {
|
|
gk20a_dbg_info(
|
|
"found pending event_id=%d on TSG=%d\n",
|
|
event_id, tsg->tsgid);
|
|
mask = (POLLPRI | POLLIN);
|
|
event_id_data->event_posted = false;
|
|
}
|
|
} else {
|
|
struct channel_gk20a *ch = g->fifo.channel
|
|
+ event_id_data->id;
|
|
|
|
if (event_id_data->event_posted) {
|
|
gk20a_dbg_info(
|
|
"found pending event_id=%d on chid=%d\n",
|
|
event_id, ch->hw_chid);
|
|
mask = (POLLPRI | POLLIN);
|
|
event_id_data->event_posted = false;
|
|
}
|
|
}
|
|
|
|
nvgpu_mutex_release(&event_id_data->lock);
|
|
|
|
return mask;
|
|
}
|
|
|
|
static int gk20a_event_id_release(struct inode *inode, struct file *filp)
|
|
{
|
|
struct gk20a_event_id_data *event_id_data = filp->private_data;
|
|
struct gk20a *g = event_id_data->g;
|
|
|
|
if (event_id_data->is_tsg) {
|
|
struct tsg_gk20a *tsg = g->fifo.tsg + event_id_data->id;
|
|
|
|
nvgpu_mutex_acquire(&tsg->event_id_list_lock);
|
|
list_del_init(&event_id_data->event_id_node);
|
|
nvgpu_mutex_release(&tsg->event_id_list_lock);
|
|
} else {
|
|
struct channel_gk20a *ch = g->fifo.channel + event_id_data->id;
|
|
|
|
nvgpu_mutex_acquire(&ch->event_id_list_lock);
|
|
list_del_init(&event_id_data->event_id_node);
|
|
nvgpu_mutex_release(&ch->event_id_list_lock);
|
|
}
|
|
|
|
gk20a_put(g);
|
|
kfree(event_id_data);
|
|
filp->private_data = NULL;
|
|
|
|
return 0;
|
|
}
|
|
|
|
const struct file_operations gk20a_event_id_ops = {
|
|
.owner = THIS_MODULE,
|
|
.poll = gk20a_event_id_poll,
|
|
.release = gk20a_event_id_release,
|
|
};
|
|
|
|
static int gk20a_channel_get_event_data_from_id(struct channel_gk20a *ch,
|
|
u32 event_id,
|
|
struct gk20a_event_id_data **event_id_data)
|
|
{
|
|
struct gk20a_event_id_data *local_event_id_data;
|
|
bool event_found = false;
|
|
|
|
nvgpu_mutex_acquire(&ch->event_id_list_lock);
|
|
list_for_each_entry(local_event_id_data, &ch->event_id_list,
|
|
event_id_node) {
|
|
if (local_event_id_data->event_id == event_id) {
|
|
event_found = true;
|
|
break;
|
|
}
|
|
}
|
|
nvgpu_mutex_release(&ch->event_id_list_lock);
|
|
|
|
if (event_found) {
|
|
*event_id_data = local_event_id_data;
|
|
return 0;
|
|
} else {
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
void gk20a_channel_event_id_post_event(struct channel_gk20a *ch,
|
|
u32 event_id)
|
|
{
|
|
struct gk20a_event_id_data *event_id_data;
|
|
int err = 0;
|
|
|
|
err = gk20a_channel_get_event_data_from_id(ch, event_id,
|
|
&event_id_data);
|
|
if (err)
|
|
return;
|
|
|
|
nvgpu_mutex_acquire(&event_id_data->lock);
|
|
|
|
gk20a_dbg_info(
|
|
"posting event for event_id=%d on ch=%d\n",
|
|
event_id, ch->hw_chid);
|
|
event_id_data->event_posted = true;
|
|
|
|
wake_up_interruptible_all(&event_id_data->event_id_wq);
|
|
|
|
nvgpu_mutex_release(&event_id_data->lock);
|
|
}
|
|
|
|
static int gk20a_channel_event_id_enable(struct channel_gk20a *ch,
|
|
int event_id,
|
|
int *fd)
|
|
{
|
|
struct gk20a *g;
|
|
int err = 0;
|
|
int local_fd;
|
|
struct file *file;
|
|
char *name;
|
|
struct gk20a_event_id_data *event_id_data;
|
|
|
|
g = gk20a_get(ch->g);
|
|
if (!g)
|
|
return -ENODEV;
|
|
|
|
err = gk20a_channel_get_event_data_from_id(ch,
|
|
event_id, &event_id_data);
|
|
if (err == 0) {
|
|
/* We already have event enabled */
|
|
err = -EINVAL;
|
|
goto free_ref;
|
|
}
|
|
|
|
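/* Reserve an fd first; it is installed only after the anon file and
 * event data have been set up. */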
err = get_unused_fd_flags(O_RDWR);
|
|
if (err < 0)
|
|
goto free_ref;
|
|
local_fd = err;
|
|
|
|
name = kasprintf(GFP_KERNEL, "nvgpu-event%d-fd%d",
|
|
event_id, local_fd);
|
|
|
|
file = anon_inode_getfile(name, &gk20a_event_id_ops,
|
|
NULL, O_RDWR);
|
|
kfree(name);
|
|
if (IS_ERR(file)) {
|
|
err = PTR_ERR(file);
|
|
goto clean_up;
|
|
}
|
|
|
|
event_id_data = kzalloc(sizeof(*event_id_data), GFP_KERNEL);
|
|
if (!event_id_data) {
|
|
err = -ENOMEM;
|
|
goto clean_up_file;
|
|
}
|
|
event_id_data->g = g;
|
|
event_id_data->id = ch->hw_chid;
|
|
event_id_data->is_tsg = false;
|
|
event_id_data->event_id = event_id;
|
|
|
|
init_waitqueue_head(&event_id_data->event_id_wq);
|
|
nvgpu_mutex_init(&event_id_data->lock);
|
|
INIT_LIST_HEAD(&event_id_data->event_id_node);
|
|
|
|
nvgpu_mutex_acquire(&ch->event_id_list_lock);
|
|
list_add_tail(&event_id_data->event_id_node, &ch->event_id_list);
|
|
nvgpu_mutex_release(&ch->event_id_list_lock);
|
|
|
|
fd_install(local_fd, file);
|
|
file->private_data = event_id_data;
|
|
|
|
*fd = local_fd;
|
|
|
|
return 0;
|
|
|
|
clean_up_file:
|
|
fput(file);
|
|
clean_up:
|
|
put_unused_fd(local_fd);
|
|
free_ref:
|
|
gk20a_put(g);
|
|
return err;
|
|
}
|
|
|
|
static int gk20a_channel_event_id_ctrl(struct channel_gk20a *ch,
|
|
struct nvgpu_event_id_ctrl_args *args)
|
|
{
|
|
int err = 0;
|
|
int fd = -1;
|
|
|
|
if (args->event_id >= NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX)
|
|
return -EINVAL;
|
|
|
|
if (gk20a_is_channel_marked_as_tsg(ch))
|
|
return -EINVAL;
|
|
|
|
switch (args->cmd) {
|
|
case NVGPU_IOCTL_CHANNEL_EVENT_ID_CMD_ENABLE:
|
|
err = gk20a_channel_event_id_enable(ch, args->event_id, &fd);
|
|
if (!err)
|
|
args->event_fd = fd;
|
|
break;
|
|
|
|
default:
|
|
gk20a_err(dev_from_gk20a(ch->g),
|
|
"unrecognized channel event id cmd: 0x%x",
|
|
args->cmd);
|
|
err = -EINVAL;
|
|
break;
|
|
}
|
|
|
|
return err;
|
|
}
|
|
|
|
int gk20a_channel_set_priority(struct channel_gk20a *ch, u32 priority)
|
|
{
|
|
if (gk20a_is_channel_marked_as_tsg(ch)) {
|
|
gk20a_err(dev_from_gk20a(ch->g),
|
|
"invalid operation for TSG!\n");
|
|
return -EINVAL;
|
|
}
|
|
|
|
/* set priority of graphics channel */
|
|
switch (priority) {
|
|
case NVGPU_PRIORITY_LOW:
|
|
ch->timeslice_us = ch->g->timeslice_low_priority_us;
|
|
break;
|
|
case NVGPU_PRIORITY_MEDIUM:
|
|
ch->timeslice_us = ch->g->timeslice_medium_priority_us;
|
|
break;
|
|
case NVGPU_PRIORITY_HIGH:
|
|
ch->timeslice_us = ch->g->timeslice_high_priority_us;
|
|
break;
|
|
default:
|
|
pr_err("Unsupported priority");
|
|
return -EINVAL;
|
|
}
|
|
|
|
return channel_gk20a_set_schedule_params(ch);
|
|
}
|
|
|
|
int gk20a_channel_set_timeslice(struct channel_gk20a *ch, u32 timeslice)
|
|
{
|
|
struct gk20a *g = ch->g;
|
|
|
|
if (gk20a_is_channel_marked_as_tsg(ch)) {
|
|
gk20a_err(dev_from_gk20a(ch->g),
|
|
"invalid operation for TSG!\n");
|
|
return -EINVAL;
|
|
}
|
|
|
|
if (timeslice < g->min_timeslice_us ||
|
|
timeslice > g->max_timeslice_us)
|
|
return -EINVAL;
|
|
|
|
ch->timeslice_us = timeslice;
|
|
|
|
gk20a_dbg(gpu_dbg_sched, "chid=%u timeslice=%u us",
|
|
ch->hw_chid, timeslice);
|
|
|
|
return channel_gk20a_set_schedule_params(ch);
|
|
}
|
|
|
|
static int gk20a_channel_zcull_bind(struct channel_gk20a *ch,
|
|
struct nvgpu_zcull_bind_args *args)
|
|
{
|
|
struct gk20a *g = ch->g;
|
|
struct gr_gk20a *gr = &g->gr;
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
return g->ops.gr.bind_ctxsw_zcull(g, gr, ch,
|
|
args->gpu_va, args->mode);
|
|
}
|
|
|
|
/* in this context the "channel" is the host1x channel which
|
|
* maps to *all* gk20a channels */
|
|
int gk20a_channel_suspend(struct gk20a *g)
|
|
{
|
|
struct fifo_gk20a *f = &g->fifo;
|
|
u32 chid;
|
|
bool channels_in_use = false;
|
|
u32 active_runlist_ids = 0;
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
for (chid = 0; chid < f->num_channels; chid++) {
|
|
struct channel_gk20a *ch = &f->channel[chid];
|
|
if (gk20a_channel_get(ch)) {
|
|
gk20a_dbg_info("suspend channel %d", chid);
|
|
/* disable channel */
|
|
gk20a_disable_channel_tsg(g, ch);
|
|
/* preempt the channel */
|
|
gk20a_fifo_preempt(g, ch);
|
|
/* wait for channel update notifiers */
|
|
if (ch->update_fn)
|
|
cancel_work_sync(&ch->update_fn_work);
|
|
|
|
channels_in_use = true;
|
|
|
|
active_runlist_ids |= BIT(ch->runlist_id);
|
|
|
|
gk20a_channel_put(ch);
|
|
}
|
|
}
|
|
|
|
if (channels_in_use) {
|
|
gk20a_fifo_update_runlist_ids(g, active_runlist_ids, ~0, false, true);
|
|
|
|
for (chid = 0; chid < f->num_channels; chid++) {
|
|
if (gk20a_channel_get(&f->channel[chid])) {
|
|
g->ops.fifo.unbind_channel(&f->channel[chid]);
|
|
gk20a_channel_put(&f->channel[chid]);
|
|
}
|
|
}
|
|
}
|
|
|
|
gk20a_dbg_fn("done");
|
|
return 0;
|
|
}
|
|
|
|
int gk20a_channel_resume(struct gk20a *g)
|
|
{
|
|
struct fifo_gk20a *f = &g->fifo;
|
|
u32 chid;
|
|
bool channels_in_use = false;
|
|
u32 active_runlist_ids = 0;
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
for (chid = 0; chid < f->num_channels; chid++) {
|
|
if (gk20a_channel_get(&f->channel[chid])) {
|
|
gk20a_dbg_info("resume channel %d", chid);
|
|
g->ops.fifo.bind_channel(&f->channel[chid]);
|
|
channels_in_use = true;
|
|
active_runlist_ids |= BIT(f->channel[chid].runlist_id);
|
|
gk20a_channel_put(&f->channel[chid]);
|
|
}
|
|
}
|
|
|
|
if (channels_in_use)
|
|
gk20a_fifo_update_runlist_ids(g, active_runlist_ids, ~0, true, true);
|
|
|
|
gk20a_dbg_fn("done");
|
|
return 0;
|
|
}
|
|
|
|
void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events)
|
|
{
|
|
struct fifo_gk20a *f = &g->fifo;
|
|
u32 chid;
|
|
|
|
gk20a_dbg_fn("");
|
|
|
|
/*
|
|
* Ensure that all pending writes are actually done before trying to
|
|
* read semaphore values from DRAM.
|
|
*/
|
|
g->ops.mm.fb_flush(g);
|
|
|
|
for (chid = 0; chid < f->num_channels; chid++) {
|
|
struct channel_gk20a *c = g->fifo.channel+chid;
|
|
if (gk20a_channel_get(c)) {
|
|
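/* Only wake up channels that are actually bound to hardware. */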
if (atomic_read(&c->bound)) {
|
|
wake_up_interruptible_all(&c->semaphore_wq);
|
|
if (post_events) {
|
|
if (gk20a_is_channel_marked_as_tsg(c)) {
|
|
struct tsg_gk20a *tsg =
|
|
&g->fifo.tsg[c->tsgid];
|
|
|
|
gk20a_tsg_event_id_post_event(tsg,
|
|
NVGPU_IOCTL_CHANNEL_EVENT_ID_BLOCKING_SYNC);
|
|
} else {
|
|
gk20a_channel_event_id_post_event(c,
|
|
NVGPU_IOCTL_CHANNEL_EVENT_ID_BLOCKING_SYNC);
|
|
}
|
|
}
|
|
/*
|
|
* Only non-deterministic channels get the
|
|
* channel_update callback. We don't allow
|
|
* semaphore-backed syncs for these channels
|
|
* anyways, since they have a dependency on
|
|
* the sync framework.
|
|
* If deterministic channels are receiving a
|
|
* semaphore wakeup, it must be for a
|
|
* user-space managed
|
|
* semaphore.
|
|
*/
|
|
if (!c->deterministic)
|
|
gk20a_channel_update(c);
|
|
}
|
|
gk20a_channel_put(c);
|
|
}
|
|
}
|
|
}
|
|
|
|
static int gk20a_ioctl_channel_submit_gpfifo(
|
|
struct channel_gk20a *ch,
|
|
struct nvgpu_submit_gpfifo_args *args)
|
|
{
|
|
struct gk20a_fence *fence_out;
|
|
struct fifo_profile_gk20a *profile = NULL;
|
|
|
|
int ret = 0;
|
|
gk20a_dbg_fn("");
|
|
|
|
#ifdef CONFIG_DEBUG_FS
|
|
profile = gk20a_fifo_profile_acquire(ch->g);
|
|
|
|
if (profile)
|
|
profile->timestamp[PROFILE_IOCTL_ENTRY] = sched_clock();
|
|
#endif
|
|
if (ch->has_timedout)
|
|
return -ETIMEDOUT;
|
|
|
|
if ((NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST & args->flags) &&
|
|
!capable(CAP_SYS_NICE))
|
|
return -EPERM;
|
|
|
|
ret = gk20a_submit_channel_gpfifo(ch, NULL, args, args->num_entries,
|
|
args->flags, &args->fence,
|
|
&fence_out, false, profile);
|
|
|
|
if (ret)
|
|
goto clean_up;
|
|
|
|
/* Convert fence_out to something we can pass back to user space. */
|
|
if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) {
|
|
if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
|
|
int fd = gk20a_fence_install_fd(fence_out);
|
|
if (fd < 0)
|
|
ret = fd;
|
|
else
|
|
args->fence.id = fd;
|
|
} else {
|
|
args->fence.id = fence_out->syncpt_id;
|
|
args->fence.value = fence_out->syncpt_value;
|
|
}
|
|
}
|
|
gk20a_fence_put(fence_out);
|
|
#ifdef CONFIG_DEBUG_FS
|
|
if (profile) {
|
|
profile->timestamp[PROFILE_IOCTL_EXIT] = sched_clock();
|
|
gk20a_fifo_profile_release(ch->g, profile);
|
|
}
|
|
#endif
|
|
clean_up:
|
|
return ret;
|
|
}
|
|
|
|
void gk20a_init_channel(struct gpu_ops *gops)
|
|
{
|
|
gops->fifo.bind_channel = channel_gk20a_bind;
|
|
gops->fifo.unbind_channel = channel_gk20a_unbind;
|
|
gops->fifo.disable_channel = channel_gk20a_disable;
|
|
gops->fifo.enable_channel = channel_gk20a_enable;
|
|
gops->fifo.alloc_inst = channel_gk20a_alloc_inst;
|
|
gops->fifo.free_inst = channel_gk20a_free_inst;
|
|
gops->fifo.setup_ramfc = channel_gk20a_setup_ramfc;
|
|
gops->fifo.channel_set_priority = gk20a_channel_set_priority;
|
|
gops->fifo.channel_set_timeslice = gk20a_channel_set_timeslice;
|
|
gops->fifo.userd_gp_get = gk20a_userd_gp_get;
|
|
gops->fifo.userd_gp_put = gk20a_userd_gp_put;
|
|
}

long gk20a_channel_ioctl(struct file *filp,
	unsigned int cmd, unsigned long arg)
{
	struct channel_priv *priv = filp->private_data;
	struct channel_gk20a *ch = priv->c;
	struct gk20a *g = ch->g;
	struct device *dev = g->dev;
	u8 buf[NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE] = {0};
	int err = 0;

	gk20a_dbg_fn("start %d", _IOC_NR(cmd));

	if ((_IOC_TYPE(cmd) != NVGPU_IOCTL_MAGIC) ||
		(_IOC_NR(cmd) == 0) ||
		(_IOC_NR(cmd) > NVGPU_IOCTL_CHANNEL_LAST) ||
		(_IOC_SIZE(cmd) > NVGPU_IOCTL_CHANNEL_MAX_ARG_SIZE))
		return -EINVAL;

	if (_IOC_DIR(cmd) & _IOC_WRITE) {
		if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
			return -EFAULT;
	}

	/* take a ref or return timeout if channel refs can't be taken */
	ch = gk20a_channel_get(ch);
	if (!ch)
		return -ETIMEDOUT;

	/* protect our sanity for threaded userspace - most of the channel is
	 * not thread safe */
	nvgpu_mutex_acquire(&ch->ioctl_lock);

	/* this ioctl call keeps a ref to the file which keeps a ref to the
	 * channel */

	switch (cmd) {
	case NVGPU_IOCTL_CHANNEL_OPEN:
		err = gk20a_channel_open_ioctl(ch->g,
			(struct nvgpu_channel_open_args *)buf);
		break;
	case NVGPU_IOCTL_CHANNEL_SET_NVMAP_FD:
		break;
	case NVGPU_IOCTL_CHANNEL_ALLOC_OBJ_CTX:
		err = gk20a_busy(g);
		if (err) {
			dev_err(dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = ch->g->ops.gr.alloc_obj_ctx(ch,
				(struct nvgpu_alloc_obj_ctx_args *)buf);
		gk20a_idle(g);
		break;
	case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO_EX:
	{
		struct nvgpu_alloc_gpfifo_ex_args *alloc_gpfifo_ex_args =
			(struct nvgpu_alloc_gpfifo_ex_args *)buf;

		err = gk20a_busy(g);
		if (err) {
			dev_err(dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}

		if (!is_power_of_2(alloc_gpfifo_ex_args->num_entries)) {
			err = -EINVAL;
			gk20a_idle(g);
			break;
		}
		err = gk20a_alloc_channel_gpfifo(ch, alloc_gpfifo_ex_args);
		gk20a_idle(g);
		break;
	}
	case NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO:
	{
		struct nvgpu_alloc_gpfifo_ex_args alloc_gpfifo_ex_args;
		struct nvgpu_alloc_gpfifo_args *alloc_gpfifo_args =
			(struct nvgpu_alloc_gpfifo_args *)buf;

		err = gk20a_busy(g);
		if (err) {
			dev_err(dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}

		/* prepare new args structure */
		memset(&alloc_gpfifo_ex_args, 0,
				sizeof(struct nvgpu_alloc_gpfifo_ex_args));
		/*
		 * Kernel can insert one extra gpfifo entry before user
		 * submitted gpfifos and another one after, for internal usage.
		 * Triple the requested size.
		 */
		alloc_gpfifo_ex_args.num_entries = roundup_pow_of_two(
				alloc_gpfifo_args->num_entries * 3);
		alloc_gpfifo_ex_args.flags = alloc_gpfifo_args->flags;

		err = gk20a_alloc_channel_gpfifo(ch, &alloc_gpfifo_ex_args);
		gk20a_idle(g);
		break;
	}
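	/*
	 * Worked example of the sizing above, for illustration only: a
	 * request for 100 entries becomes 100 * 3 = 300, which
	 * roundup_pow_of_two() rounds to 512; a request for 128 entries
	 * becomes 384 and is likewise rounded up to 512.
	 */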
	case NVGPU_IOCTL_CHANNEL_SUBMIT_GPFIFO:
		err = gk20a_ioctl_channel_submit_gpfifo(ch,
				(struct nvgpu_submit_gpfifo_args *)buf);
		break;
	case NVGPU_IOCTL_CHANNEL_WAIT:
		err = gk20a_busy(g);
		if (err) {
			dev_err(dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}

		/* waiting is thread-safe, not dropping this mutex could
		 * deadlock in certain conditions */
		nvgpu_mutex_release(&ch->ioctl_lock);

		err = gk20a_channel_wait(ch,
				(struct nvgpu_wait_args *)buf);

		nvgpu_mutex_acquire(&ch->ioctl_lock);

		gk20a_idle(g);
		break;
	case NVGPU_IOCTL_CHANNEL_ZCULL_BIND:
		err = gk20a_busy(g);
		if (err) {
			dev_err(dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = gk20a_channel_zcull_bind(ch,
				(struct nvgpu_zcull_bind_args *)buf);
		gk20a_idle(g);
		break;
	case NVGPU_IOCTL_CHANNEL_SET_ERROR_NOTIFIER:
		err = gk20a_busy(g);
		if (err) {
			dev_err(dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = gk20a_init_error_notifier(ch,
				(struct nvgpu_set_error_notifier *)buf);
		gk20a_idle(g);
		break;
#ifdef CONFIG_GK20A_CYCLE_STATS
	case NVGPU_IOCTL_CHANNEL_CYCLE_STATS:
		err = gk20a_busy(g);
		if (err) {
			dev_err(dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = gk20a_channel_cycle_stats(ch,
				(struct nvgpu_cycle_stats_args *)buf);
		gk20a_idle(g);
		break;
#endif
	case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT:
	{
		u32 timeout =
			(u32)((struct nvgpu_set_timeout_args *)buf)->timeout;
		gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
			  timeout, ch->hw_chid);
		ch->timeout_ms_max = timeout;
		gk20a_channel_trace_sched_param(
			trace_gk20a_channel_set_timeout, ch);
		break;
	}
	case NVGPU_IOCTL_CHANNEL_SET_TIMEOUT_EX:
	{
		u32 timeout =
			(u32)((struct nvgpu_set_timeout_args *)buf)->timeout;
		bool timeout_debug_dump = !((u32)
			((struct nvgpu_set_timeout_ex_args *)buf)->flags &
			(1 << NVGPU_TIMEOUT_FLAG_DISABLE_DUMP));
		gk20a_dbg(gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
			  timeout, ch->hw_chid);
		ch->timeout_ms_max = timeout;
		ch->timeout_debug_dump = timeout_debug_dump;
		gk20a_channel_trace_sched_param(
			trace_gk20a_channel_set_timeout, ch);
		break;
	}
	case NVGPU_IOCTL_CHANNEL_GET_TIMEDOUT:
		((struct nvgpu_get_param_args *)buf)->value =
			ch->has_timedout;
		break;
	case NVGPU_IOCTL_CHANNEL_SET_PRIORITY:
		err = gk20a_busy(g);
		if (err) {
			dev_err(dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = ch->g->ops.fifo.channel_set_priority(ch,
			((struct nvgpu_set_priority_args *)buf)->priority);

		gk20a_idle(g);
		gk20a_channel_trace_sched_param(
			trace_gk20a_channel_set_priority, ch);
		break;
	case NVGPU_IOCTL_CHANNEL_ENABLE:
		err = gk20a_busy(g);
		if (err) {
			dev_err(dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		if (ch->g->ops.fifo.enable_channel)
			ch->g->ops.fifo.enable_channel(ch);
		else
			err = -ENOSYS;
		gk20a_idle(g);
		break;
	case NVGPU_IOCTL_CHANNEL_DISABLE:
		err = gk20a_busy(g);
		if (err) {
			dev_err(dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		if (ch->g->ops.fifo.disable_channel)
			ch->g->ops.fifo.disable_channel(ch);
		else
			err = -ENOSYS;
		gk20a_idle(g);
		break;
	case NVGPU_IOCTL_CHANNEL_PREEMPT:
		err = gk20a_busy(g);
		if (err) {
			dev_err(dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = gk20a_fifo_preempt(ch->g, ch);
		gk20a_idle(g);
		break;
	case NVGPU_IOCTL_CHANNEL_PREEMPT_NEXT:
		/* errors must go through the common exit path below so that
		 * ioctl_lock is released and the channel reference taken
		 * above is dropped */
		if (!capable(CAP_SYS_NICE)) {
			err = -EPERM;
			break;
		}
		if (!ch->g->ops.fifo.reschedule_preempt_next) {
			err = -ENOSYS;
			break;
		}
		err = gk20a_busy(ch->g);
		if (err) {
			dev_err(dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = ch->g->ops.fifo.reschedule_preempt_next(ch);
		gk20a_idle(ch->g);
		break;
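	/*
	 * Illustrative note, not part of the uapi: user space would invoke
	 * the case above as a plain ioctl(channel_fd,
	 * NVGPU_IOCTL_CHANNEL_PREEMPT_NEXT) from a task holding
	 * CAP_SYS_NICE; the handler reads no argument buffer, so no payload
	 * struct is assumed here.
	 */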
	case NVGPU_IOCTL_CHANNEL_FORCE_RESET:
		err = gk20a_busy(g);
		if (err) {
			dev_err(dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = ch->g->ops.fifo.force_reset_ch(ch,
				NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR, true);
		gk20a_idle(g);
		break;
	case NVGPU_IOCTL_CHANNEL_EVENT_ID_CTRL:
		err = gk20a_channel_event_id_ctrl(ch,
			(struct nvgpu_event_id_ctrl_args *)buf);
		break;
#ifdef CONFIG_GK20A_CYCLE_STATS
	case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT:
		err = gk20a_busy(g);
		if (err) {
			dev_err(dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = gk20a_channel_cycle_stats_snapshot(ch,
				(struct nvgpu_cycle_stats_snapshot_args *)buf);
		gk20a_idle(g);
		break;
#endif
	case NVGPU_IOCTL_CHANNEL_WDT:
		err = gk20a_channel_set_wdt_status(ch,
				(struct nvgpu_channel_wdt_args *)buf);
		break;
	case NVGPU_IOCTL_CHANNEL_SET_RUNLIST_INTERLEAVE:
		err = gk20a_busy(g);
		if (err) {
			dev_err(dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = gk20a_channel_set_runlist_interleave(ch,
			((struct nvgpu_runlist_interleave_args *)buf)->level);

		gk20a_idle(g);
		gk20a_channel_trace_sched_param(
			trace_gk20a_channel_set_runlist_interleave, ch);
		break;
	case NVGPU_IOCTL_CHANNEL_SET_TIMESLICE:
		err = gk20a_busy(g);
		if (err) {
			dev_err(dev,
				"%s: failed to host gk20a for ioctl cmd: 0x%x",
				__func__, cmd);
			break;
		}
		err = ch->g->ops.fifo.channel_set_timeslice(ch,
			((struct nvgpu_timeslice_args *)buf)->timeslice_us);

		gk20a_idle(g);
		gk20a_channel_trace_sched_param(
			trace_gk20a_channel_set_timeslice, ch);
		break;
	case NVGPU_IOCTL_CHANNEL_SET_PREEMPTION_MODE:
		if (ch->g->ops.gr.set_preemption_mode) {
			err = gk20a_busy(g);
			if (err) {
				dev_err(dev,
					"%s: failed to host gk20a for ioctl cmd: 0x%x",
					__func__, cmd);
				break;
			}
			err = ch->g->ops.gr.set_preemption_mode(ch,
				((struct nvgpu_preemption_mode_args *)buf)->graphics_preempt_mode,
				((struct nvgpu_preemption_mode_args *)buf)->compute_preempt_mode);
			gk20a_idle(g);
		} else {
			err = -EINVAL;
		}
		break;
	case NVGPU_IOCTL_CHANNEL_SET_BOOSTED_CTX:
		if (ch->g->ops.gr.set_boosted_ctx) {
			bool boost =
				((struct nvgpu_boosted_ctx_args *)buf)->boost;

			err = gk20a_busy(g);
			if (err) {
				dev_err(dev,
					"%s: failed to host gk20a for ioctl cmd: 0x%x",
					__func__, cmd);
				break;
			}
			err = ch->g->ops.gr.set_boosted_ctx(ch, boost);
			gk20a_idle(g);
		} else {
			err = -EINVAL;
		}
		break;
	default:
		dev_dbg(dev, "unrecognized ioctl cmd: 0x%x", cmd);
		err = -ENOTTY;
		break;
	}

	if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ)) {
		/* copy_to_user() returns the number of bytes not copied
		 * rather than an errno, so map any failure to -EFAULT */
		if (copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)))
			err = -EFAULT;
	}

	nvgpu_mutex_release(&ch->ioctl_lock);

	gk20a_channel_put(ch);

	gk20a_dbg_fn("end");

	return err;
}
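
/*
 * Summary of the dispatch contract implemented by gk20a_channel_ioctl()
 * above: the argument struct is copied in from user space only for
 * _IOC_WRITE commands and copied back only when the command succeeded
 * and has _IOC_READ set; every command runs with a channel reference
 * held and under ch->ioctl_lock, except that NVGPU_IOCTL_CHANNEL_WAIT
 * drops the lock around the blocking wait because holding it there
 * could deadlock.
 */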