Files
linux-nvgpu/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
Peter Pipkorn 2b064ce65e gpu: nvgpu: add high priority channel interleave
Interleave all high priority channels between all other channels.
This reduces the latency for high priority work when there
is a lot of lower priority work present, imposing an upper
bound on the latency. Change the default high priority timeslice
from 5.2ms to 3.0ms in the process, to prevent long running high priority
apps from hogging the GPU too much.

Introduce a new debugfs node to enable/disable high priority
channel interleaving. It is currently enabled by default.

Adds new runlist length max register, used for allocating
suitable sized runlist.

Limit the number of interleaved channels to 32.

This change reduces the maximum time a lower priority job
is running (one timeslice) before we check that high priority
jobs are running.

Tested with gles2_context_priority (still passes)
Basic sanity testing is done with graphics_submit
(one app is high priority)

Also more functional testing using lots of parallel runs with:
NVRM_GPU_CHANNEL_PRIORITY=3 ./gles2_expensive_draw
 --drawsperframe 20000 --triangles 50 --runtime 30 --finish
plus multiple:
NVRM_GPU_CHANNEL_PRIORITY=2 ./gles2_expensive_draw
--drawsperframe 20000 --triangles 50 --runtime 30 --finish

Previous to this change, the relative performance between
high priority work and normal priority work comes down
to timeslice value. This means that when there are many
low priority channels, the high priority work will still
drop quite a lot. But with this change, the high priority
work will roughly get about half the entire GPU time, meaning
that after the initial lower performance, it is less likely
to get lower in performance due to more apps running on the system.

This change makes a large step towards real priority levels.
It is not perfect and there are no guarantees on anything,
but it is a step forwards without any additional CPU overhead
or other complications. It will also serve as a baseline to
judge other algorithms against.

Support for priorities with TSG is future work.
Support for interleaving mid + high priority channels,
instead of just high priority ones, is also future work.

Bug 1419900

Change-Id: I0f7d0ce83b6598fe86000577d72e14d312fdad98
Signed-off-by: Peter Pipkorn <ppipkorn@nvidia.com>
Reviewed-on: http://git-master/r/805961
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
2016-01-11 09:04:01 -08:00

198 lines
5.6 KiB
C

/*
* drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
*
* GK20A graphics fifo (gr host)
*
* Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
*/
#ifndef __FIFO_GK20A_H__
#define __FIFO_GK20A_H__
#include "channel_gk20a.h"
#include "tsg_gk20a.h"
/* Number of runlist buffers per runlist; sizes fifo_runlist_info_gk20a.mem[]. */
#define MAX_RUNLIST_BUFFERS 2
/*
 * Per-runlist software state.
 * Generally corresponds to the "pbdma" engine.
 */
struct fifo_runlist_info_gk20a {
/*
 * NOTE(review): these three unsigned long arrays look like bitmaps
 * (presumably indexed by channel id / TSG id) - confirm against the
 * allocation code in fifo_gk20a.c.
 */
unsigned long *active_channels;
unsigned long *active_tsgs;
unsigned long *high_prio_channels;
/* Each engine has its own SW and HW runlist buffer.*/
struct mem_desc mem[MAX_RUNLIST_BUFFERS];
/* presumably the index into mem[] of the buffer currently in use - TODO confirm */
u32 cur_buffer;
u32 total_entries;
bool stopped;
bool support_tsg;
struct mutex mutex; /* protect channel preempt and runlist update */
};
/*
 * Engine identifiers.
 * So far gk20a has two engines: gr and ce2 (gr_copy). The enumerators are
 * numbered consecutively from zero, so ENGINE_INVAL_GK20A is the first value
 * past the real engines.
 */
enum {
	ENGINE_GR_GK20A,	/* 0 */
	ENGINE_CE2_GK20A,	/* 1 */
	ENGINE_INVAL_GK20A,	/* 2 */
};
/*
 * PBDMA exception/status information, embedded in
 * struct fifo_engine_info_gk20a.
 */
struct fifo_pbdma_exception_info_gk20a {
u32 status_r; /* raw register value from hardware */
/* NOTE(review): presumably the current and next-to-run ids taken from status_r - confirm */
u32 id, next_id;
u32 chan_status_v; /* raw value from hardware */
/* presumably true when the matching id is a channel id rather than a TSG id - TODO confirm */
bool id_is_chid, next_id_is_chid;
bool chsw_in_progress;
};
/*
 * Engine exception/status information, embedded in
 * struct fifo_engine_info_gk20a.
 */
struct fifo_engine_exception_info_gk20a {
u32 status_r; /* raw register value from hardware */
/* NOTE(review): presumably the current and next-to-run ids taken from status_r - confirm */
u32 id, next_id;
u32 ctx_status_v; /* raw value from hardware */
/* presumably true when the matching id is a channel id rather than a TSG id - TODO confirm */
bool id_is_chid, next_id_is_chid;
bool faulted, idle, ctxsw_in_progress;
};
/*
 * MMU fault information, embedded in struct fifo_engine_info_gk20a.
 *
 * NOTE(review): the *_v members look like field values read from fault
 * registers and the *_desc members like printable names for the matching
 * *_v value - confirm against the fault handling code.
 */
struct fifo_mmu_fault_info_gk20a {
u32 fault_info_v;
u32 fault_type_v;
u32 engine_subid_v;
u32 client_v;
u32 fault_hi_v;
u32 fault_lo_v;
u64 inst_ptr;
const char *fault_type_desc;
const char *engine_subid_desc;
const char *client_desc;
};
/*
 * Per-engine description: a set of ids for the engine plus embedded
 * PBDMA exception, engine exception and MMU fault information.
 * struct fifo_gk20a refers to these through its engine_info pointer.
 */
struct fifo_engine_info_gk20a {
u32 engine_id;
u32 runlist_id;
u32 intr_id;
u32 reset_id;
u32 pbdma_id;
struct fifo_pbdma_exception_info_gk20a pbdma_exception_info;
struct fifo_engine_exception_info_gk20a engine_exception_info;
struct fifo_mmu_fault_info_gk20a mmu_fault_info;
};
/*
 * Top-level FIFO state: channel, TSG, engine and runlist bookkeeping
 * together with interrupt and deferred-reset state.
 */
struct fifo_gk20a {
/* NOTE(review): presumably a back-pointer to the owning GPU device - confirm */
struct gk20a *g;
int num_channels;
int num_runlist_entries;
int num_pbdma;
u32 *pbdma_map;
/* NOTE(review): presumably an array of engines bounded by max_engines/num_engines - confirm */
struct fifo_engine_info_gk20a *engine_info;
u32 max_engines;
u32 num_engines;
/* NOTE(review): presumably an array of runlists bounded by max_runlists - confirm */
struct fifo_runlist_info_gk20a *runlist_info;
u32 max_runlists;
struct mem_desc userd;
u32 userd_entry_size;
int used_channels;
struct channel_gk20a *channel;
/* zero-kref'd channels here */
struct list_head free_chs;
/* presumably guards free_chs - TODO confirm */
struct mutex free_chs_mutex;
struct mutex gr_reset_mutex;
struct tsg_gk20a *tsg;
struct mutex tsg_inuse_mutex;
/* Callback that takes this fifo; NOTE(review): presumably the teardown hook - confirm */
void (*remove_support)(struct fifo_gk20a *);
bool sw_ready;
struct {
/* share info between isrs and non-isr code */
struct {
struct mutex mutex;
} isr;
struct {
u32 device_fatal_0;
u32 channel_fatal_0;
u32 restartable_0;
} pbdma;
/* No members yet; an empty struct is a GNU C extension, not ISO C. */
struct {
} engine;
} intr;
u32 deferred_fault_engines;
bool deferred_reset_pending;
struct mutex deferred_reset_mutex;
};
/*
 * FIFO support initialisation and the two interrupt service entry points.
 * Only gk20a_init_fifo_support() returns a status; the ISR entry points
 * return nothing.
 */
int gk20a_init_fifo_support(struct gk20a *g);
void gk20a_fifo_isr(struct gk20a *g);
void gk20a_fifo_nonstall_isr(struct gk20a *g);
/*
 * Preemption entry points, addressed by hardware channel id, by TSG id,
 * or by channel object respectively.
 * NOTE(review): the int return is presumably 0 on success / negative errno -
 * confirm against the definitions.
 */
int gk20a_fifo_preempt_channel(struct gk20a *g, u32 hw_chid);
int gk20a_fifo_preempt_tsg(struct gk20a *g, u32 tsgid);
int gk20a_fifo_preempt(struct gk20a *g, struct channel_gk20a *ch);
/*
 * Enable/disable engine activity, either for the single engine described
 * by eng_info or for all engines. The disable variants additionally take
 * wait_for_idle.
 * NOTE(review): wait_for_idle presumably makes the call block until the
 * engine is idle - confirm against the definitions.
 */
int gk20a_fifo_enable_engine_activity(struct gk20a *g,
struct fifo_engine_info_gk20a *eng_info);
int gk20a_fifo_enable_all_engine_activity(struct gk20a *g);
int gk20a_fifo_disable_engine_activity(struct gk20a *g,
struct fifo_engine_info_gk20a *eng_info,
bool wait_for_idle);
int gk20a_fifo_disable_all_engine_activity(struct gk20a *g,
bool wait_for_idle);
/*
 * Channel/engine queries, runlist update and suspend.
 * NOTE(review): gk20a_fifo_engines_on_ch() presumably returns a bitmask of
 * engine ids, and the "add" flag of gk20a_fifo_update_runlist() presumably
 * selects adding vs. removing hw_chid - confirm against the definitions.
 */
u32 gk20a_fifo_engines_on_ch(struct gk20a *g, u32 hw_chid);
int gk20a_fifo_update_runlist(struct gk20a *g, u32 engine_id, u32 hw_chid,
bool add, bool wait_for_finish);
int gk20a_fifo_suspend(struct gk20a *g);
bool gk20a_fifo_mmu_fault_pending(struct gk20a *g);
/*
 * Recovery and reset entry points. gk20a_fifo_recover() is the general
 * form (see the per-parameter notes); the _ch and _tsg variants take a
 * hardware channel id or a TSG id directly.
 */
void gk20a_fifo_recover(struct gk20a *g,
u32 engine_ids, /* if zero, will be queried from HW */
u32 hw_id, /* if ~0, will be queried from HW */
bool hw_id_is_tsg, /* ignored if hw_id == ~0 */
bool id_is_known, bool verbose);
void gk20a_fifo_recover_ch(struct gk20a *g, u32 hw_chid, bool verbose);
void gk20a_fifo_recover_tsg(struct gk20a *g, u32 tsgid, bool verbose);
int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch, bool verbose);
void gk20a_fifo_reset_engine(struct gk20a *g, u32 engine_id);
/* Hardware reset/enable, gpu_ops hookup and MMU fault handling completion. */
int gk20a_init_fifo_reset_enable_hw(struct gk20a *g);
void gk20a_init_fifo(struct gpu_ops *gops);
void fifo_gk20a_finish_mmu_fault_handling(struct gk20a *g,
unsigned long fault_id);
int gk20a_fifo_wait_engine_idle(struct gk20a *g);
u32 gk20a_fifo_engine_interrupt_mask(struct gk20a *g);
u32 gk20a_fifo_get_pbdma_signature(struct gk20a *g);
/*
 * NOTE(review): "interleave" presumably selects whether hw_chid on
 * runlist_id takes part in high priority channel interleaving - confirm
 * against the definition.
 */
int gk20a_fifo_set_channel_priority(struct gk20a *g, u32 runlist_id,
u32 hw_chid, bool interleave);
/*
 * Results are also returned through the __id and __is_tsg pointers.
 * NOTE(review): whether either pointer may be NULL is not visible here.
 */
u32 gk20a_fifo_get_failing_engine_data(struct gk20a *g,
int *__id, bool *__is_tsg);
bool gk20a_fifo_set_ctx_mmu_error_tsg(struct gk20a *g,
struct tsg_gk20a *tsg);
void gk20a_fifo_abort_tsg(struct gk20a *g, u32 tsgid);
bool gk20a_fifo_set_ctx_mmu_error_ch(struct gk20a *g,
struct channel_gk20a *ch);
#endif /* __FIFO_GK20A_H__ */