mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 17:36:20 +03:00
gpu: nvgpu: add speculative barrier
Data can be speculativerly stored and
code flow can be hijacked.
To mitigate this problem insert a
speculation barrier.
Bug 200447167
Change-Id: Ia865ff2add8b30de49aa970715625b13e8f71c08
Signed-off-by: Ranjanikar Nikhil Prabhakarrao <rprabhakarra@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1972221
(cherry picked from commit f0762ed483)
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/1996052
Reviewed-by: automaticguardword <automaticguardword@nvidia.com>
Reviewed-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
Tested-by: Deepak Nibade <dnibade@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
bbef4c6927
commit
f56874aec2
@@ -212,6 +212,7 @@ static int nvgpu_submit_append_gpfifo_user_direct(struct channel_gk20a *c,
|
|||||||
u32 end = start + len; /* exclusive */
|
u32 end = start + len; /* exclusive */
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
|
nvgpu_speculation_barrier();
|
||||||
if (end > gpfifo_size) {
|
if (end > gpfifo_size) {
|
||||||
/* wrap-around */
|
/* wrap-around */
|
||||||
int length0 = gpfifo_size - start;
|
int length0 = gpfifo_size - start;
|
||||||
|
|||||||
@@ -219,6 +219,7 @@ int gk20a_tsg_set_runlist_interleave(struct tsg_gk20a *tsg, u32 level)
|
|||||||
|
|
||||||
nvgpu_log(g, gpu_dbg_sched, "tsgid=%u interleave=%u", tsg->tsgid, level);
|
nvgpu_log(g, gpu_dbg_sched, "tsgid=%u interleave=%u", tsg->tsgid, level);
|
||||||
|
|
||||||
|
nvgpu_speculation_barrier();
|
||||||
switch (level) {
|
switch (level) {
|
||||||
case NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW:
|
case NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW:
|
||||||
case NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_MEDIUM:
|
case NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_MEDIUM:
|
||||||
|
|||||||
@@ -3943,6 +3943,7 @@ int gr_gk20a_add_zbc(struct gk20a *g, struct gr_gk20a *gr,
|
|||||||
/* no endian swap ? */
|
/* no endian swap ? */
|
||||||
|
|
||||||
nvgpu_mutex_acquire(&gr->zbc_lock);
|
nvgpu_mutex_acquire(&gr->zbc_lock);
|
||||||
|
nvgpu_speculation_barrier();
|
||||||
switch (zbc_val->type) {
|
switch (zbc_val->type) {
|
||||||
case GK20A_ZBC_TYPE_COLOR:
|
case GK20A_ZBC_TYPE_COLOR:
|
||||||
/* search existing tables */
|
/* search existing tables */
|
||||||
@@ -4047,6 +4048,7 @@ int gr_gk20a_query_zbc(struct gk20a *g, struct gr_gk20a *gr,
|
|||||||
u32 index = query_params->index_size;
|
u32 index = query_params->index_size;
|
||||||
u32 i;
|
u32 i;
|
||||||
|
|
||||||
|
nvgpu_speculation_barrier();
|
||||||
switch (query_params->type) {
|
switch (query_params->type) {
|
||||||
case GK20A_ZBC_TYPE_INVALID:
|
case GK20A_ZBC_TYPE_INVALID:
|
||||||
query_params->index_size = GK20A_ZBC_TABLE_SIZE;
|
query_params->index_size = GK20A_ZBC_TABLE_SIZE;
|
||||||
|
|||||||
@@ -52,6 +52,7 @@ bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num)
|
|||||||
{
|
{
|
||||||
bool valid = false;
|
bool valid = false;
|
||||||
|
|
||||||
|
nvgpu_speculation_barrier();
|
||||||
switch (class_num) {
|
switch (class_num) {
|
||||||
case PASCAL_COMPUTE_A:
|
case PASCAL_COMPUTE_A:
|
||||||
case PASCAL_A:
|
case PASCAL_A:
|
||||||
|
|||||||
@@ -77,6 +77,7 @@ bool gr_gv11b_is_valid_class(struct gk20a *g, u32 class_num)
|
|||||||
{
|
{
|
||||||
bool valid = false;
|
bool valid = false;
|
||||||
|
|
||||||
|
nvgpu_speculation_barrier();
|
||||||
switch (class_num) {
|
switch (class_num) {
|
||||||
case VOLTA_COMPUTE_A:
|
case VOLTA_COMPUTE_A:
|
||||||
case VOLTA_A:
|
case VOLTA_A:
|
||||||
@@ -106,6 +107,7 @@ bool gr_gv11b_is_valid_gfx_class(struct gk20a *g, u32 class_num)
|
|||||||
{
|
{
|
||||||
bool valid = false;
|
bool valid = false;
|
||||||
|
|
||||||
|
nvgpu_speculation_barrier();
|
||||||
switch (class_num) {
|
switch (class_num) {
|
||||||
case VOLTA_A:
|
case VOLTA_A:
|
||||||
case PASCAL_A:
|
case PASCAL_A:
|
||||||
@@ -140,6 +142,7 @@ bool gr_gv11b_is_valid_compute_class(struct gk20a *g, u32 class_num)
|
|||||||
{
|
{
|
||||||
bool valid = false;
|
bool valid = false;
|
||||||
|
|
||||||
|
nvgpu_speculation_barrier();
|
||||||
switch (class_num) {
|
switch (class_num) {
|
||||||
case VOLTA_COMPUTE_A:
|
case VOLTA_COMPUTE_A:
|
||||||
case PASCAL_COMPUTE_A:
|
case PASCAL_COMPUTE_A:
|
||||||
|
|||||||
@@ -244,6 +244,7 @@ int nvgpu_vidmem_buf_access_memory(struct gk20a *g, struct dma_buf *dmabuf,
|
|||||||
vidmem_buf = dmabuf->priv;
|
vidmem_buf = dmabuf->priv;
|
||||||
mem = vidmem_buf->mem;
|
mem = vidmem_buf->mem;
|
||||||
|
|
||||||
|
nvgpu_speculation_barrier();
|
||||||
switch (cmd) {
|
switch (cmd) {
|
||||||
case NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_READ:
|
case NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_READ:
|
||||||
nvgpu_mem_rd_n(g, mem, offset, buffer, size);
|
nvgpu_mem_rd_n(g, mem, offset, buffer, size);
|
||||||
|
|||||||
@@ -170,6 +170,7 @@ static int gk20a_as_ioctl_map_buffer_batch(
|
|||||||
nvgpu_vm_unmap(as_share->vm, unmap_args.offset, &batch);
|
nvgpu_vm_unmap(as_share->vm, unmap_args.offset, &batch);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
nvgpu_speculation_barrier();
|
||||||
if (err) {
|
if (err) {
|
||||||
nvgpu_vm_mapping_batch_finish(as_share->vm, &batch);
|
nvgpu_vm_mapping_batch_finish(as_share->vm, &batch);
|
||||||
|
|
||||||
@@ -355,6 +356,7 @@ long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
|||||||
if (err)
|
if (err)
|
||||||
return err;
|
return err;
|
||||||
|
|
||||||
|
nvgpu_speculation_barrier();
|
||||||
switch (cmd) {
|
switch (cmd) {
|
||||||
case NVGPU_AS_IOCTL_BIND_CHANNEL:
|
case NVGPU_AS_IOCTL_BIND_CHANNEL:
|
||||||
trace_gk20a_as_ioctl_bind_channel(g->name);
|
trace_gk20a_as_ioctl_bind_channel(g->name);
|
||||||
|
|||||||
@@ -290,6 +290,7 @@ static int gk20a_channel_cycle_stats_snapshot(struct channel_gk20a *ch,
|
|||||||
if (!args->dmabuf_fd)
|
if (!args->dmabuf_fd)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
|
nvgpu_speculation_barrier();
|
||||||
/* handle the command (most frequent cases first) */
|
/* handle the command (most frequent cases first) */
|
||||||
switch (args->cmd) {
|
switch (args->cmd) {
|
||||||
case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_FLUSH:
|
case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_FLUSH:
|
||||||
@@ -874,6 +875,7 @@ clean_up:
|
|||||||
*/
|
*/
|
||||||
u32 nvgpu_get_common_runlist_level(u32 level)
|
u32 nvgpu_get_common_runlist_level(u32 level)
|
||||||
{
|
{
|
||||||
|
nvgpu_speculation_barrier();
|
||||||
switch (level) {
|
switch (level) {
|
||||||
case NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW:
|
case NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW:
|
||||||
return NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW;
|
return NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW;
|
||||||
@@ -982,6 +984,7 @@ u32 nvgpu_get_ioctl_compute_preempt_mode(u32 compute_preempt_mode)
|
|||||||
*/
|
*/
|
||||||
static u32 nvgpu_get_common_graphics_preempt_mode(u32 graphics_preempt_mode)
|
static u32 nvgpu_get_common_graphics_preempt_mode(u32 graphics_preempt_mode)
|
||||||
{
|
{
|
||||||
|
nvgpu_speculation_barrier();
|
||||||
switch (graphics_preempt_mode) {
|
switch (graphics_preempt_mode) {
|
||||||
case NVGPU_GRAPHICS_PREEMPTION_MODE_WFI:
|
case NVGPU_GRAPHICS_PREEMPTION_MODE_WFI:
|
||||||
return NVGPU_PREEMPTION_MODE_GRAPHICS_WFI;
|
return NVGPU_PREEMPTION_MODE_GRAPHICS_WFI;
|
||||||
@@ -998,6 +1001,7 @@ static u32 nvgpu_get_common_graphics_preempt_mode(u32 graphics_preempt_mode)
|
|||||||
*/
|
*/
|
||||||
static u32 nvgpu_get_common_compute_preempt_mode(u32 compute_preempt_mode)
|
static u32 nvgpu_get_common_compute_preempt_mode(u32 compute_preempt_mode)
|
||||||
{
|
{
|
||||||
|
nvgpu_speculation_barrier();
|
||||||
switch (compute_preempt_mode) {
|
switch (compute_preempt_mode) {
|
||||||
case NVGPU_COMPUTE_PREEMPTION_MODE_WFI:
|
case NVGPU_COMPUTE_PREEMPTION_MODE_WFI:
|
||||||
return NVGPU_PREEMPTION_MODE_COMPUTE_WFI;
|
return NVGPU_PREEMPTION_MODE_COMPUTE_WFI;
|
||||||
@@ -1121,6 +1125,7 @@ long gk20a_channel_ioctl(struct file *filp,
|
|||||||
/* this ioctl call keeps a ref to the file which keeps a ref to the
|
/* this ioctl call keeps a ref to the file which keeps a ref to the
|
||||||
* channel */
|
* channel */
|
||||||
|
|
||||||
|
nvgpu_speculation_barrier();
|
||||||
switch (cmd) {
|
switch (cmd) {
|
||||||
case NVGPU_IOCTL_CHANNEL_OPEN:
|
case NVGPU_IOCTL_CHANNEL_OPEN:
|
||||||
err = gk20a_channel_open_ioctl(ch->g,
|
err = gk20a_channel_open_ioctl(ch->g,
|
||||||
|
|||||||
@@ -366,6 +366,7 @@ gk20a_ctrl_ioctl_gpu_characteristics(
|
|||||||
if (request->gpu_characteristics_buf_size > 0) {
|
if (request->gpu_characteristics_buf_size > 0) {
|
||||||
size_t write_size = sizeof(gpu);
|
size_t write_size = sizeof(gpu);
|
||||||
|
|
||||||
|
nvgpu_speculation_barrier();
|
||||||
if (write_size > request->gpu_characteristics_buf_size)
|
if (write_size > request->gpu_characteristics_buf_size)
|
||||||
write_size = request->gpu_characteristics_buf_size;
|
write_size = request->gpu_characteristics_buf_size;
|
||||||
|
|
||||||
@@ -556,6 +557,7 @@ static int gk20a_ctrl_get_tpc_masks(struct gk20a *g,
|
|||||||
if (args->mask_buf_size > 0) {
|
if (args->mask_buf_size > 0) {
|
||||||
size_t write_size = gpc_tpc_mask_size;
|
size_t write_size = gpc_tpc_mask_size;
|
||||||
|
|
||||||
|
nvgpu_speculation_barrier();
|
||||||
if (write_size > args->mask_buf_size)
|
if (write_size > args->mask_buf_size)
|
||||||
write_size = args->mask_buf_size;
|
write_size = args->mask_buf_size;
|
||||||
|
|
||||||
@@ -580,6 +582,7 @@ static int gk20a_ctrl_get_fbp_l2_masks(
|
|||||||
if (args->mask_buf_size > 0) {
|
if (args->mask_buf_size > 0) {
|
||||||
size_t write_size = fbp_l2_mask_size;
|
size_t write_size = fbp_l2_mask_size;
|
||||||
|
|
||||||
|
nvgpu_speculation_barrier();
|
||||||
if (write_size > args->mask_buf_size)
|
if (write_size > args->mask_buf_size)
|
||||||
write_size = args->mask_buf_size;
|
write_size = args->mask_buf_size;
|
||||||
|
|
||||||
@@ -1245,6 +1248,7 @@ static int nvgpu_gpu_clk_set_info(struct gk20a *g,
|
|||||||
nvgpu_gpu_convert_clk_domain(clk_info.clk_domain)))
|
nvgpu_gpu_convert_clk_domain(clk_info.clk_domain)))
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
nvgpu_speculation_barrier();
|
||||||
|
|
||||||
entry = (struct nvgpu_gpu_clk_info __user *)
|
entry = (struct nvgpu_gpu_clk_info __user *)
|
||||||
(uintptr_t)args->clk_info_entries;
|
(uintptr_t)args->clk_info_entries;
|
||||||
@@ -1264,6 +1268,7 @@ static int nvgpu_gpu_clk_set_info(struct gk20a *g,
|
|||||||
nvgpu_gpu_convert_clk_domain(clk_info.clk_domain), freq_mhz);
|
nvgpu_gpu_convert_clk_domain(clk_info.clk_domain), freq_mhz);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
nvgpu_speculation_barrier();
|
||||||
ret = nvgpu_clk_arb_commit_request_fd(g, session, fd);
|
ret = nvgpu_clk_arb_commit_request_fd(g, session, fd);
|
||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
return ret;
|
return ret;
|
||||||
@@ -1333,6 +1338,7 @@ static int nvgpu_gpu_clk_get_info(struct gk20a *g,
|
|||||||
clk_info.clk_type = args->clk_type;
|
clk_info.clk_type = args->clk_type;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
nvgpu_speculation_barrier();
|
||||||
switch (clk_info.clk_type) {
|
switch (clk_info.clk_type) {
|
||||||
case NVGPU_GPU_CLK_TYPE_TARGET:
|
case NVGPU_GPU_CLK_TYPE_TARGET:
|
||||||
err = nvgpu_clk_arb_get_session_target_mhz(session,
|
err = nvgpu_clk_arb_get_session_target_mhz(session,
|
||||||
@@ -1366,6 +1372,7 @@ static int nvgpu_gpu_clk_get_info(struct gk20a *g,
|
|||||||
return -EFAULT;
|
return -EFAULT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
nvgpu_speculation_barrier();
|
||||||
args->num_entries = num_entries;
|
args->num_entries = num_entries;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
@@ -1403,6 +1410,7 @@ static int nvgpu_gpu_get_voltage(struct gk20a *g,
|
|||||||
if (err)
|
if (err)
|
||||||
return err;
|
return err;
|
||||||
|
|
||||||
|
nvgpu_speculation_barrier();
|
||||||
switch (args->which) {
|
switch (args->which) {
|
||||||
case NVGPU_GPU_VOLTAGE_CORE:
|
case NVGPU_GPU_VOLTAGE_CORE:
|
||||||
err = volt_get_voltage(g, CTRL_VOLT_DOMAIN_LOGIC, &args->voltage);
|
err = volt_get_voltage(g, CTRL_VOLT_DOMAIN_LOGIC, &args->voltage);
|
||||||
@@ -1625,6 +1633,7 @@ static int nvgpu_gpu_set_deterministic_opts(struct gk20a *g,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
nvgpu_speculation_barrier();
|
||||||
nvgpu_rwsem_up_read(&g->deterministic_busy);
|
nvgpu_rwsem_up_read(&g->deterministic_busy);
|
||||||
|
|
||||||
out:
|
out:
|
||||||
@@ -1668,6 +1677,7 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
|
|||||||
gk20a_idle(g);
|
gk20a_idle(g);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
nvgpu_speculation_barrier();
|
||||||
switch (cmd) {
|
switch (cmd) {
|
||||||
case NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE:
|
case NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE:
|
||||||
get_ctx_size_args = (struct nvgpu_gpu_zcull_get_ctx_size_args *)buf;
|
get_ctx_size_args = (struct nvgpu_gpu_zcull_get_ctx_size_args *)buf;
|
||||||
@@ -1713,6 +1723,7 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
|
|||||||
zbc_val->format = set_table_args->format;
|
zbc_val->format = set_table_args->format;
|
||||||
zbc_val->type = set_table_args->type;
|
zbc_val->type = set_table_args->type;
|
||||||
|
|
||||||
|
nvgpu_speculation_barrier();
|
||||||
switch (zbc_val->type) {
|
switch (zbc_val->type) {
|
||||||
case GK20A_ZBC_TYPE_COLOR:
|
case GK20A_ZBC_TYPE_COLOR:
|
||||||
for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
|
for (i = 0; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) {
|
||||||
|
|||||||
@@ -314,6 +314,7 @@ static int nvgpu_dbg_gpu_ioctl_read_single_sm_error_state(
|
|||||||
if (args->sm_error_state_record_size > 0) {
|
if (args->sm_error_state_record_size > 0) {
|
||||||
size_t write_size = sizeof(*sm_error_state);
|
size_t write_size = sizeof(*sm_error_state);
|
||||||
|
|
||||||
|
nvgpu_speculation_barrier();
|
||||||
if (write_size > args->sm_error_state_record_size)
|
if (write_size > args->sm_error_state_record_size)
|
||||||
write_size = args->sm_error_state_record_size;
|
write_size = args->sm_error_state_record_size;
|
||||||
|
|
||||||
@@ -361,6 +362,7 @@ static int nvgpu_dbg_timeout_enable(struct dbg_session_gk20a *dbg_s,
|
|||||||
nvgpu_log(g, gpu_dbg_gpu_dbg, "Timeouts mode requested : %d",
|
nvgpu_log(g, gpu_dbg_gpu_dbg, "Timeouts mode requested : %d",
|
||||||
timeout_mode);
|
timeout_mode);
|
||||||
|
|
||||||
|
nvgpu_speculation_barrier();
|
||||||
switch (timeout_mode) {
|
switch (timeout_mode) {
|
||||||
case NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE:
|
case NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE:
|
||||||
if (dbg_s->is_timeout_disabled == true)
|
if (dbg_s->is_timeout_disabled == true)
|
||||||
@@ -917,6 +919,7 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
|
|||||||
ops_offset += num_ops;
|
ops_offset += num_ops;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
nvgpu_speculation_barrier();
|
||||||
nvgpu_kfree(g, linux_fragment);
|
nvgpu_kfree(g, linux_fragment);
|
||||||
|
|
||||||
/* enable powergate, if previously disabled */
|
/* enable powergate, if previously disabled */
|
||||||
@@ -1007,6 +1010,7 @@ static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
|
|||||||
|
|
||||||
static u32 nvgpu_hwpm_ctxsw_mode_to_common_mode(u32 mode)
|
static u32 nvgpu_hwpm_ctxsw_mode_to_common_mode(u32 mode)
|
||||||
{
|
{
|
||||||
|
nvgpu_speculation_barrier();
|
||||||
switch (mode){
|
switch (mode){
|
||||||
case NVGPU_DBG_GPU_HWPM_CTXSW_MODE_NO_CTXSW:
|
case NVGPU_DBG_GPU_HWPM_CTXSW_MODE_NO_CTXSW:
|
||||||
return NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW;
|
return NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW;
|
||||||
@@ -1153,6 +1157,7 @@ static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm(
|
|||||||
goto clean_up;
|
goto clean_up;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
nvgpu_speculation_barrier();
|
||||||
switch (action) {
|
switch (action) {
|
||||||
case NVGPU_DBG_GPU_SUSPEND_ALL_SMS:
|
case NVGPU_DBG_GPU_SUSPEND_ALL_SMS:
|
||||||
gr_gk20a_suspend_context(ch);
|
gr_gk20a_suspend_context(ch);
|
||||||
@@ -1366,6 +1371,7 @@ static int gk20a_dbg_gpu_events_ctrl(struct dbg_session_gk20a *dbg_s,
|
|||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
nvgpu_speculation_barrier();
|
||||||
switch (args->cmd) {
|
switch (args->cmd) {
|
||||||
case NVGPU_DBG_GPU_EVENTS_CTRL_CMD_ENABLE:
|
case NVGPU_DBG_GPU_EVENTS_CTRL_CMD_ENABLE:
|
||||||
gk20a_dbg_gpu_events_enable(dbg_s);
|
gk20a_dbg_gpu_events_enable(dbg_s);
|
||||||
@@ -1536,6 +1542,7 @@ nvgpu_dbg_gpu_ioctl_suspend_resume_contexts(struct dbg_session_gk20a *dbg_s,
|
|||||||
if (err)
|
if (err)
|
||||||
return err;
|
return err;
|
||||||
|
|
||||||
|
nvgpu_speculation_barrier();
|
||||||
switch (args->action) {
|
switch (args->action) {
|
||||||
case NVGPU_DBG_GPU_SUSPEND_ALL_CONTEXTS:
|
case NVGPU_DBG_GPU_SUSPEND_ALL_CONTEXTS:
|
||||||
err = g->ops.gr.suspend_contexts(g, dbg_s,
|
err = g->ops.gr.suspend_contexts(g, dbg_s,
|
||||||
@@ -1627,6 +1634,7 @@ static int nvgpu_dbg_gpu_ioctl_access_fb_memory(struct dbg_session_gk20a *dbg_s,
|
|||||||
size -= access_size;
|
size -= access_size;
|
||||||
offset += access_size;
|
offset += access_size;
|
||||||
}
|
}
|
||||||
|
nvgpu_speculation_barrier();
|
||||||
|
|
||||||
fail_idle:
|
fail_idle:
|
||||||
gk20a_idle(g);
|
gk20a_idle(g);
|
||||||
@@ -1899,6 +1907,7 @@ static int nvgpu_dbg_gpu_set_sm_exception_type_mask(
|
|||||||
struct gk20a *g = dbg_s->g;
|
struct gk20a *g = dbg_s->g;
|
||||||
u32 sm_exception_mask_type = NVGPU_SM_EXCEPTION_TYPE_MASK_NONE;
|
u32 sm_exception_mask_type = NVGPU_SM_EXCEPTION_TYPE_MASK_NONE;
|
||||||
|
|
||||||
|
nvgpu_speculation_barrier();
|
||||||
switch (args->exception_type_mask) {
|
switch (args->exception_type_mask) {
|
||||||
case NVGPU_DBG_GPU_IOCTL_SET_SM_EXCEPTION_TYPE_MASK_FATAL:
|
case NVGPU_DBG_GPU_IOCTL_SET_SM_EXCEPTION_TYPE_MASK_FATAL:
|
||||||
sm_exception_mask_type = NVGPU_SM_EXCEPTION_TYPE_MASK_FATAL;
|
sm_exception_mask_type = NVGPU_SM_EXCEPTION_TYPE_MASK_FATAL;
|
||||||
@@ -1970,6 +1979,7 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
|
|||||||
/* protect from threaded user space calls */
|
/* protect from threaded user space calls */
|
||||||
nvgpu_mutex_acquire(&dbg_s->ioctl_lock);
|
nvgpu_mutex_acquire(&dbg_s->ioctl_lock);
|
||||||
|
|
||||||
|
nvgpu_speculation_barrier();
|
||||||
switch (cmd) {
|
switch (cmd) {
|
||||||
case NVGPU_DBG_GPU_IOCTL_BIND_CHANNEL:
|
case NVGPU_DBG_GPU_IOCTL_BIND_CHANNEL:
|
||||||
err = dbg_bind_channel_gk20a(dbg_s,
|
err = dbg_bind_channel_gk20a(dbg_s,
|
||||||
|
|||||||
@@ -361,6 +361,7 @@ static int gk20a_tsg_event_id_ctrl(struct gk20a *g, struct tsg_gk20a *tsg,
|
|||||||
if (args->event_id >= NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX)
|
if (args->event_id >= NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
|
nvgpu_speculation_barrier();
|
||||||
switch (args->cmd) {
|
switch (args->cmd) {
|
||||||
case NVGPU_IOCTL_CHANNEL_EVENT_ID_CMD_ENABLE:
|
case NVGPU_IOCTL_CHANNEL_EVENT_ID_CMD_ENABLE:
|
||||||
err = gk20a_tsg_event_id_enable(tsg, args->event_id, &fd);
|
err = gk20a_tsg_event_id_enable(tsg, args->event_id, &fd);
|
||||||
@@ -572,6 +573,7 @@ static int gk20a_tsg_ioctl_read_single_sm_error_state(struct gk20a *g,
|
|||||||
if (args->record_size > 0) {
|
if (args->record_size > 0) {
|
||||||
size_t write_size = sizeof(*sm_error_state);
|
size_t write_size = sizeof(*sm_error_state);
|
||||||
|
|
||||||
|
nvgpu_speculation_barrier();
|
||||||
if (write_size > args->record_size)
|
if (write_size > args->record_size)
|
||||||
write_size = args->record_size;
|
write_size = args->record_size;
|
||||||
|
|
||||||
|
|||||||
@@ -447,6 +447,7 @@ long gk20a_sched_dev_ioctl(struct file *filp, unsigned int cmd,
|
|||||||
return -EFAULT;
|
return -EFAULT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
nvgpu_speculation_barrier();
|
||||||
switch (cmd) {
|
switch (cmd) {
|
||||||
case NVGPU_SCHED_IOCTL_GET_TSGS:
|
case NVGPU_SCHED_IOCTL_GET_TSGS:
|
||||||
err = gk20a_sched_dev_ioctl_get_tsgs(g,
|
err = gk20a_sched_dev_ioctl_get_tsgs(g,
|
||||||
|
|||||||
549
drivers/gpu/nvgpu/tu104/gr_tu104.c
Normal file
549
drivers/gpu/nvgpu/tu104/gr_tu104.c
Normal file
@@ -0,0 +1,549 @@
|
|||||||
|
<<<<<<< HEAD (bbef4c gpu: nvgpu: initialize masks for the perfmon counters 3)
|
||||||
|
=======
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
|
* to deal in the Software without restriction, including without limitation
|
||||||
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||||
|
* and/or sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in
|
||||||
|
* all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||||
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||||
|
* DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <nvgpu/types.h>
|
||||||
|
#include <nvgpu/soc.h>
|
||||||
|
#include <nvgpu/io.h>
|
||||||
|
#include <nvgpu/utils.h>
|
||||||
|
#include <nvgpu/gk20a.h>
|
||||||
|
#include <nvgpu/channel.h>
|
||||||
|
#include <nvgpu/netlist.h>
|
||||||
|
|
||||||
|
#include "gk20a/gr_gk20a.h"
|
||||||
|
#include "gk20a/gr_pri_gk20a.h"
|
||||||
|
|
||||||
|
#include "gp10b/gr_gp10b.h"
|
||||||
|
|
||||||
|
#include "gv11b/gr_gv11b.h"
|
||||||
|
|
||||||
|
#include "tu104/gr_tu104.h"
|
||||||
|
|
||||||
|
#include <nvgpu/hw/tu104/hw_gr_tu104.h>
|
||||||
|
|
||||||
|
bool gr_tu104_is_valid_class(struct gk20a *g, u32 class_num)
|
||||||
|
{
|
||||||
|
nvgpu_speculation_barrier();
|
||||||
|
switch (class_num) {
|
||||||
|
case TURING_CHANNEL_GPFIFO_A:
|
||||||
|
case TURING_A:
|
||||||
|
case TURING_COMPUTE_A:
|
||||||
|
case TURING_DMA_COPY_A:
|
||||||
|
return true;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return gr_gv11b_is_valid_class(g, class_num);
|
||||||
|
};
|
||||||
|
|
||||||
|
bool gr_tu104_is_valid_gfx_class(struct gk20a *g, u32 class_num)
|
||||||
|
{
|
||||||
|
nvgpu_speculation_barrier();
|
||||||
|
switch (class_num) {
|
||||||
|
case TURING_A:
|
||||||
|
return true;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return gr_gv11b_is_valid_gfx_class(g, class_num);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool gr_tu104_is_valid_compute_class(struct gk20a *g, u32 class_num)
|
||||||
|
{
|
||||||
|
nvgpu_speculation_barrier();
|
||||||
|
switch (class_num) {
|
||||||
|
case TURING_COMPUTE_A:
|
||||||
|
return true;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return gr_gv11b_is_valid_compute_class(g, class_num);
|
||||||
|
}
|
||||||
|
|
||||||
|
int gr_tu104_init_sw_bundle64(struct gk20a *g)
|
||||||
|
{
|
||||||
|
u32 i;
|
||||||
|
u32 last_bundle_data_lo = 0;
|
||||||
|
u32 last_bundle_data_hi = 0;
|
||||||
|
int err = 0;
|
||||||
|
struct netlist_av64_list *sw_bundle64_init =
|
||||||
|
&g->netlist_vars->sw_bundle64_init;
|
||||||
|
|
||||||
|
for (i = 0U; i < sw_bundle64_init->count; i++) {
|
||||||
|
if (i == 0U ||
|
||||||
|
(last_bundle_data_lo != sw_bundle64_init->l[i].value_lo) ||
|
||||||
|
(last_bundle_data_hi != sw_bundle64_init->l[i].value_hi)) {
|
||||||
|
nvgpu_writel(g, gr_pipe_bundle_data_r(),
|
||||||
|
sw_bundle64_init->l[i].value_lo);
|
||||||
|
nvgpu_writel(g, gr_pipe_bundle_data_hi_r(),
|
||||||
|
sw_bundle64_init->l[i].value_hi);
|
||||||
|
|
||||||
|
last_bundle_data_lo = sw_bundle64_init->l[i].value_lo;
|
||||||
|
last_bundle_data_hi = sw_bundle64_init->l[i].value_hi;
|
||||||
|
}
|
||||||
|
|
||||||
|
nvgpu_writel(g, gr_pipe_bundle_address_r(),
|
||||||
|
sw_bundle64_init->l[i].addr);
|
||||||
|
|
||||||
|
if (gr_pipe_bundle_address_value_v(sw_bundle64_init->l[i].addr)
|
||||||
|
== GR_GO_IDLE_BUNDLE) {
|
||||||
|
err = gr_gk20a_wait_idle(g,
|
||||||
|
gk20a_get_gr_idle_timeout(g),
|
||||||
|
GR_IDLE_CHECK_DEFAULT);
|
||||||
|
} else if (nvgpu_platform_is_silicon(g)) {
|
||||||
|
err = gr_gk20a_wait_fe_idle(g,
|
||||||
|
gk20a_get_gr_idle_timeout(g),
|
||||||
|
GR_IDLE_CHECK_DEFAULT);
|
||||||
|
}
|
||||||
|
if (err != 0) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
int gr_tu104_alloc_global_ctx_buffers(struct gk20a *g)
|
||||||
|
{
|
||||||
|
int err;
|
||||||
|
struct gr_gk20a *gr = &g->gr;
|
||||||
|
u32 rtv_circular_buffer_size;
|
||||||
|
|
||||||
|
nvgpu_log_fn(g, " ");
|
||||||
|
|
||||||
|
rtv_circular_buffer_size =
|
||||||
|
(gr_scc_rm_rtv_cb_size_div_256b_default_f() +
|
||||||
|
gr_scc_rm_rtv_cb_size_div_256b_db_adder_f()) *
|
||||||
|
gr_scc_bundle_cb_size_div_256b_byte_granularity_v();
|
||||||
|
nvgpu_log_info(g, "rtv_circular_buffer_size : %u",
|
||||||
|
rtv_circular_buffer_size);
|
||||||
|
|
||||||
|
err = gk20a_gr_alloc_ctx_buffer(g,
|
||||||
|
&gr->global_ctx_buffer[RTV_CIRCULAR_BUFFER],
|
||||||
|
rtv_circular_buffer_size);
|
||||||
|
if (err != 0) {
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = gr_gk20a_alloc_global_ctx_buffers(g);
|
||||||
|
if (err != 0) {
|
||||||
|
goto clean_up;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
clean_up:
|
||||||
|
nvgpu_err(g, "fail");
|
||||||
|
gk20a_gr_destroy_ctx_buffer(g,
|
||||||
|
&gr->global_ctx_buffer[RTV_CIRCULAR_BUFFER]);
|
||||||
|
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
int gr_tu104_map_global_ctx_buffers(struct gk20a *g, struct vm_gk20a *vm,
|
||||||
|
struct nvgpu_gr_ctx *gr_ctx, bool vpr)
|
||||||
|
{
|
||||||
|
int err;
|
||||||
|
u64 *g_bfr_va;
|
||||||
|
u64 *g_bfr_size;
|
||||||
|
int *g_bfr_index;
|
||||||
|
struct gr_gk20a *gr = &g->gr;
|
||||||
|
struct nvgpu_mem *mem;
|
||||||
|
u64 gpu_va;
|
||||||
|
|
||||||
|
nvgpu_log_fn(g, " ");
|
||||||
|
|
||||||
|
g_bfr_va = gr_ctx->global_ctx_buffer_va;
|
||||||
|
g_bfr_size = gr_ctx->global_ctx_buffer_size;
|
||||||
|
g_bfr_index = gr_ctx->global_ctx_buffer_index;
|
||||||
|
|
||||||
|
/* RTV circular buffer */
|
||||||
|
mem = &gr->global_ctx_buffer[RTV_CIRCULAR_BUFFER].mem;
|
||||||
|
gpu_va = nvgpu_gmmu_map(vm, mem, mem->size, 0,
|
||||||
|
gk20a_mem_flag_none, true, mem->aperture);
|
||||||
|
if (gpu_va == 0ULL) {
|
||||||
|
return -ENOMEM;
|
||||||
|
}
|
||||||
|
|
||||||
|
g_bfr_va[RTV_CIRCULAR_BUFFER_VA] = gpu_va;
|
||||||
|
g_bfr_size[RTV_CIRCULAR_BUFFER_VA] = mem->size;
|
||||||
|
g_bfr_index[RTV_CIRCULAR_BUFFER_VA] = RTV_CIRCULAR_BUFFER;
|
||||||
|
|
||||||
|
err = gr_gk20a_map_global_ctx_buffers(g, vm, gr_ctx, vpr);
|
||||||
|
if (err != 0) {
|
||||||
|
goto clean_up;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
clean_up:
|
||||||
|
nvgpu_err(g, "fail");
|
||||||
|
nvgpu_gmmu_unmap(vm, mem, gpu_va);
|
||||||
|
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void gr_tu104_commit_rtv_circular_buffer(struct gk20a *g,
|
||||||
|
struct nvgpu_gr_ctx *gr_ctx,
|
||||||
|
u64 addr, u32 size, u32 gfxpAddSize, bool patch)
|
||||||
|
{
|
||||||
|
gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_rm_rtv_cb_base_r(),
|
||||||
|
gr_scc_rm_rtv_cb_base_addr_39_8_f(addr), patch);
|
||||||
|
gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_rm_rtv_cb_size_r(),
|
||||||
|
gr_scc_rm_rtv_cb_size_div_256b_f(size), patch);
|
||||||
|
gr_gk20a_ctx_patch_write(g, gr_ctx, gr_gpcs_gcc_rm_rtv_cb_base_r(),
|
||||||
|
gr_gpcs_gcc_rm_rtv_cb_base_addr_39_8_f(addr), patch);
|
||||||
|
gr_gk20a_ctx_patch_write(g, gr_ctx, gr_scc_rm_gfxp_reserve_r(),
|
||||||
|
gr_scc_rm_gfxp_reserve_rtv_cb_size_div_256b_f(gfxpAddSize),
|
||||||
|
patch);
|
||||||
|
}
|
||||||
|
|
||||||
|
int gr_tu104_commit_global_ctx_buffers(struct gk20a *g,
|
||||||
|
struct nvgpu_gr_ctx *gr_ctx, bool patch)
|
||||||
|
{
|
||||||
|
int err;
|
||||||
|
u64 addr;
|
||||||
|
u32 size;
|
||||||
|
u32 gfxpaddsize = 0;
|
||||||
|
|
||||||
|
nvgpu_log_fn(g, " ");
|
||||||
|
|
||||||
|
err = gr_gk20a_commit_global_ctx_buffers(g, gr_ctx, patch);
|
||||||
|
if (err != 0) {
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (patch) {
|
||||||
|
int err;
|
||||||
|
err = gr_gk20a_ctx_patch_write_begin(g, gr_ctx, false);
|
||||||
|
if (err != 0) {
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* RTV circular buffer */
|
||||||
|
addr = gr_ctx->global_ctx_buffer_va[RTV_CIRCULAR_BUFFER_VA] >>
|
||||||
|
U64(gr_scc_rm_rtv_cb_base_addr_39_8_align_bits_f());
|
||||||
|
|
||||||
|
size = (gr_scc_rm_rtv_cb_size_div_256b_default_f() +
|
||||||
|
gr_scc_rm_rtv_cb_size_div_256b_db_adder_f());
|
||||||
|
|
||||||
|
gr_tu104_commit_rtv_circular_buffer(g, gr_ctx, addr, size,
|
||||||
|
gfxpaddsize, patch);
|
||||||
|
|
||||||
|
if (patch) {
|
||||||
|
gr_gk20a_ctx_patch_write_end(g, gr_ctx, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int gr_tu104_alloc_gfxp_rtv_cb(struct gk20a *g,
|
||||||
|
struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm)
|
||||||
|
{
|
||||||
|
int err;
|
||||||
|
u32 rtv_cb_size;
|
||||||
|
|
||||||
|
nvgpu_log_fn(g, " ");
|
||||||
|
|
||||||
|
rtv_cb_size =
|
||||||
|
(gr_scc_rm_rtv_cb_size_div_256b_default_f() +
|
||||||
|
gr_scc_rm_rtv_cb_size_div_256b_db_adder_f() +
|
||||||
|
gr_scc_rm_rtv_cb_size_div_256b_gfxp_adder_f()) *
|
||||||
|
gr_scc_rm_rtv_cb_size_div_256b_byte_granularity_v();
|
||||||
|
|
||||||
|
err = gr_gp10b_alloc_buffer(vm,
|
||||||
|
rtv_cb_size,
|
||||||
|
&gr_ctx->gfxp_rtvcb_ctxsw_buffer);
|
||||||
|
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gr_tu104_commit_gfxp_rtv_cb(struct gk20a *g,
|
||||||
|
struct nvgpu_gr_ctx *gr_ctx, bool patch)
|
||||||
|
{
|
||||||
|
u64 addr;
|
||||||
|
u32 rtv_cb_size;
|
||||||
|
u32 gfxp_addr_size;
|
||||||
|
|
||||||
|
nvgpu_log_fn(g, " ");
|
||||||
|
|
||||||
|
rtv_cb_size =
|
||||||
|
(gr_scc_rm_rtv_cb_size_div_256b_default_f() +
|
||||||
|
gr_scc_rm_rtv_cb_size_div_256b_db_adder_f() +
|
||||||
|
gr_scc_rm_rtv_cb_size_div_256b_gfxp_adder_f());
|
||||||
|
gfxp_addr_size = gr_scc_rm_rtv_cb_size_div_256b_gfxp_adder_f();
|
||||||
|
|
||||||
|
/* GFXP RTV circular buffer */
|
||||||
|
addr = (u64)(u64_lo32(gr_ctx->gfxp_rtvcb_ctxsw_buffer.gpu_va) >>
|
||||||
|
gr_scc_rm_rtv_cb_base_addr_39_8_align_bits_f()) |
|
||||||
|
(u64)(u64_hi32(gr_ctx->gfxp_rtvcb_ctxsw_buffer.gpu_va) <<
|
||||||
|
(32U - gr_scc_rm_rtv_cb_base_addr_39_8_align_bits_f()));
|
||||||
|
|
||||||
|
|
||||||
|
gr_tu104_commit_rtv_circular_buffer(g, gr_ctx, addr,
|
||||||
|
rtv_cb_size,
|
||||||
|
gfxp_addr_size,
|
||||||
|
patch);
|
||||||
|
}
|
||||||
|
|
||||||
|
void gr_tu104_bundle_cb_defaults(struct gk20a *g)
|
||||||
|
{
|
||||||
|
struct gr_gk20a *gr = &g->gr;
|
||||||
|
|
||||||
|
gr->bundle_cb_default_size =
|
||||||
|
gr_scc_bundle_cb_size_div_256b__prod_v();
|
||||||
|
gr->min_gpm_fifo_depth =
|
||||||
|
gr_pd_ab_dist_cfg2_state_limit_min_gpm_fifo_depths_v();
|
||||||
|
gr->bundle_cb_token_limit =
|
||||||
|
gr_pd_ab_dist_cfg2_token_limit_init_v();
|
||||||
|
}
|
||||||
|
|
||||||
|
void gr_tu104_cb_size_default(struct gk20a *g)
|
||||||
|
{
|
||||||
|
struct gr_gk20a *gr = &g->gr;
|
||||||
|
|
||||||
|
if (gr->attrib_cb_default_size == 0U) {
|
||||||
|
gr->attrib_cb_default_size =
|
||||||
|
gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v();
|
||||||
|
}
|
||||||
|
gr->alpha_cb_default_size =
|
||||||
|
gr_gpc0_ppc0_cbm_alpha_cb_size_v_default_v();
|
||||||
|
gr->attrib_cb_gfxp_default_size =
|
||||||
|
gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v();
|
||||||
|
gr->attrib_cb_gfxp_size =
|
||||||
|
gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v();
|
||||||
|
}
|
||||||
|
|
||||||
|
void gr_tu104_free_gr_ctx(struct gk20a *g,
|
||||||
|
struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx)
|
||||||
|
{
|
||||||
|
nvgpu_log_fn(g, " ");
|
||||||
|
|
||||||
|
if (gr_ctx != NULL) {
|
||||||
|
nvgpu_dma_unmap_free(vm, &gr_ctx->gfxp_rtvcb_ctxsw_buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
gr_gk20a_free_gr_ctx(g, vm, gr_ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
void gr_tu104_enable_gpc_exceptions(struct gk20a *g)
|
||||||
|
{
|
||||||
|
struct gr_gk20a *gr = &g->gr;
|
||||||
|
u32 tpc_mask;
|
||||||
|
|
||||||
|
gk20a_writel(g, gr_gpcs_tpcs_tpccs_tpc_exception_en_r(),
|
||||||
|
gr_gpcs_tpcs_tpccs_tpc_exception_en_sm_enabled_f());
|
||||||
|
|
||||||
|
tpc_mask =
|
||||||
|
gr_gpcs_gpccs_gpc_exception_en_tpc_f((1 << gr->max_tpc_per_gpc_count) - 1);
|
||||||
|
|
||||||
|
gk20a_writel(g, gr_gpcs_gpccs_gpc_exception_en_r(),
|
||||||
|
(tpc_mask | gr_gpcs_gpccs_gpc_exception_en_gcc_f(1) |
|
||||||
|
gr_gpcs_gpccs_gpc_exception_en_gpccs_f(1) |
|
||||||
|
gr_gpcs_gpccs_gpc_exception_en_gpcmmu_f(1)));
|
||||||
|
}
|
||||||
|
|
||||||
|
int gr_tu104_get_offset_in_gpccs_segment(struct gk20a *g,
|
||||||
|
enum ctxsw_addr_type addr_type,
|
||||||
|
u32 num_tpcs,
|
||||||
|
u32 num_ppcs,
|
||||||
|
u32 reg_list_ppc_count,
|
||||||
|
u32 *__offset_in_segment)
|
||||||
|
{
|
||||||
|
u32 offset_in_segment = 0;
|
||||||
|
u32 num_pes_per_gpc = nvgpu_get_litter_value(g,
|
||||||
|
GPU_LIT_NUM_PES_PER_GPC);
|
||||||
|
|
||||||
|
if (addr_type == CTXSW_ADDR_TYPE_TPC) {
|
||||||
|
/*
|
||||||
|
* reg = g->netlist_vars->ctxsw_regs.tpc.l;
|
||||||
|
* offset_in_segment = 0;
|
||||||
|
*/
|
||||||
|
} else if (addr_type == CTXSW_ADDR_TYPE_PPC) {
|
||||||
|
/*
|
||||||
|
* The ucode stores TPC data before PPC data.
|
||||||
|
* Advance offset past TPC data to PPC data.
|
||||||
|
*/
|
||||||
|
offset_in_segment =
|
||||||
|
((g->netlist_vars->ctxsw_regs.tpc.count *
|
||||||
|
num_tpcs) << 2);
|
||||||
|
} else if (addr_type == CTXSW_ADDR_TYPE_GPC) {
|
||||||
|
/*
|
||||||
|
* The ucode stores TPC/PPC data before GPC data.
|
||||||
|
* Advance offset past TPC/PPC data to GPC data.
|
||||||
|
*
|
||||||
|
* Note 1 PES_PER_GPC case
|
||||||
|
*/
|
||||||
|
if (num_pes_per_gpc > 1U) {
|
||||||
|
offset_in_segment =
|
||||||
|
(((g->netlist_vars->ctxsw_regs.tpc.count *
|
||||||
|
num_tpcs) << 2) +
|
||||||
|
((reg_list_ppc_count * num_ppcs) << 2));
|
||||||
|
} else {
|
||||||
|
offset_in_segment =
|
||||||
|
((g->netlist_vars->ctxsw_regs.tpc.count *
|
||||||
|
num_tpcs) << 2);
|
||||||
|
}
|
||||||
|
} else if ((addr_type == CTXSW_ADDR_TYPE_EGPC) ||
|
||||||
|
(addr_type == CTXSW_ADDR_TYPE_ETPC)) {
|
||||||
|
if (num_pes_per_gpc > 1U) {
|
||||||
|
offset_in_segment =
|
||||||
|
((g->netlist_vars->ctxsw_regs.tpc.count *
|
||||||
|
num_tpcs) << 2) +
|
||||||
|
((reg_list_ppc_count * num_ppcs) << 2) +
|
||||||
|
(g->netlist_vars->ctxsw_regs.gpc.count << 2);
|
||||||
|
} else {
|
||||||
|
offset_in_segment =
|
||||||
|
((g->netlist_vars->ctxsw_regs.tpc.count *
|
||||||
|
num_tpcs) << 2) +
|
||||||
|
(g->netlist_vars->ctxsw_regs.gpc.count << 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* aligned to next 256 byte */
|
||||||
|
offset_in_segment = ALIGN(offset_in_segment, 256);
|
||||||
|
|
||||||
|
nvgpu_log(g, gpu_dbg_info | gpu_dbg_gpu_dbg,
|
||||||
|
"egpc etpc offset_in_segment 0x%#08x",
|
||||||
|
offset_in_segment);
|
||||||
|
} else {
|
||||||
|
nvgpu_log_fn(g, "Unknown address type.");
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
*__offset_in_segment = offset_in_segment;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Apply the SET_SM_DISP_CTRL software method: read-modify-write the
 * compute shader quad field of GR_GPCS_TPCS_SM_DISP_CTRL per @data.
 */
static void gr_tu104_set_sm_disp_ctrl(struct gk20a *g, u32 data)
{
	u32 reg_val;
	u32 quad_mode = data &
		NVC5C0_SET_SM_DISP_CTRL_COMPUTE_SHADER_QUAD_MASK;

	nvgpu_log_fn(g, " ");

	reg_val = nvgpu_readl(g, gr_gpcs_tpcs_sm_disp_ctrl_r());

	if (quad_mode ==
			NVC5C0_SET_SM_DISP_CTRL_COMPUTE_SHADER_QUAD_DISABLE) {
		reg_val = set_field(reg_val,
			gr_gpcs_tpcs_sm_disp_ctrl_compute_shader_quad_m(),
			gr_gpcs_tpcs_sm_disp_ctrl_compute_shader_quad_disable_f());
	} else if (quad_mode ==
			NVC5C0_SET_SM_DISP_CTRL_COMPUTE_SHADER_QUAD_ENABLE) {
		reg_val = set_field(reg_val,
			gr_gpcs_tpcs_sm_disp_ctrl_compute_shader_quad_m(),
			gr_gpcs_tpcs_sm_disp_ctrl_compute_shader_quad_enable_f());
	}
	/* Any other quad_mode value leaves the register unchanged. */

	nvgpu_writel(g, gr_gpcs_tpcs_sm_disp_ctrl_r(), reg_val);
}
|
||||||
|
|
||||||
|
/*
 * Handle a software method for the Turing graphics (TURING_A) or
 * compute (TURING_COMPUTE_A) class.
 *
 * @offset is the method offset in 32-bit words; it is converted to a
 * byte offset (offset << 2) before matching against the class method
 * defines. Returns 0 when the method was handled, -EINVAL otherwise.
 */
int gr_tu104_handle_sw_method(struct gk20a *g, u32 addr,
	u32 class_num, u32 offset, u32 data)
{
	nvgpu_log_fn(g, " ");

	if (class_num == TURING_COMPUTE_A) {
		/*
		 * Method offset/data originate from the userspace
		 * pushbuffer: fence off speculative execution before
		 * dispatching on them (CVE-style Spectre-v1 hygiene,
		 * matching the barriers added across this driver).
		 */
		nvgpu_speculation_barrier();
		switch (offset << 2) {
		case NVC5C0_SET_SHADER_EXCEPTIONS:
			gv11b_gr_set_shader_exceptions(g, data);
			break;
		case NVC5C0_SET_SKEDCHECK:
			gr_gv11b_set_skedcheck(g, data);
			break;
		case NVC5C0_SET_SM_DISP_CTRL:
			gr_tu104_set_sm_disp_ctrl(g, data);
			break;
		case NVC5C0_SET_SHADER_CUT_COLLECTOR:
			gr_gv11b_set_shader_cut_collector(g, data);
			break;
		default:
			goto fail;
		}
	}

	if (class_num == TURING_A) {
		nvgpu_speculation_barrier();
		switch (offset << 2) {
		case NVC597_SET_SHADER_EXCEPTIONS:
			gv11b_gr_set_shader_exceptions(g, data);
			break;
		case NVC597_SET_CIRCULAR_BUFFER_SIZE:
			g->ops.gr.set_circular_buffer_size(g, data);
			break;
		case NVC597_SET_ALPHA_CIRCULAR_BUFFER_SIZE:
			g->ops.gr.set_alpha_circular_buffer_size(g, data);
			break;
		case NVC597_SET_GO_IDLE_TIMEOUT:
			gr_gv11b_set_go_idle_timeout(g, data);
			break;
		/*
		 * NOTE(review): NVC097_* (Volta) define inside the
		 * TURING_A switch — presumably aliases the NVC597
		 * value; confirm against the class headers.
		 */
		case NVC097_SET_COALESCE_BUFFER_SIZE:
			gr_gv11b_set_coalesce_buffer_size(g, data);
			break;
		case NVC597_SET_TEX_IN_DBG:
			gr_gv11b_set_tex_in_dbg(g, data);
			break;
		case NVC597_SET_SKEDCHECK:
			gr_gv11b_set_skedcheck(g, data);
			break;
		case NVC597_SET_BES_CROP_DEBUG3:
			g->ops.gr.set_bes_crop_debug3(g, data);
			break;
		case NVC597_SET_BES_CROP_DEBUG4:
			g->ops.gr.set_bes_crop_debug4(g, data);
			break;
		case NVC597_SET_SM_DISP_CTRL:
			gr_tu104_set_sm_disp_ctrl(g, data);
			break;
		case NVC597_SET_SHADER_CUT_COLLECTOR:
			gr_gv11b_set_shader_cut_collector(g, data);
			break;
		default:
			goto fail;
		}
	}

	return 0;

fail:
	return -EINVAL;
}
|
||||||
|
|
||||||
|
/* No SM DSM perf register info to initialize on TU104. */
void gr_tu104_init_sm_dsm_reg_info(void)
{
}
|
||||||
|
|
||||||
|
void gr_tu104_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
|
||||||
|
u32 *num_sm_dsm_perf_ctrl_regs,
|
||||||
|
u32 **sm_dsm_perf_ctrl_regs,
|
||||||
|
u32 *ctrl_register_stride)
|
||||||
|
{
|
||||||
|
*num_sm_dsm_perf_ctrl_regs = 0;
|
||||||
|
*sm_dsm_perf_ctrl_regs = NULL;
|
||||||
|
*ctrl_register_stride = 0;
|
||||||
|
}
|
||||||
|
>>>>>>> CHANGE (f0762e gpu: nvgpu: add speculative barrier)
|
||||||
Reference in New Issue
Block a user