From f0762ed4831b3fe6cc953a4a4ec26c2537dcb69f Mon Sep 17 00:00:00 2001 From: Ranjanikar Nikhil Prabhakarrao Date: Thu, 13 Dec 2018 17:29:20 +0530 Subject: [PATCH] gpu: nvgpu: add speculative barrier Data can be speculatively stored and code flow can be hijacked. To mitigate this problem, insert a speculation barrier. Bug 200447167 Change-Id: Ia865ff2add8b30de49aa970715625b13e8f71c08 Signed-off-by: Ranjanikar Nikhil Prabhakarrao Reviewed-on: https://git-master.nvidia.com/r/1972221 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/common/fifo/submit.c | 1 + drivers/gpu/nvgpu/common/fifo/tsg.c | 1 + drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 2 ++ drivers/gpu/nvgpu/gp10b/gr_gp10b.c | 1 + drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 3 +++ drivers/gpu/nvgpu/os/linux/dmabuf_vidmem.c | 1 + drivers/gpu/nvgpu/os/linux/ioctl_as.c | 2 ++ drivers/gpu/nvgpu/os/linux/ioctl_channel.c | 5 +++++ drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c | 11 +++++++++++ drivers/gpu/nvgpu/os/linux/ioctl_dbg.c | 10 ++++++++++ drivers/gpu/nvgpu/os/linux/ioctl_tsg.c | 2 ++ drivers/gpu/nvgpu/os/linux/sched.c | 1 + drivers/gpu/nvgpu/tu104/gr_tu104.c | 3 +++ 13 files changed, 43 insertions(+) diff --git a/drivers/gpu/nvgpu/common/fifo/submit.c b/drivers/gpu/nvgpu/common/fifo/submit.c index 02322d77b..c5ddeab9a 100644 --- a/drivers/gpu/nvgpu/common/fifo/submit.c +++ b/drivers/gpu/nvgpu/common/fifo/submit.c @@ -217,6 +217,7 @@ static int nvgpu_submit_append_gpfifo_user_direct(struct channel_gk20a *c, u32 end = start + len; /* exclusive */ int err; + nvgpu_speculation_barrier(); if (end > gpfifo_size) { /* wrap-around */ u32 length0 = gpfifo_size - start; diff --git a/drivers/gpu/nvgpu/common/fifo/tsg.c b/drivers/gpu/nvgpu/common/fifo/tsg.c index 86f090a2c..63e3a838e 100644 --- a/drivers/gpu/nvgpu/common/fifo/tsg.c +++ b/drivers/gpu/nvgpu/common/fifo/tsg.c @@ -366,6 +366,7 @@ int gk20a_tsg_set_runlist_interleave(struct tsg_gk20a *tsg, u32 level) nvgpu_log(g, gpu_dbg_sched, "tsgid=%u 
interleave=%u", tsg->tsgid, level); + nvgpu_speculation_barrier(); switch (level) { case NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW: case NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_MEDIUM: diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index c880c86cf..9a8978000 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -3642,6 +3642,7 @@ int gr_gk20a_add_zbc(struct gk20a *g, struct gr_gk20a *gr, /* no endian swap ? */ nvgpu_mutex_acquire(&gr->zbc_lock); + nvgpu_speculation_barrier(); switch (zbc_val->type) { case GK20A_ZBC_TYPE_COLOR: /* search existing tables */ @@ -3748,6 +3749,7 @@ int gr_gk20a_query_zbc(struct gk20a *g, struct gr_gk20a *gr, u32 index = query_params->index_size; u32 i; + nvgpu_speculation_barrier(); switch (query_params->type) { case GK20A_ZBC_TYPE_INVALID: query_params->index_size = GK20A_ZBC_TABLE_SIZE; diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c index 2a59f5bf0..37756b076 100644 --- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c @@ -51,6 +51,7 @@ bool gr_gp10b_is_valid_class(struct gk20a *g, u32 class_num) { bool valid = false; + nvgpu_speculation_barrier(); switch (class_num) { case PASCAL_COMPUTE_A: case PASCAL_A: diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 84e3f2d99..8e912d24d 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -75,6 +75,7 @@ bool gr_gv11b_is_valid_class(struct gk20a *g, u32 class_num) { bool valid = false; + nvgpu_speculation_barrier(); switch (class_num) { case VOLTA_COMPUTE_A: case VOLTA_A: @@ -104,6 +105,7 @@ bool gr_gv11b_is_valid_gfx_class(struct gk20a *g, u32 class_num) { bool valid = false; + nvgpu_speculation_barrier(); switch (class_num) { case VOLTA_A: case PASCAL_A: @@ -138,6 +140,7 @@ bool gr_gv11b_is_valid_compute_class(struct gk20a *g, u32 class_num) { bool valid = false; + 
nvgpu_speculation_barrier(); switch (class_num) { case VOLTA_COMPUTE_A: case PASCAL_COMPUTE_A: diff --git a/drivers/gpu/nvgpu/os/linux/dmabuf_vidmem.c b/drivers/gpu/nvgpu/os/linux/dmabuf_vidmem.c index 8b38a9e1c..bada5dc79 100644 --- a/drivers/gpu/nvgpu/os/linux/dmabuf_vidmem.c +++ b/drivers/gpu/nvgpu/os/linux/dmabuf_vidmem.c @@ -244,6 +244,7 @@ int nvgpu_vidmem_buf_access_memory(struct gk20a *g, struct dma_buf *dmabuf, vidmem_buf = dmabuf->priv; mem = vidmem_buf->mem; + nvgpu_speculation_barrier(); switch (cmd) { case NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_READ: nvgpu_mem_rd_n(g, mem, offset, buffer, size); diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_as.c b/drivers/gpu/nvgpu/os/linux/ioctl_as.c index 4da83b867..34f2de6d8 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_as.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_as.c @@ -170,6 +170,7 @@ static int gk20a_as_ioctl_map_buffer_batch( nvgpu_vm_unmap(as_share->vm, unmap_args.offset, &batch); } + nvgpu_speculation_barrier(); if (err) { nvgpu_vm_mapping_batch_finish(as_share->vm, &batch); @@ -355,6 +356,7 @@ long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (err) return err; + nvgpu_speculation_barrier(); switch (cmd) { case NVGPU_AS_IOCTL_BIND_CHANNEL: trace_gk20a_as_ioctl_bind_channel(g->name); diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c index 5133074b7..318aeeef6 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c @@ -290,6 +290,7 @@ static int gk20a_channel_cycle_stats_snapshot(struct channel_gk20a *ch, if (!args->dmabuf_fd) return -EINVAL; + nvgpu_speculation_barrier(); /* handle the command (most frequent cases first) */ switch (args->cmd) { case NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT_CMD_FLUSH: @@ -874,6 +875,7 @@ clean_up: */ u32 nvgpu_get_common_runlist_level(u32 level) { + nvgpu_speculation_barrier(); switch (level) { case NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW: return 
NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW; @@ -982,6 +984,7 @@ u32 nvgpu_get_ioctl_compute_preempt_mode(u32 compute_preempt_mode) */ static u32 nvgpu_get_common_graphics_preempt_mode(u32 graphics_preempt_mode) { + nvgpu_speculation_barrier(); switch (graphics_preempt_mode) { case NVGPU_GRAPHICS_PREEMPTION_MODE_WFI: return NVGPU_PREEMPTION_MODE_GRAPHICS_WFI; @@ -998,6 +1001,7 @@ static u32 nvgpu_get_common_graphics_preempt_mode(u32 graphics_preempt_mode) */ static u32 nvgpu_get_common_compute_preempt_mode(u32 compute_preempt_mode) { + nvgpu_speculation_barrier(); switch (compute_preempt_mode) { case NVGPU_COMPUTE_PREEMPTION_MODE_WFI: return NVGPU_PREEMPTION_MODE_COMPUTE_WFI; @@ -1128,6 +1132,7 @@ long gk20a_channel_ioctl(struct file *filp, /* this ioctl call keeps a ref to the file which keeps a ref to the * channel */ + nvgpu_speculation_barrier(); switch (cmd) { case NVGPU_IOCTL_CHANNEL_OPEN: err = gk20a_channel_open_ioctl(ch->g, diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c index 513ec8ca9..a0fa54e53 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c @@ -367,6 +367,7 @@ gk20a_ctrl_ioctl_gpu_characteristics( if (request->gpu_characteristics_buf_size > 0) { size_t write_size = sizeof(gpu); + nvgpu_speculation_barrier(); if (write_size > request->gpu_characteristics_buf_size) write_size = request->gpu_characteristics_buf_size; @@ -557,6 +558,7 @@ static int gk20a_ctrl_get_tpc_masks(struct gk20a *g, if (args->mask_buf_size > 0) { size_t write_size = gpc_tpc_mask_size; + nvgpu_speculation_barrier(); if (write_size > args->mask_buf_size) write_size = args->mask_buf_size; @@ -581,6 +583,7 @@ static int gk20a_ctrl_get_fbp_l2_masks( if (args->mask_buf_size > 0) { size_t write_size = fbp_l2_mask_size; + nvgpu_speculation_barrier(); if (write_size > args->mask_buf_size) write_size = args->mask_buf_size; @@ -1219,6 +1222,7 @@ static int nvgpu_gpu_clk_set_info(struct gk20a *g, 
nvgpu_gpu_convert_clk_domain(clk_info.clk_domain))) return -EINVAL; } + nvgpu_speculation_barrier(); entry = (struct nvgpu_gpu_clk_info __user *) (uintptr_t)args->clk_info_entries; @@ -1238,6 +1242,7 @@ static int nvgpu_gpu_clk_set_info(struct gk20a *g, nvgpu_gpu_convert_clk_domain(clk_info.clk_domain), freq_mhz); } + nvgpu_speculation_barrier(); ret = nvgpu_clk_arb_commit_request_fd(g, session, fd); if (ret < 0) return ret; @@ -1307,6 +1312,7 @@ static int nvgpu_gpu_clk_get_info(struct gk20a *g, clk_info.clk_type = args->clk_type; } + nvgpu_speculation_barrier(); switch (clk_info.clk_type) { case NVGPU_GPU_CLK_TYPE_TARGET: err = nvgpu_clk_arb_get_session_target_mhz(session, @@ -1340,6 +1346,7 @@ static int nvgpu_gpu_clk_get_info(struct gk20a *g, return -EFAULT; } + nvgpu_speculation_barrier(); args->num_entries = num_entries; return 0; @@ -1380,6 +1387,7 @@ static int nvgpu_gpu_get_voltage(struct gk20a *g, if (err) return err; + nvgpu_speculation_barrier(); switch (args->which) { case NVGPU_GPU_VOLTAGE_CORE: err = volt_get_voltage(g, CTRL_VOLT_DOMAIN_LOGIC, &args->voltage); @@ -1602,6 +1610,7 @@ static int nvgpu_gpu_set_deterministic_opts(struct gk20a *g, break; } + nvgpu_speculation_barrier(); nvgpu_rwsem_up_read(&g->deterministic_busy); out: @@ -1646,6 +1655,7 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg gk20a_idle(g); } + nvgpu_speculation_barrier(); switch (cmd) { case NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE: get_ctx_size_args = (struct nvgpu_gpu_zcull_get_ctx_size_args *)buf; @@ -1692,6 +1702,7 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg zbc_val->format = set_table_args->format; zbc_val->type = set_table_args->type; + nvgpu_speculation_barrier(); switch (zbc_val->type) { case GK20A_ZBC_TYPE_COLOR: for (i = 0U; i < GK20A_ZBC_COLOR_VALUE_SIZE; i++) { diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c index dd6dae5cb..245bcb96e 100644 --- 
a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c @@ -306,6 +306,7 @@ static int nvgpu_dbg_gpu_ioctl_read_single_sm_error_state( if (args->sm_error_state_record_size > 0) { size_t write_size = sizeof(*sm_error_state); + nvgpu_speculation_barrier(); if (write_size > args->sm_error_state_record_size) write_size = args->sm_error_state_record_size; @@ -353,6 +354,7 @@ static int nvgpu_dbg_timeout_enable(struct dbg_session_gk20a *dbg_s, nvgpu_log(g, gpu_dbg_gpu_dbg, "Timeouts mode requested : %d", timeout_mode); + nvgpu_speculation_barrier(); switch (timeout_mode) { case NVGPU_DBG_GPU_IOCTL_TIMEOUT_ENABLE: if (dbg_s->is_timeout_disabled == true) @@ -909,6 +911,7 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s, ops_offset += num_ops; } + nvgpu_speculation_barrier(); nvgpu_kfree(g, linux_fragment); /* enable powergate, if previously disabled */ @@ -999,6 +1002,7 @@ static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s, static u32 nvgpu_hwpm_ctxsw_mode_to_common_mode(u32 mode) { + nvgpu_speculation_barrier(); switch (mode){ case NVGPU_DBG_GPU_HWPM_CTXSW_MODE_NO_CTXSW: return NVGPU_DBG_HWPM_CTXSW_MODE_NO_CTXSW; @@ -1098,6 +1102,7 @@ static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm( goto clean_up; } + nvgpu_speculation_barrier(); switch (action) { case NVGPU_DBG_GPU_SUSPEND_ALL_SMS: gr_gk20a_suspend_context(ch); @@ -1311,6 +1316,7 @@ static int gk20a_dbg_gpu_events_ctrl(struct dbg_session_gk20a *dbg_s, return -EINVAL; } + nvgpu_speculation_barrier(); switch (args->cmd) { case NVGPU_DBG_GPU_EVENTS_CTRL_CMD_ENABLE: gk20a_dbg_gpu_events_enable(dbg_s); @@ -1480,6 +1486,7 @@ nvgpu_dbg_gpu_ioctl_suspend_resume_contexts(struct dbg_session_gk20a *dbg_s, if (err) return err; + nvgpu_speculation_barrier(); switch (args->action) { case NVGPU_DBG_GPU_SUSPEND_ALL_CONTEXTS: err = g->ops.gr.suspend_contexts(g, dbg_s, @@ -1571,6 +1578,7 @@ static int nvgpu_dbg_gpu_ioctl_access_fb_memory(struct 
dbg_session_gk20a *dbg_s, size -= access_size; offset += access_size; } + nvgpu_speculation_barrier(); fail_idle: gk20a_idle(g); @@ -1811,6 +1819,7 @@ static int nvgpu_dbg_gpu_set_sm_exception_type_mask(struct dbg_session_gk20a *db u32 sm_exception_mask_type = NVGPU_SM_EXCEPTION_TYPE_MASK_NONE; struct channel_gk20a *ch = NULL; + nvgpu_speculation_barrier(); switch (args->exception_type_mask) { case NVGPU_DBG_GPU_IOCTL_SET_SM_EXCEPTION_TYPE_MASK_FATAL: sm_exception_mask_type = NVGPU_SM_EXCEPTION_TYPE_MASK_FATAL; @@ -1889,6 +1898,7 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, /* protect from threaded user space calls */ nvgpu_mutex_acquire(&dbg_s->ioctl_lock); + nvgpu_speculation_barrier(); switch (cmd) { case NVGPU_DBG_GPU_IOCTL_BIND_CHANNEL: err = dbg_bind_channel_gk20a(dbg_s, diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c b/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c index 1d3290cb0..6c726e6a9 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c @@ -362,6 +362,7 @@ static int gk20a_tsg_event_id_ctrl(struct gk20a *g, struct tsg_gk20a *tsg, if (args->event_id >= NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX) return -EINVAL; + nvgpu_speculation_barrier(); switch (args->cmd) { case NVGPU_IOCTL_CHANNEL_EVENT_ID_CMD_ENABLE: err = gk20a_tsg_event_id_enable(tsg, args->event_id, &fd); @@ -575,6 +576,7 @@ static int gk20a_tsg_ioctl_read_single_sm_error_state(struct gk20a *g, if (args->record_size > 0) { size_t write_size = sizeof(*sm_error_state); + nvgpu_speculation_barrier(); if (write_size > args->record_size) write_size = args->record_size; diff --git a/drivers/gpu/nvgpu/os/linux/sched.c b/drivers/gpu/nvgpu/os/linux/sched.c index 541744270..efdf8e8ff 100644 --- a/drivers/gpu/nvgpu/os/linux/sched.c +++ b/drivers/gpu/nvgpu/os/linux/sched.c @@ -454,6 +454,7 @@ long gk20a_sched_dev_ioctl(struct file *filp, unsigned int cmd, return -EFAULT; } + nvgpu_speculation_barrier(); switch (cmd) { case NVGPU_SCHED_IOCTL_GET_TSGS: 
err = gk20a_sched_dev_ioctl_get_tsgs(sched, diff --git a/drivers/gpu/nvgpu/tu104/gr_tu104.c b/drivers/gpu/nvgpu/tu104/gr_tu104.c index 31866ce69..b487322eb 100644 --- a/drivers/gpu/nvgpu/tu104/gr_tu104.c +++ b/drivers/gpu/nvgpu/tu104/gr_tu104.c @@ -41,6 +41,7 @@ bool gr_tu104_is_valid_class(struct gk20a *g, u32 class_num) { + nvgpu_speculation_barrier(); switch (class_num) { case TURING_CHANNEL_GPFIFO_A: case TURING_A: @@ -56,6 +57,7 @@ bool gr_tu104_is_valid_class(struct gk20a *g, u32 class_num) bool gr_tu104_is_valid_gfx_class(struct gk20a *g, u32 class_num) { + nvgpu_speculation_barrier(); switch (class_num) { case TURING_A: return true; @@ -68,6 +70,7 @@ bool gr_tu104_is_valid_gfx_class(struct gk20a *g, u32 class_num) bool gr_tu104_is_valid_compute_class(struct gk20a *g, u32 class_num) { + nvgpu_speculation_barrier(); switch (class_num) { case TURING_COMPUTE_A: return true;