From b2caba06f0eb9b96e6ee3320db78324209403bcb Mon Sep 17 00:00:00 2001 From: Vinod G Date: Wed, 3 Apr 2019 14:07:05 -0700 Subject: [PATCH] gpu: nvgpu: move handle_notify_pending hal to hal.gr.intr Move handle_notify_pending hal to hal.gr.intr Move gk20a_gr_handle_notify_pending code from gr_gk20a.c to common.gr.intr as nvgpu_gr_intr_handle_notify_pending function. JIRA NVGPU-1891 JIRA NVGPU-3016 Change-Id: Ib3284a83253b03e5708674fce683331ee20b8213 Signed-off-by: Vinod G Reviewed-on: https://git-master.nvidia.com/r/2089172 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/Makefile | 3 +- drivers/gpu/nvgpu/Makefile.sources | 1 + drivers/gpu/nvgpu/common/gr/gr_intr.c | 182 +++++++++++++++++++ drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 164 +---------------- drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 2 - drivers/gpu/nvgpu/gm20b/hal_gm20b.c | 4 +- drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 4 +- drivers/gpu/nvgpu/gv100/hal_gv100.c | 4 +- drivers/gpu/nvgpu/gv11b/hal_gv11b.c | 4 +- drivers/gpu/nvgpu/include/nvgpu/gk20a.h | 4 +- drivers/gpu/nvgpu/include/nvgpu/gr/gr_intr.h | 4 + drivers/gpu/nvgpu/tu104/hal_tu104.c | 4 +- 12 files changed, 207 insertions(+), 173 deletions(-) create mode 100644 drivers/gpu/nvgpu/common/gr/gr_intr.c diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index ed89bd02d..a58a32e29 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile @@ -65,8 +65,9 @@ nvgpu-y += \ common/regops/regops_gv11b.o \ common/regops/regops_tu104.o \ common/ltc/ltc.o \ - common/gr/gr.o \ common/cbc/cbc.o \ + common/gr/gr.o \ + common/gr/gr_intr.o \ common/gr/global_ctx.o \ common/gr/ctx.o \ common/gr/gr_falcon.o \ diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources index 0bfd6b68e..567bb71ab 100644 --- a/drivers/gpu/nvgpu/Makefile.sources +++ b/drivers/gpu/nvgpu/Makefile.sources @@ -107,6 +107,7 @@ srcs += common/sim.c \ common/engine_queues/engine_emem_queue.c \ 
common/engine_queues/engine_fb_queue.c \ common/gr/gr.c \ + common/gr/gr_intr.c \ common/gr/global_ctx.c \ common/gr/subctx.c \ common/gr/ctx.c \ diff --git a/drivers/gpu/nvgpu/common/gr/gr_intr.c b/drivers/gpu/nvgpu/common/gr/gr_intr.c new file mode 100644 index 000000000..c77627654 --- /dev/null +++ b/drivers/gpu/nvgpu/common/gr/gr_intr.c @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#if defined(CONFIG_GK20A_CYCLE_STATS)
+/*
+ * Decide whether a cyclestats operation may access the BAR0 register at
+ * @offset.
+ *
+ * Every offset is accepted when g->allow_all is set. Otherwise the offset
+ * must fit in 24 bits, be 4-byte aligned, be on the global BAR0 whitelist
+ * and pass nvgpu_io_valid_reg().
+ */
+static inline bool is_valid_cyclestats_bar0_offset_gk20a(struct gk20a *g,
+							 u32 offset)
+{
+	/* support only 24-bit 4-byte aligned offsets */
+	bool valid = !(offset & 0xFF000003U);
+
+	if (g->allow_all) {
+		return true;
+	}
+
+	/* whitelist check */
+	valid = valid &&
+		is_bar0_global_offset_whitelisted_gk20a(g, offset);
+	/* resource size check in case there was a problem
+	 * with allocating the assumed size of bar0 */
+	valid = valid && nvgpu_io_valid_reg(g, offset);
+	return valid;
+}
+
+/*
+ * Walk the channel's cyclestats buffer starting at byte @offset and execute
+ * each BAR0_READ32/BAR0_WRITE32 element in turn.
+ *
+ * The walk stops at OP_END, at an unrecognized operation, or at the first
+ * element that fails validation (header outside the buffer, bad element
+ * size, BAR0 offset not allowed, bit range that cannot be shifted safely).
+ * Elements that fail after their header has been located are marked
+ * "failed"; every element that reaches the end of an iteration is marked
+ * "completed". The buffer mutex is held for the whole walk.
+ */
+static void gr_intr_cyclestats_exec(struct gk20a *g, struct channel_gk20a *ch,
+				    u32 offset)
+{
+	void *virtual_address;
+	u32 buffer_size;
+	bool done = false;
+
+	nvgpu_mutex_acquire(&ch->cyclestate.cyclestate_buffer_mutex);
+
+	virtual_address = ch->cyclestate.cyclestate_buffer;
+	buffer_size = ch->cyclestate.cyclestate_buffer_size;
+
+	while (!done) {
+		struct share_buffer_head *sh_hdr;
+		u32 min_element_size;
+
+		/* validate offset */
+		if (offset + sizeof(struct share_buffer_head) > buffer_size ||
+		    offset + sizeof(struct share_buffer_head) < offset) {
+			nvgpu_err(g,
+				"cyclestats buffer overrun at offset 0x%x",
+				offset);
+			break;
+		}
+
+		sh_hdr = (struct share_buffer_head *)
+			((char *)virtual_address + offset);
+
+		/* OP_END carries only a header; other ops are full elements */
+		min_element_size =
+			(sh_hdr->operation == OP_END ?
+			 sizeof(struct share_buffer_head) :
+			 sizeof(struct gk20a_cyclestate_buffer_elem));
+
+		/* validate sh_hdr->size */
+		if (sh_hdr->size < min_element_size ||
+		    offset + sh_hdr->size > buffer_size ||
+		    offset + sh_hdr->size < offset) {
+			nvgpu_err(g,
+				"bad cyclestate buffer header size at offset 0x%x",
+				offset);
+			sh_hdr->failed = true;
+			break;
+		}
+
+		switch (sh_hdr->operation) {
+		case OP_END:
+			done = true;
+			break;
+
+		case BAR0_READ32:
+		case BAR0_WRITE32:
+		{
+			struct gk20a_cyclestate_buffer_elem *op_elem =
+				(struct gk20a_cyclestate_buffer_elem *)sh_hdr;
+			bool valid = is_valid_cyclestats_bar0_offset_gk20a(
+					g, op_elem->offset_bar0);
+			u32 raw_reg;
+			u64 mask_orig;
+			u64 v;
+
+			if (!valid) {
+				nvgpu_err(g,
+					"invalid cycletstats op offset: 0x%x",
+					op_elem->offset_bar0);
+
+				sh_hdr->failed = true;
+				done = true;
+				break;
+			}
+
+			/*
+			 * The bit range is read from the shared buffer and
+			 * feeds the 64-bit shifts below; a shift count of 64
+			 * or more is undefined behaviour, so reject it here.
+			 * NOTE(review): the field types are declared outside
+			 * this file; the comparison assumes they are
+			 * unsigned - confirm.
+			 */
+			if (op_elem->first_bit >= 64U ||
+			    op_elem->last_bit >= 63U) {
+				nvgpu_err(g,
+					"invalid cyclestats bit range [%u:%u]",
+					(unsigned int)op_elem->last_bit,
+					(unsigned int)op_elem->first_bit);
+
+				sh_hdr->failed = true;
+				done = true;
+				break;
+			}
+
+			/* bits first_bit..last_bit set, all others clear */
+			mask_orig =
+				((1ULL << (op_elem->last_bit + 1)) - 1) &
+				~((1ULL << op_elem->first_bit) - 1);
+
+			raw_reg = nvgpu_readl(g, op_elem->offset_bar0);
+
+			switch (sh_hdr->operation) {
+			case BAR0_READ32:
+				op_elem->data = ((raw_reg & mask_orig)
+					>> op_elem->first_bit);
+				break;
+
+			case BAR0_WRITE32:
+				/*
+				 * Read-modify-write: keep the register bits
+				 * outside the mask unless the mask covers
+				 * the whole 32-bit register.
+				 */
+				v = 0;
+				if ((unsigned int)mask_orig !=
+				    ~((unsigned int)0)) {
+					v = (unsigned int)
+						(raw_reg & ~mask_orig);
+				}
+
+				v |= ((op_elem->data << op_elem->first_bit)
+					& mask_orig);
+				nvgpu_writel(g, op_elem->offset_bar0,
+					(unsigned int)v);
+				break;
+			default:
+				/*
+				 * Not expected: the enclosing case admits
+				 * only BAR0_READ32/BAR0_WRITE32. Nothing to
+				 * do if it is reached.
+				 */
+				break;
+			}
+		}
+		break;
+
+		default:
+			/* no operation content case */
+			done = true;
+			break;
+		}
+		sh_hdr->completed = true;
+		offset += sh_hdr->size;
+	}
+	nvgpu_mutex_release(&ch->cyclestate.cyclestate_buffer_mutex);
+}
+#endif
+
+/*
+ * Handle a pending GR notify interrupt for the channel in @isr_data.
+ *
+ * Nothing is done when isr_data->ch is NULL or the channel has no TSG.
+ * Otherwise, when CONFIG_GK20A_CYCLE_STATS is enabled and the channel has a
+ * cyclestats buffer and a non-zero payload in isr_data->data_lo, the
+ * cyclestats operations starting at that offset are executed. Finally all
+ * waiters on ch->notifier_wq are woken.
+ *
+ * Always returns 0.
+ */
+int nvgpu_gr_intr_handle_notify_pending(struct gk20a *g,
+					struct gr_gk20a_isr_data *isr_data)
+{
+	struct channel_gk20a *ch = isr_data->ch;
+
+	if (ch == NULL || tsg_gk20a_from_ch(ch) == NULL) {
+		return 0;
+	}
+
+#if defined(CONFIG_GK20A_CYCLE_STATS)
+	/* GL will never use payload 0 for cycle state */
+	if ((ch->cyclestate.cyclestate_buffer != NULL) &&
+	    (isr_data->data_lo != 0)) {
+		gr_intr_cyclestats_exec(g, ch, isr_data->data_lo);
+	}
+#endif
+	nvgpu_log_fn(g, " ");
+	/*
+	 * Wake notifier waiters whether or not any cyclestats work was done,
+	 * so the wake-up does not depend on CONFIG_GK20A_CYCLE_STATS.
+	 */
+	nvgpu_cond_broadcast_interruptible(&ch->notifier_wq);
+	return 0;
+}
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index f76155942..af89c39e4 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -1391,168 +1391,6 @@ int
gk20a_gr_handle_semaphore_pending(struct gk20a *g, return 0; } -#if defined(CONFIG_GK20A_CYCLE_STATS) -static inline bool is_valid_cyclestats_bar0_offset_gk20a(struct gk20a *g, - u32 offset) -{ - /* support only 24-bit 4-byte aligned offsets */ - bool valid = !(offset & 0xFF000003U); - - if (g->allow_all) { - return true; - } - - /* whitelist check */ - valid = valid && - is_bar0_global_offset_whitelisted_gk20a(g, offset); - /* resource size check in case there was a problem - * with allocating the assumed size of bar0 */ - valid = valid && gk20a_io_valid_reg(g, offset); - return valid; -} -#endif - -int gk20a_gr_handle_notify_pending(struct gk20a *g, - struct gr_gk20a_isr_data *isr_data) -{ - struct channel_gk20a *ch = isr_data->ch; - -#if defined(CONFIG_GK20A_CYCLE_STATS) - void *virtual_address; - u32 buffer_size; - u32 offset; - bool exit; -#endif - if (ch == NULL || tsg_gk20a_from_ch(ch) == NULL) { - return 0; - } - -#if defined(CONFIG_GK20A_CYCLE_STATS) - /* GL will never use payload 0 for cycle state */ - if ((ch->cyclestate.cyclestate_buffer == NULL) || - (isr_data->data_lo == 0)) { - return 0; - } - - nvgpu_mutex_acquire(&ch->cyclestate.cyclestate_buffer_mutex); - - virtual_address = ch->cyclestate.cyclestate_buffer; - buffer_size = ch->cyclestate.cyclestate_buffer_size; - offset = isr_data->data_lo; - exit = false; - while (!exit) { - struct share_buffer_head *sh_hdr; - u32 min_element_size; - - /* validate offset */ - if (offset + sizeof(struct share_buffer_head) > buffer_size || - offset + sizeof(struct share_buffer_head) < offset) { - nvgpu_err(g, - "cyclestats buffer overrun at offset 0x%x", - offset); - break; - } - - sh_hdr = (struct share_buffer_head *) - ((char *)virtual_address + offset); - - min_element_size = - (sh_hdr->operation == OP_END ? 
- sizeof(struct share_buffer_head) : - sizeof(struct gk20a_cyclestate_buffer_elem)); - - /* validate sh_hdr->size */ - if (sh_hdr->size < min_element_size || - offset + sh_hdr->size > buffer_size || - offset + sh_hdr->size < offset) { - nvgpu_err(g, - "bad cyclestate buffer header size at offset 0x%x", - offset); - sh_hdr->failed = true; - break; - } - - switch (sh_hdr->operation) { - case OP_END: - exit = true; - break; - - case BAR0_READ32: - case BAR0_WRITE32: - { - struct gk20a_cyclestate_buffer_elem *op_elem = - (struct gk20a_cyclestate_buffer_elem *)sh_hdr; - bool valid = is_valid_cyclestats_bar0_offset_gk20a( - g, op_elem->offset_bar0); - u32 raw_reg; - u64 mask_orig; - u64 v; - - if (!valid) { - nvgpu_err(g, - "invalid cycletstats op offset: 0x%x", - op_elem->offset_bar0); - - sh_hdr->failed = exit = true; - break; - } - - - mask_orig = - ((1ULL << - (op_elem->last_bit + 1)) - -1)&~((1ULL << - op_elem->first_bit)-1); - - raw_reg = - gk20a_readl(g, - op_elem->offset_bar0); - - switch (sh_hdr->operation) { - case BAR0_READ32: - op_elem->data = - (raw_reg & mask_orig) - >> op_elem->first_bit; - break; - - case BAR0_WRITE32: - v = 0; - if ((unsigned int)mask_orig != - ~((unsigned int)0)) { - v = (unsigned int) - (raw_reg & ~mask_orig); - } - - v |= ((op_elem->data - << op_elem->first_bit) - & mask_orig); - - gk20a_writel(g, - op_elem->offset_bar0, - (unsigned int)v); - break; - default: - /* nop ok?*/ - break; - } - } - break; - - default: - /* no operation content case */ - exit = true; - break; - } - sh_hdr->completed = true; - offset += sh_hdr->size; - } - nvgpu_mutex_release(&ch->cyclestate.cyclestate_buffer_mutex); -#endif - nvgpu_log_fn(g, " "); - nvgpu_cond_broadcast_interruptible(&ch->notifier_wq); - return 0; -} - /* Used by sw interrupt thread to translate current ctx to chid. * Also used by regops to translate current ctx to chid and tsgid. * For performance, we don't want to go through 128 channels every time. 
@@ -2010,7 +1848,7 @@ int gk20a_gr_isr(struct gk20a *g) isr_data.sub_chan, isr_data.class_num); if ((gr_intr & gr_intr_notify_pending_f()) != 0U) { - g->ops.gr.handle_notify_pending(g, &isr_data); + g->ops.gr.intr.handle_notify_pending(g, &isr_data); gk20a_writel(g, gr_intr_r(), gr_intr_notify_reset_f()); gr_intr &= ~gr_intr_notify_pending_f(); diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 188e735f6..23afdabf6 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h @@ -418,8 +418,6 @@ void gk20a_gr_init_ovr_sm_dsm_perf(void); void gk20a_gr_get_ovr_perf_regs(struct gk20a *g, u32 *num_ovr_perf_regs, u32 **ovr_perf_regs); u32 gr_gk20a_get_patch_slots(struct gk20a *g); -int gk20a_gr_handle_notify_pending(struct gk20a *g, - struct gr_gk20a_isr_data *isr_data); int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c index d7eeab774..87bc92e10 100644 --- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -308,7 +309,6 @@ static const struct gpu_ops gm20b_ops = { .init_ovr_sm_dsm_perf = gk20a_gr_init_ovr_sm_dsm_perf, .get_ovr_perf_regs = gk20a_gr_get_ovr_perf_regs, .fecs_host_int_enable = gr_gk20a_fecs_host_int_enable, - .handle_notify_pending = gk20a_gr_handle_notify_pending, .handle_semaphore_pending = gk20a_gr_handle_semaphore_pending, .decode_priv_addr = gr_gk20a_decode_priv_addr, .create_priv_addr_table = gr_gk20a_create_priv_addr_table, @@ -476,6 +476,8 @@ static const struct gpu_ops gm20b_ops = { .get_gfxp_rtv_cb_size = NULL, }, .intr = { + .handle_notify_pending = + nvgpu_gr_intr_handle_notify_pending, .get_tpc_exception = gm20b_gr_intr_get_tpc_exception, .handle_tex_exception = gm20b_gr_intr_handle_tex_exception, diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c 
index e194a8b03..9018117fb 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c @@ -42,6 +42,7 @@ #include #include #include +#include #include "hal/mc/mc_gm20b.h" #include "hal/mc/mc_gp10b.h" @@ -345,7 +346,6 @@ static const struct gpu_ops gp10b_ops = { .get_max_gfxp_wfi_timeout_count = gr_gp10b_get_max_gfxp_wfi_timeout_count, .fecs_host_int_enable = gr_gk20a_fecs_host_int_enable, - .handle_notify_pending = gk20a_gr_handle_notify_pending, .handle_semaphore_pending = gk20a_gr_handle_semaphore_pending, .decode_priv_addr = gr_gk20a_decode_priv_addr, .create_priv_addr_table = gr_gk20a_create_priv_addr_table, @@ -562,6 +562,8 @@ static const struct gpu_ops gp10b_ops = { gp10b_gr_init_commit_cbes_reserve, }, .intr = { + .handle_notify_pending = + nvgpu_gr_intr_handle_notify_pending, .get_tpc_exception = gm20b_gr_intr_get_tpc_exception, .handle_tex_exception = gp10b_gr_intr_handle_tex_exception, diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index 9ca79df5b..f05322618 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c @@ -176,6 +176,7 @@ #include #include #include +#include #include #include @@ -461,7 +462,6 @@ static const struct gpu_ops gv100_ops = { .decode_egpc_addr = gv11b_gr_decode_egpc_addr, .fecs_host_int_enable = gr_gv11b_fecs_host_int_enable, .handle_ssync_hww = gr_gv11b_handle_ssync_hww, - .handle_notify_pending = gk20a_gr_handle_notify_pending, .handle_semaphore_pending = gk20a_gr_handle_semaphore_pending, .decode_priv_addr = gr_gv11b_decode_priv_addr, .create_priv_addr_table = gr_gv11b_create_priv_addr_table, @@ -699,6 +699,8 @@ static const struct gpu_ops gv100_ops = { gv11b_gr_init_commit_gfxp_wfi_timeout, }, .intr = { + .handle_notify_pending = + nvgpu_gr_intr_handle_notify_pending, .handle_gcc_exception = gv11b_gr_intr_handle_gcc_exception, .handle_gpc_gpcmmu_exception = diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c 
b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index 52ce82b22..8a9570456 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c @@ -151,6 +151,7 @@ #include #include #include +#include #include #include @@ -420,7 +421,6 @@ static const struct gpu_ops gv11b_ops = { gr_gv11b_get_max_gfxp_wfi_timeout_count, .fecs_host_int_enable = gr_gv11b_fecs_host_int_enable, .handle_ssync_hww = gr_gv11b_handle_ssync_hww, - .handle_notify_pending = gk20a_gr_handle_notify_pending, .handle_semaphore_pending = gk20a_gr_handle_semaphore_pending, .decode_priv_addr = gr_gv11b_decode_priv_addr, .create_priv_addr_table = gr_gv11b_create_priv_addr_table, @@ -658,6 +658,8 @@ static const struct gpu_ops gv11b_ops = { gv11b_gr_init_commit_gfxp_wfi_timeout, }, .intr = { + .handle_notify_pending = + nvgpu_gr_intr_handle_notify_pending, .handle_gcc_exception = gv11b_gr_intr_handle_gcc_exception, .handle_gpc_gpcmmu_exception = diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index d7f7c29ab..def0f1f5e 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -415,8 +415,6 @@ struct gpu_ops { (struct gk20a *g); void (*fecs_host_int_enable)(struct gk20a *g); int (*handle_ssync_hww)(struct gk20a *g, u32 *ssync_esr); - int (*handle_notify_pending)(struct gk20a *g, - struct gr_gk20a_isr_data *isr_data); int (*handle_semaphore_pending)(struct gk20a *g, struct gr_gk20a_isr_data *isr_data); int (*add_ctxsw_reg_pm_fbpa)(struct gk20a *g, @@ -779,6 +777,8 @@ struct gpu_ops { } init; struct { + int (*handle_notify_pending)(struct gk20a *g, + struct gr_gk20a_isr_data *isr_data); void (*handle_gcc_exception)(struct gk20a *g, u32 gpc, u32 tpc, u32 gpc_exception, u32 *corrected_err, u32 *uncorrected_err); diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/gr_intr.h b/drivers/gpu/nvgpu/include/nvgpu/gr/gr_intr.h index 2fe4699a6..f66447c8f 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gr/gr_intr.h +++ 
b/drivers/gpu/nvgpu/include/nvgpu/gr/gr_intr.h @@ -25,10 +25,14 @@ #include +struct gr_gk20a_isr_data; + struct nvgpu_gr_tpc_exception { bool tex_exception; bool sm_exception; bool mpc_exception; }; +int nvgpu_gr_intr_handle_notify_pending(struct gk20a *g, + struct gr_gk20a_isr_data *isr_data); #endif /* NVGPU_GR_INTR_H */ diff --git a/drivers/gpu/nvgpu/tu104/hal_tu104.c b/drivers/gpu/nvgpu/tu104/hal_tu104.c index c28ba233d..351d0a659 100644 --- a/drivers/gpu/nvgpu/tu104/hal_tu104.c +++ b/drivers/gpu/nvgpu/tu104/hal_tu104.c @@ -195,6 +195,7 @@ #include #include #include +#include #include #include @@ -489,7 +490,6 @@ static const struct gpu_ops tu104_ops = { gr_gv11b_get_max_gfxp_wfi_timeout_count, .fecs_host_int_enable = gr_gv11b_fecs_host_int_enable, .handle_ssync_hww = gr_gv11b_handle_ssync_hww, - .handle_notify_pending = gk20a_gr_handle_notify_pending, .handle_semaphore_pending = gk20a_gr_handle_semaphore_pending, .decode_priv_addr = gr_gv11b_decode_priv_addr, .create_priv_addr_table = gr_gv11b_create_priv_addr_table, @@ -732,6 +732,8 @@ static const struct gpu_ops tu104_ops = { gv11b_gr_init_commit_gfxp_wfi_timeout, }, .intr = { + .handle_notify_pending = + nvgpu_gr_intr_handle_notify_pending, .handle_gcc_exception = gv11b_gr_intr_handle_gcc_exception, .handle_gpc_gpcmmu_exception =