diff --git a/drivers/gpu/nvgpu/common/perf/perf_gm20b.c b/drivers/gpu/nvgpu/common/perf/perf_gm20b.c index f098caf78..7fef0a050 100644 --- a/drivers/gpu/nvgpu/common/perf/perf_gm20b.c +++ b/drivers/gpu/nvgpu/common/perf/perf_gm20b.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -107,3 +107,8 @@ void gm20b_perf_disable_membuf(struct gk20a *g) perf_pmasys_mem_block_valid_false_f() | perf_pmasys_mem_block_target_f(0)); } + +u32 gm20b_perf_get_pmm_per_chiplet_offset(void) +{ + return (perf_pmmsys_extent_v() - perf_pmmsys_base_v() + 1U); +} diff --git a/drivers/gpu/nvgpu/common/perf/perf_gm20b.h b/drivers/gpu/nvgpu/common/perf/perf_gm20b.h index c89158b3b..507ec2abb 100644 --- a/drivers/gpu/nvgpu/common/perf/perf_gm20b.h +++ b/drivers/gpu/nvgpu/common/perf/perf_gm20b.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -39,4 +39,6 @@ void gm20b_perf_enable_membuf(struct gk20a *g, u32 size, u64 buf_addr, struct nvgpu_mem *inst_block); void gm20b_perf_disable_membuf(struct gk20a *g); +u32 gm20b_perf_get_pmm_per_chiplet_offset(void); + #endif diff --git a/drivers/gpu/nvgpu/common/perf/perf_gv11b.c b/drivers/gpu/nvgpu/common/perf/perf_gv11b.c index 8ce8c21cd..4bdc0b508 100644 --- a/drivers/gpu/nvgpu/common/perf/perf_gv11b.c +++ b/drivers/gpu/nvgpu/common/perf/perf_gv11b.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -107,3 +107,8 @@ void gv11b_perf_disable_membuf(struct gk20a *g) perf_pmasys_mem_block_valid_false_f() | perf_pmasys_mem_block_target_f(0)); } + +u32 gv11b_perf_get_pmm_per_chiplet_offset(void) +{ + return (perf_pmmsys_extent_v() - perf_pmmsys_base_v() + 1U); +} diff --git a/drivers/gpu/nvgpu/common/perf/perf_gv11b.h b/drivers/gpu/nvgpu/common/perf/perf_gv11b.h index a970c257e..2752d983e 100644 --- a/drivers/gpu/nvgpu/common/perf/perf_gv11b.h +++ b/drivers/gpu/nvgpu/common/perf/perf_gv11b.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -39,4 +39,6 @@ void gv11b_perf_enable_membuf(struct gk20a *g, u32 size, u64 buf_addr, struct nvgpu_mem *inst_block); void gv11b_perf_disable_membuf(struct gk20a *g); +u32 gv11b_perf_get_pmm_per_chiplet_offset(void); + #endif diff --git a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c index 415ac91e7..c765f0c36 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c +++ b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c @@ -33,6 +33,7 @@ #include "common/gr/fecs_trace/fecs_trace_gm20b.h" #include "common/therm/therm_gm20b.h" #include "common/therm/therm_gp10b.h" +#include "common/perf/perf_gm20b.h" #include "common/ltc/ltc_gm20b.h" #include "common/ltc/ltc_gp10b.h" #include "common/fuse/fuse_gm20b.h" @@ -219,8 +220,6 @@ static const struct gpu_ops vgpu_gp10b_ops = { .add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma, .decode_priv_addr = gr_gk20a_decode_priv_addr, .create_priv_addr_table = gr_gk20a_create_priv_addr_table, - .get_pmm_per_chiplet_offset = - gr_gm20b_get_pmm_per_chiplet_offset, .split_fbpa_broadcast_addr = gr_gk20a_split_fbpa_broadcast_addr, .alloc_global_ctx_buffers = gr_gk20a_alloc_global_ctx_buffers, .commit_global_ctx_buffers = gr_gk20a_commit_global_ctx_buffers, @@ -319,6 +318,10 @@ static const struct gpu_ops vgpu_gp10b_ops = { .get_gpcs_swdx_dss_zbc_z_format_reg = NULL, } }, + .perf = { + .get_pmm_per_chiplet_offset = + gm20b_perf_get_pmm_per_chiplet_offset, + }, .fb = { .init_hw = NULL, .init_fs_state = NULL, @@ -686,6 +689,7 @@ int vgpu_gp10b_init_hal(struct gk20a *g) gops->gr.ctxsw_prog = vgpu_gp10b_ops.gr.ctxsw_prog; gops->gr.config = vgpu_gp10b_ops.gr.config; gops->fb = vgpu_gp10b_ops.fb; + gops->perf = vgpu_gp10b_ops.perf; gops->clock_gating = vgpu_gp10b_ops.clock_gating; gops->fifo = vgpu_gp10b_ops.fifo; gops->runlist = vgpu_gp10b_ops.runlist; diff --git a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c index 4447c4393..b87661815 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c +++ b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c @@ -35,6 +35,7 @@ #include "common/therm/therm_gm20b.h" #include "common/therm/therm_gp10b.h" #include "common/therm/therm_gv11b.h" +#include "common/perf/perf_gv11b.h" #include "common/ltc/ltc_gm20b.h" #include "common/ltc/ltc_gp10b.h" #include "common/ltc/ltc_gv11b.h" @@ -250,8 +251,6 @@ static const struct gpu_ops vgpu_gv11b_ops = { .add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma, .decode_priv_addr = gr_gv11b_decode_priv_addr, .create_priv_addr_table = gr_gv11b_create_priv_addr_table, - .get_pmm_per_chiplet_offset = - gr_gv11b_get_pmm_per_chiplet_offset, .split_fbpa_broadcast_addr = gr_gk20a_split_fbpa_broadcast_addr, .alloc_global_ctx_buffers = gr_gk20a_alloc_global_ctx_buffers, .commit_global_ctx_buffers = gr_gk20a_commit_global_ctx_buffers, @@ -361,6 +360,10 @@ static const struct gpu_ops vgpu_gv11b_ops = { .get_gpcs_swdx_dss_zbc_z_format_reg = NULL, } }, + .perf = { + .get_pmm_per_chiplet_offset = + gv11b_perf_get_pmm_per_chiplet_offset, + }, .fb = { .init_hw = NULL, .init_fs_state = NULL, @@ -758,6 +761,7 @@ int vgpu_gv11b_init_hal(struct gk20a *g) gops->gr.ctxsw_prog = vgpu_gv11b_ops.gr.ctxsw_prog; gops->gr.config = vgpu_gv11b_ops.gr.config; gops->fb = vgpu_gv11b_ops.fb; + gops->perf = vgpu_gv11b_ops.perf; gops->clock_gating = vgpu_gv11b_ops.clock_gating; gops->fifo = vgpu_gv11b_ops.fifo; gops->runlist = vgpu_gv11b_ops.runlist; diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index e5a02bb9b..ed491811d 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -6064,7 +6064,7 @@ static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g, return -EINVAL; } - base = (g->ops.gr.get_pmm_per_chiplet_offset() * gpc_num); + base = (g->ops.perf.get_pmm_per_chiplet_offset() * gpc_num); if (add_ctxsw_buffer_map_entries(map, &g->netlist_vars->ctxsw_regs.perf_gpc, count, offset, max_cnt, base, ~U32(0U)) != 0) { @@ -6231,7 +6231,7 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g) &count, &offset, hwpm_ctxsw_reg_count_max, 0, g->gr.num_fbps, - g->ops.gr.get_pmm_per_chiplet_offset(), + g->ops.perf.get_pmm_per_chiplet_offset(), ~U32(0U)) != 0) { goto cleanup; } diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index 44dae2ebc..aa9558337 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c @@ -1423,11 +1423,6 @@ void gm20a_gr_disable_rd_coalesce(struct gk20a *g) gk20a_writel(g, gr_gpcs_tpcs_tex_m_dbg2_r(), dbg2_reg); } -u32 gr_gm20b_get_pmm_per_chiplet_offset(void) -{ - return (perf_pmmsys_extent_v() - perf_pmmsys_base_v() + 1U); -} - void gm20b_gr_set_debug_mode(struct gk20a *g, bool enable) { u32 reg_val, gpc_debug_ctrl; diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c index 3a9e8da34..9fad7a55a 100644 --- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c @@ -325,8 +325,6 @@ static const struct gpu_ops gm20b_ops = { .add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma, .decode_priv_addr = gr_gk20a_decode_priv_addr, .create_priv_addr_table = gr_gk20a_create_priv_addr_table, - .get_pmm_per_chiplet_offset = - gr_gm20b_get_pmm_per_chiplet_offset, .split_fbpa_broadcast_addr = gr_gk20a_split_fbpa_broadcast_addr, .fecs_ctxsw_mailbox_size = gr_fecs_ctxsw_mailbox__size_1_v, .alloc_global_ctx_buffers = gr_gk20a_alloc_global_ctx_buffers, @@ -754,6 +752,8 @@ static const struct gpu_ops gm20b_ops = { .set_membuf_handled_bytes = gm20b_perf_set_membuf_handled_bytes, .get_membuf_overflow_status = gm20b_perf_get_membuf_overflow_status, + .get_pmm_per_chiplet_offset = + gm20b_perf_get_pmm_per_chiplet_offset, }, .perfbuf = { .perfbuf_enable = nvgpu_perfbuf_enable_locked, diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index 3a5925059..935182e11 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c @@ -361,8 +361,6 @@ static const struct gpu_ops gp10b_ops = { .add_ctxsw_reg_perf_pma = gr_gk20a_add_ctxsw_reg_perf_pma, .decode_priv_addr = gr_gk20a_decode_priv_addr, .create_priv_addr_table = gr_gk20a_create_priv_addr_table, - .get_pmm_per_chiplet_offset = - gr_gm20b_get_pmm_per_chiplet_offset, .split_fbpa_broadcast_addr = gr_gk20a_split_fbpa_broadcast_addr, .fecs_ctxsw_mailbox_size = gr_fecs_ctxsw_mailbox__size_1_v, .alloc_global_ctx_buffers = gr_gk20a_alloc_global_ctx_buffers, @@ -832,6 +830,8 @@ static const struct gpu_ops gp10b_ops = { .set_membuf_handled_bytes = gm20b_perf_set_membuf_handled_bytes, .get_membuf_overflow_status = gm20b_perf_get_membuf_overflow_status, + .get_pmm_per_chiplet_offset = + gm20b_perf_get_pmm_per_chiplet_offset, }, .perfbuf = { .perfbuf_enable = nvgpu_perfbuf_enable_locked, diff --git a/drivers/gpu/nvgpu/gv100/gr_gv100.c b/drivers/gpu/nvgpu/gv100/gr_gv100.c index fa66b541b..38397ecd1 100644 --- a/drivers/gpu/nvgpu/gv100/gr_gv100.c +++ b/drivers/gpu/nvgpu/gv100/gr_gv100.c @@ -451,7 +451,7 @@ void gr_gv100_set_pmm_register(struct gk20a *g, u32 offset, u32 val, u32 perfmon_index = 0; u32 chiplet_index = 0; u32 reg_offset = 0; - u32 chiplet_stride = g->ops.gr.get_pmm_per_chiplet_offset(); + u32 chiplet_stride = g->ops.perf.get_pmm_per_chiplet_offset(); for (chiplet_index = 0; chiplet_index < num_chiplets; chiplet_index++) { for (perfmon_index = 0; perfmon_index < num_perfmons; diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index ddd145942..3a1dacd92 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c @@ -477,8 +477,6 @@ static const struct gpu_ops gv100_ops = { .add_ctxsw_reg_perf_pma = gr_gv100_add_ctxsw_reg_perf_pma, .decode_priv_addr = gr_gv11b_decode_priv_addr, .create_priv_addr_table = gr_gv11b_create_priv_addr_table, - .get_pmm_per_chiplet_offset = - gr_gv11b_get_pmm_per_chiplet_offset, .split_fbpa_broadcast_addr = gr_gv100_split_fbpa_broadcast_addr, .fecs_ctxsw_mailbox_size = gr_fecs_ctxsw_mailbox__size_1_v, .alloc_global_ctx_buffers = gr_gk20a_alloc_global_ctx_buffers, @@ -1022,6 +1020,8 @@ static const struct gpu_ops gv100_ops = { .set_membuf_handled_bytes = gv11b_perf_set_membuf_handled_bytes, .get_membuf_overflow_status = gv11b_perf_get_membuf_overflow_status, + .get_pmm_per_chiplet_offset = + gv11b_perf_get_pmm_per_chiplet_offset, }, .perfbuf = { .perfbuf_enable = nvgpu_perfbuf_enable_locked, diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 60ca2b360..ea1515e64 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -4681,16 +4681,11 @@ int gr_gv11b_decode_priv_addr(struct gk20a *g, u32 addr, return 0; } -u32 gr_gv11b_get_pmm_per_chiplet_offset(void) -{ - return (perf_pmmsys_extent_v() - perf_pmmsys_base_v() + 1U); -} - static u32 gr_gv11b_pri_pmmgpc_addr(struct gk20a *g, u32 gpc_num, u32 domain_idx, u32 offset) { return perf_pmmgpc_base_v() + - (gpc_num * g->ops.gr.get_pmm_per_chiplet_offset()) + + (gpc_num * g->ops.perf.get_pmm_per_chiplet_offset()) + (domain_idx * perf_pmmgpc_perdomain_offset_v()) + offset; } @@ -4705,7 +4700,7 @@ static void gr_gv11b_split_pmm_fbp_broadcast_address(struct gk20a *g, for (fbp_num = 0; fbp_num < g->gr.num_fbps; fbp_num++) { base = perf_pmmfbp_base_v() + - (fbp_num * g->ops.gr.get_pmm_per_chiplet_offset()); + (fbp_num * g->ops.perf.get_pmm_per_chiplet_offset()); for (domain_idx = domain_start; domain_idx < (domain_start + num_domains); diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index 74f50635e..13e25f66c 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c @@ -437,8 +437,6 @@ static const struct gpu_ops gv11b_ops = { .add_ctxsw_reg_perf_pma = gr_gv100_add_ctxsw_reg_perf_pma, .decode_priv_addr = gr_gv11b_decode_priv_addr, .create_priv_addr_table = gr_gv11b_create_priv_addr_table, - .get_pmm_per_chiplet_offset = - gr_gv11b_get_pmm_per_chiplet_offset, .split_fbpa_broadcast_addr = gr_gk20a_split_fbpa_broadcast_addr, .fecs_ctxsw_mailbox_size = gr_fecs_ctxsw_mailbox__size_1_v, .alloc_global_ctx_buffers = gr_gk20a_alloc_global_ctx_buffers, @@ -963,6 +961,8 @@ static const struct gpu_ops gv11b_ops = { .set_membuf_handled_bytes = gv11b_perf_set_membuf_handled_bytes, .get_membuf_overflow_status = gv11b_perf_get_membuf_overflow_status, + .get_pmm_per_chiplet_offset = + gv11b_perf_get_pmm_per_chiplet_offset, }, .perfbuf = { .perfbuf_enable = nvgpu_perfbuf_enable_locked, diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index 98a909173..c299947f8 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -497,7 +497,6 @@ struct gpu_ops { u32 addr, u32 *priv_addr_table, u32 *num_registers); - u32 (*get_pmm_per_chiplet_offset)(void); void (*split_fbpa_broadcast_addr)(struct gk20a *g, u32 addr, u32 num_fbpas, u32 *priv_addr_table, @@ -1424,6 +1423,7 @@ struct gpu_ops { void (*set_membuf_handled_bytes)(struct gk20a *g, u32 entries, u32 entry_size); bool (*get_membuf_overflow_status)(struct gk20a *g); + u32 (*get_pmm_per_chiplet_offset)(void); } perf; struct { int (*perfbuf_enable)(struct gk20a *g, u64 offset, u32 size); diff --git a/drivers/gpu/nvgpu/tu104/hal_tu104.c b/drivers/gpu/nvgpu/tu104/hal_tu104.c index 7b20e9cfe..846f22dfd 100644 --- a/drivers/gpu/nvgpu/tu104/hal_tu104.c +++ b/drivers/gpu/nvgpu/tu104/hal_tu104.c @@ -501,8 +501,6 @@ static const struct gpu_ops tu104_ops = { .add_ctxsw_reg_perf_pma = gr_gv100_add_ctxsw_reg_perf_pma, .decode_priv_addr = gr_gv11b_decode_priv_addr, .create_priv_addr_table = gr_gv11b_create_priv_addr_table, - .get_pmm_per_chiplet_offset = - gr_gv11b_get_pmm_per_chiplet_offset, .split_fbpa_broadcast_addr = gr_gv100_split_fbpa_broadcast_addr, .init_sw_bundle64 = gr_tu104_init_sw_bundle64, .fecs_ctxsw_mailbox_size = gr_fecs_ctxsw_mailbox__size_1_v, @@ -1060,6 +1058,8 @@ static const struct gpu_ops tu104_ops = { .set_membuf_handled_bytes = gv11b_perf_set_membuf_handled_bytes, .get_membuf_overflow_status = gv11b_perf_get_membuf_overflow_status, + .get_pmm_per_chiplet_offset = + gv11b_perf_get_pmm_per_chiplet_offset, }, .perfbuf = { .perfbuf_enable = nvgpu_perfbuf_enable_locked,