From 44a87d320eae9e4ef97eb777aa8a6f73768dc7d9 Mon Sep 17 00:00:00 2001 From: Thomas Fleury Date: Mon, 9 Sep 2019 13:13:07 -0400 Subject: [PATCH] gpu: nvgpu: split lists for hwpm control regs FECS ucode introduces separate register lists for control registers, so that they can be restored separately from PM state. Added support for: - LIST_compressed_nv_perf_sys_control_ctx_regs - LIST_compressed_nv_perf_pma_control_ctx_regs - LIST_compressed_nv_perf_fbp_control_ctx_regs - LIST_compressed_nv_perf_gpc_control_ctx_regs Bug 200507276 Change-Id: Ifce398bcb298822f3a46cf372ef9610a46a8df0c Signed-off-by: Thomas Fleury Reviewed-on: https://git-master.nvidia.com/r/2193528 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/common/gr/hwpm_map.c | 112 ++++++++++++------ drivers/gpu/nvgpu/common/netlist/netlist.c | 52 ++++++++ .../gpu/nvgpu/common/netlist/netlist_priv.h | 8 ++ drivers/gpu/nvgpu/include/nvgpu/netlist.h | 8 ++ 4 files changed, 142 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/nvgpu/common/gr/hwpm_map.c b/drivers/gpu/nvgpu/common/gr/hwpm_map.c index 8c0d8cb11..b29e84c39 100644 --- a/drivers/gpu/nvgpu/common/gr/hwpm_map.c +++ b/drivers/gpu/nvgpu/common/gr/hwpm_map.c @@ -300,40 +300,40 @@ static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g, } *offset = ALIGN(*offset, 256U); + + base = (g->ops.perf.get_pmm_per_chiplet_offset() * gpc_num); + if (add_ctxsw_buffer_map_entries(map, + nvgpu_netlist_get_perf_gpc_control_ctxsw_regs(g), + count, offset, max_cnt, base, ~U32(0U)) != 0) { + return -EINVAL; + } + + *offset = ALIGN(*offset, 256U); } return 0; } /* - * PM CTXSW BUFFER LAYOUT : - *|---------------------------------------------|0x00 <----PM CTXSW BUFFER BASE - *| | + * PM CTXSW BUFFER LAYOUT: + *|=============================================|0x00 <----PM CTXSW BUFFER BASE *| LIST_compressed_pm_ctx_reg_SYS |Space allocated: numRegs words - *|---------------------------------------------| - *| | *| 
LIST_compressed_nv_perf_ctx_reg_SYS |Space allocated: numRegs words - *|---------------------------------------------| - *| | - *| LIST_compressed_nv_perf_ctx_reg_sysrouter|Space allocated: numRegs words - *|---------------------------------------------| - *| PADDING for 256 byte alignment on Volta+ | - *|---------------------------------------------|<----256 byte aligned - *| | - *| LIST_compressed_nv_perf_ctx_reg_PMA |Space allocated: numRegs words - *|---------------------------------------------| + *| LIST_compressed_nv_perf_ctx_reg_sysrouter |Space allocated: numRegs words + *| PADDING for 256 byte alignment on Maxwell+ | + *|=============================================|<----256 byte aligned on Maxwell and later + *| LIST_compressed_nv_perf_sys_control_ctx_regs|Space allocated: numRegs words (+ padding) + *| PADDING for 256 byte alignment |(If reg list is empty, 0 bytes allocated.) + *|=============================================|<----256 byte aligned + *| LIST_compressed_nv_perf_ctx_reg_PMA |Space allocated: numRegs words (+ padding) *| PADDING for 256 byte alignment | - *|---------------------------------------------|<----256 byte aligned - *| LIST_compressed_nv_perf_fbp_ctx_regs | - *| |Space allocated: numRegs * n words (for n FB units) - *|---------------------------------------------| - *| LIST_compressed_nv_perf_fbprouter_ctx_regs | - *| |Space allocated: numRegs * n words (for n FB units) - *|---------------------------------------------| - *| LIST_compressed_pm_fbpa_ctx_regs | - *| |Space allocated: numRegs * n words (for n FB units) - *|---------------------------------------------| - *| LIST_compressed_pm_rop_ctx_regs | - *|---------------------------------------------| + *|=============================================|<----256 byte aligned (if prev segment exists) + *| LIST_compressed_nv_perf_pma_control_ctx_regs|Space allocated: numRegs words (+ padding) + *| PADDING for 256 byte alignment |(If reg list is empty, 0 bytes allocated.) 
+ *|=============================================|<----256 byte aligned + *| LIST_compressed_nv_perf_fbp_ctx_regs |Space allocated: numRegs * n words (for n FB units) + *| LIST_compressed_nv_perf_fbprouter_ctx_regs |Space allocated: numRegs * n words (for n FB units) + *| LIST_compressed_pm_fbpa_ctx_regs |Space allocated: numRegs * n words (for n FB units) + *| LIST_compressed_pm_rop_ctx_regs |Space allocated: numRegs * n words (for n FB units) *| LIST_compressed_pm_ltc_ctx_regs | *| LTC0 LTS0 | *| LTC1 LTS0 |Space allocated: numRegs * n words (for n LTC units) @@ -344,10 +344,15 @@ static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g, *| LTC0 LTSn | *| LTC1 LTSn | *| LTCn LTSn | - *|---------------------------------------------| *| PADDING for 256 byte alignment | - *|---------------------------------------------|<----256 byte aligned - *| GPC0 REG0 TPC0 |Each GPC has space allocated to accommodate + *|=============================================|<----256 byte aligned on Maxwell and later + *| LIST_compressed_nv_perf_fbp_control_ctx_regs|Space allocated: numRegs words + padding + *| PADDING for 256 byte alignment |(If reg list is empty, 0 bytes allocated.) + *|=============================================|<----256 byte aligned on Maxwell and later + * + * Each "GPCn PRI register" segment above has this layout: + *|=============================================|<----256 byte aligned + *| GPC0 REG0 TPC0 |Each GPC has space allocated to accommodate *| REG0 TPC1 | all the GPC/TPC register lists *| Lists in each GPC region: REG0 TPCn |Per GPC allocated space is always 256 byte aligned *| LIST_pm_ctx_reg_TPC REG1 TPC0 | @@ -358,15 +363,11 @@ static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g, *| List_pm_ctx_reg_uc_GPC REGn TPCn | *| LIST_nv_perf_ctx_reg_GPC | *| LIST_nv_perf_gpcrouter_ctx_reg | - *| LIST_nv_perf_ctx_reg_CAU | - *| ---- |-- - *| GPC1 . | - *| .
|<---- - *|---------------------------------------------| - *= = - *| GPCn | - *= = - *|---------------------------------------------| + *| LIST_nv_perf_ctx_reg_CAU (Tur) | + *|=============================================| + *| LIST_compressed_nv_perf_gpc_control_ctx_regs|Space allocated: numRegs words + padding + *| PADDING for 256 byte alignment |(If reg list is empty, 0 bytes allocated.) + *|=============================================|<----256 byte aligned on Maxwell and later */ static int nvgpu_gr_hwpm_map_create(struct gk20a *g, @@ -422,6 +423,19 @@ static int nvgpu_gr_hwpm_map_create(struct gk20a *g, goto cleanup; } + /* Add entries from _LIST_nv_perf_sys_control_ctx_reg*/ + if (nvgpu_netlist_get_perf_sys_control_ctxsw_regs(g)->count > 0U) { + offset = ALIGN(offset, 256U); + + ret = add_ctxsw_buffer_map_entries(map, + nvgpu_netlist_get_perf_sys_control_ctxsw_regs(g), + &count, &offset, + hwpm_ctxsw_reg_count_max, 0, ~U32(0U)); + if (ret != 0) { + goto cleanup; + } + } + if (g->ops.gr.hwpm_map.align_regs_perf_pma) { g->ops.gr.hwpm_map.align_regs_perf_pma(&offset); } @@ -436,6 +450,16 @@ static int nvgpu_gr_hwpm_map_create(struct gk20a *g, offset = ALIGN(offset, 256U); + /* Add entries from _LIST_nv_perf_pma_control_ctx_reg*/ + ret = add_ctxsw_buffer_map_entries(map, + nvgpu_netlist_get_perf_pma_control_ctxsw_regs(g), &count, &offset, + hwpm_ctxsw_reg_count_max, 0, ~U32(0U)); + if (ret != 0) { + goto cleanup; + } + + offset = ALIGN(offset, 256U); + /* Add entries from _LIST_nv_perf_fbp_ctx_regs */ if (add_ctxsw_buffer_map_entries_subunits(map, nvgpu_netlist_get_fbp_ctxsw_regs(g), &count, &offset, @@ -486,6 +510,18 @@ static int nvgpu_gr_hwpm_map_create(struct gk20a *g, offset = ALIGN(offset, 256U); + /* Add entries from _LIST_nv_perf_fbp_control_ctx_regs */ + if (add_ctxsw_buffer_map_entries_subunits(map, + nvgpu_netlist_get_perf_fbp_control_ctxsw_regs(g), + &count, &offset, hwpm_ctxsw_reg_count_max, 0, + num_fbps, ~U32(0U), + 
g->ops.perf.get_pmm_per_chiplet_offset(), + ~U32(0U)) != 0) { + goto cleanup; + } + + offset = ALIGN(offset, 256U); + /* Add GPC entries */ if (add_ctxsw_buffer_map_entries_gpcs(g, map, &count, &offset, hwpm_ctxsw_reg_count_max, config) != 0) { diff --git a/drivers/gpu/nvgpu/common/netlist/netlist.c b/drivers/gpu/nvgpu/common/netlist/netlist.c index 1ba514a23..ccc4fd950 100644 --- a/drivers/gpu/nvgpu/common/netlist/netlist.c +++ b/drivers/gpu/nvgpu/common/netlist/netlist.c @@ -378,6 +378,26 @@ static bool nvgpu_netlist_handle_debugger_region_id(struct gk20a *g, err = nvgpu_netlist_alloc_load_aiv_list(g, src, size, &netlist_vars->ctxsw_regs.pm_cau); break; + case NETLIST_REGIONID_NVPERF_SYS_CONTROL: + nvgpu_log_info(g, "NETLIST_REGIONID_NVPERF_SYS_CONTROL"); + err = nvgpu_netlist_alloc_load_aiv_list(g, + src, size, &netlist_vars->ctxsw_regs.perf_sys_control); + break; + case NETLIST_REGIONID_NVPERF_FBP_CONTROL: + nvgpu_log_info(g, "NETLIST_REGIONID_NVPERF_FBP_CONTROL"); + err = nvgpu_netlist_alloc_load_aiv_list(g, + src, size, &netlist_vars->ctxsw_regs.perf_fbp_control); + break; + case NETLIST_REGIONID_NVPERF_GPC_CONTROL: + nvgpu_log_info(g, "NETLIST_REGIONID_NVPERF_GPC_CONTROL"); + err = nvgpu_netlist_alloc_load_aiv_list(g, + src, size, &netlist_vars->ctxsw_regs.perf_gpc_control); + break; + case NETLIST_REGIONID_NVPERF_PMA_CONTROL: + nvgpu_log_info(g, "NETLIST_REGIONID_NVPERF_PMA_CONTROL"); + err = nvgpu_netlist_alloc_load_aiv_list(g, + src, size, &netlist_vars->ctxsw_regs.perf_pma_control); + break; default: handled = false; break; @@ -536,6 +556,10 @@ clean_up: nvgpu_kfree(g, netlist_vars->ctxsw_regs.pm_ucgpc.l); nvgpu_kfree(g, netlist_vars->ctxsw_regs.etpc.l); nvgpu_kfree(g, netlist_vars->ctxsw_regs.pm_cau.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.perf_sys_control.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.perf_fbp_control.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.perf_gpc_control.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.perf_pma_control.l); 
#endif /* CONFIG_NVGPU_DEBUGGER */ nvgpu_release_firmware(g, netlist_fw); err = -ENOENT; @@ -607,6 +631,10 @@ void nvgpu_netlist_deinit_ctx_vars(struct gk20a *g) nvgpu_kfree(g, netlist_vars->ctxsw_regs.pm_ltc.l); nvgpu_kfree(g, netlist_vars->ctxsw_regs.pm_fbpa.l); nvgpu_kfree(g, netlist_vars->ctxsw_regs.pm_cau.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.perf_sys_control.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.perf_fbp_control.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.perf_gpc_control.l); + nvgpu_kfree(g, netlist_vars->ctxsw_regs.perf_pma_control.l); #endif /* CONFIG_NVGPU_DEBUGGER */ nvgpu_kfree(g, netlist_vars); @@ -804,6 +832,30 @@ struct netlist_aiv_list *nvgpu_netlist_get_pm_cau_ctxsw_regs(struct gk20a *g) { return &g->netlist_vars->ctxsw_regs.pm_cau; } + +struct netlist_aiv_list *nvgpu_netlist_get_perf_sys_control_ctxsw_regs( + struct gk20a *g) +{ + return &g->netlist_vars->ctxsw_regs.perf_sys_control; +} + +struct netlist_aiv_list *nvgpu_netlist_get_perf_fbp_control_ctxsw_regs( + struct gk20a *g) +{ + return &g->netlist_vars->ctxsw_regs.perf_fbp_control; +} + +struct netlist_aiv_list *nvgpu_netlist_get_perf_gpc_control_ctxsw_regs( + struct gk20a *g) +{ + return &g->netlist_vars->ctxsw_regs.perf_gpc_control; +} + +struct netlist_aiv_list *nvgpu_netlist_get_perf_pma_control_ctxsw_regs( + struct gk20a *g) +{ + return &g->netlist_vars->ctxsw_regs.perf_pma_control; +} #endif /* CONFIG_NVGPU_DEBUGGER */ #ifdef CONFIG_NVGPU_NON_FUSA diff --git a/drivers/gpu/nvgpu/common/netlist/netlist_priv.h b/drivers/gpu/nvgpu/common/netlist/netlist_priv.h index c149b55dc..2ddf53477 100644 --- a/drivers/gpu/nvgpu/common/netlist/netlist_priv.h +++ b/drivers/gpu/nvgpu/common/netlist/netlist_priv.h @@ -74,6 +74,10 @@ struct netlist_aiv_list; #define NETLIST_REGIONID_SW_BUNDLE64_INIT 34 #ifdef CONFIG_NVGPU_DEBUGGER #define NETLIST_REGIONID_NVPERF_PMCAU 35 +#define NETLIST_REGIONID_NVPERF_SYS_CONTROL 52 +#define NETLIST_REGIONID_NVPERF_FBP_CONTROL 53 +#define 
NETLIST_REGIONID_NVPERF_GPC_CONTROL 54 +#define NETLIST_REGIONID_NVPERF_PMA_CONTROL 55 #endif struct netlist_region { @@ -139,6 +143,10 @@ struct nvgpu_netlist_vars { struct netlist_aiv_list pm_ucgpc; struct netlist_aiv_list etpc; struct netlist_aiv_list pm_cau; + struct netlist_aiv_list perf_sys_control; + struct netlist_aiv_list perf_fbp_control; + struct netlist_aiv_list perf_gpc_control; + struct netlist_aiv_list perf_pma_control; } ctxsw_regs; #endif /* CONFIG_NVGPU_DEBUGGER */ }; diff --git a/drivers/gpu/nvgpu/include/nvgpu/netlist.h b/drivers/gpu/nvgpu/include/nvgpu/netlist.h index de974690e..2954bdbd5 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/netlist.h +++ b/drivers/gpu/nvgpu/include/nvgpu/netlist.h @@ -376,6 +376,14 @@ struct netlist_aiv_list *nvgpu_netlist_get_pm_rop_ctxsw_regs(struct gk20a *g); struct netlist_aiv_list *nvgpu_netlist_get_pm_ucgpc_ctxsw_regs(struct gk20a *g); struct netlist_aiv_list *nvgpu_netlist_get_etpc_ctxsw_regs(struct gk20a *g); struct netlist_aiv_list *nvgpu_netlist_get_pm_cau_ctxsw_regs(struct gk20a *g); +struct netlist_aiv_list *nvgpu_netlist_get_perf_sys_control_ctxsw_regs( + struct gk20a *g); +struct netlist_aiv_list *nvgpu_netlist_get_perf_fbp_control_ctxsw_regs( + struct gk20a *g); +struct netlist_aiv_list *nvgpu_netlist_get_perf_gpc_control_ctxsw_regs( + struct gk20a *g); +struct netlist_aiv_list *nvgpu_netlist_get_perf_pma_control_ctxsw_regs( + struct gk20a *g); #endif /* CONFIG_NVGPU_DEBUGGER */ #ifdef CONFIG_NVGPU_NON_FUSA