mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-25 11:04:51 +03:00
gpu: nvgpu: split lists for hwpm control regs
FECS ucode introduces separate register lists for control registers, so that they can be restored separately from PM state. Added support for: - LIST_compressed_nv_perf_sys_control_ctx_regs - LIST_compressed_nv_perf_pma_control_ctx_regs - LIST_compressed_nv_perf_fbp_control_ctx_regs - LIST_compressed_nv_perf_gpc_control_ctx_regs Bug 200507276 Change-Id: Ifce398bcb298822f3a46cf372ef9610a46a8df0c Signed-off-by: Thomas Fleury <tfleury@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/2193528 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
committed by
Alex Waterman
parent
31ac3a1f6e
commit
44a87d320e
@@ -300,40 +300,40 @@ static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g,
|
||||
}
|
||||
|
||||
*offset = ALIGN(*offset, 256U);
|
||||
|
||||
base = (g->ops.perf.get_pmm_per_chiplet_offset() * gpc_num);
|
||||
if (add_ctxsw_buffer_map_entries(map,
|
||||
nvgpu_netlist_get_perf_gpc_control_ctxsw_regs(g),
|
||||
count, offset, max_cnt, base, ~U32(0U)) != 0) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
*offset = ALIGN(*offset, 256U);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* PM CTXSW BUFFER LAYOUT :
|
||||
*|---------------------------------------------|0x00 <----PM CTXSW BUFFER BASE
|
||||
*| |
|
||||
* PM CTXSW BUFFER LAYOUT:
|
||||
*|=============================================|0x00 <----PM CTXSW BUFFER BASE
|
||||
*| LIST_compressed_pm_ctx_reg_SYS |Space allocated: numRegs words
|
||||
*|---------------------------------------------|
|
||||
*| |
|
||||
*| LIST_compressed_nv_perf_ctx_reg_SYS |Space allocated: numRegs words
|
||||
*|---------------------------------------------|
|
||||
*| |
|
||||
*| LIST_compressed_nv_perf_ctx_reg_sysrouter|Space allocated: numRegs words
|
||||
*|---------------------------------------------|
|
||||
*| PADDING for 256 byte alignment on Volta+ |
|
||||
*|---------------------------------------------|<----256 byte aligned
|
||||
*| |
|
||||
*| LIST_compressed_nv_perf_ctx_reg_PMA |Space allocated: numRegs words
|
||||
*|---------------------------------------------|
|
||||
*| LIST_compressed_nv_perf_ctx_reg_sysrouter |Space allocated: numRegs words
|
||||
*| PADDING for 256 byte alignment on Maxwell+ |
|
||||
*|=============================================|<----256 byte aligned on Maxwell and later
|
||||
*| LIST_compressed_nv_perf_sys_control_ctx_regs|Space allocated: numRegs words (+ padding)
|
||||
*| PADDING for 256 byte alignment |(If reg list is empty, 0 bytes allocated.)
|
||||
*|=============================================|<----256 byte aligned
|
||||
*| LIST_compressed_nv_perf_ctx_reg_PMA |Space allocated: numRegs words (+ padding)
|
||||
*| PADDING for 256 byte alignment |
|
||||
*|---------------------------------------------|<----256 byte aligned
|
||||
*| LIST_compressed_nv_perf_fbp_ctx_regs |
|
||||
*| |Space allocated: numRegs * n words (for n FB units)
|
||||
*|---------------------------------------------|
|
||||
*| LIST_compressed_nv_perf_fbprouter_ctx_regs |
|
||||
*| |Space allocated: numRegs * n words (for n FB units)
|
||||
*|---------------------------------------------|
|
||||
*| LIST_compressed_pm_fbpa_ctx_regs |
|
||||
*| |Space allocated: numRegs * n words (for n FB units)
|
||||
*|---------------------------------------------|
|
||||
*| LIST_compressed_pm_rop_ctx_regs |
|
||||
*|---------------------------------------------|
|
||||
*|=============================================|<----256 byte aligned (if prev segment exists)
|
||||
*| LIST_compressed_nv_perf_pma_control_ctx_regs|Space allocated: numRegs words (+ padding)
|
||||
*| PADDING for 256 byte alignment |(If reg list is empty, 0 bytes allocated.)
|
||||
*|=============================================|<----256 byte aligned
|
||||
*| LIST_compressed_nv_perf_fbp_ctx_regs |Space allocated: numRegs * n words (for n FB units)
|
||||
*| LIST_compressed_nv_perf_fbprouter_ctx_regs |Space allocated: numRegs * n words (for n FB units)
|
||||
*| LIST_compressed_pm_fbpa_ctx_regs |Space allocated: numRegs * n words (for n FB units)
|
||||
*| LIST_compressed_pm_rop_ctx_regs |Space allocated: numRegs * n words (for n FB units)
|
||||
*| LIST_compressed_pm_ltc_ctx_regs |
|
||||
*| LTC0 LTS0 |
|
||||
*| LTC1 LTS0 |Space allocated: numRegs * n words (for n LTC units)
|
||||
@@ -344,10 +344,15 @@ static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g,
|
||||
*| LTC0 LTSn |
|
||||
*| LTC1 LTSn |
|
||||
*| LTCn LTSn |
|
||||
*|---------------------------------------------|
|
||||
*| PADDING for 256 byte alignment |
|
||||
*|---------------------------------------------|<----256 byte aligned
|
||||
*| GPC0 REG0 TPC0 |Each GPC has space allocated to accommodate
|
||||
*|=============================================|<----256 byte aligned on Maxwell and later
|
||||
*| LIST_compressed_nv_perf_fbp_control_ctx_regs|Space allocated: numRegs words + padding
|
||||
*| PADDING for 256 byte alignment |(If reg list is empty, 0 bytes allocated.)
|
||||
*|=============================================|<----256 byte aligned on Maxwell and later
|
||||
*
|
||||
* Each "GPCn PRI register" segment above has this layout:
|
||||
*|=============================================|<----256 byte aligned
|
||||
*| GPC0 REG0 TPC0 |Each GPC has space allocated to accommodate
|
||||
*| REG0 TPC1 | all the GPC/TPC register lists
|
||||
*| Lists in each GPC region: REG0 TPCn |Per GPC allocated space is always 256 byte aligned
|
||||
*| LIST_pm_ctx_reg_TPC REG1 TPC0 |
|
||||
@@ -358,15 +363,11 @@ static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g,
|
||||
*| List_pm_ctx_reg_uc_GPC REGn TPCn |
|
||||
*| LIST_nv_perf_ctx_reg_GPC |
|
||||
*| LIST_nv_perf_gpcrouter_ctx_reg |
|
||||
*| LIST_nv_perf_ctx_reg_CAU |
|
||||
*| ---- |--
|
||||
*| GPC1 . |
|
||||
*| . |<----
|
||||
*|---------------------------------------------|
|
||||
*= =
|
||||
*| GPCn |
|
||||
*= =
|
||||
*|---------------------------------------------|
|
||||
*| LIST_nv_perf_ctx_reg_CAU (Tur) |
|
||||
*|=============================================|
|
||||
*| LIST_compressed_nv_perf_gpc_control_ctx_regs|Space allocated: numRegs words + padding
|
||||
*| PADDING for 256 byte alignment |(If reg list is empty, 0 bytes allocated.)
|
||||
*|=============================================|<----256 byte aligned on Maxwell and later
|
||||
*/
|
||||
|
||||
static int nvgpu_gr_hwpm_map_create(struct gk20a *g,
|
||||
@@ -422,6 +423,19 @@ static int nvgpu_gr_hwpm_map_create(struct gk20a *g,
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* Add entries from _LIST_nv_perf_sys_control_ctx_regs */
|
||||
if (nvgpu_netlist_get_perf_sys_control_ctxsw_regs(g)->count > 0U) {
|
||||
offset = ALIGN(offset, 256U);
|
||||
|
||||
ret = add_ctxsw_buffer_map_entries(map,
|
||||
nvgpu_netlist_get_perf_sys_control_ctxsw_regs(g),
|
||||
&count, &offset,
|
||||
hwpm_ctxsw_reg_count_max, 0, ~U32(0U));
|
||||
if (ret != 0) {
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
|
||||
if (g->ops.gr.hwpm_map.align_regs_perf_pma) {
|
||||
g->ops.gr.hwpm_map.align_regs_perf_pma(&offset);
|
||||
}
|
||||
@@ -436,6 +450,16 @@ static int nvgpu_gr_hwpm_map_create(struct gk20a *g,
|
||||
|
||||
offset = ALIGN(offset, 256U);
|
||||
|
||||
/* Add entries from _LIST_nv_perf_pma_control_ctx_regs */
|
||||
ret = add_ctxsw_buffer_map_entries(map,
|
||||
nvgpu_netlist_get_perf_pma_control_ctxsw_regs(g), &count, &offset,
|
||||
hwpm_ctxsw_reg_count_max, 0, ~U32(0U));
|
||||
if (ret != 0) {
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
offset = ALIGN(offset, 256U);
|
||||
|
||||
/* Add entries from _LIST_nv_perf_fbp_ctx_regs */
|
||||
if (add_ctxsw_buffer_map_entries_subunits(map,
|
||||
nvgpu_netlist_get_fbp_ctxsw_regs(g), &count, &offset,
|
||||
@@ -486,6 +510,18 @@ static int nvgpu_gr_hwpm_map_create(struct gk20a *g,
|
||||
|
||||
offset = ALIGN(offset, 256U);
|
||||
|
||||
/* Add entries from _LIST_nv_perf_fbp_control_ctx_regs */
|
||||
if (add_ctxsw_buffer_map_entries_subunits(map,
|
||||
nvgpu_netlist_get_perf_fbp_control_ctxsw_regs(g),
|
||||
&count, &offset, hwpm_ctxsw_reg_count_max, 0,
|
||||
num_fbps, ~U32(0U),
|
||||
g->ops.perf.get_pmm_per_chiplet_offset(),
|
||||
~U32(0U)) != 0) {
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
offset = ALIGN(offset, 256U);
|
||||
|
||||
/* Add GPC entries */
|
||||
if (add_ctxsw_buffer_map_entries_gpcs(g, map, &count, &offset,
|
||||
hwpm_ctxsw_reg_count_max, config) != 0) {
|
||||
|
||||
@@ -378,6 +378,26 @@ static bool nvgpu_netlist_handle_debugger_region_id(struct gk20a *g,
|
||||
err = nvgpu_netlist_alloc_load_aiv_list(g,
|
||||
src, size, &netlist_vars->ctxsw_regs.pm_cau);
|
||||
break;
|
||||
case NETLIST_REGIONID_NVPERF_SYS_CONTROL:
|
||||
nvgpu_log_info(g, "NETLIST_REGIONID_NVPERF_SYS_CONTROL");
|
||||
err = nvgpu_netlist_alloc_load_aiv_list(g,
|
||||
src, size, &netlist_vars->ctxsw_regs.perf_sys_control);
|
||||
break;
|
||||
case NETLIST_REGIONID_NVPERF_FBP_CONTROL:
|
||||
nvgpu_log_info(g, "NETLIST_REGIONID_NVPERF_FBP_CONTROL");
|
||||
err = nvgpu_netlist_alloc_load_aiv_list(g,
|
||||
src, size, &netlist_vars->ctxsw_regs.perf_fbp_control);
|
||||
break;
|
||||
case NETLIST_REGIONID_NVPERF_GPC_CONTROL:
|
||||
nvgpu_log_info(g, "NETLIST_REGIONID_NVPERF_GPC_CONTROL");
|
||||
err = nvgpu_netlist_alloc_load_aiv_list(g,
|
||||
src, size, &netlist_vars->ctxsw_regs.perf_gpc_control);
|
||||
break;
|
||||
case NETLIST_REGIONID_NVPERF_PMA_CONTROL:
|
||||
nvgpu_log_info(g, "NETLIST_REGIONID_NVPERF_PMA_CONTROL");
|
||||
err = nvgpu_netlist_alloc_load_aiv_list(g,
|
||||
src, size, &netlist_vars->ctxsw_regs.perf_pma_control);
|
||||
break;
|
||||
default:
|
||||
handled = false;
|
||||
break;
|
||||
@@ -536,6 +556,10 @@ clean_up:
|
||||
nvgpu_kfree(g, netlist_vars->ctxsw_regs.pm_ucgpc.l);
|
||||
nvgpu_kfree(g, netlist_vars->ctxsw_regs.etpc.l);
|
||||
nvgpu_kfree(g, netlist_vars->ctxsw_regs.pm_cau.l);
|
||||
nvgpu_kfree(g, netlist_vars->ctxsw_regs.perf_sys_control.l);
|
||||
nvgpu_kfree(g, netlist_vars->ctxsw_regs.perf_fbp_control.l);
|
||||
nvgpu_kfree(g, netlist_vars->ctxsw_regs.perf_gpc_control.l);
|
||||
nvgpu_kfree(g, netlist_vars->ctxsw_regs.perf_pma_control.l);
|
||||
#endif /* CONFIG_NVGPU_DEBUGGER */
|
||||
nvgpu_release_firmware(g, netlist_fw);
|
||||
err = -ENOENT;
|
||||
@@ -607,6 +631,10 @@ void nvgpu_netlist_deinit_ctx_vars(struct gk20a *g)
|
||||
nvgpu_kfree(g, netlist_vars->ctxsw_regs.pm_ltc.l);
|
||||
nvgpu_kfree(g, netlist_vars->ctxsw_regs.pm_fbpa.l);
|
||||
nvgpu_kfree(g, netlist_vars->ctxsw_regs.pm_cau.l);
|
||||
nvgpu_kfree(g, netlist_vars->ctxsw_regs.perf_sys_control.l);
|
||||
nvgpu_kfree(g, netlist_vars->ctxsw_regs.perf_fbp_control.l);
|
||||
nvgpu_kfree(g, netlist_vars->ctxsw_regs.perf_gpc_control.l);
|
||||
nvgpu_kfree(g, netlist_vars->ctxsw_regs.perf_pma_control.l);
|
||||
#endif /* CONFIG_NVGPU_DEBUGGER */
|
||||
|
||||
nvgpu_kfree(g, netlist_vars);
|
||||
@@ -804,6 +832,30 @@ struct netlist_aiv_list *nvgpu_netlist_get_pm_cau_ctxsw_regs(struct gk20a *g)
|
||||
{
|
||||
return &g->netlist_vars->ctxsw_regs.pm_cau;
|
||||
}
|
||||
|
||||
struct netlist_aiv_list *nvgpu_netlist_get_perf_sys_control_ctxsw_regs(
|
||||
struct gk20a *g)
|
||||
{
|
||||
return &g->netlist_vars->ctxsw_regs.perf_sys_control;
|
||||
}
|
||||
|
||||
struct netlist_aiv_list *nvgpu_netlist_get_perf_fbp_control_ctxsw_regs(
|
||||
struct gk20a *g)
|
||||
{
|
||||
return &g->netlist_vars->ctxsw_regs.perf_fbp_control;
|
||||
}
|
||||
|
||||
struct netlist_aiv_list *nvgpu_netlist_get_perf_gpc_control_ctxsw_regs(
|
||||
struct gk20a *g)
|
||||
{
|
||||
return &g->netlist_vars->ctxsw_regs.perf_gpc_control;
|
||||
}
|
||||
|
||||
struct netlist_aiv_list *nvgpu_netlist_get_perf_pma_control_ctxsw_regs(
|
||||
struct gk20a *g)
|
||||
{
|
||||
return &g->netlist_vars->ctxsw_regs.perf_pma_control;
|
||||
}
|
||||
#endif /* CONFIG_NVGPU_DEBUGGER */
|
||||
|
||||
#ifdef CONFIG_NVGPU_NON_FUSA
|
||||
|
||||
@@ -74,6 +74,10 @@ struct netlist_aiv_list;
|
||||
#define NETLIST_REGIONID_SW_BUNDLE64_INIT 34
|
||||
#ifdef CONFIG_NVGPU_DEBUGGER
|
||||
#define NETLIST_REGIONID_NVPERF_PMCAU 35
|
||||
#define NETLIST_REGIONID_NVPERF_SYS_CONTROL 52
|
||||
#define NETLIST_REGIONID_NVPERF_FBP_CONTROL 53
|
||||
#define NETLIST_REGIONID_NVPERF_GPC_CONTROL 54
|
||||
#define NETLIST_REGIONID_NVPERF_PMA_CONTROL 55
|
||||
#endif
|
||||
|
||||
struct netlist_region {
|
||||
@@ -139,6 +143,10 @@ struct nvgpu_netlist_vars {
|
||||
struct netlist_aiv_list pm_ucgpc;
|
||||
struct netlist_aiv_list etpc;
|
||||
struct netlist_aiv_list pm_cau;
|
||||
struct netlist_aiv_list perf_sys_control;
|
||||
struct netlist_aiv_list perf_fbp_control;
|
||||
struct netlist_aiv_list perf_gpc_control;
|
||||
struct netlist_aiv_list perf_pma_control;
|
||||
} ctxsw_regs;
|
||||
#endif /* CONFIG_NVGPU_DEBUGGER */
|
||||
};
|
||||
|
||||
@@ -376,6 +376,14 @@ struct netlist_aiv_list *nvgpu_netlist_get_pm_rop_ctxsw_regs(struct gk20a *g);
|
||||
struct netlist_aiv_list *nvgpu_netlist_get_pm_ucgpc_ctxsw_regs(struct gk20a *g);
|
||||
struct netlist_aiv_list *nvgpu_netlist_get_etpc_ctxsw_regs(struct gk20a *g);
|
||||
struct netlist_aiv_list *nvgpu_netlist_get_pm_cau_ctxsw_regs(struct gk20a *g);
|
||||
struct netlist_aiv_list *nvgpu_netlist_get_perf_sys_control_ctxsw_regs(
|
||||
struct gk20a *g);
|
||||
struct netlist_aiv_list *nvgpu_netlist_get_perf_fbp_control_ctxsw_regs(
|
||||
struct gk20a *g);
|
||||
struct netlist_aiv_list *nvgpu_netlist_get_perf_gpc_control_ctxsw_regs(
|
||||
struct gk20a *g);
|
||||
struct netlist_aiv_list *nvgpu_netlist_get_perf_pma_control_ctxsw_regs(
|
||||
struct gk20a *g);
|
||||
#endif /* CONFIG_NVGPU_DEBUGGER */
|
||||
|
||||
#ifdef CONFIG_NVGPU_NON_FUSA
|
||||
|
||||
Reference in New Issue
Block a user