gpu: nvgpu: tpc powergating through sysfs

- Adds static TPC power-gating through sysfs.
- The active TPC count stays in effect until the GPU/system is booted again.
- tpc_pg_mask can be written only after GPU probe finishes and
  GPU boot is triggered.

Note:
To use this feature, the boot/init scripts of the OS (used with the nvgpu
driver) must be changed to write to the sysfs node before the discover image
size query is posted to FECS.

Bug 200406784

Change-Id: Id749c7a617422c625f77d0c1a9aada2eb960c4d0
Signed-off-by: Deepak Goyal <dgoyal@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1742422
Reviewed-by: svc-misra-checker <svc-misra-checker@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Deepak Goyal
2018-07-16 11:10:23 +05:30
committed by mobile promotions
parent 2df33e32e4
commit d3b8415948
10 changed files with 167 additions and 0 deletions

View File

@@ -274,12 +274,23 @@ int gk20a_finalize_poweron(struct gk20a *g)
} }
} }
nvgpu_mutex_acquire(&g->tpc_pg_lock);
if (g->can_tpc_powergate) {
if (g->ops.gr.powergate_tpc != NULL) {
g->ops.gr.powergate_tpc(g);
}
}
err = gk20a_init_gr_support(g); err = gk20a_init_gr_support(g);
if (err) { if (err) {
nvgpu_err(g, "failed to init gk20a gr"); nvgpu_err(g, "failed to init gk20a gr");
nvgpu_mutex_release(&g->tpc_pg_lock);
goto done; goto done;
} }
nvgpu_mutex_release(&g->tpc_pg_lock);
if (nvgpu_is_enabled(g, NVGPU_PMU_PSTATE)) { if (nvgpu_is_enabled(g, NVGPU_PMU_PSTATE)) {
err = gk20a_init_pstate_pmu_support(g); err = gk20a_init_pstate_pmu_support(g);
if (err) { if (err) {

View File

@@ -307,6 +307,7 @@ struct gpu_ops {
u32 class, u32 padding); u32 class, u32 padding);
void (*free_gr_ctx)(struct gk20a *g, void (*free_gr_ctx)(struct gk20a *g,
struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx); struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx);
void (*powergate_tpc)(struct gk20a *g);
void (*update_ctxsw_preemption_mode)(struct gk20a *g, void (*update_ctxsw_preemption_mode)(struct gk20a *g,
struct channel_gk20a *c, struct channel_gk20a *c,
struct nvgpu_mem *mem); struct nvgpu_mem *mem);
@@ -1361,6 +1362,8 @@ struct gk20a {
u64 log_mask; u64 log_mask;
u32 log_trace; u32 log_trace;
struct nvgpu_mutex tpc_pg_lock;
struct nvgpu_gpu_params params; struct nvgpu_gpu_params params;
/* /*
@@ -1532,6 +1535,11 @@ struct gk20a {
u32 tpc_fs_mask_user; u32 tpc_fs_mask_user;
u32 tpc_pg_mask;
bool can_tpc_powergate;
u32 valid_tpc_mask;
struct nvgpu_bios bios; struct nvgpu_bios bios;
bool bios_is_init; bool bios_is_init;

View File

@@ -58,6 +58,7 @@
#include <nvgpu/hw/gv11b/hw_pbdma_gv11b.h> #include <nvgpu/hw/gv11b/hw_pbdma_gv11b.h>
#include <nvgpu/hw/gv11b/hw_therm_gv11b.h> #include <nvgpu/hw/gv11b/hw_therm_gv11b.h>
#include <nvgpu/hw/gv11b/hw_perf_gv11b.h> #include <nvgpu/hw/gv11b/hw_perf_gv11b.h>
#include <nvgpu/hw/gv11b/hw_fuse_gv11b.h>
#define GFXP_WFI_TIMEOUT_COUNT_IN_USEC_DEFAULT 100 #define GFXP_WFI_TIMEOUT_COUNT_IN_USEC_DEFAULT 100
@@ -71,6 +72,16 @@
*/ */
#define GR_TPCS_INFO_FOR_MAPREGISTER 6 #define GR_TPCS_INFO_FOR_MAPREGISTER 6
/*
 * There are 4 TPCs in GV11b ranging from TPC0 to TPC3
 * There are two PES in GV11b each controlling two TPCs
 * PES0 is linked to TPC0 & TPC2
 * PES1 is linked to TPC1 & TPC3
 *
 * The replacement lists are fully parenthesized so the masks expand as a
 * single operand regardless of the surrounding expression.
 */
#define TPC_MASK_FOR_PESID_0	((u32)0x5)
#define TPC_MASK_FOR_PESID_1	((u32)0xa)
bool gr_gv11b_is_valid_class(struct gk20a *g, u32 class_num) bool gr_gv11b_is_valid_class(struct gk20a *g, u32 class_num)
{ {
bool valid = false; bool valid = false;
@@ -117,6 +128,35 @@ bool gr_gv11b_is_valid_gfx_class(struct gk20a *g, u32 class_num)
return valid; return valid;
} }
void gr_gv11b_powergate_tpc(struct gk20a *g)
{
u32 tpc_pg_status = gk20a_readl(g, fuse_status_opt_tpc_gpc_r(0));
if (tpc_pg_status == g->tpc_pg_mask) {
nvgpu_info(g, "TPC-PG mask and TPC-PG status is same");
return;
}
gk20a_writel(g, fuse_ctrl_opt_tpc_gpc_r(0), (g->tpc_pg_mask));
do {
tpc_pg_status = gk20a_readl(g, fuse_status_opt_tpc_gpc_r(0));
} while (tpc_pg_status != g->tpc_pg_mask);
gk20a_writel(g, gr_fe_tpc_pesmask_r(), gr_fe_tpc_pesmask_req_send_f() |
gr_fe_tpc_pesmask_action_write_f() |
gr_fe_tpc_pesmask_pesid_f(0) |
gr_fe_tpc_pesmask_gpcid_f(0) |
((~g->tpc_pg_mask & (u32) 0xf) & TPC_MASK_FOR_PESID_0));
gk20a_writel(g, gr_fe_tpc_pesmask_r(), gr_fe_tpc_pesmask_req_send_f() |
gr_fe_tpc_pesmask_action_write_f() |
gr_fe_tpc_pesmask_pesid_f(1) |
gr_fe_tpc_pesmask_gpcid_f(0) |
((~g->tpc_pg_mask & (u32) 0xf) & TPC_MASK_FOR_PESID_1));
return;
}
bool gr_gv11b_is_valid_compute_class(struct gk20a *g, u32 class_num) bool gr_gv11b_is_valid_compute_class(struct gk20a *g, u32 class_num)
{ {
bool valid = false; bool valid = false;

View File

@@ -250,4 +250,5 @@ int gr_gv11b_create_priv_addr_table(struct gk20a *g,
u32 *priv_addr_table, u32 *priv_addr_table,
u32 *num_registers); u32 *num_registers);
u32 gr_gv11b_get_nonpes_aware_tpc(struct gk20a *g, u32 gpc, u32 tpc); u32 gr_gv11b_get_nonpes_aware_tpc(struct gk20a *g, u32 gpc, u32 tpc);
void gr_gv11b_powergate_tpc(struct gk20a *g);
#endif #endif

View File

@@ -294,6 +294,7 @@ static const struct gpu_ops gv11b_ops = {
.init_ctx_state = gr_gp10b_init_ctx_state, .init_ctx_state = gr_gp10b_init_ctx_state,
.alloc_gr_ctx = gr_gp10b_alloc_gr_ctx, .alloc_gr_ctx = gr_gp10b_alloc_gr_ctx,
.free_gr_ctx = gr_gk20a_free_gr_ctx, .free_gr_ctx = gr_gk20a_free_gr_ctx,
.powergate_tpc = gr_gv11b_powergate_tpc,
.update_ctxsw_preemption_mode = .update_ctxsw_preemption_mode =
gr_gv11b_update_ctxsw_preemption_mode, gr_gv11b_update_ctxsw_preemption_mode,
.dump_gr_regs = gr_gv11b_dump_gr_status_regs, .dump_gr_regs = gr_gv11b_dump_gr_status_regs,

View File

@@ -1572,6 +1572,42 @@ static inline u32 gr_fe_tpc_fs_r(u32 i)
{ {
return 0x0040a200U + i*4U; return 0x0040a200U + i*4U;
} }
/* Offset of the GR FE TPC PES-mask register. */
static inline u32 gr_fe_tpc_pesmask_r(void)
{
	return 0x40a260U;
}

/* Place v (6 bits) into the PES id field at bits 29:24. */
static inline u32 gr_fe_tpc_pesmask_pesid_f(u32 v)
{
	const u32 pesid = v & 0x3fU;

	return pesid << 24U;
}

/* Place v (8 bits) into the GPC id field at bits 23:16. */
static inline u32 gr_fe_tpc_pesmask_gpcid_f(u32 v)
{
	const u32 gpcid = v & 0xffU;

	return gpcid << 16U;
}

/* Bit mask of the action field (bit 30). */
static inline u32 gr_fe_tpc_pesmask_action_m(void)
{
	return 0x40000000U;
}

/* Action field value selecting a write (bit 30 set). */
static inline u32 gr_fe_tpc_pesmask_action_write_f(void)
{
	return 0x1U << 30U;
}

/* Action field value selecting a read (bit 30 clear). */
static inline u32 gr_fe_tpc_pesmask_action_read_f(void)
{
	return 0U;
}

/* Bit mask of the request field (bit 31). */
static inline u32 gr_fe_tpc_pesmask_req_m(void)
{
	return 0x80000000U;
}

/* Request field value that sends the request (bit 31 set). */
static inline u32 gr_fe_tpc_pesmask_req_send_f(void)
{
	return 0x1U << 31U;
}

/* Bit mask of the mask field (bits 15:0). */
static inline u32 gr_fe_tpc_pesmask_mask_m(void)
{
	return 0xffffU;
}
static inline u32 gr_pri_mme_shadow_raw_index_r(void) static inline u32 gr_pri_mme_shadow_raw_index_r(void)
{ {
return 0x00404488U; return 0x00404488U;

View File

@@ -64,6 +64,7 @@ static void nvgpu_init_vars(struct gk20a *g)
nvgpu_mutex_init(&g->poweron_lock); nvgpu_mutex_init(&g->poweron_lock);
nvgpu_mutex_init(&g->poweroff_lock); nvgpu_mutex_init(&g->poweroff_lock);
nvgpu_mutex_init(&g->ctxsw_disable_lock); nvgpu_mutex_init(&g->ctxsw_disable_lock);
nvgpu_mutex_init(&g->tpc_pg_lock);
l->regs_saved = l->regs; l->regs_saved = l->regs;
l->bar1_saved = l->bar1; l->bar1_saved = l->bar1;
@@ -168,6 +169,8 @@ static void nvgpu_init_pm_vars(struct gk20a *g)
g->ptimer_src_freq = platform->ptimer_src_freq; g->ptimer_src_freq = platform->ptimer_src_freq;
g->support_pmu = support_gk20a_pmu(dev_from_gk20a(g)); g->support_pmu = support_gk20a_pmu(dev_from_gk20a(g));
__nvgpu_set_enabled(g, NVGPU_CAN_RAILGATE, platform->can_railgate_init); __nvgpu_set_enabled(g, NVGPU_CAN_RAILGATE, platform->can_railgate_init);
g->can_tpc_powergate = platform->can_tpc_powergate;
g->valid_tpc_mask = platform->valid_tpc_mask;
g->ldiv_slowdown_factor = platform->ldiv_slowdown_factor_init; g->ldiv_slowdown_factor = platform->ldiv_slowdown_factor_init;
/* if default delay is not set, set default delay to 500msec */ /* if default delay is not set, set default delay to 500msec */
if (platform->railgate_delay_init) if (platform->railgate_delay_init)

View File

@@ -43,6 +43,9 @@ struct gk20a_platform {
/* Should be populated at probe. */ /* Should be populated at probe. */
bool can_railgate_init; bool can_railgate_init;
/* Should be populated at probe. */
bool can_tpc_powergate;
/* Should be populated at probe. */ /* Should be populated at probe. */
bool can_elpg_init; bool can_elpg_init;
@@ -71,6 +74,8 @@ struct gk20a_platform {
/* Reset control for device */ /* Reset control for device */
struct reset_control *reset_control; struct reset_control *reset_control;
#endif #endif
/* valid TPC-MASK */
u32 valid_tpc_mask;
/* Delay before rail gated */ /* Delay before rail gated */
int railgate_delay_init; int railgate_delay_init;

View File

@@ -209,6 +209,9 @@ struct gk20a_platform gv11b_tegra_platform = {
.railgate_delay_init = 500, .railgate_delay_init = 500,
.can_railgate_init = true, .can_railgate_init = true,
.can_tpc_powergate = true,
.valid_tpc_mask = 0xc,
.can_slcg = true, .can_slcg = true,
.can_blcg = true, .can_blcg = true,
.can_elcg = true, .can_elcg = true,

View File

@@ -31,6 +31,8 @@
#define ROOTRW (S_IRWXU|S_IRGRP|S_IROTH) #define ROOTRW (S_IRWXU|S_IRGRP|S_IROTH)
/* Power-gate mask value meaning no TPC is gated (all TPCs active). */
#define TPC_MASK_FOR_ALL_ACTIVE_TPCs	((u32)0x0)
static ssize_t elcg_enable_store(struct device *dev, static ssize_t elcg_enable_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count) struct device_attribute *attr, const char *buf, size_t count)
{ {
@@ -843,6 +845,61 @@ static ssize_t force_idle_read(struct device *dev,
static DEVICE_ATTR(force_idle, ROOTRW, force_idle_read, force_idle_store); static DEVICE_ATTR(force_idle, ROOTRW, force_idle_read, force_idle_store);
#endif #endif
/*
 * sysfs show handler for tpc_pg_mask.
 *
 * Formats the current g->tpc_pg_mask as a decimal number followed by a
 * newline into buf and returns the snprintf() result.
 *
 * tpc_pg_mask is a u32, so it is printed with the unsigned conversion
 * rather than "%d".
 */
static ssize_t tpc_pg_mask_read(struct device *dev,
	struct device_attribute *attr, char *buf)
{
	struct gk20a *g = get_gk20a(dev);

	return snprintf(buf, PAGE_SIZE, "%u\n", g->tpc_pg_mask);
}
/*
 * sysfs store handler for tpc_pg_mask.
 *
 * Runs entirely under g->tpc_pg_lock. The checks are applied in this order:
 *  - platform without TPC power-gating: logged, write accepted as a no-op;
 *  - buf is not a base-10 unsigned number: -EINVAL;
 *  - value equals the current mask: logged, accepted as a no-op;
 *  - golden image size is already non-zero: -ENODEV;
 *  - value is neither TPC_MASK_FOR_ALL_ACTIVE_TPCs nor g->valid_tpc_mask:
 *    -EINVAL.
 * Only a value passing all checks is stored in g->tpc_pg_mask.
 *
 * Returns count when the write is accepted, otherwise a negative errno.
 */
static ssize_t tpc_pg_mask_store(struct device *dev,
	struct device_attribute *attr, const char *buf, size_t count)
{
	struct gk20a *g = get_gk20a(dev);
	struct gr_gk20a *gr = &g->gr;
	unsigned long val = 0;
	ssize_t ret = count;

	nvgpu_mutex_acquire(&g->tpc_pg_lock);

	if (!g->can_tpc_powergate) {
		nvgpu_info(g, "TPC-PG not enabled for the platform");
		goto unlock;
	}

	if (kstrtoul(buf, 10, &val) < 0) {
		nvgpu_err(g, "invalid value");
		ret = -EINVAL;
		goto unlock;
	}

	if (val == g->tpc_pg_mask) {
		nvgpu_info(g, "no value change, same mask already set");
		goto unlock;
	}

	/* The mask may no longer change once the golden image size is set. */
	if (gr->ctx_vars.golden_image_size) {
		nvgpu_err(g, "golden image size already initialized");
		ret = -ENODEV;
		goto unlock;
	}

	if (val != TPC_MASK_FOR_ALL_ACTIVE_TPCs && val != g->valid_tpc_mask) {
		nvgpu_err(g, "TPC-PG mask is invalid");
		ret = -EINVAL;
		goto unlock;
	}

	g->tpc_pg_mask = val;

unlock:
	nvgpu_mutex_release(&g->tpc_pg_lock);
	return ret;
}
/*
 * Define the tpc_pg_mask device attribute with ROOTRW permissions, using
 * tpc_pg_mask_read as the show handler and tpc_pg_mask_store as the store
 * handler.
 */
static DEVICE_ATTR(tpc_pg_mask, ROOTRW, tpc_pg_mask_read, tpc_pg_mask_store);
static ssize_t tpc_fs_mask_store(struct device *dev, static ssize_t tpc_fs_mask_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count) struct device_attribute *attr, const char *buf, size_t count)
{ {
@@ -1130,6 +1187,7 @@ void nvgpu_remove_sysfs(struct device *dev)
device_remove_file(dev, &dev_attr_aelpg_enable); device_remove_file(dev, &dev_attr_aelpg_enable);
device_remove_file(dev, &dev_attr_allow_all); device_remove_file(dev, &dev_attr_allow_all);
device_remove_file(dev, &dev_attr_tpc_fs_mask); device_remove_file(dev, &dev_attr_tpc_fs_mask);
device_remove_file(dev, &dev_attr_tpc_pg_mask);
device_remove_file(dev, &dev_attr_min_timeslice_us); device_remove_file(dev, &dev_attr_min_timeslice_us);
device_remove_file(dev, &dev_attr_max_timeslice_us); device_remove_file(dev, &dev_attr_max_timeslice_us);
@@ -1181,6 +1239,7 @@ int nvgpu_create_sysfs(struct device *dev)
error |= device_create_file(dev, &dev_attr_aelpg_enable); error |= device_create_file(dev, &dev_attr_aelpg_enable);
error |= device_create_file(dev, &dev_attr_allow_all); error |= device_create_file(dev, &dev_attr_allow_all);
error |= device_create_file(dev, &dev_attr_tpc_fs_mask); error |= device_create_file(dev, &dev_attr_tpc_fs_mask);
error |= device_create_file(dev, &dev_attr_tpc_pg_mask);
error |= device_create_file(dev, &dev_attr_min_timeslice_us); error |= device_create_file(dev, &dev_attr_min_timeslice_us);
error |= device_create_file(dev, &dev_attr_max_timeslice_us); error |= device_create_file(dev, &dev_attr_max_timeslice_us);