mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 17:36:20 +03:00
gpu: nvgpu: tpc powergating through sysfs
- Adds static TPC power-gating through sysfs. - The active TPC count remains in effect until the GPU/system is booted again. - tpc_pg_mask can be written only after GPU probe finishes and GPU boot is triggered. Note: To be able to use this feature, we need to change the boot/init scripts of the OS (used with the nvgpu driver) to write to the sysfs nodes before posting the discover image size query to FECS. Bug 200406784 Change-Id: Id749c7a617422c625f77d0c1a9aada2eb960c4d0 Signed-off-by: Deepak Goyal <dgoyal@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1742422 Reviewed-by: svc-misra-checker <svc-misra-checker@nvidia.com> GVS: Gerrit_Virtual_Submit Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
2df33e32e4
commit
d3b8415948
@@ -274,12 +274,23 @@ int gk20a_finalize_poweron(struct gk20a *g)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
nvgpu_mutex_acquire(&g->tpc_pg_lock);
|
||||||
|
|
||||||
|
if (g->can_tpc_powergate) {
|
||||||
|
if (g->ops.gr.powergate_tpc != NULL) {
|
||||||
|
g->ops.gr.powergate_tpc(g);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
err = gk20a_init_gr_support(g);
|
err = gk20a_init_gr_support(g);
|
||||||
if (err) {
|
if (err) {
|
||||||
nvgpu_err(g, "failed to init gk20a gr");
|
nvgpu_err(g, "failed to init gk20a gr");
|
||||||
|
nvgpu_mutex_release(&g->tpc_pg_lock);
|
||||||
goto done;
|
goto done;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
nvgpu_mutex_release(&g->tpc_pg_lock);
|
||||||
|
|
||||||
if (nvgpu_is_enabled(g, NVGPU_PMU_PSTATE)) {
|
if (nvgpu_is_enabled(g, NVGPU_PMU_PSTATE)) {
|
||||||
err = gk20a_init_pstate_pmu_support(g);
|
err = gk20a_init_pstate_pmu_support(g);
|
||||||
if (err) {
|
if (err) {
|
||||||
|
|||||||
@@ -307,6 +307,7 @@ struct gpu_ops {
|
|||||||
u32 class, u32 padding);
|
u32 class, u32 padding);
|
||||||
void (*free_gr_ctx)(struct gk20a *g,
|
void (*free_gr_ctx)(struct gk20a *g,
|
||||||
struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx);
|
struct vm_gk20a *vm, struct nvgpu_gr_ctx *gr_ctx);
|
||||||
|
void (*powergate_tpc)(struct gk20a *g);
|
||||||
void (*update_ctxsw_preemption_mode)(struct gk20a *g,
|
void (*update_ctxsw_preemption_mode)(struct gk20a *g,
|
||||||
struct channel_gk20a *c,
|
struct channel_gk20a *c,
|
||||||
struct nvgpu_mem *mem);
|
struct nvgpu_mem *mem);
|
||||||
@@ -1361,6 +1362,8 @@ struct gk20a {
|
|||||||
u64 log_mask;
|
u64 log_mask;
|
||||||
u32 log_trace;
|
u32 log_trace;
|
||||||
|
|
||||||
|
struct nvgpu_mutex tpc_pg_lock;
|
||||||
|
|
||||||
struct nvgpu_gpu_params params;
|
struct nvgpu_gpu_params params;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -1532,6 +1535,11 @@ struct gk20a {
|
|||||||
|
|
||||||
u32 tpc_fs_mask_user;
|
u32 tpc_fs_mask_user;
|
||||||
|
|
||||||
|
u32 tpc_pg_mask;
|
||||||
|
bool can_tpc_powergate;
|
||||||
|
|
||||||
|
u32 valid_tpc_mask;
|
||||||
|
|
||||||
struct nvgpu_bios bios;
|
struct nvgpu_bios bios;
|
||||||
bool bios_is_init;
|
bool bios_is_init;
|
||||||
|
|
||||||
|
|||||||
@@ -58,6 +58,7 @@
|
|||||||
#include <nvgpu/hw/gv11b/hw_pbdma_gv11b.h>
|
#include <nvgpu/hw/gv11b/hw_pbdma_gv11b.h>
|
||||||
#include <nvgpu/hw/gv11b/hw_therm_gv11b.h>
|
#include <nvgpu/hw/gv11b/hw_therm_gv11b.h>
|
||||||
#include <nvgpu/hw/gv11b/hw_perf_gv11b.h>
|
#include <nvgpu/hw/gv11b/hw_perf_gv11b.h>
|
||||||
|
#include <nvgpu/hw/gv11b/hw_fuse_gv11b.h>
|
||||||
|
|
||||||
#define GFXP_WFI_TIMEOUT_COUNT_IN_USEC_DEFAULT 100
|
#define GFXP_WFI_TIMEOUT_COUNT_IN_USEC_DEFAULT 100
|
||||||
|
|
||||||
@@ -71,6 +72,16 @@
|
|||||||
*/
|
*/
|
||||||
#define GR_TPCS_INFO_FOR_MAPREGISTER 6
|
#define GR_TPCS_INFO_FOR_MAPREGISTER 6
|
||||||
|
|
||||||
|
/*
|
||||||
|
* There are 4 TPCs in GV11b ranging from TPC0 to TPC3
|
||||||
|
* There are two PES in GV11b each controlling two TPCs
|
||||||
|
* PES0 is linked to TPC0 & TPC2
|
||||||
|
* PES1 is linked to TPC1 & TPC3
|
||||||
|
*/
|
||||||
|
/*
 * Per-PES TPC bit masks. The whole expansion is parenthesized so the cast
 * cannot bind to a neighbouring operator at the point of use.
 */
#define TPC_MASK_FOR_PESID_0	((u32)0x5)
#define TPC_MASK_FOR_PESID_1	((u32)0xa)
|
||||||
|
|
||||||
|
|
||||||
bool gr_gv11b_is_valid_class(struct gk20a *g, u32 class_num)
|
bool gr_gv11b_is_valid_class(struct gk20a *g, u32 class_num)
|
||||||
{
|
{
|
||||||
bool valid = false;
|
bool valid = false;
|
||||||
@@ -117,6 +128,35 @@ bool gr_gv11b_is_valid_gfx_class(struct gk20a *g, u32 class_num)
|
|||||||
return valid;
|
return valid;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void gr_gv11b_powergate_tpc(struct gk20a *g)
|
||||||
|
{
|
||||||
|
u32 tpc_pg_status = gk20a_readl(g, fuse_status_opt_tpc_gpc_r(0));
|
||||||
|
|
||||||
|
if (tpc_pg_status == g->tpc_pg_mask) {
|
||||||
|
nvgpu_info(g, "TPC-PG mask and TPC-PG status is same");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
gk20a_writel(g, fuse_ctrl_opt_tpc_gpc_r(0), (g->tpc_pg_mask));
|
||||||
|
|
||||||
|
do {
|
||||||
|
tpc_pg_status = gk20a_readl(g, fuse_status_opt_tpc_gpc_r(0));
|
||||||
|
} while (tpc_pg_status != g->tpc_pg_mask);
|
||||||
|
|
||||||
|
gk20a_writel(g, gr_fe_tpc_pesmask_r(), gr_fe_tpc_pesmask_req_send_f() |
|
||||||
|
gr_fe_tpc_pesmask_action_write_f() |
|
||||||
|
gr_fe_tpc_pesmask_pesid_f(0) |
|
||||||
|
gr_fe_tpc_pesmask_gpcid_f(0) |
|
||||||
|
((~g->tpc_pg_mask & (u32) 0xf) & TPC_MASK_FOR_PESID_0));
|
||||||
|
gk20a_writel(g, gr_fe_tpc_pesmask_r(), gr_fe_tpc_pesmask_req_send_f() |
|
||||||
|
gr_fe_tpc_pesmask_action_write_f() |
|
||||||
|
gr_fe_tpc_pesmask_pesid_f(1) |
|
||||||
|
gr_fe_tpc_pesmask_gpcid_f(0) |
|
||||||
|
((~g->tpc_pg_mask & (u32) 0xf) & TPC_MASK_FOR_PESID_1));
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
bool gr_gv11b_is_valid_compute_class(struct gk20a *g, u32 class_num)
|
bool gr_gv11b_is_valid_compute_class(struct gk20a *g, u32 class_num)
|
||||||
{
|
{
|
||||||
bool valid = false;
|
bool valid = false;
|
||||||
|
|||||||
@@ -250,4 +250,5 @@ int gr_gv11b_create_priv_addr_table(struct gk20a *g,
|
|||||||
u32 *priv_addr_table,
|
u32 *priv_addr_table,
|
||||||
u32 *num_registers);
|
u32 *num_registers);
|
||||||
u32 gr_gv11b_get_nonpes_aware_tpc(struct gk20a *g, u32 gpc, u32 tpc);
|
u32 gr_gv11b_get_nonpes_aware_tpc(struct gk20a *g, u32 gpc, u32 tpc);
|
||||||
|
void gr_gv11b_powergate_tpc(struct gk20a *g);
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -294,6 +294,7 @@ static const struct gpu_ops gv11b_ops = {
|
|||||||
.init_ctx_state = gr_gp10b_init_ctx_state,
|
.init_ctx_state = gr_gp10b_init_ctx_state,
|
||||||
.alloc_gr_ctx = gr_gp10b_alloc_gr_ctx,
|
.alloc_gr_ctx = gr_gp10b_alloc_gr_ctx,
|
||||||
.free_gr_ctx = gr_gk20a_free_gr_ctx,
|
.free_gr_ctx = gr_gk20a_free_gr_ctx,
|
||||||
|
.powergate_tpc = gr_gv11b_powergate_tpc,
|
||||||
.update_ctxsw_preemption_mode =
|
.update_ctxsw_preemption_mode =
|
||||||
gr_gv11b_update_ctxsw_preemption_mode,
|
gr_gv11b_update_ctxsw_preemption_mode,
|
||||||
.dump_gr_regs = gr_gv11b_dump_gr_status_regs,
|
.dump_gr_regs = gr_gv11b_dump_gr_status_regs,
|
||||||
|
|||||||
@@ -1572,6 +1572,42 @@ static inline u32 gr_fe_tpc_fs_r(u32 i)
|
|||||||
{
|
{
|
||||||
return 0x0040a200U + i*4U;
|
return 0x0040a200U + i*4U;
|
||||||
}
|
}
|
||||||
|
/* Returns the constant 0x0040a260 for the gr_fe_tpc_pesmask register. */
static inline u32 gr_fe_tpc_pesmask_r(void)
{
	const u32 pesmask_reg = 0x0040a260U;

	return pesmask_reg;
}
|
||||||
|
/* Keeps the low 6 bits of v and shifts them left by 24 (bits 29:24). */
static inline u32 gr_fe_tpc_pesmask_pesid_f(u32 v)
{
	const u32 pesid = v & 0x3fU;

	return pesid << 24U;
}
|
||||||
|
/* Keeps the low 8 bits of v and shifts them left by 16 (bits 23:16). */
static inline u32 gr_fe_tpc_pesmask_gpcid_f(u32 v)
{
	const u32 gpcid = v & 0xffU;

	return gpcid << 16U;
}
|
||||||
|
/* Returns a mask with only bit 30 set. */
static inline u32 gr_fe_tpc_pesmask_action_m(void)
{
	const u32 action_shift = 30U;

	return 0x1U << action_shift;
}
|
||||||
|
/* Returns 0x40000000 (bit 30 set), the "write" value of the action field. */
static inline u32 gr_fe_tpc_pesmask_action_write_f(void)
{
	const u32 action_write = 0x40000000U;

	return action_write;
}
|
||||||
|
/* Returns 0, the "read" value of the action field. */
static inline u32 gr_fe_tpc_pesmask_action_read_f(void)
{
	const u32 action_read = 0x0U;

	return action_read;
}
|
||||||
|
/* Returns a mask with only bit 31 set. */
static inline u32 gr_fe_tpc_pesmask_req_m(void)
{
	const u32 req_shift = 31U;

	return 0x1U << req_shift;
}
|
||||||
|
/* Returns 0x80000000 (bit 31 set), the "send" value of the req field. */
static inline u32 gr_fe_tpc_pesmask_req_send_f(void)
{
	const u32 req_send = 0x80000000U;

	return req_send;
}
|
||||||
|
/* Returns a mask covering the low 16 bits (bits 15:0). */
static inline u32 gr_fe_tpc_pesmask_mask_m(void)
{
	const u32 mask_shift = 0U;

	return 0xffffU << mask_shift;
}
|
||||||
static inline u32 gr_pri_mme_shadow_raw_index_r(void)
|
static inline u32 gr_pri_mme_shadow_raw_index_r(void)
|
||||||
{
|
{
|
||||||
return 0x00404488U;
|
return 0x00404488U;
|
||||||
|
|||||||
@@ -64,6 +64,7 @@ static void nvgpu_init_vars(struct gk20a *g)
|
|||||||
nvgpu_mutex_init(&g->poweron_lock);
|
nvgpu_mutex_init(&g->poweron_lock);
|
||||||
nvgpu_mutex_init(&g->poweroff_lock);
|
nvgpu_mutex_init(&g->poweroff_lock);
|
||||||
nvgpu_mutex_init(&g->ctxsw_disable_lock);
|
nvgpu_mutex_init(&g->ctxsw_disable_lock);
|
||||||
|
nvgpu_mutex_init(&g->tpc_pg_lock);
|
||||||
|
|
||||||
l->regs_saved = l->regs;
|
l->regs_saved = l->regs;
|
||||||
l->bar1_saved = l->bar1;
|
l->bar1_saved = l->bar1;
|
||||||
@@ -168,6 +169,8 @@ static void nvgpu_init_pm_vars(struct gk20a *g)
|
|||||||
g->ptimer_src_freq = platform->ptimer_src_freq;
|
g->ptimer_src_freq = platform->ptimer_src_freq;
|
||||||
g->support_pmu = support_gk20a_pmu(dev_from_gk20a(g));
|
g->support_pmu = support_gk20a_pmu(dev_from_gk20a(g));
|
||||||
__nvgpu_set_enabled(g, NVGPU_CAN_RAILGATE, platform->can_railgate_init);
|
__nvgpu_set_enabled(g, NVGPU_CAN_RAILGATE, platform->can_railgate_init);
|
||||||
|
g->can_tpc_powergate = platform->can_tpc_powergate;
|
||||||
|
g->valid_tpc_mask = platform->valid_tpc_mask;
|
||||||
g->ldiv_slowdown_factor = platform->ldiv_slowdown_factor_init;
|
g->ldiv_slowdown_factor = platform->ldiv_slowdown_factor_init;
|
||||||
/* if default delay is not set, set default delay to 500msec */
|
/* if default delay is not set, set default delay to 500msec */
|
||||||
if (platform->railgate_delay_init)
|
if (platform->railgate_delay_init)
|
||||||
|
|||||||
@@ -43,6 +43,9 @@ struct gk20a_platform {
|
|||||||
/* Should be populated at probe. */
|
/* Should be populated at probe. */
|
||||||
bool can_railgate_init;
|
bool can_railgate_init;
|
||||||
|
|
||||||
|
/* Should be populated at probe. */
|
||||||
|
bool can_tpc_powergate;
|
||||||
|
|
||||||
/* Should be populated at probe. */
|
/* Should be populated at probe. */
|
||||||
bool can_elpg_init;
|
bool can_elpg_init;
|
||||||
|
|
||||||
@@ -71,6 +74,8 @@ struct gk20a_platform {
|
|||||||
/* Reset control for device */
|
/* Reset control for device */
|
||||||
struct reset_control *reset_control;
|
struct reset_control *reset_control;
|
||||||
#endif
|
#endif
|
||||||
|
/* valid TPC-MASK */
|
||||||
|
u32 valid_tpc_mask;
|
||||||
|
|
||||||
/* Delay before rail gated */
|
/* Delay before rail gated */
|
||||||
int railgate_delay_init;
|
int railgate_delay_init;
|
||||||
|
|||||||
@@ -209,6 +209,9 @@ struct gk20a_platform gv11b_tegra_platform = {
|
|||||||
.railgate_delay_init = 500,
|
.railgate_delay_init = 500,
|
||||||
.can_railgate_init = true,
|
.can_railgate_init = true,
|
||||||
|
|
||||||
|
.can_tpc_powergate = true,
|
||||||
|
.valid_tpc_mask = 0xc,
|
||||||
|
|
||||||
.can_slcg = true,
|
.can_slcg = true,
|
||||||
.can_blcg = true,
|
.can_blcg = true,
|
||||||
.can_elcg = true,
|
.can_elcg = true,
|
||||||
|
|||||||
@@ -31,6 +31,8 @@
|
|||||||
|
|
||||||
#define ROOTRW (S_IRWXU|S_IRGRP|S_IROTH)
|
#define ROOTRW (S_IRWXU|S_IRGRP|S_IROTH)
|
||||||
|
|
||||||
|
/*
 * Mask value with no TPC bit set. The expansion is parenthesized so the
 * cast cannot bind to a neighbouring operator at the point of use.
 */
#define TPC_MASK_FOR_ALL_ACTIVE_TPCs	((u32)0x0)
|
||||||
|
|
||||||
static ssize_t elcg_enable_store(struct device *dev,
|
static ssize_t elcg_enable_store(struct device *dev,
|
||||||
struct device_attribute *attr, const char *buf, size_t count)
|
struct device_attribute *attr, const char *buf, size_t count)
|
||||||
{
|
{
|
||||||
@@ -843,6 +845,61 @@ static ssize_t force_idle_read(struct device *dev,
|
|||||||
static DEVICE_ATTR(force_idle, ROOTRW, force_idle_read, force_idle_store);
|
static DEVICE_ATTR(force_idle, ROOTRW, force_idle_read, force_idle_store);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
static ssize_t tpc_pg_mask_read(struct device *dev,
|
||||||
|
struct device_attribute *attr, char *buf)
|
||||||
|
{
|
||||||
|
struct gk20a *g = get_gk20a(dev);
|
||||||
|
|
||||||
|
return snprintf(buf, PAGE_SIZE, "%d\n", g->tpc_pg_mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * sysfs store handler for the tpc_pg_mask attribute.
 *
 * With g->tpc_pg_lock held, parses buf as a base-10 unsigned number and
 * stores it in g->tpc_pg_mask when it is either
 * TPC_MASK_FOR_ALL_ACTIVE_TPCs or equal to g->valid_tpc_mask.
 *
 * Returns count when the mask was stored, when the same mask was already
 * set, or when TPC power-gating is not enabled for the platform.
 * Returns -EINVAL when the input does not parse or the mask is not one of
 * the accepted values, and -ENODEV when the golden image size is already
 * non-zero.
 */
static ssize_t tpc_pg_mask_store(struct device *dev,
	struct device_attribute *attr, const char *buf, size_t count)
{
	struct gk20a *g = get_gk20a(dev);
	struct gr_gk20a *gr = &g->gr;
	unsigned long new_mask = 0;
	ssize_t ret = count;

	nvgpu_mutex_acquire(&g->tpc_pg_lock);

	if (!g->can_tpc_powergate) {
		nvgpu_info(g, "TPC-PG not enabled for the platform");
		goto unlock;
	}

	if (kstrtoul(buf, 10, &new_mask) < 0) {
		nvgpu_err(g, "invalid value");
		ret = -EINVAL;
		goto unlock;
	}

	if (new_mask == g->tpc_pg_mask) {
		nvgpu_info(g, "no value change, same mask already set");
		goto unlock;
	}

	/* A new mask is refused once the golden image size is non-zero. */
	if (gr->ctx_vars.golden_image_size != 0) {
		nvgpu_err(g, "golden image size already initialized");
		ret = -ENODEV;
		goto unlock;
	}

	if (new_mask != TPC_MASK_FOR_ALL_ACTIVE_TPCs &&
	    new_mask != g->valid_tpc_mask) {
		nvgpu_err(g, "TPC-PG mask is invalid");
		ret = -EINVAL;
		goto unlock;
	}

	g->tpc_pg_mask = new_mask;

unlock:
	nvgpu_mutex_release(&g->tpc_pg_lock);

	return ret;
}
|
||||||
|
|
||||||
|
static DEVICE_ATTR(tpc_pg_mask, ROOTRW, tpc_pg_mask_read, tpc_pg_mask_store);
|
||||||
|
|
||||||
static ssize_t tpc_fs_mask_store(struct device *dev,
|
static ssize_t tpc_fs_mask_store(struct device *dev,
|
||||||
struct device_attribute *attr, const char *buf, size_t count)
|
struct device_attribute *attr, const char *buf, size_t count)
|
||||||
{
|
{
|
||||||
@@ -1130,6 +1187,7 @@ void nvgpu_remove_sysfs(struct device *dev)
|
|||||||
device_remove_file(dev, &dev_attr_aelpg_enable);
|
device_remove_file(dev, &dev_attr_aelpg_enable);
|
||||||
device_remove_file(dev, &dev_attr_allow_all);
|
device_remove_file(dev, &dev_attr_allow_all);
|
||||||
device_remove_file(dev, &dev_attr_tpc_fs_mask);
|
device_remove_file(dev, &dev_attr_tpc_fs_mask);
|
||||||
|
device_remove_file(dev, &dev_attr_tpc_pg_mask);
|
||||||
device_remove_file(dev, &dev_attr_min_timeslice_us);
|
device_remove_file(dev, &dev_attr_min_timeslice_us);
|
||||||
device_remove_file(dev, &dev_attr_max_timeslice_us);
|
device_remove_file(dev, &dev_attr_max_timeslice_us);
|
||||||
|
|
||||||
@@ -1181,6 +1239,7 @@ int nvgpu_create_sysfs(struct device *dev)
|
|||||||
error |= device_create_file(dev, &dev_attr_aelpg_enable);
|
error |= device_create_file(dev, &dev_attr_aelpg_enable);
|
||||||
error |= device_create_file(dev, &dev_attr_allow_all);
|
error |= device_create_file(dev, &dev_attr_allow_all);
|
||||||
error |= device_create_file(dev, &dev_attr_tpc_fs_mask);
|
error |= device_create_file(dev, &dev_attr_tpc_fs_mask);
|
||||||
|
error |= device_create_file(dev, &dev_attr_tpc_pg_mask);
|
||||||
error |= device_create_file(dev, &dev_attr_min_timeslice_us);
|
error |= device_create_file(dev, &dev_attr_min_timeslice_us);
|
||||||
error |= device_create_file(dev, &dev_attr_max_timeslice_us);
|
error |= device_create_file(dev, &dev_attr_max_timeslice_us);
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user