gpu: nvgpu: do_idle/unidle handling with runtime PM after probe

Extend the runtime suspend/resume based idle/unidle logic used in the
probe case to the handling done in gk20a_do_idle/unidle for nvgpu
after probe completion.

If railgating is disabled, setting autosuspend_delay to 0 enables
suspend. If railgating is enabled, the autosuspend delay is > 0;
setting it to 0 makes the suspend immediate.

With this runtime PM (RPM) based approach, the forced_reset logic is
removed. force_reset_in_do_idle is also removed, as railgating is
supported.

Bug 200602747
JIRA NVGPU-5356

Change-Id: Iaf6d5ab651b8200f0547b45d90f812110cf63c0e
Signed-off-by: Sagar Kamble <skamble@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2375941
GVS: Gerrit_Virtual_Submit
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-by: Sachin Nikam <snikam@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Sagar Kamble
2020-07-15 13:18:46 +05:30
committed by Alex Waterman
parent 4012a97640
commit bd7bda4f98
6 changed files with 45 additions and 128 deletions

View File

@@ -390,7 +390,6 @@ struct gk20a {
bool can_elpg; bool can_elpg;
bool mscg_enabled; bool mscg_enabled;
bool forced_idle; bool forced_idle;
bool forced_reset;
bool allow_all; bool allow_all;
u32 ptimer_src_freq; u32 ptimer_src_freq;
@@ -681,8 +680,8 @@ int gk20a_do_unidle(void *_g);
#endif #endif
#ifdef CONFIG_PM #ifdef CONFIG_PM
int gk20a_do_idle_impl(struct gk20a *g, bool force_reset); int gk20a_do_idle(void *_g);
int gk20a_do_unidle_impl(struct gk20a *g); int gk20a_do_unidle(void *_g);
#endif #endif
/** /**

View File

@@ -745,24 +745,23 @@ MODULE_DEVICE_TABLE(of, tegra_gk20a_of_match);
#ifdef CONFIG_PM #ifdef CONFIG_PM
/** /**
* gk20a_do_idle_impl() - force the GPU to idle and railgate * gk20a_do_idle() - force the GPU to idle and railgate
* *
* In success, this call MUST be balanced by caller with gk20a_do_unidle_impl() * In success, this call MUST be balanced by caller with gk20a_do_unidle()
* *
* Acquires two locks : &l->busy_lock and &platform->railgate_lock * Acquires two locks : &l->busy_lock and &platform->railgate_lock
* In success, we hold these locks and return * In success, we hold these locks and return
* In failure, we release these locks and return * In failure, we release these locks and return
*/ */
int gk20a_do_idle_impl(struct gk20a *g, bool force_reset) int gk20a_do_idle(void *_g)
{ {
struct gk20a *g = (struct gk20a *)_g;
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct device *dev = dev_from_gk20a(g); struct device *dev = dev_from_gk20a(g);
struct gk20a_platform *platform = dev_get_drvdata(dev); struct gk20a_platform *platform = dev_get_drvdata(dev);
struct nvgpu_timeout timeout; struct nvgpu_timeout timeout;
int ref_cnt; int ref_cnt;
int target_ref_cnt = 0; int target_ref_cnt = 0;
bool is_railgated;
int err = 0;
if (!g->probe_done) { if (!g->probe_done) {
/* /*
@@ -812,6 +811,7 @@ int gk20a_do_idle_impl(struct gk20a *g, bool force_reset)
target_ref_cnt = 1; target_ref_cnt = 1;
else else
target_ref_cnt = 2; target_ref_cnt = 2;
nvgpu_mutex_acquire(&platform->railgate_lock); nvgpu_mutex_acquire(&platform->railgate_lock);
nvgpu_timeout_init(g, &timeout, GK20A_WAIT_FOR_IDLE_MS, nvgpu_timeout_init(g, &timeout, GK20A_WAIT_FOR_IDLE_MS,
@@ -826,100 +826,47 @@ int gk20a_do_idle_impl(struct gk20a *g, bool force_reset)
if (ref_cnt != target_ref_cnt) { if (ref_cnt != target_ref_cnt) {
nvgpu_err(g, "failed to idle - refcount %d != target_ref_cnt", nvgpu_err(g, "failed to idle - refcount %d != target_ref_cnt",
ref_cnt); ref_cnt);
goto fail_drop_usage_count;
pm_runtime_put_noidle(dev);
nvgpu_mutex_release(&platform->railgate_lock);
up_write(&l->busy_lock);
nvgpu_channel_deterministic_unidle(g);
return -EBUSY;
} }
/* check if global force_reset flag is set */ /*
force_reset |= platform->force_reset_in_do_idle; * If railgating is enabled, autosuspend delay will be > 0. Set it to
* 0 to suspend immediately. If railgating is disabled setting it to
* 0 will reduce the usage count. pm_runtime_put_sync_autosuspend
* will then suspend immediately.
*/
pm_runtime_set_autosuspend_delay(dev, 0);
nvgpu_timeout_init(g, &timeout, GK20A_WAIT_FOR_IDLE_MS, pm_runtime_put_sync_autosuspend(dev);
NVGPU_TIMER_CPU_TIMER);
if (nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE) && !force_reset) { if (pm_runtime_status_suspended(dev)) {
/*
* Case 1 : GPU railgate is supported
*
* if GPU is now idle, we will have only one ref count,
* drop this ref which will rail gate the GPU
*/
pm_runtime_put_sync(dev);
/* add sufficient delay to allow GPU to rail gate */
nvgpu_msleep(g->railgate_delay);
/* check in loop if GPU is railgated or not */
do {
nvgpu_usleep_range(1000, 1100);
is_railgated = platform->is_railgated(dev);
} while (!is_railgated && !nvgpu_timeout_expired(&timeout));
if (is_railgated) {
return 0;
} else {
nvgpu_err(g, "failed to idle in timeout");
goto fail_timeout;
}
} else {
/*
* Case 2 : GPU railgate is not supported or we explicitly
* do not want to depend on runtime PM
*
* if GPU is now idle, call prepare_poweroff() to save the
* state and then do explicit railgate
*
* gk20a_do_unidle_impl() needs to unrailgate, call
* finalize_poweron(), and then call pm_runtime_put_sync()
* to balance the GPU usage counter
*/
/* Save the GPU state */
err = gk20a_pm_prepare_poweroff(dev);
if (err)
goto fail_drop_usage_count;
/* railgate GPU */
platform->railgate(dev);
nvgpu_udelay(10);
g->forced_reset = true;
return 0; return 0;
} else {
nvgpu_err(g, "failed to idle in timeout");
/*
* gk20a_do_unidle will release the locks and reset the
* autosuspend delay.
*/
(void) gk20a_do_unidle(g);
return -EBUSY;
} }
fail_drop_usage_count:
pm_runtime_put_noidle(dev);
fail_timeout:
nvgpu_mutex_release(&platform->railgate_lock);
up_write(&l->busy_lock);
nvgpu_channel_deterministic_unidle(g);
return -EBUSY;
} }
#ifdef CONFIG_NVGPU_VPR
/** /**
* gk20a_do_idle() - wrap up for gk20a_do_idle_impl() to be called * gk20a_do_unidle() - unblock all the tasks blocked by gk20a_do_idle()
* from outside of GPU driver
*
* In success, this call MUST be balanced by caller with gk20a_do_unidle()
*/ */
int gk20a_do_idle(void *_g) int gk20a_do_unidle(void *_g)
{ {
struct gk20a *g = (struct gk20a *)_g; struct gk20a *g = (struct gk20a *)_g;
return gk20a_do_idle_impl(g, true);
}
#endif
/**
* gk20a_do_unidle_impl() - unblock all the tasks blocked by
* gk20a_do_idle_impl()
*/
int gk20a_do_unidle_impl(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct device *dev = dev_from_gk20a(g); struct device *dev = dev_from_gk20a(g);
struct gk20a_platform *platform = dev_get_drvdata(dev); struct gk20a_platform *platform = dev_get_drvdata(dev);
int err;
if (!g->probe_done) { if (!g->probe_done) {
pm_runtime_get_sync(dev); pm_runtime_get_sync(dev);
@@ -931,44 +878,25 @@ int gk20a_do_unidle_impl(struct gk20a *g)
} }
} }
if (g->forced_reset) { /*
/* * Release the railgate_lock here as setting autosuspend_delay to -1
* If we did a forced-reset/railgate * resumes the device that needs this lock.
* then unrailgate the GPU here first */
*/ nvgpu_mutex_release(&platform->railgate_lock);
platform->unrailgate(dev);
/* restore the GPU state */ if (g->railgate_delay && nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE))
err = gk20a_pm_finalize_poweron(dev); pm_runtime_set_autosuspend_delay(dev,
if (err) g->railgate_delay);
return err; else
pm_runtime_set_autosuspend_delay(dev, -1);
/* balance GPU usage counter */
pm_runtime_put_sync(dev);
g->forced_reset = false;
}
/* release the lock and open up all other busy() calls */ /* release the lock and open up all other busy() calls */
nvgpu_mutex_release(&platform->railgate_lock);
up_write(&l->busy_lock); up_write(&l->busy_lock);
nvgpu_channel_deterministic_unidle(g); nvgpu_channel_deterministic_unidle(g);
return 0; return 0;
} }
#ifdef CONFIG_NVGPU_VPR
/**
* gk20a_do_unidle() - wrap up for gk20a_do_unidle_impl()
*/
int gk20a_do_unidle(void *_g)
{
struct gk20a *g = (struct gk20a *)_g;
return gk20a_do_unidle_impl(g);
}
#endif
#endif #endif
void __iomem *nvgpu_devm_ioremap_resource(struct platform_device *dev, int i, void __iomem *nvgpu_devm_ioremap_resource(struct platform_device *dev, int i,

View File

@@ -154,12 +154,6 @@ struct gk20a_platform {
/* Disable nvlink support */ /* Disable nvlink support */
bool disable_nvlink; bool disable_nvlink;
/*
* gk20a_do_idle() API can take GPU either into rail gate or CAR reset
* This flag can be used to force CAR reset case instead of rail gate
*/
bool force_reset_in_do_idle;
/* guest/vm id, needed for IPA to PA transation */ /* guest/vm id, needed for IPA to PA transation */
int vmid; int vmid;

View File

@@ -981,8 +981,6 @@ struct gk20a_platform gm20b_tegra_platform = {
.enable_perfmon = true, .enable_perfmon = true,
.ptimer_src_freq = 19200000, .ptimer_src_freq = 19200000,
.force_reset_in_do_idle = false,
.ch_wdt_init_limit_ms = 5000, .ch_wdt_init_limit_ms = 5000,
.probe = gk20a_tegra_probe, .probe = gk20a_tegra_probe,

View File

@@ -585,8 +585,6 @@ struct gk20a_platform gp10b_tegra_platform = {
.reset_assert = gp10b_tegra_reset_assert, .reset_assert = gp10b_tegra_reset_assert,
.reset_deassert = gp10b_tegra_reset_deassert, .reset_deassert = gp10b_tegra_reset_deassert,
.force_reset_in_do_idle = false,
.platform_chip_id = TEGRA_186, .platform_chip_id = TEGRA_186,
.soc_name = "tegra18x", .soc_name = "tegra18x",

View File

@@ -788,7 +788,7 @@ static ssize_t force_idle_store(struct device *dev,
if (g->forced_idle) if (g->forced_idle)
return count; /* do nothing */ return count; /* do nothing */
else { else {
err = gk20a_do_idle_impl(g, false); err = gk20a_do_idle(g);
if (!err) { if (!err) {
g->forced_idle = 1; g->forced_idle = 1;
nvgpu_info(g, "gpu is idle : %d", nvgpu_info(g, "gpu is idle : %d",
@@ -799,7 +799,7 @@ static ssize_t force_idle_store(struct device *dev,
if (!g->forced_idle) if (!g->forced_idle)
return count; /* do nothing */ return count; /* do nothing */
else { else {
err = gk20a_do_unidle_impl(g); err = gk20a_do_unidle(g);
if (!err) { if (!err) {
g->forced_idle = 0; g->forced_idle = 0;
nvgpu_info(g, "gpu is idle : %d", nvgpu_info(g, "gpu is idle : %d",