mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-24 02:22:34 +03:00
gpu: nvgpu: add read_ptimer to gops
Move all places that read ptimer to use the callback. This makes it possible to add a vgpu implementation of read ptimer. Bug 1395833 Change-Id: Ia339f2f08d75ca4969a443fffc9a61cff1d3d2b7 Signed-off-by: Richard Zhao <rizhao@nvidia.com> Reviewed-on: http://git-master/r/1159587 (cherry picked from commit a01f804684f875c9cffc31eb2c1038f2f29ec66f) Reviewed-on: http://git-master/r/1158449 Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
This commit is contained in:
committed by
Terje Bergstrom
parent
03164b0f4a
commit
86225cb04e
@@ -626,50 +626,17 @@ static int nvgpu_gpu_get_gpu_time(
|
||||
struct gk20a *g,
|
||||
struct nvgpu_gpu_get_gpu_time_args *args)
|
||||
{
|
||||
int err = 0;
|
||||
const unsigned int max_iterations = 3;
|
||||
unsigned int i = 0;
|
||||
u32 gpu_timestamp_hi_prev = 0;
|
||||
u64 time;
|
||||
int err;
|
||||
|
||||
err = gk20a_busy(g->dev);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
/* Note. The GPU nanosecond timer consists of two 32-bit
|
||||
* registers (high & low). To detect a possible low register
|
||||
* wrap-around between the reads, we need to read the high
|
||||
* register before and after low. The wraparound happens
|
||||
* approximately once per 4 secs. */
|
||||
err = g->ops.read_ptimer(g, &time);
|
||||
if (!err)
|
||||
args->gpu_timestamp = time;
|
||||
|
||||
/* get initial gpu_timestamp_hi value */
|
||||
gpu_timestamp_hi_prev = gk20a_readl(g, timer_time_1_r());
|
||||
|
||||
for (i = 0; i < max_iterations; ++i) {
|
||||
u32 gpu_timestamp_hi = 0;
|
||||
u32 gpu_timestamp_lo = 0;
|
||||
|
||||
rmb(); /* maintain read order */
|
||||
gpu_timestamp_lo = gk20a_readl(g, timer_time_0_r());
|
||||
rmb(); /* maintain read order */
|
||||
gpu_timestamp_hi = gk20a_readl(g, timer_time_1_r());
|
||||
|
||||
if (gpu_timestamp_hi == gpu_timestamp_hi_prev) {
|
||||
args->gpu_timestamp =
|
||||
(((u64)gpu_timestamp_hi) << 32) |
|
||||
gpu_timestamp_lo;
|
||||
goto clean_up;
|
||||
}
|
||||
|
||||
/* wrap-around detected, retry */
|
||||
gpu_timestamp_hi_prev = gpu_timestamp_hi;
|
||||
}
|
||||
|
||||
/* too many iterations, bail out */
|
||||
gk20a_err(dev_from_gk20a(g),
|
||||
"Failed to read GPU time. Clock or bus unstable?\n");
|
||||
err = -EBUSY;
|
||||
|
||||
clean_up:
|
||||
gk20a_idle(g->dev);
|
||||
return err;
|
||||
}
|
||||
|
||||
@@ -656,7 +656,6 @@ void gk20a_ctxsw_trace_channel_reset(struct gk20a *g, struct channel_gk20a *ch)
|
||||
struct nvgpu_ctxsw_trace_entry entry = {
|
||||
.vmid = 0,
|
||||
.tag = NVGPU_CTXSW_TAG_ENGINE_RESET,
|
||||
.timestamp = gk20a_read_ptimer(g),
|
||||
.context_id = 0,
|
||||
.pid = ch->pid,
|
||||
};
|
||||
@@ -664,6 +663,7 @@ void gk20a_ctxsw_trace_channel_reset(struct gk20a *g, struct channel_gk20a *ch)
|
||||
if (!g->ctxsw_trace)
|
||||
return;
|
||||
|
||||
g->ops.read_ptimer(g, &entry.timestamp);
|
||||
gk20a_ctxsw_trace_write(g, &entry);
|
||||
gk20a_ctxsw_trace_wake_up(g, 0);
|
||||
#endif
|
||||
@@ -676,7 +676,6 @@ void gk20a_ctxsw_trace_tsg_reset(struct gk20a *g, struct tsg_gk20a *tsg)
|
||||
struct nvgpu_ctxsw_trace_entry entry = {
|
||||
.vmid = 0,
|
||||
.tag = NVGPU_CTXSW_TAG_ENGINE_RESET,
|
||||
.timestamp = gk20a_read_ptimer(g),
|
||||
.context_id = 0,
|
||||
.pid = 0,
|
||||
};
|
||||
@@ -685,6 +684,7 @@ void gk20a_ctxsw_trace_tsg_reset(struct gk20a *g, struct tsg_gk20a *tsg)
|
||||
if (!g->ctxsw_trace)
|
||||
return;
|
||||
|
||||
g->ops.read_ptimer(g, &entry.timestamp);
|
||||
mutex_lock(&tsg->ch_list_lock);
|
||||
if (!list_empty(&tsg->ch_list)) {
|
||||
ch = list_entry(tsg->ch_list.next,
|
||||
|
||||
@@ -2227,19 +2227,46 @@ gk20a_request_firmware(struct gk20a *g, const char *fw_name)
|
||||
return fw;
|
||||
}
|
||||
|
||||
|
||||
u64 gk20a_read_ptimer(struct gk20a *g)
|
||||
int gk20a_read_ptimer(struct gk20a *g, u64 *value)
|
||||
{
|
||||
u32 time_hi0 = gk20a_readl(g, timer_time_1_r());
|
||||
u32 time_lo = gk20a_readl(g, timer_time_0_r());
|
||||
u32 time_hi1 = gk20a_readl(g, timer_time_1_r());
|
||||
u32 time_hi = (time_lo & (1L << 31)) ? time_hi0 : time_hi1;
|
||||
u64 time = ((u64)time_hi << 32) | time_lo;
|
||||
const unsigned int max_iterations = 3;
|
||||
unsigned int i = 0;
|
||||
u32 gpu_timestamp_hi_prev = 0;
|
||||
|
||||
return time;
|
||||
if (!value)
|
||||
return -EINVAL;
|
||||
|
||||
/* Note. The GPU nanosecond timer consists of two 32-bit
|
||||
* registers (high & low). To detect a possible low register
|
||||
* wrap-around between the reads, we need to read the high
|
||||
* register before and after low. The wraparound happens
|
||||
* approximately once per 4 secs. */
|
||||
|
||||
/* get initial gpu_timestamp_hi value */
|
||||
gpu_timestamp_hi_prev = gk20a_readl(g, timer_time_1_r());
|
||||
|
||||
for (i = 0; i < max_iterations; ++i) {
|
||||
u32 gpu_timestamp_hi = 0;
|
||||
u32 gpu_timestamp_lo = 0;
|
||||
|
||||
gpu_timestamp_lo = gk20a_readl(g, timer_time_0_r());
|
||||
gpu_timestamp_hi = gk20a_readl(g, timer_time_1_r());
|
||||
|
||||
if (gpu_timestamp_hi == gpu_timestamp_hi_prev) {
|
||||
*value = (((u64)gpu_timestamp_hi) << 32) |
|
||||
gpu_timestamp_lo;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* wrap-around detected, retry */
|
||||
gpu_timestamp_hi_prev = gpu_timestamp_hi;
|
||||
}
|
||||
|
||||
/* too many iterations, bail out */
|
||||
gk20a_err(dev_from_gk20a(g), "failed to read ptimer");
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
|
||||
MODULE_LICENSE("GPL v2");
|
||||
module_init(gk20a_init);
|
||||
module_exit(gk20a_exit);
|
||||
|
||||
@@ -620,6 +620,7 @@ struct gpu_ops {
|
||||
|
||||
int (*get_litter_value)(struct gk20a *g, enum nvgpu_litter_value value);
|
||||
int (*chip_init_gpu_characteristics)(struct gk20a *g);
|
||||
int (*read_ptimer)(struct gk20a *g, u64 *value);
|
||||
|
||||
struct {
|
||||
int (*init)(struct gk20a *g);
|
||||
@@ -1111,7 +1112,7 @@ static inline u32 scale_ptimer(u32 timeout , u32 scale10x)
|
||||
return (timeout * 10) / scale10x;
|
||||
}
|
||||
|
||||
u64 gk20a_read_ptimer(struct gk20a *g);
|
||||
int gk20a_read_ptimer(struct gk20a *g, u64 *value);
|
||||
extern struct class nvgpu_class;
|
||||
|
||||
#define INTERFACE_NAME "nvhost%s-gpu"
|
||||
|
||||
@@ -150,6 +150,7 @@ int gk20a_init_hal(struct gk20a *g)
|
||||
gops->name = "gk20a";
|
||||
gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics;
|
||||
gops->get_litter_value = gk20a_get_litter_value;
|
||||
gops->read_ptimer = gk20a_read_ptimer;
|
||||
|
||||
c->twod_class = FERMI_TWOD_A;
|
||||
c->threed_class = KEPLER_C;
|
||||
|
||||
@@ -218,6 +218,7 @@ int gm20b_init_hal(struct gk20a *g)
|
||||
gops->name = "gm20b";
|
||||
gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics;
|
||||
gops->get_litter_value = gm20b_get_litter_value;
|
||||
gops->read_ptimer = gk20a_read_ptimer;
|
||||
|
||||
c->twod_class = FERMI_TWOD_A;
|
||||
c->threed_class = MAXWELL_B;
|
||||
|
||||
Reference in New Issue
Block a user