gpu: nvgpu: gv11b: Enable perfmon.

t19x PMU ucode uses RPC mechanism for
PERFMON commands.

- Declared  "pmu_init_perfmon",
  "pmu_perfmon_start_sampling",
  "pmu_perfmon_stop_sampling" and
  "pmu_perfmon_get_samples" in pmu ops
  to differenciate for chips using RPC & legacy
  cmd/msg mechanism.
- Defined and used PERFMON RPC commands for t19x:
  - INIT
  - START
  - STOP
  - QUERY
- Added an RPC handler for PERFMON RPC commands.
- For querying GPU utilization/load, the PERFMON_QUERY
  RPC command needs to be sent on gv11b.
- Enabled perfmon for gv11b.
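
A minimal dispatch sketch (not part of this patch): common PMU code calls
through g->ops.pmu, and each chip's ops table plugs in either the legacy
cmd/msg implementations or the new *_rpc ones, so callers stay chip-agnostic.
The wrapper function below is hypothetical; the ops fields and helpers it
uses are the ones touched by this change.

/* Hypothetical caller, for illustration only. */
static int pmu_perfmon_sampling_set(struct gk20a *g, bool enable)
{
	struct nvgpu_pmu *pmu = &g->pmu;

	if (!nvgpu_is_enabled(g, NVGPU_PMU_PERFMON))
		return 0;

	/*
	 * gv11b points these ops at the *_rpc variants; older
	 * chips keep the legacy cmd/msg variants.
	 */
	if (enable)
		return g->ops.pmu.pmu_perfmon_start_sampling(pmu);

	return g->ops.pmu.pmu_perfmon_stop_sampling(pmu);
}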

Bug 2039013

Change-Id: Ic32326f81d48f11bc772afb8fee2dee6e427a699
Signed-off-by: Deepak Goyal <dgoyal@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1614114
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Tested-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>

@@ -352,11 +352,11 @@ static ssize_t perfmon_events_enable_write(struct file *file,
if (val && !g->pmu.perfmon_sampling_enabled &&
nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) {
g->pmu.perfmon_sampling_enabled = true;
nvgpu_pmu_perfmon_start_sampling(&(g->pmu));
g->ops.pmu.pmu_perfmon_start_sampling(&(g->pmu));
} else if (!val && g->pmu.perfmon_sampling_enabled &&
nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) {
g->pmu.perfmon_sampling_enabled = false;
nvgpu_pmu_perfmon_stop_sampling(&(g->pmu));
g->ops.pmu.pmu_perfmon_stop_sampling(&(g->pmu));
}
gk20a_idle(g);
} else {


@@ -208,6 +208,7 @@ struct gk20a_platform t19x_gpu_tegra_platform = {
.can_slcg = false,
.can_blcg = false,
.can_elcg = false,
.enable_perfmon = true,
/* power management callbacks */
.suspend = gv11b_tegra_suspend,


@@ -417,6 +417,9 @@ static const struct gpu_ops vgpu_gp10b_ops = {
.pmu_mutex_release = gk20a_pmu_mutex_release,
.write_dmatrfbase = gp10b_write_dmatrfbase,
.pmu_elpg_statistics = gp10b_pmu_elpg_statistics,
.pmu_init_perfmon = nvgpu_pmu_init_perfmon,
.pmu_perfmon_start_sampling = nvgpu_pmu_perfmon_start_sampling,
.pmu_perfmon_stop_sampling = nvgpu_pmu_perfmon_stop_sampling,
.pmu_pg_init_param = gp10b_pg_gr_init,
.pmu_pg_supported_engines_list = gk20a_pmu_pg_engines_list,
.pmu_pg_engines_feature_list = gk20a_pmu_pg_feature_list,


@@ -461,6 +461,10 @@ static const struct gpu_ops vgpu_gv11b_ops = {
.pmu_mutex_release = gk20a_pmu_mutex_release,
.write_dmatrfbase = gp10b_write_dmatrfbase,
.pmu_elpg_statistics = gp106_pmu_elpg_statistics,
.pmu_init_perfmon = nvgpu_pmu_init_perfmon_rpc,
.pmu_perfmon_start_sampling = nvgpu_pmu_perfmon_start_sampling_rpc,
.pmu_perfmon_stop_sampling = nvgpu_pmu_perfmon_stop_sampling_rpc,
.pmu_perfmon_get_samples_rpc = nvgpu_pmu_perfmon_get_samples_rpc,
.pmu_pg_init_param = gv11b_pg_gr_init,
.pmu_pg_supported_engines_list = gk20a_pmu_pg_engines_list,
.pmu_pg_engines_feature_list = gk20a_pmu_pg_feature_list,


@@ -925,8 +925,9 @@ int nvgpu_pmu_process_message(struct nvgpu_pmu *pmu)
nvgpu_pmu_process_init_msg(pmu, &msg);
if (g->ops.pmu.init_wpr_region != NULL)
g->ops.pmu.init_wpr_region(g);
if (nvgpu_is_enabled(g, NVGPU_PMU_PERFMON))
nvgpu_pmu_init_perfmon(pmu);
g->ops.pmu.pmu_init_perfmon(pmu);
return 0;
}
@@ -978,6 +979,8 @@ static void pmu_rpc_handler(struct gk20a *g, struct pmu_msg *msg,
void *param, u32 handle, u32 status)
{
struct nv_pmu_rpc_header rpc;
struct nvgpu_pmu *pmu = &g->pmu;
struct nv_pmu_rpc_struct_perfmon_query *rpc_param;
memset(&rpc, 0, sizeof(struct nv_pmu_rpc_header));
if (param)
@@ -990,6 +993,32 @@ static void pmu_rpc_handler(struct gk20a *g, struct pmu_msg *msg,
}
switch (msg->hdr.unit_id) {
case PMU_UNIT_PERFMON_T18X:
case PMU_UNIT_PERFMON:
switch (rpc.function) {
case NV_PMU_RPC_ID_PERFMON_T18X_INIT:
nvgpu_pmu_dbg(g,
"reply NV_PMU_RPC_ID_PERFMON_INIT");
pmu->perfmon_ready = 1;
break;
case NV_PMU_RPC_ID_PERFMON_T18X_START:
nvgpu_pmu_dbg(g,
"reply NV_PMU_RPC_ID_PERFMON_START");
break;
case NV_PMU_RPC_ID_PERFMON_T18X_STOP:
nvgpu_pmu_dbg(g,
"reply NV_PMU_RPC_ID_PERFMON_STOP");
break;
case NV_PMU_RPC_ID_PERFMON_T18X_QUERY:
nvgpu_pmu_dbg(g,
"reply NV_PMU_RPC_ID_PERFMON_QUERY");
rpc_param = (struct nv_pmu_rpc_struct_perfmon_query *)param;
pmu->load = rpc_param->sample_buffer[0];
pmu->perfmon_query = 1;
/* set perfmon_query to 1 after load is copied */
break;
}
break;
/* TBD case will be added */
default:
nvgpu_err(g, " Invalid RPC response, stats 0x%x",


@@ -221,11 +221,18 @@ int nvgpu_pmu_load_update(struct gk20a *g)
if (!pmu->perfmon_ready) {
pmu->load_shadow = 0;
pmu->load = 0;
return 0;
}
if (g->ops.pmu.pmu_perfmon_get_samples_rpc) {
nvgpu_pmu_perfmon_get_samples_rpc(pmu);
load = pmu->load;
} else {
nvgpu_flcn_copy_from_dmem(pmu->flcn, pmu->sample_buffer,
(u8 *)&load, 2, 0);
(u8 *)&load, 2 * 1, 0);
}
pmu->load_shadow = load / 10;
pmu->load_avg = (((9*pmu->load_avg) + pmu->load_shadow) / 10);
@@ -288,6 +295,129 @@ int nvgpu_pmu_handle_perfmon_event(struct nvgpu_pmu *pmu,
/* restart sampling */
if (pmu->perfmon_sampling_enabled)
return nvgpu_pmu_perfmon_start_sampling(pmu);
return g->ops.pmu.pmu_perfmon_start_sampling(&(g->pmu));
return 0;
}
/* Perfmon RPC */
int nvgpu_pmu_init_perfmon_rpc(struct nvgpu_pmu *pmu)
{
struct gk20a *g = gk20a_from_pmu(pmu);
struct nv_pmu_rpc_struct_perfmon_init rpc;
int status = 0;
if (!nvgpu_is_enabled(g, NVGPU_PMU_PERFMON))
return 0;
nvgpu_log_fn(g, " ");
memset(&rpc, 0, sizeof(struct nv_pmu_rpc_struct_perfmon_init));
pmu->perfmon_ready = 0;
gk20a_pmu_init_perfmon_counter(g);
/* microseconds interval between pmu polls perf counters */
rpc.sample_periodus = 16700;
/* number of sample periods below lower threshold
* before pmu triggers perfmon decrease event
*/
rpc.to_decrease_count = 15;
/* index of base counter, aka. always ticking counter */
rpc.base_counter_id = 6;
/* moving average window for sample periods */
rpc.samples_in_moving_avg = 17;
/* number of perfmon counters
* counter #3 (GR and CE2) for gk20a
*/
rpc.num_counters = 1;
memset(rpc.counter, 0, sizeof(struct pmu_perfmon_counter_v3) *
NV_PMU_PERFMON_MAX_COUNTERS);
/* Counter used to count GR busy cycles */
rpc.counter[0].index = 3;
nvgpu_pmu_dbg(g, "RPC post NV_PMU_RPC_ID_PERFMON_INIT");
PMU_RPC_EXECUTE(status, pmu, PERFMON_T18X, INIT, &rpc, 0);
if (status) {
nvgpu_err(g, "Failed to execute RPC, status=0x%x", status);
goto exit;
}
exit:
return 0;
}
int nvgpu_pmu_perfmon_start_sampling_rpc(struct nvgpu_pmu *pmu)
{
struct gk20a *g = gk20a_from_pmu(pmu);
struct nv_pmu_rpc_struct_perfmon_start rpc;
int status = 0;
if (!nvgpu_is_enabled(g, NVGPU_PMU_PERFMON))
return 0;
nvgpu_log_fn(g, " ");
memset(&rpc, 0, sizeof(struct nv_pmu_rpc_struct_perfmon_start));
rpc.group_id = PMU_DOMAIN_GROUP_PSTATE;
rpc.state_id = pmu->perfmon_state_id[PMU_DOMAIN_GROUP_PSTATE];
rpc.flags = PMU_PERFMON_FLAG_ENABLE_INCREASE |
PMU_PERFMON_FLAG_ENABLE_DECREASE |
PMU_PERFMON_FLAG_CLEAR_PREV;
rpc.counter[0].upper_threshold = 3000;
rpc.counter[0].lower_threshold = 1000;
nvgpu_pmu_dbg(g, "RPC post NV_PMU_RPC_ID_PERFMON_START\n");
PMU_RPC_EXECUTE(status, pmu, PERFMON_T18X, START, &rpc, 0);
if (status)
nvgpu_err(g, "Failed to execute RPC, status=0x%x", status);
return status;
}
int nvgpu_pmu_perfmon_stop_sampling_rpc(struct nvgpu_pmu *pmu)
{
struct gk20a *g = gk20a_from_pmu(pmu);
struct nv_pmu_rpc_struct_perfmon_stop rpc;
int status = 0;
if (!nvgpu_is_enabled(g, NVGPU_PMU_PERFMON))
return 0;
nvgpu_log_fn(g, " ");
memset(&rpc, 0, sizeof(struct nv_pmu_rpc_struct_perfmon_stop));
/* PERFMON Stop */
nvgpu_pmu_dbg(g, "RPC post NV_PMU_RPC_ID_PERFMON_STOP\n");
PMU_RPC_EXECUTE(status, pmu, PERFMON_T18X, STOP, &rpc, 0);
if (status)
nvgpu_err(g, "Failed to execute RPC, status=0x%x", status);
return status;
}
int nvgpu_pmu_perfmon_get_samples_rpc(struct nvgpu_pmu *pmu)
{
struct gk20a *g = gk20a_from_pmu(pmu);
struct nv_pmu_rpc_struct_perfmon_query rpc;
int status = 0;
if (!nvgpu_is_enabled(g, NVGPU_PMU_PERFMON))
return 0;
nvgpu_log_fn(g, " ");
pmu->perfmon_query = 0;
memset(&rpc, 0, sizeof(struct nv_pmu_rpc_struct_perfmon_query));
/* PERFMON QUERY */
nvgpu_pmu_dbg(g, "RPC post NV_PMU_RPC_ID_PERFMON_QUERY\n");
PMU_RPC_EXECUTE(status, pmu, PERFMON_T18X, QUERY, &rpc, 0);
if (status)
nvgpu_err(g, "Failed to execute RPC, status=0x%x", status);
pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g),
&pmu->perfmon_query, 1);
return status;
}


@@ -836,6 +836,10 @@ struct gpu_ops {
int (*prepare_ucode)(struct gk20a *g);
int (*pmu_setup_hw_and_bootstrap)(struct gk20a *g);
int (*pmu_nsbootstrap)(struct nvgpu_pmu *pmu);
int (*pmu_init_perfmon)(struct nvgpu_pmu *pmu);
int (*pmu_perfmon_start_sampling)(struct nvgpu_pmu *pmu);
int (*pmu_perfmon_stop_sampling)(struct nvgpu_pmu *pmu);
int (*pmu_perfmon_get_samples_rpc)(struct nvgpu_pmu *pmu);
int (*pmu_setup_elpg)(struct gk20a *g);
u32 (*pmu_get_queue_head)(u32 i);
u32 (*pmu_get_queue_head_size)(void);


@@ -483,6 +483,9 @@ static const struct gpu_ops gm20b_ops = {
.pmu_mutex_release = gk20a_pmu_mutex_release,
.write_dmatrfbase = gm20b_write_dmatrfbase,
.pmu_elpg_statistics = gk20a_pmu_elpg_statistics,
.pmu_init_perfmon = nvgpu_pmu_init_perfmon,
.pmu_perfmon_start_sampling = nvgpu_pmu_perfmon_start_sampling,
.pmu_perfmon_stop_sampling = nvgpu_pmu_perfmon_stop_sampling,
.pmu_pg_init_param = NULL,
.pmu_pg_supported_engines_list = gk20a_pmu_pg_engines_list,
.pmu_pg_engines_feature_list = gk20a_pmu_pg_feature_list,


@@ -583,6 +583,9 @@ static const struct gpu_ops gp106_ops = {
.is_pmu_supported = gp106_is_pmu_supported,
.pmu_pg_supported_engines_list = gp106_pmu_pg_engines_list,
.pmu_elpg_statistics = gp106_pmu_elpg_statistics,
.pmu_init_perfmon = nvgpu_pmu_init_perfmon,
.pmu_perfmon_start_sampling = nvgpu_pmu_perfmon_start_sampling,
.pmu_perfmon_stop_sampling = nvgpu_pmu_perfmon_stop_sampling,
.pmu_mutex_acquire = gk20a_pmu_mutex_acquire,
.pmu_is_lpwr_feature_supported =
gp106_pmu_is_lpwr_feature_supported,


@@ -544,6 +544,9 @@ static const struct gpu_ops gp10b_ops = {
.pmu_mutex_release = gk20a_pmu_mutex_release,
.write_dmatrfbase = gp10b_write_dmatrfbase,
.pmu_elpg_statistics = gp10b_pmu_elpg_statistics,
.pmu_init_perfmon = nvgpu_pmu_init_perfmon,
.pmu_perfmon_start_sampling = nvgpu_pmu_perfmon_start_sampling,
.pmu_perfmon_stop_sampling = nvgpu_pmu_perfmon_stop_sampling,
.pmu_pg_init_param = gp10b_pg_gr_init,
.pmu_pg_supported_engines_list = gk20a_pmu_pg_engines_list,
.pmu_pg_engines_feature_list = gk20a_pmu_pg_feature_list,


@@ -591,6 +591,9 @@ static const struct gpu_ops gv100_ops = {
.is_pmu_supported = gp106_is_pmu_supported,
.pmu_pg_supported_engines_list = gp106_pmu_pg_engines_list,
.pmu_elpg_statistics = gp106_pmu_elpg_statistics,
.pmu_init_perfmon = nvgpu_pmu_init_perfmon,
.pmu_perfmon_start_sampling = nvgpu_pmu_perfmon_start_sampling,
.pmu_perfmon_stop_sampling = nvgpu_pmu_perfmon_stop_sampling,
.pmu_mutex_acquire = gk20a_pmu_mutex_acquire,
.pmu_is_lpwr_feature_supported =
gp106_pmu_is_lpwr_feature_supported,


@@ -606,6 +606,10 @@ static const struct gpu_ops gv11b_ops = {
.pmu_mutex_release = gk20a_pmu_mutex_release,
.write_dmatrfbase = gp10b_write_dmatrfbase,
.pmu_elpg_statistics = gp106_pmu_elpg_statistics,
.pmu_init_perfmon = nvgpu_pmu_init_perfmon_rpc,
.pmu_perfmon_start_sampling = nvgpu_pmu_perfmon_start_sampling_rpc,
.pmu_perfmon_stop_sampling = nvgpu_pmu_perfmon_stop_sampling_rpc,
.pmu_perfmon_get_samples_rpc = nvgpu_pmu_perfmon_get_samples_rpc,
.pmu_pg_init_param = gv11b_pg_gr_init,
.pmu_pg_supported_engines_list = gk20a_pmu_pg_engines_list,
.pmu_pg_engines_feature_list = gk20a_pmu_pg_feature_list,


@@ -329,6 +329,8 @@ struct nvgpu_pmu {
u32 *ucode_image;
bool pmu_ready;
u32 perfmon_query;
u32 zbc_save_done;
u32 stat_dmem_offset[PMU_PG_ELPG_ENGINE_ID_INVALID_ENGINE];
@@ -362,6 +364,7 @@ struct nvgpu_pmu {
u32 sample_buffer;
u32 load_shadow;
u32 load_avg;
u32 load;
struct nvgpu_mutex isr_mutex;
bool isr_enabled;
@@ -432,8 +435,12 @@ int nvgpu_pmu_process_message(struct nvgpu_pmu *pmu);
int nvgpu_pmu_init_perfmon(struct nvgpu_pmu *pmu);
int nvgpu_pmu_perfmon_start_sampling(struct nvgpu_pmu *pmu);
int nvgpu_pmu_perfmon_stop_sampling(struct nvgpu_pmu *pmu);
int nvgpu_pmu_perfmon_start_sampling_rpc(struct nvgpu_pmu *pmu);
int nvgpu_pmu_perfmon_stop_sampling_rpc(struct nvgpu_pmu *pmu);
int nvgpu_pmu_perfmon_get_samples_rpc(struct nvgpu_pmu *pmu);
int nvgpu_pmu_handle_perfmon_event(struct nvgpu_pmu *pmu,
struct pmu_perfmon_msg *msg);
int nvgpu_pmu_init_perfmon_rpc(struct nvgpu_pmu *pmu);
int nvgpu_pmu_load_norm(struct gk20a *g, u32 *load);
int nvgpu_pmu_load_update(struct gk20a *g);
void nvgpu_pmu_reset_load_counters(struct gk20a *g);


@@ -32,6 +32,8 @@
#define PMU_PERFMON_FLAG_ENABLE_DECREASE (0x00000002)
#define PMU_PERFMON_FLAG_CLEAR_PREV (0x00000004)
#define NV_PMU_PERFMON_MAX_COUNTERS 10
enum pmu_perfmon_cmd_start_fields {
COUNTER_ALLOC
};
@@ -61,6 +63,15 @@ struct pmu_perfmon_counter_v2 {
u32 scale;
};
struct pmu_perfmon_counter_v3 {
u8 index;
u8 group_id;
u16 flags;
u16 upper_threshold; /* units of 0.01% */
u16 lower_threshold; /* units of 0.01% */
u32 scale;
};
struct pmu_perfmon_cmd_start_v3 {
u8 cmd_type;
u8 group_id;
@@ -184,4 +195,77 @@ struct pmu_perfmon_msg {
};
};
/* PERFMON RPC interface */
/*
* RPC calls serviced by PERFMON unit.
*/
#define NV_PMU_RPC_ID_PERFMON_T18X_INIT 0x00
#define NV_PMU_RPC_ID_PERFMON_T18X_DEINIT 0x01
#define NV_PMU_RPC_ID_PERFMON_T18X_START 0x02
#define NV_PMU_RPC_ID_PERFMON_T18X_STOP 0x03
#define NV_PMU_RPC_ID_PERFMON_T18X_QUERY 0x04
#define NV_PMU_RPC_ID_PERFMON_T18X__COUNT 0x05
/*
* structure that holds data used to
* execute Perfmon INIT RPC.
* hdr - RPC header
* sample_periodus - Desired period in between samples.
* to_decrease_count - Consecutive samples before decrease event.
* base_counter_id - Index of the base counter.
* samples_in_moving_avg - Number of values in moving average.
* num_counters - Num of counters PMU should use.
* counter - Counters.
*/
struct nv_pmu_rpc_struct_perfmon_init {
struct nv_pmu_rpc_header hdr;
u32 sample_periodus;
u8 to_decrease_count;
u8 base_counter_id;
u8 samples_in_moving_avg;
u8 num_counters;
struct pmu_perfmon_counter_v3 counter[NV_PMU_PERFMON_MAX_COUNTERS];
u32 scratch[1];
};
/*
* structure that holds data used to
* execute Perfmon START RPC.
* hdr - RPC header
* group_id - NV group ID
* state_id - NV state ID
* flags - PMU_PERFMON flags
* counters - Counters.
*/
struct nv_pmu_rpc_struct_perfmon_start {
struct nv_pmu_rpc_header hdr;
u8 group_id;
u8 state_id;
u8 flags;
struct pmu_perfmon_counter_v3 counter[NV_PMU_PERFMON_MAX_COUNTERS];
u32 scratch[1];
};
/*
* structure that holds data used to
* execute Perfmon STOP RPC.
* hdr - RPC header
*/
struct nv_pmu_rpc_struct_perfmon_stop {
struct nv_pmu_rpc_header hdr;
u32 scratch[1];
};
/*
* structure that holds data used to
* execute QUERY RPC.
* hdr - RPC header
* sample_buffer - Output buffer from pmu containing utilization samples.
*/
struct nv_pmu_rpc_struct_perfmon_query {
struct nv_pmu_rpc_header hdr;
u16 sample_buffer[NV_PMU_PERFMON_MAX_COUNTERS];
u32 scratch[1];
};
#endif /* _GPMUIFPERFMON_H_ */