gpu: nvgpu: gv11b: Enable perfmon.
t19x PMU ucode uses the RPC mechanism for PERFMON commands.
- Declared "pmu_init_perfmon", "pmu_perfmon_start_sampling",
  "pmu_perfmon_stop_sampling" and "pmu_perfmon_get_samples" in pmu ops
  to differentiate between chips using the RPC and the legacy cmd/msg
  mechanism.
- Defined and used PERFMON RPC commands for t19x:
  - INIT
  - START
  - STOP
  - QUERY
- Added an RPC handler for PERFMON RPC commands.
- For querying GPU utilization/load, the PERFMON_QUERY RPC command
  must be sent for gv11b.
- Enabled perfmon for gv11b.

Bug 2039013

Change-Id: Ic32326f81d48f11bc772afb8fee2dee6e427a699
Signed-off-by: Deepak Goyal <dgoyal@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1614114
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Tested-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
committed by mobile promotions
parent a57258e9b1
commit e0dbf3a784
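For context before the diff: this change routes all PERFMON calls through the per-chip g->ops.pmu function-pointer table, so gv11b can bind RPC-based implementations while older chips keep the legacy cmd/msg helpers. Below is a self-contained toy model of that dispatch pattern, not driver code; all names are hypothetical stand-ins for the real ops.

#include <stdio.h>

/* Toy stand-ins for the two transports (all names hypothetical). */
typedef int (*perfmon_op)(void);

static int legacy_cmd_msg_start(void) { return printf("legacy cmd/msg START\n"); }
static int rpc_start(void)            { return printf("PERFMON START via RPC\n"); }
static int rpc_query(void)            { return printf("PERFMON QUERY via RPC\n"); }

/* Models the per-chip gpu_ops.pmu table: each chip binds its own set. */
struct toy_pmu_ops {
	perfmon_op pmu_perfmon_start_sampling;
	perfmon_op pmu_perfmon_get_samples_rpc; /* left NULL on legacy chips */
};

int main(void)
{
	struct toy_pmu_ops gp10b = { legacy_cmd_msg_start, NULL };
	struct toy_pmu_ops gv11b = { rpc_start, rpc_query };

	/* Common code calls through the table and never checks the chip. */
	gp10b.pmu_perfmon_start_sampling();
	gv11b.pmu_perfmon_start_sampling();

	/* The QUERY path exists only where the RPC op is bound. */
	if (gv11b.pmu_perfmon_get_samples_rpc)
		gv11b.pmu_perfmon_get_samples_rpc();
	return 0;
}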
@@ -352,11 +352,11 @@ static ssize_t perfmon_events_enable_write(struct file *file,
 	if (val && !g->pmu.perfmon_sampling_enabled &&
 			nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) {
 		g->pmu.perfmon_sampling_enabled = true;
-		nvgpu_pmu_perfmon_start_sampling(&(g->pmu));
+		g->ops.pmu.pmu_perfmon_start_sampling(&(g->pmu));
 	} else if (!val && g->pmu.perfmon_sampling_enabled &&
 			nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) {
 		g->pmu.perfmon_sampling_enabled = false;
-		nvgpu_pmu_perfmon_stop_sampling(&(g->pmu));
+		g->ops.pmu.pmu_perfmon_stop_sampling(&(g->pmu));
 	}
 	gk20a_idle(g);
 } else {
@@ -208,6 +208,7 @@ struct gk20a_platform t19x_gpu_tegra_platform = {
 	.can_slcg = false,
 	.can_blcg = false,
 	.can_elcg = false,
+	.enable_perfmon = true,
 
 	/* power management callbacks */
 	.suspend = gv11b_tegra_suspend,
@@ -417,6 +417,9 @@ static const struct gpu_ops vgpu_gp10b_ops = {
 		.pmu_mutex_release = gk20a_pmu_mutex_release,
 		.write_dmatrfbase = gp10b_write_dmatrfbase,
 		.pmu_elpg_statistics = gp10b_pmu_elpg_statistics,
+		.pmu_init_perfmon = nvgpu_pmu_init_perfmon,
+		.pmu_perfmon_start_sampling = nvgpu_pmu_perfmon_start_sampling,
+		.pmu_perfmon_stop_sampling = nvgpu_pmu_perfmon_stop_sampling,
 		.pmu_pg_init_param = gp10b_pg_gr_init,
 		.pmu_pg_supported_engines_list = gk20a_pmu_pg_engines_list,
 		.pmu_pg_engines_feature_list = gk20a_pmu_pg_feature_list,
@@ -461,6 +461,10 @@ static const struct gpu_ops vgpu_gv11b_ops = {
 		.pmu_mutex_release = gk20a_pmu_mutex_release,
 		.write_dmatrfbase = gp10b_write_dmatrfbase,
 		.pmu_elpg_statistics = gp106_pmu_elpg_statistics,
+		.pmu_init_perfmon = nvgpu_pmu_init_perfmon_rpc,
+		.pmu_perfmon_start_sampling = nvgpu_pmu_perfmon_start_sampling_rpc,
+		.pmu_perfmon_stop_sampling = nvgpu_pmu_perfmon_stop_sampling_rpc,
+		.pmu_perfmon_get_samples_rpc = nvgpu_pmu_perfmon_get_samples_rpc,
 		.pmu_pg_init_param = gv11b_pg_gr_init,
 		.pmu_pg_supported_engines_list = gk20a_pmu_pg_engines_list,
 		.pmu_pg_engines_feature_list = gk20a_pmu_pg_feature_list,
@@ -925,8 +925,9 @@ int nvgpu_pmu_process_message(struct nvgpu_pmu *pmu)
 		nvgpu_pmu_process_init_msg(pmu, &msg);
 		if (g->ops.pmu.init_wpr_region != NULL)
 			g->ops.pmu.init_wpr_region(g);
+
 		if (nvgpu_is_enabled(g, NVGPU_PMU_PERFMON))
-			nvgpu_pmu_init_perfmon(pmu);
+			g->ops.pmu.pmu_init_perfmon(pmu);
 
 		return 0;
 	}
@@ -978,6 +979,8 @@ static void pmu_rpc_handler(struct gk20a *g, struct pmu_msg *msg,
 	void *param, u32 handle, u32 status)
 {
 	struct nv_pmu_rpc_header rpc;
+	struct nvgpu_pmu *pmu = &g->pmu;
+	struct nv_pmu_rpc_struct_perfmon_query *rpc_param;
 
 	memset(&rpc, 0, sizeof(struct nv_pmu_rpc_header));
 	if (param)
@@ -990,6 +993,32 @@ static void pmu_rpc_handler(struct gk20a *g, struct pmu_msg *msg,
 	}
 
 	switch (msg->hdr.unit_id) {
+	case PMU_UNIT_PERFMON_T18X:
+	case PMU_UNIT_PERFMON:
+		switch (rpc.function) {
+		case NV_PMU_RPC_ID_PERFMON_T18X_INIT:
+			nvgpu_pmu_dbg(g,
+				"reply NV_PMU_RPC_ID_PERFMON_INIT");
+			pmu->perfmon_ready = 1;
+			break;
+		case NV_PMU_RPC_ID_PERFMON_T18X_START:
+			nvgpu_pmu_dbg(g,
+				"reply NV_PMU_RPC_ID_PERFMON_START");
+			break;
+		case NV_PMU_RPC_ID_PERFMON_T18X_STOP:
+			nvgpu_pmu_dbg(g,
+				"reply NV_PMU_RPC_ID_PERFMON_STOP");
+			break;
+		case NV_PMU_RPC_ID_PERFMON_T18X_QUERY:
+			nvgpu_pmu_dbg(g,
+				"reply NV_PMU_RPC_ID_PERFMON_QUERY");
+			rpc_param = (struct nv_pmu_rpc_struct_perfmon_query *)param;
+			pmu->load = rpc_param->sample_buffer[0];
+			pmu->perfmon_query = 1;
+			/* set perfmon_query to 1 after load is copied */
+			break;
+		}
+		break;
 	/* TBD case will be added */
 	default:
 		nvgpu_err(g, " Invalid RPC response, stats 0x%x",
@@ -221,11 +221,18 @@ int nvgpu_pmu_load_update(struct gk20a *g)
 
 	if (!pmu->perfmon_ready) {
 		pmu->load_shadow = 0;
+		pmu->load = 0;
 		return 0;
 	}
 
-	nvgpu_flcn_copy_from_dmem(pmu->flcn, pmu->sample_buffer,
-		(u8 *)&load, 2, 0);
+	if (g->ops.pmu.pmu_perfmon_get_samples_rpc) {
+		nvgpu_pmu_perfmon_get_samples_rpc(pmu);
+		load = pmu->load;
+	} else {
+		nvgpu_flcn_copy_from_dmem(pmu->flcn, pmu->sample_buffer,
+			(u8 *)&load, 2 * 1, 0);
+	}
 
 	pmu->load_shadow = load / 10;
 	pmu->load_avg = (((9*pmu->load_avg) + pmu->load_shadow) / 10);
@@ -288,6 +295,129 @@ int nvgpu_pmu_handle_perfmon_event(struct nvgpu_pmu *pmu,
 
 	/* restart sampling */
 	if (pmu->perfmon_sampling_enabled)
-		return nvgpu_pmu_perfmon_start_sampling(pmu);
+		return g->ops.pmu.pmu_perfmon_start_sampling(&(g->pmu));
 
 	return 0;
 }
+
+/* Perfmon RPC */
+int nvgpu_pmu_init_perfmon_rpc(struct nvgpu_pmu *pmu)
+{
+	struct gk20a *g = gk20a_from_pmu(pmu);
+	struct nv_pmu_rpc_struct_perfmon_init rpc;
+	int status = 0;
+
+	if (!nvgpu_is_enabled(g, NVGPU_PMU_PERFMON))
+		return 0;
+
+	nvgpu_log_fn(g, " ");
+
+	memset(&rpc, 0, sizeof(struct nv_pmu_rpc_struct_perfmon_init));
+	pmu->perfmon_ready = 0;
+
+	gk20a_pmu_init_perfmon_counter(g);
+
+	/* microseconds interval between pmu polls perf counters */
+	rpc.sample_periodus = 16700;
+	/* number of sample periods below lower threshold
+	 * before pmu triggers perfmon decrease event
+	 */
+	rpc.to_decrease_count = 15;
+	/* index of base counter, aka. always ticking counter */
+	rpc.base_counter_id = 6;
+	/* moving average window for sample periods */
+	rpc.samples_in_moving_avg = 17;
+	/* number of perfmon counters
+	 * counter #3 (GR and CE2) for gk20a
+	 */
+	rpc.num_counters = 1;
+
+	memset(rpc.counter, 0, sizeof(struct pmu_perfmon_counter_v3) *
+		NV_PMU_PERFMON_MAX_COUNTERS);
+	/* Counter used to count GR busy cycles */
+	rpc.counter[0].index = 3;
+
+	nvgpu_pmu_dbg(g, "RPC post NV_PMU_RPC_ID_PERFMON_INIT");
+	PMU_RPC_EXECUTE(status, pmu, PERFMON_T18X, INIT, &rpc, 0);
+	if (status) {
+		nvgpu_err(g, "Failed to execute RPC, status=0x%x", status);
+		goto exit;
+	}
+
+exit:
+	return 0;
+}
+
+int nvgpu_pmu_perfmon_start_sampling_rpc(struct nvgpu_pmu *pmu)
+{
+	struct gk20a *g = gk20a_from_pmu(pmu);
+	struct nv_pmu_rpc_struct_perfmon_start rpc;
+	int status = 0;
+
+	if (!nvgpu_is_enabled(g, NVGPU_PMU_PERFMON))
+		return 0;
+
+	nvgpu_log_fn(g, " ");
+
+	memset(&rpc, 0, sizeof(struct nv_pmu_rpc_struct_perfmon_start));
+	rpc.group_id = PMU_DOMAIN_GROUP_PSTATE;
+	rpc.state_id = pmu->perfmon_state_id[PMU_DOMAIN_GROUP_PSTATE];
+	rpc.flags = PMU_PERFMON_FLAG_ENABLE_INCREASE |
+		PMU_PERFMON_FLAG_ENABLE_DECREASE |
+		PMU_PERFMON_FLAG_CLEAR_PREV;
+
+	rpc.counter[0].upper_threshold = 3000;
+	rpc.counter[0].lower_threshold = 1000;
+
+	nvgpu_pmu_dbg(g, "RPC post NV_PMU_RPC_ID_PERFMON_START\n");
+	PMU_RPC_EXECUTE(status, pmu, PERFMON_T18X, START, &rpc, 0);
+	if (status)
+		nvgpu_err(g, "Failed to execute RPC, status=0x%x", status);
+
+	return status;
+}
+
+int nvgpu_pmu_perfmon_stop_sampling_rpc(struct nvgpu_pmu *pmu)
+{
+	struct gk20a *g = gk20a_from_pmu(pmu);
+	struct nv_pmu_rpc_struct_perfmon_stop rpc;
+	int status = 0;
+
+	if (!nvgpu_is_enabled(g, NVGPU_PMU_PERFMON))
+		return 0;
+
+	nvgpu_log_fn(g, " ");
+
+	memset(&rpc, 0, sizeof(struct nv_pmu_rpc_struct_perfmon_stop));
+	/* PERFMON Stop */
+	nvgpu_pmu_dbg(g, "RPC post NV_PMU_RPC_ID_PERFMON_STOP\n");
+	PMU_RPC_EXECUTE(status, pmu, PERFMON_T18X, STOP, &rpc, 0);
+	if (status)
+		nvgpu_err(g, "Failed to execute RPC, status=0x%x", status);
+
+	return status;
+}
+
+int nvgpu_pmu_perfmon_get_samples_rpc(struct nvgpu_pmu *pmu)
+{
+	struct gk20a *g = gk20a_from_pmu(pmu);
+	struct nv_pmu_rpc_struct_perfmon_query rpc;
+	int status = 0;
+
+	if (!nvgpu_is_enabled(g, NVGPU_PMU_PERFMON))
+		return 0;
+
+	nvgpu_log_fn(g, " ");
+	pmu->perfmon_query = 0;
+	memset(&rpc, 0, sizeof(struct nv_pmu_rpc_struct_perfmon_query));
+	/* PERFMON QUERY */
+	nvgpu_pmu_dbg(g, "RPC post NV_PMU_RPC_ID_PERFMON_QUERY\n");
+	PMU_RPC_EXECUTE(status, pmu, PERFMON_T18X, QUERY, &rpc, 0);
+	if (status)
+		nvgpu_err(g, "Failed to execute RPC, status=0x%x", status);
+
+	pmu_wait_message_cond(pmu, gk20a_get_gr_idle_timeout(g),
+		&pmu->perfmon_query, 1);
+
+	return status;
+}
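A reading aid for the QUERY path added above: nvgpu_pmu_perfmon_get_samples_rpc() clears pmu->perfmon_query, posts the RPC, and blocks in pmu_wait_message_cond(); pmu_rpc_handler() (earlier hunk) copies sample_buffer[0] into pmu->load before setting the flag. Below is a self-contained toy model of that handshake, with all names hypothetical; in the real driver the reply arrives asynchronously from the PMU.

#include <stdio.h>

/* Toy stand-ins for pmu->load and pmu->perfmon_query. */
struct toy_pmu {
	unsigned int load;       /* filled in by the reply handler */
	volatile int query_done; /* 0 while the QUERY RPC is in flight */
};

/* Models pmu_rpc_handler() receiving NV_PMU_RPC_ID_PERFMON_T18X_QUERY. */
static void toy_rpc_reply(struct toy_pmu *pmu, unsigned int sample)
{
	pmu->load = sample;  /* copy the load sample first... */
	pmu->query_done = 1; /* ...then signal the waiting caller */
}

/* Models nvgpu_pmu_perfmon_get_samples_rpc(): post, then wait. */
static unsigned int toy_get_samples(struct toy_pmu *pmu)
{
	pmu->query_done = 0;     /* the real code posts the RPC here */
	toy_rpc_reply(pmu, 870); /* asynchronous in the real driver */
	while (!pmu->query_done)
		; /* models pmu_wait_message_cond() */
	return pmu->load;
}

int main(void)
{
	struct toy_pmu pmu = { 0 };

	printf("raw load sample: %u\n", toy_get_samples(&pmu));
	return 0;
}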
@@ -836,6 +836,10 @@ struct gpu_ops {
 		int (*prepare_ucode)(struct gk20a *g);
 		int (*pmu_setup_hw_and_bootstrap)(struct gk20a *g);
 		int (*pmu_nsbootstrap)(struct nvgpu_pmu *pmu);
+		int (*pmu_init_perfmon)(struct nvgpu_pmu *pmu);
+		int (*pmu_perfmon_start_sampling)(struct nvgpu_pmu *pmu);
+		int (*pmu_perfmon_stop_sampling)(struct nvgpu_pmu *pmu);
+		int (*pmu_perfmon_get_samples_rpc)(struct nvgpu_pmu *pmu);
 		int (*pmu_setup_elpg)(struct gk20a *g);
 		u32 (*pmu_get_queue_head)(u32 i);
 		u32 (*pmu_get_queue_head_size)(void);
@@ -483,6 +483,9 @@ static const struct gpu_ops gm20b_ops = {
 		.pmu_mutex_release = gk20a_pmu_mutex_release,
 		.write_dmatrfbase = gm20b_write_dmatrfbase,
 		.pmu_elpg_statistics = gk20a_pmu_elpg_statistics,
+		.pmu_init_perfmon = nvgpu_pmu_init_perfmon,
+		.pmu_perfmon_start_sampling = nvgpu_pmu_perfmon_start_sampling,
+		.pmu_perfmon_stop_sampling = nvgpu_pmu_perfmon_stop_sampling,
 		.pmu_pg_init_param = NULL,
 		.pmu_pg_supported_engines_list = gk20a_pmu_pg_engines_list,
 		.pmu_pg_engines_feature_list = gk20a_pmu_pg_feature_list,
@@ -583,6 +583,9 @@ static const struct gpu_ops gp106_ops = {
 		.is_pmu_supported = gp106_is_pmu_supported,
 		.pmu_pg_supported_engines_list = gp106_pmu_pg_engines_list,
 		.pmu_elpg_statistics = gp106_pmu_elpg_statistics,
+		.pmu_init_perfmon = nvgpu_pmu_init_perfmon,
+		.pmu_perfmon_start_sampling = nvgpu_pmu_perfmon_start_sampling,
+		.pmu_perfmon_stop_sampling = nvgpu_pmu_perfmon_stop_sampling,
 		.pmu_mutex_acquire = gk20a_pmu_mutex_acquire,
 		.pmu_is_lpwr_feature_supported =
 			gp106_pmu_is_lpwr_feature_supported,
@@ -544,6 +544,9 @@ static const struct gpu_ops gp10b_ops = {
 		.pmu_mutex_release = gk20a_pmu_mutex_release,
 		.write_dmatrfbase = gp10b_write_dmatrfbase,
 		.pmu_elpg_statistics = gp10b_pmu_elpg_statistics,
+		.pmu_init_perfmon = nvgpu_pmu_init_perfmon,
+		.pmu_perfmon_start_sampling = nvgpu_pmu_perfmon_start_sampling,
+		.pmu_perfmon_stop_sampling = nvgpu_pmu_perfmon_stop_sampling,
 		.pmu_pg_init_param = gp10b_pg_gr_init,
 		.pmu_pg_supported_engines_list = gk20a_pmu_pg_engines_list,
 		.pmu_pg_engines_feature_list = gk20a_pmu_pg_feature_list,
@@ -591,6 +591,9 @@ static const struct gpu_ops gv100_ops = {
 		.is_pmu_supported = gp106_is_pmu_supported,
 		.pmu_pg_supported_engines_list = gp106_pmu_pg_engines_list,
 		.pmu_elpg_statistics = gp106_pmu_elpg_statistics,
+		.pmu_init_perfmon = nvgpu_pmu_init_perfmon,
+		.pmu_perfmon_start_sampling = nvgpu_pmu_perfmon_start_sampling,
+		.pmu_perfmon_stop_sampling = nvgpu_pmu_perfmon_stop_sampling,
 		.pmu_mutex_acquire = gk20a_pmu_mutex_acquire,
 		.pmu_is_lpwr_feature_supported =
 			gp106_pmu_is_lpwr_feature_supported,
@@ -606,6 +606,10 @@ static const struct gpu_ops gv11b_ops = {
 		.pmu_mutex_release = gk20a_pmu_mutex_release,
 		.write_dmatrfbase = gp10b_write_dmatrfbase,
 		.pmu_elpg_statistics = gp106_pmu_elpg_statistics,
+		.pmu_init_perfmon = nvgpu_pmu_init_perfmon_rpc,
+		.pmu_perfmon_start_sampling = nvgpu_pmu_perfmon_start_sampling_rpc,
+		.pmu_perfmon_stop_sampling = nvgpu_pmu_perfmon_stop_sampling_rpc,
+		.pmu_perfmon_get_samples_rpc = nvgpu_pmu_perfmon_get_samples_rpc,
 		.pmu_pg_init_param = gv11b_pg_gr_init,
 		.pmu_pg_supported_engines_list = gk20a_pmu_pg_engines_list,
 		.pmu_pg_engines_feature_list = gk20a_pmu_pg_feature_list,
@@ -329,6 +329,8 @@ struct nvgpu_pmu {
 	u32 *ucode_image;
 	bool pmu_ready;
 
+	u32 perfmon_query;
+
 	u32 zbc_save_done;
 
 	u32 stat_dmem_offset[PMU_PG_ELPG_ENGINE_ID_INVALID_ENGINE];
@@ -362,6 +364,7 @@ struct nvgpu_pmu {
 	u32 sample_buffer;
 	u32 load_shadow;
 	u32 load_avg;
+	u32 load;
 
 	struct nvgpu_mutex isr_mutex;
 	bool isr_enabled;
@@ -432,8 +435,12 @@ int nvgpu_pmu_process_message(struct nvgpu_pmu *pmu);
 int nvgpu_pmu_init_perfmon(struct nvgpu_pmu *pmu);
 int nvgpu_pmu_perfmon_start_sampling(struct nvgpu_pmu *pmu);
 int nvgpu_pmu_perfmon_stop_sampling(struct nvgpu_pmu *pmu);
+int nvgpu_pmu_perfmon_start_sampling_rpc(struct nvgpu_pmu *pmu);
+int nvgpu_pmu_perfmon_stop_sampling_rpc(struct nvgpu_pmu *pmu);
+int nvgpu_pmu_perfmon_get_samples_rpc(struct nvgpu_pmu *pmu);
 int nvgpu_pmu_handle_perfmon_event(struct nvgpu_pmu *pmu,
 	struct pmu_perfmon_msg *msg);
+int nvgpu_pmu_init_perfmon_rpc(struct nvgpu_pmu *pmu);
 int nvgpu_pmu_load_norm(struct gk20a *g, u32 *load);
 int nvgpu_pmu_load_update(struct gk20a *g);
 void nvgpu_pmu_reset_load_counters(struct gk20a *g);
@@ -32,6 +32,8 @@
 #define PMU_PERFMON_FLAG_ENABLE_DECREASE (0x00000002)
 #define PMU_PERFMON_FLAG_CLEAR_PREV (0x00000004)
 
+#define NV_PMU_PERFMON_MAX_COUNTERS 10
+
 enum pmu_perfmon_cmd_start_fields {
 	COUNTER_ALLOC
 };
@@ -61,6 +63,15 @@ struct pmu_perfmon_counter_v2 {
 	u32 scale;
 };
 
+struct pmu_perfmon_counter_v3 {
+	u8 index;
+	u8 group_id;
+	u16 flags;
+	u16 upper_threshold; /* units of 0.01% */
+	u16 lower_threshold; /* units of 0.01% */
+	u32 scale;
+};
+
 struct pmu_perfmon_cmd_start_v3 {
 	u8 cmd_type;
 	u8 group_id;
@@ -184,4 +195,77 @@ struct pmu_perfmon_msg {
 	};
 };
 
+/* PERFMON RPC interface */
+/*
+ * RPC calls serviced by PERFMON unit.
+ */
+#define NV_PMU_RPC_ID_PERFMON_T18X_INIT 0x00
+#define NV_PMU_RPC_ID_PERFMON_T18X_DEINIT 0x01
+#define NV_PMU_RPC_ID_PERFMON_T18X_START 0x02
+#define NV_PMU_RPC_ID_PERFMON_T18X_STOP 0x03
+#define NV_PMU_RPC_ID_PERFMON_T18X_QUERY 0x04
+#define NV_PMU_RPC_ID_PERFMON_T18X__COUNT 0x05
+
+/*
+ * structure that holds data used to
+ * execute Perfmon INIT RPC.
+ * hdr - RPC header
+ * sample_periodus - Desired period in between samples.
+ * to_decrease_count - Consecutive samples before decrease event.
+ * base_counter_id - Index of the base counter.
+ * samples_in_moving_avg - Number of values in moving average.
+ * num_counters - Num of counters PMU should use.
+ * counter - Counters.
+ */
+struct nv_pmu_rpc_struct_perfmon_init {
+	struct nv_pmu_rpc_header hdr;
+	u32 sample_periodus;
+	u8 to_decrease_count;
+	u8 base_counter_id;
+	u8 samples_in_moving_avg;
+	u8 num_counters;
+	struct pmu_perfmon_counter_v3 counter[NV_PMU_PERFMON_MAX_COUNTERS];
+	u32 scratch[1];
+};
+
+/*
+ * structure that holds data used to
+ * execute Perfmon START RPC.
+ * hdr - RPC header
+ * group_id - NV group ID
+ * state_id - NV state ID
+ * flags - PMU_PERFMON flags
+ * counters - Counters.
+ */
+struct nv_pmu_rpc_struct_perfmon_start {
+	struct nv_pmu_rpc_header hdr;
+	u8 group_id;
+	u8 state_id;
+	u8 flags;
+	struct pmu_perfmon_counter_v3 counter[NV_PMU_PERFMON_MAX_COUNTERS];
+	u32 scratch[1];
+};
+
+/*
+ * structure that holds data used to
+ * execute Perfmon STOP RPC.
+ * hdr - RPC header
+ */
+struct nv_pmu_rpc_struct_perfmon_stop {
+	struct nv_pmu_rpc_header hdr;
+	u32 scratch[1];
+};
+
+/*
+ * structure that holds data used to
+ * execute QUERY RPC.
+ * hdr - RPC header
+ * sample_buffer - Output buffer from pmu containing utilization samples.
+ */
+struct nv_pmu_rpc_struct_perfmon_query {
+	struct nv_pmu_rpc_header hdr;
+	u16 sample_buffer[NV_PMU_PERFMON_MAX_COUNTERS];
+	u32 scratch[1];
+};
+
 #endif /* _GPMUIFPERFMON_H_ */