diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index 6b249a676..93e30e3dc 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile @@ -94,7 +94,7 @@ nvgpu-y += \ common/pmu/pmu_fw.o \ common/pmu/pg/pmu_pg.o \ common/pmu/pg/pmu_aelpg.o \ - common/pmu/pmu_perfmon.o \ + common/pmu/perfmon/pmu_perfmon.o \ common/pmu/pmu_debug.o \ common/pmu/pmu_gk20a.o \ common/pmu/pmu_gm20b.o \ diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources index bbe94dd1f..f8ded795c 100644 --- a/drivers/gpu/nvgpu/Makefile.sources +++ b/drivers/gpu/nvgpu/Makefile.sources @@ -133,7 +133,7 @@ srcs += common/sim.c \ common/pmu/pmu_fw.c \ common/pmu/pg/pmu_pg.c \ common/pmu/pg/pmu_aelpg.c \ - common/pmu/pmu_perfmon.c \ + common/pmu/perfmon/pmu_perfmon.c \ common/pmu/pmu_debug.c \ common/pmu/pmu_gk20a.c \ common/pmu/pmu_gm20b.c \ diff --git a/drivers/gpu/nvgpu/common/pmu/ipc/pmu_msg.c b/drivers/gpu/nvgpu/common/pmu/ipc/pmu_msg.c index a2b5a7f62..b0278256c 100644 --- a/drivers/gpu/nvgpu/common/pmu/ipc/pmu_msg.c +++ b/drivers/gpu/nvgpu/common/pmu/ipc/pmu_msg.c @@ -30,6 +30,7 @@ #include #include #include +#include static int pmu_payload_extract(struct nvgpu_pmu *pmu, struct pmu_sequence *seq) { diff --git a/drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c b/drivers/gpu/nvgpu/common/pmu/perfmon/pmu_perfmon.c similarity index 82% rename from drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c rename to drivers/gpu/nvgpu/common/pmu/perfmon/pmu_perfmon.c index a9627c793..3ca7e56b5 100644 --- a/drivers/gpu/nvgpu/common/pmu/pmu_perfmon.c +++ b/drivers/gpu/nvgpu/common/pmu/perfmon/pmu_perfmon.c @@ -20,17 +20,21 @@ * DEALINGS IN THE SOFTWARE. 
*/ +#include +#include +#include +#include #include #include #include #include #include #include -#include +#include static u8 get_perfmon_id(struct nvgpu_pmu *pmu) { - struct gk20a *g = gk20a_from_pmu(pmu); + struct gk20a *g = pmu->g; u32 ver = g->params.gpu_arch + g->params.gpu_impl; u8 unit_id; @@ -64,7 +68,7 @@ void nvgpu_pmu_perfmon_rpc_handler(struct gk20a *g, struct nvgpu_pmu *pmu, case NV_PMU_RPC_ID_PERFMON_T18X_INIT: nvgpu_pmu_dbg(g, "reply NV_PMU_RPC_ID_PERFMON_INIT"); - pmu->perfmon_ready = true; + pmu->pmu_perfmon->perfmon_ready = true; break; case NV_PMU_RPC_ID_PERFMON_T18X_START: nvgpu_pmu_dbg(g, @@ -79,8 +83,8 @@ void nvgpu_pmu_perfmon_rpc_handler(struct gk20a *g, struct nvgpu_pmu *pmu, "reply NV_PMU_RPC_ID_PERFMON_QUERY"); rpc_param = (struct nv_pmu_rpc_struct_perfmon_query *) rpc_payload->rpc_buff; - pmu->load = rpc_param->sample_buffer[0]; - pmu->perfmon_query = 1; + pmu->pmu_perfmon->load = rpc_param->sample_buffer[0]; + pmu->pmu_perfmon->perfmon_query = 1; /* set perfmon_query to 1 after load is copied */ break; default: @@ -89,9 +93,40 @@ void nvgpu_pmu_perfmon_rpc_handler(struct gk20a *g, struct nvgpu_pmu *pmu, } } +int nvgpu_pmu_initialize_perfmon(struct gk20a *g, struct nvgpu_pmu *pmu) +{ + if (pmu->pmu_perfmon != NULL) { + /* Not to allocate a new buffer after railgating + is done. Use the same memory for pmu_perfmon + after railgating. 
+ */ + return 0; + + } else { + /* One-time memory allocation for pmu_perfmon */ + pmu->pmu_perfmon = (struct nvgpu_pmu_perfmon *)(nvgpu_kzalloc(g, + sizeof(struct nvgpu_pmu_perfmon))); + if (pmu->pmu_perfmon == NULL) { + nvgpu_err(g, "failed to initialize perfmon"); + return -ENOMEM; + } + } + return 0; +} + +/* Free pmu_perfmon; clear it since initialize reuses a non-NULL pointer. */ +void nvgpu_pmu_deinitialize_perfmon(struct gk20a *g, struct nvgpu_pmu *pmu) +{ + if (pmu->pmu_perfmon == NULL) { + return; + } + nvgpu_kfree(g, pmu->pmu_perfmon); + pmu->pmu_perfmon = NULL; +} + int nvgpu_pmu_init_perfmon(struct nvgpu_pmu *pmu) { - struct gk20a *g = gk20a_from_pmu(pmu); + struct gk20a *g = pmu->g; struct pmu_v *pv = &g->ops.pmu_ver; struct pmu_cmd cmd; struct pmu_payload payload; @@ -104,16 +139,16 @@ int nvgpu_pmu_init_perfmon(struct nvgpu_pmu *pmu) nvgpu_log_fn(g, " "); - pmu->perfmon_ready = false; + pmu->pmu_perfmon->perfmon_ready = false; g->ops.pmu.pmu_init_perfmon_counter(g); - if (pmu->sample_buffer == 0U) { + if (pmu->pmu_perfmon->sample_buffer == 0U) { tmp_addr = nvgpu_alloc(&pmu->dmem, 2U * sizeof(u16)); nvgpu_assert(tmp_addr <= U32_MAX); - pmu->sample_buffer = (u32)tmp_addr; + pmu->pmu_perfmon->sample_buffer = (u32)tmp_addr; } - if (pmu->sample_buffer == 0U) { + if (pmu->pmu_perfmon->sample_buffer == 0U) { nvgpu_err(g, "failed to allocate perfmon sample buffer"); return -ENOMEM; } @@ -133,10 +168,9 @@ int nvgpu_pmu_init_perfmon(struct nvgpu_pmu *pmu) cmd.cmd.perfmon.cmd_type = PMU_PERFMON_CMD_ID_INIT; /* buffer to save counter values for pmu perfmon */ pv->perfmon_cmd_init_set_sample_buffer(&cmd.cmd.perfmon, - (u16)pmu->sample_buffer); + (u16)pmu->pmu_perfmon->sample_buffer); /* number of sample periods below lower threshold * before pmu triggers perfmon decrease event - * TBD: = 15 */ pv->perfmon_cmd_init_set_dec_cnt(&cmd.cmd.perfmon, 15); /* index of base counter, aka. 
always ticking counter */ @@ -171,7 +205,7 @@ int nvgpu_pmu_init_perfmon(struct nvgpu_pmu *pmu) int nvgpu_pmu_perfmon_start_sampling(struct nvgpu_pmu *pmu) { - struct gk20a *g = gk20a_from_pmu(pmu); + struct gk20a *g = pmu->g; struct pmu_v *pv = &g->ops.pmu_ver; struct pmu_cmd cmd; struct pmu_payload payload; @@ -197,7 +231,7 @@ int nvgpu_pmu_perfmon_start_sampling(struct nvgpu_pmu *pmu) pv->perfmon_start_set_group_id(&cmd.cmd.perfmon, PMU_DOMAIN_GROUP_PSTATE); pv->perfmon_start_set_state_id(&cmd.cmd.perfmon, - pmu->perfmon_state_id[PMU_DOMAIN_GROUP_PSTATE]); + pmu->pmu_perfmon->perfmon_state_id[PMU_DOMAIN_GROUP_PSTATE]); pv->perfmon_start_set_flags(&cmd.cmd.perfmon, PMU_PERFMON_FLAG_ENABLE_INCREASE | @@ -230,7 +264,7 @@ int nvgpu_pmu_perfmon_start_sampling(struct nvgpu_pmu *pmu) int nvgpu_pmu_perfmon_stop_sampling(struct nvgpu_pmu *pmu) { - struct gk20a *g = gk20a_from_pmu(pmu); + struct gk20a *g = pmu->g; struct pmu_cmd cmd; u64 tmp_size; @@ -258,7 +292,7 @@ int nvgpu_pmu_perfmon_stop_sampling(struct nvgpu_pmu *pmu) int nvgpu_pmu_load_norm(struct gk20a *g, u32 *load) { - *load = g->pmu.load_shadow; + *load = g->pmu.pmu_perfmon->load_shadow; return 0; } @@ -267,28 +301,27 @@ int nvgpu_pmu_load_update(struct gk20a *g) struct nvgpu_pmu *pmu = &g->pmu; u32 load = 0; int err = 0; - - if (!pmu->perfmon_ready) { - pmu->load_shadow = 0; - pmu->load = 0; + if (!pmu->pmu_perfmon->perfmon_ready) { + pmu->pmu_perfmon->load_shadow = 0; + pmu->pmu_perfmon->load = 0; return 0; } if (g->ops.pmu.pmu_perfmon_get_samples_rpc != NULL) { nvgpu_pmu_perfmon_get_samples_rpc(pmu); - load = pmu->load; + load = pmu->pmu_perfmon->load; } else { err = nvgpu_falcon_copy_from_dmem(&pmu->flcn, - pmu->sample_buffer, - (u8 *)&load, 2 * 1, 0); + pmu->pmu_perfmon->sample_buffer, (u8 *)&load, 2 * 1, 0); if (err != 0) { nvgpu_err(g, "PMU falcon DMEM copy failed"); return err; } } - pmu->load_shadow = load / 10U; - pmu->load_avg = (((9U*pmu->load_avg) + pmu->load_shadow) / 10U); + 
pmu->pmu_perfmon->load_shadow = load / 10U; + pmu->pmu_perfmon->load_avg = (((9U*pmu->pmu_perfmon->load_avg) + + pmu->pmu_perfmon->load_shadow) / 10U); return err; } @@ -365,7 +398,7 @@ void nvgpu_pmu_reset_load_counters(struct gk20a *g) int nvgpu_pmu_handle_perfmon_event(struct nvgpu_pmu *pmu, struct pmu_perfmon_msg *msg) { - struct gk20a *g = gk20a_from_pmu(pmu); + struct gk20a *g = pmu->g; nvgpu_log_fn(g, " "); @@ -374,16 +407,16 @@ int nvgpu_pmu_handle_perfmon_event(struct nvgpu_pmu *pmu, nvgpu_pmu_dbg(g, "perfmon increase event: "); nvgpu_pmu_dbg(g, "state_id %d, ground_id %d, pct %d", msg->gen.state_id, msg->gen.group_id, msg->gen.data); - (pmu->perfmon_events_cnt)++; + (pmu->pmu_perfmon->perfmon_events_cnt)++; break; case PMU_PERFMON_MSG_ID_DECREASE_EVENT: nvgpu_pmu_dbg(g, "perfmon decrease event: "); nvgpu_pmu_dbg(g, "state_id %d, ground_id %d, pct %d", msg->gen.state_id, msg->gen.group_id, msg->gen.data); - (pmu->perfmon_events_cnt)++; + (pmu->pmu_perfmon->perfmon_events_cnt)++; break; case PMU_PERFMON_MSG_ID_INIT_EVENT: - pmu->perfmon_ready = true; + pmu->pmu_perfmon->perfmon_ready = true; nvgpu_pmu_dbg(g, "perfmon init event"); break; default: @@ -393,7 +426,7 @@ int nvgpu_pmu_handle_perfmon_event(struct nvgpu_pmu *pmu, } /* restart sampling */ - if (pmu->perfmon_sampling_enabled) { + if (pmu->pmu_perfmon->perfmon_sampling_enabled) { return g->ops.pmu.pmu_perfmon_start_sampling(&(g->pmu)); } @@ -403,7 +436,7 @@ int nvgpu_pmu_handle_perfmon_event(struct nvgpu_pmu *pmu, /* Perfmon RPC */ int nvgpu_pmu_init_perfmon_rpc(struct nvgpu_pmu *pmu) { - struct gk20a *g = gk20a_from_pmu(pmu); + struct gk20a *g = pmu->g; struct nv_pmu_rpc_struct_perfmon_init rpc; int status = 0; @@ -414,7 +447,7 @@ int nvgpu_pmu_init_perfmon_rpc(struct nvgpu_pmu *pmu) nvgpu_log_fn(g, " "); (void) memset(&rpc, 0, sizeof(struct nv_pmu_rpc_struct_perfmon_init)); - pmu->perfmon_ready = false; + pmu->pmu_perfmon->perfmon_ready = false; g->ops.pmu.pmu_init_perfmon_counter(g); @@ -451,7 
+484,7 @@ exit: int nvgpu_pmu_perfmon_start_sampling_rpc(struct nvgpu_pmu *pmu) { - struct gk20a *g = gk20a_from_pmu(pmu); + struct gk20a *g = pmu->g; struct nv_pmu_rpc_struct_perfmon_start rpc; int status = 0; @@ -463,7 +496,7 @@ int nvgpu_pmu_perfmon_start_sampling_rpc(struct nvgpu_pmu *pmu) (void) memset(&rpc, 0, sizeof(struct nv_pmu_rpc_struct_perfmon_start)); rpc.group_id = PMU_DOMAIN_GROUP_PSTATE; - rpc.state_id = pmu->perfmon_state_id[PMU_DOMAIN_GROUP_PSTATE]; + rpc.state_id = pmu->pmu_perfmon->perfmon_state_id[PMU_DOMAIN_GROUP_PSTATE]; rpc.flags = PMU_PERFMON_FLAG_ENABLE_INCREASE | PMU_PERFMON_FLAG_ENABLE_DECREASE | PMU_PERFMON_FLAG_CLEAR_PREV; @@ -482,7 +515,7 @@ int nvgpu_pmu_perfmon_start_sampling_rpc(struct nvgpu_pmu *pmu) int nvgpu_pmu_perfmon_stop_sampling_rpc(struct nvgpu_pmu *pmu) { - struct gk20a *g = gk20a_from_pmu(pmu); + struct gk20a *g = pmu->g; struct nv_pmu_rpc_struct_perfmon_stop rpc; int status = 0; @@ -505,7 +538,7 @@ int nvgpu_pmu_perfmon_stop_sampling_rpc(struct nvgpu_pmu *pmu) int nvgpu_pmu_perfmon_get_samples_rpc(struct nvgpu_pmu *pmu) { - struct gk20a *g = gk20a_from_pmu(pmu); + struct gk20a *g = pmu->g; struct nv_pmu_rpc_struct_perfmon_query rpc; int status = 0; @@ -514,7 +547,7 @@ int nvgpu_pmu_perfmon_get_samples_rpc(struct nvgpu_pmu *pmu) } nvgpu_log_fn(g, " "); - pmu->perfmon_query = 0; + pmu->pmu_perfmon->perfmon_query = 0; (void) memset(&rpc, 0, sizeof(struct nv_pmu_rpc_struct_perfmon_query)); /* PERFMON QUERY */ nvgpu_pmu_dbg(g, "RPC post NV_PMU_RPC_ID_PERFMON_QUERY\n"); @@ -524,7 +557,28 @@ int nvgpu_pmu_perfmon_get_samples_rpc(struct nvgpu_pmu *pmu) } pmu_wait_message_cond(pmu, nvgpu_get_poll_timeout(g), - &pmu->perfmon_query, 1); + &pmu->pmu_perfmon->perfmon_query, 1); return status; } + +int nvgpu_pmu_perfmon_get_sampling_enable_status(struct nvgpu_pmu *pmu) +{ + return pmu->pmu_perfmon->perfmon_sampling_enabled; +} + +void nvgpu_pmu_perfmon_set_sampling_enable_status(struct nvgpu_pmu *pmu, + bool status) +{ + 
pmu->pmu_perfmon->perfmon_sampling_enabled = status; +} + +u64 nvgpu_pmu_perfmon_get_events_count(struct nvgpu_pmu *pmu) +{ + return pmu->pmu_perfmon->perfmon_events_cnt; +} + +u32 nvgpu_pmu_perfmon_get_load_avg(struct nvgpu_pmu *pmu) +{ + return pmu->pmu_perfmon->load_avg; +} diff --git a/drivers/gpu/nvgpu/common/pmu/pmu.c b/drivers/gpu/nvgpu/common/pmu/pmu.c index 9b2e521e6..ee4e47b9b 100644 --- a/drivers/gpu/nvgpu/common/pmu/pmu.c +++ b/drivers/gpu/nvgpu/common/pmu/pmu.c @@ -37,6 +37,7 @@ #include #include #include +#include static void pmu_report_error(struct gk20a *g, u32 err_type, u32 status, u32 pmu_err_type) @@ -161,10 +162,6 @@ static int nvgpu_init_pmu_setup_sw(struct gk20a *g) goto skip_init; } - /* no infoRom script from vbios? */ - - /* TBD: sysmon subtask */ - err = nvgpu_pmu_mutexes_alloc(g, &pmu->mutexes); if (err != 0) { goto err; @@ -367,7 +364,7 @@ int nvgpu_pmu_destroy(struct gk20a *g) nvgpu_pmu_state_change(g, PMU_STATE_OFF, false); pmu->pmu_ready = false; - pmu->perfmon_ready = false; + pmu->pmu_perfmon->perfmon_ready = false; pmu->pmu_pg.zbc_ready = false; nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false); @@ -461,7 +458,7 @@ void nvgpu_pmu_report_bar0_pri_err_status(struct gk20a *g, u32 bar0_status, } int nvgpu_pmu_lock_acquire(struct gk20a *g, struct nvgpu_pmu *pmu, - u32 id, u32 *token) + u32 id, u32 *token) { if (!g->support_ls_pmu) { return 0; @@ -475,7 +472,7 @@ int nvgpu_pmu_lock_acquire(struct gk20a *g, struct nvgpu_pmu *pmu, } int nvgpu_pmu_lock_release(struct gk20a *g, struct nvgpu_pmu *pmu, - u32 id, u32 *token) + u32 id, u32 *token) { if (!g->support_ls_pmu) { return 0; diff --git a/drivers/gpu/nvgpu/common/pmu/pmu_fw.c b/drivers/gpu/nvgpu/common/pmu/pmu_fw.c index 5fcc53743..008eb5076 100644 --- a/drivers/gpu/nvgpu/common/pmu/pmu_fw.c +++ b/drivers/gpu/nvgpu/common/pmu/pmu_fw.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -61,32 +62,32 @@ static u32 pmu_perfmon_cntr_sz_v2(struct 
nvgpu_pmu *pmu) static void *get_perfmon_cntr_ptr_v2(struct nvgpu_pmu *pmu) { - return (void *)(&pmu->perfmon_counter_v2); + return (void *)(&pmu->pmu_perfmon->perfmon_counter_v2); } static void set_perfmon_cntr_ut_v2(struct nvgpu_pmu *pmu, u16 ut) { - pmu->perfmon_counter_v2.upper_threshold = ut; + pmu->pmu_perfmon->perfmon_counter_v2.upper_threshold = ut; } static void set_perfmon_cntr_lt_v2(struct nvgpu_pmu *pmu, u16 lt) { - pmu->perfmon_counter_v2.lower_threshold = lt; + pmu->pmu_perfmon->perfmon_counter_v2.lower_threshold = lt; } static void set_perfmon_cntr_valid_v2(struct nvgpu_pmu *pmu, u8 valid) { - pmu->perfmon_counter_v2.valid = valid; + pmu->pmu_perfmon->perfmon_counter_v2.valid = valid; } static void set_perfmon_cntr_index_v2(struct nvgpu_pmu *pmu, u8 index) { - pmu->perfmon_counter_v2.index = index; + pmu->pmu_perfmon->perfmon_counter_v2.index = index; } static void set_perfmon_cntr_group_id_v2(struct nvgpu_pmu *pmu, u8 gid) { - pmu->perfmon_counter_v2.group_id = gid; + pmu->pmu_perfmon->perfmon_counter_v2.group_id = gid; } static void set_pmu_cmdline_args_falctracedmabase_v4(struct nvgpu_pmu *pmu) @@ -1659,6 +1660,8 @@ static void nvgpu_remove_pmu_support(struct nvgpu_pmu *pmu) nvgpu_pmu_lsfm_deinit(g, pmu, pmu->lsfm); + /* de-allocate memory space of pmu_perfmon */ + nvgpu_pmu_deinitialize_perfmon(g, pmu); nvgpu_mutex_destroy(&pmu->pmu_pg.elpg_mutex); nvgpu_mutex_destroy(&pmu->pmu_pg.pg_mutex); nvgpu_mutex_destroy(&pmu->isr_mutex); @@ -1692,7 +1695,8 @@ static int init_pmu_ucode(struct nvgpu_pmu *pmu) } else { /* secure boot ucodes's */ nvgpu_pmu_dbg(g, "requesting PMU ucode image"); - pmu->fw_image = nvgpu_request_firmware(g, NVGPU_PMU_UCODE_IMAGE, 0); + pmu->fw_image = nvgpu_request_firmware(g, NVGPU_PMU_UCODE_IMAGE, + 0); if (pmu->fw_image == NULL) { nvgpu_err(g, "failed to load pmu ucode!!"); err = -ENOENT; @@ -1700,7 +1704,8 @@ static int init_pmu_ucode(struct nvgpu_pmu *pmu) } nvgpu_pmu_dbg(g, "requesting PMU ucode desc"); - pmu->fw_desc = 
nvgpu_request_firmware(g, NVGPU_PMU_UCODE_DESC, 0); + pmu->fw_desc = nvgpu_request_firmware(g, NVGPU_PMU_UCODE_DESC, + 0); if (pmu->fw_desc == NULL) { nvgpu_err(g, "failed to load pmu ucode desc!!"); err = -ENOENT; @@ -1779,6 +1784,12 @@ int nvgpu_early_init_pmu_sw(struct gk20a *g, struct nvgpu_pmu *pmu) goto init_failed; } + /* Allocate memory for pmu_perfmon */ + err = nvgpu_pmu_initialize_perfmon(g, pmu); + if (err != 0) { + goto exit; + } + err = init_pmu_ucode(pmu); if (err != 0) { goto init_failed; diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c index f36f0e7ed..71adfb14d 100644 --- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c @@ -42,7 +42,7 @@ #include #include #include -#include +#include #include "hal/mc/mc_gm20b.h" #include "hal/bus/bus_gm20b.h" diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index 94d428627..658b412c0 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c @@ -43,6 +43,7 @@ #include #include #include +#include #include "hal/mc/mc_gm20b.h" #include "hal/mc/mc_gp10b.h" diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index aabdda9a5..62828bcce 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c @@ -180,6 +180,7 @@ #include #include #include +#include #include #include diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index 649491901..15dd03d99 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "hal/mc/mc_gm20b.h" #include "hal/mc/mc_gp10b.h" diff --git a/drivers/gpu/nvgpu/include/nvgpu/pmu.h b/drivers/gpu/nvgpu/include/nvgpu/pmu.h index 41cb790a6..a2fdb0b92 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/pmu.h +++ b/drivers/gpu/nvgpu/include/nvgpu/pmu.h @@ -214,7 +214,6 @@ struct 
nvgpu_pmu { struct nvgpu_pmu_lsfm *lsfm; - /* TBD: remove this if ZBC seq is fixed */ struct nvgpu_mem seq_buf; struct nvgpu_mem trace_buf; @@ -233,27 +232,16 @@ struct nvgpu_pmu { bool pmu_ready; - u32 perfmon_query; - u32 mscg_stat; u32 mscg_transition_state; u32 pmu_state; struct nvgpu_pmu_pg pmu_pg; - union { - struct pmu_perfmon_counter_v2 perfmon_counter_v2; - }; - u8 perfmon_state_id[PMU_DOMAIN_GROUP_NUM]; + struct nvgpu_pmu_perfmon *pmu_perfmon; void (*remove_support)(struct nvgpu_pmu *pmu); bool sw_ready; - bool perfmon_ready; - - u32 sample_buffer; - u32 load_shadow; - u32 load_avg; - u32 load; struct nvgpu_mutex isr_mutex; bool isr_enabled; @@ -264,8 +252,7 @@ struct nvgpu_pmu { struct pmu_cmdline_args_v5 args_v5; struct pmu_cmdline_args_v6 args_v6; }; - unsigned long perfmon_events_cnt; - bool perfmon_sampling_enabled; + u32 override_done; }; @@ -289,26 +276,6 @@ int nvgpu_pmu_lock_acquire(struct gk20a *g, struct nvgpu_pmu *pmu, int nvgpu_pmu_lock_release(struct gk20a *g, struct nvgpu_pmu *pmu, u32 id, u32 *token); -/* perfmon */ -void nvgpu_pmu_perfmon_rpc_handler(struct gk20a *g, struct nvgpu_pmu *pmu, - struct nv_pmu_rpc_header *rpc, - struct rpc_handler_payload *rpc_payload); -int nvgpu_pmu_init_perfmon(struct nvgpu_pmu *pmu); -int nvgpu_pmu_perfmon_start_sampling(struct nvgpu_pmu *pmu); -int nvgpu_pmu_perfmon_stop_sampling(struct nvgpu_pmu *pmu); -int nvgpu_pmu_perfmon_start_sampling_rpc(struct nvgpu_pmu *pmu); -int nvgpu_pmu_perfmon_stop_sampling_rpc(struct nvgpu_pmu *pmu); -int nvgpu_pmu_perfmon_get_samples_rpc(struct nvgpu_pmu *pmu); -int nvgpu_pmu_handle_perfmon_event(struct nvgpu_pmu *pmu, - struct pmu_perfmon_msg *msg); -int nvgpu_pmu_init_perfmon_rpc(struct nvgpu_pmu *pmu); -int nvgpu_pmu_load_norm(struct gk20a *g, u32 *load); -int nvgpu_pmu_load_update(struct gk20a *g); -int nvgpu_pmu_busy_cycles_norm(struct gk20a *g, u32 *norm); -void nvgpu_pmu_reset_load_counters(struct gk20a *g); -void nvgpu_pmu_get_load_counters(struct gk20a *g, u32 
*busy_cycles, - u32 *total_cycles); - int nvgpu_pmu_handle_therm_event(struct nvgpu_pmu *pmu, struct nv_pmu_therm_msg *msg); diff --git a/drivers/gpu/nvgpu/include/nvgpu/pmu/pmu_perfmon.h b/drivers/gpu/nvgpu/include/nvgpu/pmu/pmu_perfmon.h new file mode 100644 index 000000000..e8c187e7d --- /dev/null +++ b/drivers/gpu/nvgpu/include/nvgpu/pmu/pmu_perfmon.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef NVGPU_PMU_PERFMON_H +#define NVGPU_PMU_PERFMON_H + +//#include +#include +#include + +struct gk20a; +struct nvgpu_pmu; +struct pmu_perfmon_msg; +struct rpc_handler_payload; +struct nv_pmu_rpc_header; + +struct nvgpu_pmu_perfmon { + struct pmu_perfmon_counter_v2 perfmon_counter_v2; + u64 perfmon_events_cnt; + u32 perfmon_query; + u8 perfmon_state_id[PMU_DOMAIN_GROUP_NUM]; + u32 sample_buffer; + u32 load_shadow; + u32 load_avg; + u32 load; + bool perfmon_ready; + bool perfmon_sampling_enabled; +}; + +/* perfmon */ +void nvgpu_pmu_perfmon_rpc_handler(struct gk20a *g, struct nvgpu_pmu *pmu, + struct nv_pmu_rpc_header *rpc, + struct rpc_handler_payload *rpc_payload); +int nvgpu_pmu_initialize_perfmon(struct gk20a *g, struct nvgpu_pmu *pmu); +void nvgpu_pmu_deinitialize_perfmon(struct gk20a *g, struct nvgpu_pmu *pmu); +int nvgpu_pmu_init_perfmon(struct nvgpu_pmu *pmu); +int nvgpu_pmu_perfmon_start_sampling(struct nvgpu_pmu *pmu); +int nvgpu_pmu_perfmon_stop_sampling(struct nvgpu_pmu *pmu); +int nvgpu_pmu_perfmon_start_sampling_rpc(struct nvgpu_pmu *pmu); +int nvgpu_pmu_perfmon_stop_sampling_rpc(struct nvgpu_pmu *pmu); +int nvgpu_pmu_perfmon_get_samples_rpc(struct nvgpu_pmu *pmu); +int nvgpu_pmu_handle_perfmon_event(struct nvgpu_pmu *pmu, + struct pmu_perfmon_msg *msg); +int nvgpu_pmu_init_perfmon_rpc(struct nvgpu_pmu *pmu); +int nvgpu_pmu_load_norm(struct gk20a *g, u32 *load); +int nvgpu_pmu_load_update(struct gk20a *g); +int nvgpu_pmu_busy_cycles_norm(struct gk20a *g, u32 *norm); +void nvgpu_pmu_reset_load_counters(struct gk20a *g); +void nvgpu_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles, + u32 *total_cycles); +int nvgpu_pmu_perfmon_get_sampling_enable_status(struct nvgpu_pmu *pmu); +void nvgpu_pmu_perfmon_set_sampling_enable_status(struct nvgpu_pmu *pmu, + bool status); +u64 nvgpu_pmu_perfmon_get_events_count(struct nvgpu_pmu *pmu); +u32 nvgpu_pmu_perfmon_get_load_avg(struct nvgpu_pmu *pmu); + +#endif /* NVGPU_PMU_PERFMON_H */ diff --git 
a/drivers/gpu/nvgpu/os/linux/debug_pmu.c b/drivers/gpu/nvgpu/os/linux/debug_pmu.c index 6028e533d..d81241144 100644 --- a/drivers/gpu/nvgpu/os/linux/debug_pmu.c +++ b/drivers/gpu/nvgpu/os/linux/debug_pmu.c @@ -13,6 +13,8 @@ */ #include +#include + #include "debug_pmu.h" #include "os_linux.h" @@ -314,7 +316,8 @@ static int perfmon_events_enable_show(struct seq_file *s, void *data) { struct gk20a *g = s->private; - seq_printf(s, "%u\n", g->pmu.perfmon_sampling_enabled ? 1 : 0); + seq_printf(s, "%u\n", + nvgpu_pmu_perfmon_get_sampling_enable_status(&(g->pmu)) ? 1 : 0); return 0; } @@ -333,6 +336,7 @@ static ssize_t perfmon_events_enable_write(struct file *file, char buf[40]; int buf_size; int err; + bool status; (void) memset(buf, 0, sizeof(buf)); buf_size = min(count, (sizeof(buf)-1)); @@ -349,18 +353,22 @@ static ssize_t perfmon_events_enable_write(struct file *file, if (err) return err; - if (val && !g->pmu.perfmon_sampling_enabled && - nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) { - g->pmu.perfmon_sampling_enabled = true; + if (val && !nvgpu_pmu_perfmon_get_sampling_enable_status(&(g->pmu)) + && nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) { + nvgpu_pmu_perfmon_set_sampling_enable_status(&(g->pmu), + true); g->ops.pmu.pmu_perfmon_start_sampling(&(g->pmu)); - } else if (!val && g->pmu.perfmon_sampling_enabled && - nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) { - g->pmu.perfmon_sampling_enabled = false; + } else if (!val + && nvgpu_pmu_perfmon_get_sampling_enable_status(&(g->pmu)) + && nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) { + nvgpu_pmu_perfmon_set_sampling_enable_status(&(g->pmu), + false); g->ops.pmu.pmu_perfmon_stop_sampling(&(g->pmu)); } gk20a_idle(g); } else { - g->pmu.perfmon_sampling_enabled = val ? true : false; + status = val ? 
true : false; + nvgpu_pmu_perfmon_set_sampling_enable_status(&(g->pmu), status); } return count; @@ -378,7 +386,7 @@ static int perfmon_events_count_show(struct seq_file *s, void *data) { struct gk20a *g = s->private; - seq_printf(s, "%lu\n", g->pmu.perfmon_events_cnt); + seq_printf(s, "%llu\n", nvgpu_pmu_perfmon_get_events_count(&(g->pmu))); return 0; } diff --git a/drivers/gpu/nvgpu/os/linux/platform_gk20a_tegra.c b/drivers/gpu/nvgpu/os/linux/platform_gk20a_tegra.c index bcc13c463..390a1a71e 100644 --- a/drivers/gpu/nvgpu/os/linux/platform_gk20a_tegra.c +++ b/drivers/gpu/nvgpu/os/linux/platform_gk20a_tegra.c @@ -44,7 +44,7 @@ #include #include #include - +#include #include #include "gm20b/clk_gm20b.h" @@ -172,7 +172,8 @@ static unsigned long gk20a_tegra_get_emc_rate(struct gk20a *g, /* When scaling emc, account for the gpu load when the * gpu frequency is less than or equal to fmax@vmin. */ if (gpu_freq <= gpu_fmax_at_vmin) - emc_scale = min(g->pmu.load_avg, g->emc3d_ratio); + emc_scale = min(nvgpu_pmu_perfmon_get_load_avg(&(g->pmu)), + g->emc3d_ratio); else emc_scale = g->emc3d_ratio; diff --git a/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.c b/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.c index 126ab8a16..6992fa5ff 100644 --- a/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.c +++ b/drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.c @@ -34,6 +34,7 @@ #include #include #include +#include #include "os_linux.h" diff --git a/drivers/gpu/nvgpu/os/linux/scale.c b/drivers/gpu/nvgpu/os/linux/scale.c index e6bb5c505..c6a784a26 100644 --- a/drivers/gpu/nvgpu/os/linux/scale.c +++ b/drivers/gpu/nvgpu/os/linux/scale.c @@ -1,7 +1,7 @@ /* * gk20a clock scaling profile * - * Copyright (c) 2013-2018, NVIDIA Corporation. All rights reserved. + * Copyright (c) 2013-2019, NVIDIA Corporation. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -27,6 +27,7 @@ #include #include #include +#include #include "platform_gk20a.h" #include "scale.h" diff --git a/drivers/gpu/nvgpu/os/linux/sysfs.c b/drivers/gpu/nvgpu/os/linux/sysfs.c index 8d7ad31ab..f3cb7878e 100644 --- a/drivers/gpu/nvgpu/os/linux/sysfs.c +++ b/drivers/gpu/nvgpu/os/linux/sysfs.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "os_linux.h" #include "sysfs.h" diff --git a/drivers/gpu/nvgpu/tu104/hal_tu104.c b/drivers/gpu/nvgpu/tu104/hal_tu104.c index f13dfcd59..9ddf4b1cf 100644 --- a/drivers/gpu/nvgpu/tu104/hal_tu104.c +++ b/drivers/gpu/nvgpu/tu104/hal_tu104.c @@ -199,6 +199,7 @@ #include #include #include +#include #include #include