gpu: nvgpu: Re-factor perfmon unit

- Move the perfmon unit source code to the common/pmu/perfmon/ folder.
- Split the perfmon unit declarations out into include/nvgpu/pmu/pmu_perfmon.h.
- Add a new structure, nvgpu_pmu_perfmon, for the perfmon unit. As part of
  the refactoring, it gathers all perfmon unit variables (perfmon_query,
  perfmon_ready, etc.) into a single structure.
- Access all perfmon variables through the pmu_perfmon struct, e.g.
  pmu->pmu_perfmon->perfmon_query, pmu->pmu_perfmon->perfmon_ready,
  and so on.

  JIRA NVGPU-1961

Change-Id: I57516c646bfb256004dd7b719e40fafd3c2a09b2
Signed-off-by: Divya Singhatwaria <dsinghatwari@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2080555
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Author: Divya Singhatwaria
Date: 2019-03-22 10:44:37 +05:30
Committed by: mobile promotions
Parent: 3c08a91de8
Commit: 700c16599e
18 changed files with 226 additions and 103 deletions
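
At a glance, the refactor replaces the loose perfmon fields in struct nvgpu_pmu with a single heap-allocated context that is created once in nvgpu_early_init_pmu_sw(), reused across railgating, and freed from nvgpu_remove_pmu_support(). A condensed sketch of the new layout and access pattern, abbreviated from the hunks below (not a complete listing):

	/* New perfmon context (see include/nvgpu/pmu/pmu_perfmon.h below) */
	struct nvgpu_pmu_perfmon {
		struct pmu_perfmon_counter_v2 perfmon_counter_v2;
		u64 perfmon_events_cnt;
		u32 perfmon_query;
		u32 sample_buffer;
		u32 load_shadow;
		u32 load_avg;
		u32 load;
		bool perfmon_ready;
		bool perfmon_sampling_enabled;
	};

	/* struct nvgpu_pmu now carries only a pointer to the context ... */
	struct nvgpu_pmu_perfmon *pmu_perfmon;

	/* ... allocated once during early SW init and kept across railgating */
	err = nvgpu_pmu_initialize_perfmon(g, pmu);

	/* All former pmu->perfmon_* accesses go through the pointer, e.g. */
	pmu->pmu_perfmon->perfmon_ready = true;
	load = pmu->pmu_perfmon->load;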

View File

@@ -94,7 +94,7 @@ nvgpu-y += \
 	common/pmu/pmu_fw.o \
 	common/pmu/pg/pmu_pg.o \
 	common/pmu/pg/pmu_aelpg.o \
-	common/pmu/pmu_perfmon.o \
+	common/pmu/perfmon/pmu_perfmon.o \
 	common/pmu/pmu_debug.o \
 	common/pmu/pmu_gk20a.o \
 	common/pmu/pmu_gm20b.o \

View File

@@ -133,7 +133,7 @@ srcs += common/sim.c \
 	common/pmu/pmu_fw.c \
 	common/pmu/pg/pmu_pg.c \
 	common/pmu/pg/pmu_aelpg.c \
-	common/pmu/pmu_perfmon.c \
+	common/pmu/perfmon/pmu_perfmon.c \
 	common/pmu/pmu_debug.c \
 	common/pmu/pmu_gk20a.c \
 	common/pmu/pmu_gm20b.c \

View File

@@ -30,6 +30,7 @@
 #include <nvgpu/pmu/therm.h>
 #include <nvgpu/pmu/lsfm.h>
 #include <nvgpu/pmu/super_surface.h>
+#include <nvgpu/pmu/pmu_perfmon.h>
 static int pmu_payload_extract(struct nvgpu_pmu *pmu, struct pmu_sequence *seq)
 {

View File

@@ -20,17 +20,21 @@
  * DEALINGS IN THE SOFTWARE.
  */
+#include <nvgpu/pmu/pmu_perfmon.h>
+#include <nvgpu/gk20a.h>
+#include <nvgpu/falcon.h>
+#include <nvgpu/bug.h>
 #include <nvgpu/enabled.h>
 #include <nvgpu/pmu.h>
 #include <nvgpu/pmu/cmd.h>
 #include <nvgpu/log.h>
 #include <nvgpu/bug.h>
 #include <nvgpu/pmuif/nvgpu_gpmu_cmdif.h>
-#include <nvgpu/gk20a.h>
+#include <nvgpu/kmem.h>
 static u8 get_perfmon_id(struct nvgpu_pmu *pmu)
 {
-	struct gk20a *g = gk20a_from_pmu(pmu);
+	struct gk20a *g = pmu->g;
 	u32 ver = g->params.gpu_arch + g->params.gpu_impl;
 	u8 unit_id;
@@ -64,7 +68,7 @@ void nvgpu_pmu_perfmon_rpc_handler(struct gk20a *g, struct nvgpu_pmu *pmu,
 	case NV_PMU_RPC_ID_PERFMON_T18X_INIT:
 		nvgpu_pmu_dbg(g,
 			"reply NV_PMU_RPC_ID_PERFMON_INIT");
-		pmu->perfmon_ready = true;
+		pmu->pmu_perfmon->perfmon_ready = true;
 		break;
 	case NV_PMU_RPC_ID_PERFMON_T18X_START:
 		nvgpu_pmu_dbg(g,
@@ -79,8 +83,8 @@ void nvgpu_pmu_perfmon_rpc_handler(struct gk20a *g, struct nvgpu_pmu *pmu,
"reply NV_PMU_RPC_ID_PERFMON_QUERY"); "reply NV_PMU_RPC_ID_PERFMON_QUERY");
rpc_param = (struct nv_pmu_rpc_struct_perfmon_query *) rpc_param = (struct nv_pmu_rpc_struct_perfmon_query *)
rpc_payload->rpc_buff; rpc_payload->rpc_buff;
pmu->load = rpc_param->sample_buffer[0]; pmu->pmu_perfmon->load = rpc_param->sample_buffer[0];
pmu->perfmon_query = 1; pmu->pmu_perfmon->perfmon_query = 1;
/* set perfmon_query to 1 after load is copied */ /* set perfmon_query to 1 after load is copied */
break; break;
default: default:
@@ -89,9 +93,40 @@ void nvgpu_pmu_perfmon_rpc_handler(struct gk20a *g, struct nvgpu_pmu *pmu,
 	}
 }
+
+int nvgpu_pmu_initialize_perfmon(struct gk20a *g, struct nvgpu_pmu *pmu)
+{
+	if (pmu->pmu_perfmon != NULL) {
+		/* Not to allocate a new buffer after railgating
+		 * is done. Use the same memory for pmu_perfmon
+		 * after railgating.
+		 */
+		return 0;
+	} else {
+		/* One-time memory allocation for pmu_perfmon */
+		pmu->pmu_perfmon = (struct nvgpu_pmu_perfmon *)(nvgpu_kzalloc(g,
+			sizeof(struct nvgpu_pmu_perfmon)));
+		if (pmu->pmu_perfmon == NULL) {
+			nvgpu_err(g, "failed to initialize perfmon");
+			return -ENOMEM;
+		}
+	}
+	return 0;
+}
+
+void nvgpu_pmu_deinitialize_perfmon(struct gk20a *g, struct nvgpu_pmu *pmu)
+{
+	if (pmu->pmu_perfmon == NULL) {
+		return;
+	} else {
+		nvgpu_kfree(g, pmu->pmu_perfmon);
+	}
+}
+
 int nvgpu_pmu_init_perfmon(struct nvgpu_pmu *pmu)
 {
-	struct gk20a *g = gk20a_from_pmu(pmu);
+	struct gk20a *g = pmu->g;
 	struct pmu_v *pv = &g->ops.pmu_ver;
 	struct pmu_cmd cmd;
 	struct pmu_payload payload;
@@ -104,16 +139,16 @@ int nvgpu_pmu_init_perfmon(struct nvgpu_pmu *pmu)
 	nvgpu_log_fn(g, " ");
-	pmu->perfmon_ready = false;
+	pmu->pmu_perfmon->perfmon_ready = false;
 	g->ops.pmu.pmu_init_perfmon_counter(g);
-	if (pmu->sample_buffer == 0U) {
+	if (pmu->pmu_perfmon->sample_buffer == 0U) {
 		tmp_addr = nvgpu_alloc(&pmu->dmem, 2U * sizeof(u16));
 		nvgpu_assert(tmp_addr <= U32_MAX);
-		pmu->sample_buffer = (u32)tmp_addr;
+		pmu->pmu_perfmon->sample_buffer = (u32)tmp_addr;
 	}
-	if (pmu->sample_buffer == 0U) {
+	if (pmu->pmu_perfmon->sample_buffer == 0U) {
 		nvgpu_err(g, "failed to allocate perfmon sample buffer");
 		return -ENOMEM;
 	}
@@ -133,10 +168,9 @@ int nvgpu_pmu_init_perfmon(struct nvgpu_pmu *pmu)
 	cmd.cmd.perfmon.cmd_type = PMU_PERFMON_CMD_ID_INIT;
 	/* buffer to save counter values for pmu perfmon */
 	pv->perfmon_cmd_init_set_sample_buffer(&cmd.cmd.perfmon,
-		(u16)pmu->sample_buffer);
+		(u16)pmu->pmu_perfmon->sample_buffer);
 	/* number of sample periods below lower threshold
 	 * before pmu triggers perfmon decrease event
-	 * TBD: = 15
 	 */
 	pv->perfmon_cmd_init_set_dec_cnt(&cmd.cmd.perfmon, 15);
 	/* index of base counter, aka. always ticking counter */
@@ -171,7 +205,7 @@ int nvgpu_pmu_init_perfmon(struct nvgpu_pmu *pmu)
 int nvgpu_pmu_perfmon_start_sampling(struct nvgpu_pmu *pmu)
 {
-	struct gk20a *g = gk20a_from_pmu(pmu);
+	struct gk20a *g = pmu->g;
 	struct pmu_v *pv = &g->ops.pmu_ver;
 	struct pmu_cmd cmd;
 	struct pmu_payload payload;
@@ -197,7 +231,7 @@ int nvgpu_pmu_perfmon_start_sampling(struct nvgpu_pmu *pmu)
 	pv->perfmon_start_set_group_id(&cmd.cmd.perfmon,
 		PMU_DOMAIN_GROUP_PSTATE);
 	pv->perfmon_start_set_state_id(&cmd.cmd.perfmon,
-		pmu->perfmon_state_id[PMU_DOMAIN_GROUP_PSTATE]);
+		pmu->pmu_perfmon->perfmon_state_id[PMU_DOMAIN_GROUP_PSTATE]);
 	pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
 		PMU_PERFMON_FLAG_ENABLE_INCREASE |
@@ -230,7 +264,7 @@ int nvgpu_pmu_perfmon_start_sampling(struct nvgpu_pmu *pmu)
 int nvgpu_pmu_perfmon_stop_sampling(struct nvgpu_pmu *pmu)
 {
-	struct gk20a *g = gk20a_from_pmu(pmu);
+	struct gk20a *g = pmu->g;
 	struct pmu_cmd cmd;
 	u64 tmp_size;
@@ -258,7 +292,7 @@ int nvgpu_pmu_perfmon_stop_sampling(struct nvgpu_pmu *pmu)
 int nvgpu_pmu_load_norm(struct gk20a *g, u32 *load)
 {
-	*load = g->pmu.load_shadow;
+	*load = g->pmu.pmu_perfmon->load_shadow;
 	return 0;
 }
@@ -267,28 +301,27 @@ int nvgpu_pmu_load_update(struct gk20a *g)
 	struct nvgpu_pmu *pmu = &g->pmu;
 	u32 load = 0;
 	int err = 0;
-	if (!pmu->perfmon_ready) {
-		pmu->load_shadow = 0;
-		pmu->load = 0;
+	if (!pmu->pmu_perfmon->perfmon_ready) {
+		pmu->pmu_perfmon->load_shadow = 0;
+		pmu->pmu_perfmon->load = 0;
 		return 0;
 	}
 	if (g->ops.pmu.pmu_perfmon_get_samples_rpc != NULL) {
 		nvgpu_pmu_perfmon_get_samples_rpc(pmu);
-		load = pmu->load;
+		load = pmu->pmu_perfmon->load;
 	} else {
 		err = nvgpu_falcon_copy_from_dmem(&pmu->flcn,
-			pmu->sample_buffer,
-			(u8 *)&load, 2 * 1, 0);
+			pmu->pmu_perfmon->sample_buffer, (u8 *)&load, 2 * 1, 0);
 		if (err != 0) {
 			nvgpu_err(g, "PMU falcon DMEM copy failed");
 			return err;
 		}
 	}
-	pmu->load_shadow = load / 10U;
-	pmu->load_avg = (((9U*pmu->load_avg) + pmu->load_shadow) / 10U);
+	pmu->pmu_perfmon->load_shadow = load / 10U;
+	pmu->pmu_perfmon->load_avg = (((9U*pmu->pmu_perfmon->load_avg) +
+		pmu->pmu_perfmon->load_shadow) / 10U);
 	return err;
 }
@@ -365,7 +398,7 @@ void nvgpu_pmu_reset_load_counters(struct gk20a *g)
 int nvgpu_pmu_handle_perfmon_event(struct nvgpu_pmu *pmu,
 		struct pmu_perfmon_msg *msg)
 {
-	struct gk20a *g = gk20a_from_pmu(pmu);
+	struct gk20a *g = pmu->g;
 	nvgpu_log_fn(g, " ");
@@ -374,16 +407,16 @@ int nvgpu_pmu_handle_perfmon_event(struct nvgpu_pmu *pmu,
nvgpu_pmu_dbg(g, "perfmon increase event: "); nvgpu_pmu_dbg(g, "perfmon increase event: ");
nvgpu_pmu_dbg(g, "state_id %d, ground_id %d, pct %d", nvgpu_pmu_dbg(g, "state_id %d, ground_id %d, pct %d",
msg->gen.state_id, msg->gen.group_id, msg->gen.data); msg->gen.state_id, msg->gen.group_id, msg->gen.data);
(pmu->perfmon_events_cnt)++; (pmu->pmu_perfmon->perfmon_events_cnt)++;
break; break;
case PMU_PERFMON_MSG_ID_DECREASE_EVENT: case PMU_PERFMON_MSG_ID_DECREASE_EVENT:
nvgpu_pmu_dbg(g, "perfmon decrease event: "); nvgpu_pmu_dbg(g, "perfmon decrease event: ");
nvgpu_pmu_dbg(g, "state_id %d, ground_id %d, pct %d", nvgpu_pmu_dbg(g, "state_id %d, ground_id %d, pct %d",
msg->gen.state_id, msg->gen.group_id, msg->gen.data); msg->gen.state_id, msg->gen.group_id, msg->gen.data);
(pmu->perfmon_events_cnt)++; (pmu->pmu_perfmon->perfmon_events_cnt)++;
break; break;
case PMU_PERFMON_MSG_ID_INIT_EVENT: case PMU_PERFMON_MSG_ID_INIT_EVENT:
pmu->perfmon_ready = true; pmu->pmu_perfmon->perfmon_ready = true;
nvgpu_pmu_dbg(g, "perfmon init event"); nvgpu_pmu_dbg(g, "perfmon init event");
break; break;
default: default:
@@ -393,7 +426,7 @@ int nvgpu_pmu_handle_perfmon_event(struct nvgpu_pmu *pmu,
 	}
 	/* restart sampling */
-	if (pmu->perfmon_sampling_enabled) {
+	if (pmu->pmu_perfmon->perfmon_sampling_enabled) {
 		return g->ops.pmu.pmu_perfmon_start_sampling(&(g->pmu));
 	}
@@ -403,7 +436,7 @@ int nvgpu_pmu_handle_perfmon_event(struct nvgpu_pmu *pmu,
 /* Perfmon RPC */
 int nvgpu_pmu_init_perfmon_rpc(struct nvgpu_pmu *pmu)
 {
-	struct gk20a *g = gk20a_from_pmu(pmu);
+	struct gk20a *g = pmu->g;
 	struct nv_pmu_rpc_struct_perfmon_init rpc;
 	int status = 0;
@@ -414,7 +447,7 @@ int nvgpu_pmu_init_perfmon_rpc(struct nvgpu_pmu *pmu)
 	nvgpu_log_fn(g, " ");
 	(void) memset(&rpc, 0, sizeof(struct nv_pmu_rpc_struct_perfmon_init));
-	pmu->perfmon_ready = false;
+	pmu->pmu_perfmon->perfmon_ready = false;
 	g->ops.pmu.pmu_init_perfmon_counter(g);
@@ -451,7 +484,7 @@ exit:
 int nvgpu_pmu_perfmon_start_sampling_rpc(struct nvgpu_pmu *pmu)
 {
-	struct gk20a *g = gk20a_from_pmu(pmu);
+	struct gk20a *g = pmu->g;
 	struct nv_pmu_rpc_struct_perfmon_start rpc;
 	int status = 0;
@@ -463,7 +496,7 @@ int nvgpu_pmu_perfmon_start_sampling_rpc(struct nvgpu_pmu *pmu)
 	(void) memset(&rpc, 0, sizeof(struct nv_pmu_rpc_struct_perfmon_start));
 	rpc.group_id = PMU_DOMAIN_GROUP_PSTATE;
-	rpc.state_id = pmu->perfmon_state_id[PMU_DOMAIN_GROUP_PSTATE];
+	rpc.state_id = pmu->pmu_perfmon->perfmon_state_id[PMU_DOMAIN_GROUP_PSTATE];
 	rpc.flags = PMU_PERFMON_FLAG_ENABLE_INCREASE |
 		PMU_PERFMON_FLAG_ENABLE_DECREASE |
 		PMU_PERFMON_FLAG_CLEAR_PREV;
@@ -482,7 +515,7 @@ int nvgpu_pmu_perfmon_start_sampling_rpc(struct nvgpu_pmu *pmu)
 int nvgpu_pmu_perfmon_stop_sampling_rpc(struct nvgpu_pmu *pmu)
 {
-	struct gk20a *g = gk20a_from_pmu(pmu);
+	struct gk20a *g = pmu->g;
 	struct nv_pmu_rpc_struct_perfmon_stop rpc;
 	int status = 0;
@@ -505,7 +538,7 @@ int nvgpu_pmu_perfmon_stop_sampling_rpc(struct nvgpu_pmu *pmu)
 int nvgpu_pmu_perfmon_get_samples_rpc(struct nvgpu_pmu *pmu)
 {
-	struct gk20a *g = gk20a_from_pmu(pmu);
+	struct gk20a *g = pmu->g;
 	struct nv_pmu_rpc_struct_perfmon_query rpc;
 	int status = 0;
@@ -514,7 +547,7 @@ int nvgpu_pmu_perfmon_get_samples_rpc(struct nvgpu_pmu *pmu)
 	}
 	nvgpu_log_fn(g, " ");
-	pmu->perfmon_query = 0;
+	pmu->pmu_perfmon->perfmon_query = 0;
 	(void) memset(&rpc, 0, sizeof(struct nv_pmu_rpc_struct_perfmon_query));
 	/* PERFMON QUERY */
 	nvgpu_pmu_dbg(g, "RPC post NV_PMU_RPC_ID_PERFMON_QUERY\n");
@@ -524,7 +557,28 @@ int nvgpu_pmu_perfmon_get_samples_rpc(struct nvgpu_pmu *pmu)
 	}
 	pmu_wait_message_cond(pmu, nvgpu_get_poll_timeout(g),
-		&pmu->perfmon_query, 1);
+		&pmu->pmu_perfmon->perfmon_query, 1);
 	return status;
 }
+
+int nvgpu_pmu_perfmon_get_sampling_enable_status(struct nvgpu_pmu *pmu)
+{
+	return pmu->pmu_perfmon->perfmon_sampling_enabled;
+}
+
+void nvgpu_pmu_perfmon_set_sampling_enable_status(struct nvgpu_pmu *pmu,
+	bool status)
+{
+	pmu->pmu_perfmon->perfmon_sampling_enabled = status;
+}
+
+u64 nvgpu_pmu_perfmon_get_events_count(struct nvgpu_pmu *pmu)
+{
+	return pmu->pmu_perfmon->perfmon_events_cnt;
+}
+
+u32 nvgpu_pmu_perfmon_get_load_avg(struct nvgpu_pmu *pmu)
+{
+	return pmu->pmu_perfmon->load_avg;
+}

View File

@@ -37,6 +37,7 @@
 #include <nvgpu/nvgpu_err.h>
 #include <nvgpu/pmu/lsfm.h>
 #include <nvgpu/pmu/super_surface.h>
+#include <nvgpu/pmu/pmu_perfmon.h>
 static void pmu_report_error(struct gk20a *g, u32 err_type,
 		u32 status, u32 pmu_err_type)
@@ -161,10 +162,6 @@ static int nvgpu_init_pmu_setup_sw(struct gk20a *g)
 		goto skip_init;
 	}
-	/* no infoRom script from vbios? */
-	/* TBD: sysmon subtask */
 	err = nvgpu_pmu_mutexes_alloc(g, &pmu->mutexes);
 	if (err != 0) {
 		goto err;
@@ -367,7 +364,7 @@ int nvgpu_pmu_destroy(struct gk20a *g)
 	nvgpu_pmu_state_change(g, PMU_STATE_OFF, false);
 	pmu->pmu_ready = false;
-	pmu->perfmon_ready = false;
+	pmu->pmu_perfmon->perfmon_ready = false;
 	pmu->pmu_pg.zbc_ready = false;
 	nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false);
@@ -461,7 +458,7 @@ void nvgpu_pmu_report_bar0_pri_err_status(struct gk20a *g, u32 bar0_status,
 }
 int nvgpu_pmu_lock_acquire(struct gk20a *g, struct nvgpu_pmu *pmu,
 		u32 id, u32 *token)
 {
 	if (!g->support_ls_pmu) {
 		return 0;
@@ -475,7 +472,7 @@ int nvgpu_pmu_lock_acquire(struct gk20a *g, struct nvgpu_pmu *pmu,
 }
 int nvgpu_pmu_lock_release(struct gk20a *g, struct nvgpu_pmu *pmu,
 		u32 id, u32 *token)
 {
 	if (!g->support_ls_pmu) {
 		return 0;

View File

@@ -34,6 +34,7 @@
 #include <nvgpu/pmu/pstate.h>
 #include <nvgpu/pmu/volt.h>
 #include <nvgpu/pmu/clk/clk.h>
+#include <nvgpu/pmu/pmu_perfmon.h>
 #include <nvgpu/pmu/allocator.h>
 #include <nvgpu/pmu/lsfm.h>
 #include <nvgpu/pmu/super_surface.h>
@@ -61,32 +62,32 @@ static u32 pmu_perfmon_cntr_sz_v2(struct nvgpu_pmu *pmu)
 static void *get_perfmon_cntr_ptr_v2(struct nvgpu_pmu *pmu)
 {
-	return (void *)(&pmu->perfmon_counter_v2);
+	return (void *)(&pmu->pmu_perfmon->perfmon_counter_v2);
 }
 static void set_perfmon_cntr_ut_v2(struct nvgpu_pmu *pmu, u16 ut)
 {
-	pmu->perfmon_counter_v2.upper_threshold = ut;
+	pmu->pmu_perfmon->perfmon_counter_v2.upper_threshold = ut;
 }
 static void set_perfmon_cntr_lt_v2(struct nvgpu_pmu *pmu, u16 lt)
 {
-	pmu->perfmon_counter_v2.lower_threshold = lt;
+	pmu->pmu_perfmon->perfmon_counter_v2.lower_threshold = lt;
 }
 static void set_perfmon_cntr_valid_v2(struct nvgpu_pmu *pmu, u8 valid)
 {
-	pmu->perfmon_counter_v2.valid = valid;
+	pmu->pmu_perfmon->perfmon_counter_v2.valid = valid;
 }
 static void set_perfmon_cntr_index_v2(struct nvgpu_pmu *pmu, u8 index)
 {
-	pmu->perfmon_counter_v2.index = index;
+	pmu->pmu_perfmon->perfmon_counter_v2.index = index;
 }
 static void set_perfmon_cntr_group_id_v2(struct nvgpu_pmu *pmu, u8 gid)
 {
-	pmu->perfmon_counter_v2.group_id = gid;
+	pmu->pmu_perfmon->perfmon_counter_v2.group_id = gid;
 }
 static void set_pmu_cmdline_args_falctracedmabase_v4(struct nvgpu_pmu *pmu)
@@ -1659,6 +1660,8 @@ static void nvgpu_remove_pmu_support(struct nvgpu_pmu *pmu)
 	nvgpu_pmu_lsfm_deinit(g, pmu, pmu->lsfm);
+	/* de-allocate memory space of pmu_perfmon */
+	nvgpu_pmu_deinitialize_perfmon(g, pmu);
 	nvgpu_mutex_destroy(&pmu->pmu_pg.elpg_mutex);
 	nvgpu_mutex_destroy(&pmu->pmu_pg.pg_mutex);
 	nvgpu_mutex_destroy(&pmu->isr_mutex);
@@ -1692,7 +1695,8 @@ static int init_pmu_ucode(struct nvgpu_pmu *pmu)
 	} else {
 		/* secure boot ucodes's */
 		nvgpu_pmu_dbg(g, "requesting PMU ucode image");
-		pmu->fw_image = nvgpu_request_firmware(g, NVGPU_PMU_UCODE_IMAGE, 0);
+		pmu->fw_image = nvgpu_request_firmware(g, NVGPU_PMU_UCODE_IMAGE,
+			0);
 		if (pmu->fw_image == NULL) {
 			nvgpu_err(g, "failed to load pmu ucode!!");
 			err = -ENOENT;
@@ -1700,7 +1704,8 @@ static int init_pmu_ucode(struct nvgpu_pmu *pmu)
 		}
 		nvgpu_pmu_dbg(g, "requesting PMU ucode desc");
-		pmu->fw_desc = nvgpu_request_firmware(g, NVGPU_PMU_UCODE_DESC, 0);
+		pmu->fw_desc = nvgpu_request_firmware(g, NVGPU_PMU_UCODE_DESC,
+			0);
 		if (pmu->fw_desc == NULL) {
 			nvgpu_err(g, "failed to load pmu ucode desc!!");
 			err = -ENOENT;
@@ -1779,6 +1784,12 @@ int nvgpu_early_init_pmu_sw(struct gk20a *g, struct nvgpu_pmu *pmu)
 		goto init_failed;
 	}
+	/* Allocate memory for pmu_perfmon */
+	err = nvgpu_pmu_initialize_perfmon(g, pmu);
+	if (err != 0) {
+		goto exit;
+	}
 	err = init_pmu_ucode(pmu);
 	if (err != 0) {
 		goto init_failed;

View File

@@ -42,7 +42,7 @@
 #include <nvgpu/gr/gr_intr.h>
 #include <nvgpu/gr/gr_falcon.h>
 #include <nvgpu/gr/setup.h>
-#include <nvgpu/gr/gr.h>
+#include <nvgpu/pmu/pmu_perfmon.h>
 #include "hal/mc/mc_gm20b.h"
 #include "hal/bus/bus_gm20b.h"

View File

@@ -43,6 +43,7 @@
 #include <nvgpu/gr/fecs_trace.h>
 #include <nvgpu/gr/gr.h>
 #include <nvgpu/gr/gr_intr.h>
+#include <nvgpu/pmu/pmu_perfmon.h>
 #include "hal/mc/mc_gm20b.h"
 #include "hal/mc/mc_gp10b.h"

View File

@@ -180,6 +180,7 @@
 #include <nvgpu/gr/setup.h>
 #include <nvgpu/gr/fecs_trace.h>
 #include <nvgpu/gr/gr.h>
+#include <nvgpu/pmu/pmu_perfmon.h>
 #include <nvgpu/gr/gr_intr.h>
 #include <nvgpu/hw/gv100/hw_proj_gv100.h>

View File

@@ -26,6 +26,7 @@
 #include <nvgpu/regops.h>
 #include <nvgpu/gr/gr_falcon.h>
 #include <nvgpu/gr/gr.h>
+#include <nvgpu/pmu/pmu_perfmon.h>
 #include "hal/mc/mc_gm20b.h"
 #include "hal/mc/mc_gp10b.h"

View File

@@ -214,7 +214,6 @@ struct nvgpu_pmu {
 	struct nvgpu_pmu_lsfm *lsfm;
-	/* TBD: remove this if ZBC seq is fixed */
 	struct nvgpu_mem seq_buf;
 	struct nvgpu_mem trace_buf;
@@ -233,27 +232,16 @@ struct nvgpu_pmu {
 	bool pmu_ready;
-	u32 perfmon_query;
 	u32 mscg_stat;
 	u32 mscg_transition_state;
 	u32 pmu_state;
 	struct nvgpu_pmu_pg pmu_pg;
-	union {
-		struct pmu_perfmon_counter_v2 perfmon_counter_v2;
-	};
-	u8 perfmon_state_id[PMU_DOMAIN_GROUP_NUM];
+	struct nvgpu_pmu_perfmon *pmu_perfmon;
 	void (*remove_support)(struct nvgpu_pmu *pmu);
 	bool sw_ready;
-	bool perfmon_ready;
-	u32 sample_buffer;
-	u32 load_shadow;
-	u32 load_avg;
-	u32 load;
 	struct nvgpu_mutex isr_mutex;
 	bool isr_enabled;
@@ -264,8 +252,7 @@ struct nvgpu_pmu {
 		struct pmu_cmdline_args_v5 args_v5;
 		struct pmu_cmdline_args_v6 args_v6;
 	};
-	unsigned long perfmon_events_cnt;
-	bool perfmon_sampling_enabled;
 	u32 override_done;
 };
@@ -289,26 +276,6 @@ int nvgpu_pmu_lock_acquire(struct gk20a *g, struct nvgpu_pmu *pmu,
 int nvgpu_pmu_lock_release(struct gk20a *g, struct nvgpu_pmu *pmu,
 	u32 id, u32 *token);
-/* perfmon */
-void nvgpu_pmu_perfmon_rpc_handler(struct gk20a *g, struct nvgpu_pmu *pmu,
-	struct nv_pmu_rpc_header *rpc,
-	struct rpc_handler_payload *rpc_payload);
-int nvgpu_pmu_init_perfmon(struct nvgpu_pmu *pmu);
-int nvgpu_pmu_perfmon_start_sampling(struct nvgpu_pmu *pmu);
-int nvgpu_pmu_perfmon_stop_sampling(struct nvgpu_pmu *pmu);
-int nvgpu_pmu_perfmon_start_sampling_rpc(struct nvgpu_pmu *pmu);
-int nvgpu_pmu_perfmon_stop_sampling_rpc(struct nvgpu_pmu *pmu);
-int nvgpu_pmu_perfmon_get_samples_rpc(struct nvgpu_pmu *pmu);
-int nvgpu_pmu_handle_perfmon_event(struct nvgpu_pmu *pmu,
-	struct pmu_perfmon_msg *msg);
-int nvgpu_pmu_init_perfmon_rpc(struct nvgpu_pmu *pmu);
-int nvgpu_pmu_load_norm(struct gk20a *g, u32 *load);
-int nvgpu_pmu_load_update(struct gk20a *g);
-int nvgpu_pmu_busy_cycles_norm(struct gk20a *g, u32 *norm);
-void nvgpu_pmu_reset_load_counters(struct gk20a *g);
-void nvgpu_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles,
-	u32 *total_cycles);
 int nvgpu_pmu_handle_therm_event(struct nvgpu_pmu *pmu,
 	struct nv_pmu_therm_msg *msg);

View File

@@ -0,0 +1,77 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*/
#ifndef NVGPU_PMU_PERFMON_H
#define NVGPU_PMU_PERFMON_H
//#include <nvgpu/enabled.h>
#include <nvgpu/pmuif/nvgpu_gpmu_cmdif.h>
#include <nvgpu/pmuif/gpmuif_perfmon.h>
struct gk20a;
struct nvgpu_pmu;
struct pmu_perfmon_msg;
struct rpc_handler_payload;
struct nv_pmu_rpc_header;
struct nvgpu_pmu_perfmon {
struct pmu_perfmon_counter_v2 perfmon_counter_v2;
u64 perfmon_events_cnt;
u32 perfmon_query;
u8 perfmon_state_id[PMU_DOMAIN_GROUP_NUM];
u32 sample_buffer;
u32 load_shadow;
u32 load_avg;
u32 load;
bool perfmon_ready;
bool perfmon_sampling_enabled;
};
/* perfmon */
void nvgpu_pmu_perfmon_rpc_handler(struct gk20a *g, struct nvgpu_pmu *pmu,
struct nv_pmu_rpc_header *rpc,
struct rpc_handler_payload *rpc_payload);
int nvgpu_pmu_initialize_perfmon(struct gk20a *g, struct nvgpu_pmu *pmu);
void nvgpu_pmu_deinitialize_perfmon(struct gk20a *g, struct nvgpu_pmu *pmu);
int nvgpu_pmu_init_perfmon(struct nvgpu_pmu *pmu);
int nvgpu_pmu_perfmon_start_sampling(struct nvgpu_pmu *pmu);
int nvgpu_pmu_perfmon_stop_sampling(struct nvgpu_pmu *pmu);
int nvgpu_pmu_perfmon_start_sampling_rpc(struct nvgpu_pmu *pmu);
int nvgpu_pmu_perfmon_stop_sampling_rpc(struct nvgpu_pmu *pmu);
int nvgpu_pmu_perfmon_get_samples_rpc(struct nvgpu_pmu *pmu);
int nvgpu_pmu_handle_perfmon_event(struct nvgpu_pmu *pmu,
struct pmu_perfmon_msg *msg);
int nvgpu_pmu_init_perfmon_rpc(struct nvgpu_pmu *pmu);
int nvgpu_pmu_load_norm(struct gk20a *g, u32 *load);
int nvgpu_pmu_load_update(struct gk20a *g);
int nvgpu_pmu_busy_cycles_norm(struct gk20a *g, u32 *norm);
void nvgpu_pmu_reset_load_counters(struct gk20a *g);
void nvgpu_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles,
u32 *total_cycles);
int nvgpu_pmu_perfmon_get_sampling_enable_status(struct nvgpu_pmu *pmu);
void nvgpu_pmu_perfmon_set_sampling_enable_status(struct nvgpu_pmu *pmu,
bool status);
u64 nvgpu_pmu_perfmon_get_events_count(struct nvgpu_pmu *pmu);
u32 nvgpu_pmu_perfmon_get_load_avg(struct nvgpu_pmu *pmu);
#endif /* NVGPU_PMU_PERFMON_H */

View File

@@ -13,6 +13,8 @@
  */
 #include <nvgpu/enabled.h>
+#include <nvgpu/pmu/pmu_perfmon.h>
 #include "debug_pmu.h"
 #include "os_linux.h"
@@ -314,7 +316,8 @@ static int perfmon_events_enable_show(struct seq_file *s, void *data)
 {
 	struct gk20a *g = s->private;
-	seq_printf(s, "%u\n", g->pmu.perfmon_sampling_enabled ? 1 : 0);
+	seq_printf(s, "%u\n",
+		nvgpu_pmu_perfmon_get_sampling_enable_status(&(g->pmu)) ? 1 : 0);
 	return 0;
 }
@@ -333,6 +336,7 @@ static ssize_t perfmon_events_enable_write(struct file *file,
 	char buf[40];
 	int buf_size;
 	int err;
+	bool status;
 	(void) memset(buf, 0, sizeof(buf));
 	buf_size = min(count, (sizeof(buf)-1));
@@ -349,18 +353,22 @@ static ssize_t perfmon_events_enable_write(struct file *file,
 		if (err)
 			return err;
-		if (val && !g->pmu.perfmon_sampling_enabled &&
-			nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) {
-			g->pmu.perfmon_sampling_enabled = true;
+		if (val && !nvgpu_pmu_perfmon_get_sampling_enable_status(&(g->pmu))
+			&& nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) {
+			nvgpu_pmu_perfmon_set_sampling_enable_status(&(g->pmu),
+				true);
 			g->ops.pmu.pmu_perfmon_start_sampling(&(g->pmu));
-		} else if (!val && g->pmu.perfmon_sampling_enabled &&
-			nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) {
-			g->pmu.perfmon_sampling_enabled = false;
+		} else if (!val
+			&& nvgpu_pmu_perfmon_get_sampling_enable_status(&(g->pmu))
+			&& nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) {
+			nvgpu_pmu_perfmon_set_sampling_enable_status(&(g->pmu),
+				false);
 			g->ops.pmu.pmu_perfmon_stop_sampling(&(g->pmu));
 		}
 		gk20a_idle(g);
 	} else {
-		g->pmu.perfmon_sampling_enabled = val ? true : false;
+		status = val ? true : false;
+		nvgpu_pmu_perfmon_set_sampling_enable_status(&(g->pmu), status);
 	}
 	return count;
@@ -378,7 +386,7 @@ static int perfmon_events_count_show(struct seq_file *s, void *data)
 {
 	struct gk20a *g = s->private;
-	seq_printf(s, "%lu\n", g->pmu.perfmon_events_cnt);
+	seq_printf(s, "%llu\n", nvgpu_pmu_perfmon_get_events_count(&(g->pmu)));
 	return 0;
 }

View File

@@ -44,7 +44,7 @@
 #include <nvgpu/gk20a.h>
 #include <nvgpu/gr/global_ctx.h>
 #include <nvgpu/nvhost.h>
+#include <nvgpu/pmu/pmu_perfmon.h>
 #include <nvgpu/linux/dma.h>
 #include "gm20b/clk_gm20b.h"
@@ -172,7 +172,8 @@ static unsigned long gk20a_tegra_get_emc_rate(struct gk20a *g,
 	/* When scaling emc, account for the gpu load when the
 	 * gpu frequency is less than or equal to fmax@vmin. */
 	if (gpu_freq <= gpu_fmax_at_vmin)
-		emc_scale = min(g->pmu.load_avg, g->emc3d_ratio);
+		emc_scale = min(nvgpu_pmu_perfmon_get_load_avg(&(g->pmu)),
+			g->emc3d_ratio);
 	else
 		emc_scale = g->emc3d_ratio;

View File

@@ -34,6 +34,7 @@
 #include <nvgpu/enabled.h>
 #include <nvgpu/gk20a.h>
 #include <nvgpu/nvhost.h>
+#include <nvgpu/pmu/pmu_perfmon.h>
 #include "os_linux.h"

View File

@@ -1,7 +1,7 @@
 /*
  * gk20a clock scaling profile
  *
- * Copyright (c) 2013-2018, NVIDIA Corporation. All rights reserved.
+ * Copyright (c) 2013-2019, NVIDIA Corporation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -27,6 +27,7 @@
 #include <nvgpu/log.h>
 #include <nvgpu/gk20a.h>
 #include <nvgpu/clk_arb.h>
+#include <nvgpu/pmu/pmu_perfmon.h>
 #include "platform_gk20a.h"
 #include "scale.h"

View File

@@ -27,6 +27,7 @@
 #include <nvgpu/gr/obj_ctx.h>
 #include <nvgpu/power_features/cg.h>
 #include <nvgpu/power_features/pg.h>
+#include <nvgpu/pmu/pmu_perfmon.h>
 #include "os_linux.h"
 #include "sysfs.h"

View File

@@ -199,6 +199,7 @@
 #include <nvgpu/gr/gr_falcon.h>
 #include <nvgpu/gr/gr.h>
 #include <nvgpu/gr/gr_intr.h>
+#include <nvgpu/pmu/pmu_perfmon.h>
 #include <nvgpu/hw/tu104/hw_proj_tu104.h>
 #include <nvgpu/hw/tu104/hw_top_tu104.h>