gpu: nvgpu: move cyclestats_snapshot data to struct gk20a

The cyclestats_snapshot data and lock are currently stored in struct nvgpu_gr.
The use case itself is not specific to the GR engine; in general it applies
to other units outside of GR too.

Hence it makes sense to move both the data and the lock to struct gk20a
instead of keeping them in struct nvgpu_gr.

Update all cyclestats_snapshot code to refer to the data/lock from struct gk20a.
Remove the gr_priv.h header include from cyclestats_snapshot.c.

Some of the functions were mistakenly declared in gr_gk20a.h.
Move them to cyclestats_snapshot.h and rename them to the nvgpu_css_*() form.

Jira NVGPU-1103

Change-Id: I3fb32fe96f0ca6613f4640c8bd227b9e0e02dca3
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2104848
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Deepak Nibade
2019-04-24 17:49:12 +05:30
committed by mobile promotions
parent 11110465df
commit 1533951567
14 changed files with 86 additions and 120 deletions

View File

@@ -257,8 +257,6 @@ static void gr_remove_support(struct gk20a *g)
nvgpu_log_fn(g, " "); nvgpu_log_fn(g, " ");
gr_gk20a_free_cyclestats_snapshot_data(g);
nvgpu_gr_global_ctx_buffer_free(g, gr->global_ctx_buffer); nvgpu_gr_global_ctx_buffer_free(g, gr->global_ctx_buffer);
nvgpu_gr_global_ctx_desc_free(g, gr->global_ctx_buffer); nvgpu_gr_global_ctx_desc_free(g, gr->global_ctx_buffer);
@@ -398,14 +396,6 @@ static int gr_init_setup_sw(struct gk20a *g)
} }
gr->ctxsw_disable_count = 0; gr->ctxsw_disable_count = 0;
#if defined(CONFIG_GK20A_CYCLE_STATS)
err = nvgpu_mutex_init(&g->gr->cs_lock);
if (err != 0) {
nvgpu_err(g, "Error in gr.cs_lock mutex initialization");
return err;
}
#endif
err = nvgpu_gr_obj_ctx_init(g, &gr->golden_image, err = nvgpu_gr_obj_ctx_init(g, &gr->golden_image,
nvgpu_gr_falcon_get_golden_image_size(g->gr->falcon)); nvgpu_gr_falcon_get_golden_image_size(g->gr->falcon));
if (err != 0) { if (err != 0) {

View File

@@ -81,10 +81,6 @@ struct nvgpu_gr {
u32 fbp_en_mask; u32 fbp_en_mask;
u32 *fbp_rop_l2_en_mask; u32 *fbp_rop_l2_en_mask;
#if defined(CONFIG_GK20A_CYCLE_STATS)
struct nvgpu_mutex cs_lock;
struct gk20a_cs_snapshot *cs_data;
#endif
u32 max_css_buffer_size; u32 max_css_buffer_size;
u32 max_ctxsw_ring_buffer_size; u32 max_ctxsw_ring_buffer_size;

View File

@@ -36,11 +36,8 @@
#include <nvgpu/gk20a.h> #include <nvgpu/gk20a.h>
#include <nvgpu/channel.h> #include <nvgpu/channel.h>
#include <nvgpu/unit.h> #include <nvgpu/unit.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/cyclestats_snapshot.h> #include <nvgpu/cyclestats_snapshot.h>
#include "common/gr/gr_priv.h"
/* check client for pointed perfmon ownership */ /* check client for pointed perfmon ownership */
#define CONTAINS_PERFMON(cl, pm) \ #define CONTAINS_PERFMON(cl, pm) \
((cl)->perfmon_start <= (pm) && \ ((cl)->perfmon_start <= (pm) && \
@@ -87,21 +84,21 @@ void nvgpu_css_set_handled_snapshots(struct gk20a *g, u32 done)
* from locked context (protected by cs_lock) * from locked context (protected by cs_lock)
*/ */
static int css_gr_create_shared_data(struct nvgpu_gr *gr) static int css_gr_create_shared_data(struct gk20a *g)
{ {
struct gk20a_cs_snapshot *data; struct gk20a_cs_snapshot *data;
if (gr->cs_data) { if (g->cs_data) {
return 0; return 0;
} }
data = nvgpu_kzalloc(gr->g, sizeof(*data)); data = nvgpu_kzalloc(g, sizeof(*data));
if (!data) { if (!data) {
return -ENOMEM; return -ENOMEM;
} }
nvgpu_init_list_node(&data->clients); nvgpu_init_list_node(&data->clients);
gr->cs_data = data; g->cs_data = data;
return 0; return 0;
} }
@@ -110,8 +107,7 @@ int nvgpu_css_enable_snapshot(struct channel_gk20a *ch,
struct gk20a_cs_snapshot_client *cs_client) struct gk20a_cs_snapshot_client *cs_client)
{ {
struct gk20a *g = ch->g; struct gk20a *g = ch->g;
struct nvgpu_gr *gr = g->gr; struct gk20a_cs_snapshot *data = g->cs_data;
struct gk20a_cs_snapshot *data = gr->cs_data;
u32 snapshot_size = cs_client->snapshot_size; u32 snapshot_size = cs_client->snapshot_size;
int ret; int ret;
@@ -163,10 +159,9 @@ failed_allocation:
return ret; return ret;
} }
void nvgpu_css_disable_snapshot(struct nvgpu_gr *gr) void nvgpu_css_disable_snapshot(struct gk20a *g)
{ {
struct gk20a *g = gr->g; struct gk20a_cs_snapshot *data = g->cs_data;
struct gk20a_cs_snapshot *data = gr->cs_data;
if (!data->hw_snapshot) { if (!data->hw_snapshot) {
return; return;
@@ -182,17 +177,15 @@ void nvgpu_css_disable_snapshot(struct nvgpu_gr *gr)
nvgpu_log_info(g, "cyclestats: buffer for hardware snapshots disabled\n"); nvgpu_log_info(g, "cyclestats: buffer for hardware snapshots disabled\n");
} }
static void css_gr_free_shared_data(struct nvgpu_gr *gr) static void css_gr_free_shared_data(struct gk20a *g)
{ {
struct gk20a *g = gr->g; if (g->cs_data) {
if (gr->cs_data) {
/* the clients list is expected to be empty */ /* the clients list is expected to be empty */
g->ops.css.disable_snapshot(gr); g->ops.css.disable_snapshot(g);
/* release the objects */ /* release the objects */
nvgpu_kfree(gr->g, gr->cs_data); nvgpu_kfree(g, g->cs_data);
gr->cs_data = NULL; g->cs_data = NULL;
} }
} }
@@ -215,8 +208,7 @@ nvgpu_css_gr_search_client(struct nvgpu_list_node *clients, u32 perfmon)
static int css_gr_flush_snapshots(struct channel_gk20a *ch) static int css_gr_flush_snapshots(struct channel_gk20a *ch)
{ {
struct gk20a *g = ch->g; struct gk20a *g = ch->g;
struct nvgpu_gr *gr = g->gr; struct gk20a_cs_snapshot *css = g->cs_data;
struct gk20a_cs_snapshot *css = gr->cs_data;
struct gk20a_cs_snapshot_client *cur; struct gk20a_cs_snapshot_client *cur;
u32 pending, completed; u32 pending, completed;
bool hw_overflow; bool hw_overflow;
@@ -351,7 +343,7 @@ next_hw_fifo_entry:
(void) memset(css->hw_get, 0xff, (void) memset(css->hw_get, 0xff,
(css->hw_end - css->hw_get) * sizeof(*src)); (css->hw_end - css->hw_get) * sizeof(*src));
} }
gr->cs_data->hw_get = src; g->cs_data->hw_get = src;
if (g->ops.css.set_handled_snapshots) { if (g->ops.css.set_handled_snapshots) {
g->ops.css.set_handled_snapshots(g, sid); g->ops.css.set_handled_snapshots(g, sid);
@@ -466,14 +458,13 @@ static int css_gr_create_client_data(struct gk20a *g,
} }
int gr_gk20a_css_attach(struct channel_gk20a *ch, int nvgpu_css_attach(struct channel_gk20a *ch,
u32 perfmon_count, u32 perfmon_count,
u32 *perfmon_start, u32 *perfmon_start,
struct gk20a_cs_snapshot_client *cs_client) struct gk20a_cs_snapshot_client *cs_client)
{ {
int ret = 0; int ret = 0;
struct gk20a *g = ch->g; struct gk20a *g = ch->g;
struct nvgpu_gr *gr;
/* we must have a placeholder to store pointer to client structure */ /* we must have a placeholder to store pointer to client structure */
if (!cs_client) { if (!cs_client) {
@@ -487,16 +478,14 @@ int gr_gk20a_css_attach(struct channel_gk20a *ch,
nvgpu_speculation_barrier(); nvgpu_speculation_barrier();
gr = g->gr; nvgpu_mutex_acquire(&g->cs_lock);
nvgpu_mutex_acquire(&gr->cs_lock); ret = css_gr_create_shared_data(g);
ret = css_gr_create_shared_data(gr);
if (ret != 0) { if (ret != 0) {
goto failed; goto failed;
} }
ret = css_gr_create_client_data(g, gr->cs_data, ret = css_gr_create_client_data(g, g->cs_data,
perfmon_count, perfmon_count,
cs_client); cs_client);
if (ret != 0) { if (ret != 0) {
@@ -512,22 +501,22 @@ int gr_gk20a_css_attach(struct channel_gk20a *ch,
*perfmon_start = cs_client->perfmon_start; *perfmon_start = cs_client->perfmon_start;
} }
nvgpu_mutex_release(&gr->cs_lock); nvgpu_mutex_release(&g->cs_lock);
return 0; return 0;
failed: failed:
if (gr->cs_data) { if (g->cs_data) {
if (cs_client) { if (cs_client) {
css_gr_free_client_data(g, gr->cs_data, cs_client); css_gr_free_client_data(g, g->cs_data, cs_client);
cs_client = NULL; cs_client = NULL;
} }
if (nvgpu_list_empty(&gr->cs_data->clients)) { if (nvgpu_list_empty(&g->cs_data->clients)) {
css_gr_free_shared_data(gr); css_gr_free_shared_data(g);
} }
} }
nvgpu_mutex_release(&gr->cs_lock); nvgpu_mutex_release(&g->cs_lock);
if (perfmon_start) { if (perfmon_start) {
*perfmon_start = 0; *perfmon_start = 0;
@@ -536,21 +525,19 @@ failed:
return ret; return ret;
} }
int gr_gk20a_css_detach(struct channel_gk20a *ch, int nvgpu_css_detach(struct channel_gk20a *ch,
struct gk20a_cs_snapshot_client *cs_client) struct gk20a_cs_snapshot_client *cs_client)
{ {
int ret = 0; int ret = 0;
struct gk20a *g = ch->g; struct gk20a *g = ch->g;
struct nvgpu_gr *gr;
if (!cs_client) { if (!cs_client) {
return -EINVAL; return -EINVAL;
} }
gr = g->gr; nvgpu_mutex_acquire(&g->cs_lock);
nvgpu_mutex_acquire(&gr->cs_lock); if (g->cs_data) {
if (gr->cs_data) { struct gk20a_cs_snapshot *data = g->cs_data;
struct gk20a_cs_snapshot *data = gr->cs_data;
if (g->ops.css.detach_snapshot) { if (g->ops.css.detach_snapshot) {
g->ops.css.detach_snapshot(ch, cs_client); g->ops.css.detach_snapshot(ch, cs_client);
@@ -558,52 +545,47 @@ int gr_gk20a_css_detach(struct channel_gk20a *ch,
ret = css_gr_free_client_data(g, data, cs_client); ret = css_gr_free_client_data(g, data, cs_client);
if (nvgpu_list_empty(&data->clients)) { if (nvgpu_list_empty(&data->clients)) {
css_gr_free_shared_data(gr); css_gr_free_shared_data(g);
} }
} else { } else {
ret = -EBADF; ret = -EBADF;
} }
nvgpu_mutex_release(&gr->cs_lock); nvgpu_mutex_release(&g->cs_lock);
return ret; return ret;
} }
int gr_gk20a_css_flush(struct channel_gk20a *ch, int nvgpu_css_flush(struct channel_gk20a *ch,
struct gk20a_cs_snapshot_client *cs_client) struct gk20a_cs_snapshot_client *cs_client)
{ {
int ret = 0; int ret = 0;
struct gk20a *g = ch->g; struct gk20a *g = ch->g;
struct nvgpu_gr *gr;
if (!cs_client) { if (!cs_client) {
return -EINVAL; return -EINVAL;
} }
gr = g->gr; nvgpu_mutex_acquire(&g->cs_lock);
nvgpu_mutex_acquire(&gr->cs_lock);
ret = css_gr_flush_snapshots(ch); ret = css_gr_flush_snapshots(ch);
nvgpu_mutex_release(&gr->cs_lock); nvgpu_mutex_release(&g->cs_lock);
return ret; return ret;
} }
/* helper function with locking to cleanup snapshot code code in gr_gk20a.c */ /* helper function with locking to cleanup snapshot code code in gr_gk20a.c */
void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g) void nvgpu_free_cyclestats_snapshot_data(struct gk20a *g)
{ {
struct nvgpu_gr *gr = g->gr; nvgpu_mutex_acquire(&g->cs_lock);
css_gr_free_shared_data(g);
nvgpu_mutex_acquire(&gr->cs_lock); nvgpu_mutex_release(&g->cs_lock);
css_gr_free_shared_data(gr); nvgpu_mutex_destroy(&g->cs_lock);
nvgpu_mutex_release(&gr->cs_lock);
nvgpu_mutex_destroy(&gr->cs_lock);
} }
int nvgpu_css_check_data_available(struct channel_gk20a *ch, u32 *pending, int nvgpu_css_check_data_available(struct channel_gk20a *ch, u32 *pending,
bool *hw_overflow) bool *hw_overflow)
{ {
struct gk20a *g = ch->g; struct gk20a *g = ch->g;
struct nvgpu_gr *gr = g->gr; struct gk20a_cs_snapshot *css = g->cs_data;
struct gk20a_cs_snapshot *css = gr->cs_data;
if (!css->hw_snapshot) { if (!css->hw_snapshot) {
return -EINVAL; return -EINVAL;

View File

@@ -711,10 +711,6 @@ static int vgpu_gr_init_gr_setup_sw(struct gk20a *g)
gr->g = g; gr->g = g;
#if defined(CONFIG_GK20A_CYCLE_STATS)
nvgpu_mutex_init(&g->gr->cs_lock);
#endif
if (gr->falcon == NULL) { if (gr->falcon == NULL) {
gr->falcon = nvgpu_gr_falcon_init_support(g); gr->falcon = nvgpu_gr_falcon_init_support(g);
if (gr->falcon == NULL) { if (gr->falcon == NULL) {

View File

@@ -32,6 +32,7 @@
#include <nvgpu/string.h> #include <nvgpu/string.h>
#include <nvgpu/ltc.h> #include <nvgpu/ltc.h>
#include <nvgpu/cbc.h> #include <nvgpu/cbc.h>
#include <nvgpu/cyclestats_snapshot.h>
#include "init_vgpu.h" #include "init_vgpu.h"
#include "init_hal_vgpu.h" #include "init_hal_vgpu.h"
@@ -81,6 +82,10 @@ void vgpu_remove_support_common(struct gk20a *g)
g->mm.remove_support(&g->mm); g->mm.remove_support(&g->mm);
} }
#if defined(CONFIG_GK20A_CYCLE_STATS)
nvgpu_free_cyclestats_snapshot_data(g);
#endif
msg.event = TEGRA_VGPU_EVENT_ABORT; msg.event = TEGRA_VGPU_EVENT_ABORT;
err = vgpu_ivc_send(vgpu_ivc_get_peer_self(), TEGRA_VGPU_QUEUE_INTR, err = vgpu_ivc_send(vgpu_ivc_get_peer_self(), TEGRA_VGPU_QUEUE_INTR,
&msg, sizeof(msg)); &msg, sizeof(msg));

View File

@@ -32,7 +32,6 @@
#include "cyclestats_snapshot_vgpu.h" #include "cyclestats_snapshot_vgpu.h"
#include "common/vgpu/ivc/comm_vgpu.h" #include "common/vgpu/ivc/comm_vgpu.h"
#include "common/gr/gr_priv.h"
static struct tegra_hv_ivm_cookie *css_cookie; static struct tegra_hv_ivm_cookie *css_cookie;
@@ -81,10 +80,9 @@ u32 vgpu_css_get_buffer_size(struct gk20a *g)
return size; return size;
} }
static int vgpu_css_init_snapshot_buffer(struct nvgpu_gr *gr) static int vgpu_css_init_snapshot_buffer(struct gk20a *g)
{ {
struct gk20a *g = gr->g; struct gk20a_cs_snapshot *data = g->cs_data;
struct gk20a_cs_snapshot *data = gr->cs_data;
void *buf = NULL; void *buf = NULL;
int err; int err;
u64 size; u64 size;
@@ -127,10 +125,9 @@ fail:
return err; return err;
} }
void vgpu_css_release_snapshot_buffer(struct nvgpu_gr *gr) void vgpu_css_release_snapshot_buffer(struct gk20a *g)
{ {
struct gk20a_cs_snapshot *data = gr->cs_data; struct gk20a_cs_snapshot *data = g->cs_data;
struct gk20a *g = gr->g;
if (!data->hw_snapshot) { if (!data->hw_snapshot) {
return; return;
@@ -151,8 +148,7 @@ int vgpu_css_flush_snapshots(struct channel_gk20a *ch,
struct gk20a *g = ch->g; struct gk20a *g = ch->g;
struct tegra_vgpu_cmd_msg msg = {}; struct tegra_vgpu_cmd_msg msg = {};
struct tegra_vgpu_channel_cyclestats_snapshot_params *p; struct tegra_vgpu_channel_cyclestats_snapshot_params *p;
struct nvgpu_gr *gr = g->gr; struct gk20a_cs_snapshot *data = g->cs_data;
struct gk20a_cs_snapshot *data = gr->cs_data;
int err; int err;
nvgpu_log_fn(g, " "); nvgpu_log_fn(g, " ");
@@ -239,7 +235,7 @@ int vgpu_css_enable_snapshot_buffer(struct channel_gk20a *ch,
return ret; return ret;
} }
ret = vgpu_css_init_snapshot_buffer(ch->g->gr); ret = vgpu_css_init_snapshot_buffer(ch->g);
return ret; return ret;
} }

View File

@@ -25,11 +25,11 @@
#include <nvgpu/types.h> #include <nvgpu/types.h>
struct nvgpu_gr; struct gk20a;
struct channel_gk20a; struct channel_gk20a;
struct gk20a_cs_snapshot_client; struct gk20a_cs_snapshot_client;
void vgpu_css_release_snapshot_buffer(struct nvgpu_gr *gr); void vgpu_css_release_snapshot_buffer(struct gk20a *g);
int vgpu_css_flush_snapshots(struct channel_gk20a *ch, int vgpu_css_flush_snapshots(struct channel_gk20a *ch,
u32 *pending, bool *hw_overflow); u32 *pending, bool *hw_overflow);
int vgpu_css_detach(struct channel_gk20a *ch, int vgpu_css_detach(struct channel_gk20a *ch,

View File

@@ -43,11 +43,6 @@ struct nvgpu_dbg_reg_op;
enum ctxsw_addr_type; enum ctxsw_addr_type;
#if defined(CONFIG_GK20A_CYCLE_STATS)
struct gk20a_cs_snapshot_client;
struct gk20a_cs_snapshot;
#endif
struct nvgpu_warpstate { struct nvgpu_warpstate {
u64 valid_warps[2]; u64 valid_warps[2];
u64 trapped_warps[2]; u64 trapped_warps[2];
@@ -97,28 +92,6 @@ int gr_gk20a_set_sm_debug_mode(struct gk20a *g,
struct channel_gk20a *ch, u64 sms, bool enable); struct channel_gk20a *ch, u64 sms, bool enable);
bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch); bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch);
#if defined(CONFIG_GK20A_CYCLE_STATS)
int gr_gk20a_css_attach(struct channel_gk20a *ch, /* in - main hw structure */
u32 perfmon_id_count, /* in - number of perfmons*/
u32 *perfmon_id_start, /* out- index of first pm */
/* in/out - pointer to client data used in later */
struct gk20a_cs_snapshot_client *css_client);
int gr_gk20a_css_detach(struct channel_gk20a *ch,
struct gk20a_cs_snapshot_client *css_client);
int gr_gk20a_css_flush(struct channel_gk20a *ch,
struct gk20a_cs_snapshot_client *css_client);
void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g);
#else
/* fake empty cleanup function if no cyclestats snapshots enabled */
static inline void gr_gk20a_free_cyclestats_snapshot_data(struct gk20a *g)
{
(void)g;
}
#endif
int gk20a_gr_lock_down_sm(struct gk20a *g, int gk20a_gr_lock_down_sm(struct gk20a *g,
u32 gpc, u32 tpc, u32 sm, u32 global_esr_mask, u32 gpc, u32 tpc, u32 sm, u32 global_esr_mask,
bool check_errors); bool check_errors);

View File

@@ -34,7 +34,6 @@
#define CSS_MIN_HW_SNAPSHOT_SIZE (8 * 1024 * 1024) #define CSS_MIN_HW_SNAPSHOT_SIZE (8 * 1024 * 1024)
struct gk20a; struct gk20a;
struct nvgpu_gr;
struct channel_gk20a; struct channel_gk20a;
/* cycle stats fifo header (must match NvSnapshotBufferFifo) */ /* cycle stats fifo header (must match NvSnapshotBufferFifo) */
@@ -139,7 +138,7 @@ u32 nvgpu_css_get_pending_snapshots(struct gk20a *g);
void nvgpu_css_set_handled_snapshots(struct gk20a *g, u32 done); void nvgpu_css_set_handled_snapshots(struct gk20a *g, u32 done);
int nvgpu_css_enable_snapshot(struct channel_gk20a *ch, int nvgpu_css_enable_snapshot(struct channel_gk20a *ch,
struct gk20a_cs_snapshot_client *cs_client); struct gk20a_cs_snapshot_client *cs_client);
void nvgpu_css_disable_snapshot(struct nvgpu_gr *gr); void nvgpu_css_disable_snapshot(struct gk20a *g);
u32 nvgpu_css_allocate_perfmon_ids(struct gk20a_cs_snapshot *data, u32 nvgpu_css_allocate_perfmon_ids(struct gk20a_cs_snapshot *data,
u32 count); u32 count);
u32 nvgpu_css_release_perfmon_ids(struct gk20a_cs_snapshot *data, u32 nvgpu_css_release_perfmon_ids(struct gk20a_cs_snapshot *data,
@@ -150,4 +149,17 @@ int nvgpu_css_check_data_available(struct channel_gk20a *ch, u32 *pending,
struct gk20a_cs_snapshot_client * struct gk20a_cs_snapshot_client *
nvgpu_css_gr_search_client(struct nvgpu_list_node *clients, u32 perfmon); nvgpu_css_gr_search_client(struct nvgpu_list_node *clients, u32 perfmon);
int nvgpu_css_attach(struct channel_gk20a *ch, /* in - main hw structure */
u32 perfmon_id_count, /* in - number of perfmons*/
u32 *perfmon_id_start, /* out- index of first pm */
/* in/out - pointer to client data used in later */
struct gk20a_cs_snapshot_client *css_client);
int nvgpu_css_detach(struct channel_gk20a *ch,
struct gk20a_cs_snapshot_client *css_client);
int nvgpu_css_flush(struct channel_gk20a *ch,
struct gk20a_cs_snapshot_client *css_client);
void nvgpu_free_cyclestats_snapshot_data(struct gk20a *g);
#endif /* CYCLESTATS_SNAPSHOT_H */ #endif /* CYCLESTATS_SNAPSHOT_H */

View File

@@ -1630,7 +1630,7 @@ struct gpu_ops {
struct { struct {
int (*enable_snapshot)(struct channel_gk20a *ch, int (*enable_snapshot)(struct channel_gk20a *ch,
struct gk20a_cs_snapshot_client *client); struct gk20a_cs_snapshot_client *client);
void (*disable_snapshot)(struct nvgpu_gr *gr); void (*disable_snapshot)(struct gk20a *g);
int (*check_data_available)(struct channel_gk20a *ch, int (*check_data_available)(struct channel_gk20a *ch,
u32 *pending, u32 *pending,
bool *hw_overflow); bool *hw_overflow);
@@ -2042,6 +2042,11 @@ struct gk20a {
struct nvgpu_dbg_reg_op *dbg_regops_tmp_buf; struct nvgpu_dbg_reg_op *dbg_regops_tmp_buf;
u32 dbg_regops_tmp_buf_ops; u32 dbg_regops_tmp_buf_ops;
#if defined(CONFIG_GK20A_CYCLE_STATS)
struct nvgpu_mutex cs_lock;
struct gk20a_cs_snapshot *cs_data;
#endif
/* For perfbuf mapping */ /* For perfbuf mapping */
struct { struct {
struct dbg_session_gk20a *owner; struct dbg_session_gk20a *owner;

View File

@@ -66,6 +66,9 @@ static void nvgpu_init_vars(struct gk20a *g)
nvgpu_mutex_init(&g->tpc_pg_lock); nvgpu_mutex_init(&g->tpc_pg_lock);
nvgpu_mutex_init(&g->clk_arb_enable_lock); nvgpu_mutex_init(&g->clk_arb_enable_lock);
nvgpu_mutex_init(&g->cg_pg_lock); nvgpu_mutex_init(&g->cg_pg_lock);
#if defined(CONFIG_GK20A_CYCLE_STATS)
nvgpu_mutex_init(&g->cs_lock);
#endif
/* Init the clock req count to 0 */ /* Init the clock req count to 0 */
nvgpu_atomic_set(&g->clk_arb_global_nr, 0); nvgpu_atomic_set(&g->clk_arb_global_nr, 0);

View File

@@ -175,7 +175,7 @@ int gk20a_flush_cycle_stats_snapshot(struct channel_gk20a *ch)
nvgpu_mutex_acquire(&ch->cs_client_mutex); nvgpu_mutex_acquire(&ch->cs_client_mutex);
if (ch->cs_client) if (ch->cs_client)
ret = gr_gk20a_css_flush(ch, ch->cs_client); ret = nvgpu_css_flush(ch, ch->cs_client);
else else
ret = -EBADF; ret = -EBADF;
nvgpu_mutex_release(&ch->cs_client_mutex); nvgpu_mutex_release(&ch->cs_client_mutex);
@@ -229,7 +229,7 @@ int gk20a_attach_cycle_stats_snapshot(struct channel_gk20a *ch,
ch->cs_client = client; ch->cs_client = client;
ret = gr_gk20a_css_attach(ch, ret = nvgpu_css_attach(ch,
perfmon_id_count, perfmon_id_count,
perfmon_id_start, perfmon_id_start,
ch->cs_client); ch->cs_client);
@@ -262,7 +262,7 @@ int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch)
struct gk20a_cs_snapshot_client_linux, struct gk20a_cs_snapshot_client_linux,
cs_client); cs_client);
ret = gr_gk20a_css_detach(ch, ch->cs_client); ret = nvgpu_css_detach(ch, ch->cs_client);
if (client_linux->dma_handler) { if (client_linux->dma_handler) {
if (ch->cs_client->snapshot) if (ch->cs_client->snapshot)

View File

@@ -50,6 +50,7 @@
#include <nvgpu/channel.h> #include <nvgpu/channel.h>
#include <nvgpu/gr/gr.h> #include <nvgpu/gr/gr.h>
#include <nvgpu/pmu/pmu_pstate.h> #include <nvgpu/pmu/pmu_pstate.h>
#include <nvgpu/cyclestats_snapshot.h>
#include "common/gr/gr_priv.h" #include "common/gr/gr_priv.h"
#include "platform_gk20a.h" #include "platform_gk20a.h"
@@ -779,6 +780,10 @@ void gk20a_remove_support(struct gk20a *g)
sim_linux->remove_support_linux(g); sim_linux->remove_support_linux(g);
} }
#if defined(CONFIG_GK20A_CYCLE_STATS)
nvgpu_free_cyclestats_snapshot_data(g);
#endif
nvgpu_remove_usermode_support(g); nvgpu_remove_usermode_support(g);
nvgpu_free_enabled_flags(g); nvgpu_free_enabled_flags(g);

View File

@@ -134,6 +134,9 @@ static int vgpu_init_support(struct platform_device *pdev)
nvgpu_mutex_init(&g->dbg_sessions_lock); nvgpu_mutex_init(&g->dbg_sessions_lock);
nvgpu_mutex_init(&g->client_lock); nvgpu_mutex_init(&g->client_lock);
#if defined(CONFIG_GK20A_CYCLE_STATS)
nvgpu_mutex_init(&g->cs_lock);
#endif
nvgpu_init_list_node(&g->profiler_objects); nvgpu_init_list_node(&g->profiler_objects);