gpu: nvgpu: Add flag checking for ZCULL code

Add NVGPU_GRAPHICS flag checks around the ZCULL-specific code.
Define the NVGPU_GRAPHICS flag to gate ZCULL support.
The flag is currently disabled for the safety build.

Jira NVGPU-3550

Change-Id: Ifd571a5e64e8fb2dfe02a87458a2986681900a6b
Signed-off-by: Vinod G <vinodg@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2127515
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Authored by Vinod G on 2019-05-29 16:14:30 -07:00, committed by mobile promotions
parent 05ed37ae3a
commit 61fb688f1a
31 changed files with 165 additions and 62 deletions
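For readers unfamiliar with the flag, the pattern applied throughout this change is plain compile-time gating: ZCULL-only declarations, HAL table entries and ioctl cases are wrapped in #ifdef NVGPU_GRAPHICS, and the Makefile compiles the ZCULL sources only when NVGPU_GRAPHICS=1. The following is a minimal, self-contained sketch of that pattern; the demo program and its bind_zcull() helper are illustrative only and are not nvgpu code.

/* zcull_flag_demo.c - hypothetical standalone demo, not part of nvgpu. */
#include <stdio.h>

#ifdef NVGPU_GRAPHICS
/* Compiled only when the graphics feature flag is defined at build time. */
static void bind_zcull(void)
{
	printf("graphics build: ZCULL bind path available\n");
}
#endif

int main(void)
{
#ifdef NVGPU_GRAPHICS
	bind_zcull();
#else
	/* Safety build: the ZCULL path is compiled out entirely. */
	printf("safety build: ZCULL support compiled out\n");
#endif
	return 0;
}

Building with "gcc -DNVGPU_GRAPHICS zcull_flag_demo.c" takes the first branch; building without the define mirrors the current safety build, where the ZCULL code is absent from the objects rather than disabled at runtime.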

View File

@@ -109,7 +109,6 @@ srcs += common/sim/sim.c \
     common/gr/subctx.c \
     common/gr/ctx.c \
     common/gr/gr_falcon.c \
-    common/gr/zcull.c \
     common/gr/gr_config.c \
     common/gr/gr_setup.c \
     common/gr/hwpm_map.c \
@@ -179,8 +178,6 @@ srcs += common/sim/sim.c \
     hal/clk/clk_gm20b.c \
     hal/gr/ecc/ecc_gp10b.c \
     hal/gr/ecc/ecc_gv11b.c \
-    hal/gr/zcull/zcull_gm20b.c \
-    hal/gr/zcull/zcull_gv11b.c \
     hal/gr/ctxsw_prog/ctxsw_prog_gm20b.c \
     hal/gr/ctxsw_prog/ctxsw_prog_gp10b.c \
     hal/gr/ctxsw_prog/ctxsw_prog_gv11b.c \
@@ -304,9 +301,12 @@ srcs += common/sim/sim.c \
 ifeq ($(NVGPU_GRAPHICS),1)
 srcs += common/gr/zbc.c \
+    common/gr/zcull.c \
     hal/gr/zbc/zbc_gm20b.c \
     hal/gr/zbc/zbc_gp10b.c \
-    hal/gr/zbc/zbc_gv11b.c
+    hal/gr/zbc/zbc_gv11b.c \
+    hal/gr/zcull/zcull_gm20b.c \
+    hal/gr/zcull/zcull_gv11b.c
 endif
 ifeq ($(NVGPU_DEBUGGER),1)

View File

@@ -216,15 +216,6 @@ void nvgpu_gr_ctx_free_patch_ctx(struct gk20a *g, struct vm_gk20a *vm,
     patch_ctx->data_count = 0;
 }
-void nvgpu_gr_ctx_set_zcull_ctx(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
-    u32 mode, u64 gpu_va)
-{
-    struct zcull_ctx_desc *zcull_ctx = &gr_ctx->zcull_ctx;
-    zcull_ctx->ctx_sw_mode = mode;
-    zcull_ctx->gpu_va = gpu_va;
-}
 static int nvgpu_gr_ctx_alloc_ctxsw_buffer(struct vm_gk20a *vm, size_t size,
     struct nvgpu_mem *mem)
 {
@@ -514,11 +505,6 @@ u32 nvgpu_gr_ctx_get_pm_ctx_pm_mode(struct nvgpu_gr_ctx *gr_ctx)
     return gr_ctx->pm_ctx.pm_mode;
 }
-u64 nvgpu_gr_ctx_get_zcull_ctx_va(struct nvgpu_gr_ctx *gr_ctx)
-{
-    return gr_ctx->zcull_ctx.gpu_va;
-}
 struct nvgpu_mem *nvgpu_gr_ctx_get_preempt_ctxsw_buffer(
     struct nvgpu_gr_ctx *gr_ctx)
 {
@@ -718,6 +704,21 @@ u32 nvgpu_gr_ctx_get_ctx_id(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx)
     return gr_ctx->ctx_id;
 }
+#ifdef NVGPU_GRAPHICS
+void nvgpu_gr_ctx_set_zcull_ctx(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
+    u32 mode, u64 gpu_va)
+{
+    struct zcull_ctx_desc *zcull_ctx = &gr_ctx->zcull_ctx;
+    zcull_ctx->ctx_sw_mode = mode;
+    zcull_ctx->gpu_va = gpu_va;
+}
+u64 nvgpu_gr_ctx_get_zcull_ctx_va(struct nvgpu_gr_ctx *gr_ctx)
+{
+    return gr_ctx->zcull_ctx.gpu_va;
+}
 int nvgpu_gr_ctx_init_zcull(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx)
 {
     int err;
@@ -755,6 +756,7 @@ int nvgpu_gr_ctx_zcull_setup(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
     return 0;
 }
+#endif
 int nvgpu_gr_ctx_set_smpc_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
     bool enable)

View File

@@ -30,8 +30,8 @@
 #include <nvgpu/gr/gr_intr.h>
 #ifdef NVGPU_GRAPHICS
 #include <nvgpu/gr/zbc.h>
-#endif
 #include <nvgpu/gr/zcull.h>
+#endif
 #include <nvgpu/netlist.h>
 #include <nvgpu/gr/gr_falcon.h>
 #include <nvgpu/gr/ctx.h>
@@ -183,10 +183,12 @@ static int gr_init_setup_hw(struct gk20a *g)
     /* load gr floorsweeping registers */
     g->ops.gr.init.pes_vsc_stream(g);
+#ifdef NVGPU_GRAPHICS
     err = nvgpu_gr_zcull_init_hw(g, gr->zcull, gr->config);
     if (err != 0) {
         goto out;
     }
+#endif /* NVGPU_GRAPHICS */
     if (g->ops.priv_ring.set_ppriv_timeout_settings != NULL) {
         g->ops.priv_ring.set_ppriv_timeout_settings(g);
@@ -283,9 +285,9 @@ static void gr_remove_support(struct gk20a *g)
 #ifdef NVGPU_GRAPHICS
     nvgpu_gr_zbc_deinit(g, gr->zbc);
+    nvgpu_gr_zcull_deinit(g, gr->zcull);
 #endif /* NVGPU_GRAPHICS */
-    nvgpu_gr_zcull_deinit(g, gr->zcull);
     nvgpu_gr_obj_ctx_deinit(g, gr->golden_image);
 }
@@ -406,12 +408,14 @@ static int gr_init_setup_sw(struct gk20a *g)
         goto clean_up;
     }
+#ifdef NVGPU_GRAPHICS
     err = nvgpu_gr_zcull_init(g, &gr->zcull,
         nvgpu_gr_falcon_get_zcull_image_size(g->gr->falcon),
         g->gr->config);
     if (err != 0) {
         goto clean_up;
     }
+#endif /* NVGPU_GRAPHICS */
     gr->gr_ctx_desc = nvgpu_gr_ctx_desc_alloc(g);
     if (gr->gr_ctx_desc == NULL) {

View File

@@ -145,10 +145,12 @@ struct nvgpu_gr_config *nvgpu_gr_config_init(struct gk20a *g)
             gpc_index);
         config->tpc_count += config->gpc_tpc_count[gpc_index];
+#ifdef NVGPU_GRAPHICS
         config->gpc_zcb_count[gpc_index] =
             g->ops.gr.config.get_zcull_count_in_gpc(g, config,
                 gpc_index);
         config->zcb_count += config->gpc_zcb_count[gpc_index];
+#endif
         for (pes_index = 0; pes_index < config->pe_count_per_gpc;
             pes_index++) {

View File

@@ -196,10 +196,12 @@ u32 nvgpu_gr_falcon_get_preempt_image_size(struct nvgpu_gr_falcon *falcon)
     return falcon->sizes.preempt_image_size;
 }
+#ifdef NVGPU_GRAPHICS
 u32 nvgpu_gr_falcon_get_zcull_image_size(struct nvgpu_gr_falcon *falcon)
 {
     return falcon->sizes.zcull_image_size;
 }
+#endif
 static int nvgpu_gr_falcon_init_ctxsw_ucode_vaspace(struct gk20a *g,
     struct nvgpu_gr_falcon *falcon)

View File

@@ -32,9 +32,9 @@ struct nvgpu_gr_obj_ctx_golden_image;
 struct nvgpu_gr_config;
 #ifdef NVGPU_GRAPHICS
 struct nvgpu_gr_zbc;
+struct nvgpu_gr_zcull;
 #endif
 struct nvgpu_gr_hwpm_map;
-struct nvgpu_gr_zcull;
 struct gk20a_cs_snapshot;
 struct nvgpu_gr {
@@ -53,9 +53,9 @@ struct nvgpu_gr {
     struct nvgpu_gr_hwpm_map *hwpm_map;
+#ifdef NVGPU_GRAPHICS
     struct nvgpu_gr_zcull *zcull;
-#ifdef NVGPU_GRAPHICS
     struct nvgpu_gr_zbc *zbc;
 #endif

View File

@@ -25,13 +25,16 @@
 #include <nvgpu/gr/ctx.h>
 #include <nvgpu/gr/subctx.h>
 #include <nvgpu/gr/obj_ctx.h>
+#ifdef NVGPU_GRAPHICS
 #include <nvgpu/gr/zcull.h>
+#endif
 #include <nvgpu/gr/setup.h>
 #include <nvgpu/channel.h>
 #include <nvgpu/preempt.h>
 #include "gr_priv.h"
+#ifdef NVGPU_GRAPHICS
 static int nvgpu_gr_setup_zcull(struct gk20a *g, struct nvgpu_channel *c,
     struct nvgpu_gr_ctx *gr_ctx)
 {
@@ -94,6 +97,7 @@ int nvgpu_gr_setup_bind_ctxsw_zcull(struct gk20a *g, struct nvgpu_channel *c,
     return nvgpu_gr_setup_zcull(g, c, gr_ctx);
 }
+#endif
 int nvgpu_gr_setup_alloc_obj_ctx(struct nvgpu_channel *c, u32 class_num,
     u32 flags)

View File

@@ -49,12 +49,12 @@ void nvgpu_gr_reset_golden_image_ptr(struct gk20a *g)
     g->gr->golden_image = NULL;
 }
+#ifdef NVGPU_GRAPHICS
 struct nvgpu_gr_zcull *nvgpu_gr_get_zcull_ptr(struct gk20a *g)
 {
     return g->gr->zcull;
 }
-#ifdef NVGPU_GRAPHICS
 struct nvgpu_gr_zbc *nvgpu_gr_get_zbc_ptr(struct gk20a *g)
 {
     return g->gr->zbc;

View File

@@ -505,10 +505,12 @@ restore_fe_go_idle:
         goto clean_up;
     }
+#ifdef NVGPU_GRAPHICS
     err = nvgpu_gr_ctx_init_zcull(g, gr_ctx);
     if (err != 0) {
         goto clean_up;
     }
+#endif
     data = g->ops.gr.falcon.get_fecs_current_ctx_data(g, inst_block);
     err = g->ops.gr.falcon.ctrl_ctxsw(g,

View File

@@ -104,14 +104,17 @@ void nvgpu_gr_subctx_load_ctx_header(struct gk20a *g,
     g->ops.gr.ctxsw_prog.set_pm_ptr(g, ctxheader,
         nvgpu_gr_ctx_get_pm_ctx_mem(gr_ctx)->gpu_va);
+#ifdef NVGPU_GRAPHICS
     g->ops.gr.ctxsw_prog.set_zcull_ptr(g, ctxheader,
         nvgpu_gr_ctx_get_zcull_ctx_va(gr_ctx));
+#endif
     g->ops.gr.ctxsw_prog.set_context_buffer_ptr(g, ctxheader, gpu_va);
     g->ops.gr.ctxsw_prog.set_type_per_veid_header(g, ctxheader);
 }
+#ifdef NVGPU_GRAPHICS
 void nvgpu_gr_subctx_zcull_setup(struct gk20a *g, struct nvgpu_gr_subctx *subctx,
     struct nvgpu_gr_ctx *gr_ctx)
 {
@@ -121,6 +124,7 @@ void nvgpu_gr_subctx_zcull_setup(struct gk20a *g, struct nvgpu_gr_subctx *subctx
     g->ops.gr.ctxsw_prog.set_zcull_ptr(g, &subctx->ctx_header,
         nvgpu_gr_ctx_get_zcull_ctx_va(gr_ctx));
 }
+#endif
 void nvgpu_gr_subctx_set_hwpm_mode(struct gk20a *g,
     struct nvgpu_gr_subctx *subctx, struct nvgpu_gr_ctx *gr_ctx)

View File

@@ -205,12 +205,14 @@ static const struct gpu_ops vgpu_gp10b_ops = {
         .get_patch_count = gm20b_ctxsw_prog_get_patch_count,
         .set_patch_count = gm20b_ctxsw_prog_set_patch_count,
         .set_patch_addr = gm20b_ctxsw_prog_set_patch_addr,
+#ifdef NVGPU_GRAPHICS
         .set_zcull_ptr = gm20b_ctxsw_prog_set_zcull_ptr,
         .set_zcull = gm20b_ctxsw_prog_set_zcull,
         .set_zcull_mode_no_ctxsw =
             gm20b_ctxsw_prog_set_zcull_mode_no_ctxsw,
         .is_zcull_mode_separate_buffer =
             gm20b_ctxsw_prog_is_zcull_mode_separate_buffer,
+#endif
         .set_pm_ptr = gm20b_ctxsw_prog_set_pm_ptr,
         .set_pm_mode = gm20b_ctxsw_prog_set_pm_mode,
         .set_pm_smpc_mode = gm20b_ctxsw_prog_set_pm_smpc_mode,
@@ -267,7 +269,9 @@ static const struct gpu_ops vgpu_gp10b_ops = {
         .init_sm_id_table = vgpu_gr_init_sm_id_table,
     },
     .setup = {
+#ifdef NVGPU_GRAPHICS
         .bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull,
+#endif
         .alloc_obj_ctx = vgpu_gr_alloc_obj_ctx,
         .free_gr_ctx = vgpu_gr_free_gr_ctx,
         .set_preemption_mode = vgpu_gr_set_preemption_mode,
@@ -282,11 +286,11 @@ static const struct gpu_ops vgpu_gp10b_ops = {
         .get_gpcs_swdx_dss_zbc_c_format_reg = NULL,
         .get_gpcs_swdx_dss_zbc_z_format_reg = NULL,
     },
-#endif /* NVGPU_GRAPHICS */
     .zcull = {
         .get_zcull_info = vgpu_gr_get_zcull_info,
         .program_zcull_mapping = NULL,
     },
+#endif /* NVGPU_GRAPHICS */
     .falcon = {
         .init_ctx_state = vgpu_gr_init_ctx_state,
         .load_ctxsw_ucode = NULL,

View File

@@ -41,8 +41,8 @@
 #include <nvgpu/gr/gr_falcon.h>
 #ifdef NVGPU_GRAPHICS
 #include <nvgpu/gr/zbc.h>
-#endif
 #include <nvgpu/gr/zcull.h>
+#endif
 #include <nvgpu/gr/fecs_trace.h>
 #include <nvgpu/gr/hwpm_map.h>
 #include <nvgpu/gr/obj_ctx.h>
@@ -60,8 +60,8 @@
#include "common/gr/gr_falcon_priv.h" #include "common/gr/gr_falcon_priv.h"
#include "common/gr/gr_intr_priv.h" #include "common/gr/gr_intr_priv.h"
#include "common/gr/ctx_priv.h" #include "common/gr/ctx_priv.h"
#include "common/gr/zcull_priv.h"
#ifdef NVGPU_GRAPHICS #ifdef NVGPU_GRAPHICS
#include "common/gr/zcull_priv.h"
#include "common/gr/zbc_priv.h" #include "common/gr/zbc_priv.h"
#endif #endif
#include "common/gr/gr_priv.h" #include "common/gr/gr_priv.h"
@@ -155,10 +155,12 @@ int vgpu_gr_init_ctx_state(struct gk20a *g,
         return -ENXIO;
     }
+#ifdef NVGPU_GRAPHICS
     sizes->zcull_image_size = priv->constants.zcull_ctx_size;
     if (sizes->zcull_image_size == 0U) {
         return -ENXIO;
     }
+#endif
     sizes->preempt_image_size =
         priv->constants.preempt_ctx_size;
@@ -476,6 +478,7 @@ cleanup:
     return err;
 }
+#ifdef NVGPU_GRAPHICS
 static int vgpu_gr_init_gr_zcull(struct gk20a *g, struct nvgpu_gr *gr,
     u32 size)
 {
@@ -542,6 +545,7 @@ int vgpu_gr_get_zcull_info(struct gk20a *g,
     return 0;
 }
+#endif
 u32 vgpu_gr_get_gpc_tpc_mask(struct gk20a *g, struct nvgpu_gr_config *config,
     u32 gpc_index)
@@ -667,7 +671,9 @@ static void vgpu_remove_gr_support(struct gk20a *g)
     nvgpu_gr_config_deinit(gr->g, gr->config);
+#ifdef NVGPU_GRAPHICS
     nvgpu_gr_zcull_deinit(gr->g, gr->zcull);
+#endif
 }
 static int vgpu_gr_init_gr_setup_sw(struct gk20a *g)
@@ -723,11 +729,13 @@ static int vgpu_gr_init_gr_setup_sw(struct gk20a *g)
         goto clean_up;
     }
+#ifdef NVGPU_GRAPHICS
     err = vgpu_gr_init_gr_zcull(g, gr,
         nvgpu_gr_falcon_get_zcull_image_size(g->gr->falcon));
     if (err) {
         goto clean_up;
     }
+#endif
     err = vgpu_gr_alloc_global_ctx_buffers(g);
     if (err) {

View File

@@ -28,8 +28,9 @@
 struct gk20a;
 struct nvgpu_channel;
 struct gr_gk20a;
-struct nvgpu_gr_zcull_info;
 #ifdef NVGPU_GRAPHICS
+struct nvgpu_gr_zcull_info;
+struct nvgpu_gr_zcull;
 struct nvgpu_gr_zbc;
 struct nvgpu_gr_zbc_entry;
 struct nvgpu_gr_zbc_query_params;
@@ -38,7 +39,6 @@ struct dbg_session_gk20a;
 struct nvgpu_tsg;
 struct vm_gk20a;
 struct nvgpu_gr_ctx;
-struct nvgpu_gr_zcull;
 struct tegra_vgpu_gr_intr_info;
 struct tegra_vgpu_sm_esr_info;
 struct nvgpu_gr_falcon_query_sizes;
@@ -50,18 +50,18 @@ int vgpu_gr_alloc_global_ctx_buffers(struct gk20a *g);
 void vgpu_gr_free_channel_ctx(struct nvgpu_channel *c, bool is_tsg);
 void vgpu_gr_free_tsg_ctx(struct nvgpu_tsg *tsg);
 int vgpu_gr_alloc_obj_ctx(struct nvgpu_channel *c, u32 class_num, u32 flags);
-int vgpu_gr_bind_ctxsw_zcull(struct gk20a *g, struct nvgpu_channel *c,
-    u64 zcull_va, u32 mode);
-int vgpu_gr_get_zcull_info(struct gk20a *g,
-    struct nvgpu_gr_config *gr_config,
-    struct nvgpu_gr_zcull *zcull,
-    struct nvgpu_gr_zcull_info *zcull_params);
 u32 vgpu_gr_get_gpc_tpc_mask(struct gk20a *g, struct nvgpu_gr_config *config,
     u32 gpc_index);
 u32 vgpu_gr_get_max_fbps_count(struct gk20a *g);
 u32 vgpu_gr_get_max_ltc_per_fbp(struct gk20a *g);
 u32 vgpu_gr_get_max_lts_per_ltc(struct gk20a *g);
 #ifdef NVGPU_GRAPHICS
+int vgpu_gr_bind_ctxsw_zcull(struct gk20a *g, struct nvgpu_channel *c,
+    u64 zcull_va, u32 mode);
+int vgpu_gr_get_zcull_info(struct gk20a *g,
+    struct nvgpu_gr_config *gr_config,
+    struct nvgpu_gr_zcull *zcull,
+    struct nvgpu_gr_zcull_info *zcull_params);
 int vgpu_gr_add_zbc(struct gk20a *g, struct nvgpu_gr_zbc *zbc,
     struct nvgpu_gr_zbc_entry *zbc_val);
 int vgpu_gr_query_zbc(struct gk20a *g, struct nvgpu_gr_zbc *zbc,

View File

@@ -117,7 +117,9 @@
 #include <nvgpu/vgpu/ce_vgpu.h>
 #include <nvgpu/vgpu/vm_vgpu.h>
+#ifdef NVGPU_GRAPHICS
 #include <nvgpu/gr/zbc.h>
+#endif
 #include "vgpu_gv11b.h"
@@ -240,12 +242,14 @@ static const struct gpu_ops vgpu_gv11b_ops = {
         .get_patch_count = gm20b_ctxsw_prog_get_patch_count,
         .set_patch_count = gm20b_ctxsw_prog_set_patch_count,
         .set_patch_addr = gm20b_ctxsw_prog_set_patch_addr,
+#ifdef NVGPU_GRAPHICS
         .set_zcull_ptr = gv11b_ctxsw_prog_set_zcull_ptr,
         .set_zcull = gm20b_ctxsw_prog_set_zcull,
         .set_zcull_mode_no_ctxsw =
             gm20b_ctxsw_prog_set_zcull_mode_no_ctxsw,
         .is_zcull_mode_separate_buffer =
             gm20b_ctxsw_prog_is_zcull_mode_separate_buffer,
+#endif
         .set_pm_ptr = gv11b_ctxsw_prog_set_pm_ptr,
         .set_pm_mode = gm20b_ctxsw_prog_set_pm_mode,
         .set_pm_smpc_mode = gm20b_ctxsw_prog_set_pm_smpc_mode,
@@ -312,7 +316,9 @@ static const struct gpu_ops vgpu_gv11b_ops = {
         .init_sm_id_table = vgpu_gr_init_sm_id_table,
     },
     .setup = {
+#ifdef NVGPU_GRAPHICS
         .bind_ctxsw_zcull = vgpu_gr_bind_ctxsw_zcull,
+#endif
         .alloc_obj_ctx = vgpu_gr_alloc_obj_ctx,
         .free_gr_ctx = vgpu_gr_free_gr_ctx,
         .free_subctx = vgpu_channel_free_ctx_header,
@@ -328,11 +334,11 @@ static const struct gpu_ops vgpu_gv11b_ops = {
         .get_gpcs_swdx_dss_zbc_c_format_reg = NULL,
         .get_gpcs_swdx_dss_zbc_z_format_reg = NULL,
     },
-#endif /* NVGPU_GRAPHICS */
     .zcull = {
         .get_zcull_info = vgpu_gr_get_zcull_info,
         .program_zcull_mapping = NULL,
     },
+#endif /* NVGPU_GRAPHICS */
     .hwpm_map = {
         .align_regs_perf_pma =
             gv100_gr_hwpm_map_align_regs_perf_pma,

View File

@@ -80,6 +80,7 @@ u32 gm20b_gr_config_get_tpc_count_in_gpc(struct gk20a *g,
     return gr_gpc0_fs_gpc_num_available_tpcs_v(tmp);
 }
+#ifdef NVGPU_GRAPHICS
 u32 gm20b_gr_config_get_zcull_count_in_gpc(struct gk20a *g,
     struct nvgpu_gr_config *config, u32 gpc_index)
 {
@@ -92,6 +93,7 @@ u32 gm20b_gr_config_get_zcull_count_in_gpc(struct gk20a *g,
     return gr_gpc0_fs_gpc_num_available_zculls_v(tmp);
 }
+#endif
 u32 gm20b_gr_config_get_pes_tpc_mask(struct gk20a *g,
     struct nvgpu_gr_config *config, u32 gpc_index, u32 pes_index)

View File

@@ -34,8 +34,10 @@ u32 gm20b_gr_config_get_gpc_tpc_mask(struct gk20a *g,
     struct nvgpu_gr_config *config, u32 gpc_index);
 u32 gm20b_gr_config_get_tpc_count_in_gpc(struct gk20a *g,
     struct nvgpu_gr_config *config, u32 gpc_index);
+#ifdef NVGPU_GRAPHICS
 u32 gm20b_gr_config_get_zcull_count_in_gpc(struct gk20a *g,
     struct nvgpu_gr_config *config, u32 gpc_index);
+#endif
 u32 gm20b_gr_config_get_pes_tpc_mask(struct gk20a *g,
     struct nvgpu_gr_config *config, u32 gpc_index, u32 pes_index);
 u32 gm20b_gr_config_get_pd_dist_skip_table_size(void);

View File

@@ -80,6 +80,7 @@ void gm20b_ctxsw_prog_set_patch_addr(struct gk20a *g,
         ctxsw_prog_main_image_patch_adr_hi_o(), u64_hi32(addr));
 }
+#ifdef NVGPU_GRAPHICS
 void gm20b_ctxsw_prog_set_zcull_ptr(struct gk20a *g, struct nvgpu_mem *ctx_mem,
     u64 addr)
 {
@@ -105,6 +106,7 @@ bool gm20b_ctxsw_prog_is_zcull_mode_separate_buffer(u32 mode)
 {
     return mode == ctxsw_prog_main_image_zcull_mode_separate_buffer_v();
 }
+#endif
 void gm20b_ctxsw_prog_set_pm_ptr(struct gk20a *g, struct nvgpu_mem *ctx_mem,
     u64 addr)

View File

@@ -39,6 +39,7 @@ void gm20b_ctxsw_prog_set_patch_count(struct gk20a *g,
     struct nvgpu_mem *ctx_mem, u32 count);
 void gm20b_ctxsw_prog_set_patch_addr(struct gk20a *g,
     struct nvgpu_mem *ctx_mem, u64 addr);
+#ifdef NVGPU_GRAPHICS
 void gm20b_ctxsw_prog_set_zcull_ptr(struct gk20a *g, struct nvgpu_mem *ctx_mem,
     u64 addr);
 void gm20b_ctxsw_prog_set_zcull(struct gk20a *g, struct nvgpu_mem *ctx_mem,
@@ -46,6 +47,7 @@ void gm20b_ctxsw_prog_set_zcull(struct gk20a *g, struct nvgpu_mem *ctx_mem,
 void gm20b_ctxsw_prog_set_zcull_mode_no_ctxsw(struct gk20a *g,
     struct nvgpu_mem *ctx_mem);
 bool gm20b_ctxsw_prog_is_zcull_mode_separate_buffer(u32 mode);
+#endif
 void gm20b_ctxsw_prog_set_pm_ptr(struct gk20a *g, struct nvgpu_mem *ctx_mem,
     u64 addr);
 void gm20b_ctxsw_prog_set_pm_mode(struct gk20a *g,

View File

@@ -83,8 +83,8 @@
#include "hal/rc/rc_gk20a.h" #include "hal/rc/rc_gk20a.h"
#ifdef NVGPU_GRAPHICS #ifdef NVGPU_GRAPHICS
#include "hal/gr/zbc/zbc_gm20b.h" #include "hal/gr/zbc/zbc_gm20b.h"
#endif
#include "hal/gr/zcull/zcull_gm20b.h" #include "hal/gr/zcull/zcull_gm20b.h"
#endif
#include "hal/gr/falcon/gr_falcon_gm20b.h" #include "hal/gr/falcon/gr_falcon_gm20b.h"
#include "hal/gr/init/gr_init_gm20b.h" #include "hal/gr/init/gr_init_gm20b.h"
#include "hal/gr/intr/gr_intr_gm20b.h" #include "hal/gr/intr/gr_intr_gm20b.h"
@@ -219,12 +219,14 @@ static const struct gpu_ops gm20b_ops = {
         .get_patch_count = gm20b_ctxsw_prog_get_patch_count,
         .set_patch_count = gm20b_ctxsw_prog_set_patch_count,
         .set_patch_addr = gm20b_ctxsw_prog_set_patch_addr,
+#ifdef NVGPU_GRAPHICS
         .set_zcull_ptr = gm20b_ctxsw_prog_set_zcull_ptr,
         .set_zcull = gm20b_ctxsw_prog_set_zcull,
         .set_zcull_mode_no_ctxsw =
             gm20b_ctxsw_prog_set_zcull_mode_no_ctxsw,
         .is_zcull_mode_separate_buffer =
             gm20b_ctxsw_prog_is_zcull_mode_separate_buffer,
+#endif /* NVGPU_GRAPHICS */
         .set_pm_ptr = gm20b_ctxsw_prog_set_pm_ptr,
         .set_pm_mode = gm20b_ctxsw_prog_set_pm_mode,
         .set_pm_smpc_mode = gm20b_ctxsw_prog_set_pm_smpc_mode,
@@ -272,8 +274,10 @@ static const struct gpu_ops gm20b_ops = {
         .get_gpc_tpc_mask = gm20b_gr_config_get_gpc_tpc_mask,
         .get_tpc_count_in_gpc =
             gm20b_gr_config_get_tpc_count_in_gpc,
+#ifdef NVGPU_GRAPHICS
         .get_zcull_count_in_gpc =
             gm20b_gr_config_get_zcull_count_in_gpc,
+#endif /* NVGPU_GRAPHICS */
         .get_pes_tpc_mask = gm20b_gr_config_get_pes_tpc_mask,
         .get_pd_dist_skip_table_size =
             gm20b_gr_config_get_pd_dist_skip_table_size,
@@ -304,7 +308,9 @@ static const struct gpu_ops gm20b_ops = {
     },
 #endif /* CONFIG_GK20A_CTXSW_TRACE */
     .setup = {
+#ifdef NVGPU_GRAPHICS
         .bind_ctxsw_zcull = nvgpu_gr_setup_bind_ctxsw_zcull,
+#endif /* NVGPU_GRAPHICS */
         .alloc_obj_ctx = nvgpu_gr_setup_alloc_obj_ctx,
         .free_gr_ctx = nvgpu_gr_setup_free_gr_ctx,
     },
@@ -318,12 +324,12 @@ static const struct gpu_ops gm20b_ops = {
         .get_gpcs_swdx_dss_zbc_c_format_reg = NULL,
         .get_gpcs_swdx_dss_zbc_z_format_reg = NULL,
     },
-#endif /* NVGPU_GRAPHICS */
     .zcull = {
         .init_zcull_hw = gm20b_gr_init_zcull_hw,
         .get_zcull_info = gm20b_gr_get_zcull_info,
         .program_zcull_mapping = gm20b_gr_program_zcull_mapping,
     },
+#endif /* NVGPU_GRAPHICS */
     .init = {
         .get_no_of_sm = nvgpu_gr_get_no_of_sm,
         .wait_initialized = nvgpu_gr_wait_initialized,

View File

@@ -256,12 +256,14 @@ static const struct gpu_ops gp10b_ops = {
         .get_patch_count = gm20b_ctxsw_prog_get_patch_count,
         .set_patch_count = gm20b_ctxsw_prog_set_patch_count,
         .set_patch_addr = gm20b_ctxsw_prog_set_patch_addr,
+#ifdef NVGPU_GRAPHICS
         .set_zcull_ptr = gm20b_ctxsw_prog_set_zcull_ptr,
         .set_zcull = gm20b_ctxsw_prog_set_zcull,
         .set_zcull_mode_no_ctxsw =
             gm20b_ctxsw_prog_set_zcull_mode_no_ctxsw,
         .is_zcull_mode_separate_buffer =
             gm20b_ctxsw_prog_is_zcull_mode_separate_buffer,
+#endif /* NVGPU_GRAPHICS */
         .set_pm_ptr = gm20b_ctxsw_prog_set_pm_ptr,
         .set_pm_mode = gm20b_ctxsw_prog_set_pm_mode,
         .set_pm_smpc_mode = gm20b_ctxsw_prog_set_pm_smpc_mode,
@@ -318,8 +320,10 @@ static const struct gpu_ops gp10b_ops = {
         .get_gpc_tpc_mask = gm20b_gr_config_get_gpc_tpc_mask,
         .get_tpc_count_in_gpc =
             gm20b_gr_config_get_tpc_count_in_gpc,
+#ifdef NVGPU_GRAPHICS
         .get_zcull_count_in_gpc =
             gm20b_gr_config_get_zcull_count_in_gpc,
+#endif /* NVGPU_GRAPHICS */
         .get_pes_tpc_mask = gm20b_gr_config_get_pes_tpc_mask,
         .get_pd_dist_skip_table_size =
             gm20b_gr_config_get_pd_dist_skip_table_size,
@@ -350,7 +354,9 @@ static const struct gpu_ops gp10b_ops = {
     },
 #endif /* CONFIG_GK20A_CTXSW_TRACE */
     .setup = {
+#ifdef NVGPU_GRAPHICS
         .bind_ctxsw_zcull = nvgpu_gr_setup_bind_ctxsw_zcull,
+#endif /* NVGPU_GRAPHICS */
         .alloc_obj_ctx = nvgpu_gr_setup_alloc_obj_ctx,
         .free_gr_ctx = nvgpu_gr_setup_free_gr_ctx,
         .set_preemption_mode = nvgpu_gr_setup_set_preemption_mode,
@@ -367,12 +373,12 @@ static const struct gpu_ops gp10b_ops = {
         .get_gpcs_swdx_dss_zbc_z_format_reg =
             gp10b_gr_zbc_get_gpcs_swdx_dss_zbc_z_format_reg,
     },
-#endif /* NVGPU_GRAPHICS */
     .zcull = {
         .init_zcull_hw = gm20b_gr_init_zcull_hw,
         .get_zcull_info = gm20b_gr_get_zcull_info,
         .program_zcull_mapping = gm20b_gr_program_zcull_mapping,
     },
+#endif /* NVGPU_GRAPHICS */
     .init = {
         .get_no_of_sm = nvgpu_gr_get_no_of_sm,
         .wait_initialized = nvgpu_gr_wait_initialized,

View File

@@ -107,9 +107,9 @@
 #ifdef NVGPU_GRAPHICS
 #include "hal/gr/zbc/zbc_gp10b.h"
 #include "hal/gr/zbc/zbc_gv11b.h"
-#endif
 #include "hal/gr/zcull/zcull_gm20b.h"
 #include "hal/gr/zcull/zcull_gv11b.h"
+#endif
 #include "hal/gr/init/gr_init_gm20b.h"
 #include "hal/gr/init/gr_init_gp10b.h"
 #include "hal/gr/init/gr_init_gv11b.h"
@@ -325,12 +325,14 @@ static const struct gpu_ops gv11b_ops = {
         .get_patch_count = gm20b_ctxsw_prog_get_patch_count,
         .set_patch_count = gm20b_ctxsw_prog_set_patch_count,
         .set_patch_addr = gm20b_ctxsw_prog_set_patch_addr,
+#ifdef NVGPU_GRAPHICS
         .set_zcull_ptr = gv11b_ctxsw_prog_set_zcull_ptr,
         .set_zcull = gm20b_ctxsw_prog_set_zcull,
         .set_zcull_mode_no_ctxsw =
             gm20b_ctxsw_prog_set_zcull_mode_no_ctxsw,
         .is_zcull_mode_separate_buffer =
             gm20b_ctxsw_prog_is_zcull_mode_separate_buffer,
+#endif
         .set_pm_ptr = gv11b_ctxsw_prog_set_pm_ptr,
         .set_pm_mode = gm20b_ctxsw_prog_set_pm_mode,
         .set_pm_smpc_mode = gm20b_ctxsw_prog_set_pm_smpc_mode,
@@ -397,8 +399,10 @@ static const struct gpu_ops gv11b_ops = {
         .get_gpc_tpc_mask = gm20b_gr_config_get_gpc_tpc_mask,
         .get_tpc_count_in_gpc =
             gm20b_gr_config_get_tpc_count_in_gpc,
+#ifdef NVGPU_GRAPHICS
         .get_zcull_count_in_gpc =
             gm20b_gr_config_get_zcull_count_in_gpc,
+#endif
         .get_pes_tpc_mask = gm20b_gr_config_get_pes_tpc_mask,
         .get_pd_dist_skip_table_size =
             gm20b_gr_config_get_pd_dist_skip_table_size,
@@ -429,7 +433,9 @@ static const struct gpu_ops gv11b_ops = {
     },
 #endif /* CONFIG_GK20A_CTXSW_TRACE */
     .setup = {
+#ifdef NVGPU_GRAPHICS
         .bind_ctxsw_zcull = nvgpu_gr_setup_bind_ctxsw_zcull,
+#endif
         .alloc_obj_ctx = nvgpu_gr_setup_alloc_obj_ctx,
         .free_gr_ctx = nvgpu_gr_setup_free_gr_ctx,
         .free_subctx = nvgpu_gr_setup_free_subctx,
@@ -447,12 +453,12 @@ static const struct gpu_ops gv11b_ops = {
         .get_gpcs_swdx_dss_zbc_z_format_reg =
             gv11b_gr_zbc_get_gpcs_swdx_dss_zbc_z_format_reg,
     },
-#endif /* NVGPU_GRAPHICS */
     .zcull = {
         .init_zcull_hw = gm20b_gr_init_zcull_hw,
         .get_zcull_info = gm20b_gr_get_zcull_info,
         .program_zcull_mapping = gv11b_gr_program_zcull_mapping,
     },
+#endif /* NVGPU_GRAPHICS */
     .hwpm_map = {
         .align_regs_perf_pma =
             gv100_gr_hwpm_map_align_regs_perf_pma,

View File

@@ -117,9 +117,9 @@
 #ifdef NVGPU_GRAPHICS
 #include "hal/gr/zbc/zbc_gp10b.h"
 #include "hal/gr/zbc/zbc_gv11b.h"
-#endif
 #include "hal/gr/zcull/zcull_gm20b.h"
 #include "hal/gr/zcull/zcull_gv11b.h"
+#endif
 #include "hal/gr/init/gr_init_gm20b.h"
 #include "hal/gr/init/gr_init_gp10b.h"
 #include "hal/gr/init/gr_init_gv11b.h"
@@ -360,12 +360,14 @@ static const struct gpu_ops tu104_ops = {
         .get_patch_count = gm20b_ctxsw_prog_get_patch_count,
         .set_patch_count = gm20b_ctxsw_prog_set_patch_count,
         .set_patch_addr = gm20b_ctxsw_prog_set_patch_addr,
+#ifdef NVGPU_GRAPHICS
         .set_zcull_ptr = gv11b_ctxsw_prog_set_zcull_ptr,
         .set_zcull = gm20b_ctxsw_prog_set_zcull,
         .set_zcull_mode_no_ctxsw =
             gm20b_ctxsw_prog_set_zcull_mode_no_ctxsw,
         .is_zcull_mode_separate_buffer =
             gm20b_ctxsw_prog_is_zcull_mode_separate_buffer,
+#endif
         .set_pm_ptr = gv11b_ctxsw_prog_set_pm_ptr,
         .set_pm_mode = gm20b_ctxsw_prog_set_pm_mode,
         .set_pm_smpc_mode = gm20b_ctxsw_prog_set_pm_smpc_mode,
@@ -433,8 +435,10 @@ static const struct gpu_ops tu104_ops = {
         .get_gpc_tpc_mask = gm20b_gr_config_get_gpc_tpc_mask,
         .get_tpc_count_in_gpc =
             gm20b_gr_config_get_tpc_count_in_gpc,
+#ifdef NVGPU_GRAPHICS
         .get_zcull_count_in_gpc =
             gm20b_gr_config_get_zcull_count_in_gpc,
+#endif
         .get_pes_tpc_mask = gm20b_gr_config_get_pes_tpc_mask,
         .get_pd_dist_skip_table_size =
             gm20b_gr_config_get_pd_dist_skip_table_size,
@@ -465,7 +469,9 @@ static const struct gpu_ops tu104_ops = {
     },
 #endif /* CONFIG_GK20A_CTXSW_TRACE */
     .setup = {
+#ifdef NVGPU_GRAPHICS
         .bind_ctxsw_zcull = nvgpu_gr_setup_bind_ctxsw_zcull,
+#endif
         .alloc_obj_ctx = nvgpu_gr_setup_alloc_obj_ctx,
         .free_gr_ctx = nvgpu_gr_setup_free_gr_ctx,
         .free_subctx = nvgpu_gr_setup_free_subctx,
@@ -483,12 +489,12 @@ static const struct gpu_ops tu104_ops = {
         .get_gpcs_swdx_dss_zbc_z_format_reg =
             gv11b_gr_zbc_get_gpcs_swdx_dss_zbc_z_format_reg,
     },
-#endif /* NVGPU_GRAPHICS */
     .zcull = {
         .init_zcull_hw = gm20b_gr_init_zcull_hw,
         .get_zcull_info = gm20b_gr_get_zcull_info,
         .program_zcull_mapping = gv11b_gr_program_zcull_mapping,
     },
+#endif /* NVGPU_GRAPHICS */
     .hwpm_map = {
         .align_regs_perf_pma =
             gv100_gr_hwpm_map_align_regs_perf_pma,

View File

@@ -66,9 +66,9 @@ struct nvgpu_gr_subctx;
 struct nvgpu_gr_zbc;
 struct nvgpu_gr_zbc_entry;
 struct nvgpu_gr_zbc_query_params;
-#endif
 struct nvgpu_gr_zcull;
 struct nvgpu_gr_zcull_info;
+#endif
 struct nvgpu_gr_tpc_exception;
 struct nvgpu_gr_intr_info;
 struct nvgpu_channel_hw_state;
@@ -439,6 +439,7 @@ struct gpu_ops {
             struct nvgpu_mem *ctx_mem, u32 count);
         void (*set_patch_addr)(struct gk20a *g,
             struct nvgpu_mem *ctx_mem, u64 addr);
+#ifdef NVGPU_GRAPHICS
         void (*set_zcull_ptr)(struct gk20a *g,
             struct nvgpu_mem *ctx_mem, u64 addr);
         void (*set_zcull)(struct gk20a *g,
@@ -446,6 +447,7 @@ struct gpu_ops {
         void (*set_zcull_mode_no_ctxsw)(struct gk20a *g,
             struct nvgpu_mem *ctx_mem);
         bool (*is_zcull_mode_separate_buffer)(u32 mode);
+#endif
         void (*set_pm_ptr)(struct gk20a *g,
             struct nvgpu_mem *ctx_mem, u64 addr);
         void (*set_pm_mode)(struct gk20a *g,
@@ -518,8 +520,10 @@ struct gpu_ops {
             struct nvgpu_gr_config *config, u32 gpc_index);
         u32 (*get_tpc_count_in_gpc)(struct gk20a *g,
             struct nvgpu_gr_config *config, u32 gpc_index);
+#ifdef NVGPU_GRAPHICS
         u32 (*get_zcull_count_in_gpc)(struct gk20a *g,
             struct nvgpu_gr_config *config, u32 gpc_index);
+#endif
         u32 (*get_pes_tpc_mask)(struct gk20a *g,
             struct nvgpu_gr_config *config, u32 gpc_index,
             u32 pes_index);
@@ -625,10 +629,12 @@ struct gpu_ops {
 #endif
     struct {
+#ifdef NVGPU_GRAPHICS
         int (*bind_ctxsw_zcull)(struct gk20a *g,
             struct nvgpu_channel *c,
             u64 zcull_va,
             u32 mode);
+#endif
         int (*alloc_obj_ctx)(struct nvgpu_channel *c,
             u32 class_num, u32 flags);
         void (*free_gr_ctx)(struct gk20a *g,
@@ -660,7 +666,7 @@ struct gpu_ops {
         u32 (*get_gpcs_swdx_dss_zbc_z_format_reg)(
             struct gk20a *g);
     } zbc;
-#endif
     struct {
         int (*init_zcull_hw)(struct gk20a *g,
             struct nvgpu_gr_zcull *gr_zcull,
@@ -673,6 +679,7 @@ struct gpu_ops {
             u32 zcull_alloc_num,
             u32 *zcull_map_tiles);
     } zcull;
+#endif /* NVGPU_GRAPHICS */
     struct {
         void (*align_regs_perf_pma)(u32 *offset);

View File

@@ -49,10 +49,13 @@ struct nvgpu_gr_ctx;
 struct nvgpu_gr_global_ctx_buffer_desc;
 struct nvgpu_gr_global_ctx_local_golden_image;
 struct patch_desc;
-struct zcull_ctx_desc;
 struct pm_ctx_desc;
 struct nvgpu_gr_ctx_desc;
+#ifdef NVGPU_GRAPHICS
+struct zcull_ctx_desc;
+#endif
 #define NVGPU_GR_CTX_CTX 0U
 #define NVGPU_GR_CTX_PM_CTX 1U
 #define NVGPU_GR_CTX_PATCH_CTX 2U
@@ -113,9 +116,6 @@ int nvgpu_gr_ctx_alloc_patch_ctx(struct gk20a *g,
 void nvgpu_gr_ctx_free_patch_ctx(struct gk20a *g, struct vm_gk20a *vm,
     struct nvgpu_gr_ctx *gr_ctx);
-void nvgpu_gr_ctx_set_zcull_ctx(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
-    u32 mode, u64 gpu_va);
 int nvgpu_gr_ctx_alloc_ctxsw_buffers(struct gk20a *g,
     struct nvgpu_gr_ctx *gr_ctx,
     struct nvgpu_gr_ctx_desc *gr_ctx_desc,
@@ -151,8 +151,6 @@ void nvgpu_gr_ctx_set_patch_ctx_data_count(struct nvgpu_gr_ctx *gr_ctx,
 struct nvgpu_mem *nvgpu_gr_ctx_get_pm_ctx_mem(struct nvgpu_gr_ctx *gr_ctx);
-u64 nvgpu_gr_ctx_get_zcull_ctx_va(struct nvgpu_gr_ctx *gr_ctx);
 struct nvgpu_mem *nvgpu_gr_ctx_get_ctx_mem(struct nvgpu_gr_ctx *gr_ctx);
 int nvgpu_gr_ctx_load_golden_ctx_image(struct gk20a *g,
@@ -177,9 +175,17 @@ void nvgpu_gr_ctx_set_patch_ctx(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
 u32 nvgpu_gr_ctx_get_ctx_id(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx);
+#ifdef NVGPU_GRAPHICS
+void nvgpu_gr_ctx_set_zcull_ctx(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
+    u32 mode, u64 gpu_va);
+u64 nvgpu_gr_ctx_get_zcull_ctx_va(struct nvgpu_gr_ctx *gr_ctx);
 int nvgpu_gr_ctx_init_zcull(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx);
 int nvgpu_gr_ctx_zcull_setup(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
     bool set_zcull_ptr);
+#endif
 int nvgpu_gr_ctx_set_smpc_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
     bool enable);

View File

@@ -108,6 +108,9 @@ void *nvgpu_gr_falcon_get_surface_desc_cpu_va(
 u32 nvgpu_gr_falcon_get_golden_image_size(struct nvgpu_gr_falcon *falcon);
 u32 nvgpu_gr_falcon_get_pm_ctxsw_image_size(struct nvgpu_gr_falcon *falcon);
 u32 nvgpu_gr_falcon_get_preempt_image_size(struct nvgpu_gr_falcon *falcon);
+#ifdef NVGPU_GRAPHICS
 u32 nvgpu_gr_falcon_get_zcull_image_size(struct nvgpu_gr_falcon *falcon);
+#endif
 #endif /* NVGPU_GR_FALCON_H */

View File

@@ -27,8 +27,11 @@ struct gk20a;
 struct nvgpu_gr_falcon;
 struct nvgpu_gr_obj_ctx_golden_image;
 struct nvgpu_gr_config;
+#ifdef NVGPU_GRAPHICS
 struct nvgpu_gr_zbc;
 struct nvgpu_gr_zcull;
+#endif
 struct nvgpu_gr_hwpm_map;
 struct nvgpu_gr_intr;
 struct nvgpu_gr_global_ctx_buffer_desc;
@@ -37,8 +40,10 @@ struct nvgpu_gr_global_ctx_buffer_desc;
 struct nvgpu_gr_falcon *nvgpu_gr_get_falcon_ptr(struct gk20a *g);
 struct nvgpu_gr_obj_ctx_golden_image *nvgpu_gr_get_golden_image_ptr(
     struct gk20a *g);
+#ifdef NVGPU_GRAPHICS
 struct nvgpu_gr_zcull *nvgpu_gr_get_zcull_ptr(struct gk20a *g);
 struct nvgpu_gr_zbc *nvgpu_gr_get_zbc_ptr(struct gk20a *g);
+#endif
 struct nvgpu_gr_config *nvgpu_gr_get_config_ptr(struct gk20a *g);
 struct nvgpu_gr_hwpm_map *nvgpu_gr_get_hwpm_map_ptr(struct gk20a *g);
 struct nvgpu_gr_intr *nvgpu_gr_get_intr_ptr(struct gk20a *g);

View File

@@ -29,8 +29,10 @@ struct nvgpu_channel;
 struct vm_gk20a;
 struct nvgpu_gr_ctx;
+#ifdef NVGPU_GRAPHICS
 int nvgpu_gr_setup_bind_ctxsw_zcull(struct gk20a *g, struct nvgpu_channel *c,
     u64 zcull_va, u32 mode);
+#endif
 int nvgpu_gr_setup_alloc_obj_ctx(struct nvgpu_channel *c, u32 class_num,
     u32 flags);

View File

@@ -40,8 +40,10 @@ void nvgpu_gr_subctx_load_ctx_header(struct gk20a *g,
     struct nvgpu_gr_subctx *subctx,
     struct nvgpu_gr_ctx *gr_ctx, u64 gpu_va);
+#ifdef NVGPU_GRAPHICS
 void nvgpu_gr_subctx_zcull_setup(struct gk20a *g, struct nvgpu_gr_subctx *subctx,
     struct nvgpu_gr_ctx *gr_ctx);
+#endif
 void nvgpu_gr_subctx_set_hwpm_mode(struct gk20a *g,
     struct nvgpu_gr_subctx *subctx, struct nvgpu_gr_ctx *gr_ctx);

View File

@@ -220,12 +220,6 @@ struct tegra_vgpu_ch_ctx_params {
     u32 class_num;
 };
-struct tegra_vgpu_zcull_bind_params {
-    u64 handle;
-    u64 zcull_va;
-    u32 mode;
-};
 enum {
     TEGRA_VGPU_L2_MAINT_FLUSH = 0,
     TEGRA_VGPU_L2_MAINT_INV,
@@ -246,6 +240,13 @@ struct tegra_vgpu_golden_ctx_params {
     u32 size;
 };
+#ifdef NVGPU_GRAPHICS
+struct tegra_vgpu_zcull_bind_params {
+    u64 handle;
+    u64 zcull_va;
+    u32 mode;
+};
 struct tegra_vgpu_zcull_info_params {
     u32 width_align_pixels;
     u32 height_align_pixels;
@@ -281,6 +282,7 @@ struct tegra_vgpu_zbc_query_table_params {
     u32 type; /* color or depth */
     u32 index_size; /* [out] size, [in] index */
 };
+#endif
 enum {
     TEGRA_VGPU_GR_BIND_CTXSW_BUFFER_MAIN,
@@ -638,13 +640,15 @@ struct tegra_vgpu_cmd_msg {
         struct tegra_vgpu_channel_config_params channel_config;
         struct tegra_vgpu_ramfc_params ramfc;
         struct tegra_vgpu_ch_ctx_params ch_ctx;
-        struct tegra_vgpu_zcull_bind_params zcull_bind;
         struct tegra_vgpu_cache_maint_params cache_maint;
         struct tegra_vgpu_runlist_params runlist;
         struct tegra_vgpu_golden_ctx_params golden_ctx;
+#ifdef NVGPU_GRAPHICS
+        struct tegra_vgpu_zcull_bind_params zcull_bind;
         struct tegra_vgpu_zcull_info_params zcull_info;
         struct tegra_vgpu_zbc_set_table_params zbc_set_table;
         struct tegra_vgpu_zbc_query_table_params zbc_query_table;
+#endif
         struct tegra_vgpu_gr_bind_ctxsw_buffers_params gr_bind_ctxsw_buffers;
         struct tegra_vgpu_mmu_debug_mode mmu_debug_mode;
         struct tegra_vgpu_sm_debug_mode sm_debug_mode;

View File

@@ -760,6 +760,7 @@ notif_clean_up:
     return ret;
 }
+#ifdef NVGPU_GRAPHICS
 static int gk20a_channel_zcull_bind(struct nvgpu_channel *ch,
     struct nvgpu_zcull_bind_args *args)
 {
@@ -770,6 +771,7 @@ static int gk20a_channel_zcull_bind(struct nvgpu_channel *ch,
     return g->ops.gr.setup.bind_ctxsw_zcull(g, ch,
         args->gpu_va, args->mode);
 }
+#endif
 static int gk20a_ioctl_channel_submit_gpfifo(
     struct nvgpu_channel *ch,
@@ -1226,6 +1228,7 @@ long gk20a_channel_ioctl(struct file *filp,
         gk20a_idle(ch->g);
         break;
+#ifdef NVGPU_GRAPHICS
     case NVGPU_IOCTL_CHANNEL_ZCULL_BIND:
         err = gk20a_busy(ch->g);
         if (err) {
@@ -1238,6 +1241,7 @@ long gk20a_channel_ioctl(struct file *filp,
             (struct nvgpu_zcull_bind_args *)buf);
         gk20a_idle(ch->g);
         break;
+#endif
     case NVGPU_IOCTL_CHANNEL_SET_ERROR_NOTIFIER:
         err = gk20a_busy(ch->g);
         if (err) {

View File

@@ -37,8 +37,8 @@
 #include <nvgpu/gr/config.h>
 #ifdef NVGPU_GRAPHICS
 #include <nvgpu/gr/zbc.h>
-#endif
 #include <nvgpu/gr/zcull.h>
+#endif
 #include <nvgpu/gr/gr.h>
 #include <nvgpu/gr/gr_utils.h>
 #include <nvgpu/gr/warpstate.h>
@@ -1661,13 +1661,13 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
 {
     struct gk20a_ctrl_priv *priv = filp->private_data;
     struct gk20a *g = priv->g;
+    u8 buf[NVGPU_GPU_IOCTL_MAX_ARG_SIZE];
+    struct nvgpu_gr_config *gr_config = nvgpu_gr_get_config_ptr(g);
+#ifdef NVGPU_GRAPHICS
     struct nvgpu_gpu_zcull_get_ctx_size_args *get_ctx_size_args;
     struct nvgpu_gpu_zcull_get_info_args *get_info_args;
-    u8 buf[NVGPU_GPU_IOCTL_MAX_ARG_SIZE];
     struct nvgpu_gr_zcull_info *zcull_info;
-    struct nvgpu_gr_config *gr_config = nvgpu_gr_get_config_ptr(g);
     struct nvgpu_gr_zcull *gr_zcull = nvgpu_gr_get_zcull_ptr(g);
-#ifdef NVGPU_GRAPHICS
     struct nvgpu_gr_zbc *gr_zbc = nvgpu_gr_get_zbc_ptr(g);
     struct nvgpu_gr_zbc_entry *zbc_val;
     struct nvgpu_gr_zbc_query_params *zbc_tbl;
@@ -1701,6 +1701,7 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
     nvgpu_speculation_barrier();
     switch (cmd) {
+#ifdef NVGPU_GRAPHICS
     case NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE:
         get_ctx_size_args = (struct nvgpu_gpu_zcull_get_ctx_size_args *)buf;
@@ -1737,7 +1738,6 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
         nvgpu_kfree(g, zcull_info);
         break;
-#ifdef NVGPU_GRAPHICS
     case NVGPU_GPU_IOCTL_ZBC_SET_TABLE:
         set_table_args = (struct nvgpu_gpu_zbc_set_table_args *)buf;