From b2caba06f0eb9b96e6ee3320db78324209403bcb Mon Sep 17 00:00:00 2001
From: Vinod G <vinodg@nvidia.com>
Date: Wed, 3 Apr 2019 14:07:05 -0700
Subject: [PATCH] gpu: nvgpu: move handle_notify_pending hal to hal.gr.intr

Move handle_notify_pending hal to hal.gr.intr

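Move the gk20a_gr_handle_notify_pending code from gr_gk20a.c to
common.gr.intr as the nvgpu_gr_intr_handle_notify_pending function.
The moved code calls nvgpu_io_valid_reg(), nvgpu_readl() and
nvgpu_writel() where it previously called gk20a_io_valid_reg(),
gk20a_readl() and gk20a_writel().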

JIRA NVGPU-1891
JIRA NVGPU-3016

Change-Id: Ib3284a83253b03e5708674fce683331ee20b8213
Signed-off-by: Vinod G <vinodg@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2089172
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/Makefile                   |   3 +-
 drivers/gpu/nvgpu/Makefile.sources           |   1 +
 drivers/gpu/nvgpu/common/gr/gr_intr.c        | 182 +++++++++++++++++++
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c           | 164 +----------------
 drivers/gpu/nvgpu/gk20a/gr_gk20a.h           |   2 -
 drivers/gpu/nvgpu/gm20b/hal_gm20b.c          |   4 +-
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c          |   4 +-
 drivers/gpu/nvgpu/gv100/hal_gv100.c          |   4 +-
 drivers/gpu/nvgpu/gv11b/hal_gv11b.c          |   4 +-
 drivers/gpu/nvgpu/include/nvgpu/gk20a.h      |   4 +-
 drivers/gpu/nvgpu/include/nvgpu/gr/gr_intr.h |   4 +
 drivers/gpu/nvgpu/tu104/hal_tu104.c          |   4 +-
 12 files changed, 207 insertions(+), 173 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/common/gr/gr_intr.c

diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index ed89bd02d..a58a32e29 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -65,8 +65,9 @@ nvgpu-y += \
 	common/regops/regops_gv11b.o \
 	common/regops/regops_tu104.o \
 	common/ltc/ltc.o \
-	common/gr/gr.o \
 	common/cbc/cbc.o \
+	common/gr/gr.o \
+	common/gr/gr_intr.o \
 	common/gr/global_ctx.o \
 	common/gr/ctx.o \
 	common/gr/gr_falcon.o \
diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources
index 0bfd6b68e..567bb71ab 100644
--- a/drivers/gpu/nvgpu/Makefile.sources
+++ b/drivers/gpu/nvgpu/Makefile.sources
@@ -107,6 +107,7 @@ srcs += common/sim.c \
 	common/engine_queues/engine_emem_queue.c \
 	common/engine_queues/engine_fb_queue.c \
 	common/gr/gr.c \
+	common/gr/gr_intr.c \
 	common/gr/global_ctx.c \
 	common/gr/subctx.c \
 	common/gr/ctx.c \
diff --git a/drivers/gpu/nvgpu/common/gr/gr_intr.c b/drivers/gpu/nvgpu/common/gr/gr_intr.c
new file mode 100644
index 000000000..c77627654
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/gr/gr_intr.c
@@ -0,0 +1,182 @@
+/*
+ * Copyright (c) 2019, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <nvgpu/gk20a.h>
+#include <nvgpu/io.h>
+#include <nvgpu/channel.h>
+#include <nvgpu/regops.h>
+
+#include <nvgpu/gr/gr.h>
+#include <nvgpu/gr/gr_intr.h>
+
+#if defined(CONFIG_GK20A_CYCLE_STATS)
+/* Check whether a cyclestats op may access the BAR0 register at @offset. */
+static inline bool is_valid_cyclestats_bar0_offset_gk20a(struct gk20a *g,
+							 u32 offset)
+{
+	/* g->allow_all disables all of the checks below */
+	if (g->allow_all) {
+		return true;
+	}
+	/* support only 24-bit 4-byte aligned offsets */
+	if ((offset & 0xFF000003U) != 0U) {
+		return false;
+	}
+	if (!is_bar0_global_offset_whitelisted_gk20a(g, offset)) {
+		return false;
+	}
+	/* resource size check in case bar0 is smaller than assumed */
+	return nvgpu_io_valid_reg(g, offset);
+}
+#endif
+
+#if defined(CONFIG_GK20A_CYCLE_STATS)
+/*
+ * Execute the cyclestats elements in the channel's cyclestate buffer from
+ * byte @offset until OP_END, an unknown op or a malformed element. Fields
+ * are copied to locals before being checked so that the validated value is
+ * the one used (assumes the buffer may change under us - TODO confirm).
+ */
+static void gr_intr_exec_cyclestats(struct gk20a *g, struct channel_gk20a *ch,
+				    u32 offset)
+{
+	void *virtual_address;
+	u32 buffer_size;
+	bool exit = false;
+
+	/* GL will never use payload 0 for cycle state */
+	if ((ch->cyclestate.cyclestate_buffer == NULL) || (offset == 0U)) {
+		return;
+	}
+
+	nvgpu_mutex_acquire(&ch->cyclestate.cyclestate_buffer_mutex);
+	virtual_address = ch->cyclestate.cyclestate_buffer;
+	buffer_size = ch->cyclestate.cyclestate_buffer_size;
+	while (!exit) {
+		struct share_buffer_head *sh_hdr;
+		u32 min_element_size;
+		u32 op;
+		u32 size;
+
+		/* validate offset */
+		if (offset + sizeof(struct share_buffer_head) > buffer_size ||
+		    offset + sizeof(struct share_buffer_head) < offset) {
+			nvgpu_err(g, "cyclestats buffer overrun at offset 0x%x",
+				  offset);
+			break;
+		}
+		sh_hdr = (struct share_buffer_head *)
+			 ((char *)virtual_address + offset);
+		op = (u32)sh_hdr->operation;
+		size = sh_hdr->size;
+		min_element_size = (op == (u32)OP_END) ?
+			sizeof(struct share_buffer_head) :
+			sizeof(struct gk20a_cyclestate_buffer_elem);
+		/* validate the element size */
+		if (size < min_element_size || offset + size > buffer_size ||
+		    offset + size < offset) {
+			nvgpu_err(g,
+				  "bad cyclestate buffer header size at offset 0x%x",
+				  offset);
+			sh_hdr->failed = true;
+			break;
+		}
+
+		switch (op) {
+		case OP_END:
+			exit = true;
+			break;
+		case BAR0_READ32:
+		case BAR0_WRITE32:
+		{
+			struct gk20a_cyclestate_buffer_elem *op_elem =
+				(struct gk20a_cyclestate_buffer_elem *)sh_hdr;
+			u32 reg = op_elem->offset_bar0;
+			u32 first_bit = op_elem->first_bit;
+			u32 last_bit = op_elem->last_bit;
+			u32 raw_reg;
+			u64 mask_orig;
+			u64 v;
+
+			if (!is_valid_cyclestats_bar0_offset_gk20a(g, reg)) {
+				nvgpu_err(g,
+					   "invalid cycletstats op offset: 0x%x",
+					   reg);
+				sh_hdr->failed = exit = true;
+				break;
+			}
+			/* a 64-bit shift by 64 or more is undefined */
+			if ((first_bit >= 64U) || (last_bit >= 63U)) {
+				nvgpu_err(g, "bad cyclestats op bits %u-%u",
+					  first_bit, last_bit);
+				sh_hdr->failed = exit = true;
+				break;
+			}
+			mask_orig = ((1ULL << (last_bit + 1U)) - 1ULL) &
+				    ~((1ULL << first_bit) - 1ULL);
+			raw_reg = nvgpu_readl(g, reg);
+			if (op == (u32)BAR0_READ32) {
+				op_elem->data = ((raw_reg & mask_orig)
+							>> first_bit);
+			} else {
+				/* keep register bits outside the mask */
+				v = (raw_reg & ~mask_orig) |
+				    (((u64)op_elem->data << first_bit) &
+				     mask_orig);
+				nvgpu_writel(g, reg, (unsigned int)v);
+			}
+		}
+		break;
+		default:
+			/* no operation content case */
+			exit = true;
+			break;
+		}
+		sh_hdr->completed = true;
+		offset += size;
+	}
+	nvgpu_mutex_release(&ch->cyclestate.cyclestate_buffer_mutex);
+}
+#endif
+
+/*
+ * Notify-pending interrupt handler: ignores channels that are NULL or have
+ * no TSG, runs the cyclestats ops selected by isr_data->data_lo (only with
+ * CONFIG_GK20A_CYCLE_STATS) and then always wakes the ch->notifier_wq
+ * waiters, even without cyclestats work. Always returns 0.
+ */
+int nvgpu_gr_intr_handle_notify_pending(struct gk20a *g,
+					struct gr_gk20a_isr_data *isr_data)
+{
+	struct channel_gk20a *ch = isr_data->ch;
+
+	if (ch == NULL || tsg_gk20a_from_ch(ch) == NULL) {
+		return 0;
+	}
+
+#if defined(CONFIG_GK20A_CYCLE_STATS)
+	gr_intr_exec_cyclestats(g, ch, isr_data->data_lo);
+#endif
+	nvgpu_log_fn(g, " ");
+	nvgpu_cond_broadcast_interruptible(&ch->notifier_wq);
+	return 0;
+}
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index f76155942..af89c39e4 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -1391,168 +1391,6 @@ int gk20a_gr_handle_semaphore_pending(struct gk20a *g,
 	return 0;
 }
 
-#if defined(CONFIG_GK20A_CYCLE_STATS)
-static inline bool is_valid_cyclestats_bar0_offset_gk20a(struct gk20a *g,
-							 u32 offset)
-{
-	/* support only 24-bit 4-byte aligned offsets */
-	bool valid = !(offset & 0xFF000003U);
-
-	if (g->allow_all) {
-		return true;
-	}
-
-	/* whitelist check */
-	valid = valid &&
-		is_bar0_global_offset_whitelisted_gk20a(g, offset);
-	/* resource size check in case there was a problem
-	 * with allocating the assumed size of bar0 */
-	valid = valid && gk20a_io_valid_reg(g, offset);
-	return valid;
-}
-#endif
-
-int gk20a_gr_handle_notify_pending(struct gk20a *g,
-					  struct gr_gk20a_isr_data *isr_data)
-{
-	struct channel_gk20a *ch = isr_data->ch;
-
-#if defined(CONFIG_GK20A_CYCLE_STATS)
-	void *virtual_address;
-	u32 buffer_size;
-	u32 offset;
-	bool exit;
-#endif
-	if (ch == NULL || tsg_gk20a_from_ch(ch) == NULL) {
-		return 0;
-	}
-
-#if defined(CONFIG_GK20A_CYCLE_STATS)
-	/* GL will never use payload 0 for cycle state */
-	if ((ch->cyclestate.cyclestate_buffer == NULL) ||
-	    (isr_data->data_lo == 0)) {
-		return 0;
-	}
-
-	nvgpu_mutex_acquire(&ch->cyclestate.cyclestate_buffer_mutex);
-
-	virtual_address = ch->cyclestate.cyclestate_buffer;
-	buffer_size = ch->cyclestate.cyclestate_buffer_size;
-	offset = isr_data->data_lo;
-	exit = false;
-	while (!exit) {
-		struct share_buffer_head *sh_hdr;
-		u32 min_element_size;
-
-		/* validate offset */
-		if (offset + sizeof(struct share_buffer_head) > buffer_size ||
-		    offset + sizeof(struct share_buffer_head) < offset) {
-			nvgpu_err(g,
-				  "cyclestats buffer overrun at offset 0x%x",
-				  offset);
-			break;
-		}
-
-		sh_hdr = (struct share_buffer_head *)
-			((char *)virtual_address + offset);
-
-		min_element_size =
-			(sh_hdr->operation == OP_END ?
-			 sizeof(struct share_buffer_head) :
-			 sizeof(struct gk20a_cyclestate_buffer_elem));
-
-		/* validate sh_hdr->size */
-		if (sh_hdr->size < min_element_size ||
-		    offset + sh_hdr->size > buffer_size ||
-		    offset + sh_hdr->size < offset) {
-			nvgpu_err(g,
-				  "bad cyclestate buffer header size at offset 0x%x",
-				  offset);
-			sh_hdr->failed = true;
-			break;
-		}
-
-		switch (sh_hdr->operation) {
-		case OP_END:
-			exit = true;
-			break;
-
-		case BAR0_READ32:
-		case BAR0_WRITE32:
-		{
-			struct gk20a_cyclestate_buffer_elem *op_elem =
-				(struct gk20a_cyclestate_buffer_elem *)sh_hdr;
-			bool valid = is_valid_cyclestats_bar0_offset_gk20a(
-				g, op_elem->offset_bar0);
-			u32 raw_reg;
-			u64 mask_orig;
-			u64 v;
-
-			if (!valid) {
-				nvgpu_err(g,
-					   "invalid cycletstats op offset: 0x%x",
-					   op_elem->offset_bar0);
-
-				sh_hdr->failed = exit = true;
-				break;
-			}
-
-
-			mask_orig =
-				((1ULL <<
-				  (op_elem->last_bit + 1))
-				 -1)&~((1ULL <<
-					op_elem->first_bit)-1);
-
-			raw_reg =
-				gk20a_readl(g,
-					    op_elem->offset_bar0);
-
-			switch (sh_hdr->operation) {
-			case BAR0_READ32:
-				op_elem->data =
-					(raw_reg & mask_orig)
-					>> op_elem->first_bit;
-				break;
-
-			case BAR0_WRITE32:
-				v = 0;
-				if ((unsigned int)mask_orig !=
-							~((unsigned int)0)) {
-					v = (unsigned int)
-						(raw_reg & ~mask_orig);
-				}
-
-				v |= ((op_elem->data
-				       << op_elem->first_bit)
-				      & mask_orig);
-
-				gk20a_writel(g,
-					     op_elem->offset_bar0,
-					     (unsigned int)v);
-				break;
-			default:
-				/* nop ok?*/
-				break;
-			}
-		}
-		break;
-
-		default:
-			/* no operation content case */
-			exit = true;
-			break;
-		}
-		sh_hdr->completed = true;
-		offset += sh_hdr->size;
-	}
-	nvgpu_mutex_release(&ch->cyclestate.cyclestate_buffer_mutex);
-#endif
-	nvgpu_log_fn(g, " ");
-	nvgpu_cond_broadcast_interruptible(&ch->notifier_wq);
-	return 0;
-}
-
 /* Used by sw interrupt thread to translate current ctx to chid.
  * Also used by regops to translate current ctx to chid and tsgid.
  * For performance, we don't want to go through 128 channels every time.
@@ -2010,7 +1848,7 @@ int gk20a_gr_isr(struct gk20a *g)
 		isr_data.sub_chan, isr_data.class_num);
 
 	if ((gr_intr & gr_intr_notify_pending_f()) != 0U) {
-		g->ops.gr.handle_notify_pending(g, &isr_data);
+		g->ops.gr.intr.handle_notify_pending(g, &isr_data);
 		gk20a_writel(g, gr_intr_r(),
 			gr_intr_notify_reset_f());
 		gr_intr &= ~gr_intr_notify_pending_f();
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 188e735f6..23afdabf6 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -418,8 +418,6 @@ void gk20a_gr_init_ovr_sm_dsm_perf(void);
 void gk20a_gr_get_ovr_perf_regs(struct gk20a *g, u32 *num_ovr_perf_regs,
 					       u32 **ovr_perf_regs);
 u32 gr_gk20a_get_patch_slots(struct gk20a *g);
-int gk20a_gr_handle_notify_pending(struct gk20a *g,
-				struct gr_gk20a_isr_data *isr_data);
 
 int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g);
 
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
index d7eeab774..87bc92e10 100644
--- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
@@ -39,6 +39,7 @@
 #include <nvgpu/gr/zbc.h>
 #include <nvgpu/gr/zcull.h>
 #include <nvgpu/gr/gr.h>
+#include <nvgpu/gr/gr_intr.h>
 #include <nvgpu/gr/gr_falcon.h>
 #include <nvgpu/gr/setup.h>
 
@@ -308,7 +309,6 @@ static const struct gpu_ops gm20b_ops = {
 		.init_ovr_sm_dsm_perf =  gk20a_gr_init_ovr_sm_dsm_perf,
 		.get_ovr_perf_regs = gk20a_gr_get_ovr_perf_regs,
 		.fecs_host_int_enable = gr_gk20a_fecs_host_int_enable,
-		.handle_notify_pending = gk20a_gr_handle_notify_pending,
 		.handle_semaphore_pending = gk20a_gr_handle_semaphore_pending,
 		.decode_priv_addr = gr_gk20a_decode_priv_addr,
 		.create_priv_addr_table = gr_gk20a_create_priv_addr_table,
@@ -476,6 +476,8 @@ static const struct gpu_ops gm20b_ops = {
 			.get_gfxp_rtv_cb_size = NULL,
 		},
 		.intr = {
+			.handle_notify_pending =
+					nvgpu_gr_intr_handle_notify_pending,
 			.get_tpc_exception = gm20b_gr_intr_get_tpc_exception,
 			.handle_tex_exception =
 					gm20b_gr_intr_handle_tex_exception,
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index e194a8b03..9018117fb 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -42,6 +42,7 @@
 #include <nvgpu/gr/setup.h>
 #include <nvgpu/gr/fecs_trace.h>
 #include <nvgpu/gr/gr.h>
+#include <nvgpu/gr/gr_intr.h>
 
 #include "hal/mc/mc_gm20b.h"
 #include "hal/mc/mc_gp10b.h"
@@ -345,7 +346,6 @@ static const struct gpu_ops gp10b_ops = {
 		.get_max_gfxp_wfi_timeout_count =
 				gr_gp10b_get_max_gfxp_wfi_timeout_count,
 		.fecs_host_int_enable = gr_gk20a_fecs_host_int_enable,
-		.handle_notify_pending = gk20a_gr_handle_notify_pending,
 		.handle_semaphore_pending = gk20a_gr_handle_semaphore_pending,
 		.decode_priv_addr = gr_gk20a_decode_priv_addr,
 		.create_priv_addr_table = gr_gk20a_create_priv_addr_table,
@@ -562,6 +562,8 @@ static const struct gpu_ops gp10b_ops = {
 				gp10b_gr_init_commit_cbes_reserve,
 		},
 		.intr = {
+			.handle_notify_pending =
+					nvgpu_gr_intr_handle_notify_pending,
 			.get_tpc_exception = gm20b_gr_intr_get_tpc_exception,
 			.handle_tex_exception =
 					gp10b_gr_intr_handle_tex_exception,
diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c
index 9ca79df5b..f05322618 100644
--- a/drivers/gpu/nvgpu/gv100/hal_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c
@@ -176,6 +176,7 @@
 #include <nvgpu/gr/setup.h>
 #include <nvgpu/gr/fecs_trace.h>
 #include <nvgpu/gr/gr.h>
+#include <nvgpu/gr/gr_intr.h>
 
 #include <nvgpu/hw/gv100/hw_proj_gv100.h>
 #include <nvgpu/hw/gv100/hw_top_gv100.h>
@@ -461,7 +462,6 @@ static const struct gpu_ops gv100_ops = {
 		.decode_egpc_addr = gv11b_gr_decode_egpc_addr,
 		.fecs_host_int_enable = gr_gv11b_fecs_host_int_enable,
 		.handle_ssync_hww = gr_gv11b_handle_ssync_hww,
-		.handle_notify_pending = gk20a_gr_handle_notify_pending,
 		.handle_semaphore_pending = gk20a_gr_handle_semaphore_pending,
 		.decode_priv_addr = gr_gv11b_decode_priv_addr,
 		.create_priv_addr_table = gr_gv11b_create_priv_addr_table,
@@ -699,6 +699,8 @@ static const struct gpu_ops gv100_ops = {
 				gv11b_gr_init_commit_gfxp_wfi_timeout,
 		},
 		.intr = {
+			.handle_notify_pending =
+					nvgpu_gr_intr_handle_notify_pending,
 			.handle_gcc_exception =
 				gv11b_gr_intr_handle_gcc_exception,
 			.handle_gpc_gpcmmu_exception =
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
index 52ce82b22..8a9570456 100644
--- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
@@ -151,6 +151,7 @@
 #include <nvgpu/gr/setup.h>
 #include <nvgpu/gr/fecs_trace.h>
 #include <nvgpu/gr/gr.h>
+#include <nvgpu/gr/gr_intr.h>
 
 #include <nvgpu/hw/gv11b/hw_proj_gv11b.h>
 #include <nvgpu/hw/gv11b/hw_top_gv11b.h>
@@ -420,7 +421,6 @@ static const struct gpu_ops gv11b_ops = {
 			gr_gv11b_get_max_gfxp_wfi_timeout_count,
 		.fecs_host_int_enable = gr_gv11b_fecs_host_int_enable,
 		.handle_ssync_hww = gr_gv11b_handle_ssync_hww,
-		.handle_notify_pending = gk20a_gr_handle_notify_pending,
 		.handle_semaphore_pending = gk20a_gr_handle_semaphore_pending,
 		.decode_priv_addr = gr_gv11b_decode_priv_addr,
 		.create_priv_addr_table = gr_gv11b_create_priv_addr_table,
@@ -658,6 +658,8 @@ static const struct gpu_ops gv11b_ops = {
 				gv11b_gr_init_commit_gfxp_wfi_timeout,
 		},
 		.intr = {
+			.handle_notify_pending =
+					nvgpu_gr_intr_handle_notify_pending,
 			.handle_gcc_exception =
 				gv11b_gr_intr_handle_gcc_exception,
 			.handle_gpc_gpcmmu_exception =
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
index d7f7c29ab..def0f1f5e 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
@@ -415,8 +415,6 @@ struct gpu_ops {
 					(struct gk20a *g);
 		void (*fecs_host_int_enable)(struct gk20a *g);
 		int (*handle_ssync_hww)(struct gk20a *g, u32 *ssync_esr);
-		int (*handle_notify_pending)(struct gk20a *g,
-					struct gr_gk20a_isr_data *isr_data);
 		int (*handle_semaphore_pending)(struct gk20a *g,
 					struct gr_gk20a_isr_data *isr_data);
 		int (*add_ctxsw_reg_pm_fbpa)(struct gk20a *g,
@@ -779,6 +777,8 @@ struct gpu_ops {
 		} init;
 
 		struct {
+			int (*handle_notify_pending)(struct gk20a *g,
+				struct gr_gk20a_isr_data *isr_data);
 			void (*handle_gcc_exception)(struct gk20a *g, u32 gpc,
 				u32 tpc, u32 gpc_exception,
 				u32 *corrected_err, u32 *uncorrected_err);
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/gr_intr.h b/drivers/gpu/nvgpu/include/nvgpu/gr/gr_intr.h
index 2fe4699a6..f66447c8f 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gr/gr_intr.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gr/gr_intr.h
@@ -25,10 +25,14 @@
 
 #include <nvgpu/types.h>
 
+struct gr_gk20a_isr_data;
+
 struct nvgpu_gr_tpc_exception {
 	bool tex_exception;
 	bool sm_exception;
 	bool mpc_exception;
 };
 
+int nvgpu_gr_intr_handle_notify_pending(struct gk20a *g,
+					struct gr_gk20a_isr_data *isr_data);
 #endif /* NVGPU_GR_INTR_H */
diff --git a/drivers/gpu/nvgpu/tu104/hal_tu104.c b/drivers/gpu/nvgpu/tu104/hal_tu104.c
index c28ba233d..351d0a659 100644
--- a/drivers/gpu/nvgpu/tu104/hal_tu104.c
+++ b/drivers/gpu/nvgpu/tu104/hal_tu104.c
@@ -195,6 +195,7 @@
 #include <nvgpu/pmu/perf.h>
 #include <nvgpu/gr/gr_falcon.h>
 #include <nvgpu/gr/gr.h>
+#include <nvgpu/gr/gr_intr.h>
 
 #include <nvgpu/hw/tu104/hw_proj_tu104.h>
 #include <nvgpu/hw/tu104/hw_top_tu104.h>
@@ -489,7 +490,6 @@ static const struct gpu_ops tu104_ops = {
 			gr_gv11b_get_max_gfxp_wfi_timeout_count,
 		.fecs_host_int_enable = gr_gv11b_fecs_host_int_enable,
 		.handle_ssync_hww = gr_gv11b_handle_ssync_hww,
-		.handle_notify_pending = gk20a_gr_handle_notify_pending,
 		.handle_semaphore_pending = gk20a_gr_handle_semaphore_pending,
 		.decode_priv_addr = gr_gv11b_decode_priv_addr,
 		.create_priv_addr_table = gr_gv11b_create_priv_addr_table,
@@ -732,6 +732,8 @@ static const struct gpu_ops tu104_ops = {
 				gv11b_gr_init_commit_gfxp_wfi_timeout,
 		},
 		.intr = {
+			.handle_notify_pending =
+					nvgpu_gr_intr_handle_notify_pending,
 			.handle_gcc_exception =
 				gv11b_gr_intr_handle_gcc_exception,
 			.handle_gpc_gpcmmu_exception =