gpu: nvgpu: add golden image check for tpc_pg_mask

- Setting a different tpc_pg_mask value leads to a GPU crash.
- It is observed that with GPU railgating disabled, if
  tpc_pg_mask is set, a "the gpu is powered on" error is
  reported and setting tpc_pg_mask is not allowed, which
  is expected.
- With GPU railgating enabled, a different tpc_pg_mask
  value can be set and the GPU crashes.
- So, add a check that the golden image is not yet initialized
  before setting the TPC, GPC and FBP PG masks.
- This check disallows updating the TPC, GPC and FBP masks after
  golden image initialization, and thus no GPU crash happens.

Bug 3544499

Change-Id: Ia003beaaec9dead22da74ea5862a81986780966b
Signed-off-by: Divya <dsinghatwari@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2672202
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: Ninad Malwade <nmalwade@nvidia.com>
Reviewed-by: Seema Khowala <seemaj@nvidia.com>
Tested-by: Ninad Malwade <nmalwade@nvidia.com>
GVS: Gerrit_Virtual_Submit
This commit is contained in:
Divya
2022-02-18 13:17:48 +00:00
committed by mobile promotions
parent 930c218810
commit 05a1f927f8

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2011-2022, NVIDIA CORPORATION. All rights reserved.
* *
* This program is free software; you can redistribute it and/or modify it * This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License, * under the terms and conditions of the GNU General Public License,
@@ -850,6 +850,7 @@ static ssize_t gpc_pg_mask_store(struct device *dev,
{ {
struct gk20a *g = get_gk20a(dev); struct gk20a *g = get_gk20a(dev);
struct gk20a_platform *platform = dev_get_drvdata(dev); struct gk20a_platform *platform = dev_get_drvdata(dev);
struct nvgpu_gr_obj_ctx_golden_image *gr_golden_image = NULL;
unsigned long val = 0; unsigned long val = 0;
int err = 0; int err = 0;
@@ -861,8 +862,14 @@ static ssize_t gpc_pg_mask_store(struct device *dev,
return -EINVAL; return -EINVAL;
} }
if (nvgpu_is_powered_on(g)) { if (g->gr != NULL) {
nvgpu_err(g, "gpu already powered on"); gr_golden_image = nvgpu_gr_get_golden_image_ptr(g);
}
if (gr_golden_image &&
nvgpu_gr_obj_ctx_get_golden_image_size(gr_golden_image)
!= 0) {
nvgpu_err(g, "golden image size already initialized");
nvgpu_mutex_release(&g->static_pg_lock); nvgpu_mutex_release(&g->static_pg_lock);
return -ENODEV; return -ENODEV;
} }
@@ -923,6 +930,7 @@ static ssize_t fbp_pg_mask_store(struct device *dev,
{ {
struct gk20a *g = get_gk20a(dev); struct gk20a *g = get_gk20a(dev);
struct gk20a_platform *platform = dev_get_drvdata(dev); struct gk20a_platform *platform = dev_get_drvdata(dev);
struct nvgpu_gr_obj_ctx_golden_image *gr_golden_image = NULL;
unsigned long val = 0; unsigned long val = 0;
int err = 0; int err = 0;
@@ -934,8 +942,14 @@ static ssize_t fbp_pg_mask_store(struct device *dev,
return -EINVAL; return -EINVAL;
} }
if (nvgpu_is_powered_on(g)) { if (g->gr != NULL) {
nvgpu_err(g, "gpu is already powered on"); gr_golden_image = nvgpu_gr_get_golden_image_ptr(g);
}
if (gr_golden_image &&
nvgpu_gr_obj_ctx_get_golden_image_size(gr_golden_image)
!= 0) {
nvgpu_err(g, "golden image size already initialized");
nvgpu_mutex_release(&g->static_pg_lock); nvgpu_mutex_release(&g->static_pg_lock);
return -ENODEV; return -ENODEV;
} }
@@ -1004,6 +1018,7 @@ static ssize_t tpc_pg_mask_store(struct device *dev,
{ {
struct gk20a *g = get_gk20a(dev); struct gk20a *g = get_gk20a(dev);
struct gk20a_platform *platform = dev_get_drvdata(dev); struct gk20a_platform *platform = dev_get_drvdata(dev);
struct nvgpu_gr_obj_ctx_golden_image *gr_golden_image = NULL;
unsigned long val = 0; unsigned long val = 0;
int err = 0; int err = 0;
u32 i; u32 i;
@@ -1028,8 +1043,14 @@ static ssize_t tpc_pg_mask_store(struct device *dev,
goto exit; goto exit;
} }
if (nvgpu_is_powered_on(g)) { if (g->gr != NULL) {
nvgpu_err(g, "gpu is already powered on"); gr_golden_image = nvgpu_gr_get_golden_image_ptr(g);
}
if (gr_golden_image &&
nvgpu_gr_obj_ctx_get_golden_image_size(gr_golden_image)
!= 0) {
nvgpu_err(g, "golden image size already initialized");
nvgpu_mutex_release(&g->static_pg_lock); nvgpu_mutex_release(&g->static_pg_lock);
return -ENODEV; return -ENODEV;
} }