From 856c1c82de27897c7f257ce625b7d2d7e31afeb0 Mon Sep 17 00:00:00 2001 From: Divya Date: Fri, 18 Feb 2022 13:17:48 +0000 Subject: [PATCH] gpu: nvgpu: add golden image check for tpc_pg_mask - Setting a different tpc_pg_mask value leads to a GPU crash. - It is observed that with GPU railgating disabled, if tpc_pg_mask is set, the "the gpu is powered on" error is reported and the tpc_pg_mask cannot be set, which is expected. - With GPU railgating enabled, the different tpc_pg_mask value is set and the GPU crashes. - So, add a check for whether the golden image is initialized before setting the TPC, GPC and FBP PG masks. - This check does not allow the TPC, GPC and FBP masks to be updated after golden image initialization, and thus no GPU crash happens. Bug 3544499 Change-Id: Ia003beaaec9dead22da74ea5862a81986780966b Signed-off-by: Divya Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2672202 (cherry picked from commit 05a1f927f8678b1eb7ed8a07bc2690e416c7866e) Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2673513 Reviewed-by: Sagar Kamble Reviewed-by: Vijayakumar Subbu GVS: Gerrit_Virtual_Submit --- drivers/gpu/nvgpu/os/linux/sysfs.c | 35 ++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/nvgpu/os/linux/sysfs.c b/drivers/gpu/nvgpu/os/linux/sysfs.c index 18e7e02a9..0f330432b 100644 --- a/drivers/gpu/nvgpu/os/linux/sysfs.c +++ b/drivers/gpu/nvgpu/os/linux/sysfs.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2011-2022, NVIDIA CORPORATION. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -850,6 +850,7 @@ static ssize_t gpc_pg_mask_store(struct device *dev, { struct gk20a *g = get_gk20a(dev); struct gk20a_platform *platform = dev_get_drvdata(dev); + struct nvgpu_gr_obj_ctx_golden_image *gr_golden_image = NULL; unsigned long val = 0; int err = 0; @@ -861,8 +862,14 @@ static ssize_t gpc_pg_mask_store(struct device *dev, return -EINVAL; } - if (nvgpu_is_powered_on(g)) { - nvgpu_err(g, "gpu already powered on"); + if (g->gr != NULL) { + gr_golden_image = nvgpu_gr_get_golden_image_ptr(g); + } + + if (gr_golden_image && + nvgpu_gr_obj_ctx_get_golden_image_size(gr_golden_image) + != 0) { + nvgpu_err(g, "golden image size already initialized"); nvgpu_mutex_release(&g->static_pg_lock); return -ENODEV; } @@ -923,6 +930,7 @@ static ssize_t fbp_pg_mask_store(struct device *dev, { struct gk20a *g = get_gk20a(dev); struct gk20a_platform *platform = dev_get_drvdata(dev); + struct nvgpu_gr_obj_ctx_golden_image *gr_golden_image = NULL; unsigned long val = 0; int err = 0; @@ -934,8 +942,14 @@ static ssize_t fbp_pg_mask_store(struct device *dev, return -EINVAL; } - if (nvgpu_is_powered_on(g)) { - nvgpu_err(g, "gpu is already powered on"); + if (g->gr != NULL) { + gr_golden_image = nvgpu_gr_get_golden_image_ptr(g); + } + + if (gr_golden_image && + nvgpu_gr_obj_ctx_get_golden_image_size(gr_golden_image) + != 0) { + nvgpu_err(g, "golden image size already initialized"); nvgpu_mutex_release(&g->static_pg_lock); return -ENODEV; } @@ -1004,6 +1018,7 @@ static ssize_t tpc_pg_mask_store(struct device *dev, { struct gk20a *g = get_gk20a(dev); struct gk20a_platform *platform = dev_get_drvdata(dev); + struct nvgpu_gr_obj_ctx_golden_image *gr_golden_image = NULL; unsigned long val = 0; int err = 0; u32 i; @@ -1028,8 +1043,14 @@ static ssize_t tpc_pg_mask_store(struct device *dev, goto exit; } - if (nvgpu_is_powered_on(g)) { - 
nvgpu_err(g, "gpu is already powered on"); + if (g->gr != NULL) { + gr_golden_image = nvgpu_gr_get_golden_image_ptr(g); + } + + if (gr_golden_image && + nvgpu_gr_obj_ctx_get_golden_image_size(gr_golden_image) + != 0) { + nvgpu_err(g, "golden image size already initialized"); nvgpu_mutex_release(&g->static_pg_lock); return -ENODEV; }