From 3cf1f96b1a640d87ef29c78eadb0a85f0d268ea4 Mon Sep 17 00:00:00 2001 From: Seshendra Gadagottu Date: Tue, 6 Aug 2019 16:43:58 -0700 Subject: [PATCH] gpu: nvgpu: safety check for golden context image As part of the safety check, the golden context is created twice (back to back) and the two golden context images are compared. The two images must be identical for the safety check to pass. Currently there is one hw deviation, caused by stats counter idle clock increments. To avoid this, the stats counter clock enable is disabled. To have a valid golden context (i.e. to avoid a mismatch between fe and mpc), the original stats counter bundle is programmed through the mme shadow register. Before the first golden context save, the golden image is saved in a local copy. The same copy is restored after the first golden save. The second golden context save is done with this local copy and the two golden contexts are compared. JIRA NVGPU-3558 Change-Id: I5b5a1c5ff177e866a91cfc23618b118c157c8e95 Signed-off-by: Seshendra Gadagottu Reviewed-on: https://git-master.nvidia.com/r/2167213 Reviewed-by: svc-mobile-coverity Reviewed-by: svc-mobile-misra GVS: Gerrit_Virtual_Submit Reviewed-by: Raghuram Kothakota Reviewed-by: Vinod Gopalakrishnakurup Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/common/gr/obj_ctx.c | 73 +++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/drivers/gpu/nvgpu/common/gr/obj_ctx.c b/drivers/gpu/nvgpu/common/gr/obj_ctx.c index 151f76f95..a5ceabaf1 100644 --- a/drivers/gpu/nvgpu/common/gr/obj_ctx.c +++ b/drivers/gpu/nvgpu/common/gr/obj_ctx.c @@ -423,6 +423,12 @@ int nvgpu_gr_obj_ctx_alloc_golden_ctx_image(struct gk20a *g, struct netlist_av_list *sw_method_init = nvgpu_netlist_get_sw_method_init_av_list(g); u32 data; +#ifdef NV_BUILD_CONFIGURATION_IS_SAFETY + struct netlist_av_list *sw_bundle_init = + nvgpu_netlist_get_sw_bundle_init_av_list(g); + struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image2 = + NULL; +#endif nvgpu_log_fn(g, " "); 
@@ -519,6 +525,14 @@ restore_fe_go_idle: /* load method init */ g->ops.gr.init.load_method_init(g, sw_method_init); +#ifdef NV_BUILD_CONFIGURATION_IS_SAFETY + /* restore stats bundle data through mme shadow methods */ + if (g->ops.gr.init.restore_stats_counter_bundle_data != 0) { + g->ops.gr.init.restore_stats_counter_bundle_data(g, + sw_bundle_init); + } +#endif + err = g->ops.gr.init.wait_idle(g); if (err != 0) { goto clean_up; @@ -531,6 +545,23 @@ restore_fe_go_idle: } #endif +#ifdef NV_BUILD_CONFIGURATION_IS_SAFETY + /* + * Save ctx data before first golden context save. Restore same data + * before second golden context save. This temporary copy is + * saved in local_golden_image2. + */ + + size = nvgpu_gr_obj_ctx_get_golden_image_size(golden_image); + + local_golden_image2 = + nvgpu_gr_global_ctx_init_local_golden_image(g, gr_mem, size); + if (local_golden_image2 == NULL) { + err = -ENOMEM; + goto clean_up; + } +#endif + data = g->ops.gr.falcon.get_fecs_current_ctx_data(g, inst_block); err = g->ops.gr.falcon.ctrl_ctxsw(g, NVGPU_GR_FALCON_METHOD_GOLDEN_IMAGE_SAVE, data, NULL); @@ -547,6 +578,42 @@ restore_fe_go_idle: goto clean_up; } +#ifdef NV_BUILD_CONFIGURATION_IS_SAFETY + /* Before second golden context save restore to before known state */ + nvgpu_gr_global_ctx_load_local_golden_image(g, + local_golden_image2, gr_mem); + /* free local copy now */ + nvgpu_gr_global_ctx_deinit_local_golden_image(g, local_golden_image2); + local_golden_image2 = NULL; + + /* Initiate second golden context save */ + data = g->ops.gr.falcon.get_fecs_current_ctx_data(g, inst_block); + err = g->ops.gr.falcon.ctrl_ctxsw(g, + NVGPU_GR_FALCON_METHOD_GOLDEN_IMAGE_SAVE, data, NULL); + if (err != 0) { + goto clean_up; + } + + /* Copy the data to local buffer */ + local_golden_image2 = + nvgpu_gr_global_ctx_init_local_golden_image(g, gr_mem, size); + if (local_golden_image2 == NULL) { + err = -ENOMEM; + goto clean_up; + } + + /* Compare two golden context images */ + if 
(!nvgpu_gr_global_ctx_compare_golden_images(g, + nvgpu_mem_is_sysmem(gr_mem), + golden_image->local_golden_image, + local_golden_image2, + size)) { + nvgpu_err(g, "golden context mismatch"); + err = -ENOMEM; + goto clean_up; + } +#endif + golden_image->ready = true; #ifdef CONFIG_NVGPU_LS_PMU nvgpu_pmu_set_golden_image_initialized(g, true); @@ -554,6 +621,12 @@ restore_fe_go_idle: g->ops.gr.falcon.set_current_ctx_invalid(g); clean_up: +#ifdef NV_BUILD_CONFIGURATION_IS_SAFETY + if (local_golden_image2 != NULL) { + nvgpu_gr_global_ctx_deinit_local_golden_image(g, + local_golden_image2); + } +#endif if (err != 0) { nvgpu_err(g, "fail"); } else {