Mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
gpu: nvgpu: add/remove l2 cache flush when updating the ctx buffers
The gr ctx buffer is non-cacheable, hence there is no need to do an L2 cache flush when updating the buffer. Remove the flushes.

The pm ctx buffer is cacheable, hence add an L2 flush in nvgpu_profiler_quiesce_hwpm_streamout_non_resident() since that function updates the buffer.

Bug 3677982

Change-Id: I0c15ec7a7f8fa250af1d25891122acc24443a872
Signed-off-by: Sagar Kamble <skamble@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2713916
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Committed by: mobile promotions
Parent: 65e7baf856
Commit: bfa20f62c6
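The commit applies one rule throughout: a GPU-cacheable context buffer must have GPU L2 flushed (and invalidated) before the CPU touches it, while a non-cacheable buffer can be updated directly. The sketch below illustrates that rule using the call names that appear in the hunks (nvgpu_pg_elpg_ms_protected_call, g->ops.mm.cache.l2_flush, nvgpu_memset, nvgpu_err); the wrapper function itself is hypothetical and not compilable outside the driver tree.

/*
 * Sketch only: the helper name is invented for illustration; the calls it
 * wraps are the ones used in the diff below.
 */
static int example_update_cacheable_ctx_buffer(struct gk20a *g,
		struct nvgpu_mem *mem)
{
	int err;

	/*
	 * The buffer is GPU cacheable: flush GPU L2 first so that dirty or
	 * stale cache lines cannot clobber or hide the CPU-side update.
	 */
	err = nvgpu_pg_elpg_ms_protected_call(g, g->ops.mm.cache.l2_flush(g, true));
	if (err != 0) {
		nvgpu_err(g, "l2_flush failed");
		return err;
	}

	/* The CPU-side update is now safe. */
	nvgpu_memset(g, mem, 0U, 0U, mem->size);

	return 0;
}

A non-cacheable buffer, which is what the commit message says the gr ctx buffer is, skips the flush and performs the write directly; that is exactly what the removals below leave behind.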
@@ -749,20 +749,12 @@ u64 nvgpu_gr_ctx_get_zcull_ctx_va(struct nvgpu_gr_ctx *gr_ctx)
 
 int nvgpu_gr_ctx_init_zcull(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx)
 {
-	int err;
-
 	nvgpu_log(g, gpu_dbg_gr, " ");
 
-	err = nvgpu_pg_elpg_ms_protected_call(g, g->ops.mm.cache.l2_flush(g, true));
-	if (err != 0) {
-		nvgpu_err(g, "l2_flush failed");
-		return err;
-	}
-
 	g->ops.gr.ctxsw_prog.set_zcull_mode_no_ctxsw(g, &gr_ctx->mem);
 	g->ops.gr.ctxsw_prog.set_zcull_ptr(g, &gr_ctx->mem, 0);
 
-	return err;
+	return 0;
 }
 
 int nvgpu_gr_ctx_zcull_setup(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
@@ -1061,13 +1053,6 @@ u32 nvgpu_gr_ctx_get_pm_ctx_pm_mode(struct nvgpu_gr_ctx *gr_ctx)
 u32 nvgpu_gr_ctx_get_ctx_id(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx)
 {
 	if (!gr_ctx->ctx_id_valid) {
-		/* Channel gr_ctx buffer is gpu cacheable.
-		   Flush and invalidate before cpu update. */
-		if (nvgpu_pg_elpg_ms_protected_call(g,
-				g->ops.mm.cache.l2_flush(g, true)) != 0) {
-			nvgpu_err(g, "l2_flush failed");
-		}
-
 		gr_ctx->ctx_id = g->ops.gr.ctxsw_prog.get_main_image_ctx_id(g,
 				&gr_ctx->mem);
 		gr_ctx->ctx_id_valid = true;
@@ -1106,24 +1091,14 @@ bool nvgpu_gr_ctx_desc_dump_ctxsw_stats_on_channel_close(
 int nvgpu_gr_ctx_set_smpc_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
 	bool enable)
 {
-	int err;
-
 	if (!nvgpu_mem_is_valid(&gr_ctx->mem)) {
 		nvgpu_err(g, "no graphics context allocated");
 		return -EFAULT;
 	}
 
-	/* Channel gr_ctx buffer is gpu cacheable.
-	   Flush and invalidate before cpu update. */
-	err = nvgpu_pg_elpg_ms_protected_call(g, g->ops.mm.cache.l2_flush(g, true));
-	if (err != 0) {
-		nvgpu_err(g, "l2_flush failed");
-		return err;
-	}
-
 	g->ops.gr.ctxsw_prog.set_pm_smpc_mode(g, &gr_ctx->mem, enable);
 
-	return err;
+	return 0;
 }
 
 int nvgpu_gr_ctx_prepare_hwpm_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
@@ -1188,16 +1163,6 @@ int nvgpu_gr_ctx_prepare_hwpm_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
 int nvgpu_gr_ctx_set_hwpm_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
 	bool set_pm_ptr)
 {
-	int err;
-
-	/* Channel gr_ctx buffer is gpu cacheable.
-	   Flush and invalidate before cpu update. */
-	err = nvgpu_pg_elpg_ms_protected_call(g, g->ops.mm.cache.l2_flush(g, true));
-	if (err != 0) {
-		nvgpu_err(g, "l2_flush failed");
-		return err;
-	}
-
 	g->ops.gr.ctxsw_prog.set_pm_mode(g, &gr_ctx->mem,
 		gr_ctx->pm_ctx.pm_mode);
 	if (set_pm_ptr) {
@@ -1205,6 +1170,6 @@ int nvgpu_gr_ctx_set_hwpm_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
 			gr_ctx->pm_ctx.gpu_va);
 	}
 
-	return err;
+	return 0;
 }
 #endif /* CONFIG_NVGPU_DEBUGGER */
@@ -450,13 +450,6 @@ void nvgpu_gr_global_ctx_load_local_golden_image(struct gk20a *g,
 	struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image,
 	struct nvgpu_mem *target_mem)
 {
-	/* Channel gr_ctx buffer is gpu cacheable.
-	   Flush and invalidate before cpu update. */
-	if (nvgpu_pg_elpg_ms_protected_call(g,
-			g->ops.mm.cache.l2_flush(g, true)) != 0) {
-		nvgpu_err(g, "l2_flush failed");
-	}
-
 	nvgpu_mem_wr_n(g, target_mem, 0, local_golden_image->context,
 		nvgpu_safe_cast_u64_to_u32(local_golden_image->size));
 
@@ -84,12 +84,6 @@ void nvgpu_gr_subctx_load_ctx_header(struct gk20a *g,
 	struct nvgpu_gr_ctx *gr_ctx, u64 gpu_va)
 {
 	struct nvgpu_mem *ctxheader = &subctx->ctx_header;
-	int err = 0;
-
-	err = nvgpu_pg_elpg_ms_protected_call(g, g->ops.mm.cache.l2_flush(g, true));
-	if (err != 0) {
-		nvgpu_err(g, "l2_flush failed");
-	}
 
 #ifdef CONFIG_NVGPU_SET_FALCON_ACCESS_MAP
 	/* set priv access map */
@@ -38,6 +38,7 @@
 #include <nvgpu/sort.h>
 #include <nvgpu/gr/gr_instances.h>
 #include <nvgpu/grmgr.h>
+#include <nvgpu/power_features/pg.h>
 
 static int nvgpu_profiler_build_regops_allowlist(struct nvgpu_profiler_object *prof);
 static void nvgpu_profiler_destroy_regops_allowlist(struct nvgpu_profiler_object *prof);
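The added include is presumably what declares nvgpu_pg_elpg_ms_protected_call(), which the new flush call further down in this file relies on.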
@@ -496,6 +497,7 @@ static int nvgpu_profiler_quiesce_hwpm_streamout_non_resident(struct gk20a *g,
 		struct nvgpu_tsg *tsg)
 {
 	struct nvgpu_mem *pm_ctx_mem;
+	int err;
 
 	nvgpu_log(g, gpu_dbg_prof,
 		"HWPM streamout quiesce in non-resident state started");
@@ -505,11 +507,18 @@ static int nvgpu_profiler_quiesce_hwpm_streamout_non_resident(struct gk20a *g,
 	}
 
 	pm_ctx_mem = nvgpu_gr_ctx_get_pm_ctx_mem(tsg->gr_ctx);
 
 	if (pm_ctx_mem == NULL) {
 		nvgpu_err(g, "No PM context");
 		return -EINVAL;
 	}
+
+	err = nvgpu_pg_elpg_ms_protected_call(g, g->ops.mm.cache.l2_flush(g, true));
+	if (err != 0) {
+		nvgpu_err(g, "l2_flush failed");
+		return err;
+	}
+
 	nvgpu_memset(g, pm_ctx_mem, 0U, 0U, pm_ctx_mem->size);
 	nvgpu_log(g, gpu_dbg_prof,
 		"HWPM streamout quiesce in non-resident state successfull");
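Note that the added flush sits immediately before the nvgpu_memset() that clears the PM context image: since the pm ctx buffer is cacheable, GPU L2 has to be flushed before the CPU-side clear, matching the rationale in the commit message.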
@@ -1947,14 +1947,6 @@ static int nvgpu_dbg_gpu_ioctl_get_gr_context(struct dbg_session_gk20a *dbg_s,
 		goto done;
 	}
 
-	/* Channel gr_ctx buffer is gpu cacheable.
-	   Flush and invalidate before cpu update. */
-	err = nvgpu_pg_elpg_ms_protected_call(g, g->ops.mm.cache.l2_flush(g, true));
-	if (err != 0) {
-		nvgpu_err(g, "l2_flush failed");
-		goto done;
-	}
-
 	err = nvgpu_dbg_get_context_buffer(g, ctx_mem, user_buffer, size);
 
 done: