diff --git a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
index 1dbbd1a06..2bf266024 100644
--- a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
@@ -60,6 +60,14 @@ int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem)
 {
 	void *cpu_va;
 
+	if (WARN_ON(mem->cpu_accessible)) {
+		nvgpu_warn(g, "nested");
+		return -EBUSY;
+	}
+
+	/* flag that the intent is to allow CPU access to the memory. */
+	mem->cpu_accessible = true;
+
 	if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin)
 		return 0;
 
@@ -71,17 +79,14 @@ int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem)
 	if (!(mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING))
 		return 0;
 
-	if (WARN_ON(mem->cpu_va)) {
-		nvgpu_warn(g, "nested");
-		return -EBUSY;
-	}
-
 	cpu_va = vmap(mem->priv.pages,
 			PAGE_ALIGN(mem->size) >> PAGE_SHIFT,
 			0, pgprot_writecombine(PAGE_KERNEL));
 
-	if (WARN_ON(!cpu_va))
+	if (WARN_ON(!cpu_va)) {
+		mem->cpu_accessible = false;
 		return -ENOMEM;
+	}
 
 	mem->cpu_va = cpu_va;
 	return 0;
@@ -89,6 +94,8 @@ int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem)
 
 void nvgpu_mem_end(struct gk20a *g, struct nvgpu_mem *mem)
 {
+	mem->cpu_accessible = false;
+
 	if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin)
 		return;
 
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 2a20c2d9f..06fb5497c 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -682,7 +682,7 @@ int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
 	if (err)
 		return err;
 
-	if (ch_ctx->gr_ctx->mem.cpu_va) {
+	if (nvgpu_mem_cpu_accessible(&ch_ctx->gr_ctx->mem)) {
 		/* reset patch count if ucode has already processed it */
 		ch_ctx->patch_ctx.data_count = nvgpu_mem_rd(g,
 					&ch_ctx->gr_ctx->mem,
@@ -699,7 +699,7 @@ void gr_gk20a_ctx_patch_write_end(struct gk20a *g,
 	nvgpu_mem_end(g, &ch_ctx->patch_ctx.mem);
 
 	/* Write context count to context image if it is mapped */
-	if (ch_ctx->gr_ctx->mem.cpu_va) {
+	if (nvgpu_mem_cpu_accessible(&ch_ctx->gr_ctx->mem)) {
 		nvgpu_mem_wr(g, &ch_ctx->gr_ctx->mem,
 			     ctxsw_prog_main_image_patch_count_o(),
 			     ch_ctx->patch_ctx.data_count);
diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
index 6feacff72..bae503470 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
@@ -122,6 +122,7 @@ struct nvgpu_mem {
 	size_t aligned_size;
 	u64 gpu_va;
 	bool skip_wmb;
+	bool cpu_accessible;
 
 	/*
 	 * Set when a nvgpu_mem struct is not a "real" nvgpu_mem struct. Instead
@@ -210,6 +211,15 @@ static inline bool nvgpu_mem_is_valid(struct nvgpu_mem *mem)
 
 }
 
+/*
+ * Returns true if the passed nvgpu_mem can be accessed by the CPU by virtue
+ * of having called nvgpu_mem_begin successfully.
+ */
+static inline bool nvgpu_mem_cpu_accessible(struct nvgpu_mem *mem)
+{
+	return mem->cpu_accessible;
+}
+
 /*
  * Create a nvgpu_sgt of the default implementation
  */