diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index 669a73419..499e88fd9 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -537,8 +537,20 @@ static int __nvgpu_gmmu_do_update_page_table(struct vm_gk20a *vm,
 	 * we really are mapping physical pages directly.
 	 */
 	nvgpu_sgt_for_each_sgl(sgl, sgt) {
+		/*
+		 * ipa_addr == phys_addr on non-virtualized OSes.
+		 */
 		u64 phys_addr;
-		u64 chunk_length;
+		u64 ipa_addr;
+		/*
+		 * For non-virtualized OSes, SGL entries are contiguous in
+		 * physical memory (sgl_length == phys_length). For virtualized
+		 * OSes, SGL entries are mapped to intermediate physical memory
+		 * which may in turn point to discontiguous physical memory.
+		 * Therefore phys_length may not be equal to sgl_length.
+		 */
+		u64 phys_length;
+		u64 sgl_length;
 
 		/*
 		 * Cut out sgl ents for space_to_skip.
@@ -549,31 +561,83 @@ static int __nvgpu_gmmu_do_update_page_table(struct vm_gk20a *vm,
 			continue;
 		}
 
-		phys_addr = g->ops.mm.gpu_phys_addr(g, attrs,
-				nvgpu_sgt_get_phys(g, sgt, sgl)) + space_to_skip;
-		chunk_length = min(length,
-			nvgpu_sgt_get_length(sgt, sgl) - space_to_skip);
-
-		err = __set_pd_level(vm, &vm->pdb,
-				     0,
-				     phys_addr,
-				     virt_addr,
-				     chunk_length,
-				     attrs);
-		if (err != 0) {
-			break;
-		}
-
-		/* Space has been skipped so zero this for future chunks. */
-		space_to_skip = 0;
+		/*
+		 * IPA and PA have a 1:1 mapping on non-virtualized OSes.
+		 */
+		ipa_addr = nvgpu_sgt_get_ipa(g, sgt, sgl);
 
 		/*
-		 * Update the map pointer and the remaining length.
+		 * For non-virtualized OSes, SGL entries are contiguous and
+		 * hence sgl_length == phys_length. For virtualized OSes,
+		 * phys_length will be updated by nvgpu_sgt_ipa_to_pa().
 		 */
-		virt_addr += chunk_length;
-		length -= chunk_length;
+		sgl_length = nvgpu_sgt_get_length(sgt, sgl);
+		phys_length = sgl_length;
 
-		if (length == 0U) {
+		while (sgl_length > 0ULL && length > 0ULL) {
+			/*
+			 * Size of the portion of the SGL entry that is backed
+			 * by physically contiguous memory.
+			 */
+			u64 sgl_contiguous_length;
+			/*
+			 * Number of bytes of the SGL entry that are actually
+			 * mapped after accounting for space_to_skip.
+			 */
+			u64 mapped_sgl_length;
+
+			/*
+			 * For virtualized OSes, translate IPA to PA and
+			 * retrieve the size of the underlying physical memory
+			 * chunk to which the SGL entry has been mapped.
+			 */
+			phys_addr = nvgpu_sgt_ipa_to_pa(g, sgt, sgl, ipa_addr,
+					&phys_length);
+			phys_addr = g->ops.mm.gpu_phys_addr(g, attrs, phys_addr) +
+					space_to_skip;
+
+			/*
+			 * For virtualized OSes, when phys_length is less than
+			 * sgl_length, check whether space_to_skip exceeds
+			 * phys_length; if so, skip this memory chunk.
+			 */
+			if (space_to_skip >= phys_length) {
+				space_to_skip -= phys_length;
+				ipa_addr += phys_length;
+				sgl_length -= phys_length;
+				continue;
+			}
+
+			sgl_contiguous_length = min(phys_length, sgl_length);
+			mapped_sgl_length = min(length, sgl_contiguous_length -
+						space_to_skip);
+
+			err = __set_pd_level(vm, &vm->pdb,
+					0,
+					phys_addr,
+					virt_addr,
+					mapped_sgl_length,
+					attrs);
+			if (err != 0) {
+				return err;
+			}
+
+			/*
+			 * Update the map pointer and the remaining length.
+			 */
+			virt_addr += mapped_sgl_length;
+			length -= mapped_sgl_length;
+			sgl_length -= mapped_sgl_length + space_to_skip;
+			ipa_addr += mapped_sgl_length + space_to_skip;
+
+			/*
+			 * Space has been skipped, so zero this for future
+			 * chunks.
+			 */
+			space_to_skip = 0;
+		}
+
+		if (length == 0ULL) {
 			break;
 		}
 	}
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
index 234027f30..2b441c0f0 100644
--- a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
@@ -99,6 +99,18 @@ u64 nvgpu_sgt_get_phys(struct gk20a *g, struct nvgpu_sgt *sgt,
 	return sgt->ops->sgl_phys(g, sgl);
 }
 
+u64 nvgpu_sgt_get_ipa(struct gk20a *g, struct nvgpu_sgt *sgt,
+		      struct nvgpu_sgl *sgl)
+{
+	return sgt->ops->sgl_ipa(g, sgl);
+}
+
+u64 nvgpu_sgt_ipa_to_pa(struct gk20a *g, struct nvgpu_sgt *sgt,
+			struct nvgpu_sgl *sgl, u64 ipa, u64 *pa_len)
+{
+	return sgt->ops->sgl_ipa_to_pa(g, sgl, ipa, pa_len);
+}
+
 u64 nvgpu_sgt_get_dma(struct nvgpu_sgt *sgt, struct nvgpu_sgl *sgl)
 {
 	return sgt->ops->sgl_dma(sgl);
diff --git a/drivers/gpu/nvgpu/common/mm/page_allocator.c b/drivers/gpu/nvgpu/common/mm/page_allocator.c
index 21feadec1..45151dd8a 100644
--- a/drivers/gpu/nvgpu/common/mm/page_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/page_allocator.c
@@ -167,6 +167,12 @@ static u64 nvgpu_page_alloc_sgl_phys(struct gk20a *g, struct nvgpu_sgl *sgl)
 	return sgl_impl->phys;
 }
 
+static u64 nvgpu_page_alloc_sgl_ipa_to_pa(struct gk20a *g,
+		struct nvgpu_sgl *sgl, u64 ipa, u64 *pa_len)
+{
+	return ipa;
+}
+
 static u64 nvgpu_page_alloc_sgl_dma(struct nvgpu_sgl *sgl)
 {
 	struct nvgpu_mem_sgl *sgl_impl = (struct nvgpu_mem_sgl *)sgl;
@@ -205,6 +211,8 @@ static void nvgpu_page_alloc_sgt_free(struct gk20a *g, struct nvgpu_sgt *sgt)
 static const struct nvgpu_sgt_ops page_alloc_sgl_ops = {
 	.sgl_next = nvgpu_page_alloc_sgl_next,
 	.sgl_phys = nvgpu_page_alloc_sgl_phys,
+	.sgl_ipa = nvgpu_page_alloc_sgl_phys,
+	.sgl_ipa_to_pa = nvgpu_page_alloc_sgl_ipa_to_pa,
 	.sgl_dma = nvgpu_page_alloc_sgl_dma,
 	.sgl_length = nvgpu_page_alloc_sgl_length,
 	.sgl_gpu_addr = nvgpu_page_alloc_sgl_gpu_addr,
diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
index e69274b51..76c225eb4 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
@@ -71,6 +71,9 @@ struct nvgpu_sgl;
 struct nvgpu_sgt_ops {
 	struct nvgpu_sgl *(*sgl_next)(struct nvgpu_sgl *sgl);
 	u64 (*sgl_phys)(struct gk20a *g, struct nvgpu_sgl *sgl);
+	u64 (*sgl_ipa)(struct gk20a *g, struct nvgpu_sgl *sgl);
+	u64 (*sgl_ipa_to_pa)(struct gk20a *g, struct nvgpu_sgl *sgl,
+			u64 ipa, u64 *pa_len);
 	u64 (*sgl_dma)(struct nvgpu_sgl *sgl);
 	u64 (*sgl_length)(struct nvgpu_sgl *sgl);
 	u64 (*sgl_gpu_addr)(struct gk20a *g, struct nvgpu_sgl *sgl,
@@ -255,6 +258,10 @@ struct nvgpu_sgt *nvgpu_sgt_create_from_mem(struct gk20a *g,
 
 struct nvgpu_sgl *nvgpu_sgt_get_next(struct nvgpu_sgt *sgt,
 				     struct nvgpu_sgl *sgl);
+u64 nvgpu_sgt_get_ipa(struct gk20a *g, struct nvgpu_sgt *sgt,
+		      struct nvgpu_sgl *sgl);
+u64 nvgpu_sgt_ipa_to_pa(struct gk20a *g, struct nvgpu_sgt *sgt,
+			struct nvgpu_sgl *sgl, u64 ipa, u64 *pa_len);
 u64 nvgpu_sgt_get_phys(struct gk20a *g, struct nvgpu_sgt *sgt,
 		       struct nvgpu_sgl *sgl);
 u64 nvgpu_sgt_get_dma(struct nvgpu_sgt *sgt, struct nvgpu_sgl *sgl);
diff --git a/drivers/gpu/nvgpu/os/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/os/linux/nvgpu_mem.c
index d6a3189e6..1a6e64250 100644
--- a/drivers/gpu/nvgpu/os/linux/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/os/linux/nvgpu_mem.c
@@ -36,6 +36,11 @@
 #include "gk20a/mm_gk20a.h"
 #include "platform_gk20a.h"
 
+static u64 __nvgpu_sgl_ipa(struct gk20a *g, struct nvgpu_sgl *sgl)
+{
+	return sg_phys((struct scatterlist *)sgl);
+}
+
 static u64 __nvgpu_sgl_phys(struct gk20a *g, struct nvgpu_sgl *sgl)
 {
 	struct device *dev = dev_from_gk20a(g);
@@ -43,7 +48,7 @@ static u64 __nvgpu_sgl_phys(struct gk20a *g, struct nvgpu_sgl *sgl)
 	u64 ipa = sg_phys((struct scatterlist *)sgl);
 
 	if (platform->phys_addr)
-		return platform->phys_addr(g, ipa);
+		return platform->phys_addr(g, ipa, NULL);
 
 	return ipa;
 }
@@ -251,6 +256,23 @@ static struct nvgpu_sgl *nvgpu_mem_linux_sgl_next(struct nvgpu_sgl *sgl)
 	return (struct nvgpu_sgl *)sg_next((struct scatterlist *)sgl);
 }
 
+static u64 nvgpu_mem_linux_sgl_ipa(struct gk20a *g, struct nvgpu_sgl *sgl)
+{
+	return __nvgpu_sgl_ipa(g, sgl);
+}
+
+static u64 nvgpu_mem_linux_sgl_ipa_to_pa(struct gk20a *g,
+		struct nvgpu_sgl *sgl, u64 ipa, u64 *pa_len)
+{
+	struct device *dev = dev_from_gk20a(g);
+	struct gk20a_platform *platform = gk20a_get_platform(dev);
+
+	if (platform->phys_addr)
+		return platform->phys_addr(g, ipa, pa_len);
+
+	return ipa;
+}
+
 static u64 nvgpu_mem_linux_sgl_phys(struct gk20a *g, struct nvgpu_sgl *sgl)
 {
 	return (u64)__nvgpu_sgl_phys(g, sgl);
@@ -301,6 +323,8 @@ static void nvgpu_mem_linux_sgl_free(struct gk20a *g, struct nvgpu_sgt *sgt)
 static const struct nvgpu_sgt_ops nvgpu_linux_sgt_ops = {
 	.sgl_next = nvgpu_mem_linux_sgl_next,
 	.sgl_phys = nvgpu_mem_linux_sgl_phys,
+	.sgl_ipa = nvgpu_mem_linux_sgl_ipa,
+	.sgl_ipa_to_pa = nvgpu_mem_linux_sgl_ipa_to_pa,
 	.sgl_dma = nvgpu_mem_linux_sgl_dma,
 	.sgl_length = nvgpu_mem_linux_sgl_length,
 	.sgl_gpu_addr = nvgpu_mem_linux_sgl_gpu_addr,
diff --git a/drivers/gpu/nvgpu/os/linux/platform_gk20a.h b/drivers/gpu/nvgpu/os/linux/platform_gk20a.h
index 10b988958..16aea33fc 100644
--- a/drivers/gpu/nvgpu/os/linux/platform_gk20a.h
+++ b/drivers/gpu/nvgpu/os/linux/platform_gk20a.h
@@ -234,7 +234,7 @@ struct gk20a_platform {
 	 * addresses (not IPA). This is the case for GV100 nvlink in HV+L
	 * configuration, when dGPU is in pass-through mode.
 	 */
-	u64 (*phys_addr)(struct gk20a *g, u64 ipa);
+	u64 (*phys_addr)(struct gk20a *g, u64 ipa, u64 *pa_len);
 
 	/* Callbacks to assert/deassert GPU reset */
 	int (*reset_assert)(struct device *dev);
diff --git a/drivers/gpu/nvgpu/os/linux/soc.c b/drivers/gpu/nvgpu/os/linux/soc.c
index 1b27d6f19..6c5228a46 100644
--- a/drivers/gpu/nvgpu/os/linux/soc.c
+++ b/drivers/gpu/nvgpu/os/linux/soc.c
@@ -65,7 +65,7 @@ bool nvgpu_is_soc_t194_a01(struct gk20a *g)
  * is enabled), the addresses we get from dma_alloc are IPAs. We need to
  * convert them to PA.
  */
-static u64 nvgpu_tegra_hv_ipa_pa(struct gk20a *g, u64 ipa)
+static u64 nvgpu_tegra_hv_ipa_pa(struct gk20a *g, u64 ipa, u64 *pa_len)
 {
 	struct device *dev = dev_from_gk20a(g);
 	struct gk20a_platform *platform = gk20a_get_platform(dev);
@@ -92,6 +92,13 @@ static u64 nvgpu_tegra_hv_ipa_pa(struct gk20a *g, u64 ipa)
 		}
 	} else {
 		pa = info.base + info.offset;
+		if (pa_len != NULL) {
+			/*
+			 * Return the size of the physical memory chunk
+			 * remaining after the specified offset.
+			 */
+			*pa_len = info.size - info.offset;
+		}
 		nvgpu_log(g, gpu_dbg_map_v,
 			  "ipa=%llx vmid=%d -> pa=%llx "
 			  "base=%llx offset=%llx size=%llx\n",
diff --git a/drivers/gpu/nvgpu/os/posix/posix-nvgpu_mem.c b/drivers/gpu/nvgpu/os/posix/posix-nvgpu_mem.c
index 502cd1c38..d9a48822d 100644
--- a/drivers/gpu/nvgpu/os/posix/posix-nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/os/posix/posix-nvgpu_mem.c
@@ -56,6 +56,14 @@ static u64 nvgpu_mem_sgl_phys(struct gk20a *g, struct nvgpu_sgl *sgl)
 	return (u64)(uintptr_t)mem->phys;
 }
 
+static u64 nvgpu_mem_sgl_ipa_to_pa(struct gk20a *g, struct nvgpu_sgl *sgl,
+		u64 ipa, u64 *pa_len)
+{
+	struct nvgpu_mem *mem = (struct nvgpu_mem *)sgl;
+
+	return (u64)(uintptr_t)mem->cpu_va;
+}
+
 static u64 nvgpu_mem_sgl_dma(struct nvgpu_sgl *sgl)
 {
 	struct nvgpu_mem_sgl *mem = (struct nvgpu_mem_sgl *)sgl;
@@ -99,6 +107,8 @@ static void nvgpu_mem_sgt_free(struct gk20a *g, struct nvgpu_sgt *sgt)
 static struct nvgpu_sgt_ops nvgpu_sgt_posix_ops = {
	.sgl_next = nvgpu_mem_sgl_next,
 	.sgl_phys = nvgpu_mem_sgl_phys,
+	.sgl_ipa = nvgpu_mem_sgl_phys,
+	.sgl_ipa_to_pa = nvgpu_mem_sgl_ipa_to_pa,
 	.sgl_dma = nvgpu_mem_sgl_dma,
 	.sgl_length = nvgpu_mem_sgl_length,
 	.sgl_gpu_addr = nvgpu_mem_sgl_gpu_addr,
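
Reviewer note (not part of the patch): the chunk-walking loop added to __nvgpu_gmmu_do_update_page_table() is easiest to sanity-check in isolation. Below is a minimal, self-contained sketch of that loop, assuming a hypothetical fake_ipa_to_pa() in which the hypervisor backs each 0x1000-byte IPA page with its own scattered physical chunk; fake_ipa_to_pa() stands in for nvgpu_sgt_ipa_to_pa() and the printf() stands in for __set_pd_level(). It is illustrative only, not nvgpu code.

#include <stdio.h>
#include <stdint.h>

typedef uint64_t u64;

#define MIN(a, b) ((a) < (b) ? (a) : (b))

/*
 * Hypothetical stand-in for nvgpu_sgt_ipa_to_pa(): pretend PA contiguity
 * ends at every 0x1000-byte IPA page boundary. Returns the PA for 'ipa'
 * and stores the contiguous bytes remaining in that chunk in *pa_len.
 */
static u64 fake_ipa_to_pa(u64 ipa, u64 *pa_len)
{
	const u64 chunk = 0x1000;
	u64 off = ipa % chunk;

	*pa_len = chunk - off;
	return 0x80000000ULL + (ipa / chunk) * 2ULL * chunk + off;
}

int main(void)
{
	/* One SGL entry: three pages of IPA starting at 0x4000. */
	u64 ipa_addr = 0x4000;
	u64 sgl_length = 0x3000;
	u64 space_to_skip = 0x1800;	/* map offset into the entry */
	u64 length = 0x1000;		/* bytes left to map */
	u64 virt_addr = 0x100000;

	while (sgl_length > 0ULL && length > 0ULL) {
		u64 phys_length;
		u64 phys_addr = fake_ipa_to_pa(ipa_addr, &phys_length);
		u64 sgl_contiguous_length;
		u64 mapped_sgl_length;

		/* Chunk lies entirely inside the skip region: drop it. */
		if (space_to_skip >= phys_length) {
			space_to_skip -= phys_length;
			ipa_addr += phys_length;
			sgl_length -= phys_length;
			continue;
		}

		phys_addr += space_to_skip;
		sgl_contiguous_length = MIN(phys_length, sgl_length);
		mapped_sgl_length = MIN(length,
					sgl_contiguous_length - space_to_skip);

		/* __set_pd_level() would program the page tables here. */
		printf("map VA 0x%llx -> PA 0x%llx (0x%llx bytes)\n",
		       (unsigned long long)virt_addr,
		       (unsigned long long)phys_addr,
		       (unsigned long long)mapped_sgl_length);

		virt_addr += mapped_sgl_length;
		length -= mapped_sgl_length;
		sgl_length -= mapped_sgl_length + space_to_skip;
		ipa_addr += mapped_sgl_length + space_to_skip;
		space_to_skip = 0;
	}
	return 0;
}

With the values above, the first 0x1000-byte chunk is consumed entirely by space_to_skip, and the remaining 0x1000 bytes are mapped as two 0x800-byte pieces because physical contiguity ends at each chunk boundary. This is exactly why the patch retires mapped_sgl_length (plus any skipped bytes) from sgl_length and ipa_addr on every pass, rather than consuming a whole SGL entry per iteration as the old chunk_length code did.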