From 321145b37ecfcb8d81271d529ca92a696161704b Mon Sep 17 00:00:00 2001
From: Prathap Kumar Valsan
Date: Mon, 22 May 2023 08:49:19 +0000
Subject: [PATCH] gpu: nvgpu: Enable raw mode for compression

In the raw addressing mode of the CBC backing storage, comptaglines do
not need to be allocated or programmed into the PTEs.

Introduce a flag that indicates whether the hardware supports raw mode,
and use it to skip comptagline allocation and the corresponding page
table programming.

JIRA NVGPU-9717

Change-Id: I0a16881fc3e897c3c408b30d1835f30564649dad
Signed-off-by: Prathap Kumar Valsan
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2908278
Tested-by: mobile promotions
Reviewed-by: mobile promotions
---
 drivers/gpu/nvgpu/common/mm/gmmu/page_table.c | 30 +++++++------
 drivers/gpu/nvgpu/common/mm/gmmu/pte.c        | 34 ++++++++-------
 drivers/gpu/nvgpu/common/mm/vm.c              | 12 ++++++
 drivers/gpu/nvgpu/common/mm/vm_remap.c        | 11 +++--
 drivers/gpu/nvgpu/include/nvgpu/gk20a.h       |  7 +++
 drivers/gpu/nvgpu/os/linux/comptags.c         | 12 +++++-
 drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c       | 43 +++++++++++++------
 drivers/gpu/nvgpu/os/linux/vm.c               | 15 ++++---
 drivers/gpu/nvgpu/os/linux/vm_remap.c         | 28 ++++++------
 9 files changed, 125 insertions(+), 67 deletions(-)

diff --git a/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c b/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c
index 7a3b03ef0..f748d630a 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c
@@ -1186,23 +1186,25 @@ u64 nvgpu_gmmu_map_locked(struct vm_gk20a *vm,
         .aperture = aperture,
     };
 #ifdef CONFIG_NVGPU_COMPRESSION
-    u64 ctag_granularity = g->ops.fb.compression_page_size(g);
+    if (!g->cbc_use_raw_mode) {
+        u64 ctag_granularity = g->ops.fb.compression_page_size(g);

-    attrs.ctag = (u64)ctag_offset * ctag_granularity;
-    /*
-     * We need to add the buffer_offset within compression_page_size so that
-     * the programmed ctagline gets increased at compression_page_size
-     * boundaries.
-     */
-    if (attrs.ctag != 0ULL) {
-        nvgpu_assert(ctag_granularity >= 1ULL);
-        attrs.ctag = nvgpu_safe_add_u64(attrs.ctag,
-            buffer_offset & (ctag_granularity - U64(1)));
-    }
+        attrs.ctag = (u64)ctag_offset * ctag_granularity;
+        /*
+         * We need to add the buffer_offset within compression_page_size so that
+         * the programmed ctagline gets increased at compression_page_size
+         * boundaries.
+         */
+        if (attrs.ctag != 0ULL) {
+            nvgpu_assert(ctag_granularity >= 1ULL);
+            attrs.ctag = nvgpu_safe_add_u64(attrs.ctag,
+                buffer_offset & (ctag_granularity - U64(1)));
+        }

-    attrs.cbc_comptagline_mode =
-        g->ops.fb.is_comptagline_mode_enabled != NULL ?
+        attrs.cbc_comptagline_mode =
+            g->ops.fb.is_comptagline_mode_enabled != NULL ?
             g->ops.fb.is_comptagline_mode_enabled(g) : true;
+    }
 #endif

     attrs.l3_alloc = ((flags & NVGPU_VM_MAP_L3_ALLOC) != 0U);
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu/pte.c b/drivers/gpu/nvgpu/common/mm/gmmu/pte.c
index 19510525b..73edb00f8 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu/pte.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu/pte.c
@@ -72,24 +72,26 @@ void nvgpu_pte_dbg_print(struct gk20a *g,
     const char *aperture_str = nvgpu_aperture_str(attrs->aperture);
     const char *perm_str = nvgpu_gmmu_perm_str(attrs->rw_flag);
 #ifdef CONFIG_NVGPU_COMPRESSION
-    u64 ctag_tmp = attrs->ctag;
-    u32 str_len = 0U;
-    u32 ctag_num = 0U;
+    if (!g->cbc_use_raw_mode) {
+        u64 ctag_tmp = attrs->ctag;
+        u32 str_len = 0U;
+        u32 ctag_num = 0U;

-    /*
-     * attrs->ctag is incremented to count current page size as well.
-     * Subtract to get this page's ctag line number.
-     */
-    if (ctag_tmp != 0ULL) {
-        ctag_tmp = nvgpu_safe_sub_u64(ctag_tmp, page_size);
+        /*
+         * attrs->ctag is incremented to count current page size as well.
+         * Subtract to get this page's ctag line number.
+         */
+        if (ctag_tmp != 0ULL) {
+            ctag_tmp = nvgpu_safe_sub_u64(ctag_tmp, page_size);
+        }
+
+        ctag_num = nvgpu_safe_cast_u64_to_u32(ctag_tmp /
+                g->ops.fb.compression_page_size(g));
+        (void)strcpy(ctag_str, "ctag=0x\0");
+        str_len = (u32)strlen(ctag_str);
+        (void)nvgpu_strnadd_u32(ctag_str + str_len, ctag_num,
+                nvgpu_safe_sub_u32(31U, str_len), 16U);
     }
-
-    ctag_num = nvgpu_safe_cast_u64_to_u32(ctag_tmp /
-            g->ops.fb.compression_page_size(g));
-    (void)strcpy(ctag_str, "ctag=0x\0");
-    str_len = (u32)strlen(ctag_str);
-    (void)nvgpu_strnadd_u32(ctag_str + str_len, ctag_num,
-            nvgpu_safe_sub_u32(31U, str_len), 16U);
 #endif
     (void)map_attrs_to_str(attrs_str, attrs);
     pte_dbg(g, attrs,
diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c
index 3fee0a961..594fd2ecc 100644
--- a/drivers/gpu/nvgpu/common/mm/vm.c
+++ b/drivers/gpu/nvgpu/common/mm/vm.c
@@ -1187,6 +1187,17 @@ static int nvgpu_vm_do_map(struct vm_gk20a *vm,
     (void)os_buf;
     (void)flags;
 #ifdef CONFIG_NVGPU_COMPRESSION
+    if (g->cbc_use_raw_mode) {
+        if (binfo_ptr->compr_kind != NVGPU_KIND_INVALID) {
+            pte_kind = (u8)binfo_ptr->compr_kind;
+        } else if (binfo_ptr->incompr_kind >= 0) {
+            pte_kind = (u8)binfo_ptr->incompr_kind;
+        } else {
+            err = -EINVAL;
+            goto ret_err;
+        }
+        goto kind_done;
+    }
     err = nvgpu_vm_compute_compression(vm, binfo_ptr);
     if (err != 0) {
         nvgpu_err(g, "failure setting up compression");
@@ -1305,6 +1316,7 @@ static int nvgpu_vm_do_map(struct vm_gk20a *vm,
         goto ret_err;
     }

+kind_done:
 #ifdef CONFIG_NVGPU_COMPRESSION
     if (clear_ctags) {
         clear_ctags = gk20a_comptags_start_clear(os_buf);
diff --git a/drivers/gpu/nvgpu/common/mm/vm_remap.c b/drivers/gpu/nvgpu/common/mm/vm_remap.c
index e08cfbf4c..93f30b8fd 100644
--- a/drivers/gpu/nvgpu/common/mm/vm_remap.c
+++ b/drivers/gpu/nvgpu/common/mm/vm_remap.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2022, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2017-2023, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -202,6 +202,7 @@ static int nvgpu_vm_remap_validate_map(struct vm_gk20a *vm,
     struct nvgpu_vm_remap_os_buffer *remap_os_buf)
 {
     u64 page_size = nvgpu_vm_remap_page_size(op);
+    struct gk20a *g = gk20a_from_vm(vm);
     u64 map_offset;
     u64 map_size;
     u64 os_buf_size;
@@ -216,9 +217,9 @@ static int nvgpu_vm_remap_validate_map(struct vm_gk20a *vm,
     }

 #ifdef CONFIG_NVGPU_COMPRESSION
-    if (op->compr_kind != NVGPU_KIND_INVALID) {
+    if (op->compr_kind != NVGPU_KIND_INVALID &&
+        !g->cbc_use_raw_mode) {
-        struct gk20a *g = gk20a_from_vm(vm);
         struct gk20a_comptags comptags = { 0 };

         /*
@@ -360,6 +361,10 @@ static u64 nvgpu_vm_remap_get_ctag_offset(struct vm_gk20a *vm,
             page_size);
     u64 compression_page_size;

+    if (g->cbc_use_raw_mode) {
+        return 0;
+    }
+
     gk20a_get_comptags(os_buf, &comptags);

     if (comptags.lines != 0) {
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
index 51621e0d3..2fef09af6 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
@@ -752,6 +752,13 @@ struct gk20a {
     u32 max_comptag_mem; /* max memory size (MB) for comptag */
     struct nvgpu_cbc *cbc;

+    /*
+     * In raw mode, the L2 calculates the CBC backing storage address
+     * from the physical address of the compressible surface. The
+     * comptaglines are not used in the calculation, so nvgpu does not
+     * need to allocate them or program them into the page tables.
+     */
+    bool cbc_use_raw_mode;
 #endif

 #ifdef CONFIG_NVGPU_NON_FUSA
diff --git a/drivers/gpu/nvgpu/os/linux/comptags.c b/drivers/gpu/nvgpu/os/linux/comptags.c
index 6d248296e..6feb83920 100644
--- a/drivers/gpu/nvgpu/os/linux/comptags.c
+++ b/drivers/gpu/nvgpu/os/linux/comptags.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2022, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2017-2023, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
@@ -51,6 +51,16 @@ int gk20a_alloc_comptags(struct gk20a *g, struct nvgpu_os_buffer *buf,
     u32 lines = 0;
     int err;

+    /*
+     * In raw mode, the hardware uses the raw address to map the
+     * compressible memory address to the CBC address; comptaglines are
+     * never used.
+ */ + if (g->cbc_use_raw_mode) { + nvgpu_err(g, "comptags should not be allocated in raw mode\n"); + return -EINVAL; + } + ctag_granularity = g->ops.fb.compression_page_size(g); lines = DIV_ROUND_UP_ULL(buf->dmabuf->size, ctag_granularity); diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c index bee0eb4c5..91ded2fdb 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c @@ -428,10 +428,13 @@ static long gk20a_ctrl_ioctl_gpu_characteristics( #ifdef CONFIG_NVGPU_COMPRESSION if (nvgpu_is_enabled(g, NVGPU_SUPPORT_COMPRESSION)) { gpu.compression_page_size = g->ops.fb.compression_page_size(g); - gpu.gr_compbit_store_base_hw = g->cbc->compbit_store.base_hw; - gpu.gr_gobs_per_comptagline_per_slice = - g->cbc->gobs_per_comptagline_per_slice; - gpu.cbc_comptags_per_line = g->cbc->comptags_per_cacheline; + + if (!g->cbc_use_raw_mode) { + gpu.gr_compbit_store_base_hw = g->cbc->compbit_store.base_hw; + gpu.gr_gobs_per_comptagline_per_slice = + g->cbc->gobs_per_comptagline_per_slice; + gpu.cbc_comptags_per_line = g->cbc->comptags_per_cacheline; + } } #endif @@ -2256,16 +2259,28 @@ static int nvgpu_handle_comptags_control(struct gk20a *g, os_buf.dmabuf = dmabuf; os_buf.dev = dev_from_gk20a(g); - err = gk20a_alloc_comptags(g, &os_buf, &g->cbc->comp_tags); - if (err != 0) { - if (comptags_alloc_control == - NVGPU_GPU_COMPTAGS_ALLOC_REQUIRED) { - nvgpu_err(g, "Comptags allocation (required) failed (%d)", - err); - } else { - nvgpu_err(g, "Comptags allocation (requested) failed (%d)", - err); - err = 0; + /* + * In raw mode, comptaglines are not used to map the compressible memory + * address to the CBC address, instead raw address is used by the + * hardrdware to offset to the CBC address. Mark comptags as enabled and + * return. In the comptagline mode, comptags will be marked as enabled + * once the comptags are successfuly allocated in the + * gk20a_alloc_comptags(). + */ + if (g->cbc_use_raw_mode) { + priv->comptags.enabled = true; + } else { + err = gk20a_alloc_comptags(g, &os_buf, &g->cbc->comp_tags); + if (err != 0) { + if (comptags_alloc_control == + NVGPU_GPU_COMPTAGS_ALLOC_REQUIRED) { + nvgpu_err(g, "Comptags allocation (required) failed (%d)", + err); + } else { + nvgpu_err(g, "Comptags allocation (requested) failed (%d)", + err); + err = 0; + } } } diff --git a/drivers/gpu/nvgpu/os/linux/vm.c b/drivers/gpu/nvgpu/os/linux/vm.c index 1b5773daa..ba91ebe38 100644 --- a/drivers/gpu/nvgpu/os/linux/vm.c +++ b/drivers/gpu/nvgpu/os/linux/vm.c @@ -524,7 +524,8 @@ int nvgpu_vm_mapping_modify(struct vm_gk20a *vm, * If we support compression and there's a compressible kind, use it. 
      */
 #ifdef CONFIG_NVGPU_COMPRESSION
-    if (mapped_buffer->ctag_offset != 0) {
+    if (mapped_buffer->ctag_offset != 0 ||
+        g->cbc_use_raw_mode) {
         if (compr_kind == NV_KIND_INVALID) {
             kind = incompr_kind;
         } else {
@@ -549,13 +550,15 @@ int nvgpu_vm_mapping_modify(struct vm_gk20a *vm,
     }

 #ifdef CONFIG_NVGPU_COMPRESSION
-    ctag_offset = mapped_buffer->ctag_offset;
+    if (!g->cbc_use_raw_mode) {
+        ctag_offset = mapped_buffer->ctag_offset;

-    compression_page_size = g->ops.fb.compression_page_size(g);
-    nvgpu_assert(compression_page_size > 0ULL);
+        compression_page_size = g->ops.fb.compression_page_size(g);
+        nvgpu_assert(compression_page_size > 0ULL);

-    ctag_offset += (u32)(buffer_offset >>
-        nvgpu_ilog2(compression_page_size));
+        ctag_offset += (u32)(buffer_offset >>
+            nvgpu_ilog2(compression_page_size));
+    }
 #endif

     if (g->ops.mm.gmmu.map(vm,
diff --git a/drivers/gpu/nvgpu/os/linux/vm_remap.c b/drivers/gpu/nvgpu/os/linux/vm_remap.c
index 17b607295..2f5975d84 100644
--- a/drivers/gpu/nvgpu/os/linux/vm_remap.c
+++ b/drivers/gpu/nvgpu/os/linux/vm_remap.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2022, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2017-2023, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
@@ -127,19 +127,21 @@ void nvgpu_vm_remap_os_buf_put(struct vm_gk20a *vm,
         remap_os_buf->os_priv.attachment,
         remap_os_buf->os_priv.sgt);

 #ifdef CONFIG_NVGPU_COMPRESSION
-    gk20a_get_comptags(&remap_os_buf->os_buf, &comptags);
+    if (!g->cbc_use_raw_mode) {
+        gk20a_get_comptags(&remap_os_buf->os_buf, &comptags);

-    /*
-     * Flush compression bit cache before releasing the physical
-     * memory buffer reference.
-     */
-    if (comptags.offset != 0) {
-        g->ops.cbc.ctrl(g, nvgpu_cbc_op_clean, 0, 0);
-        err = nvgpu_pg_elpg_ms_protected_call(g,
-            g->ops.mm.cache.l2_flush(g, true));
-        if (err != 0) {
-            nvgpu_err(g, "l2 flush failed");
-            return;
+        /*
+         * Flush compression bit cache before releasing the physical
+         * memory buffer reference.
+         */
+        if (comptags.offset != 0) {
+            g->ops.cbc.ctrl(g, nvgpu_cbc_op_clean, 0, 0);
+            err = nvgpu_pg_elpg_ms_protected_call(g,
+                g->ops.mm.cache.l2_flush(g, true));
+            if (err != 0) {
+                nvgpu_err(g, "l2 flush failed");
+                return;
+            }
         }
     }
 #endif
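
For readers outside the nvgpu tree, the following is a minimal standalone sketch
of the control flow this patch introduces. All names here (struct gpu,
alloc_comptaglines, map_compressible_buffer, the 64 KiB compression page size)
are hypothetical stand-ins, not the nvgpu API; the point is only that a single
cbc_use_raw_mode flag short-circuits comptagline allocation and leaves the PTE
ctag offset at zero, since in raw mode the L2 derives the CBC backing storage
address from the surface's physical address.

/* Hypothetical sketch only -- not nvgpu code. Build: cc -o raw_mode raw_mode.c */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define COMPRESSION_PAGE_SIZE (64u * 1024u) /* assumed 64 KiB granularity */

struct gpu {
    bool cbc_use_raw_mode;  /* mirrors g->cbc_use_raw_mode */
    uint32_t next_ctagline; /* toy comptagline allocator state */
};

/*
 * Comptagline mode: one comptagline per compression page of the buffer,
 * which is later programmed into the PTEs.
 */
static uint32_t alloc_comptaglines(struct gpu *g, uint64_t buf_size)
{
    uint32_t lines = (uint32_t)((buf_size + COMPRESSION_PAGE_SIZE - 1) /
            COMPRESSION_PAGE_SIZE);
    uint32_t first = g->next_ctagline;

    g->next_ctagline += lines;
    return first;
}

/*
 * Returns the ctag offset to program into the PTEs. In raw mode it is
 * always 0: the hardware computes the CBC address from the physical
 * address, so no comptagline is allocated.
 */
static uint32_t map_compressible_buffer(struct gpu *g, uint64_t buf_size)
{
    if (g->cbc_use_raw_mode) {
        return 0; /* skip allocation and PTE programming entirely */
    }
    return alloc_comptaglines(g, buf_size);
}

int main(void)
{
    struct gpu raw = { .cbc_use_raw_mode = true, .next_ctagline = 1 };
    struct gpu tag = { .cbc_use_raw_mode = false, .next_ctagline = 1 };

    printf("raw mode: ctag offset %u\n",
        map_compressible_buffer(&raw, 1u << 20));
    printf("comptagline mode: ctag offset %u\n",
        map_compressible_buffer(&tag, 1u << 20));
    return 0;
}

With a 1 MiB buffer, the comptagline path in this toy model hands out 16 lines
starting at line 1, while the raw path allocates nothing and programs nothing.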