mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
gpu: nvgpu: Enable raw mode for compression
In raw addressing mode of CBC backing storage, comptaglines do not need
to be allocated or programmed in the PTEs. Introduce a flag to detect
whether the hardware supports raw mode, and use it to skip all
comptagline allocations and the corresponding page table programming.

JIRA NVGPU-9717

Change-Id: I0a16881fc3e897c3c408b30d1835f30564649dad
Signed-off-by: Prathap Kumar Valsan <prathapk@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2908278
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
commit 321145b37e
parent c8ceef2d08
committed by mobile promotions
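
Note: the diff below only consumes g->cbc_use_raw_mode; this commit does
not show where the flag gets set. A minimal sketch of how an init path
might derive it from the is_comptagline_mode_enabled HAL op that the
patch queries later (the helper name and its placement are assumptions,
not part of the patch):

/*
 * Hypothetical helper, for illustration only: raw mode applies when the
 * FB HAL reports that comptagline mode is disabled; chips without the
 * query default to comptagline mode.
 */
static void nvgpu_cbc_detect_raw_mode(struct gk20a *g)
{
	g->cbc_use_raw_mode =
		(g->ops.fb.is_comptagline_mode_enabled != NULL) &&
		!g->ops.fb.is_comptagline_mode_enabled(g);
}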
@@ -1186,23 +1186,25 @@ u64 nvgpu_gmmu_map_locked(struct vm_gk20a *vm,
 		.aperture = aperture,
 	};
 #ifdef CONFIG_NVGPU_COMPRESSION
-	u64 ctag_granularity = g->ops.fb.compression_page_size(g);
+	if (!g->cbc_use_raw_mode) {
+		u64 ctag_granularity = g->ops.fb.compression_page_size(g);
 
-	attrs.ctag = (u64)ctag_offset * ctag_granularity;
-	/*
-	 * We need to add the buffer_offset within compression_page_size so that
-	 * the programmed ctagline gets increased at compression_page_size
-	 * boundaries.
-	 */
-	if (attrs.ctag != 0ULL) {
-		nvgpu_assert(ctag_granularity >= 1ULL);
-		attrs.ctag = nvgpu_safe_add_u64(attrs.ctag,
-			buffer_offset & (ctag_granularity - U64(1)));
-	}
+		attrs.ctag = (u64)ctag_offset * ctag_granularity;
+		/*
+		 * We need to add the buffer_offset within compression_page_size so that
+		 * the programmed ctagline gets increased at compression_page_size
+		 * boundaries.
+		 */
+		if (attrs.ctag != 0ULL) {
+			nvgpu_assert(ctag_granularity >= 1ULL);
+			attrs.ctag = nvgpu_safe_add_u64(attrs.ctag,
+				buffer_offset & (ctag_granularity - U64(1)));
+		}
 
-	attrs.cbc_comptagline_mode =
-		g->ops.fb.is_comptagline_mode_enabled != NULL ?
-		g->ops.fb.is_comptagline_mode_enabled(g) : true;
+		attrs.cbc_comptagline_mode =
+			g->ops.fb.is_comptagline_mode_enabled != NULL ?
+			g->ops.fb.is_comptagline_mode_enabled(g) : true;
+	}
 #endif
 
 	attrs.l3_alloc = ((flags & NVGPU_VM_MAP_L3_ALLOC) != 0U);

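A worked example of the ctag math above, assuming a 128 KiB compression
page size (the real value is chip-specific and comes from
g->ops.fb.compression_page_size()):

/* Assumed values, for illustration only. */
u64 ctag_granularity = 0x20000ULL;   /* 128 KiB compression page, assumed */
u64 ctag_offset = 5ULL;              /* comptagline allocated to the buffer */
u64 buffer_offset = 0x23000ULL;      /* mapping starts mid-buffer */

u64 ctag = ctag_offset * ctag_granularity;           /* 0xA0000 */
/* Keep the sub-compression-page remainder so the programmed ctagline
 * advances exactly at compression-page boundaries. */
ctag += buffer_offset & (ctag_granularity - 1ULL);   /* + 0x3000 = 0xA3000 */
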
@@ -72,24 +72,26 @@ void nvgpu_pte_dbg_print(struct gk20a *g,
 	const char *aperture_str = nvgpu_aperture_str(attrs->aperture);
 	const char *perm_str = nvgpu_gmmu_perm_str(attrs->rw_flag);
 #ifdef CONFIG_NVGPU_COMPRESSION
-	u64 ctag_tmp = attrs->ctag;
-	u32 str_len = 0U;
-	u32 ctag_num = 0U;
+	if (!g->cbc_use_raw_mode) {
+		u64 ctag_tmp = attrs->ctag;
+		u32 str_len = 0U;
+		u32 ctag_num = 0U;
 
-	/*
-	 * attrs->ctag is incremented to count current page size as well.
-	 * Subtract to get this page's ctag line number.
-	 */
-	if (ctag_tmp != 0ULL) {
-		ctag_tmp = nvgpu_safe_sub_u64(ctag_tmp, page_size);
-	}
+		/*
+		 * attrs->ctag is incremented to count current page size as well.
+		 * Subtract to get this page's ctag line number.
+		 */
+		if (ctag_tmp != 0ULL) {
+			ctag_tmp = nvgpu_safe_sub_u64(ctag_tmp, page_size);
+		}
 
-	ctag_num = nvgpu_safe_cast_u64_to_u32(ctag_tmp /
-			g->ops.fb.compression_page_size(g));
-	(void)strcpy(ctag_str, "ctag=0x\0");
-	str_len = (u32)strlen(ctag_str);
-	(void)nvgpu_strnadd_u32(ctag_str + str_len, ctag_num,
-			nvgpu_safe_sub_u32(31U, str_len), 16U);
+		ctag_num = nvgpu_safe_cast_u64_to_u32(ctag_tmp /
+				g->ops.fb.compression_page_size(g));
+		(void)strcpy(ctag_str, "ctag=0x\0");
+		str_len = (u32)strlen(ctag_str);
+		(void)nvgpu_strnadd_u32(ctag_str + str_len, ctag_num,
+				nvgpu_safe_sub_u32(31U, str_len), 16U);
+	}
 #endif
 	(void)map_attrs_to_str(attrs_str, attrs);
 	pte_dbg(g, attrs,

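A quick numeric check of the "subtract the current page" comment above,
with assumed sizes (64 KiB page, 128 KiB compression page; both are
chip- and mapping-specific in practice):

/* Assumed values, for illustration only. */
u64 page_size = 0x10000ULL;    /* 64 KiB page that was just mapped */
u64 comp_page = 0x20000ULL;    /* compression page size */
u64 ctag = 0x60000ULL;         /* attrs->ctag, already past this page */

u64 ctag_tmp = ctag - page_size;              /* 0x50000 */
u32 ctag_num = (u32)(ctag_tmp / comp_page);   /* 0x50000 / 0x20000 = 2 */
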
@@ -1187,6 +1187,17 @@ static int nvgpu_vm_do_map(struct vm_gk20a *vm,
 	(void)os_buf;
 	(void)flags;
 #ifdef CONFIG_NVGPU_COMPRESSION
+	if (g->cbc_use_raw_mode) {
+		if (binfo_ptr->compr_kind != NVGPU_KIND_INVALID) {
+			pte_kind = (u8)binfo_ptr->compr_kind;
+		} else if ((binfo_ptr->incompr_kind >= 0)) {
+			pte_kind = (u8)binfo_ptr->incompr_kind;
+		} else {
+			err = -EINVAL;
+			goto ret_err;
+		}
+		goto kind_done;
+	}
 	err = nvgpu_vm_compute_compression(vm, binfo_ptr);
 	if (err != 0) {
 		nvgpu_err(g, "failure setting up compression");

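For context on the two tests above: the kind fields are signed, and
NVGPU_KIND_INVALID is negative (-1 in nvgpu's headers, as far as I can
tell), so incompr_kind >= 0 is the validity check. The same fallback as
a standalone sketch, under that assumption:

/* Sketch of the raw-mode kind selection; assumes NVGPU_KIND_INVALID < 0. */
static int raw_mode_pick_kind(s16 compr_kind, s16 incompr_kind, u8 *pte_kind)
{
	if (compr_kind != NVGPU_KIND_INVALID) {
		*pte_kind = (u8)compr_kind;     /* prefer the compressible kind */
	} else if (incompr_kind >= 0) {
		*pte_kind = (u8)incompr_kind;   /* fall back to incompressible */
	} else {
		return -EINVAL;                 /* no usable kind supplied */
	}
	return 0;
}
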
@@ -1305,6 +1316,7 @@ static int nvgpu_vm_do_map(struct vm_gk20a *vm,
 		goto ret_err;
 	}
 
+kind_done:
 #ifdef CONFIG_NVGPU_COMPRESSION
 	if (clear_ctags) {
 		clear_ctags = gk20a_comptags_start_clear(os_buf);

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2022, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2017-2023, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),

@@ -202,6 +202,7 @@ static int nvgpu_vm_remap_validate_map(struct vm_gk20a *vm,
 	struct nvgpu_vm_remap_os_buffer *remap_os_buf)
 {
 	u64 page_size = nvgpu_vm_remap_page_size(op);
+	struct gk20a *g = gk20a_from_vm(vm);
 	u64 map_offset;
 	u64 map_size;
 	u64 os_buf_size;

@@ -216,9 +217,9 @@ static int nvgpu_vm_remap_validate_map(struct vm_gk20a *vm,
 	}
 
 #ifdef CONFIG_NVGPU_COMPRESSION
-	if (op->compr_kind != NVGPU_KIND_INVALID) {
-		struct gk20a *g = gk20a_from_vm(vm);
+	if (op->compr_kind != NVGPU_KIND_INVALID &&
+	    !g->cbc_use_raw_mode) {
 		struct gk20a_comptags comptags = { 0 };
 
 		/*

@@ -360,6 +361,10 @@ static u64 nvgpu_vm_remap_get_ctag_offset(struct vm_gk20a *vm,
 			page_size);
 	u64 compression_page_size;
 
+	if (g->cbc_use_raw_mode) {
+		return 0;
+	}
+
 	gk20a_get_comptags(os_buf, &comptags);
 
 	if (comptags.lines != 0) {

@@ -752,6 +752,13 @@ struct gk20a {
 
 	u32 max_comptag_mem; /* max memory size (MB) for comptag */
 	struct nvgpu_cbc *cbc;
+	/*
+	 * In raw mode, L2 calculates the CBC backing storage address from
+	 * physical address of the compressible surface. The comptag lines are
+	 * not used in the calculation, so nvgpu doesn't need to allocate comptag
+	 * lines in the pagetable.
+	 */
+	bool cbc_use_raw_mode;
 #endif
 
 #ifdef CONFIG_NVGPU_NON_FUSA

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2022, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2017-2023, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,

@@ -51,6 +51,16 @@ int gk20a_alloc_comptags(struct gk20a *g, struct nvgpu_os_buffer *buf,
 	u32 lines = 0;
 	int err;
 
+	/*
+	 * In raw mode, raw address is used by the hardware to map the
+	 * compressible memory address to CBC address, comptaglines are never
+	 * used.
+	 */
+	if (g->cbc_use_raw_mode) {
+		nvgpu_err(g, "comptags should not be allocated in raw mode\n");
+		return -EINVAL;
+	}
+
 	ctag_granularity = g->ops.fb.compression_page_size(g);
 	lines = DIV_ROUND_UP_ULL(buf->dmabuf->size, ctag_granularity);
 
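The line count above is a plain ceiling division: one comptagline covers
one compression page. For example, assuming a 16 MiB dma-buf and a
128 KiB compression page size (both values are illustrative):

u64 buf_size = 16ULL << 20;            /* 16 MiB dma-buf, assumed */
u64 ctag_granularity = 128ULL << 10;   /* 128 KiB compression page, assumed */

/* DIV_ROUND_UP_ULL(buf_size, ctag_granularity) */
u64 lines = (buf_size + ctag_granularity - 1ULL) / ctag_granularity;   /* 128 */
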
@@ -428,10 +428,13 @@ static long gk20a_ctrl_ioctl_gpu_characteristics(
 #ifdef CONFIG_NVGPU_COMPRESSION
 	if (nvgpu_is_enabled(g, NVGPU_SUPPORT_COMPRESSION)) {
 		gpu.compression_page_size = g->ops.fb.compression_page_size(g);
-		gpu.gr_compbit_store_base_hw = g->cbc->compbit_store.base_hw;
-		gpu.gr_gobs_per_comptagline_per_slice =
-			g->cbc->gobs_per_comptagline_per_slice;
-		gpu.cbc_comptags_per_line = g->cbc->comptags_per_cacheline;
+
+		if (!g->cbc_use_raw_mode) {
+			gpu.gr_compbit_store_base_hw = g->cbc->compbit_store.base_hw;
+			gpu.gr_gobs_per_comptagline_per_slice =
+				g->cbc->gobs_per_comptagline_per_slice;
+			gpu.cbc_comptags_per_line = g->cbc->comptags_per_cacheline;
+		}
 	}
 #endif
 
@@ -2256,16 +2259,28 @@ static int nvgpu_handle_comptags_control(struct gk20a *g,
 	os_buf.dmabuf = dmabuf;
 	os_buf.dev = dev_from_gk20a(g);
 
-	err = gk20a_alloc_comptags(g, &os_buf, &g->cbc->comp_tags);
-	if (err != 0) {
-		if (comptags_alloc_control ==
-				NVGPU_GPU_COMPTAGS_ALLOC_REQUIRED) {
-			nvgpu_err(g, "Comptags allocation (required) failed (%d)",
-				err);
-		} else {
-			nvgpu_err(g, "Comptags allocation (requested) failed (%d)",
-				err);
-			err = 0;
-		}
-	}
+	/*
+	 * In raw mode, comptaglines are not used to map the compressible memory
+	 * address to the CBC address, instead raw address is used by the
+	 * hardrdware to offset to the CBC address. Mark comptags as enabled and
+	 * return. In the comptagline mode, comptags will be marked as enabled
+	 * once the comptags are successfuly allocated in the
+	 * gk20a_alloc_comptags().
+	 */
+	if (g->cbc_use_raw_mode) {
+		priv->comptags.enabled = true;
+	} else {
+		err = gk20a_alloc_comptags(g, &os_buf, &g->cbc->comp_tags);
+		if (err != 0) {
+			if (comptags_alloc_control ==
+					NVGPU_GPU_COMPTAGS_ALLOC_REQUIRED) {
+				nvgpu_err(g, "Comptags allocation (required) failed (%d)",
+					err);
+			} else {
+				nvgpu_err(g, "Comptags allocation (requested) failed (%d)",
+					err);
+				err = 0;
+			}
+		}
+	}
 
@@ -524,7 +524,8 @@ int nvgpu_vm_mapping_modify(struct vm_gk20a *vm,
 	 * If we support compression and there's a compressible kind, use it.
 	 */
 #ifdef CONFIG_NVGPU_COMPRESSION
-	if (mapped_buffer->ctag_offset != 0) {
+	if (mapped_buffer->ctag_offset != 0 ||
+	    g->cbc_use_raw_mode) {
 		if (compr_kind == NV_KIND_INVALID) {
 			kind = incompr_kind;
 		} else {

@@ -549,13 +550,15 @@ int nvgpu_vm_mapping_modify(struct vm_gk20a *vm,
 	}
 
 #ifdef CONFIG_NVGPU_COMPRESSION
-	ctag_offset = mapped_buffer->ctag_offset;
+	if (!g->cbc_use_raw_mode) {
+		ctag_offset = mapped_buffer->ctag_offset;
 
-	compression_page_size = g->ops.fb.compression_page_size(g);
-	nvgpu_assert(compression_page_size > 0ULL);
+		compression_page_size = g->ops.fb.compression_page_size(g);
+		nvgpu_assert(compression_page_size > 0ULL);
 
-	ctag_offset += (u32)(buffer_offset >>
-		nvgpu_ilog2(compression_page_size));
+		ctag_offset += (u32)(buffer_offset >>
+			nvgpu_ilog2(compression_page_size));
+	}
 #endif
 
 	if (g->ops.mm.gmmu.map(vm,

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2022, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2017-2023, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,

@@ -127,19 +127,21 @@ void nvgpu_vm_remap_os_buf_put(struct vm_gk20a *vm,
 		remap_os_buf->os_priv.attachment, remap_os_buf->os_priv.sgt);
 
 #ifdef CONFIG_NVGPU_COMPRESSION
-	gk20a_get_comptags(&remap_os_buf->os_buf, &comptags);
+	if (!g->cbc_use_raw_mode) {
+		gk20a_get_comptags(&remap_os_buf->os_buf, &comptags);
 
-	/*
-	 * Flush compression bit cache before releasing the physical
-	 * memory buffer reference.
-	 */
-	if (comptags.offset != 0) {
-		g->ops.cbc.ctrl(g, nvgpu_cbc_op_clean, 0, 0);
-		err = nvgpu_pg_elpg_ms_protected_call(g,
-			g->ops.mm.cache.l2_flush(g, true));
-		if (err != 0) {
-			nvgpu_err(g, "l2 flush failed");
-			return;
-		}
-	}
+		/*
+		 * Flush compression bit cache before releasing the physical
+		 * memory buffer reference.
+		 */
+		if (comptags.offset != 0) {
+			g->ops.cbc.ctrl(g, nvgpu_cbc_op_clean, 0, 0);
+			err = nvgpu_pg_elpg_ms_protected_call(g,
+				g->ops.mm.cache.l2_flush(g, true));
+			if (err != 0) {
+				nvgpu_err(g, "l2 flush failed");
+				return;
+			}
+		}
+	}
 #endif