gpu: nvgpu: Implement compbits padding for mapping

Implement NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS, which adds extra
alignment and trail padding to the compbits allocation so that a
buffer's compbits can be mapped safely, i.e. without sharing a small
page with the compbits of a neighboring allocation.

Bug 200077571

Change-Id: I3a74ebb81412e4e1e69501debeb9ef4e2056ef1a
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: http://git-master/r/730763
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/740693
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Author:    Sami Kiminki
Date:      2015-04-10 15:40:46 +03:00
Committer: Terje Bergstrom
Commit:    8d6fe0f2ef
Parent:    5a5662fffb

3 changed files with 132 additions and 11 deletions

@@ -26,6 +26,7 @@
 #include <linux/tegra-soc.h>
 #include <linux/vmalloc.h>
 #include <linux/dma-buf.h>
+#include <linux/lcm.h>

 #include <uapi/linux/nvgpu.h>
 #include <trace/events/gk20a.h>
@@ -133,7 +134,8 @@ static void gk20a_mm_delete_priv(void *_priv)
                 BUG_ON(!priv->comptag_allocator);
                 priv->comptag_allocator->free(priv->comptag_allocator,
                                 priv->comptags.offset,
-                                priv->comptags.lines, 1);
+                                priv->comptags.allocated_lines,
+                                1);
         }

         /* Free buffer states */
@@ -208,22 +210,28 @@ void gk20a_get_comptags(struct device *dev, struct dma_buf *dmabuf,
                 return;

         if (!priv) {
-                comptags->lines = 0;
-                comptags->offset = 0;
+                memset(comptags, 0, sizeof(*comptags));
                 return;
         }

         *comptags = priv->comptags;
 }

-static int gk20a_alloc_comptags(struct device *dev,
+static int gk20a_alloc_comptags(struct gk20a *g,
+                                struct device *dev,
                                 struct dma_buf *dmabuf,
                                 struct gk20a_allocator *allocator,
-                                int lines)
+                                u32 lines, bool user_mappable)
 {
         struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
         u32 offset = 0;
         int err;
+        u32 ctaglines_to_allocate;
+        u32 ctagline_align;
+        const u32 aggregate_cacheline_sz =
+                g->gr.cacheline_size * g->gr.slices_per_ltc *
+                g->ltc_count;
+        const u32 small_pgsz = 4096;

         if (!priv)
                 return -ENOSYS;
@@ -231,12 +239,99 @@ static int gk20a_alloc_comptags(struct device *dev,
         if (!lines)
                 return -EINVAL;

+        if (!user_mappable) {
+                ctaglines_to_allocate = lines;
+                ctagline_align = 1;
+        } else {
+                /* Unfortunately, we cannot use allocation alignment
+                 * here, since compbits per cacheline is not always a
+                 * power of two. So, we just have to allocate enough
+                 * extra that we're guaranteed to find a ctagline
+                 * inside the allocation so that: 1) it is the first
+                 * ctagline in a cacheline that starts at a page
+                 * boundary, and 2) we can add enough overallocation
+                 * that the ctaglines of the succeeding allocation
+                 * are on different page than ours
+                 */
+
+                ctagline_align =
+                        (lcm(aggregate_cacheline_sz, small_pgsz) /
+                         aggregate_cacheline_sz) *
+                        g->gr.comptags_per_cacheline;
+
+                ctaglines_to_allocate =
+                        /* for alignment */
+                        ctagline_align +
+
+                        /* lines rounded up to cachelines */
+                        DIV_ROUND_UP(lines, g->gr.comptags_per_cacheline) *
+                        g->gr.comptags_per_cacheline +
+
+                        /* trail-padding */
+                        DIV_ROUND_UP(aggregate_cacheline_sz, small_pgsz) *
+                        g->gr.comptags_per_cacheline;
+
+                if (ctaglines_to_allocate < lines)
+                        return -EINVAL; /* integer overflow */
+        }
+
         /* store the allocator so we can use it when we free the ctags */
         priv->comptag_allocator = allocator;
-        err = allocator->alloc(allocator, &offset, lines, 1);
+        err = allocator->alloc(allocator, &offset,
+                               ctaglines_to_allocate, 1);
         if (!err) {
-                priv->comptags.lines = lines;
+                const u32 alignment_lines =
+                        DIV_ROUND_UP(offset, ctagline_align) * ctagline_align -
+                        offset;
+
+                /* prune the preceding ctaglines that were allocated
+                   for alignment */
+                if (alignment_lines) {
+                        /* free alignment lines */
+                        int tmp =
+                                allocator->free(allocator, offset,
+                                                alignment_lines,
+                                                1);
+                        WARN_ON(tmp);
+
+                        offset += alignment_lines;
+                        ctaglines_to_allocate -= alignment_lines;
+                }
+
+                /* check if we can prune the trailing, too */
+                if (user_mappable)
+                {
+                        u32 needed_cachelines =
+                                DIV_ROUND_UP(lines, g->gr.comptags_per_cacheline);
+
+                        u32 first_unneeded_cacheline =
+                                DIV_ROUND_UP(round_up(needed_cachelines *
+                                                      aggregate_cacheline_sz,
+                                                      small_pgsz),
+                                             aggregate_cacheline_sz);
+                        u32 needed_ctaglines =
+                                first_unneeded_cacheline *
+                                g->gr.comptags_per_cacheline;
+
+                        if (needed_ctaglines < ctaglines_to_allocate) {
+                                /* free alignment lines */
+                                int tmp =
+                                        allocator->free(
+                                                allocator,
+                                                offset + needed_ctaglines,
+                                                (ctaglines_to_allocate -
+                                                        needed_ctaglines),
+                                                1);
+                                WARN_ON(tmp);
+
+                                ctaglines_to_allocate = needed_ctaglines;
+                        }
+                }
+
                 priv->comptags.offset = offset;
+                priv->comptags.lines = lines;
+                priv->comptags.allocated_lines = ctaglines_to_allocate;
+                priv->comptags.user_mappable = user_mappable;
         }
         return err;
 }
@@ -955,9 +1050,11 @@ struct buffer_attrs {
         u64 align;
         u32 ctag_offset;
         u32 ctag_lines;
+        u32 ctag_allocated_lines;
         int pgsz_idx;
         u8 kind_v;
         u8 uc_kind_v;
+        bool ctag_user_mappable;
 };

 static void gmmu_select_page_size(struct vm_gk20a *vm,
@@ -1399,22 +1496,37 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
                                  g->ops.fb.compression_page_size(g));

         if (bfr.ctag_lines && !comptags.lines) {
+                const bool user_mappable =
+                        !!(flags & NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS);
+
                 /* allocate compression resources if needed */
-                err = gk20a_alloc_comptags(d, dmabuf, ctag_allocator,
-                                           bfr.ctag_lines);
+                err = gk20a_alloc_comptags(g, d, dmabuf, ctag_allocator,
+                                           bfr.ctag_lines, user_mappable);
                 if (err) {
                         /* ok to fall back here if we ran out */
                         /* TBD: we can partially alloc ctags as well... */
-                        bfr.ctag_lines = bfr.ctag_offset = 0;
                         bfr.kind_v = bfr.uc_kind_v;
                 } else {
                         gk20a_get_comptags(d, dmabuf, &comptags);
                         clear_ctags = true;
+
+                        if (comptags.lines < comptags.allocated_lines) {
+                                /* clear tail-padding comptags */
+                                u32 ctagmin = comptags.offset + comptags.lines;
+                                u32 ctagmax = comptags.offset +
+                                        comptags.allocated_lines - 1;
+
+                                g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear,
+                                                    ctagmin, ctagmax);
+                        }
                 }
         }

         /* store the comptag info */
         bfr.ctag_offset = comptags.offset;
+        bfr.ctag_lines = comptags.lines;
+        bfr.ctag_allocated_lines = comptags.allocated_lines;
+        bfr.ctag_user_mappable = comptags.user_mappable;

         /* update gmmu ptes */
         map_offset = g->ops.mm.gmmu_map(vm, map_offset,
@@ -1433,10 +1545,11 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
         gk20a_dbg(gpu_dbg_map,
                 "as=%d pgsz=%d "
                 "kind=0x%x kind_uc=0x%x flags=0x%x "
-                "ctags=%d start=%d gv=0x%x,%08x -> 0x%x,%08x -> 0x%x,%08x",
+                "ctags=%d start=%d ctags_allocated=%d ctags_mappable=%d gv=0x%x,%08x -> 0x%x,%08x -> 0x%x,%08x",
                 vm_aspace_id(vm), gmmu_page_size,
                 bfr.kind_v, bfr.uc_kind_v, flags,
                 bfr.ctag_lines, bfr.ctag_offset,
+                bfr.ctag_allocated_lines, bfr.ctag_user_mappable,
                 hi32(map_offset), lo32(map_offset),
                 hi32((u64)sg_dma_address(bfr.sgt->sgl)),
                 lo32((u64)sg_dma_address(bfr.sgt->sgl)),
@@ -1473,6 +1586,8 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
         mapped_buffer->pgsz_idx = bfr.pgsz_idx;
         mapped_buffer->ctag_offset = bfr.ctag_offset;
         mapped_buffer->ctag_lines = bfr.ctag_lines;
+        mapped_buffer->ctag_allocated_lines = bfr.ctag_allocated_lines;
+        mapped_buffer->ctags_mappable = bfr.ctag_user_mappable;
         mapped_buffer->vm = vm;
         mapped_buffer->flags = flags;
         mapped_buffer->kind = kind;
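
For orientation, the over-allocation math in gk20a_alloc_comptags() can be tried out in isolation. The sketch below is illustrative only: cacheline_size, slices_per_ltc, ltc_count and comptags_per_cacheline are made-up values standing in for the g->gr/g->ltc_count fields the kernel reads, and lcm()/DIV_ROUND_UP() are reimplemented for userspace.

/* Illustrative, self-contained rework of the ctagline padding math.
 * All hardware parameters below are assumptions for the example, not
 * real gk20a configuration.  Compile with: cc -o ctagmath ctagmath.c
 */
#include <stdio.h>
#include <stdint.h>

#define DIV_ROUND_UP(n, d)      (((n) + (d) - 1) / (d))

static uint32_t gcd(uint32_t a, uint32_t b)
{
        while (b) {
                uint32_t t = a % b;
                a = b;
                b = t;
        }
        return a;
}

static uint32_t lcm(uint32_t a, uint32_t b)
{
        return a / gcd(a, b) * b;       /* same contract as <linux/lcm.h> */
}

int main(void)
{
        const uint32_t cacheline_size = 512;            /* assumed */
        const uint32_t slices_per_ltc = 2;              /* assumed */
        const uint32_t ltc_count = 2;                   /* assumed */
        const uint32_t comptags_per_cacheline = 128;    /* assumed */
        const uint32_t small_pgsz = 4096;
        const uint32_t lines = 100;                     /* requested ctaglines */

        const uint32_t aggregate_cacheline_sz =
                cacheline_size * slices_per_ltc * ltc_count;            /* 2048 */

        /* ctaglines from one page-aligned cacheline start to the next */
        const uint32_t ctagline_align =
                (lcm(aggregate_cacheline_sz, small_pgsz) /
                 aggregate_cacheline_sz) * comptags_per_cacheline;      /* 256 */

        const uint32_t ctaglines_to_allocate =
                ctagline_align +                                /* head alignment: 256 */
                DIV_ROUND_UP(lines, comptags_per_cacheline) *
                comptags_per_cacheline +                        /* payload rounded up: 128 */
                DIV_ROUND_UP(aggregate_cacheline_sz, small_pgsz) *
                comptags_per_cacheline;                         /* trail padding: 128 */

        printf("request=%u align=%u allocate=%u ctaglines\n",
               (unsigned)lines, (unsigned)ctagline_align,
               (unsigned)ctaglines_to_allocate);                /* 100, 256, 512 */
        return 0;
}

With these assumed numbers, a 100-line request transiently reserves 512 ctaglines; the head and trail pruning in the hunk above then gives back everything except the cachelines covering one whole small page, so 256 ctaglines stay allocated.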

@@ -140,6 +140,8 @@ enum gmmu_pgsz_gk20a {
 struct gk20a_comptags {
         u32 offset;
         u32 lines;
+        u32 allocated_lines;
+        bool user_mappable;
 };

 struct gk20a_mm_entry {
@@ -190,6 +192,9 @@ struct mapped_buffer_node {
         u32 pgsz_idx;
         u32 ctag_offset;
         u32 ctag_lines;
+        u32 ctag_allocated_lines;
+        bool ctags_mappable;
+
         u32 flags;
         u32 kind;
         bool va_allocated;

@@ -859,6 +859,7 @@ struct nvgpu_as_map_buffer_args {
         __u32 flags;            /* in/out */
 #define NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET      (1 << 0)
 #define NVGPU_AS_MAP_BUFFER_FLAGS_CACHEABLE         (1 << 2)
+#define NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS (1 << 6)
         __u32 reserved;         /* in */
         __u32 dmabuf_fd;        /* in */
         __u32 page_size;        /* inout, 0:= best fit to buffer */
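
From the userspace side, opting in to mappable compbits is just a matter of setting the new flag in the map-buffer ioctl arguments. A minimal sketch follows, assuming the pre-existing NVGPU_AS_IOCTL_MAP_BUFFER ioctl and the installed <linux/nvgpu.h> uapi header path; only the flag itself comes from this change.

/* Minimal sketch: map a dma-buf into a GPU address space and request
 * user-mappable compbits.  NVGPU_AS_IOCTL_MAP_BUFFER and the
 * <linux/nvgpu.h> install path are assumed from the existing nvgpu UAPI.
 */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

static int map_with_mappable_compbits(int as_fd, int dmabuf_fd)
{
        struct nvgpu_as_map_buffer_args args;

        memset(&args, 0, sizeof(args));
        args.flags = NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS;
        args.dmabuf_fd = dmabuf_fd;
        args.page_size = 0;     /* 0 := best fit to buffer */

        return ioctl(as_fd, NVGPU_AS_IOCTL_MAP_BUFFER, &args);
}

When the flag is set, the driver pads the comptag allocation as in the first file, so the buffer's compbits start at a ctagline whose cacheline begins on a page boundary and the ctaglines of the succeeding allocation end up on a different small page.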