gpu: nvgpu: Per-alloc alignment

Change-Id: I8b7e86afb68adf6dd33b05995d0978f42d57e7b7
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/554185
GVS: Gerrit_Virtual_Submit

Author: Terje Bergstrom
Date: 2014-10-07 15:02:35 +03:00
Committed by: Dan Willemsen
Parent: c0668f05ea
Commit: 1d9fba8804

9 changed files with 60 additions and 62 deletions
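
The change makes alignment a property of each allocation rather than of the allocator: the align field disappears from struct gk20a_allocator and from gk20a_allocator_init(), and gk20a_allocator_block_alloc()/gk20a_allocator_block_free() (and the corresponding alloc/free function pointers) take an explicit align argument instead. Call sites now pass 1 where no alignment is needed (VA and comptag allocators), PMU_DMEM_ALLOC_ALIGNMENT for PMU DMEM, and SEMAPHORE_SIZE for the semaphore pool. Below is a minimal userspace sketch of the pattern the commit moves to; it is not the kernel code, the toy_* names are hypothetical, and the linear scan stands in for bitmap_find_next_zero_area(), which the real alloc path calls with align - 1 as the alignment mask.

    /*
     * Toy model of per-alloc alignment (illustrative only; the names
     * below are hypothetical, not the gk20a API).
     */
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <stdint.h>

    struct toy_allocator {
            uint32_t base;   /* first valid unit */
            uint32_t limit;  /* last valid unit */
            uint8_t *bitmap; /* one byte per unit, nonzero = busy */
    };

    /* No align parameter at init, matching the new gk20a_allocator_init(). */
    static int toy_init(struct toy_allocator *a, uint32_t start, uint32_t len)
    {
            a->base = start;
            a->limit = start + len - 1;
            a->bitmap = calloc(len, 1);
            return a->bitmap ? 0 : -1;
    }

    /* Alignment is supplied per call; it must be a nonzero power of two. */
    static int toy_alloc(struct toy_allocator *a, uint32_t *addr,
                         uint32_t len, uint32_t align)
    {
            uint32_t mask = align - 1;

            if (len == 0 || align == 0 || (align & mask))
                    return -1;

            /* scan for an aligned run of free units */
            for (uint32_t s = (a->base + mask) & ~mask;
                 s + len - 1 <= a->limit; s += align) {
                    uint32_t i;

                    for (i = 0; i < len; i++)
                            if (a->bitmap[s - a->base + i])
                                    break;
                    if (i == len) {
                            memset(a->bitmap + (s - a->base), 1, len);
                            *addr = s;
                            return 0;
                    }
            }
            return -1;
    }

    static void toy_free(struct toy_allocator *a, uint32_t addr, uint32_t len)
    {
            memset(a->bitmap + (addr - a->base), 0, len);
    }

    int main(void)
    {
            struct toy_allocator a;
            uint32_t x, y;

            if (toy_init(&a, 0, 64))
                    return 1;
            /* same allocator, different alignment per call */
            toy_alloc(&a, &x, 3, 1); /* align 1 = none, like VA/comptag callers */
            toy_alloc(&a, &y, 3, 8); /* 8-unit aligned, like the PMU DMEM use   */
            printf("x=%u y=%u\n", x, y); /* prints x=0 y=8 */
            toy_free(&a, x, 3);
            toy_free(&a, y, 3);
            free(a.bitmap);
            return 0;
    }

The payoff is visible in the rest of the diff: a single allocator instance can now serve mixed-alignment requests, instead of being created with one worst-case alignment baked in.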

View File

@@ -20,7 +20,7 @@
 /* init allocator struct */
 int gk20a_allocator_init(struct gk20a_allocator *allocator,
-		const char *name, u32 start, u32 len, u32 align)
+		const char *name, u32 start, u32 len)
 {
 	memset(allocator, 0, sizeof(struct gk20a_allocator));
@@ -28,16 +28,14 @@ int gk20a_allocator_init(struct gk20a_allocator *allocator,
 	allocator->base = start;
 	allocator->limit = start + len - 1;
-	allocator->align = align;
 
 	allocator->bitmap = kzalloc(BITS_TO_LONGS(len) * sizeof(long),
 			GFP_KERNEL);
 	if (!allocator->bitmap)
 		return -ENOMEM;
 
-	allocator_dbg(allocator, "%s : base %d, limit %d, align %d",
-		allocator->name, allocator->base,
-		allocator->limit, allocator->align);
+	allocator_dbg(allocator, "%s : base %d, limit %d",
+		allocator->name, allocator->base, allocator->limit);
 
 	init_rwsem(&allocator->rw_sema);
@@ -65,7 +63,7 @@ void gk20a_allocator_destroy(struct gk20a_allocator *allocator)
  * contiguous address.
  */
 int gk20a_allocator_block_alloc(struct gk20a_allocator *allocator,
-		u32 *addr, u32 len)
+		u32 *addr, u32 len, u32 align)
 {
 	unsigned long _addr;
@@ -73,11 +71,11 @@ int gk20a_allocator_block_alloc(struct gk20a_allocator *allocator,
 	if ((*addr != 0 && *addr < allocator->base) || /* check addr range */
 	    *addr + len > allocator->limit || /* check addr range */
-	    *addr & (allocator->align - 1) || /* check addr alignment */
+	    *addr & (align - 1) || /* check addr alignment */
 	    len == 0) /* check len */
 		return -EINVAL;
 
-	len = ALIGN(len, allocator->align);
+	len = ALIGN(len, align);
 	if (!len)
 		return -ENOMEM;
@@ -87,7 +85,7 @@ int gk20a_allocator_block_alloc(struct gk20a_allocator *allocator,
 			allocator->limit - allocator->base + 1,
 			*addr ? (*addr - allocator->base) : 0,
 			len,
-			allocator->align - 1);
+			align - 1);
 	if ((_addr > allocator->limit - allocator->base + 1) ||
 	    (*addr && *addr != (_addr + allocator->base))) {
 		up_write(&allocator->rw_sema);
@@ -106,16 +104,16 @@ int gk20a_allocator_block_alloc(struct gk20a_allocator *allocator,
 /* free all blocks between start and end */
 int gk20a_allocator_block_free(struct gk20a_allocator *allocator,
-		u32 addr, u32 len)
+		u32 addr, u32 len, u32 align)
 {
 	allocator_dbg(allocator, "[in] addr %d, len %d", addr, len);
 
 	if (addr + len > allocator->limit || /* check addr range */
 	    addr < allocator->base ||
-	    addr & (allocator->align - 1)) /* check addr alignment */
+	    addr & (align - 1)) /* check addr alignment */
 		return -EINVAL;
 
-	len = ALIGN(len, allocator->align);
+	len = ALIGN(len, align);
 	if (!len)
 		return -EINVAL;
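
With alignment passed per call, one allocator can serve requests of different alignments side by side. A hypothetical call-site sketch against the new signatures (not taken from the commit; assumes an allocator already set up with gk20a_allocator_init()):

    u32 a1, a2;

    gk20a_allocator_block_alloc(&alloc, &a1, 16, 1);  /* no alignment */
    gk20a_allocator_block_alloc(&alloc, &a2, 16, 16); /* 16-unit aligned */

    gk20a_allocator_block_free(&alloc, a1, 16, 1);
    gk20a_allocator_block_free(&alloc, a2, 16, 16);

Note that free must be given the same align as the matching alloc, since both paths round len up with ALIGN(len, align).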

View File

@@ -31,7 +31,6 @@ struct gk20a_allocator {
 	u32 base;	/* min value of this linear space */
 	u32 limit;	/* max value = limit - 1 */
-	u32 align;	/* alignment size, power of 2 */
 
 	unsigned long *bitmap;	/* bitmap */
@@ -58,21 +57,21 @@ struct gk20a_allocator {
 	} constraint;
 
 	int (*alloc)(struct gk20a_allocator *allocator,
-		u32 *addr, u32 len);
+		u32 *addr, u32 len, u32 align);
 	int (*free)(struct gk20a_allocator *allocator,
-		u32 addr, u32 len);
+		u32 addr, u32 len, u32 align);
 };
 
 int gk20a_allocator_init(struct gk20a_allocator *allocator,
-		const char *name, u32 base, u32 size, u32 align);
+		const char *name, u32 base, u32 size);
 void gk20a_allocator_destroy(struct gk20a_allocator *allocator);
 
 int gk20a_allocator_block_alloc(struct gk20a_allocator *allocator,
-		u32 *addr, u32 len);
+		u32 *addr, u32 len, u32 align);
 int gk20a_allocator_block_free(struct gk20a_allocator *allocator,
-		u32 addr, u32 len);
+		u32 addr, u32 len, u32 align);
 
 #if defined(ALLOCATOR_DEBUG)

View File

@@ -92,8 +92,7 @@ static int gk20a_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
 	gk20a_allocator_init(&gr->comp_tags, "comptag",
 			1, /* start */
-			max_comptag_lines - 1, /* length*/
-			1); /* align */
+			max_comptag_lines - 1); /* length*/
 
 	gr->comptags_per_cacheline = comptags_per_cacheline;
 	gr->slices_per_ltc = slices_per_fbp / g->ltc_count;

View File

@@ -129,7 +129,7 @@ static void gk20a_mm_delete_priv(void *_priv)
 		BUG_ON(!priv->comptag_allocator);
 		priv->comptag_allocator->free(priv->comptag_allocator,
 				priv->comptags.offset,
-				priv->comptags.lines);
+				priv->comptags.lines, 1);
 	}
 
 	/* Free buffer states */
@@ -229,7 +229,7 @@ static int gk20a_alloc_comptags(struct device *dev,
 	/* store the allocator so we can use it when we free the ctags */
 	priv->comptag_allocator = allocator;
-	err = allocator->alloc(allocator, &offset, lines);
+	err = allocator->alloc(allocator, &offset, lines, 1);
 	if (!err) {
 		priv->comptags.lines = lines;
 		priv->comptags.offset = offset;
@@ -837,7 +837,7 @@ u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
 	/* The vma allocator represents page accounting. */
 	num_pages = size >> ilog2(vm->gmmu_page_sizes[gmmu_pgsz_idx]);
 
-	err = vma->alloc(vma, &start_page_nr, num_pages);
+	err = vma->alloc(vma, &start_page_nr, num_pages, 1);
 	if (err) {
 		gk20a_err(dev_from_vm(vm),
@@ -868,7 +868,7 @@ int gk20a_vm_free_va(struct vm_gk20a *vm,
 	start_page_nr = (u32)(offset >> page_shift);
 	num_pages = (u32)((size + page_size - 1) >> page_shift);
 
-	err = vma->free(vma, start_page_nr, num_pages);
+	err = vma->free(vma, start_page_nr, num_pages, 1);
 	if (err) {
 		gk20a_err(dev_from_vm(vm),
 			"not found: offset=0x%llx, sz=0x%llx",
@@ -2290,9 +2290,8 @@ static int gk20a_init_vm(struct mm_gk20a *mm,
 		 vm->gmmu_page_sizes[gmmu_page_size_small]>>10);
 	err = gk20a_allocator_init(&vm->vma[gmmu_page_size_small],
 			alloc_name,
-			low_hole_pages,		 /*start*/
-			num_pages - low_hole_pages,/* length*/
-			1);			 /* align */
+			low_hole_pages,		 /*start*/
+			num_pages - low_hole_pages);/* length*/
 	if (err)
 		goto clean_up_map_pde;
@@ -2305,8 +2304,7 @@ static int gk20a_init_vm(struct mm_gk20a *mm,
 		err = gk20a_allocator_init(&vm->vma[gmmu_page_size_big],
 				alloc_name,
 				num_pages, /* start */
-				num_pages, /* length */
-				1); /* align */
+				num_pages); /* length */
 		if (err)
 			goto clean_up_small_allocator;
 	}
@@ -2435,7 +2433,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
 			 ilog2(vm->gmmu_page_sizes[pgsz_idx]));
 
 	vma = &vm->vma[pgsz_idx];
-	err = vma->alloc(vma, &start_page_nr, args->pages);
+	err = vma->alloc(vma, &start_page_nr, args->pages, 1);
 	if (err) {
 		kfree(va_node);
 		goto clean_up;
@@ -2458,7 +2456,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
 					pgsz_idx, true);
 		if (err) {
 			mutex_unlock(&vm->update_gmmu_lock);
-			vma->free(vma, start_page_nr, args->pages);
+			vma->free(vma, start_page_nr, args->pages, 1);
 			kfree(va_node);
 			goto clean_up;
 		}
@@ -2506,7 +2504,7 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
 			ilog2(vm->gmmu_page_sizes[pgsz_idx]));
 
 	vma = &vm->vma[pgsz_idx];
-	err = vma->free(vma, start_page_nr, args->pages);
+	err = vma->free(vma, start_page_nr, args->pages, 1);
 	if (err)
 		goto clean_up;

View File

@@ -2603,7 +2603,8 @@ static int pmu_init_perfmon(struct pmu_gk20a *pmu)
 	if (!pmu->sample_buffer)
 		err = pmu->dmem.alloc(&pmu->dmem,
-				      &pmu->sample_buffer, 2 * sizeof(u16));
+				      &pmu->sample_buffer, 2 * sizeof(u16),
+				      PMU_DMEM_ALLOC_ALIGNMENT);
 	if (err) {
 		gk20a_err(dev_from_gk20a(g),
 			"failed to allocate perfmon sample buffer");
@@ -2707,8 +2708,7 @@ static int pmu_process_init_msg(struct pmu_gk20a *pmu,
 	if (!pmu->dmem.alloc)
 		gk20a_allocator_init(&pmu->dmem, "gk20a_pmu_dmem",
 				pv->get_pmu_init_msg_pmu_sw_mg_off(init),
-				pv->get_pmu_init_msg_pmu_sw_mg_size(init),
-				PMU_DMEM_ALLOC_ALIGNMENT);
+				pv->get_pmu_init_msg_pmu_sw_mg_size(init));
 
 	pmu->pmu_ready = true;
 	pmu->pmu_state = PMU_STATE_INIT_RECEIVED;
@@ -2845,17 +2845,19 @@ static int pmu_response_handle(struct pmu_gk20a *pmu,
 		if (pv->pmu_allocation_get_dmem_size(pmu,
 				pv->get_pmu_seq_in_a_ptr(seq)) != 0)
 			pmu->dmem.free(&pmu->dmem,
-				pv->pmu_allocation_get_dmem_offset(pmu,
-					pv->get_pmu_seq_in_a_ptr(seq)),
-				pv->pmu_allocation_get_dmem_size(pmu,
-					pv->get_pmu_seq_in_a_ptr(seq)));
+				pv->pmu_allocation_get_dmem_offset(pmu,
+					pv->get_pmu_seq_in_a_ptr(seq)),
+				pv->pmu_allocation_get_dmem_size(pmu,
+					pv->get_pmu_seq_in_a_ptr(seq)),
+				PMU_DMEM_ALLOC_ALIGNMENT);
 		if (pv->pmu_allocation_get_dmem_size(pmu,
 				pv->get_pmu_seq_out_a_ptr(seq)) != 0)
 			pmu->dmem.free(&pmu->dmem,
-				pv->pmu_allocation_get_dmem_offset(pmu,
-					pv->get_pmu_seq_out_a_ptr(seq)),
-				pv->pmu_allocation_get_dmem_size(pmu,
-					pv->get_pmu_seq_out_a_ptr(seq)));
+				pv->pmu_allocation_get_dmem_offset(pmu,
+					pv->get_pmu_seq_out_a_ptr(seq)),
+				pv->pmu_allocation_get_dmem_size(pmu,
+					pv->get_pmu_seq_out_a_ptr(seq)),
+				PMU_DMEM_ALLOC_ALIGNMENT);
 
 	if (seq->callback)
 		seq->callback(g, msg, seq->cb_params, seq->desc, ret);
@@ -3493,8 +3495,9 @@ int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd,
 			(u16)max(payload->in.size, payload->out.size));
 
 		err = pmu->dmem.alloc(&pmu->dmem,
-			pv->pmu_allocation_get_dmem_offset_addr(pmu, in),
-			pv->pmu_allocation_get_dmem_size(pmu, in));
+			pv->pmu_allocation_get_dmem_offset_addr(pmu, in),
+			pv->pmu_allocation_get_dmem_size(pmu, in),
+			PMU_DMEM_ALLOC_ALIGNMENT);
 		if (err)
 			goto clean_up;
@@ -3517,8 +3520,9 @@ int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd,
 		if (payload->out.buf != payload->in.buf) {
 			err = pmu->dmem.alloc(&pmu->dmem,
-				pv->pmu_allocation_get_dmem_offset_addr(pmu, out),
-				pv->pmu_allocation_get_dmem_size(pmu, out));
+				pv->pmu_allocation_get_dmem_offset_addr(pmu, out),
+				pv->pmu_allocation_get_dmem_size(pmu, out),
+				PMU_DMEM_ALLOC_ALIGNMENT);
 			if (err)
 				goto clean_up;
 		} else {
@@ -3548,12 +3552,14 @@ clean_up:
 	gk20a_dbg_fn("fail");
 	if (in)
 		pmu->dmem.free(&pmu->dmem,
-			pv->pmu_allocation_get_dmem_offset(pmu, in),
-			pv->pmu_allocation_get_dmem_size(pmu, in));
+			pv->pmu_allocation_get_dmem_offset(pmu, in),
+			pv->pmu_allocation_get_dmem_size(pmu, in),
+			PMU_DMEM_ALLOC_ALIGNMENT);
 	if (out)
 		pmu->dmem.free(&pmu->dmem,
-			pv->pmu_allocation_get_dmem_offset(pmu, out),
-			pv->pmu_allocation_get_dmem_size(pmu, out));
+			pv->pmu_allocation_get_dmem_offset(pmu, out),
+			pv->pmu_allocation_get_dmem_size(pmu, out),
+			PMU_DMEM_ALLOC_ALIGNMENT);
 	pmu_seq_release(pmu, seq);
 	return err;
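
The PMU changes are symmetric: PMU_DMEM_ALLOC_ALIGNMENT is dropped from the gk20a_allocator_init() call that creates the DMEM allocator and is instead supplied at every dmem.alloc()/dmem.free() site, so the alignment policy now lives with the callers rather than in the allocator.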

View File

@@ -45,7 +45,7 @@ struct gk20a_semaphore_pool *gk20a_semaphore_pool_alloc(struct device *d,
 		goto clean_up;
 
 	if (gk20a_allocator_init(&p->alloc, unique_name, 0,
-				 p->size, SEMAPHORE_SIZE))
+				 p->size))
 		goto clean_up;
 
 	gk20a_dbg_info("cpuva=%p iova=%llx phys=%llx", p->cpu_va,
@@ -163,7 +163,8 @@ struct gk20a_semaphore *gk20a_semaphore_alloc(struct gk20a_semaphore_pool *pool)
 	if (!s)
 		return NULL;
 
-	if (pool->alloc.alloc(&pool->alloc, &s->offset, SEMAPHORE_SIZE)) {
+	if (pool->alloc.alloc(&pool->alloc, &s->offset, SEMAPHORE_SIZE,
+			SEMAPHORE_SIZE)) {
 		gk20a_err(pool->dev, "failed to allocate semaphore");
 		kfree(s);
 		return NULL;
@@ -185,7 +186,8 @@ static void gk20a_semaphore_free(struct kref *ref)
 	struct gk20a_semaphore *s =
 		container_of(ref, struct gk20a_semaphore, ref);
 
-	s->pool->alloc.free(&s->pool->alloc, s->offset, SEMAPHORE_SIZE);
+	s->pool->alloc.free(&s->pool->alloc, s->offset, SEMAPHORE_SIZE,
+			SEMAPHORE_SIZE);
 	gk20a_semaphore_pool_put(s->pool);
 	kfree(s);
 }
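
Here SEMAPHORE_SIZE is passed as both the length and the alignment, so each semaphore slot stays naturally aligned within the pool, which is the guarantee the removed allocator-wide align value used to provide.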

View File

@@ -92,8 +92,7 @@ static int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
 	gk20a_allocator_init(&gr->comp_tags, "comptag",
 			1, /* start */
-			max_comptag_lines - 1, /* length*/
-			1); /* align */
+			max_comptag_lines - 1); /* length*/
 
 	gr->comptags_per_cacheline = comptags_per_cacheline;
 	gr->slices_per_ltc = slices_per_ltc;

View File

@@ -43,8 +43,7 @@ static int vgpu_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
 	gk20a_allocator_init(&gr->comp_tags, "comptag",
 			1, /* start */
-			max_comptag_lines - 1, /* length*/
-			1); /* align */
+			max_comptag_lines - 1); /* length*/
 
 	return 0;
 }

View File

@@ -297,8 +297,7 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
 	gk20a_allocator_init(&vm->vma[gmmu_page_size_small], name,
 			low_hole_pages, /* start */
-			num_pages - low_hole_pages, /* length */
-			1); /* align */
+			num_pages - low_hole_pages); /* length */
 
 	snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
 		 gmmu_page_sizes[gmmu_page_size_big]>>10);
@@ -307,8 +306,7 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
 		 ilog2(gmmu_page_sizes[gmmu_page_size_big]));
 	gk20a_allocator_init(&vm->vma[gmmu_page_size_big], name,
 			num_pages, /* start */
-			num_pages, /* length */
-			1); /* align */
+			num_pages); /* length */
 
 	vm->mapped_buffers = RB_ROOT;