gpu: nvgpu: use mem_desc for semaphores

Replace manual buffer allocation and cpu_va pointer accesses with
gk20a_gmmu_{alloc,free}() and gk20a_mem_{rd,wr}() using a struct
mem_desc in gk20a_semaphore_pool, for buffer aperture flexibility.

JIRA DNVGPU-23

Change-Id: I394c38f407a9da02480bfd35062a892eec242ea3
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1146684
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
This commit is contained in:
Konsta Holtta
2016-05-18 14:19:53 +03:00
committed by Terje Bergstrom
parent dc45473eeb
commit abec0ddc19
4 changed files with 33 additions and 46 deletions

View File

@@ -702,8 +702,7 @@ gk20a_channel_semaphore_create(struct channel_gk20a *c)
asid = c->vm->as_share->id;
sprintf(pool_name, "semaphore_pool-%d", c->hw_chid);
sema->pool = gk20a_semaphore_pool_alloc(dev_from_gk20a(c->g),
pool_name, 1024);
sema->pool = gk20a_semaphore_pool_alloc(c->g, pool_name, 1024);
if (!sema->pool)
goto clean_up;

View File

@@ -154,17 +154,9 @@ struct gk20a_fence *gk20a_fence_from_semaphore(
struct sync_fence *sync_fence = NULL;
#ifdef CONFIG_SYNC
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
sync_fence = gk20a_sync_fence_create(timeline, semaphore,
dependency, "f-gk20a-0x%04llx",
((uintptr_t)(void *)semaphore->value) &
0xffff);
#else
sync_fence = gk20a_sync_fence_create(timeline, semaphore,
dependency, "f-gk20a-0x%04llx",
((u64)(void *)semaphore->value) &
0xffff);
#endif
dependency, "f-gk20a-0x%04x",
semaphore->offset & 0xffff);
if (!sync_fence)
return NULL;
#endif

View File

@@ -23,7 +23,7 @@
static const int SEMAPHORE_SIZE = 16;
struct gk20a_semaphore_pool *gk20a_semaphore_pool_alloc(struct device *d,
struct gk20a_semaphore_pool *gk20a_semaphore_pool_alloc(struct gk20a *g,
const char *unique_name, size_t capacity)
{
struct gk20a_semaphore_pool *p;
@@ -34,30 +34,27 @@ struct gk20a_semaphore_pool *gk20a_semaphore_pool_alloc(struct device *d,
kref_init(&p->ref);
INIT_LIST_HEAD(&p->maps);
mutex_init(&p->maps_mutex);
p->dev = d;
p->g = g;
/* Alloc one 4k page of semaphore per channel. */
p->size = roundup(capacity * SEMAPHORE_SIZE, PAGE_SIZE);
p->cpu_va = dma_alloc_coherent(d, p->size, &p->iova, GFP_KERNEL);
if (!p->cpu_va)
goto clean_up;
if (gk20a_get_sgtable(d, &p->sgt, p->cpu_va, p->iova, p->size))
if (gk20a_gmmu_alloc(g, roundup(capacity * SEMAPHORE_SIZE, PAGE_SIZE),
&p->mem))
goto clean_up;
/* Sacrifice one semaphore in the name of returning error codes. */
if (gk20a_allocator_init(&p->alloc, unique_name,
SEMAPHORE_SIZE, p->size - SEMAPHORE_SIZE,
SEMAPHORE_SIZE, p->mem.size - SEMAPHORE_SIZE,
SEMAPHORE_SIZE))
goto clean_up;
gk20a_dbg_info("cpuva=%p iova=%llx phys=%llx", p->cpu_va,
(u64)sg_dma_address(p->sgt->sgl), (u64)sg_phys(p->sgt->sgl));
gk20a_dbg_info("cpuva=%p iova=%llx phys=%llx", p->mem.cpu_va,
(u64)sg_dma_address(p->mem.sgt->sgl),
(u64)sg_phys(p->mem.sgt->sgl));
return p;
clean_up:
if (p->cpu_va)
dma_free_coherent(d, p->size, p->cpu_va, p->iova);
if (p->sgt)
gk20a_free_sgtable(&p->sgt);
if (p->mem.size)
gk20a_gmmu_free(p->g, &p->mem);
kfree(p);
return NULL;
}
@@ -69,8 +66,7 @@ static void gk20a_semaphore_pool_free(struct kref *ref)
mutex_lock(&p->maps_mutex);
WARN_ON(!list_empty(&p->maps));
mutex_unlock(&p->maps_mutex);
gk20a_free_sgtable(&p->sgt);
dma_free_coherent(p->dev, p->size, p->cpu_va, p->iova);
gk20a_gmmu_free(p->g, &p->mem);
gk20a_allocator_destroy(&p->alloc);
kfree(p);
}
@@ -110,7 +106,7 @@ int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *p,
return -ENOMEM;
map->vm = vm;
map->rw_flag = rw_flag;
map->gpu_va = gk20a_gmmu_map(vm, &p->sgt, p->size,
map->gpu_va = gk20a_gmmu_map(vm, &p->mem.sgt, p->mem.size,
0/*uncached*/, rw_flag,
false);
if (!map->gpu_va) {
@@ -135,7 +131,7 @@ void gk20a_semaphore_pool_unmap(struct gk20a_semaphore_pool *p,
mutex_lock(&p->maps_mutex);
map = gk20a_semaphore_pool_find_map_locked(p, vm);
if (map) {
gk20a_gmmu_unmap(vm, map->gpu_va, p->size, map->rw_flag);
gk20a_gmmu_unmap(vm, map->gpu_va, p->mem.size, map->rw_flag);
gk20a_vm_put(vm);
list_del(&map->list);
kfree(map);
@@ -168,7 +164,8 @@ struct gk20a_semaphore *gk20a_semaphore_alloc(struct gk20a_semaphore_pool *pool)
s->offset = gk20a_balloc(&pool->alloc, SEMAPHORE_SIZE);
if (!s->offset) {
gk20a_err(pool->dev, "failed to allocate semaphore");
gk20a_err(dev_from_gk20a(pool->g),
"failed to allocate semaphore");
kfree(s);
return NULL;
}
@@ -177,10 +174,11 @@ struct gk20a_semaphore *gk20a_semaphore_alloc(struct gk20a_semaphore_pool *pool)
s->pool = pool;
kref_init(&s->ref);
s->value = (volatile u32 *)((uintptr_t)pool->cpu_va + s->offset);
*s->value = 0; /* Initially acquired. */
gk20a_dbg_info("created semaphore offset=%d, value_cpu=%p, value=%d",
s->offset, s->value, *s->value);
/* Initially acquired. */
gk20a_mem_wr(s->pool->g, &s->pool->mem, s->offset, 0);
gk20a_dbg_info("created semaphore offset=%d, value=%d",
s->offset,
gk20a_mem_rd(s->pool->g, &s->pool->mem, s->offset));
return s;
}

View File

@@ -20,11 +20,8 @@
/* A memory pool for holding semaphores. */
struct gk20a_semaphore_pool {
void *cpu_va;
dma_addr_t iova;
size_t size;
struct device *dev;
struct sg_table *sgt;
struct mem_desc mem;
struct gk20a *g;
struct list_head maps;
struct mutex maps_mutex;
struct kref ref;
@@ -48,16 +45,17 @@ struct gk20a_semaphore_pool_map {
/* A semaphore that lives inside a semaphore pool. */
struct gk20a_semaphore {
struct gk20a_semaphore_pool *pool;
/*
* value exists within the pool's memory at the specified offset.
* 0=acquired, 1=released.
*/
u32 offset; /* byte offset within pool */
struct kref ref;
/* value is a pointer within the pool's coherent cpu_va.
* It is shared between CPU and GPU, hence volatile. */
volatile u32 *value; /* 0=acquired, 1=released */
};
/* Create a semaphore pool that can hold at most 'capacity' semaphores. */
struct gk20a_semaphore_pool *
gk20a_semaphore_pool_alloc(struct device *, const char *unique_name,
gk20a_semaphore_pool_alloc(struct gk20a *, const char *unique_name,
size_t capacity);
void gk20a_semaphore_pool_put(struct gk20a_semaphore_pool *);
int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *,
@@ -83,7 +81,7 @@ static inline u64 gk20a_semaphore_gpu_va(struct gk20a_semaphore *s,
static inline bool gk20a_semaphore_is_acquired(struct gk20a_semaphore *s)
{
u32 v = *s->value;
u32 v = gk20a_mem_rd(s->pool->g, &s->pool->mem, s->offset);
/* We often block on the value reaching a certain threshold. We must make
 * sure that if we get unblocked, we haven't read anything too early. */
@@ -94,6 +92,6 @@ static inline bool gk20a_semaphore_is_acquired(struct gk20a_semaphore *s)
static inline void gk20a_semaphore_release(struct gk20a_semaphore *s)
{
smp_wmb();
*s->value = 1;
gk20a_mem_wr(s->pool->g, &s->pool->mem, s->offset, 1);
}
#endif