diff --git a/drivers/gpu/nvgpu/gk20a/Makefile b/drivers/gpu/nvgpu/gk20a/Makefile
index 81ae027e0..e5eb817d4 100644
--- a/drivers/gpu/nvgpu/gk20a/Makefile
+++ b/drivers/gpu/nvgpu/gk20a/Makefile
@@ -19,6 +19,7 @@ nvgpu-y := \
 	mm_gk20a.o \
 	pmu_gk20a.o \
 	priv_ring_gk20a.o \
+	semaphore_gk20a.o \
 	clk_gk20a.o \
 	therm_gk20a.o \
 	gr_ctx_gk20a_sim.o \
diff --git a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c
new file mode 100644
index 000000000..55fa0e32e
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c
@@ -0,0 +1,191 @@
+/*
+ * drivers/video/tegra/host/gk20a/semaphore_gk20a.c
+ *
+ * GK20A Semaphores
+ *
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include "semaphore_gk20a.h"
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include "gk20a.h"
+#include "mm_gk20a.h"
+
+static const int SEMAPHORE_SIZE = 16;
+
+struct gk20a_semaphore_pool *gk20a_semaphore_pool_alloc(struct device *d,
+		const char *unique_name, size_t capacity)
+{
+	struct gk20a_semaphore_pool *p;
+
+	p = kzalloc(sizeof(*p), GFP_KERNEL);
+	if (!p)
+		return NULL;
+
+	kref_init(&p->ref);
+	INIT_LIST_HEAD(&p->maps);
+	mutex_init(&p->maps_mutex);
+	p->dev = d;
+
+	/* Allocate room for 'capacity' semaphores, rounded up to whole
+	 * 4k pages. */
+	p->size = roundup(capacity * SEMAPHORE_SIZE, PAGE_SIZE);
+	p->cpu_va = dma_alloc_coherent(d, p->size, &p->iova, GFP_KERNEL);
+	if (!p->cpu_va)
+		goto clean_up;
+	if (gk20a_get_sgtable(d, &p->sgt, p->cpu_va, p->iova, p->size))
+		goto clean_up;
+
+	if (gk20a_allocator_init(&p->alloc, unique_name, 0,
+				 p->size, SEMAPHORE_SIZE))
+		goto clean_up;
+
+	gk20a_dbg_info("cpuva=%p iova=%llx phys=%llx", p->cpu_va,
+		       (u64)sg_dma_address(p->sgt->sgl),
+		       (u64)sg_phys(p->sgt->sgl));
+	return p;
+
+clean_up:
+	if (p->cpu_va)
+		dma_free_coherent(d, p->size, p->cpu_va, p->iova);
+	if (p->sgt)
+		gk20a_free_sgtable(&p->sgt);
+	kfree(p);
+	return NULL;
+}
+
+static void gk20a_semaphore_pool_free(struct kref *ref)
+{
+	struct gk20a_semaphore_pool *p =
+		container_of(ref, struct gk20a_semaphore_pool, ref);
+
+	mutex_lock(&p->maps_mutex);
+	WARN_ON(!list_empty(&p->maps));
+	mutex_unlock(&p->maps_mutex);
+	gk20a_free_sgtable(&p->sgt);
+	dma_free_coherent(p->dev, p->size, p->cpu_va, p->iova);
+	gk20a_allocator_destroy(&p->alloc);
+	kfree(p);
+}
+
+static void gk20a_semaphore_pool_get(struct gk20a_semaphore_pool *p)
+{
+	kref_get(&p->ref);
+}
+
+void gk20a_semaphore_pool_put(struct gk20a_semaphore_pool *p)
+{
+	kref_put(&p->ref, gk20a_semaphore_pool_free);
+}
+
+static struct gk20a_semaphore_pool_map *
+gk20a_semaphore_pool_find_map(struct gk20a_semaphore_pool *p,
+			      struct vm_gk20a *vm)
+{
+	struct gk20a_semaphore_pool_map *map, *found = NULL;
+
+	mutex_lock(&p->maps_mutex);
+	list_for_each_entry(map, &p->maps, list) {
+		if (map->vm == vm) {
+			found = map;
+			break;
+		}
+	}
+	mutex_unlock(&p->maps_mutex);
+	return found;
+}
+
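+/*
+ * Map the pool's backing storage into a GPU address space. A pool may
+ * be mapped into several address spaces, with one tracking entry kept
+ * per vm; mapping the same vm twice is a caller bug (hence the WARN_ON
+ * below).
+ */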
+int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *p,
+			     struct vm_gk20a *vm,
+			     enum gk20a_mem_rw_flag rw_flag)
+{
+	struct gk20a_semaphore_pool_map *map;
+
+	WARN_ON(gk20a_semaphore_pool_find_map(p, vm));
+	map = kzalloc(sizeof(*map), GFP_KERNEL);
+	if (!map)
+		return -ENOMEM;
+	map->vm = vm;
+	map->rw_flag = rw_flag;
+	map->gpu_va = gk20a_gmmu_map(vm, &p->sgt, p->size,
+				     0 /*uncached*/, rw_flag);
+	if (!map->gpu_va) {
+		kfree(map);
+		return -ENOMEM;
+	}
+	mutex_lock(&p->maps_mutex);
+	list_add(&map->list, &p->maps);
+	mutex_unlock(&p->maps_mutex);
+	return 0;
+}
+
+void gk20a_semaphore_pool_unmap(struct gk20a_semaphore_pool *p,
+				struct vm_gk20a *vm)
+{
+	struct gk20a_semaphore_pool_map *map =
+		gk20a_semaphore_pool_find_map(p, vm);
+
+	if (!map)
+		return;
+	gk20a_gmmu_unmap(vm, map->gpu_va, p->size, map->rw_flag);
+	/* Take maps_mutex: the maps list must not be mutated while a
+	 * concurrent gk20a_semaphore_pool_find_map() walks it. */
+	mutex_lock(&p->maps_mutex);
+	list_del(&map->list);
+	mutex_unlock(&p->maps_mutex);
+	kfree(map);
+}
+
+u64 gk20a_semaphore_pool_gpu_va(struct gk20a_semaphore_pool *p,
+				struct vm_gk20a *vm)
+{
+	struct gk20a_semaphore_pool_map *map =
+		gk20a_semaphore_pool_find_map(p, vm);
+
+	if (!map)
+		return 0;
+	return map->gpu_va;
+}
+
+struct gk20a_semaphore *gk20a_semaphore_alloc(struct gk20a_semaphore_pool *pool)
+{
+	struct gk20a_semaphore *s;
+
+	s = kzalloc(sizeof(*s), GFP_KERNEL);
+	if (!s)
+		return NULL;
+
+	if (pool->alloc.alloc(&pool->alloc, &s->offset, SEMAPHORE_SIZE)) {
+		gk20a_err(pool->dev, "failed to allocate semaphore");
+		kfree(s);
+		return NULL;
+	}
+
+	gk20a_semaphore_pool_get(pool);
+	s->pool = pool;
+
+	kref_init(&s->ref);
+	s->value = (volatile u32 *)((uintptr_t)pool->cpu_va + s->offset);
+	*s->value = 0; /* Initially acquired. */
+	gk20a_dbg_info("created semaphore offset=%d, value_cpu=%p, value=%d",
+		       s->offset, s->value, *s->value);
+	return s;
+}
+
+static void gk20a_semaphore_free(struct kref *ref)
+{
+	struct gk20a_semaphore *s =
+		container_of(ref, struct gk20a_semaphore, ref);
+
+	s->pool->alloc.free(&s->pool->alloc, s->offset, SEMAPHORE_SIZE);
+	gk20a_semaphore_pool_put(s->pool);
+	kfree(s);
+}
+
+void gk20a_semaphore_put(struct gk20a_semaphore *s)
+{
+	kref_put(&s->ref, gk20a_semaphore_free);
+}
+
+void gk20a_semaphore_get(struct gk20a_semaphore *s)
+{
+	kref_get(&s->ref);
+}
diff --git a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h
new file mode 100644
index 000000000..214db3987
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.h
@@ -0,0 +1,97 @@
+/*
+ * drivers/video/tegra/host/gk20a/semaphore_gk20a.h
+ *
+ * GK20A Semaphores
+ *
+ * Copyright (c) 2014, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _GK20A_SEMAPHORE_H_
+#define _GK20A_SEMAPHORE_H_
+
+#include <linux/kref.h>
+#include "gk20a_allocator.h"
+#include "mm_gk20a.h"
+
+/* A memory pool for holding semaphores. */
+struct gk20a_semaphore_pool {
+	void *cpu_va;
+	dma_addr_t iova;
+	size_t size;
+	struct device *dev;
+	struct sg_table *sgt;
+	struct list_head maps;
+	struct mutex maps_mutex;
+	struct kref ref;
+	struct gk20a_allocator alloc;
+};
+
+/* A semaphore pool can be mapped to multiple GPU address spaces. */
+struct gk20a_semaphore_pool_map {
+	u64 gpu_va;
+	enum gk20a_mem_rw_flag rw_flag;
+	struct vm_gk20a *vm;
+	struct list_head list;
+};
+
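+/*
+ * Typical pool lifetime, sketched (illustrative only; error handling
+ * omitted, and gk20a_mem_flag_none is the "no restriction" value of
+ * enum gk20a_mem_rw_flag from mm_gk20a.h):
+ *
+ *	pool = gk20a_semaphore_pool_alloc(dev, "sema.ch0", 1024);
+ *	gk20a_semaphore_pool_map(pool, vm, gk20a_mem_flag_none);
+ *	s = gk20a_semaphore_alloc(pool);	(value == 0: acquired)
+ *	... hand gk20a_semaphore_gpu_va(s, vm) to GPU methods ...
+ *	gk20a_semaphore_put(s);
+ *	gk20a_semaphore_pool_unmap(pool, vm);
+ *	gk20a_semaphore_pool_put(pool);
+ */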
+/* A semaphore that lives inside a semaphore pool. */
+struct gk20a_semaphore {
+	struct gk20a_semaphore_pool *pool;
+	u32 offset; /* byte offset within pool */
+	struct kref ref;
+	/* value is a pointer into the pool's coherent cpu_va.
+	 * It is shared between CPU and GPU, hence volatile. */
+	volatile u32 *value; /* 0=acquired, 1=released */
+};
+
+/* Create a semaphore pool that can hold at most 'capacity' semaphores. */
+struct gk20a_semaphore_pool *
+gk20a_semaphore_pool_alloc(struct device *, const char *unique_name,
+			   size_t capacity);
+void gk20a_semaphore_pool_put(struct gk20a_semaphore_pool *);
+int gk20a_semaphore_pool_map(struct gk20a_semaphore_pool *,
+			     struct vm_gk20a *,
+			     enum gk20a_mem_rw_flag);
+void gk20a_semaphore_pool_unmap(struct gk20a_semaphore_pool *,
+				struct vm_gk20a *);
+u64 gk20a_semaphore_pool_gpu_va(struct gk20a_semaphore_pool *,
+				struct vm_gk20a *);
+
+/* Allocate a semaphore from the semaphore pool. The newly allocated
+ * semaphore will be in acquired state (value=0). */
+struct gk20a_semaphore *
+gk20a_semaphore_alloc(struct gk20a_semaphore_pool *);
+void gk20a_semaphore_put(struct gk20a_semaphore *);
+void gk20a_semaphore_get(struct gk20a_semaphore *);
+
+static inline u64 gk20a_semaphore_gpu_va(struct gk20a_semaphore *s,
+					 struct vm_gk20a *vm)
+{
+	return gk20a_semaphore_pool_gpu_va(s->pool, vm) + s->offset;
+}
+
+static inline bool gk20a_semaphore_is_acquired(struct gk20a_semaphore *s)
+{
+	u32 v = *s->value;
+
+	/* We often block on the value reaching a certain threshold. We
+	 * must make sure that if we get unblocked, we haven't read
+	 * anything too early. */
+	smp_rmb();
+	return v == 0;
+}
+
+static inline void gk20a_semaphore_release(struct gk20a_semaphore *s)
+{
+	smp_wmb();
+	*s->value = 1;
+}
+#endif /* _GK20A_SEMAPHORE_H_ */
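
For reference, a minimal sketch of a CPU-side waiter built on the helpers
above. This is illustrative only and not part of the patch:
wait_for_gpu_release is a hypothetical name, the polling/timeout policy is
arbitrary, and it assumes the usual <linux/jiffies.h>, <linux/delay.h> and
<linux/errno.h> helpers are available:

	/* Illustrative only: poll a semaphore until the producer (GPU, or
	 * CPU via gk20a_semaphore_release()) writes 1 to its value. */
	static int wait_for_gpu_release(struct gk20a_semaphore *s,
					unsigned long timeout_jiffies)
	{
		unsigned long end = jiffies + timeout_jiffies;

		while (gk20a_semaphore_is_acquired(s)) {
			if (time_after(jiffies, end))
				return -ETIMEDOUT;
			usleep_range(10, 20);
		}
		/* The smp_rmb() inside gk20a_semaphore_is_acquired() ensures
		 * reads issued after this point are not satisfied before the
		 * semaphore value itself was observed as released. */
		return 0;
	}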