mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 17:36:20 +03:00
gpu: nvgpu: Greatly simplify the semaphore detection
Greatly simplify the GPU semaphore detection in sync_fences and make it more robust. Instead of using a magic number, use the parent timeline of the sync_pts. This also works with multi-GPU setups using nvgpu, since the timeline ops pointer is the same across all instances of nvgpu.
Bug 1732449
Reviewed-on: http://git-master/r/1203834
(cherry picked from commit 66eeb577eae5d10741fd15f3659e843c70792cd6)
Change-Id: I4c6619d70b5531e2676e18d1330724e8f8b9bcb3
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1221042
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
9bd76b7fa0
commit
7b8cbd2be3
@@ -510,24 +510,7 @@ static int gk20a_channel_semaphore_wait_syncpt(
|
|||||||
return -ENODEV;
|
return -ENODEV;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* UGHHH - the sync_fence underlying implementation changes from 3.10 to 3.18.
|
|
||||||
* But since there's no API for getting the underlying sync_pts we have to do
|
|
||||||
* some conditional compilation.
|
|
||||||
*/
|
|
||||||
#ifdef CONFIG_SYNC
|
#ifdef CONFIG_SYNC
|
||||||
static struct gk20a_semaphore *sema_from_sync_fence(struct sync_fence *f)
|
|
||||||
{
|
|
||||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
|
|
||||||
struct sync_pt *pt;
|
|
||||||
|
|
||||||
pt = list_first_entry(&f->pt_list_head, struct sync_pt, pt_list);
|
|
||||||
return gk20a_sync_pt_inst_get_sema(pt);
|
|
||||||
#else
|
|
||||||
return gk20a_sync_pt_inst_get_sema(f->cbs[0].sync_pt);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Attempt a fast path for waiting on a sync_fence. Basically if the passed
|
* Attempt a fast path for waiting on a sync_fence. Basically if the passed
|
||||||
* sync_fence is backed by a gk20a_semaphore then there's no reason to go
|
* sync_fence is backed by a gk20a_semaphore then there's no reason to go
|
||||||
@@ -551,7 +534,7 @@ static int __semaphore_wait_fd_fast_path(struct channel_gk20a *c,
|
|||||||
if (!gk20a_is_sema_backed_sync_fence(fence))
|
if (!gk20a_is_sema_backed_sync_fence(fence))
|
||||||
return -ENODEV;
|
return -ENODEV;
|
||||||
|
|
||||||
sema = sema_from_sync_fence(fence);
|
sema = gk20a_sync_fence_get_sema(fence);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If there's no underlying sema then that means the underlying sema has
|
* If there's no underlying sema then that means the underlying sema has
|
||||||
|
|||||||
@@ -57,110 +57,85 @@ struct gk20a_sync_pt {
|
|||||||
|
|
||||||
struct gk20a_sync_pt_inst {
|
struct gk20a_sync_pt_inst {
|
||||||
struct sync_pt pt;
|
struct sync_pt pt;
|
||||||
|
|
||||||
/*
|
|
||||||
* Magic number to identify a gk20a_sync_pt_inst from either a struct
|
|
||||||
* fence or a struct sync_pt.
|
|
||||||
*/
|
|
||||||
#define GK20A_SYNC_PT_INST_MAGIC 0xb333eeef;
|
|
||||||
u32 magic;
|
|
||||||
|
|
||||||
struct gk20a_sync_pt *shared;
|
struct gk20a_sync_pt *shared;
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
|
||||||
* Check if a sync_pt is a gk20a_sync_pt_inst.
|
|
||||||
*/
|
|
||||||
int __gk20a_is_gk20a_sync_pt_inst(struct sync_pt *pt)
|
|
||||||
{
|
|
||||||
struct gk20a_sync_pt_inst *pti =
|
|
||||||
container_of(pt, struct gk20a_sync_pt_inst, pt);
|
|
||||||
|
|
||||||
return pti->magic == GK20A_SYNC_PT_INST_MAGIC;
|
|
||||||
}
|
|
||||||
|
|
||||||
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,18,0)
|
|
||||||
/**
|
|
||||||
* Check if a fence is a gk20a_sync_pt_inst.
|
|
||||||
*/
|
|
||||||
int gk20a_is_gk20a_sync_pt_inst(struct fence *f)
|
|
||||||
{
|
|
||||||
struct sync_pt *pt = container_of(f, struct sync_pt, base);
|
|
||||||
|
|
||||||
return __gk20a_is_gk20a_sync_pt_inst(pt);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the underlying semaphore from a gk20a_sync_pt_inst. This assumes the
|
|
||||||
* passed fence is in fact a gk20a_sync_pt_inst - use
|
|
||||||
* gk20a_is_gk20a_sync_pt_inst() to verify this before using this function.
|
|
||||||
*/
|
|
||||||
struct gk20a_semaphore *gk20a_sync_pt_inst_get_sema(struct fence *f)
|
|
||||||
{
|
|
||||||
struct sync_pt *pt = container_of(f, struct sync_pt, base);
|
|
||||||
struct gk20a_sync_pt_inst *pti =
|
|
||||||
container_of(pt, struct gk20a_sync_pt_inst, pt);
|
|
||||||
|
|
||||||
BUG_ON(!gk20a_is_gk20a_sync_pt_inst(f));
|
|
||||||
|
|
||||||
return pti->shared->sema;
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
/**
|
|
||||||
* Get the underlying semaphore from a gk20a_sync_pt_inst. This assumes the
|
|
||||||
* passed sync_pt is in fact a gk20a_sync_pt_inst - use
|
|
||||||
* gk20a_is_gk20a_sync_pt_inst() to verify this before using this function.
|
|
||||||
*/
|
|
||||||
struct gk20a_semaphore *gk20a_sync_pt_inst_get_sema(struct sync_pt *pt)
|
|
||||||
{
|
|
||||||
struct gk20a_sync_pt_inst *pti;
|
|
||||||
|
|
||||||
BUG_ON(!__gk20a_is_gk20a_sync_pt_inst(pt));
|
|
||||||
pti = container_of(pt, struct gk20a_sync_pt_inst, pt);
|
|
||||||
|
|
||||||
return pti->shared->sema;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check if the passed sync_fence is backed by a single GPU semaphore. In such
|
* Check if the passed sync_fence is backed by a single GPU semaphore. In such
|
||||||
* cases we can short circuit a lot of SW involved in signaling pre-fences and
|
* cases we can short circuit a lot of SW involved in signaling pre-fences and
|
||||||
* post fences.
|
* post fences.
|
||||||
|
*
|
||||||
|
* For now reject multi-sync_pt fences. This could be changed in future. It
|
||||||
|
* would require that the sema fast path push a sema acquire for each semaphore
|
||||||
|
* in the fence.
|
||||||
*/
|
*/
|
||||||
int gk20a_is_sema_backed_sync_fence(struct sync_fence *fence)
|
int gk20a_is_sema_backed_sync_fence(struct sync_fence *fence)
|
||||||
{
|
{
|
||||||
|
struct sync_timeline *t;
|
||||||
|
|
||||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
|
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
|
||||||
struct sync_pt *pt;
|
struct sync_pt *spt;
|
||||||
int i = 0;
|
int i = 0;
|
||||||
|
|
||||||
if (list_empty(&fence->pt_list_head))
|
if (list_empty(&fence->pt_list_head))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
/*
|
list_for_each_entry(spt, &fence->pt_list_head, pt_list) {
|
||||||
* For now reject multi-sync_pt fences. This could be changed in
|
|
||||||
* future. It would require that the sema fast path push a sema
|
|
||||||
* acquire for each semaphore in the fence.
|
|
||||||
*/
|
|
||||||
list_for_each_entry(pt, &fence->pt_list_head, pt_list) {
|
|
||||||
i++;
|
i++;
|
||||||
|
|
||||||
if (i >= 2)
|
if (i >= 2)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
pt = list_first_entry(&fence->pt_list_head, struct sync_pt, pt_list);
|
spt = list_first_entry(&fence->pt_list_head, struct sync_pt, pt_list);
|
||||||
return __gk20a_is_gk20a_sync_pt_inst(pt);
|
t = spt->parent;
|
||||||
|
|
||||||
#else
|
#else
|
||||||
struct sync_fence_cb *cb0 = &fence->cbs[0];
|
struct fence *pt = fence->cbs[0].sync_pt;
|
||||||
|
struct sync_pt *spt = sync_pt_from_fence(pt);
|
||||||
|
|
||||||
if (fence->num_fences != 1)
|
if (fence->num_fences != 1)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
return gk20a_is_gk20a_sync_pt_inst(cb0->sync_pt);
|
if (spt == NULL)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
t = sync_pt_parent(spt);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
if (t->ops == &gk20a_sync_timeline_ops)
|
||||||
|
return 1;
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct gk20a_semaphore *gk20a_sync_fence_get_sema(struct sync_fence *f)
|
||||||
|
{
|
||||||
|
struct sync_pt *spt;
|
||||||
|
struct gk20a_sync_pt_inst *pti;
|
||||||
|
|
||||||
|
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
|
||||||
|
if (!f)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
if (!gk20a_is_sema_backed_sync_fence(f))
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
spt = list_first_entry(&f->pt_list_head, struct sync_pt, pt_list);
|
||||||
|
#else
|
||||||
|
struct fence *pt;
|
||||||
|
|
||||||
|
if (!f)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
if (!gk20a_is_sema_backed_sync_fence(f))
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
pt = f->cbs[0].sync_pt;
|
||||||
|
spt = sync_pt_from_fence(pt);
|
||||||
|
#endif
|
||||||
|
pti = container_of(spt, struct gk20a_sync_pt_inst, pt);
|
||||||
|
|
||||||
|
return pti->shared->sema;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Compares sync pt values a and b, both of which will trigger either before
|
* Compares sync pt values a and b, both of which will trigger either before
|
||||||
@@ -283,7 +258,6 @@ static struct sync_pt *gk20a_sync_pt_create_inst(
|
|||||||
if (!pti)
|
if (!pti)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
pti->magic = GK20A_SYNC_PT_INST_MAGIC;
|
|
||||||
pti->shared = gk20a_sync_pt_create_shared(obj, sema, dependency);
|
pti->shared = gk20a_sync_pt_create_shared(obj, sema, dependency);
|
||||||
if (!pti->shared) {
|
if (!pti->shared) {
|
||||||
sync_pt_free(&pti->pt);
|
sync_pt_free(&pti->pt);
|
||||||
|
|||||||
@@ -27,15 +27,8 @@ struct sync_pt;
|
|||||||
struct gk20a_semaphore;
|
struct gk20a_semaphore;
|
||||||
struct fence;
|
struct fence;
|
||||||
|
|
||||||
int __gk20a_is_gk20a_sync_pt_inst(struct sync_pt *pt);
|
|
||||||
int gk20a_is_gk20a_sync_pt_inst(struct fence *f);
|
|
||||||
int gk20a_is_sema_backed_sync_fence(struct sync_fence *fence);
|
int gk20a_is_sema_backed_sync_fence(struct sync_fence *fence);
|
||||||
|
struct gk20a_semaphore *gk20a_sync_fence_get_sema(struct sync_fence *f);
|
||||||
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,18,0)
|
|
||||||
struct gk20a_semaphore *gk20a_sync_pt_inst_get_sema(struct fence *f);
|
|
||||||
#else
|
|
||||||
struct gk20a_semaphore *gk20a_sync_pt_inst_get_sema(struct sync_pt *pt);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef CONFIG_SYNC
|
#ifdef CONFIG_SYNC
|
||||||
struct sync_timeline *gk20a_sync_timeline_create(const char *fmt, ...);
|
struct sync_timeline *gk20a_sync_timeline_create(const char *fmt, ...);
|
||||||
|
|||||||
Reference in New Issue
Block a user