mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-23 01:50:07 +03:00
Unify the job metadata handling by deleting the parts that have handled
dynamically allocated job structs and fences. Now a channel can be in one
less mode than before, which reduces branching in tricky places and makes
the submit/cleanup sequence easier to understand. While preallocating all
the resources upfront may increase average memory consumption by some
kilobytes, users of channels have to supply the worst-case numbers anyway,
and this preallocation has already been done on deterministic channels.

Flip the channel_joblist_delete() call in nvgpu_channel_clean_up_jobs() to
be done after nvgpu_channel_free_job(). Deleting from the list (which is a
ringbuffer) makes it possible to reuse the job again, so the job must be
freed before that. The comment about using post_fence is no longer valid;
nvgpu_channel_abort() does not use fences.

This inverse order has not posed problems before because it has been buggy
only for deterministic channels, and such channels do not do the cleanup
asynchronously, so no races are possible. With a preallocated job list for
all channels, this would have become a problem.

Jira NVGPU-5492

Change-Id: I085066b0c9c2475e38be885a275d7be629725d64
Signed-off-by: Konsta Hölttä <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2346064
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: automaticguardword <automaticguardword@nvidia.com>
Reviewed-by: Debarshi Dutta <ddutta@nvidia.com>
Reviewed-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-by: svc-mobile-misra <svc-mobile-misra@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
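For context, the reordering described above boils down to freeing a job
before deleting it from the joblist ringbuffer. A minimal sketch of that
order follows; clean_up_one_job() is a hypothetical helper and the struct
and parameter names are assumptions for illustration, not the actual body
of nvgpu_channel_clean_up_jobs():

/*
 * Sketch only; the surrounding cleanup work done by the real function is
 * omitted here.
 */
static void clean_up_one_job(struct nvgpu_channel *c,
		struct nvgpu_channel_job *job)
{
	/* Free the job's resources while it still owns its joblist slot. */
	nvgpu_channel_free_job(c, job);
	/*
	 * Only then delete it from the list; since the joblist is a
	 * ringbuffer, deletion makes the slot reusable by a new submit.
	 */
	channel_joblist_delete(c, job);
}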
280 lines
7.3 KiB
C
/*
 * Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <nvgpu/kmem.h>
#include <nvgpu/soc.h>
#include <nvgpu/nvhost.h>
#include <nvgpu/barrier.h>
#include <nvgpu/os_fence.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/channel.h>
#include <nvgpu/semaphore.h>
#include <nvgpu/fence.h>
#include <nvgpu/channel_sync_syncpt.h>
#include <nvgpu/user_fence.h>

static struct nvgpu_fence_type *nvgpu_fence_from_ref(struct nvgpu_ref *ref)
{
	return (struct nvgpu_fence_type *)((uintptr_t)ref -
			offsetof(struct nvgpu_fence_type, ref));
}

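/*
 * Release callback for the last reference: drop the os_fence and (if
 * present) the backing semaphore, then return the fence struct to the
 * channel's preallocated pool.
 */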
static void nvgpu_fence_free(struct nvgpu_ref *ref)
{
	struct nvgpu_fence_type *f = nvgpu_fence_from_ref(ref);

	if (nvgpu_os_fence_is_initialized(&f->os_fence)) {
		f->os_fence.ops->drop_ref(&f->os_fence);
	}

#ifdef CONFIG_NVGPU_SW_SEMAPHORE
	if (f->semaphore != NULL) {
		nvgpu_semaphore_put(f->semaphore);
	}
#endif

	/* the allocator must outlive the fences */
	BUG_ON(!nvgpu_alloc_initialized(f->allocator));

	nvgpu_free(f->allocator, (u64)(uintptr_t)f);
}

void nvgpu_fence_put(struct nvgpu_fence_type *f)
{
	nvgpu_ref_put(&f->ref, nvgpu_fence_free);
}

struct nvgpu_fence_type *nvgpu_fence_get(struct nvgpu_fence_type *f)
{
	nvgpu_ref_get(&f->ref);
	return f;
}

/*
 * Extract an object to be passed to the userspace as a result of a submitted
 * job. This must be balanced with a call to nvgpu_user_fence_release().
 */
struct nvgpu_user_fence nvgpu_fence_extract_user(struct nvgpu_fence_type *f)
{
	struct nvgpu_user_fence uf = (struct nvgpu_user_fence) {
		.syncpt_id = f->syncpt_id,
		.syncpt_value = f->syncpt_value,
		.os_fence = f->os_fence,
	};

	/*
	 * The os fence member has to live so it can be signaled when the job
	 * completes. The returned user fence may live longer than that before
	 * being safely attached to an fd if the job completes before a
	 * submission ioctl finishes, or if it's stored for cde job state
	 * tracking.
	 */
	if (nvgpu_os_fence_is_initialized(&f->os_fence)) {
		f->os_fence.ops->dup(&f->os_fence);
	}

	return uf;
}

int nvgpu_fence_wait(struct gk20a *g, struct nvgpu_fence_type *f,
			u32 timeout)
{
	if (!nvgpu_platform_is_silicon(g)) {
		timeout = U32_MAX;
	}
	return f->ops->wait(f, timeout);
}

bool nvgpu_fence_is_expired(struct nvgpu_fence_type *f)
{
	return f->ops->is_expired(f);
}

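/*
 * Preallocate a pool of count fence structs for the channel and back it
 * with a lockless allocator so that fences can be handed out at submit
 * time without further allocations.
 */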
int nvgpu_fence_pool_alloc(struct nvgpu_channel *ch, unsigned int count)
{
	int err;
	size_t size;
	struct nvgpu_fence_type *fence_pool = NULL;

	size = sizeof(struct nvgpu_fence_type);
	if (count <= UINT_MAX / size) {
		size = count * size;
		fence_pool = nvgpu_vzalloc(ch->g, size);
	}

	if (fence_pool == NULL) {
		return -ENOMEM;
	}

	err = nvgpu_lockless_allocator_init(ch->g, &ch->fence_allocator,
				"fence_pool", (size_t)fence_pool, size,
				sizeof(struct nvgpu_fence_type), 0);
	if (err != 0) {
		goto fail;
	}

	return 0;

fail:
	nvgpu_vfree(ch->g, fence_pool);
	return err;
}

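/*
 * Tear down the channel's fence allocator and free the backing pool
 * memory, if the pool was set up.
 */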
void nvgpu_fence_pool_free(struct nvgpu_channel *ch)
{
	if (nvgpu_alloc_initialized(&ch->fence_allocator)) {
		struct nvgpu_fence_type *fence_pool;

		fence_pool = (struct nvgpu_fence_type *)(uintptr_t)
			nvgpu_alloc_base(&ch->fence_allocator);
		nvgpu_alloc_destroy(&ch->fence_allocator);
		nvgpu_vfree(ch->g, fence_pool);
	}
}

#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
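/*
 * Grab one zeroed fence from the channel's preallocated pool. Returns
 * NULL if the pool is not initialized or has run out.
 */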
struct nvgpu_fence_type *nvgpu_fence_alloc(struct nvgpu_channel *ch)
{
	struct nvgpu_fence_type *fence = NULL;

	if (nvgpu_alloc_initialized(&ch->fence_allocator)) {
		fence = (struct nvgpu_fence_type *)(uintptr_t)
			nvgpu_alloc(&ch->fence_allocator,
					sizeof(struct nvgpu_fence_type));
	}

	if (fence != NULL) {
		(void) memset(fence, 0, sizeof(*fence));
		fence->allocator = &ch->fence_allocator;

		nvgpu_ref_init(&fence->ref);
		fence->g = ch->g;
	}

	return fence;
}
#endif

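/*
 * Initialize the fields common to all fence backends; the semaphore and
 * syncpt variants fill in their specifics on top of this.
 */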
void nvgpu_fence_init(struct nvgpu_fence_type *f,
		const struct nvgpu_fence_ops *ops,
		struct nvgpu_os_fence os_fence)
{
	f->ops = ops;
	f->syncpt_id = NVGPU_INVALID_SYNCPT_ID;
#ifdef CONFIG_NVGPU_SW_SEMAPHORE
	f->semaphore = NULL;
#endif
	f->os_fence = os_fence;
}

#ifdef CONFIG_NVGPU_SW_SEMAPHORE
/* Fences that are backed by GPU semaphores: */

static int nvgpu_semaphore_fence_wait(struct nvgpu_fence_type *f, u32 timeout)
{
	if (!nvgpu_semaphore_is_acquired(f->semaphore)) {
		return 0;
	}

	return NVGPU_COND_WAIT_INTERRUPTIBLE(
		f->semaphore_wq,
		!nvgpu_semaphore_is_acquired(f->semaphore),
		timeout);
}

static bool nvgpu_semaphore_fence_is_expired(struct nvgpu_fence_type *f)
{
	return !nvgpu_semaphore_is_acquired(f->semaphore);
}

static const struct nvgpu_fence_ops nvgpu_semaphore_fence_ops = {
	.wait = &nvgpu_semaphore_fence_wait,
	.is_expired = &nvgpu_semaphore_fence_is_expired,
};

/* This function takes ownership of the semaphore as well as the os_fence */
void nvgpu_fence_from_semaphore(
		struct nvgpu_fence_type *f,
		struct nvgpu_semaphore *semaphore,
		struct nvgpu_cond *semaphore_wq,
		struct nvgpu_os_fence os_fence)
{
	nvgpu_fence_init(f, &nvgpu_semaphore_fence_ops, os_fence);

	f->semaphore = semaphore;
	f->semaphore_wq = semaphore_wq;
}

#endif

#ifdef CONFIG_TEGRA_GK20A_NVHOST
/* Fences that are backed by host1x syncpoints: */

static int nvgpu_fence_syncpt_wait(struct nvgpu_fence_type *f, u32 timeout)
{
	return nvgpu_nvhost_syncpt_wait_timeout_ext(
			f->nvhost_dev, f->syncpt_id, f->syncpt_value,
			timeout, NVGPU_NVHOST_DEFAULT_WAITER);
}

static bool nvgpu_fence_syncpt_is_expired(struct nvgpu_fence_type *f)
{

	/*
	 * In cases we don't register a notifier, we can't expect the
	 * syncpt value to be updated. For this case, we force a read
	 * of the value from HW, and then check for expiration.
	 */
	if (!nvgpu_nvhost_syncpt_is_expired_ext(f->nvhost_dev, f->syncpt_id,
				f->syncpt_value)) {
		u32 val;

		if (!nvgpu_nvhost_syncpt_read_ext_check(f->nvhost_dev,
				f->syncpt_id, &val)) {
			return nvgpu_nvhost_syncpt_is_expired_ext(
					f->nvhost_dev,
					f->syncpt_id, f->syncpt_value);
		}
	}

	return true;
}

static const struct nvgpu_fence_ops nvgpu_fence_syncpt_ops = {
	.wait = &nvgpu_fence_syncpt_wait,
	.is_expired = &nvgpu_fence_syncpt_is_expired,
};

/* This function takes the ownership of the os_fence */
void nvgpu_fence_from_syncpt(
		struct nvgpu_fence_type *f,
		struct nvgpu_nvhost_dev *nvhost_dev,
		u32 id, u32 value, struct nvgpu_os_fence os_fence)
{
	nvgpu_fence_init(f, &nvgpu_fence_syncpt_ops, os_fence);

	f->nvhost_dev = nvhost_dev;
	f->syncpt_id = id;
	f->syncpt_value = value;
}
#endif