gpu: nvgpu: remove lockless allocator

The lockless allocator, which spins in its alloc and free ops using
cmpxchg to resolve races, has only ever been used for the post fences
in preallocated job resources. Now each post fence has a clear owner
(the job struct, which is allocated up front anyway) and a
well-defined lifetime, so this allocator no longer serves a purpose.
Delete it to avoid bitrot. (The job queue has always been designed so
that there is minimal contention in any case.)

Jira NVGPU-5773

Change-Id: Ied98d977c2c75bacfd3d010ce60c80fe709231e0
Signed-off-by: Konsta Hölttä <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2392705
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Author:       Konsta Hölttä
Date:         2020-07-30 15:50:24 +03:00
Committed by: Alex Waterman
Parent:       e6c0d84683
Commit:       fcbd807842

7 changed files with 1 addition and 407 deletions

@@ -580,11 +580,6 @@ mm:
     sources: [ common/mm/allocators/page_allocator.c,
                include/nvgpu/page_allocator.h ]
     deps: [ ]
-  lockless:
-    safe: no
-    sources: [ common/mm/allocators/lockless_allocator.c,
-               common/mm/allocators/lockless_allocator_priv.h ]
-    tags: unit-testable
   dma:
     safe: yes
     sources: [ common/mm/dma.c ]

@@ -508,7 +508,6 @@ nvgpu-y += \
 	common/mm/allocators/nvgpu_allocator.o \
 	common/mm/allocators/bitmap_allocator.o \
 	common/mm/allocators/buddy_allocator.o \
-	common/mm/allocators/lockless_allocator.o \
 	common/mm/gmmu/page_table.o \
 	common/mm/gmmu/pd_cache.o \
 	common/mm/gmmu/pte.o \

@@ -427,8 +427,7 @@ endif
 endif
 
 ifeq ($(CONFIG_NVGPU_FENCE),1)
-srcs += common/fence/fence.c \
-	common/mm/allocators/lockless_allocator.c
+srcs += common/fence/fence.c
 endif
 
 ifeq ($(CONFIG_NVGPU_FECS_TRACE),1)

@@ -1,234 +0,0 @@
-/*
- * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#include <nvgpu/atomic.h>
-#include <nvgpu/allocator.h>
-#include <nvgpu/kmem.h>
-#include <nvgpu/barrier.h>
-#include <nvgpu/bug.h>
-
-#include "lockless_allocator_priv.h"
-
-static u64 nvgpu_lockless_alloc_length(struct nvgpu_allocator *a)
-{
-	struct nvgpu_lockless_allocator *pa = a->priv;
-
-	return pa->length;
-}
-
-static u64 nvgpu_lockless_alloc_base(struct nvgpu_allocator *a)
-{
-	struct nvgpu_lockless_allocator *pa = a->priv;
-
-	return pa->base;
-}
-
-static bool nvgpu_lockless_alloc_inited(struct nvgpu_allocator *a)
-{
-	struct nvgpu_lockless_allocator *pa = a->priv;
-	bool inited = pa->inited;
-
-	nvgpu_smp_rmb();
-	return inited;
-}
-
-static u64 nvgpu_lockless_alloc_end(struct nvgpu_allocator *a)
-{
-	struct nvgpu_lockless_allocator *pa = a->priv;
-
-	return nvgpu_safe_add_u64(pa->base, pa->length);
-}
-
-static u64 nvgpu_lockless_alloc(struct nvgpu_allocator *a, u64 len)
-{
-	struct nvgpu_lockless_allocator *pa = a->priv;
-	int head, new_head, ret;
-	u64 addr = 0;
-
-	if (len != pa->blk_size) {
-		return 0;
-	}
-
-	head = NV_READ_ONCE(pa->head);
-	while (head >= 0) {
-		new_head = NV_READ_ONCE(pa->next[head]);
-		ret = cmpxchg(&pa->head, head, new_head);
-		if (ret == head) {
-			addr = pa->base + U64(head) * pa->blk_size;
-			nvgpu_atomic_inc(&pa->nr_allocs);
-			alloc_dbg(a, "Alloc node # %d @ addr 0x%llx", head,
-				  addr);
-			break;
-		}
-		head = NV_READ_ONCE(pa->head);
-	}
-
-	if (addr != 0ULL) {
-		alloc_dbg(a, "Alloc node # %d @ addr 0x%llx", head, addr);
-	} else {
-		alloc_dbg(a, "Alloc failed!");
-	}
-
-	return addr;
-}
-
-static void nvgpu_lockless_free(struct nvgpu_allocator *a, u64 addr)
-{
-	struct nvgpu_lockless_allocator *pa = a->priv;
-	int head, ret;
-	u64 cur_idx;
-
-	cur_idx = nvgpu_safe_sub_u64(addr, pa->base) / pa->blk_size;
-
-	alloc_dbg(a, "Free node # %llu @ addr 0x%llx", cur_idx, addr);
-
-	while (true) {
-		head = NV_READ_ONCE(pa->head);
-		NV_WRITE_ONCE(pa->next[cur_idx], head);
-		nvgpu_assert(cur_idx <= U64(INT_MAX));
-		ret = cmpxchg(&pa->head, head, (int)cur_idx);
-		if (ret == head) {
-			nvgpu_atomic_dec(&pa->nr_allocs);
-			alloc_dbg(a, "Free node # %llu", cur_idx);
-			break;
-		}
-	}
-}
-
-static void nvgpu_lockless_alloc_destroy(struct nvgpu_allocator *a)
-{
-	struct nvgpu_lockless_allocator *pa = a->priv;
-
-#ifdef CONFIG_DEBUG_FS
-	nvgpu_fini_alloc_debug(a);
-#endif
-
-	nvgpu_vfree(a->g, pa->next);
-	nvgpu_kfree(nvgpu_alloc_to_gpu(a), pa);
-}
-
-#ifdef __KERNEL__
-static void nvgpu_lockless_print_stats(struct nvgpu_allocator *a,
-				       struct seq_file *s, int lock)
-{
-	struct nvgpu_lockless_allocator *pa = a->priv;
-
-	alloc_pstat(s, a, "Lockless allocator params:");
-	alloc_pstat(s, a, " start = 0x%llx", pa->base);
-	alloc_pstat(s, a, " end = 0x%llx", pa->base + pa->length);
-
-	/* Actual stats. */
-	alloc_pstat(s, a, "Stats:");
-	alloc_pstat(s, a, " Number allocs = %d",
-		    nvgpu_atomic_read(&pa->nr_allocs));
-	alloc_pstat(s, a, " Number free = %d",
-		    pa->nr_nodes - nvgpu_atomic_read(&pa->nr_allocs));
-}
-#endif
-
-static const struct nvgpu_allocator_ops pool_ops = {
-	.alloc = nvgpu_lockless_alloc,
-	.free_alloc = nvgpu_lockless_free,
-
-	.base = nvgpu_lockless_alloc_base,
-	.length = nvgpu_lockless_alloc_length,
-	.end = nvgpu_lockless_alloc_end,
-	.inited = nvgpu_lockless_alloc_inited,
-
-	.fini = nvgpu_lockless_alloc_destroy,
-
-#ifdef __KERNEL__
-	.print_stats = nvgpu_lockless_print_stats,
-#endif
-};
-
-int nvgpu_lockless_allocator_init(struct gk20a *g, struct nvgpu_allocator *na,
-				  const char *name, u64 base, u64 length,
-				  u64 blk_size, u64 flags)
-{
-	int i;
-	int err;
-	int nr_nodes;
-	u64 count;
-	struct nvgpu_lockless_allocator *a;
-
-	if (blk_size == 0ULL) {
-		return -EINVAL;
-	}
-
-	/*
-	 * Ensure we have space for at least one node & there's no overflow.
-	 * In order to control memory footprint, we require count < INT_MAX
-	 */
-	count = length / blk_size;
-	if (base == 0ULL || count == 0ULL || count > U64(INT_MAX)) {
-		return -EINVAL;
-	}
-
-	a = nvgpu_kzalloc(g, sizeof(struct nvgpu_lockless_allocator));
-	if (a == NULL) {
-		return -ENOMEM;
-	}
-
-	err = nvgpu_alloc_common_init(na, g, name, a, false, &pool_ops);
-	if (err != 0) {
-		goto fail;
-	}
-
-	a->next = nvgpu_vzalloc(g, sizeof(*a->next) * count);
-	if (a->next == NULL) {
-		err = -ENOMEM;
-		goto fail;
-	}
-
-	/* chain the elements together to form the initial free list */
-	nr_nodes = (int)count;
-	for (i = 0; i < nr_nodes; i++) {
-		a->next[i] = i + 1;
-	}
-	a->next[nr_nodes - 1] = -1;
-
-	a->base = base;
-	a->length = length;
-	a->blk_size = blk_size;
-	a->nr_nodes = nr_nodes;
-	a->flags = flags;
-	nvgpu_atomic_set(&a->nr_allocs, 0);
-
-	nvgpu_smp_wmb();
-	a->inited = true;
-
-#ifdef CONFIG_DEBUG_FS
-	nvgpu_init_alloc_debug(g, na);
-#endif
-	alloc_dbg(na, "New allocator: type lockless");
-	alloc_dbg(na, " base 0x%llx", a->base);
-	alloc_dbg(na, " nodes %d", a->nr_nodes);
-	alloc_dbg(na, " blk_size 0x%llx", a->blk_size);
-	alloc_dbg(na, " flags 0x%llx", a->flags);
-
-	return 0;
-
-fail:
-	nvgpu_kfree(g, a);
-	return err;
-}
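
For context, callers never invoked these functions directly; they went
through the generic allocator ops table (pool_ops above). A minimal usage
sketch follows. It assumes nvgpu's generic nvgpu_alloc()/nvgpu_free()/
nvgpu_alloc_destroy() wrappers dispatch through the ops table, and the
pool and slot names are illustrative, not from this commit:

/* Hypothetical usage sketch: back a pool of fixed-size slots with a
 * caller-owned buffer. The allocator never touches the memory itself;
 * it only hands out addresses in [base, base + length). */
struct nvgpu_allocator fence_alloc;
u64 slot;
int err;

err = nvgpu_lockless_allocator_init(g, &fence_alloc, "fence-pool",
				    (u64)(uintptr_t)pool_buf, /* base, must be nonzero */
				    num_slots * slot_size,    /* length */
				    slot_size,                /* blk_size */
				    0ULL);                    /* flags */
if (err != 0)
	return err;

/* Requests must be exactly blk_size; anything else returns 0. */
slot = nvgpu_alloc(&fence_alloc, slot_size);
if (slot != 0ULL) {
	/* ... use the slot ... */
	nvgpu_free(&fence_alloc, slot);
}

nvgpu_alloc_destroy(&fence_alloc);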

@@ -1,127 +0,0 @@
-/*
- * Copyright (c) 2016 - 2017, NVIDIA CORPORATION. All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-/*
- * Basics:
- *
- *    - Lockless memory allocator for fixed-size structures, whose
- *      size is defined up front at init time.
- *    - Memory footprint scales linearly w/ the number of structures in
- *      the pool. It is ~= sizeof(int) * N.
- *    - Memory is pre-allocated by the client. The allocator itself
- *      only computes the addresses for allocations.
- *    - Limit of MAX_INT nodes that the allocator can be responsible for.
- *
- * Implementation details:
- *
- * The allocator maintains a single list of free nodes. We allocate &
- * free nodes from the head of the list. We rely on the cmpxchg() operator
- * to maintain atomicity on the head.
- *
- * So, both allocs & frees are O(1)!!
- *
- *   -- Definitions --
- *     Block Size - size of a single structure that this allocator will
- *                  allocate.
- *     Node       - one of the elements of size blk_size in the
- *                  client-allocated buffer.
- *     Node Index - zero-based index of a node in the client-allocated
- *                  contiguous buffer.
- *
- *   -- Initial State --
- *     We maintain the following to track the state of the free list:
- *
- *     1) A "head" index to track the index of the first free node in the list
- *     2) A "next" array to track the index of the next free node in the list
- *        for every node. So next[head], will give the index to the 2nd free
- *        element in the list.
- *
- *     So, to begin with, the free list consists of all node indices, and each
- *     position in the next array contains index N + 1:
- *
- *     head = 0
- *     next = [1, 2, 3, 4, -1] : Example for a user-allocated buffer of 5 nodes
- *     free_list = 0->1->2->3->4->-1
- *
- *   -- Allocations --
- *     1) Read the current head (aka acq_head)
- *     2) Read next[acq_head], to get the 2nd free element (aka new_head)
- *     3) cmp_xchg(&head, acq_head, new_head)
- *     4) If it succeeds, compute the address of the node, based on
- *        base address, blk_size, & acq_head.
- *
- *     head = 1;
- *     next = [1, 2, 3, 4, -1] : Example after allocating Node #0
- *     free_list = 1->2->3->4->-1
- *
- *     head = 2;
- *     next = [1, 2, 3, 4, -1] : Example after allocating Node #1
- *     free_list = 2->3->4->-1
- *
- *   -- Frees --
- *     1) Based on the address to be freed, calculate the index of the node
- *        being freed (cur_idx)
- *     2) Read the current head (old_head)
- *     3) So the freed node is going to go at the head of the list, and we
- *        want to put the old_head after it. So next[cur_idx] = old_head
- *     4) cmpxchg(head, old_head, cur_idx)
- *
- *     head = 0
- *     next = [2, 2, 3, 4, -1]
- *     free_list = 0->2->3->4->-1 : Example after freeing Node #0
- *
- *     head = 1
- *     next = [2, 0, 3, 4, -1]
- *     free_list = 1->0->2->3->4->-1 : Example after freeing Node #1
- */
-
-#ifndef LOCKLESS_ALLOCATOR_PRIV_H
-#define LOCKLESS_ALLOCATOR_PRIV_H
-
-struct nvgpu_allocator;
-
-struct nvgpu_lockless_allocator {
-	struct nvgpu_allocator *owner;
-
-	u64 base;	/* Base address of the space. */
-	u64 length;	/* Length of the space. */
-	u64 blk_size;	/* Size of the structure being allocated */
-	int nr_nodes;	/* Number of nodes available for allocation */
-
-	int *next;	/* An array holding the next indices per node */
-	int head;	/* Current node at the top of the stack */
-
-	u64 flags;
-
-	bool inited;
-
-	/* Statistics */
-	nvgpu_atomic_t nr_allocs;
-};
-
-static inline struct nvgpu_lockless_allocator *lockless_allocator(
-	struct nvgpu_allocator *a)
-{
-	return (struct nvgpu_lockless_allocator *)(a)->priv;
-}
-
-#endif
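
The header comment above fully specifies the algorithm, so it can be
condensed into a short standalone sketch. The version below uses C11
atomics in place of nvgpu's cmpxchg wrapper and plain malloc() for the
next array; it is illustrative only, not the nvgpu implementation:

/* Standalone sketch of the index-based free list described above,
 * using C11 atomics instead of nvgpu's cmpxchg wrapper. */
#include <stdatomic.h>
#include <stdlib.h>

struct lockless_pool {
	_Atomic int head;	/* index of the first free node, -1 if empty */
	int *next;		/* next[i] = index of the next free node */
	int nr_nodes;
};

static int pool_init(struct lockless_pool *p, int nr_nodes)
{
	if (nr_nodes <= 0)
		return -1;
	p->next = malloc(sizeof(*p->next) * (size_t)nr_nodes);
	if (p->next == NULL)
		return -1;
	/* Chain everything into the initial free list: 0->1->...->-1 */
	for (int i = 0; i < nr_nodes; i++)
		p->next[i] = i + 1;
	p->next[nr_nodes - 1] = -1;
	p->nr_nodes = nr_nodes;
	atomic_store(&p->head, 0);
	return 0;
}

/* Pop a node index off the head of the free list; -1 when exhausted. */
static int pool_alloc(struct lockless_pool *p)
{
	int head = atomic_load(&p->head);

	while (head >= 0) {
		int new_head = p->next[head];

		/* On failure, 'head' is refreshed with the observed value. */
		if (atomic_compare_exchange_weak(&p->head, &head, new_head))
			return head;
	}
	return -1;
}

/* Push a node index back onto the head of the free list. */
static void pool_free(struct lockless_pool *p, int idx)
{
	int head = atomic_load(&p->head);

	do {
		p->next[idx] = head;	/* the old head becomes our successor */
	} while (!atomic_compare_exchange_weak(&p->head, &head, idx));
}

As in nvgpu_lockless_alloc() above, a failed compare-and-exchange simply
refreshes the observed head and retries, so both paths are O(1) apart
from contention.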

@@ -473,27 +473,6 @@ int nvgpu_allocator_init(struct gk20a *g, struct nvgpu_allocator *na,
 			u64 base, u64 length, u64 blk_size, u64 max_order,
 			u64 flags, enum nvgpu_allocator_type alloc_type);
 
-#ifdef CONFIG_NVGPU_FENCE
-/**
- * @brief Initialize lockless allocator.
- *
- * @param[in] g		Pointer to GPU structure.
- * @param[in] na	Pointer to allocator structure.
- * @param[in] name	Name of lockless allocator.
- * @param[in] base	Base address of lockless allocator.
- * @param[in] length	Size of lockless allocator.
- * @param[in] blk_size	Block size of lockless allocator.
- * @param[in] flags	Flags indicating lockless allocator conditions.
- *
- * @return 0 in case of success, < 0 otherwise.
- * @retval -EINVAL in case of incorrect input value.
- * @retval -ENOMEM in case there is not enough memory for allocation.
- */
-int nvgpu_lockless_allocator_init(struct gk20a *g, struct nvgpu_allocator *na,
-				  const char *name, u64 base, u64 length,
-				  u64 blk_size, u64 flags);
-#endif
-
 /**
  * Largest block of resources that fits in address space.
  */

@@ -701,21 +701,4 @@ static inline bool nvgpu_atomic64_sub_and_test_impl(long x, nvgpu_atomic64_t *v)
 	return NVGPU_POSIX_ATOMIC_SUB_RETURN(v, x) == 0;
 }
 
-/*
- * The following is only used by the lockless allocator and makes direct use
- * of the cmpxchg function. For POSIX, this is translated to a call to
- * nvgpu_atomic_cmpxchg.
- */
-
-/**
- * @brief Define for compare and exchange POSIX implementation.
- *
- * @param p	Pointer to data.
- * @param old	Value to compare.
- * @param new	Value to exchange.
- */
-#define cmpxchg(p, old, new) \
-	nvgpu_atomic_cmpxchg((nvgpu_atomic_t *)(void *)(p), (old), (new))
-
 #endif /* NVGPU_POSIX_ATOMIC_H */
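
For readers unfamiliar with the Linux-style cmpxchg(): it returns the
value actually observed at *p, which equals old exactly when the
exchange took effect (this is why the allocator checks ret == head).
A minimal C11 sketch of those semantics, for illustration only:

#include <stdatomic.h>

/* Returns the value that was actually found at *p; callers compare it
 * against 'old' to learn whether their exchange took effect. */
static int cmpxchg_int(_Atomic int *p, int old, int new_val)
{
	int expected = old;

	/* On failure, 'expected' is overwritten with the observed value. */
	atomic_compare_exchange_strong(p, &expected, new_val);
	return expected;
}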