mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-24 10:34:43 +03:00
gpu: nvgpu: implement lockless allocator
Implement a lockless allocator for fixed-size data structures. Bug 1795076 Change-Id: I70a5f52cbdb4452cc0fd9a8edf26735be29ede57 Signed-off-by: Sachit Kadle <skadle@nvidia.com> Reviewed-on: http://git-master/r/1213211 (cherry picked from commit e4bff7da0f39c8f4b5691169c02e482bc9d4166e) Reviewed-on: http://git-master/r/1223246 GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
101689dd8b
commit
35b2507fe3
@@ -57,6 +57,7 @@ nvgpu-y := \
|
||||
gk20a/gk20a_allocator_bitmap.o \
|
||||
gk20a/gk20a_allocator_buddy.o \
|
||||
gk20a/gk20a_allocator_page.o \
|
||||
gk20a/gk20a_allocator_lockless.o \
|
||||
gk20a/cde_gk20a.o \
|
||||
gk20a/platform_gk20a_generic.o \
|
||||
gk20a/tsg_gk20a.o \
|
||||
|
||||
@@ -165,6 +165,15 @@ int gk20a_page_allocator_init(struct gk20a_allocator *__a,
|
||||
const char *name, u64 base, u64 length,
|
||||
u64 blk_size, u64 flags);
|
||||
|
||||
/*
|
||||
* Lockless allocatior initializers.
|
||||
* Note: This allocator can only allocate fixed-size structures of a
|
||||
* pre-defined size.
|
||||
*/
|
||||
int gk20a_lockless_allocator_init(struct gk20a_allocator *__a,
|
||||
const char *name, u64 base, u64 length,
|
||||
u64 struct_size, u64 flags);
|
||||
|
||||
#define GPU_BALLOC_MAX_ORDER 31
|
||||
|
||||
/*
|
||||
|
||||
204
drivers/gpu/nvgpu/gk20a/gk20a_allocator_lockless.c
Normal file
204
drivers/gpu/nvgpu/gk20a/gk20a_allocator_lockless.c
Normal file
@@ -0,0 +1,204 @@
|
||||
/*
|
||||
* Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
* version 2, as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/atomic.h>
|
||||
|
||||
#include "gk20a_allocator.h"
|
||||
#include "lockless_allocator_priv.h"
|
||||
|
||||
static u64 gk20a_lockless_alloc_length(struct gk20a_allocator *a)
|
||||
{
|
||||
struct gk20a_lockless_allocator *pa = a->priv;
|
||||
|
||||
return pa->length;
|
||||
}
|
||||
|
||||
static u64 gk20a_lockless_alloc_base(struct gk20a_allocator *a)
|
||||
{
|
||||
struct gk20a_lockless_allocator *pa = a->priv;
|
||||
|
||||
return pa->base;
|
||||
}
|
||||
|
||||
static int gk20a_lockless_alloc_inited(struct gk20a_allocator *a)
|
||||
{
|
||||
struct gk20a_lockless_allocator *pa = a->priv;
|
||||
int inited = pa->inited;
|
||||
|
||||
rmb();
|
||||
return inited;
|
||||
}
|
||||
|
||||
static u64 gk20a_lockless_alloc_end(struct gk20a_allocator *a)
|
||||
{
|
||||
struct gk20a_lockless_allocator *pa = a->priv;
|
||||
|
||||
return pa->base + pa->length;
|
||||
}
|
||||
|
||||
/*
 * Pop one node off the free list and return its address.
 *
 * @a:   allocator to allocate from.
 * @len: requested size; must equal the allocator's block size.
 *
 * Returns the address of the node (base + index * blk_size), or 0 when
 * @len does not match the block size or the free list is empty.
 *
 * The head index is advanced with cmpxchg(); when another context changes
 * the head in between, the head is re-read and the attempt is repeated.
 *
 * NOTE(review): the head carries no generation tag, so a concurrent
 * alloc/alloc/free sequence that restores the same head index between the
 * read of next[] and the cmpxchg() looks like it could go unnoticed (ABA).
 * Confirm whether callers exclude that interleaving.
 */
static u64 gk20a_lockless_alloc(struct gk20a_allocator *a, u64 len)
{
	struct gk20a_lockless_allocator *pa = a->priv;
	int cur, successor, observed;

	if (len != pa->blk_size)
		return 0;

	for (cur = ACCESS_ONCE(pa->head); cur >= 0;
	     cur = ACCESS_ONCE(pa->head)) {
		u64 addr;

		successor = ACCESS_ONCE(pa->next[cur]);
		observed = cmpxchg(&pa->head, cur, successor);
		if (observed != cur)
			continue;	/* lost the race; re-read the head */

		addr = pa->base + cur * pa->blk_size;
		atomic_inc(&pa->nr_allocs);
		alloc_dbg(a, "Alloc node # %d @ addr 0x%llx\n", cur,
			  addr);
		return addr;
	}

	/* A negative head index means the free list is exhausted. */
	return 0;
}
|
||||
|
||||
/*
 * Push the node containing @addr back onto the head of the free list.
 *
 * @a:    allocator the node was allocated from.
 * @addr: address previously returned by gk20a_lockless_alloc().
 *
 * The node index is (addr - base) / blk_size. Addresses that fall outside
 * the managed nodes are rejected, since they would otherwise index past
 * the next[] array and corrupt memory.
 *
 * The push is retried until the cmpxchg() on the head succeeds.
 */
static void gk20a_lockless_free(struct gk20a_allocator *a, u64 addr)
{
	struct gk20a_lockless_allocator *pa = a->priv;
	int head, ret;
	u64 cur_idx;

	/* Guard against underflow of (addr - base). */
	if (addr < pa->base) {
		alloc_dbg(a, "Ignoring free of out-of-range addr 0x%llx\n",
			  addr);
		return;
	}

	/* do_div() divides cur_idx in place; the remainder is not needed. */
	cur_idx = addr - pa->base;
	do_div(cur_idx, pa->blk_size);

	if (cur_idx >= (u64)pa->nr_nodes) {
		alloc_dbg(a, "Ignoring free of out-of-range addr 0x%llx\n",
			  addr);
		return;
	}

	while (1) {
		head = ACCESS_ONCE(pa->head);
		/* Link the old head behind the node being freed. */
		ACCESS_ONCE(pa->next[cur_idx]) = head;
		ret = cmpxchg(&pa->head, head, cur_idx);
		if (ret == head) {
			atomic_dec(&pa->nr_allocs);
			alloc_dbg(a, "Free node # %llu\n", cur_idx);
			break;
		}
	}
}
|
||||
|
||||
static void gk20a_lockless_alloc_destroy(struct gk20a_allocator *a)
|
||||
{
|
||||
struct gk20a_lockless_allocator *pa = a->priv;
|
||||
|
||||
vfree(pa->next);
|
||||
kfree(pa);
|
||||
}
|
||||
|
||||
static void gk20a_lockless_print_stats(struct gk20a_allocator *a,
|
||||
struct seq_file *s, int lock)
|
||||
{
|
||||
struct gk20a_lockless_allocator *pa = a->priv;
|
||||
|
||||
__alloc_pstat(s, a, "Lockless allocator params:\n");
|
||||
__alloc_pstat(s, a, " start = 0x%llx\n", pa->base);
|
||||
__alloc_pstat(s, a, " end = 0x%llx\n", pa->base + pa->length);
|
||||
|
||||
/* Actual stats. */
|
||||
__alloc_pstat(s, a, "Stats:\n");
|
||||
__alloc_pstat(s, a, " Number allocs = %d\n",
|
||||
atomic_read(&pa->nr_allocs));
|
||||
__alloc_pstat(s, a, " Number free = %d\n",
|
||||
pa->nr_nodes - atomic_read(&pa->nr_allocs));
|
||||
}
|
||||
|
||||
static const struct gk20a_allocator_ops pool_ops = {
|
||||
.alloc = gk20a_lockless_alloc,
|
||||
.free = gk20a_lockless_free,
|
||||
|
||||
.base = gk20a_lockless_alloc_base,
|
||||
.length = gk20a_lockless_alloc_length,
|
||||
.end = gk20a_lockless_alloc_end,
|
||||
.inited = gk20a_lockless_alloc_inited,
|
||||
|
||||
.fini = gk20a_lockless_alloc_destroy,
|
||||
|
||||
.print_stats = gk20a_lockless_print_stats,
|
||||
};
|
||||
|
||||
/*
 * Initialize a lockless allocator that manages @length bytes starting at
 * @base, split into fixed-size nodes of @blk_size bytes.
 *
 * @__a:      generic allocator to bind this implementation to.
 * @name:     allocator name, handed to __gk20a_alloc_common_init().
 * @base:     base address of the client-provided space; must be non-zero
 *            (presumably because gk20a_lockless_alloc() returns 0 on
 *            failure).
 * @length:   size of the space in bytes; must hold at least one node.
 * @blk_size: size of one node; must be non-zero and fit in 32 bits.
 * @flags:    stored in the allocator; not interpreted here.
 *
 * Returns 0 on success, -EINVAL for invalid parameters, -ENOMEM when an
 * allocation fails, or the error from __gk20a_alloc_common_init().
 */
int gk20a_lockless_allocator_init(struct gk20a_allocator *__a,
				  const char *name, u64 base, u64 length,
				  u64 blk_size, u64 flags)
{
	int i;
	int err;
	int nr_nodes;
	u64 count;
	struct gk20a_lockless_allocator *a;

	/*
	 * do_div() takes a 32-bit divisor: a wider blk_size would be
	 * silently truncated, possibly to zero.
	 */
	if (!blk_size || blk_size != (u64)(u32)blk_size)
		return -EINVAL;

	/*
	 * Ensure we have space for at least one node & there's no overflow.
	 * In order to control memory footprint, we require count < INT_MAX
	 */
	count = length;
	do_div(count, blk_size);
	if (!base || !count || count > INT_MAX)
		return -EINVAL;

	/* The size of the next[] table must not wrap the allocation size. */
	if (count > (u64)(~(size_t)0) / sizeof(*a->next))
		return -EINVAL;

	/* kzalloc() zeroes the structure, so the head index starts at 0. */
	a = kzalloc(sizeof(struct gk20a_lockless_allocator), GFP_KERNEL);
	if (!a)
		return -ENOMEM;

	err = __gk20a_alloc_common_init(__a, name, a, false, &pool_ops);
	if (err)
		goto fail;

	a->next = vzalloc(sizeof(*a->next) * count);
	if (!a->next) {
		err = -ENOMEM;
		goto fail;
	}

	/* chain the elements together to form the initial free list */
	nr_nodes = (int)count;
	for (i = 0; i < nr_nodes; i++)
		a->next[i] = i + 1;
	/* The last node terminates the list. */
	a->next[nr_nodes - 1] = -1;

	a->base = base;
	a->length = length;
	a->blk_size = blk_size;
	a->nr_nodes = nr_nodes;
	a->flags = flags;
	atomic_set(&a->nr_allocs, 0);

	/* Publish the fully set up state before flagging it as inited. */
	wmb();
	a->inited = true;

	gk20a_init_alloc_debug(__a);
	alloc_dbg(__a, "New allocator: type lockless\n");
	alloc_dbg(__a, " base 0x%llx\n", a->base);
	alloc_dbg(__a, " nodes %d\n", a->nr_nodes);
	alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size);
	alloc_dbg(__a, " flags 0x%llx\n", a->flags);

	return 0;

fail:
	/*
	 * NOTE(review): when vzalloc() fails, __gk20a_alloc_common_init() has
	 * already been given 'a' - confirm that __a keeps no reference to it.
	 */
	kfree(a);
	return err;
}
|
||||
121
drivers/gpu/nvgpu/gk20a/lockless_allocator_priv.h
Normal file
121
drivers/gpu/nvgpu/gk20a/lockless_allocator_priv.h
Normal file
@@ -0,0 +1,121 @@
|
||||
/*
|
||||
* Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
* version 2, as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Basics:
|
||||
*
|
||||
* - Lockless memory allocator for fixed-size structures, whose
|
||||
* size is defined up front at init time.
|
||||
* - Memory footprint scales linearly w/ the number of structures in
|
||||
* the pool. It is ~= sizeof(int) * N.
|
||||
* - Memory is pre-allocated by the client. The allocator itself
|
||||
* only computes the addresses for allocations.
|
||||
* - Limit of INT_MAX nodes that the allocator can be responsible for.
|
||||
*
|
||||
* Implementation details:
|
||||
*
|
||||
* The allocator maintains a single list of free nodes. We allocate &
|
||||
* free nodes from the head of the list. We rely on the cmpxchg() operator
|
||||
* to maintain atomicity on the head.
|
||||
*
|
||||
* So, both allocs & frees are O(1)!!
|
||||
*
|
||||
* -- Definitions --
|
||||
* Block Size - size of a single structure that this allocator will
|
||||
* allocate.
|
||||
* Node - one of the elements of size blk_size in the
|
||||
* client-allocated buffer.
|
||||
* Node Index - zero-based index of a node in the client-allocated
|
||||
* contiguous buffer.
|
||||
*
|
||||
* -- Initial State --
|
||||
* We maintain the following to track the state of the free list:
|
||||
*
|
||||
* 1) A "head" index to track the index of the first free node in the list
|
||||
* 2) A "next" array to track the index of the next free node in the list
|
||||
* for every node. So next[head], will give the index to the 2nd free
|
||||
* element in the list.
|
||||
*
|
||||
* So, to begin with, the free list consists of all node indices, and each
|
||||
* position in the next array contains index N + 1:
|
||||
*
|
||||
* head = 0
|
||||
* next = [1, 2, 3, 4, -1] : Example for a user-allocated buffer of 5 nodes
|
||||
* free_list = 0->1->2->3->4->-1
|
||||
*
|
||||
* -- Allocations --
|
||||
* 1) Read the current head (aka acq_head)
|
||||
* 2) Read next[acq_head], to get the 2nd free element (aka new_head)
|
||||
* 3) cmpxchg(&head, acq_head, new_head)
|
||||
* 4) If it succeeds, compute the address of the node, based on
|
||||
* base address, blk_size, & acq_head.
|
||||
*
|
||||
* head = 1;
|
||||
* next = [1, 2, 3, 4, -1] : Example after allocating Node #0
|
||||
* free_list = 1->2->3->4->-1
|
||||
*
|
||||
* head = 2;
|
||||
* next = [1, 2, 3, 4, -1] : Example after allocating Node #1
|
||||
* free_list = 2->3->4->-1
|
||||
*
|
||||
* -- Frees --
|
||||
* 1) Based on the address to be freed, calculate the index of the node
|
||||
* being freed (cur_idx)
|
||||
* 2) Read the current head (old_head)
|
||||
* 3) So the freed node is going to go at the head of the list, and we
|
||||
* want to put the old_head after it. So next[cur_idx] = old_head
|
||||
* 4) cmpxchg(head, old_head, cur_idx)
|
||||
*
|
||||
* head = 0
|
||||
* next = [2, 2, 3, 4, -1]
|
||||
* free_list = 0->2->3->4->-1 : Example after freeing Node #0
|
||||
*
|
||||
* head = 1
|
||||
* next = [2, 0, 3, 4, -1]
|
||||
* free_list = 1->0->2->3->4->-1 : Example after freeing Node #1
|
||||
*/
|
||||
|
||||
#ifndef LOCKLESS_ALLOCATOR_PRIV_H
|
||||
#define LOCKLESS_ALLOCATOR_PRIV_H
|
||||
|
||||
struct gk20a_allocator;
|
||||
|
||||
struct gk20a_lockless_allocator {
|
||||
struct gk20a_allocator *owner;
|
||||
|
||||
u64 base; /* Base address of the space. */
|
||||
u64 length; /* Length of the space. */
|
||||
u64 blk_size; /* Size of the structure being allocated */
|
||||
int nr_nodes; /* Number of nodes available for allocation */
|
||||
|
||||
int *next; /* An array holding the next indices per node */
|
||||
int head; /* Current node at the top of the stack */
|
||||
|
||||
u64 flags;
|
||||
|
||||
bool inited;
|
||||
|
||||
/* Statistics */
|
||||
atomic_t nr_allocs;
|
||||
};
|
||||
|
||||
static inline struct gk20a_lockless_allocator *lockless_allocator(
|
||||
struct gk20a_allocator *a)
|
||||
{
|
||||
return (struct gk20a_lockless_allocator *)(a)->priv;
|
||||
}
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user