Mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git (synced 2025-12-24 10:34:43 +03:00)
gpu: nvgpu: Move Linux files away from common
Move all Linux source code files to drivers/gpu/nvgpu/os/linux from
drivers/gpu/nvgpu/common/linux. This changes the meaning of "common"
to be OS-independent.

JIRA NVGPU-598
JIRA NVGPU-601

Change-Id: Ib7f2a43d3688bb0d0b7dcc48469a6783fd988ce9
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1747714
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Commit 2a2c16af5f (parent 98d996f4ff), committed by mobile promotions.
drivers/gpu/nvgpu/os/linux/cde.c (new file, 1786 lines)
File diff suppressed because it is too large.
drivers/gpu/nvgpu/os/linux/cde.h (new file, 326 lines)
@@ -0,0 +1,326 @@
/*
 * GK20A color decompression engine support
 *
 * Copyright (c) 2014-2017, NVIDIA Corporation.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef _CDE_GK20A_H_
#define _CDE_GK20A_H_

#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/list.h>
#include <nvgpu/lock.h>

#include <linux/kobject.h>
#include <linux/workqueue.h>

#define MAX_CDE_BUFS		10
#define MAX_CDE_PARAMS		64
#define MAX_CDE_USER_PARAMS	40
#define MAX_CDE_ARRAY_ENTRIES	9

/*
 * The size of the context ring buffer that is dedicated for handling cde
 * jobs. Re-using a context (=channel) for a different cde job forces a cpu
 * wait on the previous job to that channel, so increasing this value
 * reduces the likelihood of stalls.
 */
#define NUM_CDE_CONTEXTS	4

struct dma_buf;
struct device;
struct nvgpu_os_linux;
struct gk20a;
struct gk20a_fence;
struct nvgpu_channel_fence;
struct channel_gk20a;
struct vm_gk20a;
struct nvgpu_gpfifo_entry;

/*
 * This element defines a buffer that is allocated and mapped into gpu address
 * space. data_byte_offset defines the beginning of the buffer inside the
 * firmware. num_bytes defines how many bytes the firmware contains.
 *
 * If data_byte_offset is zero, we allocate an empty buffer.
 */

struct gk20a_cde_hdr_buf {
	u64 data_byte_offset;
	u64 num_bytes;
};

/*
 * This element defines a constant patch in buffers. It basically
 * computes the physical address of <source_buf>+source_byte_offset. The
 * address is then folded into the patch value as per:
 *    value = (current_value & ~mask) | ((address << shift) & mask)
 *
 * The type field defines the register size as:
 *  0=u32,
 *  1=u64 (little endian),
 *  2=u64 (big endian)
 */

struct gk20a_cde_hdr_replace {
	u32 target_buf;
	u32 source_buf;
	s32 shift;
	u32 type;
	u64 target_byte_offset;
	u64 source_byte_offset;
	u64 mask;
};

enum {
	TYPE_PARAM_TYPE_U32 = 0,
	TYPE_PARAM_TYPE_U64_LITTLE,
	TYPE_PARAM_TYPE_U64_BIG
};

/*
 * This element defines a runtime patch in buffers. Parameters with id from
 * 0 to 1024 are reserved for special usage as follows:
 *   0 = comptags_per_cacheline,
 *   1 = slices_per_fbp,
 *   2 = num_fbps
 *   3 = source buffer first page offset
 *   4 = source buffer block height log2
 *   5 = backing store memory address
 *   6 = destination memory address
 *   7 = destination size (bytes)
 *   8 = backing store size (bytes)
 *   9 = cache line size
 *
 * Parameters above id 1024 are user-specified, i.e. they determine where
 * parameters from user space should be placed in buffers, what their type
 * is, etc.
 *
 * Once the value is available, we add data_offset to the value.
 *
 * The value address is then folded into the patch value as per:
 *    value = (current_value & ~mask) | ((address << shift) & mask)
 *
 * The type field defines the register size as:
 *  0=u32,
 *  1=u64 (little endian),
 *  2=u64 (big endian)
 */

struct gk20a_cde_hdr_param {
	u32 id;
	u32 target_buf;
	s32 shift;
	u32 type;
	s64 data_offset;
	u64 target_byte_offset;
	u64 mask;
};

enum {
	TYPE_PARAM_COMPTAGS_PER_CACHELINE = 0,
	TYPE_PARAM_GPU_CONFIGURATION,
	TYPE_PARAM_FIRSTPAGEOFFSET,
	TYPE_PARAM_NUMPAGES,
	TYPE_PARAM_BACKINGSTORE,
	TYPE_PARAM_DESTINATION,
	TYPE_PARAM_DESTINATION_SIZE,
	TYPE_PARAM_BACKINGSTORE_SIZE,
	TYPE_PARAM_SOURCE_SMMU_ADDR,
	TYPE_PARAM_BACKINGSTORE_BASE_HW,
	TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE,
	TYPE_PARAM_SCATTERBUFFER,
	TYPE_PARAM_SCATTERBUFFER_SIZE,
	NUM_RESERVED_PARAMS = 1024,
};

/*
 * This header element defines a command. The op field determines whether the
 * element is defining an init (0) or convert command (1). data_byte_offset
 * denotes the beginning address of command elements in the file.
 */

struct gk20a_cde_hdr_command {
	u32 op;
	u32 num_entries;
	u64 data_byte_offset;
};

enum {
	TYPE_BUF_COMMAND_INIT = 0,
	TYPE_BUF_COMMAND_CONVERT,
	TYPE_BUF_COMMAND_NOOP
};

/*
 * This command element defines one entry inside the push buffer. target_buf
 * defines the buffer holding the pushbuffer entries, target_byte_offset the
 * offset inside that buffer, and num_bytes the number of words in the buffer.
 */

struct gk20a_cde_cmd_elem {
	u32 target_buf;
	u32 padding;
	u64 target_byte_offset;
	u64 num_bytes;
};

/*
 * This element is used for storing a small array of data.
 */

enum {
	ARRAY_PROGRAM_OFFSET = 0,
	ARRAY_REGISTER_COUNT,
	ARRAY_LAUNCH_COMMAND,
	NUM_CDE_ARRAYS
};

struct gk20a_cde_hdr_array {
	u32 id;
	u32 data[MAX_CDE_ARRAY_ENTRIES];
};

/*
 * The following defines a single header element. Each element has a type and
 * carries one of the data structures above.
 */

struct gk20a_cde_hdr_elem {
	u32 type;
	u32 padding;
	union {
		struct gk20a_cde_hdr_buf buf;
		struct gk20a_cde_hdr_replace replace;
		struct gk20a_cde_hdr_param param;
		u32 required_class;
		struct gk20a_cde_hdr_command command;
		struct gk20a_cde_hdr_array array;
	};
};

enum {
	TYPE_BUF = 0,
	TYPE_REPLACE,
	TYPE_PARAM,
	TYPE_REQUIRED_CLASS,
	TYPE_COMMAND,
	TYPE_ARRAY
};

struct gk20a_cde_param {
	u32 id;
	u32 padding;
	u64 value;
};

struct gk20a_cde_ctx {
	struct nvgpu_os_linux *l;
	struct device *dev;

	/* channel related data */
	struct channel_gk20a *ch;
	struct tsg_gk20a *tsg;
	struct vm_gk20a *vm;

	/* buf converter configuration */
	struct nvgpu_mem mem[MAX_CDE_BUFS];
	unsigned int num_bufs;

	/* buffer patching params (where should patching be done) */
	struct gk20a_cde_hdr_param params[MAX_CDE_PARAMS];
	unsigned int num_params;

	/* storage for user space parameter values */
	u32 user_param_values[MAX_CDE_USER_PARAMS];

	u32 surf_param_offset;
	u32 surf_param_lines;
	u64 surf_vaddr;

	u64 compbit_vaddr;
	u64 compbit_size;

	u64 scatterbuffer_vaddr;
	u64 scatterbuffer_size;

	u64 backing_store_vaddr;

	struct nvgpu_gpfifo_entry *init_convert_cmd;
	int init_cmd_num_entries;

	struct nvgpu_gpfifo_entry *convert_cmd;
	int convert_cmd_num_entries;

	struct kobj_attribute attr;

	bool init_cmd_executed;

	struct nvgpu_list_node list;
	bool is_temporary;
	bool in_use;
	struct delayed_work ctx_deleter_work;
};

static inline struct gk20a_cde_ctx *
gk20a_cde_ctx_from_list(struct nvgpu_list_node *node)
{
	return (struct gk20a_cde_ctx *)
		((uintptr_t)node - offsetof(struct gk20a_cde_ctx, list));
};

struct gk20a_cde_app {
	bool initialised;
	struct nvgpu_mutex mutex;

	struct nvgpu_list_node free_contexts;
	struct nvgpu_list_node used_contexts;
	unsigned int ctx_count;
	unsigned int ctx_usecount;
	unsigned int ctx_count_top;

	u32 firmware_version;

	u32 arrays[NUM_CDE_ARRAYS][MAX_CDE_ARRAY_ENTRIES];

	u32 shader_parameter;
};

void gk20a_cde_destroy(struct nvgpu_os_linux *l);
void gk20a_cde_suspend(struct nvgpu_os_linux *l);
int gk20a_init_cde_support(struct nvgpu_os_linux *l);
int gk20a_cde_reload(struct nvgpu_os_linux *l);
int gk20a_cde_convert(struct nvgpu_os_linux *l,
		struct dma_buf *compbits_buf,
		u64 compbits_byte_offset,
		u64 scatterbuffer_byte_offset,
		struct nvgpu_channel_fence *fence,
		u32 __flags, struct gk20a_cde_param *params,
		int num_params, struct gk20a_fence **fence_out);

int gk20a_prepare_compressible_read(
		struct nvgpu_os_linux *l, u32 buffer_fd, u32 request, u64 offset,
		u64 compbits_hoffset, u64 compbits_voffset,
		u64 scatterbuffer_offset,
		u32 width, u32 height, u32 block_height_log2,
		u32 submit_flags, struct nvgpu_channel_fence *fence,
		u32 *valid_compbits, u32 *zbc_color,
		struct gk20a_fence **fence_out);
int gk20a_mark_compressible_write(
		struct gk20a *g, u32 buffer_fd, u32 valid_compbits, u64 offset,
		u32 zbc_color);
int nvgpu_cde_init_ops(struct nvgpu_os_linux *l);

#endif
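Editorial note: the patch formula in the header comments above is easy to misread because of C operator precedence, so here is a minimal stand-alone sketch of how one u32 patch would be applied. The helper is illustrative only and not part of the driver; treating a negative shift as a right shift is an assumption suggested by the signed s32 shift field.

#include <stdint.h>

/*
 * Illustrative helper (not driver code): apply one replace/param-style
 * patch to a u32 slot, following the comment's formula
 *     value = (current_value & ~mask) | ((address << shift) & mask)
 * Assumption: a negative shift means shifting right, as the signed
 * s32 'shift' field suggests.
 */
static uint32_t cde_apply_patch_u32(uint32_t current_value, uint64_t address,
                                    int32_t shift, uint64_t mask)
{
        uint64_t shifted = shift >= 0 ? address << shift : address >> -shift;

        return (uint32_t)(((uint64_t)current_value & ~mask) |
                          (shifted & mask));
}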
drivers/gpu/nvgpu/os/linux/cde_gm20b.c (new file, 64 lines)
@@ -0,0 +1,64 @@
/*
 * GM20B CDE
 *
 * Copyright (c) 2015-2017, NVIDIA CORPORATION.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "gk20a/gk20a.h"
#include "cde_gm20b.h"

enum programs {
	PROG_HPASS              = 0,
	PROG_VPASS_LARGE        = 1,
	PROG_VPASS_SMALL        = 2,
	PROG_HPASS_DEBUG        = 3,
	PROG_VPASS_LARGE_DEBUG  = 4,
	PROG_VPASS_SMALL_DEBUG  = 5,
	PROG_PASSTHROUGH        = 6,
};

static void gm20b_cde_get_program_numbers(struct gk20a *g,
					  u32 block_height_log2,
					  u32 shader_parameter,
					  int *hprog_out, int *vprog_out)
{
	int hprog = PROG_HPASS;
	int vprog = (block_height_log2 >= 2) ?
		PROG_VPASS_LARGE : PROG_VPASS_SMALL;
	if (shader_parameter == 1) {
		hprog = PROG_PASSTHROUGH;
		vprog = PROG_PASSTHROUGH;
	} else if (shader_parameter == 2) {
		hprog = PROG_HPASS_DEBUG;
		vprog = (block_height_log2 >= 2) ?
			PROG_VPASS_LARGE_DEBUG :
			PROG_VPASS_SMALL_DEBUG;
	}

	*hprog_out = hprog;
	*vprog_out = vprog;
}

struct nvgpu_os_linux_ops gm20b_cde_ops = {
	.cde = {
		.get_program_numbers = gm20b_cde_get_program_numbers,
	},
};
drivers/gpu/nvgpu/os/linux/cde_gm20b.h (new file, 32 lines)
@@ -0,0 +1,32 @@
/*
 * GM20B CDE
 *
 * Copyright (c) 2015-2017, NVIDIA CORPORATION.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef _NVHOST_GM20B_CDE
#define _NVHOST_GM20B_CDE

#include "os_linux.h"

extern struct nvgpu_os_linux_ops gm20b_cde_ops;

#endif
drivers/gpu/nvgpu/os/linux/cde_gp10b.c (new file, 161 lines)
@@ -0,0 +1,161 @@
/*
 * GP10B CDE
 *
 * Copyright (c) 2015-2018, NVIDIA CORPORATION.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "gk20a/gk20a.h"
#include "cde_gp10b.h"

#include <nvgpu/log.h>
#include <nvgpu/dma.h>

enum gp10b_programs {
	GP10B_PROG_HPASS          = 0,
	GP10B_PROG_HPASS_4K       = 1,
	GP10B_PROG_VPASS          = 2,
	GP10B_PROG_VPASS_4K       = 3,
	GP10B_PROG_HPASS_DEBUG    = 4,
	GP10B_PROG_HPASS_4K_DEBUG = 5,
	GP10B_PROG_VPASS_DEBUG    = 6,
	GP10B_PROG_VPASS_4K_DEBUG = 7,
	GP10B_PROG_PASSTHROUGH    = 8,
};

void gp10b_cde_get_program_numbers(struct gk20a *g,
				   u32 block_height_log2,
				   u32 shader_parameter,
				   int *hprog_out, int *vprog_out)
{
	int hprog, vprog;

	if (shader_parameter == 1) {
		hprog = GP10B_PROG_PASSTHROUGH;
		vprog = GP10B_PROG_PASSTHROUGH;
	} else {
		hprog = GP10B_PROG_HPASS;
		vprog = GP10B_PROG_VPASS;
		if (shader_parameter == 2) {
			hprog = GP10B_PROG_HPASS_DEBUG;
			vprog = GP10B_PROG_VPASS_DEBUG;
		}
		if (!nvgpu_iommuable(g)) {
			if (!g->mm.disable_bigpage) {
				nvgpu_warn(g,
					"Without an IOMMU, big pages cannot be used");
			}
			hprog |= 1;
			vprog |= 1;
		}
	}

	*hprog_out = hprog;
	*vprog_out = vprog;
}

bool gp10b_need_scatter_buffer(struct gk20a *g)
{
	return !nvgpu_iommuable(g);
}

static u8 parity(u32 a)
{
	a ^= a >> 16u;
	a ^= a >> 8u;
	a ^= a >> 4u;
	a &= 0xfu;
	return (0x6996u >> a) & 1u;
}

int gp10b_populate_scatter_buffer(struct gk20a *g,
				  struct sg_table *sgt,
				  size_t surface_size,
				  void *scatter_buffer_ptr,
				  size_t scatter_buffer_size)
{
	/* map scatter buffer to CPU VA and fill it */
	const u32 page_size_log2 = 12;
	const u32 page_size = 1 << page_size_log2;
	const u32 page_size_shift = page_size_log2 - 7u;

	/* 0011 1111 1111 1111 1111 1110 0100 1000 */
	const u32 getSliceMaskGP10B = 0x3ffffe48;
	u8 *scatter_buffer = scatter_buffer_ptr;

	size_t i;
	struct scatterlist *sg = NULL;
	u8 d = 0;
	size_t page = 0;
	size_t pages_left;

	surface_size = round_up(surface_size, page_size);

	pages_left = surface_size >> page_size_log2;
	if ((pages_left >> 3) > scatter_buffer_size)
		return -ENOMEM;

	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
		unsigned int j;
		u64 surf_pa = sg_phys(sg);
		unsigned int n = (int)(sg->length >> page_size_log2);

		nvgpu_log(g, gpu_dbg_cde, "surfPA=0x%llx + %d pages", surf_pa, n);

		for (j = 0; j < n && pages_left > 0; j++, surf_pa += page_size) {
			u32 addr = (((u32)(surf_pa >> 7)) & getSliceMaskGP10B) >> page_size_shift;
			u8 scatter_bit = parity(addr);
			u8 bit = page & 7;

			d |= scatter_bit << bit;
			if (bit == 7) {
				scatter_buffer[page >> 3] = d;
				d = 0;
			}

			++page;
			--pages_left;
		}

		if (pages_left == 0)
			break;
	}

	/* write the last byte in case the number of pages is not divisible by 8 */
	if ((page & 7) != 0)
		scatter_buffer[page >> 3] = d;

	if (nvgpu_log_mask_enabled(g, gpu_dbg_cde)) {
		nvgpu_log(g, gpu_dbg_cde, "scatterBuffer content:");
		for (i = 0; i < page >> 3; i++) {
			nvgpu_log(g, gpu_dbg_cde, " %x", scatter_buffer[i]);
		}
	}

	return 0;
}

struct nvgpu_os_linux_ops gp10b_cde_ops = {
	.cde = {
		.get_program_numbers = gp10b_cde_get_program_numbers,
		.need_scatter_buffer = gp10b_need_scatter_buffer,
		.populate_scatter_buffer = gp10b_populate_scatter_buffer,
	},
};
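Editorial note: the parity() routine above uses a classic bit trick worth unpacking. The three XOR folds reduce 32 bits to a 4-bit value with the same parity, and 0x6996 acts as a 16-entry, one-bit-per-entry lookup table of 4-bit parities (its binary form, 0110 1001 1001 0110, is the parity of each index 0..15). A stand-alone check, runnable in plain C:

#include <assert.h>
#include <stdint.h>

/* Same folding trick as parity() above, written out for clarity. */
static uint8_t parity32(uint32_t a)
{
        a ^= a >> 16;   /* fold 32 bits into 16: each bit XORs its high partner */
        a ^= a >> 8;    /* 16 -> 8 */
        a ^= a >> 4;    /* 8 -> 4 */
        a &= 0xf;       /* index into the 16-bit lookup constant */
        return (0x6996 >> a) & 1;
}

int main(void)
{
        assert(parity32(0x0) == 0);
        assert(parity32(0x1) == 1);
        assert(parity32(0x7) == 1);             /* three set bits: odd */
        assert(parity32(0xffffffff) == 0);      /* 32 set bits: even */
        return 0;
}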
drivers/gpu/nvgpu/os/linux/cde_gp10b.h (new file, 32 lines)
@@ -0,0 +1,32 @@
/*
 * GP10B CDE
 *
 * Copyright (c) 2015-2017, NVIDIA CORPORATION.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef _NVHOST_GP10B_CDE
#define _NVHOST_GP10B_CDE

#include "os_linux.h"

extern struct nvgpu_os_linux_ops gp10b_cde_ops;

#endif
drivers/gpu/nvgpu/os/linux/ce2.c (new file, 155 lines)
@@ -0,0 +1,155 @@
/*
 * Copyright (c) 2017, NVIDIA Corporation.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <nvgpu/types.h>

#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>

#include "gk20a/ce2_gk20a.h"
#include "gk20a/gk20a.h"
#include "channel.h"

static inline int gk20a_get_valid_launch_flags(struct gk20a *g, int launch_flags)
{
	/*
	 * There is no local memory available; don't allow local memory
	 * related CE flags.
	 */
	if (!g->mm.vidmem.size) {
		launch_flags &= ~(NVGPU_CE_SRC_LOCATION_LOCAL_FB |
			NVGPU_CE_DST_LOCATION_LOCAL_FB);
	}
	return launch_flags;
}

int gk20a_ce_execute_ops(struct gk20a *g,
		u32 ce_ctx_id,
		u64 src_buf,
		u64 dst_buf,
		u64 size,
		unsigned int payload,
		int launch_flags,
		int request_operation,
		u32 submit_flags,
		struct gk20a_fence **gk20a_fence_out)
{
	int ret = -EPERM;
	struct gk20a_ce_app *ce_app = &g->ce_app;
	struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save;
	bool found = false;
	u32 *cmd_buf_cpu_va;
	u64 cmd_buf_gpu_va = 0;
	u32 methodSize;
	u32 cmd_buf_read_offset;
	u32 dma_copy_class;
	struct nvgpu_gpfifo_entry gpfifo;
	struct nvgpu_channel_fence fence = {0, 0};
	struct gk20a_fence *ce_cmd_buf_fence_out = NULL;

	if (!ce_app->initialised || ce_app->app_state != NVGPU_CE_ACTIVE)
		goto end;

	nvgpu_mutex_acquire(&ce_app->app_mutex);

	nvgpu_list_for_each_entry_safe(ce_ctx, ce_ctx_save,
			&ce_app->allocated_contexts, gk20a_gpu_ctx, list) {
		if (ce_ctx->ctx_id == ce_ctx_id) {
			found = true;
			break;
		}
	}

	nvgpu_mutex_release(&ce_app->app_mutex);

	if (!found) {
		ret = -EINVAL;
		goto end;
	}

	if (ce_ctx->gpu_ctx_state != NVGPU_CE_GPU_CTX_ALLOCATED) {
		ret = -ENODEV;
		goto end;
	}

	nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex);

	ce_ctx->cmd_buf_read_queue_offset %= NVGPU_CE_MAX_INFLIGHT_JOBS;

	cmd_buf_read_offset = (ce_ctx->cmd_buf_read_queue_offset *
			(NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF / sizeof(u32)));

	cmd_buf_cpu_va = (u32 *)ce_ctx->cmd_buf_mem.cpu_va;

	if (ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset]) {
		struct gk20a_fence **prev_post_fence =
			&ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset];

		ret = gk20a_fence_wait(g, *prev_post_fence,
				gk20a_get_gr_idle_timeout(g));

		gk20a_fence_put(*prev_post_fence);
		*prev_post_fence = NULL;
		if (ret)
			goto noop;
	}

	cmd_buf_gpu_va = (ce_ctx->cmd_buf_mem.gpu_va +
			(u64)(cmd_buf_read_offset * sizeof(u32)));

	dma_copy_class = g->ops.get_litter_value(g, GPU_LIT_DMA_COPY_CLASS);
	methodSize = gk20a_ce_prepare_submit(src_buf,
			dst_buf,
			size,
			&cmd_buf_cpu_va[cmd_buf_read_offset],
			NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF,
			payload,
			gk20a_get_valid_launch_flags(g, launch_flags),
			request_operation,
			dma_copy_class);

	if (methodSize) {
		/* store the element into the gpfifo */
		gpfifo.entry0 = u64_lo32(cmd_buf_gpu_va);
		gpfifo.entry1 = (u64_hi32(cmd_buf_gpu_va) |
				pbdma_gp_entry1_length_f(methodSize));

		/*
		 * Always take the postfence, as it is needed for protecting
		 * the ce context.
		 */
		submit_flags |= NVGPU_SUBMIT_FLAGS_FENCE_GET;

		nvgpu_smp_wmb();

		ret = gk20a_submit_channel_gpfifo(ce_ctx->ch, &gpfifo, NULL,
				1, submit_flags, &fence,
				&ce_cmd_buf_fence_out, NULL);

		if (!ret) {
			ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset] =
				ce_cmd_buf_fence_out;
			if (gk20a_fence_out) {
				gk20a_fence_get(ce_cmd_buf_fence_out);
				*gk20a_fence_out = ce_cmd_buf_fence_out;
			}

			/* next available command buffer queue index */
			++ce_ctx->cmd_buf_read_queue_offset;
		}
	} else {
		ret = -ENOMEM;
	}
noop:
	nvgpu_mutex_release(&ce_ctx->gpu_ctx_mutex);
end:
	return ret;
}
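Editorial note: to make the command-slot recycling in gk20a_ce_execute_ops() easier to follow, here is a compact stand-alone model of the same pattern: slots are used round-robin, and before a slot is rewritten the fence left by the previous job in that slot is CPU-waited and dropped, which bounds the number of jobs in flight. All names here (slot_ring, toy_fence, MAX_SLOTS) are invented for illustration and are not driver APIs.

#include <stdbool.h>
#include <stddef.h>

#define MAX_SLOTS 32                    /* stands in for NVGPU_CE_MAX_INFLIGHT_JOBS */

struct toy_fence { bool signalled; };   /* stands in for struct gk20a_fence */

static void toy_fence_wait(struct toy_fence *f) { while (!f->signalled) ; }
static void toy_fence_put(struct toy_fence *f) { (void)f; }

struct slot_ring {
        unsigned int next;                    /* cmd_buf_read_queue_offset */
        struct toy_fence *post[MAX_SLOTS];    /* postfences[] */
};

/* Claim the next slot, first draining the fence left by its previous job. */
static unsigned int ring_claim_slot(struct slot_ring *r)
{
        unsigned int i = r->next % MAX_SLOTS;

        if (r->post[i]) {
                toy_fence_wait(r->post[i]);   /* CPU wait, like gk20a_fence_wait() */
                toy_fence_put(r->post[i]);
                r->post[i] = NULL;
        }
        return i;   /* caller writes commands, stores the new postfence, then ++r->next */
}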
drivers/gpu/nvgpu/os/linux/channel.c (new file, 1021 lines)
File diff suppressed because it is too large.
drivers/gpu/nvgpu/os/linux/channel.h (new file, 96 lines)
@@ -0,0 +1,96 @@
/*
 * Copyright (c) 2017-2018, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
#ifndef __NVGPU_CHANNEL_H__
#define __NVGPU_CHANNEL_H__

#include <linux/workqueue.h>
#include <linux/dma-buf.h>

#include <nvgpu/types.h>

struct channel_gk20a;
struct nvgpu_gpfifo;
struct nvgpu_submit_gpfifo_args;
struct nvgpu_channel_fence;
struct gk20a_fence;
struct fifo_profile_gk20a;
struct nvgpu_os_linux;

struct sync_fence;
struct sync_timeline;

struct nvgpu_channel_completion_cb {
	/*
	 * Signal channel owner via a callback, if set, in job cleanup with
	 * schedule_work. Means that something finished on the channel (perhaps
	 * more than one job).
	 */
	void (*fn)(struct channel_gk20a *, void *);
	void *user_data;
	/* Make access to the two above atomic */
	struct nvgpu_spinlock lock;
	/* Per-channel async work task, cannot reschedule itself */
	struct work_struct work;
};

struct nvgpu_error_notifier {
	struct dma_buf *dmabuf;
	void *vaddr;

	struct nvgpu_notification *notification;

	struct nvgpu_mutex mutex;
};

/*
 * This struct contains fence-related data,
 * e.g. the sync_timeline for sync_fences.
 */
struct nvgpu_os_fence_framework {
	struct sync_timeline *timeline;
};

struct nvgpu_channel_linux {
	struct channel_gk20a *ch;

	struct nvgpu_os_fence_framework fence_framework;

	struct nvgpu_channel_completion_cb completion_cb;
	struct nvgpu_error_notifier error_notifier;

	struct dma_buf *cyclestate_buffer_handler;
};

u32 nvgpu_submit_gpfifo_user_flags_to_common_flags(u32 user_flags);
int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l);
void nvgpu_remove_channel_support_linux(struct nvgpu_os_linux *l);

struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g,
		void (*update_fn)(struct channel_gk20a *, void *),
		void *update_fn_data,
		int runlist_id,
		bool is_privileged_channel);

int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
		struct nvgpu_gpfifo_entry *gpfifo,
		struct nvgpu_submit_gpfifo_args *args,
		u32 num_entries,
		u32 flags,
		struct nvgpu_channel_fence *fence,
		struct gk20a_fence **fence_out,
		struct fifo_profile_gk20a *profile);

#endif /* __NVGPU_CHANNEL_H__ */
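Editorial note: as a usage sketch for the declarations above, gk20a_open_new_channel_with_cb() wires update_fn/update_fn_data into nvgpu_channel_completion_cb, and the callback later runs from the completion work item, meaning one or more jobs finished on the channel. A hypothetical caller might look like the sketch below; my_consumer, my_job_done_cb, my_consumer_bind_channel, and the runlist_id/privilege argument values are all invented for illustration and are not driver APIs.

/* Hypothetical consumer state; not a driver type. */
struct my_consumer {
	struct nvgpu_cond jobs_done;
};

/* Runs from the completion_cb work item: something finished on ch. */
static void my_job_done_cb(struct channel_gk20a *ch, void *data)
{
	struct my_consumer *c = data;

	nvgpu_cond_signal(&c->jobs_done);
}

static int my_consumer_bind_channel(struct gk20a *g, struct my_consumer *c)
{
	struct channel_gk20a *ch =
		gk20a_open_new_channel_with_cb(g, my_job_done_cb, c,
					       -1 /* runlist_id, illustrative */,
					       false /* not privileged */);

	return ch ? 0 : -ENOMEM;
}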
drivers/gpu/nvgpu/os/linux/clk.c (new file, 165 lines)
@@ -0,0 +1,165 @@
/*
 * Linux clock support
 *
 * Copyright (c) 2017-2018, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/clk.h>

#include <soc/tegra/tegra-dvfs.h>
#include <soc/tegra/tegra-bpmp-dvfs.h>

#include "clk.h"
#include "os_linux.h"
#include "platform_gk20a.h"

#include "gk20a/gk20a.h"

static unsigned long nvgpu_linux_clk_get_rate(struct gk20a *g, u32 api_domain)
{
	struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g));
	unsigned long ret;

	switch (api_domain) {
	case CTRL_CLK_DOMAIN_GPCCLK:
		if (g->clk.tegra_clk)
			ret = g->clk.cached_rate ?
				g->clk.cached_rate :
				clk_get_rate(g->clk.tegra_clk);
		else
			ret = platform->cached_rate ?
				platform->cached_rate :
				clk_get_rate(platform->clk[0]);
		break;
	case CTRL_CLK_DOMAIN_PWRCLK:
		ret = clk_get_rate(platform->clk[1]);
		break;
	default:
		nvgpu_err(g, "unknown clock: %u", api_domain);
		ret = 0;
		break;
	}

	return ret;
}

static int nvgpu_linux_clk_set_rate(struct gk20a *g,
				    u32 api_domain, unsigned long rate)
{
	struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g));
	int ret;

	switch (api_domain) {
	case CTRL_CLK_DOMAIN_GPCCLK:
		if (g->clk.tegra_clk) {
			ret = clk_set_rate(g->clk.tegra_clk, rate);
			if (!ret)
				g->clk.cached_rate = rate;
		} else {
			ret = clk_set_rate(platform->clk[0], rate);
			if (!ret)
				platform->cached_rate = rate;
		}
		break;
	case CTRL_CLK_DOMAIN_PWRCLK:
		ret = clk_set_rate(platform->clk[1], rate);
		break;
	default:
		nvgpu_err(g, "unknown clock: %u", api_domain);
		ret = -EINVAL;
		break;
	}

	return ret;
}

static unsigned long nvgpu_linux_get_fmax_at_vmin_safe(struct gk20a *g)
{
	struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g));

	/*
	 * On Tegra platforms with a GPCPLL bus (gbus), the GPU tegra_clk
	 * clock exposed to the frequency governor is a shared user on the
	 * gbus. The gbus itself can be accessed as the GPU clock's parent,
	 * and it incorporates the DVFS-related data.
	 */
	if (g->clk.tegra_clk)
		return tegra_dvfs_get_fmax_at_vmin_safe_t(
			clk_get_parent(g->clk.tegra_clk));

	if (platform->maxmin_clk_id)
		return tegra_bpmp_dvfs_get_fmax_at_vmin(
			platform->maxmin_clk_id);

	return 0;
}

static u32 nvgpu_linux_get_ref_clock_rate(struct gk20a *g)
{
	struct clk *c;

	c = clk_get_sys("gpu_ref", "gpu_ref");
	if (IS_ERR(c)) {
		nvgpu_err(g, "failed to get GPCPLL reference clock");
		return 0;
	}

	return clk_get_rate(c);
}

static int nvgpu_linux_predict_mv_at_hz_cur_tfloor(struct clk_gk20a *clk,
						   unsigned long rate)
{
	return tegra_dvfs_predict_mv_at_hz_cur_tfloor(
			clk_get_parent(clk->tegra_clk), rate);
}

static unsigned long nvgpu_linux_get_maxrate(struct gk20a *g, u32 api_domain)
{
	int ret;

	switch (api_domain) {
	case CTRL_CLK_DOMAIN_GPCCLK:
		ret = tegra_dvfs_get_maxrate(clk_get_parent(g->clk.tegra_clk));
		break;
	default:
		nvgpu_err(g, "unknown clock: %u", api_domain);
		ret = 0;
		break;
	}

	return ret;
}

static int nvgpu_linux_prepare_enable(struct clk_gk20a *clk)
{
	return clk_prepare_enable(clk->tegra_clk);
}

static void nvgpu_linux_disable_unprepare(struct clk_gk20a *clk)
{
	clk_disable_unprepare(clk->tegra_clk);
}

void nvgpu_linux_init_clk_support(struct gk20a *g)
{
	g->ops.clk.get_rate = nvgpu_linux_clk_get_rate;
	g->ops.clk.set_rate = nvgpu_linux_clk_set_rate;
	g->ops.clk.get_fmax_at_vmin_safe = nvgpu_linux_get_fmax_at_vmin_safe;
	g->ops.clk.get_ref_clock_rate = nvgpu_linux_get_ref_clock_rate;
	g->ops.clk.predict_mv_at_hz_cur_tfloor = nvgpu_linux_predict_mv_at_hz_cur_tfloor;
	g->ops.clk.get_maxrate = nvgpu_linux_get_maxrate;
	g->ops.clk.prepare_enable = nvgpu_linux_prepare_enable;
	g->ops.clk.disable_unprepare = nvgpu_linux_disable_unprepare;
}
drivers/gpu/nvgpu/os/linux/clk.h (new file, 22 lines)
@@ -0,0 +1,22 @@
/*
 * Copyright (c) 2017, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef NVGPU_COMMON_LINUX_CLK_H
#define NVGPU_COMMON_LINUX_CLK_H

struct gk20a;
void nvgpu_linux_init_clk_support(struct gk20a *g);

#endif
drivers/gpu/nvgpu/os/linux/comptags.c (new file, 140 lines)
@@ -0,0 +1,140 @@
/*
 * Copyright (c) 2017, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/dma-buf.h>

#include <nvgpu/comptags.h>

#include <nvgpu/linux/vm.h>

#include "gk20a/gk20a.h"
#include "dmabuf.h"

void gk20a_get_comptags(struct nvgpu_os_buffer *buf,
			struct gk20a_comptags *comptags)
{
	struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf,
			buf->dev);

	if (!comptags)
		return;

	if (!priv) {
		memset(comptags, 0, sizeof(*comptags));
		return;
	}

	nvgpu_mutex_acquire(&priv->lock);
	*comptags = priv->comptags;
	nvgpu_mutex_release(&priv->lock);
}

int gk20a_alloc_or_get_comptags(struct gk20a *g,
				struct nvgpu_os_buffer *buf,
				struct gk20a_comptag_allocator *allocator,
				struct gk20a_comptags *comptags)
{
	struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf,
			buf->dev);
	u32 offset;
	int err;
	unsigned int ctag_granularity;
	u32 lines;

	if (!priv)
		return -ENOSYS;

	nvgpu_mutex_acquire(&priv->lock);

	if (priv->comptags.allocated) {
		/* already allocated */
		*comptags = priv->comptags;

		err = 0;
		goto exit_locked;
	}

	ctag_granularity = g->ops.fb.compression_page_size(g);
	lines = DIV_ROUND_UP_ULL(buf->dmabuf->size, ctag_granularity);

	/* 0-sized buffer? Shouldn't occur, but let's check anyway. */
	if (lines < 1) {
		err = -EINVAL;
		goto exit_locked;
	}

	/* store the allocator so we can use it when we free the ctags */
	priv->comptag_allocator = allocator;
	err = gk20a_comptaglines_alloc(allocator, &offset, lines);
	if (!err) {
		priv->comptags.offset = offset;
		priv->comptags.lines = lines;
		priv->comptags.needs_clear = true;
	} else {
		priv->comptags.offset = 0;
		priv->comptags.lines = 0;
		priv->comptags.needs_clear = false;
	}

	/*
	 * We don't report an error here if comptag alloc failed. The
	 * caller will simply fall back to incompressible kinds. It
	 * would not be safe to re-allocate comptags anyway on
	 * successive calls, as that would break map aliasing.
	 */
	err = 0;
	priv->comptags.allocated = true;

	*comptags = priv->comptags;

exit_locked:
	nvgpu_mutex_release(&priv->lock);

	return err;
}

bool gk20a_comptags_start_clear(struct nvgpu_os_buffer *buf)
{
	struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf,
			buf->dev);
	bool clear_started = false;

	if (priv) {
		nvgpu_mutex_acquire(&priv->lock);

		clear_started = priv->comptags.needs_clear;

		if (!clear_started)
			nvgpu_mutex_release(&priv->lock);
	}

	return clear_started;
}

void gk20a_comptags_finish_clear(struct nvgpu_os_buffer *buf,
				 bool clear_successful)
{
	struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf,
			buf->dev);
	if (priv) {
		if (clear_successful)
			priv->comptags.needs_clear = false;

		nvgpu_mutex_release(&priv->lock);
	}
}
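Editorial note: one subtlety in the pair above deserves a usage sketch. gk20a_comptags_start_clear() returns true with priv->lock still held, and gk20a_comptags_finish_clear() is what releases it, so the actual clear runs under the lock and only one thread performs it. A hedged sketch of a caller follows; my_hw_clear() and my_clear_comptags_once() are hypothetical stand-ins, not driver functions.

/* Sketch only: clear comptags exactly once per buffer, under priv->lock. */
static int my_clear_comptags_once(struct gk20a *g, struct nvgpu_os_buffer *buf)
{
	int err = 0;

	if (gk20a_comptags_start_clear(buf)) {
		/* the lock is held here; needs_clear was true */
		err = my_hw_clear(g, buf);      /* hypothetical helper */

		/* releases the lock; clears needs_clear only on success */
		gk20a_comptags_finish_clear(buf, err == 0);
	}
	return err;
}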
drivers/gpu/nvgpu/os/linux/cond.c (new file, 73 lines)
@@ -0,0 +1,73 @@
/*
 * Copyright (c) 2017, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/wait.h>
#include <linux/sched.h>

#include <nvgpu/cond.h>

int nvgpu_cond_init(struct nvgpu_cond *cond)
{
	init_waitqueue_head(&cond->wq);
	cond->initialized = true;

	return 0;
}

void nvgpu_cond_destroy(struct nvgpu_cond *cond)
{
	cond->initialized = false;
}

int nvgpu_cond_signal(struct nvgpu_cond *cond)
{
	if (!cond->initialized)
		return -EINVAL;

	wake_up(&cond->wq);

	return 0;
}

int nvgpu_cond_signal_interruptible(struct nvgpu_cond *cond)
{
	if (!cond->initialized)
		return -EINVAL;

	wake_up_interruptible(&cond->wq);

	return 0;
}

int nvgpu_cond_broadcast(struct nvgpu_cond *cond)
{
	if (!cond->initialized)
		return -EINVAL;

	wake_up_all(&cond->wq);

	return 0;
}

int nvgpu_cond_broadcast_interruptible(struct nvgpu_cond *cond)
{
	if (!cond->initialized)
		return -EINVAL;

	wake_up_interruptible_all(&cond->wq);

	return 0;
}
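Editorial note: a short usage sketch for the wrappers above, mirroring how ctxsw_trace.c (next file) consumes them: the waiter re-checks its condition via NVGPU_COND_WAIT_INTERRUPTIBLE, so spurious wakeups are harmless, and the producer signals after publishing the state change. my_queue and its helpers are invented for illustration.

struct my_queue {                       /* hypothetical example type */
	struct nvgpu_cond cond;
	bool data_ready;
};

/* consumer side: sleep until data_ready; 0 means wait without timeout */
static int my_queue_wait(struct my_queue *q)
{
	return NVGPU_COND_WAIT_INTERRUPTIBLE(&q->cond, q->data_ready, 0);
}

/* producer side: publish, then wake the waiters */
static void my_queue_publish(struct my_queue *q)
{
	q->data_ready = true;
	nvgpu_cond_signal_interruptible(&q->cond);
}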
drivers/gpu/nvgpu/os/linux/ctxsw_trace.c (new file, 730 lines)
@@ -0,0 +1,730 @@
/*
 * Copyright (c) 2016-2018, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/wait.h>
#include <linux/ktime.h>
#include <linux/uaccess.h>
#include <linux/poll.h>
#include <trace/events/gk20a.h>
#include <uapi/linux/nvgpu.h>

#include "gk20a/gk20a.h"
#include "gk20a/gr_gk20a.h"

#include <nvgpu/kmem.h>
#include <nvgpu/log.h>
#include <nvgpu/atomic.h>
#include <nvgpu/barrier.h>

#include "platform_gk20a.h"
#include "os_linux.h"
#include "ctxsw_trace.h"

#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>

#define GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE	(128*PAGE_SIZE)

/* Userland-facing FIFO (one global + eventually one per VM) */
struct gk20a_ctxsw_dev {
	struct gk20a *g;

	struct nvgpu_ctxsw_ring_header *hdr;
	struct nvgpu_ctxsw_trace_entry *ents;
	struct nvgpu_ctxsw_trace_filter filter;
	bool write_enabled;
	struct nvgpu_cond readout_wq;
	size_t size;
	u32 num_ents;

	nvgpu_atomic_t vma_ref;

	struct nvgpu_mutex write_lock;
};

struct gk20a_ctxsw_trace {
	struct gk20a_ctxsw_dev devs[GK20A_CTXSW_TRACE_NUM_DEVS];
};

static inline int ring_is_empty(struct nvgpu_ctxsw_ring_header *hdr)
{
	return (hdr->write_idx == hdr->read_idx);
}

static inline int ring_is_full(struct nvgpu_ctxsw_ring_header *hdr)
{
	return ((hdr->write_idx + 1) % hdr->num_ents) == hdr->read_idx;
}

static inline int ring_len(struct nvgpu_ctxsw_ring_header *hdr)
{
	return (hdr->write_idx - hdr->read_idx) % hdr->num_ents;
}
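Editorial note: a quick stand-alone check of the ring arithmetic above. Two properties are worth noting: one slot is sacrificed so that full and empty states are distinguishable, and ring_len() relies on the indices being unsigned (as the uapi ring header appears to declare them) so that a wrapped write_idx still yields the right length. The model below is illustrative only.

#include <assert.h>
#include <stdint.h>

/* Stand-alone model of ring_is_full()/ring_len() above, num_ents = n. */
static unsigned int model_ring_len(uint32_t w, uint32_t r, uint32_t n)
{
        return (w - r) % n;     /* unsigned wraparound handles w < r */
}

static int model_ring_is_full(uint32_t w, uint32_t r, uint32_t n)
{
        return ((w + 1) % n) == r;
}

int main(void)
{
        assert(model_ring_len(0, 0, 4) == 0);   /* empty */
        assert(model_ring_is_full(3, 0, 4));    /* 3 of 4 slots used = full */
        assert(model_ring_len(1, 2, 4) == 3);   /* wrapped: underflow gives 3 */
        return 0;
}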
|
||||
ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size,
|
||||
loff_t *off)
|
||||
{
|
||||
struct gk20a_ctxsw_dev *dev = filp->private_data;
|
||||
struct gk20a *g = dev->g;
|
||||
struct nvgpu_ctxsw_ring_header *hdr = dev->hdr;
|
||||
struct nvgpu_ctxsw_trace_entry __user *entry =
|
||||
(struct nvgpu_ctxsw_trace_entry *) buf;
|
||||
size_t copied = 0;
|
||||
int err;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw,
|
||||
"filp=%p buf=%p size=%zu", filp, buf, size);
|
||||
|
||||
nvgpu_mutex_acquire(&dev->write_lock);
|
||||
while (ring_is_empty(hdr)) {
|
||||
nvgpu_mutex_release(&dev->write_lock);
|
||||
if (filp->f_flags & O_NONBLOCK)
|
||||
return -EAGAIN;
|
||||
err = NVGPU_COND_WAIT_INTERRUPTIBLE(&dev->readout_wq,
|
||||
!ring_is_empty(hdr), 0);
|
||||
if (err)
|
||||
return err;
|
||||
nvgpu_mutex_acquire(&dev->write_lock);
|
||||
}
|
||||
|
||||
while (size >= sizeof(struct nvgpu_ctxsw_trace_entry)) {
|
||||
if (ring_is_empty(hdr))
|
||||
break;
|
||||
|
||||
if (copy_to_user(entry, &dev->ents[hdr->read_idx],
|
||||
sizeof(*entry))) {
|
||||
nvgpu_mutex_release(&dev->write_lock);
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
hdr->read_idx++;
|
||||
if (hdr->read_idx >= hdr->num_ents)
|
||||
hdr->read_idx = 0;
|
||||
|
||||
entry++;
|
||||
copied += sizeof(*entry);
|
||||
size -= sizeof(*entry);
|
||||
}
|
||||
|
||||
nvgpu_log(g, gpu_dbg_ctxsw, "copied=%zu read_idx=%d", copied,
|
||||
hdr->read_idx);
|
||||
|
||||
*off = hdr->read_idx;
|
||||
nvgpu_mutex_release(&dev->write_lock);
|
||||
|
||||
return copied;
|
||||
}
|
||||
|
||||
static int gk20a_ctxsw_dev_ioctl_trace_enable(struct gk20a_ctxsw_dev *dev)
|
||||
{
|
||||
struct gk20a *g = dev->g;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "trace enabled");
|
||||
nvgpu_mutex_acquire(&dev->write_lock);
|
||||
dev->write_enabled = true;
|
||||
nvgpu_mutex_release(&dev->write_lock);
|
||||
dev->g->ops.fecs_trace.enable(dev->g);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int gk20a_ctxsw_dev_ioctl_trace_disable(struct gk20a_ctxsw_dev *dev)
|
||||
{
|
||||
struct gk20a *g = dev->g;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "trace disabled");
|
||||
dev->g->ops.fecs_trace.disable(dev->g);
|
||||
nvgpu_mutex_acquire(&dev->write_lock);
|
||||
dev->write_enabled = false;
|
||||
nvgpu_mutex_release(&dev->write_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int gk20a_ctxsw_dev_alloc_buffer(struct gk20a_ctxsw_dev *dev,
|
||||
size_t size)
|
||||
{
|
||||
struct gk20a *g = dev->g;
|
||||
void *buf;
|
||||
int err;
|
||||
|
||||
if ((dev->write_enabled) || (nvgpu_atomic_read(&dev->vma_ref)))
|
||||
return -EBUSY;
|
||||
|
||||
err = g->ops.fecs_trace.alloc_user_buffer(g, &buf, &size);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
|
||||
dev->hdr = buf;
|
||||
dev->ents = (struct nvgpu_ctxsw_trace_entry *) (dev->hdr + 1);
|
||||
dev->size = size;
|
||||
dev->num_ents = dev->hdr->num_ents;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_ctxsw, "size=%zu hdr=%p ents=%p num_ents=%d",
|
||||
dev->size, dev->hdr, dev->ents, dev->hdr->num_ents);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int gk20a_ctxsw_dev_ring_alloc(struct gk20a *g,
|
||||
void **buf, size_t *size)
|
||||
{
|
||||
struct nvgpu_ctxsw_ring_header *hdr;
|
||||
|
||||
*size = roundup(*size, PAGE_SIZE);
|
||||
hdr = vmalloc_user(*size);
|
||||
if (!hdr)
|
||||
return -ENOMEM;
|
||||
|
||||
hdr->magic = NVGPU_CTXSW_RING_HEADER_MAGIC;
|
||||
hdr->version = NVGPU_CTXSW_RING_HEADER_VERSION;
|
||||
hdr->num_ents = (*size - sizeof(struct nvgpu_ctxsw_ring_header))
|
||||
/ sizeof(struct nvgpu_ctxsw_trace_entry);
|
||||
hdr->ent_size = sizeof(struct nvgpu_ctxsw_trace_entry);
|
||||
hdr->drop_count = 0;
|
||||
hdr->read_idx = 0;
|
||||
hdr->write_idx = 0;
|
||||
hdr->write_seqno = 0;
|
||||
|
||||
*buf = hdr;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int gk20a_ctxsw_dev_ring_free(struct gk20a *g)
|
||||
{
|
||||
struct gk20a_ctxsw_dev *dev = &g->ctxsw_trace->devs[0];
|
||||
|
||||
nvgpu_vfree(g, dev->hdr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int gk20a_ctxsw_dev_ioctl_ring_setup(struct gk20a_ctxsw_dev *dev,
|
||||
struct nvgpu_ctxsw_ring_setup_args *args)
|
||||
{
|
||||
struct gk20a *g = dev->g;
|
||||
size_t size = args->size;
|
||||
int ret;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "size=%zu", size);
|
||||
|
||||
if (size > GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE)
|
||||
return -EINVAL;
|
||||
|
||||
nvgpu_mutex_acquire(&dev->write_lock);
|
||||
ret = gk20a_ctxsw_dev_alloc_buffer(dev, size);
|
||||
nvgpu_mutex_release(&dev->write_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev,
|
||||
struct nvgpu_ctxsw_trace_filter_args *args)
|
||||
{
|
||||
struct gk20a *g = dev->g;
|
||||
|
||||
nvgpu_mutex_acquire(&dev->write_lock);
|
||||
dev->filter = args->filter;
|
||||
nvgpu_mutex_release(&dev->write_lock);
|
||||
|
||||
if (g->ops.fecs_trace.set_filter)
|
||||
g->ops.fecs_trace.set_filter(g, &dev->filter);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int gk20a_ctxsw_dev_ioctl_get_filter(struct gk20a_ctxsw_dev *dev,
|
||||
struct nvgpu_ctxsw_trace_filter_args *args)
|
||||
{
|
||||
nvgpu_mutex_acquire(&dev->write_lock);
|
||||
args->filter = dev->filter;
|
||||
nvgpu_mutex_release(&dev->write_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int gk20a_ctxsw_dev_ioctl_poll(struct gk20a_ctxsw_dev *dev)
|
||||
{
|
||||
struct gk20a *g = dev->g;
|
||||
int err;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " ");
|
||||
|
||||
err = gk20a_busy(g);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (g->ops.fecs_trace.flush)
|
||||
err = g->ops.fecs_trace.flush(g);
|
||||
|
||||
if (likely(!err))
|
||||
err = g->ops.fecs_trace.poll(g);
|
||||
|
||||
gk20a_idle(g);
|
||||
return err;
|
||||
}
|
||||
|
||||
int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp)
|
||||
{
|
||||
struct nvgpu_os_linux *l;
|
||||
struct gk20a *g;
|
||||
struct gk20a_ctxsw_trace *trace;
|
||||
struct gk20a_ctxsw_dev *dev;
|
||||
int err;
|
||||
size_t size;
|
||||
u32 n;
|
||||
|
||||
/* only one VM for now */
|
||||
const int vmid = 0;
|
||||
|
||||
l = container_of(inode->i_cdev, struct nvgpu_os_linux, ctxsw.cdev);
|
||||
g = gk20a_get(&l->g);
|
||||
if (!g)
|
||||
return -ENODEV;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p", g);
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN)) {
|
||||
err = -EPERM;
|
||||
goto free_ref;
|
||||
}
|
||||
|
||||
err = gk20a_busy(g);
|
||||
if (err)
|
||||
goto free_ref;
|
||||
|
||||
trace = g->ctxsw_trace;
|
||||
if (!trace) {
|
||||
err = -ENODEV;
|
||||
goto idle;
|
||||
}
|
||||
|
||||
/* Allow only one user for this device */
|
||||
dev = &trace->devs[vmid];
|
||||
nvgpu_mutex_acquire(&dev->write_lock);
|
||||
if (dev->hdr) {
|
||||
err = -EBUSY;
|
||||
goto done;
|
||||
}
|
||||
|
||||
/* By default, allocate ring buffer big enough to accommodate
|
||||
* FECS records with default event filter */
|
||||
|
||||
/* enable all traces by default */
|
||||
NVGPU_CTXSW_FILTER_SET_ALL(&dev->filter);
|
||||
|
||||
/* compute max number of entries generated with this filter */
|
||||
n = g->ops.fecs_trace.max_entries(g, &dev->filter);
|
||||
|
||||
size = sizeof(struct nvgpu_ctxsw_ring_header) +
|
||||
n * sizeof(struct nvgpu_ctxsw_trace_entry);
|
||||
nvgpu_log(g, gpu_dbg_ctxsw, "size=%zu entries=%d ent_size=%zu",
|
||||
size, n, sizeof(struct nvgpu_ctxsw_trace_entry));
|
||||
|
||||
err = gk20a_ctxsw_dev_alloc_buffer(dev, size);
|
||||
if (!err) {
|
||||
filp->private_data = dev;
|
||||
nvgpu_log(g, gpu_dbg_ctxsw, "filp=%p dev=%p size=%zu",
|
||||
filp, dev, size);
|
||||
}
|
||||
|
||||
done:
|
||||
nvgpu_mutex_release(&dev->write_lock);
|
||||
|
||||
idle:
|
||||
gk20a_idle(g);
|
||||
free_ref:
|
||||
if (err)
|
||||
gk20a_put(g);
|
||||
return err;
|
||||
}
|
||||
|
||||
int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp)
|
||||
{
|
||||
struct gk20a_ctxsw_dev *dev = filp->private_data;
|
||||
struct gk20a *g = dev->g;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "dev: %p", dev);
|
||||
|
||||
g->ops.fecs_trace.disable(g);
|
||||
|
||||
nvgpu_mutex_acquire(&dev->write_lock);
|
||||
dev->write_enabled = false;
|
||||
nvgpu_mutex_release(&dev->write_lock);
|
||||
|
||||
if (dev->hdr) {
|
||||
dev->g->ops.fecs_trace.free_user_buffer(dev->g);
|
||||
dev->hdr = NULL;
|
||||
}
|
||||
gk20a_put(g);
|
||||
return 0;
|
||||
}
|
||||
|
||||
long gk20a_ctxsw_dev_ioctl(struct file *filp, unsigned int cmd,
|
||||
unsigned long arg)
|
||||
{
|
||||
struct gk20a_ctxsw_dev *dev = filp->private_data;
|
||||
struct gk20a *g = dev->g;
|
||||
u8 buf[NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE];
|
||||
int err = 0;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "nr=%d", _IOC_NR(cmd));
|
||||
|
||||
if ((_IOC_TYPE(cmd) != NVGPU_CTXSW_IOCTL_MAGIC) ||
|
||||
(_IOC_NR(cmd) == 0) ||
|
||||
(_IOC_NR(cmd) > NVGPU_CTXSW_IOCTL_LAST) ||
|
||||
(_IOC_SIZE(cmd) > NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE))
|
||||
return -EINVAL;
|
||||
|
||||
memset(buf, 0, sizeof(buf));
|
||||
if (_IOC_DIR(cmd) & _IOC_WRITE) {
|
||||
if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd)))
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
switch (cmd) {
|
||||
case NVGPU_CTXSW_IOCTL_TRACE_ENABLE:
|
||||
err = gk20a_ctxsw_dev_ioctl_trace_enable(dev);
|
||||
break;
|
||||
case NVGPU_CTXSW_IOCTL_TRACE_DISABLE:
|
||||
err = gk20a_ctxsw_dev_ioctl_trace_disable(dev);
|
||||
break;
|
||||
case NVGPU_CTXSW_IOCTL_RING_SETUP:
|
||||
err = gk20a_ctxsw_dev_ioctl_ring_setup(dev,
|
||||
(struct nvgpu_ctxsw_ring_setup_args *) buf);
|
||||
break;
|
||||
case NVGPU_CTXSW_IOCTL_SET_FILTER:
|
||||
err = gk20a_ctxsw_dev_ioctl_set_filter(dev,
|
||||
(struct nvgpu_ctxsw_trace_filter_args *) buf);
|
||||
break;
|
||||
case NVGPU_CTXSW_IOCTL_GET_FILTER:
|
||||
err = gk20a_ctxsw_dev_ioctl_get_filter(dev,
|
||||
(struct nvgpu_ctxsw_trace_filter_args *) buf);
|
||||
break;
|
||||
case NVGPU_CTXSW_IOCTL_POLL:
|
||||
err = gk20a_ctxsw_dev_ioctl_poll(dev);
|
||||
break;
|
||||
default:
|
||||
dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x",
|
||||
cmd);
|
||||
err = -ENOTTY;
|
||||
}
|
||||
|
||||
if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
|
||||
err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd));
|
||||
|
||||
return err;
|
||||
}
unsigned int gk20a_ctxsw_dev_poll(struct file *filp, poll_table *wait)
{
	struct gk20a_ctxsw_dev *dev = filp->private_data;
	struct gk20a *g = dev->g;
	struct nvgpu_ctxsw_ring_header *hdr = dev->hdr;
	unsigned int mask = 0;

	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " ");

	nvgpu_mutex_acquire(&dev->write_lock);
	poll_wait(filp, &dev->readout_wq.wq, wait);
	if (!ring_is_empty(hdr))
		mask |= POLLIN | POLLRDNORM;
	nvgpu_mutex_release(&dev->write_lock);

	return mask;
}

static void gk20a_ctxsw_dev_vma_open(struct vm_area_struct *vma)
{
	struct gk20a_ctxsw_dev *dev = vma->vm_private_data;
	struct gk20a *g = dev->g;

	nvgpu_atomic_inc(&dev->vma_ref);
	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d",
		nvgpu_atomic_read(&dev->vma_ref));
}

static void gk20a_ctxsw_dev_vma_close(struct vm_area_struct *vma)
{
	struct gk20a_ctxsw_dev *dev = vma->vm_private_data;
	struct gk20a *g = dev->g;

	nvgpu_atomic_dec(&dev->vma_ref);
	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d",
		nvgpu_atomic_read(&dev->vma_ref));
}

static struct vm_operations_struct gk20a_ctxsw_dev_vma_ops = {
	.open = gk20a_ctxsw_dev_vma_open,
	.close = gk20a_ctxsw_dev_vma_close,
};

int gk20a_ctxsw_dev_mmap_buffer(struct gk20a *g,
	struct vm_area_struct *vma)
{
	return remap_vmalloc_range(vma, g->ctxsw_trace->devs[0].hdr, 0);
}

int gk20a_ctxsw_dev_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct gk20a_ctxsw_dev *dev = filp->private_data;
	struct gk20a *g = dev->g;
	int ret;

	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vm_start=%lx vm_end=%lx",
		vma->vm_start, vma->vm_end);

	ret = dev->g->ops.fecs_trace.mmap_user_buffer(dev->g, vma);
	if (likely(!ret)) {
		vma->vm_private_data = dev;
		vma->vm_ops = &gk20a_ctxsw_dev_vma_ops;
		vma->vm_ops->open(vma);
	}

	return ret;
}
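A consumer obtains the ring by calling mmap() on the same file descriptor; the size computation in gk20a_ctxsw_dev_open suggests the ring header sits at offset zero with the entry array immediately behind it. A hedged userspace sketch (both the layout and the idea that the buffer size was negotiated earlier via NVGPU_CTXSW_IOCTL_RING_SETUP are assumptions):

#include <sys/mman.h>

/* 'ring_size' is assumed to have been agreed via RING_SETUP beforehand. */
struct nvgpu_ctxsw_ring_header *ctxsw_map_ring(int fd, size_t ring_size)
{
	void *p = mmap(NULL, ring_size, PROT_READ | PROT_WRITE,
		       MAP_SHARED, fd, 0);

	return p == MAP_FAILED ? NULL : (struct nvgpu_ctxsw_ring_header *)p;
}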
#ifdef CONFIG_GK20A_CTXSW_TRACE
static int gk20a_ctxsw_init_devs(struct gk20a *g)
{
	struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
	struct gk20a_ctxsw_dev *dev = trace->devs;
	int err;
	int i;

	for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) {
		dev->g = g;
		dev->hdr = NULL;
		dev->write_enabled = false;
		nvgpu_cond_init(&dev->readout_wq);
		err = nvgpu_mutex_init(&dev->write_lock);
		if (err)
			return err;
		nvgpu_atomic_set(&dev->vma_ref, 0);
		dev++;
	}
	return 0;
}
#endif

int gk20a_ctxsw_trace_init(struct gk20a *g)
{
#ifdef CONFIG_GK20A_CTXSW_TRACE
	struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
	int err;

	nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p trace=%p", g, trace);

	/* if tracing is not supported, skip this */
	if (!g->ops.fecs_trace.init)
		return 0;

	if (likely(trace))
		return 0;

	trace = nvgpu_kzalloc(g, sizeof(*trace));
	if (unlikely(!trace))
		return -ENOMEM;
	g->ctxsw_trace = trace;

	err = gk20a_ctxsw_init_devs(g);
	if (err)
		goto fail;

	err = g->ops.fecs_trace.init(g);
	if (unlikely(err))
		goto fail;

	return 0;

fail:
	memset(&g->ops.fecs_trace, 0, sizeof(g->ops.fecs_trace));
	nvgpu_kfree(g, trace);
	g->ctxsw_trace = NULL;
	return err;
#else
	return 0;
#endif
}

void gk20a_ctxsw_trace_cleanup(struct gk20a *g)
{
#ifdef CONFIG_GK20A_CTXSW_TRACE
	struct gk20a_ctxsw_trace *trace;
	struct gk20a_ctxsw_dev *dev;
	int i;

	if (!g->ctxsw_trace)
		return;

	trace = g->ctxsw_trace;
	dev = trace->devs;

	for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) {
		nvgpu_mutex_destroy(&dev->write_lock);
		dev++;
	}

	nvgpu_kfree(g, g->ctxsw_trace);
	g->ctxsw_trace = NULL;

	g->ops.fecs_trace.deinit(g);
#endif
}

int gk20a_ctxsw_trace_write(struct gk20a *g,
	struct nvgpu_ctxsw_trace_entry *entry)
{
	struct nvgpu_ctxsw_ring_header *hdr;
	struct gk20a_ctxsw_dev *dev;
	int ret = 0;
	const char *reason;
	u32 write_idx;

	if (!g->ctxsw_trace)
		return 0;

	if (unlikely(entry->vmid >= GK20A_CTXSW_TRACE_NUM_DEVS))
		return -ENODEV;

	dev = &g->ctxsw_trace->devs[entry->vmid];
	hdr = dev->hdr;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
		"dev=%p hdr=%p", dev, hdr);

	nvgpu_mutex_acquire(&dev->write_lock);

	if (unlikely(!hdr)) {
		/* device has been released */
		ret = -ENODEV;
		goto done;
	}

	write_idx = hdr->write_idx;
	if (write_idx >= dev->num_ents) {
		nvgpu_err(dev->g,
			"write_idx=%u out of range [0..%u]",
			write_idx, dev->num_ents);
		ret = -ENOSPC;
		reason = "write_idx out of range";
		goto disable;
	}

	entry->seqno = hdr->write_seqno++;

	if (!dev->write_enabled) {
		ret = -EBUSY;
		reason = "write disabled";
		goto drop;
	}

	if (unlikely(ring_is_full(hdr))) {
		ret = -ENOSPC;
		reason = "user fifo full";
		goto drop;
	}

	if (!NVGPU_CTXSW_FILTER_ISSET(entry->tag, &dev->filter)) {
		reason = "filtered out";
		goto filter;
	}

	nvgpu_log(g, gpu_dbg_ctxsw,
		"seqno=%d context_id=%08x pid=%lld tag=%x timestamp=%llx",
		entry->seqno, entry->context_id, entry->pid,
		entry->tag, entry->timestamp);

	dev->ents[write_idx] = *entry;

	/* ensure record is written before updating write index */
	nvgpu_smp_wmb();

	write_idx++;
	if (unlikely(write_idx >= hdr->num_ents))
		write_idx = 0;
	hdr->write_idx = write_idx;
	nvgpu_log(g, gpu_dbg_ctxsw, "added: read=%d write=%d len=%d",
		hdr->read_idx, hdr->write_idx, ring_len(hdr));

	nvgpu_mutex_release(&dev->write_lock);
	return ret;

disable:
	g->ops.fecs_trace.disable(g);

drop:
	hdr->drop_count++;

filter:
	nvgpu_log(g, gpu_dbg_ctxsw,
		"dropping seqno=%d context_id=%08x pid=%lld "
		"tag=%x time=%llx (%s)",
		entry->seqno, entry->context_id, entry->pid,
		entry->tag, entry->timestamp, reason);

done:
	nvgpu_mutex_release(&dev->write_lock);
	return ret;
}
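gk20a_ctxsw_trace_write is the single producer for each ring: it fills the entry, issues nvgpu_smp_wmb(), and only then publishes the new write_idx, so any reader that observes the updated index is guaranteed to see the entry body. A consumer therefore needs the matching read-side barrier before touching the payload. A hedged sketch of that counterpart, assuming the entry array directly follows the header in the mapping:

/* Consumer-side sketch; 'ents' is assumed to point just past 'hdr'.
 * __sync_synchronize() stands in for a read barrier here; in-kernel
 * code would use the nvgpu/smp read-barrier primitive instead. */
static int ctxsw_ring_read_one(struct nvgpu_ctxsw_ring_header *hdr,
			       struct nvgpu_ctxsw_trace_entry *ents,
			       struct nvgpu_ctxsw_trace_entry *out)
{
	u32 read_idx = hdr->read_idx;

	if (read_idx == hdr->write_idx)
		return 0;		/* ring is empty */

	__sync_synchronize();		/* pairs with the producer's nvgpu_smp_wmb() */
	*out = ents[read_idx];

	if (++read_idx >= hdr->num_ents)
		read_idx = 0;
	hdr->read_idx = read_idx;
	return 1;
}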
void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid)
{
	struct gk20a_ctxsw_dev *dev;

	if (!g->ctxsw_trace)
		return;

	dev = &g->ctxsw_trace->devs[vmid];
	nvgpu_cond_signal_interruptible(&dev->readout_wq);
}

void gk20a_ctxsw_trace_channel_reset(struct gk20a *g, struct channel_gk20a *ch)
{
#ifdef CONFIG_GK20A_CTXSW_TRACE
	struct nvgpu_ctxsw_trace_entry entry = {
		.vmid = 0,
		.tag = NVGPU_CTXSW_TAG_ENGINE_RESET,
		.context_id = 0,
		.pid = ch->tgid,
	};

	if (!g->ctxsw_trace)
		return;

	g->ops.ptimer.read_ptimer(g, &entry.timestamp);
	gk20a_ctxsw_trace_write(g, &entry);
	gk20a_ctxsw_trace_wake_up(g, 0);
#endif
	trace_gk20a_channel_reset(ch->chid, ch->tsgid);
}

void gk20a_ctxsw_trace_tsg_reset(struct gk20a *g, struct tsg_gk20a *tsg)
{
#ifdef CONFIG_GK20A_CTXSW_TRACE
	struct nvgpu_ctxsw_trace_entry entry = {
		.vmid = 0,
		.tag = NVGPU_CTXSW_TAG_ENGINE_RESET,
		.context_id = 0,
		.pid = tsg->tgid,
	};

	if (!g->ctxsw_trace)
		return;

	g->ops.ptimer.read_ptimer(g, &entry.timestamp);
	gk20a_ctxsw_trace_write(g, &entry);
	gk20a_ctxsw_trace_wake_up(g, 0);
#endif
	trace_gk20a_channel_reset(~0, tsg->tsgid);
}
39
drivers/gpu/nvgpu/os/linux/ctxsw_trace.h
Normal file
@@ -0,0 +1,39 @@
/*
 * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef __CTXSW_TRACE_H__
#define __CTXSW_TRACE_H__

#include <nvgpu/types.h>

#define GK20A_CTXSW_TRACE_NUM_DEVS	1

struct file;
struct inode;
struct poll_table_struct;

struct gk20a;

int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp);
int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp);
long gk20a_ctxsw_dev_ioctl(struct file *filp,
	unsigned int cmd, unsigned long arg);
ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf,
	size_t size, loff_t *offs);
unsigned int gk20a_ctxsw_dev_poll(struct file *filp,
	struct poll_table_struct *pts);

#endif /* __CTXSW_TRACE_H__ */
452
drivers/gpu/nvgpu/os/linux/debug.c
Normal file
@@ -0,0 +1,452 @@
/*
 * Copyright (C) 2017-2018 NVIDIA Corporation. All rights reserved.
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 */

#include "debug_cde.h"
#include "debug_ce.h"
#include "debug_fifo.h"
#include "debug_gr.h"
#include "debug_allocator.h"
#include "debug_kmem.h"
#include "debug_pmu.h"
#include "debug_sched.h"
#include "debug_hal.h"
#include "debug_xve.h"
#include "os_linux.h"
#include "platform_gk20a.h"

#include "gk20a/gk20a.h"

#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/uaccess.h>

#include <nvgpu/debug.h>

unsigned int gk20a_debug_trace_cmdbuf;

static inline void gk20a_debug_write_printk(void *ctx, const char *str,
	size_t len)
{
	pr_info("%s", str);
}

static inline void gk20a_debug_write_to_seqfile(void *ctx, const char *str,
	size_t len)
{
	seq_write((struct seq_file *)ctx, str, len);
}

void gk20a_debug_output(struct gk20a_debug_output *o,
	const char *fmt, ...)
{
	va_list args;
	int len;

	va_start(args, fmt);
	len = vsnprintf(o->buf, sizeof(o->buf), fmt, args);
	va_end(args);
	o->fn(o->ctx, o->buf, len);
}
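gk20a_debug_output() is sink-agnostic: it formats into o->buf and hands the string to the fn/ctx pair, which is how the printk and seq_file writers above are wired in. Any function with the same shape can be dropped in; a hedged sketch of a third sink that accumulates into a caller-provided memory buffer (the struct and function below are illustrative, not part of this commit):

struct debug_strbuf {		/* illustrative sink state, not in nvgpu */
	char *dst;
	size_t cap;
	size_t used;
};

static void gk20a_debug_write_to_strbuf(void *ctx, const char *str,
	size_t len)
{
	struct debug_strbuf *b = ctx;
	size_t n = min(len, b->cap - b->used - 1);

	memcpy(b->dst + b->used, str, n);
	b->used += n;
	b->dst[b->used] = '\0';
}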
static int gk20a_gr_dump_regs(struct gk20a *g,
	struct gk20a_debug_output *o)
{
	if (g->ops.gr.dump_gr_regs)
		gr_gk20a_elpg_protected_call(g, g->ops.gr.dump_gr_regs(g, o));

	return 0;
}

int gk20a_gr_debug_dump(struct gk20a *g)
{
	struct gk20a_debug_output o = {
		.fn = gk20a_debug_write_printk
	};

	gk20a_gr_dump_regs(g, &o);

	return 0;
}

static int gk20a_gr_debug_show(struct seq_file *s, void *unused)
{
	struct device *dev = s->private;
	struct gk20a *g = gk20a_get_platform(dev)->g;
	struct gk20a_debug_output o = {
		.fn = gk20a_debug_write_to_seqfile,
		.ctx = s,
	};
	int err;

	err = gk20a_busy(g);
	if (err) {
		nvgpu_err(g, "failed to power on gpu: %d", err);
		return -EINVAL;
	}

	gk20a_gr_dump_regs(g, &o);

	gk20a_idle(g);

	return 0;
}

void gk20a_debug_dump(struct gk20a *g)
{
	struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g));
	struct gk20a_debug_output o = {
		.fn = gk20a_debug_write_printk
	};

	if (platform->dump_platform_dependencies)
		platform->dump_platform_dependencies(dev_from_gk20a(g));

	/* HAL only initialized after 1st power-on */
	if (g->ops.debug.show_dump)
		g->ops.debug.show_dump(g, &o);
}

static int gk20a_debug_show(struct seq_file *s, void *unused)
{
	struct device *dev = s->private;
	struct gk20a_debug_output o = {
		.fn = gk20a_debug_write_to_seqfile,
		.ctx = s,
	};
	struct gk20a *g;
	int err;

	g = gk20a_get_platform(dev)->g;

	err = gk20a_busy(g);
	if (err) {
		nvgpu_err(g, "failed to power on gpu: %d", err);
		return -EFAULT;
	}

	/* HAL only initialized after 1st power-on */
	if (g->ops.debug.show_dump)
		g->ops.debug.show_dump(g, &o);

	gk20a_idle(g);
	return 0;
}

static int gk20a_gr_debug_open(struct inode *inode, struct file *file)
{
	return single_open(file, gk20a_gr_debug_show, inode->i_private);
}

static int gk20a_debug_open(struct inode *inode, struct file *file)
{
	return single_open(file, gk20a_debug_show, inode->i_private);
}

static const struct file_operations gk20a_gr_debug_fops = {
	.open = gk20a_gr_debug_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

static const struct file_operations gk20a_debug_fops = {
	.open = gk20a_debug_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

void gk20a_debug_show_dump(struct gk20a *g, struct gk20a_debug_output *o)
{
	g->ops.fifo.dump_pbdma_status(g, o);
	g->ops.fifo.dump_eng_status(g, o);

	gk20a_debug_dump_all_channel_status_ramfc(g, o);
}

static ssize_t disable_bigpage_read(struct file *file, char __user *user_buf,
	size_t count, loff_t *ppos)
{
	char buf[3];
	struct gk20a *g = file->private_data;

	if (g->mm.disable_bigpage)
		buf[0] = 'Y';
	else
		buf[0] = 'N';
	buf[1] = '\n';
	buf[2] = 0x00;
	return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
}

static ssize_t disable_bigpage_write(struct file *file,
	const char __user *user_buf, size_t count, loff_t *ppos)
{
	char buf[32];
	int buf_size;
	bool bv;
	struct gk20a *g = file->private_data;

	buf_size = min(count, (sizeof(buf)-1));
	if (copy_from_user(buf, user_buf, buf_size))
		return -EFAULT;

	if (strtobool(buf, &bv) == 0) {
		g->mm.disable_bigpage = bv;
		gk20a_init_gpu_characteristics(g);
	}

	return count;
}

static struct file_operations disable_bigpage_fops = {
	.open = simple_open,
	.read = disable_bigpage_read,
	.write = disable_bigpage_write,
};

static int railgate_residency_show(struct seq_file *s, void *data)
{
	struct gk20a *g = s->private;
	struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g));
	unsigned long time_since_last_state_transition_ms;
	unsigned long total_rail_gate_time_ms;
	unsigned long total_rail_ungate_time_ms;

	if (platform->is_railgated(dev_from_gk20a(g))) {
		time_since_last_state_transition_ms =
			jiffies_to_msecs(jiffies -
				g->pstats.last_rail_gate_complete);
		total_rail_ungate_time_ms = g->pstats.total_rail_ungate_time_ms;
		total_rail_gate_time_ms =
			g->pstats.total_rail_gate_time_ms +
			time_since_last_state_transition_ms;
	} else {
		time_since_last_state_transition_ms =
			jiffies_to_msecs(jiffies -
				g->pstats.last_rail_ungate_complete);
		total_rail_gate_time_ms = g->pstats.total_rail_gate_time_ms;
		total_rail_ungate_time_ms =
			g->pstats.total_rail_ungate_time_ms +
			time_since_last_state_transition_ms;
	}

	seq_printf(s, "Time with Rails Gated: %lu ms\n"
		"Time with Rails UnGated: %lu ms\n"
		"Total railgating cycles: %lu\n",
		total_rail_gate_time_ms,
		total_rail_ungate_time_ms,
		g->pstats.railgating_cycle_count - 1);
	return 0;
}

static int railgate_residency_open(struct inode *inode, struct file *file)
{
	return single_open(file, railgate_residency_show, inode->i_private);
}

static const struct file_operations railgate_residency_fops = {
	.open = railgate_residency_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

static int gk20a_railgating_debugfs_init(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	struct dentry *d;

	d = debugfs_create_file(
		"railgate_residency", S_IRUGO|S_IWUSR, l->debugfs, g,
		&railgate_residency_fops);
	if (!d)
		return -ENOMEM;

	return 0;
}

static ssize_t timeouts_enabled_read(struct file *file,
	char __user *user_buf, size_t count, loff_t *ppos)
{
	char buf[3];
	struct gk20a *g = file->private_data;

	if (nvgpu_is_timeouts_enabled(g))
		buf[0] = 'Y';
	else
		buf[0] = 'N';
	buf[1] = '\n';
	buf[2] = 0x00;
	return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
}

static ssize_t timeouts_enabled_write(struct file *file,
	const char __user *user_buf, size_t count, loff_t *ppos)
{
	char buf[3];
	int buf_size;
	bool timeouts_enabled;
	struct gk20a *g = file->private_data;

	buf_size = min(count, (sizeof(buf)-1));
	if (copy_from_user(buf, user_buf, buf_size))
		return -EFAULT;

	if (strtobool(buf, &timeouts_enabled) == 0) {
		nvgpu_mutex_acquire(&g->dbg_sessions_lock);
		if (timeouts_enabled == false) {
			/* requesting to disable timeouts */
			if (g->timeouts_disabled_by_user == false) {
				nvgpu_atomic_inc(&g->timeouts_disabled_refcount);
				g->timeouts_disabled_by_user = true;
			}
		} else {
			/* requesting to enable timeouts */
			if (g->timeouts_disabled_by_user == true) {
				nvgpu_atomic_dec(&g->timeouts_disabled_refcount);
				g->timeouts_disabled_by_user = false;
			}
		}
		nvgpu_mutex_release(&g->dbg_sessions_lock);
	}

	return count;
}
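The handler above parses its input with strtobool(), so "Y", "N", "1", or "0" (optionally newline-terminated) are all accepted, and the timeouts_disabled_by_user flag makes repeated writes idempotent: the refcount moves at most once per direction. A hedged userspace sketch of toggling the knob (the debugfs path is an assumption and depends on the per-GPU directory name):

#include <fcntl.h>
#include <unistd.h>

int gpu_set_timeouts(const char *path, int enable)
{
	/* e.g. "/sys/kernel/debug/<gpu-dev-name>/timeouts_enabled" (assumed) */
	int fd = open(path, O_WRONLY);
	ssize_t n;

	if (fd < 0)
		return -1;
	n = write(fd, enable ? "Y\n" : "N\n", 2);
	close(fd);
	return n == 2 ? 0 : -1;
}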
static const struct file_operations timeouts_enabled_fops = {
	.open = simple_open,
	.read = timeouts_enabled_read,
	.write = timeouts_enabled_write,
};

void gk20a_debug_init(struct gk20a *g, const char *debugfs_symlink)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	struct device *dev = dev_from_gk20a(g);

	l->debugfs = debugfs_create_dir(dev_name(dev), NULL);
	if (!l->debugfs)
		return;

	if (debugfs_symlink)
		l->debugfs_alias =
			debugfs_create_symlink(debugfs_symlink,
					NULL, dev_name(dev));

	debugfs_create_file("status", S_IRUGO, l->debugfs,
		dev, &gk20a_debug_fops);
	debugfs_create_file("gr_status", S_IRUGO, l->debugfs,
		dev, &gk20a_gr_debug_fops);
	debugfs_create_u32("trace_cmdbuf", S_IRUGO|S_IWUSR,
		l->debugfs, &gk20a_debug_trace_cmdbuf);

	debugfs_create_u32("ch_wdt_timeout_ms", S_IRUGO|S_IWUSR,
		l->debugfs, &g->ch_wdt_timeout_ms);

	debugfs_create_u32("disable_syncpoints", S_IRUGO,
		l->debugfs, &g->disable_syncpoints);

	/* New debug logging API. */
	debugfs_create_u64("log_mask", S_IRUGO|S_IWUSR,
		l->debugfs, &g->log_mask);
	debugfs_create_u32("log_trace", S_IRUGO|S_IWUSR,
		l->debugfs, &g->log_trace);

	l->debugfs_ltc_enabled =
		debugfs_create_bool("ltc_enabled", S_IRUGO|S_IWUSR,
			l->debugfs,
			&g->mm.ltc_enabled_target);

	l->debugfs_gr_idle_timeout_default =
		debugfs_create_u32("gr_idle_timeout_default_us",
			S_IRUGO|S_IWUSR, l->debugfs,
			&g->gr_idle_timeout_default);
	l->debugfs_timeouts_enabled =
		debugfs_create_file("timeouts_enabled",
			S_IRUGO|S_IWUSR,
			l->debugfs,
			g,
			&timeouts_enabled_fops);

	l->debugfs_disable_bigpage =
		debugfs_create_file("disable_bigpage",
			S_IRUGO|S_IWUSR,
			l->debugfs,
			g,
			&disable_bigpage_fops);

	l->debugfs_timeslice_low_priority_us =
		debugfs_create_u32("timeslice_low_priority_us",
			S_IRUGO|S_IWUSR,
			l->debugfs,
			&g->timeslice_low_priority_us);
	l->debugfs_timeslice_medium_priority_us =
		debugfs_create_u32("timeslice_medium_priority_us",
			S_IRUGO|S_IWUSR,
			l->debugfs,
			&g->timeslice_medium_priority_us);
	l->debugfs_timeslice_high_priority_us =
		debugfs_create_u32("timeslice_high_priority_us",
			S_IRUGO|S_IWUSR,
			l->debugfs,
			&g->timeslice_high_priority_us);
	l->debugfs_runlist_interleave =
		debugfs_create_bool("runlist_interleave",
			S_IRUGO|S_IWUSR,
			l->debugfs,
			&g->runlist_interleave);
	l->debugfs_force_preemption_gfxp =
		debugfs_create_bool("force_preemption_gfxp", S_IRUGO|S_IWUSR,
			l->debugfs,
			&g->gr.ctx_vars.force_preemption_gfxp);

	l->debugfs_force_preemption_cilp =
		debugfs_create_bool("force_preemption_cilp", S_IRUGO|S_IWUSR,
			l->debugfs,
			&g->gr.ctx_vars.force_preemption_cilp);

	l->debugfs_dump_ctxsw_stats =
		debugfs_create_bool("dump_ctxsw_stats_on_channel_close",
			S_IRUGO|S_IWUSR, l->debugfs,
			&g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close);

	gr_gk20a_debugfs_init(g);
	gk20a_pmu_debugfs_init(g);
	gk20a_railgating_debugfs_init(g);
#ifdef CONFIG_NVGPU_SUPPORT_CDE
	gk20a_cde_debugfs_init(g);
#endif
	gk20a_ce_debugfs_init(g);
	nvgpu_alloc_debugfs_init(g);
	nvgpu_hal_debugfs_init(g);
	gk20a_fifo_debugfs_init(g);
	gk20a_sched_debugfs_init(g);
#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
	nvgpu_kmem_debugfs_init(g);
#endif
	if (g->pci_vendor_id)
		nvgpu_xve_debugfs_init(g);
}

void gk20a_debug_deinit(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

	if (!l->debugfs)
		return;

	gk20a_fifo_debugfs_deinit(g);

	debugfs_remove_recursive(l->debugfs);
	debugfs_remove(l->debugfs_alias);
}
69
drivers/gpu/nvgpu/os/linux/debug_allocator.c
Normal file
@@ -0,0 +1,69 @@
/*
 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 */

#include "debug_allocator.h"
#include "os_linux.h"

#include <linux/debugfs.h>
#include <linux/seq_file.h>

#include <nvgpu/allocator.h>

static int __alloc_show(struct seq_file *s, void *unused)
{
	struct nvgpu_allocator *a = s->private;

	nvgpu_alloc_print_stats(a, s, 1);

	return 0;
}

static int __alloc_open(struct inode *inode, struct file *file)
{
	return single_open(file, __alloc_show, inode->i_private);
}

static const struct file_operations __alloc_fops = {
	.open = __alloc_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

void nvgpu_init_alloc_debug(struct gk20a *g, struct nvgpu_allocator *a)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

	if (!l->debugfs_allocators)
		return;

	a->debugfs_entry = debugfs_create_file(a->name, S_IRUGO,
					l->debugfs_allocators,
					a, &__alloc_fops);
}

void nvgpu_fini_alloc_debug(struct nvgpu_allocator *a)
{
}

void nvgpu_alloc_debugfs_init(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

	l->debugfs_allocators = debugfs_create_dir("allocators", l->debugfs);
	if (IS_ERR_OR_NULL(l->debugfs_allocators)) {
		l->debugfs_allocators = NULL;
		return;
	}
}
21
drivers/gpu/nvgpu/os/linux/debug_allocator.h
Normal file
@@ -0,0 +1,21 @@
/*
 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 */

#ifndef __NVGPU_DEBUG_ALLOCATOR_H__
#define __NVGPU_DEBUG_ALLOCATOR_H__

struct gk20a;
void nvgpu_alloc_debugfs_init(struct gk20a *g);

#endif /* __NVGPU_DEBUG_ALLOCATOR_H__ */
53
drivers/gpu/nvgpu/os/linux/debug_cde.c
Normal file
@@ -0,0 +1,53 @@
/*
 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 */

#include "debug_cde.h"
#include "platform_gk20a.h"
#include "os_linux.h"

#include <linux/debugfs.h>


static ssize_t gk20a_cde_reload_write(struct file *file,
	const char __user *userbuf, size_t count, loff_t *ppos)
{
	struct nvgpu_os_linux *l = file->private_data;

	gk20a_cde_reload(l);
	return count;
}

static const struct file_operations gk20a_cde_reload_fops = {
	.open = simple_open,
	.write = gk20a_cde_reload_write,
};

void gk20a_cde_debugfs_init(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g));

	if (!platform->has_cde)
		return;

	debugfs_create_u32("cde_parameter", S_IWUSR | S_IRUGO,
			l->debugfs, &l->cde_app.shader_parameter);
	debugfs_create_u32("cde_ctx_count", S_IWUSR | S_IRUGO,
			l->debugfs, &l->cde_app.ctx_count);
	debugfs_create_u32("cde_ctx_usecount", S_IWUSR | S_IRUGO,
			l->debugfs, &l->cde_app.ctx_usecount);
	debugfs_create_u32("cde_ctx_count_top", S_IWUSR | S_IRUGO,
			l->debugfs, &l->cde_app.ctx_count_top);
	debugfs_create_file("reload_cde_firmware", S_IWUSR, l->debugfs,
			l, &gk20a_cde_reload_fops);
}
21
drivers/gpu/nvgpu/os/linux/debug_cde.h
Normal file
@@ -0,0 +1,21 @@
/*
 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 */

#ifndef __NVGPU_DEBUG_CDE_H__
#define __NVGPU_DEBUG_CDE_H__

struct gk20a;
void gk20a_cde_debugfs_init(struct gk20a *g);

#endif /* __NVGPU_DEBUG_CDE_H__ */
30
drivers/gpu/nvgpu/os/linux/debug_ce.c
Normal file
@@ -0,0 +1,30 @@
/*
 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 */

#include "debug_ce.h"
#include "os_linux.h"

#include <linux/debugfs.h>

void gk20a_ce_debugfs_init(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

	debugfs_create_u32("ce_app_ctx_count", S_IWUSR | S_IRUGO,
			l->debugfs, &g->ce_app.ctx_count);
	debugfs_create_u32("ce_app_state", S_IWUSR | S_IRUGO,
			l->debugfs, &g->ce_app.app_state);
	debugfs_create_u32("ce_app_next_ctx_id", S_IWUSR | S_IRUGO,
			l->debugfs, &g->ce_app.next_ctx_id);
}
21
drivers/gpu/nvgpu/os/linux/debug_ce.h
Normal file
@@ -0,0 +1,21 @@
/*
 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 */

#ifndef __NVGPU_DEBUG_CE_H__
#define __NVGPU_DEBUG_CE_H__

struct gk20a;
void gk20a_ce_debugfs_init(struct gk20a *g);

#endif /* __NVGPU_DEBUG_CE_H__ */
271
drivers/gpu/nvgpu/os/linux/debug_clk.c
Normal file
@@ -0,0 +1,271 @@
/*
 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 */

#include <linux/uaccess.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>

#include "gm20b/clk_gm20b.h"
#include "os_linux.h"
#include "platform_gk20a.h"

static int rate_get(void *data, u64 *val)
{
	struct gk20a *g = (struct gk20a *)data;
	struct clk_gk20a *clk = &g->clk;

	*val = (u64)rate_gpc2clk_to_gpu(clk->gpc_pll.freq);
	return 0;
}

static int rate_set(void *data, u64 val)
{
	struct gk20a *g = (struct gk20a *)data;

	return g->ops.clk.set_rate(g, CTRL_CLK_DOMAIN_GPCCLK, (u32)val);
}
DEFINE_SIMPLE_ATTRIBUTE(rate_fops, rate_get, rate_set, "%llu\n");

static int pll_reg_show(struct seq_file *s, void *data)
{
	struct gk20a *g = s->private;
	struct nvgpu_clk_pll_debug_data d;
	u32 reg, m, n, pl, f;
	int err = 0;

	if (g->ops.clk.get_pll_debug_data) {
		err = g->ops.clk.get_pll_debug_data(g, &d);
		if (err)
			return err;
	} else {
		return -EINVAL;
	}

	seq_printf(s, "bypassctrl = %s, ",
		d.trim_sys_bypassctrl_val ? "bypass" : "vco");
	seq_printf(s, "sel_vco = %s, ",
		d.trim_sys_sel_vco_val ? "vco" : "bypass");

	seq_printf(s, "cfg = 0x%x : %s : %s : %s\n", d.trim_sys_gpcpll_cfg_val,
		d.trim_sys_gpcpll_cfg_enabled ? "enabled" : "disabled",
		d.trim_sys_gpcpll_cfg_locked ? "locked" : "unlocked",
		d.trim_sys_gpcpll_cfg_sync_on ? "sync_on" : "sync_off");

	reg = d.trim_sys_gpcpll_coeff_val;
	m = d.trim_sys_gpcpll_coeff_mdiv;
	n = d.trim_sys_gpcpll_coeff_ndiv;
	pl = d.trim_sys_gpcpll_coeff_pldiv;
	f = g->clk.gpc_pll.clk_in * n / (m * nvgpu_pl_to_div(pl));
	seq_printf(s, "coef = 0x%x : m = %u : n = %u : pl = %u", reg, m, n, pl);
	seq_printf(s, " : pll_f(gpu_f) = %u(%u) kHz\n", f, f/2);

	seq_printf(s, "dvfs0 = 0x%x : d = %u : dmax = %u : doffs = %u\n",
		d.trim_sys_gpcpll_dvfs0_val,
		d.trim_sys_gpcpll_dvfs0_dfs_coeff,
		d.trim_sys_gpcpll_dvfs0_dfs_det_max,
		d.trim_sys_gpcpll_dvfs0_dfs_dc_offset);

	return 0;
}

static int pll_reg_open(struct inode *inode, struct file *file)
{
	return single_open(file, pll_reg_show, inode->i_private);
}

static const struct file_operations pll_reg_fops = {
	.open = pll_reg_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

static int pll_reg_raw_show(struct seq_file *s, void *data)
{
	struct gk20a *g = s->private;
	struct nvgpu_clk_pll_debug_data d;
	u32 reg;
	int err = 0;

	if (g->ops.clk.get_pll_debug_data) {
		err = g->ops.clk.get_pll_debug_data(g, &d);
		if (err)
			return err;
	} else {
		return -EINVAL;
	}

	seq_puts(s, "GPCPLL REGISTERS:\n");
	for (reg = d.trim_sys_gpcpll_cfg_reg;
	     reg <= d.trim_sys_gpcpll_dvfs2_reg;
	     reg += sizeof(u32))
		seq_printf(s, "[0x%02x] = 0x%08x\n", reg, gk20a_readl(g, reg));

	seq_puts(s, "\nGPC CLK OUT REGISTERS:\n");

	seq_printf(s, "[0x%02x] = 0x%08x\n", d.trim_sys_sel_vco_reg,
		d.trim_sys_sel_vco_val);
	seq_printf(s, "[0x%02x] = 0x%08x\n", d.trim_sys_gpc2clk_out_reg,
		d.trim_sys_gpc2clk_out_val);
	seq_printf(s, "[0x%02x] = 0x%08x\n", d.trim_sys_bypassctrl_reg,
		d.trim_sys_bypassctrl_val);

	return 0;
}

static int pll_reg_raw_open(struct inode *inode, struct file *file)
{
	return single_open(file, pll_reg_raw_show, inode->i_private);
}

static ssize_t pll_reg_raw_write(struct file *file,
	const char __user *userbuf, size_t count, loff_t *ppos)
{
	struct gk20a *g = file->f_path.dentry->d_inode->i_private;
	char buf[80];
	u32 reg, val;
	int err = 0;

	if (sizeof(buf) <= count)
		return -EINVAL;

	if (copy_from_user(buf, userbuf, count))
		return -EFAULT;

	/* terminate buffer and trim - white spaces may be appended
	 * at the end when invoked from shell command line */
	buf[count] = '\0';
	strim(buf);

	if (sscanf(buf, "[0x%x] = 0x%x", &reg, &val) != 2)
		return -EINVAL;

	/* check that the HAL op is hooked up before calling it; calling
	 * it inside the condition would write the register twice */
	if (g->ops.clk.pll_reg_write)
		err = g->ops.clk.pll_reg_write(g, reg, val);
	else
		err = -EINVAL;

	/* a write handler must consume the buffer on success */
	return err ? err : count;
}
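The raw write path accepts exactly the format that pll_reg_raw_show prints, "[0x<offset>] = 0x<value>", which makes a read-modify-write round trip through the same file straightforward. A hedged userspace sketch (the debugfs path is an assumption):

#include <stdio.h>

int gpu_poke_pll_reg(const char *path, unsigned int reg, unsigned int val)
{
	/* e.g. "/sys/kernel/debug/<gpu-dev-name>/pll_reg_raw" (assumed) */
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	/* must match the sscanf() pattern in pll_reg_raw_write() */
	fprintf(f, "[0x%x] = 0x%x\n", reg, val);
	return fclose(f);
}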
static const struct file_operations pll_reg_raw_fops = {
	.open = pll_reg_raw_open,
	.read = seq_read,
	.write = pll_reg_raw_write,
	.llseek = seq_lseek,
	.release = single_release,
};

static int monitor_get(void *data, u64 *val)
{
	struct gk20a *g = (struct gk20a *)data;
	int err = 0;

	if (g->ops.clk.get_gpcclk_clock_counter)
		err = g->ops.clk.get_gpcclk_clock_counter(&g->clk, val);
	else
		err = -EINVAL;

	return err;
}
DEFINE_SIMPLE_ATTRIBUTE(monitor_fops, monitor_get, NULL, "%llu\n");

static int voltage_get(void *data, u64 *val)
{
	struct gk20a *g = (struct gk20a *)data;
	int err = 0;

	if (g->ops.clk.get_voltage)
		err = g->ops.clk.get_voltage(&g->clk, val);
	else
		err = -EINVAL;

	return err;
}
DEFINE_SIMPLE_ATTRIBUTE(voltage_fops, voltage_get, NULL, "%llu\n");

static int pll_param_show(struct seq_file *s, void *data)
{
	struct pll_parms *gpc_pll_params = gm20b_get_gpc_pll_parms();

	seq_printf(s, "ADC offs = %d uV, ADC slope = %d uV, VCO ctrl = 0x%x\n",
		gpc_pll_params->uvdet_offs, gpc_pll_params->uvdet_slope,
		gpc_pll_params->vco_ctrl);
	return 0;
}

static int pll_param_open(struct inode *inode, struct file *file)
{
	return single_open(file, pll_param_show, inode->i_private);
}

static const struct file_operations pll_param_fops = {
	.open = pll_param_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

int gm20b_clk_init_debugfs(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	struct dentry *d;

	if (!l->debugfs)
		return -EINVAL;

	d = debugfs_create_file(
		"rate", S_IRUGO|S_IWUSR, l->debugfs, g, &rate_fops);
	if (!d)
		goto err_out;

	d = debugfs_create_file(
		"pll_reg", S_IRUGO, l->debugfs, g, &pll_reg_fops);
	if (!d)
		goto err_out;

	d = debugfs_create_file("pll_reg_raw",
		S_IRUGO, l->debugfs, g, &pll_reg_raw_fops);
	if (!d)
		goto err_out;

	d = debugfs_create_file(
		"monitor", S_IRUGO, l->debugfs, g, &monitor_fops);
	if (!d)
		goto err_out;

	d = debugfs_create_file(
		"voltage", S_IRUGO, l->debugfs, g, &voltage_fops);
	if (!d)
		goto err_out;

	d = debugfs_create_file(
		"pll_param", S_IRUGO, l->debugfs, g, &pll_param_fops);
	if (!d)
		goto err_out;

	d = debugfs_create_u32("pll_na_mode", S_IRUGO, l->debugfs,
		(u32 *)&g->clk.gpc_pll.mode);
	if (!d)
		goto err_out;

	d = debugfs_create_u32("fmax2x_at_vmin_safe_t", S_IRUGO,
		l->debugfs, (u32 *)&g->clk.dvfs_safe_max_freq);
	if (!d)
		goto err_out;

	return 0;

err_out:
	pr_err("%s: Failed to make debugfs node\n", __func__);
	return -ENOMEM;
}
378
drivers/gpu/nvgpu/os/linux/debug_fifo.c
Normal file
@@ -0,0 +1,378 @@
/*
 * Copyright (C) 2017-2018 NVIDIA Corporation. All rights reserved.
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 */

#include "debug_fifo.h"
#include "os_linux.h"

#include <linux/debugfs.h>
#include <linux/seq_file.h>

#include <nvgpu/sort.h>
#include <nvgpu/timers.h>

void __gk20a_fifo_profile_free(struct nvgpu_ref *ref);

static void *gk20a_fifo_sched_debugfs_seq_start(
		struct seq_file *s, loff_t *pos)
{
	struct gk20a *g = s->private;
	struct fifo_gk20a *f = &g->fifo;

	if (*pos >= f->num_channels)
		return NULL;

	return &f->channel[*pos];
}

static void *gk20a_fifo_sched_debugfs_seq_next(
		struct seq_file *s, void *v, loff_t *pos)
{
	struct gk20a *g = s->private;
	struct fifo_gk20a *f = &g->fifo;

	++(*pos);
	if (*pos >= f->num_channels)
		return NULL;

	return &f->channel[*pos];
}

static void gk20a_fifo_sched_debugfs_seq_stop(
		struct seq_file *s, void *v)
{
}

static int gk20a_fifo_sched_debugfs_seq_show(
		struct seq_file *s, void *v)
{
	struct gk20a *g = s->private;
	struct fifo_gk20a *f = &g->fifo;
	struct channel_gk20a *ch = v;
	struct tsg_gk20a *tsg = NULL;

	struct fifo_engine_info_gk20a *engine_info;
	struct fifo_runlist_info_gk20a *runlist;
	u32 runlist_id;
	int ret = SEQ_SKIP;
	u32 engine_id;

	engine_id = gk20a_fifo_get_gr_engine_id(g);
	engine_info = (f->engine_info + engine_id);
	runlist_id = engine_info->runlist_id;
	runlist = &f->runlist_info[runlist_id];

	if (ch == f->channel) {
		seq_puts(s, "chid tsgid pid timeslice timeout interleave graphics_preempt compute_preempt\n");
		seq_puts(s, " (usecs) (msecs)\n");
		ret = 0;
	}

	if (!test_bit(ch->chid, runlist->active_channels))
		return ret;

	if (gk20a_channel_get(ch)) {
		tsg = tsg_gk20a_from_ch(ch);

		if (tsg)
			seq_printf(s, "%-8d %-8d %-8d %-9d %-8d %-10d %-8d %-8d\n",
				ch->chid,
				ch->tsgid,
				ch->tgid,
				tsg->timeslice_us,
				ch->timeout_ms_max,
				tsg->interleave_level,
				tsg->gr_ctx.graphics_preempt_mode,
				tsg->gr_ctx.compute_preempt_mode);
		gk20a_channel_put(ch);
	}
	return 0;
}

static const struct seq_operations gk20a_fifo_sched_debugfs_seq_ops = {
	.start = gk20a_fifo_sched_debugfs_seq_start,
	.next = gk20a_fifo_sched_debugfs_seq_next,
	.stop = gk20a_fifo_sched_debugfs_seq_stop,
	.show = gk20a_fifo_sched_debugfs_seq_show
};

static int gk20a_fifo_sched_debugfs_open(struct inode *inode,
	struct file *file)
{
	struct gk20a *g = inode->i_private;
	int err;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	err = seq_open(file, &gk20a_fifo_sched_debugfs_seq_ops);
	if (err)
		return err;

	nvgpu_log(g, gpu_dbg_info, "i_private=%p", inode->i_private);

	((struct seq_file *)file->private_data)->private = inode->i_private;
	return 0;
}

/*
 * The file operations structure contains our open function along with a
 * set of the canned seq_ ops.
 */
static const struct file_operations gk20a_fifo_sched_debugfs_fops = {
	.owner = THIS_MODULE,
	.open = gk20a_fifo_sched_debugfs_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release
};

static int gk20a_fifo_profile_enable(void *data, u64 val)
{
	struct gk20a *g = (struct gk20a *) data;
	struct fifo_gk20a *f = &g->fifo;

	nvgpu_mutex_acquire(&f->profile.lock);
	if (val == 0) {
		if (f->profile.enabled) {
			f->profile.enabled = false;
			nvgpu_ref_put(&f->profile.ref,
				__gk20a_fifo_profile_free);
		}
	} else {
		if (!f->profile.enabled) {
			/* no plain kref init here, as that would race if we
			 * enable/disable/enable while a kickoff is happening
			 */
			if (!nvgpu_ref_get_unless_zero(&f->profile.ref)) {
				f->profile.data = nvgpu_vzalloc(g,
					FIFO_PROFILING_ENTRIES *
					sizeof(struct fifo_profile_gk20a));
				f->profile.sorted = nvgpu_vzalloc(g,
					FIFO_PROFILING_ENTRIES *
					sizeof(u64));
				if (!(f->profile.data && f->profile.sorted)) {
					nvgpu_vfree(g, f->profile.data);
					nvgpu_vfree(g, f->profile.sorted);
					nvgpu_mutex_release(&f->profile.lock);
					return -ENOMEM;
				}
				nvgpu_ref_init(&f->profile.ref);
			}
			atomic_set(&f->profile.get.atomic_var, 0);
			f->profile.enabled = true;
		}
	}
	nvgpu_mutex_release(&f->profile.lock);

	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(
	gk20a_fifo_profile_enable_debugfs_fops,
	NULL,
	gk20a_fifo_profile_enable,
	"%llu\n"
);

static int __profile_cmp(const void *a, const void *b)
{
	return *((unsigned long long *) a) - *((unsigned long long *) b);
}

/*
 * This uses about 800 B of stack, but the function using it is not part
 * of a call stack where much memory is being used, so it is fine.
 */
#define PERCENTILE_WIDTH	5
#define PERCENTILE_RANGES	(100/PERCENTILE_WIDTH)

static unsigned int __gk20a_fifo_create_stats(struct gk20a *g,
		u64 *percentiles, u32 index_end, u32 index_start)
{
	unsigned int nelem = 0;
	unsigned int index;
	struct fifo_profile_gk20a *profile;

	for (index = 0; index < FIFO_PROFILING_ENTRIES; index++) {
		profile = &g->fifo.profile.data[index];

		if (profile->timestamp[index_end] >
				profile->timestamp[index_start]) {
			/* This is a valid element */
			g->fifo.profile.sorted[nelem] =
				profile->timestamp[index_end] -
				profile->timestamp[index_start];
			nelem++;
		}
	}

	/* sort it */
	sort(g->fifo.profile.sorted, nelem, sizeof(unsigned long long),
		__profile_cmp, NULL);

	/* build ranges */
	for (index = 0; index < PERCENTILE_RANGES; index++) {
		percentiles[index] = nelem < PERCENTILE_RANGES ? 0 :
			g->fifo.profile.sorted[(PERCENTILE_WIDTH * (index + 1) *
				nelem)/100 - 1];
	}
	return nelem;
}
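To make the bucket arithmetic concrete: with PERCENTILE_WIDTH = 5 and, say, nelem = 1000 valid samples, bucket index 0 reads sorted[(5 * 1 * 1000)/100 - 1] = sorted[49], the 5th-percentile latency, and the last bucket reads sorted[999], the maximum; runs with fewer than PERCENTILE_RANGES samples report zeros. The same indexing as a standalone helper, for illustration only:

/* Illustrative restatement of the indexing above (bucket is 1-based,
 * 1..PERCENTILE_RANGES); not part of this commit. */
static u64 percentile_sample(const u64 *sorted, unsigned int nelem,
	unsigned int bucket)
{
	return sorted[(PERCENTILE_WIDTH * bucket * nelem) / 100 - 1];
}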
static int gk20a_fifo_profile_stats(struct seq_file *s, void *unused)
{
	struct gk20a *g = s->private;
	unsigned int get, nelem, index;
	/*
	 * About 800 B of stack, but the function is static and only
	 * called from the debugfs handler.
	 */
	u64 percentiles_ioctl[PERCENTILE_RANGES];
	u64 percentiles_kickoff[PERCENTILE_RANGES];
	u64 percentiles_jobtracking[PERCENTILE_RANGES];
	u64 percentiles_append[PERCENTILE_RANGES];
	u64 percentiles_userd[PERCENTILE_RANGES];

	if (!nvgpu_ref_get_unless_zero(&g->fifo.profile.ref)) {
		seq_printf(s, "Profiling disabled\n");
		return 0;
	}

	get = atomic_read(&g->fifo.profile.get.atomic_var);

	__gk20a_fifo_create_stats(g, percentiles_ioctl,
		PROFILE_IOCTL_EXIT, PROFILE_IOCTL_ENTRY);
	__gk20a_fifo_create_stats(g, percentiles_kickoff,
		PROFILE_END, PROFILE_ENTRY);
	__gk20a_fifo_create_stats(g, percentiles_jobtracking,
		PROFILE_JOB_TRACKING, PROFILE_IOCTL_ENTRY);
	__gk20a_fifo_create_stats(g, percentiles_append,
		PROFILE_APPEND, PROFILE_JOB_TRACKING);
	nelem = __gk20a_fifo_create_stats(g, percentiles_userd,
		PROFILE_END, PROFILE_APPEND);

	seq_printf(s, "Number of kickoffs: %d\n", nelem);
	seq_printf(s, "Perc \t ioctl(ns) \t kickoff(ns) \t pbcopy(ns) \t jobtrack(ns) \t userd(ns)\n");

	for (index = 0; index < PERCENTILE_RANGES; index++)
		seq_printf(s, "[%2dpc]\t%8lld\t%8lld\t%8lld\t%8lld\t%8lld\n",
			PERCENTILE_WIDTH * (index+1),
			percentiles_ioctl[index],
			percentiles_kickoff[index],
			percentiles_append[index],
			percentiles_jobtracking[index],
			percentiles_userd[index]);

	nvgpu_ref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free);

	return 0;
}

static int gk20a_fifo_profile_stats_open(struct inode *inode, struct file *file)
{
	return single_open(file, gk20a_fifo_profile_stats, inode->i_private);
}

static const struct file_operations gk20a_fifo_profile_stats_debugfs_fops = {
	.open = gk20a_fifo_profile_stats_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

void gk20a_fifo_debugfs_init(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	struct dentry *gpu_root = l->debugfs;
	struct dentry *fifo_root;
	struct dentry *profile_root;

	fifo_root = debugfs_create_dir("fifo", gpu_root);
	if (IS_ERR_OR_NULL(fifo_root))
		return;

	nvgpu_log(g, gpu_dbg_info, "g=%p", g);

	debugfs_create_file("sched", 0600, fifo_root, g,
		&gk20a_fifo_sched_debugfs_fops);

	profile_root = debugfs_create_dir("profile", fifo_root);
	if (IS_ERR_OR_NULL(profile_root))
		return;

	nvgpu_mutex_init(&g->fifo.profile.lock);
	g->fifo.profile.enabled = false;
	atomic_set(&g->fifo.profile.get.atomic_var, 0);
	atomic_set(&g->fifo.profile.ref.refcount.atomic_var, 0);

	debugfs_create_file("enable", 0600, profile_root, g,
		&gk20a_fifo_profile_enable_debugfs_fops);

	debugfs_create_file("stats", 0600, profile_root, g,
		&gk20a_fifo_profile_stats_debugfs_fops);
}

void gk20a_fifo_profile_snapshot(struct fifo_profile_gk20a *profile, int idx)
{
	if (profile)
		profile->timestamp[idx] = nvgpu_current_time_ns();
}

void __gk20a_fifo_profile_free(struct nvgpu_ref *ref)
{
	struct fifo_gk20a *f = container_of(ref, struct fifo_gk20a,
		profile.ref);

	nvgpu_vfree(f->g, f->profile.data);
	nvgpu_vfree(f->g, f->profile.sorted);
}

/*
 * Get the next element in the ring buffer of profile entries
 * and grab a reference to the structure.
 */
struct fifo_profile_gk20a *gk20a_fifo_profile_acquire(struct gk20a *g)
{
	struct fifo_gk20a *f = &g->fifo;
	struct fifo_profile_gk20a *profile;
	unsigned int index;

	/* If kref is zero, profiling is not enabled */
	if (!nvgpu_ref_get_unless_zero(&f->profile.ref))
		return NULL;
	index = atomic_inc_return(&f->profile.get.atomic_var);
	profile = &f->profile.data[index % FIFO_PROFILING_ENTRIES];

	return profile;
}

/* Free the reference to the structure. This allows deferred cleanups */
void gk20a_fifo_profile_release(struct gk20a *g,
	struct fifo_profile_gk20a *profile)
{
	nvgpu_ref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free);
}
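The acquire/snapshot/release triple above keeps the instrumentation points cheap and NULL-tolerant: acquire returns NULL whenever profiling is off, and snapshot quietly ignores a NULL slot. A hedged sketch of how a submit path could use it (the surrounding function and the chosen snapshot points are illustrative; the index constants come from the stats code above):

static void profile_one_submit(struct gk20a *g)
{
	struct fifo_profile_gk20a *profile = gk20a_fifo_profile_acquire(g);

	gk20a_fifo_profile_snapshot(profile, PROFILE_IOCTL_ENTRY);
	/* ... do the actual submission work here ... */
	gk20a_fifo_profile_snapshot(profile, PROFILE_IOCTL_EXIT);

	if (profile)
		gk20a_fifo_profile_release(g, profile);
}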
void gk20a_fifo_debugfs_deinit(struct gk20a *g)
{
	struct fifo_gk20a *f = &g->fifo;

	nvgpu_mutex_acquire(&f->profile.lock);
	if (f->profile.enabled) {
		f->profile.enabled = false;
		nvgpu_ref_put(&f->profile.ref, __gk20a_fifo_profile_free);
	}
	nvgpu_mutex_release(&f->profile.lock);
}
22
drivers/gpu/nvgpu/os/linux/debug_fifo.h
Normal file
@@ -0,0 +1,22 @@
/*
 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 */

#ifndef __NVGPU_DEBUG_FIFO_H__
#define __NVGPU_DEBUG_FIFO_H__

struct gk20a;
void gk20a_fifo_debugfs_init(struct gk20a *g);
void gk20a_fifo_debugfs_deinit(struct gk20a *g);

#endif /* __NVGPU_DEBUG_FIFO_H__ */
31
drivers/gpu/nvgpu/os/linux/debug_gr.c
Normal file
@@ -0,0 +1,31 @@
/*
 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 */

#include "debug_gr.h"
#include "os_linux.h"

#include <linux/debugfs.h>

int gr_gk20a_debugfs_init(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

	l->debugfs_gr_default_attrib_cb_size =
		debugfs_create_u32("gr_default_attrib_cb_size",
			S_IRUGO|S_IWUSR, l->debugfs,
			&g->gr.attrib_cb_default_size);

	return 0;
}
21
drivers/gpu/nvgpu/os/linux/debug_gr.h
Normal file
@@ -0,0 +1,21 @@
/*
 * Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 */

#ifndef __NVGPU_DEBUG_GR_H__
#define __NVGPU_DEBUG_GR_H__

struct gk20a;
int gr_gk20a_debugfs_init(struct gk20a *g);

#endif /* __NVGPU_DEBUG_GR_H__ */
95
drivers/gpu/nvgpu/os/linux/debug_hal.c
Normal file
@@ -0,0 +1,95 @@
/*
 * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 */

#include "debug_hal.h"
#include "os_linux.h"

#include <linux/debugfs.h>
#include <linux/seq_file.h>

/* Format and print a single function pointer to the specified seq_file. */
static void __hal_print_op(struct seq_file *s, void *op_ptr)
{
	seq_printf(s, "%pF\n", op_ptr);
}

/*
 * Prints an array of function pointer addresses in op_ptrs to the
 * specified seq_file.
 */
static void __hal_print_ops(struct seq_file *s, void **op_ptrs, int num_ops)
{
	int i;

	for (i = 0; i < num_ops; i++)
		__hal_print_op(s, op_ptrs[i]);
}

/*
 * Show file operation, which generates the content of the file once. Prints a
 * list of gpu operations as defined by gops and the corresponding function
 * pointer destination addresses. Relies on no compiler reordering of struct
 * fields and the assumption that all members are function pointers.
 */
static int __hal_show(struct seq_file *s, void *unused)
{
	struct gpu_ops *gops = s->private;

	__hal_print_ops(s, (void **)gops, sizeof(*gops) / sizeof(void *));

	return 0;
}

static int __hal_open(struct inode *inode, struct file *file)
{
	return single_open(file, __hal_show, inode->i_private);
}

static const struct file_operations __hal_fops = {
	.open = __hal_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

void nvgpu_hal_debugfs_fini(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

	if (l->debugfs_hal != NULL)
		debugfs_remove_recursive(l->debugfs_hal);
}

void nvgpu_hal_debugfs_init(struct gk20a *g)
{
	struct dentry *d;
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

	if (!l->debugfs)
		return;
	l->debugfs_hal = debugfs_create_dir("hal", l->debugfs);
	if (IS_ERR_OR_NULL(l->debugfs_hal)) {
		l->debugfs_hal = NULL;
		return;
	}

	/* Pass along a reference to the gpu_ops struct as private data */
	d = debugfs_create_file("gops", S_IRUGO, l->debugfs_hal,
				&g->ops, &__hal_fops);
	if (!d) {
		nvgpu_err(g, "%s: Failed to make debugfs node\n", __func__);
		debugfs_remove_recursive(l->debugfs_hal);
		return;
	}
}
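A note on the trick __hal_show relies on: because struct gpu_ops is assumed to contain nothing but function pointers, the whole struct can be walked as a flat array of void pointers. Below is a minimal, self-contained userspace sketch of the same technique; the two-member ops struct and its functions are hypothetical stand-ins for gpu_ops.

#include <stdio.h>

static int op_a(void) { return 0; }
static int op_b(void) { return 1; }

/* Stand-in for gpu_ops: every member must be a function pointer. */
struct fake_ops {
	int (*a)(void);
	int (*b)(void);
};

int main(void)
{
	struct fake_ops ops = { .a = op_a, .b = op_b };
	/* Same cast as __hal_show: treat the struct as a void *[]. */
	void **ptrs = (void **)&ops;
	size_t i, n = sizeof(ops) / sizeof(void *);

	for (i = 0; i < n; i++)
		printf("op %zu: %p\n", i, ptrs[i]);
	return 0;
}

As the driver comment notes, this only holds while the compiler neither reorders nor pads the struct and every member really is a function pointer.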
22
drivers/gpu/nvgpu/os/linux/debug_hal.h
Normal file
@@ -0,0 +1,22 @@
/*
 * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 */

#ifndef __NVGPU_DEBUG_HAL_H__
#define __NVGPU_DEBUG_HAL_H__

struct gk20a;
void nvgpu_hal_debugfs_fini(struct gk20a *g);
void nvgpu_hal_debugfs_init(struct gk20a *g);

#endif /* __NVGPU_DEBUG_HAL_H__ */
312
drivers/gpu/nvgpu/os/linux/debug_kmem.c
Normal file
@@ -0,0 +1,312 @@
/*
 * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

#include <linux/debugfs.h>
#include <linux/seq_file.h>

#include "os_linux.h"
#include "debug_kmem.h"
#include "kmem_priv.h"

/**
 * to_human_readable_bytes - Determine suffix for passed size.
 *
 * @bytes - Number of bytes to generate a suffix for.
 * @hr_bytes [out] - The human readable number of bytes.
 * @hr_suffix [out] - The suffix for the HR number of bytes.
 *
 * Computes a human readable decomposition of the passed number of bytes. The
 * suffix for the bytes is passed back through the @hr_suffix pointer. The
 * corresponding number of bytes is passed back in @hr_bytes. This returns the
 * following ranges:
 *
 *   0 - 1023 B
 *   1 - 1023 KB
 *   1 - 1023 MB
 *   1 - 1023 GB
 *   1 - 1023 TB
 *   1 - ...  PB
 */
static void __to_human_readable_bytes(u64 bytes, u64 *hr_bytes,
				      const char **hr_suffix)
{
	static const char *suffixes[] =
		{ "B", "KB", "MB", "GB", "TB", "PB" };

	u64 suffix_ind = 0;

	while (suffix_ind < ARRAY_SIZE(suffixes) && bytes >= 1024) {
		bytes >>= 10;
		suffix_ind++;
	}

	/*
	 * Handle the case where bytes > 1023PB.
	 */
	suffix_ind = suffix_ind < ARRAY_SIZE(suffixes) ?
		suffix_ind : ARRAY_SIZE(suffixes) - 1;

	*hr_bytes = bytes;
	*hr_suffix = suffixes[suffix_ind];
}

/**
 * print_hr_bytes - Print human readable bytes
 *
 * @s - A seq_file to print to. May be NULL.
 * @msg - A message to print before the bytes.
 * @bytes - Number of bytes.
 *
 * Print @msg followed by the human readable decomposition of the passed number
 * of bytes.
 *
 * If @s is NULL then the prints will be made to the kernel log.
 */
static void print_hr_bytes(struct seq_file *s, const char *msg, u64 bytes)
{
	u64 hr_bytes;
	const char *hr_suffix;

	__to_human_readable_bytes(bytes, &hr_bytes, &hr_suffix);
	__pstat(s, "%s%lld %s\n", msg, hr_bytes, hr_suffix);
}

/**
 * print_histogram - Build a histogram of the memory usage.
 *
 * @tracker The tracking to pull data from.
 * @s       A seq_file to dump info into.
 */
static void print_histogram(struct nvgpu_mem_alloc_tracker *tracker,
			    struct seq_file *s)
{
	int i;
	u64 pot_min, pot_max;
	u64 nr_buckets;
	unsigned int *buckets;
	unsigned int total_allocs;
	struct nvgpu_rbtree_node *node;
	static const char histogram_line[] =
		"++++++++++++++++++++++++++++++++++++++++";

	/*
	 * pot_min is essentially a round down to the nearest power of 2. This
	 * is the start of the histogram. pot_max is just a round up to the
	 * nearest power of two. Each histogram bucket is one power of two so
	 * the histogram buckets are exponential.
	 */
	pot_min = (u64)rounddown_pow_of_two(tracker->min_alloc);
	pot_max = (u64)roundup_pow_of_two(tracker->max_alloc);

	nr_buckets = __ffs(pot_max) - __ffs(pot_min);

	buckets = kzalloc(sizeof(*buckets) * nr_buckets, GFP_KERNEL);
	if (!buckets) {
		__pstat(s, "OOM: could not allocate bucket storage!?\n");
		return;
	}

	/*
	 * Iterate across all of the allocs and determine what bucket they
	 * should go in. Round the size down to the nearest power of two to
	 * find the right bucket.
	 */
	nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
	while (node) {
		int b;
		u64 bucket_min;
		struct nvgpu_mem_alloc *alloc =
			nvgpu_mem_alloc_from_rbtree_node(node);

		bucket_min = (u64)rounddown_pow_of_two(alloc->size);
		if (bucket_min < tracker->min_alloc)
			bucket_min = tracker->min_alloc;

		b = __ffs(bucket_min) - __ffs(pot_min);

		/*
		 * Handle the one case where there's an alloc exactly as big as
		 * the maximum bucket size of the largest bucket. Most of the
		 * buckets have an inclusive minimum and exclusive maximum. But
		 * the largest bucket needs to have an _inclusive_ maximum as
		 * well.
		 */
		if (b == (int)nr_buckets)
			b--;

		buckets[b]++;

		nvgpu_rbtree_enum_next(&node, node);
	}

	total_allocs = 0;
	for (i = 0; i < (int)nr_buckets; i++)
		total_allocs += buckets[i];

	__pstat(s, "Alloc histogram:\n");

	/*
	 * Actually compute the histogram lines.
	 */
	for (i = 0; i < (int)nr_buckets; i++) {
		char this_line[sizeof(histogram_line) + 1];
		u64 line_length;
		u64 hr_bytes;
		const char *hr_suffix;

		memset(this_line, 0, sizeof(this_line));

		/*
		 * Compute the normalized line length. Can't use floating
		 * point, so we just multiply everything by 1000 and use
		 * fixed point.
		 */
		line_length = (1000 * buckets[i]) / total_allocs;
		line_length *= sizeof(histogram_line);
		line_length /= 1000;

		memset(this_line, '+', line_length);

		__to_human_readable_bytes(1 << (__ffs(pot_min) + i),
					  &hr_bytes, &hr_suffix);
		__pstat(s, "  [%-4lld %-4lld] %-2s %5u | %s\n",
			hr_bytes, hr_bytes << 1,
			hr_suffix, buckets[i], this_line);
	}
}

/**
 * nvgpu_kmem_print_stats - Print kmem tracking stats.
 *
 * @tracker The tracking to pull data from.
 * @s       A seq_file to dump info into.
 *
 * Print stats from a tracker. If @s is non-null then seq_printf() will be
 * used with @s. Otherwise the stats are pr_info()ed.
 */
void nvgpu_kmem_print_stats(struct nvgpu_mem_alloc_tracker *tracker,
			    struct seq_file *s)
{
	nvgpu_lock_tracker(tracker);

	__pstat(s, "Mem tracker: %s\n\n", tracker->name);

	__pstat(s, "Basic Stats:\n");
	__pstat(s, "  Number of allocs        %lld\n",
		tracker->nr_allocs);
	__pstat(s, "  Number of frees         %lld\n",
		tracker->nr_frees);
	print_hr_bytes(s, "  Smallest alloc          ", tracker->min_alloc);
	print_hr_bytes(s, "  Largest alloc           ", tracker->max_alloc);
	print_hr_bytes(s, "  Bytes allocated         ", tracker->bytes_alloced);
	print_hr_bytes(s, "  Bytes freed             ", tracker->bytes_freed);
	print_hr_bytes(s, "  Bytes allocated (real)  ",
		       tracker->bytes_alloced_real);
	print_hr_bytes(s, "  Bytes freed (real)      ",
		       tracker->bytes_freed_real);
	__pstat(s, "\n");

	print_histogram(tracker, s);

	nvgpu_unlock_tracker(tracker);
}

static int __kmem_tracking_show(struct seq_file *s, void *unused)
{
	struct nvgpu_mem_alloc_tracker *tracker = s->private;

	nvgpu_kmem_print_stats(tracker, s);

	return 0;
}

static int __kmem_tracking_open(struct inode *inode, struct file *file)
{
	return single_open(file, __kmem_tracking_show, inode->i_private);
}

static const struct file_operations __kmem_tracking_fops = {
	.open = __kmem_tracking_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

static int __kmem_traces_dump_tracker(struct gk20a *g,
				      struct nvgpu_mem_alloc_tracker *tracker,
				      struct seq_file *s)
{
	struct nvgpu_rbtree_node *node;

	nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
	while (node) {
		struct nvgpu_mem_alloc *alloc =
			nvgpu_mem_alloc_from_rbtree_node(node);

		kmem_print_mem_alloc(g, alloc, s);

		nvgpu_rbtree_enum_next(&node, node);
	}

	return 0;
}

static int __kmem_traces_show(struct seq_file *s, void *unused)
{
	struct gk20a *g = s->private;

	nvgpu_lock_tracker(g->vmallocs);
	seq_puts(s, "Outstanding vmallocs:\n");
	__kmem_traces_dump_tracker(g, g->vmallocs, s);
	seq_puts(s, "\n");
	nvgpu_unlock_tracker(g->vmallocs);

	nvgpu_lock_tracker(g->kmallocs);
	seq_puts(s, "Outstanding kmallocs:\n");
	__kmem_traces_dump_tracker(g, g->kmallocs, s);
	nvgpu_unlock_tracker(g->kmallocs);

	return 0;
}

static int __kmem_traces_open(struct inode *inode, struct file *file)
{
	return single_open(file, __kmem_traces_show, inode->i_private);
}

static const struct file_operations __kmem_traces_fops = {
	.open = __kmem_traces_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

void nvgpu_kmem_debugfs_init(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	struct dentry *node;

	l->debugfs_kmem = debugfs_create_dir("kmem_tracking", l->debugfs);
	if (IS_ERR_OR_NULL(l->debugfs_kmem))
		return;

	node = debugfs_create_file(g->vmallocs->name, S_IRUGO,
				   l->debugfs_kmem,
				   g->vmallocs, &__kmem_tracking_fops);
	node = debugfs_create_file(g->kmallocs->name, S_IRUGO,
				   l->debugfs_kmem,
				   g->kmallocs, &__kmem_tracking_fops);
	node = debugfs_create_file("traces", S_IRUGO,
				   l->debugfs_kmem,
				   g, &__kmem_traces_fops);
}
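For reference, the bucket selection in print_histogram is just a distance in powers of two: b = __ffs(rounddown_pow_of_two(size)) - __ffs(pot_min). A small userspace sketch with made-up sizes (POSIX ffs() plays the role of the kernel's __ffs() here; on power-of-two inputs the off-by-one between the two cancels in the subtraction):

#include <stdio.h>
#include <strings.h>	/* ffs() */

/* Round down to the nearest power of two, like rounddown_pow_of_two(). */
static unsigned long rounddown_pow2(unsigned long x)
{
	while (x & (x - 1))
		x &= x - 1;	/* clear the lowest set bit */
	return x;
}

int main(void)
{
	unsigned long min_alloc = 96;	/* hypothetical smallest alloc */
	unsigned long size = 700;	/* hypothetical alloc to classify */
	int b = ffs(rounddown_pow2(size)) - ffs(rounddown_pow2(min_alloc));

	/* 700 rounds down to 512 and 96 to 64, so this prints bucket 3. */
	printf("bucket index: %d\n", b);
	return 0;
}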
23
drivers/gpu/nvgpu/os/linux/debug_kmem.h
Normal file
@@ -0,0 +1,23 @@
/*
 * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 */

#ifndef __NVGPU_DEBUG_KMEM_H__
#define __NVGPU_DEBUG_KMEM_H__

struct gk20a;
#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
void nvgpu_kmem_debugfs_init(struct gk20a *g);
#endif

#endif /* __NVGPU_DEBUG_KMEM_H__ */
481
drivers/gpu/nvgpu/os/linux/debug_pmu.c
Normal file
@@ -0,0 +1,481 @@
/*
 * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 */

#include <nvgpu/enabled.h>
#include "debug_pmu.h"
#include "os_linux.h"

#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/uaccess.h>

static int lpwr_debug_show(struct seq_file *s, void *data)
{
	struct gk20a *g = s->private;

	if (g->ops.pmu.pmu_pg_engines_feature_list &&
	    g->ops.pmu.pmu_pg_engines_feature_list(g,
			PMU_PG_ELPG_ENGINE_ID_GRAPHICS) !=
	    NVGPU_PMU_GR_FEATURE_MASK_POWER_GATING) {
		seq_printf(s, "PSTATE: %u\n"
			"RPPG Enabled: %u\n"
			"RPPG ref count: %u\n"
			"RPPG state: %u\n"
			"MSCG Enabled: %u\n"
			"MSCG pstate state: %u\n"
			"MSCG transition state: %u\n",
			g->ops.clk_arb.get_current_pstate(g),
			g->elpg_enabled, g->pmu.elpg_refcnt,
			g->pmu.elpg_stat, g->mscg_enabled,
			g->pmu.mscg_stat, g->pmu.mscg_transition_state);

	} else
		seq_printf(s, "ELPG Enabled: %u\n"
			"ELPG ref count: %u\n"
			"ELPG state: %u\n",
			g->elpg_enabled, g->pmu.elpg_refcnt,
			g->pmu.elpg_stat);

	return 0;

}

static int lpwr_debug_open(struct inode *inode, struct file *file)
{
	return single_open(file, lpwr_debug_show, inode->i_private);
}

static const struct file_operations lpwr_debug_fops = {
	.open = lpwr_debug_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

static int mscg_stat_show(struct seq_file *s, void *data)
{
	struct gk20a *g = s->private;
	u64 total_ingating, total_ungating, residency, divisor, dividend;
	struct pmu_pg_stats_data pg_stat_data = { 0 };
	int err;

	/* Don't unnecessarily power on the device */
	if (g->power_on) {
		err = gk20a_busy(g);
		if (err)
			return err;

		nvgpu_pmu_get_pg_stats(g,
			PMU_PG_ELPG_ENGINE_ID_MS, &pg_stat_data);
		gk20a_idle(g);
	}
	total_ingating = g->pg_ingating_time_us +
			(u64)pg_stat_data.ingating_time;
	total_ungating = g->pg_ungating_time_us +
			(u64)pg_stat_data.ungating_time;

	divisor = total_ingating + total_ungating;

	/* We compute the residency on a scale of 1000 */
	dividend = total_ingating * 1000;

	if (divisor)
		residency = div64_u64(dividend, divisor);
	else
		residency = 0;

	seq_printf(s,
		"Time in MSCG: %llu us\n"
		"Time out of MSCG: %llu us\n"
		"MSCG residency ratio: %llu\n"
		"MSCG Entry Count: %u\n"
		"MSCG Avg Entry latency %u\n"
		"MSCG Avg Exit latency %u\n",
		total_ingating, total_ungating,
		residency, pg_stat_data.gating_cnt,
		pg_stat_data.avg_entry_latency_us,
		pg_stat_data.avg_exit_latency_us);
	return 0;

}

static int mscg_stat_open(struct inode *inode, struct file *file)
{
	return single_open(file, mscg_stat_show, inode->i_private);
}

static const struct file_operations mscg_stat_fops = {
	.open = mscg_stat_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

static int mscg_transitions_show(struct seq_file *s, void *data)
{
	struct gk20a *g = s->private;
	struct pmu_pg_stats_data pg_stat_data = { 0 };
	u32 total_gating_cnt;
	int err;

	if (g->power_on) {
		err = gk20a_busy(g);
		if (err)
			return err;

		nvgpu_pmu_get_pg_stats(g,
			PMU_PG_ELPG_ENGINE_ID_MS, &pg_stat_data);
		gk20a_idle(g);
	}
	total_gating_cnt = g->pg_gating_cnt + pg_stat_data.gating_cnt;

	seq_printf(s, "%u\n", total_gating_cnt);
	return 0;

}

static int mscg_transitions_open(struct inode *inode, struct file *file)
{
	return single_open(file, mscg_transitions_show, inode->i_private);
}

static const struct file_operations mscg_transitions_fops = {
	.open = mscg_transitions_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

static int elpg_stat_show(struct seq_file *s, void *data)
{
	struct gk20a *g = s->private;
	struct pmu_pg_stats_data pg_stat_data = { 0 };
	u64 total_ingating, total_ungating, residency, divisor, dividend;
	int err;

	/* Don't unnecessarily power on the device */
	if (g->power_on) {
		err = gk20a_busy(g);
		if (err)
			return err;

		nvgpu_pmu_get_pg_stats(g,
			PMU_PG_ELPG_ENGINE_ID_GRAPHICS, &pg_stat_data);
		gk20a_idle(g);
	}
	total_ingating = g->pg_ingating_time_us +
			(u64)pg_stat_data.ingating_time;
	total_ungating = g->pg_ungating_time_us +
			(u64)pg_stat_data.ungating_time;
	divisor = total_ingating + total_ungating;

	/* We compute the residency on a scale of 1000 */
	dividend = total_ingating * 1000;

	if (divisor)
		residency = div64_u64(dividend, divisor);
	else
		residency = 0;

	seq_printf(s,
		"Time in ELPG: %llu us\n"
		"Time out of ELPG: %llu us\n"
		"ELPG residency ratio: %llu\n"
		"ELPG Entry Count: %u\n"
		"ELPG Avg Entry latency %u us\n"
		"ELPG Avg Exit latency %u us\n",
		total_ingating, total_ungating,
		residency, pg_stat_data.gating_cnt,
		pg_stat_data.avg_entry_latency_us,
		pg_stat_data.avg_exit_latency_us);
	return 0;

}

static int elpg_stat_open(struct inode *inode, struct file *file)
{
	return single_open(file, elpg_stat_show, inode->i_private);
}

static const struct file_operations elpg_stat_fops = {
	.open = elpg_stat_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

static int elpg_transitions_show(struct seq_file *s, void *data)
{
	struct gk20a *g = s->private;
	struct pmu_pg_stats_data pg_stat_data = { 0 };
	u32 total_gating_cnt;
	int err;

	if (g->power_on) {
		err = gk20a_busy(g);
		if (err)
			return err;

		nvgpu_pmu_get_pg_stats(g,
			PMU_PG_ELPG_ENGINE_ID_GRAPHICS, &pg_stat_data);
		gk20a_idle(g);
	}
	total_gating_cnt = g->pg_gating_cnt + pg_stat_data.gating_cnt;

	seq_printf(s, "%u\n", total_gating_cnt);
	return 0;

}

static int elpg_transitions_open(struct inode *inode, struct file *file)
{
	return single_open(file, elpg_transitions_show, inode->i_private);
}

static const struct file_operations elpg_transitions_fops = {
	.open = elpg_transitions_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

static int falc_trace_show(struct seq_file *s, void *data)
{
	struct gk20a *g = s->private;
	struct nvgpu_pmu *pmu = &g->pmu;
	u32 i = 0, j = 0, k, l, m;
	char part_str[40];
	void *tracebuffer;
	char *trace;
	u32 *trace1;

	/* allocate system memory to copy pmu trace buffer */
	tracebuffer = nvgpu_kzalloc(g, GK20A_PMU_TRACE_BUFSIZE);
	if (tracebuffer == NULL)
		return -ENOMEM;

	/* read pmu traces into system memory buffer */
	nvgpu_mem_rd_n(g, &pmu->trace_buf,
		       0, tracebuffer, GK20A_PMU_TRACE_BUFSIZE);

	trace = (char *)tracebuffer;
	trace1 = (u32 *)tracebuffer;

	for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 0x40) {
		for (j = 0; j < 0x40; j++)
			if (trace1[(i / 4) + j])
				break;
		if (j == 0x40)
			break;
		seq_printf(s, "Index %x: ", trace1[(i / 4)]);
		l = 0;
		m = 0;
		while (nvgpu_find_hex_in_string((trace+i+20+m), g, &k)) {
			if (k >= 40)
				break;
			strncpy(part_str, (trace+i+20+m), k);
			part_str[k] = 0;
			seq_printf(s, "%s0x%x", part_str,
				   trace1[(i / 4) + 1 + l]);
			l++;
			m += k + 2;
		}
		seq_printf(s, "%s", (trace+i+20+m));
	}

	nvgpu_kfree(g, tracebuffer);
	return 0;
}

static int falc_trace_open(struct inode *inode, struct file *file)
{
	return single_open(file, falc_trace_show, inode->i_private);
}

static const struct file_operations falc_trace_fops = {
	.open = falc_trace_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

static int perfmon_events_enable_show(struct seq_file *s, void *data)
{
	struct gk20a *g = s->private;

	seq_printf(s, "%u\n", g->pmu.perfmon_sampling_enabled ? 1 : 0);
	return 0;

}

static int perfmon_events_enable_open(struct inode *inode, struct file *file)
{
	return single_open(file, perfmon_events_enable_show, inode->i_private);
}

static ssize_t perfmon_events_enable_write(struct file *file,
	const char __user *userbuf, size_t count, loff_t *ppos)
{
	struct seq_file *s = file->private_data;
	struct gk20a *g = s->private;
	unsigned long val = 0;
	char buf[40];
	int buf_size;
	int err;

	memset(buf, 0, sizeof(buf));
	buf_size = min(count, (sizeof(buf)-1));

	if (copy_from_user(buf, userbuf, buf_size))
		return -EFAULT;

	if (kstrtoul(buf, 10, &val) < 0)
		return -EINVAL;

	/* Don't turn on gk20a unnecessarily */
	if (g->power_on) {
		err = gk20a_busy(g);
		if (err)
			return err;

		if (val && !g->pmu.perfmon_sampling_enabled &&
		    nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) {
			g->pmu.perfmon_sampling_enabled = true;
			g->ops.pmu.pmu_perfmon_start_sampling(&(g->pmu));
		} else if (!val && g->pmu.perfmon_sampling_enabled &&
			   nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) {
			g->pmu.perfmon_sampling_enabled = false;
			g->ops.pmu.pmu_perfmon_stop_sampling(&(g->pmu));
		}
		gk20a_idle(g);
	} else {
		g->pmu.perfmon_sampling_enabled = val ? true : false;
	}

	return count;
}

static const struct file_operations perfmon_events_enable_fops = {
	.open = perfmon_events_enable_open,
	.read = seq_read,
	.write = perfmon_events_enable_write,
	.llseek = seq_lseek,
	.release = single_release,
};

static int perfmon_events_count_show(struct seq_file *s, void *data)
{
	struct gk20a *g = s->private;

	seq_printf(s, "%lu\n", g->pmu.perfmon_events_cnt);
	return 0;

}

static int perfmon_events_count_open(struct inode *inode, struct file *file)
{
	return single_open(file, perfmon_events_count_show, inode->i_private);
}

static const struct file_operations perfmon_events_count_fops = {
	.open = perfmon_events_count_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

static int security_show(struct seq_file *s, void *data)
{
	struct gk20a *g = s->private;

	seq_printf(s, "%d\n", g->pmu.pmu_mode);
	return 0;

}

static int security_open(struct inode *inode, struct file *file)
{
	return single_open(file, security_show, inode->i_private);
}

static const struct file_operations security_fops = {
	.open = security_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

int gk20a_pmu_debugfs_init(struct gk20a *g)
{
	struct dentry *d;
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

	d = debugfs_create_file(
		"lpwr_debug", S_IRUGO|S_IWUSR, l->debugfs, g,
		&lpwr_debug_fops);
	if (!d)
		goto err_out;

	d = debugfs_create_file(
		"mscg_residency", S_IRUGO|S_IWUSR, l->debugfs, g,
		&mscg_stat_fops);
	if (!d)
		goto err_out;

	d = debugfs_create_file(
		"mscg_transitions", S_IRUGO, l->debugfs, g,
		&mscg_transitions_fops);
	if (!d)
		goto err_out;

	d = debugfs_create_file(
		"elpg_residency", S_IRUGO|S_IWUSR, l->debugfs, g,
		&elpg_stat_fops);
	if (!d)
		goto err_out;

	d = debugfs_create_file(
		"elpg_transitions", S_IRUGO, l->debugfs, g,
		&elpg_transitions_fops);
	if (!d)
		goto err_out;

	d = debugfs_create_file(
		"falc_trace", S_IRUGO, l->debugfs, g,
		&falc_trace_fops);
	if (!d)
		goto err_out;

	d = debugfs_create_file(
		"perfmon_events_enable", S_IRUGO, l->debugfs, g,
		&perfmon_events_enable_fops);
	if (!d)
		goto err_out;

	d = debugfs_create_file(
		"perfmon_events_count", S_IRUGO, l->debugfs, g,
		&perfmon_events_count_fops);
	if (!d)
		goto err_out;

	d = debugfs_create_file(
		"pmu_security", S_IRUGO, l->debugfs, g,
		&security_fops);
	if (!d)
		goto err_out;
	return 0;
err_out:
	pr_err("%s: Failed to make debugfs node\n", __func__);
	return -ENOMEM;
}
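The residency figures in mscg_stat_show and elpg_stat_show are plain integer fixed point: the ingating time is scaled by 1000 before dividing by the total, so the ratio reads as a permille value with no floating point involved. A standalone sketch with made-up times:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t ingating_us = 750000;	/* hypothetical time power-gated */
	uint64_t ungating_us = 250000;	/* hypothetical time ungated */
	uint64_t divisor = ingating_us + ungating_us;
	uint64_t residency = divisor ? (ingating_us * 1000) / divisor : 0;

	/* Prints 750, i.e. 75.0% residency on a scale of 1000. */
	printf("residency: %llu\n", (unsigned long long)residency);
	return 0;
}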
21
drivers/gpu/nvgpu/os/linux/debug_pmu.h
Normal file
@@ -0,0 +1,21 @@
/*
 * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 */

#ifndef __NVGPU_DEBUG_PMU_H__
#define __NVGPU_DEBUG_PMU_H__

struct gk20a;
int gk20a_pmu_debugfs_init(struct gk20a *g);

#endif /* __NVGPU_DEBUG_PMU_H__ */
80
drivers/gpu/nvgpu/os/linux/debug_sched.c
Normal file
@@ -0,0 +1,80 @@
/*
 * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 */

#include "debug_sched.h"
#include "os_linux.h"

#include <linux/debugfs.h>
#include <linux/seq_file.h>

static int gk20a_sched_debugfs_show(struct seq_file *s, void *unused)
{
	struct gk20a *g = s->private;
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	struct gk20a_sched_ctrl *sched = &l->sched_ctrl;
	bool sched_busy = true;

	int n = sched->bitmap_size / sizeof(u64);
	int i;
	int err;

	err = gk20a_busy(g);
	if (err)
		return err;

	if (nvgpu_mutex_tryacquire(&sched->busy_lock)) {
		sched_busy = false;
		nvgpu_mutex_release(&sched->busy_lock);
	}

	seq_printf(s, "control_locked=%d\n", sched->control_locked);
	seq_printf(s, "busy=%d\n", sched_busy);
	seq_printf(s, "bitmap_size=%zu\n", sched->bitmap_size);

	nvgpu_mutex_acquire(&sched->status_lock);

	seq_puts(s, "active_tsg_bitmap\n");
	for (i = 0; i < n; i++)
		seq_printf(s, "\t0x%016llx\n", sched->active_tsg_bitmap[i]);

	seq_puts(s, "recent_tsg_bitmap\n");
	for (i = 0; i < n; i++)
		seq_printf(s, "\t0x%016llx\n", sched->recent_tsg_bitmap[i]);

	nvgpu_mutex_release(&sched->status_lock);

	gk20a_idle(g);

	return 0;
}

static int gk20a_sched_debugfs_open(struct inode *inode, struct file *file)
{
	return single_open(file, gk20a_sched_debugfs_show, inode->i_private);
}

static const struct file_operations gk20a_sched_debugfs_fops = {
	.open = gk20a_sched_debugfs_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

void gk20a_sched_debugfs_init(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

	debugfs_create_file("sched_ctrl", S_IRUGO, l->debugfs,
			    g, &gk20a_sched_debugfs_fops);
}
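gk20a_sched_debugfs_show reports busyness by probing busy_lock: if the trylock succeeds, nothing was holding the lock, so it is released again immediately and the scheduler is reported idle. A userspace sketch of the same probe pattern, using a pthread mutex in place of nvgpu_mutex_* (build with -pthread):

#include <stdio.h>
#include <stdbool.h>
#include <pthread.h>

static pthread_mutex_t busy_lock = PTHREAD_MUTEX_INITIALIZER;

static bool is_busy(void)
{
	/* Trylock succeeded: nobody held the lock, so we were idle. */
	if (pthread_mutex_trylock(&busy_lock) == 0) {
		pthread_mutex_unlock(&busy_lock);
		return false;
	}
	return true;	/* lock held elsewhere: busy */
}

int main(void)
{
	printf("busy=%d\n", is_busy());	/* prints busy=0 here */
	return 0;
}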
21
drivers/gpu/nvgpu/os/linux/debug_sched.h
Normal file
@@ -0,0 +1,21 @@
/*
 * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 */

#ifndef __NVGPU_DEBUG_SCHED_H__
#define __NVGPU_DEBUG_SCHED_H__

struct gk20a;
void gk20a_sched_debugfs_init(struct gk20a *g);

#endif /* __NVGPU_DEBUG_SCHED_H__ */
176
drivers/gpu/nvgpu/os/linux/debug_xve.c
Normal file
@@ -0,0 +1,176 @@
/*
 * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 */

#include <nvgpu/types.h>
#include <nvgpu/xve.h>

#include "debug_xve.h"
#include "os_linux.h"

#include <linux/debugfs.h>
#include <linux/uaccess.h>

static ssize_t xve_link_speed_write(struct file *filp,
				    const char __user *buff,
				    size_t len, loff_t *off)
{
	struct gk20a *g = ((struct seq_file *)filp->private_data)->private;
	char kbuff[16];
	u32 buff_size, check_len;
	u32 link_speed = 0;
	int ret;

	buff_size = min_t(size_t, 16, len);

	memset(kbuff, 0, 16);
	if (copy_from_user(kbuff, buff, buff_size))
		return -EFAULT;

	check_len = strlen("Gen1");
	if (strncmp(kbuff, "Gen1", check_len) == 0)
		link_speed = GPU_XVE_SPEED_2P5;
	else if (strncmp(kbuff, "Gen2", check_len) == 0)
		link_speed = GPU_XVE_SPEED_5P0;
	else if (strncmp(kbuff, "Gen3", check_len) == 0)
		link_speed = GPU_XVE_SPEED_8P0;
	else
		nvgpu_err(g, "%s: Unknown PCIe speed: %s",
			  __func__, kbuff);

	if (!link_speed)
		return -EINVAL;

	/* Brief pause... To help rate limit this. */
	nvgpu_msleep(250);

	/*
	 * And actually set the speed. Yay.
	 */
	ret = g->ops.xve.set_speed(g, link_speed);
	if (ret)
		return ret;

	return len;
}

static int xve_link_speed_show(struct seq_file *s, void *unused)
{
	struct gk20a *g = s->private;
	u32 speed;
	int err;

	err = g->ops.xve.get_speed(g, &speed);
	if (err)
		return err;

	seq_printf(s, "Current PCIe speed:\n  %s\n", xve_speed_to_str(speed));

	return 0;
}

static int xve_link_speed_open(struct inode *inode, struct file *file)
{
	return single_open(file, xve_link_speed_show, inode->i_private);
}

static const struct file_operations xve_link_speed_fops = {
	.open = xve_link_speed_open,
	.read = seq_read,
	.write = xve_link_speed_write,
	.llseek = seq_lseek,
	.release = single_release,
};

static int xve_available_speeds_show(struct seq_file *s, void *unused)
{
	struct gk20a *g = s->private;
	u32 available_speeds;

	g->ops.xve.available_speeds(g, &available_speeds);

	seq_puts(s, "Available PCIe bus speeds:\n");
	if (available_speeds & GPU_XVE_SPEED_2P5)
		seq_puts(s, "  Gen1\n");
	if (available_speeds & GPU_XVE_SPEED_5P0)
		seq_puts(s, "  Gen2\n");
	if (available_speeds & GPU_XVE_SPEED_8P0)
		seq_puts(s, "  Gen3\n");

	return 0;
}

static int xve_available_speeds_open(struct inode *inode, struct file *file)
{
	return single_open(file, xve_available_speeds_show, inode->i_private);
}

static const struct file_operations xve_available_speeds_fops = {
	.open = xve_available_speeds_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

static int xve_link_control_status_show(struct seq_file *s, void *unused)
{
	struct gk20a *g = s->private;
	u32 link_status;

	link_status = g->ops.xve.get_link_control_status(g);
	seq_printf(s, "0x%08x\n", link_status);

	return 0;
}

static int xve_link_control_status_open(struct inode *inode, struct file *file)
{
	return single_open(file, xve_link_control_status_show, inode->i_private);
}

static const struct file_operations xve_link_control_status_fops = {
	.open = xve_link_control_status_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

int nvgpu_xve_debugfs_init(struct gk20a *g)
{
	int err = -ENODEV;

	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	struct dentry *gpu_root = l->debugfs;

	l->debugfs_xve = debugfs_create_dir("xve", gpu_root);
	if (IS_ERR_OR_NULL(l->debugfs_xve))
		goto fail;

	/*
	 * These are just debug nodes. If they fail to get made it's not worth
	 * worrying the higher level SW.
	 */
	debugfs_create_file("link_speed", S_IRUGO,
			    l->debugfs_xve, g,
			    &xve_link_speed_fops);
	debugfs_create_file("available_speeds", S_IRUGO,
			    l->debugfs_xve, g,
			    &xve_available_speeds_fops);
	debugfs_create_file("link_control_status", S_IRUGO,
			    l->debugfs_xve, g,
			    &xve_link_control_status_fops);

	err = 0;
fail:
	return err;
}
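xve_link_speed_write maps the user-visible "Gen1"/"Gen2"/"Gen3" tokens to speed constants by comparing a fixed-length prefix, since all three tokens share strlen("Gen1"). A standalone sketch of the same parsing; the enum values below are hypothetical stand-ins for GPU_XVE_SPEED_*:

#include <stdio.h>
#include <string.h>

enum { SPEED_2P5 = 1, SPEED_5P0 = 2, SPEED_8P0 = 4 };	/* stand-ins */

static int parse_gen(const char *buf)
{
	size_t n = strlen("Gen1");	/* all tokens are the same length */

	if (strncmp(buf, "Gen1", n) == 0)
		return SPEED_2P5;
	if (strncmp(buf, "Gen2", n) == 0)
		return SPEED_5P0;
	if (strncmp(buf, "Gen3", n) == 0)
		return SPEED_8P0;
	return 0;	/* unknown token; the driver turns this into -EINVAL */
}

int main(void)
{
	printf("Gen2 -> %d\n", parse_gen("Gen2"));	/* prints 2 */
	return 0;
}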
21
drivers/gpu/nvgpu/os/linux/debug_xve.h
Normal file
@@ -0,0 +1,21 @@
/*
 * Copyright (C) 2017 NVIDIA Corporation.  All rights reserved.
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 */

#ifndef __NVGPU_DEBUG_XVE_H__
#define __NVGPU_DEBUG_XVE_H__

struct gk20a;
int nvgpu_xve_debugfs_init(struct gk20a *g);

#endif /* __NVGPU_DEBUG_XVE_H__ */
694
drivers/gpu/nvgpu/os/linux/dma.c
Normal file
@@ -0,0 +1,694 @@
/*
 * Copyright (c) 2017-2018, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/dma-mapping.h>
#include <linux/version.h>

#include <nvgpu/log.h>
#include <nvgpu/dma.h>
#include <nvgpu/lock.h>
#include <nvgpu/bug.h>
#include <nvgpu/gmmu.h>
#include <nvgpu/kmem.h>
#include <nvgpu/enabled.h>
#include <nvgpu/vidmem.h>

#include <nvgpu/linux/dma.h>
#include <nvgpu/linux/vidmem.h>

#include "gk20a/gk20a.h"

#include "platform_gk20a.h"
#include "os_linux.h"

#ifdef __DMA_ATTRS_LONGS
#define NVGPU_DEFINE_DMA_ATTRS(x) \
	struct dma_attrs x = { \
		.flags = { [0 ... __DMA_ATTRS_LONGS-1] = 0 }, \
	}
#define NVGPU_DMA_ATTR(attrs) &attrs
#else
#define NVGPU_DEFINE_DMA_ATTRS(attrs) unsigned long attrs = 0
#define NVGPU_DMA_ATTR(attrs) attrs
#endif

/*
 * Enough to hold all the possible flags in string form. When a new flag is
 * added it must be added here as well!!
 */
#define NVGPU_DMA_STR_SIZE \
	sizeof("NO_KERNEL_MAPPING FORCE_CONTIGUOUS")

/*
 * The returned string is kmalloc()ed here but must be freed by the caller.
 */
static char *nvgpu_dma_flags_to_str(struct gk20a *g, unsigned long flags)
{
	char *buf = nvgpu_kzalloc(g, NVGPU_DMA_STR_SIZE);
	int bytes_available = NVGPU_DMA_STR_SIZE;

	/*
	 * Return the empty buffer if there's no flags. Makes it easier on the
	 * calling code to just print it instead of any if (NULL) type logic.
	 */
	if (!flags)
		return buf;

#define APPEND_FLAG(flag, str_flag)					\
	do {								\
		if (flags & flag) {					\
			strncat(buf, str_flag, bytes_available);	\
			bytes_available -= strlen(str_flag);		\
		}							\
	} while (0)

	APPEND_FLAG(NVGPU_DMA_NO_KERNEL_MAPPING, "NO_KERNEL_MAPPING ");
	APPEND_FLAG(NVGPU_DMA_FORCE_CONTIGUOUS, "FORCE_CONTIGUOUS ");
#undef APPEND_FLAG

	return buf;
}
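/*
 * Illustrative example (not part of the original file): with
 * flags == NVGPU_DMA_NO_KERNEL_MAPPING the returned kmalloc()ed buffer
 * holds "NO_KERNEL_MAPPING " (APPEND_FLAG leaves a trailing space) and
 * must be released with nvgpu_kfree(); a zero flags value yields an
 * empty string rather than NULL.
 */
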
|
||||
/**
|
||||
* __dma_dbg - Debug print for DMA allocs and frees.
|
||||
*
|
||||
* @g - The GPU.
|
||||
* @size - The requested size of the alloc (size_t).
|
||||
* @flags - The flags (unsigned long).
|
||||
* @type - A string describing the type (i.e: sysmem or vidmem).
|
||||
* @what - A string with 'alloc' or 'free'.
|
||||
*
|
||||
* @flags is the DMA flags. If there are none or it doesn't make sense to print
|
||||
* flags just pass 0.
|
||||
*
|
||||
* Please use dma_dbg_alloc() and dma_dbg_free() instead of this function.
|
||||
*/
|
||||
static void __dma_dbg(struct gk20a *g, size_t size, unsigned long flags,
|
||||
const char *type, const char *what)
|
||||
{
|
||||
char *flags_str = NULL;
|
||||
|
||||
/*
|
||||
* Don't bother making the flags_str if debugging is
|
||||
* not enabled. This saves a malloc and a free.
|
||||
*/
|
||||
if (!nvgpu_log_mask_enabled(g, gpu_dbg_dma))
|
||||
return;
|
||||
|
||||
flags_str = nvgpu_dma_flags_to_str(g, flags);
|
||||
|
||||
__nvgpu_log_dbg(g, gpu_dbg_dma,
|
||||
__func__, __LINE__,
|
||||
"DMA %s: [%s] size=%-7zu "
|
||||
"aligned=%-7zu total=%-10llukB %s",
|
||||
what, type,
|
||||
size, PAGE_ALIGN(size),
|
||||
g->dma_memory_used >> 10,
|
||||
flags_str);
|
||||
|
||||
if (flags_str)
|
||||
nvgpu_kfree(g, flags_str);
|
||||
}
|
||||
|
||||
#define dma_dbg_alloc(g, size, flags, type) \
|
||||
__dma_dbg(g, size, flags, type, "alloc")
|
||||
#define dma_dbg_free(g, size, flags, type) \
|
||||
__dma_dbg(g, size, flags, type, "free")
|
||||
|
||||
/*
|
||||
* For after the DMA alloc is done.
|
||||
*/
|
||||
#define __dma_dbg_done(g, size, type, what) \
|
||||
nvgpu_log(g, gpu_dbg_dma, \
|
||||
"DMA %s: [%s] size=%-7zu Done!", \
|
||||
what, type, size); \
|
||||
|
||||
#define dma_dbg_alloc_done(g, size, type) \
|
||||
__dma_dbg_done(g, size, type, "alloc")
|
||||
#define dma_dbg_free_done(g, size, type) \
|
||||
__dma_dbg_done(g, size, type, "free")
|
||||
|
||||
#if defined(CONFIG_GK20A_VIDMEM)
|
||||
static u64 __nvgpu_dma_alloc(struct nvgpu_allocator *allocator, u64 at,
|
||||
size_t size)
|
||||
{
|
||||
u64 addr = 0;
|
||||
|
||||
if (at)
|
||||
addr = nvgpu_alloc_fixed(allocator, at, size, 0);
|
||||
else
|
||||
addr = nvgpu_alloc(allocator, size);
|
||||
|
||||
return addr;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0)
|
||||
static void nvgpu_dma_flags_to_attrs(unsigned long *attrs,
|
||||
unsigned long flags)
|
||||
#define ATTR_ARG(x) *x
|
||||
#else
|
||||
static void nvgpu_dma_flags_to_attrs(struct dma_attrs *attrs,
|
||||
unsigned long flags)
|
||||
#define ATTR_ARG(x) x
|
||||
#endif
|
||||
{
|
||||
if (flags & NVGPU_DMA_NO_KERNEL_MAPPING)
|
||||
dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, ATTR_ARG(attrs));
|
||||
if (flags & NVGPU_DMA_FORCE_CONTIGUOUS)
|
||||
dma_set_attr(DMA_ATTR_FORCE_CONTIGUOUS, ATTR_ARG(attrs));
|
||||
#undef ATTR_ARG
|
||||
}
|
||||
|
||||
int nvgpu_dma_alloc(struct gk20a *g, size_t size, struct nvgpu_mem *mem)
|
||||
{
|
||||
return nvgpu_dma_alloc_flags(g, 0, size, mem);
|
||||
}
|
||||
|
||||
int nvgpu_dma_alloc_flags(struct gk20a *g, unsigned long flags, size_t size,
|
||||
struct nvgpu_mem *mem)
|
||||
{
|
||||
if (!nvgpu_is_enabled(g, NVGPU_MM_UNIFIED_MEMORY)) {
|
||||
/*
|
||||
* Force the no-kernel-mapping flag on because we don't support
|
||||
* the lack of it for vidmem - the user should not care when
|
||||
* using nvgpu_gmmu_alloc_map and it's vidmem, or if there's a
|
||||
* difference, the user should use the flag explicitly anyway.
|
||||
*
|
||||
* Incoming flags are ignored here, since bits other than the
|
||||
* no-kernel-mapping flag are ignored by the vidmem mapping
|
||||
* functions anyway.
|
||||
*/
|
||||
int err = nvgpu_dma_alloc_flags_vid(g,
|
||||
NVGPU_DMA_NO_KERNEL_MAPPING,
|
||||
size, mem);
|
||||
|
||||
if (!err)
|
||||
return 0;
|
||||
/*
|
||||
* Fall back to sysmem (which may then also fail) in case
|
||||
* vidmem is exhausted.
|
||||
*/
|
||||
}
|
||||
|
||||
return nvgpu_dma_alloc_flags_sys(g, flags, size, mem);
|
||||
}
|
||||
|
||||
int nvgpu_dma_alloc_sys(struct gk20a *g, size_t size, struct nvgpu_mem *mem)
|
||||
{
|
||||
return nvgpu_dma_alloc_flags_sys(g, 0, size, mem);
|
||||
}
|
||||
|
||||
int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags,
|
||||
size_t size, struct nvgpu_mem *mem)
|
||||
{
|
||||
struct device *d = dev_from_gk20a(g);
|
||||
int err;
|
||||
dma_addr_t iova;
|
||||
NVGPU_DEFINE_DMA_ATTRS(dma_attrs);
|
||||
void *alloc_ret;
|
||||
|
||||
if (nvgpu_mem_is_valid(mem)) {
|
||||
nvgpu_warn(g, "memory leak !!");
|
||||
WARN_ON(1);
|
||||
}
|
||||
|
||||
/*
|
||||
* WAR for IO coherent chips: the DMA API does not seem to generate
|
||||
* mappings that work correctly. Unclear why - Bug ID: 2040115.
|
||||
*
|
||||
* Basically we just tell the DMA API not to map with NO_KERNEL_MAPPING
|
||||
* and then make a vmap() ourselves.
|
||||
*/
|
||||
if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
|
||||
flags |= NVGPU_DMA_NO_KERNEL_MAPPING;
|
||||
|
||||
/*
|
||||
* Before the debug print so we see this in the total. But during
|
||||
* cleanup in the fail path this has to be subtracted.
|
||||
*/
|
||||
g->dma_memory_used += PAGE_ALIGN(size);
|
||||
|
||||
dma_dbg_alloc(g, size, flags, "sysmem");
|
||||
|
||||
/*
|
||||
* Save the old size but for actual allocation purposes the size is
|
||||
* going to be page aligned.
|
||||
*/
|
||||
mem->size = size;
|
||||
size = PAGE_ALIGN(size);
|
||||
|
||||
nvgpu_dma_flags_to_attrs(&dma_attrs, flags);
|
||||
|
||||
alloc_ret = dma_alloc_attrs(d, size, &iova,
|
||||
GFP_KERNEL|__GFP_ZERO,
|
||||
NVGPU_DMA_ATTR(dma_attrs));
|
||||
if (!alloc_ret)
|
||||
return -ENOMEM;
|
||||
|
||||
if (flags & NVGPU_DMA_NO_KERNEL_MAPPING) {
|
||||
mem->priv.pages = alloc_ret;
|
||||
err = nvgpu_get_sgtable_from_pages(g, &mem->priv.sgt,
|
||||
mem->priv.pages,
|
||||
iova, size);
|
||||
} else {
|
||||
mem->cpu_va = alloc_ret;
|
||||
err = nvgpu_get_sgtable_attrs(g, &mem->priv.sgt, mem->cpu_va,
|
||||
iova, size, flags);
|
||||
}
|
||||
if (err)
|
||||
goto fail_free_dma;
|
||||
|
||||
if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) {
|
||||
mem->cpu_va = vmap(mem->priv.pages,
|
||||
size >> PAGE_SHIFT,
|
||||
0, PAGE_KERNEL);
|
||||
if (!mem->cpu_va) {
|
||||
err = -ENOMEM;
|
||||
goto fail_free_sgt;
|
||||
}
|
||||
}
|
||||
|
||||
mem->aligned_size = size;
|
||||
mem->aperture = APERTURE_SYSMEM;
|
||||
mem->priv.flags = flags;
|
||||
|
||||
dma_dbg_alloc_done(g, mem->size, "sysmem");
|
||||
|
||||
return 0;
|
||||
|
||||
fail_free_sgt:
|
||||
nvgpu_free_sgtable(g, &mem->priv.sgt);
|
||||
fail_free_dma:
|
||||
dma_free_attrs(d, size, alloc_ret, iova, NVGPU_DMA_ATTR(dma_attrs));
|
||||
mem->cpu_va = NULL;
|
||||
mem->priv.sgt = NULL;
|
||||
mem->size = 0;
|
||||
g->dma_memory_used -= mem->aligned_size;
|
||||
return err;
|
||||
}
|
||||
|
||||
int nvgpu_dma_alloc_vid(struct gk20a *g, size_t size, struct nvgpu_mem *mem)
|
||||
{
|
||||
return nvgpu_dma_alloc_flags_vid(g,
|
||||
NVGPU_DMA_NO_KERNEL_MAPPING, size, mem);
|
||||
}
|
||||
|
||||
int nvgpu_dma_alloc_flags_vid(struct gk20a *g, unsigned long flags,
|
||||
size_t size, struct nvgpu_mem *mem)
|
||||
{
|
||||
return nvgpu_dma_alloc_flags_vid_at(g, flags, size, mem, 0);
|
||||
}
|
||||
|
||||
int nvgpu_dma_alloc_flags_vid_at(struct gk20a *g, unsigned long flags,
|
||||
size_t size, struct nvgpu_mem *mem, u64 at)
|
||||
{
|
||||
#if defined(CONFIG_GK20A_VIDMEM)
|
||||
u64 addr;
|
||||
int err;
|
||||
struct nvgpu_allocator *vidmem_alloc = g->mm.vidmem.cleared ?
|
||||
&g->mm.vidmem.allocator :
|
||||
&g->mm.vidmem.bootstrap_allocator;
|
||||
int before_pending;
|
||||
|
||||
if (nvgpu_mem_is_valid(mem)) {
|
||||
nvgpu_warn(g, "memory leak !!");
|
||||
WARN_ON(1);
|
||||
}
|
||||
|
||||
dma_dbg_alloc(g, size, flags, "vidmem");
|
||||
|
||||
mem->size = size;
|
||||
size = PAGE_ALIGN(size);
|
||||
|
||||
if (!nvgpu_alloc_initialized(&g->mm.vidmem.allocator))
|
||||
return -ENOSYS;
|
||||
|
||||
/*
|
||||
* Our own allocator doesn't have any flags yet, and we can't
|
||||
* kernel-map these, so require explicit flags.
|
||||
*/
|
||||
WARN_ON(flags != NVGPU_DMA_NO_KERNEL_MAPPING);
|
||||
|
||||
nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex);
|
||||
before_pending = atomic64_read(&g->mm.vidmem.bytes_pending.atomic_var);
|
||||
addr = __nvgpu_dma_alloc(vidmem_alloc, at, size);
|
||||
nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex);
|
||||
if (!addr) {
|
||||
/*
|
||||
* If memory is known to be freed soon, let the user know that
|
||||
* it may be available after a while.
|
||||
*/
|
||||
if (before_pending)
|
||||
return -EAGAIN;
|
||||
else
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
if (at)
|
||||
mem->mem_flags |= NVGPU_MEM_FLAG_FIXED;
|
||||
|
||||
mem->priv.sgt = nvgpu_kzalloc(g, sizeof(struct sg_table));
|
||||
if (!mem->priv.sgt) {
|
||||
err = -ENOMEM;
|
||||
goto fail_physfree;
|
||||
}
|
||||
|
||||
err = sg_alloc_table(mem->priv.sgt, 1, GFP_KERNEL);
|
||||
if (err)
|
||||
goto fail_kfree;
|
||||
|
||||
nvgpu_vidmem_set_page_alloc(mem->priv.sgt->sgl, addr);
|
||||
sg_set_page(mem->priv.sgt->sgl, NULL, size, 0);
|
||||
|
||||
mem->aligned_size = size;
|
||||
mem->aperture = APERTURE_VIDMEM;
|
||||
mem->vidmem_alloc = (struct nvgpu_page_alloc *)(uintptr_t)addr;
|
||||
mem->allocator = vidmem_alloc;
|
||||
mem->priv.flags = flags;
|
||||
|
||||
nvgpu_init_list_node(&mem->clear_list_entry);
|
||||
|
||||
dma_dbg_alloc_done(g, mem->size, "vidmem");
|
||||
|
||||
return 0;
|
||||
|
||||
fail_kfree:
|
||||
nvgpu_kfree(g, mem->priv.sgt);
|
||||
fail_physfree:
|
||||
nvgpu_free(&g->mm.vidmem.allocator, addr);
|
||||
mem->size = 0;
|
||||
return err;
|
||||
#else
|
||||
return -ENOSYS;
|
||||
#endif
|
||||
}
|
||||
|
||||
int nvgpu_dma_alloc_map(struct vm_gk20a *vm, size_t size,
|
||||
struct nvgpu_mem *mem)
|
||||
{
|
||||
return nvgpu_dma_alloc_map_flags(vm, 0, size, mem);
|
||||
}
|
||||
|
||||
int nvgpu_dma_alloc_map_flags(struct vm_gk20a *vm, unsigned long flags,
|
||||
size_t size, struct nvgpu_mem *mem)
|
||||
{
|
||||
if (!nvgpu_is_enabled(gk20a_from_vm(vm), NVGPU_MM_UNIFIED_MEMORY)) {
|
||||
/*
|
||||
* Force the no-kernel-mapping flag on because we don't support
|
||||
* the lack of it for vidmem - the user should not care when
|
||||
* using nvgpu_dma_alloc_map and it's vidmem, or if there's a
|
||||
* difference, the user should use the flag explicitly anyway.
|
||||
*/
|
||||
int err = nvgpu_dma_alloc_map_flags_vid(vm,
|
||||
flags | NVGPU_DMA_NO_KERNEL_MAPPING,
|
||||
size, mem);
|
||||
|
||||
if (!err)
|
||||
return 0;
|
||||
/*
|
||||
* Fall back to sysmem (which may then also fail) in case
|
||||
* vidmem is exhausted.
|
||||
*/
|
||||
}
|
||||
|
||||
return nvgpu_dma_alloc_map_flags_sys(vm, flags, size, mem);
|
||||
}

int nvgpu_dma_alloc_map_sys(struct vm_gk20a *vm, size_t size,
		struct nvgpu_mem *mem)
{
	return nvgpu_dma_alloc_map_flags_sys(vm, 0, size, mem);
}

int nvgpu_dma_alloc_map_flags_sys(struct vm_gk20a *vm, unsigned long flags,
		size_t size, struct nvgpu_mem *mem)
{
	int err = nvgpu_dma_alloc_flags_sys(vm->mm->g, flags, size, mem);

	if (err)
		return err;

	mem->gpu_va = nvgpu_gmmu_map(vm, mem, size, 0,
				     gk20a_mem_flag_none, false,
				     mem->aperture);
	if (!mem->gpu_va) {
		err = -ENOMEM;
		goto fail_free;
	}

	return 0;

fail_free:
	nvgpu_dma_free(vm->mm->g, mem);
	return err;
}

int nvgpu_dma_alloc_map_vid(struct vm_gk20a *vm, size_t size,
		struct nvgpu_mem *mem)
{
	return nvgpu_dma_alloc_map_flags_vid(vm,
			NVGPU_DMA_NO_KERNEL_MAPPING, size, mem);
}

int nvgpu_dma_alloc_map_flags_vid(struct vm_gk20a *vm, unsigned long flags,
		size_t size, struct nvgpu_mem *mem)
{
	int err = nvgpu_dma_alloc_flags_vid(vm->mm->g, flags, size, mem);

	if (err)
		return err;

	mem->gpu_va = nvgpu_gmmu_map(vm, mem, size, 0,
				     gk20a_mem_flag_none, false,
				     mem->aperture);
	if (!mem->gpu_va) {
		err = -ENOMEM;
		goto fail_free;
	}

	return 0;

fail_free:
	nvgpu_dma_free(vm->mm->g, mem);
	return err;
}

static void nvgpu_dma_free_sys(struct gk20a *g, struct nvgpu_mem *mem)
{
	struct device *d = dev_from_gk20a(g);

	g->dma_memory_used -= mem->aligned_size;

	dma_dbg_free(g, mem->size, mem->priv.flags, "sysmem");

	if (!(mem->mem_flags & NVGPU_MEM_FLAG_SHADOW_COPY) &&
	    !(mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA) &&
	    (mem->cpu_va || mem->priv.pages)) {
		/*
		 * Free side of WAR for bug 2040115.
		 */
		if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
			vunmap(mem->cpu_va);

		if (mem->priv.flags) {
			NVGPU_DEFINE_DMA_ATTRS(dma_attrs);

			nvgpu_dma_flags_to_attrs(&dma_attrs, mem->priv.flags);

			if (mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) {
				dma_free_attrs(d, mem->aligned_size,
					mem->priv.pages,
					sg_dma_address(mem->priv.sgt->sgl),
					NVGPU_DMA_ATTR(dma_attrs));
			} else {
				dma_free_attrs(d, mem->aligned_size,
					mem->cpu_va,
					sg_dma_address(mem->priv.sgt->sgl),
					NVGPU_DMA_ATTR(dma_attrs));
			}
		} else {
			dma_free_coherent(d, mem->aligned_size, mem->cpu_va,
					sg_dma_address(mem->priv.sgt->sgl));
		}
		mem->cpu_va = NULL;
		mem->priv.pages = NULL;
	}

	/*
	 * When this flag is set we expect that pages is still populated but
	 * not by the DMA API.
	 */
	if (mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA)
		nvgpu_kfree(g, mem->priv.pages);

	if (mem->priv.sgt)
		nvgpu_free_sgtable(g, &mem->priv.sgt);

	dma_dbg_free_done(g, mem->size, "sysmem");

	mem->size = 0;
	mem->aligned_size = 0;
	mem->aperture = APERTURE_INVALID;
}

static void nvgpu_dma_free_vid(struct gk20a *g, struct nvgpu_mem *mem)
{
#if defined(CONFIG_GK20A_VIDMEM)
	size_t mem_size = mem->size;

	dma_dbg_free(g, mem->size, mem->priv.flags, "vidmem");

	/* Sanity check - only this flag is supported when allocating. */
	WARN_ON(mem->priv.flags != NVGPU_DMA_NO_KERNEL_MAPPING);

	if (mem->mem_flags & NVGPU_MEM_FLAG_USER_MEM) {
		int err = nvgpu_vidmem_clear_list_enqueue(g, mem);

		/*
		 * If there's an error here then that means we can't clear
		 * the vidmem. That's too bad; however, we still own the
		 * nvgpu_mem buf so we have to free that.
		 *
		 * We don't need to worry about the vidmem allocator itself
		 * since when that gets cleaned up in the driver shutdown
		 * path all the outstanding allocs are force freed.
		 */
		if (err)
			nvgpu_kfree(g, mem);
	} else {
		nvgpu_memset(g, mem, 0, 0, mem->aligned_size);
		nvgpu_free(mem->allocator,
			(u64)nvgpu_vidmem_get_page_alloc(mem->priv.sgt->sgl));
		nvgpu_free_sgtable(g, &mem->priv.sgt);

		mem->size = 0;
		mem->aligned_size = 0;
		mem->aperture = APERTURE_INVALID;
	}

	dma_dbg_free_done(g, mem_size, "vidmem");
#endif
}

void nvgpu_dma_free(struct gk20a *g, struct nvgpu_mem *mem)
{
	switch (mem->aperture) {
	case APERTURE_SYSMEM:
		return nvgpu_dma_free_sys(g, mem);
	case APERTURE_VIDMEM:
		return nvgpu_dma_free_vid(g, mem);
	default:
		break; /* like free() on "null" memory */
	}
}

void nvgpu_dma_unmap_free(struct vm_gk20a *vm, struct nvgpu_mem *mem)
{
	if (mem->gpu_va)
		nvgpu_gmmu_unmap(vm, mem, mem->gpu_va);
	mem->gpu_va = 0;

	nvgpu_dma_free(vm->mm->g, mem);
}

int nvgpu_get_sgtable_attrs(struct gk20a *g, struct sg_table **sgt,
		void *cpuva, u64 iova, size_t size, unsigned long flags)
{
	int err = 0;
	struct sg_table *tbl;
	NVGPU_DEFINE_DMA_ATTRS(dma_attrs);

	tbl = nvgpu_kzalloc(g, sizeof(struct sg_table));
	if (!tbl) {
		err = -ENOMEM;
		goto fail;
	}

	nvgpu_dma_flags_to_attrs(&dma_attrs, flags);
	err = dma_get_sgtable_attrs(dev_from_gk20a(g), tbl, cpuva, iova,
				    size, NVGPU_DMA_ATTR(dma_attrs));
	if (err)
		goto fail;

	sg_dma_address(tbl->sgl) = iova;
	*sgt = tbl;

	return 0;

fail:
	if (tbl)
		nvgpu_kfree(g, tbl);

	return err;
}

int nvgpu_get_sgtable(struct gk20a *g, struct sg_table **sgt,
		void *cpuva, u64 iova, size_t size)
{
	return nvgpu_get_sgtable_attrs(g, sgt, cpuva, iova, size, 0);
}

int nvgpu_get_sgtable_from_pages(struct gk20a *g, struct sg_table **sgt,
		struct page **pages, u64 iova, size_t size)
{
	int err = 0;
	struct sg_table *tbl;

	tbl = nvgpu_kzalloc(g, sizeof(struct sg_table));
	if (!tbl) {
		err = -ENOMEM;
		goto fail;
	}

	err = sg_alloc_table_from_pages(tbl, pages,
					DIV_ROUND_UP(size, PAGE_SIZE),
					0, size, GFP_KERNEL);
	if (err)
		goto fail;

	sg_dma_address(tbl->sgl) = iova;
	*sgt = tbl;

	return 0;

fail:
	if (tbl)
		nvgpu_kfree(g, tbl);

	return err;
}

void nvgpu_free_sgtable(struct gk20a *g, struct sg_table **sgt)
{
	sg_free_table(*sgt);
	nvgpu_kfree(g, *sgt);
	*sgt = NULL;
}

bool nvgpu_iommuable(struct gk20a *g)
{
#ifdef CONFIG_TEGRA_GK20A
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

	/*
	 * Check against the nvgpu device to see if it's been marked as
	 * IOMMU'able.
	 */
	if (!device_is_iommuable(l->dev))
		return false;
#endif

	return true;
}
218
drivers/gpu/nvgpu/os/linux/dmabuf.c
Normal file
@@ -0,0 +1,218 @@

/*
 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/device.h>
#include <linux/dma-buf.h>
#include <linux/scatterlist.h>

#include <nvgpu/comptags.h>
#include <nvgpu/enabled.h>

#include <nvgpu/linux/vm.h>
#include <nvgpu/linux/vidmem.h>

#include "gk20a/gk20a.h"

#include "platform_gk20a.h"
#include "dmabuf.h"
#include "os_linux.h"

static void gk20a_mm_delete_priv(void *_priv)
{
	struct gk20a_buffer_state *s, *s_tmp;
	struct gk20a_dmabuf_priv *priv = _priv;
	struct gk20a *g;

	if (!priv)
		return;

	g = priv->g;

	if (priv->comptags.allocated && priv->comptags.lines) {
		BUG_ON(!priv->comptag_allocator);
		gk20a_comptaglines_free(priv->comptag_allocator,
				priv->comptags.offset,
				priv->comptags.lines);
	}

	/* Free buffer states */
	nvgpu_list_for_each_entry_safe(s, s_tmp, &priv->states,
				gk20a_buffer_state, list) {
		gk20a_fence_put(s->fence);
		nvgpu_list_del(&s->list);
		nvgpu_kfree(g, s);
	}

	nvgpu_kfree(g, priv);
}

enum nvgpu_aperture gk20a_dmabuf_aperture(struct gk20a *g,
					  struct dma_buf *dmabuf)
{
	struct gk20a *buf_owner = nvgpu_vidmem_buf_owner(dmabuf);
	bool unified_memory = nvgpu_is_enabled(g, NVGPU_MM_UNIFIED_MEMORY);

	if (buf_owner == NULL) {
		/* Not nvgpu-allocated, assume system memory */
		return APERTURE_SYSMEM;
	} else if (WARN_ON(buf_owner == g && unified_memory)) {
		/*
		 * Looks like our video memory, but this gpu doesn't support
		 * it. Warn about a bug and bail out.
		 */
		nvgpu_warn(g,
			"dmabuf is our vidmem but we don't have local vidmem");
		return APERTURE_INVALID;
	} else if (buf_owner != g) {
		/* Someone else's vidmem */
		return APERTURE_INVALID;
	} else {
		/* Yay, buf_owner == g */
		return APERTURE_VIDMEM;
	}
}

struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf,
			      struct dma_buf_attachment **attachment)
{
	struct gk20a_dmabuf_priv *priv;

	priv = dma_buf_get_drvdata(dmabuf, dev);
	if (WARN_ON(!priv))
		return ERR_PTR(-EINVAL);

	nvgpu_mutex_acquire(&priv->lock);

	if (priv->pin_count == 0) {
		priv->attach = dma_buf_attach(dmabuf, dev);
		if (IS_ERR(priv->attach)) {
			nvgpu_mutex_release(&priv->lock);
			return (struct sg_table *)priv->attach;
		}

		priv->sgt = dma_buf_map_attachment(priv->attach,
						   DMA_BIDIRECTIONAL);
		if (IS_ERR(priv->sgt)) {
			dma_buf_detach(dmabuf, priv->attach);
			nvgpu_mutex_release(&priv->lock);
			return priv->sgt;
		}
	}

	priv->pin_count++;
	nvgpu_mutex_release(&priv->lock);
	*attachment = priv->attach;
	return priv->sgt;
}

void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf,
		    struct dma_buf_attachment *attachment,
		    struct sg_table *sgt)
{
	struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
	dma_addr_t dma_addr;

	if (IS_ERR(priv) || !priv)
		return;

	nvgpu_mutex_acquire(&priv->lock);
	WARN_ON(priv->sgt != sgt);
	WARN_ON(priv->attach != attachment);
	priv->pin_count--;
	WARN_ON(priv->pin_count < 0);
	dma_addr = sg_dma_address(priv->sgt->sgl);
	if (priv->pin_count == 0) {
		dma_buf_unmap_attachment(priv->attach, priv->sgt,
					 DMA_BIDIRECTIONAL);
		dma_buf_detach(dmabuf, priv->attach);
	}
	nvgpu_mutex_release(&priv->lock);
}
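
/*
 * Illustrative usage sketch (not part of this commit): pin/unpin calls must
 * be balanced. The first pin attaches and maps the dma-buf, later pins only
 * bump the refcount, and the last unpin unmaps and detaches. 'dev' and
 * 'dmabuf' are assumed to be supplied by the caller.
 */
static int example_pin_cycle(struct device *dev, struct dma_buf *dmabuf)
{
	struct dma_buf_attachment *attach = NULL;
	struct sg_table *sgt;

	sgt = gk20a_mm_pin(dev, dmabuf, &attach);	/* pin_count 0 -> 1 */
	if (IS_ERR(sgt))
		return PTR_ERR(sgt);

	/* ... program the GPU using sg_dma_address(sgt->sgl) ... */

	gk20a_mm_unpin(dev, dmabuf, attach, sgt);	/* pin_count 1 -> 0 */
	return 0;
}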

int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev)
{
	struct gk20a *g = gk20a_get_platform(dev)->g;
	struct gk20a_dmabuf_priv *priv;

	priv = dma_buf_get_drvdata(dmabuf, dev);
	if (likely(priv))
		return 0;

	nvgpu_mutex_acquire(&g->mm.priv_lock);
	priv = dma_buf_get_drvdata(dmabuf, dev);
	if (priv)
		goto priv_exist_or_err;

	priv = nvgpu_kzalloc(g, sizeof(*priv));
	if (!priv) {
		priv = ERR_PTR(-ENOMEM);
		goto priv_exist_or_err;
	}

	nvgpu_mutex_init(&priv->lock);
	nvgpu_init_list_node(&priv->states);
	priv->g = g;
	dma_buf_set_drvdata(dmabuf, dev, priv, gk20a_mm_delete_priv);

priv_exist_or_err:
	nvgpu_mutex_release(&g->mm.priv_lock);
	if (IS_ERR(priv))
		return -ENOMEM;

	return 0;
}

int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct gk20a *g,
			   u64 offset, struct gk20a_buffer_state **state)
{
	int err = 0;
	struct gk20a_dmabuf_priv *priv;
	struct gk20a_buffer_state *s;
	struct device *dev = dev_from_gk20a(g);

	if (WARN_ON(offset >= (u64)dmabuf->size))
		return -EINVAL;

	err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev);
	if (err)
		return err;

	priv = dma_buf_get_drvdata(dmabuf, dev);
	if (WARN_ON(!priv))
		return -ENOSYS;

	nvgpu_mutex_acquire(&priv->lock);

	nvgpu_list_for_each_entry(s, &priv->states, gk20a_buffer_state, list)
		if (s->offset == offset)
			goto out;

	/* State not found, create state. */
	s = nvgpu_kzalloc(g, sizeof(*s));
	if (!s) {
		err = -ENOMEM;
		goto out;
	}

	s->offset = offset;
	nvgpu_init_list_node(&s->list);
	nvgpu_mutex_init(&s->lock);
	nvgpu_list_add_tail(&s->list, &priv->states);

out:
	nvgpu_mutex_release(&priv->lock);
	if (!err)
		*state = s;
	return err;
}
62
drivers/gpu/nvgpu/os/linux/dmabuf.h
Normal file
@@ -0,0 +1,62 @@

/*
 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef __COMMON_LINUX_DMABUF_H__
#define __COMMON_LINUX_DMABUF_H__

#include <nvgpu/comptags.h>
#include <nvgpu/list.h>
#include <nvgpu/lock.h>
#include <nvgpu/gmmu.h>

struct sg_table;
struct dma_buf;
struct dma_buf_attachment;
struct device;

struct gk20a;
struct gk20a_buffer_state;

struct gk20a_dmabuf_priv {
	struct nvgpu_mutex lock;

	struct gk20a *g;

	struct gk20a_comptag_allocator *comptag_allocator;
	struct gk20a_comptags comptags;

	struct dma_buf_attachment *attach;
	struct sg_table *sgt;

	int pin_count;

	struct nvgpu_list_node states;

	u64 buffer_id;
};

struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf,
			      struct dma_buf_attachment **attachment);
void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf,
		    struct dma_buf_attachment *attachment,
		    struct sg_table *sgt);

int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev);

int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct gk20a *g,
			   u64 offset, struct gk20a_buffer_state **state);

#endif
334
drivers/gpu/nvgpu/os/linux/driver_common.c
Normal file
@@ -0,0 +1,334 @@

/*
 * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/reboot.h>
#include <linux/dma-mapping.h>
#include <linux/mm.h>
#include <uapi/linux/nvgpu.h>

#include <nvgpu/defaults.h>
#include <nvgpu/kmem.h>
#include <nvgpu/nvgpu_common.h>
#include <nvgpu/soc.h>
#include <nvgpu/bug.h>
#include <nvgpu/enabled.h>
#include <nvgpu/debug.h>
#include <nvgpu/sizes.h>

#include "gk20a/gk20a.h"
#include "platform_gk20a.h"
#include "module.h"
#include "os_linux.h"
#include "sysfs.h"
#include "ioctl.h"
#include "gk20a/regops_gk20a.h"

#define EMC3D_DEFAULT_RATIO 750

void nvgpu_kernel_restart(void *cmd)
{
	kernel_restart(cmd);
}

static void nvgpu_init_vars(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	struct device *dev = dev_from_gk20a(g);
	struct gk20a_platform *platform = dev_get_drvdata(dev);

	nvgpu_cond_init(&l->sw_irq_stall_last_handled_wq);
	nvgpu_cond_init(&l->sw_irq_nonstall_last_handled_wq);

	init_rwsem(&l->busy_lock);
	nvgpu_rwsem_init(&g->deterministic_busy);

	nvgpu_spinlock_init(&g->mc_enable_lock);

	nvgpu_mutex_init(&platform->railgate_lock);
	nvgpu_mutex_init(&g->dbg_sessions_lock);
	nvgpu_mutex_init(&g->client_lock);
	nvgpu_mutex_init(&g->poweron_lock);
	nvgpu_mutex_init(&g->poweroff_lock);
	nvgpu_mutex_init(&g->ctxsw_disable_lock);

	l->regs_saved = l->regs;
	l->bar1_saved = l->bar1;

	g->emc3d_ratio = EMC3D_DEFAULT_RATIO;

	/* Set DMA parameters to allow larger sgt lists */
	dev->dma_parms = &l->dma_parms;
	dma_set_max_seg_size(dev, UINT_MAX);

	/*
	 * A default of 16GB is the largest supported DMA size that is
	 * acceptable to all currently supported Tegra SoCs.
	 */
	if (!platform->dma_mask)
		platform->dma_mask = DMA_BIT_MASK(34);

	dma_set_mask(dev, platform->dma_mask);
	dma_set_coherent_mask(dev, platform->dma_mask);

	nvgpu_init_list_node(&g->profiler_objects);

	nvgpu_init_list_node(&g->boardobj_head);
	nvgpu_init_list_node(&g->boardobjgrp_head);
}
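
/*
 * Note on the mask above (illustrative): DMA_BIT_MASK(34) allows 34-bit
 * addresses, i.e. 2^34 bytes = 16 GiB of addressable DMA range, matching
 * the "default of 16GB" comment in nvgpu_init_vars().
 */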

static void nvgpu_init_gr_vars(struct gk20a *g)
{
	gk20a_init_gr(g);

	nvgpu_log_info(g, "total ram pages : %lu", totalram_pages);
	g->gr.max_comptag_mem = totalram_pages
				 >> (10 - (PAGE_SHIFT - 10));
}
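
/*
 * Worked example for the shift above (illustrative, assuming 4 KiB pages):
 * PAGE_SHIFT is 12, so the shift count is 10 - (12 - 10) = 8, and
 * totalram_pages >> 8 equals pages * 4 KiB / 1 MiB, i.e. max_comptag_mem
 * ends up as the total system RAM expressed in MiB.
 */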

static void nvgpu_init_timeout(struct gk20a *g)
{
	struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g));

	g->timeouts_disabled_by_user = false;
	nvgpu_atomic_set(&g->timeouts_disabled_refcount, 0);

	if (nvgpu_platform_is_silicon(g)) {
		g->gr_idle_timeout_default = NVGPU_DEFAULT_GR_IDLE_TIMEOUT;
	} else if (nvgpu_platform_is_fpga(g)) {
		g->gr_idle_timeout_default = GK20A_TIMEOUT_FPGA;
	} else {
		g->gr_idle_timeout_default = (u32)ULONG_MAX;
	}
	g->ch_wdt_timeout_ms = platform->ch_wdt_timeout_ms;
	g->fifo_eng_timeout_us = GRFIFO_TIMEOUT_CHECK_PERIOD_US;
}

static void nvgpu_init_timeslice(struct gk20a *g)
{
	g->runlist_interleave = true;

	g->timeslice_low_priority_us = 1300;
	g->timeslice_medium_priority_us = 2600;
	g->timeslice_high_priority_us = 5200;

	g->min_timeslice_us = 1000;
	g->max_timeslice_us = 50000;
}

static void nvgpu_init_pm_vars(struct gk20a *g)
{
	struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g));

	/*
	 * Set up initial power settings. For non-silicon platforms, disable
	 * power features; for silicon platforms, read from platform data.
	 */
	g->slcg_enabled =
		nvgpu_platform_is_silicon(g) ? platform->enable_slcg : false;
	g->blcg_enabled =
		nvgpu_platform_is_silicon(g) ? platform->enable_blcg : false;
	g->elcg_enabled =
		nvgpu_platform_is_silicon(g) ? platform->enable_elcg : false;
	g->elpg_enabled =
		nvgpu_platform_is_silicon(g) ? platform->enable_elpg : false;
	g->aelpg_enabled =
		nvgpu_platform_is_silicon(g) ? platform->enable_aelpg : false;
	g->mscg_enabled =
		nvgpu_platform_is_silicon(g) ? platform->enable_mscg : false;
	g->can_elpg =
		nvgpu_platform_is_silicon(g) ? platform->can_elpg_init : false;

	__nvgpu_set_enabled(g, NVGPU_GPU_CAN_ELCG,
		nvgpu_platform_is_silicon(g) ? platform->can_elcg : false);
	__nvgpu_set_enabled(g, NVGPU_GPU_CAN_SLCG,
		nvgpu_platform_is_silicon(g) ? platform->can_slcg : false);
	__nvgpu_set_enabled(g, NVGPU_GPU_CAN_BLCG,
		nvgpu_platform_is_silicon(g) ? platform->can_blcg : false);

	g->aggressive_sync_destroy = platform->aggressive_sync_destroy;
	g->aggressive_sync_destroy_thresh =
		platform->aggressive_sync_destroy_thresh;
	g->has_syncpoints = platform->has_syncpoints;
#ifdef CONFIG_NVGPU_SUPPORT_CDE
	g->has_cde = platform->has_cde;
#endif
	g->ptimer_src_freq = platform->ptimer_src_freq;
	g->support_pmu = support_gk20a_pmu(dev_from_gk20a(g));
	g->can_railgate = platform->can_railgate_init;
	g->ldiv_slowdown_factor = platform->ldiv_slowdown_factor_init;
	/* if default delay is not set, set default delay to 500msec */
	if (platform->railgate_delay_init)
		g->railgate_delay = platform->railgate_delay_init;
	else
		g->railgate_delay = NVGPU_DEFAULT_RAILGATE_IDLE_TIMEOUT;
	__nvgpu_set_enabled(g, NVGPU_PMU_PERFMON, platform->enable_perfmon);

	/* set default values to aelpg parameters */
	g->pmu.aelpg_param[0] = APCTRL_SAMPLING_PERIOD_PG_DEFAULT_US;
	g->pmu.aelpg_param[1] = APCTRL_MINIMUM_IDLE_FILTER_DEFAULT_US;
	g->pmu.aelpg_param[2] = APCTRL_MINIMUM_TARGET_SAVING_DEFAULT_US;
	g->pmu.aelpg_param[3] = APCTRL_POWER_BREAKEVEN_DEFAULT_US;
	g->pmu.aelpg_param[4] = APCTRL_CYCLES_PER_SAMPLE_MAX_DEFAULT;

	__nvgpu_set_enabled(g, NVGPU_SUPPORT_ASPM, !platform->disable_aspm);
}

static void nvgpu_init_vbios_vars(struct gk20a *g)
{
	struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g));

	__nvgpu_set_enabled(g, NVGPU_PMU_RUN_PREOS, platform->run_preos);
	g->vbios_min_version = platform->vbios_min_version;
}

static void nvgpu_init_ltc_vars(struct gk20a *g)
{
	struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g));

	g->ltc_streamid = platform->ltc_streamid;
}

static void nvgpu_init_mm_vars(struct gk20a *g)
{
	struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g));

	g->mm.disable_bigpage = platform->disable_bigpage;
	__nvgpu_set_enabled(g, NVGPU_MM_HONORS_APERTURE,
			    platform->honors_aperture);
	__nvgpu_set_enabled(g, NVGPU_MM_UNIFIED_MEMORY,
			    platform->unified_memory);
	__nvgpu_set_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES,
			    platform->unify_address_spaces);

	nvgpu_mutex_init(&g->mm.tlb_lock);
	nvgpu_mutex_init(&g->mm.priv_lock);
}

int nvgpu_probe(struct gk20a *g,
		const char *debugfs_symlink,
		const char *interface_name,
		struct class *class)
{
	struct device *dev = dev_from_gk20a(g);
	struct gk20a_platform *platform = dev_get_drvdata(dev);
	int err = 0;

	nvgpu_init_vars(g);
	nvgpu_init_gr_vars(g);
	nvgpu_init_timeout(g);
	nvgpu_init_timeslice(g);
	nvgpu_init_pm_vars(g);
	nvgpu_init_vbios_vars(g);
	nvgpu_init_ltc_vars(g);
	err = nvgpu_init_soc_vars(g);
	if (err) {
		nvgpu_err(g, "init soc vars failed");
		return err;
	}

	/* Initialize the platform interface. */
	err = platform->probe(dev);
	if (err) {
		if (err == -EPROBE_DEFER)
			nvgpu_info(g, "platform probe failed");
		else
			nvgpu_err(g, "platform probe failed");
		return err;
	}

	nvgpu_init_mm_vars(g);

	/* platform probe can defer; do user init only if probe succeeds */
	err = gk20a_user_init(dev, interface_name, class);
	if (err)
		return err;

	if (platform->late_probe) {
		err = platform->late_probe(dev);
		if (err) {
			nvgpu_err(g, "late probe failed");
			return err;
		}
	}

	nvgpu_create_sysfs(dev);
	gk20a_debug_init(g, debugfs_symlink);

	g->dbg_regops_tmp_buf = nvgpu_kzalloc(g, SZ_4K);
	if (!g->dbg_regops_tmp_buf) {
		nvgpu_err(g, "couldn't allocate regops tmp buf");
		return -ENOMEM;
	}
	g->dbg_regops_tmp_buf_ops =
		SZ_4K / sizeof(g->dbg_regops_tmp_buf[0]);

	g->remove_support = gk20a_remove_support;

	nvgpu_ref_init(&g->refcount);

	return 0;
}

/**
 * cyclic_delta - Returns delta of cyclic integers a and b.
 *
 * @a - First integer
 * @b - Second integer
 *
 * Note: if a is ahead of b, delta is positive.
 */
static int cyclic_delta(int a, int b)
{
	return a - b;
}
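
/*
 * Worked example (illustrative): suppose the counters wrap. With
 * a = INT_MAX - 1 and b = INT_MIN + 2 (i.e. b has advanced 4 counts past a
 * and wrapped), a - b evaluates to -4 under the kernel's wrapping integer
 * semantics, correctly reporting that a is 4 counts behind b even though a
 * numerically compares greater than b.
 */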

/**
 * nvgpu_wait_for_deferred_interrupts - Wait for interrupts to complete
 *
 * @g - The GPU to wait on.
 *
 * Waits until all interrupt handlers that have been scheduled to run have
 * completed.
 */
void nvgpu_wait_for_deferred_interrupts(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	int stall_irq_threshold = atomic_read(&l->hw_irq_stall_count);
	int nonstall_irq_threshold = atomic_read(&l->hw_irq_nonstall_count);

	/* wait until all stalling irqs are handled */
	NVGPU_COND_WAIT(&l->sw_irq_stall_last_handled_wq,
		cyclic_delta(stall_irq_threshold,
			atomic_read(&l->sw_irq_stall_last_handled))
		<= 0, 0);

	/* wait until all non-stalling irqs are handled */
	NVGPU_COND_WAIT(&l->sw_irq_nonstall_last_handled_wq,
		cyclic_delta(nonstall_irq_threshold,
			atomic_read(&l->sw_irq_nonstall_last_handled))
		<= 0, 0);
}

static void nvgpu_free_gk20a(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

	kfree(l);
}

void nvgpu_init_gk20a(struct gk20a *g)
{
	g->free = nvgpu_free_gk20a;
}
22
drivers/gpu/nvgpu/os/linux/driver_common.h
Normal file
@@ -0,0 +1,22 @@

/*
 * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef NVGPU_LINUX_DRIVER_COMMON
#define NVGPU_LINUX_DRIVER_COMMON

void nvgpu_init_gk20a(struct gk20a *g);

#endif
29
drivers/gpu/nvgpu/os/linux/dt.c
Normal file
@@ -0,0 +1,29 @@

/*
 * Copyright (c) 2018, NVIDIA Corporation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <nvgpu/dt.h>
#include <linux/of.h>

#include "os_linux.h"

int nvgpu_dt_read_u32_index(struct gk20a *g, const char *name,
			    u32 index, u32 *value)
{
	struct device *dev = dev_from_gk20a(g);
	struct device_node *np = dev->of_node;

	return of_property_read_u32_index(np, name, index, value);
}
117
drivers/gpu/nvgpu/os/linux/firmware.c
Normal file
@@ -0,0 +1,117 @@

/*
 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/firmware.h>

#include <nvgpu/kmem.h>
#include <nvgpu/bug.h>
#include <nvgpu/firmware.h>

#include "gk20a/gk20a.h"
#include "platform_gk20a.h"
#include "os_linux.h"

static const struct firmware *do_request_firmware(struct device *dev,
		const char *prefix, const char *fw_name, int flags)
{
	const struct firmware *fw;
	char *fw_path = NULL;
	int path_len, err;

	if (prefix) {
		path_len = strlen(prefix) + strlen(fw_name);
		path_len += 2; /* for the path separator and zero terminator */

		fw_path = nvgpu_kzalloc(get_gk20a(dev),
					sizeof(*fw_path) * path_len);
		if (!fw_path)
			return NULL;

		sprintf(fw_path, "%s/%s", prefix, fw_name);
		fw_name = fw_path;
	}

	if (flags & NVGPU_REQUEST_FIRMWARE_NO_WARN)
		err = request_firmware_direct(&fw, fw_name, dev);
	else
		err = request_firmware(&fw, fw_name, dev);

	nvgpu_kfree(get_gk20a(dev), fw_path);
	if (err)
		return NULL;
	return fw;
}

/*
 * This is a simple wrapper around request_firmware that takes 'fw_name' and
 * applies an IP specific relative path prefix to it. The caller is
 * responsible for calling nvgpu_release_firmware later.
 */
struct nvgpu_firmware *nvgpu_request_firmware(struct gk20a *g,
					      const char *fw_name,
					      int flags)
{
	struct device *dev = dev_from_gk20a(g);
	struct nvgpu_firmware *fw;
	const struct firmware *linux_fw;

	/*
	 * current->fs is NULL when calling from SYS_EXIT.
	 * Add a check here to prevent a crash in request_firmware.
	 */
	if (!current->fs || !fw_name)
		return NULL;

	fw = nvgpu_kzalloc(g, sizeof(*fw));
	if (!fw)
		return NULL;

	linux_fw = do_request_firmware(dev, g->name, fw_name, flags);

#ifdef CONFIG_TEGRA_GK20A
	/* TO BE REMOVED - Support loading from legacy SOC specific path. */
	if (!linux_fw && !(flags & NVGPU_REQUEST_FIRMWARE_NO_SOC)) {
		struct gk20a_platform *platform = gk20a_get_platform(dev);

		linux_fw = do_request_firmware(dev,
				platform->soc_name, fw_name, flags);
	}
#endif

	if (!linux_fw)
		goto err;

	fw->data = nvgpu_kmalloc(g, linux_fw->size);
	if (!fw->data)
		goto err_release;

	memcpy(fw->data, linux_fw->data, linux_fw->size);
	fw->size = linux_fw->size;

	release_firmware(linux_fw);

	return fw;

err_release:
	release_firmware(linux_fw);
err:
	nvgpu_kfree(g, fw);
	return NULL;
}

void nvgpu_release_firmware(struct gk20a *g, struct nvgpu_firmware *fw)
{
	if (!fw)
		return;

	nvgpu_kfree(g, fw->data);
	nvgpu_kfree(g, fw);
}
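
/*
 * Illustrative usage sketch (not part of this commit; the blob name
 * "fecs.bin" is an assumption): request a firmware image relative to the
 * GPU's IP directory and release it when done.
 */
static int example_load_fw(struct gk20a *g)
{
	struct nvgpu_firmware *fw;

	/* Resolves to "<g->name>/fecs.bin" inside do_request_firmware(). */
	fw = nvgpu_request_firmware(g, "fecs.bin", 0);
	if (!fw)
		return -ENOENT;

	/* ... copy fw->data (fw->size bytes) to the engine ... */

	nvgpu_release_firmware(g, fw);
	return 0;
}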

55
drivers/gpu/nvgpu/os/linux/fuse.c
Normal file
@@ -0,0 +1,55 @@

/*
 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */

#include <soc/tegra/fuse.h>

#include <nvgpu/fuse.h>

int nvgpu_tegra_get_gpu_speedo_id(struct gk20a *g)
{
	return tegra_sku_info.gpu_speedo_id;
}

/*
 * Use the tegra_fuse_control_read/write() APIs for fuse offsets up to 0x100.
 * Use the tegra_fuse_readl/writel() APIs for fuse offsets above 0x100.
 */
void nvgpu_tegra_fuse_write_bypass(struct gk20a *g, u32 val)
{
	tegra_fuse_control_write(val, FUSE_FUSEBYPASS_0);
}

void nvgpu_tegra_fuse_write_access_sw(struct gk20a *g, u32 val)
{
	tegra_fuse_control_write(val, FUSE_WRITE_ACCESS_SW_0);
}

void nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(struct gk20a *g, u32 val)
{
	tegra_fuse_writel(val, FUSE_OPT_GPU_TPC0_DISABLE_0);
}

void nvgpu_tegra_fuse_write_opt_gpu_tpc1_disable(struct gk20a *g, u32 val)
{
	tegra_fuse_writel(val, FUSE_OPT_GPU_TPC1_DISABLE_0);
}

int nvgpu_tegra_fuse_read_gcplex_config_fuse(struct gk20a *g, u32 *val)
{
	return tegra_fuse_readl(FUSE_GCPLEX_CONFIG_FUSE_0, val);
}

int nvgpu_tegra_fuse_read_reserved_calib(struct gk20a *g, u32 *val)
{
	return tegra_fuse_readl(FUSE_RESERVED_CALIB0_0, val);
}
122
drivers/gpu/nvgpu/os/linux/intr.c
Normal file
@@ -0,0 +1,122 @@

/*
 * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */

#include <trace/events/gk20a.h>
#include <linux/irqreturn.h>

#include "gk20a/gk20a.h"
#include "gk20a/mc_gk20a.h"

#include <nvgpu/atomic.h>
#include <nvgpu/unit.h>
#include "os_linux.h"

irqreturn_t nvgpu_intr_stall(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	u32 mc_intr_0;

	trace_mc_gk20a_intr_stall(g->name);

	if (!g->power_on)
		return IRQ_NONE;

	/* not from gpu when sharing irq with others */
	mc_intr_0 = g->ops.mc.intr_stall(g);
	if (unlikely(!mc_intr_0))
		return IRQ_NONE;

	g->ops.mc.intr_stall_pause(g);

	atomic_inc(&l->hw_irq_stall_count);

	trace_mc_gk20a_intr_stall_done(g->name);

	return IRQ_WAKE_THREAD;
}

irqreturn_t nvgpu_intr_thread_stall(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	int hw_irq_count;

	nvgpu_log(g, gpu_dbg_intr, "interrupt thread launched");

	trace_mc_gk20a_intr_thread_stall(g->name);

	hw_irq_count = atomic_read(&l->hw_irq_stall_count);
	g->ops.mc.isr_stall(g);
	g->ops.mc.intr_stall_resume(g);
	/* sync handled irq counter before re-enabling interrupts */
	atomic_set(&l->sw_irq_stall_last_handled, hw_irq_count);

	nvgpu_cond_broadcast(&l->sw_irq_stall_last_handled_wq);

	trace_mc_gk20a_intr_thread_stall_done(g->name);

	return IRQ_HANDLED;
}

irqreturn_t nvgpu_intr_nonstall(struct gk20a *g)
{
	u32 non_stall_intr_val;
	u32 hw_irq_count;
	int ops_old, ops_new, ops = 0;
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

	if (!g->power_on)
		return IRQ_NONE;

	/* not from gpu when sharing irq with others */
	non_stall_intr_val = g->ops.mc.intr_nonstall(g);
	if (unlikely(!non_stall_intr_val))
		return IRQ_NONE;

	g->ops.mc.intr_nonstall_pause(g);

	ops = g->ops.mc.isr_nonstall(g);
	if (ops) {
		do {
			ops_old = atomic_read(&l->nonstall_ops);
			ops_new = ops_old | ops;
		} while (ops_old != atomic_cmpxchg(&l->nonstall_ops,
						ops_old, ops_new));

		queue_work(l->nonstall_work_queue, &l->nonstall_fn_work);
	}

	hw_irq_count = atomic_inc_return(&l->hw_irq_nonstall_count);

	/* sync handled irq counter before re-enabling interrupts */
	atomic_set(&l->sw_irq_nonstall_last_handled, hw_irq_count);

	g->ops.mc.intr_nonstall_resume(g);

	nvgpu_cond_broadcast(&l->sw_irq_nonstall_last_handled_wq);

	return IRQ_HANDLED;
}
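
/*
 * Note on the cmpxchg loop above (illustrative sketch, not part of this
 * commit): concurrent non-stall ISRs may race to post work, so each new ops
 * bitmask is OR-merged into l->nonstall_ops atomically; the worker below
 * drains the accumulated mask with atomic_xchg(). The same lock-free
 * accumulate in isolation:
 */
static void example_accumulate_ops(atomic_t *nonstall_ops, int ops)
{
	int old, merged;

	do {
		old = atomic_read(nonstall_ops);
		merged = old | ops;
	} while (old != atomic_cmpxchg(nonstall_ops, old, merged));
}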

void nvgpu_intr_nonstall_cb(struct work_struct *work)
{
	struct nvgpu_os_linux *l =
		container_of(work, struct nvgpu_os_linux, nonstall_fn_work);
	struct gk20a *g = &l->g;

	do {
		u32 ops;

		ops = atomic_xchg(&l->nonstall_ops, 0);
		mc_gk20a_handle_intr_nonstall(g, ops);
	} while (atomic_read(&l->nonstall_ops) != 0);
}
22
drivers/gpu/nvgpu/os/linux/intr.h
Normal file
@@ -0,0 +1,22 @@

/*
 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */

#ifndef __NVGPU_LINUX_INTR_H__
#define __NVGPU_LINUX_INTR_H__
struct gk20a;

irqreturn_t nvgpu_intr_stall(struct gk20a *g);
irqreturn_t nvgpu_intr_thread_stall(struct gk20a *g);
irqreturn_t nvgpu_intr_nonstall(struct gk20a *g);
void nvgpu_intr_nonstall_cb(struct work_struct *work);
#endif
118
drivers/gpu/nvgpu/os/linux/io.c
Normal file
@@ -0,0 +1,118 @@

/*
 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */

#include <nvgpu/io.h>
#include <nvgpu/types.h>

#include "os_linux.h"
#include "gk20a/gk20a.h"

void nvgpu_writel(struct gk20a *g, u32 r, u32 v)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

	if (unlikely(!l->regs)) {
		__gk20a_warn_on_no_regs();
		nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v);
	} else {
		writel_relaxed(v, l->regs + r);
		nvgpu_wmb();
		nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x", r, v);
	}
}

u32 nvgpu_readl(struct gk20a *g, u32 r)
{
	u32 v = __nvgpu_readl(g, r);

	if (v == 0xffffffff)
		__nvgpu_check_gpu_state(g);

	return v;
}

u32 __nvgpu_readl(struct gk20a *g, u32 r)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	u32 v = 0xffffffff;

	if (unlikely(!l->regs)) {
		__gk20a_warn_on_no_regs();
		nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v);
	} else {
		v = readl(l->regs + r);
		nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x", r, v);
	}

	return v;
}

void nvgpu_writel_check(struct gk20a *g, u32 r, u32 v)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

	if (unlikely(!l->regs)) {
		__gk20a_warn_on_no_regs();
		nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v);
	} else {
		nvgpu_wmb();
		do {
			writel_relaxed(v, l->regs + r);
		} while (readl(l->regs + r) != v);
		nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x", r, v);
	}
}
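
/*
 * Note (illustrative): writel_relaxed() is a posted write; the readl() in
 * the loop above both forces the write out to the device and verifies the
 * value stuck. Registers with read side effects or self-clearing bits
 * should not be written through nvgpu_writel_check(), since the read-back
 * might never match and the loop would spin forever.
 */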

void nvgpu_bar1_writel(struct gk20a *g, u32 b, u32 v)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

	if (unlikely(!l->bar1)) {
		__gk20a_warn_on_no_regs();
		nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x (failed)", b, v);
	} else {
		nvgpu_wmb();
		writel_relaxed(v, l->bar1 + b);
		nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x", b, v);
	}
}

u32 nvgpu_bar1_readl(struct gk20a *g, u32 b)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	u32 v = 0xffffffff;

	if (unlikely(!l->bar1)) {
		__gk20a_warn_on_no_regs();
		nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x (failed)", b, v);
	} else {
		v = readl(l->bar1 + b);
		nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x", b, v);
	}

	return v;
}

bool nvgpu_io_exists(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

	return l->regs != NULL;
}

bool nvgpu_io_valid_reg(struct gk20a *g, u32 r)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

	return r < resource_size(l->regs);
}
29
drivers/gpu/nvgpu/os/linux/io_usermode.c
Normal file
@@ -0,0 +1,29 @@

/*
 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */

#include <nvgpu/io.h>
#include <nvgpu/types.h>

#include "os_linux.h"
#include "gk20a/gk20a.h"

#include <nvgpu/hw/gv11b/hw_usermode_gv11b.h>

void nvgpu_usermode_writel(struct gk20a *g, u32 r, u32 v)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	void __iomem *reg = l->usermode_regs + (r - usermode_cfg0_r());

	writel_relaxed(v, reg);
	nvgpu_log(g, gpu_dbg_reg, "usermode r=0x%x v=0x%x", r, v);
}
296
drivers/gpu/nvgpu/os/linux/ioctl.c
Normal file
@@ -0,0 +1,296 @@

/*
 * NVGPU IOCTLs
 *
 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/file.h>

#include <nvgpu/nvgpu_common.h>
#include <nvgpu/ctxsw_trace.h>

#include "gk20a/gk20a.h"
#include "gk20a/dbg_gpu_gk20a.h"

#include "ioctl_channel.h"
#include "ioctl_ctrl.h"
#include "ioctl_as.h"
#include "ioctl_tsg.h"
#include "ioctl_dbg.h"
#include "module.h"
#include "os_linux.h"
#include "ctxsw_trace.h"
#include "platform_gk20a.h"

#define GK20A_NUM_CDEVS 7

const struct file_operations gk20a_channel_ops = {
	.owner = THIS_MODULE,
	.release = gk20a_channel_release,
	.open = gk20a_channel_open,
#ifdef CONFIG_COMPAT
	.compat_ioctl = gk20a_channel_ioctl,
#endif
	.unlocked_ioctl = gk20a_channel_ioctl,
};

static const struct file_operations gk20a_ctrl_ops = {
	.owner = THIS_MODULE,
	.release = gk20a_ctrl_dev_release,
	.open = gk20a_ctrl_dev_open,
	.unlocked_ioctl = gk20a_ctrl_dev_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl = gk20a_ctrl_dev_ioctl,
#endif
};

static const struct file_operations gk20a_dbg_ops = {
	.owner = THIS_MODULE,
	.release = gk20a_dbg_gpu_dev_release,
	.open = gk20a_dbg_gpu_dev_open,
	.unlocked_ioctl = gk20a_dbg_gpu_dev_ioctl,
	.poll = gk20a_dbg_gpu_dev_poll,
#ifdef CONFIG_COMPAT
	.compat_ioctl = gk20a_dbg_gpu_dev_ioctl,
#endif
};

static const struct file_operations gk20a_as_ops = {
	.owner = THIS_MODULE,
	.release = gk20a_as_dev_release,
	.open = gk20a_as_dev_open,
#ifdef CONFIG_COMPAT
	.compat_ioctl = gk20a_as_dev_ioctl,
#endif
	.unlocked_ioctl = gk20a_as_dev_ioctl,
};

/*
 * Note: We use a different 'open' to trigger handling of the profiler
 * session. Most of the code is shared between them; if the code gets too
 * tangled trying to handle both in the same path, we can separate them
 * cleanly at some point.
 */
static const struct file_operations gk20a_prof_ops = {
	.owner = THIS_MODULE,
	.release = gk20a_dbg_gpu_dev_release,
	.open = gk20a_prof_gpu_dev_open,
	.unlocked_ioctl = gk20a_dbg_gpu_dev_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl = gk20a_dbg_gpu_dev_ioctl,
#endif
};

static const struct file_operations gk20a_tsg_ops = {
	.owner = THIS_MODULE,
	.release = nvgpu_ioctl_tsg_dev_release,
	.open = nvgpu_ioctl_tsg_dev_open,
#ifdef CONFIG_COMPAT
	.compat_ioctl = nvgpu_ioctl_tsg_dev_ioctl,
#endif
	.unlocked_ioctl = nvgpu_ioctl_tsg_dev_ioctl,
};

#ifdef CONFIG_GK20A_CTXSW_TRACE
static const struct file_operations gk20a_ctxsw_ops = {
	.owner = THIS_MODULE,
	.release = gk20a_ctxsw_dev_release,
	.open = gk20a_ctxsw_dev_open,
#ifdef CONFIG_COMPAT
	.compat_ioctl = gk20a_ctxsw_dev_ioctl,
#endif
	.unlocked_ioctl = gk20a_ctxsw_dev_ioctl,
	.poll = gk20a_ctxsw_dev_poll,
	.read = gk20a_ctxsw_dev_read,
	.mmap = gk20a_ctxsw_dev_mmap,
};
#endif

static const struct file_operations gk20a_sched_ops = {
	.owner = THIS_MODULE,
	.release = gk20a_sched_dev_release,
	.open = gk20a_sched_dev_open,
#ifdef CONFIG_COMPAT
	.compat_ioctl = gk20a_sched_dev_ioctl,
#endif
	.unlocked_ioctl = gk20a_sched_dev_ioctl,
	.poll = gk20a_sched_dev_poll,
	.read = gk20a_sched_dev_read,
};

static int gk20a_create_device(
	struct device *dev, int devno,
	const char *interface_name, const char *cdev_name,
	struct cdev *cdev, struct device **out,
	const struct file_operations *ops,
	struct class *class)
{
	struct device *subdev;
	int err;
	struct gk20a *g = gk20a_from_dev(dev);

	nvgpu_log_fn(g, " ");

	cdev_init(cdev, ops);
	cdev->owner = THIS_MODULE;

	err = cdev_add(cdev, devno, 1);
	if (err) {
		dev_err(dev, "failed to add %s cdev\n", cdev_name);
		return err;
	}

	subdev = device_create(class, NULL, devno, NULL,
		interface_name, cdev_name);

	if (IS_ERR(subdev)) {
		err = PTR_ERR(subdev);
		cdev_del(cdev);
		dev_err(dev, "failed to create %s device for %s\n",
			cdev_name, dev_name(dev));
		return err;
	}

	*out = subdev;
	return 0;
}

void gk20a_user_deinit(struct device *dev, struct class *class)
{
	struct gk20a *g = gk20a_from_dev(dev);
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

	if (l->channel.node) {
		device_destroy(class, l->channel.cdev.dev);
		cdev_del(&l->channel.cdev);
	}

	if (l->as_dev.node) {
		device_destroy(class, l->as_dev.cdev.dev);
		cdev_del(&l->as_dev.cdev);
	}

	if (l->ctrl.node) {
		device_destroy(class, l->ctrl.cdev.dev);
		cdev_del(&l->ctrl.cdev);
	}

	if (l->dbg.node) {
		device_destroy(class, l->dbg.cdev.dev);
		cdev_del(&l->dbg.cdev);
	}

	if (l->prof.node) {
		device_destroy(class, l->prof.cdev.dev);
		cdev_del(&l->prof.cdev);
	}

	if (l->tsg.node) {
		device_destroy(class, l->tsg.cdev.dev);
		cdev_del(&l->tsg.cdev);
	}

	if (l->ctxsw.node) {
		device_destroy(class, l->ctxsw.cdev.dev);
		cdev_del(&l->ctxsw.cdev);
	}

	if (l->sched.node) {
		device_destroy(class, l->sched.cdev.dev);
		cdev_del(&l->sched.cdev);
	}

	if (l->cdev_region)
		unregister_chrdev_region(l->cdev_region, GK20A_NUM_CDEVS);
}

int gk20a_user_init(struct device *dev, const char *interface_name,
		    struct class *class)
{
	int err;
	dev_t devno;
	struct gk20a *g = gk20a_from_dev(dev);
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

	err = alloc_chrdev_region(&devno, 0, GK20A_NUM_CDEVS, dev_name(dev));
	if (err) {
		dev_err(dev, "failed to allocate devno\n");
		goto fail;
	}
	l->cdev_region = devno;

	err = gk20a_create_device(dev, devno++, interface_name, "",
				  &l->channel.cdev, &l->channel.node,
				  &gk20a_channel_ops,
				  class);
	if (err)
		goto fail;

	err = gk20a_create_device(dev, devno++, interface_name, "-as",
				  &l->as_dev.cdev, &l->as_dev.node,
				  &gk20a_as_ops,
				  class);
	if (err)
		goto fail;

	err = gk20a_create_device(dev, devno++, interface_name, "-ctrl",
				  &l->ctrl.cdev, &l->ctrl.node,
				  &gk20a_ctrl_ops,
				  class);
	if (err)
		goto fail;

	err = gk20a_create_device(dev, devno++, interface_name, "-dbg",
				  &l->dbg.cdev, &l->dbg.node,
				  &gk20a_dbg_ops,
				  class);
	if (err)
		goto fail;

	err = gk20a_create_device(dev, devno++, interface_name, "-prof",
				  &l->prof.cdev, &l->prof.node,
				  &gk20a_prof_ops,
				  class);
	if (err)
		goto fail;

	err = gk20a_create_device(dev, devno++, interface_name, "-tsg",
				  &l->tsg.cdev, &l->tsg.node,
				  &gk20a_tsg_ops,
				  class);
	if (err)
		goto fail;

#if defined(CONFIG_GK20A_CTXSW_TRACE)
	err = gk20a_create_device(dev, devno++, interface_name, "-ctxsw",
				  &l->ctxsw.cdev, &l->ctxsw.node,
				  &gk20a_ctxsw_ops,
				  class);
	if (err)
		goto fail;
#endif

	err = gk20a_create_device(dev, devno++, interface_name, "-sched",
				  &l->sched.cdev, &l->sched.node,
				  &gk20a_sched_ops,
				  class);
	if (err)
		goto fail;

	return 0;
fail:
	gk20a_user_deinit(dev, &nvgpu_class);
	return err;
}
23
drivers/gpu/nvgpu/os/linux/ioctl.h
Normal file
@@ -0,0 +1,23 @@

/*
 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */
#ifndef __NVGPU_IOCTL_H__
#define __NVGPU_IOCTL_H__

struct device;
struct class;

int gk20a_user_init(struct device *dev, const char *interface_name,
		    struct class *class);
void gk20a_user_deinit(struct device *dev, struct class *class);

#endif
423
drivers/gpu/nvgpu/os/linux/ioctl_as.c
Normal file
@@ -0,0 +1,423 @@
/*
 * GK20A Address Spaces
 *
 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */

#include <linux/cdev.h>
#include <linux/uaccess.h>
#include <linux/fs.h>

#include <trace/events/gk20a.h>

#include <uapi/linux/nvgpu.h>

#include <nvgpu/gmmu.h>
#include <nvgpu/vm_area.h>
#include <nvgpu/log2.h>

#include <nvgpu/linux/vm.h>

#include "gk20a/gk20a.h"
#include "platform_gk20a.h"
#include "ioctl_as.h"
#include "os_linux.h"

static u32 gk20a_as_translate_as_alloc_space_flags(struct gk20a *g, u32 flags)
{
	u32 core_flags = 0;

	if (flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET)
		core_flags |= NVGPU_VM_AREA_ALLOC_FIXED_OFFSET;
	if (flags & NVGPU_AS_ALLOC_SPACE_FLAGS_SPARSE)
		core_flags |= NVGPU_VM_AREA_ALLOC_SPARSE;

	return core_flags;
}

static int gk20a_as_ioctl_bind_channel(
		struct gk20a_as_share *as_share,
		struct nvgpu_as_bind_channel_args *args)
{
	int err = 0;
	struct channel_gk20a *ch;
	struct gk20a *g = gk20a_from_vm(as_share->vm);

	nvgpu_log_fn(g, " ");

	ch = gk20a_get_channel_from_file(args->channel_fd);
	if (!ch)
		return -EINVAL;

	if (gk20a_channel_as_bound(ch)) {
		err = -EINVAL;
		goto out;
	}

	/* this will set channel_gk20a->vm */
	err = ch->g->ops.mm.vm_bind_channel(as_share->vm, ch);

out:
	gk20a_channel_put(ch);
	return err;
}

static int gk20a_as_ioctl_alloc_space(
		struct gk20a_as_share *as_share,
		struct nvgpu_as_alloc_space_args *args)
{
	struct gk20a *g = gk20a_from_vm(as_share->vm);

	nvgpu_log_fn(g, " ");
	return nvgpu_vm_area_alloc(as_share->vm, args->pages, args->page_size,
				   &args->o_a.offset,
				   gk20a_as_translate_as_alloc_space_flags(g,
								args->flags));
}

static int gk20a_as_ioctl_free_space(
		struct gk20a_as_share *as_share,
		struct nvgpu_as_free_space_args *args)
{
	struct gk20a *g = gk20a_from_vm(as_share->vm);

	nvgpu_log_fn(g, " ");
	return nvgpu_vm_area_free(as_share->vm, args->offset);
}

static int gk20a_as_ioctl_map_buffer_ex(
		struct gk20a_as_share *as_share,
		struct nvgpu_as_map_buffer_ex_args *args)
{
	struct gk20a *g = gk20a_from_vm(as_share->vm);

	nvgpu_log_fn(g, " ");

	/* unsupported, direct kind control must be used */
	if (!(args->flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL)) {
		struct gk20a *g = as_share->vm->mm->g;

		nvgpu_log_info(g, "Direct kind control must be requested");
		return -EINVAL;
	}

	return nvgpu_vm_map_buffer(as_share->vm, args->dmabuf_fd,
				   &args->offset, args->flags,
				   args->compr_kind,
				   args->incompr_kind,
				   args->buffer_offset,
				   args->mapping_size,
				   NULL);
}

static int gk20a_as_ioctl_unmap_buffer(
		struct gk20a_as_share *as_share,
		struct nvgpu_as_unmap_buffer_args *args)
{
	struct gk20a *g = gk20a_from_vm(as_share->vm);

	nvgpu_log_fn(g, " ");

	nvgpu_vm_unmap(as_share->vm, args->offset, NULL);

	return 0;
}

static int gk20a_as_ioctl_map_buffer_batch(
		struct gk20a_as_share *as_share,
		struct nvgpu_as_map_buffer_batch_args *args)
{
	struct gk20a *g = gk20a_from_vm(as_share->vm);
	u32 i;
	int err = 0;

	struct nvgpu_as_unmap_buffer_args __user *user_unmap_args =
		(struct nvgpu_as_unmap_buffer_args __user *)(uintptr_t)
		args->unmaps;
	struct nvgpu_as_map_buffer_ex_args __user *user_map_args =
		(struct nvgpu_as_map_buffer_ex_args __user *)(uintptr_t)
		args->maps;

	struct vm_gk20a_mapping_batch batch;

	nvgpu_log_fn(g, " ");

	if (args->num_unmaps > NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT ||
	    args->num_maps > NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT)
		return -EINVAL;

	nvgpu_vm_mapping_batch_start(&batch);

	for (i = 0; i < args->num_unmaps; ++i) {
		struct nvgpu_as_unmap_buffer_args unmap_args;

		if (copy_from_user(&unmap_args, &user_unmap_args[i],
				   sizeof(unmap_args))) {
			err = -EFAULT;
			break;
		}

		nvgpu_vm_unmap(as_share->vm, unmap_args.offset, &batch);
	}

	if (err) {
		nvgpu_vm_mapping_batch_finish(as_share->vm, &batch);

		args->num_unmaps = i;
		args->num_maps = 0;
		return err;
	}

	for (i = 0; i < args->num_maps; ++i) {
		s16 compressible_kind;
		s16 incompressible_kind;

		struct nvgpu_as_map_buffer_ex_args map_args;
		memset(&map_args, 0, sizeof(map_args));

		if (copy_from_user(&map_args, &user_map_args[i],
				   sizeof(map_args))) {
			err = -EFAULT;
			break;
		}

		if (map_args.flags &
		    NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) {
			compressible_kind = map_args.compr_kind;
			incompressible_kind = map_args.incompr_kind;
		} else {
			/* direct kind control must be used */
			err = -EINVAL;
			break;
		}

		err = nvgpu_vm_map_buffer(
			as_share->vm, map_args.dmabuf_fd,
			&map_args.offset, map_args.flags,
			compressible_kind, incompressible_kind,
			map_args.buffer_offset,
			map_args.mapping_size,
			&batch);
		if (err)
			break;
	}

	nvgpu_vm_mapping_batch_finish(as_share->vm, &batch);

	if (err)
		args->num_maps = i;
	/* note: args->num_unmaps will be unmodified, which is ok
	 * since all unmaps are done */

	return err;
}

static int gk20a_as_ioctl_get_va_regions(
		struct gk20a_as_share *as_share,
		struct nvgpu_as_get_va_regions_args *args)
{
	unsigned int i;
	unsigned int write_entries;
	struct nvgpu_as_va_region __user *user_region_ptr;
	struct vm_gk20a *vm = as_share->vm;
	struct gk20a *g = gk20a_from_vm(vm);
	unsigned int page_sizes = gmmu_page_size_kernel;

	nvgpu_log_fn(g, " ");

	if (!vm->big_pages)
		page_sizes--;

	write_entries = args->buf_size / sizeof(struct nvgpu_as_va_region);
	if (write_entries > page_sizes)
		write_entries = page_sizes;

	user_region_ptr =
		(struct nvgpu_as_va_region __user *)(uintptr_t)args->buf_addr;

	for (i = 0; i < write_entries; ++i) {
		struct nvgpu_as_va_region region;
		struct nvgpu_allocator *vma = vm->vma[i];

		memset(&region, 0, sizeof(struct nvgpu_as_va_region));

		region.page_size = vm->gmmu_page_sizes[i];
		region.offset = nvgpu_alloc_base(vma);
		/* No __aeabi_uldivmod() on some platforms... */
		region.pages = (nvgpu_alloc_end(vma) -
			nvgpu_alloc_base(vma)) >> ilog2(region.page_size);

		if (copy_to_user(user_region_ptr + i, &region, sizeof(region)))
			return -EFAULT;
	}

	args->buf_size =
		page_sizes * sizeof(struct nvgpu_as_va_region);

	return 0;
}

static int nvgpu_as_ioctl_get_sync_ro_map(
		struct gk20a_as_share *as_share,
		struct nvgpu_as_get_sync_ro_map_args *args)
{
#ifdef CONFIG_TEGRA_GK20A_NVHOST
	struct vm_gk20a *vm = as_share->vm;
	struct gk20a *g = gk20a_from_vm(vm);
	u64 base_gpuva;
	u32 sync_size;
	int err = 0;

	if (!g->ops.fifo.get_sync_ro_map)
		return -EINVAL;

	if (!gk20a_platform_has_syncpoints(g))
		return -EINVAL;

	err = g->ops.fifo.get_sync_ro_map(vm, &base_gpuva, &sync_size);
	if (err)
		return err;

	args->base_gpuva = base_gpuva;
	args->sync_size = sync_size;

	return err;
#else
	return -EINVAL;
#endif
}

int gk20a_as_dev_open(struct inode *inode, struct file *filp)
{
	struct nvgpu_os_linux *l;
	struct gk20a_as_share *as_share;
	struct gk20a *g;
	int err;

	l = container_of(inode->i_cdev, struct nvgpu_os_linux, as_dev.cdev);
	g = &l->g;

	nvgpu_log_fn(g, " ");

	err = gk20a_as_alloc_share(g, 0, 0, &as_share);
	if (err) {
		nvgpu_log_fn(g, "failed to alloc share");
		return err;
	}

	filp->private_data = as_share;
	return 0;
}

int gk20a_as_dev_release(struct inode *inode, struct file *filp)
{
	struct gk20a_as_share *as_share = filp->private_data;

	if (!as_share)
		return 0;

	return gk20a_as_release_share(as_share);
}

long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	int err = 0;
	struct gk20a_as_share *as_share = filp->private_data;
	struct gk20a *g = gk20a_from_as(as_share->as);

	u8 buf[NVGPU_AS_IOCTL_MAX_ARG_SIZE];

	nvgpu_log_fn(g, "start %d", _IOC_NR(cmd));

	if ((_IOC_TYPE(cmd) != NVGPU_AS_IOCTL_MAGIC) ||
	    (_IOC_NR(cmd) == 0) ||
	    (_IOC_NR(cmd) > NVGPU_AS_IOCTL_LAST) ||
	    (_IOC_SIZE(cmd) > NVGPU_AS_IOCTL_MAX_ARG_SIZE))
		return -EINVAL;

	memset(buf, 0, sizeof(buf));
	if (_IOC_DIR(cmd) & _IOC_WRITE) {
		if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
			return -EFAULT;
	}

	err = gk20a_busy(g);
	if (err)
		return err;

	switch (cmd) {
	case NVGPU_AS_IOCTL_BIND_CHANNEL:
		trace_gk20a_as_ioctl_bind_channel(g->name);
		err = gk20a_as_ioctl_bind_channel(as_share,
			       (struct nvgpu_as_bind_channel_args *)buf);

		break;
	case NVGPU32_AS_IOCTL_ALLOC_SPACE:
	{
		struct nvgpu32_as_alloc_space_args *args32 =
			(struct nvgpu32_as_alloc_space_args *)buf;
		struct nvgpu_as_alloc_space_args args;

		args.pages = args32->pages;
		args.page_size = args32->page_size;
		args.flags = args32->flags;
		args.o_a.offset = args32->o_a.offset;
		trace_gk20a_as_ioctl_alloc_space(g->name);
		err = gk20a_as_ioctl_alloc_space(as_share, &args);
		args32->o_a.offset = args.o_a.offset;
		break;
	}
	case NVGPU_AS_IOCTL_ALLOC_SPACE:
		trace_gk20a_as_ioctl_alloc_space(g->name);
		err = gk20a_as_ioctl_alloc_space(as_share,
				(struct nvgpu_as_alloc_space_args *)buf);
		break;
	case NVGPU_AS_IOCTL_FREE_SPACE:
		trace_gk20a_as_ioctl_free_space(g->name);
		err = gk20a_as_ioctl_free_space(as_share,
				(struct nvgpu_as_free_space_args *)buf);
		break;
	case NVGPU_AS_IOCTL_MAP_BUFFER_EX:
		trace_gk20a_as_ioctl_map_buffer(g->name);
		err = gk20a_as_ioctl_map_buffer_ex(as_share,
				(struct nvgpu_as_map_buffer_ex_args *)buf);
		break;
	case NVGPU_AS_IOCTL_UNMAP_BUFFER:
		trace_gk20a_as_ioctl_unmap_buffer(g->name);
		err = gk20a_as_ioctl_unmap_buffer(as_share,
				(struct nvgpu_as_unmap_buffer_args *)buf);
		break;
	case NVGPU_AS_IOCTL_GET_VA_REGIONS:
		trace_gk20a_as_ioctl_get_va_regions(g->name);
		err = gk20a_as_ioctl_get_va_regions(as_share,
				(struct nvgpu_as_get_va_regions_args *)buf);
		break;
	case NVGPU_AS_IOCTL_MAP_BUFFER_BATCH:
		err = gk20a_as_ioctl_map_buffer_batch(as_share,
				(struct nvgpu_as_map_buffer_batch_args *)buf);
		break;
	case NVGPU_AS_IOCTL_GET_SYNC_RO_MAP:
		err = nvgpu_as_ioctl_get_sync_ro_map(as_share,
			(struct nvgpu_as_get_sync_ro_map_args *)buf);
		break;
	default:
		err = -ENOTTY;
		break;
	}

	gk20a_idle(g);

	if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
		if (copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)))
			err = -EFAULT;

	return err;
}
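A minimal user-space sketch of driving the map path above, under stated assumptions: the AS device node path is platform-specific (/dev/nvhost-as-gpu is typical for Tegra but not guaranteed), dmabuf_fd refers to an already-exported dma-buf, and the incompr_kind value is illustrative.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

int map_one_buffer(int dmabuf_fd)
{
	struct nvgpu_as_map_buffer_ex_args args;
	int as_fd = open("/dev/nvhost-as-gpu", O_RDWR);	/* assumed path */

	if (as_fd < 0)
		return -1;

	memset(&args, 0, sizeof(args));
	args.dmabuf_fd = dmabuf_fd;
	/* the kernel side above rejects maps without direct kind control */
	args.flags = NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL;
	args.compr_kind = -1;	/* no compressed kind requested */
	args.incompr_kind = 0;	/* kind value is an assumption */

	if (ioctl(as_fd, NVGPU_AS_IOCTL_MAP_BUFFER_EX, &args) < 0) {
		perror("NVGPU_AS_IOCTL_MAP_BUFFER_EX");
		return -1;
	}
	printf("mapped at GPU VA 0x%llx\n", (unsigned long long)args.offset);
	return 0;
}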
30
drivers/gpu/nvgpu/os/linux/ioctl_as.h
Normal file
@@ -0,0 +1,30 @@
/*
 * GK20A Address Spaces
 *
 * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */
#ifndef __NVGPU_COMMON_LINUX_AS_H__
#define __NVGPU_COMMON_LINUX_AS_H__

struct inode;
struct file;

/* MAP_BUFFER_BATCH_LIMIT: the upper limit for num_unmaps and
 * num_maps */
#define NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT 256

/* struct file_operations driver interface */
int gk20a_as_dev_open(struct inode *inode, struct file *filp);
int gk20a_as_dev_release(struct inode *inode, struct file *filp);
long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);

#endif
1388
drivers/gpu/nvgpu/os/linux/ioctl_channel.c
Normal file
File diff suppressed because it is too large
50
drivers/gpu/nvgpu/os/linux/ioctl_channel.h
Normal file
@@ -0,0 +1,50 @@
/*
 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */
#ifndef __NVGPU_IOCTL_CHANNEL_H__
#define __NVGPU_IOCTL_CHANNEL_H__

#include <linux/fs.h>

#include "gk20a/css_gr_gk20a.h"

struct inode;
struct file;
struct gk20a;
struct nvgpu_channel_open_args;

struct gk20a_cs_snapshot_client_linux {
	struct gk20a_cs_snapshot_client cs_client;

	u32 dmabuf_fd;
	struct dma_buf *dma_handler;
};

int gk20a_channel_open(struct inode *inode, struct file *filp);
int gk20a_channel_release(struct inode *inode, struct file *filp);
long gk20a_channel_ioctl(struct file *filp,
			 unsigned int cmd, unsigned long arg);
int gk20a_channel_open_ioctl(struct gk20a *g,
			     struct nvgpu_channel_open_args *args);

int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch);
void gk20a_channel_free_cycle_stats_buffer(struct channel_gk20a *ch);

extern const struct file_operations gk20a_channel_ops;

u32 nvgpu_get_common_runlist_level(u32 level);

u32 nvgpu_get_ioctl_graphics_preempt_mode_flags(u32 graphics_preempt_mode_flags);
u32 nvgpu_get_ioctl_compute_preempt_mode_flags(u32 compute_preempt_mode_flags);
u32 nvgpu_get_ioctl_graphics_preempt_mode(u32 graphics_preempt_mode);
u32 nvgpu_get_ioctl_compute_preempt_mode(u32 compute_preempt_mode);
#endif
562
drivers/gpu/nvgpu/os/linux/ioctl_clk_arb.c
Normal file
@@ -0,0 +1,562 @@
/*
 * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/cdev.h>
#include <linux/file.h>
#include <linux/anon_inodes.h>
#include <linux/uaccess.h>
#include <linux/poll.h>
#ifdef CONFIG_DEBUG_FS
#include <linux/debugfs.h>
#endif
#include <uapi/linux/nvgpu.h>

#include <nvgpu/bitops.h>
#include <nvgpu/lock.h>
#include <nvgpu/kmem.h>
#include <nvgpu/atomic.h>
#include <nvgpu/bug.h>
#include <nvgpu/kref.h>
#include <nvgpu/log.h>
#include <nvgpu/barrier.h>
#include <nvgpu/cond.h>
#include <nvgpu/list.h>
#include <nvgpu/clk_arb.h>

#include "gk20a/gk20a.h"
#include "clk/clk.h"
#include "pstate/pstate.h"
#include "lpwr/lpwr.h"
#include "volt/volt.h"

#ifdef CONFIG_DEBUG_FS
#include "os_linux.h"
#endif

static int nvgpu_clk_arb_release_completion_dev(struct inode *inode,
		struct file *filp)
{
	struct nvgpu_clk_dev *dev = filp->private_data;
	struct nvgpu_clk_session *session = dev->session;

	clk_arb_dbg(session->g, " ");

	nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session);
	nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
	return 0;
}

static inline unsigned int nvgpu_convert_poll_mask(unsigned int nvgpu_poll_mask)
{
	unsigned int poll_mask = 0;

	if (nvgpu_poll_mask & NVGPU_POLLIN)
		poll_mask |= POLLIN;
	if (nvgpu_poll_mask & NVGPU_POLLPRI)
		poll_mask |= POLLPRI;
	if (nvgpu_poll_mask & NVGPU_POLLOUT)
		poll_mask |= POLLOUT;
	if (nvgpu_poll_mask & NVGPU_POLLRDNORM)
		poll_mask |= POLLRDNORM;
	if (nvgpu_poll_mask & NVGPU_POLLHUP)
		poll_mask |= POLLHUP;

	return poll_mask;
}

static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait)
{
	struct nvgpu_clk_dev *dev = filp->private_data;

	clk_arb_dbg(dev->session->g, " ");

	poll_wait(filp, &dev->readout_wq.wq, wait);
	return nvgpu_convert_poll_mask(nvgpu_atomic_xchg(&dev->poll_mask, 0));
}

void nvgpu_clk_arb_event_post_event(struct nvgpu_clk_dev *dev)
{
	nvgpu_cond_broadcast_interruptible(&dev->readout_wq);
}

static int nvgpu_clk_arb_release_event_dev(struct inode *inode,
		struct file *filp)
{
	struct nvgpu_clk_dev *dev = filp->private_data;
	struct nvgpu_clk_session *session = dev->session;
	struct nvgpu_clk_arb *arb;

	arb = session->g->clk_arb;

	clk_arb_dbg(session->g, " ");

	if (arb) {
		nvgpu_spinlock_acquire(&arb->users_lock);
		nvgpu_list_del(&dev->link);
		nvgpu_spinlock_release(&arb->users_lock);
		nvgpu_clk_notification_queue_free(arb->g, &dev->queue);
	}

	nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session);
	nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);

	return 0;
}

static inline u32 nvgpu_convert_gpu_event(u32 nvgpu_event)
{
	u32 nvgpu_gpu_event;

	switch (nvgpu_event) {
	case NVGPU_EVENT_VF_UPDATE:
		nvgpu_gpu_event = NVGPU_GPU_EVENT_VF_UPDATE;
		break;
	case NVGPU_EVENT_ALARM_TARGET_VF_NOT_POSSIBLE:
		nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_TARGET_VF_NOT_POSSIBLE;
		break;
	case NVGPU_EVENT_ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE:
		nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE;
		break;
	case NVGPU_EVENT_ALARM_CLOCK_ARBITER_FAILED:
		nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_CLOCK_ARBITER_FAILED;
		break;
	case NVGPU_EVENT_ALARM_VF_TABLE_UPDATE_FAILED:
		nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_VF_TABLE_UPDATE_FAILED;
		break;
	case NVGPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD:
		nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD;
		break;
	case NVGPU_EVENT_ALARM_POWER_ABOVE_THRESHOLD:
		nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_POWER_ABOVE_THRESHOLD;
		break;
	case NVGPU_EVENT_ALARM_GPU_LOST:
		nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_GPU_LOST;
		break;
	default:
		/* Control shouldn't come here */
		nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_GPU_LOST + 1;
		break;
	}
	return nvgpu_gpu_event;
}

static inline u32 __pending_event(struct nvgpu_clk_dev *dev,
		struct nvgpu_gpu_event_info *info)
{
	u32 tail, head;
	u32 events = 0;
	struct nvgpu_clk_notification *p_notif;

	tail = nvgpu_atomic_read(&dev->queue.tail);
	head = nvgpu_atomic_read(&dev->queue.head);

	head = (tail - head) < dev->queue.size ? head : tail - dev->queue.size;

	if (_WRAPGTEQ(tail, head) && info) {
		head++;
		p_notif = &dev->queue.notifications[head % dev->queue.size];
		events |= nvgpu_convert_gpu_event(p_notif->notification);
		info->event_id = ffs(events) - 1;
		info->timestamp = p_notif->timestamp;
		nvgpu_atomic_set(&dev->queue.head, head);
	}

	return events;
}
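/*
 * Illustrative stand-alone model of the head/tail arithmetic used by
 * __pending_event() above; hypothetical names (toy_queue, toy_pop), only
 * the wrap-around logic mirrors the driver. Indices grow without bound and
 * are reduced modulo size on access, so "tail - head" stays correct across
 * u32 wrap, and a consumer that falls more than size entries behind is
 * snapped forward to the oldest retained entry.
 */
#include <stdint.h>
#include <stdbool.h>

struct toy_queue {
	uint32_t head;		/* last consumed index */
	uint32_t tail;		/* last produced index */
	uint32_t size;		/* capacity of slots[] */
	int slots[8];
};

static bool toy_pop(struct toy_queue *q, int *out)
{
	uint32_t head = q->head;

	/* if the producer overran us, drop to the oldest retained entry */
	if (q->tail - head >= q->size)
		head = q->tail - q->size;

	if ((int32_t)(q->tail - head) <= 0)
		return false;	/* nothing pending */

	head++;
	*out = q->slots[head % q->size];
	q->head = head;
	return true;
}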
static ssize_t nvgpu_clk_arb_read_event_dev(struct file *filp, char __user *buf,
					size_t size, loff_t *off)
{
	struct nvgpu_clk_dev *dev = filp->private_data;
	struct nvgpu_gpu_event_info info;
	ssize_t err;

	clk_arb_dbg(dev->session->g,
		"filp=%p, buf=%p, size=%zu", filp, buf, size);

	if ((size - *off) < sizeof(info))
		return 0;

	memset(&info, 0, sizeof(info));
	/* Get the oldest event from the queue */
	while (!__pending_event(dev, &info)) {
		if (filp->f_flags & O_NONBLOCK)
			return -EAGAIN;
		err = NVGPU_COND_WAIT_INTERRUPTIBLE(&dev->readout_wq,
				__pending_event(dev, &info), 0);
		if (err)
			return err;
		if (info.timestamp)
			break;
	}

	if (copy_to_user(buf + *off, &info, sizeof(info)))
		return -EFAULT;

	return sizeof(info);
}

static int nvgpu_clk_arb_set_event_filter(struct nvgpu_clk_dev *dev,
		struct nvgpu_gpu_set_event_filter_args *args)
{
	struct gk20a *g = dev->session->g;
	u32 mask;

	nvgpu_log(g, gpu_dbg_fn, " ");

	if (args->flags)
		return -EINVAL;

	if (args->size != 1)
		return -EINVAL;

	if (copy_from_user(&mask, (void __user *) args->buffer,
			args->size * sizeof(u32)))
		return -EFAULT;

	/* update alarm mask */
	nvgpu_atomic_set(&dev->enabled_mask, mask);

	return 0;
}

static long nvgpu_clk_arb_ioctl_event_dev(struct file *filp, unsigned int cmd,
		unsigned long arg)
{
	struct nvgpu_clk_dev *dev = filp->private_data;
	struct gk20a *g = dev->session->g;
	u8 buf[NVGPU_EVENT_IOCTL_MAX_ARG_SIZE];
	int err = 0;

	nvgpu_log(g, gpu_dbg_fn, "nr=%d", _IOC_NR(cmd));

	if ((_IOC_TYPE(cmd) != NVGPU_EVENT_IOCTL_MAGIC) || (_IOC_NR(cmd) == 0)
		|| (_IOC_NR(cmd) > NVGPU_EVENT_IOCTL_LAST))
		return -EINVAL;

	BUG_ON(_IOC_SIZE(cmd) > NVGPU_EVENT_IOCTL_MAX_ARG_SIZE);

	memset(buf, 0, sizeof(buf));
	if (_IOC_DIR(cmd) & _IOC_WRITE) {
		if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd)))
			return -EFAULT;
	}

	switch (cmd) {
	case NVGPU_EVENT_IOCTL_SET_FILTER:
		err = nvgpu_clk_arb_set_event_filter(dev,
				(struct nvgpu_gpu_set_event_filter_args *)buf);
		break;
	default:
		nvgpu_warn(g, "unrecognized event ioctl cmd: 0x%x", cmd);
		err = -ENOTTY;
	}

	if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
		err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd));

	return err;
}

static const struct file_operations completion_dev_ops = {
	.owner = THIS_MODULE,
	.release = nvgpu_clk_arb_release_completion_dev,
	.poll = nvgpu_clk_arb_poll_dev,
};

static const struct file_operations event_dev_ops = {
	.owner = THIS_MODULE,
	.release = nvgpu_clk_arb_release_event_dev,
	.poll = nvgpu_clk_arb_poll_dev,
	.read = nvgpu_clk_arb_read_event_dev,
#ifdef CONFIG_COMPAT
	.compat_ioctl = nvgpu_clk_arb_ioctl_event_dev,
#endif
	.unlocked_ioctl = nvgpu_clk_arb_ioctl_event_dev,
};

static int nvgpu_clk_arb_install_fd(struct gk20a *g,
		struct nvgpu_clk_session *session,
		const struct file_operations *fops,
		struct nvgpu_clk_dev **_dev)
{
	struct file *file;
	int fd;
	int err;
	int status;
	char name[64];
	struct nvgpu_clk_dev *dev;

	clk_arb_dbg(g, " ");

	dev = nvgpu_kzalloc(g, sizeof(*dev));
	if (!dev)
		return -ENOMEM;

	status = nvgpu_clk_notification_queue_alloc(g, &dev->queue,
		DEFAULT_EVENT_NUMBER);
	if (status < 0) {
		err = status;
		goto fail;
	}

	fd = get_unused_fd_flags(O_RDWR);
	if (fd < 0) {
		err = fd;
		goto fail;
	}

	snprintf(name, sizeof(name), "%s-clk-fd%d", g->name, fd);
	file = anon_inode_getfile(name, fops, dev, O_RDWR);
	if (IS_ERR(file)) {
		err = PTR_ERR(file);
		goto fail_fd;
	}

	fd_install(fd, file);

	nvgpu_cond_init(&dev->readout_wq);

	nvgpu_atomic_set(&dev->poll_mask, 0);

	dev->session = session;
	nvgpu_ref_init(&dev->refcount);

	nvgpu_ref_get(&session->refcount);

	*_dev = dev;

	return fd;

fail_fd:
	put_unused_fd(fd);
fail:
	nvgpu_kfree(g, dev);

	return err;
}

int nvgpu_clk_arb_install_event_fd(struct gk20a *g,
		struct nvgpu_clk_session *session, int *event_fd, u32 alarm_mask)
{
	struct nvgpu_clk_arb *arb = g->clk_arb;
	struct nvgpu_clk_dev *dev;
	int fd;

	clk_arb_dbg(g, " ");

	fd = nvgpu_clk_arb_install_fd(g, session, &event_dev_ops, &dev);
	if (fd < 0)
		return fd;

	/* TODO: alarm mask needs to be set to default value to prevent
	 * failures of legacy tests. This will be removed when sanity is
	 * updated
	 */
	if (alarm_mask)
		nvgpu_atomic_set(&dev->enabled_mask, alarm_mask);
	else
		nvgpu_atomic_set(&dev->enabled_mask, EVENT(VF_UPDATE));

	dev->arb_queue_head = nvgpu_atomic_read(&arb->notification_queue.head);

	nvgpu_spinlock_acquire(&arb->users_lock);
	nvgpu_list_add_tail(&dev->link, &arb->users);
	nvgpu_spinlock_release(&arb->users_lock);

	*event_fd = fd;

	return 0;
}

int nvgpu_clk_arb_install_request_fd(struct gk20a *g,
		struct nvgpu_clk_session *session, int *request_fd)
{
	struct nvgpu_clk_dev *dev;
	int fd;

	clk_arb_dbg(g, " ");

	fd = nvgpu_clk_arb_install_fd(g, session, &completion_dev_ops, &dev);
	if (fd < 0)
		return fd;

	*request_fd = fd;

	return 0;
}

int nvgpu_clk_arb_commit_request_fd(struct gk20a *g,
		struct nvgpu_clk_session *session, int request_fd)
{
	struct nvgpu_clk_arb *arb = g->clk_arb;
	struct nvgpu_clk_dev *dev;
	struct fd fd;
	int err = 0;

	clk_arb_dbg(g, " ");

	fd = fdget(request_fd);
	if (!fd.file)
		return -EINVAL;

	if (fd.file->f_op != &completion_dev_ops) {
		err = -EINVAL;
		goto fdput_fd;
	}

	dev = (struct nvgpu_clk_dev *) fd.file->private_data;

	if (!dev || dev->session != session) {
		err = -EINVAL;
		goto fdput_fd;
	}
	nvgpu_ref_get(&dev->refcount);
	nvgpu_spinlock_acquire(&session->session_lock);
	nvgpu_list_add(&dev->node, &session->targets);
	nvgpu_spinlock_release(&session->session_lock);
	nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item);

fdput_fd:
	fdput(fd);
	return err;
}

int nvgpu_clk_arb_set_session_target_mhz(struct nvgpu_clk_session *session,
		int request_fd, u32 api_domain, u16 target_mhz)
{
	struct nvgpu_clk_dev *dev;
	struct fd fd;
	int err = 0;

	clk_arb_dbg(session->g,
		"domain=0x%08x target_mhz=%u", api_domain, target_mhz);

	fd = fdget(request_fd);
	if (!fd.file)
		return -EINVAL;

	if (fd.file->f_op != &completion_dev_ops) {
		err = -EINVAL;
		goto fdput_fd;
	}

	dev = fd.file->private_data;
	if (!dev || dev->session != session) {
		err = -EINVAL;
		goto fdput_fd;
	}

	switch (api_domain) {
	case NVGPU_CLK_DOMAIN_MCLK:
		dev->mclk_target_mhz = target_mhz;
		break;

	case NVGPU_CLK_DOMAIN_GPCCLK:
		dev->gpc2clk_target_mhz = target_mhz * 2ULL;
		break;

	default:
		err = -EINVAL;
	}

fdput_fd:
	fdput(fd);
	return err;
}

u32 nvgpu_clk_arb_get_arbiter_clk_domains(struct gk20a *g)
{
	u32 clk_domains = g->ops.clk_arb.get_arbiter_clk_domains(g);
	u32 api_domains = 0;

	if (clk_domains & CTRL_CLK_DOMAIN_GPC2CLK)
		api_domains |= BIT(NVGPU_GPU_CLK_DOMAIN_GPCCLK);

	if (clk_domains & CTRL_CLK_DOMAIN_MCLK)
		api_domains |= BIT(NVGPU_GPU_CLK_DOMAIN_MCLK);

	return api_domains;
}

#ifdef CONFIG_DEBUG_FS
static int nvgpu_clk_arb_stats_show(struct seq_file *s, void *unused)
{
	struct gk20a *g = s->private;
	struct nvgpu_clk_arb *arb = g->clk_arb;
	struct nvgpu_clk_arb_debug *debug;

	u64 num;
	s64 tmp, avg, std, max, min;

	debug = NV_ACCESS_ONCE(arb->debug);
	/* Make copy of structure and ensure no reordering */
	nvgpu_smp_rmb();
	if (!debug)
		return -EINVAL;

	std = debug->switch_std;
	avg = debug->switch_avg;
	max = debug->switch_max;
	min = debug->switch_min;
	num = debug->switch_num;

	tmp = std;
	do_div(tmp, num);
	seq_printf(s, "Number of transitions: %lld\n",
		num);
	seq_printf(s, "max / min : %lld / %lld usec\n",
		max, min);
	seq_printf(s, "avg / std : %lld / %ld usec\n",
		avg, int_sqrt(tmp));

	return 0;
}

static int nvgpu_clk_arb_stats_open(struct inode *inode, struct file *file)
{
	return single_open(file, nvgpu_clk_arb_stats_show, inode->i_private);
}

static const struct file_operations nvgpu_clk_arb_stats_fops = {
	.open = nvgpu_clk_arb_stats_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

int nvgpu_clk_arb_debugfs_init(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	struct dentry *gpu_root = l->debugfs;
	struct dentry *d;

	nvgpu_log(g, gpu_dbg_info, "g=%p", g);

	d = debugfs_create_file(
		"arb_stats",
		S_IRUGO,
		gpu_root,
		g,
		&nvgpu_clk_arb_stats_fops);
	if (!d)
		return -ENOMEM;

	return 0;
}
#endif
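A minimal user-space sketch of consuming the event fd built by nvgpu_clk_arb_install_event_fd() above. Assumptions: event_fd was handed back by the ctrl device's event-fd setup ioctl (not part of this diff), and the record layout is struct nvgpu_gpu_event_info from <linux/nvgpu.h>.

#include <poll.h>
#include <stdio.h>
#include <unistd.h>
#include <linux/nvgpu.h>

static void wait_one_clk_event(int event_fd)
{
	struct pollfd pfd = { .fd = event_fd, .events = POLLIN | POLLPRI };
	struct nvgpu_gpu_event_info info;

	/* block until the arbiter posts, then read exactly one record */
	if (poll(&pfd, 1, -1) > 0 &&
	    read(event_fd, &info, sizeof(info)) == (ssize_t)sizeof(info))
		printf("event %u at %llu\n", (unsigned int)info.event_id,
		       (unsigned long long)info.timestamp);
}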
1962
drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c
Normal file
File diff suppressed because it is too large
23
drivers/gpu/nvgpu/os/linux/ioctl_ctrl.h
Normal file
@@ -0,0 +1,23 @@
/*
 * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#ifndef __NVGPU_IOCTL_CTRL_H__
#define __NVGPU_IOCTL_CTRL_H__

int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp);
int gk20a_ctrl_dev_release(struct inode *inode, struct file *filp);
long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);

#endif
2003
drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
Normal file
File diff suppressed because it is too large
54
drivers/gpu/nvgpu/os/linux/ioctl_dbg.h
Normal file
@@ -0,0 +1,54 @@
/*
 * Tegra GK20A GPU Debugger Driver
 *
 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#ifndef DBG_GPU_IOCTL_GK20A_H
#define DBG_GPU_IOCTL_GK20A_H

#include <linux/poll.h>

#include "gk20a/dbg_gpu_gk20a.h"

/* NVGPU_DBG_GPU_IOCTL_REG_OPS: the upper limit for the number
 * of regops */
#define NVGPU_IOCTL_DBG_REG_OPS_LIMIT 1024

struct dbg_session_gk20a_linux {
	struct device *dev;
	struct dbg_session_gk20a dbg_s;
};

struct dbg_session_channel_data_linux {
	/*
	 * We have to keep a ref to the _file_, not the channel, because
	 * close(channel_fd) is synchronous and would deadlock if we had an
	 * open debug session fd holding a channel ref at that time. Holding a
	 * ref to the file makes close(channel_fd) just drop a kernel ref to
	 * the file; the channel will close when the last file ref is dropped.
	 */
	struct file *ch_f;
	struct dbg_session_channel_data ch_data;
};

/* module debug driver interface */
int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp);
int gk20a_dbg_gpu_dev_open(struct inode *inode, struct file *filp);
long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
unsigned int gk20a_dbg_gpu_dev_poll(struct file *filep, poll_table *wait);

/* used by profiler driver interface */
int gk20a_prof_gpu_dev_open(struct inode *inode, struct file *filp);

#endif
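A hedged sketch of the file-reference pattern described in the comment inside dbg_session_channel_data_linux: pin the struct file rather than the channel, so close(channel_fd) never waits on the debug session. fget() and fput() are the real kernel APIs; pin_channel_file() is an illustrative name.

#include <linux/file.h>

static struct file *pin_channel_file(int channel_fd)
{
	struct file *f = fget(channel_fd);	/* +1 on the file refcount */

	/* store like ch_f above; release later with fput(f), which may be
	 * the final reference and thus actually close the channel */
	return f;
}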
677
drivers/gpu/nvgpu/os/linux/ioctl_tsg.c
Normal file
@@ -0,0 +1,677 @@
/*
 * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/fs.h>
#include <linux/file.h>
#include <linux/cdev.h>
#include <linux/uaccess.h>
#include <linux/poll.h>
#include <uapi/linux/nvgpu.h>
#include <linux/anon_inodes.h>

#include <nvgpu/kmem.h>
#include <nvgpu/log.h>
#include <nvgpu/os_sched.h>

#include "gk20a/gk20a.h"
#include "gk20a/tsg_gk20a.h"
#include "gv11b/fifo_gv11b.h"
#include "platform_gk20a.h"
#include "ioctl_tsg.h"
#include "ioctl_channel.h"
#include "os_linux.h"

struct tsg_private {
	struct gk20a *g;
	struct tsg_gk20a *tsg;
};

static int gk20a_tsg_bind_channel_fd(struct tsg_gk20a *tsg, int ch_fd)
{
	struct channel_gk20a *ch;
	int err;

	ch = gk20a_get_channel_from_file(ch_fd);
	if (!ch)
		return -EINVAL;

	err = ch->g->ops.fifo.tsg_bind_channel(tsg, ch);

	gk20a_channel_put(ch);
	return err;
}

static int gk20a_tsg_ioctl_bind_channel_ex(struct gk20a *g,
	struct tsg_gk20a *tsg, struct nvgpu_tsg_bind_channel_ex_args *arg)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	struct gk20a_sched_ctrl *sched = &l->sched_ctrl;
	struct channel_gk20a *ch;
	struct gr_gk20a *gr = &g->gr;
	int err = 0;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid);

	nvgpu_mutex_acquire(&sched->control_lock);
	if (sched->control_locked) {
		err = -EPERM;
		goto mutex_release;
	}
	err = gk20a_busy(g);
	if (err) {
		nvgpu_err(g, "failed to power on gpu");
		goto mutex_release;
	}

	ch = gk20a_get_channel_from_file(arg->channel_fd);
	if (!ch) {
		err = -EINVAL;
		goto idle;
	}

	if (arg->tpc_pg_enabled && (!tsg->tpc_num_initialized)) {
		if ((arg->num_active_tpcs > gr->max_tpc_count) ||
			!(arg->num_active_tpcs)) {
			nvgpu_err(g, "Invalid num of active TPCs");
			err = -EINVAL;
			goto ch_put;
		}
		tsg->tpc_num_initialized = true;
		tsg->num_active_tpcs = arg->num_active_tpcs;
		tsg->tpc_pg_enabled = true;
	} else {
		tsg->tpc_pg_enabled = false;
		nvgpu_log(g, gpu_dbg_info, "dynamic TPC-PG not enabled");
	}

	if (arg->subcontext_id < g->fifo.max_subctx_count) {
		ch->subctx_id = arg->subcontext_id;
	} else {
		err = -EINVAL;
		goto ch_put;
	}

	nvgpu_log(g, gpu_dbg_info, "channel id : %d : subctx: %d",
			ch->chid, ch->subctx_id);

	/* Use runqueue selector 1 for all ASYNC ids */
	if (ch->subctx_id > CHANNEL_INFO_VEID0)
		ch->runqueue_sel = 1;

	err = ch->g->ops.fifo.tsg_bind_channel(tsg, ch);
ch_put:
	gk20a_channel_put(ch);
idle:
	gk20a_idle(g);
mutex_release:
	nvgpu_mutex_release(&sched->control_lock);
	return err;
}

static int gk20a_tsg_unbind_channel_fd(struct tsg_gk20a *tsg, int ch_fd)
{
	struct channel_gk20a *ch;
	int err = 0;

	ch = gk20a_get_channel_from_file(ch_fd);
	if (!ch)
		return -EINVAL;

	if (ch->tsgid != tsg->tsgid) {
		err = -EINVAL;
		goto out;
	}

	err = gk20a_tsg_unbind_channel(ch);

	/*
	 * Mark the channel timedout since channel unbound from TSG
	 * has no context of its own so it can't serve any job
	 */
	ch->has_timedout = true;

out:
	gk20a_channel_put(ch);
	return err;
}

static int gk20a_tsg_get_event_data_from_id(struct tsg_gk20a *tsg,
				unsigned int event_id,
				struct gk20a_event_id_data **event_id_data)
{
	struct gk20a_event_id_data *local_event_id_data;
	bool event_found = false;

	nvgpu_mutex_acquire(&tsg->event_id_list_lock);
	nvgpu_list_for_each_entry(local_event_id_data, &tsg->event_id_list,
				gk20a_event_id_data, event_id_node) {
		if (local_event_id_data->event_id == event_id) {
			event_found = true;
			break;
		}
	}
	nvgpu_mutex_release(&tsg->event_id_list_lock);

	if (event_found) {
		*event_id_data = local_event_id_data;
		return 0;
	} else {
		return -1;
	}
}

/*
 * Convert common event_id of the form NVGPU_EVENT_ID_* to Linux specific
 * event_id of the form NVGPU_IOCTL_CHANNEL_EVENT_ID_* which is used in IOCTLs
 */
static u32 nvgpu_event_id_to_ioctl_channel_event_id(u32 event_id)
{
	switch (event_id) {
	case NVGPU_EVENT_ID_BPT_INT:
		return NVGPU_IOCTL_CHANNEL_EVENT_ID_BPT_INT;
	case NVGPU_EVENT_ID_BPT_PAUSE:
		return NVGPU_IOCTL_CHANNEL_EVENT_ID_BPT_PAUSE;
	case NVGPU_EVENT_ID_BLOCKING_SYNC:
		return NVGPU_IOCTL_CHANNEL_EVENT_ID_BLOCKING_SYNC;
	case NVGPU_EVENT_ID_CILP_PREEMPTION_STARTED:
		return NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_STARTED;
	case NVGPU_EVENT_ID_CILP_PREEMPTION_COMPLETE:
		return NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_COMPLETE;
	case NVGPU_EVENT_ID_GR_SEMAPHORE_WRITE_AWAKEN:
		return NVGPU_IOCTL_CHANNEL_EVENT_ID_GR_SEMAPHORE_WRITE_AWAKEN;
	}

	return NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX;
}

void gk20a_tsg_event_id_post_event(struct tsg_gk20a *tsg,
				       int __event_id)
{
	struct gk20a_event_id_data *event_id_data;
	u32 event_id;
	int err = 0;
	struct gk20a *g = tsg->g;

	event_id = nvgpu_event_id_to_ioctl_channel_event_id(__event_id);
	if (event_id >= NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX)
		return;

	err = gk20a_tsg_get_event_data_from_id(tsg, event_id,
						&event_id_data);
	if (err)
		return;

	nvgpu_mutex_acquire(&event_id_data->lock);

	nvgpu_log_info(g,
		"posting event for event_id=%d on tsg=%d\n",
		event_id, tsg->tsgid);
	event_id_data->event_posted = true;

	nvgpu_cond_broadcast_interruptible(&event_id_data->event_id_wq);

	nvgpu_mutex_release(&event_id_data->lock);
}

static unsigned int gk20a_event_id_poll(struct file *filep, poll_table *wait)
{
	unsigned int mask = 0;
	struct gk20a_event_id_data *event_id_data = filep->private_data;
	struct gk20a *g = event_id_data->g;
	u32 event_id = event_id_data->event_id;
	struct tsg_gk20a *tsg = g->fifo.tsg + event_id_data->id;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_info, " ");

	poll_wait(filep, &event_id_data->event_id_wq.wq, wait);

	nvgpu_mutex_acquire(&event_id_data->lock);

	if (event_id_data->event_posted) {
		nvgpu_log_info(g,
			"found pending event_id=%d on TSG=%d\n",
			event_id, tsg->tsgid);
		mask = (POLLPRI | POLLIN);
		event_id_data->event_posted = false;
	}

	nvgpu_mutex_release(&event_id_data->lock);

	return mask;
}

static int gk20a_event_id_release(struct inode *inode, struct file *filp)
{
	struct gk20a_event_id_data *event_id_data = filp->private_data;
	struct gk20a *g = event_id_data->g;
	struct tsg_gk20a *tsg = g->fifo.tsg + event_id_data->id;

	nvgpu_mutex_acquire(&tsg->event_id_list_lock);
	nvgpu_list_del(&event_id_data->event_id_node);
	nvgpu_mutex_release(&tsg->event_id_list_lock);

	nvgpu_mutex_destroy(&event_id_data->lock);
	gk20a_put(g);
	nvgpu_kfree(g, event_id_data);
	filp->private_data = NULL;

	return 0;
}

const struct file_operations gk20a_event_id_ops = {
	.owner = THIS_MODULE,
	.poll = gk20a_event_id_poll,
	.release = gk20a_event_id_release,
};

static int gk20a_tsg_event_id_enable(struct tsg_gk20a *tsg,
					 int event_id,
					 int *fd)
{
	int err = 0;
	int local_fd;
	struct file *file;
	char name[64];
	struct gk20a_event_id_data *event_id_data;
	struct gk20a *g;

	g = gk20a_get(tsg->g);
	if (!g)
		return -ENODEV;

	err = gk20a_tsg_get_event_data_from_id(tsg,
				event_id, &event_id_data);
	if (err == 0) {
		/* We already have event enabled */
		err = -EINVAL;
		goto free_ref;
	}

	err = get_unused_fd_flags(O_RDWR);
	if (err < 0)
		goto free_ref;
	local_fd = err;

	snprintf(name, sizeof(name), "nvgpu-event%d-fd%d",
		 event_id, local_fd);

	file = anon_inode_getfile(name, &gk20a_event_id_ops,
				  NULL, O_RDWR);
	if (IS_ERR(file)) {
		err = PTR_ERR(file);
		goto clean_up;
	}

	event_id_data = nvgpu_kzalloc(tsg->g, sizeof(*event_id_data));
	if (!event_id_data) {
		err = -ENOMEM;
		goto clean_up_file;
	}
	event_id_data->g = g;
	event_id_data->id = tsg->tsgid;
	event_id_data->event_id = event_id;

	nvgpu_cond_init(&event_id_data->event_id_wq);
	err = nvgpu_mutex_init(&event_id_data->lock);
	if (err)
		goto clean_up_free;

	nvgpu_init_list_node(&event_id_data->event_id_node);

	nvgpu_mutex_acquire(&tsg->event_id_list_lock);
	nvgpu_list_add_tail(&event_id_data->event_id_node, &tsg->event_id_list);
	nvgpu_mutex_release(&tsg->event_id_list_lock);

	fd_install(local_fd, file);
	file->private_data = event_id_data;

	*fd = local_fd;

	return 0;

clean_up_free:
	nvgpu_kfree(g, event_id_data);
clean_up_file:
	fput(file);
clean_up:
	put_unused_fd(local_fd);
free_ref:
	gk20a_put(g);
	return err;
}

static int gk20a_tsg_event_id_ctrl(struct gk20a *g, struct tsg_gk20a *tsg,
		struct nvgpu_event_id_ctrl_args *args)
{
	int err = 0;
	int fd = -1;

	if (args->event_id >= NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX)
		return -EINVAL;

	switch (args->cmd) {
	case NVGPU_IOCTL_CHANNEL_EVENT_ID_CMD_ENABLE:
		err = gk20a_tsg_event_id_enable(tsg, args->event_id, &fd);
		if (!err)
			args->event_fd = fd;
		break;

	default:
		nvgpu_err(tsg->g, "unrecognized tsg event id cmd: 0x%x",
			   args->cmd);
		err = -EINVAL;
		break;
	}

	return err;
}

int nvgpu_ioctl_tsg_open(struct gk20a *g, struct file *filp)
{
	struct tsg_private *priv;
	struct tsg_gk20a *tsg;
	struct device *dev;
	int err;

	g = gk20a_get(g);
	if (!g)
		return -ENODEV;

	dev = dev_from_gk20a(g);

	nvgpu_log(g, gpu_dbg_fn, "tsg: %s", dev_name(dev));

	priv = nvgpu_kmalloc(g, sizeof(*priv));
	if (!priv) {
		err = -ENOMEM;
		goto free_ref;
	}

	tsg = gk20a_tsg_open(g, nvgpu_current_pid(g));
	if (!tsg) {
		nvgpu_kfree(g, priv);
		err = -ENOMEM;
		goto free_ref;
	}

	priv->g = g;
	priv->tsg = tsg;
	filp->private_data = priv;

	gk20a_sched_ctrl_tsg_added(g, tsg);

	return 0;

free_ref:
	gk20a_put(g);
	return err;
}

int nvgpu_ioctl_tsg_dev_open(struct inode *inode, struct file *filp)
{
	struct nvgpu_os_linux *l;
	struct gk20a *g;
	int ret;

	l = container_of(inode->i_cdev,
			 struct nvgpu_os_linux, tsg.cdev);
	g = &l->g;

	nvgpu_log_fn(g, " ");

	ret = gk20a_busy(g);
	if (ret) {
		nvgpu_err(g, "failed to power on, %d", ret);
		return ret;
	}

	ret = nvgpu_ioctl_tsg_open(&l->g, filp);

	gk20a_idle(g);
	nvgpu_log_fn(g, "done");
	return ret;
}

void nvgpu_ioctl_tsg_release(struct nvgpu_ref *ref)
{
	struct tsg_gk20a *tsg = container_of(ref, struct tsg_gk20a, refcount);
	struct gk20a *g = tsg->g;

	gk20a_sched_ctrl_tsg_removed(g, tsg);

	gk20a_tsg_release(ref);
	gk20a_put(g);
}

int nvgpu_ioctl_tsg_dev_release(struct inode *inode, struct file *filp)
{
	struct tsg_private *priv = filp->private_data;
	struct tsg_gk20a *tsg = priv->tsg;

	nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release);
	nvgpu_kfree(tsg->g, priv);
	return 0;
}

static int gk20a_tsg_ioctl_set_runlist_interleave(struct gk20a *g,
	struct tsg_gk20a *tsg, struct nvgpu_runlist_interleave_args *arg)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	struct gk20a_sched_ctrl *sched = &l->sched_ctrl;
	u32 level = arg->level;
	int err;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid);

	nvgpu_mutex_acquire(&sched->control_lock);
	if (sched->control_locked) {
		err = -EPERM;
		goto done;
	}
	err = gk20a_busy(g);
	if (err) {
		nvgpu_err(g, "failed to power on gpu");
		goto done;
	}

	level = nvgpu_get_common_runlist_level(level);
	err = gk20a_tsg_set_runlist_interleave(tsg, level);

	gk20a_idle(g);
done:
	nvgpu_mutex_release(&sched->control_lock);
	return err;
}

static int gk20a_tsg_ioctl_set_timeslice(struct gk20a *g,
	struct tsg_gk20a *tsg, struct nvgpu_timeslice_args *arg)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	struct gk20a_sched_ctrl *sched = &l->sched_ctrl;
	int err;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid);

	nvgpu_mutex_acquire(&sched->control_lock);
	if (sched->control_locked) {
		err = -EPERM;
		goto done;
	}
	err = gk20a_busy(g);
	if (err) {
		nvgpu_err(g, "failed to power on gpu");
		goto done;
	}
	err = gk20a_tsg_set_timeslice(tsg, arg->timeslice_us);
	gk20a_idle(g);
done:
	nvgpu_mutex_release(&sched->control_lock);
	return err;
}

static int gk20a_tsg_ioctl_get_timeslice(struct gk20a *g,
	struct tsg_gk20a *tsg, struct nvgpu_timeslice_args *arg)
{
	arg->timeslice_us = gk20a_tsg_get_timeslice(tsg);
	return 0;
}

long nvgpu_ioctl_tsg_dev_ioctl(struct file *filp, unsigned int cmd,
			     unsigned long arg)
{
	struct tsg_private *priv = filp->private_data;
	struct tsg_gk20a *tsg = priv->tsg;
	struct gk20a *g = tsg->g;
	u8 __maybe_unused buf[NVGPU_TSG_IOCTL_MAX_ARG_SIZE];
	int err = 0;

	nvgpu_log_fn(g, "start %d", _IOC_NR(cmd));

	if ((_IOC_TYPE(cmd) != NVGPU_TSG_IOCTL_MAGIC) ||
	    (_IOC_NR(cmd) == 0) ||
	    (_IOC_NR(cmd) > NVGPU_TSG_IOCTL_LAST) ||
	    (_IOC_SIZE(cmd) > NVGPU_TSG_IOCTL_MAX_ARG_SIZE))
		return -EINVAL;

	memset(buf, 0, sizeof(buf));
	if (_IOC_DIR(cmd) & _IOC_WRITE) {
		if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
			return -EFAULT;
	}

	if (!g->sw_ready) {
		err = gk20a_busy(g);
		if (err)
			return err;

		gk20a_idle(g);
	}

	switch (cmd) {
	case NVGPU_TSG_IOCTL_BIND_CHANNEL:
	{
		int ch_fd = *(int *)buf;

		if (ch_fd < 0) {
			err = -EINVAL;
			break;
		}
		err = gk20a_tsg_bind_channel_fd(tsg, ch_fd);
		break;
	}

	case NVGPU_TSG_IOCTL_BIND_CHANNEL_EX:
	{
		err = gk20a_tsg_ioctl_bind_channel_ex(g, tsg,
			(struct nvgpu_tsg_bind_channel_ex_args *)buf);
		break;
	}

	case NVGPU_TSG_IOCTL_UNBIND_CHANNEL:
	{
		int ch_fd = *(int *)buf;

		if (ch_fd < 0) {
			err = -EINVAL;
			break;
		}
		err = gk20a_busy(g);
		if (err) {
			nvgpu_err(g,
			   "failed to host gk20a for ioctl cmd: 0x%x", cmd);
			break;
		}
		err = gk20a_tsg_unbind_channel_fd(tsg, ch_fd);
		gk20a_idle(g);
		break;
	}

	case NVGPU_IOCTL_TSG_ENABLE:
	{
		err = gk20a_busy(g);
		if (err) {
			nvgpu_err(g,
			   "failed to host gk20a for ioctl cmd: 0x%x", cmd);
			return err;
		}
		g->ops.fifo.enable_tsg(tsg);
		gk20a_idle(g);
		break;
	}

	case NVGPU_IOCTL_TSG_DISABLE:
	{
		err = gk20a_busy(g);
		if (err) {
			nvgpu_err(g,
			   "failed to host gk20a for ioctl cmd: 0x%x", cmd);
			return err;
		}
		g->ops.fifo.disable_tsg(tsg);
		gk20a_idle(g);
		break;
	}

	case NVGPU_IOCTL_TSG_PREEMPT:
	{
		err = gk20a_busy(g);
		if (err) {
			nvgpu_err(g,
			   "failed to host gk20a for ioctl cmd: 0x%x", cmd);
			return err;
		}
		/* preempt TSG */
		err = g->ops.fifo.preempt_tsg(g, tsg->tsgid);
		gk20a_idle(g);
		break;
	}

	case NVGPU_IOCTL_TSG_EVENT_ID_CTRL:
	{
		err = gk20a_tsg_event_id_ctrl(g, tsg,
			(struct nvgpu_event_id_ctrl_args *)buf);
		break;
	}

	case NVGPU_IOCTL_TSG_SET_RUNLIST_INTERLEAVE:
		err = gk20a_tsg_ioctl_set_runlist_interleave(g, tsg,
			(struct nvgpu_runlist_interleave_args *)buf);
		break;

	case NVGPU_IOCTL_TSG_SET_TIMESLICE:
	{
		err = gk20a_tsg_ioctl_set_timeslice(g, tsg,
			(struct nvgpu_timeslice_args *)buf);
		break;
	}
	case NVGPU_IOCTL_TSG_GET_TIMESLICE:
	{
		err = gk20a_tsg_ioctl_get_timeslice(g, tsg,
			(struct nvgpu_timeslice_args *)buf);
		break;
	}

	default:
		nvgpu_err(g, "unrecognized tsg gpu ioctl cmd: 0x%x",
			   cmd);
		err = -ENOTTY;
		break;
	}

	if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
		err = copy_to_user((void __user *)arg,
				   buf, _IOC_SIZE(cmd));

	return err;
}
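A minimal user-space sketch of the TSG bind flow served by the handler above. Assumptions: the TSG device node path is platform-specific (/dev/nvhost-tsg-gpu is typical for Tegra but not guaranteed), and ch_fd is a channel fd obtained from the ctrl device's channel-open ioctl.

#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

int bind_channel_to_new_tsg(int ch_fd)
{
	int tsg_fd = open("/dev/nvhost-tsg-gpu", O_RDWR);	/* assumed path */

	if (tsg_fd < 0)
		return -1;

	/* the argument is just the channel fd for the plain bind ioctl */
	if (ioctl(tsg_fd, NVGPU_TSG_IOCTL_BIND_CHANNEL, &ch_fd) < 0) {
		close(tsg_fd);
		return -1;
	}
	return tsg_fd;	/* closing it later drops the TSG reference */
}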
28
drivers/gpu/nvgpu/os/linux/ioctl_tsg.h
Normal file
@@ -0,0 +1,28 @@
/*
 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */
#ifndef NVGPU_IOCTL_TSG_H
#define NVGPU_IOCTL_TSG_H

struct inode;
struct file;
struct gk20a;
struct nvgpu_ref;

int nvgpu_ioctl_tsg_dev_release(struct inode *inode, struct file *filp);
int nvgpu_ioctl_tsg_dev_open(struct inode *inode, struct file *filp);
int nvgpu_ioctl_tsg_open(struct gk20a *g, struct file *filp);
long nvgpu_ioctl_tsg_dev_ioctl(struct file *filp,
			       unsigned int cmd, unsigned long arg);
void nvgpu_ioctl_tsg_release(struct nvgpu_ref *ref);

#endif
654
drivers/gpu/nvgpu/os/linux/kmem.c
Normal file
@@ -0,0 +1,654 @@
/*
 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/vmalloc.h>
#include <linux/stacktrace.h>

#include <nvgpu/lock.h>
#include <nvgpu/kmem.h>
#include <nvgpu/atomic.h>
#include <nvgpu/bug.h>

#include "gk20a/gk20a.h"

#include "kmem_priv.h"

/*
 * Statically declared because this needs to be shared across all nvgpu driver
 * instances. This makes sure that all kmem caches are _definitely_ uniquely
 * named.
 */
static atomic_t kmem_cache_id;

void *__nvgpu_big_alloc(struct gk20a *g, size_t size, bool clear)
{
	void *p;

	if (size > PAGE_SIZE) {
		if (clear)
			p = nvgpu_vzalloc(g, size);
		else
			p = nvgpu_vmalloc(g, size);
	} else {
		if (clear)
			p = nvgpu_kzalloc(g, size);
		else
			p = nvgpu_kmalloc(g, size);
	}

	return p;
}
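/*
 * Usage sketch (illustrative only): callers don't pick an allocator;
 * anything over PAGE_SIZE transparently comes from vmalloc space and
 * smaller requests from the slab. nvgpu_big_free() below undoes either
 * case via is_vmalloc_addr().
 *
 *	u32 *table = __nvgpu_big_alloc(g, nr_entries * sizeof(u32), true);
 *
 *	if (table) {
 *		// ... use table ...
 *		nvgpu_big_free(g, table);
 *	}
 */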
void nvgpu_big_free(struct gk20a *g, void *p)
{
	/*
	 * This will have to be fixed eventually. Allocs that use
	 * nvgpu_big_[mz]alloc() will need to remember the size of the alloc
	 * when freeing.
	 */
	if (is_vmalloc_addr(p))
		nvgpu_vfree(g, p);
	else
		nvgpu_kfree(g, p);
}

void *__nvgpu_kmalloc(struct gk20a *g, size_t size, unsigned long ip)
{
	void *alloc;

#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
	alloc = __nvgpu_track_kmalloc(g, size, ip);
#else
	alloc = kmalloc(size, GFP_KERNEL);
#endif

	kmem_dbg(g, "kmalloc: size=%-6ld addr=0x%p gfp=0x%08x",
		 size, alloc, GFP_KERNEL);

	return alloc;
}

void *__nvgpu_kzalloc(struct gk20a *g, size_t size, unsigned long ip)
{
	void *alloc;

#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
	alloc = __nvgpu_track_kzalloc(g, size, ip);
#else
	alloc = kzalloc(size, GFP_KERNEL);
#endif

	kmem_dbg(g, "kzalloc: size=%-6ld addr=0x%p gfp=0x%08x",
		 size, alloc, GFP_KERNEL);

	return alloc;
}

void *__nvgpu_kcalloc(struct gk20a *g, size_t n, size_t size, unsigned long ip)
{
	void *alloc;

#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
	alloc = __nvgpu_track_kcalloc(g, n, size, ip);
#else
	alloc = kcalloc(n, size, GFP_KERNEL);
#endif

	kmem_dbg(g, "kcalloc: size=%-6ld addr=0x%p gfp=0x%08x",
		 n * size, alloc, GFP_KERNEL);

	return alloc;
}

void *__nvgpu_vmalloc(struct gk20a *g, unsigned long size, unsigned long ip)
{
	void *alloc;

#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
	alloc = __nvgpu_track_vmalloc(g, size, ip);
#else
	alloc = vmalloc(size);
#endif

	kmem_dbg(g, "vmalloc: size=%-6ld addr=0x%p", size, alloc);

	return alloc;
}

void *__nvgpu_vzalloc(struct gk20a *g, unsigned long size, unsigned long ip)
{
	void *alloc;

#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
	alloc = __nvgpu_track_vzalloc(g, size, ip);
#else
	alloc = vzalloc(size);
#endif

	kmem_dbg(g, "vzalloc: size=%-6ld addr=0x%p", size, alloc);

	return alloc;
}

void __nvgpu_kfree(struct gk20a *g, void *addr)
{
	kmem_dbg(g, "kfree: addr=0x%p", addr);
#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
	__nvgpu_track_kfree(g, addr);
#else
	kfree(addr);
#endif
}

void __nvgpu_vfree(struct gk20a *g, void *addr)
{
	kmem_dbg(g, "vfree: addr=0x%p", addr);
#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
	__nvgpu_track_vfree(g, addr);
#else
	vfree(addr);
#endif
}

#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE

void nvgpu_lock_tracker(struct nvgpu_mem_alloc_tracker *tracker)
{
	nvgpu_mutex_acquire(&tracker->lock);
}

void nvgpu_unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker)
{
	nvgpu_mutex_release(&tracker->lock);
}

void kmem_print_mem_alloc(struct gk20a *g,
			  struct nvgpu_mem_alloc *alloc,
			  struct seq_file *s)
{
#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
	int i;

	__pstat(s, "nvgpu-alloc: addr=0x%llx size=%ld\n",
		alloc->addr, alloc->size);
	for (i = 0; i < alloc->stack_length; i++)
		__pstat(s, "  %3d [<%p>] %pS\n", i,
			(void *)alloc->stack[i],
			(void *)alloc->stack[i]);
	__pstat(s, "\n");
#else
	__pstat(s, "nvgpu-alloc: addr=0x%llx size=%ld src=%pF\n",
		alloc->addr, alloc->size, alloc->ip);
#endif
}

static int nvgpu_add_alloc(struct nvgpu_mem_alloc_tracker *tracker,
			   struct nvgpu_mem_alloc *alloc)
{
	alloc->allocs_entry.key_start = alloc->addr;
	alloc->allocs_entry.key_end = alloc->addr + alloc->size;

	nvgpu_rbtree_insert(&alloc->allocs_entry, &tracker->allocs);
	return 0;
}

static struct nvgpu_mem_alloc *nvgpu_rem_alloc(
	struct nvgpu_mem_alloc_tracker *tracker, u64 alloc_addr)
{
	struct nvgpu_mem_alloc *alloc;
	struct nvgpu_rbtree_node *node = NULL;

	nvgpu_rbtree_search(alloc_addr, &node, tracker->allocs);
	if (!node)
		return NULL;

	alloc = nvgpu_mem_alloc_from_rbtree_node(node);

	nvgpu_rbtree_unlink(node, &tracker->allocs);

	return alloc;
}
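/*
 * Illustrative pairing of the two helpers above: an allocation is
 * inserted keyed on its [addr, addr + size) range and removed again by
 * base address at free time.
 *
 *	alloc->addr = (u64)(uintptr_t)ptr;
 *	alloc->size = size;
 *	nvgpu_add_alloc(tracker, alloc);
 *	...
 *	alloc = nvgpu_rem_alloc(tracker, (u64)(uintptr_t)ptr);
 */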
static int __nvgpu_save_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker,
				   unsigned long size, unsigned long real_size,
				   u64 addr, unsigned long ip)
{
	int ret;
	struct nvgpu_mem_alloc *alloc;
#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
	struct stack_trace stack_trace;
#endif

	alloc = kzalloc(sizeof(*alloc), GFP_KERNEL);
	if (!alloc)
		return -ENOMEM;

	alloc->owner = tracker;
	alloc->size = size;
	alloc->real_size = real_size;
	alloc->addr = addr;
	alloc->ip = (void *)(uintptr_t)ip;

#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
	stack_trace.max_entries = MAX_STACK_TRACE;
	stack_trace.nr_entries = 0;
	stack_trace.entries = alloc->stack;
	/*
	 * This 4 here skips the 2 function calls that happen for all traced
	 * allocs due to nvgpu:
	 *
	 *   __nvgpu_save_kmem_alloc+0x7c/0x128
	 *   __nvgpu_track_kzalloc+0xcc/0xf8
	 *
	 * And the function calls that get made by the stack trace code itself.
	 * If the trace savings code changes this will likely have to change
	 * as well.
	 */
	stack_trace.skip = 4;
	save_stack_trace(&stack_trace);
	alloc->stack_length = stack_trace.nr_entries;
#endif

	nvgpu_lock_tracker(tracker);
	tracker->bytes_alloced += size;
	tracker->bytes_alloced_real += real_size;
	tracker->nr_allocs++;

	/* Keep track of this for building a histogram later on. */
	if (tracker->max_alloc < size)
		tracker->max_alloc = size;
	if (tracker->min_alloc > size)
		tracker->min_alloc = size;

	ret = nvgpu_add_alloc(tracker, alloc);
	if (ret) {
		WARN(1, "Duplicate alloc??? 0x%llx\n", addr);
		kfree(alloc);
		nvgpu_unlock_tracker(tracker);
		return ret;
	}
	nvgpu_unlock_tracker(tracker);

	return 0;
}

static int __nvgpu_free_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker,
				   u64 addr)
{
	struct nvgpu_mem_alloc *alloc;

	nvgpu_lock_tracker(tracker);
	alloc = nvgpu_rem_alloc(tracker, addr);
	if (WARN(!alloc, "Possible double-free detected: 0x%llx!", addr)) {
		nvgpu_unlock_tracker(tracker);
		return -EINVAL;
	}

	memset((void *)alloc->addr, 0, alloc->size);

	tracker->nr_frees++;
	tracker->bytes_freed += alloc->size;
	tracker->bytes_freed_real += alloc->real_size;
	nvgpu_unlock_tracker(tracker);

	return 0;
}

static void __nvgpu_check_valloc_size(unsigned long size)
{
	WARN(size < PAGE_SIZE, "Alloc smaller than page size! (%lu)!\n", size);
}

static void __nvgpu_check_kalloc_size(size_t size)
{
	WARN(size > PAGE_SIZE, "Alloc larger than page size! (%zu)!\n", size);
}

void *__nvgpu_track_vmalloc(struct gk20a *g, unsigned long size,
			    unsigned long ip)
{
	void *alloc = vmalloc(size);

	if (!alloc)
		return NULL;

	__nvgpu_check_valloc_size(size);

	/*
	 * Ignore the return message. If this fails let's not cause any issues
	 * for the rest of the driver.
	 */
	__nvgpu_save_kmem_alloc(g->vmallocs, size, roundup_pow_of_two(size),
				(u64)(uintptr_t)alloc, ip);

	return alloc;
}

void *__nvgpu_track_vzalloc(struct gk20a *g, unsigned long size,
			    unsigned long ip)
{
	void *alloc = vzalloc(size);

	if (!alloc)
		return NULL;

	__nvgpu_check_valloc_size(size);

	/*
	 * Ignore the return message. If this fails let's not cause any issues
	 * for the rest of the driver.
	 */
	__nvgpu_save_kmem_alloc(g->vmallocs, size, roundup_pow_of_two(size),
				(u64)(uintptr_t)alloc, ip);

	return alloc;
}

void *__nvgpu_track_kmalloc(struct gk20a *g, size_t size, unsigned long ip)
{
	void *alloc = kmalloc(size, GFP_KERNEL);

	if (!alloc)
		return NULL;

	__nvgpu_check_kalloc_size(size);

	__nvgpu_save_kmem_alloc(g->kmallocs, size, roundup_pow_of_two(size),
				(u64)(uintptr_t)alloc, ip);

	return alloc;
}

void *__nvgpu_track_kzalloc(struct gk20a *g, size_t size, unsigned long ip)
{
	void *alloc = kzalloc(size, GFP_KERNEL);

	if (!alloc)
		return NULL;

	__nvgpu_check_kalloc_size(size);

	__nvgpu_save_kmem_alloc(g->kmallocs, size, roundup_pow_of_two(size),
				(u64)(uintptr_t)alloc, ip);

	return alloc;
}

void *__nvgpu_track_kcalloc(struct gk20a *g, size_t n, size_t size,
			    unsigned long ip)
{
	void *alloc = kcalloc(n, size, GFP_KERNEL);

	if (!alloc)
		return NULL;

	__nvgpu_check_kalloc_size(n * size);

	__nvgpu_save_kmem_alloc(g->kmallocs, n * size,
				roundup_pow_of_two(n * size),
				(u64)(uintptr_t)alloc, ip);

	return alloc;
}

void __nvgpu_track_vfree(struct gk20a *g, void *addr)
{
	/*
	 * Often it is accepted practice to pass NULL pointers into free
	 * functions to save code.
	 */
	if (!addr)
		return;

	__nvgpu_free_kmem_alloc(g->vmallocs, (u64)(uintptr_t)addr);

	vfree(addr);
}

void __nvgpu_track_kfree(struct gk20a *g, void *addr)
{
	if (!addr)
		return;

	__nvgpu_free_kmem_alloc(g->kmallocs, (u64)(uintptr_t)addr);

	kfree(addr);
}

static int __do_check_for_outstanding_allocs(
	struct gk20a *g,
	struct nvgpu_mem_alloc_tracker *tracker,
	const char *type, bool silent)
{
	struct nvgpu_rbtree_node *node;
	int count = 0;

	nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
	while (node) {
		struct nvgpu_mem_alloc *alloc =
			nvgpu_mem_alloc_from_rbtree_node(node);

		if (!silent)
			kmem_print_mem_alloc(g, alloc, NULL);

		count++;
		nvgpu_rbtree_enum_next(&node, node);
	}

	return count;
}

/**
 * check_for_outstanding_allocs - Count and display outstanding allocs
 *
 * @g      - The GPU.
 * @silent - If set don't print anything about the allocs.
 *
 * Dump (or just count) the number of allocations left outstanding.
 */
static int check_for_outstanding_allocs(struct gk20a *g, bool silent)
{
	int count = 0;

	count += __do_check_for_outstanding_allocs(g, g->kmallocs, "kmalloc",
						   silent);
	count += __do_check_for_outstanding_allocs(g, g->vmallocs, "vmalloc",
						   silent);

	return count;
}

static void do_nvgpu_kmem_cleanup(struct nvgpu_mem_alloc_tracker *tracker,
				  void (*force_free_func)(const void *))
{
	struct nvgpu_rbtree_node *node;

	nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
	while (node) {
		struct nvgpu_mem_alloc *alloc =
			nvgpu_mem_alloc_from_rbtree_node(node);

		if (force_free_func)
			force_free_func((void *)alloc->addr);

		nvgpu_rbtree_unlink(node, &tracker->allocs);
		kfree(alloc);

		nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
	}
}

/**
 * nvgpu_kmem_cleanup - Cleanup the kmem tracking
 *
 * @g          - The GPU.
 * @force_free - If set will also free leaked objects if possible.
 *
 * Cleanup all of the allocs made by nvgpu_kmem tracking code. If @force_free
 * is non-zero then the allocation made by nvgpu is also freed. This is risky,
 * though, as it is possible that the memory is still in use by other parts of
 * the GPU driver not aware that this has happened.
 *
 * In theory it should be fine if the GPU driver has been deinitialized and
 * there are no bugs in that code. However, if there are any bugs in that code
 * then they could likely manifest as odd crashes indeterminate amounts of time
 * in the future. So use @force_free at your own risk.
 */
static void nvgpu_kmem_cleanup(struct gk20a *g, bool force_free)
{
	do_nvgpu_kmem_cleanup(g->kmallocs, force_free ? kfree : NULL);
	do_nvgpu_kmem_cleanup(g->vmallocs, force_free ? vfree : NULL);
}

void nvgpu_kmem_fini(struct gk20a *g, int flags)
{
	int count;
	bool silent, force_free;

	if (!flags)
		return;

	silent = !(flags & NVGPU_KMEM_FINI_DUMP_ALLOCS);
	force_free = !!(flags & NVGPU_KMEM_FINI_FORCE_CLEANUP);

	count = check_for_outstanding_allocs(g, silent);
	nvgpu_kmem_cleanup(g, force_free);

	/*
	 * If we leak objects we can either BUG() out or just WARN(). In general
	 * it doesn't make sense to BUG() on here since leaking a few objects
	 * won't crash the kernel but it can be helpful for development.
	 *
	 * If neither flag is set then we just silently do nothing.
	 */
	if (count > 0) {
		if (flags & NVGPU_KMEM_FINI_WARN) {
			WARN(1, "Letting %d allocs leak!!\n", count);
		} else if (flags & NVGPU_KMEM_FINI_BUG) {
			nvgpu_err(g, "Letting %d allocs leak!!", count);
			BUG();
		}
	}
}

int nvgpu_kmem_init(struct gk20a *g)
{
	int err;

	g->vmallocs = kzalloc(sizeof(*g->vmallocs), GFP_KERNEL);
	g->kmallocs = kzalloc(sizeof(*g->kmallocs), GFP_KERNEL);

	if (!g->vmallocs || !g->kmallocs) {
		err = -ENOMEM;
		goto fail;
	}

	g->vmallocs->name = "vmalloc";
	g->kmallocs->name = "kmalloc";

	g->vmallocs->allocs = NULL;
	g->kmallocs->allocs = NULL;

	nvgpu_mutex_init(&g->vmallocs->lock);
	nvgpu_mutex_init(&g->kmallocs->lock);

	g->vmallocs->min_alloc = PAGE_SIZE;
	g->kmallocs->min_alloc = KMALLOC_MIN_SIZE;

	/*
	 * This needs to go after all the other initialization since they use
	 * the nvgpu_kzalloc() API.
	 */
	g->vmallocs->allocs_cache = nvgpu_kmem_cache_create(g,
					sizeof(struct nvgpu_mem_alloc));
	g->kmallocs->allocs_cache = nvgpu_kmem_cache_create(g,
					sizeof(struct nvgpu_mem_alloc));

	if (!g->vmallocs->allocs_cache || !g->kmallocs->allocs_cache) {
		err = -ENOMEM;
		if (g->vmallocs->allocs_cache)
			nvgpu_kmem_cache_destroy(g->vmallocs->allocs_cache);
		if (g->kmallocs->allocs_cache)
			nvgpu_kmem_cache_destroy(g->kmallocs->allocs_cache);
		goto fail;
	}

	return 0;

fail:
	if (g->vmallocs)
		kfree(g->vmallocs);
	if (g->kmallocs)
		kfree(g->kmallocs);
	return err;
}

#else /* !CONFIG_NVGPU_TRACK_MEM_USAGE */

int nvgpu_kmem_init(struct gk20a *g)
{
	return 0;
}

void nvgpu_kmem_fini(struct gk20a *g, int flags)
{
}
#endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */

struct nvgpu_kmem_cache *nvgpu_kmem_cache_create(struct gk20a *g, size_t size)
{
	struct nvgpu_kmem_cache *cache =
		nvgpu_kzalloc(g, sizeof(struct nvgpu_kmem_cache));

	if (!cache)
		return NULL;

	cache->g = g;

	snprintf(cache->name, sizeof(cache->name),
		 "nvgpu-cache-0x%p-%d-%d", g, (int)size,
		 atomic_inc_return(&kmem_cache_id));
	cache->cache = kmem_cache_create(cache->name,
					 size, size, 0, NULL);
	if (!cache->cache) {
		nvgpu_kfree(g, cache);
		return NULL;
	}

	return cache;
}

void nvgpu_kmem_cache_destroy(struct nvgpu_kmem_cache *cache)
{
	struct gk20a *g = cache->g;

	kmem_cache_destroy(cache->cache);
	nvgpu_kfree(g, cache);
}

void *nvgpu_kmem_cache_alloc(struct nvgpu_kmem_cache *cache)
{
	return kmem_cache_alloc(cache->cache, GFP_KERNEL);
}

void nvgpu_kmem_cache_free(struct nvgpu_kmem_cache *cache, void *ptr)
{
	kmem_cache_free(cache->cache, ptr);
}
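/*
 * Minimal usage sketch for the cache API above (illustrative; 'struct
 * foo' is hypothetical):
 *
 *	struct nvgpu_kmem_cache *cache =
 *		nvgpu_kmem_cache_create(g, sizeof(struct foo));
 *	struct foo *f = cache ? nvgpu_kmem_cache_alloc(cache) : NULL;
 *
 *	if (f) {
 *		// ... use f ...
 *		nvgpu_kmem_cache_free(cache, f);
 *	}
 *	if (cache)
 *		nvgpu_kmem_cache_destroy(cache);
 */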
105
drivers/gpu/nvgpu/os/linux/kmem_priv.h
Normal file
@@ -0,0 +1,105 @@
/*
 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef __KMEM_PRIV_H__
#define __KMEM_PRIV_H__

#include <nvgpu/rbtree.h>
#include <nvgpu/lock.h>

struct seq_file;

#define __pstat(s, fmt, msg...)				\
	do {						\
		if (s)					\
			seq_printf(s, fmt, ##msg);	\
		else					\
			pr_info(fmt, ##msg);		\
	} while (0)
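/*
 * Illustrative: the same call site serves both the debugfs and printk
 * paths depending on whether a seq_file is passed.
 *
 *	__pstat(s, "allocs: %llu\n", tracker->nr_allocs);    // seq_printf()
 *	__pstat(NULL, "allocs: %llu\n", tracker->nr_allocs); // pr_info()
 */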
#define MAX_STACK_TRACE		20

/*
 * Linux specific version of the nvgpu_kmem_cache struct. This type is
 * completely opaque to the rest of the driver.
 */
struct nvgpu_kmem_cache {
	struct gk20a *g;
	struct kmem_cache *cache;

	/*
	 * Memory to hold the kmem_cache unique name. Only necessary on our
	 * k3.10 kernel when not using the SLUB allocator but it's easier to
	 * just carry this on to newer kernels.
	 */
	char name[128];
};

#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE

struct nvgpu_mem_alloc {
	struct nvgpu_mem_alloc_tracker *owner;

	void *ip;
#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
	unsigned long stack[MAX_STACK_TRACE];
	int stack_length;
#endif

	u64 addr;

	unsigned long size;
	unsigned long real_size;

	struct nvgpu_rbtree_node allocs_entry;
};

static inline struct nvgpu_mem_alloc *
nvgpu_mem_alloc_from_rbtree_node(struct nvgpu_rbtree_node *node)
{
	return (struct nvgpu_mem_alloc *)
	    ((uintptr_t)node - offsetof(struct nvgpu_mem_alloc, allocs_entry));
}
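/*
 * Note: the helper above is the container_of() pattern written out by
 * hand; it is equivalent to
 *
 *	container_of(node, struct nvgpu_mem_alloc, allocs_entry)
 */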
/*
 * Linux specific tracking of vmalloc, kmalloc, etc.
 */
struct nvgpu_mem_alloc_tracker {
	const char *name;
	struct nvgpu_kmem_cache *allocs_cache;
	struct nvgpu_rbtree_node *allocs;
	struct nvgpu_mutex lock;

	u64 bytes_alloced;
	u64 bytes_freed;
	u64 bytes_alloced_real;
	u64 bytes_freed_real;
	u64 nr_allocs;
	u64 nr_frees;

	unsigned long min_alloc;
	unsigned long max_alloc;
};

void nvgpu_lock_tracker(struct nvgpu_mem_alloc_tracker *tracker);
void nvgpu_unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker);

void kmem_print_mem_alloc(struct gk20a *g,
			  struct nvgpu_mem_alloc *alloc,
			  struct seq_file *s);
#endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */

#endif /* __KMEM_PRIV_H__ */
132
drivers/gpu/nvgpu/os/linux/log.c
Normal file
@@ -0,0 +1,132 @@
/*
 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/kernel.h>
#include <linux/device.h>

#include <nvgpu/log.h>

#include "gk20a/gk20a.h"
#include "platform_gk20a.h"
#include "os_linux.h"

/*
 * Define a length for log buffers. This is the buffer that the 'fmt, ...' part
 * of __nvgpu_do_log_print() prints into. This buffer lives on the stack so it
 * needs to not be overly sized since we have limited kernel stack space. But at
 * the same time we don't want it to be restrictive either.
 */
#define LOG_BUFFER_LENGTH	160

/*
 * Annoying quirk of Linux: this has to be a string literal since the printk()
 * function and friends use the preprocessor to concatenate stuff to the start
 * of this string when printing.
 */
#define LOG_FMT			"nvgpu: %s %33s:%-4d [%s]  %s\n"

static const char *log_types[] = {
	"ERR",
	"WRN",
	"DBG",
	"INFO",
};

int nvgpu_log_mask_enabled(struct gk20a *g, u64 log_mask)
{
	return !!(g->log_mask & log_mask);
}

static inline const char *nvgpu_log_name(struct gk20a *g)
{
	return dev_name(dev_from_gk20a(g));
}

#ifdef CONFIG_GK20A_TRACE_PRINTK
static void __nvgpu_trace_printk_log(u32 trace, const char *gpu_name,
				     const char *func_name, int line,
				     const char *log_type, const char *log)
{
	trace_printk(LOG_FMT, gpu_name, func_name, line, log_type, log);
}
#endif

static void __nvgpu_really_print_log(u32 trace, const char *gpu_name,
				     const char *func_name, int line,
				     enum nvgpu_log_type type, const char *log)
{
	const char *name = gpu_name ? gpu_name : "";
	const char *log_type = log_types[type];

#ifdef CONFIG_GK20A_TRACE_PRINTK
	if (trace)
		return __nvgpu_trace_printk_log(trace, name, func_name,
						line, log_type, log);
#endif
	switch (type) {
	case NVGPU_DEBUG:
		/*
		 * We could use pr_debug() here but we control debug enablement
		 * separately from the Linux kernel. Perhaps this is a bug in
		 * nvgpu.
		 */
		pr_info(LOG_FMT, name, func_name, line, log_type, log);
		break;
	case NVGPU_INFO:
		pr_info(LOG_FMT, name, func_name, line, log_type, log);
		break;
	case NVGPU_WARNING:
		pr_warn(LOG_FMT, name, func_name, line, log_type, log);
		break;
	case NVGPU_ERROR:
		pr_err(LOG_FMT, name, func_name, line, log_type, log);
		break;
	}
}

__attribute__((format (printf, 5, 6)))
void __nvgpu_log_msg(struct gk20a *g, const char *func_name, int line,
		     enum nvgpu_log_type type, const char *fmt, ...)
{
	char log[LOG_BUFFER_LENGTH];
	va_list args;

	va_start(args, fmt);
	vsnprintf(log, LOG_BUFFER_LENGTH, fmt, args);
	va_end(args);

	__nvgpu_really_print_log(0, g ? nvgpu_log_name(g) : "",
				 func_name, line, type, log);
}

__attribute__((format (printf, 5, 6)))
void __nvgpu_log_dbg(struct gk20a *g, u64 log_mask,
		     const char *func_name, int line,
		     const char *fmt, ...)
{
	char log[LOG_BUFFER_LENGTH];
	va_list args;

	if ((log_mask & g->log_mask) == 0)
		return;

	va_start(args, fmt);
	vsnprintf(log, LOG_BUFFER_LENGTH, fmt, args);
	va_end(args);

	__nvgpu_really_print_log(g->log_trace, nvgpu_log_name(g),
				 func_name, line, NVGPU_DEBUG, log);
}
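/*
 * Illustrative call (the gpu_dbg_mem mask name is taken from elsewhere
 * in the driver): the print is emitted only when that bit is set in
 * g->log_mask, and is routed to trace_printk() instead of printk()
 * when g->log_trace is set.
 *
 *	__nvgpu_log_dbg(g, gpu_dbg_mem, __func__, __LINE__,
 *			"mapped %llu bytes", (u64)size);
 */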
1365
drivers/gpu/nvgpu/os/linux/module.c
Normal file
File diff suppressed because it is too large
Load Diff
32
drivers/gpu/nvgpu/os/linux/module.h
Normal file
@@ -0,0 +1,32 @@
/*
 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */
#ifndef __NVGPU_COMMON_LINUX_MODULE_H__
#define __NVGPU_COMMON_LINUX_MODULE_H__

struct gk20a;
struct device;
struct nvgpu_os_linux;

int gk20a_pm_finalize_poweron(struct device *dev);
int nvgpu_finalize_poweron_linux(struct nvgpu_os_linux *l);
void gk20a_remove_support(struct gk20a *g);
void gk20a_driver_start_unload(struct gk20a *g);
int nvgpu_quiesce(struct gk20a *g);
int nvgpu_remove(struct device *dev, struct class *class);
void nvgpu_free_irq(struct gk20a *g);
struct device_node *nvgpu_get_node(struct gk20a *g);
void __iomem *nvgpu_ioremap_resource(struct platform_device *dev, int i,
				     struct resource **out);
extern struct class nvgpu_class;

#endif
62
drivers/gpu/nvgpu/os/linux/module_usermode.c
Normal file
@@ -0,0 +1,62 @@
/*
 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <nvgpu/types.h>

#include <nvgpu/hw/gv11b/hw_usermode_gv11b.h>

#include "os_linux.h"

/*
 * Locks out the driver from accessing GPU registers. This prevents access to
 * these registers after the GPU has been clock or power gated. This should
 * help find annoying bugs where register reads and writes are silently dropped
 * after the GPU has been turned off. On older chips these reads and writes can
 * also lock the entire CPU up.
 */
void nvgpu_lockout_usermode_registers(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

	l->usermode_regs = NULL;
}

/*
 * Undoes nvgpu_lockout_usermode_registers().
 */
void nvgpu_restore_usermode_registers(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

	l->usermode_regs = l->usermode_regs_saved;
}

void nvgpu_remove_usermode_support(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

	if (l->usermode_regs) {
		l->usermode_regs = NULL;
	}
}

void nvgpu_init_usermode_support(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

	l->usermode_regs = l->regs + usermode_cfg0_r();
	l->usermode_regs_saved = l->usermode_regs;
}
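/*
 * Illustrative pairing (call sites assumed, not shown in this diff):
 * lock out before the GPU is power/clock gated and restore after
 * poweron, so that stale usermode register accesses in between hit a
 * NULL mapping instead of silently disappearing or hanging the CPU.
 *
 *	nvgpu_lockout_usermode_registers(g);
 *	// ... GPU rails/clocks off, later back on ...
 *	nvgpu_restore_usermode_registers(g);
 */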
27
drivers/gpu/nvgpu/os/linux/module_usermode.h
Normal file
@@ -0,0 +1,27 @@
/*
 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef __NVGPU_MODULE_T19X_H__
#define __NVGPU_MODULE_T19X_H__

struct gk20a;

void nvgpu_init_usermode_support(struct gk20a *g);
void nvgpu_remove_usermode_support(struct gk20a *g);
void nvgpu_lockout_usermode_registers(struct gk20a *g);
void nvgpu_restore_usermode_registers(struct gk20a *g);

#endif
613
drivers/gpu/nvgpu/os/linux/nvgpu_mem.c
Normal file
@@ -0,0 +1,613 @@
/*
 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <nvgpu/dma.h>
#include <nvgpu/gmmu.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/page_allocator.h>
#include <nvgpu/log.h>
#include <nvgpu/bug.h>
#include <nvgpu/enabled.h>
#include <nvgpu/kmem.h>
#include <nvgpu/vidmem.h>

#include <nvgpu/linux/dma.h>
#include <nvgpu/linux/vidmem.h>

#include <linux/vmalloc.h>

#include "os_linux.h"

#include "gk20a/gk20a.h"
#include "gk20a/mm_gk20a.h"
#include "platform_gk20a.h"

static u64 __nvgpu_sgl_phys(struct gk20a *g, struct nvgpu_sgl *sgl)
{
	struct device *dev = dev_from_gk20a(g);
	struct gk20a_platform *platform = gk20a_get_platform(dev);
	u64 ipa = sg_phys((struct scatterlist *)sgl);

	if (platform->phys_addr)
		return platform->phys_addr(g, ipa);

	return ipa;
}

int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem)
{
	void *cpu_va;
	pgprot_t prot = nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ?
		PAGE_KERNEL :
		pgprot_writecombine(PAGE_KERNEL);

	if (mem->aperture != APERTURE_SYSMEM)
		return 0;

	/*
	 * WAR for bug 2040115: we already will always have a coherent vmap()
	 * for all sysmem buffers. The prot settings are left alone since
	 * eventually this should be deleted.
	 */
	if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
		return 0;

	/*
	 * A CPU mapping is implicitly made for all SYSMEM DMA allocations that
	 * don't have NVGPU_DMA_NO_KERNEL_MAPPING. Thus we don't need to make
	 * another CPU mapping.
	 */
	if (!(mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING))
		return 0;

	if (WARN_ON(mem->cpu_va)) {
		nvgpu_warn(g, "nested");
		return -EBUSY;
	}

	cpu_va = vmap(mem->priv.pages,
		      PAGE_ALIGN(mem->size) >> PAGE_SHIFT,
		      0, prot);

	if (WARN_ON(!cpu_va))
		return -ENOMEM;

	mem->cpu_va = cpu_va;
	return 0;
}

void nvgpu_mem_end(struct gk20a *g, struct nvgpu_mem *mem)
{
	if (mem->aperture != APERTURE_SYSMEM)
		return;

	/*
	 * WAR for bug 2040115: skip this since the map will be taken care of
	 * during the free in the DMA API.
	 */
	if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
		return;

	/*
	 * Similar to nvgpu_mem_begin() we don't need to unmap the CPU mapping
	 * already made by the DMA API.
	 */
	if (!(mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING))
		return;

	vunmap(mem->cpu_va);
	mem->cpu_va = NULL;
}

static void pramin_access_batch_rd_n(struct gk20a *g, u32 start, u32 words,
				     u32 **arg)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	u32 r = start, *dest_u32 = *arg;

	if (!l->regs) {
		__gk20a_warn_on_no_regs();
		return;
	}

	while (words--) {
		*dest_u32++ = gk20a_readl(g, r);
		r += sizeof(u32);
	}

	*arg = dest_u32;
}

u32 nvgpu_mem_rd32(struct gk20a *g, struct nvgpu_mem *mem, u32 w)
{
	u32 data = 0;

	if (mem->aperture == APERTURE_SYSMEM) {
		u32 *ptr = mem->cpu_va;

		WARN_ON(!ptr);
		data = ptr[w];
#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
		nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x", ptr + w, data);
#endif
	} else if (mem->aperture == APERTURE_VIDMEM) {
		u32 value;
		u32 *p = &value;

		nvgpu_pramin_access_batched(g, mem, w * sizeof(u32),
				sizeof(u32), pramin_access_batch_rd_n, &p);

		data = value;

	} else {
		WARN_ON("Accessing unallocated nvgpu_mem");
	}

	return data;
}

u32 nvgpu_mem_rd(struct gk20a *g, struct nvgpu_mem *mem, u32 offset)
{
	WARN_ON(offset & 3);
	return nvgpu_mem_rd32(g, mem, offset / sizeof(u32));
}

void nvgpu_mem_rd_n(struct gk20a *g, struct nvgpu_mem *mem,
		    u32 offset, void *dest, u32 size)
{
	WARN_ON(offset & 3);
	WARN_ON(size & 3);

	if (mem->aperture == APERTURE_SYSMEM) {
		u8 *src = (u8 *)mem->cpu_va + offset;

		WARN_ON(!mem->cpu_va);
		memcpy(dest, src, size);
#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
		if (size)
			nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x ... [%d bytes]",
				  src, *(u32 *)dest, size);
#endif
	} else if (mem->aperture == APERTURE_VIDMEM) {
		u32 *dest_u32 = dest;

		nvgpu_pramin_access_batched(g, mem, offset, size,
				pramin_access_batch_rd_n, &dest_u32);
	} else {
		WARN_ON("Accessing unallocated nvgpu_mem");
	}
}

static void pramin_access_batch_wr_n(struct gk20a *g, u32 start, u32 words,
				     u32 **arg)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	u32 r = start, *src_u32 = *arg;

	if (!l->regs) {
		__gk20a_warn_on_no_regs();
		return;
	}

	while (words--) {
		writel_relaxed(*src_u32++, l->regs + r);
		r += sizeof(u32);
	}

	*arg = src_u32;
}

void nvgpu_mem_wr32(struct gk20a *g, struct nvgpu_mem *mem, u32 w, u32 data)
{
	if (mem->aperture == APERTURE_SYSMEM) {
		u32 *ptr = mem->cpu_va;

		WARN_ON(!ptr);
#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
		nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x", ptr + w, data);
#endif
		ptr[w] = data;
	} else if (mem->aperture == APERTURE_VIDMEM) {
		u32 value = data;
		u32 *p = &value;

		nvgpu_pramin_access_batched(g, mem, w * sizeof(u32),
				sizeof(u32), pramin_access_batch_wr_n, &p);
		if (!mem->skip_wmb)
			wmb();
	} else {
		WARN_ON("Accessing unallocated nvgpu_mem");
	}
}

void nvgpu_mem_wr(struct gk20a *g, struct nvgpu_mem *mem, u32 offset, u32 data)
{
	WARN_ON(offset & 3);
	nvgpu_mem_wr32(g, mem, offset / sizeof(u32), data);
}

void nvgpu_mem_wr_n(struct gk20a *g, struct nvgpu_mem *mem, u32 offset,
		    void *src, u32 size)
{
	WARN_ON(offset & 3);
	WARN_ON(size & 3);

	if (mem->aperture == APERTURE_SYSMEM) {
		u8 *dest = (u8 *)mem->cpu_va + offset;

		WARN_ON(!mem->cpu_va);
#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
		if (size)
			nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x ... [%d bytes]",
				  dest, *(u32 *)src, size);
#endif
		memcpy(dest, src, size);
	} else if (mem->aperture == APERTURE_VIDMEM) {
		u32 *src_u32 = src;

		nvgpu_pramin_access_batched(g, mem, offset, size,
				pramin_access_batch_wr_n, &src_u32);
		if (!mem->skip_wmb)
			wmb();
	} else {
		WARN_ON("Accessing unallocated nvgpu_mem");
	}
}

static void pramin_access_batch_set(struct gk20a *g, u32 start, u32 words,
				    u32 **arg)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	u32 r = start, repeat = **arg;

	if (!l->regs) {
		__gk20a_warn_on_no_regs();
		return;
	}

	while (words--) {
		writel_relaxed(repeat, l->regs + r);
		r += sizeof(u32);
	}
}

void nvgpu_memset(struct gk20a *g, struct nvgpu_mem *mem, u32 offset,
		  u32 c, u32 size)
{
	WARN_ON(offset & 3);
	WARN_ON(size & 3);
	WARN_ON(c & ~0xff);

	c &= 0xff;

	if (mem->aperture == APERTURE_SYSMEM) {
		u8 *dest = (u8 *)mem->cpu_va + offset;

		WARN_ON(!mem->cpu_va);
#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
		if (size)
			nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x [times %d]",
				  dest, c, size);
#endif
		memset(dest, c, size);
	} else if (mem->aperture == APERTURE_VIDMEM) {
		u32 repeat_value = c | (c << 8) | (c << 16) | (c << 24);
		u32 *p = &repeat_value;

		nvgpu_pramin_access_batched(g, mem, offset, size,
				pramin_access_batch_set, &p);
		if (!mem->skip_wmb)
			wmb();
	} else {
		WARN_ON("Accessing unallocated nvgpu_mem");
	}
}
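/*
 * Illustrative: the accessors above make nvgpu_mem access
 * aperture-agnostic; the same calls work whether 'mem' sits in SYSMEM
 * (plain CPU pointer) or VIDMEM (PRAMIN window).
 *
 *	nvgpu_memset(g, mem, 0, 0, mem->size);
 *	nvgpu_mem_wr32(g, mem, 0, 0xcafe0000);
 *	WARN_ON(nvgpu_mem_rd32(g, mem, 0) != 0xcafe0000);
 */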
/*
 * Obtain a SYSMEM address from a Linux SGL. This should eventually go away
 * and/or become private to this file once all bad usages of Linux SGLs are
 * cleaned up in the driver.
 */
u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl)
{
	if (nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG) ||
	    !nvgpu_iommuable(g))
		return g->ops.mm.gpu_phys_addr(g, NULL,
				__nvgpu_sgl_phys(g, (struct nvgpu_sgl *)sgl));

	if (sg_dma_address(sgl) == 0)
		return g->ops.mm.gpu_phys_addr(g, NULL,
				__nvgpu_sgl_phys(g, (struct nvgpu_sgl *)sgl));

	if (sg_dma_address(sgl) == DMA_ERROR_CODE)
		return 0;

	return nvgpu_mem_iommu_translate(g, sg_dma_address(sgl));
}

/*
 * Obtain the address the GPU should use from the %mem assuming this is a
 * SYSMEM allocation.
 */
static u64 nvgpu_mem_get_addr_sysmem(struct gk20a *g, struct nvgpu_mem *mem)
{
	return nvgpu_mem_get_addr_sgl(g, mem->priv.sgt->sgl);
}

/*
 * Return the base address of %mem. Handles whether this is a VIDMEM or SYSMEM
 * allocation.
 *
 * Note: this API does not make sense to use for _VIDMEM_ buffers with greater
 * than one scatterlist chunk. If there's more than one scatterlist chunk then
 * the buffer will not be contiguous. As such the base address probably isn't
 * very useful. This is true for SYSMEM as well, if there's no IOMMU.
 *
 * However! It _is_ OK to use this on discontiguous sysmem buffers _if_ there's
 * an IOMMU present and enabled for the GPU.
 */
u64 nvgpu_mem_get_addr(struct gk20a *g, struct nvgpu_mem *mem)
{
	struct nvgpu_page_alloc *alloc;

	if (mem->aperture == APERTURE_SYSMEM)
		return nvgpu_mem_get_addr_sysmem(g, mem);

	/*
	 * Otherwise get the vidmem address.
	 */
	alloc = mem->vidmem_alloc;

	/* This API should not be used with > 1 chunks */
	WARN_ON(alloc->nr_chunks != 1);

	return alloc->base;
}

/*
 * This should only be used on contiguous buffers regardless of whether
 * there's an IOMMU present/enabled. This applies to both SYSMEM and
 * VIDMEM.
 */
u64 nvgpu_mem_get_phys_addr(struct gk20a *g, struct nvgpu_mem *mem)
{
	/*
	 * For a VIDMEM buf, this is identical to simply get_addr() so just
	 * fall back to that.
	 */
	if (mem->aperture == APERTURE_VIDMEM)
		return nvgpu_mem_get_addr(g, mem);

	return __nvgpu_sgl_phys(g, (struct nvgpu_sgl *)mem->priv.sgt->sgl);
}

/*
 * Be careful how you use this! You are responsible for correctly freeing this
 * memory.
 */
int nvgpu_mem_create_from_mem(struct gk20a *g,
			      struct nvgpu_mem *dest, struct nvgpu_mem *src,
			      int start_page, int nr_pages)
{
	int ret;
	u64 start = start_page * PAGE_SIZE;
	u64 size = nr_pages * PAGE_SIZE;
	dma_addr_t new_iova;

	if (src->aperture != APERTURE_SYSMEM)
		return -EINVAL;

	/* Some silly things a caller might do... */
	if (size > src->size)
		return -EINVAL;
	if ((start + size) > src->size)
		return -EINVAL;

	dest->mem_flags = src->mem_flags | NVGPU_MEM_FLAG_SHADOW_COPY;
	dest->aperture = src->aperture;
	dest->skip_wmb = src->skip_wmb;
	dest->size = size;

	/*
	 * Re-use the CPU mapping only if the mapping was made by the DMA API.
	 *
	 * Bug 2040115: the DMA API wrapper makes the mapping that we should
	 * re-use.
	 */
	if (!(src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) ||
	    nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
		dest->cpu_va = src->cpu_va + (PAGE_SIZE * start_page);

	dest->priv.pages = src->priv.pages + start_page;
	dest->priv.flags = src->priv.flags;

	new_iova = sg_dma_address(src->priv.sgt->sgl) ?
		sg_dma_address(src->priv.sgt->sgl) + start : 0;

	/*
	 * Make a new SG table that is based only on the subset of pages that
	 * is passed to us. This table gets freed by the dma free routines.
	 */
	if (src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING)
		ret = nvgpu_get_sgtable_from_pages(g, &dest->priv.sgt,
				src->priv.pages + start_page,
				new_iova, size);
	else
		ret = nvgpu_get_sgtable(g, &dest->priv.sgt, dest->cpu_va,
				new_iova, size);

	return ret;
}
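/*
 * Illustrative: carve a one-page sub-buffer starting at page 2 of an
 * existing sysmem buffer. The window shares pages with 'src'; only the
 * freshly built sg_table belongs to 'win'.
 *
 *	struct nvgpu_mem win;
 *
 *	if (!nvgpu_mem_create_from_mem(g, &win, src, 2, 1)) {
 *		// ... access win, then free it via the DMA free routines ...
 *	}
 */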
int __nvgpu_mem_create_from_pages(struct gk20a *g, struct nvgpu_mem *dest,
				  struct page **pages, int nr_pages)
{
	struct sg_table *sgt;
	struct page **our_pages =
		nvgpu_kmalloc(g, sizeof(struct page *) * nr_pages);

	if (!our_pages)
		return -ENOMEM;

	memcpy(our_pages, pages, sizeof(struct page *) * nr_pages);

	if (nvgpu_get_sgtable_from_pages(g, &sgt, pages, 0,
					 nr_pages * PAGE_SIZE)) {
		nvgpu_kfree(g, our_pages);
		return -ENOMEM;
	}

	/*
	 * If we are making an SGT from physical pages we can be reasonably
	 * certain that this should bypass the SMMU - thus we set the DMA (aka
	 * IOVA) address to 0. This tells the GMMU mapping code to not make a
	 * mapping directed to the SMMU.
	 */
	sg_dma_address(sgt->sgl) = 0;

	dest->mem_flags = __NVGPU_MEM_FLAG_NO_DMA;
	dest->aperture = APERTURE_SYSMEM;
	dest->skip_wmb = 0;
	dest->size = PAGE_SIZE * nr_pages;

	dest->priv.flags = 0;
	dest->priv.pages = our_pages;
	dest->priv.sgt = sgt;

	return 0;
}

#ifdef CONFIG_TEGRA_GK20A_NVHOST
int __nvgpu_mem_create_from_phys(struct gk20a *g, struct nvgpu_mem *dest,
				 u64 src_phys, int nr_pages)
{
	struct page **pages =
		nvgpu_kmalloc(g, sizeof(struct page *) * nr_pages);
	int i, ret = 0;

	if (!pages)
		return -ENOMEM;

	for (i = 0; i < nr_pages; i++)
		pages[i] = phys_to_page(src_phys + PAGE_SIZE * i);

	ret = __nvgpu_mem_create_from_pages(g, dest, pages, nr_pages);
	nvgpu_kfree(g, pages);

	return ret;
}
#endif

static struct nvgpu_sgl *nvgpu_mem_linux_sgl_next(struct nvgpu_sgl *sgl)
{
	return (struct nvgpu_sgl *)sg_next((struct scatterlist *)sgl);
}

static u64 nvgpu_mem_linux_sgl_phys(struct gk20a *g, struct nvgpu_sgl *sgl)
{
	return (u64)__nvgpu_sgl_phys(g, sgl);
}

static u64 nvgpu_mem_linux_sgl_dma(struct nvgpu_sgl *sgl)
{
	return (u64)sg_dma_address((struct scatterlist *)sgl);
}

static u64 nvgpu_mem_linux_sgl_length(struct nvgpu_sgl *sgl)
{
	return (u64)((struct scatterlist *)sgl)->length;
}

static u64 nvgpu_mem_linux_sgl_gpu_addr(struct gk20a *g,
					struct nvgpu_sgl *sgl,
					struct nvgpu_gmmu_attrs *attrs)
{
	if (sg_dma_address((struct scatterlist *)sgl) == 0)
		return g->ops.mm.gpu_phys_addr(g, attrs,
					       __nvgpu_sgl_phys(g, sgl));

	if (sg_dma_address((struct scatterlist *)sgl) == DMA_ERROR_CODE)
		return 0;

	return nvgpu_mem_iommu_translate(g,
				sg_dma_address((struct scatterlist *)sgl));
}

static bool nvgpu_mem_linux_sgt_iommuable(struct gk20a *g,
					  struct nvgpu_sgt *sgt)
{
	if (nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG))
		return false;
	return true;
}

static void nvgpu_mem_linux_sgl_free(struct gk20a *g, struct nvgpu_sgt *sgt)
{
	/*
	 * Free this SGT. All we do is free the passed SGT. The actual Linux
	 * SGT/SGL needs to be freed separately.
	 */
	nvgpu_kfree(g, sgt);
}

static const struct nvgpu_sgt_ops nvgpu_linux_sgt_ops = {
	.sgl_next	= nvgpu_mem_linux_sgl_next,
	.sgl_phys	= nvgpu_mem_linux_sgl_phys,
	.sgl_dma	= nvgpu_mem_linux_sgl_dma,
	.sgl_length	= nvgpu_mem_linux_sgl_length,
	.sgl_gpu_addr	= nvgpu_mem_linux_sgl_gpu_addr,
	.sgt_iommuable	= nvgpu_mem_linux_sgt_iommuable,
	.sgt_free	= nvgpu_mem_linux_sgl_free,
};
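/*
 * Illustrative walk over the abstract SGT using the ops table above;
 * common (OS-independent) code can sum a buffer's size this way without
 * ever touching a Linux scatterlist directly.
 *
 *	struct nvgpu_sgl *sgl;
 *	u64 bytes = 0;
 *
 *	for (sgl = sgt->sgl; sgl; sgl = sgt->ops->sgl_next(sgl))
 *		bytes += sgt->ops->sgl_length(sgl);
 */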
static struct nvgpu_sgt *__nvgpu_mem_get_sgl_from_vidmem(
	struct gk20a *g,
	struct scatterlist *linux_sgl)
{
	struct nvgpu_page_alloc *vidmem_alloc;

	vidmem_alloc = nvgpu_vidmem_get_page_alloc(linux_sgl);
	if (!vidmem_alloc)
		return NULL;

	return &vidmem_alloc->sgt;
}

struct nvgpu_sgt *nvgpu_linux_sgt_create(struct gk20a *g, struct sg_table *sgt)
{
	struct nvgpu_sgt *nvgpu_sgt;
	struct scatterlist *linux_sgl = sgt->sgl;

	if (nvgpu_addr_is_vidmem_page_alloc(sg_dma_address(linux_sgl)))
		return __nvgpu_mem_get_sgl_from_vidmem(g, linux_sgl);

	nvgpu_sgt = nvgpu_kzalloc(g, sizeof(*nvgpu_sgt));
	if (!nvgpu_sgt)
		return NULL;

	nvgpu_log(g, gpu_dbg_sgl, "Making Linux SGL!");

	nvgpu_sgt->sgl = (struct nvgpu_sgl *)linux_sgl;
	nvgpu_sgt->ops = &nvgpu_linux_sgt_ops;

	return nvgpu_sgt;
}

struct nvgpu_sgt *nvgpu_sgt_create_from_mem(struct gk20a *g,
					    struct nvgpu_mem *mem)
{
	return nvgpu_linux_sgt_create(g, mem->priv.sgt);
}
294
drivers/gpu/nvgpu/os/linux/nvhost.c
Normal file
@@ -0,0 +1,294 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
* version 2, as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <linux/nvhost.h>
|
||||
#include <linux/nvhost_t194.h>
|
||||
#include <linux/nvhost_ioctl.h>
|
||||
#include <linux/of_platform.h>
|
||||
|
||||
#include <nvgpu/nvhost.h>
|
||||
|
||||
#include "nvhost_priv.h"
|
||||
|
||||
#include "gk20a/gk20a.h"
|
||||
#include "os_linux.h"
|
||||
#include "module.h"
|
||||
|
||||
int nvgpu_get_nvhost_dev(struct gk20a *g)
|
||||
{
|
||||
struct device_node *np = nvgpu_get_node(g);
|
||||
struct platform_device *host1x_pdev = NULL;
|
||||
const __be32 *host1x_ptr;
|
||||
|
||||
host1x_ptr = of_get_property(np, "nvidia,host1x", NULL);
|
||||
if (host1x_ptr) {
|
||||
struct device_node *host1x_node =
|
||||
of_find_node_by_phandle(be32_to_cpup(host1x_ptr));
|
||||
|
||||
host1x_pdev = of_find_device_by_node(host1x_node);
|
||||
if (!host1x_pdev) {
|
||||
nvgpu_warn(g, "host1x device not available");
|
||||
return -EPROBE_DEFER;
|
||||
}
|
||||
|
||||
} else {
|
||||
if (g->has_syncpoints) {
|
||||
nvgpu_warn(g, "host1x reference not found. assuming no syncpoints support");
|
||||
g->has_syncpoints = false;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
g->nvhost_dev = nvgpu_kzalloc(g, sizeof(struct nvgpu_nvhost_dev));
|
||||
if (!g->nvhost_dev)
|
||||
return -ENOMEM;
|
||||
|
||||
g->nvhost_dev->host1x_pdev = host1x_pdev;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void nvgpu_free_nvhost_dev(struct gk20a *g)
|
||||
{
|
||||
nvgpu_kfree(g, g->nvhost_dev);
|
||||
}
|
||||
|
||||
int nvgpu_nvhost_module_busy_ext(
|
||||
struct nvgpu_nvhost_dev *nvhost_dev)
|
||||
{
|
||||
	return nvhost_module_busy_ext(nvhost_dev->host1x_pdev);
}

void nvgpu_nvhost_module_idle_ext(
	struct nvgpu_nvhost_dev *nvhost_dev)
{
	nvhost_module_idle_ext(nvhost_dev->host1x_pdev);
}

void nvgpu_nvhost_debug_dump_device(
	struct nvgpu_nvhost_dev *nvhost_dev)
{
	nvhost_debug_dump_device(nvhost_dev->host1x_pdev);
}

const char *nvgpu_nvhost_syncpt_get_name(
	struct nvgpu_nvhost_dev *nvhost_dev, int id)
{
	return nvhost_syncpt_get_name(nvhost_dev->host1x_pdev, id);
}

bool nvgpu_nvhost_syncpt_is_valid_pt_ext(
	struct nvgpu_nvhost_dev *nvhost_dev, u32 id)
{
	return nvhost_syncpt_is_valid_pt_ext(nvhost_dev->host1x_pdev, id);
}

int nvgpu_nvhost_syncpt_is_expired_ext(
	struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 thresh)
{
	return nvhost_syncpt_is_expired_ext(nvhost_dev->host1x_pdev,
			id, thresh);
}

u32 nvgpu_nvhost_syncpt_incr_max_ext(
	struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 incrs)
{
	return nvhost_syncpt_incr_max_ext(nvhost_dev->host1x_pdev, id, incrs);
}

int nvgpu_nvhost_intr_register_notifier(
	struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 thresh,
	void (*callback)(void *, int), void *private_data)
{
	return nvhost_intr_register_notifier(nvhost_dev->host1x_pdev,
			id, thresh,
			callback, private_data);
}

void nvgpu_nvhost_syncpt_set_min_eq_max_ext(
	struct nvgpu_nvhost_dev *nvhost_dev, u32 id)
{
	nvhost_syncpt_set_min_eq_max_ext(nvhost_dev->host1x_pdev, id);
}

void nvgpu_nvhost_syncpt_put_ref_ext(
	struct nvgpu_nvhost_dev *nvhost_dev, u32 id)
{
	nvhost_syncpt_put_ref_ext(nvhost_dev->host1x_pdev, id);
}

u32 nvgpu_nvhost_get_syncpt_host_managed(
	struct nvgpu_nvhost_dev *nvhost_dev,
	u32 param, const char *syncpt_name)
{
	return nvhost_get_syncpt_host_managed(nvhost_dev->host1x_pdev,
			param, syncpt_name);
}

u32 nvgpu_nvhost_get_syncpt_client_managed(
	struct nvgpu_nvhost_dev *nvhost_dev,
	const char *syncpt_name)
{
	return nvhost_get_syncpt_client_managed(nvhost_dev->host1x_pdev,
			syncpt_name);
}

int nvgpu_nvhost_syncpt_wait_timeout_ext(
	struct nvgpu_nvhost_dev *nvhost_dev, u32 id,
	u32 thresh, u32 timeout, u32 *value, struct timespec *ts)
{
	return nvhost_syncpt_wait_timeout_ext(nvhost_dev->host1x_pdev,
		id, thresh, timeout, value, ts);
}

int nvgpu_nvhost_syncpt_read_ext_check(
	struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 *val)
{
	return nvhost_syncpt_read_ext_check(nvhost_dev->host1x_pdev, id, val);
}

u32 nvgpu_nvhost_syncpt_read_maxval(
	struct nvgpu_nvhost_dev *nvhost_dev, u32 id)
{
	return nvhost_syncpt_read_maxval(nvhost_dev->host1x_pdev, id);
}

void nvgpu_nvhost_syncpt_set_safe_state(
	struct nvgpu_nvhost_dev *nvhost_dev, u32 id)
{
	u32 val;

	/*
	 * Add a large number of increments to the current value so that
	 * all waiters on this syncpoint are released.
	 *
	 * We don't expect any case where more than 0x10000 increments
	 * are pending.
	 */
	val = nvhost_syncpt_read_minval(nvhost_dev->host1x_pdev, id);
	val += 0x10000;

	nvhost_syncpt_set_minval(nvhost_dev->host1x_pdev, id, val);
	nvhost_syncpt_set_maxval(nvhost_dev->host1x_pdev, id, val);
}

int nvgpu_nvhost_create_symlink(struct gk20a *g)
{
	struct device *dev = dev_from_gk20a(g);
	int err = 0;

	if (g->nvhost_dev &&
			(dev->parent != &g->nvhost_dev->host1x_pdev->dev)) {
		err = sysfs_create_link(&g->nvhost_dev->host1x_pdev->dev.kobj,
				&dev->kobj,
				dev_name(dev));
	}

	return err;
}

void nvgpu_nvhost_remove_symlink(struct gk20a *g)
{
	struct device *dev = dev_from_gk20a(g);

	if (g->nvhost_dev &&
			(dev->parent != &g->nvhost_dev->host1x_pdev->dev)) {
		sysfs_remove_link(&g->nvhost_dev->host1x_pdev->dev.kobj,
				dev_name(dev));
	}
}

#ifdef CONFIG_SYNC
u32 nvgpu_nvhost_sync_pt_id(struct sync_pt *pt)
{
	return nvhost_sync_pt_id(pt);
}

u32 nvgpu_nvhost_sync_pt_thresh(struct sync_pt *pt)
{
	return nvhost_sync_pt_thresh(pt);
}

struct sync_fence *nvgpu_nvhost_sync_fdget(int fd)
{
	return nvhost_sync_fdget(fd);
}

int nvgpu_nvhost_sync_num_pts(struct sync_fence *fence)
{
	return nvhost_sync_num_pts(fence);
}

struct sync_fence *nvgpu_nvhost_sync_create_fence(
	struct nvgpu_nvhost_dev *nvhost_dev,
	u32 id, u32 thresh, const char *name)
{
	struct nvhost_ctrl_sync_fence_info pt = {
		.id = id,
		.thresh = thresh,
	};

	return nvhost_sync_create_fence(nvhost_dev->host1x_pdev, &pt, 1, name);
}
#endif /* CONFIG_SYNC */

#ifdef CONFIG_TEGRA_T19X_GRHOST
int nvgpu_nvhost_syncpt_unit_interface_get_aperture(
	struct nvgpu_nvhost_dev *nvhost_dev,
	u64 *base, size_t *size)
{
	return nvhost_syncpt_unit_interface_get_aperture(
		nvhost_dev->host1x_pdev, (phys_addr_t *)base, size);
}

u32 nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(u32 syncpt_id)
{
	return nvhost_syncpt_unit_interface_get_byte_offset(syncpt_id);
}

int nvgpu_nvhost_syncpt_init(struct gk20a *g)
{
	int err = 0;

	if (!g->has_syncpoints)
		return -ENOSYS;

	err = nvgpu_get_nvhost_dev(g);
	if (err) {
		nvgpu_err(g, "host1x device not available");
		g->has_syncpoints = false;
		return -ENOSYS;
	}

	err = nvgpu_nvhost_syncpt_unit_interface_get_aperture(
			g->nvhost_dev,
			&g->syncpt_unit_base,
			&g->syncpt_unit_size);
	if (err) {
		nvgpu_err(g, "Failed to get syncpt interface");
		g->has_syncpoints = false;
		return -ENOSYS;
	}

	g->syncpt_size =
		nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(1);
	nvgpu_info(g, "syncpt_unit_base %llx syncpt_unit_size %zx size %x\n",
			g->syncpt_unit_base, g->syncpt_unit_size,
			g->syncpt_size);

	return 0;
}
#endif
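For reference: the aperture queried by nvgpu_nvhost_syncpt_init() is a flat MMIO window in which each syncpoint sits at a fixed per-ID byte offset. A minimal sketch of deriving a syncpoint's address under that assumption (the helper name nvgpu_syncpt_mmio_addr is illustrative and not part of this patch):

	static u64 nvgpu_syncpt_mmio_addr(struct gk20a *g, u32 syncpt_id)
	{
		/* g->syncpt_unit_base was filled in by nvgpu_nvhost_syncpt_init() */
		return g->syncpt_unit_base +
			nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(syncpt_id);
	}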
24
drivers/gpu/nvgpu/os/linux/nvhost_priv.h
Normal file
@@ -0,0 +1,24 @@
/*
 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef __NVGPU_NVHOST_PRIV_H__
#define __NVGPU_NVHOST_PRIV_H__

struct nvgpu_nvhost_dev {
	struct platform_device *host1x_pdev;
};

#endif /* __NVGPU_NVHOST_PRIV_H__ */
106
drivers/gpu/nvgpu/os/linux/nvlink.c
Normal file
@@ -0,0 +1,106 @@
/*
 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <gk20a/gk20a.h>
#include <nvgpu/nvlink.h>
#include <nvgpu/enabled.h>
#include "module.h"

#ifdef CONFIG_TEGRA_NVLINK
int nvgpu_nvlink_read_dt_props(struct gk20a *g)
{
	struct device_node *np;
	struct nvlink_device *ndev = g->nvlink.priv;
	u32 local_dev_id;
	u32 local_link_id;
	u32 remote_dev_id;
	u32 remote_link_id;
	bool is_master;

	/* Parse DT */
	np = nvgpu_get_node(g);
	if (!np)
		goto fail;

	np = of_get_child_by_name(np, "nvidia,nvlink");
	if (!np)
		goto fail;

	np = of_get_child_by_name(np, "endpoint");
	if (!np)
		goto fail;

	/* Parse DT structure to detect endpoint topology */
	of_property_read_u32(np, "local_dev_id", &local_dev_id);
	of_property_read_u32(np, "local_link_id", &local_link_id);
	of_property_read_u32(np, "remote_dev_id", &remote_dev_id);
	of_property_read_u32(np, "remote_link_id", &remote_link_id);
	is_master = of_property_read_bool(np, "is_master");

	/* Check that we are in dGPU mode */
	if (local_dev_id != NVLINK_ENDPT_GV100) {
		nvgpu_err(g, "Local nvlink device is not dGPU");
		return -EINVAL;
	}

	ndev->is_master = is_master;
	ndev->device_id = local_dev_id;
	ndev->link.link_id = local_link_id;
	ndev->link.remote_dev_info.device_id = remote_dev_id;
	ndev->link.remote_dev_info.link_id = remote_link_id;

	return 0;

fail:
	nvgpu_info(g, "nvlink endpoint not found or invalid in DT");
	return -ENODEV;
}
#endif /* CONFIG_TEGRA_NVLINK */
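The parser above walks two levels of child nodes below the GPU's own device-tree node. A hypothetical fragment matching the property names read by the code (the values shown are purely illustrative; consult the platform's actual bindings):

	nvidia,nvlink {
		endpoint {
			local_dev_id = <0>;	/* must equal NVLINK_ENDPT_GV100 */
			local_link_id = <0>;
			remote_dev_id = <1>;
			remote_link_id = <0>;
			is_master;
		};
	};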

void nvgpu_mss_nvlink_init_credits(struct gk20a *g)
{
	/* MSS_NVLINK_1_BASE */
	void __iomem *soc1 = ioremap(0x01f20010, 4096);
	/* MSS_NVLINK_2_BASE */
	void __iomem *soc2 = ioremap(0x01f40010, 4096);
	/* MSS_NVLINK_3_BASE */
	void __iomem *soc3 = ioremap(0x01f60010, 4096);
	/* MSS_NVLINK_4_BASE */
	void __iomem *soc4 = ioremap(0x01f80010, 4096);
	u32 val;

	nvgpu_log(g, gpu_dbg_info, "init nvlink soc credits");

	/* rewrite each credit register with its current value */
	val = readl_relaxed(soc1);
	writel_relaxed(val, soc1);
	val = readl_relaxed(soc1 + 4);
	writel_relaxed(val, soc1 + 4);

	val = readl_relaxed(soc2);
	writel_relaxed(val, soc2);
	val = readl_relaxed(soc2 + 4);
	writel_relaxed(val, soc2 + 4);

	val = readl_relaxed(soc3);
	writel_relaxed(val, soc3);
	val = readl_relaxed(soc3 + 4);
	writel_relaxed(val, soc3 + 4);

	val = readl_relaxed(soc4);
	writel_relaxed(val, soc4);
	val = readl_relaxed(soc4 + 4);
	writel_relaxed(val, soc4 + 4);

	/* the mappings are only needed for this one-shot init */
	iounmap(soc1);
	iounmap(soc2);
	iounmap(soc3);
	iounmap(soc4);
}
79
drivers/gpu/nvgpu/os/linux/os_fence_android.c
Normal file
@@ -0,0 +1,79 @@
/*
 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#include <nvgpu/types.h>
#include <nvgpu/os_fence.h>
#include <nvgpu/linux/os_fence_android.h>

#include "gk20a/gk20a.h"

#include "../drivers/staging/android/sync.h"

inline struct sync_fence *nvgpu_get_sync_fence(struct nvgpu_os_fence *s)
{
	struct sync_fence *fence = (struct sync_fence *)s->priv;
	return fence;
}

static void nvgpu_os_fence_clear(struct nvgpu_os_fence *fence_out)
{
	fence_out->priv = NULL;
	fence_out->g = NULL;
	fence_out->ops = NULL;
}

void nvgpu_os_fence_init(struct nvgpu_os_fence *fence_out,
		struct gk20a *g, const struct nvgpu_os_fence_ops *fops,
		struct sync_fence *fence)
{
	fence_out->g = g;
	fence_out->ops = fops;
	fence_out->priv = (void *)fence;
}

void nvgpu_os_fence_android_drop_ref(struct nvgpu_os_fence *s)
{
	struct sync_fence *fence = nvgpu_get_sync_fence(s);

	sync_fence_put(fence);

	nvgpu_os_fence_clear(s);
}

void nvgpu_os_fence_android_install_fd(struct nvgpu_os_fence *s, int fd)
{
	struct sync_fence *fence = nvgpu_get_sync_fence(s);

	sync_fence_get(fence);
	sync_fence_install(fence, fd);
}

int nvgpu_os_fence_fdget(struct nvgpu_os_fence *fence_out,
		struct channel_gk20a *c, int fd)
{
	int err = -ENOSYS;

#ifdef CONFIG_TEGRA_GK20A_NVHOST
	err = nvgpu_os_fence_syncpt_fdget(fence_out, c, fd);
#endif

	if (err)
		err = nvgpu_os_fence_sema_fdget(fence_out, c, fd);

	if (err)
		nvgpu_err(c->g, "error obtaining fence from fd %d", fd);

	return err;
}
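The nvgpu_os_fence_ops table is the indirection that keeps the callers OS-agnostic: once a fence has been obtained, common code only ever goes through the ops pointers. A hedged usage sketch (err, wait_cmd, c and fd stand in for caller state; a max_wait_cmds of 0 means "no limit" per the checks in the wait-command generators):

	struct nvgpu_os_fence fence = {0};

	if (nvgpu_os_fence_fdget(&fence, c, fd) == 0) {
		/* emit wait commands for every sync point in the fence */
		err = fence.ops->program_waits(&fence, wait_cmd, c, 0);
		/* release the sync_fence reference taken by fdget */
		fence.ops->drop_ref(&fence);
	}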
111
drivers/gpu/nvgpu/os/linux/os_fence_android_sema.c
Normal file
@@ -0,0 +1,111 @@
/*
 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <nvgpu/errno.h>

#include <nvgpu/types.h>
#include <nvgpu/os_fence.h>
#include <nvgpu/linux/os_fence_android.h>
#include <nvgpu/semaphore.h>

#include "gk20a/channel_sync_gk20a.h"
#include "gk20a/mm_gk20a.h"

#include "sync_sema_android.h"

#include "../drivers/staging/android/sync.h"

int nvgpu_os_fence_sema_wait_gen_cmd(struct nvgpu_os_fence *s,
	struct priv_cmd_entry *wait_cmd,
	struct channel_gk20a *c,
	int max_wait_cmds)
{
	int err;
	int wait_cmd_size;
	int num_wait_cmds;
	int i;
	struct nvgpu_semaphore *sema;
	struct sync_fence *sync_fence = nvgpu_get_sync_fence(s);

	wait_cmd_size = c->g->ops.fifo.get_sema_wait_cmd_size();

	num_wait_cmds = sync_fence->num_fences;
	if (num_wait_cmds == 0)
		return 0;

	if (max_wait_cmds && num_wait_cmds > max_wait_cmds)
		return -EINVAL;

	err = gk20a_channel_alloc_priv_cmdbuf(c,
		wait_cmd_size * num_wait_cmds,
		wait_cmd);
	if (err) {
		nvgpu_err(c->g, "not enough priv cmd buffer space");
		return err;
	}

	for (i = 0; i < num_wait_cmds; i++) {
		struct fence *f = sync_fence->cbs[i].sync_pt;
		struct sync_pt *pt = sync_pt_from_fence(f);

		sema = gk20a_sync_pt_sema(pt);
		gk20a_channel_gen_sema_wait_cmd(c, sema, wait_cmd,
				wait_cmd_size, i);
	}

	return 0;
}

static const struct nvgpu_os_fence_ops sema_ops = {
	.program_waits = nvgpu_os_fence_sema_wait_gen_cmd,
	.drop_ref = nvgpu_os_fence_android_drop_ref,
	.install_fence = nvgpu_os_fence_android_install_fd,
};

int nvgpu_os_fence_sema_create(
	struct nvgpu_os_fence *fence_out,
	struct channel_gk20a *c,
	struct nvgpu_semaphore *sema)
{
	struct sync_fence *fence;

	fence = gk20a_sync_fence_create(c, sema, "f-gk20a-0x%04x",
		nvgpu_semaphore_gpu_ro_va(sema));

	if (!fence) {
		nvgpu_err(c->g, "error constructing new fence: f-gk20a-0x%04x",
			(u32)nvgpu_semaphore_gpu_ro_va(sema));

		return -ENOMEM;
	}

	nvgpu_os_fence_init(fence_out, c->g, &sema_ops, fence);

	return 0;
}

int nvgpu_os_fence_sema_fdget(struct nvgpu_os_fence *fence_out,
	struct channel_gk20a *c, int fd)
{
	struct sync_fence *fence = gk20a_sync_fence_fdget(fd);

	if (!fence)
		return -EINVAL;

	nvgpu_os_fence_init(fence_out, c->g, &sema_ops, fence);

	return 0;
}
121
drivers/gpu/nvgpu/os/linux/os_fence_android_syncpt.c
Normal file
@@ -0,0 +1,121 @@
/*
 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <nvgpu/errno.h>

#include <nvgpu/types.h>
#include <nvgpu/os_fence.h>
#include <nvgpu/linux/os_fence_android.h>
#include <nvgpu/nvhost.h>
#include <nvgpu/atomic.h>

#include "gk20a/gk20a.h"
#include "gk20a/channel_gk20a.h"
#include "gk20a/channel_sync_gk20a.h"
#include "gk20a/mm_gk20a.h"

#include "../drivers/staging/android/sync.h"

int nvgpu_os_fence_syncpt_wait_gen_cmd(struct nvgpu_os_fence *s,
	struct priv_cmd_entry *wait_cmd,
	struct channel_gk20a *c,
	int max_wait_cmds)
{
	int err;
	int wait_cmd_size;
	int num_wait_cmds;
	int i;
	u32 wait_id;
	struct sync_pt *pt;

	struct sync_fence *sync_fence = (struct sync_fence *)s->priv;

	if (max_wait_cmds && sync_fence->num_fences > max_wait_cmds)
		return -EINVAL;

	/* validate syncpt ids */
	for (i = 0; i < sync_fence->num_fences; i++) {
		pt = sync_pt_from_fence(sync_fence->cbs[i].sync_pt);
		wait_id = nvgpu_nvhost_sync_pt_id(pt);
		if (!wait_id || !nvgpu_nvhost_syncpt_is_valid_pt_ext(
					c->g->nvhost_dev, wait_id)) {
			return -EINVAL;
		}
	}

	num_wait_cmds = nvgpu_nvhost_sync_num_pts(sync_fence);
	if (num_wait_cmds == 0)
		return 0;

	wait_cmd_size = c->g->ops.fifo.get_syncpt_wait_cmd_size();
	err = gk20a_channel_alloc_priv_cmdbuf(c,
		wait_cmd_size * num_wait_cmds, wait_cmd);
	if (err) {
		nvgpu_err(c->g,
			"not enough priv cmd buffer space");
		return err;
	}

	for (i = 0; i < sync_fence->num_fences; i++) {
		struct fence *f = sync_fence->cbs[i].sync_pt;
		struct sync_pt *pt = sync_pt_from_fence(f);
		u32 wait_id = nvgpu_nvhost_sync_pt_id(pt);
		u32 wait_value = nvgpu_nvhost_sync_pt_thresh(pt);

		err = gk20a_channel_gen_syncpt_wait_cmd(c, wait_id, wait_value,
			wait_cmd, wait_cmd_size, i, true);
	}

	WARN_ON(i != num_wait_cmds);

	return 0;
}

static const struct nvgpu_os_fence_ops syncpt_ops = {
	.program_waits = nvgpu_os_fence_syncpt_wait_gen_cmd,
	.drop_ref = nvgpu_os_fence_android_drop_ref,
	.install_fence = nvgpu_os_fence_android_install_fd,
};

int nvgpu_os_fence_syncpt_create(
	struct nvgpu_os_fence *fence_out, struct channel_gk20a *c,
	struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 thresh)
{
	struct sync_fence *fence = nvgpu_nvhost_sync_create_fence(
		nvhost_dev, id, thresh, "fence");

	if (!fence) {
		nvgpu_err(c->g, "error constructing fence %s", "fence");
		return -ENOMEM;
	}

	nvgpu_os_fence_init(fence_out, c->g, &syncpt_ops, fence);

	return 0;
}

int nvgpu_os_fence_syncpt_fdget(struct nvgpu_os_fence *fence_out,
		struct channel_gk20a *c, int fd)
{
	struct sync_fence *fence = nvgpu_nvhost_sync_fdget(fd);

	if (!fence)
		return -ENOMEM;

	nvgpu_os_fence_init(fence_out, c->g, &syncpt_ops, fence);

	return 0;
}
166
drivers/gpu/nvgpu/os/linux/os_linux.h
Normal file
@@ -0,0 +1,166 @@
/*
 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef NVGPU_OS_LINUX_H
#define NVGPU_OS_LINUX_H

#include <linux/cdev.h>
#include <linux/iommu.h>
#include <linux/hashtable.h>

#include "gk20a/gk20a.h"
#include "cde.h"
#include "sched.h"

struct nvgpu_os_linux_ops {
	struct {
		void (*get_program_numbers)(struct gk20a *g,
					    u32 block_height_log2,
					    u32 shader_parameter,
					    int *hprog, int *vprog);
		bool (*need_scatter_buffer)(struct gk20a *g);
		int (*populate_scatter_buffer)(struct gk20a *g,
					       struct sg_table *sgt,
					       size_t surface_size,
					       void *scatter_buffer_ptr,
					       size_t scatter_buffer_size);
	} cde;
};

struct nvgpu_os_linux {
	struct gk20a g;
	struct device *dev;

	struct {
		struct cdev cdev;
		struct device *node;
	} channel;

	struct {
		struct cdev cdev;
		struct device *node;
	} ctrl;

	struct {
		struct cdev cdev;
		struct device *node;
	} as_dev;

	struct {
		struct cdev cdev;
		struct device *node;
	} dbg;

	struct {
		struct cdev cdev;
		struct device *node;
	} prof;

	struct {
		struct cdev cdev;
		struct device *node;
	} tsg;

	struct {
		struct cdev cdev;
		struct device *node;
	} ctxsw;

	struct {
		struct cdev cdev;
		struct device *node;
	} sched;

	dev_t cdev_region;

	struct devfreq *devfreq;

	struct device_dma_parameters dma_parms;

	atomic_t hw_irq_stall_count;
	atomic_t hw_irq_nonstall_count;

	struct nvgpu_cond sw_irq_stall_last_handled_wq;
	atomic_t sw_irq_stall_last_handled;

	atomic_t nonstall_ops;

	struct nvgpu_cond sw_irq_nonstall_last_handled_wq;
	atomic_t sw_irq_nonstall_last_handled;

	struct work_struct nonstall_fn_work;
	struct workqueue_struct *nonstall_work_queue;

	struct resource *reg_mem;
	void __iomem *regs;
	void __iomem *regs_saved;

	struct resource *bar1_mem;
	void __iomem *bar1;
	void __iomem *bar1_saved;

	void __iomem *usermode_regs;
	void __iomem *usermode_regs_saved;

	struct nvgpu_os_linux_ops ops;

#ifdef CONFIG_DEBUG_FS
	struct dentry *debugfs;
	struct dentry *debugfs_alias;

	struct dentry *debugfs_ltc_enabled;
	struct dentry *debugfs_timeouts_enabled;
	struct dentry *debugfs_gr_idle_timeout_default;
	struct dentry *debugfs_disable_bigpage;
	struct dentry *debugfs_gr_default_attrib_cb_size;

	struct dentry *debugfs_timeslice_low_priority_us;
	struct dentry *debugfs_timeslice_medium_priority_us;
	struct dentry *debugfs_timeslice_high_priority_us;
	struct dentry *debugfs_runlist_interleave;
	struct dentry *debugfs_allocators;
	struct dentry *debugfs_xve;
	struct dentry *debugfs_kmem;
	struct dentry *debugfs_hal;

	struct dentry *debugfs_force_preemption_cilp;
	struct dentry *debugfs_force_preemption_gfxp;
	struct dentry *debugfs_dump_ctxsw_stats;
#endif
	DECLARE_HASHTABLE(ecc_sysfs_stats_htable, 5);

	struct gk20a_cde_app cde_app;

	struct rw_semaphore busy_lock;

	struct gk20a_sched_ctrl sched_ctrl;

	bool init_done;
};

static inline struct nvgpu_os_linux *nvgpu_os_linux_from_gk20a(struct gk20a *g)
{
	return container_of(g, struct nvgpu_os_linux, g);
}

static inline struct device *dev_from_gk20a(struct gk20a *g)
{
	return nvgpu_os_linux_from_gk20a(g)->dev;
}

#define INTERFACE_NAME "nvhost%s-gpu"

#endif
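The struct gk20a is embedded by value in struct nvgpu_os_linux, which is what makes the container_of() conversion above valid: common code passes around only the gk20a pointer, and the Linux layer recovers its wrapper without any lookup table. A purely didactic round trip:

	struct nvgpu_os_linux *l = ...;	/* e.g. allocated in probe */
	struct gk20a *g = &l->g;	/* common code only ever sees g */

	/* container_of() arithmetic inverts the &l->g step exactly */
	WARN_ON(nvgpu_os_linux_from_gk20a(g) != l);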
26
drivers/gpu/nvgpu/os/linux/os_sched.c
Normal file
@@ -0,0 +1,26 @@
/*
 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */

#include <nvgpu/os_sched.h>

#include <linux/sched.h>

int nvgpu_current_tid(struct gk20a *g)
{
	/* in the kernel, current->pid is the thread ID */
	return current->pid;
}

int nvgpu_current_pid(struct gk20a *g)
{
	/* current->tgid is the process (thread group) ID */
	return current->tgid;
}
861
drivers/gpu/nvgpu/os/linux/pci.c
Normal file
@@ -0,0 +1,861 @@
/*
 * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/pm_runtime.h>
#include <linux/of_platform.h>
#include <linux/of_address.h>

#include <nvgpu/nvhost.h>
#include <nvgpu/nvgpu_common.h>
#include <nvgpu/kmem.h>
#include <nvgpu/enabled.h>
#include <nvgpu/nvlink.h>
#include <nvgpu/soc.h>
#include <nvgpu/sim.h>

#include "gk20a/gk20a.h"
#include "clk/clk.h"
#include "clk/clk_mclk.h"
#include "module.h"
#include "intr.h"
#include "sysfs.h"
#include "os_linux.h"
#include "platform_gk20a.h"

#include "pci.h"
#include "pci_usermode.h"

#include "driver_common.h"

#define PCI_INTERFACE_NAME "card-%s%%s"

static int nvgpu_pci_tegra_probe(struct device *dev)
{
	return 0;
}

static int nvgpu_pci_tegra_remove(struct device *dev)
{
	struct gk20a *g = get_gk20a(dev);

	if (g->ops.gr.remove_gr_sysfs)
		g->ops.gr.remove_gr_sysfs(g);

	return 0;
}

static bool nvgpu_pci_tegra_is_railgated(struct device *pdev)
{
	return false;
}

static long nvgpu_pci_clk_round_rate(struct device *dev, unsigned long rate)
{
	long ret = (long)rate;

	if (rate == UINT_MAX)
		ret = BOOT_GPC2CLK_MHZ * 1000000UL;

	return ret;
}

static struct gk20a_platform nvgpu_pci_device[] = {
	{ /* DEVICE=0x1c35 */
	/* ptimer src frequency in hz */
	.ptimer_src_freq = 31250000,

	.probe = nvgpu_pci_tegra_probe,
	.remove = nvgpu_pci_tegra_remove,

	/* power management configuration */
	.railgate_delay_init = 500,
	.can_railgate_init = false,
	.can_elpg_init = true,
	.enable_elpg = true,
	.enable_elcg = false,
	.enable_slcg = true,
	.enable_blcg = true,
	.enable_mscg = true,
	.can_slcg = true,
	.can_blcg = true,
	.can_elcg = true,

	.disable_aspm = true,

	/* power management callbacks */
	.is_railgated = nvgpu_pci_tegra_is_railgated,
	.clk_round_rate = nvgpu_pci_clk_round_rate,

	.ch_wdt_timeout_ms = 7000,

	.honors_aperture = true,
	.dma_mask = DMA_BIT_MASK(40),
	.vbios_min_version = 0x86063000,
	.hardcode_sw_threshold = true,
	.ina3221_dcb_index = 0,
	.ina3221_i2c_address = 0x84,
	.ina3221_i2c_port = 0x2,
	},
	{ /* DEVICE=0x1c36 */
	/* ptimer src frequency in hz */
	.ptimer_src_freq = 31250000,

	.probe = nvgpu_pci_tegra_probe,
	.remove = nvgpu_pci_tegra_remove,

	/* power management configuration */
	.railgate_delay_init = 500,
	.can_railgate_init = false,
	.can_elpg_init = true,
	.enable_elpg = true,
	.enable_elcg = false,
	.enable_slcg = true,
	.enable_blcg = true,
	.enable_mscg = true,
	.can_slcg = true,
	.can_blcg = true,
	.can_elcg = true,

	.disable_aspm = true,

	/* power management callbacks */
	.is_railgated = nvgpu_pci_tegra_is_railgated,
	.clk_round_rate = nvgpu_pci_clk_round_rate,

	.ch_wdt_timeout_ms = 7000,

	.honors_aperture = true,
	.dma_mask = DMA_BIT_MASK(40),
	.vbios_min_version = 0x86062d00,
	.hardcode_sw_threshold = true,
	.ina3221_dcb_index = 0,
	.ina3221_i2c_address = 0x84,
	.ina3221_i2c_port = 0x2,
	},
	{ /* DEVICE=0x1c37 */
	/* ptimer src frequency in hz */
	.ptimer_src_freq = 31250000,

	.probe = nvgpu_pci_tegra_probe,
	.remove = nvgpu_pci_tegra_remove,

	/* power management configuration */
	.railgate_delay_init = 500,
	.can_railgate_init = false,
	.can_elpg_init = true,
	.enable_elpg = true,
	.enable_elcg = false,
	.enable_slcg = true,
	.enable_blcg = true,
	.enable_mscg = true,
	.can_slcg = true,
	.can_blcg = true,
	.can_elcg = true,

	.disable_aspm = true,

	/* power management callbacks */
	.is_railgated = nvgpu_pci_tegra_is_railgated,
	.clk_round_rate = nvgpu_pci_clk_round_rate,

	.ch_wdt_timeout_ms = 7000,

	.honors_aperture = true,
	.dma_mask = DMA_BIT_MASK(40),
	.vbios_min_version = 0x86063000,
	.hardcode_sw_threshold = true,
	.ina3221_dcb_index = 0,
	.ina3221_i2c_address = 0x84,
	.ina3221_i2c_port = 0x2,
	},
	{ /* DEVICE=0x1c75 */
	/* ptimer src frequency in hz */
	.ptimer_src_freq = 31250000,

	.probe = nvgpu_pci_tegra_probe,
	.remove = nvgpu_pci_tegra_remove,

	/* power management configuration */
	.railgate_delay_init = 500,
	.can_railgate_init = false,
	.can_elpg_init = true,
	.enable_elpg = true,
	.enable_elcg = false,
	.enable_slcg = true,
	.enable_blcg = true,
	.enable_mscg = true,
	.can_slcg = true,
	.can_blcg = true,
	.can_elcg = true,

	.disable_aspm = true,

	/* power management callbacks */
	.is_railgated = nvgpu_pci_tegra_is_railgated,
	.clk_round_rate = nvgpu_pci_clk_round_rate,

	.ch_wdt_timeout_ms = 7000,

	.honors_aperture = true,
	.dma_mask = DMA_BIT_MASK(40),
	.vbios_min_version = 0x86065300,
	.hardcode_sw_threshold = false,
	.ina3221_dcb_index = 1,
	.ina3221_i2c_address = 0x80,
	.ina3221_i2c_port = 0x1,
	},
	{ /* DEVICE=PG503 SKU 201 */
	/* ptimer src frequency in hz */
	.ptimer_src_freq = 31250000,

	.probe = nvgpu_pci_tegra_probe,
	.remove = nvgpu_pci_tegra_remove,

	/* power management configuration */
	.railgate_delay_init = 500,
	.can_railgate_init = false,
	.can_elpg_init = false,
	.enable_elpg = false,
	.enable_elcg = false,
	.enable_slcg = false,
	.enable_blcg = false,
	.enable_mscg = false,
	.can_slcg = false,
	.can_blcg = false,
	.can_elcg = false,

	.disable_aspm = true,

	/* power management callbacks */
	.is_railgated = nvgpu_pci_tegra_is_railgated,
	.clk_round_rate = nvgpu_pci_clk_round_rate,

	.ch_wdt_timeout_ms = 7000,

	.honors_aperture = true,
	.dma_mask = DMA_BIT_MASK(40),
	.vbios_min_version = 0x88001e00,
	.hardcode_sw_threshold = false,
	.run_preos = true,
	},
	{ /* DEVICE=PG503 SKU 200 ES */
	/* ptimer src frequency in hz */
	.ptimer_src_freq = 31250000,

	.probe = nvgpu_pci_tegra_probe,
	.remove = nvgpu_pci_tegra_remove,

	/* power management configuration */
	.railgate_delay_init = 500,
	.can_railgate_init = false,
	.can_elpg_init = false,
	.enable_elpg = false,
	.enable_elcg = false,
	.enable_slcg = false,
	.enable_blcg = false,
	.enable_mscg = false,
	.can_slcg = false,
	.can_blcg = false,
	.can_elcg = false,

	.disable_aspm = true,

	/* power management callbacks */
	.is_railgated = nvgpu_pci_tegra_is_railgated,
	.clk_round_rate = nvgpu_pci_clk_round_rate,

	.ch_wdt_timeout_ms = 7000,

	.honors_aperture = true,
	.dma_mask = DMA_BIT_MASK(40),
	.vbios_min_version = 0x88001e00,
	.hardcode_sw_threshold = false,
	.run_preos = true,
	},
	{ /* DEVICE=0x1dbe (see driver_data 6 in nvgpu_pci_table below) */
	/* ptimer src frequency in hz */
	.ptimer_src_freq = 31250000,

	.probe = nvgpu_pci_tegra_probe,
	.remove = nvgpu_pci_tegra_remove,

	/* power management configuration */
	.railgate_delay_init = 500,
	.can_railgate_init = false,
	.can_elpg_init = false,
	.enable_elpg = false,
	.enable_elcg = false,
	.enable_slcg = false,
	.enable_blcg = false,
	.enable_mscg = false,
	.can_slcg = false,
	.can_blcg = false,
	.can_elcg = false,

	.disable_aspm = true,

	/* power management callbacks */
	.is_railgated = nvgpu_pci_tegra_is_railgated,
	.clk_round_rate = nvgpu_pci_clk_round_rate,

	.ch_wdt_timeout_ms = 7000,

	.honors_aperture = true,
	.dma_mask = DMA_BIT_MASK(40),
	.vbios_min_version = 0x88000126,
	.hardcode_sw_threshold = false,
	.run_preos = true,
	.has_syncpoints = true,
	},
	{ /* SKU250 */
	/* ptimer src frequency in hz */
	.ptimer_src_freq = 31250000,

	.probe = nvgpu_pci_tegra_probe,
	.remove = nvgpu_pci_tegra_remove,

	/* power management configuration */
	.railgate_delay_init = 500,
	.can_railgate_init = false,
	.can_elpg_init = false,
	.enable_elpg = false,
	.enable_elcg = true,
	.enable_slcg = true,
	.enable_blcg = true,
	.enable_mscg = false,
	.can_slcg = true,
	.can_blcg = true,
	.can_elcg = true,

	.disable_aspm = true,

	/* power management callbacks */
	.is_railgated = nvgpu_pci_tegra_is_railgated,
	.clk_round_rate = nvgpu_pci_clk_round_rate,

	.ch_wdt_timeout_ms = 7000,

	.honors_aperture = true,
	.dma_mask = DMA_BIT_MASK(40),
	.vbios_min_version = 0x1,
	.hardcode_sw_threshold = false,
	.run_preos = true,
	.has_syncpoints = true,
	},
	{ /* SKU 0x1e3f */
	/* ptimer src frequency in hz */
	.ptimer_src_freq = 31250000,

	.probe = nvgpu_pci_tegra_probe,
	.remove = nvgpu_pci_tegra_remove,

	/* power management configuration */
	.railgate_delay_init = 500,
	.can_railgate_init = false,
	.can_elpg_init = false,
	.enable_elpg = false,
	.enable_elcg = false,
	.enable_slcg = false,
	.enable_blcg = false,
	.enable_mscg = false,
	.can_slcg = false,
	.can_blcg = false,
	.can_elcg = false,

	.disable_aspm = true,

	/* power management callbacks */
	.is_railgated = nvgpu_pci_tegra_is_railgated,
	.clk_round_rate = nvgpu_pci_clk_round_rate,

	/*
	 * WAR: PCIE X1 is very slow, set to very high value till nvlink
	 * is up.
	 */
	.ch_wdt_timeout_ms = 30000,

	.honors_aperture = true,
	.dma_mask = DMA_BIT_MASK(40),
	.vbios_min_version = 0x1,
	.hardcode_sw_threshold = false,
	.unified_memory = false,
	},
	{ /* 0x1eba */
	/* ptimer src frequency in hz */
	.ptimer_src_freq = 31250000,

	.probe = nvgpu_pci_tegra_probe,
	.remove = nvgpu_pci_tegra_remove,

	/* power management configuration */
	.railgate_delay_init = 500,
	.can_railgate_init = false,
	.can_elpg_init = false,
	.enable_elpg = false,
	.enable_elcg = false,
	.enable_slcg = false,
	.enable_blcg = false,
	.enable_mscg = false,
	.can_slcg = false,
	.can_blcg = false,
	.can_elcg = false,

	.disable_aspm = true,

	/* power management callbacks */
	.is_railgated = nvgpu_pci_tegra_is_railgated,
	.clk_round_rate = nvgpu_pci_clk_round_rate,

	.ch_wdt_timeout_ms = 7000,

	.honors_aperture = true,
	.dma_mask = DMA_BIT_MASK(40),
	.vbios_min_version = 0x90040109,
	.hardcode_sw_threshold = false,
	.has_syncpoints = true,
	},
};

static const struct pci_device_id nvgpu_pci_table[] = {
	{
		PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1c35),
		.class = PCI_BASE_CLASS_DISPLAY << 16,
		.class_mask = 0xff << 16,
		.driver_data = 0,
	},
	{
		PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1c36),
		.class = PCI_BASE_CLASS_DISPLAY << 16,
		.class_mask = 0xff << 16,
		.driver_data = 1,
	},
	{
		PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1c37),
		.class = PCI_BASE_CLASS_DISPLAY << 16,
		.class_mask = 0xff << 16,
		.driver_data = 2,
	},
	{
		PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1c75),
		.class = PCI_BASE_CLASS_DISPLAY << 16,
		.class_mask = 0xff << 16,
		.driver_data = 3,
	},
	{
		PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1db1),
		.class = PCI_BASE_CLASS_DISPLAY << 16,
		.class_mask = 0xff << 16,
		.driver_data = 4,
	},
	{
		PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1db0),
		.class = PCI_BASE_CLASS_DISPLAY << 16,
		.class_mask = 0xff << 16,
		.driver_data = 5,
	},
	{
		PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1dbe),
		.class = PCI_BASE_CLASS_DISPLAY << 16,
		.class_mask = 0xff << 16,
		.driver_data = 6,
	},
	{
		PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1df1),
		.class = PCI_BASE_CLASS_DISPLAY << 16,
		.class_mask = 0xff << 16,
		.driver_data = 7,
	},
	{
		PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1e3f),
		.class = PCI_BASE_CLASS_DISPLAY << 16,
		.class_mask = 0xff << 16,
		.driver_data = 8,
	},
	{
		PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1eba),
		.class = PCI_BASE_CLASS_DISPLAY << 16,
		.class_mask = 0xff << 16,
		.driver_data = 9,
	},
	{}
};

static irqreturn_t nvgpu_pci_isr(int irq, void *dev_id)
{
	struct gk20a *g = dev_id;
	irqreturn_t ret_stall;
	irqreturn_t ret_nonstall;

	ret_stall = nvgpu_intr_stall(g);
	ret_nonstall = nvgpu_intr_nonstall(g);

#if defined(CONFIG_PCI_MSI)
	/* Send MSI EOI */
	if (g->ops.xve.rearm_msi && g->msi_enabled)
		g->ops.xve.rearm_msi(g);
#endif

	/*
	 * Wake the threaded handler only when a stalling interrupt needs
	 * it; otherwise report the nonstall handler's result.
	 */
	return (ret_stall == IRQ_NONE) ? ret_nonstall : IRQ_WAKE_THREAD;
}

static irqreturn_t nvgpu_pci_intr_thread(int irq, void *dev_id)
{
	struct gk20a *g = dev_id;

	return nvgpu_intr_thread_stall(g);
}

static int nvgpu_pci_init_support(struct pci_dev *pdev)
{
	int err = 0;
	struct gk20a *g = get_gk20a(&pdev->dev);
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

	l->regs = ioremap(pci_resource_start(pdev, 0),
			  pci_resource_len(pdev, 0));
	if (!l->regs) {
		/* ioremap() returns NULL on failure, not an ERR_PTR */
		nvgpu_err(g, "failed to remap gk20a registers");
		err = -ENOMEM;
		goto fail;
	}

	l->bar1 = ioremap(pci_resource_start(pdev, 1),
			  pci_resource_len(pdev, 1));
	if (!l->bar1) {
		nvgpu_err(g, "failed to remap gk20a bar1");
		err = -ENOMEM;
		goto fail;
	}

	err = nvgpu_init_sim_support_linux_pci(g);
	if (err)
		goto fail;
	err = nvgpu_init_sim_support_pci(g);
	if (err)
		goto fail_sim;

	nvgpu_pci_init_usermode_support(l);

	return 0;

fail_sim:
	nvgpu_remove_sim_support_linux_pci(g);
fail:
	if (l->regs) {
		iounmap(l->regs);
		l->regs = NULL;
	}
	if (l->bar1) {
		iounmap(l->bar1);
		l->bar1 = NULL;
	}

	return err;
}

static char *nvgpu_pci_devnode(struct device *dev, umode_t *mode)
{
	if (mode)
		*mode = S_IRUGO | S_IWUGO;
	return kasprintf(GFP_KERNEL, "nvgpu-pci/%s", dev_name(dev));
}

static struct class nvgpu_pci_class = {
	.owner = THIS_MODULE,
	.name = "nvidia-pci-gpu",
	.devnode = nvgpu_pci_devnode,
};

#ifdef CONFIG_PM
static int nvgpu_pci_pm_runtime_resume(struct device *dev)
{
	return gk20a_pm_finalize_poweron(dev);
}

static int nvgpu_pci_pm_runtime_suspend(struct device *dev)
{
	return 0;
}

static const struct dev_pm_ops nvgpu_pci_pm_ops = {
	.runtime_resume = nvgpu_pci_pm_runtime_resume,
	.runtime_suspend = nvgpu_pci_pm_runtime_suspend,
	.resume = nvgpu_pci_pm_runtime_resume,
	.suspend = nvgpu_pci_pm_runtime_suspend,
};
#endif

static int nvgpu_pci_pm_init(struct device *dev)
{
#ifdef CONFIG_PM
	struct gk20a *g = get_gk20a(dev);

	if (!g->can_railgate) {
		pm_runtime_disable(dev);
	} else {
		if (g->railgate_delay)
			pm_runtime_set_autosuspend_delay(dev,
				g->railgate_delay);

		/*
		 * Runtime PM for PCI devices is disabled by default,
		 * so we need to enable it first
		 */
		pm_runtime_use_autosuspend(dev);
		pm_runtime_put_noidle(dev);
		pm_runtime_allow(dev);
	}
#endif
	return 0;
}

static int nvgpu_pci_probe(struct pci_dev *pdev,
			   const struct pci_device_id *pent)
{
	struct gk20a_platform *platform = NULL;
	struct nvgpu_os_linux *l;
	struct gk20a *g;
	int err;
	char nodefmt[64];
	struct device_node *np;

	/* make sure driver_data is a sane index */
	if (pent->driver_data >= ARRAY_SIZE(nvgpu_pci_device))
		return -EINVAL;

	l = kzalloc(sizeof(*l), GFP_KERNEL);
	if (!l) {
		dev_err(&pdev->dev, "couldn't allocate gk20a support");
		return -ENOMEM;
	}

	hash_init(l->ecc_sysfs_stats_htable);

	g = &l->g;
	nvgpu_init_gk20a(g);

	nvgpu_kmem_init(g);

	/* Allocate memory to hold platform data */
	platform = nvgpu_kzalloc(g, sizeof(struct gk20a_platform));
	if (!platform) {
		dev_err(&pdev->dev, "couldn't allocate platform data");
		err = -ENOMEM;
		goto err_free_l;
	}

	/* copy detected device data to allocated platform space */
	memcpy(platform, &nvgpu_pci_device[pent->driver_data],
		sizeof(struct gk20a_platform));

	pci_set_drvdata(pdev, platform);

	err = nvgpu_init_enabled_flags(g);
	if (err)
		goto err_free_platform;

	platform->g = g;
	l->dev = &pdev->dev;

	np = nvgpu_get_node(g);
	if (of_dma_is_coherent(np)) {
		__nvgpu_set_enabled(g, NVGPU_USE_COHERENT_SYSMEM, true);
		__nvgpu_set_enabled(g, NVGPU_SUPPORT_IO_COHERENCE, true);
	}

	err = pci_enable_device(pdev);
	if (err)
		goto err_free_platform;
	pci_set_master(pdev);

	g->pci_vendor_id = pdev->vendor;
	g->pci_device_id = pdev->device;
	g->pci_subsystem_vendor_id = pdev->subsystem_vendor;
	g->pci_subsystem_device_id = pdev->subsystem_device;
	g->pci_class = (pdev->class >> 8) & 0xFFFFU; /* we only want base/sub */
	g->pci_revision = pdev->revision;

	g->ina3221_dcb_index = platform->ina3221_dcb_index;
	g->ina3221_i2c_address = platform->ina3221_i2c_address;
	g->ina3221_i2c_port = platform->ina3221_i2c_port;
	g->hardcode_sw_threshold = platform->hardcode_sw_threshold;

#if defined(CONFIG_PCI_MSI)
	err = pci_enable_msi(pdev);
	if (err) {
		nvgpu_err(g,
			"MSI could not be enabled, falling back to legacy");
		g->msi_enabled = false;
	} else {
		g->msi_enabled = true;
	}
#endif

	g->irq_stall = pdev->irq;
	g->irq_nonstall = pdev->irq;
	if (g->irq_stall < 0) {
		err = -ENXIO;
		goto err_disable_msi;
	}

	err = devm_request_threaded_irq(&pdev->dev,
			g->irq_stall,
			nvgpu_pci_isr,
			nvgpu_pci_intr_thread,
#if defined(CONFIG_PCI_MSI)
			g->msi_enabled ? 0 :
#endif
			IRQF_SHARED, "nvgpu", g);
	if (err) {
		nvgpu_err(g,
			"failed to request irq @ %d", g->irq_stall);
		goto err_disable_msi;
	}
	disable_irq(g->irq_stall);

	err = nvgpu_pci_init_support(pdev);
	if (err)
		goto err_free_irq;

	if (strchr(dev_name(&pdev->dev), '%')) {
		nvgpu_err(g, "illegal character in device name");
		err = -EINVAL;
		goto err_free_irq;
	}

	snprintf(nodefmt, sizeof(nodefmt),
		 PCI_INTERFACE_NAME, dev_name(&pdev->dev));

	err = nvgpu_probe(g, "gpu_pci", nodefmt, &nvgpu_pci_class);
	if (err)
		goto err_free_irq;

	err = nvgpu_pci_pm_init(&pdev->dev);
	if (err) {
		nvgpu_err(g, "pm init failed");
		goto err_free_irq;
	}

	err = nvgpu_nvlink_probe(g);
	/*
	 * ENODEV is a legal error, which means there is no NVLINK;
	 * any other error is fatal.
	 */
	if (err) {
		if (err != -ENODEV) {
			nvgpu_err(g, "fatal error probing nvlink, bailing out");
			goto err_free_irq;
		}
		/* Enable Semaphore SHIM on nvlink only for now. */
		__nvgpu_set_enabled(g, NVGPU_SUPPORT_NVLINK, false);
		g->has_syncpoints = false;
	} else {
		err = nvgpu_nvhost_syncpt_init(g);
		if (err) {
			if (err != -ENOSYS) {
				nvgpu_err(g, "syncpt init failed");
				goto err_free_irq;
			}
		}
	}

	g->mm.has_physical_mode = false;

	return 0;

err_free_irq:
	nvgpu_free_irq(g);
err_disable_msi:
#if defined(CONFIG_PCI_MSI)
	if (g->msi_enabled)
		pci_disable_msi(pdev);
#endif
err_free_platform:
	nvgpu_kfree(g, platform);
err_free_l:
	kfree(l);
	return err;
}

static void nvgpu_pci_remove(struct pci_dev *pdev)
{
	struct gk20a *g = get_gk20a(&pdev->dev);
	struct device *dev = dev_from_gk20a(g);
	int err;

	/* no support yet for unbind if DGPU is in VGPU mode */
	if (gk20a_gpu_is_virtual(dev))
		return;

	nvgpu_nvlink_remove(g);

	gk20a_driver_start_unload(g);
	err = nvgpu_quiesce(g);
	/* TODO: handle failure to idle */
	WARN(err, "gpu failed to idle during driver removal");

	nvgpu_free_irq(g);

	nvgpu_remove(dev, &nvgpu_pci_class);

#if defined(CONFIG_PCI_MSI)
	if (g->msi_enabled)
		pci_disable_msi(pdev);
	else {
		/*
		 * IRQ does not need to be enabled in MSI as the line is not
		 * shared
		 */
		enable_irq(g->irq_stall);
	}
#endif

	/*
	 * Clear the platform back-pointer first, then free the platform
	 * data; doing it the other way around writes to freed memory.
	 */
	gk20a_get_platform(&pdev->dev)->g = NULL;
	nvgpu_kfree(g, gk20a_get_platform(&pdev->dev));

	gk20a_put(g);
}

static struct pci_driver nvgpu_pci_driver = {
	.name = "nvgpu",
	.id_table = nvgpu_pci_table,
	.probe = nvgpu_pci_probe,
	.remove = nvgpu_pci_remove,
#ifdef CONFIG_PM
	.driver.pm = &nvgpu_pci_pm_ops,
#endif
};

int __init nvgpu_pci_init(void)
{
	int ret;

	ret = class_register(&nvgpu_pci_class);
	if (ret)
		return ret;

	return pci_register_driver(&nvgpu_pci_driver);
}

void __exit nvgpu_pci_exit(void)
{
	pci_unregister_driver(&nvgpu_pci_driver);
	class_unregister(&nvgpu_pci_class);
}
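A hedged sketch of how these entry points would typically be wired up from the driver's module init code (the function names below are illustrative; the real call sites live elsewhere in the driver, not in this file):

	static int __init nvgpu_module_init(void)
	{
		/* platform-bus probing is registered separately */
		return nvgpu_pci_init();
	}

	static void __exit nvgpu_module_exit(void)
	{
		nvgpu_pci_exit();
	}

	module_init(nvgpu_module_init);
	module_exit(nvgpu_module_exit);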
27
drivers/gpu/nvgpu/os/linux/pci.h
Normal file
@@ -0,0 +1,27 @@
/*
 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#ifndef NVGPU_PCI_H
#define NVGPU_PCI_H

#ifdef CONFIG_GK20A_PCI
int nvgpu_pci_init(void);
void nvgpu_pci_exit(void);
#else
static inline int nvgpu_pci_init(void) { return 0; }
static inline void nvgpu_pci_exit(void) {}
#endif

#endif
24
drivers/gpu/nvgpu/os/linux/pci_usermode.c
Normal file
@@ -0,0 +1,24 @@
/*
 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */

#include <nvgpu/types.h>

#include <nvgpu/hw/gv11b/hw_usermode_gv11b.h>

#include "os_linux.h"

void nvgpu_pci_init_usermode_support(struct nvgpu_os_linux *l)
{
	l->usermode_regs = l->regs + usermode_cfg0_r();
	l->usermode_regs_saved = l->usermode_regs;
}
23
drivers/gpu/nvgpu/os/linux/pci_usermode.h
Normal file
@@ -0,0 +1,23 @@
/*
 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#ifndef __NVGPU_PCI_USERMODE_H__
#define __NVGPU_PCI_USERMODE_H__

struct nvgpu_os_linux;

void nvgpu_pci_init_usermode_support(struct nvgpu_os_linux *l);

#endif
269
drivers/gpu/nvgpu/os/linux/platform_ecc_sysfs.c
Normal file
@@ -0,0 +1,269 @@
|
||||
/*
|
||||
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
* version 2, as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
* more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <linux/hashtable.h>
|
||||
|
||||
#include <nvgpu/kmem.h>
|
||||
#include <nvgpu/bug.h>
|
||||
#include <nvgpu/hashtable.h>
|
||||
|
||||
#include "os_linux.h"
|
||||
|
||||
#include "gk20a/gk20a.h"
|
||||
|
||||
#include "platform_gk20a.h"
|
||||
#include "platform_gk20a_tegra.h"
|
||||
#include "platform_gp10b.h"
|
||||
#include "platform_gp10b_tegra.h"
|
||||
#include "platform_ecc_sysfs.h"
|
||||
|
||||
static u32 gen_ecc_hash_key(char *str)
|
||||
{
|
||||
int i = 0;
|
||||
u32 hash_key = 0x811c9dc5;
|
||||
|
||||
while (str[i]) {
|
||||
hash_key *= 0x1000193;
|
||||
hash_key ^= (u32)(str[i]);
|
||||
i++;
|
||||
};
|
||||
|
||||
return hash_key;
|
||||
}
|
||||
|
||||
static ssize_t ecc_stat_show(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
const char *ecc_stat_full_name = attr->attr.name;
|
||||
const char *ecc_stat_base_name;
|
||||
unsigned int hw_unit;
|
||||
unsigned int subunit;
|
||||
struct gk20a_ecc_stat *ecc_stat;
|
||||
u32 hash_key;
|
||||
struct gk20a *g = get_gk20a(dev);
|
||||
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
|
||||
|
||||
if (sscanf(ecc_stat_full_name, "ltc%u_lts%u", &hw_unit,
|
||||
&subunit) == 2) {
|
||||
ecc_stat_base_name = &(ecc_stat_full_name[strlen("ltc0_lts0_")]);
|
||||
hw_unit = g->gr.slices_per_ltc * hw_unit + subunit;
|
||||
} else if (sscanf(ecc_stat_full_name, "ltc%u", &hw_unit) == 1) {
|
||||
ecc_stat_base_name = &(ecc_stat_full_name[strlen("ltc0_")]);
|
||||
} else if (sscanf(ecc_stat_full_name, "gpc0_tpc%u", &hw_unit) == 1) {
|
||||
ecc_stat_base_name = &(ecc_stat_full_name[strlen("gpc0_tpc0_")]);
|
||||
} else if (sscanf(ecc_stat_full_name, "gpc%u", &hw_unit) == 1) {
|
||||
ecc_stat_base_name = &(ecc_stat_full_name[strlen("gpc0_")]);
|
||||
} else if (sscanf(ecc_stat_full_name, "eng%u", &hw_unit) == 1) {
|
||||
ecc_stat_base_name = &(ecc_stat_full_name[strlen("eng0_")]);
|
||||
} else {
|
||||
return snprintf(buf,
|
||||
PAGE_SIZE,
|
||||
"Error: Invalid ECC stat name!\n");
|
||||
}
|
||||
|
||||
hash_key = gen_ecc_hash_key((char *)ecc_stat_base_name);
|
||||
|
||||
hash_for_each_possible(l->ecc_sysfs_stats_htable,
|
||||
ecc_stat,
|
||||
hash_node,
|
||||
hash_key) {
|
||||
if (hw_unit >= ecc_stat->count)
|
||||
continue;
|
||||
if (!strcmp(ecc_stat_full_name, ecc_stat->names[hw_unit]))
|
||||
return snprintf(buf, PAGE_SIZE, "%u\n", ecc_stat->counters[hw_unit]);
|
||||
}
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "Error: No ECC stat found!\n");
|
||||
}
|
||||
|
||||
int nvgpu_gr_ecc_stat_create(struct device *dev,
|
||||
int is_l2, char *ecc_stat_name,
|
||||
struct gk20a_ecc_stat *ecc_stat)
|
||||
{
|
||||
struct gk20a *g = get_gk20a(dev);
|
||||
char *ltc_unit_name = "ltc";
|
||||
char *gr_unit_name = "gpc0_tpc";
|
||||
char *lts_unit_name = "lts";
|
||||
int num_hw_units = 0;
|
||||
int num_subunits = 0;
|
||||
|
||||
if (is_l2 == 1)
|
||||
num_hw_units = g->ltc_count;
|
||||
else if (is_l2 == 2) {
|
||||
num_hw_units = g->ltc_count;
|
||||
num_subunits = g->gr.slices_per_ltc;
|
||||
} else
|
||||
num_hw_units = g->gr.tpc_count;
|
||||
|
||||
|
||||
return nvgpu_ecc_stat_create(dev, num_hw_units, num_subunits,
|
||||
is_l2 ? ltc_unit_name : gr_unit_name,
|
||||
num_subunits ? lts_unit_name: NULL,
|
||||
ecc_stat_name,
|
||||
ecc_stat);
|
||||
}

int nvgpu_ecc_stat_create(struct device *dev,
			  int num_hw_units, int num_subunits,
			  char *ecc_unit_name, char *ecc_subunit_name,
			  char *ecc_stat_name,
			  struct gk20a_ecc_stat *ecc_stat)
{
	int error = 0;
	struct gk20a *g = get_gk20a(dev);
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	int hw_unit = 0;
	int subunit = 0;
	int element = 0;
	u32 hash_key = 0;
	struct device_attribute *dev_attr_array;

	int num_elements = num_subunits ? num_subunits * num_hw_units :
		num_hw_units;

	/* Allocate arrays */
	dev_attr_array = nvgpu_kzalloc(g, sizeof(struct device_attribute) *
				       num_elements);
	ecc_stat->counters = nvgpu_kzalloc(g, sizeof(u32) * num_elements);
	ecc_stat->names = nvgpu_kzalloc(g, sizeof(char *) * num_elements);

	for (hw_unit = 0; hw_unit < num_elements; hw_unit++) {
		ecc_stat->names[hw_unit] = nvgpu_kzalloc(g, sizeof(char) *
						ECC_STAT_NAME_MAX_SIZE);
	}
	ecc_stat->count = num_elements;
	if (num_subunits) {
		for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) {
			for (subunit = 0; subunit < num_subunits; subunit++) {
				element = hw_unit*num_subunits + subunit;

				snprintf(ecc_stat->names[element],
					 ECC_STAT_NAME_MAX_SIZE,
					 "%s%d_%s%d_%s",
					 ecc_unit_name,
					 hw_unit,
					 ecc_subunit_name,
					 subunit,
					 ecc_stat_name);

				sysfs_attr_init(&dev_attr_array[element].attr);
				dev_attr_array[element].attr.name =
					ecc_stat->names[element];
				dev_attr_array[element].attr.mode =
					VERIFY_OCTAL_PERMISSIONS(S_IRUGO);
				dev_attr_array[element].show = ecc_stat_show;
				dev_attr_array[element].store = NULL;

				/* Create sysfs file */
				error |= device_create_file(dev,
						&dev_attr_array[element]);

			}
		}
	} else {
		for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) {

			/* Fill in struct device_attribute members */
			snprintf(ecc_stat->names[hw_unit],
				 ECC_STAT_NAME_MAX_SIZE,
				 "%s%d_%s",
				 ecc_unit_name,
				 hw_unit,
				 ecc_stat_name);

			sysfs_attr_init(&dev_attr_array[hw_unit].attr);
			dev_attr_array[hw_unit].attr.name =
				ecc_stat->names[hw_unit];
			dev_attr_array[hw_unit].attr.mode =
				VERIFY_OCTAL_PERMISSIONS(S_IRUGO);
			dev_attr_array[hw_unit].show = ecc_stat_show;
			dev_attr_array[hw_unit].store = NULL;

			/* Create sysfs file */
			error |= device_create_file(dev,
					&dev_attr_array[hw_unit]);
		}
	}

	/* Add hash table entry */
	hash_key = gen_ecc_hash_key(ecc_stat_name);
	hash_add(l->ecc_sysfs_stats_htable,
		 &ecc_stat->hash_node,
		 hash_key);

	ecc_stat->attr_array = dev_attr_array;

	return error;
}
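
/*
 * Elements are laid out row-major (element = hw_unit * num_subunits +
 * subunit), matching the decode in ecc_stat_show(). Note that the array
 * allocations above are not NULL-checked; the only status reported back
 * is the OR of the device_create_file() return codes.
 */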

void nvgpu_gr_ecc_stat_remove(struct device *dev,
			      int is_l2, struct gk20a_ecc_stat *ecc_stat)
{
	struct gk20a *g = get_gk20a(dev);
	int num_hw_units = 0;
	int num_subunits = 0;

	if (is_l2 == 1)
		num_hw_units = g->ltc_count;
	else if (is_l2 == 2) {
		num_hw_units = g->ltc_count;
		num_subunits = g->gr.slices_per_ltc;
	} else
		num_hw_units = g->gr.tpc_count;

	nvgpu_ecc_stat_remove(dev, num_hw_units, num_subunits, ecc_stat);
}

void nvgpu_ecc_stat_remove(struct device *dev,
			   int num_hw_units, int num_subunits,
			   struct gk20a_ecc_stat *ecc_stat)
{
	struct gk20a *g = get_gk20a(dev);
	struct device_attribute *dev_attr_array = ecc_stat->attr_array;
	int hw_unit = 0;
	int subunit = 0;
	int element = 0;
	int num_elements = num_subunits ? num_subunits * num_hw_units :
		num_hw_units;

	/* Remove sysfs files */
	if (num_subunits) {
		for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) {
			for (subunit = 0; subunit < num_subunits; subunit++) {
				element = hw_unit * num_subunits + subunit;

				device_remove_file(dev,
						   &dev_attr_array[element]);
			}
		}
	} else {
		for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++)
			device_remove_file(dev, &dev_attr_array[hw_unit]);
	}

	/* Remove hash table entry */
	hash_del(&ecc_stat->hash_node);

	/* Free arrays */
	nvgpu_kfree(g, ecc_stat->counters);

	for (hw_unit = 0; hw_unit < num_elements; hw_unit++)
		nvgpu_kfree(g, ecc_stat->names[hw_unit]);

	nvgpu_kfree(g, ecc_stat->names);
	nvgpu_kfree(g, dev_attr_array);
}
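
/*
 * Teardown must mirror creation: nvgpu_gr_ecc_stat_remove() recomputes
 * num_hw_units/num_subunits from the same is_l2 value used at create
 * time, so the two calls have to agree or the names[] entries would be
 * freed with the wrong element count.
 */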

37
drivers/gpu/nvgpu/os/linux/platform_ecc_sysfs.h
Normal file
@@ -0,0 +1,37 @@
/*
 * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef _NVGPU_PLATFORM_SYSFS_H_
#define _NVGPU_PLATFORM_SYSFS_H_

#include "gp10b/gr_gp10b.h"

#define ECC_STAT_NAME_MAX_SIZE	100

int nvgpu_gr_ecc_stat_create(struct device *dev,
			     int is_l2, char *ecc_stat_name,
			     struct gk20a_ecc_stat *ecc_stat);
int nvgpu_ecc_stat_create(struct device *dev,
			  int num_hw_units, int num_subunits,
			  char *ecc_unit_name, char *ecc_subunit_name,
			  char *ecc_stat_name,
			  struct gk20a_ecc_stat *ecc_stat);
void nvgpu_gr_ecc_stat_remove(struct device *dev,
			      int is_l2, struct gk20a_ecc_stat *ecc_stat);
void nvgpu_ecc_stat_remove(struct device *dev,
			   int num_hw_units, int num_subunits,
			   struct gk20a_ecc_stat *ecc_stat);
#endif

317
drivers/gpu/nvgpu/os/linux/platform_gk20a.h
Normal file
@@ -0,0 +1,317 @@
/*
 * GK20A Platform (SoC) Interface
 *
 * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */

#ifndef _GK20A_PLATFORM_H_
#define _GK20A_PLATFORM_H_

#include <linux/device.h>

#include <nvgpu/lock.h>

#include "gk20a/gk20a.h"

#define GK20A_CLKS_MAX	4

struct gk20a;
struct channel_gk20a;
struct gr_ctx_buffer_desc;
struct gk20a_scale_profile;

struct secure_page_buffer {
	void (*destroy)(struct gk20a *, struct secure_page_buffer *);
	size_t size;
	dma_addr_t phys;
	size_t used;
};

struct gk20a_platform {
	/* Populated by the gk20a driver before probing the platform. */
	struct gk20a *g;

	/* Should be populated at probe. */
	bool can_railgate_init;

	/* Should be populated at probe. */
	bool can_elpg_init;

	/* Should be populated at probe. */
	bool has_syncpoints;

	/* channel limit after which to start aggressive sync destroy */
	unsigned int aggressive_sync_destroy_thresh;

	/* flag to set sync destroy aggressiveness */
	bool aggressive_sync_destroy;

	/* set if ASPM should be disabled on boot; only makes sense for PCI */
	bool disable_aspm;

	/* Set if the platform can unify the small/large address spaces. */
	bool unify_address_spaces;

	/* Clock configuration is stored here. Platform probe is responsible
	 * for filling this data. */
	struct clk *clk[GK20A_CLKS_MAX];
	int num_clks;
	int maxmin_clk_id;

#ifdef CONFIG_RESET_CONTROLLER
	/* Reset control for device */
	struct reset_control *reset_control;
#endif

	/* Delay before rail gated */
	int railgate_delay_init;

	/* init value for slowdown factor */
	u8 ldiv_slowdown_factor_init;

	/* Second Level Clock Gating: true = enable false = disable */
	bool enable_slcg;

	/* Block Level Clock Gating: true = enable false = disable */
	bool enable_blcg;

	/* Engine Level Clock Gating: true = enable false = disable */
	bool enable_elcg;

	/* Should be populated at probe. */
	bool can_slcg;

	/* Should be populated at probe. */
	bool can_blcg;

	/* Should be populated at probe. */
	bool can_elcg;

	/* Engine Level Power Gating: true = enable false = disable */
	bool enable_elpg;

	/* Adaptive ELPG: true = enable false = disable */
	bool enable_aelpg;

	/* PMU Perfmon: true = enable false = disable */
	bool enable_perfmon;

	/* Memory System Clock Gating: true = enable false = disable */
	bool enable_mscg;

	/* Timeout for per-channel watchdog (in mS) */
	u32 ch_wdt_timeout_ms;

	/* Disable big page support */
	bool disable_bigpage;

	/*
	 * gk20a_do_idle() API can take GPU either into rail gate or CAR reset
	 * This flag can be used to force CAR reset case instead of rail gate
	 */
	bool force_reset_in_do_idle;

	/* guest/vm id, needed for IPA to PA translation */
	int vmid;

	/* Initialize the platform interface of the gk20a driver.
	 *
	 * The platform implementation of this function must
	 *   - set the power and clocks of the gk20a device to a known
	 *     state, and
	 *   - populate the gk20a_platform structure (a pointer to the
	 *     structure can be obtained by calling gk20a_get_platform).
	 *
	 * After this function is finished, the driver will initialise
	 * pm runtime and genpd based on the platform configuration.
	 */
	int (*probe)(struct device *dev);

	/* Second stage initialisation - called once all power management
	 * initialisations are done.
	 */
	int (*late_probe)(struct device *dev);

	/* Remove device after power management has been done
	 */
	int (*remove)(struct device *dev);

	/* Poweron platform dependencies */
	int (*busy)(struct device *dev);

	/* Powerdown platform dependencies */
	void (*idle)(struct device *dev);

	/* Preallocated VPR buffer for kernel */
	size_t secure_buffer_size;
	struct secure_page_buffer secure_buffer;

	/* Device is going to be suspended */
	int (*suspend)(struct device *);

	/* Called to turn off the device */
	int (*railgate)(struct device *dev);

	/* Called to turn on the device */
	int (*unrailgate)(struct device *dev);
	struct nvgpu_mutex railgate_lock;

	/* Called to check state of device */
	bool (*is_railgated)(struct device *dev);

	/* get supported frequency list */
	int (*get_clk_freqs)(struct device *pdev,
			     unsigned long **freqs, int *num_freqs);

	/* clk related supported functions */
	long (*clk_round_rate)(struct device *dev,
			       unsigned long rate);

	/* Called to register GPCPLL with common clk framework */
	int (*clk_register)(struct gk20a *g);

	/* platform specific scale init quirks */
	void (*initscale)(struct device *dev);

	/* Postscale callback is called after frequency change */
	void (*postscale)(struct device *dev,
			  unsigned long freq);

	/* Pre callback is called before frequency change */
	void (*prescale)(struct device *dev);

	/* Devfreq governor name. If scaling is enabled, we request
	 * this governor to be used in scaling */
	const char *devfreq_governor;

	/* Quality of service notifier callback. If this is set, the scaling
	 * routines will register a callback to Qos. Each time we receive
	 * a new value, this callback gets called. */
	int (*qos_notify)(struct notifier_block *nb,
			  unsigned long n, void *p);

	/* Called as part of debug dump. If the gpu gets hung, this function
	 * is responsible for delivering all necessary debug data of other
	 * hw units which may interact with the gpu without direct supervision
	 * of the CPU.
	 */
	void (*dump_platform_dependencies)(struct device *dev);

	/* Defined when SMMU stage-2 is enabled, and we need to use physical
	 * addresses (not IPA). This is the case for GV100 nvlink in HV+L
	 * configuration, when dGPU is in pass-through mode.
	 */
	u64 (*phys_addr)(struct gk20a *g, u64 ipa);

	/* Callbacks to assert/deassert GPU reset */
	int (*reset_assert)(struct device *dev);
	int (*reset_deassert)(struct device *dev);
	struct clk *clk_reset;
	struct dvfs_rail *gpu_rail;

	bool virtual_dev;
#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
	void *vgpu_priv;
#endif
	/* source frequency for ptimer in hz */
	u32 ptimer_src_freq;

#ifdef CONFIG_NVGPU_SUPPORT_CDE
	bool has_cde;
#endif

	/* soc name for finding firmware files */
	const char *soc_name;

	/* false if vidmem aperture actually points to sysmem */
	bool honors_aperture;
	/* unified or split memory with separate vidmem? */
	bool unified_memory;

	/*
	 * DMA mask for Linux (both coh and non-coh). If not set defaults to
	 * 0x3ffffffff (i.e. a 34 bit mask).
	 */
	u64 dma_mask;

	/* minimum supported VBIOS version */
	u32 vbios_min_version;

	/* true if we run preos microcode on this board */
	bool run_preos;

	/* true if we need to program sw threshold for
	 * power limits
	 */
	bool hardcode_sw_threshold;

	/* i2c device index, port and address for INA3221 */
	u32 ina3221_dcb_index;
	u32 ina3221_i2c_address;
	u32 ina3221_i2c_port;

	/* stream id to use */
	u32 ltc_streamid;

	/* scaling rate */
	unsigned long cached_rate;
};
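
/*
 * A board file typically provides one statically initialized
 * gk20a_platform per SoC; a minimal, purely illustrative instance (names
 * hypothetical) looks like:
 *
 *	struct gk20a_platform my_soc_platform = {
 *		.has_syncpoints    = true,
 *		.can_railgate_init = true,
 *		.probe             = my_soc_probe,
 *		.railgate          = my_soc_railgate,
 *		.unrailgate        = my_soc_unrailgate,
 *	};
 *
 * gm20b_tegra_platform and gp10b_tegra_platform later in this change are
 * real instances.
 */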

static inline struct gk20a_platform *gk20a_get_platform(
		struct device *dev)
{
	return (struct gk20a_platform *)dev_get_drvdata(dev);
}

#ifdef CONFIG_TEGRA_GK20A
extern struct gk20a_platform gm20b_tegra_platform;
extern struct gk20a_platform gp10b_tegra_platform;
extern struct gk20a_platform gv11b_tegra_platform;
#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
extern struct gk20a_platform vgpu_tegra_platform;
extern struct gk20a_platform gv11b_vgpu_tegra_platform;
#endif
#endif

int gk20a_tegra_busy(struct device *dev);
void gk20a_tegra_idle(struct device *dev);
void gk20a_tegra_debug_dump(struct device *pdev);

static inline struct gk20a *get_gk20a(struct device *dev)
{
	return gk20a_get_platform(dev)->g;
}
static inline struct gk20a *gk20a_from_dev(struct device *dev)
{
	if (!dev)
		return NULL;

	return ((struct gk20a_platform *)dev_get_drvdata(dev))->g;
}
static inline bool gk20a_gpu_is_virtual(struct device *dev)
{
	struct gk20a_platform *platform = dev_get_drvdata(dev);

	return platform->virtual_dev;
}

static inline int support_gk20a_pmu(struct device *dev)
{
	if (IS_ENABLED(CONFIG_GK20A_PMU)) {
		/* gPMU is not supported for vgpu */
		return !gk20a_gpu_is_virtual(dev);
	}

	return 0;
}

#endif

957
drivers/gpu/nvgpu/os/linux/platform_gk20a_tegra.c
Normal file
@@ -0,0 +1,957 @@
/*
 * GK20A Tegra Platform Interface
 *
 * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */

#include <linux/clkdev.h>
#include <linux/of_platform.h>
#include <linux/debugfs.h>
#include <linux/platform_data/tegra_edp.h>
#include <linux/delay.h>
#include <uapi/linux/nvgpu.h>
#include <linux/dma-buf.h>
#include <linux/dma-attrs.h>
#include <linux/nvmap.h>
#include <linux/reset.h>
#if defined(CONFIG_TEGRA_DVFS)
#include <linux/tegra_soctherm.h>
#endif
#include <linux/platform/tegra/common.h>
#include <linux/platform/tegra/mc.h>
#include <linux/clk/tegra.h>
#if defined(CONFIG_COMMON_CLK)
#include <soc/tegra/tegra-dvfs.h>
#endif
#ifdef CONFIG_TEGRA_BWMGR
#include <linux/platform/tegra/emc_bwmgr.h>
#endif

#include <linux/platform/tegra/tegra_emc.h>
#include <soc/tegra/chip-id.h>

#include <nvgpu/kmem.h>
#include <nvgpu/bug.h>
#include <nvgpu/enabled.h>
#include <nvgpu/nvhost.h>

#include <nvgpu/linux/dma.h>

#include "gk20a/gk20a.h"
#include "gm20b/clk_gm20b.h"

#include "scale.h"
#include "platform_gk20a.h"
#include "clk.h"
#include "os_linux.h"

#include "../../../arch/arm/mach-tegra/iomap.h"
#include <soc/tegra/pmc.h>

#define TEGRA_GK20A_BW_PER_FREQ 32
#define TEGRA_GM20B_BW_PER_FREQ 64
#define TEGRA_DDR3_BW_PER_FREQ 16
#define TEGRA_DDR4_BW_PER_FREQ 16
#define MC_CLIENT_GPU 34
#define PMC_GPU_RG_CNTRL_0		0x2d4

#ifdef CONFIG_COMMON_CLK
#define GPU_RAIL_NAME "vdd-gpu"
#else
#define GPU_RAIL_NAME "vdd_gpu"
#endif

extern struct device tegra_vpr_dev;

#ifdef CONFIG_TEGRA_BWMGR
struct gk20a_emc_params {
	unsigned long bw_ratio;
	unsigned long freq_last_set;
	struct tegra_bwmgr_client *bwmgr_cl;
};
#else
struct gk20a_emc_params {
	unsigned long bw_ratio;
	unsigned long freq_last_set;
};
#endif

#define MHZ_TO_HZ(x) ((x) * 1000000)
#define HZ_TO_MHZ(x) ((x) / 1000000)

static void gk20a_tegra_secure_page_destroy(struct gk20a *g,
				struct secure_page_buffer *secure_buffer)
{
	DEFINE_DMA_ATTRS(attrs);
	dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, __DMA_ATTR(attrs));
	dma_free_attrs(&tegra_vpr_dev, secure_buffer->size,
			(void *)(uintptr_t)secure_buffer->phys,
			secure_buffer->phys, __DMA_ATTR(attrs));

	secure_buffer->destroy = NULL;
}

static int gk20a_tegra_secure_alloc(struct gk20a *g,
				    struct gr_ctx_buffer_desc *desc,
				    size_t size)
{
	struct device *dev = dev_from_gk20a(g);
	struct gk20a_platform *platform = dev_get_drvdata(dev);
	struct secure_page_buffer *secure_buffer = &platform->secure_buffer;
	dma_addr_t phys;
	struct sg_table *sgt;
	struct page *page;
	int err = 0;
	size_t aligned_size = PAGE_ALIGN(size);

	if (nvgpu_mem_is_valid(&desc->mem))
		return 0;

	/* We ran out of preallocated memory */
	if (secure_buffer->used + aligned_size > secure_buffer->size) {
		nvgpu_err(platform->g, "failed to alloc %zu bytes of VPR, %zu/%zu used",
				size, secure_buffer->used, secure_buffer->size);
		return -ENOMEM;
	}

	phys = secure_buffer->phys + secure_buffer->used;

	sgt = nvgpu_kzalloc(platform->g, sizeof(*sgt));
	if (!sgt) {
		nvgpu_err(platform->g, "failed to allocate memory");
		return -ENOMEM;
	}
	err = sg_alloc_table(sgt, 1, GFP_KERNEL);
	if (err) {
		nvgpu_err(platform->g, "failed to allocate sg_table");
		goto fail_sgt;
	}
	page = phys_to_page(phys);
	sg_set_page(sgt->sgl, page, size, 0);
	/* This bypasses SMMU for VPR during gmmu_map. */
	sg_dma_address(sgt->sgl) = 0;

	desc->destroy = NULL;

	desc->mem.priv.sgt = sgt;
	desc->mem.size = size;
	desc->mem.aperture = APERTURE_SYSMEM;

	secure_buffer->used += aligned_size;

	return err;

fail_sgt:
	nvgpu_kfree(platform->g, sgt);
	return err;
}

/*
 * gk20a_tegra_get_emc_rate()
 *
 * This function returns the minimum emc clock based on gpu frequency
 */

static unsigned long gk20a_tegra_get_emc_rate(struct gk20a *g,
				struct gk20a_emc_params *emc_params)
{
	unsigned long gpu_freq, gpu_fmax_at_vmin;
	unsigned long emc_rate, emc_scale;

	gpu_freq = clk_get_rate(g->clk.tegra_clk);
	gpu_fmax_at_vmin = tegra_dvfs_get_fmax_at_vmin_safe_t(
		clk_get_parent(g->clk.tegra_clk));

	/* When scaling emc, account for the gpu load when the
	 * gpu frequency is less than or equal to fmax@vmin. */
	if (gpu_freq <= gpu_fmax_at_vmin)
		emc_scale = min(g->pmu.load_avg, g->emc3d_ratio);
	else
		emc_scale = g->emc3d_ratio;

	emc_rate =
		(HZ_TO_MHZ(gpu_freq) * emc_params->bw_ratio * emc_scale) / 1000;

	return MHZ_TO_HZ(emc_rate);
}
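
/*
 * Equivalently, with rates in MHz before the final conversion back to
 * Hz:
 *
 *	emc_rate = gpu_freq * bw_ratio * emc_scale / 1000
 *
 * where emc_scale is a scale factor in units of 1/1000: min(load_avg,
 * emc3d_ratio) while the GPU runs at or below fmax@vmin, emc3d_ratio
 * alone above it.
 */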

/*
 * gk20a_tegra_prescale(profile, freq)
 *
 * This function informs EDP about changed constraints.
 */

static void gk20a_tegra_prescale(struct device *dev)
{
	struct gk20a *g = get_gk20a(dev);
	u32 avg = 0;

	nvgpu_pmu_load_norm(g, &avg);
	tegra_edp_notify_gpu_load(avg, clk_get_rate(g->clk.tegra_clk));
}

/*
 * gk20a_tegra_calibrate_emc()
 *
 */

static void gk20a_tegra_calibrate_emc(struct device *dev,
			       struct gk20a_emc_params *emc_params)
{
	enum tegra_chipid cid = tegra_get_chip_id();
	long gpu_bw, emc_bw;

	/* store gpu bw based on soc */
	switch (cid) {
	case TEGRA210:
		gpu_bw = TEGRA_GM20B_BW_PER_FREQ;
		break;
	case TEGRA124:
	case TEGRA132:
		gpu_bw = TEGRA_GK20A_BW_PER_FREQ;
		break;
	default:
		gpu_bw = 0;
		break;
	}

	/* TODO detect DDR type.
	 * Okay for now since DDR3 and DDR4 have the same BW ratio */
	emc_bw = TEGRA_DDR3_BW_PER_FREQ;

	/* Calculate the bandwidth ratio of gpu_freq <-> emc_freq
	 *   NOTE the ratio must come out as an integer */
	emc_params->bw_ratio = (gpu_bw / emc_bw);
}
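
/*
 * On TEGRA210, for example, this yields bw_ratio = 64 / 16 = 4: each MHz
 * of GPU clock is assumed to need four MHz of EMC clock before the load
 * scaling in gk20a_tegra_get_emc_rate() is applied.
 */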

#ifdef CONFIG_TEGRA_BWMGR
#ifdef CONFIG_TEGRA_DVFS
static void gm20b_bwmgr_set_rate(struct gk20a_platform *platform, bool enb)
{
	struct gk20a_scale_profile *profile = platform->g->scale_profile;
	struct gk20a_emc_params *params;
	unsigned long rate;

	if (!profile || !profile->private_data)
		return;

	params = (struct gk20a_emc_params *)profile->private_data;
	rate = (enb) ? params->freq_last_set : 0;
	tegra_bwmgr_set_emc(params->bwmgr_cl, rate, TEGRA_BWMGR_SET_EMC_FLOOR);
}
#endif

static void gm20b_tegra_postscale(struct device *dev, unsigned long freq)
{
	struct gk20a_platform *platform = dev_get_drvdata(dev);
	struct gk20a_scale_profile *profile = platform->g->scale_profile;
	struct gk20a_emc_params *emc_params;
	unsigned long emc_rate;

	if (!profile || !profile->private_data)
		return;

	emc_params = profile->private_data;
	emc_rate = gk20a_tegra_get_emc_rate(get_gk20a(dev), emc_params);

	if (emc_rate > tegra_bwmgr_get_max_emc_rate())
		emc_rate = tegra_bwmgr_get_max_emc_rate();

	emc_params->freq_last_set = emc_rate;
	if (platform->is_railgated && platform->is_railgated(dev))
		return;

	tegra_bwmgr_set_emc(emc_params->bwmgr_cl, emc_rate,
			TEGRA_BWMGR_SET_EMC_FLOOR);

}

#endif

#if defined(CONFIG_TEGRA_DVFS)
/*
 * gk20a_tegra_is_railgated()
 *
 * Check status of gk20a power rail
 */

static bool gk20a_tegra_is_railgated(struct device *dev)
{
	struct gk20a *g = get_gk20a(dev);
	struct gk20a_platform *platform = dev_get_drvdata(dev);
	bool ret = false;

	if (!nvgpu_is_enabled(g, NVGPU_IS_FMODEL))
		ret = !tegra_dvfs_is_rail_up(platform->gpu_rail);

	return ret;
}

/*
 * gm20b_tegra_railgate()
 *
 * Gate (disable) gm20b power rail
 */

static int gm20b_tegra_railgate(struct device *dev)
{
	struct gk20a *g = get_gk20a(dev);
	struct gk20a_platform *platform = dev_get_drvdata(dev);
	int ret = 0;

	if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL) ||
	    !tegra_dvfs_is_rail_up(platform->gpu_rail))
		return 0;

	tegra_mc_flush(MC_CLIENT_GPU);

	udelay(10);

	/* enable clamp */
	tegra_pmc_writel_relaxed(0x1, PMC_GPU_RG_CNTRL_0);
	tegra_pmc_readl(PMC_GPU_RG_CNTRL_0);

	udelay(10);

	platform->reset_assert(dev);

	udelay(10);

	/*
	 * GPCPLL is already disabled before entering this function; reference
	 * clocks are enabled until now - disable them just before rail gating
	 */
	clk_disable_unprepare(platform->clk_reset);
	clk_disable_unprepare(platform->clk[0]);
	clk_disable_unprepare(platform->clk[1]);
	if (platform->clk[3])
		clk_disable_unprepare(platform->clk[3]);

	udelay(10);

	tegra_soctherm_gpu_tsens_invalidate(1);

	if (tegra_dvfs_is_rail_up(platform->gpu_rail)) {
		ret = tegra_dvfs_rail_power_down(platform->gpu_rail);
		if (ret)
			goto err_power_off;
	} else
		pr_info("No GPU regulator?\n");

#ifdef CONFIG_TEGRA_BWMGR
	gm20b_bwmgr_set_rate(platform, false);
#endif

	return 0;

err_power_off:
	nvgpu_err(platform->g, "Could not railgate GPU");
	return ret;
}


/*
 * gm20b_tegra_unrailgate()
 *
 * Ungate (enable) gm20b power rail
 */

static int gm20b_tegra_unrailgate(struct device *dev)
{
	struct gk20a_platform *platform = dev_get_drvdata(dev);
	struct gk20a *g = platform->g;
	int ret = 0;
	bool first = false;

	if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL))
		return 0;

	ret = tegra_dvfs_rail_power_up(platform->gpu_rail);
	if (ret)
		return ret;

#ifdef CONFIG_TEGRA_BWMGR
	gm20b_bwmgr_set_rate(platform, true);
#endif

	tegra_soctherm_gpu_tsens_invalidate(0);

	if (!platform->clk_reset) {
		platform->clk_reset = clk_get(dev, "gpu_gate");
		if (IS_ERR(platform->clk_reset)) {
			nvgpu_err(g, "fail to get gpu reset clk");
			goto err_clk_on;
		}
	}

	if (!first) {
		ret = clk_prepare_enable(platform->clk_reset);
		if (ret) {
			nvgpu_err(g, "could not turn on gpu_gate");
			goto err_clk_on;
		}

		ret = clk_prepare_enable(platform->clk[0]);
		if (ret) {
			nvgpu_err(g, "could not turn on gpu pll");
			goto err_clk_on;
		}
		ret = clk_prepare_enable(platform->clk[1]);
		if (ret) {
			nvgpu_err(g, "could not turn on pwr clock");
			goto err_clk_on;
		}

		if (platform->clk[3]) {
			ret = clk_prepare_enable(platform->clk[3]);
			if (ret) {
				nvgpu_err(g, "could not turn on fuse clock");
				goto err_clk_on;
			}
		}
	}

	udelay(10);

	platform->reset_assert(dev);

	udelay(10);

	tegra_pmc_writel_relaxed(0, PMC_GPU_RG_CNTRL_0);
	tegra_pmc_readl(PMC_GPU_RG_CNTRL_0);

	udelay(10);

	clk_disable(platform->clk_reset);
	platform->reset_deassert(dev);
	clk_enable(platform->clk_reset);

	/* Flush MC after boot/railgate/SC7 */
	tegra_mc_flush(MC_CLIENT_GPU);

	udelay(10);

	tegra_mc_flush_done(MC_CLIENT_GPU);

	udelay(10);

	return 0;

err_clk_on:
	tegra_dvfs_rail_power_down(platform->gpu_rail);

	return ret;
}
#endif


static struct {
	char *name;
	unsigned long default_rate;
} tegra_gk20a_clocks[] = {
	{"gpu_ref", UINT_MAX},
	{"pll_p_out5", 204000000},
	{"emc", UINT_MAX},
	{"fuse", UINT_MAX},
};


/*
 * gk20a_tegra_get_clocks()
 *
 * This function finds clocks in tegra platform and populates
 * the clock information to gk20a platform data.
 */

static int gk20a_tegra_get_clocks(struct device *dev)
{
	struct gk20a_platform *platform = dev_get_drvdata(dev);
	char devname[16];
	unsigned int i;
	int ret = 0;

	BUG_ON(GK20A_CLKS_MAX < ARRAY_SIZE(tegra_gk20a_clocks));

	snprintf(devname, sizeof(devname), "tegra_%s", dev_name(dev));

	platform->num_clks = 0;
	for (i = 0; i < ARRAY_SIZE(tegra_gk20a_clocks); i++) {
		long rate = tegra_gk20a_clocks[i].default_rate;
		struct clk *c;

		c = clk_get_sys(devname, tegra_gk20a_clocks[i].name);
		if (IS_ERR(c)) {
			ret = PTR_ERR(c);
			goto err_get_clock;
		}
		rate = clk_round_rate(c, rate);
		clk_set_rate(c, rate);
		platform->clk[i] = c;
		if (i == 0)
			platform->cached_rate = rate;
	}
	platform->num_clks = i;

	return 0;

err_get_clock:

	while (i--)
		clk_put(platform->clk[i]);
	return ret;
}

#if defined(CONFIG_RESET_CONTROLLER) && defined(CONFIG_COMMON_CLK)
static int gm20b_tegra_reset_assert(struct device *dev)
{
	struct gk20a_platform *platform = gk20a_get_platform(dev);

	if (!platform->reset_control) {
		WARN(1, "Reset control not initialized\n");
		return -ENOSYS;
	}

	return reset_control_assert(platform->reset_control);
}

static int gm20b_tegra_reset_deassert(struct device *dev)
{
	struct gk20a_platform *platform = gk20a_get_platform(dev);

	if (!platform->reset_control) {
		WARN(1, "Reset control not initialized\n");
		return -ENOSYS;
	}

	return reset_control_deassert(platform->reset_control);
}
#endif

static void gk20a_tegra_scale_init(struct device *dev)
{
	struct gk20a_platform *platform = gk20a_get_platform(dev);
	struct gk20a_scale_profile *profile = platform->g->scale_profile;
	struct gk20a_emc_params *emc_params;
	struct gk20a *g = platform->g;

	if (!profile)
		return;

	if (profile->private_data)
		return;

	emc_params = nvgpu_kzalloc(platform->g, sizeof(*emc_params));
	if (!emc_params)
		return;

	emc_params->freq_last_set = -1;
	gk20a_tegra_calibrate_emc(dev, emc_params);

#ifdef CONFIG_TEGRA_BWMGR
	emc_params->bwmgr_cl = tegra_bwmgr_register(TEGRA_BWMGR_CLIENT_GPU);
	if (!emc_params->bwmgr_cl) {
		nvgpu_log_info(g, "%s Missing GPU BWMGR client\n", __func__);
		return;
	}
#endif

	profile->private_data = emc_params;
}

static void gk20a_tegra_scale_exit(struct device *dev)
{
	struct gk20a_platform *platform = dev_get_drvdata(dev);
	struct gk20a_scale_profile *profile = platform->g->scale_profile;
	struct gk20a_emc_params *emc_params;

	if (!profile)
		return;

	emc_params = profile->private_data;
#ifdef CONFIG_TEGRA_BWMGR
	tegra_bwmgr_unregister(emc_params->bwmgr_cl);
#endif

	nvgpu_kfree(platform->g, profile->private_data);
}

void gk20a_tegra_debug_dump(struct device *dev)
{
#ifdef CONFIG_TEGRA_GK20A_NVHOST
	struct gk20a_platform *platform = gk20a_get_platform(dev);
	struct gk20a *g = platform->g;

	if (g->nvhost_dev)
		nvgpu_nvhost_debug_dump_device(g->nvhost_dev);
#endif
}

int gk20a_tegra_busy(struct device *dev)
{
#ifdef CONFIG_TEGRA_GK20A_NVHOST
	struct gk20a_platform *platform = gk20a_get_platform(dev);
	struct gk20a *g = platform->g;

	if (g->nvhost_dev)
		return nvgpu_nvhost_module_busy_ext(g->nvhost_dev);
#endif
	return 0;
}

void gk20a_tegra_idle(struct device *dev)
{
#ifdef CONFIG_TEGRA_GK20A_NVHOST
	struct gk20a_platform *platform = gk20a_get_platform(dev);
	struct gk20a *g = platform->g;

	if (g->nvhost_dev)
		nvgpu_nvhost_module_idle_ext(g->nvhost_dev);
#endif
}

int gk20a_tegra_init_secure_alloc(struct gk20a_platform *platform)
{
	struct gk20a *g = platform->g;
	struct secure_page_buffer *secure_buffer = &platform->secure_buffer;
	DEFINE_DMA_ATTRS(attrs);
	dma_addr_t iova;

	if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL))
		return 0;

	dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, __DMA_ATTR(attrs));
	(void)dma_alloc_attrs(&tegra_vpr_dev, platform->secure_buffer_size, &iova,
			      GFP_KERNEL, __DMA_ATTR(attrs));
	/* Some platforms disable VPR. In that case VPR allocations always
	 * fail. Just disable VPR usage in nvgpu in that case. */
	if (dma_mapping_error(&tegra_vpr_dev, iova))
		return 0;

	secure_buffer->size = platform->secure_buffer_size;
	secure_buffer->phys = iova;
	secure_buffer->destroy = gk20a_tegra_secure_page_destroy;

	g->ops.secure_alloc = gk20a_tegra_secure_alloc;
	__nvgpu_set_enabled(g, NVGPU_SUPPORT_VPR, true);

	return 0;
}
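
/*
 * The VPR carveout is allocated once here and then only ever
 * sub-allocated linearly by gk20a_tegra_secure_alloc() (secure_buffer->
 * used grows, nothing is handed back), which suits the long-lived GR
 * context buffers it serves.
 */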

#ifdef CONFIG_COMMON_CLK
static struct clk *gk20a_clk_get(struct gk20a *g)
{
	if (!g->clk.tegra_clk) {
		struct clk *clk;
		char clk_dev_id[32];
		struct device *dev = dev_from_gk20a(g);

		snprintf(clk_dev_id, 32, "tegra_%s", dev_name(dev));

		clk = clk_get_sys(clk_dev_id, "gpu");
		if (IS_ERR(clk)) {
			nvgpu_err(g, "fail to get tegra gpu clk %s/gpu\n",
				  clk_dev_id);
			return NULL;
		}
		g->clk.tegra_clk = clk;
	}

	return g->clk.tegra_clk;
}

static int gm20b_clk_prepare_ops(struct clk_hw *hw)
{
	struct clk_gk20a *clk = to_clk_gk20a(hw);
	return gm20b_clk_prepare(clk);
}

static void gm20b_clk_unprepare_ops(struct clk_hw *hw)
{
	struct clk_gk20a *clk = to_clk_gk20a(hw);
	gm20b_clk_unprepare(clk);
}

static int gm20b_clk_is_prepared_ops(struct clk_hw *hw)
{
	struct clk_gk20a *clk = to_clk_gk20a(hw);
	return gm20b_clk_is_prepared(clk);
}

static unsigned long gm20b_recalc_rate_ops(struct clk_hw *hw, unsigned long parent_rate)
{
	struct clk_gk20a *clk = to_clk_gk20a(hw);
	return gm20b_recalc_rate(clk, parent_rate);
}

static int gm20b_gpcclk_set_rate_ops(struct clk_hw *hw, unsigned long rate,
				     unsigned long parent_rate)
{
	struct clk_gk20a *clk = to_clk_gk20a(hw);
	return gm20b_gpcclk_set_rate(clk, rate, parent_rate);
}

static long gm20b_round_rate_ops(struct clk_hw *hw, unsigned long rate,
				 unsigned long *parent_rate)
{
	struct clk_gk20a *clk = to_clk_gk20a(hw);
	return gm20b_round_rate(clk, rate, parent_rate);
}

static const struct clk_ops gm20b_clk_ops = {
	.prepare = gm20b_clk_prepare_ops,
	.unprepare = gm20b_clk_unprepare_ops,
	.is_prepared = gm20b_clk_is_prepared_ops,
	.recalc_rate = gm20b_recalc_rate_ops,
	.set_rate = gm20b_gpcclk_set_rate_ops,
	.round_rate = gm20b_round_rate_ops,
};

static int gm20b_register_gpcclk(struct gk20a *g)
{
	const char *parent_name = "pllg_ref";
	struct clk_gk20a *clk = &g->clk;
	struct clk_init_data init;
	struct clk *c;
	int err = 0;

	/* make sure the clock is available */
	if (!gk20a_clk_get(g))
		return -ENOSYS;

	err = gm20b_init_clk_setup_sw(g);
	if (err)
		return err;

	init.name = "gpcclk";
	init.ops = &gm20b_clk_ops;
	init.parent_names = &parent_name;
	init.num_parents = 1;
	init.flags = 0;

	/* Data in .init is copied by clk_register(), so stack variable OK */
	clk->hw.init = &init;
	c = clk_register(dev_from_gk20a(g), &clk->hw);
	if (IS_ERR(c)) {
		nvgpu_err(g, "Failed to register GPCPLL clock");
		return -EINVAL;
	}

	clk->g = g;
	clk_register_clkdev(c, "gpcclk", "gpcclk");

	return err;
}
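
/*
 * Once registered, the GPCPLL behaves as an ordinary common-clk provider:
 * code holding the registered "gpcclk" clock can drive it through the
 * usual clk_prepare_enable()/clk_set_rate() API, with the ops above
 * translating each call into the gm20b clk driver.
 */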
#endif /* CONFIG_COMMON_CLK */

static int gk20a_tegra_probe(struct device *dev)
{
	struct gk20a_platform *platform = dev_get_drvdata(dev);
	struct device_node *np = dev->of_node;
	bool joint_xpu_rail = false;
	int ret;
	struct gk20a *g = platform->g;

#ifdef CONFIG_COMMON_CLK
	/* DVFS is not guaranteed to be initialized at the time of probe on
	 * kernels with Common Clock Framework enabled.
	 */
	if (!platform->gpu_rail) {
		platform->gpu_rail = tegra_dvfs_get_rail_by_name(GPU_RAIL_NAME);
		if (!platform->gpu_rail) {
			nvgpu_log_info(g, "deferring probe no gpu_rail");
			return -EPROBE_DEFER;
		}
	}

	if (!tegra_dvfs_is_rail_ready(platform->gpu_rail)) {
		nvgpu_log_info(g, "deferring probe gpu_rail not ready");
		return -EPROBE_DEFER;
	}
#endif

#ifdef CONFIG_TEGRA_GK20A_NVHOST
	ret = nvgpu_get_nvhost_dev(platform->g);
	if (ret)
		return ret;
#endif

#ifdef CONFIG_OF
	joint_xpu_rail = of_property_read_bool(of_chosen,
				"nvidia,tegra-joint_xpu_rail");
#endif

	if (joint_xpu_rail) {
		nvgpu_log_info(g, "XPU rails are joint\n");
		platform->g->can_railgate = false;
	}

	platform->g->clk.gpc_pll.id = GK20A_GPC_PLL;
	if (tegra_get_chip_id() == TEGRA210) {
		/* WAR for bug 1547668: Disable railgating and scaling
		   irrespective of platform data if the rework was not made. */
		np = of_find_node_by_path("/gpu-dvfs-rework");
		if (!(np && of_device_is_available(np))) {
			platform->devfreq_governor = "";
			dev_warn(dev, "board does not support scaling");
		}
		platform->g->clk.gpc_pll.id = GM20B_GPC_PLL_B1;
		if (tegra_chip_get_revision() > TEGRA210_REVISION_A04p)
			platform->g->clk.gpc_pll.id = GM20B_GPC_PLL_C1;
	}

	if (tegra_get_chip_id() == TEGRA132)
		platform->soc_name = "tegra13x";

	gk20a_tegra_get_clocks(dev);
	nvgpu_linux_init_clk_support(platform->g);
	ret = gk20a_tegra_init_secure_alloc(platform);
	if (ret)
		return ret;

	if (platform->clk_register) {
		ret = platform->clk_register(platform->g);
		if (ret)
			return ret;
	}

	return 0;
}

static int gk20a_tegra_late_probe(struct device *dev)
{
	return 0;
}

static int gk20a_tegra_remove(struct device *dev)
{
	/* deinitialise tegra specific scaling quirks */
	gk20a_tegra_scale_exit(dev);

#ifdef CONFIG_TEGRA_GK20A_NVHOST
	nvgpu_free_nvhost_dev(get_gk20a(dev));
#endif

	return 0;
}

static int gk20a_tegra_suspend(struct device *dev)
{
	tegra_edp_notify_gpu_load(0, 0);
	return 0;
}

#if defined(CONFIG_COMMON_CLK)
static long gk20a_round_clk_rate(struct device *dev, unsigned long rate)
{
	struct gk20a_platform *platform = gk20a_get_platform(dev);
	struct gk20a *g = platform->g;

	/* make sure the clock is available */
	if (!gk20a_clk_get(g))
		return rate;

	return clk_round_rate(clk_get_parent(g->clk.tegra_clk), rate);
}

static int gk20a_clk_get_freqs(struct device *dev,
			       unsigned long **freqs, int *num_freqs)
{
	struct gk20a_platform *platform = gk20a_get_platform(dev);
	struct gk20a *g = platform->g;

	/* make sure the clock is available */
	if (!gk20a_clk_get(g))
		return -ENOSYS;

	return tegra_dvfs_get_freqs(clk_get_parent(g->clk.tegra_clk),
				    freqs, num_freqs);
}
#endif

struct gk20a_platform gm20b_tegra_platform = {
	.has_syncpoints = true,
	.aggressive_sync_destroy_thresh = 64,

	/* power management configuration */
	.railgate_delay_init	= 500,
	.can_railgate_init	= true,
	.can_elpg_init		= true,
	.enable_slcg		= true,
	.enable_blcg		= true,
	.enable_elcg		= true,
	.can_slcg		= true,
	.can_blcg		= true,
	.can_elcg		= true,
	.enable_elpg		= true,
	.enable_aelpg		= true,
	.enable_perfmon		= true,
	.ptimer_src_freq	= 19200000,

	.force_reset_in_do_idle = false,

	.ch_wdt_timeout_ms = 5000,

	.probe = gk20a_tegra_probe,
	.late_probe = gk20a_tegra_late_probe,
	.remove = gk20a_tegra_remove,
	/* power management callbacks */
	.suspend = gk20a_tegra_suspend,

#if defined(CONFIG_TEGRA_DVFS)
	.railgate = gm20b_tegra_railgate,
	.unrailgate = gm20b_tegra_unrailgate,
	.is_railgated = gk20a_tegra_is_railgated,
#endif

	.busy = gk20a_tegra_busy,
	.idle = gk20a_tegra_idle,

#if defined(CONFIG_RESET_CONTROLLER) && defined(CONFIG_COMMON_CLK)
	.reset_assert = gm20b_tegra_reset_assert,
	.reset_deassert = gm20b_tegra_reset_deassert,
#else
	.reset_assert = gk20a_tegra_reset_assert,
	.reset_deassert = gk20a_tegra_reset_deassert,
#endif

#if defined(CONFIG_COMMON_CLK)
	.clk_round_rate = gk20a_round_clk_rate,
	.get_clk_freqs = gk20a_clk_get_freqs,
#endif

#ifdef CONFIG_COMMON_CLK
	.clk_register = gm20b_register_gpcclk,
#endif

	/* frequency scaling configuration */
	.initscale = gk20a_tegra_scale_init,
	.prescale = gk20a_tegra_prescale,
#ifdef CONFIG_TEGRA_BWMGR
	.postscale = gm20b_tegra_postscale,
#endif
	.devfreq_governor = "nvhost_podgov",
	.qos_notify = gk20a_scale_qos_notify,

	.dump_platform_dependencies = gk20a_tegra_debug_dump,

#ifdef CONFIG_NVGPU_SUPPORT_CDE
	.has_cde = true,
#endif

	.soc_name = "tegra21x",

	.unified_memory = true,
	.dma_mask = DMA_BIT_MASK(34),

	.secure_buffer_size = 335872,
};
23
drivers/gpu/nvgpu/os/linux/platform_gk20a_tegra.h
Normal file
@@ -0,0 +1,23 @@
/*
 * GK20A Platform (SoC) Interface
 *
 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */

#ifndef _NVGPU_PLATFORM_GK20A_TEGRA_H_
#define _NVGPU_PLATFORM_GK20A_TEGRA_H_

struct gk20a_platform;

int gk20a_tegra_init_secure_alloc(struct gk20a_platform *platform);

#endif
39
drivers/gpu/nvgpu/os/linux/platform_gp10b.h
Normal file
@@ -0,0 +1,39 @@
/*
 * GP10B Platform (SoC) Interface
 *
 * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef _GP10B_PLATFORM_H_
#define _GP10B_PLATFORM_H_

struct device;

int gp10b_tegra_get_clocks(struct device *dev);
int gp10b_tegra_reset_assert(struct device *dev);
int gp10b_tegra_reset_deassert(struct device *dev);
void gp10b_tegra_scale_init(struct device *dev);
long gp10b_round_clk_rate(struct device *dev, unsigned long rate);
int gp10b_clk_get_freqs(struct device *dev,
			unsigned long **freqs, int *num_freqs);
void gp10b_tegra_prescale(struct device *dev);
void gp10b_tegra_postscale(struct device *pdev, unsigned long freq);
#endif
607
drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.c
Normal file
@@ -0,0 +1,607 @@
/*
 * GP10B Tegra Platform Interface
 *
 * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */

#include <linux/of_platform.h>
#include <linux/debugfs.h>
#include <linux/dma-buf.h>
#include <linux/nvmap.h>
#include <linux/reset.h>
#include <linux/platform/tegra/emc_bwmgr.h>

#include <uapi/linux/nvgpu.h>

#include <soc/tegra/tegra_bpmp.h>
#include <soc/tegra/tegra_powergate.h>
#include <soc/tegra/tegra-bpmp-dvfs.h>

#include <dt-bindings/memory/tegra-swgroup.h>

#include <nvgpu/kmem.h>
#include <nvgpu/bug.h>
#include <nvgpu/enabled.h>
#include <nvgpu/hashtable.h>
#include <nvgpu/nvhost.h>

#include "os_linux.h"

#include "clk.h"

#include "gk20a/gk20a.h"

#include "platform_gk20a.h"
#include "platform_ecc_sysfs.h"
#include "platform_gk20a_tegra.h"
#include "platform_gp10b.h"
#include "platform_gp10b_tegra.h"
#include "scale.h"

/* Select every GP10B_FREQ_SELECT_STEP'th frequency from h/w table */
#define GP10B_FREQ_SELECT_STEP	8
/* Max number of freq supported in h/w */
#define GP10B_MAX_SUPPORTED_FREQS 120
static unsigned long
gp10b_freq_table[GP10B_MAX_SUPPORTED_FREQS / GP10B_FREQ_SELECT_STEP];

#define TEGRA_GP10B_BW_PER_FREQ 64
#define TEGRA_DDR4_BW_PER_FREQ 16

#define EMC_BW_RATIO  (TEGRA_GP10B_BW_PER_FREQ / TEGRA_DDR4_BW_PER_FREQ)

#define GPCCLK_INIT_RATE 1000000000

static struct {
	char *name;
	unsigned long default_rate;
} tegra_gp10b_clocks[] = {
	{"gpu", GPCCLK_INIT_RATE},
	{"gpu_sys", 204000000} };

/*
 * gp10b_tegra_get_clocks()
 *
 * This function finds clocks in tegra platform and populates
 * the clock information to gp10b platform data.
 */

int gp10b_tegra_get_clocks(struct device *dev)
{
	struct gk20a_platform *platform = dev_get_drvdata(dev);
	unsigned int i;

	platform->num_clks = 0;
	for (i = 0; i < ARRAY_SIZE(tegra_gp10b_clocks); i++) {
		long rate = tegra_gp10b_clocks[i].default_rate;
		struct clk *c;

		c = clk_get(dev, tegra_gp10b_clocks[i].name);
		if (IS_ERR(c)) {
			nvgpu_err(platform->g, "cannot get clock %s",
					tegra_gp10b_clocks[i].name);
		} else {
			clk_set_rate(c, rate);
			platform->clk[i] = c;
			if (i == 0)
				platform->cached_rate = rate;
		}
	}
	platform->num_clks = i;

	if (platform->clk[0]) {
		i = tegra_bpmp_dvfs_get_clk_id(dev->of_node,
					       tegra_gp10b_clocks[0].name);
		if (i > 0)
			platform->maxmin_clk_id = i;
	}

	return 0;
}

void gp10b_tegra_scale_init(struct device *dev)
{
	struct gk20a_platform *platform = gk20a_get_platform(dev);
	struct gk20a_scale_profile *profile = platform->g->scale_profile;
	struct tegra_bwmgr_client *bwmgr_handle;

	if (!profile)
		return;

	if ((struct tegra_bwmgr_client *)profile->private_data)
		return;

	bwmgr_handle = tegra_bwmgr_register(TEGRA_BWMGR_CLIENT_GPU);
	if (!bwmgr_handle)
		return;

	profile->private_data = (void *)bwmgr_handle;
}

static void gp10b_tegra_scale_exit(struct device *dev)
{
	struct gk20a_platform *platform = gk20a_get_platform(dev);
	struct gk20a_scale_profile *profile = platform->g->scale_profile;

	if (profile)
		tegra_bwmgr_unregister(
			(struct tegra_bwmgr_client *)profile->private_data);
}

static int gp10b_tegra_probe(struct device *dev)
{
	struct gk20a_platform *platform = dev_get_drvdata(dev);
	int ret;

#ifdef CONFIG_TEGRA_GK20A_NVHOST
	ret = nvgpu_get_nvhost_dev(platform->g);
	if (ret)
		return ret;
#endif

	ret = gk20a_tegra_init_secure_alloc(platform);
	if (ret)
		return ret;

	platform->disable_bigpage = !device_is_iommuable(dev);

	platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close
		= false;

	platform->g->gr.ctx_vars.force_preemption_gfxp = false;
	platform->g->gr.ctx_vars.force_preemption_cilp = false;

	gp10b_tegra_get_clocks(dev);
	nvgpu_linux_init_clk_support(platform->g);

	return 0;
}

static int gp10b_tegra_late_probe(struct device *dev)
{
	return 0;
}

static int gp10b_tegra_remove(struct device *dev)
{
	struct gk20a *g = get_gk20a(dev);

	if (g->ops.gr.remove_gr_sysfs)
		g->ops.gr.remove_gr_sysfs(g);

	/* deinitialise tegra specific scaling quirks */
	gp10b_tegra_scale_exit(dev);

#ifdef CONFIG_TEGRA_GK20A_NVHOST
	nvgpu_free_nvhost_dev(get_gk20a(dev));
#endif

	return 0;
}

static bool gp10b_tegra_is_railgated(struct device *dev)
{
	bool ret = false;

	if (tegra_bpmp_running())
		ret = !tegra_powergate_is_powered(TEGRA186_POWER_DOMAIN_GPU);

	return ret;
}

static int gp10b_tegra_railgate(struct device *dev)
{
	struct gk20a_platform *platform = gk20a_get_platform(dev);
	struct gk20a_scale_profile *profile = platform->g->scale_profile;

	/* remove emc frequency floor */
	if (profile)
		tegra_bwmgr_set_emc(
			(struct tegra_bwmgr_client *)profile->private_data,
			0, TEGRA_BWMGR_SET_EMC_FLOOR);

	if (tegra_bpmp_running() &&
	    tegra_powergate_is_powered(TEGRA186_POWER_DOMAIN_GPU)) {
		int i;
		for (i = 0; i < platform->num_clks; i++) {
			if (platform->clk[i])
				clk_disable_unprepare(platform->clk[i]);
		}
		tegra_powergate_partition(TEGRA186_POWER_DOMAIN_GPU);
	}
	return 0;
}

static int gp10b_tegra_unrailgate(struct device *dev)
{
	int ret = 0;
	struct gk20a_platform *platform = gk20a_get_platform(dev);
	struct gk20a_scale_profile *profile = platform->g->scale_profile;

	if (tegra_bpmp_running()) {
		int i;
		ret = tegra_unpowergate_partition(TEGRA186_POWER_DOMAIN_GPU);
		for (i = 0; i < platform->num_clks; i++) {
			if (platform->clk[i])
				clk_prepare_enable(platform->clk[i]);
		}
	}

	/* to start with set emc frequency floor to max rate */
	if (profile)
		tegra_bwmgr_set_emc(
			(struct tegra_bwmgr_client *)profile->private_data,
			tegra_bwmgr_get_max_emc_rate(),
			TEGRA_BWMGR_SET_EMC_FLOOR);
	return ret;
}

static int gp10b_tegra_suspend(struct device *dev)
{
	return 0;
}

int gp10b_tegra_reset_assert(struct device *dev)
{
	struct gk20a_platform *platform = gk20a_get_platform(dev);
	int ret = 0;

	if (!platform->reset_control)
		return -EINVAL;

	ret = reset_control_assert(platform->reset_control);

	return ret;
}

int gp10b_tegra_reset_deassert(struct device *dev)
{
	struct gk20a_platform *platform = gk20a_get_platform(dev);
	int ret = 0;

	if (!platform->reset_control)
		return -EINVAL;

	ret = reset_control_deassert(platform->reset_control);

	return ret;
}

void gp10b_tegra_prescale(struct device *dev)
{
	struct gk20a *g = get_gk20a(dev);
	u32 avg = 0;

	nvgpu_log_fn(g, " ");

	nvgpu_pmu_load_norm(g, &avg);

	nvgpu_log_fn(g, "done");
}

void gp10b_tegra_postscale(struct device *pdev,
			   unsigned long freq)
{
	struct gk20a_platform *platform = gk20a_get_platform(pdev);
	struct gk20a_scale_profile *profile = platform->g->scale_profile;
	struct gk20a *g = get_gk20a(pdev);
	unsigned long emc_rate;

	nvgpu_log_fn(g, " ");
	if (profile && !platform->is_railgated(pdev)) {
		unsigned long emc_scale;

		if (freq <= gp10b_freq_table[0])
			emc_scale = 0;
		else
			emc_scale = g->emc3d_ratio;

		emc_rate = (freq * EMC_BW_RATIO * emc_scale) / 1000;

		if (emc_rate > tegra_bwmgr_get_max_emc_rate())
			emc_rate = tegra_bwmgr_get_max_emc_rate();

		tegra_bwmgr_set_emc(
			(struct tegra_bwmgr_client *)profile->private_data,
			emc_rate, TEGRA_BWMGR_SET_EMC_FLOOR);
	}
	nvgpu_log_fn(g, "done");
}
|
||||
|
||||
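/*
 * Round a requested rate up to the nearest entry in the devfreq frequency
 * table; requests above the table are clamped to the highest entry.
 */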
long gp10b_round_clk_rate(struct device *dev, unsigned long rate)
{
	struct gk20a *g = get_gk20a(dev);
	struct gk20a_scale_profile *profile = g->scale_profile;
	unsigned long *freq_table = profile->devfreq_profile.freq_table;
	int max_states = profile->devfreq_profile.max_state;
	int i;

	for (i = 0; i < max_states; ++i)
		if (freq_table[i] >= rate)
			return freq_table[i];

	return freq_table[max_states - 1];
}

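/*
 * Build gp10b_freq_table by probing successive rates with clk_round_rate()
 * and keeping every GP10B_FREQ_SELECT_STEP'th entry plus the maximum rate.
 */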
int gp10b_clk_get_freqs(struct device *dev,
			unsigned long **freqs, int *num_freqs)
{
	struct gk20a_platform *platform = gk20a_get_platform(dev);
	struct gk20a *g = platform->g;
	unsigned long max_rate;
	unsigned long new_rate = 0, prev_rate = 0;
	int i = 0, freq_counter = 0;

	max_rate = clk_round_rate(platform->clk[0], (UINT_MAX - 1));

	/*
	 * Walk the h/w frequency table and only select
	 * GP10B_FREQ_SELECT_STEP'th frequencies and
	 * add MAX freq to last
	 */
	for (; i < GP10B_MAX_SUPPORTED_FREQS; ++i) {
		prev_rate = new_rate;
		new_rate = clk_round_rate(platform->clk[0], prev_rate + 1);

		if (i % GP10B_FREQ_SELECT_STEP == 0 ||
				new_rate == max_rate) {
			gp10b_freq_table[freq_counter++] = new_rate;

			if (new_rate == max_rate)
				break;
		}
	}

	WARN_ON(i == GP10B_MAX_SUPPORTED_FREQS);

	/* Fill freq table */
	*freqs = gp10b_freq_table;
	*num_freqs = freq_counter;

	nvgpu_log_info(g, "min rate: %ld max rate: %ld num_of_freq %d\n",
		gp10b_freq_table[0], max_rate, *num_freqs);

	return 0;
}

struct gk20a_platform gp10b_tegra_platform = {
	.has_syncpoints = true,

	/* power management configuration */
	.railgate_delay_init = 500,

	/* ldiv slowdown factor */
	.ldiv_slowdown_factor_init = SLOWDOWN_FACTOR_FPDIV_BY16,

	/* power management configuration */
	.can_railgate_init = true,
	.enable_elpg = true,
	.can_elpg_init = true,
	.enable_blcg = true,
	.enable_slcg = true,
	.enable_elcg = true,
	.can_slcg = true,
	.can_blcg = true,
	.can_elcg = true,
	.enable_aelpg = true,
	.enable_perfmon = true,

	/* ptimer src frequency in hz */
	.ptimer_src_freq = 31250000,

	.ch_wdt_timeout_ms = 5000,

	.probe = gp10b_tegra_probe,
	.late_probe = gp10b_tegra_late_probe,
	.remove = gp10b_tegra_remove,

	/* power management callbacks */
	.suspend = gp10b_tegra_suspend,
	.railgate = gp10b_tegra_railgate,
	.unrailgate = gp10b_tegra_unrailgate,
	.is_railgated = gp10b_tegra_is_railgated,

	.busy = gk20a_tegra_busy,
	.idle = gk20a_tegra_idle,

	.dump_platform_dependencies = gk20a_tegra_debug_dump,

#ifdef CONFIG_NVGPU_SUPPORT_CDE
	.has_cde = true,
#endif

	.clk_round_rate = gp10b_round_clk_rate,
	.get_clk_freqs = gp10b_clk_get_freqs,

	/* frequency scaling configuration */
	.initscale = gp10b_tegra_scale_init,
	.prescale = gp10b_tegra_prescale,
	.postscale = gp10b_tegra_postscale,
	.devfreq_governor = "nvhost_podgov",

	.qos_notify = gk20a_scale_qos_notify,

	.reset_assert = gp10b_tegra_reset_assert,
	.reset_deassert = gp10b_tegra_reset_deassert,

	.force_reset_in_do_idle = false,

	.soc_name = "tegra18x",

	.unified_memory = true,
	.dma_mask = DMA_BIT_MASK(36),

	.ltc_streamid = TEGRA_SID_GPUB,

	.secure_buffer_size = 401408,
};

void gr_gp10b_create_sysfs(struct gk20a *g)
{
	int error = 0;
	struct device *dev = dev_from_gk20a(g);

	/*
	 * This stat creation function is called on GR init. GR can get
	 * initialized multiple times but we only need to create the ECC
	 * stats once. Therefore, add the following check to avoid
	 * creating duplicate stat sysfs nodes.
	 */
	if (g->ecc.gr.sm_lrf_single_err_count.counters != NULL)
		return;

	error |= nvgpu_gr_ecc_stat_create(dev,
				0,
				"sm_lrf_ecc_single_err_count",
				&g->ecc.gr.sm_lrf_single_err_count);

	error |= nvgpu_gr_ecc_stat_create(dev,
				0,
				"sm_lrf_ecc_double_err_count",
				&g->ecc.gr.sm_lrf_double_err_count);

	error |= nvgpu_gr_ecc_stat_create(dev,
				0,
				"sm_shm_ecc_sec_count",
				&g->ecc.gr.sm_shm_sec_count);

	error |= nvgpu_gr_ecc_stat_create(dev,
				0,
				"sm_shm_ecc_sed_count",
				&g->ecc.gr.sm_shm_sed_count);

	error |= nvgpu_gr_ecc_stat_create(dev,
				0,
				"sm_shm_ecc_ded_count",
				&g->ecc.gr.sm_shm_ded_count);

	error |= nvgpu_gr_ecc_stat_create(dev,
				0,
				"tex_ecc_total_sec_pipe0_count",
				&g->ecc.gr.tex_total_sec_pipe0_count);

	error |= nvgpu_gr_ecc_stat_create(dev,
				0,
				"tex_ecc_total_ded_pipe0_count",
				&g->ecc.gr.tex_total_ded_pipe0_count);

	error |= nvgpu_gr_ecc_stat_create(dev,
				0,
				"tex_ecc_unique_sec_pipe0_count",
				&g->ecc.gr.tex_unique_sec_pipe0_count);

	error |= nvgpu_gr_ecc_stat_create(dev,
				0,
				"tex_ecc_unique_ded_pipe0_count",
				&g->ecc.gr.tex_unique_ded_pipe0_count);

	error |= nvgpu_gr_ecc_stat_create(dev,
				0,
				"tex_ecc_total_sec_pipe1_count",
				&g->ecc.gr.tex_total_sec_pipe1_count);

	error |= nvgpu_gr_ecc_stat_create(dev,
				0,
				"tex_ecc_total_ded_pipe1_count",
				&g->ecc.gr.tex_total_ded_pipe1_count);

	error |= nvgpu_gr_ecc_stat_create(dev,
				0,
				"tex_ecc_unique_sec_pipe1_count",
				&g->ecc.gr.tex_unique_sec_pipe1_count);

	error |= nvgpu_gr_ecc_stat_create(dev,
				0,
				"tex_ecc_unique_ded_pipe1_count",
				&g->ecc.gr.tex_unique_ded_pipe1_count);

	error |= nvgpu_gr_ecc_stat_create(dev,
				2,
				"ecc_sec_count",
				&g->ecc.ltc.l2_sec_count);

	error |= nvgpu_gr_ecc_stat_create(dev,
				2,
				"ecc_ded_count",
				&g->ecc.ltc.l2_ded_count);

	if (error)
		dev_err(dev, "Failed to create sysfs attributes!\n");
}

void gr_gp10b_remove_sysfs(struct gk20a *g)
{
	struct device *dev = dev_from_gk20a(g);

	if (!g->ecc.gr.sm_lrf_single_err_count.counters)
		return;

	nvgpu_gr_ecc_stat_remove(dev,
			0,
			&g->ecc.gr.sm_lrf_single_err_count);

	nvgpu_gr_ecc_stat_remove(dev,
			0,
			&g->ecc.gr.sm_lrf_double_err_count);

	nvgpu_gr_ecc_stat_remove(dev,
			0,
			&g->ecc.gr.sm_shm_sec_count);

	nvgpu_gr_ecc_stat_remove(dev,
			0,
			&g->ecc.gr.sm_shm_sed_count);

	nvgpu_gr_ecc_stat_remove(dev,
			0,
			&g->ecc.gr.sm_shm_ded_count);

	nvgpu_gr_ecc_stat_remove(dev,
			0,
			&g->ecc.gr.tex_total_sec_pipe0_count);

	nvgpu_gr_ecc_stat_remove(dev,
			0,
			&g->ecc.gr.tex_total_ded_pipe0_count);

	nvgpu_gr_ecc_stat_remove(dev,
			0,
			&g->ecc.gr.tex_unique_sec_pipe0_count);

	nvgpu_gr_ecc_stat_remove(dev,
			0,
			&g->ecc.gr.tex_unique_ded_pipe0_count);

	nvgpu_gr_ecc_stat_remove(dev,
			0,
			&g->ecc.gr.tex_total_sec_pipe1_count);

	nvgpu_gr_ecc_stat_remove(dev,
			0,
			&g->ecc.gr.tex_total_ded_pipe1_count);

	nvgpu_gr_ecc_stat_remove(dev,
			0,
			&g->ecc.gr.tex_unique_sec_pipe1_count);

	nvgpu_gr_ecc_stat_remove(dev,
			0,
			&g->ecc.gr.tex_unique_ded_pipe1_count);

	nvgpu_gr_ecc_stat_remove(dev,
			2,
			&g->ecc.ltc.l2_sec_count);

	nvgpu_gr_ecc_stat_remove(dev,
			2,
			&g->ecc.ltc.l2_ded_count);
}
23
drivers/gpu/nvgpu/os/linux/platform_gp10b_tegra.h
Normal file
@@ -0,0 +1,23 @@
/*
 * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef _PLATFORM_GP10B_TEGRA_H_
#define _PLATFORM_GP10B_TEGRA_H_

#include "gp10b/gr_gp10b.h"
#include "platform_ecc_sysfs.h"

#endif
588
drivers/gpu/nvgpu/os/linux/platform_gv11b_tegra.c
Normal file
@@ -0,0 +1,588 @@
/*
 * GV11B Tegra Platform Interface
 *
 * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/of_platform.h>
#include <linux/debugfs.h>
#include <linux/dma-buf.h>
#include <linux/nvmap.h>
#include <linux/reset.h>
#include <linux/hashtable.h>
#include <linux/clk.h>
#include <linux/platform/tegra/emc_bwmgr.h>

#include <nvgpu/nvhost.h>

#include <uapi/linux/nvgpu.h>

#include <soc/tegra/tegra_bpmp.h>
#include <soc/tegra/tegra_powergate.h>

#include "gk20a/gk20a.h"
#include "platform_gk20a.h"
#include "clk.h"
#include "scale.h"

#include "platform_gp10b.h"
#include "platform_gp10b_tegra.h"
#include "platform_ecc_sysfs.h"

#include "os_linux.h"
#include "platform_gk20a_tegra.h"
#include "gv11b/gr_gv11b.h"

static void gv11b_tegra_scale_exit(struct device *dev)
{
	struct gk20a_platform *platform = gk20a_get_platform(dev);
	struct gk20a_scale_profile *profile = platform->g->scale_profile;

	if (profile)
		tegra_bwmgr_unregister(
			(struct tegra_bwmgr_client *)profile->private_data);
}

static int gv11b_tegra_probe(struct device *dev)
{
	struct gk20a_platform *platform = dev_get_drvdata(dev);
	int err;

	err = nvgpu_nvhost_syncpt_init(platform->g);
	if (err) {
		if (err != -ENOSYS)
			return err;
	}

	err = gk20a_tegra_init_secure_alloc(platform);
	if (err)
		return err;

	platform->disable_bigpage = !device_is_iommuable(dev);

	platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close
		= false;

	platform->g->gr.ctx_vars.force_preemption_gfxp = false;
	platform->g->gr.ctx_vars.force_preemption_cilp = false;

	gp10b_tegra_get_clocks(dev);
	nvgpu_linux_init_clk_support(platform->g);

	return 0;
}

static int gv11b_tegra_late_probe(struct device *dev)
{
	return 0;
}

static int gv11b_tegra_remove(struct device *dev)
{
	struct gk20a *g = get_gk20a(dev);

	if (g->ops.gr.remove_gr_sysfs)
		g->ops.gr.remove_gr_sysfs(g);

	gv11b_tegra_scale_exit(dev);

#ifdef CONFIG_TEGRA_GK20A_NVHOST
	nvgpu_free_nvhost_dev(get_gk20a(dev));
#endif

	return 0;
}

static bool gv11b_tegra_is_railgated(struct device *dev)
{
	bool ret = false;
#ifdef TEGRA194_POWER_DOMAIN_GPU
	struct gk20a *g = get_gk20a(dev);

	if (tegra_bpmp_running()) {
		nvgpu_log(g, gpu_dbg_info, "bpmp running");
		ret = !tegra_powergate_is_powered(TEGRA194_POWER_DOMAIN_GPU);

		nvgpu_log(g, gpu_dbg_info, "railgated? %s", ret ? "yes" : "no");
	} else {
		nvgpu_log(g, gpu_dbg_info, "bpmp not running");
	}
#endif
	return ret;
}

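/*
 * Power-gate the GV11B GPU. As on GP10B this removes the EMC floor, gates
 * the clocks and powergates the partition, but the body is compiled in
 * only when TEGRA194_POWER_DOMAIN_GPU is available.
 */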
static int gv11b_tegra_railgate(struct device *dev)
{
#ifdef TEGRA194_POWER_DOMAIN_GPU
	struct gk20a_platform *platform = gk20a_get_platform(dev);
	struct gk20a_scale_profile *profile = platform->g->scale_profile;
	struct gk20a *g = get_gk20a(dev);
	int i;

	/* remove emc frequency floor */
	if (profile)
		tegra_bwmgr_set_emc(
			(struct tegra_bwmgr_client *)profile->private_data,
			0, TEGRA_BWMGR_SET_EMC_FLOOR);

	if (tegra_bpmp_running()) {
		nvgpu_log(g, gpu_dbg_info, "bpmp running");
		if (!tegra_powergate_is_powered(TEGRA194_POWER_DOMAIN_GPU)) {
			nvgpu_log(g, gpu_dbg_info, "powergate is not powered");
			return 0;
		}
		nvgpu_log(g, gpu_dbg_info, "clk_disable_unprepare");
		for (i = 0; i < platform->num_clks; i++) {
			if (platform->clk[i])
				clk_disable_unprepare(platform->clk[i]);
		}
		nvgpu_log(g, gpu_dbg_info, "powergate_partition");
		tegra_powergate_partition(TEGRA194_POWER_DOMAIN_GPU);
	} else {
		nvgpu_log(g, gpu_dbg_info, "bpmp not running");
	}
#endif
	return 0;
}

static int gv11b_tegra_unrailgate(struct device *dev)
{
	int ret = 0;
#ifdef TEGRA194_POWER_DOMAIN_GPU
	struct gk20a_platform *platform = gk20a_get_platform(dev);
	struct gk20a *g = get_gk20a(dev);
	struct gk20a_scale_profile *profile = platform->g->scale_profile;
	int i;

	if (tegra_bpmp_running()) {
		nvgpu_log(g, gpu_dbg_info, "bpmp running");
		ret = tegra_unpowergate_partition(TEGRA194_POWER_DOMAIN_GPU);
		if (ret) {
			nvgpu_log(g, gpu_dbg_info,
				"unpowergate partition failed");
			return ret;
		}
		nvgpu_log(g, gpu_dbg_info, "clk_prepare_enable");
		for (i = 0; i < platform->num_clks; i++) {
			if (platform->clk[i])
				clk_prepare_enable(platform->clk[i]);
		}
	} else {
		nvgpu_log(g, gpu_dbg_info, "bpmp not running");
	}

	/* to start with set emc frequency floor to max rate */
	if (profile)
		tegra_bwmgr_set_emc(
			(struct tegra_bwmgr_client *)profile->private_data,
			tegra_bwmgr_get_max_emc_rate(),
			TEGRA_BWMGR_SET_EMC_FLOOR);
#endif
	return ret;
}

static int gv11b_tegra_suspend(struct device *dev)
{
	return 0;
}

struct gk20a_platform gv11b_tegra_platform = {
	.has_syncpoints = true,

	/* ptimer src frequency in hz */
	.ptimer_src_freq = 31250000,

	.ch_wdt_timeout_ms = 5000,

	.probe = gv11b_tegra_probe,
	.late_probe = gv11b_tegra_late_probe,
	.remove = gv11b_tegra_remove,
	.railgate_delay_init = 500,
	.can_railgate_init = true,

	.can_slcg = true,
	.can_blcg = true,
	.can_elcg = true,
	.enable_slcg = true,
	.enable_blcg = true,
	.enable_elcg = true,
	.enable_perfmon = true,

	/* power management configuration */
	.enable_elpg = true,
	.can_elpg_init = true,
	.enable_aelpg = true,

	/* power management callbacks */
	.suspend = gv11b_tegra_suspend,
	.railgate = gv11b_tegra_railgate,
	.unrailgate = gv11b_tegra_unrailgate,
	.is_railgated = gv11b_tegra_is_railgated,

	.busy = gk20a_tegra_busy,
	.idle = gk20a_tegra_idle,

	.clk_round_rate = gp10b_round_clk_rate,
	.get_clk_freqs = gp10b_clk_get_freqs,

	/* frequency scaling configuration */
	.initscale = gp10b_tegra_scale_init,
	.prescale = gp10b_tegra_prescale,
	.postscale = gp10b_tegra_postscale,
	.devfreq_governor = "nvhost_podgov",

	.qos_notify = gk20a_scale_qos_notify,

	.dump_platform_dependencies = gk20a_tegra_debug_dump,

	.soc_name = "tegra19x",

	.honors_aperture = true,
	.unified_memory = true,
	.dma_mask = DMA_BIT_MASK(36),

	.reset_assert = gp10b_tegra_reset_assert,
	.reset_deassert = gp10b_tegra_reset_deassert,

	.secure_buffer_size = 667648,
};

void gr_gv11b_create_sysfs(struct gk20a *g)
{
	struct device *dev = dev_from_gk20a(g);
	int error = 0;

	/*
	 * This stat creation function is called on GR init. GR can get
	 * initialized multiple times but we only need to create the ECC
	 * stats once. Therefore, add the following check to avoid
	 * creating duplicate stat sysfs nodes.
	 */
	if (g->ecc.gr.sm_l1_tag_corrected_err_count.counters != NULL)
		return;

	gr_gp10b_create_sysfs(g);

	error |= nvgpu_gr_ecc_stat_create(dev,
			0,
			"sm_l1_tag_ecc_corrected_err_count",
			&g->ecc.gr.sm_l1_tag_corrected_err_count);

	error |= nvgpu_gr_ecc_stat_create(dev,
			0,
			"sm_l1_tag_ecc_uncorrected_err_count",
			&g->ecc.gr.sm_l1_tag_uncorrected_err_count);

	error |= nvgpu_gr_ecc_stat_create(dev,
			0,
			"sm_cbu_ecc_corrected_err_count",
			&g->ecc.gr.sm_cbu_corrected_err_count);

	error |= nvgpu_gr_ecc_stat_create(dev,
			0,
			"sm_cbu_ecc_uncorrected_err_count",
			&g->ecc.gr.sm_cbu_uncorrected_err_count);

	error |= nvgpu_gr_ecc_stat_create(dev,
			0,
			"sm_l1_data_ecc_corrected_err_count",
			&g->ecc.gr.sm_l1_data_corrected_err_count);

	error |= nvgpu_gr_ecc_stat_create(dev,
			0,
			"sm_l1_data_ecc_uncorrected_err_count",
			&g->ecc.gr.sm_l1_data_uncorrected_err_count);

	error |= nvgpu_gr_ecc_stat_create(dev,
			0,
			"sm_icache_ecc_corrected_err_count",
			&g->ecc.gr.sm_icache_corrected_err_count);

	error |= nvgpu_gr_ecc_stat_create(dev,
			0,
			"sm_icache_ecc_uncorrected_err_count",
			&g->ecc.gr.sm_icache_uncorrected_err_count);

	error |= nvgpu_gr_ecc_stat_create(dev,
			0,
			"gcc_l15_ecc_corrected_err_count",
			&g->ecc.gr.gcc_l15_corrected_err_count);

	error |= nvgpu_gr_ecc_stat_create(dev,
			0,
			"gcc_l15_ecc_uncorrected_err_count",
			&g->ecc.gr.gcc_l15_uncorrected_err_count);

	error |= nvgpu_ecc_stat_create(dev,
			g->ltc_count,
			0,
			"ltc",
			NULL,
			"l2_cache_uncorrected_err_count",
			&g->ecc.ltc.l2_cache_uncorrected_err_count);

	error |= nvgpu_ecc_stat_create(dev,
			g->ltc_count,
			0,
			"ltc",
			NULL,
			"l2_cache_corrected_err_count",
			&g->ecc.ltc.l2_cache_corrected_err_count);

	error |= nvgpu_ecc_stat_create(dev,
			1,
			0,
			"gpc",
			NULL,
			"fecs_ecc_uncorrected_err_count",
			&g->ecc.gr.fecs_uncorrected_err_count);

	error |= nvgpu_ecc_stat_create(dev,
			1,
			0,
			"gpc",
			NULL,
			"fecs_ecc_corrected_err_count",
			&g->ecc.gr.fecs_corrected_err_count);

	error |= nvgpu_ecc_stat_create(dev,
			g->gr.gpc_count,
			0,
			"gpc",
			NULL,
			"gpccs_ecc_uncorrected_err_count",
			&g->ecc.gr.gpccs_uncorrected_err_count);

	error |= nvgpu_ecc_stat_create(dev,
			g->gr.gpc_count,
			0,
			"gpc",
			NULL,
			"gpccs_ecc_corrected_err_count",
			&g->ecc.gr.gpccs_corrected_err_count);

	error |= nvgpu_ecc_stat_create(dev,
			g->gr.gpc_count,
			0,
			"gpc",
			NULL,
			"mmu_l1tlb_ecc_uncorrected_err_count",
			&g->ecc.gr.mmu_l1tlb_uncorrected_err_count);

	error |= nvgpu_ecc_stat_create(dev,
			g->gr.gpc_count,
			0,
			"gpc",
			NULL,
			"mmu_l1tlb_ecc_corrected_err_count",
			&g->ecc.gr.mmu_l1tlb_corrected_err_count);

	error |= nvgpu_ecc_stat_create(dev,
			1,
			0,
			"eng",
			NULL,
			"mmu_l2tlb_ecc_uncorrected_err_count",
			&g->ecc.fb.mmu_l2tlb_uncorrected_err_count);

	error |= nvgpu_ecc_stat_create(dev,
			1,
			0,
			"eng",
			NULL,
			"mmu_l2tlb_ecc_corrected_err_count",
			&g->ecc.fb.mmu_l2tlb_corrected_err_count);

	error |= nvgpu_ecc_stat_create(dev,
			1,
			0,
			"eng",
			NULL,
			"mmu_hubtlb_ecc_uncorrected_err_count",
			&g->ecc.fb.mmu_hubtlb_uncorrected_err_count);

	error |= nvgpu_ecc_stat_create(dev,
			1,
			0,
			"eng",
			NULL,
			"mmu_hubtlb_ecc_corrected_err_count",
			&g->ecc.fb.mmu_hubtlb_corrected_err_count);

	error |= nvgpu_ecc_stat_create(dev,
			1,
			0,
			"eng",
			NULL,
			"mmu_fillunit_ecc_uncorrected_err_count",
			&g->ecc.fb.mmu_fillunit_uncorrected_err_count);

	error |= nvgpu_ecc_stat_create(dev,
			1,
			0,
			"eng",
			NULL,
			"mmu_fillunit_ecc_corrected_err_count",
			&g->ecc.fb.mmu_fillunit_corrected_err_count);

	error |= nvgpu_ecc_stat_create(dev,
			1,
			0,
			"eng",
			NULL,
			"pmu_ecc_uncorrected_err_count",
			&g->ecc.pmu.pmu_uncorrected_err_count);

	error |= nvgpu_ecc_stat_create(dev,
			1,
			0,
			"eng",
			NULL,
			"pmu_ecc_corrected_err_count",
			&g->ecc.pmu.pmu_corrected_err_count);

	if (error)
		dev_err(dev, "Failed to create gv11b sysfs attributes!\n");
}

void gr_gv11b_remove_sysfs(struct gk20a *g)
{
	struct device *dev = dev_from_gk20a(g);

	if (!g->ecc.gr.sm_l1_tag_corrected_err_count.counters)
		return;
	gr_gp10b_remove_sysfs(g);

	nvgpu_gr_ecc_stat_remove(dev,
			0,
			&g->ecc.gr.sm_l1_tag_corrected_err_count);

	nvgpu_gr_ecc_stat_remove(dev,
			0,
			&g->ecc.gr.sm_l1_tag_uncorrected_err_count);

	nvgpu_gr_ecc_stat_remove(dev,
			0,
			&g->ecc.gr.sm_cbu_corrected_err_count);

	nvgpu_gr_ecc_stat_remove(dev,
			0,
			&g->ecc.gr.sm_cbu_uncorrected_err_count);

	nvgpu_gr_ecc_stat_remove(dev,
			0,
			&g->ecc.gr.sm_l1_data_corrected_err_count);

	nvgpu_gr_ecc_stat_remove(dev,
			0,
			&g->ecc.gr.sm_l1_data_uncorrected_err_count);

	nvgpu_gr_ecc_stat_remove(dev,
			0,
			&g->ecc.gr.sm_icache_corrected_err_count);

	nvgpu_gr_ecc_stat_remove(dev,
			0,
			&g->ecc.gr.sm_icache_uncorrected_err_count);

	nvgpu_gr_ecc_stat_remove(dev,
			0,
			&g->ecc.gr.gcc_l15_corrected_err_count);

	nvgpu_gr_ecc_stat_remove(dev,
			0,
			&g->ecc.gr.gcc_l15_uncorrected_err_count);

	nvgpu_ecc_stat_remove(dev,
			g->ltc_count,
			0,
			&g->ecc.ltc.l2_cache_uncorrected_err_count);

	nvgpu_ecc_stat_remove(dev,
			g->ltc_count,
			0,
			&g->ecc.ltc.l2_cache_corrected_err_count);

	nvgpu_ecc_stat_remove(dev,
			1,
			0,
			&g->ecc.gr.fecs_uncorrected_err_count);

	nvgpu_ecc_stat_remove(dev,
			1,
			0,
			&g->ecc.gr.fecs_corrected_err_count);

	nvgpu_ecc_stat_remove(dev,
			g->gr.gpc_count,
			0,
			&g->ecc.gr.gpccs_uncorrected_err_count);

	nvgpu_ecc_stat_remove(dev,
			g->gr.gpc_count,
			0,
			&g->ecc.gr.gpccs_corrected_err_count);

	nvgpu_ecc_stat_remove(dev,
			g->gr.gpc_count,
			0,
			&g->ecc.gr.mmu_l1tlb_uncorrected_err_count);

	nvgpu_ecc_stat_remove(dev,
			g->gr.gpc_count,
			0,
			&g->ecc.gr.mmu_l1tlb_corrected_err_count);

	nvgpu_ecc_stat_remove(dev,
			1,
			0,
			&g->ecc.fb.mmu_l2tlb_uncorrected_err_count);

	nvgpu_ecc_stat_remove(dev,
			1,
			0,
			&g->ecc.fb.mmu_l2tlb_corrected_err_count);

	nvgpu_ecc_stat_remove(dev,
			1,
			0,
			&g->ecc.fb.mmu_hubtlb_uncorrected_err_count);

	nvgpu_ecc_stat_remove(dev,
			1,
			0,
			&g->ecc.fb.mmu_hubtlb_corrected_err_count);

	nvgpu_ecc_stat_remove(dev,
			1,
			0,
			&g->ecc.fb.mmu_fillunit_uncorrected_err_count);

	nvgpu_ecc_stat_remove(dev,
			1,
			0,
			&g->ecc.fb.mmu_fillunit_corrected_err_count);

	nvgpu_ecc_stat_remove(dev,
			1,
			0,
			&g->ecc.pmu.pmu_uncorrected_err_count);

	nvgpu_ecc_stat_remove(dev,
			1,
			0,
			&g->ecc.pmu.pmu_corrected_err_count);
}
39
drivers/gpu/nvgpu/os/linux/rwsem.c
Normal file
@@ -0,0 +1,39 @@
/*
 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */

#include <nvgpu/rwsem.h>

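/*
 * Thin wrappers that back the OS-independent nvgpu_rwsem API with the
 * Linux kernel rw_semaphore primitives.
 */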
void nvgpu_rwsem_init(struct nvgpu_rwsem *rwsem)
{
	init_rwsem(&rwsem->rwsem);
}

void nvgpu_rwsem_up_read(struct nvgpu_rwsem *rwsem)
{
	up_read(&rwsem->rwsem);
}

void nvgpu_rwsem_down_read(struct nvgpu_rwsem *rwsem)
{
	down_read(&rwsem->rwsem);
}

void nvgpu_rwsem_up_write(struct nvgpu_rwsem *rwsem)
{
	up_write(&rwsem->rwsem);
}

void nvgpu_rwsem_down_write(struct nvgpu_rwsem *rwsem)
{
	down_write(&rwsem->rwsem);
}
428
drivers/gpu/nvgpu/os/linux/scale.c
Normal file
@@ -0,0 +1,428 @@
/*
 * gk20a clock scaling profile
 *
 * Copyright (c) 2013-2017, NVIDIA Corporation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/devfreq.h>
#include <linux/export.h>
#include <soc/tegra/chip-id.h>
#include <linux/pm_qos.h>

#include <governor.h>

#include <nvgpu/kmem.h>
#include <nvgpu/log.h>

#include "gk20a/gk20a.h"
#include "platform_gk20a.h"
#include "scale.h"
#include "os_linux.h"

/*
 * gk20a_scale_qos_notify()
 *
 * This function is called when the minimum QoS requirement for the device
 * has changed. The function calls the postscaling callback if it is defined.
 */

#if defined(CONFIG_COMMON_CLK)
int gk20a_scale_qos_notify(struct notifier_block *nb,
			unsigned long n, void *p)
{
	struct gk20a_scale_profile *profile =
		container_of(nb, struct gk20a_scale_profile,
			qos_notify_block);
	struct gk20a *g = get_gk20a(profile->dev);
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	struct devfreq *devfreq = l->devfreq;

	if (!devfreq)
		return NOTIFY_OK;

	mutex_lock(&devfreq->lock);
	/* check for pm_qos min and max frequency requirement */
	profile->qos_min_freq =
		(unsigned long)pm_qos_read_min_bound(PM_QOS_GPU_FREQ_BOUNDS) * 1000UL;
	profile->qos_max_freq =
		(unsigned long)pm_qos_read_max_bound(PM_QOS_GPU_FREQ_BOUNDS) * 1000UL;

	if (profile->qos_min_freq > profile->qos_max_freq) {
		nvgpu_err(g,
			"QoS: setting invalid limit, min_freq=%lu max_freq=%lu",
			profile->qos_min_freq, profile->qos_max_freq);
		profile->qos_min_freq = profile->qos_max_freq;
	}

	update_devfreq(devfreq);
	mutex_unlock(&devfreq->lock);

	return NOTIFY_OK;
}
#else
int gk20a_scale_qos_notify(struct notifier_block *nb,
			unsigned long n, void *p)
{
	struct gk20a_scale_profile *profile =
		container_of(nb, struct gk20a_scale_profile,
			qos_notify_block);
	struct gk20a_platform *platform = dev_get_drvdata(profile->dev);
	struct gk20a *g = get_gk20a(profile->dev);
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	unsigned long freq;

	if (!platform->postscale)
		return NOTIFY_OK;

	/* get the frequency requirement. if devfreq is enabled, check if it
	 * has higher demand than qos */
	freq = platform->clk_round_rate(profile->dev,
			(u32)pm_qos_read_min_bound(PM_QOS_GPU_FREQ_BOUNDS));
	if (l->devfreq)
		freq = max(l->devfreq->previous_freq, freq);

	/* Update gpu load because we may scale the emc target
	 * if the gpu load changed. */
	nvgpu_pmu_load_update(g);
	platform->postscale(profile->dev, freq);

	return NOTIFY_OK;
}
#endif

/*
 * gk20a_scale_make_freq_table(profile)
 *
 * This function initialises the frequency table for the given device profile
 */

static int gk20a_scale_make_freq_table(struct gk20a_scale_profile *profile)
{
	struct gk20a_platform *platform = dev_get_drvdata(profile->dev);
	int num_freqs, err;
	unsigned long *freqs;

	if (platform->get_clk_freqs) {
		/* get gpu frequency table */
		err = platform->get_clk_freqs(profile->dev, &freqs,
					&num_freqs);
		if (err)
			return -ENOSYS;
	} else
		return -ENOSYS;

	profile->devfreq_profile.freq_table = (unsigned long *)freqs;
	profile->devfreq_profile.max_state = num_freqs;

	return 0;
}

/*
 * gk20a_scale_target(dev, *freq, flags)
 *
 * This function scales the clock
 */

static int gk20a_scale_target(struct device *dev, unsigned long *freq,
			u32 flags)
{
	struct gk20a_platform *platform = dev_get_drvdata(dev);
	struct gk20a *g = platform->g;
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	struct gk20a_scale_profile *profile = g->scale_profile;
	struct devfreq *devfreq = l->devfreq;
	unsigned long local_freq = *freq;
	unsigned long rounded_rate;
	unsigned long min_freq = 0, max_freq = 0;

	/*
	 * Calculate floor and cap frequency values
	 *
	 * Policy :
	 * We have two APIs to clip the frequency
	 *  1. devfreq
	 *  2. pm_qos
	 *
	 * To calculate floor (min) freq, we select MAX of floor frequencies
	 * requested from both APIs
	 * To get cap (max) freq, we select MIN of max frequencies
	 *
	 * In case we have conflict (min_freq > max_freq) after above
	 * steps, we ensure that max_freq wins over min_freq
	 */
	min_freq = max_t(u32, devfreq->min_freq, profile->qos_min_freq);
	max_freq = min_t(u32, devfreq->max_freq, profile->qos_max_freq);

	if (min_freq > max_freq)
		min_freq = max_freq;

	/* Clip requested frequency */
	if (local_freq < min_freq)
		local_freq = min_freq;

	if (local_freq > max_freq)
		local_freq = max_freq;

	/* set the final frequency */
	rounded_rate = platform->clk_round_rate(dev, local_freq);

	/* Check for duplicate request */
	if (rounded_rate == g->last_freq)
		return 0;

	if (g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK) == rounded_rate)
		*freq = rounded_rate;
	else {
		g->ops.clk.set_rate(g, CTRL_CLK_DOMAIN_GPCCLK, rounded_rate);
		*freq = g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK);
	}

	g->last_freq = *freq;

	/* postscale will only scale emc (dram clock) if evaluating
	 * gk20a_tegra_get_emc_rate() produces a new or different emc
	 * target because the load and/or gpufreq has changed */
	if (platform->postscale)
		platform->postscale(dev, rounded_rate);

	return 0;
}

/*
 * update_load_estimate_gpmu(profile)
 *
 * Update the load estimate using the GPMU. The busy value reported by the
 * GPMU is normalised over the time elapsed since the last query.
 */

static void update_load_estimate_gpmu(struct device *dev)
{
	struct gk20a *g = get_gk20a(dev);
	struct gk20a_scale_profile *profile = g->scale_profile;
	unsigned long dt;
	u32 busy_time;
	ktime_t t;

	t = ktime_get();
	dt = ktime_us_delta(t, profile->last_event_time);

	profile->dev_stat.total_time = dt;
	profile->last_event_time = t;
	nvgpu_pmu_load_norm(g, &busy_time);
	profile->dev_stat.busy_time = (busy_time * dt) / 1000;
}

/*
 * gk20a_scale_suspend(dev)
 *
 * This function informs devfreq of suspend
 */

void gk20a_scale_suspend(struct device *dev)
{
	struct gk20a *g = get_gk20a(dev);
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	struct devfreq *devfreq = l->devfreq;

	if (!devfreq)
		return;

	devfreq_suspend_device(devfreq);
}

/*
 * gk20a_scale_resume(dev)
 *
 * This function informs devfreq of resume
 */

void gk20a_scale_resume(struct device *dev)
{
	struct gk20a *g = get_gk20a(dev);
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	struct devfreq *devfreq = l->devfreq;

	if (!devfreq)
		return;

	g->last_freq = 0;
	devfreq_resume_device(devfreq);
}

/*
 * gk20a_scale_get_dev_status(dev, *stat)
 *
 * This function queries the current device status.
 */

static int gk20a_scale_get_dev_status(struct device *dev,
			struct devfreq_dev_status *stat)
{
	struct gk20a *g = get_gk20a(dev);
	struct gk20a_scale_profile *profile = g->scale_profile;
	struct gk20a_platform *platform = dev_get_drvdata(dev);

	/* update the software shadow */
	nvgpu_pmu_load_update(g);

	/* inform edp about new constraint */
	if (platform->prescale)
		platform->prescale(dev);

	/* Make sure there are correct values for the current frequency */
	profile->dev_stat.current_frequency =
		g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK);

	/* Update load estimate */
	update_load_estimate_gpmu(dev);

	/* Copy the contents of the current device status */
	*stat = profile->dev_stat;

	/* Finally, clear out the local values */
	profile->dev_stat.total_time = 0;
	profile->dev_stat.busy_time = 0;

	return 0;
}

/*
 * get_cur_freq(struct device *dev, unsigned long *freq)
 *
 * This function gets the current GPU clock rate.
 */

static int get_cur_freq(struct device *dev, unsigned long *freq)
{
	struct gk20a *g = get_gk20a(dev);
	*freq = g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK);
	return 0;
}


/*
 * gk20a_scale_init(dev)
 */

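/*
 * Allocate the scale profile, build the frequency table, register the
 * devfreq device when a governor is configured and hook up the PM QoS
 * min/max frequency notifiers.
 */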
void gk20a_scale_init(struct device *dev)
{
	struct gk20a_platform *platform = dev_get_drvdata(dev);
	struct gk20a *g = platform->g;
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	struct gk20a_scale_profile *profile;
	int err;

	if (g->scale_profile)
		return;

	if (!platform->devfreq_governor && !platform->qos_notify)
		return;

	profile = nvgpu_kzalloc(g, sizeof(*profile));
	if (!profile)
		return;

	profile->dev = dev;
	profile->dev_stat.busy = false;

	/* Create frequency table */
	err = gk20a_scale_make_freq_table(profile);
	if (err || !profile->devfreq_profile.max_state)
		goto err_get_freqs;

	profile->qos_min_freq = 0;
	profile->qos_max_freq = UINT_MAX;

	/* Store device profile so we can access it if devfreq governor
	 * init needs that */
	g->scale_profile = profile;

	if (platform->devfreq_governor) {
		struct devfreq *devfreq;

		profile->devfreq_profile.initial_freq =
			profile->devfreq_profile.freq_table[0];
		profile->devfreq_profile.target = gk20a_scale_target;
		profile->devfreq_profile.get_dev_status =
			gk20a_scale_get_dev_status;
		profile->devfreq_profile.get_cur_freq = get_cur_freq;
		profile->devfreq_profile.polling_ms = 25;

		devfreq = devfreq_add_device(dev,
					&profile->devfreq_profile,
					platform->devfreq_governor, NULL);

		if (IS_ERR(devfreq))
			devfreq = NULL;

		l->devfreq = devfreq;
	}

	/* Should we register QoS callback for this device? */
	if (platform->qos_notify) {
		profile->qos_notify_block.notifier_call =
			platform->qos_notify;

		pm_qos_add_min_notifier(PM_QOS_GPU_FREQ_BOUNDS,
					&profile->qos_notify_block);
		pm_qos_add_max_notifier(PM_QOS_GPU_FREQ_BOUNDS,
					&profile->qos_notify_block);
	}

	return;

err_get_freqs:
	nvgpu_kfree(g, profile);
}

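/*
 * Tear down scaling: unregister the QoS notifiers and the devfreq device,
 * then free the scale profile.
 */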
void gk20a_scale_exit(struct device *dev)
{
	struct gk20a_platform *platform = dev_get_drvdata(dev);
	struct gk20a *g = platform->g;
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	int err;

	if (platform->qos_notify) {
		pm_qos_remove_min_notifier(PM_QOS_GPU_FREQ_BOUNDS,
				&g->scale_profile->qos_notify_block);
		pm_qos_remove_max_notifier(PM_QOS_GPU_FREQ_BOUNDS,
				&g->scale_profile->qos_notify_block);
	}

	if (platform->devfreq_governor) {
		err = devfreq_remove_device(l->devfreq);
		l->devfreq = NULL;
	}

	nvgpu_kfree(g, g->scale_profile);
	g->scale_profile = NULL;
}

/*
 * gk20a_scale_hw_init(dev)
 *
 * Initialize hardware portion of the device
 */

void gk20a_scale_hw_init(struct device *dev)
{
	struct gk20a_platform *platform = dev_get_drvdata(dev);
	struct gk20a_scale_profile *profile = platform->g->scale_profile;

	/* make sure that scaling has been initialised */
	if (!profile)
		return;

	profile->dev_stat.total_time = 0;
	profile->last_event_time = ktime_get();
}
66
drivers/gpu/nvgpu/os/linux/scale.h
Normal file
@@ -0,0 +1,66 @@
/*
 * gk20a clock scaling profile
 *
 * Copyright (c) 2013-2016, NVIDIA Corporation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef GK20A_SCALE_H
#define GK20A_SCALE_H

#include <linux/devfreq.h>

struct clk;

struct gk20a_scale_profile {
	struct device *dev;
	ktime_t last_event_time;
	struct devfreq_dev_profile devfreq_profile;
	struct devfreq_dev_status dev_stat;
	struct notifier_block qos_notify_block;
	unsigned long qos_min_freq;
	unsigned long qos_max_freq;
	void *private_data;
};

/* Initialization and de-initialization for module */
void gk20a_scale_init(struct device *);
void gk20a_scale_exit(struct device *);
void gk20a_scale_hw_init(struct device *dev);

#if defined(CONFIG_GK20A_DEVFREQ)
/*
 * call when performing submit to notify scaling mechanism that the module is
 * in use
 */
void gk20a_scale_notify_busy(struct device *);
void gk20a_scale_notify_idle(struct device *);

void gk20a_scale_suspend(struct device *);
void gk20a_scale_resume(struct device *);
int gk20a_scale_qos_notify(struct notifier_block *nb,
			unsigned long n, void *p);
#else
static inline void gk20a_scale_notify_busy(struct device *dev) {}
static inline void gk20a_scale_notify_idle(struct device *dev) {}
static inline void gk20a_scale_suspend(struct device *dev) {}
static inline void gk20a_scale_resume(struct device *dev) {}
static inline int gk20a_scale_qos_notify(struct notifier_block *nb,
			unsigned long n, void *p)
{
	return -ENOSYS;
}
#endif

#endif
676
drivers/gpu/nvgpu/os/linux/sched.c
Normal file
@@ -0,0 +1,676 @@
/*
 * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
#include <asm/barrier.h>
#include <linux/wait.h>
#include <linux/uaccess.h>
#include <linux/poll.h>
#include <uapi/linux/nvgpu.h>

#include <nvgpu/kmem.h>
#include <nvgpu/log.h>
#include <nvgpu/bug.h>
#include <nvgpu/barrier.h>

#include "gk20a/gk20a.h"
#include "gk20a/gr_gk20a.h"
#include "sched.h"
#include "os_linux.h"
#include "ioctl_tsg.h"

#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>

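/*
 * Blocking read of the scheduler event status word: waits (unless
 * O_NONBLOCK is set) until an event is posted, copies it to userspace and
 * clears the pending status.
 */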
ssize_t gk20a_sched_dev_read(struct file *filp, char __user *buf,
	size_t size, loff_t *off)
{
	struct gk20a_sched_ctrl *sched = filp->private_data;
	struct gk20a *g = sched->g;
	struct nvgpu_sched_event_arg event = { 0 };
	int err;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched,
		"filp=%p buf=%p size=%zu", filp, buf, size);

	if (size < sizeof(event))
		return -EINVAL;
	size = sizeof(event);

	nvgpu_mutex_acquire(&sched->status_lock);
	while (!sched->status) {
		nvgpu_mutex_release(&sched->status_lock);
		if (filp->f_flags & O_NONBLOCK)
			return -EAGAIN;
		err = NVGPU_COND_WAIT_INTERRUPTIBLE(&sched->readout_wq,
			sched->status, 0);
		if (err)
			return err;
		nvgpu_mutex_acquire(&sched->status_lock);
	}

	event.reserved = 0;
	event.status = sched->status;

	if (copy_to_user(buf, &event, size)) {
		nvgpu_mutex_release(&sched->status_lock);
		return -EFAULT;
	}

	sched->status = 0;

	nvgpu_mutex_release(&sched->status_lock);

	return size;
}

unsigned int gk20a_sched_dev_poll(struct file *filp, poll_table *wait)
{
	struct gk20a_sched_ctrl *sched = filp->private_data;
	struct gk20a *g = sched->g;
	unsigned int mask = 0;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, " ");

	nvgpu_mutex_acquire(&sched->status_lock);
	poll_wait(filp, &sched->readout_wq.wq, wait);
	if (sched->status)
		mask |= POLLIN | POLLRDNORM;
	nvgpu_mutex_release(&sched->status_lock);

	return mask;
}

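/*
 * Copy the bitmap of currently active TSGs to userspace. If the supplied
 * buffer is too small, the required size is returned in arg->size along
 * with -ENOSPC.
 */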
static int gk20a_sched_dev_ioctl_get_tsgs(struct gk20a_sched_ctrl *sched,
	struct nvgpu_sched_get_tsgs_args *arg)
{
	struct gk20a *g = sched->g;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "size=%u buffer=%llx",
			arg->size, arg->buffer);

	if ((arg->size < sched->bitmap_size) || (!arg->buffer)) {
		arg->size = sched->bitmap_size;
		return -ENOSPC;
	}

	nvgpu_mutex_acquire(&sched->status_lock);
	if (copy_to_user((void __user *)(uintptr_t)arg->buffer,
		sched->active_tsg_bitmap, sched->bitmap_size)) {
		nvgpu_mutex_release(&sched->status_lock);
		return -EFAULT;
	}
	nvgpu_mutex_release(&sched->status_lock);

	return 0;
}
|
||||
|
||||
static int gk20a_sched_dev_ioctl_get_recent_tsgs(struct gk20a_sched_ctrl *sched,
|
||||
struct nvgpu_sched_get_tsgs_args *arg)
|
||||
{
|
||||
struct gk20a *g = sched->g;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "size=%u buffer=%llx",
|
||||
arg->size, arg->buffer);
|
||||
|
||||
if ((arg->size < sched->bitmap_size) || (!arg->buffer)) {
|
||||
arg->size = sched->bitmap_size;
|
||||
return -ENOSPC;
|
||||
}
|
||||
|
||||
nvgpu_mutex_acquire(&sched->status_lock);
|
||||
if (copy_to_user((void __user *)(uintptr_t)arg->buffer,
|
||||
sched->recent_tsg_bitmap, sched->bitmap_size)) {
|
||||
nvgpu_mutex_release(&sched->status_lock);
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
memset(sched->recent_tsg_bitmap, 0, sched->bitmap_size);
|
||||
nvgpu_mutex_release(&sched->status_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int gk20a_sched_dev_ioctl_get_tsgs_by_pid(struct gk20a_sched_ctrl *sched,
|
||||
struct nvgpu_sched_get_tsgs_by_pid_args *arg)
|
||||
{
|
||||
struct gk20a *g = sched->g;
|
||||
struct fifo_gk20a *f = &g->fifo;
|
||||
struct tsg_gk20a *tsg;
|
||||
u64 *bitmap;
|
||||
unsigned int tsgid;
|
||||
/* pid at user level corresponds to kernel tgid */
|
||||
pid_t tgid = (pid_t)arg->pid;
|
||||
int err = 0;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "pid=%d size=%u buffer=%llx",
|
||||
(pid_t)arg->pid, arg->size, arg->buffer);
|
||||
|
||||
if ((arg->size < sched->bitmap_size) || (!arg->buffer)) {
|
||||
arg->size = sched->bitmap_size;
|
||||
return -ENOSPC;
|
||||
}
|
||||
|
||||
bitmap = nvgpu_kzalloc(sched->g, sched->bitmap_size);
|
||||
if (!bitmap)
|
||||
return -ENOMEM;
|
||||
|
||||
nvgpu_mutex_acquire(&sched->status_lock);
|
||||
for (tsgid = 0; tsgid < f->num_channels; tsgid++) {
|
||||
if (NVGPU_SCHED_ISSET(tsgid, sched->active_tsg_bitmap)) {
|
||||
tsg = &f->tsg[tsgid];
|
||||
if (tsg->tgid == tgid)
|
||||
NVGPU_SCHED_SET(tsgid, bitmap);
|
||||
}
|
||||
}
|
||||
nvgpu_mutex_release(&sched->status_lock);
|
||||
|
||||
if (copy_to_user((void __user *)(uintptr_t)arg->buffer,
|
||||
bitmap, sched->bitmap_size))
|
||||
err = -EFAULT;
|
||||
|
||||
nvgpu_kfree(sched->g, bitmap);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int gk20a_sched_dev_ioctl_get_params(struct gk20a_sched_ctrl *sched,
|
||||
struct nvgpu_sched_tsg_get_params_args *arg)
|
||||
{
|
||||
struct gk20a *g = sched->g;
|
||||
struct fifo_gk20a *f = &g->fifo;
|
||||
struct tsg_gk20a *tsg;
|
||||
u32 tsgid = arg->tsgid;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid);
|
||||
|
||||
if (tsgid >= f->num_channels)
|
||||
return -EINVAL;
|
||||
|
||||
nvgpu_speculation_barrier();
|
||||
|
||||
tsg = &f->tsg[tsgid];
|
||||
if (!nvgpu_ref_get_unless_zero(&tsg->refcount))
|
||||
return -ENXIO;
|
||||
|
||||
arg->pid = tsg->tgid; /* kernel tgid corresponds to user pid */
|
||||
arg->runlist_interleave = tsg->interleave_level;
|
||||
arg->timeslice = tsg->timeslice_us;
|
||||
|
||||
arg->graphics_preempt_mode =
|
||||
tsg->gr_ctx.graphics_preempt_mode;
|
||||
arg->compute_preempt_mode =
|
||||
tsg->gr_ctx.compute_preempt_mode;
|
||||
|
||||
nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int gk20a_sched_dev_ioctl_tsg_set_timeslice(
|
||||
struct gk20a_sched_ctrl *sched,
|
||||
struct nvgpu_sched_tsg_timeslice_args *arg)
|
||||
{
|
||||
struct gk20a *g = sched->g;
|
||||
struct fifo_gk20a *f = &g->fifo;
|
||||
struct tsg_gk20a *tsg;
|
||||
u32 tsgid = arg->tsgid;
|
||||
int err;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid);
|
||||
|
||||
if (tsgid >= f->num_channels)
|
||||
return -EINVAL;
|
||||
|
||||
nvgpu_speculation_barrier();
|
||||
|
||||
tsg = &f->tsg[tsgid];
|
||||
if (!nvgpu_ref_get_unless_zero(&tsg->refcount))
|
||||
return -ENXIO;
|
||||
|
||||
err = gk20a_busy(g);
|
||||
if (err)
|
||||
goto done;
|
||||
|
||||
err = gk20a_tsg_set_timeslice(tsg, arg->timeslice);
|
||||
|
||||
gk20a_idle(g);
|
||||
|
||||
done:
|
||||
nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int gk20a_sched_dev_ioctl_tsg_set_runlist_interleave(
|
||||
struct gk20a_sched_ctrl *sched,
|
||||
struct nvgpu_sched_tsg_runlist_interleave_args *arg)
|
||||
{
|
||||
struct gk20a *g = sched->g;
|
||||
struct fifo_gk20a *f = &g->fifo;
|
||||
struct tsg_gk20a *tsg;
|
||||
u32 tsgid = arg->tsgid;
|
||||
int err;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid);
|
||||
|
||||
if (tsgid >= f->num_channels)
|
||||
return -EINVAL;
|
||||
|
||||
nvgpu_speculation_barrier();
|
||||
|
||||
tsg = &f->tsg[tsgid];
|
||||
if (!nvgpu_ref_get_unless_zero(&tsg->refcount))
|
||||
return -ENXIO;
|
||||
|
||||
err = gk20a_busy(g);
|
||||
if (err)
|
||||
goto done;
|
||||
|
||||
err = gk20a_tsg_set_runlist_interleave(tsg, arg->runlist_interleave);
|
||||
|
||||
gk20a_idle(g);
|
||||
|
||||
done:
|
||||
nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int gk20a_sched_dev_ioctl_lock_control(struct gk20a_sched_ctrl *sched)
|
||||
{
|
||||
struct gk20a *g = sched->g;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, " ");
|
||||
|
||||
nvgpu_mutex_acquire(&sched->control_lock);
|
||||
sched->control_locked = true;
|
||||
nvgpu_mutex_release(&sched->control_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int gk20a_sched_dev_ioctl_unlock_control(struct gk20a_sched_ctrl *sched)
|
||||
{
|
||||
struct gk20a *g = sched->g;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, " ");
|
||||
|
||||
nvgpu_mutex_acquire(&sched->control_lock);
|
||||
sched->control_locked = false;
|
||||
nvgpu_mutex_release(&sched->control_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int gk20a_sched_dev_ioctl_get_api_version(struct gk20a_sched_ctrl *sched,
|
||||
struct nvgpu_sched_api_version_args *args)
|
||||
{
|
||||
struct gk20a *g = sched->g;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, " ");
|
||||
|
||||
args->version = NVGPU_SCHED_API_VERSION;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int gk20a_sched_dev_ioctl_get_tsg(struct gk20a_sched_ctrl *sched,
|
||||
struct nvgpu_sched_tsg_refcount_args *arg)
|
||||
{
|
||||
struct gk20a *g = sched->g;
|
||||
struct fifo_gk20a *f = &g->fifo;
|
||||
struct tsg_gk20a *tsg;
|
||||
u32 tsgid = arg->tsgid;
|
||||
|
||||
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid);
|
||||
|
||||
if (tsgid >= f->num_channels)
|
||||
return -EINVAL;
|
||||
|
||||
nvgpu_speculation_barrier();
|
||||
|
||||
tsg = &f->tsg[tsgid];
|
||||
if (!nvgpu_ref_get_unless_zero(&tsg->refcount))
|
||||
return -ENXIO;
|
||||
|
||||
nvgpu_mutex_acquire(&sched->status_lock);
|
||||
if (NVGPU_SCHED_ISSET(tsgid, sched->ref_tsg_bitmap)) {
|
||||
nvgpu_warn(g, "tsgid=%d already referenced", tsgid);
|
||||
/* unlock status_lock as nvgpu_ioctl_tsg_release locks it */
|
||||
nvgpu_mutex_release(&sched->status_lock);
|
||||
nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release);
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
/* keep reference on TSG, will be released on
|
||||
* NVGPU_SCHED_IOCTL_PUT_TSG ioctl, or close
|
||||
*/
|
||||
NVGPU_SCHED_SET(tsgid, sched->ref_tsg_bitmap);
|
||||
nvgpu_mutex_release(&sched->status_lock);
|
||||
|
||||
return 0;
|
||||
}

static int gk20a_sched_dev_ioctl_put_tsg(struct gk20a_sched_ctrl *sched,
	struct nvgpu_sched_tsg_refcount_args *arg)
{
	struct gk20a *g = sched->g;
	struct fifo_gk20a *f = &g->fifo;
	struct tsg_gk20a *tsg;
	u32 tsgid = arg->tsgid;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid);

	if (tsgid >= f->num_channels)
		return -EINVAL;

	nvgpu_speculation_barrier();

	nvgpu_mutex_acquire(&sched->status_lock);
	if (!NVGPU_SCHED_ISSET(tsgid, sched->ref_tsg_bitmap)) {
		nvgpu_mutex_release(&sched->status_lock);
		nvgpu_warn(g, "tsgid=%d not previously referenced", tsgid);
		return -ENXIO;
	}
	NVGPU_SCHED_CLR(tsgid, sched->ref_tsg_bitmap);
	nvgpu_mutex_release(&sched->status_lock);

	tsg = &f->tsg[tsgid];
	nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release);

	return 0;
}

int gk20a_sched_dev_open(struct inode *inode, struct file *filp)
{
	struct nvgpu_os_linux *l = container_of(inode->i_cdev,
				struct nvgpu_os_linux, sched.cdev);
	struct gk20a *g;
	struct gk20a_sched_ctrl *sched;
	int err = 0;

	g = gk20a_get(&l->g);
	if (!g)
		return -ENODEV;
	sched = &l->sched_ctrl;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "g=%p", g);

	if (!sched->sw_ready) {
		err = gk20a_busy(g);
		if (err)
			goto free_ref;

		gk20a_idle(g);
	}

	if (!nvgpu_mutex_tryacquire(&sched->busy_lock)) {
		err = -EBUSY;
		goto free_ref;
	}

	memcpy(sched->recent_tsg_bitmap, sched->active_tsg_bitmap,
		sched->bitmap_size);
	memset(sched->ref_tsg_bitmap, 0, sched->bitmap_size);

	filp->private_data = sched;
	nvgpu_log(g, gpu_dbg_sched, "filp=%p sched=%p", filp, sched);

free_ref:
	if (err)
		gk20a_put(g);
	return err;
}
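
/* Editorial sketch (not part of this commit): because gk20a_sched_dev_open()
 * only tryacquires busy_lock, the scheduler node is effectively single-open
 * and a second open() fails with -EBUSY. A minimal userspace caller might
 * handle that as below; the device path is an assumption, since the actual
 * node name depends on how sched.cdev is registered.
 */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>

static int open_sched_node(void)
{
	int fd = open("/dev/nvgpu-sched", O_RDWR); /* assumed node name */

	if (fd < 0 && errno == EBUSY)
		fprintf(stderr, "sched node already held by another client\n");
	return fd;
}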

long gk20a_sched_dev_ioctl(struct file *filp, unsigned int cmd,
	unsigned long arg)
{
	struct gk20a_sched_ctrl *sched = filp->private_data;
	struct gk20a *g = sched->g;
	u8 buf[NVGPU_SCHED_IOCTL_MAX_ARG_SIZE];
	int err = 0;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "nr=%d", _IOC_NR(cmd));

	if ((_IOC_TYPE(cmd) != NVGPU_SCHED_IOCTL_MAGIC) ||
		(_IOC_NR(cmd) == 0) ||
		(_IOC_NR(cmd) > NVGPU_SCHED_IOCTL_LAST) ||
		(_IOC_SIZE(cmd) > NVGPU_SCHED_IOCTL_MAX_ARG_SIZE))
		return -EINVAL;

	memset(buf, 0, sizeof(buf));
	if (_IOC_DIR(cmd) & _IOC_WRITE) {
		if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
			return -EFAULT;
	}

	switch (cmd) {
	case NVGPU_SCHED_IOCTL_GET_TSGS:
		err = gk20a_sched_dev_ioctl_get_tsgs(sched,
			(struct nvgpu_sched_get_tsgs_args *)buf);
		break;
	case NVGPU_SCHED_IOCTL_GET_RECENT_TSGS:
		err = gk20a_sched_dev_ioctl_get_recent_tsgs(sched,
			(struct nvgpu_sched_get_tsgs_args *)buf);
		break;
	case NVGPU_SCHED_IOCTL_GET_TSGS_BY_PID:
		err = gk20a_sched_dev_ioctl_get_tsgs_by_pid(sched,
			(struct nvgpu_sched_get_tsgs_by_pid_args *)buf);
		break;
	case NVGPU_SCHED_IOCTL_TSG_GET_PARAMS:
		err = gk20a_sched_dev_ioctl_get_params(sched,
			(struct nvgpu_sched_tsg_get_params_args *)buf);
		break;
	case NVGPU_SCHED_IOCTL_TSG_SET_TIMESLICE:
		err = gk20a_sched_dev_ioctl_tsg_set_timeslice(sched,
			(struct nvgpu_sched_tsg_timeslice_args *)buf);
		break;
	case NVGPU_SCHED_IOCTL_TSG_SET_RUNLIST_INTERLEAVE:
		err = gk20a_sched_dev_ioctl_tsg_set_runlist_interleave(sched,
			(struct nvgpu_sched_tsg_runlist_interleave_args *)buf);
		break;
	case NVGPU_SCHED_IOCTL_LOCK_CONTROL:
		err = gk20a_sched_dev_ioctl_lock_control(sched);
		break;
	case NVGPU_SCHED_IOCTL_UNLOCK_CONTROL:
		err = gk20a_sched_dev_ioctl_unlock_control(sched);
		break;
	case NVGPU_SCHED_IOCTL_GET_API_VERSION:
		err = gk20a_sched_dev_ioctl_get_api_version(sched,
			(struct nvgpu_sched_api_version_args *)buf);
		break;
	case NVGPU_SCHED_IOCTL_GET_TSG:
		err = gk20a_sched_dev_ioctl_get_tsg(sched,
			(struct nvgpu_sched_tsg_refcount_args *)buf);
		break;
	case NVGPU_SCHED_IOCTL_PUT_TSG:
		err = gk20a_sched_dev_ioctl_put_tsg(sched,
			(struct nvgpu_sched_tsg_refcount_args *)buf);
		break;
	default:
		nvgpu_log_info(g, "unrecognized gpu ioctl cmd: 0x%x", cmd);
		err = -ENOTTY;
	}

	/* Some ioctls like NVGPU_SCHED_IOCTL_GET_TSGS might be called on
	 * purpose with NULL buffer and/or zero size to discover TSG bitmap
	 * size. We need to update user arguments in this case too, even
	 * if we return an error.
	 */
	if ((!err || (err == -ENOSPC)) && (_IOC_DIR(cmd) & _IOC_READ)) {
		if (copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)))
			err = -EFAULT;
	}

	return err;
}
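
/* Editorial sketch (not part of this commit): the -ENOSPC copy-out above
 * enables a two-step read of the TSG bitmap from userspace. The args layout
 * below follows the nvgpu uapi but is an assumption here, since the uapi
 * header is not part of this diff.
 */
#include <errno.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>

struct nvgpu_sched_get_tsgs_args {	/* assumed uapi layout */
	uint32_t size;		/* in: buffer size; out: required size */
	uint32_t reserved;
	uint64_t buffer;	/* in: userspace buffer address */
};

static uint64_t *read_tsg_bitmap(int fd, unsigned long get_tsgs_cmd,
				 uint32_t *out_size)
{
	struct nvgpu_sched_get_tsgs_args args;
	uint64_t *bitmap;

	/* Step 1: zero size; the kernel fails with ENOSPC but reports the
	 * required bitmap size back through args.size.
	 */
	memset(&args, 0, sizeof(args));
	if (ioctl(fd, get_tsgs_cmd, &args) < 0 && errno != ENOSPC)
		return NULL;

	bitmap = malloc(args.size);
	if (!bitmap)
		return NULL;

	/* Step 2: retry with the reported size and a real buffer. */
	args.buffer = (uint64_t)(uintptr_t)bitmap;
	if (ioctl(fd, get_tsgs_cmd, &args) < 0) {
		free(bitmap);
		return NULL;
	}
	*out_size = args.size;
	return bitmap;
}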

int gk20a_sched_dev_release(struct inode *inode, struct file *filp)
{
	struct gk20a_sched_ctrl *sched = filp->private_data;
	struct gk20a *g = sched->g;
	struct fifo_gk20a *f = &g->fifo;
	struct tsg_gk20a *tsg;
	unsigned int tsgid;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "sched: %p", sched);

	/* release any reference to TSGs */
	for (tsgid = 0; tsgid < f->num_channels; tsgid++) {
		if (NVGPU_SCHED_ISSET(tsgid, sched->ref_tsg_bitmap)) {
			tsg = &f->tsg[tsgid];
			nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release);
		}
	}

	/* unlock control */
	nvgpu_mutex_acquire(&sched->control_lock);
	sched->control_locked = false;
	nvgpu_mutex_release(&sched->control_lock);

	nvgpu_mutex_release(&sched->busy_lock);
	gk20a_put(g);
	return 0;
}

void gk20a_sched_ctrl_tsg_added(struct gk20a *g, struct tsg_gk20a *tsg)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	struct gk20a_sched_ctrl *sched = &l->sched_ctrl;
	int err;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid);

	if (!sched->sw_ready) {
		err = gk20a_busy(g);
		if (err) {
			WARN_ON(err);
			return;
		}

		gk20a_idle(g);
	}

	nvgpu_mutex_acquire(&sched->status_lock);
	NVGPU_SCHED_SET(tsg->tsgid, sched->active_tsg_bitmap);
	NVGPU_SCHED_SET(tsg->tsgid, sched->recent_tsg_bitmap);
	sched->status |= NVGPU_SCHED_STATUS_TSG_OPEN;
	nvgpu_mutex_release(&sched->status_lock);
	nvgpu_cond_signal_interruptible(&sched->readout_wq);
}

void gk20a_sched_ctrl_tsg_removed(struct gk20a *g, struct tsg_gk20a *tsg)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	struct gk20a_sched_ctrl *sched = &l->sched_ctrl;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid);

	nvgpu_mutex_acquire(&sched->status_lock);
	NVGPU_SCHED_CLR(tsg->tsgid, sched->active_tsg_bitmap);

	/* clear recent_tsg_bitmap as well: if app manager did not
	 * notice that TSG was previously added, no need to notify it
	 * if the TSG has been released in the meantime. If the
	 * TSG gets reallocated, app manager will be notified as usual.
	 */
	NVGPU_SCHED_CLR(tsg->tsgid, sched->recent_tsg_bitmap);

	/* do not set event_pending, we only want to notify app manager
	 * when TSGs are added, so that it can apply sched params
	 */
	nvgpu_mutex_release(&sched->status_lock);
}

int gk20a_sched_ctrl_init(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	struct gk20a_sched_ctrl *sched = &l->sched_ctrl;
	struct fifo_gk20a *f = &g->fifo;
	int err;

	if (sched->sw_ready)
		return 0;

	sched->g = g;
	sched->bitmap_size = roundup(f->num_channels, 64) / 8;
	sched->status = 0;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "g=%p sched=%p size=%zu",
		g, sched, sched->bitmap_size);

	sched->active_tsg_bitmap = nvgpu_kzalloc(g, sched->bitmap_size);
	if (!sched->active_tsg_bitmap)
		return -ENOMEM;

	sched->recent_tsg_bitmap = nvgpu_kzalloc(g, sched->bitmap_size);
	if (!sched->recent_tsg_bitmap) {
		err = -ENOMEM;
		goto free_active;
	}

	sched->ref_tsg_bitmap = nvgpu_kzalloc(g, sched->bitmap_size);
	if (!sched->ref_tsg_bitmap) {
		err = -ENOMEM;
		goto free_recent;
	}

	nvgpu_cond_init(&sched->readout_wq);

	err = nvgpu_mutex_init(&sched->status_lock);
	if (err)
		goto free_ref;

	err = nvgpu_mutex_init(&sched->control_lock);
	if (err)
		goto free_status_lock;

	err = nvgpu_mutex_init(&sched->busy_lock);
	if (err)
		goto free_control_lock;

	sched->sw_ready = true;

	return 0;

free_control_lock:
	nvgpu_mutex_destroy(&sched->control_lock);
free_status_lock:
	nvgpu_mutex_destroy(&sched->status_lock);
free_ref:
	nvgpu_kfree(g, sched->ref_tsg_bitmap);
free_recent:
	nvgpu_kfree(g, sched->recent_tsg_bitmap);
free_active:
	nvgpu_kfree(g, sched->active_tsg_bitmap);

	return err;
}
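
/* Editorial note: a worked instance of the bitmap sizing above. The bitmaps
 * hold one bit per TSG id, rounded up to whole u64 words:
 *   num_channels = 512 -> roundup(512, 64) / 8 = 512 / 8 = 64 bytes (8 words)
 *   num_channels = 100 -> roundup(100, 64) / 8 = 128 / 8 = 16 bytes (2 words)
 */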

void gk20a_sched_ctrl_cleanup(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	struct gk20a_sched_ctrl *sched = &l->sched_ctrl;

	nvgpu_kfree(g, sched->active_tsg_bitmap);
	nvgpu_kfree(g, sched->recent_tsg_bitmap);
	nvgpu_kfree(g, sched->ref_tsg_bitmap);
	sched->active_tsg_bitmap = NULL;
	sched->recent_tsg_bitmap = NULL;
	sched->ref_tsg_bitmap = NULL;

	nvgpu_mutex_destroy(&sched->status_lock);
	nvgpu_mutex_destroy(&sched->control_lock);
	nvgpu_mutex_destroy(&sched->busy_lock);

	sched->sw_ready = false;
}
55
drivers/gpu/nvgpu/os/linux/sched.h
Normal file
@@ -0,0 +1,55 @@
/*
 * Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
#ifndef __NVGPU_SCHED_H
#define __NVGPU_SCHED_H

struct gk20a;
struct gpu_ops;
struct tsg_gk20a;
struct poll_table_struct;

struct gk20a_sched_ctrl {
	struct gk20a *g;

	struct nvgpu_mutex control_lock;
	bool control_locked;
	bool sw_ready;
	struct nvgpu_mutex status_lock;
	struct nvgpu_mutex busy_lock;

	u64 status;

	size_t bitmap_size;
	u64 *active_tsg_bitmap;
	u64 *recent_tsg_bitmap;
	u64 *ref_tsg_bitmap;

	struct nvgpu_cond readout_wq;
};

int gk20a_sched_dev_release(struct inode *inode, struct file *filp);
int gk20a_sched_dev_open(struct inode *inode, struct file *filp);
long gk20a_sched_dev_ioctl(struct file *, unsigned int, unsigned long);
ssize_t gk20a_sched_dev_read(struct file *, char __user *, size_t, loff_t *);
unsigned int gk20a_sched_dev_poll(struct file *, struct poll_table_struct *);

void gk20a_sched_ctrl_tsg_added(struct gk20a *, struct tsg_gk20a *);
void gk20a_sched_ctrl_tsg_removed(struct gk20a *, struct tsg_gk20a *);
int gk20a_sched_ctrl_init(struct gk20a *);

void gk20a_sched_ctrl_cleanup(struct gk20a *g);

#endif /* __NVGPU_SCHED_H */
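
/* Editorial sketch (not part of this commit): the NVGPU_SCHED_SET/CLR/ISSET
 * helpers used by sched.c are defined elsewhere; over the u64 bitmap arrays
 * above they would plausibly look like this. Shown for illustration only.
 */
#define NVGPU_SCHED_ISSET(id, bitmap) \
	((bitmap)[(id) / 64U] & (1ULL << ((id) % 64U)))
#define NVGPU_SCHED_SET(id, bitmap) \
	((bitmap)[(id) / 64U] |= (1ULL << ((id) % 64U)))
#define NVGPU_SCHED_CLR(id, bitmap) \
	((bitmap)[(id) / 64U] &= ~(1ULL << ((id) % 64U)))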
95
drivers/gpu/nvgpu/os/linux/sim.c
Normal file
@@ -0,0 +1,95 @@
/*
 * Copyright (c) 2018, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/io.h>
#include <linux/highmem.h>
#include <linux/platform_device.h>

#include <nvgpu/log.h>
#include <nvgpu/linux/vm.h>
#include <nvgpu/bitops.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/dma.h>
#include <nvgpu/soc.h>
#include <nvgpu/hw_sim.h>
#include <nvgpu/sim.h>
#include "gk20a/gk20a.h"
#include "platform_gk20a.h"
#include "os_linux.h"
#include "module.h"

void sim_writel(struct sim_nvgpu *sim, u32 r, u32 v)
{
	struct sim_nvgpu_linux *sim_linux =
		container_of(sim, struct sim_nvgpu_linux, sim);

	writel(v, sim_linux->regs + r);
}

u32 sim_readl(struct sim_nvgpu *sim, u32 r)
{
	struct sim_nvgpu_linux *sim_linux =
		container_of(sim, struct sim_nvgpu_linux, sim);

	return readl(sim_linux->regs + r);
}

void nvgpu_remove_sim_support_linux(struct gk20a *g)
{
	struct sim_nvgpu_linux *sim_linux;

	if (!g->sim)
		return;

	sim_linux = container_of(g->sim, struct sim_nvgpu_linux, sim);
	if (sim_linux->regs) {
		sim_writel(g->sim, sim_config_r(), sim_config_mode_disabled_v());
		iounmap(sim_linux->regs);
		sim_linux->regs = NULL;
	}
	nvgpu_kfree(g, sim_linux);
	g->sim = NULL;
}

int nvgpu_init_sim_support_linux(struct gk20a *g,
		struct platform_device *dev)
{
	struct sim_nvgpu_linux *sim_linux;
	int err = -ENOMEM;

	if (!nvgpu_platform_is_simulation(g))
		return 0;

	sim_linux = nvgpu_kzalloc(g, sizeof(*sim_linux));
	if (!sim_linux)
		return err;
	g->sim = &sim_linux->sim;
	g->sim->g = g;
	sim_linux->regs = nvgpu_ioremap_resource(dev,
					GK20A_SIM_IORESOURCE_MEM,
					&sim_linux->reg_mem);
	if (IS_ERR(sim_linux->regs)) {
		nvgpu_err(g, "failed to remap gk20a sim regs");
		err = PTR_ERR(sim_linux->regs);
		goto fail;
	}
	sim_linux->remove_support_linux = nvgpu_remove_sim_support_linux;
	return 0;

fail:
	nvgpu_remove_sim_support_linux(g);
	return err;
}
91
drivers/gpu/nvgpu/os/linux/sim_pci.c
Normal file
@@ -0,0 +1,91 @@
/*
 * Copyright (c) 2017-2018, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/io.h>
#include <linux/highmem.h>
#include <linux/platform_device.h>

#include <nvgpu/log.h>
#include <nvgpu/linux/vm.h>
#include <nvgpu/bitops.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/dma.h>
#include <nvgpu/hw_sim_pci.h>
#include <nvgpu/sim.h>
#include "gk20a/gk20a.h"
#include "os_linux.h"
#include "module.h"

static bool _nvgpu_pci_is_simulation(struct gk20a *g, u32 sim_base)
{
	u32 cfg;
	bool is_simulation = false;

	cfg = nvgpu_readl(g, sim_base + sim_config_r());
	if (sim_config_mode_v(cfg) == sim_config_mode_enabled_v())
		is_simulation = true;

	return is_simulation;
}

void nvgpu_remove_sim_support_linux_pci(struct gk20a *g)
{
	struct sim_nvgpu_linux *sim_linux;
	bool is_simulation;

	is_simulation = _nvgpu_pci_is_simulation(g, sim_r());

	if (!is_simulation) {
		return;
	}

	if (!g->sim) {
		nvgpu_warn(g, "sim_gk20a not allocated");
		return;
	}
	sim_linux = container_of(g->sim, struct sim_nvgpu_linux, sim);

	if (sim_linux->regs) {
		sim_writel(g->sim, sim_config_r(), sim_config_mode_disabled_v());
		sim_linux->regs = NULL;
	}
	nvgpu_kfree(g, sim_linux);
	g->sim = NULL;
}

int nvgpu_init_sim_support_linux_pci(struct gk20a *g)
{
	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
	struct sim_nvgpu_linux *sim_linux;
	int err = -ENOMEM;
	bool is_simulation;

	is_simulation = _nvgpu_pci_is_simulation(g, sim_r());
	__nvgpu_set_enabled(g, NVGPU_IS_FMODEL, is_simulation);

	if (!is_simulation)
		return 0;

	sim_linux = nvgpu_kzalloc(g, sizeof(*sim_linux));
	if (!sim_linux)
		return err;
	g->sim = &sim_linux->sim;
	g->sim->g = g;
	sim_linux->regs = l->regs + sim_r();
	sim_linux->remove_support_linux = nvgpu_remove_sim_support_linux_pci;

	return 0;
}
122
drivers/gpu/nvgpu/os/linux/soc.c
Normal file
@@ -0,0 +1,122 @@
/*
 * Copyright (c) 2017-2018, NVIDIA CORPORATION.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */

#include <soc/tegra/chip-id.h>
#include <soc/tegra/fuse.h>
#include <soc/tegra/tegra_bpmp.h>
#ifdef CONFIG_TEGRA_HV_MANAGER
#include <soc/tegra/virt/syscalls.h>
#endif

#include <nvgpu/soc.h>
#include "os_linux.h"
#include "platform_gk20a.h"

bool nvgpu_platform_is_silicon(struct gk20a *g)
{
	return tegra_platform_is_silicon();
}

bool nvgpu_platform_is_simulation(struct gk20a *g)
{
	return tegra_platform_is_vdk();
}

bool nvgpu_platform_is_fpga(struct gk20a *g)
{
	return tegra_platform_is_fpga();
}

bool nvgpu_is_hypervisor_mode(struct gk20a *g)
{
	return is_tegra_hypervisor_mode();
}

bool nvgpu_is_bpmp_running(struct gk20a *g)
{
	return tegra_bpmp_running();
}

bool nvgpu_is_soc_t194_a01(struct gk20a *g)
{
	return ((tegra_get_chip_id() == TEGRA194 &&
		tegra_chip_get_revision() == TEGRA194_REVISION_A01) ?
		true : false);
}

#ifdef CONFIG_TEGRA_HV_MANAGER
/* When nvlink is enabled on dGPU, we need to use physical memory addresses.
 * There is no SMMU translation. However, the device initially enumerates as a
 * PCIe device. As such, when allocating memory for this PCIe device, the DMA
 * framework ends up allocating memory using SMMU (if enabled in device tree).
 * As a result, when we switch to nvlink, we need to use the underlying
 * physical addresses, even if memory mappings exist in SMMU.
 * In addition, when stage-2 SMMU translation is enabled (for instance when HV
 * is enabled), the addresses we get from dma_alloc are IPAs. We need to
 * convert them to PA.
 */
static u64 nvgpu_tegra_hv_ipa_pa(struct gk20a *g, u64 ipa)
{
	struct device *dev = dev_from_gk20a(g);
	struct gk20a_platform *platform = gk20a_get_platform(dev);
	struct hyp_ipa_pa_info info;
	int err;
	u64 pa = 0ULL;

	err = hyp_read_ipa_pa_info(&info, platform->vmid, ipa);
	if (err < 0) {
		/* WAR for bug 2096877
		 * hyp_read_ipa_pa_info only looks up RAM mappings.
		 * Assume a one-to-one IPA:PA mapping for the syncpt aperture.
		 */
		u64 start = g->syncpt_unit_base;
		u64 end = g->syncpt_unit_base + g->syncpt_unit_size;
		if ((ipa >= start) && (ipa < end)) {
			pa = ipa;
			nvgpu_log(g, gpu_dbg_map_v,
				"ipa=%llx vmid=%d -> pa=%llx (SYNCPT)\n",
				ipa, platform->vmid, pa);
		} else {
			nvgpu_err(g, "ipa=%llx translation failed vmid=%u err=%d",
				ipa, platform->vmid, err);
		}
	} else {
		pa = info.base + info.offset;
		nvgpu_log(g, gpu_dbg_map_v,
			"ipa=%llx vmid=%d -> pa=%llx "
			"base=%llx offset=%llx size=%llx\n",
			ipa, platform->vmid, pa, info.base,
			info.offset, info.size);
	}
	return pa;
}
#endif
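
/* Editorial sketch (not part of this commit): platform->phys_addr, installed
 * by nvgpu_init_soc_vars() below, acts as an optional IPA->PA hook. A
 * consumer of DMA addresses might apply it via a hypothetical helper like
 * this one.
 */
static inline u64 nvgpu_dma_addr_to_phys_sketch(struct gk20a *g,
		struct gk20a_platform *platform, u64 dma_addr)
{
	return platform->phys_addr ? platform->phys_addr(g, dma_addr)
				   : dma_addr;
}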

int nvgpu_init_soc_vars(struct gk20a *g)
{
#ifdef CONFIG_TEGRA_HV_MANAGER
	struct device *dev = dev_from_gk20a(g);
	struct gk20a_platform *platform = gk20a_get_platform(dev);
	int err;

	if (nvgpu_is_hypervisor_mode(g)) {
		err = hyp_read_gid(&platform->vmid);
		if (err) {
			nvgpu_err(g, "failed to read vmid");
			return err;
		}
		platform->phys_addr = nvgpu_tegra_hv_ipa_pa;
	}
#endif
	return 0;
}
419
drivers/gpu/nvgpu/os/linux/sync_sema_android.c
Normal file
@@ -0,0 +1,419 @@
/*
 * Semaphore Sync Framework Integration
 *
 * Copyright (c) 2017-2018, NVIDIA Corporation.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/file.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/module.h>
#include <nvgpu/lock.h>

#include <nvgpu/kmem.h>
#include <nvgpu/semaphore.h>
#include <nvgpu/bug.h>
#include <nvgpu/kref.h>
#include "../linux/channel.h"

#include "../drivers/staging/android/sync.h"

#include "sync_sema_android.h"

static const struct sync_timeline_ops gk20a_sync_timeline_ops;

struct gk20a_sync_timeline {
	struct sync_timeline obj;
	u32 max;
	u32 min;
};

/**
 * The sync framework dups pts when merging fences. We share a single
 * refcounted gk20a_sync_pt for each duped pt.
 */
struct gk20a_sync_pt {
	struct gk20a *g;
	struct nvgpu_ref refcount;
	u32 thresh;
	struct nvgpu_semaphore *sema;
	struct gk20a_sync_timeline *obj;

	/*
	 * Use a spin lock here since it will have better performance
	 * than a mutex - there should be very little contention on this
	 * lock.
	 */
	struct nvgpu_spinlock lock;
};

struct gk20a_sync_pt_inst {
	struct sync_pt pt;
	struct gk20a_sync_pt *shared;
};
/**
 * Compares sync pt values a and b, both of which will trigger either before
 * or after ref (i.e. a and b trigger before ref, or a and b trigger after
 * ref). Supplying ref allows us to handle wrapping correctly.
 *
 * Returns -1 if a < b (a triggers before b)
 *	     0 if a = b (a and b trigger at the same time)
 *	     1 if a > b (b triggers before a)
 */
static int __gk20a_sync_pt_compare_ref(
	u32 ref,
	u32 a,
	u32 b)
{
	/*
	 * We normalize both a and b by subtracting ref from them.
	 * Denote the normalized values by a_n and b_n. Note that because
	 * of wrapping, a_n and/or b_n may be negative.
	 *
	 * The normalized values a_n and b_n satisfy:
	 * - a positive value triggers before a negative value
	 * - a smaller positive value triggers before a greater positive value
	 * - a smaller negative value (greater in absolute value) triggers
	 *   before a greater negative value (smaller in absolute value).
	 *
	 * Thus we can just stick to unsigned arithmetic and compare
	 * (u32)a_n to (u32)b_n.
	 *
	 * Just to reiterate the possible cases:
	 *
	 *	1A) ...ref..a....b....
	 *	1B) ...ref..b....a....
	 *	2A) ...b....ref..a....	b_n < 0
	 *	2B) ...a....ref..b....	a_n > 0
	 *	3A) ...a....b....ref..	a_n < 0, b_n < 0
	 *	3B) ...b....a....ref..	a_n < 0, b_n < 0
	 */
	u32 a_n = a - ref;
	u32 b_n = b - ref;
	if (a_n < b_n)
		return -1;
	else if (a_n > b_n)
		return 1;
	else
		return 0;
}
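
/* Editorial note: a worked instance of the wrap-safe compare above. With
 * ref = 0xfffffff0, a = 0xfffffff8 and b = 0x00000008, both points trigger
 * after ref even though b has wrapped past zero:
 *   a_n = a - ref = 0x08, b_n = b - ref = 0x18, so a_n < b_n and the
 * function returns -1: a triggers before b, as expected. A naive comparison
 * of the raw values would order them the other way around.
 */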

static struct gk20a_sync_pt *to_gk20a_sync_pt(struct sync_pt *pt)
{
	struct gk20a_sync_pt_inst *pti =
		container_of(pt, struct gk20a_sync_pt_inst, pt);
	return pti->shared;
}
static struct gk20a_sync_timeline *to_gk20a_timeline(struct sync_timeline *obj)
{
	if (WARN_ON(obj->ops != &gk20a_sync_timeline_ops))
		return NULL;
	return (struct gk20a_sync_timeline *)obj;
}

static void gk20a_sync_pt_free_shared(struct nvgpu_ref *ref)
{
	struct gk20a_sync_pt *pt =
		container_of(ref, struct gk20a_sync_pt, refcount);
	struct gk20a *g = pt->g;

	if (pt->sema)
		nvgpu_semaphore_put(pt->sema);
	nvgpu_kfree(g, pt);
}

static struct gk20a_sync_pt *gk20a_sync_pt_create_shared(
		struct gk20a *g,
		struct gk20a_sync_timeline *obj,
		struct nvgpu_semaphore *sema)
{
	struct gk20a_sync_pt *shared;

	shared = nvgpu_kzalloc(g, sizeof(*shared));
	if (!shared)
		return NULL;

	nvgpu_ref_init(&shared->refcount);
	shared->g = g;
	shared->obj = obj;
	shared->sema = sema;
	shared->thresh = ++obj->max; /* sync framework has a lock */

	nvgpu_spinlock_init(&shared->lock);

	nvgpu_semaphore_get(sema);

	return shared;
}

static struct sync_pt *gk20a_sync_pt_create_inst(
		struct gk20a *g,
		struct gk20a_sync_timeline *obj,
		struct nvgpu_semaphore *sema)
{
	struct gk20a_sync_pt_inst *pti;

	pti = (struct gk20a_sync_pt_inst *)
		sync_pt_create(&obj->obj, sizeof(*pti));
	if (!pti)
		return NULL;

	pti->shared = gk20a_sync_pt_create_shared(g, obj, sema);
	if (!pti->shared) {
		sync_pt_free(&pti->pt);
		return NULL;
	}
	return &pti->pt;
}

static void gk20a_sync_pt_free_inst(struct sync_pt *sync_pt)
{
	struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
	if (pt)
		nvgpu_ref_put(&pt->refcount, gk20a_sync_pt_free_shared);
}

static struct sync_pt *gk20a_sync_pt_dup_inst(struct sync_pt *sync_pt)
{
	struct gk20a_sync_pt_inst *pti;
	struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);

	pti = (struct gk20a_sync_pt_inst *)
		sync_pt_create(&pt->obj->obj, sizeof(*pti));
	if (!pti)
		return NULL;
	pti->shared = pt;
	nvgpu_ref_get(&pt->refcount);
	return &pti->pt;
}

/*
 * This function must be able to run on the same sync_pt concurrently. This
 * requires a lock to protect access to the sync_pt's internal data structures
 * which are modified as a side effect of calling this function.
 */
static int gk20a_sync_pt_has_signaled(struct sync_pt *sync_pt)
{
	struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
	struct gk20a_sync_timeline *obj = pt->obj;
	bool signaled = true;

	nvgpu_spinlock_acquire(&pt->lock);
	if (!pt->sema)
		goto done;

	/* Acquired == not released yet == active == not signaled. */
	signaled = !nvgpu_semaphore_is_acquired(pt->sema);

	if (signaled) {
		/* Update min if necessary. */
		if (__gk20a_sync_pt_compare_ref(obj->max, pt->thresh,
				obj->min) == 1)
			obj->min = pt->thresh;

		/* Release the semaphore to the pool. */
		nvgpu_semaphore_put(pt->sema);
		pt->sema = NULL;
	}
done:
	nvgpu_spinlock_release(&pt->lock);

	return signaled;
}

static int gk20a_sync_pt_compare(struct sync_pt *a, struct sync_pt *b)
{
	bool a_expired;
	bool b_expired;
	struct gk20a_sync_pt *pt_a = to_gk20a_sync_pt(a);
	struct gk20a_sync_pt *pt_b = to_gk20a_sync_pt(b);

	if (WARN_ON(pt_a->obj != pt_b->obj))
		return 0;

	/* Early out */
	if (a == b)
		return 0;

	a_expired = gk20a_sync_pt_has_signaled(a);
	b_expired = gk20a_sync_pt_has_signaled(b);
	if (a_expired && !b_expired) {
		/* Easy, a was earlier */
		return -1;
	} else if (!a_expired && b_expired) {
		/* Easy, b was earlier */
		return 1;
	}

	/* Both a and b are expired (trigger before min) or not
	 * expired (trigger after min), so we can use min
	 * as a reference value for __gk20a_sync_pt_compare_ref.
	 */
	return __gk20a_sync_pt_compare_ref(pt_a->obj->min,
			pt_a->thresh, pt_b->thresh);
}

static u32 gk20a_sync_timeline_current(struct gk20a_sync_timeline *obj)
{
	return obj->min;
}

static void gk20a_sync_timeline_value_str(struct sync_timeline *timeline,
		char *str, int size)
{
	struct gk20a_sync_timeline *obj =
		(struct gk20a_sync_timeline *)timeline;
	snprintf(str, size, "%d", gk20a_sync_timeline_current(obj));
}

static void gk20a_sync_pt_value_str_for_sema(struct gk20a_sync_pt *pt,
		char *str, int size)
{
	struct nvgpu_semaphore *s = pt->sema;

	snprintf(str, size, "S: pool=%d [v=%u,r_v=%u]",
		s->location.pool->page_idx,
		nvgpu_semaphore_get_value(s),
		nvgpu_semaphore_read(s));
}

static void gk20a_sync_pt_value_str(struct sync_pt *sync_pt, char *str,
		int size)
{
	struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);

	if (pt->sema) {
		gk20a_sync_pt_value_str_for_sema(pt, str, size);
		return;
	}

	snprintf(str, size, "%d", pt->thresh);
}

static const struct sync_timeline_ops gk20a_sync_timeline_ops = {
	.driver_name = "nvgpu_semaphore",
	.dup = gk20a_sync_pt_dup_inst,
	.has_signaled = gk20a_sync_pt_has_signaled,
	.compare = gk20a_sync_pt_compare,
	.free_pt = gk20a_sync_pt_free_inst,
	.timeline_value_str = gk20a_sync_timeline_value_str,
	.pt_value_str = gk20a_sync_pt_value_str,
};

/* Public API */

struct sync_fence *gk20a_sync_fence_fdget(int fd)
{
	struct sync_fence *fence = sync_fence_fdget(fd);
	int i;

	if (!fence)
		return NULL;

	for (i = 0; i < fence->num_fences; i++) {
		struct fence *pt = fence->cbs[i].sync_pt;
		struct sync_pt *spt = sync_pt_from_fence(pt);
		struct sync_timeline *t;

		if (spt == NULL) {
			sync_fence_put(fence);
			return NULL;
		}

		t = sync_pt_parent(spt);
		if (t->ops != &gk20a_sync_timeline_ops) {
			sync_fence_put(fence);
			return NULL;
		}
	}

	return fence;
}

struct nvgpu_semaphore *gk20a_sync_pt_sema(struct sync_pt *spt)
{
	struct gk20a_sync_pt *pt = to_gk20a_sync_pt(spt);
	struct nvgpu_semaphore *sema;

	nvgpu_spinlock_acquire(&pt->lock);
	sema = pt->sema;
	if (sema)
		nvgpu_semaphore_get(sema);
	nvgpu_spinlock_release(&pt->lock);

	return sema;
}

void gk20a_sync_timeline_signal(struct sync_timeline *timeline)
{
	sync_timeline_signal(timeline, 0);
}

void gk20a_sync_timeline_destroy(struct sync_timeline *timeline)
{
	sync_timeline_destroy(timeline);
}

struct sync_timeline *gk20a_sync_timeline_create(
		const char *name)
{
	struct gk20a_sync_timeline *obj;

	obj = (struct gk20a_sync_timeline *)
		sync_timeline_create(&gk20a_sync_timeline_ops,
				     sizeof(struct gk20a_sync_timeline),
				     name);
	if (!obj)
		return NULL;
	obj->max = 0;
	obj->min = 0;
	return &obj->obj;
}

struct sync_fence *gk20a_sync_fence_create(
		struct channel_gk20a *c,
		struct nvgpu_semaphore *sema,
		const char *fmt, ...)
{
	char name[30];
	va_list args;
	struct sync_pt *pt;
	struct sync_fence *fence;
	struct gk20a *g = c->g;

	struct nvgpu_channel_linux *os_channel_priv = c->os_priv;
	struct nvgpu_os_fence_framework *fence_framework = NULL;
	struct gk20a_sync_timeline *timeline = NULL;

	fence_framework = &os_channel_priv->fence_framework;

	timeline = to_gk20a_timeline(fence_framework->timeline);

	pt = gk20a_sync_pt_create_inst(g, timeline, sema);
	if (pt == NULL)
		return NULL;

	va_start(args, fmt);
	vsnprintf(name, sizeof(name), fmt, args);
	va_end(args);

	fence = sync_fence_create(name, pt);
	if (fence == NULL) {
		sync_pt_free(pt);
		return NULL;
	}
	return fence;
}
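
/* Editorial sketch (not part of this commit): hypothetical in-driver usage
 * of the public API above, creating a fence for a submitted job and handing
 * it to userspace as an fd. sync_fence_install() and get_unused_fd_flags()
 * are the standard Android sync/fd helpers of this kernel era.
 */
static int example_install_job_fence(struct channel_gk20a *c,
		struct nvgpu_semaphore *sema)
{
	struct sync_fence *fence;
	int fd;

	fence = gk20a_sync_fence_create(c, sema, "ch%d-job", c->chid);
	if (!fence)
		return -ENOMEM;

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0) {
		sync_fence_put(fence);
		return fd;
	}
	sync_fence_install(fence, fd);
	return fd;
}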
Some files were not shown because too many files have changed in this diff.