gpu: nvgpu: Move Linux files away from common

Move all Linux source code files from drivers/gpu/nvgpu/common/linux
to drivers/gpu/nvgpu/os/linux. With this change, "common" holds only
OS-independent code.

JIRA NVGPU-598
JIRA NVGPU-601

Change-Id: Ib7f2a43d3688bb0d0b7dcc48469a6783fd988ce9
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1747714
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Terje Bergstrom
2018-04-18 12:59:00 -07:00
committed by mobile promotions
parent 98d996f4ff
commit 2a2c16af5f
124 changed files with 147 additions and 134 deletions


File diff suppressed because it is too large.


@@ -0,0 +1,326 @@
/*
* GK20A color decompression engine support
*
* Copyright (c) 2014-2017, NVIDIA Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _CDE_GK20A_H_
#define _CDE_GK20A_H_
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/list.h>
#include <nvgpu/lock.h>
#include <linux/kobject.h>
#include <linux/workqueue.h>
#define MAX_CDE_BUFS 10
#define MAX_CDE_PARAMS 64
#define MAX_CDE_USER_PARAMS 40
#define MAX_CDE_ARRAY_ENTRIES 9
/*
* The size of the context ring buffer that is dedicated for handling cde
* jobs. Re-using a context (=channel) for a different cde job forces a cpu
* wait on the previous job submitted to that channel, so increasing this value
* reduces the likelihood of stalls.
*/
#define NUM_CDE_CONTEXTS 4
struct dma_buf;
struct device;
struct nvgpu_os_linux;
struct gk20a;
struct gk20a_fence;
struct nvgpu_channel_fence;
struct channel_gk20a;
struct vm_gk20a;
struct nvgpu_gpfifo_entry;
/*
* This element defines a buffer that is allocated and mapped into gpu address
* space. data_byte_offset defines the beginning of the buffer inside the
* firmware. num_bytes defines how many bytes the firmware contains.
*
* If data_byte_offset is zero, we allocate an empty buffer.
*/
struct gk20a_cde_hdr_buf {
u64 data_byte_offset;
u64 num_bytes;
};
/*
* This element defines constant patching of buffers. It computes the
* physical address of <source_buf>+source_byte_offset. The
* address is then modified into patch value as per:
* value = (current_value & ~mask) | (address << shift) & mask .
*
* The type field defines the register size as:
* 0=u32,
* 1=u64 (little endian),
* 2=u64 (big endian)
*/
struct gk20a_cde_hdr_replace {
u32 target_buf;
u32 source_buf;
s32 shift;
u32 type;
u64 target_byte_offset;
u64 source_byte_offset;
u64 mask;
};
enum {
TYPE_PARAM_TYPE_U32 = 0,
TYPE_PARAM_TYPE_U64_LITTLE,
TYPE_PARAM_TYPE_U64_BIG
};
/*
* This element defines runtime patching of buffers. Parameters with ids from
* 0 to 1024 are reserved for special usage as follows:
* 0 = comptags_per_cacheline,
* 1 = slices_per_fbp,
* 2 = num_fbps
* 3 = source buffer first page offset
* 4 = source buffer block height log2
* 5 = backing store memory address
* 6 = destination memory address
* 7 = destination size (bytes)
* 8 = backing store size (bytes)
* 9 = cache line size
*
* Parameters above id 1024 are user-specified, i.e. they determine where
* parameters from user space should be placed in buffers, what their
* type is, etc.
*
* Once the value is available, we add data_offset to the value.
*
* The value address is then modified into patch value as per:
* value = (current_value & ~mask) | (address << shift) & mask .
*
* The type field defines the register size as:
* 0=u32,
* 1=u64 (little endian),
* 2=u64 (big endian)
*/
struct gk20a_cde_hdr_param {
u32 id;
u32 target_buf;
s32 shift;
u32 type;
s64 data_offset;
u64 target_byte_offset;
u64 mask;
};
enum {
TYPE_PARAM_COMPTAGS_PER_CACHELINE = 0,
TYPE_PARAM_GPU_CONFIGURATION,
TYPE_PARAM_FIRSTPAGEOFFSET,
TYPE_PARAM_NUMPAGES,
TYPE_PARAM_BACKINGSTORE,
TYPE_PARAM_DESTINATION,
TYPE_PARAM_DESTINATION_SIZE,
TYPE_PARAM_BACKINGSTORE_SIZE,
TYPE_PARAM_SOURCE_SMMU_ADDR,
TYPE_PARAM_BACKINGSTORE_BASE_HW,
TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE,
TYPE_PARAM_SCATTERBUFFER,
TYPE_PARAM_SCATTERBUFFER_SIZE,
NUM_RESERVED_PARAMS = 1024,
};
/*
* This header element defines a command. The op field determines whether the
* element is defining an init (0) or convert command (1). data_byte_offset
* denotes the beginning address of command elements in the file.
*/
struct gk20a_cde_hdr_command {
u32 op;
u32 num_entries;
u64 data_byte_offset;
};
enum {
TYPE_BUF_COMMAND_INIT = 0,
TYPE_BUF_COMMAND_CONVERT,
TYPE_BUF_COMMAND_NOOP
};
/*
* This command element defines one entry inside the push buffer. target_buf
* identifies the buffer holding the pushbuffer entries, target_byte_offset the
* offset inside that buffer, and num_bytes the number of words in the buffer.
*/
struct gk20a_cde_cmd_elem {
u32 target_buf;
u32 padding;
u64 target_byte_offset;
u64 num_bytes;
};
/*
* This element is used for storing a small array of data.
*/
enum {
ARRAY_PROGRAM_OFFSET = 0,
ARRAY_REGISTER_COUNT,
ARRAY_LAUNCH_COMMAND,
NUM_CDE_ARRAYS
};
struct gk20a_cde_hdr_array {
u32 id;
u32 data[MAX_CDE_ARRAY_ENTRIES];
};
/*
* The following defines a single header element. Each element has a type and
* one of the data structures in the union below.
*/
struct gk20a_cde_hdr_elem {
u32 type;
u32 padding;
union {
struct gk20a_cde_hdr_buf buf;
struct gk20a_cde_hdr_replace replace;
struct gk20a_cde_hdr_param param;
u32 required_class;
struct gk20a_cde_hdr_command command;
struct gk20a_cde_hdr_array array;
};
};
enum {
TYPE_BUF = 0,
TYPE_REPLACE,
TYPE_PARAM,
TYPE_REQUIRED_CLASS,
TYPE_COMMAND,
TYPE_ARRAY
};
struct gk20a_cde_param {
u32 id;
u32 padding;
u64 value;
};
struct gk20a_cde_ctx {
struct nvgpu_os_linux *l;
struct device *dev;
/* channel related data */
struct channel_gk20a *ch;
struct tsg_gk20a *tsg;
struct vm_gk20a *vm;
/* buf converter configuration */
struct nvgpu_mem mem[MAX_CDE_BUFS];
unsigned int num_bufs;
/* buffer patching params (where should patching be done) */
struct gk20a_cde_hdr_param params[MAX_CDE_PARAMS];
unsigned int num_params;
/* storage for user space parameter values */
u32 user_param_values[MAX_CDE_USER_PARAMS];
u32 surf_param_offset;
u32 surf_param_lines;
u64 surf_vaddr;
u64 compbit_vaddr;
u64 compbit_size;
u64 scatterbuffer_vaddr;
u64 scatterbuffer_size;
u64 backing_store_vaddr;
struct nvgpu_gpfifo_entry *init_convert_cmd;
int init_cmd_num_entries;
struct nvgpu_gpfifo_entry *convert_cmd;
int convert_cmd_num_entries;
struct kobj_attribute attr;
bool init_cmd_executed;
struct nvgpu_list_node list;
bool is_temporary;
bool in_use;
struct delayed_work ctx_deleter_work;
};
static inline struct gk20a_cde_ctx *
gk20a_cde_ctx_from_list(struct nvgpu_list_node *node)
{
return (struct gk20a_cde_ctx *)
((uintptr_t)node - offsetof(struct gk20a_cde_ctx, list));
};
struct gk20a_cde_app {
bool initialised;
struct nvgpu_mutex mutex;
struct nvgpu_list_node free_contexts;
struct nvgpu_list_node used_contexts;
unsigned int ctx_count;
unsigned int ctx_usecount;
unsigned int ctx_count_top;
u32 firmware_version;
u32 arrays[NUM_CDE_ARRAYS][MAX_CDE_ARRAY_ENTRIES];
u32 shader_parameter;
};
void gk20a_cde_destroy(struct nvgpu_os_linux *l);
void gk20a_cde_suspend(struct nvgpu_os_linux *l);
int gk20a_init_cde_support(struct nvgpu_os_linux *l);
int gk20a_cde_reload(struct nvgpu_os_linux *l);
int gk20a_cde_convert(struct nvgpu_os_linux *l,
struct dma_buf *compbits_buf,
u64 compbits_byte_offset,
u64 scatterbuffer_byte_offset,
struct nvgpu_channel_fence *fence,
u32 __flags, struct gk20a_cde_param *params,
int num_params, struct gk20a_fence **fence_out);
int gk20a_prepare_compressible_read(
struct nvgpu_os_linux *l, u32 buffer_fd, u32 request, u64 offset,
u64 compbits_hoffset, u64 compbits_voffset,
u64 scatterbuffer_offset,
u32 width, u32 height, u32 block_height_log2,
u32 submit_flags, struct nvgpu_channel_fence *fence,
u32 *valid_compbits, u32 *zbc_color,
struct gk20a_fence **fence_out);
int gk20a_mark_compressible_write(
struct gk20a *g, u32 buffer_fd, u32 valid_compbits, u64 offset,
u32 zbc_color);
int nvgpu_cde_init_ops(struct nvgpu_os_linux *l);
#endif
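The patch formula documented in the header comments above, value = (current_value & ~mask) | (address << shift) & mask, is easier to follow with a small standalone sketch. The helper below is illustrative only and is not part of the driver; it also assumes, based on the signed s32 shift field, that a negative shift means a right shift.

#include <stdint.h>

/*
 * Illustrative sketch (not driver code): apply one CDE-style patch to a
 * 64-bit word. 'address' is the value being patched in (e.g. a buffer
 * address plus source_byte_offset); 'shift' and 'mask' follow the formula
 * documented above. A negative shift is assumed to mean a right shift.
 */
static uint64_t apply_cde_patch(uint64_t current_value, uint64_t address,
				int32_t shift, uint64_t mask)
{
	uint64_t shifted = (shift >= 0) ? (address << shift)
					: (address >> -shift);

	return (current_value & ~mask) | (shifted & mask);
}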


@@ -0,0 +1,64 @@
/*
* GM20B CDE
*
* Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "gk20a/gk20a.h"
#include "cde_gm20b.h"
enum programs {
PROG_HPASS = 0,
PROG_VPASS_LARGE = 1,
PROG_VPASS_SMALL = 2,
PROG_HPASS_DEBUG = 3,
PROG_VPASS_LARGE_DEBUG = 4,
PROG_VPASS_SMALL_DEBUG = 5,
PROG_PASSTHROUGH = 6,
};
static void gm20b_cde_get_program_numbers(struct gk20a *g,
u32 block_height_log2,
u32 shader_parameter,
int *hprog_out, int *vprog_out)
{
int hprog = PROG_HPASS;
int vprog = (block_height_log2 >= 2) ?
PROG_VPASS_LARGE : PROG_VPASS_SMALL;
if (shader_parameter == 1) {
hprog = PROG_PASSTHROUGH;
vprog = PROG_PASSTHROUGH;
} else if (shader_parameter == 2) {
hprog = PROG_HPASS_DEBUG;
vprog = (block_height_log2 >= 2) ?
PROG_VPASS_LARGE_DEBUG :
PROG_VPASS_SMALL_DEBUG;
}
*hprog_out = hprog;
*vprog_out = vprog;
}
struct nvgpu_os_linux_ops gm20b_cde_ops = {
.cde = {
.get_program_numbers = gm20b_cde_get_program_numbers,
},
};


@@ -0,0 +1,32 @@
/*
* GM20B CDE
*
* Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef _NVHOST_GM20B_CDE
#define _NVHOST_GM20B_CDE
#include "os_linux.h"
extern struct nvgpu_os_linux_ops gm20b_cde_ops;
#endif


@@ -0,0 +1,161 @@
/*
* GP10B CDE
*
* Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "gk20a/gk20a.h"
#include "cde_gp10b.h"
#include <nvgpu/log.h>
#include <nvgpu/dma.h>
enum gp10b_programs {
GP10B_PROG_HPASS = 0,
GP10B_PROG_HPASS_4K = 1,
GP10B_PROG_VPASS = 2,
GP10B_PROG_VPASS_4K = 3,
GP10B_PROG_HPASS_DEBUG = 4,
GP10B_PROG_HPASS_4K_DEBUG = 5,
GP10B_PROG_VPASS_DEBUG = 6,
GP10B_PROG_VPASS_4K_DEBUG = 7,
GP10B_PROG_PASSTHROUGH = 8,
};
void gp10b_cde_get_program_numbers(struct gk20a *g,
u32 block_height_log2,
u32 shader_parameter,
int *hprog_out, int *vprog_out)
{
int hprog, vprog;
if (shader_parameter == 1) {
hprog = GP10B_PROG_PASSTHROUGH;
vprog = GP10B_PROG_PASSTHROUGH;
} else {
hprog = GP10B_PROG_HPASS;
vprog = GP10B_PROG_VPASS;
if (shader_parameter == 2) {
hprog = GP10B_PROG_HPASS_DEBUG;
vprog = GP10B_PROG_VPASS_DEBUG;
}
if (!nvgpu_iommuable(g)) {
if (!g->mm.disable_bigpage) {
nvgpu_warn(g,
"When no IOMMU big pages cannot be used");
}
hprog |= 1;
vprog |= 1;
}
}
*hprog_out = hprog;
*vprog_out = vprog;
}
bool gp10b_need_scatter_buffer(struct gk20a *g)
{
return !nvgpu_iommuable(g);
}
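/*
 * Fold a 32-bit word down to 4 bits with successive XORs, then use
 * 0x6996 (binary 0110 1001 1001 0110) as a 16-entry lookup table of the
 * parity of a 4-bit value. The result is the overall parity of 'a'.
 */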
static u8 parity(u32 a)
{
a ^= a>>16u;
a ^= a>>8u;
a ^= a>>4u;
a &= 0xfu;
return (0x6996u >> a) & 1u;
}
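/*
 * For every 4K page of the surface, compute the parity of the physical
 * address bits selected by getSliceMaskGP10B and pack the resulting bits
 * into the scatter buffer, eight pages per byte. The buffer is only needed
 * when the GPU is not behind an IOMMU (see gp10b_need_scatter_buffer()).
 */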
int gp10b_populate_scatter_buffer(struct gk20a *g,
struct sg_table *sgt,
size_t surface_size,
void *scatter_buffer_ptr,
size_t scatter_buffer_size)
{
/* map scatter buffer to CPU VA and fill it */
const u32 page_size_log2 = 12;
const u32 page_size = 1 << page_size_log2;
const u32 page_size_shift = page_size_log2 - 7u;
/* 0011 1111 1111 1111 1111 1110 0100 1000 */
const u32 getSliceMaskGP10B = 0x3ffffe48;
u8 *scatter_buffer = scatter_buffer_ptr;
size_t i;
struct scatterlist *sg = NULL;
u8 d = 0;
size_t page = 0;
size_t pages_left;
surface_size = round_up(surface_size, page_size);
pages_left = surface_size >> page_size_log2;
if ((pages_left >> 3) > scatter_buffer_size)
return -ENOMEM;
for_each_sg(sgt->sgl, sg, sgt->nents, i) {
unsigned int j;
u64 surf_pa = sg_phys(sg);
unsigned int n = (int)(sg->length >> page_size_log2);
nvgpu_log(g, gpu_dbg_cde, "surfPA=0x%llx + %d pages", surf_pa, n);
for (j=0; j < n && pages_left > 0; j++, surf_pa += page_size) {
u32 addr = (((u32)(surf_pa>>7)) & getSliceMaskGP10B) >> page_size_shift;
u8 scatter_bit = parity(addr);
u8 bit = page & 7;
d |= scatter_bit << bit;
if (bit == 7) {
scatter_buffer[page >> 3] = d;
d = 0;
}
++page;
--pages_left;
}
if (pages_left == 0)
break;
}
/* write the last byte in case the number of pages is not divisible by 8 */
if ((page & 7) != 0)
scatter_buffer[page >> 3] = d;
if (nvgpu_log_mask_enabled(g, gpu_dbg_cde)) {
nvgpu_log(g, gpu_dbg_cde, "scatterBuffer content:");
for (i = 0; i < page >> 3; i++) {
nvgpu_log(g, gpu_dbg_cde, " %x", scatter_buffer[i]);
}
}
return 0;
}
struct nvgpu_os_linux_ops gp10b_cde_ops = {
.cde = {
.get_program_numbers = gp10b_cde_get_program_numbers,
.need_scatter_buffer = gp10b_need_scatter_buffer,
.populate_scatter_buffer = gp10b_populate_scatter_buffer,
},
};


@@ -0,0 +1,32 @@
/*
* GP10B CDE
*
* Copyright (c) 2015-2017, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef _NVHOST_GP10B_CDE
#define _NVHOST_GP10B_CDE
#include "os_linux.h"
extern struct nvgpu_os_linux_ops gp10b_cde_ops;
#endif


@@ -0,0 +1,155 @@
/*
* Copyright (c) 2017, NVIDIA Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <nvgpu/types.h>
#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
#include "gk20a/ce2_gk20a.h"
#include "gk20a/gk20a.h"
#include "channel.h"
static inline int gk20a_get_valid_launch_flags(struct gk20a *g, int launch_flags)
{
/* if there is no local memory available,
don't allow local-memory-related CE flags */
if (!g->mm.vidmem.size) {
launch_flags &= ~(NVGPU_CE_SRC_LOCATION_LOCAL_FB |
NVGPU_CE_DST_LOCATION_LOCAL_FB);
}
return launch_flags;
}
int gk20a_ce_execute_ops(struct gk20a *g,
u32 ce_ctx_id,
u64 src_buf,
u64 dst_buf,
u64 size,
unsigned int payload,
int launch_flags,
int request_operation,
u32 submit_flags,
struct gk20a_fence **gk20a_fence_out)
{
int ret = -EPERM;
struct gk20a_ce_app *ce_app = &g->ce_app;
struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save;
bool found = false;
u32 *cmd_buf_cpu_va;
u64 cmd_buf_gpu_va = 0;
u32 methodSize;
u32 cmd_buf_read_offset;
u32 dma_copy_class;
struct nvgpu_gpfifo_entry gpfifo;
struct nvgpu_channel_fence fence = {0, 0};
struct gk20a_fence *ce_cmd_buf_fence_out = NULL;
if (!ce_app->initialised || ce_app->app_state != NVGPU_CE_ACTIVE)
goto end;
nvgpu_mutex_acquire(&ce_app->app_mutex);
nvgpu_list_for_each_entry_safe(ce_ctx, ce_ctx_save,
&ce_app->allocated_contexts, gk20a_gpu_ctx, list) {
if (ce_ctx->ctx_id == ce_ctx_id) {
found = true;
break;
}
}
nvgpu_mutex_release(&ce_app->app_mutex);
if (!found) {
ret = -EINVAL;
goto end;
}
if (ce_ctx->gpu_ctx_state != NVGPU_CE_GPU_CTX_ALLOCATED) {
ret = -ENODEV;
goto end;
}
nvgpu_mutex_acquire(&ce_ctx->gpu_ctx_mutex);
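/*
 * Command buffer slots form a small ring of NVGPU_CE_MAX_INFLIGHT_JOBS
 * entries; each slot occupies NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF
 * bytes of cmd_buf_mem and has an associated post-fence that must be
 * waited on before the slot can be reused.
 */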
ce_ctx->cmd_buf_read_queue_offset %= NVGPU_CE_MAX_INFLIGHT_JOBS;
cmd_buf_read_offset = (ce_ctx->cmd_buf_read_queue_offset *
(NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF / sizeof(u32)));
cmd_buf_cpu_va = (u32 *)ce_ctx->cmd_buf_mem.cpu_va;
if (ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset]) {
struct gk20a_fence **prev_post_fence =
&ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset];
ret = gk20a_fence_wait(g, *prev_post_fence,
gk20a_get_gr_idle_timeout(g));
gk20a_fence_put(*prev_post_fence);
*prev_post_fence = NULL;
if (ret)
goto noop;
}
cmd_buf_gpu_va = (ce_ctx->cmd_buf_mem.gpu_va + (u64)(cmd_buf_read_offset * sizeof(u32)));
dma_copy_class = g->ops.get_litter_value(g, GPU_LIT_DMA_COPY_CLASS);
methodSize = gk20a_ce_prepare_submit(src_buf,
dst_buf,
size,
&cmd_buf_cpu_va[cmd_buf_read_offset],
NVGPU_CE_MAX_COMMAND_BUFF_BYTES_PER_KICKOFF,
payload,
gk20a_get_valid_launch_flags(g, launch_flags),
request_operation,
dma_copy_class);
if (methodSize) {
/* store the element into gpfifo */
gpfifo.entry0 =
u64_lo32(cmd_buf_gpu_va);
gpfifo.entry1 =
(u64_hi32(cmd_buf_gpu_va) |
pbdma_gp_entry1_length_f(methodSize));
/* always take the postfence as it is needed for protecting the ce context */
submit_flags |= NVGPU_SUBMIT_FLAGS_FENCE_GET;
nvgpu_smp_wmb();
ret = gk20a_submit_channel_gpfifo(ce_ctx->ch, &gpfifo, NULL,
1, submit_flags, &fence,
&ce_cmd_buf_fence_out, NULL);
if (!ret) {
ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset] =
ce_cmd_buf_fence_out;
if (gk20a_fence_out) {
gk20a_fence_get(ce_cmd_buf_fence_out);
*gk20a_fence_out = ce_cmd_buf_fence_out;
}
/* Next available command buffer queue Index */
++ce_ctx->cmd_buf_read_queue_offset;
}
} else {
ret = -ENOMEM;
}
noop:
nvgpu_mutex_release(&ce_ctx->gpu_ctx_mutex);
end:
return ret;
}


File diff suppressed because it is too large.


@@ -0,0 +1,96 @@
/*
* Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __NVGPU_CHANNEL_H__
#define __NVGPU_CHANNEL_H__
#include <linux/workqueue.h>
#include <linux/dma-buf.h>
#include <nvgpu/types.h>
struct channel_gk20a;
struct nvgpu_gpfifo;
struct nvgpu_submit_gpfifo_args;
struct nvgpu_channel_fence;
struct gk20a_fence;
struct fifo_profile_gk20a;
struct nvgpu_os_linux;
struct sync_fence;
struct sync_timeline;
struct nvgpu_channel_completion_cb {
/*
* Signal the channel owner via a callback, if set, in job cleanup with
* schedule_work. This means that something finished on the channel (perhaps
* more than one job).
*/
void (*fn)(struct channel_gk20a *, void *);
void *user_data;
/* Make access to the two above atomic */
struct nvgpu_spinlock lock;
/* Per-channel async work task, cannot reschedule itself */
struct work_struct work;
};
struct nvgpu_error_notifier {
struct dma_buf *dmabuf;
void *vaddr;
struct nvgpu_notification *notification;
struct nvgpu_mutex mutex;
};
/*
* This struct contains fence-related data,
* e.g. the sync_timeline for sync_fences.
*/
struct nvgpu_os_fence_framework {
struct sync_timeline *timeline;
};
struct nvgpu_channel_linux {
struct channel_gk20a *ch;
struct nvgpu_os_fence_framework fence_framework;
struct nvgpu_channel_completion_cb completion_cb;
struct nvgpu_error_notifier error_notifier;
struct dma_buf *cyclestate_buffer_handler;
};
u32 nvgpu_submit_gpfifo_user_flags_to_common_flags(u32 user_flags);
int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l);
void nvgpu_remove_channel_support_linux(struct nvgpu_os_linux *l);
struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g,
void (*update_fn)(struct channel_gk20a *, void *),
void *update_fn_data,
int runlist_id,
bool is_privileged_channel);
int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
struct nvgpu_gpfifo_entry *gpfifo,
struct nvgpu_submit_gpfifo_args *args,
u32 num_entries,
u32 flags,
struct nvgpu_channel_fence *fence,
struct gk20a_fence **fence_out,
struct fifo_profile_gk20a *profile);
#endif /* __NVGPU_CHANNEL_H__ */


@@ -0,0 +1,165 @@
/*
* Linux clock support
*
* Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/clk.h>
#include <soc/tegra/tegra-dvfs.h>
#include <soc/tegra/tegra-bpmp-dvfs.h>
#include "clk.h"
#include "os_linux.h"
#include "platform_gk20a.h"
#include "gk20a/gk20a.h"
static unsigned long nvgpu_linux_clk_get_rate(struct gk20a *g, u32 api_domain)
{
struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g));
unsigned long ret;
switch (api_domain) {
case CTRL_CLK_DOMAIN_GPCCLK:
if (g->clk.tegra_clk)
ret = g->clk.cached_rate ?
g->clk.cached_rate :
clk_get_rate(g->clk.tegra_clk);
else
ret = platform->cached_rate ?
platform->cached_rate :
clk_get_rate(platform->clk[0]);
break;
case CTRL_CLK_DOMAIN_PWRCLK:
ret = clk_get_rate(platform->clk[1]);
break;
default:
nvgpu_err(g, "unknown clock: %u", api_domain);
ret = 0;
break;
}
return ret;
}
static int nvgpu_linux_clk_set_rate(struct gk20a *g,
u32 api_domain, unsigned long rate)
{
struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g));
int ret;
switch (api_domain) {
case CTRL_CLK_DOMAIN_GPCCLK:
if (g->clk.tegra_clk) {
ret = clk_set_rate(g->clk.tegra_clk, rate);
if (!ret)
g->clk.cached_rate = rate;
} else {
ret = clk_set_rate(platform->clk[0], rate);
if (!ret)
platform->cached_rate = rate;
}
break;
case CTRL_CLK_DOMAIN_PWRCLK:
ret = clk_set_rate(platform->clk[1], rate);
break;
default:
nvgpu_err(g, "unknown clock: %u", api_domain);
ret = -EINVAL;
break;
}
return ret;
}
static unsigned long nvgpu_linux_get_fmax_at_vmin_safe(struct gk20a *g)
{
struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g));
/*
* On Tegra platforms with a GPCPLL bus (gbus), the GPU tegra_clk clock
* exposed to the frequency governor is a shared user on the gbus. The gbus
* itself can be accessed as the GPU clock parent, and incorporates DVFS
* related data.
*/
if (g->clk.tegra_clk)
return tegra_dvfs_get_fmax_at_vmin_safe_t(
clk_get_parent(g->clk.tegra_clk));
if (platform->maxmin_clk_id)
return tegra_bpmp_dvfs_get_fmax_at_vmin(
platform->maxmin_clk_id);
return 0;
}
static u32 nvgpu_linux_get_ref_clock_rate(struct gk20a *g)
{
struct clk *c;
c = clk_get_sys("gpu_ref", "gpu_ref");
if (IS_ERR(c)) {
nvgpu_err(g, "failed to get GPCPLL reference clock");
return 0;
}
return clk_get_rate(c);
}
static int nvgpu_linux_predict_mv_at_hz_cur_tfloor(struct clk_gk20a *clk,
unsigned long rate)
{
return tegra_dvfs_predict_mv_at_hz_cur_tfloor(
clk_get_parent(clk->tegra_clk), rate);
}
static unsigned long nvgpu_linux_get_maxrate(struct gk20a *g, u32 api_domain)
{
int ret;
switch (api_domain) {
case CTRL_CLK_DOMAIN_GPCCLK:
ret = tegra_dvfs_get_maxrate(clk_get_parent(g->clk.tegra_clk));
break;
default:
nvgpu_err(g, "unknown clock: %u", api_domain);
ret = 0;
break;
}
return ret;
}
static int nvgpu_linux_prepare_enable(struct clk_gk20a *clk)
{
return clk_prepare_enable(clk->tegra_clk);
}
static void nvgpu_linux_disable_unprepare(struct clk_gk20a *clk)
{
clk_disable_unprepare(clk->tegra_clk);
}
void nvgpu_linux_init_clk_support(struct gk20a *g)
{
g->ops.clk.get_rate = nvgpu_linux_clk_get_rate;
g->ops.clk.set_rate = nvgpu_linux_clk_set_rate;
g->ops.clk.get_fmax_at_vmin_safe = nvgpu_linux_get_fmax_at_vmin_safe;
g->ops.clk.get_ref_clock_rate = nvgpu_linux_get_ref_clock_rate;
g->ops.clk.predict_mv_at_hz_cur_tfloor = nvgpu_linux_predict_mv_at_hz_cur_tfloor;
g->ops.clk.get_maxrate = nvgpu_linux_get_maxrate;
g->ops.clk.prepare_enable = nvgpu_linux_prepare_enable;
g->ops.clk.disable_unprepare = nvgpu_linux_disable_unprepare;
}


@@ -0,0 +1,22 @@
/*
* Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef NVGPU_COMMON_LINUX_CLK_H
#define NVGPU_COMMON_LINUX_CLK_H
struct gk20a;
void nvgpu_linux_init_clk_support(struct gk20a *g);
#endif


@@ -0,0 +1,140 @@
/*
* Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/dma-buf.h>
#include <nvgpu/comptags.h>
#include <nvgpu/linux/vm.h>
#include "gk20a/gk20a.h"
#include "dmabuf.h"
void gk20a_get_comptags(struct nvgpu_os_buffer *buf,
struct gk20a_comptags *comptags)
{
struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf,
buf->dev);
if (!comptags)
return;
if (!priv) {
memset(comptags, 0, sizeof(*comptags));
return;
}
nvgpu_mutex_acquire(&priv->lock);
*comptags = priv->comptags;
nvgpu_mutex_release(&priv->lock);
}
int gk20a_alloc_or_get_comptags(struct gk20a *g,
struct nvgpu_os_buffer *buf,
struct gk20a_comptag_allocator *allocator,
struct gk20a_comptags *comptags)
{
struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf,
buf->dev);
u32 offset;
int err;
unsigned int ctag_granularity;
u32 lines;
if (!priv)
return -ENOSYS;
nvgpu_mutex_acquire(&priv->lock);
if (priv->comptags.allocated) {
/*
* already allocated
*/
*comptags = priv->comptags;
err = 0;
goto exit_locked;
}
ctag_granularity = g->ops.fb.compression_page_size(g);
lines = DIV_ROUND_UP_ULL(buf->dmabuf->size, ctag_granularity);
/* 0-sized buffer? Shouldn't occur, but let's check anyway. */
if (lines < 1) {
err = -EINVAL;
goto exit_locked;
}
/* store the allocator so we can use it when we free the ctags */
priv->comptag_allocator = allocator;
err = gk20a_comptaglines_alloc(allocator, &offset, lines);
if (!err) {
priv->comptags.offset = offset;
priv->comptags.lines = lines;
priv->comptags.needs_clear = true;
} else {
priv->comptags.offset = 0;
priv->comptags.lines = 0;
priv->comptags.needs_clear = false;
}
/*
* We don't report an error here if comptag alloc failed. The
* caller will simply fallback to incompressible kinds. It
* would not be safe to re-allocate comptags anyways on
* successive calls, as that would break map aliasing.
*/
err = 0;
priv->comptags.allocated = true;
*comptags = priv->comptags;
exit_locked:
nvgpu_mutex_release(&priv->lock);
return err;
}
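/*
 * Note on locking: when this returns true, priv->lock is intentionally left
 * held; the caller performs the clear and then calls
 * gk20a_comptags_finish_clear(), which releases the lock.
 */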
bool gk20a_comptags_start_clear(struct nvgpu_os_buffer *buf)
{
struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf,
buf->dev);
bool clear_started = false;
if (priv) {
nvgpu_mutex_acquire(&priv->lock);
clear_started = priv->comptags.needs_clear;
if (!clear_started)
nvgpu_mutex_release(&priv->lock);
}
return clear_started;
}
void gk20a_comptags_finish_clear(struct nvgpu_os_buffer *buf,
bool clear_successful)
{
struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(buf->dmabuf,
buf->dev);
if (priv) {
if (clear_successful)
priv->comptags.needs_clear = false;
nvgpu_mutex_release(&priv->lock);
}
}


@@ -0,0 +1,73 @@
/*
* Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/wait.h>
#include <linux/sched.h>
#include <nvgpu/cond.h>
int nvgpu_cond_init(struct nvgpu_cond *cond)
{
init_waitqueue_head(&cond->wq);
cond->initialized = true;
return 0;
}
void nvgpu_cond_destroy(struct nvgpu_cond *cond)
{
cond->initialized = false;
}
int nvgpu_cond_signal(struct nvgpu_cond *cond)
{
if (!cond->initialized)
return -EINVAL;
wake_up(&cond->wq);
return 0;
}
int nvgpu_cond_signal_interruptible(struct nvgpu_cond *cond)
{
if (!cond->initialized)
return -EINVAL;
wake_up_interruptible(&cond->wq);
return 0;
}
int nvgpu_cond_broadcast(struct nvgpu_cond *cond)
{
if (!cond->initialized)
return -EINVAL;
wake_up_all(&cond->wq);
return 0;
}
int nvgpu_cond_broadcast_interruptible(struct nvgpu_cond *cond)
{
if (!cond->initialized)
return -EINVAL;
wake_up_interruptible_all(&cond->wq);
return 0;
}
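As a rough usage sketch of this wrapper API: a waiter blocks with the NVGPU_COND_WAIT_INTERRUPTIBLE() macro from <nvgpu/cond.h> (used the same way in ctxsw_trace.c in this commit, which passes a timeout of 0), and a producer sets the condition and signals. The names below are illustrative only and are not part of this commit.

#include <nvgpu/cond.h>

/* Illustrative only: example_cond must first be set up with nvgpu_cond_init(). */
static struct nvgpu_cond example_cond;
static bool example_ready;

static int example_wait_for_ready(void)
{
	/* Sleep until example_ready becomes true; the trailing 0 is the
	 * timeout argument, passed as in gk20a_ctxsw_dev_read(). */
	return NVGPU_COND_WAIT_INTERRUPTIBLE(&example_cond, example_ready, 0);
}

static void example_mark_ready(void)
{
	example_ready = true;
	nvgpu_cond_signal_interruptible(&example_cond);
}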


@@ -0,0 +1,730 @@
/*
* Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/wait.h>
#include <linux/ktime.h>
#include <linux/uaccess.h>
#include <linux/poll.h>
#include <trace/events/gk20a.h>
#include <uapi/linux/nvgpu.h>
#include "gk20a/gk20a.h"
#include "gk20a/gr_gk20a.h"
#include <nvgpu/kmem.h>
#include <nvgpu/log.h>
#include <nvgpu/atomic.h>
#include <nvgpu/barrier.h>
#include "platform_gk20a.h"
#include "os_linux.h"
#include "ctxsw_trace.h"
#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
#define GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE (128*PAGE_SIZE)
/* Userland-facing FIFO (one global + eventually one per VM) */
struct gk20a_ctxsw_dev {
struct gk20a *g;
struct nvgpu_ctxsw_ring_header *hdr;
struct nvgpu_ctxsw_trace_entry *ents;
struct nvgpu_ctxsw_trace_filter filter;
bool write_enabled;
struct nvgpu_cond readout_wq;
size_t size;
u32 num_ents;
nvgpu_atomic_t vma_ref;
struct nvgpu_mutex write_lock;
};
struct gk20a_ctxsw_trace {
struct gk20a_ctxsw_dev devs[GK20A_CTXSW_TRACE_NUM_DEVS];
};
static inline int ring_is_empty(struct nvgpu_ctxsw_ring_header *hdr)
{
return (hdr->write_idx == hdr->read_idx);
}
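/*
 * One slot is always left unused so that a full ring (write_idx + 1 equals
 * read_idx modulo num_ents) can be distinguished from an empty one
 * (write_idx == read_idx).
 */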
static inline int ring_is_full(struct nvgpu_ctxsw_ring_header *hdr)
{
return ((hdr->write_idx + 1) % hdr->num_ents) == hdr->read_idx;
}
static inline int ring_len(struct nvgpu_ctxsw_ring_header *hdr)
{
return (hdr->write_idx - hdr->read_idx) % hdr->num_ents;
}
ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size,
loff_t *off)
{
struct gk20a_ctxsw_dev *dev = filp->private_data;
struct gk20a *g = dev->g;
struct nvgpu_ctxsw_ring_header *hdr = dev->hdr;
struct nvgpu_ctxsw_trace_entry __user *entry =
(struct nvgpu_ctxsw_trace_entry *) buf;
size_t copied = 0;
int err;
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw,
"filp=%p buf=%p size=%zu", filp, buf, size);
nvgpu_mutex_acquire(&dev->write_lock);
while (ring_is_empty(hdr)) {
nvgpu_mutex_release(&dev->write_lock);
if (filp->f_flags & O_NONBLOCK)
return -EAGAIN;
err = NVGPU_COND_WAIT_INTERRUPTIBLE(&dev->readout_wq,
!ring_is_empty(hdr), 0);
if (err)
return err;
nvgpu_mutex_acquire(&dev->write_lock);
}
while (size >= sizeof(struct nvgpu_ctxsw_trace_entry)) {
if (ring_is_empty(hdr))
break;
if (copy_to_user(entry, &dev->ents[hdr->read_idx],
sizeof(*entry))) {
nvgpu_mutex_release(&dev->write_lock);
return -EFAULT;
}
hdr->read_idx++;
if (hdr->read_idx >= hdr->num_ents)
hdr->read_idx = 0;
entry++;
copied += sizeof(*entry);
size -= sizeof(*entry);
}
nvgpu_log(g, gpu_dbg_ctxsw, "copied=%zu read_idx=%d", copied,
hdr->read_idx);
*off = hdr->read_idx;
nvgpu_mutex_release(&dev->write_lock);
return copied;
}
static int gk20a_ctxsw_dev_ioctl_trace_enable(struct gk20a_ctxsw_dev *dev)
{
struct gk20a *g = dev->g;
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "trace enabled");
nvgpu_mutex_acquire(&dev->write_lock);
dev->write_enabled = true;
nvgpu_mutex_release(&dev->write_lock);
dev->g->ops.fecs_trace.enable(dev->g);
return 0;
}
static int gk20a_ctxsw_dev_ioctl_trace_disable(struct gk20a_ctxsw_dev *dev)
{
struct gk20a *g = dev->g;
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "trace disabled");
dev->g->ops.fecs_trace.disable(dev->g);
nvgpu_mutex_acquire(&dev->write_lock);
dev->write_enabled = false;
nvgpu_mutex_release(&dev->write_lock);
return 0;
}
static int gk20a_ctxsw_dev_alloc_buffer(struct gk20a_ctxsw_dev *dev,
size_t size)
{
struct gk20a *g = dev->g;
void *buf;
int err;
if ((dev->write_enabled) || (nvgpu_atomic_read(&dev->vma_ref)))
return -EBUSY;
err = g->ops.fecs_trace.alloc_user_buffer(g, &buf, &size);
if (err)
return err;
dev->hdr = buf;
dev->ents = (struct nvgpu_ctxsw_trace_entry *) (dev->hdr + 1);
dev->size = size;
dev->num_ents = dev->hdr->num_ents;
nvgpu_log(g, gpu_dbg_ctxsw, "size=%zu hdr=%p ents=%p num_ents=%d",
dev->size, dev->hdr, dev->ents, dev->hdr->num_ents);
return 0;
}
int gk20a_ctxsw_dev_ring_alloc(struct gk20a *g,
void **buf, size_t *size)
{
struct nvgpu_ctxsw_ring_header *hdr;
*size = roundup(*size, PAGE_SIZE);
hdr = vmalloc_user(*size);
if (!hdr)
return -ENOMEM;
hdr->magic = NVGPU_CTXSW_RING_HEADER_MAGIC;
hdr->version = NVGPU_CTXSW_RING_HEADER_VERSION;
hdr->num_ents = (*size - sizeof(struct nvgpu_ctxsw_ring_header))
/ sizeof(struct nvgpu_ctxsw_trace_entry);
hdr->ent_size = sizeof(struct nvgpu_ctxsw_trace_entry);
hdr->drop_count = 0;
hdr->read_idx = 0;
hdr->write_idx = 0;
hdr->write_seqno = 0;
*buf = hdr;
return 0;
}
int gk20a_ctxsw_dev_ring_free(struct gk20a *g)
{
struct gk20a_ctxsw_dev *dev = &g->ctxsw_trace->devs[0];
nvgpu_vfree(g, dev->hdr);
return 0;
}
static int gk20a_ctxsw_dev_ioctl_ring_setup(struct gk20a_ctxsw_dev *dev,
struct nvgpu_ctxsw_ring_setup_args *args)
{
struct gk20a *g = dev->g;
size_t size = args->size;
int ret;
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "size=%zu", size);
if (size > GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE)
return -EINVAL;
nvgpu_mutex_acquire(&dev->write_lock);
ret = gk20a_ctxsw_dev_alloc_buffer(dev, size);
nvgpu_mutex_release(&dev->write_lock);
return ret;
}
static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev,
struct nvgpu_ctxsw_trace_filter_args *args)
{
struct gk20a *g = dev->g;
nvgpu_mutex_acquire(&dev->write_lock);
dev->filter = args->filter;
nvgpu_mutex_release(&dev->write_lock);
if (g->ops.fecs_trace.set_filter)
g->ops.fecs_trace.set_filter(g, &dev->filter);
return 0;
}
static int gk20a_ctxsw_dev_ioctl_get_filter(struct gk20a_ctxsw_dev *dev,
struct nvgpu_ctxsw_trace_filter_args *args)
{
nvgpu_mutex_acquire(&dev->write_lock);
args->filter = dev->filter;
nvgpu_mutex_release(&dev->write_lock);
return 0;
}
static int gk20a_ctxsw_dev_ioctl_poll(struct gk20a_ctxsw_dev *dev)
{
struct gk20a *g = dev->g;
int err;
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " ");
err = gk20a_busy(g);
if (err)
return err;
if (g->ops.fecs_trace.flush)
err = g->ops.fecs_trace.flush(g);
if (likely(!err))
err = g->ops.fecs_trace.poll(g);
gk20a_idle(g);
return err;
}
int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp)
{
struct nvgpu_os_linux *l;
struct gk20a *g;
struct gk20a_ctxsw_trace *trace;
struct gk20a_ctxsw_dev *dev;
int err;
size_t size;
u32 n;
/* only one VM for now */
const int vmid = 0;
l = container_of(inode->i_cdev, struct nvgpu_os_linux, ctxsw.cdev);
g = gk20a_get(&l->g);
if (!g)
return -ENODEV;
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p", g);
if (!capable(CAP_SYS_ADMIN)) {
err = -EPERM;
goto free_ref;
}
err = gk20a_busy(g);
if (err)
goto free_ref;
trace = g->ctxsw_trace;
if (!trace) {
err = -ENODEV;
goto idle;
}
/* Allow only one user for this device */
dev = &trace->devs[vmid];
nvgpu_mutex_acquire(&dev->write_lock);
if (dev->hdr) {
err = -EBUSY;
goto done;
}
/* By default, allocate ring buffer big enough to accommodate
* FECS records with default event filter */
/* enable all traces by default */
NVGPU_CTXSW_FILTER_SET_ALL(&dev->filter);
/* compute max number of entries generated with this filter */
n = g->ops.fecs_trace.max_entries(g, &dev->filter);
size = sizeof(struct nvgpu_ctxsw_ring_header) +
n * sizeof(struct nvgpu_ctxsw_trace_entry);
nvgpu_log(g, gpu_dbg_ctxsw, "size=%zu entries=%d ent_size=%zu",
size, n, sizeof(struct nvgpu_ctxsw_trace_entry));
err = gk20a_ctxsw_dev_alloc_buffer(dev, size);
if (!err) {
filp->private_data = dev;
nvgpu_log(g, gpu_dbg_ctxsw, "filp=%p dev=%p size=%zu",
filp, dev, size);
}
done:
nvgpu_mutex_release(&dev->write_lock);
idle:
gk20a_idle(g);
free_ref:
if (err)
gk20a_put(g);
return err;
}
int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp)
{
struct gk20a_ctxsw_dev *dev = filp->private_data;
struct gk20a *g = dev->g;
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "dev: %p", dev);
g->ops.fecs_trace.disable(g);
nvgpu_mutex_acquire(&dev->write_lock);
dev->write_enabled = false;
nvgpu_mutex_release(&dev->write_lock);
if (dev->hdr) {
dev->g->ops.fecs_trace.free_user_buffer(dev->g);
dev->hdr = NULL;
}
gk20a_put(g);
return 0;
}
long gk20a_ctxsw_dev_ioctl(struct file *filp, unsigned int cmd,
unsigned long arg)
{
struct gk20a_ctxsw_dev *dev = filp->private_data;
struct gk20a *g = dev->g;
u8 buf[NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE];
int err = 0;
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "nr=%d", _IOC_NR(cmd));
if ((_IOC_TYPE(cmd) != NVGPU_CTXSW_IOCTL_MAGIC) ||
(_IOC_NR(cmd) == 0) ||
(_IOC_NR(cmd) > NVGPU_CTXSW_IOCTL_LAST) ||
(_IOC_SIZE(cmd) > NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE))
return -EINVAL;
memset(buf, 0, sizeof(buf));
if (_IOC_DIR(cmd) & _IOC_WRITE) {
if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd)))
return -EFAULT;
}
switch (cmd) {
case NVGPU_CTXSW_IOCTL_TRACE_ENABLE:
err = gk20a_ctxsw_dev_ioctl_trace_enable(dev);
break;
case NVGPU_CTXSW_IOCTL_TRACE_DISABLE:
err = gk20a_ctxsw_dev_ioctl_trace_disable(dev);
break;
case NVGPU_CTXSW_IOCTL_RING_SETUP:
err = gk20a_ctxsw_dev_ioctl_ring_setup(dev,
(struct nvgpu_ctxsw_ring_setup_args *) buf);
break;
case NVGPU_CTXSW_IOCTL_SET_FILTER:
err = gk20a_ctxsw_dev_ioctl_set_filter(dev,
(struct nvgpu_ctxsw_trace_filter_args *) buf);
break;
case NVGPU_CTXSW_IOCTL_GET_FILTER:
err = gk20a_ctxsw_dev_ioctl_get_filter(dev,
(struct nvgpu_ctxsw_trace_filter_args *) buf);
break;
case NVGPU_CTXSW_IOCTL_POLL:
err = gk20a_ctxsw_dev_ioctl_poll(dev);
break;
default:
dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x",
cmd);
err = -ENOTTY;
}
if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd));
return err;
}
unsigned int gk20a_ctxsw_dev_poll(struct file *filp, poll_table *wait)
{
struct gk20a_ctxsw_dev *dev = filp->private_data;
struct gk20a *g = dev->g;
struct nvgpu_ctxsw_ring_header *hdr = dev->hdr;
unsigned int mask = 0;
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " ");
nvgpu_mutex_acquire(&dev->write_lock);
poll_wait(filp, &dev->readout_wq.wq, wait);
if (!ring_is_empty(hdr))
mask |= POLLIN | POLLRDNORM;
nvgpu_mutex_release(&dev->write_lock);
return mask;
}
static void gk20a_ctxsw_dev_vma_open(struct vm_area_struct *vma)
{
struct gk20a_ctxsw_dev *dev = vma->vm_private_data;
struct gk20a *g = dev->g;
nvgpu_atomic_inc(&dev->vma_ref);
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d",
nvgpu_atomic_read(&dev->vma_ref));
}
static void gk20a_ctxsw_dev_vma_close(struct vm_area_struct *vma)
{
struct gk20a_ctxsw_dev *dev = vma->vm_private_data;
struct gk20a *g = dev->g;
nvgpu_atomic_dec(&dev->vma_ref);
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d",
nvgpu_atomic_read(&dev->vma_ref));
}
static struct vm_operations_struct gk20a_ctxsw_dev_vma_ops = {
.open = gk20a_ctxsw_dev_vma_open,
.close = gk20a_ctxsw_dev_vma_close,
};
int gk20a_ctxsw_dev_mmap_buffer(struct gk20a *g,
struct vm_area_struct *vma)
{
return remap_vmalloc_range(vma, g->ctxsw_trace->devs[0].hdr, 0);
}
int gk20a_ctxsw_dev_mmap(struct file *filp, struct vm_area_struct *vma)
{
struct gk20a_ctxsw_dev *dev = filp->private_data;
struct gk20a *g = dev->g;
int ret;
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vm_start=%lx vm_end=%lx",
vma->vm_start, vma->vm_end);
ret = dev->g->ops.fecs_trace.mmap_user_buffer(dev->g, vma);
if (likely(!ret)) {
vma->vm_private_data = dev;
vma->vm_ops = &gk20a_ctxsw_dev_vma_ops;
vma->vm_ops->open(vma);
}
return ret;
}
#ifdef CONFIG_GK20A_CTXSW_TRACE
static int gk20a_ctxsw_init_devs(struct gk20a *g)
{
struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
struct gk20a_ctxsw_dev *dev = trace->devs;
int err;
int i;
for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) {
dev->g = g;
dev->hdr = NULL;
dev->write_enabled = false;
nvgpu_cond_init(&dev->readout_wq);
err = nvgpu_mutex_init(&dev->write_lock);
if (err)
return err;
nvgpu_atomic_set(&dev->vma_ref, 0);
dev++;
}
return 0;
}
#endif
int gk20a_ctxsw_trace_init(struct gk20a *g)
{
#ifdef CONFIG_GK20A_CTXSW_TRACE
struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
int err;
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p trace=%p", g, trace);
/* if tracing is not supported, skip this */
if (!g->ops.fecs_trace.init)
return 0;
if (likely(trace))
return 0;
trace = nvgpu_kzalloc(g, sizeof(*trace));
if (unlikely(!trace))
return -ENOMEM;
g->ctxsw_trace = trace;
err = gk20a_ctxsw_init_devs(g);
if (err)
goto fail;
err = g->ops.fecs_trace.init(g);
if (unlikely(err))
goto fail;
return 0;
fail:
memset(&g->ops.fecs_trace, 0, sizeof(g->ops.fecs_trace));
nvgpu_kfree(g, trace);
g->ctxsw_trace = NULL;
return err;
#else
return 0;
#endif
}
void gk20a_ctxsw_trace_cleanup(struct gk20a *g)
{
#ifdef CONFIG_GK20A_CTXSW_TRACE
struct gk20a_ctxsw_trace *trace;
struct gk20a_ctxsw_dev *dev;
int i;
if (!g->ctxsw_trace)
return;
trace = g->ctxsw_trace;
dev = trace->devs;
for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) {
nvgpu_mutex_destroy(&dev->write_lock);
dev++;
}
nvgpu_kfree(g, g->ctxsw_trace);
g->ctxsw_trace = NULL;
g->ops.fecs_trace.deinit(g);
#endif
}
int gk20a_ctxsw_trace_write(struct gk20a *g,
struct nvgpu_ctxsw_trace_entry *entry)
{
struct nvgpu_ctxsw_ring_header *hdr;
struct gk20a_ctxsw_dev *dev;
int ret = 0;
const char *reason;
u32 write_idx;
if (!g->ctxsw_trace)
return 0;
if (unlikely(entry->vmid >= GK20A_CTXSW_TRACE_NUM_DEVS))
return -ENODEV;
dev = &g->ctxsw_trace->devs[entry->vmid];
hdr = dev->hdr;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
"dev=%p hdr=%p", dev, hdr);
nvgpu_mutex_acquire(&dev->write_lock);
if (unlikely(!hdr)) {
/* device has been released */
ret = -ENODEV;
goto done;
}
write_idx = hdr->write_idx;
if (write_idx >= dev->num_ents) {
nvgpu_err(dev->g,
"write_idx=%u out of range [0..%u]",
write_idx, dev->num_ents);
ret = -ENOSPC;
reason = "write_idx out of range";
goto disable;
}
entry->seqno = hdr->write_seqno++;
if (!dev->write_enabled) {
ret = -EBUSY;
reason = "write disabled";
goto drop;
}
if (unlikely(ring_is_full(hdr))) {
ret = -ENOSPC;
reason = "user fifo full";
goto drop;
}
if (!NVGPU_CTXSW_FILTER_ISSET(entry->tag, &dev->filter)) {
reason = "filtered out";
goto filter;
}
nvgpu_log(g, gpu_dbg_ctxsw,
"seqno=%d context_id=%08x pid=%lld tag=%x timestamp=%llx",
entry->seqno, entry->context_id, entry->pid,
entry->tag, entry->timestamp);
dev->ents[write_idx] = *entry;
/* ensure record is written before updating write index */
nvgpu_smp_wmb();
write_idx++;
if (unlikely(write_idx >= hdr->num_ents))
write_idx = 0;
hdr->write_idx = write_idx;
nvgpu_log(g, gpu_dbg_ctxsw, "added: read=%d write=%d len=%d",
hdr->read_idx, hdr->write_idx, ring_len(hdr));
nvgpu_mutex_release(&dev->write_lock);
return ret;
disable:
g->ops.fecs_trace.disable(g);
drop:
hdr->drop_count++;
filter:
nvgpu_log(g, gpu_dbg_ctxsw,
"dropping seqno=%d context_id=%08x pid=%lld "
"tag=%x time=%llx (%s)",
entry->seqno, entry->context_id, entry->pid,
entry->tag, entry->timestamp, reason);
done:
nvgpu_mutex_release(&dev->write_lock);
return ret;
}
void gk20a_ctxsw_trace_wake_up(struct gk20a *g, int vmid)
{
struct gk20a_ctxsw_dev *dev;
if (!g->ctxsw_trace)
return;
dev = &g->ctxsw_trace->devs[vmid];
nvgpu_cond_signal_interruptible(&dev->readout_wq);
}
void gk20a_ctxsw_trace_channel_reset(struct gk20a *g, struct channel_gk20a *ch)
{
#ifdef CONFIG_GK20A_CTXSW_TRACE
struct nvgpu_ctxsw_trace_entry entry = {
.vmid = 0,
.tag = NVGPU_CTXSW_TAG_ENGINE_RESET,
.context_id = 0,
.pid = ch->tgid,
};
if (!g->ctxsw_trace)
return;
g->ops.ptimer.read_ptimer(g, &entry.timestamp);
gk20a_ctxsw_trace_write(g, &entry);
gk20a_ctxsw_trace_wake_up(g, 0);
#endif
trace_gk20a_channel_reset(ch->chid, ch->tsgid);
}
void gk20a_ctxsw_trace_tsg_reset(struct gk20a *g, struct tsg_gk20a *tsg)
{
#ifdef CONFIG_GK20A_CTXSW_TRACE
struct nvgpu_ctxsw_trace_entry entry = {
.vmid = 0,
.tag = NVGPU_CTXSW_TAG_ENGINE_RESET,
.context_id = 0,
.pid = tsg->tgid,
};
if (!g->ctxsw_trace)
return;
g->ops.ptimer.read_ptimer(g, &entry.timestamp);
gk20a_ctxsw_trace_write(g, &entry);
gk20a_ctxsw_trace_wake_up(g, 0);
#endif
trace_gk20a_channel_reset(~0, tsg->tsgid);
}


@@ -0,0 +1,39 @@
/*
* Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __CTXSW_TRACE_H__
#define __CTXSW_TRACE_H__
#include <nvgpu/types.h>
#define GK20A_CTXSW_TRACE_NUM_DEVS 1
struct file;
struct inode;
struct poll_table_struct;
struct gk20a;
int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp);
int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp);
long gk20a_ctxsw_dev_ioctl(struct file *filp,
unsigned int cmd, unsigned long arg);
ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf,
size_t size, loff_t *offs);
unsigned int gk20a_ctxsw_dev_poll(struct file *filp,
struct poll_table_struct *pts);
#endif /* __CTXSW_TRACE_H__ */


@@ -0,0 +1,452 @@
/*
* Copyright (C) 2017-2018 NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#include "debug_cde.h"
#include "debug_ce.h"
#include "debug_fifo.h"
#include "debug_gr.h"
#include "debug_allocator.h"
#include "debug_kmem.h"
#include "debug_pmu.h"
#include "debug_sched.h"
#include "debug_hal.h"
#include "debug_xve.h"
#include "os_linux.h"
#include "platform_gk20a.h"
#include "gk20a/gk20a.h"
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/uaccess.h>
#include <nvgpu/debug.h>
unsigned int gk20a_debug_trace_cmdbuf;
static inline void gk20a_debug_write_printk(void *ctx, const char *str,
size_t len)
{
pr_info("%s", str);
}
static inline void gk20a_debug_write_to_seqfile(void *ctx, const char *str,
size_t len)
{
seq_write((struct seq_file *)ctx, str, len);
}
void gk20a_debug_output(struct gk20a_debug_output *o,
const char *fmt, ...)
{
va_list args;
int len;
va_start(args, fmt);
len = vsnprintf(o->buf, sizeof(o->buf), fmt, args);
va_end(args);
o->fn(o->ctx, o->buf, len);
}
static int gk20a_gr_dump_regs(struct gk20a *g,
struct gk20a_debug_output *o)
{
if (g->ops.gr.dump_gr_regs)
gr_gk20a_elpg_protected_call(g, g->ops.gr.dump_gr_regs(g, o));
return 0;
}
int gk20a_gr_debug_dump(struct gk20a *g)
{
struct gk20a_debug_output o = {
.fn = gk20a_debug_write_printk
};
gk20a_gr_dump_regs(g, &o);
return 0;
}
static int gk20a_gr_debug_show(struct seq_file *s, void *unused)
{
struct device *dev = s->private;
struct gk20a *g = gk20a_get_platform(dev)->g;
struct gk20a_debug_output o = {
.fn = gk20a_debug_write_to_seqfile,
.ctx = s,
};
int err;
err = gk20a_busy(g);
if (err) {
nvgpu_err(g, "failed to power on gpu: %d", err);
return -EINVAL;
}
gk20a_gr_dump_regs(g, &o);
gk20a_idle(g);
return 0;
}
void gk20a_debug_dump(struct gk20a *g)
{
struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g));
struct gk20a_debug_output o = {
.fn = gk20a_debug_write_printk
};
if (platform->dump_platform_dependencies)
platform->dump_platform_dependencies(dev_from_gk20a(g));
/* HAL only initialized after 1st power-on */
if (g->ops.debug.show_dump)
g->ops.debug.show_dump(g, &o);
}
static int gk20a_debug_show(struct seq_file *s, void *unused)
{
struct device *dev = s->private;
struct gk20a_debug_output o = {
.fn = gk20a_debug_write_to_seqfile,
.ctx = s,
};
struct gk20a *g;
int err;
g = gk20a_get_platform(dev)->g;
err = gk20a_busy(g);
if (err) {
nvgpu_err(g, "failed to power on gpu: %d", err);
return -EFAULT;
}
/* HAL only initialized after 1st power-on */
if (g->ops.debug.show_dump)
g->ops.debug.show_dump(g, &o);
gk20a_idle(g);
return 0;
}
static int gk20a_gr_debug_open(struct inode *inode, struct file *file)
{
return single_open(file, gk20a_gr_debug_show, inode->i_private);
}
static int gk20a_debug_open(struct inode *inode, struct file *file)
{
return single_open(file, gk20a_debug_show, inode->i_private);
}
static const struct file_operations gk20a_gr_debug_fops = {
.open = gk20a_gr_debug_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static const struct file_operations gk20a_debug_fops = {
.open = gk20a_debug_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
void gk20a_debug_show_dump(struct gk20a *g, struct gk20a_debug_output *o)
{
g->ops.fifo.dump_pbdma_status(g, o);
g->ops.fifo.dump_eng_status(g, o);
gk20a_debug_dump_all_channel_status_ramfc(g, o);
}
static ssize_t disable_bigpage_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos)
{
char buf[3];
struct gk20a *g = file->private_data;
if (g->mm.disable_bigpage)
buf[0] = 'Y';
else
buf[0] = 'N';
buf[1] = '\n';
buf[2] = 0x00;
return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
}
static ssize_t disable_bigpage_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos)
{
char buf[32];
int buf_size;
bool bv;
struct gk20a *g = file->private_data;
buf_size = min(count, (sizeof(buf)-1));
if (copy_from_user(buf, user_buf, buf_size))
return -EFAULT;
if (strtobool(buf, &bv) == 0) {
g->mm.disable_bigpage = bv;
gk20a_init_gpu_characteristics(g);
}
return count;
}
static struct file_operations disable_bigpage_fops = {
.open = simple_open,
.read = disable_bigpage_read,
.write = disable_bigpage_write,
};
static int railgate_residency_show(struct seq_file *s, void *data)
{
struct gk20a *g = s->private;
struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g));
unsigned long time_since_last_state_transition_ms;
unsigned long total_rail_gate_time_ms;
unsigned long total_rail_ungate_time_ms;
if (platform->is_railgated(dev_from_gk20a(g))) {
time_since_last_state_transition_ms =
jiffies_to_msecs(jiffies -
g->pstats.last_rail_gate_complete);
total_rail_ungate_time_ms = g->pstats.total_rail_ungate_time_ms;
total_rail_gate_time_ms =
g->pstats.total_rail_gate_time_ms +
time_since_last_state_transition_ms;
} else {
time_since_last_state_transition_ms =
jiffies_to_msecs(jiffies -
g->pstats.last_rail_ungate_complete);
total_rail_gate_time_ms = g->pstats.total_rail_gate_time_ms;
total_rail_ungate_time_ms =
g->pstats.total_rail_ungate_time_ms +
time_since_last_state_transition_ms;
}
seq_printf(s, "Time with Rails Gated: %lu ms\n"
"Time with Rails UnGated: %lu ms\n"
"Total railgating cycles: %lu\n",
total_rail_gate_time_ms,
total_rail_ungate_time_ms,
g->pstats.railgating_cycle_count - 1);
return 0;
}
static int railgate_residency_open(struct inode *inode, struct file *file)
{
return single_open(file, railgate_residency_show, inode->i_private);
}
static const struct file_operations railgate_residency_fops = {
.open = railgate_residency_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static int gk20a_railgating_debugfs_init(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct dentry *d;
d = debugfs_create_file(
"railgate_residency", S_IRUGO|S_IWUSR, l->debugfs, g,
&railgate_residency_fops);
if (!d)
return -ENOMEM;
return 0;
}
static ssize_t timeouts_enabled_read(struct file *file,
char __user *user_buf, size_t count, loff_t *ppos)
{
char buf[3];
struct gk20a *g = file->private_data;
if (nvgpu_is_timeouts_enabled(g))
buf[0] = 'Y';
else
buf[0] = 'N';
buf[1] = '\n';
buf[2] = 0x00;
return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
}
static ssize_t timeouts_enabled_write(struct file *file,
const char __user *user_buf, size_t count, loff_t *ppos)
{
char buf[3];
int buf_size;
bool timeouts_enabled;
struct gk20a *g = file->private_data;
buf_size = min(count, (sizeof(buf)-1));
if (copy_from_user(buf, user_buf, buf_size))
return -EFAULT;
if (strtobool(buf, &timeouts_enabled) == 0) {
nvgpu_mutex_acquire(&g->dbg_sessions_lock);
if (timeouts_enabled == false) {
/* requesting to disable timeouts */
if (g->timeouts_disabled_by_user == false) {
nvgpu_atomic_inc(&g->timeouts_disabled_refcount);
g->timeouts_disabled_by_user = true;
}
} else {
/* requesting to enable timeouts */
if (g->timeouts_disabled_by_user == true) {
nvgpu_atomic_dec(&g->timeouts_disabled_refcount);
g->timeouts_disabled_by_user = false;
}
}
nvgpu_mutex_release(&g->dbg_sessions_lock);
}
return count;
}
static const struct file_operations timeouts_enabled_fops = {
.open = simple_open,
.read = timeouts_enabled_read,
.write = timeouts_enabled_write,
};
void gk20a_debug_init(struct gk20a *g, const char *debugfs_symlink)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct device *dev = dev_from_gk20a(g);
l->debugfs = debugfs_create_dir(dev_name(dev), NULL);
if (!l->debugfs)
return;
if (debugfs_symlink)
l->debugfs_alias =
debugfs_create_symlink(debugfs_symlink,
NULL, dev_name(dev));
debugfs_create_file("status", S_IRUGO, l->debugfs,
dev, &gk20a_debug_fops);
debugfs_create_file("gr_status", S_IRUGO, l->debugfs,
dev, &gk20a_gr_debug_fops);
debugfs_create_u32("trace_cmdbuf", S_IRUGO|S_IWUSR,
l->debugfs, &gk20a_debug_trace_cmdbuf);
debugfs_create_u32("ch_wdt_timeout_ms", S_IRUGO|S_IWUSR,
l->debugfs, &g->ch_wdt_timeout_ms);
debugfs_create_u32("disable_syncpoints", S_IRUGO,
l->debugfs, &g->disable_syncpoints);
/* New debug logging API. */
debugfs_create_u64("log_mask", S_IRUGO|S_IWUSR,
l->debugfs, &g->log_mask);
debugfs_create_u32("log_trace", S_IRUGO|S_IWUSR,
l->debugfs, &g->log_trace);
l->debugfs_ltc_enabled =
debugfs_create_bool("ltc_enabled", S_IRUGO|S_IWUSR,
l->debugfs,
&g->mm.ltc_enabled_target);
l->debugfs_gr_idle_timeout_default =
debugfs_create_u32("gr_idle_timeout_default_us",
S_IRUGO|S_IWUSR, l->debugfs,
&g->gr_idle_timeout_default);
l->debugfs_timeouts_enabled =
debugfs_create_file("timeouts_enabled",
S_IRUGO|S_IWUSR,
l->debugfs,
g,
&timeouts_enabled_fops);
l->debugfs_disable_bigpage =
debugfs_create_file("disable_bigpage",
S_IRUGO|S_IWUSR,
l->debugfs,
g,
&disable_bigpage_fops);
l->debugfs_timeslice_low_priority_us =
debugfs_create_u32("timeslice_low_priority_us",
S_IRUGO|S_IWUSR,
l->debugfs,
&g->timeslice_low_priority_us);
l->debugfs_timeslice_medium_priority_us =
debugfs_create_u32("timeslice_medium_priority_us",
S_IRUGO|S_IWUSR,
l->debugfs,
&g->timeslice_medium_priority_us);
l->debugfs_timeslice_high_priority_us =
debugfs_create_u32("timeslice_high_priority_us",
S_IRUGO|S_IWUSR,
l->debugfs,
&g->timeslice_high_priority_us);
l->debugfs_runlist_interleave =
debugfs_create_bool("runlist_interleave",
S_IRUGO|S_IWUSR,
l->debugfs,
&g->runlist_interleave);
l->debugfs_force_preemption_gfxp =
debugfs_create_bool("force_preemption_gfxp", S_IRUGO|S_IWUSR,
l->debugfs,
&g->gr.ctx_vars.force_preemption_gfxp);
l->debugfs_force_preemption_cilp =
debugfs_create_bool("force_preemption_cilp", S_IRUGO|S_IWUSR,
l->debugfs,
&g->gr.ctx_vars.force_preemption_cilp);
l->debugfs_dump_ctxsw_stats =
debugfs_create_bool("dump_ctxsw_stats_on_channel_close",
S_IRUGO|S_IWUSR, l->debugfs,
&g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close);
gr_gk20a_debugfs_init(g);
gk20a_pmu_debugfs_init(g);
gk20a_railgating_debugfs_init(g);
#ifdef CONFIG_NVGPU_SUPPORT_CDE
gk20a_cde_debugfs_init(g);
#endif
gk20a_ce_debugfs_init(g);
nvgpu_alloc_debugfs_init(g);
nvgpu_hal_debugfs_init(g);
gk20a_fifo_debugfs_init(g);
gk20a_sched_debugfs_init(g);
#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
nvgpu_kmem_debugfs_init(g);
#endif
if (g->pci_vendor_id)
nvgpu_xve_debugfs_init(g);
}
void gk20a_debug_deinit(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
if (!l->debugfs)
return;
gk20a_fifo_debugfs_deinit(g);
debugfs_remove_recursive(l->debugfs);
debugfs_remove(l->debugfs_alias);
}

View File

@@ -0,0 +1,69 @@
/*
* Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#include "debug_allocator.h"
#include "os_linux.h"
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <nvgpu/allocator.h>
static int __alloc_show(struct seq_file *s, void *unused)
{
struct nvgpu_allocator *a = s->private;
nvgpu_alloc_print_stats(a, s, 1);
return 0;
}
static int __alloc_open(struct inode *inode, struct file *file)
{
return single_open(file, __alloc_show, inode->i_private);
}
static const struct file_operations __alloc_fops = {
.open = __alloc_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
void nvgpu_init_alloc_debug(struct gk20a *g, struct nvgpu_allocator *a)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
if (!l->debugfs_allocators)
return;
a->debugfs_entry = debugfs_create_file(a->name, S_IRUGO,
l->debugfs_allocators,
a, &__alloc_fops);
}
void nvgpu_fini_alloc_debug(struct nvgpu_allocator *a)
{
}
void nvgpu_alloc_debugfs_init(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
l->debugfs_allocators = debugfs_create_dir("allocators", l->debugfs);
if (IS_ERR_OR_NULL(l->debugfs_allocators)) {
l->debugfs_allocators = NULL;
return;
}
}

View File

@@ -0,0 +1,21 @@
/*
* Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#ifndef __NVGPU_DEBUG_ALLOCATOR_H__
#define __NVGPU_DEBUG_ALLOCATOR_H__
struct gk20a;
void nvgpu_alloc_debugfs_init(struct gk20a *g);
#endif /* __NVGPU_DEBUG_ALLOCATOR_H__ */

View File

@@ -0,0 +1,53 @@
/*
* Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#include "debug_cde.h"
#include "platform_gk20a.h"
#include "os_linux.h"
#include <linux/debugfs.h>
static ssize_t gk20a_cde_reload_write(struct file *file,
const char __user *userbuf, size_t count, loff_t *ppos)
{
struct nvgpu_os_linux *l = file->private_data;
gk20a_cde_reload(l);
return count;
}
static const struct file_operations gk20a_cde_reload_fops = {
.open = simple_open,
.write = gk20a_cde_reload_write,
};
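/*
 * Illustrative usage sketch (the node is created below in
 * gk20a_cde_debugfs_init(); the full debugfs path is an assumption):
 *
 *	echo 1 > /sys/kernel/debug/<dev_name>/reload_cde_firmware
 *
 * Any write triggers gk20a_cde_reload(); the written data itself is ignored.
 */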
void gk20a_cde_debugfs_init(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g));
if (!platform->has_cde)
return;
debugfs_create_u32("cde_parameter", S_IWUSR | S_IRUGO,
l->debugfs, &l->cde_app.shader_parameter);
debugfs_create_u32("cde_ctx_count", S_IWUSR | S_IRUGO,
l->debugfs, &l->cde_app.ctx_count);
debugfs_create_u32("cde_ctx_usecount", S_IWUSR | S_IRUGO,
l->debugfs, &l->cde_app.ctx_usecount);
debugfs_create_u32("cde_ctx_count_top", S_IWUSR | S_IRUGO,
l->debugfs, &l->cde_app.ctx_count_top);
debugfs_create_file("reload_cde_firmware", S_IWUSR, l->debugfs,
l, &gk20a_cde_reload_fops);
}

View File

@@ -0,0 +1,21 @@
/*
* Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#ifndef __NVGPU_DEBUG_CDE_H__
#define __NVGPU_DEBUG_CDE_H__
struct gk20a;
void gk20a_cde_debugfs_init(struct gk20a *g);
#endif /* __NVGPU_DEBUG_CDE_H__ */

View File

@@ -0,0 +1,30 @@
/*
* Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#include "debug_ce.h"
#include "os_linux.h"
#include <linux/debugfs.h>
void gk20a_ce_debugfs_init(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
debugfs_create_u32("ce_app_ctx_count", S_IWUSR | S_IRUGO,
l->debugfs, &g->ce_app.ctx_count);
debugfs_create_u32("ce_app_state", S_IWUSR | S_IRUGO,
l->debugfs, &g->ce_app.app_state);
debugfs_create_u32("ce_app_next_ctx_id", S_IWUSR | S_IRUGO,
l->debugfs, &g->ce_app.next_ctx_id);
}

View File

@@ -0,0 +1,21 @@
/*
* Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#ifndef __NVGPU_DEBUG_CE_H__
#define __NVGPU_DEBUG_CE_H__
struct gk20a;
void gk20a_ce_debugfs_init(struct gk20a *g);
#endif /* __NVGPU_DEBUG_CE_H__ */

View File

@@ -0,0 +1,271 @@
/*
* Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#include <linux/uaccess.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include "gm20b/clk_gm20b.h"
#include "os_linux.h"
#include "platform_gk20a.h"
static int rate_get(void *data, u64 *val)
{
struct gk20a *g = (struct gk20a *)data;
struct clk_gk20a *clk = &g->clk;
*val = (u64)rate_gpc2clk_to_gpu(clk->gpc_pll.freq);
return 0;
}
static int rate_set(void *data, u64 val)
{
struct gk20a *g = (struct gk20a *)data;
return g->ops.clk.set_rate(g, CTRL_CLK_DOMAIN_GPCCLK, (u32)val);
}
DEFINE_SIMPLE_ATTRIBUTE(rate_fops, rate_get, rate_set, "%llu\n");
static int pll_reg_show(struct seq_file *s, void *data)
{
struct gk20a *g = s->private;
struct nvgpu_clk_pll_debug_data d;
u32 reg, m, n, pl, f;
int err = 0;
if (g->ops.clk.get_pll_debug_data) {
err = g->ops.clk.get_pll_debug_data(g, &d);
if (err)
return err;
} else {
return -EINVAL;
}
seq_printf(s, "bypassctrl = %s, ",
d.trim_sys_bypassctrl_val ? "bypass" : "vco");
seq_printf(s, "sel_vco = %s, ",
d.trim_sys_sel_vco_val ? "vco" : "bypass");
seq_printf(s, "cfg = 0x%x : %s : %s : %s\n", d.trim_sys_gpcpll_cfg_val,
d.trim_sys_gpcpll_cfg_enabled ? "enabled" : "disabled",
d.trim_sys_gpcpll_cfg_locked ? "locked" : "unlocked",
d.trim_sys_gpcpll_cfg_sync_on ? "sync_on" : "sync_off");
reg = d.trim_sys_gpcpll_coeff_val;
m = d.trim_sys_gpcpll_coeff_mdiv;
n = d.trim_sys_gpcpll_coeff_ndiv;
pl = d.trim_sys_gpcpll_coeff_pldiv;
f = g->clk.gpc_pll.clk_in * n / (m * nvgpu_pl_to_div(pl));
seq_printf(s, "coef = 0x%x : m = %u : n = %u : pl = %u", reg, m, n, pl);
seq_printf(s, " : pll_f(gpu_f) = %u(%u) kHz\n", f, f/2);
seq_printf(s, "dvfs0 = 0x%x : d = %u : dmax = %u : doffs = %u\n",
d.trim_sys_gpcpll_dvfs0_val,
d.trim_sys_gpcpll_dvfs0_dfs_coeff,
d.trim_sys_gpcpll_dvfs0_dfs_det_max,
d.trim_sys_gpcpll_dvfs0_dfs_dc_offset);
return 0;
}
static int pll_reg_open(struct inode *inode, struct file *file)
{
return single_open(file, pll_reg_show, inode->i_private);
}
static const struct file_operations pll_reg_fops = {
.open = pll_reg_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static int pll_reg_raw_show(struct seq_file *s, void *data)
{
struct gk20a *g = s->private;
struct nvgpu_clk_pll_debug_data d;
u32 reg;
int err = 0;
if (g->ops.clk.get_pll_debug_data) {
err = g->ops.clk.get_pll_debug_data(g, &d);
if (err)
return err;
} else {
return -EINVAL;
}
seq_puts(s, "GPCPLL REGISTERS:\n");
for (reg = d.trim_sys_gpcpll_cfg_reg;
reg <= d.trim_sys_gpcpll_dvfs2_reg;
reg += sizeof(u32))
seq_printf(s, "[0x%02x] = 0x%08x\n", reg, gk20a_readl(g, reg));
seq_puts(s, "\nGPC CLK OUT REGISTERS:\n");
seq_printf(s, "[0x%02x] = 0x%08x\n", d.trim_sys_sel_vco_reg,
d.trim_sys_sel_vco_val);
seq_printf(s, "[0x%02x] = 0x%08x\n", d.trim_sys_gpc2clk_out_reg,
d.trim_sys_gpc2clk_out_val);
seq_printf(s, "[0x%02x] = 0x%08x\n", d.trim_sys_bypassctrl_reg,
d.trim_sys_bypassctrl_val);
return 0;
}
static int pll_reg_raw_open(struct inode *inode, struct file *file)
{
return single_open(file, pll_reg_raw_show, inode->i_private);
}
static ssize_t pll_reg_raw_write(struct file *file,
const char __user *userbuf, size_t count, loff_t *ppos)
{
struct gk20a *g = file->f_path.dentry->d_inode->i_private;
char buf[80];
u32 reg, val;
int err = 0;
if (sizeof(buf) <= count)
return -EINVAL;
if (copy_from_user(buf, userbuf, count))
return -EFAULT;
/* terminate buffer and trim - white spaces may be appended
* at the end when invoked from shell command line */
buf[count] = '\0';
strim(buf);
if (sscanf(buf, "[0x%x] = 0x%x", &reg, &val) != 2)
return -EINVAL;
if (g->ops.clk.pll_reg_write)
err = g->ops.clk.pll_reg_write(g, reg, val);
else
err = -EINVAL;
return err ? err : count;
}
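/*
 * Illustrative write format, matching the sscanf() pattern above (the
 * register offset and value below are made-up examples, not known-good
 * settings):
 *
 *	echo "[0x00137000] = 0x00000000" > pll_reg_raw
 */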
static const struct file_operations pll_reg_raw_fops = {
.open = pll_reg_raw_open,
.read = seq_read,
.write = pll_reg_raw_write,
.llseek = seq_lseek,
.release = single_release,
};
static int monitor_get(void *data, u64 *val)
{
struct gk20a *g = (struct gk20a *)data;
int err = 0;
if (g->ops.clk.get_gpcclk_clock_counter)
err = g->ops.clk.get_gpcclk_clock_counter(&g->clk, val);
else
err = -EINVAL;
return err;
}
DEFINE_SIMPLE_ATTRIBUTE(monitor_fops, monitor_get, NULL, "%llu\n");
static int voltage_get(void *data, u64 *val)
{
struct gk20a *g = (struct gk20a *)data;
int err = 0;
if (g->ops.clk.get_voltage)
err = g->ops.clk.get_voltage(&g->clk, val);
else
err = -EINVAL;
return err;
}
DEFINE_SIMPLE_ATTRIBUTE(voltage_fops, voltage_get, NULL, "%llu\n");
static int pll_param_show(struct seq_file *s, void *data)
{
struct pll_parms *gpc_pll_params = gm20b_get_gpc_pll_parms();
seq_printf(s, "ADC offs = %d uV, ADC slope = %d uV, VCO ctrl = 0x%x\n",
gpc_pll_params->uvdet_offs, gpc_pll_params->uvdet_slope,
gpc_pll_params->vco_ctrl);
return 0;
}
static int pll_param_open(struct inode *inode, struct file *file)
{
return single_open(file, pll_param_show, inode->i_private);
}
static const struct file_operations pll_param_fops = {
.open = pll_param_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
int gm20b_clk_init_debugfs(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct dentry *d;
if (!l->debugfs)
return -EINVAL;
d = debugfs_create_file(
"rate", S_IRUGO|S_IWUSR, l->debugfs, g, &rate_fops);
if (!d)
goto err_out;
d = debugfs_create_file(
"pll_reg", S_IRUGO, l->debugfs, g, &pll_reg_fops);
if (!d)
goto err_out;
d = debugfs_create_file("pll_reg_raw",
S_IRUGO, l->debugfs, g, &pll_reg_raw_fops);
if (!d)
goto err_out;
d = debugfs_create_file(
"monitor", S_IRUGO, l->debugfs, g, &monitor_fops);
if (!d)
goto err_out;
d = debugfs_create_file(
"voltage", S_IRUGO, l->debugfs, g, &voltage_fops);
if (!d)
goto err_out;
d = debugfs_create_file(
"pll_param", S_IRUGO, l->debugfs, g, &pll_param_fops);
if (!d)
goto err_out;
d = debugfs_create_u32("pll_na_mode", S_IRUGO, l->debugfs,
(u32 *)&g->clk.gpc_pll.mode);
if (!d)
goto err_out;
d = debugfs_create_u32("fmax2x_at_vmin_safe_t", S_IRUGO,
l->debugfs, (u32 *)&g->clk.dvfs_safe_max_freq);
if (!d)
goto err_out;
return 0;
err_out:
pr_err("%s: Failed to make debugfs node\n", __func__);
return -ENOMEM;
}

View File

@@ -0,0 +1,378 @@
/*
* Copyright (C) 2017-2018 NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#include "debug_fifo.h"
#include "os_linux.h"
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <nvgpu/sort.h>
#include <nvgpu/timers.h>
void __gk20a_fifo_profile_free(struct nvgpu_ref *ref);
static void *gk20a_fifo_sched_debugfs_seq_start(
struct seq_file *s, loff_t *pos)
{
struct gk20a *g = s->private;
struct fifo_gk20a *f = &g->fifo;
if (*pos >= f->num_channels)
return NULL;
return &f->channel[*pos];
}
static void *gk20a_fifo_sched_debugfs_seq_next(
struct seq_file *s, void *v, loff_t *pos)
{
struct gk20a *g = s->private;
struct fifo_gk20a *f = &g->fifo;
++(*pos);
if (*pos >= f->num_channels)
return NULL;
return &f->channel[*pos];
}
static void gk20a_fifo_sched_debugfs_seq_stop(
struct seq_file *s, void *v)
{
}
static int gk20a_fifo_sched_debugfs_seq_show(
struct seq_file *s, void *v)
{
struct gk20a *g = s->private;
struct fifo_gk20a *f = &g->fifo;
struct channel_gk20a *ch = v;
struct tsg_gk20a *tsg = NULL;
struct fifo_engine_info_gk20a *engine_info;
struct fifo_runlist_info_gk20a *runlist;
u32 runlist_id;
int ret = SEQ_SKIP;
u32 engine_id;
engine_id = gk20a_fifo_get_gr_engine_id(g);
engine_info = (f->engine_info + engine_id);
runlist_id = engine_info->runlist_id;
runlist = &f->runlist_info[runlist_id];
if (ch == f->channel) {
seq_puts(s, "chid tsgid pid timeslice timeout interleave graphics_preempt compute_preempt\n");
seq_puts(s, " (usecs) (msecs)\n");
ret = 0;
}
if (!test_bit(ch->chid, runlist->active_channels))
return ret;
if (gk20a_channel_get(ch)) {
tsg = tsg_gk20a_from_ch(ch);
if (tsg)
seq_printf(s, "%-8d %-8d %-8d %-9d %-8d %-10d %-8d %-8d\n",
ch->chid,
ch->tsgid,
ch->tgid,
tsg->timeslice_us,
ch->timeout_ms_max,
tsg->interleave_level,
tsg->gr_ctx.graphics_preempt_mode,
tsg->gr_ctx.compute_preempt_mode);
gk20a_channel_put(ch);
}
return 0;
}
static const struct seq_operations gk20a_fifo_sched_debugfs_seq_ops = {
.start = gk20a_fifo_sched_debugfs_seq_start,
.next = gk20a_fifo_sched_debugfs_seq_next,
.stop = gk20a_fifo_sched_debugfs_seq_stop,
.show = gk20a_fifo_sched_debugfs_seq_show
};
static int gk20a_fifo_sched_debugfs_open(struct inode *inode,
struct file *file)
{
struct gk20a *g = inode->i_private;
int err;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
err = seq_open(file, &gk20a_fifo_sched_debugfs_seq_ops);
if (err)
return err;
nvgpu_log(g, gpu_dbg_info, "i_private=%p", inode->i_private);
((struct seq_file *)file->private_data)->private = inode->i_private;
return 0;
};
/*
* The file operations structure contains our open function along with
 * a set of the canned seq_ ops.
*/
static const struct file_operations gk20a_fifo_sched_debugfs_fops = {
.owner = THIS_MODULE,
.open = gk20a_fifo_sched_debugfs_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release
};
static int gk20a_fifo_profile_enable(void *data, u64 val)
{
struct gk20a *g = (struct gk20a *) data;
struct fifo_gk20a *f = &g->fifo;
nvgpu_mutex_acquire(&f->profile.lock);
if (val == 0) {
if (f->profile.enabled) {
f->profile.enabled = false;
nvgpu_ref_put(&f->profile.ref,
__gk20a_fifo_profile_free);
}
} else {
if (!f->profile.enabled) {
/* Don't unconditionally re-init the kref: that could race with a
 * kickoff holding a reference if we enable/disable/enable quickly.
*/
if (!nvgpu_ref_get_unless_zero(&f->profile.ref)) {
f->profile.data = nvgpu_vzalloc(g,
FIFO_PROFILING_ENTRIES *
sizeof(struct fifo_profile_gk20a));
f->profile.sorted = nvgpu_vzalloc(g,
FIFO_PROFILING_ENTRIES *
sizeof(u64));
if (!(f->profile.data && f->profile.sorted)) {
nvgpu_vfree(g, f->profile.data);
nvgpu_vfree(g, f->profile.sorted);
nvgpu_mutex_release(&f->profile.lock);
return -ENOMEM;
}
nvgpu_ref_init(&f->profile.ref);
}
atomic_set(&f->profile.get.atomic_var, 0);
f->profile.enabled = true;
}
}
nvgpu_mutex_release(&f->profile.lock);
return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(
gk20a_fifo_profile_enable_debugfs_fops,
NULL,
gk20a_fifo_profile_enable,
"%llu\n"
);
static int __profile_cmp(const void *a, const void *b)
{
/* compare explicitly to avoid truncating a u64 difference to int */
unsigned long long x = *((const unsigned long long *) a);
unsigned long long y = *((const unsigned long long *) b);
return x < y ? -1 : (x > y ? 1 : 0);
}
/*
 * This uses about 800 bytes of stack, but the function using it is not part
 * of a call stack where much memory is already in use, so it is fine.
*/
#define PERCENTILE_WIDTH 5
#define PERCENTILE_RANGES (100/PERCENTILE_WIDTH)
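/*
 * Worked example of the bucket layout: with PERCENTILE_WIDTH = 5 there are
 * 20 buckets and bucket i holds the 5*(i+1)-th percentile of the sorted
 * samples, e.g. for nelem = 100 bucket 0 is sorted[(5*1*100)/100 - 1] =
 * sorted[4] (the 5th percentile) and bucket 19 is sorted[99].
 */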
static unsigned int __gk20a_fifo_create_stats(struct gk20a *g,
u64 *percentiles, u32 index_end, u32 index_start)
{
unsigned int nelem = 0;
unsigned int index;
struct fifo_profile_gk20a *profile;
for (index = 0; index < FIFO_PROFILING_ENTRIES; index++) {
profile = &g->fifo.profile.data[index];
if (profile->timestamp[index_end] >
profile->timestamp[index_start]) {
/* This is a valid element */
g->fifo.profile.sorted[nelem] =
profile->timestamp[index_end] -
profile->timestamp[index_start];
nelem++;
}
}
/* sort it */
sort(g->fifo.profile.sorted, nelem, sizeof(unsigned long long),
__profile_cmp, NULL);
/* build ranges */
for (index = 0; index < PERCENTILE_RANGES; index++) {
percentiles[index] = nelem < PERCENTILE_RANGES ? 0 :
g->fifo.profile.sorted[(PERCENTILE_WIDTH * (index + 1) *
nelem)/100 - 1];
}
return nelem;
}
static int gk20a_fifo_profile_stats(struct seq_file *s, void *unused)
{
struct gk20a *g = s->private;
unsigned int get, nelem, index;
/*
 * 800 B on the stack, but the function is static and only called from
 * the debugfs handler
*/
u64 percentiles_ioctl[PERCENTILE_RANGES];
u64 percentiles_kickoff[PERCENTILE_RANGES];
u64 percentiles_jobtracking[PERCENTILE_RANGES];
u64 percentiles_append[PERCENTILE_RANGES];
u64 percentiles_userd[PERCENTILE_RANGES];
if (!nvgpu_ref_get_unless_zero(&g->fifo.profile.ref)) {
seq_printf(s, "Profiling disabled\n");
return 0;
}
get = atomic_read(&g->fifo.profile.get.atomic_var);
__gk20a_fifo_create_stats(g, percentiles_ioctl,
PROFILE_IOCTL_EXIT, PROFILE_IOCTL_ENTRY);
__gk20a_fifo_create_stats(g, percentiles_kickoff,
PROFILE_END, PROFILE_ENTRY);
__gk20a_fifo_create_stats(g, percentiles_jobtracking,
PROFILE_JOB_TRACKING, PROFILE_IOCTL_ENTRY);
__gk20a_fifo_create_stats(g, percentiles_append,
PROFILE_APPEND, PROFILE_JOB_TRACKING);
nelem = __gk20a_fifo_create_stats(g, percentiles_userd,
PROFILE_END, PROFILE_APPEND);
seq_printf(s, "Number of kickoffs: %d\n", nelem);
seq_printf(s, "Perc \t ioctl(ns) \t kickoff(ns) \t pbcopy(ns) \t jobtrack(ns) \t userd(ns)\n");
for (index = 0; index < PERCENTILE_RANGES; index++)
seq_printf(s, "[%2dpc]\t%8lld\t%8lld\t%8lld\t%8lld\t%8lld\n",
PERCENTILE_WIDTH * (index+1),
percentiles_ioctl[index],
percentiles_kickoff[index],
percentiles_append[index],
percentiles_jobtracking[index],
percentiles_userd[index]);
nvgpu_ref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free);
return 0;
}
static int gk20a_fifo_profile_stats_open(struct inode *inode, struct file *file)
{
return single_open(file, gk20a_fifo_profile_stats, inode->i_private);
}
static const struct file_operations gk20a_fifo_profile_stats_debugfs_fops = {
.open = gk20a_fifo_profile_stats_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
void gk20a_fifo_debugfs_init(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct dentry *gpu_root = l->debugfs;
struct dentry *fifo_root;
struct dentry *profile_root;
fifo_root = debugfs_create_dir("fifo", gpu_root);
if (IS_ERR_OR_NULL(fifo_root))
return;
nvgpu_log(g, gpu_dbg_info, "g=%p", g);
debugfs_create_file("sched", 0600, fifo_root, g,
&gk20a_fifo_sched_debugfs_fops);
profile_root = debugfs_create_dir("profile", fifo_root);
if (IS_ERR_OR_NULL(profile_root))
return;
nvgpu_mutex_init(&g->fifo.profile.lock);
g->fifo.profile.enabled = false;
atomic_set(&g->fifo.profile.get.atomic_var, 0);
atomic_set(&g->fifo.profile.ref.refcount.atomic_var, 0);
debugfs_create_file("enable", 0600, profile_root, g,
&gk20a_fifo_profile_enable_debugfs_fops);
debugfs_create_file("stats", 0600, profile_root, g,
&gk20a_fifo_profile_stats_debugfs_fops);
}
void gk20a_fifo_profile_snapshot(struct fifo_profile_gk20a *profile, int idx)
{
if (profile)
profile->timestamp[idx] = nvgpu_current_time_ns();
}
void __gk20a_fifo_profile_free(struct nvgpu_ref *ref)
{
struct fifo_gk20a *f = container_of(ref, struct fifo_gk20a,
profile.ref);
nvgpu_vfree(f->g, f->profile.data);
nvgpu_vfree(f->g, f->profile.sorted);
}
/* Get the next element in the ring buffer of profile entries
* and grab a reference to the structure
*/
struct fifo_profile_gk20a *gk20a_fifo_profile_acquire(struct gk20a *g)
{
struct fifo_gk20a *f = &g->fifo;
struct fifo_profile_gk20a *profile;
unsigned int index;
/* If kref is zero, profiling is not enabled */
if (!nvgpu_ref_get_unless_zero(&f->profile.ref))
return NULL;
index = atomic_inc_return(&f->profile.get.atomic_var);
profile = &f->profile.data[index % FIFO_PROFILING_ENTRIES];
return profile;
}
/* Free the reference to the structure. This allows deferred cleanups */
void gk20a_fifo_profile_release(struct gk20a *g,
struct fifo_profile_gk20a *profile)
{
nvgpu_ref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free);
}
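/*
 * Illustrative caller sketch (not part of this file): a kickoff path can
 * bracket the phases it wants to measure with snapshots. The PROFILE_*
 * indices referenced here are the ones read back by the stats code above.
 *
 *	struct fifo_profile_gk20a *p = gk20a_fifo_profile_acquire(g);
 *
 *	gk20a_fifo_profile_snapshot(p, PROFILE_IOCTL_ENTRY);
 *	... do the work ...
 *	gk20a_fifo_profile_snapshot(p, PROFILE_END);
 *	if (p)
 *		gk20a_fifo_profile_release(g, p);
 *
 * gk20a_fifo_profile_snapshot() tolerates a NULL profile, but release must
 * only be called when acquire returned a valid entry.
 */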
void gk20a_fifo_debugfs_deinit(struct gk20a *g)
{
struct fifo_gk20a *f = &g->fifo;
nvgpu_mutex_acquire(&f->profile.lock);
if (f->profile.enabled) {
f->profile.enabled = false;
nvgpu_ref_put(&f->profile.ref, __gk20a_fifo_profile_free);
}
nvgpu_mutex_release(&f->profile.lock);
}

View File

@@ -0,0 +1,22 @@
/*
* Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#ifndef __NVGPU_DEBUG_FIFO_H__
#define __NVGPU_DEBUG_FIFO_H__
struct gk20a;
void gk20a_fifo_debugfs_init(struct gk20a *g);
void gk20a_fifo_debugfs_deinit(struct gk20a *g);
#endif /* __NVGPU_DEBUG_FIFO_H__ */

View File

@@ -0,0 +1,31 @@
/*
* Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#include "debug_gr.h"
#include "os_linux.h"
#include <linux/debugfs.h>
int gr_gk20a_debugfs_init(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
l->debugfs_gr_default_attrib_cb_size =
debugfs_create_u32("gr_default_attrib_cb_size",
S_IRUGO|S_IWUSR, l->debugfs,
&g->gr.attrib_cb_default_size);
return 0;
}

View File

@@ -0,0 +1,21 @@
/*
* Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#ifndef __NVGPU_DEBUG_GR_H__
#define __NVGPU_DEBUG_GR_H__
struct gk20a;
int gr_gk20a_debugfs_init(struct gk20a *g);
#endif /* __NVGPU_DEBUG_GR_H__ */

View File

@@ -0,0 +1,95 @@
/*
* Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#include "debug_hal.h"
#include "os_linux.h"
#include <linux/debugfs.h>
#include <linux/seq_file.h>
/* Format and print a single function pointer to the specified seq_file. */
static void __hal_print_op(struct seq_file *s, void *op_ptr)
{
seq_printf(s, "%pF\n", op_ptr);
}
/*
* Prints an array of function pointer addresses in op_ptrs to the
* specified seq_file
*/
static void __hal_print_ops(struct seq_file *s, void **op_ptrs, int num_ops)
{
int i;
for (i = 0; i < num_ops; i++)
__hal_print_op(s, op_ptrs[i]);
}
/*
* Show file operation, which generates content of the file once. Prints a list
* of gpu operations as defined by gops and the corresponding function pointer
* destination addresses. Relies on no compiler reordering of struct fields and
* assumption that all members are function pointers.
*/
static int __hal_show(struct seq_file *s, void *unused)
{
struct gpu_ops *gops = s->private;
__hal_print_ops(s, (void **)gops, sizeof(*gops) / sizeof(void *));
return 0;
}
static int __hal_open(struct inode *inode, struct file *file)
{
return single_open(file, __hal_show, inode->i_private);
}
static const struct file_operations __hal_fops = {
.open = __hal_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
void nvgpu_hal_debugfs_fini(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
if (l->debugfs_hal)
debugfs_remove_recursive(l->debugfs_hal);
}
void nvgpu_hal_debugfs_init(struct gk20a *g)
{
struct dentry *d;
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
if (!l->debugfs)
return;
l->debugfs_hal = debugfs_create_dir("hal", l->debugfs);
if (IS_ERR_OR_NULL(l->debugfs_hal)) {
l->debugfs_hal = NULL;
return;
}
/* Pass along reference to the gpu_ops struct as private data */
d = debugfs_create_file("gops", S_IRUGO, l->debugfs_hal,
&g->ops, &__hal_fops);
if (!d) {
nvgpu_err(g, "%s: Failed to make debugfs node\n", __func__);
debugfs_remove_recursive(l->debugfs_hal);
return;
}
}

View File

@@ -0,0 +1,22 @@
/*
* Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#ifndef __NVGPU_DEBUG_HAL_H__
#define __NVGPU_DEBUG_HAL_H__
struct gk20a;
void nvgpu_hal_debugfs_fini(struct gk20a *g);
void nvgpu_hal_debugfs_init(struct gk20a *g);
#endif /* __NVGPU_DEBUG_HAL_H__ */

View File

@@ -0,0 +1,312 @@
/*
* Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include "os_linux.h"
#include "debug_kmem.h"
#include "kmem_priv.h"
/**
* to_human_readable_bytes - Determine suffix for passed size.
*
* @bytes - Number of bytes to generate a suffix for.
* @hr_bytes [out] - The human readable number of bytes.
* @hr_suffix [out] - The suffix for the HR number of bytes.
*
* Computes a human readable decomposition of the passed number of bytes. The
 * suffix for the bytes is passed back through the @hr_suffix pointer. The
 * scaled number of bytes is then passed back in @hr_bytes. This returns the
 * following ranges:
 *
*
* 0 - 1023 B
* 1 - 1023 KB
* 1 - 1023 MB
* 1 - 1023 GB
* 1 - 1023 TB
* 1 - ... PB
*/
static void __to_human_readable_bytes(u64 bytes, u64 *hr_bytes,
const char **hr_suffix)
{
static const char *suffixes[] =
{ "B", "KB", "MB", "GB", "TB", "PB" };
u64 suffix_ind = 0;
while (suffix_ind < ARRAY_SIZE(suffixes) && bytes >= 1024) {
bytes >>= 10;
suffix_ind++;
}
/*
* Handle case where bytes > 1023PB.
*/
suffix_ind = suffix_ind < ARRAY_SIZE(suffixes) ?
suffix_ind : ARRAY_SIZE(suffixes) - 1;
*hr_bytes = bytes;
*hr_suffix = suffixes[suffix_ind];
}
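/*
 * Example: 1536 bytes shifts down once (1536 >> 10 == 1), so the caller gets
 * back hr_bytes = 1 with hr_suffix = "KB"; anything below 1024 stays in "B".
 */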
/**
* print_hr_bytes - Print human readable bytes
*
* @s - A seq_file to print to. May be NULL.
* @msg - A message to print before the bytes.
* @bytes - Number of bytes.
*
* Print @msg followed by the human readable decomposition of the passed number
* of bytes.
*
 * If @s is NULL then the prints are made to the kernel log.
*/
static void print_hr_bytes(struct seq_file *s, const char *msg, u64 bytes)
{
u64 hr_bytes;
const char *hr_suffix;
__to_human_readable_bytes(bytes, &hr_bytes, &hr_suffix);
__pstat(s, "%s%lld %s\n", msg, hr_bytes, hr_suffix);
}
/**
* print_histogram - Build a histogram of the memory usage.
*
 * @tracker The tracker to pull data from.
* @s A seq_file to dump info into.
*/
static void print_histogram(struct nvgpu_mem_alloc_tracker *tracker,
struct seq_file *s)
{
int i;
u64 pot_min, pot_max;
u64 nr_buckets;
unsigned int *buckets;
unsigned int total_allocs;
struct nvgpu_rbtree_node *node;
static const char histogram_line[] =
"++++++++++++++++++++++++++++++++++++++++";
/*
* pot_min is essentially a round down to the nearest power of 2. This
* is the start of the histogram. pot_max is just a round up to the
* nearest power of two. Each histogram bucket is one power of two so
* the histogram buckets are exponential.
*/
pot_min = (u64)rounddown_pow_of_two(tracker->min_alloc);
pot_max = (u64)roundup_pow_of_two(tracker->max_alloc);
nr_buckets = __ffs(pot_max) - __ffs(pot_min);
buckets = kzalloc(sizeof(*buckets) * nr_buckets, GFP_KERNEL);
if (!buckets) {
__pstat(s, "OOM: could not allocate bucket storage!?\n");
return;
}
/*
* Iterate across all of the allocs and determine what bucket they
* should go in. Round the size down to the nearest power of two to
* find the right bucket.
*/
nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
while (node) {
int b;
u64 bucket_min;
struct nvgpu_mem_alloc *alloc =
nvgpu_mem_alloc_from_rbtree_node(node);
bucket_min = (u64)rounddown_pow_of_two(alloc->size);
if (bucket_min < tracker->min_alloc)
bucket_min = tracker->min_alloc;
b = __ffs(bucket_min) - __ffs(pot_min);
/*
 * Handle the one case where there's an alloc exactly as big as
* the maximum bucket size of the largest bucket. Most of the
* buckets have an inclusive minimum and exclusive maximum. But
* the largest bucket needs to have an _inclusive_ maximum as
* well.
*/
if (b == (int)nr_buckets)
b--;
buckets[b]++;
nvgpu_rbtree_enum_next(&node, node);
}
total_allocs = 0;
for (i = 0; i < (int)nr_buckets; i++)
total_allocs += buckets[i];
__pstat(s, "Alloc histogram:\n");
/*
* Actually compute the histogram lines.
*/
for (i = 0; i < (int)nr_buckets; i++) {
char this_line[sizeof(histogram_line) + 1];
u64 line_length;
u64 hr_bytes;
const char *hr_suffix;
memset(this_line, 0, sizeof(this_line));
/*
 * Compute the normalized line length. Can't use floating point
* so we will just multiply everything by 1000 and use fixed
* point.
*/
line_length = (1000 * buckets[i]) / total_allocs;
line_length *= sizeof(histogram_line);
line_length /= 1000;
memset(this_line, '+', line_length);
__to_human_readable_bytes(1ULL << (__ffs(pot_min) + i),
&hr_bytes, &hr_suffix);
__pstat(s, " [%-4lld %-4lld] %-2s %5u | %s\n",
hr_bytes, hr_bytes << 1,
hr_suffix, buckets[i], this_line);
}
}
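/*
 * Example of the fixed-point scaling above: if a bucket holds 3 of 12 total
 * allocs, line_length = ((1000 * 3) / 12) * sizeof(histogram_line) / 1000 =
 * 250 * 41 / 1000 = 10, so ten '+' characters are printed for that bucket.
 */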
/**
* nvgpu_kmem_print_stats - Print kmem tracking stats.
*
 * @tracker The tracker to pull data from.
* @s A seq_file to dump info into.
*
* Print stats from a tracker. If @s is non-null then seq_printf() will be
* used with @s. Otherwise the stats are pr_info()ed.
*/
void nvgpu_kmem_print_stats(struct nvgpu_mem_alloc_tracker *tracker,
struct seq_file *s)
{
nvgpu_lock_tracker(tracker);
__pstat(s, "Mem tracker: %s\n\n", tracker->name);
__pstat(s, "Basic Stats:\n");
__pstat(s, " Number of allocs %lld\n",
tracker->nr_allocs);
__pstat(s, " Number of frees %lld\n",
tracker->nr_frees);
print_hr_bytes(s, " Smallest alloc ", tracker->min_alloc);
print_hr_bytes(s, " Largest alloc ", tracker->max_alloc);
print_hr_bytes(s, " Bytes allocated ", tracker->bytes_alloced);
print_hr_bytes(s, " Bytes freed ", tracker->bytes_freed);
print_hr_bytes(s, " Bytes allocated (real) ",
tracker->bytes_alloced_real);
print_hr_bytes(s, " Bytes freed (real) ",
tracker->bytes_freed_real);
__pstat(s, "\n");
print_histogram(tracker, s);
nvgpu_unlock_tracker(tracker);
}
static int __kmem_tracking_show(struct seq_file *s, void *unused)
{
struct nvgpu_mem_alloc_tracker *tracker = s->private;
nvgpu_kmem_print_stats(tracker, s);
return 0;
}
static int __kmem_tracking_open(struct inode *inode, struct file *file)
{
return single_open(file, __kmem_tracking_show, inode->i_private);
}
static const struct file_operations __kmem_tracking_fops = {
.open = __kmem_tracking_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static int __kmem_traces_dump_tracker(struct gk20a *g,
struct nvgpu_mem_alloc_tracker *tracker,
struct seq_file *s)
{
struct nvgpu_rbtree_node *node;
nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
while (node) {
struct nvgpu_mem_alloc *alloc =
nvgpu_mem_alloc_from_rbtree_node(node);
kmem_print_mem_alloc(g, alloc, s);
nvgpu_rbtree_enum_next(&node, node);
}
return 0;
}
static int __kmem_traces_show(struct seq_file *s, void *unused)
{
struct gk20a *g = s->private;
nvgpu_lock_tracker(g->vmallocs);
seq_puts(s, "Oustanding vmallocs:\n");
__kmem_traces_dump_tracker(g, g->vmallocs, s);
seq_puts(s, "\n");
nvgpu_unlock_tracker(g->vmallocs);
nvgpu_lock_tracker(g->kmallocs);
seq_puts(s, "Oustanding kmallocs:\n");
__kmem_traces_dump_tracker(g, g->kmallocs, s);
nvgpu_unlock_tracker(g->kmallocs);
return 0;
}
static int __kmem_traces_open(struct inode *inode, struct file *file)
{
return single_open(file, __kmem_traces_show, inode->i_private);
}
static const struct file_operations __kmem_traces_fops = {
.open = __kmem_traces_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
void nvgpu_kmem_debugfs_init(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct dentry *node;
l->debugfs_kmem = debugfs_create_dir("kmem_tracking", l->debugfs);
if (IS_ERR_OR_NULL(l->debugfs_kmem))
return;
node = debugfs_create_file(g->vmallocs->name, S_IRUGO,
l->debugfs_kmem,
g->vmallocs, &__kmem_tracking_fops);
node = debugfs_create_file(g->kmallocs->name, S_IRUGO,
l->debugfs_kmem,
g->kmallocs, &__kmem_tracking_fops);
node = debugfs_create_file("traces", S_IRUGO,
l->debugfs_kmem,
g, &__kmem_traces_fops);
}

View File

@@ -0,0 +1,23 @@
/*
* Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#ifndef __NVGPU_DEBUG_KMEM_H__
#define __NVGPU_DEBUG_KMEM_H__
struct gk20a;
#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
void nvgpu_kmem_debugfs_init(struct gk20a *g);
#endif
#endif /* __NVGPU_DEBUG_KMEM_H__ */

View File

@@ -0,0 +1,481 @@
/*
* Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#include <nvgpu/enabled.h>
#include "debug_pmu.h"
#include "os_linux.h"
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/uaccess.h>
static int lpwr_debug_show(struct seq_file *s, void *data)
{
struct gk20a *g = s->private;
if (g->ops.pmu.pmu_pg_engines_feature_list &&
g->ops.pmu.pmu_pg_engines_feature_list(g,
PMU_PG_ELPG_ENGINE_ID_GRAPHICS) !=
NVGPU_PMU_GR_FEATURE_MASK_POWER_GATING) {
seq_printf(s, "PSTATE: %u\n"
"RPPG Enabled: %u\n"
"RPPG ref count: %u\n"
"RPPG state: %u\n"
"MSCG Enabled: %u\n"
"MSCG pstate state: %u\n"
"MSCG transition state: %u\n",
g->ops.clk_arb.get_current_pstate(g),
g->elpg_enabled, g->pmu.elpg_refcnt,
g->pmu.elpg_stat, g->mscg_enabled,
g->pmu.mscg_stat, g->pmu.mscg_transition_state);
} else
seq_printf(s, "ELPG Enabled: %u\n"
"ELPG ref count: %u\n"
"ELPG state: %u\n",
g->elpg_enabled, g->pmu.elpg_refcnt,
g->pmu.elpg_stat);
return 0;
}
static int lpwr_debug_open(struct inode *inode, struct file *file)
{
return single_open(file, lpwr_debug_show, inode->i_private);
}
static const struct file_operations lpwr_debug_fops = {
.open = lpwr_debug_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static int mscg_stat_show(struct seq_file *s, void *data)
{
struct gk20a *g = s->private;
u64 total_ingating, total_ungating, residency, divisor, dividend;
struct pmu_pg_stats_data pg_stat_data = { 0 };
int err;
/* Don't unnecessarily power on the device */
if (g->power_on) {
err = gk20a_busy(g);
if (err)
return err;
nvgpu_pmu_get_pg_stats(g,
PMU_PG_ELPG_ENGINE_ID_MS, &pg_stat_data);
gk20a_idle(g);
}
total_ingating = g->pg_ingating_time_us +
(u64)pg_stat_data.ingating_time;
total_ungating = g->pg_ungating_time_us +
(u64)pg_stat_data.ungating_time;
divisor = total_ingating + total_ungating;
/* We compute the residency on a scale of 1000 */
dividend = total_ingating * 1000;
if (divisor)
residency = div64_u64(dividend, divisor);
else
residency = 0;
seq_printf(s,
"Time in MSCG: %llu us\n"
"Time out of MSCG: %llu us\n"
"MSCG residency ratio: %llu\n"
"MSCG Entry Count: %u\n"
"MSCG Avg Entry latency %u\n"
"MSCG Avg Exit latency %u\n",
total_ingating, total_ungating,
residency, pg_stat_data.gating_cnt,
pg_stat_data.avg_entry_latency_us,
pg_stat_data.avg_exit_latency_us);
return 0;
}
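/*
 * Note on the residency scale used above: a value of 1000 means the engine
 * was power gated 100% of the time, e.g. 750 us in-gate and 250 us
 * out-of-gate gives a reported residency of 750 (75.0%).
 */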
static int mscg_stat_open(struct inode *inode, struct file *file)
{
return single_open(file, mscg_stat_show, inode->i_private);
}
static const struct file_operations mscg_stat_fops = {
.open = mscg_stat_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static int mscg_transitions_show(struct seq_file *s, void *data)
{
struct gk20a *g = s->private;
struct pmu_pg_stats_data pg_stat_data = { 0 };
u32 total_gating_cnt;
int err;
if (g->power_on) {
err = gk20a_busy(g);
if (err)
return err;
nvgpu_pmu_get_pg_stats(g,
PMU_PG_ELPG_ENGINE_ID_MS, &pg_stat_data);
gk20a_idle(g);
}
total_gating_cnt = g->pg_gating_cnt + pg_stat_data.gating_cnt;
seq_printf(s, "%u\n", total_gating_cnt);
return 0;
}
static int mscg_transitions_open(struct inode *inode, struct file *file)
{
return single_open(file, mscg_transitions_show, inode->i_private);
}
static const struct file_operations mscg_transitions_fops = {
.open = mscg_transitions_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static int elpg_stat_show(struct seq_file *s, void *data)
{
struct gk20a *g = s->private;
struct pmu_pg_stats_data pg_stat_data = { 0 };
u64 total_ingating, total_ungating, residency, divisor, dividend;
int err;
/* Don't unnecessarily power on the device */
if (g->power_on) {
err = gk20a_busy(g);
if (err)
return err;
nvgpu_pmu_get_pg_stats(g,
PMU_PG_ELPG_ENGINE_ID_GRAPHICS, &pg_stat_data);
gk20a_idle(g);
}
total_ingating = g->pg_ingating_time_us +
(u64)pg_stat_data.ingating_time;
total_ungating = g->pg_ungating_time_us +
(u64)pg_stat_data.ungating_time;
divisor = total_ingating + total_ungating;
/* We compute the residency on a scale of 1000 */
dividend = total_ingating * 1000;
if (divisor)
residency = div64_u64(dividend, divisor);
else
residency = 0;
seq_printf(s,
"Time in ELPG: %llu us\n"
"Time out of ELPG: %llu us\n"
"ELPG residency ratio: %llu\n"
"ELPG Entry Count: %u\n"
"ELPG Avg Entry latency %u us\n"
"ELPG Avg Exit latency %u us\n",
total_ingating, total_ungating,
residency, pg_stat_data.gating_cnt,
pg_stat_data.avg_entry_latency_us,
pg_stat_data.avg_exit_latency_us);
return 0;
}
static int elpg_stat_open(struct inode *inode, struct file *file)
{
return single_open(file, elpg_stat_show, inode->i_private);
}
static const struct file_operations elpg_stat_fops = {
.open = elpg_stat_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static int elpg_transitions_show(struct seq_file *s, void *data)
{
struct gk20a *g = s->private;
struct pmu_pg_stats_data pg_stat_data = { 0 };
u32 total_gating_cnt;
int err;
if (g->power_on) {
err = gk20a_busy(g);
if (err)
return err;
nvgpu_pmu_get_pg_stats(g,
PMU_PG_ELPG_ENGINE_ID_GRAPHICS, &pg_stat_data);
gk20a_idle(g);
}
total_gating_cnt = g->pg_gating_cnt + pg_stat_data.gating_cnt;
seq_printf(s, "%u\n", total_gating_cnt);
return 0;
}
static int elpg_transitions_open(struct inode *inode, struct file *file)
{
return single_open(file, elpg_transitions_show, inode->i_private);
}
static const struct file_operations elpg_transitions_fops = {
.open = elpg_transitions_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static int falc_trace_show(struct seq_file *s, void *data)
{
struct gk20a *g = s->private;
struct nvgpu_pmu *pmu = &g->pmu;
u32 i = 0, j = 0, k, l, m;
char part_str[40];
void *tracebuffer;
char *trace;
u32 *trace1;
/* allocate system memory to copy pmu trace buffer */
tracebuffer = nvgpu_kzalloc(g, GK20A_PMU_TRACE_BUFSIZE);
if (tracebuffer == NULL)
return -ENOMEM;
/* read pmu traces into system memory buffer */
nvgpu_mem_rd_n(g, &pmu->trace_buf,
0, tracebuffer, GK20A_PMU_TRACE_BUFSIZE);
trace = (char *)tracebuffer;
trace1 = (u32 *)tracebuffer;
for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 0x40) {
for (j = 0; j < 0x40; j++)
if (trace1[(i / 4) + j])
break;
if (j == 0x40)
break;
seq_printf(s, "Index %x: ", trace1[(i / 4)]);
l = 0;
m = 0;
while (nvgpu_find_hex_in_string((trace+i+20+m), g, &k)) {
if (k >= 40)
break;
strncpy(part_str, (trace+i+20+m), k);
part_str[k] = 0;
seq_printf(s, "%s0x%x", part_str,
trace1[(i / 4) + 1 + l]);
l++;
m += k + 2;
}
seq_printf(s, "%s", (trace+i+20+m));
}
nvgpu_kfree(g, tracebuffer);
return 0;
}
static int falc_trace_open(struct inode *inode, struct file *file)
{
return single_open(file, falc_trace_show, inode->i_private);
}
static const struct file_operations falc_trace_fops = {
.open = falc_trace_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static int perfmon_events_enable_show(struct seq_file *s, void *data)
{
struct gk20a *g = s->private;
seq_printf(s, "%u\n", g->pmu.perfmon_sampling_enabled ? 1 : 0);
return 0;
}
static int perfmon_events_enable_open(struct inode *inode, struct file *file)
{
return single_open(file, perfmon_events_enable_show, inode->i_private);
}
static ssize_t perfmon_events_enable_write(struct file *file,
const char __user *userbuf, size_t count, loff_t *ppos)
{
struct seq_file *s = file->private_data;
struct gk20a *g = s->private;
unsigned long val = 0;
char buf[40];
int buf_size;
int err;
memset(buf, 0, sizeof(buf));
buf_size = min(count, (sizeof(buf)-1));
if (copy_from_user(buf, userbuf, buf_size))
return -EFAULT;
if (kstrtoul(buf, 10, &val) < 0)
return -EINVAL;
/* Don't turn on gk20a unnecessarily */
if (g->power_on) {
err = gk20a_busy(g);
if (err)
return err;
if (val && !g->pmu.perfmon_sampling_enabled &&
nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) {
g->pmu.perfmon_sampling_enabled = true;
g->ops.pmu.pmu_perfmon_start_sampling(&(g->pmu));
} else if (!val && g->pmu.perfmon_sampling_enabled &&
nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) {
g->pmu.perfmon_sampling_enabled = false;
g->ops.pmu.pmu_perfmon_stop_sampling(&(g->pmu));
}
gk20a_idle(g);
} else {
g->pmu.perfmon_sampling_enabled = val ? true : false;
}
return count;
}
static const struct file_operations perfmon_events_enable_fops = {
.open = perfmon_events_enable_open,
.read = seq_read,
.write = perfmon_events_enable_write,
.llseek = seq_lseek,
.release = single_release,
};
static int perfmon_events_count_show(struct seq_file *s, void *data)
{
struct gk20a *g = s->private;
seq_printf(s, "%lu\n", g->pmu.perfmon_events_cnt);
return 0;
}
static int perfmon_events_count_open(struct inode *inode, struct file *file)
{
return single_open(file, perfmon_events_count_show, inode->i_private);
}
static const struct file_operations perfmon_events_count_fops = {
.open = perfmon_events_count_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static int security_show(struct seq_file *s, void *data)
{
struct gk20a *g = s->private;
seq_printf(s, "%d\n", g->pmu.pmu_mode);
return 0;
}
static int security_open(struct inode *inode, struct file *file)
{
return single_open(file, security_show, inode->i_private);
}
static const struct file_operations security_fops = {
.open = security_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
int gk20a_pmu_debugfs_init(struct gk20a *g)
{
struct dentry *d;
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
d = debugfs_create_file(
"lpwr_debug", S_IRUGO|S_IWUSR, l->debugfs, g,
&lpwr_debug_fops);
if (!d)
goto err_out;
d = debugfs_create_file(
"mscg_residency", S_IRUGO|S_IWUSR, l->debugfs, g,
&mscg_stat_fops);
if (!d)
goto err_out;
d = debugfs_create_file(
"mscg_transitions", S_IRUGO, l->debugfs, g,
&mscg_transitions_fops);
if (!d)
goto err_out;
d = debugfs_create_file(
"elpg_residency", S_IRUGO|S_IWUSR, l->debugfs, g,
&elpg_stat_fops);
if (!d)
goto err_out;
d = debugfs_create_file(
"elpg_transitions", S_IRUGO, l->debugfs, g,
&elpg_transitions_fops);
if (!d)
goto err_out;
d = debugfs_create_file(
"falc_trace", S_IRUGO, l->debugfs, g,
&falc_trace_fops);
if (!d)
goto err_out;
d = debugfs_create_file(
"perfmon_events_enable", S_IRUGO, l->debugfs, g,
&perfmon_events_enable_fops);
if (!d)
goto err_out;
d = debugfs_create_file(
"perfmon_events_count", S_IRUGO, l->debugfs, g,
&perfmon_events_count_fops);
if (!d)
goto err_out;
d = debugfs_create_file(
"pmu_security", S_IRUGO, l->debugfs, g,
&security_fops);
if (!d)
goto err_out;
return 0;
err_out:
pr_err("%s: Failed to make debugfs node\n", __func__);
return -ENOMEM;
}

View File

@@ -0,0 +1,21 @@
/*
* Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#ifndef __NVGPU_DEBUG_PMU_H__
#define __NVGPU_DEBUG_PMU_H__
struct gk20a;
int gk20a_pmu_debugfs_init(struct gk20a *g);
#endif /* __NVGPU_DEBUG_PMU_H__ */

View File

@@ -0,0 +1,80 @@
/*
* Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#include "debug_sched.h"
#include "os_linux.h"
#include <linux/debugfs.h>
#include <linux/seq_file.h>
static int gk20a_sched_debugfs_show(struct seq_file *s, void *unused)
{
struct gk20a *g = s->private;
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct gk20a_sched_ctrl *sched = &l->sched_ctrl;
bool sched_busy = true;
int n = sched->bitmap_size / sizeof(u64);
int i;
int err;
err = gk20a_busy(g);
if (err)
return err;
if (nvgpu_mutex_tryacquire(&sched->busy_lock)) {
sched_busy = false;
nvgpu_mutex_release(&sched->busy_lock);
}
seq_printf(s, "control_locked=%d\n", sched->control_locked);
seq_printf(s, "busy=%d\n", sched_busy);
seq_printf(s, "bitmap_size=%zu\n", sched->bitmap_size);
nvgpu_mutex_acquire(&sched->status_lock);
seq_puts(s, "active_tsg_bitmap\n");
for (i = 0; i < n; i++)
seq_printf(s, "\t0x%016llx\n", sched->active_tsg_bitmap[i]);
seq_puts(s, "recent_tsg_bitmap\n");
for (i = 0; i < n; i++)
seq_printf(s, "\t0x%016llx\n", sched->recent_tsg_bitmap[i]);
nvgpu_mutex_release(&sched->status_lock);
gk20a_idle(g);
return 0;
}
static int gk20a_sched_debugfs_open(struct inode *inode, struct file *file)
{
return single_open(file, gk20a_sched_debugfs_show, inode->i_private);
}
static const struct file_operations gk20a_sched_debugfs_fops = {
.open = gk20a_sched_debugfs_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
void gk20a_sched_debugfs_init(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
debugfs_create_file("sched_ctrl", S_IRUGO, l->debugfs,
g, &gk20a_sched_debugfs_fops);
}

View File

@@ -0,0 +1,21 @@
/*
* Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#ifndef __NVGPU_DEBUG_SCHED_H__
#define __NVGPU_DEBUG_SCHED_H__
struct gk20a;
void gk20a_sched_debugfs_init(struct gk20a *g);
#endif /* __NVGPU_DEBUG_SCHED_H__ */

View File

@@ -0,0 +1,176 @@
/*
* Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#include <nvgpu/types.h>
#include <nvgpu/xve.h>
#include "debug_xve.h"
#include "os_linux.h"
#include <linux/debugfs.h>
#include <linux/uaccess.h>
static ssize_t xve_link_speed_write(struct file *filp,
const char __user *buff,
size_t len, loff_t *off)
{
struct gk20a *g = ((struct seq_file *)filp->private_data)->private;
char kbuff[16];
u32 buff_size, check_len;
u32 link_speed = 0;
int ret;
	buff_size = min_t(size_t, sizeof(kbuff) - 1, len);
	memset(kbuff, 0, sizeof(kbuff));
if (copy_from_user(kbuff, buff, buff_size))
return -EFAULT;
check_len = strlen("Gen1");
if (strncmp(kbuff, "Gen1", check_len) == 0)
link_speed = GPU_XVE_SPEED_2P5;
else if (strncmp(kbuff, "Gen2", check_len) == 0)
link_speed = GPU_XVE_SPEED_5P0;
else if (strncmp(kbuff, "Gen3", check_len) == 0)
link_speed = GPU_XVE_SPEED_8P0;
else
nvgpu_err(g, "%s: Unknown PCIe speed: %s",
__func__, kbuff);
if (!link_speed)
return -EINVAL;
/* Brief pause... To help rate limit this. */
nvgpu_msleep(250);
/*
* And actually set the speed. Yay.
*/
ret = g->ops.xve.set_speed(g, link_speed);
if (ret)
return ret;
return len;
}
static int xve_link_speed_show(struct seq_file *s, void *unused)
{
struct gk20a *g = s->private;
u32 speed;
int err;
err = g->ops.xve.get_speed(g, &speed);
if (err)
return err;
seq_printf(s, "Current PCIe speed:\n %s\n", xve_speed_to_str(speed));
return 0;
}
static int xve_link_speed_open(struct inode *inode, struct file *file)
{
return single_open(file, xve_link_speed_show, inode->i_private);
}
static const struct file_operations xve_link_speed_fops = {
.open = xve_link_speed_open,
.read = seq_read,
.write = xve_link_speed_write,
.llseek = seq_lseek,
.release = single_release,
};
static int xve_available_speeds_show(struct seq_file *s, void *unused)
{
struct gk20a *g = s->private;
u32 available_speeds;
g->ops.xve.available_speeds(g, &available_speeds);
seq_puts(s, "Available PCIe bus speeds:\n");
if (available_speeds & GPU_XVE_SPEED_2P5)
seq_puts(s, " Gen1\n");
if (available_speeds & GPU_XVE_SPEED_5P0)
seq_puts(s, " Gen2\n");
if (available_speeds & GPU_XVE_SPEED_8P0)
seq_puts(s, " Gen3\n");
return 0;
}
static int xve_available_speeds_open(struct inode *inode, struct file *file)
{
return single_open(file, xve_available_speeds_show, inode->i_private);
}
static const struct file_operations xve_available_speeds_fops = {
.open = xve_available_speeds_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static int xve_link_control_status_show(struct seq_file *s, void *unused)
{
struct gk20a *g = s->private;
u32 link_status;
link_status = g->ops.xve.get_link_control_status(g);
seq_printf(s, "0x%08x\n", link_status);
return 0;
}
static int xve_link_control_status_open(struct inode *inode, struct file *file)
{
return single_open(file, xve_link_control_status_show, inode->i_private);
}
static const struct file_operations xve_link_control_status_fops = {
.open = xve_link_control_status_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
int nvgpu_xve_debugfs_init(struct gk20a *g)
{
int err = -ENODEV;
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct dentry *gpu_root = l->debugfs;
l->debugfs_xve = debugfs_create_dir("xve", gpu_root);
if (IS_ERR_OR_NULL(l->debugfs_xve))
goto fail;
/*
* These are just debug nodes. If they fail to get made it's not worth
* worrying the higher level SW.
*/
debugfs_create_file("link_speed", S_IRUGO,
l->debugfs_xve, g,
&xve_link_speed_fops);
debugfs_create_file("available_speeds", S_IRUGO,
l->debugfs_xve, g,
&xve_available_speeds_fops);
debugfs_create_file("link_control_status", S_IRUGO,
l->debugfs_xve, g,
&xve_link_control_status_fops);
err = 0;
fail:
return err;
}
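/*
 * Editor's note (illustrative, not part of the driver): with debugfs mounted
 * at /sys/kernel/debug, the nodes created above can be exercised from
 * userspace, for example:
 *
 *	cat /sys/kernel/debug/<gpu>/xve/available_speeds
 *	echo Gen2 > /sys/kernel/debug/<gpu>/xve/link_speed
 *
 * The exact <gpu> directory name depends on the per-GPU debugfs root created
 * elsewhere in the Linux layer.
 */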


@@ -0,0 +1,21 @@
/*
* Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#ifndef __NVGPU_DEBUG_XVE_H__
#define __NVGPU_DEBUG_XVE_H__
struct gk20a;
int nvgpu_xve_debugfs_init(struct gk20a *g);
#endif /* __NVGPU_DEBUG_XVE_H__ */


@@ -0,0 +1,694 @@
/*
* Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/dma-mapping.h>
#include <linux/version.h>
#include <nvgpu/log.h>
#include <nvgpu/dma.h>
#include <nvgpu/lock.h>
#include <nvgpu/bug.h>
#include <nvgpu/gmmu.h>
#include <nvgpu/kmem.h>
#include <nvgpu/enabled.h>
#include <nvgpu/vidmem.h>
#include <nvgpu/linux/dma.h>
#include <nvgpu/linux/vidmem.h>
#include "gk20a/gk20a.h"
#include "platform_gk20a.h"
#include "os_linux.h"
#ifdef __DMA_ATTRS_LONGS
#define NVGPU_DEFINE_DMA_ATTRS(x) \
struct dma_attrs x = { \
.flags = { [0 ... __DMA_ATTRS_LONGS-1] = 0 }, \
}
#define NVGPU_DMA_ATTR(attrs) &attrs
#else
#define NVGPU_DEFINE_DMA_ATTRS(attrs) unsigned long attrs = 0
#define NVGPU_DMA_ATTR(attrs) attrs
#endif
/*
* Enough to hold all the possible flags in string form. When a new flag is
* added it must be added here as well!!
*/
#define NVGPU_DMA_STR_SIZE \
sizeof("NO_KERNEL_MAPPING FORCE_CONTIGUOUS")
/*
* The returned string is kmalloc()ed here but must be freed by the caller.
*/
static char *nvgpu_dma_flags_to_str(struct gk20a *g, unsigned long flags)
{
char *buf = nvgpu_kzalloc(g, NVGPU_DMA_STR_SIZE);
int bytes_available = NVGPU_DMA_STR_SIZE;
/*
* Return the empty buffer if there's no flags. Makes it easier on the
* calling code to just print it instead of any if (NULL) type logic.
*/
if (!flags)
return buf;
#define APPEND_FLAG(flag, str_flag) \
do { \
if (flags & flag) { \
strncat(buf, str_flag, bytes_available); \
bytes_available -= strlen(str_flag); \
} \
} while (0)
APPEND_FLAG(NVGPU_DMA_NO_KERNEL_MAPPING, "NO_KERNEL_MAPPING ");
APPEND_FLAG(NVGPU_DMA_FORCE_CONTIGUOUS, "FORCE_CONTIGUOUS ");
#undef APPEND_FLAG
return buf;
}
/**
* __dma_dbg - Debug print for DMA allocs and frees.
*
* @g - The GPU.
* @size - The requested size of the alloc (size_t).
* @flags - The flags (unsigned long).
* @type - A string describing the type (i.e: sysmem or vidmem).
* @what - A string with 'alloc' or 'free'.
*
* @flags is the DMA flags. If there are none or it doesn't make sense to print
* flags just pass 0.
*
* Please use dma_dbg_alloc() and dma_dbg_free() instead of this function.
*/
static void __dma_dbg(struct gk20a *g, size_t size, unsigned long flags,
const char *type, const char *what)
{
char *flags_str = NULL;
/*
* Don't bother making the flags_str if debugging is
* not enabled. This saves a malloc and a free.
*/
if (!nvgpu_log_mask_enabled(g, gpu_dbg_dma))
return;
flags_str = nvgpu_dma_flags_to_str(g, flags);
__nvgpu_log_dbg(g, gpu_dbg_dma,
__func__, __LINE__,
"DMA %s: [%s] size=%-7zu "
"aligned=%-7zu total=%-10llukB %s",
what, type,
size, PAGE_ALIGN(size),
g->dma_memory_used >> 10,
flags_str);
if (flags_str)
nvgpu_kfree(g, flags_str);
}
#define dma_dbg_alloc(g, size, flags, type) \
__dma_dbg(g, size, flags, type, "alloc")
#define dma_dbg_free(g, size, flags, type) \
__dma_dbg(g, size, flags, type, "free")
/*
* For after the DMA alloc is done.
*/
#define __dma_dbg_done(g, size, type, what) \
	nvgpu_log(g, gpu_dbg_dma, \
		"DMA %s: [%s] size=%-7zu Done!", \
		what, type, size);
#define dma_dbg_alloc_done(g, size, type) \
__dma_dbg_done(g, size, type, "alloc")
#define dma_dbg_free_done(g, size, type) \
__dma_dbg_done(g, size, type, "free")
#if defined(CONFIG_GK20A_VIDMEM)
static u64 __nvgpu_dma_alloc(struct nvgpu_allocator *allocator, u64 at,
size_t size)
{
u64 addr = 0;
if (at)
addr = nvgpu_alloc_fixed(allocator, at, size, 0);
else
addr = nvgpu_alloc(allocator, size);
return addr;
}
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0)
static void nvgpu_dma_flags_to_attrs(unsigned long *attrs,
unsigned long flags)
#define ATTR_ARG(x) *x
#else
static void nvgpu_dma_flags_to_attrs(struct dma_attrs *attrs,
unsigned long flags)
#define ATTR_ARG(x) x
#endif
{
if (flags & NVGPU_DMA_NO_KERNEL_MAPPING)
dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, ATTR_ARG(attrs));
if (flags & NVGPU_DMA_FORCE_CONTIGUOUS)
dma_set_attr(DMA_ATTR_FORCE_CONTIGUOUS, ATTR_ARG(attrs));
#undef ATTR_ARG
}
int nvgpu_dma_alloc(struct gk20a *g, size_t size, struct nvgpu_mem *mem)
{
return nvgpu_dma_alloc_flags(g, 0, size, mem);
}
int nvgpu_dma_alloc_flags(struct gk20a *g, unsigned long flags, size_t size,
struct nvgpu_mem *mem)
{
if (!nvgpu_is_enabled(g, NVGPU_MM_UNIFIED_MEMORY)) {
/*
* Force the no-kernel-mapping flag on because we don't support
* the lack of it for vidmem - the user should not care when
* using nvgpu_gmmu_alloc_map and it's vidmem, or if there's a
* difference, the user should use the flag explicitly anyway.
*
* Incoming flags are ignored here, since bits other than the
* no-kernel-mapping flag are ignored by the vidmem mapping
* functions anyway.
*/
int err = nvgpu_dma_alloc_flags_vid(g,
NVGPU_DMA_NO_KERNEL_MAPPING,
size, mem);
if (!err)
return 0;
/*
* Fall back to sysmem (which may then also fail) in case
* vidmem is exhausted.
*/
}
return nvgpu_dma_alloc_flags_sys(g, flags, size, mem);
}
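/*
 * Editor's sketch (illustrative only, not part of this file): a typical
 * caller pairs one of the allocation variants above with nvgpu_dma_free().
 * The function name, buffer name and size below are made up for
 * illustration.
 */
static int __maybe_unused example_alloc_scratch(struct gk20a *g,
						struct nvgpu_mem *scratch)
{
	/* Plain sysmem allocation with a kernel CPU mapping. */
	int err = nvgpu_dma_alloc_sys(g, 64 * 1024, scratch);

	if (err)
		return err;

	/* ... use scratch->cpu_va for up to scratch->aligned_size bytes ... */

	nvgpu_dma_free(g, scratch);
	return 0;
}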
int nvgpu_dma_alloc_sys(struct gk20a *g, size_t size, struct nvgpu_mem *mem)
{
return nvgpu_dma_alloc_flags_sys(g, 0, size, mem);
}
int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags,
size_t size, struct nvgpu_mem *mem)
{
struct device *d = dev_from_gk20a(g);
int err;
dma_addr_t iova;
NVGPU_DEFINE_DMA_ATTRS(dma_attrs);
void *alloc_ret;
if (nvgpu_mem_is_valid(mem)) {
nvgpu_warn(g, "memory leak !!");
WARN_ON(1);
}
/*
* WAR for IO coherent chips: the DMA API does not seem to generate
* mappings that work correctly. Unclear why - Bug ID: 2040115.
*
* Basically we just tell the DMA API not to map with NO_KERNEL_MAPPING
* and then make a vmap() ourselves.
*/
if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
flags |= NVGPU_DMA_NO_KERNEL_MAPPING;
/*
* Before the debug print so we see this in the total. But during
* cleanup in the fail path this has to be subtracted.
*/
g->dma_memory_used += PAGE_ALIGN(size);
dma_dbg_alloc(g, size, flags, "sysmem");
/*
* Save the old size but for actual allocation purposes the size is
* going to be page aligned.
*/
mem->size = size;
size = PAGE_ALIGN(size);
nvgpu_dma_flags_to_attrs(&dma_attrs, flags);
alloc_ret = dma_alloc_attrs(d, size, &iova,
GFP_KERNEL|__GFP_ZERO,
NVGPU_DMA_ATTR(dma_attrs));
	if (!alloc_ret) {
		g->dma_memory_used -= size;
		return -ENOMEM;
	}
if (flags & NVGPU_DMA_NO_KERNEL_MAPPING) {
mem->priv.pages = alloc_ret;
err = nvgpu_get_sgtable_from_pages(g, &mem->priv.sgt,
mem->priv.pages,
iova, size);
} else {
mem->cpu_va = alloc_ret;
err = nvgpu_get_sgtable_attrs(g, &mem->priv.sgt, mem->cpu_va,
iova, size, flags);
}
if (err)
goto fail_free_dma;
if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) {
mem->cpu_va = vmap(mem->priv.pages,
size >> PAGE_SHIFT,
0, PAGE_KERNEL);
if (!mem->cpu_va) {
err = -ENOMEM;
goto fail_free_sgt;
}
}
mem->aligned_size = size;
mem->aperture = APERTURE_SYSMEM;
mem->priv.flags = flags;
dma_dbg_alloc_done(g, mem->size, "sysmem");
return 0;
fail_free_sgt:
nvgpu_free_sgtable(g, &mem->priv.sgt);
fail_free_dma:
dma_free_attrs(d, size, alloc_ret, iova, NVGPU_DMA_ATTR(dma_attrs));
mem->cpu_va = NULL;
mem->priv.sgt = NULL;
mem->size = 0;
	g->dma_memory_used -= size;
return err;
}
int nvgpu_dma_alloc_vid(struct gk20a *g, size_t size, struct nvgpu_mem *mem)
{
return nvgpu_dma_alloc_flags_vid(g,
NVGPU_DMA_NO_KERNEL_MAPPING, size, mem);
}
int nvgpu_dma_alloc_flags_vid(struct gk20a *g, unsigned long flags,
size_t size, struct nvgpu_mem *mem)
{
return nvgpu_dma_alloc_flags_vid_at(g, flags, size, mem, 0);
}
int nvgpu_dma_alloc_flags_vid_at(struct gk20a *g, unsigned long flags,
size_t size, struct nvgpu_mem *mem, u64 at)
{
#if defined(CONFIG_GK20A_VIDMEM)
u64 addr;
int err;
struct nvgpu_allocator *vidmem_alloc = g->mm.vidmem.cleared ?
&g->mm.vidmem.allocator :
&g->mm.vidmem.bootstrap_allocator;
int before_pending;
if (nvgpu_mem_is_valid(mem)) {
nvgpu_warn(g, "memory leak !!");
WARN_ON(1);
}
dma_dbg_alloc(g, size, flags, "vidmem");
mem->size = size;
size = PAGE_ALIGN(size);
if (!nvgpu_alloc_initialized(&g->mm.vidmem.allocator))
return -ENOSYS;
/*
* Our own allocator doesn't have any flags yet, and we can't
* kernel-map these, so require explicit flags.
*/
WARN_ON(flags != NVGPU_DMA_NO_KERNEL_MAPPING);
nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex);
before_pending = atomic64_read(&g->mm.vidmem.bytes_pending.atomic_var);
addr = __nvgpu_dma_alloc(vidmem_alloc, at, size);
nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex);
if (!addr) {
/*
* If memory is known to be freed soon, let the user know that
* it may be available after a while.
*/
if (before_pending)
return -EAGAIN;
else
return -ENOMEM;
}
if (at)
mem->mem_flags |= NVGPU_MEM_FLAG_FIXED;
mem->priv.sgt = nvgpu_kzalloc(g, sizeof(struct sg_table));
if (!mem->priv.sgt) {
err = -ENOMEM;
goto fail_physfree;
}
err = sg_alloc_table(mem->priv.sgt, 1, GFP_KERNEL);
if (err)
goto fail_kfree;
nvgpu_vidmem_set_page_alloc(mem->priv.sgt->sgl, addr);
sg_set_page(mem->priv.sgt->sgl, NULL, size, 0);
mem->aligned_size = size;
mem->aperture = APERTURE_VIDMEM;
mem->vidmem_alloc = (struct nvgpu_page_alloc *)(uintptr_t)addr;
mem->allocator = vidmem_alloc;
mem->priv.flags = flags;
nvgpu_init_list_node(&mem->clear_list_entry);
dma_dbg_alloc_done(g, mem->size, "vidmem");
return 0;
fail_kfree:
nvgpu_kfree(g, mem->priv.sgt);
fail_physfree:
nvgpu_free(&g->mm.vidmem.allocator, addr);
mem->size = 0;
return err;
#else
return -ENOSYS;
#endif
}
int nvgpu_dma_alloc_map(struct vm_gk20a *vm, size_t size,
struct nvgpu_mem *mem)
{
return nvgpu_dma_alloc_map_flags(vm, 0, size, mem);
}
int nvgpu_dma_alloc_map_flags(struct vm_gk20a *vm, unsigned long flags,
size_t size, struct nvgpu_mem *mem)
{
if (!nvgpu_is_enabled(gk20a_from_vm(vm), NVGPU_MM_UNIFIED_MEMORY)) {
/*
* Force the no-kernel-mapping flag on because we don't support
* the lack of it for vidmem - the user should not care when
* using nvgpu_dma_alloc_map and it's vidmem, or if there's a
* difference, the user should use the flag explicitly anyway.
*/
int err = nvgpu_dma_alloc_map_flags_vid(vm,
flags | NVGPU_DMA_NO_KERNEL_MAPPING,
size, mem);
if (!err)
return 0;
/*
* Fall back to sysmem (which may then also fail) in case
* vidmem is exhausted.
*/
}
return nvgpu_dma_alloc_map_flags_sys(vm, flags, size, mem);
}
int nvgpu_dma_alloc_map_sys(struct vm_gk20a *vm, size_t size,
struct nvgpu_mem *mem)
{
return nvgpu_dma_alloc_map_flags_sys(vm, 0, size, mem);
}
int nvgpu_dma_alloc_map_flags_sys(struct vm_gk20a *vm, unsigned long flags,
size_t size, struct nvgpu_mem *mem)
{
int err = nvgpu_dma_alloc_flags_sys(vm->mm->g, flags, size, mem);
if (err)
return err;
mem->gpu_va = nvgpu_gmmu_map(vm, mem, size, 0,
gk20a_mem_flag_none, false,
mem->aperture);
if (!mem->gpu_va) {
err = -ENOMEM;
goto fail_free;
}
return 0;
fail_free:
nvgpu_dma_free(vm->mm->g, mem);
return err;
}
int nvgpu_dma_alloc_map_vid(struct vm_gk20a *vm, size_t size,
struct nvgpu_mem *mem)
{
return nvgpu_dma_alloc_map_flags_vid(vm,
NVGPU_DMA_NO_KERNEL_MAPPING, size, mem);
}
int nvgpu_dma_alloc_map_flags_vid(struct vm_gk20a *vm, unsigned long flags,
size_t size, struct nvgpu_mem *mem)
{
int err = nvgpu_dma_alloc_flags_vid(vm->mm->g, flags, size, mem);
if (err)
return err;
mem->gpu_va = nvgpu_gmmu_map(vm, mem, size, 0,
gk20a_mem_flag_none, false,
mem->aperture);
if (!mem->gpu_va) {
err = -ENOMEM;
goto fail_free;
}
return 0;
fail_free:
nvgpu_dma_free(vm->mm->g, mem);
return err;
}
static void nvgpu_dma_free_sys(struct gk20a *g, struct nvgpu_mem *mem)
{
struct device *d = dev_from_gk20a(g);
g->dma_memory_used -= mem->aligned_size;
dma_dbg_free(g, mem->size, mem->priv.flags, "sysmem");
if (!(mem->mem_flags & NVGPU_MEM_FLAG_SHADOW_COPY) &&
!(mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA) &&
(mem->cpu_va || mem->priv.pages)) {
/*
* Free side of WAR for bug 2040115.
*/
if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
vunmap(mem->cpu_va);
if (mem->priv.flags) {
NVGPU_DEFINE_DMA_ATTRS(dma_attrs);
nvgpu_dma_flags_to_attrs(&dma_attrs, mem->priv.flags);
if (mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) {
dma_free_attrs(d, mem->aligned_size, mem->priv.pages,
sg_dma_address(mem->priv.sgt->sgl),
NVGPU_DMA_ATTR(dma_attrs));
} else {
dma_free_attrs(d, mem->aligned_size, mem->cpu_va,
sg_dma_address(mem->priv.sgt->sgl),
NVGPU_DMA_ATTR(dma_attrs));
}
} else {
dma_free_coherent(d, mem->aligned_size, mem->cpu_va,
sg_dma_address(mem->priv.sgt->sgl));
}
mem->cpu_va = NULL;
mem->priv.pages = NULL;
}
/*
* When this flag is set we expect that pages is still populated but not
* by the DMA API.
*/
if (mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA)
nvgpu_kfree(g, mem->priv.pages);
if (mem->priv.sgt)
nvgpu_free_sgtable(g, &mem->priv.sgt);
dma_dbg_free_done(g, mem->size, "sysmem");
mem->size = 0;
mem->aligned_size = 0;
mem->aperture = APERTURE_INVALID;
}
static void nvgpu_dma_free_vid(struct gk20a *g, struct nvgpu_mem *mem)
{
#if defined(CONFIG_GK20A_VIDMEM)
size_t mem_size = mem->size;
dma_dbg_free(g, mem->size, mem->priv.flags, "vidmem");
/* Sanity check - only this supported when allocating. */
WARN_ON(mem->priv.flags != NVGPU_DMA_NO_KERNEL_MAPPING);
if (mem->mem_flags & NVGPU_MEM_FLAG_USER_MEM) {
int err = nvgpu_vidmem_clear_list_enqueue(g, mem);
/*
* If there's an error here then that means we can't clear the
* vidmem. That's too bad; however, we still own the nvgpu_mem
* buf so we have to free that.
*
* We don't need to worry about the vidmem allocator itself
* since when that gets cleaned up in the driver shutdown path
* all the outstanding allocs are force freed.
*/
if (err)
nvgpu_kfree(g, mem);
} else {
nvgpu_memset(g, mem, 0, 0, mem->aligned_size);
nvgpu_free(mem->allocator,
(u64)nvgpu_vidmem_get_page_alloc(mem->priv.sgt->sgl));
nvgpu_free_sgtable(g, &mem->priv.sgt);
mem->size = 0;
mem->aligned_size = 0;
mem->aperture = APERTURE_INVALID;
}
dma_dbg_free_done(g, mem_size, "vidmem");
#endif
}
void nvgpu_dma_free(struct gk20a *g, struct nvgpu_mem *mem)
{
switch (mem->aperture) {
case APERTURE_SYSMEM:
return nvgpu_dma_free_sys(g, mem);
case APERTURE_VIDMEM:
return nvgpu_dma_free_vid(g, mem);
default:
break; /* like free() on "null" memory */
}
}
void nvgpu_dma_unmap_free(struct vm_gk20a *vm, struct nvgpu_mem *mem)
{
if (mem->gpu_va)
nvgpu_gmmu_unmap(vm, mem, mem->gpu_va);
mem->gpu_va = 0;
nvgpu_dma_free(vm->mm->g, mem);
}
int nvgpu_get_sgtable_attrs(struct gk20a *g, struct sg_table **sgt,
void *cpuva, u64 iova, size_t size, unsigned long flags)
{
int err = 0;
struct sg_table *tbl;
NVGPU_DEFINE_DMA_ATTRS(dma_attrs);
tbl = nvgpu_kzalloc(g, sizeof(struct sg_table));
if (!tbl) {
err = -ENOMEM;
goto fail;
}
nvgpu_dma_flags_to_attrs(&dma_attrs, flags);
err = dma_get_sgtable_attrs(dev_from_gk20a(g), tbl, cpuva, iova,
size, NVGPU_DMA_ATTR(dma_attrs));
if (err)
goto fail;
sg_dma_address(tbl->sgl) = iova;
*sgt = tbl;
return 0;
fail:
if (tbl)
nvgpu_kfree(g, tbl);
return err;
}
int nvgpu_get_sgtable(struct gk20a *g, struct sg_table **sgt,
void *cpuva, u64 iova, size_t size)
{
return nvgpu_get_sgtable_attrs(g, sgt, cpuva, iova, size, 0);
}
int nvgpu_get_sgtable_from_pages(struct gk20a *g, struct sg_table **sgt,
struct page **pages, u64 iova, size_t size)
{
int err = 0;
struct sg_table *tbl;
tbl = nvgpu_kzalloc(g, sizeof(struct sg_table));
if (!tbl) {
err = -ENOMEM;
goto fail;
}
err = sg_alloc_table_from_pages(tbl, pages,
DIV_ROUND_UP(size, PAGE_SIZE),
0, size, GFP_KERNEL);
if (err)
goto fail;
sg_dma_address(tbl->sgl) = iova;
*sgt = tbl;
return 0;
fail:
if (tbl)
nvgpu_kfree(g, tbl);
return err;
}
void nvgpu_free_sgtable(struct gk20a *g, struct sg_table **sgt)
{
sg_free_table(*sgt);
nvgpu_kfree(g, *sgt);
*sgt = NULL;
}
bool nvgpu_iommuable(struct gk20a *g)
{
#ifdef CONFIG_TEGRA_GK20A
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
/*
* Check against the nvgpu device to see if it's been marked as
* IOMMU'able.
*/
if (!device_is_iommuable(l->dev))
return false;
#endif
return true;
}


@@ -0,0 +1,218 @@
/*
* Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/device.h>
#include <linux/dma-buf.h>
#include <linux/scatterlist.h>
#include <nvgpu/comptags.h>
#include <nvgpu/enabled.h>
#include <nvgpu/linux/vm.h>
#include <nvgpu/linux/vidmem.h>
#include "gk20a/gk20a.h"
#include "platform_gk20a.h"
#include "dmabuf.h"
#include "os_linux.h"
static void gk20a_mm_delete_priv(void *_priv)
{
struct gk20a_buffer_state *s, *s_tmp;
struct gk20a_dmabuf_priv *priv = _priv;
struct gk20a *g;
if (!priv)
return;
g = priv->g;
if (priv->comptags.allocated && priv->comptags.lines) {
BUG_ON(!priv->comptag_allocator);
gk20a_comptaglines_free(priv->comptag_allocator,
priv->comptags.offset,
priv->comptags.lines);
}
/* Free buffer states */
nvgpu_list_for_each_entry_safe(s, s_tmp, &priv->states,
gk20a_buffer_state, list) {
gk20a_fence_put(s->fence);
nvgpu_list_del(&s->list);
nvgpu_kfree(g, s);
}
nvgpu_kfree(g, priv);
}
enum nvgpu_aperture gk20a_dmabuf_aperture(struct gk20a *g,
struct dma_buf *dmabuf)
{
struct gk20a *buf_owner = nvgpu_vidmem_buf_owner(dmabuf);
bool unified_memory = nvgpu_is_enabled(g, NVGPU_MM_UNIFIED_MEMORY);
if (buf_owner == NULL) {
/* Not nvgpu-allocated, assume system memory */
return APERTURE_SYSMEM;
} else if (WARN_ON(buf_owner == g && unified_memory)) {
/* Looks like our video memory, but this gpu doesn't support
* it. Warn about a bug and bail out */
nvgpu_warn(g,
"dmabuf is our vidmem but we don't have local vidmem");
return APERTURE_INVALID;
} else if (buf_owner != g) {
/* Someone else's vidmem */
return APERTURE_INVALID;
} else {
/* Yay, buf_owner == g */
return APERTURE_VIDMEM;
}
}
struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf,
struct dma_buf_attachment **attachment)
{
struct gk20a_dmabuf_priv *priv;
priv = dma_buf_get_drvdata(dmabuf, dev);
if (WARN_ON(!priv))
return ERR_PTR(-EINVAL);
nvgpu_mutex_acquire(&priv->lock);
if (priv->pin_count == 0) {
priv->attach = dma_buf_attach(dmabuf, dev);
if (IS_ERR(priv->attach)) {
nvgpu_mutex_release(&priv->lock);
return (struct sg_table *)priv->attach;
}
priv->sgt = dma_buf_map_attachment(priv->attach,
DMA_BIDIRECTIONAL);
if (IS_ERR(priv->sgt)) {
dma_buf_detach(dmabuf, priv->attach);
nvgpu_mutex_release(&priv->lock);
return priv->sgt;
}
}
priv->pin_count++;
nvgpu_mutex_release(&priv->lock);
*attachment = priv->attach;
return priv->sgt;
}
void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf,
struct dma_buf_attachment *attachment,
struct sg_table *sgt)
{
struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
dma_addr_t dma_addr;
if (IS_ERR(priv) || !priv)
return;
nvgpu_mutex_acquire(&priv->lock);
WARN_ON(priv->sgt != sgt);
WARN_ON(priv->attach != attachment);
priv->pin_count--;
WARN_ON(priv->pin_count < 0);
dma_addr = sg_dma_address(priv->sgt->sgl);
if (priv->pin_count == 0) {
dma_buf_unmap_attachment(priv->attach, priv->sgt,
DMA_BIDIRECTIONAL);
dma_buf_detach(dmabuf, priv->attach);
}
nvgpu_mutex_release(&priv->lock);
}
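/*
 * Editor's sketch (illustrative only, not part of this file): gk20a_mm_pin()
 * and gk20a_mm_unpin() are reference counted per dmabuf, so every successful
 * pin must eventually be balanced by one unpin with the same attachment and
 * sg_table. The function name below is made up for illustration.
 */
static int __maybe_unused example_pin_and_use(struct device *dev,
					      struct dma_buf *dmabuf)
{
	struct dma_buf_attachment *attachment;
	struct sg_table *sgt;

	sgt = gk20a_mm_pin(dev, dmabuf, &attachment);
	if (IS_ERR(sgt))
		return PTR_ERR(sgt);

	/* ... program GMMU mappings from sgt ... */

	gk20a_mm_unpin(dev, dmabuf, attachment, sgt);
	return 0;
}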
int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev)
{
struct gk20a *g = gk20a_get_platform(dev)->g;
struct gk20a_dmabuf_priv *priv;
priv = dma_buf_get_drvdata(dmabuf, dev);
if (likely(priv))
return 0;
nvgpu_mutex_acquire(&g->mm.priv_lock);
priv = dma_buf_get_drvdata(dmabuf, dev);
if (priv)
goto priv_exist_or_err;
priv = nvgpu_kzalloc(g, sizeof(*priv));
if (!priv) {
priv = ERR_PTR(-ENOMEM);
goto priv_exist_or_err;
}
nvgpu_mutex_init(&priv->lock);
nvgpu_init_list_node(&priv->states);
priv->g = g;
dma_buf_set_drvdata(dmabuf, dev, priv, gk20a_mm_delete_priv);
priv_exist_or_err:
nvgpu_mutex_release(&g->mm.priv_lock);
if (IS_ERR(priv))
return -ENOMEM;
return 0;
}
int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct gk20a *g,
u64 offset, struct gk20a_buffer_state **state)
{
int err = 0;
struct gk20a_dmabuf_priv *priv;
struct gk20a_buffer_state *s;
struct device *dev = dev_from_gk20a(g);
if (WARN_ON(offset >= (u64)dmabuf->size))
return -EINVAL;
err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev);
if (err)
return err;
priv = dma_buf_get_drvdata(dmabuf, dev);
if (WARN_ON(!priv))
return -ENOSYS;
nvgpu_mutex_acquire(&priv->lock);
nvgpu_list_for_each_entry(s, &priv->states, gk20a_buffer_state, list)
if (s->offset == offset)
goto out;
/* State not found, create state. */
s = nvgpu_kzalloc(g, sizeof(*s));
if (!s) {
err = -ENOMEM;
goto out;
}
s->offset = offset;
nvgpu_init_list_node(&s->list);
nvgpu_mutex_init(&s->lock);
nvgpu_list_add_tail(&s->list, &priv->states);
out:
nvgpu_mutex_release(&priv->lock);
if (!err)
*state = s;
return err;
}


@@ -0,0 +1,62 @@
/*
* Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __COMMON_LINUX_DMABUF_H__
#define __COMMON_LINUX_DMABUF_H__
#include <nvgpu/comptags.h>
#include <nvgpu/list.h>
#include <nvgpu/lock.h>
#include <nvgpu/gmmu.h>
struct sg_table;
struct dma_buf;
struct dma_buf_attachment;
struct device;
struct gk20a;
struct gk20a_buffer_state;
struct gk20a_dmabuf_priv {
struct nvgpu_mutex lock;
struct gk20a *g;
struct gk20a_comptag_allocator *comptag_allocator;
struct gk20a_comptags comptags;
struct dma_buf_attachment *attach;
struct sg_table *sgt;
int pin_count;
struct nvgpu_list_node states;
u64 buffer_id;
};
struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf,
struct dma_buf_attachment **attachment);
void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf,
struct dma_buf_attachment *attachment,
struct sg_table *sgt);
int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev);
int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct gk20a *g,
u64 offset, struct gk20a_buffer_state **state);
#endif


@@ -0,0 +1,334 @@
/*
* Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/reboot.h>
#include <linux/dma-mapping.h>
#include <linux/mm.h>
#include <uapi/linux/nvgpu.h>
#include <nvgpu/defaults.h>
#include <nvgpu/kmem.h>
#include <nvgpu/nvgpu_common.h>
#include <nvgpu/soc.h>
#include <nvgpu/bug.h>
#include <nvgpu/enabled.h>
#include <nvgpu/debug.h>
#include <nvgpu/sizes.h>
#include "gk20a/gk20a.h"
#include "platform_gk20a.h"
#include "module.h"
#include "os_linux.h"
#include "sysfs.h"
#include "ioctl.h"
#include "gk20a/regops_gk20a.h"
#define EMC3D_DEFAULT_RATIO 750
void nvgpu_kernel_restart(void *cmd)
{
kernel_restart(cmd);
}
static void nvgpu_init_vars(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct device *dev = dev_from_gk20a(g);
struct gk20a_platform *platform = dev_get_drvdata(dev);
nvgpu_cond_init(&l->sw_irq_stall_last_handled_wq);
nvgpu_cond_init(&l->sw_irq_nonstall_last_handled_wq);
init_rwsem(&l->busy_lock);
nvgpu_rwsem_init(&g->deterministic_busy);
nvgpu_spinlock_init(&g->mc_enable_lock);
nvgpu_mutex_init(&platform->railgate_lock);
nvgpu_mutex_init(&g->dbg_sessions_lock);
nvgpu_mutex_init(&g->client_lock);
nvgpu_mutex_init(&g->poweron_lock);
nvgpu_mutex_init(&g->poweroff_lock);
nvgpu_mutex_init(&g->ctxsw_disable_lock);
l->regs_saved = l->regs;
l->bar1_saved = l->bar1;
g->emc3d_ratio = EMC3D_DEFAULT_RATIO;
/* Set DMA parameters to allow larger sgt lists */
dev->dma_parms = &l->dma_parms;
dma_set_max_seg_size(dev, UINT_MAX);
/*
* A default of 16GB is the largest supported DMA size that is
* acceptable to all currently supported Tegra SoCs.
*/
if (!platform->dma_mask)
platform->dma_mask = DMA_BIT_MASK(34);
dma_set_mask(dev, platform->dma_mask);
dma_set_coherent_mask(dev, platform->dma_mask);
nvgpu_init_list_node(&g->profiler_objects);
nvgpu_init_list_node(&g->boardobj_head);
nvgpu_init_list_node(&g->boardobjgrp_head);
}
static void nvgpu_init_gr_vars(struct gk20a *g)
{
gk20a_init_gr(g);
nvgpu_log_info(g, "total ram pages : %lu", totalram_pages);
g->gr.max_comptag_mem = totalram_pages
>> (10 - (PAGE_SHIFT - 10));
}
static void nvgpu_init_timeout(struct gk20a *g)
{
struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g));
g->timeouts_disabled_by_user = false;
nvgpu_atomic_set(&g->timeouts_disabled_refcount, 0);
if (nvgpu_platform_is_silicon(g)) {
g->gr_idle_timeout_default = NVGPU_DEFAULT_GR_IDLE_TIMEOUT;
} else if (nvgpu_platform_is_fpga(g)) {
g->gr_idle_timeout_default = GK20A_TIMEOUT_FPGA;
} else {
g->gr_idle_timeout_default = (u32)ULONG_MAX;
}
g->ch_wdt_timeout_ms = platform->ch_wdt_timeout_ms;
g->fifo_eng_timeout_us = GRFIFO_TIMEOUT_CHECK_PERIOD_US;
}
static void nvgpu_init_timeslice(struct gk20a *g)
{
g->runlist_interleave = true;
g->timeslice_low_priority_us = 1300;
g->timeslice_medium_priority_us = 2600;
g->timeslice_high_priority_us = 5200;
g->min_timeslice_us = 1000;
g->max_timeslice_us = 50000;
}
static void nvgpu_init_pm_vars(struct gk20a *g)
{
struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g));
/*
 * Set up initial power settings. For non-silicon platforms, disable
 * power features; for silicon platforms, read them from platform data.
*/
g->slcg_enabled =
nvgpu_platform_is_silicon(g) ? platform->enable_slcg : false;
g->blcg_enabled =
nvgpu_platform_is_silicon(g) ? platform->enable_blcg : false;
g->elcg_enabled =
nvgpu_platform_is_silicon(g) ? platform->enable_elcg : false;
g->elpg_enabled =
nvgpu_platform_is_silicon(g) ? platform->enable_elpg : false;
g->aelpg_enabled =
nvgpu_platform_is_silicon(g) ? platform->enable_aelpg : false;
g->mscg_enabled =
nvgpu_platform_is_silicon(g) ? platform->enable_mscg : false;
g->can_elpg =
nvgpu_platform_is_silicon(g) ? platform->can_elpg_init : false;
__nvgpu_set_enabled(g, NVGPU_GPU_CAN_ELCG,
nvgpu_platform_is_silicon(g) ? platform->can_elcg : false);
__nvgpu_set_enabled(g, NVGPU_GPU_CAN_SLCG,
nvgpu_platform_is_silicon(g) ? platform->can_slcg : false);
__nvgpu_set_enabled(g, NVGPU_GPU_CAN_BLCG,
nvgpu_platform_is_silicon(g) ? platform->can_blcg : false);
g->aggressive_sync_destroy = platform->aggressive_sync_destroy;
g->aggressive_sync_destroy_thresh = platform->aggressive_sync_destroy_thresh;
g->has_syncpoints = platform->has_syncpoints;
#ifdef CONFIG_NVGPU_SUPPORT_CDE
g->has_cde = platform->has_cde;
#endif
g->ptimer_src_freq = platform->ptimer_src_freq;
g->support_pmu = support_gk20a_pmu(dev_from_gk20a(g));
g->can_railgate = platform->can_railgate_init;
g->ldiv_slowdown_factor = platform->ldiv_slowdown_factor_init;
/* if default delay is not set, set default delay to 500msec */
if (platform->railgate_delay_init)
g->railgate_delay = platform->railgate_delay_init;
else
g->railgate_delay = NVGPU_DEFAULT_RAILGATE_IDLE_TIMEOUT;
__nvgpu_set_enabled(g, NVGPU_PMU_PERFMON, platform->enable_perfmon);
/* set default values to aelpg parameters */
g->pmu.aelpg_param[0] = APCTRL_SAMPLING_PERIOD_PG_DEFAULT_US;
g->pmu.aelpg_param[1] = APCTRL_MINIMUM_IDLE_FILTER_DEFAULT_US;
g->pmu.aelpg_param[2] = APCTRL_MINIMUM_TARGET_SAVING_DEFAULT_US;
g->pmu.aelpg_param[3] = APCTRL_POWER_BREAKEVEN_DEFAULT_US;
g->pmu.aelpg_param[4] = APCTRL_CYCLES_PER_SAMPLE_MAX_DEFAULT;
__nvgpu_set_enabled(g, NVGPU_SUPPORT_ASPM, !platform->disable_aspm);
}
static void nvgpu_init_vbios_vars(struct gk20a *g)
{
struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g));
__nvgpu_set_enabled(g, NVGPU_PMU_RUN_PREOS, platform->run_preos);
g->vbios_min_version = platform->vbios_min_version;
}
static void nvgpu_init_ltc_vars(struct gk20a *g)
{
struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g));
g->ltc_streamid = platform->ltc_streamid;
}
static void nvgpu_init_mm_vars(struct gk20a *g)
{
struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g));
g->mm.disable_bigpage = platform->disable_bigpage;
__nvgpu_set_enabled(g, NVGPU_MM_HONORS_APERTURE,
platform->honors_aperture);
__nvgpu_set_enabled(g, NVGPU_MM_UNIFIED_MEMORY,
platform->unified_memory);
__nvgpu_set_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES,
platform->unify_address_spaces);
nvgpu_mutex_init(&g->mm.tlb_lock);
nvgpu_mutex_init(&g->mm.priv_lock);
}
int nvgpu_probe(struct gk20a *g,
const char *debugfs_symlink,
const char *interface_name,
struct class *class)
{
struct device *dev = dev_from_gk20a(g);
struct gk20a_platform *platform = dev_get_drvdata(dev);
int err = 0;
nvgpu_init_vars(g);
nvgpu_init_gr_vars(g);
nvgpu_init_timeout(g);
nvgpu_init_timeslice(g);
nvgpu_init_pm_vars(g);
nvgpu_init_vbios_vars(g);
nvgpu_init_ltc_vars(g);
err = nvgpu_init_soc_vars(g);
if (err) {
nvgpu_err(g, "init soc vars failed");
return err;
}
/* Initialize the platform interface. */
err = platform->probe(dev);
if (err) {
if (err == -EPROBE_DEFER)
nvgpu_info(g, "platform probe failed");
else
nvgpu_err(g, "platform probe failed");
return err;
}
nvgpu_init_mm_vars(g);
	/* The platform probe can defer; do user init only once probe succeeded. */
err = gk20a_user_init(dev, interface_name, class);
if (err)
return err;
if (platform->late_probe) {
err = platform->late_probe(dev);
if (err) {
nvgpu_err(g, "late probe failed");
return err;
}
}
nvgpu_create_sysfs(dev);
gk20a_debug_init(g, debugfs_symlink);
g->dbg_regops_tmp_buf = nvgpu_kzalloc(g, SZ_4K);
if (!g->dbg_regops_tmp_buf) {
nvgpu_err(g, "couldn't allocate regops tmp buf");
return -ENOMEM;
}
g->dbg_regops_tmp_buf_ops =
SZ_4K / sizeof(g->dbg_regops_tmp_buf[0]);
g->remove_support = gk20a_remove_support;
nvgpu_ref_init(&g->refcount);
return 0;
}
/**
* cyclic_delta - Returns delta of cyclic integers a and b.
*
* @a - First integer
* @b - Second integer
*
* Note: if a is ahead of b, delta is positive.
*/
static int cyclic_delta(int a, int b)
{
return a - b;
}
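/*
 * Editor's note (worked example): with 32-bit wrapping arithmetic,
 * cyclic_delta(INT_MIN, INT_MAX) == 1, i.e. a counter that has just wrapped
 * around is still considered one step "ahead" of its previous value. This is
 * what lets the waits below compare monotonically increasing IRQ counters
 * without caring about overflow.
 */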
/**
* nvgpu_wait_for_deferred_interrupts - Wait for interrupts to complete
*
* @g - The GPU to wait on.
*
* Waits until all interrupt handlers that have been scheduled to run have
* completed.
*/
void nvgpu_wait_for_deferred_interrupts(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
int stall_irq_threshold = atomic_read(&l->hw_irq_stall_count);
int nonstall_irq_threshold = atomic_read(&l->hw_irq_nonstall_count);
/* wait until all stalling irqs are handled */
NVGPU_COND_WAIT(&l->sw_irq_stall_last_handled_wq,
cyclic_delta(stall_irq_threshold,
atomic_read(&l->sw_irq_stall_last_handled))
<= 0, 0);
/* wait until all non-stalling irqs are handled */
NVGPU_COND_WAIT(&l->sw_irq_nonstall_last_handled_wq,
cyclic_delta(nonstall_irq_threshold,
atomic_read(&l->sw_irq_nonstall_last_handled))
<= 0, 0);
}
static void nvgpu_free_gk20a(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
kfree(l);
}
void nvgpu_init_gk20a(struct gk20a *g)
{
g->free = nvgpu_free_gk20a;
}


@@ -0,0 +1,22 @@
/*
* Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef NVGPU_LINUX_DRIVER_COMMON
#define NVGPU_LINUX_DRIVER_COMMON
void nvgpu_init_gk20a(struct gk20a *g);
#endif


@@ -0,0 +1,29 @@
/*
* Copyright (c) 2018, NVIDIA Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <nvgpu/dt.h>
#include <linux/of.h>
#include "os_linux.h"
int nvgpu_dt_read_u32_index(struct gk20a *g, const char *name,
u32 index, u32 *value)
{
struct device *dev = dev_from_gk20a(g);
struct device_node *np = dev->of_node;
return of_property_read_u32_index(np, name, index, value);
}
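/*
 * Editor's sketch (illustrative only, not part of this file): reading the
 * second cell of a hypothetical "nvidia,example-prop" device tree property.
 */
static int __maybe_unused example_read_dt_cell(struct gk20a *g, u32 *val)
{
	return nvgpu_dt_read_u32_index(g, "nvidia,example-prop", 1, val);
}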


@@ -0,0 +1,117 @@
/*
* Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/firmware.h>
#include <nvgpu/kmem.h>
#include <nvgpu/bug.h>
#include <nvgpu/firmware.h>
#include "gk20a/gk20a.h"
#include "platform_gk20a.h"
#include "os_linux.h"
static const struct firmware *do_request_firmware(struct device *dev,
const char *prefix, const char *fw_name, int flags)
{
const struct firmware *fw;
char *fw_path = NULL;
int path_len, err;
if (prefix) {
path_len = strlen(prefix) + strlen(fw_name);
		path_len += 2; /* for the path separator and zero terminator */
fw_path = nvgpu_kzalloc(get_gk20a(dev),
sizeof(*fw_path) * path_len);
if (!fw_path)
return NULL;
sprintf(fw_path, "%s/%s", prefix, fw_name);
fw_name = fw_path;
}
if (flags & NVGPU_REQUEST_FIRMWARE_NO_WARN)
err = request_firmware_direct(&fw, fw_name, dev);
else
err = request_firmware(&fw, fw_name, dev);
nvgpu_kfree(get_gk20a(dev), fw_path);
if (err)
return NULL;
return fw;
}
/* This is a simple wrapper around request_firmware that takes 'fw_name' and
* applies an IP specific relative path prefix to it. The caller is
* responsible for calling nvgpu_release_firmware later. */
struct nvgpu_firmware *nvgpu_request_firmware(struct gk20a *g,
const char *fw_name,
int flags)
{
struct device *dev = dev_from_gk20a(g);
struct nvgpu_firmware *fw;
const struct firmware *linux_fw;
	/*
	 * current->fs is NULL when called from SYS_EXIT.
	 * Check it here to prevent a crash in request_firmware().
	 */
if (!current->fs || !fw_name)
return NULL;
fw = nvgpu_kzalloc(g, sizeof(*fw));
if (!fw)
return NULL;
linux_fw = do_request_firmware(dev, g->name, fw_name, flags);
#ifdef CONFIG_TEGRA_GK20A
/* TO BE REMOVED - Support loading from legacy SOC specific path. */
if (!linux_fw && !(flags & NVGPU_REQUEST_FIRMWARE_NO_SOC)) {
struct gk20a_platform *platform = gk20a_get_platform(dev);
linux_fw = do_request_firmware(dev,
platform->soc_name, fw_name, flags);
}
#endif
if (!linux_fw)
goto err;
fw->data = nvgpu_kmalloc(g, linux_fw->size);
if (!fw->data)
goto err_release;
memcpy(fw->data, linux_fw->data, linux_fw->size);
fw->size = linux_fw->size;
release_firmware(linux_fw);
return fw;
err_release:
release_firmware(linux_fw);
err:
nvgpu_kfree(g, fw);
return NULL;
}
void nvgpu_release_firmware(struct gk20a *g, struct nvgpu_firmware *fw)
{
	if (!fw)
return;
nvgpu_kfree(g, fw->data);
nvgpu_kfree(g, fw);
}
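/*
 * Editor's sketch (illustrative only, not part of this file): typical
 * request/release pairing for the wrapper above. The function name and the
 * firmware file name are made up for illustration.
 */
static int __maybe_unused example_load_ucode(struct gk20a *g)
{
	struct nvgpu_firmware *fw = nvgpu_request_firmware(g,
					"example_ucode.bin", 0);

	if (!fw)
		return -ENOENT;

	/* ... copy fw->data (fw->size bytes) to its destination ... */

	nvgpu_release_firmware(g, fw);
	return 0;
}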


@@ -0,0 +1,55 @@
/*
* Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#include <soc/tegra/fuse.h>
#include <nvgpu/fuse.h>
int nvgpu_tegra_get_gpu_speedo_id(struct gk20a *g)
{
return tegra_sku_info.gpu_speedo_id;
}
/*
 * Use the tegra_fuse_control_read/write() APIs for fuse offsets up to 0x100.
 * Use the tegra_fuse_readl/writel() APIs for fuse offsets above 0x100.
*/
void nvgpu_tegra_fuse_write_bypass(struct gk20a *g, u32 val)
{
tegra_fuse_control_write(val, FUSE_FUSEBYPASS_0);
}
void nvgpu_tegra_fuse_write_access_sw(struct gk20a *g, u32 val)
{
tegra_fuse_control_write(val, FUSE_WRITE_ACCESS_SW_0);
}
void nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(struct gk20a *g, u32 val)
{
tegra_fuse_writel(val, FUSE_OPT_GPU_TPC0_DISABLE_0);
}
void nvgpu_tegra_fuse_write_opt_gpu_tpc1_disable(struct gk20a *g, u32 val)
{
tegra_fuse_writel(val, FUSE_OPT_GPU_TPC1_DISABLE_0);
}
int nvgpu_tegra_fuse_read_gcplex_config_fuse(struct gk20a *g, u32 *val)
{
return tegra_fuse_readl(FUSE_GCPLEX_CONFIG_FUSE_0, val);
}
int nvgpu_tegra_fuse_read_reserved_calib(struct gk20a *g, u32 *val)
{
return tegra_fuse_readl(FUSE_RESERVED_CALIB0_0, val);
}


@@ -0,0 +1,122 @@
/*
* Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#include <trace/events/gk20a.h>
#include <linux/irqreturn.h>
#include "gk20a/gk20a.h"
#include "gk20a/mc_gk20a.h"
#include <nvgpu/atomic.h>
#include <nvgpu/unit.h>
#include "os_linux.h"
irqreturn_t nvgpu_intr_stall(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
u32 mc_intr_0;
trace_mc_gk20a_intr_stall(g->name);
if (!g->power_on)
return IRQ_NONE;
/* not from gpu when sharing irq with others */
mc_intr_0 = g->ops.mc.intr_stall(g);
if (unlikely(!mc_intr_0))
return IRQ_NONE;
g->ops.mc.intr_stall_pause(g);
atomic_inc(&l->hw_irq_stall_count);
trace_mc_gk20a_intr_stall_done(g->name);
return IRQ_WAKE_THREAD;
}
irqreturn_t nvgpu_intr_thread_stall(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
int hw_irq_count;
nvgpu_log(g, gpu_dbg_intr, "interrupt thread launched");
trace_mc_gk20a_intr_thread_stall(g->name);
hw_irq_count = atomic_read(&l->hw_irq_stall_count);
g->ops.mc.isr_stall(g);
g->ops.mc.intr_stall_resume(g);
/* sync handled irq counter before re-enabling interrupts */
atomic_set(&l->sw_irq_stall_last_handled, hw_irq_count);
nvgpu_cond_broadcast(&l->sw_irq_stall_last_handled_wq);
trace_mc_gk20a_intr_thread_stall_done(g->name);
return IRQ_HANDLED;
}
irqreturn_t nvgpu_intr_nonstall(struct gk20a *g)
{
u32 non_stall_intr_val;
u32 hw_irq_count;
int ops_old, ops_new, ops = 0;
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
if (!g->power_on)
return IRQ_NONE;
/* not from gpu when sharing irq with others */
non_stall_intr_val = g->ops.mc.intr_nonstall(g);
if (unlikely(!non_stall_intr_val))
return IRQ_NONE;
g->ops.mc.intr_nonstall_pause(g);
ops = g->ops.mc.isr_nonstall(g);
if (ops) {
do {
ops_old = atomic_read(&l->nonstall_ops);
ops_new = ops_old | ops;
} while (ops_old != atomic_cmpxchg(&l->nonstall_ops,
ops_old, ops_new));
queue_work(l->nonstall_work_queue, &l->nonstall_fn_work);
}
hw_irq_count = atomic_inc_return(&l->hw_irq_nonstall_count);
/* sync handled irq counter before re-enabling interrupts */
atomic_set(&l->sw_irq_nonstall_last_handled, hw_irq_count);
g->ops.mc.intr_nonstall_resume(g);
nvgpu_cond_broadcast(&l->sw_irq_nonstall_last_handled_wq);
return IRQ_HANDLED;
}
void nvgpu_intr_nonstall_cb(struct work_struct *work)
{
struct nvgpu_os_linux *l =
container_of(work, struct nvgpu_os_linux, nonstall_fn_work);
struct gk20a *g = &l->g;
do {
u32 ops;
ops = atomic_xchg(&l->nonstall_ops, 0);
mc_gk20a_handle_intr_nonstall(g, ops);
} while (atomic_read(&l->nonstall_ops) != 0);
}
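/*
 * Editor's note (illustrative, not part of this file): the handlers above are
 * intended to be wired up from the Linux probe code with
 * request_threaded_irq(), roughly:
 *
 *	err = request_threaded_irq(irq_stall, stall_isr, stall_isr_thread,
 *				   0, "nvgpu_stall", g);
 *
 * where stall_isr() forwards to nvgpu_intr_stall() and stall_isr_thread()
 * forwards to nvgpu_intr_thread_stall(). The handler names here are made up;
 * the actual registration lives elsewhere in the Linux layer.
 */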


@@ -0,0 +1,22 @@
/*
* Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#ifndef __NVGPU_LINUX_INTR_H__
#define __NVGPU_LINUX_INTR_H__
struct gk20a;
irqreturn_t nvgpu_intr_stall(struct gk20a *g);
irqreturn_t nvgpu_intr_thread_stall(struct gk20a *g);
irqreturn_t nvgpu_intr_nonstall(struct gk20a *g);
void nvgpu_intr_nonstall_cb(struct work_struct *work);
#endif


@@ -0,0 +1,118 @@
/*
* Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#include <nvgpu/io.h>
#include <nvgpu/types.h>
#include "os_linux.h"
#include "gk20a/gk20a.h"
void nvgpu_writel(struct gk20a *g, u32 r, u32 v)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
if (unlikely(!l->regs)) {
__gk20a_warn_on_no_regs();
nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v);
} else {
writel_relaxed(v, l->regs + r);
nvgpu_wmb();
nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x", r, v);
}
}
u32 nvgpu_readl(struct gk20a *g, u32 r)
{
u32 v = __nvgpu_readl(g, r);
if (v == 0xffffffff)
__nvgpu_check_gpu_state(g);
return v;
}
u32 __nvgpu_readl(struct gk20a *g, u32 r)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
u32 v = 0xffffffff;
if (unlikely(!l->regs)) {
__gk20a_warn_on_no_regs();
nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v);
} else {
v = readl(l->regs + r);
nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x", r, v);
}
return v;
}
void nvgpu_writel_check(struct gk20a *g, u32 r, u32 v)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
if (unlikely(!l->regs)) {
__gk20a_warn_on_no_regs();
nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x (failed)", r, v);
} else {
nvgpu_wmb();
do {
writel_relaxed(v, l->regs + r);
} while (readl(l->regs + r) != v);
nvgpu_log(g, gpu_dbg_reg, "r=0x%x v=0x%x", r, v);
}
}
void nvgpu_bar1_writel(struct gk20a *g, u32 b, u32 v)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
if (unlikely(!l->bar1)) {
__gk20a_warn_on_no_regs();
nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x (failed)", b, v);
} else {
nvgpu_wmb();
writel_relaxed(v, l->bar1 + b);
nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x", b, v);
}
}
u32 nvgpu_bar1_readl(struct gk20a *g, u32 b)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
u32 v = 0xffffffff;
if (unlikely(!l->bar1)) {
__gk20a_warn_on_no_regs();
nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x (failed)", b, v);
} else {
v = readl(l->bar1 + b);
nvgpu_log(g, gpu_dbg_reg, "b=0x%x v=0x%x", b, v);
}
return v;
}
bool nvgpu_io_exists(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
return l->regs != NULL;
}
bool nvgpu_io_valid_reg(struct gk20a *g, u32 r)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
return r < resource_size(l->regs);
}


@@ -0,0 +1,29 @@
/*
* Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#include <nvgpu/io.h>
#include <nvgpu/types.h>
#include "os_linux.h"
#include "gk20a/gk20a.h"
#include <nvgpu/hw/gv11b/hw_usermode_gv11b.h>
void nvgpu_usermode_writel(struct gk20a *g, u32 r, u32 v)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
void __iomem *reg = l->usermode_regs + (r - usermode_cfg0_r());
writel_relaxed(v, reg);
nvgpu_log(g, gpu_dbg_reg, "usermode r=0x%x v=0x%x", r, v);
}


@@ -0,0 +1,296 @@
/*
* NVGPU IOCTLs
*
* Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/file.h>
#include <nvgpu/nvgpu_common.h>
#include <nvgpu/ctxsw_trace.h>
#include "gk20a/gk20a.h"
#include "gk20a/dbg_gpu_gk20a.h"
#include "ioctl_channel.h"
#include "ioctl_ctrl.h"
#include "ioctl_as.h"
#include "ioctl_tsg.h"
#include "ioctl_dbg.h"
#include "module.h"
#include "os_linux.h"
#include "ctxsw_trace.h"
#include "platform_gk20a.h"
#define GK20A_NUM_CDEVS 7
const struct file_operations gk20a_channel_ops = {
.owner = THIS_MODULE,
.release = gk20a_channel_release,
.open = gk20a_channel_open,
#ifdef CONFIG_COMPAT
.compat_ioctl = gk20a_channel_ioctl,
#endif
.unlocked_ioctl = gk20a_channel_ioctl,
};
static const struct file_operations gk20a_ctrl_ops = {
.owner = THIS_MODULE,
.release = gk20a_ctrl_dev_release,
.open = gk20a_ctrl_dev_open,
.unlocked_ioctl = gk20a_ctrl_dev_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = gk20a_ctrl_dev_ioctl,
#endif
};
static const struct file_operations gk20a_dbg_ops = {
.owner = THIS_MODULE,
.release = gk20a_dbg_gpu_dev_release,
.open = gk20a_dbg_gpu_dev_open,
.unlocked_ioctl = gk20a_dbg_gpu_dev_ioctl,
.poll = gk20a_dbg_gpu_dev_poll,
#ifdef CONFIG_COMPAT
.compat_ioctl = gk20a_dbg_gpu_dev_ioctl,
#endif
};
static const struct file_operations gk20a_as_ops = {
.owner = THIS_MODULE,
.release = gk20a_as_dev_release,
.open = gk20a_as_dev_open,
#ifdef CONFIG_COMPAT
.compat_ioctl = gk20a_as_dev_ioctl,
#endif
.unlocked_ioctl = gk20a_as_dev_ioctl,
};
/*
* Note: We use a different 'open' to trigger handling of the profiler session.
* Most of the code is shared between them... Though, at some point if the
* code does get too tangled trying to handle each in the same path we can
* separate them cleanly.
*/
static const struct file_operations gk20a_prof_ops = {
.owner = THIS_MODULE,
.release = gk20a_dbg_gpu_dev_release,
.open = gk20a_prof_gpu_dev_open,
.unlocked_ioctl = gk20a_dbg_gpu_dev_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = gk20a_dbg_gpu_dev_ioctl,
#endif
};
static const struct file_operations gk20a_tsg_ops = {
.owner = THIS_MODULE,
.release = nvgpu_ioctl_tsg_dev_release,
.open = nvgpu_ioctl_tsg_dev_open,
#ifdef CONFIG_COMPAT
.compat_ioctl = nvgpu_ioctl_tsg_dev_ioctl,
#endif
.unlocked_ioctl = nvgpu_ioctl_tsg_dev_ioctl,
};
#ifdef CONFIG_GK20A_CTXSW_TRACE
static const struct file_operations gk20a_ctxsw_ops = {
.owner = THIS_MODULE,
.release = gk20a_ctxsw_dev_release,
.open = gk20a_ctxsw_dev_open,
#ifdef CONFIG_COMPAT
.compat_ioctl = gk20a_ctxsw_dev_ioctl,
#endif
.unlocked_ioctl = gk20a_ctxsw_dev_ioctl,
.poll = gk20a_ctxsw_dev_poll,
.read = gk20a_ctxsw_dev_read,
.mmap = gk20a_ctxsw_dev_mmap,
};
#endif
static const struct file_operations gk20a_sched_ops = {
.owner = THIS_MODULE,
.release = gk20a_sched_dev_release,
.open = gk20a_sched_dev_open,
#ifdef CONFIG_COMPAT
.compat_ioctl = gk20a_sched_dev_ioctl,
#endif
.unlocked_ioctl = gk20a_sched_dev_ioctl,
.poll = gk20a_sched_dev_poll,
.read = gk20a_sched_dev_read,
};
static int gk20a_create_device(
struct device *dev, int devno,
const char *interface_name, const char *cdev_name,
struct cdev *cdev, struct device **out,
const struct file_operations *ops,
struct class *class)
{
struct device *subdev;
int err;
struct gk20a *g = gk20a_from_dev(dev);
nvgpu_log_fn(g, " ");
cdev_init(cdev, ops);
cdev->owner = THIS_MODULE;
err = cdev_add(cdev, devno, 1);
if (err) {
dev_err(dev, "failed to add %s cdev\n", cdev_name);
return err;
}
subdev = device_create(class, NULL, devno, NULL,
interface_name, cdev_name);
if (IS_ERR(subdev)) {
		err = PTR_ERR(subdev);
cdev_del(cdev);
dev_err(dev, "failed to create %s device for %s\n",
cdev_name, dev_name(dev));
return err;
}
*out = subdev;
return 0;
}
void gk20a_user_deinit(struct device *dev, struct class *class)
{
struct gk20a *g = gk20a_from_dev(dev);
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
if (l->channel.node) {
device_destroy(class, l->channel.cdev.dev);
cdev_del(&l->channel.cdev);
}
if (l->as_dev.node) {
device_destroy(class, l->as_dev.cdev.dev);
cdev_del(&l->as_dev.cdev);
}
if (l->ctrl.node) {
device_destroy(class, l->ctrl.cdev.dev);
cdev_del(&l->ctrl.cdev);
}
if (l->dbg.node) {
device_destroy(class, l->dbg.cdev.dev);
cdev_del(&l->dbg.cdev);
}
if (l->prof.node) {
device_destroy(class, l->prof.cdev.dev);
cdev_del(&l->prof.cdev);
}
if (l->tsg.node) {
device_destroy(class, l->tsg.cdev.dev);
cdev_del(&l->tsg.cdev);
}
if (l->ctxsw.node) {
device_destroy(class, l->ctxsw.cdev.dev);
cdev_del(&l->ctxsw.cdev);
}
if (l->sched.node) {
device_destroy(class, l->sched.cdev.dev);
cdev_del(&l->sched.cdev);
}
if (l->cdev_region)
unregister_chrdev_region(l->cdev_region, GK20A_NUM_CDEVS);
}
int gk20a_user_init(struct device *dev, const char *interface_name,
struct class *class)
{
int err;
dev_t devno;
struct gk20a *g = gk20a_from_dev(dev);
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
err = alloc_chrdev_region(&devno, 0, GK20A_NUM_CDEVS, dev_name(dev));
if (err) {
dev_err(dev, "failed to allocate devno\n");
goto fail;
}
l->cdev_region = devno;
err = gk20a_create_device(dev, devno++, interface_name, "",
&l->channel.cdev, &l->channel.node,
&gk20a_channel_ops,
class);
if (err)
goto fail;
err = gk20a_create_device(dev, devno++, interface_name, "-as",
&l->as_dev.cdev, &l->as_dev.node,
&gk20a_as_ops,
class);
if (err)
goto fail;
err = gk20a_create_device(dev, devno++, interface_name, "-ctrl",
&l->ctrl.cdev, &l->ctrl.node,
&gk20a_ctrl_ops,
class);
if (err)
goto fail;
err = gk20a_create_device(dev, devno++, interface_name, "-dbg",
&l->dbg.cdev, &l->dbg.node,
&gk20a_dbg_ops,
class);
if (err)
goto fail;
err = gk20a_create_device(dev, devno++, interface_name, "-prof",
&l->prof.cdev, &l->prof.node,
&gk20a_prof_ops,
class);
if (err)
goto fail;
err = gk20a_create_device(dev, devno++, interface_name, "-tsg",
&l->tsg.cdev, &l->tsg.node,
&gk20a_tsg_ops,
class);
if (err)
goto fail;
#if defined(CONFIG_GK20A_CTXSW_TRACE)
err = gk20a_create_device(dev, devno++, interface_name, "-ctxsw",
&l->ctxsw.cdev, &l->ctxsw.node,
&gk20a_ctxsw_ops,
class);
if (err)
goto fail;
#endif
err = gk20a_create_device(dev, devno++, interface_name, "-sched",
&l->sched.cdev, &l->sched.node,
&gk20a_sched_ops,
class);
if (err)
goto fail;
return 0;
fail:
gk20a_user_deinit(dev, &nvgpu_class);
return err;
}

View File

@@ -0,0 +1,23 @@
/*
* Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#ifndef __NVGPU_IOCTL_H__
#define __NVGPU_IOCTL_H__
struct device;
struct class;
int gk20a_user_init(struct device *dev, const char *interface_name,
struct class *class);
void gk20a_user_deinit(struct device *dev, struct class *class);
#endif

View File

@@ -0,0 +1,423 @@
/*
* GK20A Address Spaces
*
* Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#include <linux/cdev.h>
#include <linux/uaccess.h>
#include <linux/fs.h>
#include <trace/events/gk20a.h>
#include <uapi/linux/nvgpu.h>
#include <nvgpu/gmmu.h>
#include <nvgpu/vm_area.h>
#include <nvgpu/log2.h>
#include <nvgpu/linux/vm.h>
#include "gk20a/gk20a.h"
#include "platform_gk20a.h"
#include "ioctl_as.h"
#include "os_linux.h"
static u32 gk20a_as_translate_as_alloc_space_flags(struct gk20a *g, u32 flags)
{
u32 core_flags = 0;
if (flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET)
core_flags |= NVGPU_VM_AREA_ALLOC_FIXED_OFFSET;
if (flags & NVGPU_AS_ALLOC_SPACE_FLAGS_SPARSE)
core_flags |= NVGPU_VM_AREA_ALLOC_SPARSE;
return core_flags;
}
static int gk20a_as_ioctl_bind_channel(
struct gk20a_as_share *as_share,
struct nvgpu_as_bind_channel_args *args)
{
int err = 0;
struct channel_gk20a *ch;
struct gk20a *g = gk20a_from_vm(as_share->vm);
nvgpu_log_fn(g, " ");
ch = gk20a_get_channel_from_file(args->channel_fd);
if (!ch)
return -EINVAL;
if (gk20a_channel_as_bound(ch)) {
err = -EINVAL;
goto out;
}
/* this will set channel_gk20a->vm */
err = ch->g->ops.mm.vm_bind_channel(as_share->vm, ch);
out:
gk20a_channel_put(ch);
return err;
}
static int gk20a_as_ioctl_alloc_space(
struct gk20a_as_share *as_share,
struct nvgpu_as_alloc_space_args *args)
{
struct gk20a *g = gk20a_from_vm(as_share->vm);
nvgpu_log_fn(g, " ");
return nvgpu_vm_area_alloc(as_share->vm, args->pages, args->page_size,
&args->o_a.offset,
gk20a_as_translate_as_alloc_space_flags(g,
args->flags));
}
static int gk20a_as_ioctl_free_space(
struct gk20a_as_share *as_share,
struct nvgpu_as_free_space_args *args)
{
struct gk20a *g = gk20a_from_vm(as_share->vm);
nvgpu_log_fn(g, " ");
return nvgpu_vm_area_free(as_share->vm, args->offset);
}
static int gk20a_as_ioctl_map_buffer_ex(
struct gk20a_as_share *as_share,
struct nvgpu_as_map_buffer_ex_args *args)
{
struct gk20a *g = gk20a_from_vm(as_share->vm);
nvgpu_log_fn(g, " ");
/* unsupported, direct kind control must be used */
if (!(args->flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL)) {
struct gk20a *g = as_share->vm->mm->g;
nvgpu_log_info(g, "Direct kind control must be requested");
return -EINVAL;
}
return nvgpu_vm_map_buffer(as_share->vm, args->dmabuf_fd,
&args->offset, args->flags,
args->compr_kind,
args->incompr_kind,
args->buffer_offset,
args->mapping_size,
NULL);
}
static int gk20a_as_ioctl_unmap_buffer(
struct gk20a_as_share *as_share,
struct nvgpu_as_unmap_buffer_args *args)
{
struct gk20a *g = gk20a_from_vm(as_share->vm);
nvgpu_log_fn(g, " ");
nvgpu_vm_unmap(as_share->vm, args->offset, NULL);
return 0;
}
static int gk20a_as_ioctl_map_buffer_batch(
struct gk20a_as_share *as_share,
struct nvgpu_as_map_buffer_batch_args *args)
{
struct gk20a *g = gk20a_from_vm(as_share->vm);
u32 i;
int err = 0;
struct nvgpu_as_unmap_buffer_args __user *user_unmap_args =
(struct nvgpu_as_unmap_buffer_args __user *)(uintptr_t)
args->unmaps;
struct nvgpu_as_map_buffer_ex_args __user *user_map_args =
(struct nvgpu_as_map_buffer_ex_args __user *)(uintptr_t)
args->maps;
struct vm_gk20a_mapping_batch batch;
nvgpu_log_fn(g, " ");
if (args->num_unmaps > NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT ||
args->num_maps > NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT)
return -EINVAL;
nvgpu_vm_mapping_batch_start(&batch);
for (i = 0; i < args->num_unmaps; ++i) {
struct nvgpu_as_unmap_buffer_args unmap_args;
if (copy_from_user(&unmap_args, &user_unmap_args[i],
sizeof(unmap_args))) {
err = -EFAULT;
break;
}
nvgpu_vm_unmap(as_share->vm, unmap_args.offset, &batch);
}
if (err) {
nvgpu_vm_mapping_batch_finish(as_share->vm, &batch);
args->num_unmaps = i;
args->num_maps = 0;
return err;
}
for (i = 0; i < args->num_maps; ++i) {
s16 compressible_kind;
s16 incompressible_kind;
struct nvgpu_as_map_buffer_ex_args map_args;
memset(&map_args, 0, sizeof(map_args));
if (copy_from_user(&map_args, &user_map_args[i],
sizeof(map_args))) {
err = -EFAULT;
break;
}
if (map_args.flags &
NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) {
compressible_kind = map_args.compr_kind;
incompressible_kind = map_args.incompr_kind;
} else {
/* direct kind control must be used */
err = -EINVAL;
break;
}
err = nvgpu_vm_map_buffer(
as_share->vm, map_args.dmabuf_fd,
&map_args.offset, map_args.flags,
compressible_kind, incompressible_kind,
map_args.buffer_offset,
map_args.mapping_size,
&batch);
if (err)
break;
}
nvgpu_vm_mapping_batch_finish(as_share->vm, &batch);
if (err)
args->num_maps = i;
/* note: args->num_unmaps will be unmodified, which is ok
* since all unmaps are done */
return err;
}
static int gk20a_as_ioctl_get_va_regions(
struct gk20a_as_share *as_share,
struct nvgpu_as_get_va_regions_args *args)
{
unsigned int i;
unsigned int write_entries;
struct nvgpu_as_va_region __user *user_region_ptr;
struct vm_gk20a *vm = as_share->vm;
struct gk20a *g = gk20a_from_vm(vm);
unsigned int page_sizes = gmmu_page_size_kernel;
nvgpu_log_fn(g, " ");
if (!vm->big_pages)
page_sizes--;
write_entries = args->buf_size / sizeof(struct nvgpu_as_va_region);
if (write_entries > page_sizes)
write_entries = page_sizes;
user_region_ptr =
(struct nvgpu_as_va_region __user *)(uintptr_t)args->buf_addr;
for (i = 0; i < write_entries; ++i) {
struct nvgpu_as_va_region region;
struct nvgpu_allocator *vma = vm->vma[i];
memset(&region, 0, sizeof(struct nvgpu_as_va_region));
region.page_size = vm->gmmu_page_sizes[i];
region.offset = nvgpu_alloc_base(vma);
/* No __aeabi_uldivmod() on some platforms... */
region.pages = (nvgpu_alloc_end(vma) -
nvgpu_alloc_base(vma)) >> ilog2(region.page_size);
if (copy_to_user(user_region_ptr + i, &region, sizeof(region)))
return -EFAULT;
}
args->buf_size =
page_sizes * sizeof(struct nvgpu_as_va_region);
return 0;
}
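/*
 * Worked example (illustrative only): for the 64 KiB page-size entry backed
 * by an allocator spanning 1 GiB, region.pages = (1 GiB) >> ilog2(64 KiB) =
 * 16384. The shift avoids the 64-bit division that would otherwise pull in
 * __aeabi_uldivmod() on 32-bit ARM builds, as noted above.
 */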
static int nvgpu_as_ioctl_get_sync_ro_map(
struct gk20a_as_share *as_share,
struct nvgpu_as_get_sync_ro_map_args *args)
{
#ifdef CONFIG_TEGRA_GK20A_NVHOST
struct vm_gk20a *vm = as_share->vm;
struct gk20a *g = gk20a_from_vm(vm);
u64 base_gpuva;
u32 sync_size;
int err = 0;
if (!g->ops.fifo.get_sync_ro_map)
return -EINVAL;
if (!gk20a_platform_has_syncpoints(g))
return -EINVAL;
err = g->ops.fifo.get_sync_ro_map(vm, &base_gpuva, &sync_size);
if (err)
return err;
args->base_gpuva = base_gpuva;
args->sync_size = sync_size;
return err;
#else
return -EINVAL;
#endif
}
int gk20a_as_dev_open(struct inode *inode, struct file *filp)
{
struct nvgpu_os_linux *l;
struct gk20a_as_share *as_share;
struct gk20a *g;
int err;
l = container_of(inode->i_cdev, struct nvgpu_os_linux, as_dev.cdev);
g = &l->g;
nvgpu_log_fn(g, " ");
err = gk20a_as_alloc_share(g, 0, 0, &as_share);
if (err) {
nvgpu_log_fn(g, "failed to alloc share");
return err;
}
filp->private_data = as_share;
return 0;
}
int gk20a_as_dev_release(struct inode *inode, struct file *filp)
{
struct gk20a_as_share *as_share = filp->private_data;
if (!as_share)
return 0;
return gk20a_as_release_share(as_share);
}
long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
int err = 0;
struct gk20a_as_share *as_share = filp->private_data;
struct gk20a *g = gk20a_from_as(as_share->as);
u8 buf[NVGPU_AS_IOCTL_MAX_ARG_SIZE];
nvgpu_log_fn(g, "start %d", _IOC_NR(cmd));
if ((_IOC_TYPE(cmd) != NVGPU_AS_IOCTL_MAGIC) ||
(_IOC_NR(cmd) == 0) ||
(_IOC_NR(cmd) > NVGPU_AS_IOCTL_LAST) ||
(_IOC_SIZE(cmd) > NVGPU_AS_IOCTL_MAX_ARG_SIZE))
return -EINVAL;
memset(buf, 0, sizeof(buf));
if (_IOC_DIR(cmd) & _IOC_WRITE) {
if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
return -EFAULT;
}
err = gk20a_busy(g);
if (err)
return err;
switch (cmd) {
case NVGPU_AS_IOCTL_BIND_CHANNEL:
trace_gk20a_as_ioctl_bind_channel(g->name);
err = gk20a_as_ioctl_bind_channel(as_share,
(struct nvgpu_as_bind_channel_args *)buf);
break;
case NVGPU32_AS_IOCTL_ALLOC_SPACE:
{
struct nvgpu32_as_alloc_space_args *args32 =
(struct nvgpu32_as_alloc_space_args *)buf;
struct nvgpu_as_alloc_space_args args;
args.pages = args32->pages;
args.page_size = args32->page_size;
args.flags = args32->flags;
args.o_a.offset = args32->o_a.offset;
trace_gk20a_as_ioctl_alloc_space(g->name);
err = gk20a_as_ioctl_alloc_space(as_share, &args);
args32->o_a.offset = args.o_a.offset;
break;
}
case NVGPU_AS_IOCTL_ALLOC_SPACE:
trace_gk20a_as_ioctl_alloc_space(g->name);
err = gk20a_as_ioctl_alloc_space(as_share,
(struct nvgpu_as_alloc_space_args *)buf);
break;
case NVGPU_AS_IOCTL_FREE_SPACE:
trace_gk20a_as_ioctl_free_space(g->name);
err = gk20a_as_ioctl_free_space(as_share,
(struct nvgpu_as_free_space_args *)buf);
break;
case NVGPU_AS_IOCTL_MAP_BUFFER_EX:
trace_gk20a_as_ioctl_map_buffer(g->name);
err = gk20a_as_ioctl_map_buffer_ex(as_share,
(struct nvgpu_as_map_buffer_ex_args *)buf);
break;
case NVGPU_AS_IOCTL_UNMAP_BUFFER:
trace_gk20a_as_ioctl_unmap_buffer(g->name);
err = gk20a_as_ioctl_unmap_buffer(as_share,
(struct nvgpu_as_unmap_buffer_args *)buf);
break;
case NVGPU_AS_IOCTL_GET_VA_REGIONS:
trace_gk20a_as_ioctl_get_va_regions(g->name);
err = gk20a_as_ioctl_get_va_regions(as_share,
(struct nvgpu_as_get_va_regions_args *)buf);
break;
case NVGPU_AS_IOCTL_MAP_BUFFER_BATCH:
err = gk20a_as_ioctl_map_buffer_batch(as_share,
(struct nvgpu_as_map_buffer_batch_args *)buf);
break;
case NVGPU_AS_IOCTL_GET_SYNC_RO_MAP:
err = nvgpu_as_ioctl_get_sync_ro_map(as_share,
(struct nvgpu_as_get_sync_ro_map_args *)buf);
break;
default:
err = -ENOTTY;
break;
}
gk20a_idle(g);
if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
if (copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)))
err = -EFAULT;
return err;
}
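/*
 * Illustrative userspace sketch (an assumption for clarity, not part of this
 * driver): issuing NVGPU_AS_IOCTL_MAP_BUFFER_EX against an open address-space
 * fd. The handler above requires NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL;
 * field names follow the args structure referenced in this file, and as_fd,
 * dmabuf_fd, compr_kind, incompr_kind and gpu_va are caller-provided.
 *
 *	struct nvgpu_as_map_buffer_ex_args args;
 *
 *	memset(&args, 0, sizeof(args));
 *	args.dmabuf_fd = dmabuf_fd;
 *	args.flags = NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL;
 *	args.compr_kind = compr_kind;
 *	args.incompr_kind = incompr_kind;
 *	if (ioctl(as_fd, NVGPU_AS_IOCTL_MAP_BUFFER_EX, &args) == 0)
 *		gpu_va = args.offset;
 */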

View File

@@ -0,0 +1,30 @@
/*
* GK20A Address Spaces
*
* Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#ifndef __NVGPU_COMMON_LINUX_AS_H__
#define __NVGPU_COMMON_LINUX_AS_H__
struct inode;
struct file;
/* MAP_BUFFER_BATCH_LIMIT: the upper limit for num_unmaps and
* num_maps */
#define NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT 256
/* struct file_operations driver interface */
int gk20a_as_dev_open(struct inode *inode, struct file *filp);
int gk20a_as_dev_release(struct inode *inode, struct file *filp);
long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
#endif

View File

File diff suppressed because it is too large

View File

@@ -0,0 +1,50 @@
/*
* Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#ifndef __NVGPU_IOCTL_CHANNEL_H__
#define __NVGPU_IOCTL_CHANNEL_H__
#include <linux/fs.h>
#include "gk20a/css_gr_gk20a.h"
struct inode;
struct file;
struct gk20a;
struct nvgpu_channel_open_args;
struct gk20a_cs_snapshot_client_linux {
struct gk20a_cs_snapshot_client cs_client;
u32 dmabuf_fd;
struct dma_buf *dma_handler;
};
int gk20a_channel_open(struct inode *inode, struct file *filp);
int gk20a_channel_release(struct inode *inode, struct file *filp);
long gk20a_channel_ioctl(struct file *filp,
unsigned int cmd, unsigned long arg);
int gk20a_channel_open_ioctl(struct gk20a *g,
struct nvgpu_channel_open_args *args);
int gk20a_channel_free_cycle_stats_snapshot(struct channel_gk20a *ch);
void gk20a_channel_free_cycle_stats_buffer(struct channel_gk20a *ch);
extern const struct file_operations gk20a_channel_ops;
u32 nvgpu_get_common_runlist_level(u32 level);
u32 nvgpu_get_ioctl_graphics_preempt_mode_flags(u32 graphics_preempt_mode_flags);
u32 nvgpu_get_ioctl_compute_preempt_mode_flags(u32 compute_preempt_mode_flags);
u32 nvgpu_get_ioctl_graphics_preempt_mode(u32 graphics_preempt_mode);
u32 nvgpu_get_ioctl_compute_preempt_mode(u32 compute_preempt_mode);
#endif

View File

@@ -0,0 +1,562 @@
/*
* Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/cdev.h>
#include <linux/file.h>
#include <linux/anon_inodes.h>
#include <linux/uaccess.h>
#include <linux/poll.h>
#ifdef CONFIG_DEBUG_FS
#include <linux/debugfs.h>
#endif
#include <uapi/linux/nvgpu.h>
#include <nvgpu/bitops.h>
#include <nvgpu/lock.h>
#include <nvgpu/kmem.h>
#include <nvgpu/atomic.h>
#include <nvgpu/bug.h>
#include <nvgpu/kref.h>
#include <nvgpu/log.h>
#include <nvgpu/barrier.h>
#include <nvgpu/cond.h>
#include <nvgpu/list.h>
#include <nvgpu/clk_arb.h>
#include "gk20a/gk20a.h"
#include "clk/clk.h"
#include "pstate/pstate.h"
#include "lpwr/lpwr.h"
#include "volt/volt.h"
#ifdef CONFIG_DEBUG_FS
#include "os_linux.h"
#endif
static int nvgpu_clk_arb_release_completion_dev(struct inode *inode,
struct file *filp)
{
struct nvgpu_clk_dev *dev = filp->private_data;
struct nvgpu_clk_session *session = dev->session;
clk_arb_dbg(session->g, " ");
nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session);
nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
return 0;
}
static inline unsigned int nvgpu_convert_poll_mask(unsigned int nvgpu_poll_mask)
{
unsigned int poll_mask = 0;
if (nvgpu_poll_mask & NVGPU_POLLIN)
poll_mask |= POLLIN;
if (nvgpu_poll_mask & NVGPU_POLLPRI)
poll_mask |= POLLPRI;
if (nvgpu_poll_mask & NVGPU_POLLOUT)
poll_mask |= POLLOUT;
if (nvgpu_poll_mask & NVGPU_POLLRDNORM)
poll_mask |= POLLRDNORM;
if (nvgpu_poll_mask & NVGPU_POLLHUP)
poll_mask |= POLLHUP;
return poll_mask;
}
static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait)
{
struct nvgpu_clk_dev *dev = filp->private_data;
clk_arb_dbg(dev->session->g, " ");
poll_wait(filp, &dev->readout_wq.wq, wait);
return nvgpu_convert_poll_mask(nvgpu_atomic_xchg(&dev->poll_mask, 0));
}
void nvgpu_clk_arb_event_post_event(struct nvgpu_clk_dev *dev)
{
nvgpu_cond_broadcast_interruptible(&dev->readout_wq);
}
static int nvgpu_clk_arb_release_event_dev(struct inode *inode,
struct file *filp)
{
struct nvgpu_clk_dev *dev = filp->private_data;
struct nvgpu_clk_session *session = dev->session;
struct nvgpu_clk_arb *arb;
arb = session->g->clk_arb;
clk_arb_dbg(session->g, " ");
if (arb) {
nvgpu_spinlock_acquire(&arb->users_lock);
nvgpu_list_del(&dev->link);
nvgpu_spinlock_release(&arb->users_lock);
nvgpu_clk_notification_queue_free(arb->g, &dev->queue);
}
nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session);
nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
return 0;
}
static inline u32 nvgpu_convert_gpu_event(u32 nvgpu_event)
{
u32 nvgpu_gpu_event;
switch (nvgpu_event) {
case NVGPU_EVENT_VF_UPDATE:
nvgpu_gpu_event = NVGPU_GPU_EVENT_VF_UPDATE;
break;
case NVGPU_EVENT_ALARM_TARGET_VF_NOT_POSSIBLE:
nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_TARGET_VF_NOT_POSSIBLE;
break;
case NVGPU_EVENT_ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE:
nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE;
break;
case NVGPU_EVENT_ALARM_CLOCK_ARBITER_FAILED:
nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_CLOCK_ARBITER_FAILED;
break;
case NVGPU_EVENT_ALARM_VF_TABLE_UPDATE_FAILED:
nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_VF_TABLE_UPDATE_FAILED;
break;
case NVGPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD:
nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD;
break;
case NVGPU_EVENT_ALARM_POWER_ABOVE_THRESHOLD:
nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_POWER_ABOVE_THRESHOLD;
break;
case NVGPU_EVENT_ALARM_GPU_LOST:
nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_GPU_LOST;
break;
default:
/* Control shouldn't come here */
nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_GPU_LOST + 1;
break;
}
return nvgpu_gpu_event;
}
static inline u32 __pending_event(struct nvgpu_clk_dev *dev,
struct nvgpu_gpu_event_info *info)
{
u32 tail, head;
u32 events = 0;
struct nvgpu_clk_notification *p_notif;
tail = nvgpu_atomic_read(&dev->queue.tail);
head = nvgpu_atomic_read(&dev->queue.head);
head = (tail - head) < dev->queue.size ? head : tail - dev->queue.size;
if (_WRAPGTEQ(tail, head) && info) {
head++;
p_notif = &dev->queue.notifications[head % dev->queue.size];
events |= nvgpu_convert_gpu_event(p_notif->notification);
info->event_id = ffs(events) - 1;
info->timestamp = p_notif->timestamp;
nvgpu_atomic_set(&dev->queue.head, head);
}
return events;
}
static ssize_t nvgpu_clk_arb_read_event_dev(struct file *filp, char __user *buf,
size_t size, loff_t *off)
{
struct nvgpu_clk_dev *dev = filp->private_data;
struct nvgpu_gpu_event_info info;
ssize_t err;
clk_arb_dbg(dev->session->g,
"filp=%p, buf=%p, size=%zu", filp, buf, size);
if ((size - *off) < sizeof(info))
return 0;
memset(&info, 0, sizeof(info));
/* Get the oldest event from the queue */
while (!__pending_event(dev, &info)) {
if (filp->f_flags & O_NONBLOCK)
return -EAGAIN;
err = NVGPU_COND_WAIT_INTERRUPTIBLE(&dev->readout_wq,
__pending_event(dev, &info), 0);
if (err)
return err;
if (info.timestamp)
break;
}
if (copy_to_user(buf + *off, &info, sizeof(info)))
return -EFAULT;
return sizeof(info);
}
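/*
 * Illustrative userspace sketch (an assumption, not part of the driver):
 * draining events from the fd installed by nvgpu_clk_arb_install_event_fd().
 * Each successful read() returns one struct nvgpu_gpu_event_info with
 * event_id and timestamp filled in; a non-blocking fd with no pending events
 * returns -EAGAIN. handle_vf_event() is a hypothetical consumer.
 *
 *	struct nvgpu_gpu_event_info info;
 *
 *	while (read(event_fd, &info, sizeof(info)) == sizeof(info))
 *		handle_vf_event(info.event_id, info.timestamp);
 */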
static int nvgpu_clk_arb_set_event_filter(struct nvgpu_clk_dev *dev,
struct nvgpu_gpu_set_event_filter_args *args)
{
struct gk20a *g = dev->session->g;
u32 mask;
nvgpu_log(g, gpu_dbg_fn, " ");
if (args->flags)
return -EINVAL;
if (args->size != 1)
return -EINVAL;
if (copy_from_user(&mask, (void __user *) args->buffer,
args->size * sizeof(u32)))
return -EFAULT;
/* update alarm mask */
nvgpu_atomic_set(&dev->enabled_mask, mask);
return 0;
}
static long nvgpu_clk_arb_ioctl_event_dev(struct file *filp, unsigned int cmd,
unsigned long arg)
{
struct nvgpu_clk_dev *dev = filp->private_data;
struct gk20a *g = dev->session->g;
u8 buf[NVGPU_EVENT_IOCTL_MAX_ARG_SIZE];
int err = 0;
nvgpu_log(g, gpu_dbg_fn, "nr=%d", _IOC_NR(cmd));
if ((_IOC_TYPE(cmd) != NVGPU_EVENT_IOCTL_MAGIC) || (_IOC_NR(cmd) == 0)
|| (_IOC_NR(cmd) > NVGPU_EVENT_IOCTL_LAST))
return -EINVAL;
BUG_ON(_IOC_SIZE(cmd) > NVGPU_EVENT_IOCTL_MAX_ARG_SIZE);
memset(buf, 0, sizeof(buf));
if (_IOC_DIR(cmd) & _IOC_WRITE) {
if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd)))
return -EFAULT;
}
switch (cmd) {
case NVGPU_EVENT_IOCTL_SET_FILTER:
err = nvgpu_clk_arb_set_event_filter(dev,
(struct nvgpu_gpu_set_event_filter_args *)buf);
break;
default:
nvgpu_warn(g, "unrecognized event ioctl cmd: 0x%x", cmd);
err = -ENOTTY;
}
if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd));
return err;
}
static const struct file_operations completion_dev_ops = {
.owner = THIS_MODULE,
.release = nvgpu_clk_arb_release_completion_dev,
.poll = nvgpu_clk_arb_poll_dev,
};
static const struct file_operations event_dev_ops = {
.owner = THIS_MODULE,
.release = nvgpu_clk_arb_release_event_dev,
.poll = nvgpu_clk_arb_poll_dev,
.read = nvgpu_clk_arb_read_event_dev,
#ifdef CONFIG_COMPAT
.compat_ioctl = nvgpu_clk_arb_ioctl_event_dev,
#endif
.unlocked_ioctl = nvgpu_clk_arb_ioctl_event_dev,
};
static int nvgpu_clk_arb_install_fd(struct gk20a *g,
struct nvgpu_clk_session *session,
const struct file_operations *fops,
struct nvgpu_clk_dev **_dev)
{
struct file *file;
int fd;
int err;
int status;
char name[64];
struct nvgpu_clk_dev *dev;
clk_arb_dbg(g, " ");
dev = nvgpu_kzalloc(g, sizeof(*dev));
if (!dev)
return -ENOMEM;
status = nvgpu_clk_notification_queue_alloc(g, &dev->queue,
DEFAULT_EVENT_NUMBER);
if (status < 0) {
err = status;
goto fail;
}
fd = get_unused_fd_flags(O_RDWR);
if (fd < 0) {
err = fd;
goto fail;
}
snprintf(name, sizeof(name), "%s-clk-fd%d", g->name, fd);
file = anon_inode_getfile(name, fops, dev, O_RDWR);
if (IS_ERR(file)) {
err = PTR_ERR(file);
goto fail_fd;
}
fd_install(fd, file);
nvgpu_cond_init(&dev->readout_wq);
nvgpu_atomic_set(&dev->poll_mask, 0);
dev->session = session;
nvgpu_ref_init(&dev->refcount);
nvgpu_ref_get(&session->refcount);
*_dev = dev;
return fd;
fail_fd:
put_unused_fd(fd);
fail:
nvgpu_kfree(g, dev);
return err;
}
int nvgpu_clk_arb_install_event_fd(struct gk20a *g,
struct nvgpu_clk_session *session, int *event_fd, u32 alarm_mask)
{
struct nvgpu_clk_arb *arb = g->clk_arb;
struct nvgpu_clk_dev *dev;
int fd;
clk_arb_dbg(g, " ");
fd = nvgpu_clk_arb_install_fd(g, session, &event_dev_ops, &dev);
if (fd < 0)
return fd;
/* TODO: alarm mask needs to be set to default value to prevent
* failures of legacy tests. This will be removed when sanity is
* updated
*/
if (alarm_mask)
nvgpu_atomic_set(&dev->enabled_mask, alarm_mask);
else
nvgpu_atomic_set(&dev->enabled_mask, EVENT(VF_UPDATE));
dev->arb_queue_head = nvgpu_atomic_read(&arb->notification_queue.head);
nvgpu_spinlock_acquire(&arb->users_lock);
nvgpu_list_add_tail(&dev->link, &arb->users);
nvgpu_spinlock_release(&arb->users_lock);
*event_fd = fd;
return 0;
}
int nvgpu_clk_arb_install_request_fd(struct gk20a *g,
struct nvgpu_clk_session *session, int *request_fd)
{
struct nvgpu_clk_dev *dev;
int fd;
clk_arb_dbg(g, " ");
fd = nvgpu_clk_arb_install_fd(g, session, &completion_dev_ops, &dev);
if (fd < 0)
return fd;
*request_fd = fd;
return 0;
}
int nvgpu_clk_arb_commit_request_fd(struct gk20a *g,
struct nvgpu_clk_session *session, int request_fd)
{
struct nvgpu_clk_arb *arb = g->clk_arb;
struct nvgpu_clk_dev *dev;
struct fd fd;
int err = 0;
clk_arb_dbg(g, " ");
fd = fdget(request_fd);
if (!fd.file)
return -EINVAL;
if (fd.file->f_op != &completion_dev_ops) {
err = -EINVAL;
goto fdput_fd;
}
dev = (struct nvgpu_clk_dev *) fd.file->private_data;
if (!dev || dev->session != session) {
err = -EINVAL;
goto fdput_fd;
}
nvgpu_ref_get(&dev->refcount);
nvgpu_spinlock_acquire(&session->session_lock);
nvgpu_list_add(&dev->node, &session->targets);
nvgpu_spinlock_release(&session->session_lock);
nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item);
fdput_fd:
fdput(fd);
return err;
}
int nvgpu_clk_arb_set_session_target_mhz(struct nvgpu_clk_session *session,
int request_fd, u32 api_domain, u16 target_mhz)
{
struct nvgpu_clk_dev *dev;
struct fd fd;
int err = 0;
clk_arb_dbg(session->g,
"domain=0x%08x target_mhz=%u", api_domain, target_mhz);
fd = fdget(request_fd);
if (!fd.file)
return -EINVAL;
if (fd.file->f_op != &completion_dev_ops) {
err = -EINVAL;
goto fdput_fd;
}
dev = fd.file->private_data;
if (!dev || dev->session != session) {
err = -EINVAL;
goto fdput_fd;
}
switch (api_domain) {
case NVGPU_CLK_DOMAIN_MCLK:
dev->mclk_target_mhz = target_mhz;
break;
case NVGPU_CLK_DOMAIN_GPCCLK:
dev->gpc2clk_target_mhz = target_mhz * 2ULL;
break;
default:
err = -EINVAL;
}
fdput_fd:
fdput(fd);
return err;
}
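/*
 * Note (worked example): the arbiter tracks GPC2CLK internally, so a GPCCLK
 * request is doubled above; e.g. a 1000 MHz GPCCLK target is stored as
 * gpc2clk_target_mhz = 2000.
 */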
u32 nvgpu_clk_arb_get_arbiter_clk_domains(struct gk20a *g)
{
u32 clk_domains = g->ops.clk_arb.get_arbiter_clk_domains(g);
u32 api_domains = 0;
if (clk_domains & CTRL_CLK_DOMAIN_GPC2CLK)
api_domains |= BIT(NVGPU_GPU_CLK_DOMAIN_GPCCLK);
if (clk_domains & CTRL_CLK_DOMAIN_MCLK)
api_domains |= BIT(NVGPU_GPU_CLK_DOMAIN_MCLK);
return api_domains;
}
#ifdef CONFIG_DEBUG_FS
static int nvgpu_clk_arb_stats_show(struct seq_file *s, void *unused)
{
struct gk20a *g = s->private;
struct nvgpu_clk_arb *arb = g->clk_arb;
struct nvgpu_clk_arb_debug *debug;
u64 num;
s64 tmp, avg, std, max, min;
debug = NV_ACCESS_ONCE(arb->debug);
/* Make copy of structure and ensure no reordering */
nvgpu_smp_rmb();
if (!debug)
return -EINVAL;
std = debug->switch_std;
avg = debug->switch_avg;
max = debug->switch_max;
min = debug->switch_min;
num = debug->switch_num;
tmp = std;
do_div(tmp, num);
seq_printf(s, "Number of transitions: %lld\n",
num);
seq_printf(s, "max / min : %lld / %lld usec\n",
max, min);
seq_printf(s, "avg / std : %lld / %ld usec\n",
avg, int_sqrt(tmp));
return 0;
}
static int nvgpu_clk_arb_stats_open(struct inode *inode, struct file *file)
{
return single_open(file, nvgpu_clk_arb_stats_show, inode->i_private);
}
static const struct file_operations nvgpu_clk_arb_stats_fops = {
.open = nvgpu_clk_arb_stats_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
int nvgpu_clk_arb_debugfs_init(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct dentry *gpu_root = l->debugfs;
struct dentry *d;
nvgpu_log(g, gpu_dbg_info, "g=%p", g);
d = debugfs_create_file(
"arb_stats",
S_IRUGO,
gpu_root,
g,
&nvgpu_clk_arb_stats_fops);
if (!d)
return -ENOMEM;
return 0;
}
#endif

View File

File diff suppressed because it is too large

View File

@@ -0,0 +1,23 @@
/*
* Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __NVGPU_IOCTL_CTRL_H__
#define __NVGPU_IOCTL_CTRL_H__
int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp);
int gk20a_ctrl_dev_release(struct inode *inode, struct file *filp);
long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
#endif

View File

File diff suppressed because it is too large

View File

@@ -0,0 +1,54 @@
/*
* Tegra GK20A GPU Debugger Driver
*
* Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef DBG_GPU_IOCTL_GK20A_H
#define DBG_GPU_IOCTL_GK20A_H
#include <linux/poll.h>
#include "gk20a/dbg_gpu_gk20a.h"
/* NVGPU_DBG_GPU_IOCTL_REG_OPS: the upper limit for the number
* of regops */
#define NVGPU_IOCTL_DBG_REG_OPS_LIMIT 1024
struct dbg_session_gk20a_linux {
struct device *dev;
struct dbg_session_gk20a dbg_s;
};
struct dbg_session_channel_data_linux {
/*
* We have to keep a ref to the _file_, not the channel, because
* close(channel_fd) is synchronous and would deadlock if we had an
* open debug session fd holding a channel ref at that time. Holding a
* ref to the file makes close(channel_fd) just drop a kernel ref to
* the file; the channel will close when the last file ref is dropped.
*/
struct file *ch_f;
struct dbg_session_channel_data ch_data;
};
/* module debug driver interface */
int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp);
int gk20a_dbg_gpu_dev_open(struct inode *inode, struct file *filp);
long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
unsigned int gk20a_dbg_gpu_dev_poll(struct file *filep, poll_table *wait);
/* used by profiler driver interface */
int gk20a_prof_gpu_dev_open(struct inode *inode, struct file *filp);
#endif

View File

@@ -0,0 +1,677 @@
/*
* Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/cdev.h>
#include <linux/uaccess.h>
#include <linux/poll.h>
#include <uapi/linux/nvgpu.h>
#include <linux/anon_inodes.h>
#include <nvgpu/kmem.h>
#include <nvgpu/log.h>
#include <nvgpu/os_sched.h>
#include "gk20a/gk20a.h"
#include "gk20a/tsg_gk20a.h"
#include "gv11b/fifo_gv11b.h"
#include "platform_gk20a.h"
#include "ioctl_tsg.h"
#include "ioctl_channel.h"
#include "os_linux.h"
struct tsg_private {
struct gk20a *g;
struct tsg_gk20a *tsg;
};
static int gk20a_tsg_bind_channel_fd(struct tsg_gk20a *tsg, int ch_fd)
{
struct channel_gk20a *ch;
int err;
ch = gk20a_get_channel_from_file(ch_fd);
if (!ch)
return -EINVAL;
err = ch->g->ops.fifo.tsg_bind_channel(tsg, ch);
gk20a_channel_put(ch);
return err;
}
static int gk20a_tsg_ioctl_bind_channel_ex(struct gk20a *g,
struct tsg_gk20a *tsg, struct nvgpu_tsg_bind_channel_ex_args *arg)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct gk20a_sched_ctrl *sched = &l->sched_ctrl;
struct channel_gk20a *ch;
struct gr_gk20a *gr = &g->gr;
int err = 0;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid);
nvgpu_mutex_acquire(&sched->control_lock);
if (sched->control_locked) {
err = -EPERM;
goto mutex_release;
}
err = gk20a_busy(g);
if (err) {
nvgpu_err(g, "failed to power on gpu");
goto mutex_release;
}
ch = gk20a_get_channel_from_file(arg->channel_fd);
if (!ch) {
err = -EINVAL;
goto idle;
}
if (arg->tpc_pg_enabled && (!tsg->tpc_num_initialized)) {
if ((arg->num_active_tpcs > gr->max_tpc_count) ||
!(arg->num_active_tpcs)) {
nvgpu_err(g, "Invalid num of active TPCs");
err = -EINVAL;
goto ch_put;
}
tsg->tpc_num_initialized = true;
tsg->num_active_tpcs = arg->num_active_tpcs;
tsg->tpc_pg_enabled = true;
} else {
tsg->tpc_pg_enabled = false;
nvgpu_log(g, gpu_dbg_info, "dynamic TPC-PG not enabled");
}
if (arg->subcontext_id < g->fifo.max_subctx_count) {
ch->subctx_id = arg->subcontext_id;
} else {
err = -EINVAL;
goto ch_put;
}
nvgpu_log(g, gpu_dbg_info, "channel id : %d : subctx: %d",
ch->chid, ch->subctx_id);
/* Use runqueue selector 1 for all ASYNC ids */
if (ch->subctx_id > CHANNEL_INFO_VEID0)
ch->runqueue_sel = 1;
err = ch->g->ops.fifo.tsg_bind_channel(tsg, ch);
ch_put:
gk20a_channel_put(ch);
idle:
gk20a_idle(g);
mutex_release:
nvgpu_mutex_release(&sched->control_lock);
return err;
}
static int gk20a_tsg_unbind_channel_fd(struct tsg_gk20a *tsg, int ch_fd)
{
struct channel_gk20a *ch;
int err = 0;
ch = gk20a_get_channel_from_file(ch_fd);
if (!ch)
return -EINVAL;
if (ch->tsgid != tsg->tsgid) {
err = -EINVAL;
goto out;
}
err = gk20a_tsg_unbind_channel(ch);
/*
* Mark the channel as timed out: a channel unbound from its TSG
* has no context of its own, so it cannot serve any jobs.
*/
ch->has_timedout = true;
out:
gk20a_channel_put(ch);
return err;
}
static int gk20a_tsg_get_event_data_from_id(struct tsg_gk20a *tsg,
unsigned int event_id,
struct gk20a_event_id_data **event_id_data)
{
struct gk20a_event_id_data *local_event_id_data;
bool event_found = false;
nvgpu_mutex_acquire(&tsg->event_id_list_lock);
nvgpu_list_for_each_entry(local_event_id_data, &tsg->event_id_list,
gk20a_event_id_data, event_id_node) {
if (local_event_id_data->event_id == event_id) {
event_found = true;
break;
}
}
nvgpu_mutex_release(&tsg->event_id_list_lock);
if (event_found) {
*event_id_data = local_event_id_data;
return 0;
} else {
return -1;
}
}
/*
* Convert common event_id of the form NVGPU_EVENT_ID_* to Linux specific
* event_id of the form NVGPU_IOCTL_CHANNEL_EVENT_ID_* which is used in IOCTLs
*/
static u32 nvgpu_event_id_to_ioctl_channel_event_id(u32 event_id)
{
switch (event_id) {
case NVGPU_EVENT_ID_BPT_INT:
return NVGPU_IOCTL_CHANNEL_EVENT_ID_BPT_INT;
case NVGPU_EVENT_ID_BPT_PAUSE:
return NVGPU_IOCTL_CHANNEL_EVENT_ID_BPT_PAUSE;
case NVGPU_EVENT_ID_BLOCKING_SYNC:
return NVGPU_IOCTL_CHANNEL_EVENT_ID_BLOCKING_SYNC;
case NVGPU_EVENT_ID_CILP_PREEMPTION_STARTED:
return NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_STARTED;
case NVGPU_EVENT_ID_CILP_PREEMPTION_COMPLETE:
return NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_COMPLETE;
case NVGPU_EVENT_ID_GR_SEMAPHORE_WRITE_AWAKEN:
return NVGPU_IOCTL_CHANNEL_EVENT_ID_GR_SEMAPHORE_WRITE_AWAKEN;
}
return NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX;
}
void gk20a_tsg_event_id_post_event(struct tsg_gk20a *tsg,
int __event_id)
{
struct gk20a_event_id_data *event_id_data;
u32 event_id;
int err = 0;
struct gk20a *g = tsg->g;
event_id = nvgpu_event_id_to_ioctl_channel_event_id(__event_id);
if (event_id >= NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX)
return;
err = gk20a_tsg_get_event_data_from_id(tsg, event_id,
&event_id_data);
if (err)
return;
nvgpu_mutex_acquire(&event_id_data->lock);
nvgpu_log_info(g,
"posting event for event_id=%d on tsg=%d\n",
event_id, tsg->tsgid);
event_id_data->event_posted = true;
nvgpu_cond_broadcast_interruptible(&event_id_data->event_id_wq);
nvgpu_mutex_release(&event_id_data->lock);
}
static unsigned int gk20a_event_id_poll(struct file *filep, poll_table *wait)
{
unsigned int mask = 0;
struct gk20a_event_id_data *event_id_data = filep->private_data;
struct gk20a *g = event_id_data->g;
u32 event_id = event_id_data->event_id;
struct tsg_gk20a *tsg = g->fifo.tsg + event_id_data->id;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_info, " ");
poll_wait(filep, &event_id_data->event_id_wq.wq, wait);
nvgpu_mutex_acquire(&event_id_data->lock);
if (event_id_data->event_posted) {
nvgpu_log_info(g,
"found pending event_id=%d on TSG=%d\n",
event_id, tsg->tsgid);
mask = (POLLPRI | POLLIN);
event_id_data->event_posted = false;
}
nvgpu_mutex_release(&event_id_data->lock);
return mask;
}
static int gk20a_event_id_release(struct inode *inode, struct file *filp)
{
struct gk20a_event_id_data *event_id_data = filp->private_data;
struct gk20a *g = event_id_data->g;
struct tsg_gk20a *tsg = g->fifo.tsg + event_id_data->id;
nvgpu_mutex_acquire(&tsg->event_id_list_lock);
nvgpu_list_del(&event_id_data->event_id_node);
nvgpu_mutex_release(&tsg->event_id_list_lock);
nvgpu_mutex_destroy(&event_id_data->lock);
gk20a_put(g);
nvgpu_kfree(g, event_id_data);
filp->private_data = NULL;
return 0;
}
const struct file_operations gk20a_event_id_ops = {
.owner = THIS_MODULE,
.poll = gk20a_event_id_poll,
.release = gk20a_event_id_release,
};
static int gk20a_tsg_event_id_enable(struct tsg_gk20a *tsg,
int event_id,
int *fd)
{
int err = 0;
int local_fd;
struct file *file;
char name[64];
struct gk20a_event_id_data *event_id_data;
struct gk20a *g;
g = gk20a_get(tsg->g);
if (!g)
return -ENODEV;
err = gk20a_tsg_get_event_data_from_id(tsg,
event_id, &event_id_data);
if (err == 0) {
/* We already have event enabled */
err = -EINVAL;
goto free_ref;
}
err = get_unused_fd_flags(O_RDWR);
if (err < 0)
goto free_ref;
local_fd = err;
snprintf(name, sizeof(name), "nvgpu-event%d-fd%d",
event_id, local_fd);
file = anon_inode_getfile(name, &gk20a_event_id_ops,
NULL, O_RDWR);
if (IS_ERR(file)) {
err = PTR_ERR(file);
goto clean_up;
}
event_id_data = nvgpu_kzalloc(tsg->g, sizeof(*event_id_data));
if (!event_id_data) {
err = -ENOMEM;
goto clean_up_file;
}
event_id_data->g = g;
event_id_data->id = tsg->tsgid;
event_id_data->event_id = event_id;
nvgpu_cond_init(&event_id_data->event_id_wq);
err = nvgpu_mutex_init(&event_id_data->lock);
if (err)
goto clean_up_free;
nvgpu_init_list_node(&event_id_data->event_id_node);
nvgpu_mutex_acquire(&tsg->event_id_list_lock);
nvgpu_list_add_tail(&event_id_data->event_id_node, &tsg->event_id_list);
nvgpu_mutex_release(&tsg->event_id_list_lock);
fd_install(local_fd, file);
file->private_data = event_id_data;
*fd = local_fd;
return 0;
clean_up_free:
nvgpu_kfree(g, event_id_data);
clean_up_file:
fput(file);
clean_up:
put_unused_fd(local_fd);
free_ref:
gk20a_put(g);
return err;
}
static int gk20a_tsg_event_id_ctrl(struct gk20a *g, struct tsg_gk20a *tsg,
struct nvgpu_event_id_ctrl_args *args)
{
int err = 0;
int fd = -1;
if (args->event_id >= NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX)
return -EINVAL;
switch (args->cmd) {
case NVGPU_IOCTL_CHANNEL_EVENT_ID_CMD_ENABLE:
err = gk20a_tsg_event_id_enable(tsg, args->event_id, &fd);
if (!err)
args->event_fd = fd;
break;
default:
nvgpu_err(tsg->g, "unrecognized tsg event id cmd: 0x%x",
args->cmd);
err = -EINVAL;
break;
}
return err;
}
int nvgpu_ioctl_tsg_open(struct gk20a *g, struct file *filp)
{
struct tsg_private *priv;
struct tsg_gk20a *tsg;
struct device *dev;
int err;
g = gk20a_get(g);
if (!g)
return -ENODEV;
dev = dev_from_gk20a(g);
nvgpu_log(g, gpu_dbg_fn, "tsg: %s", dev_name(dev));
priv = nvgpu_kmalloc(g, sizeof(*priv));
if (!priv) {
err = -ENOMEM;
goto free_ref;
}
tsg = gk20a_tsg_open(g, nvgpu_current_pid(g));
if (!tsg) {
nvgpu_kfree(g, priv);
err = -ENOMEM;
goto free_ref;
}
priv->g = g;
priv->tsg = tsg;
filp->private_data = priv;
gk20a_sched_ctrl_tsg_added(g, tsg);
return 0;
free_ref:
gk20a_put(g);
return err;
}
int nvgpu_ioctl_tsg_dev_open(struct inode *inode, struct file *filp)
{
struct nvgpu_os_linux *l;
struct gk20a *g;
int ret;
l = container_of(inode->i_cdev,
struct nvgpu_os_linux, tsg.cdev);
g = &l->g;
nvgpu_log_fn(g, " ");
ret = gk20a_busy(g);
if (ret) {
nvgpu_err(g, "failed to power on, %d", ret);
return ret;
}
ret = nvgpu_ioctl_tsg_open(&l->g, filp);
gk20a_idle(g);
nvgpu_log_fn(g, "done");
return ret;
}
void nvgpu_ioctl_tsg_release(struct nvgpu_ref *ref)
{
struct tsg_gk20a *tsg = container_of(ref, struct tsg_gk20a, refcount);
struct gk20a *g = tsg->g;
gk20a_sched_ctrl_tsg_removed(g, tsg);
gk20a_tsg_release(ref);
gk20a_put(g);
}
int nvgpu_ioctl_tsg_dev_release(struct inode *inode, struct file *filp)
{
struct tsg_private *priv = filp->private_data;
struct tsg_gk20a *tsg = priv->tsg;
nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release);
nvgpu_kfree(tsg->g, priv);
return 0;
}
static int gk20a_tsg_ioctl_set_runlist_interleave(struct gk20a *g,
struct tsg_gk20a *tsg, struct nvgpu_runlist_interleave_args *arg)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct gk20a_sched_ctrl *sched = &l->sched_ctrl;
u32 level = arg->level;
int err;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid);
nvgpu_mutex_acquire(&sched->control_lock);
if (sched->control_locked) {
err = -EPERM;
goto done;
}
err = gk20a_busy(g);
if (err) {
nvgpu_err(g, "failed to power on gpu");
goto done;
}
level = nvgpu_get_common_runlist_level(level);
err = gk20a_tsg_set_runlist_interleave(tsg, level);
gk20a_idle(g);
done:
nvgpu_mutex_release(&sched->control_lock);
return err;
}
static int gk20a_tsg_ioctl_set_timeslice(struct gk20a *g,
struct tsg_gk20a *tsg, struct nvgpu_timeslice_args *arg)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct gk20a_sched_ctrl *sched = &l->sched_ctrl;
int err;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid);
nvgpu_mutex_acquire(&sched->control_lock);
if (sched->control_locked) {
err = -EPERM;
goto done;
}
err = gk20a_busy(g);
if (err) {
nvgpu_err(g, "failed to power on gpu");
goto done;
}
err = gk20a_tsg_set_timeslice(tsg, arg->timeslice_us);
gk20a_idle(g);
done:
nvgpu_mutex_release(&sched->control_lock);
return err;
}
static int gk20a_tsg_ioctl_get_timeslice(struct gk20a *g,
struct tsg_gk20a *tsg, struct nvgpu_timeslice_args *arg)
{
arg->timeslice_us = gk20a_tsg_get_timeslice(tsg);
return 0;
}
long nvgpu_ioctl_tsg_dev_ioctl(struct file *filp, unsigned int cmd,
unsigned long arg)
{
struct tsg_private *priv = filp->private_data;
struct tsg_gk20a *tsg = priv->tsg;
struct gk20a *g = tsg->g;
u8 __maybe_unused buf[NVGPU_TSG_IOCTL_MAX_ARG_SIZE];
int err = 0;
nvgpu_log_fn(g, "start %d", _IOC_NR(cmd));
if ((_IOC_TYPE(cmd) != NVGPU_TSG_IOCTL_MAGIC) ||
(_IOC_NR(cmd) == 0) ||
(_IOC_NR(cmd) > NVGPU_TSG_IOCTL_LAST) ||
(_IOC_SIZE(cmd) > NVGPU_TSG_IOCTL_MAX_ARG_SIZE))
return -EINVAL;
memset(buf, 0, sizeof(buf));
if (_IOC_DIR(cmd) & _IOC_WRITE) {
if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
return -EFAULT;
}
if (!g->sw_ready) {
err = gk20a_busy(g);
if (err)
return err;
gk20a_idle(g);
}
switch (cmd) {
case NVGPU_TSG_IOCTL_BIND_CHANNEL:
{
int ch_fd = *(int *)buf;
if (ch_fd < 0) {
err = -EINVAL;
break;
}
err = gk20a_tsg_bind_channel_fd(tsg, ch_fd);
break;
}
case NVGPU_TSG_IOCTL_BIND_CHANNEL_EX:
{
err = gk20a_tsg_ioctl_bind_channel_ex(g, tsg,
(struct nvgpu_tsg_bind_channel_ex_args *)buf);
break;
}
case NVGPU_TSG_IOCTL_UNBIND_CHANNEL:
{
int ch_fd = *(int *)buf;
if (ch_fd < 0) {
err = -EINVAL;
break;
}
err = gk20a_busy(g);
if (err) {
nvgpu_err(g,
"failed to host gk20a for ioctl cmd: 0x%x", cmd);
break;
}
err = gk20a_tsg_unbind_channel_fd(tsg, ch_fd);
gk20a_idle(g);
break;
}
case NVGPU_IOCTL_TSG_ENABLE:
{
err = gk20a_busy(g);
if (err) {
nvgpu_err(g,
"failed to host gk20a for ioctl cmd: 0x%x", cmd);
return err;
}
g->ops.fifo.enable_tsg(tsg);
gk20a_idle(g);
break;
}
case NVGPU_IOCTL_TSG_DISABLE:
{
err = gk20a_busy(g);
if (err) {
nvgpu_err(g,
"failed to host gk20a for ioctl cmd: 0x%x", cmd);
return err;
}
g->ops.fifo.disable_tsg(tsg);
gk20a_idle(g);
break;
}
case NVGPU_IOCTL_TSG_PREEMPT:
{
err = gk20a_busy(g);
if (err) {
nvgpu_err(g,
"failed to host gk20a for ioctl cmd: 0x%x", cmd);
return err;
}
/* preempt TSG */
err = g->ops.fifo.preempt_tsg(g, tsg->tsgid);
gk20a_idle(g);
break;
}
case NVGPU_IOCTL_TSG_EVENT_ID_CTRL:
{
err = gk20a_tsg_event_id_ctrl(g, tsg,
(struct nvgpu_event_id_ctrl_args *)buf);
break;
}
case NVGPU_IOCTL_TSG_SET_RUNLIST_INTERLEAVE:
err = gk20a_tsg_ioctl_set_runlist_interleave(g, tsg,
(struct nvgpu_runlist_interleave_args *)buf);
break;
case NVGPU_IOCTL_TSG_SET_TIMESLICE:
{
err = gk20a_tsg_ioctl_set_timeslice(g, tsg,
(struct nvgpu_timeslice_args *)buf);
break;
}
case NVGPU_IOCTL_TSG_GET_TIMESLICE:
{
err = gk20a_tsg_ioctl_get_timeslice(g, tsg,
(struct nvgpu_timeslice_args *)buf);
break;
}
default:
nvgpu_err(g, "unrecognized tsg gpu ioctl cmd: 0x%x",
cmd);
err = -ENOTTY;
break;
}
if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
err = copy_to_user((void __user *)arg,
buf, _IOC_SIZE(cmd));
return err;
}
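/*
 * Illustrative userspace sketch (hypothetical, for clarity only): binding a
 * channel to a TSG through the ioctl handled above. The argument for
 * NVGPU_TSG_IOCTL_BIND_CHANNEL is simply the channel fd, matching the
 * *(int *)buf dereference in the switch case; tsg_fd and ch_fd are assumed
 * to be fds opened by the caller beforehand.
 *
 *	if (ioctl(tsg_fd, NVGPU_TSG_IOCTL_BIND_CHANNEL, &ch_fd) != 0)
 *		perror("TSG bind failed");
 */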

View File

@@ -0,0 +1,28 @@
/*
* Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#ifndef NVGPU_IOCTL_TSG_H
#define NVGPU_IOCTL_TSG_H
struct inode;
struct file;
struct gk20a;
struct nvgpu_ref;
int nvgpu_ioctl_tsg_dev_release(struct inode *inode, struct file *filp);
int nvgpu_ioctl_tsg_dev_open(struct inode *inode, struct file *filp);
int nvgpu_ioctl_tsg_open(struct gk20a *g, struct file *filp);
long nvgpu_ioctl_tsg_dev_ioctl(struct file *filp,
unsigned int cmd, unsigned long arg);
void nvgpu_ioctl_tsg_release(struct nvgpu_ref *ref);
#endif

View File

@@ -0,0 +1,654 @@
/*
* Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/vmalloc.h>
#include <linux/stacktrace.h>
#include <nvgpu/lock.h>
#include <nvgpu/kmem.h>
#include <nvgpu/atomic.h>
#include <nvgpu/bug.h>
#include "gk20a/gk20a.h"
#include "kmem_priv.h"
/*
* Statically declared because this needs to be shared across all nvgpu driver
* instances. This makes sure that all kmem caches are _definitely_ uniquely
* named.
*/
static atomic_t kmem_cache_id;
void *__nvgpu_big_alloc(struct gk20a *g, size_t size, bool clear)
{
void *p;
if (size > PAGE_SIZE) {
if (clear)
p = nvgpu_vzalloc(g, size);
else
p = nvgpu_vmalloc(g, size);
} else {
if (clear)
p = nvgpu_kzalloc(g, size);
else
p = nvgpu_kmalloc(g, size);
}
return p;
}
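/*
 * For example, __nvgpu_big_alloc(g, 3 * PAGE_SIZE, true) takes the
 * nvgpu_vzalloc() path above, while a 512-byte request (at most one page)
 * is served by nvgpu_kzalloc().
 */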
void nvgpu_big_free(struct gk20a *g, void *p)
{
/*
* This will have to be fixed eventually. Allocs that use
* nvgpu_big_[mz]alloc() will need to remember the size of the alloc
* when freeing.
*/
if (is_vmalloc_addr(p))
nvgpu_vfree(g, p);
else
nvgpu_kfree(g, p);
}
void *__nvgpu_kmalloc(struct gk20a *g, size_t size, unsigned long ip)
{
void *alloc;
#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
alloc = __nvgpu_track_kmalloc(g, size, ip);
#else
alloc = kmalloc(size, GFP_KERNEL);
#endif
kmem_dbg(g, "kmalloc: size=%-6ld addr=0x%p gfp=0x%08x",
size, alloc, GFP_KERNEL);
return alloc;
}
void *__nvgpu_kzalloc(struct gk20a *g, size_t size, unsigned long ip)
{
void *alloc;
#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
alloc = __nvgpu_track_kzalloc(g, size, ip);
#else
alloc = kzalloc(size, GFP_KERNEL);
#endif
kmem_dbg(g, "kzalloc: size=%-6ld addr=0x%p gfp=0x%08x",
size, alloc, GFP_KERNEL);
return alloc;
}
void *__nvgpu_kcalloc(struct gk20a *g, size_t n, size_t size, unsigned long ip)
{
void *alloc;
#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
alloc = __nvgpu_track_kcalloc(g, n, size, ip);
#else
alloc = kcalloc(n, size, GFP_KERNEL);
#endif
kmem_dbg(g, "kcalloc: size=%-6ld addr=0x%p gfp=0x%08x",
n * size, alloc, GFP_KERNEL);
return alloc;
}
void *__nvgpu_vmalloc(struct gk20a *g, unsigned long size, unsigned long ip)
{
void *alloc;
#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
alloc = __nvgpu_track_vmalloc(g, size, ip);
#else
alloc = vmalloc(size);
#endif
kmem_dbg(g, "vmalloc: size=%-6ld addr=0x%p", size, alloc);
return alloc;
}
void *__nvgpu_vzalloc(struct gk20a *g, unsigned long size, unsigned long ip)
{
void *alloc;
#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
alloc = __nvgpu_track_vzalloc(g, size, ip);
#else
alloc = vzalloc(size);
#endif
kmem_dbg(g, "vzalloc: size=%-6ld addr=0x%p", size, alloc);
return alloc;
}
void __nvgpu_kfree(struct gk20a *g, void *addr)
{
kmem_dbg(g, "kfree: addr=0x%p", addr);
#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
__nvgpu_track_kfree(g, addr);
#else
kfree(addr);
#endif
}
void __nvgpu_vfree(struct gk20a *g, void *addr)
{
kmem_dbg(g, "vfree: addr=0x%p", addr);
#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
__nvgpu_track_vfree(g, addr);
#else
vfree(addr);
#endif
}
#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
void nvgpu_lock_tracker(struct nvgpu_mem_alloc_tracker *tracker)
{
nvgpu_mutex_acquire(&tracker->lock);
}
void nvgpu_unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker)
{
nvgpu_mutex_release(&tracker->lock);
}
void kmem_print_mem_alloc(struct gk20a *g,
struct nvgpu_mem_alloc *alloc,
struct seq_file *s)
{
#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
int i;
__pstat(s, "nvgpu-alloc: addr=0x%llx size=%ld\n",
alloc->addr, alloc->size);
for (i = 0; i < alloc->stack_length; i++)
__pstat(s, " %3d [<%p>] %pS\n", i,
(void *)alloc->stack[i],
(void *)alloc->stack[i]);
__pstat(s, "\n");
#else
__pstat(s, "nvgpu-alloc: addr=0x%llx size=%ld src=%pF\n",
alloc->addr, alloc->size, alloc->ip);
#endif
}
static int nvgpu_add_alloc(struct nvgpu_mem_alloc_tracker *tracker,
struct nvgpu_mem_alloc *alloc)
{
alloc->allocs_entry.key_start = alloc->addr;
alloc->allocs_entry.key_end = alloc->addr + alloc->size;
nvgpu_rbtree_insert(&alloc->allocs_entry, &tracker->allocs);
return 0;
}
static struct nvgpu_mem_alloc *nvgpu_rem_alloc(
struct nvgpu_mem_alloc_tracker *tracker, u64 alloc_addr)
{
struct nvgpu_mem_alloc *alloc;
struct nvgpu_rbtree_node *node = NULL;
nvgpu_rbtree_search(alloc_addr, &node, tracker->allocs);
if (!node)
return NULL;
alloc = nvgpu_mem_alloc_from_rbtree_node(node);
nvgpu_rbtree_unlink(node, &tracker->allocs);
return alloc;
}
static int __nvgpu_save_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker,
unsigned long size, unsigned long real_size,
u64 addr, unsigned long ip)
{
int ret;
struct nvgpu_mem_alloc *alloc;
#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
struct stack_trace stack_trace;
#endif
alloc = kzalloc(sizeof(*alloc), GFP_KERNEL);
if (!alloc)
return -ENOMEM;
alloc->owner = tracker;
alloc->size = size;
alloc->real_size = real_size;
alloc->addr = addr;
alloc->ip = (void *)(uintptr_t)ip;
#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
stack_trace.max_entries = MAX_STACK_TRACE;
stack_trace.nr_entries = 0;
stack_trace.entries = alloc->stack;
/*
* This 4 here skips the 2 function calls that happen for all traced
* allocs due to nvgpu:
*
* __nvgpu_save_kmem_alloc+0x7c/0x128
* __nvgpu_track_kzalloc+0xcc/0xf8
*
* And the function calls that get made by the stack trace code itself.
* If the trace savings code changes this will likely have to change
* as well.
*/
stack_trace.skip = 4;
save_stack_trace(&stack_trace);
alloc->stack_length = stack_trace.nr_entries;
#endif
nvgpu_lock_tracker(tracker);
tracker->bytes_alloced += size;
tracker->bytes_alloced_real += real_size;
tracker->nr_allocs++;
/* Keep track of this for building a histogram later on. */
if (tracker->max_alloc < size)
tracker->max_alloc = size;
if (tracker->min_alloc > size)
tracker->min_alloc = size;
ret = nvgpu_add_alloc(tracker, alloc);
if (ret) {
WARN(1, "Duplicate alloc??? 0x%llx\n", addr);
kfree(alloc);
nvgpu_unlock_tracker(tracker);
return ret;
}
nvgpu_unlock_tracker(tracker);
return 0;
}
static int __nvgpu_free_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker,
u64 addr)
{
struct nvgpu_mem_alloc *alloc;
nvgpu_lock_tracker(tracker);
alloc = nvgpu_rem_alloc(tracker, addr);
if (WARN(!alloc, "Possible double-free detected: 0x%llx!", addr)) {
nvgpu_unlock_tracker(tracker);
return -EINVAL;
}
memset((void *)alloc->addr, 0, alloc->size);
tracker->nr_frees++;
tracker->bytes_freed += alloc->size;
tracker->bytes_freed_real += alloc->real_size;
nvgpu_unlock_tracker(tracker);
return 0;
}
static void __nvgpu_check_valloc_size(unsigned long size)
{
WARN(size < PAGE_SIZE, "Alloc smaller than page size! (%lu)!\n", size);
}
static void __nvgpu_check_kalloc_size(size_t size)
{
WARN(size > PAGE_SIZE, "Alloc larger than page size! (%zu)!\n", size);
}
void *__nvgpu_track_vmalloc(struct gk20a *g, unsigned long size,
unsigned long ip)
{
void *alloc = vmalloc(size);
if (!alloc)
return NULL;
__nvgpu_check_valloc_size(size);
/*
* Ignore the return value. If this fails, let's not cause any issues
* for the rest of the driver.
*/
__nvgpu_save_kmem_alloc(g->vmallocs, size, roundup_pow_of_two(size),
(u64)(uintptr_t)alloc, ip);
return alloc;
}
void *__nvgpu_track_vzalloc(struct gk20a *g, unsigned long size,
unsigned long ip)
{
void *alloc = vzalloc(size);
if (!alloc)
return NULL;
__nvgpu_check_valloc_size(size);
/*
* Ignore the return value. If this fails, let's not cause any issues
* for the rest of the driver.
*/
__nvgpu_save_kmem_alloc(g->vmallocs, size, roundup_pow_of_two(size),
(u64)(uintptr_t)alloc, ip);
return alloc;
}
void *__nvgpu_track_kmalloc(struct gk20a *g, size_t size, unsigned long ip)
{
void *alloc = kmalloc(size, GFP_KERNEL);
if (!alloc)
return NULL;
__nvgpu_check_kalloc_size(size);
__nvgpu_save_kmem_alloc(g->kmallocs, size, roundup_pow_of_two(size),
(u64)(uintptr_t)alloc, ip);
return alloc;
}
void *__nvgpu_track_kzalloc(struct gk20a *g, size_t size, unsigned long ip)
{
void *alloc = kzalloc(size, GFP_KERNEL);
if (!alloc)
return NULL;
__nvgpu_check_kalloc_size(size);
__nvgpu_save_kmem_alloc(g->kmallocs, size, roundup_pow_of_two(size),
(u64)(uintptr_t)alloc, ip);
return alloc;
}
void *__nvgpu_track_kcalloc(struct gk20a *g, size_t n, size_t size,
unsigned long ip)
{
void *alloc = kcalloc(n, size, GFP_KERNEL);
if (!alloc)
return NULL;
__nvgpu_check_kalloc_size(n * size);
__nvgpu_save_kmem_alloc(g->kmallocs, n * size,
roundup_pow_of_two(n * size),
(u64)(uintptr_t)alloc, ip);
return alloc;
}
void __nvgpu_track_vfree(struct gk20a *g, void *addr)
{
/*
* Often it is accepted practice to pass NULL pointers into free
* functions to save code.
*/
if (!addr)
return;
__nvgpu_free_kmem_alloc(g->vmallocs, (u64)(uintptr_t)addr);
vfree(addr);
}
void __nvgpu_track_kfree(struct gk20a *g, void *addr)
{
if (!addr)
return;
__nvgpu_free_kmem_alloc(g->kmallocs, (u64)(uintptr_t)addr);
kfree(addr);
}
static int __do_check_for_outstanding_allocs(
struct gk20a *g,
struct nvgpu_mem_alloc_tracker *tracker,
const char *type, bool silent)
{
struct nvgpu_rbtree_node *node;
int count = 0;
nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
while (node) {
struct nvgpu_mem_alloc *alloc =
nvgpu_mem_alloc_from_rbtree_node(node);
if (!silent)
kmem_print_mem_alloc(g, alloc, NULL);
count++;
nvgpu_rbtree_enum_next(&node, node);
}
return count;
}
/**
* check_for_outstanding_allocs - Count and display outstanding allocs
*
* @g - The GPU.
* @silent - If set don't print anything about the allocs.
*
* Dump (or just count) the number of allocations left outstanding.
*/
static int check_for_outstanding_allocs(struct gk20a *g, bool silent)
{
int count = 0;
count += __do_check_for_outstanding_allocs(g, g->kmallocs, "kmalloc",
silent);
count += __do_check_for_outstanding_allocs(g, g->vmallocs, "vmalloc",
silent);
return count;
}
static void do_nvgpu_kmem_cleanup(struct nvgpu_mem_alloc_tracker *tracker,
void (*force_free_func)(const void *))
{
struct nvgpu_rbtree_node *node;
nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
while (node) {
struct nvgpu_mem_alloc *alloc =
nvgpu_mem_alloc_from_rbtree_node(node);
if (force_free_func)
force_free_func((void *)alloc->addr);
nvgpu_rbtree_unlink(node, &tracker->allocs);
kfree(alloc);
nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
}
}
/**
* nvgpu_kmem_cleanup - Cleanup the kmem tracking
*
* @g - The GPU.
* @force_free - If set will also free leaked objects if possible.
*
 * Clean up all of the allocs made by the nvgpu_kmem tracking code. If
 * @force_free is non-zero then the allocations made by nvgpu are also freed.
 * This is risky, though, as it is possible that the memory is still in use by
 * other parts of the GPU driver that are not aware this has happened.
 *
 * In theory it should be fine if the GPU driver has been deinitialized and
 * there are no bugs in that code. However, if there are any bugs in that code
 * then they could manifest as odd crashes at indeterminate times in the
 * future. So use @force_free at your own risk.
*/
static void nvgpu_kmem_cleanup(struct gk20a *g, bool force_free)
{
do_nvgpu_kmem_cleanup(g->kmallocs, force_free ? kfree : NULL);
do_nvgpu_kmem_cleanup(g->vmallocs, force_free ? vfree : NULL);
}
void nvgpu_kmem_fini(struct gk20a *g, int flags)
{
int count;
bool silent, force_free;
if (!flags)
return;
silent = !(flags & NVGPU_KMEM_FINI_DUMP_ALLOCS);
force_free = !!(flags & NVGPU_KMEM_FINI_FORCE_CLEANUP);
count = check_for_outstanding_allocs(g, silent);
nvgpu_kmem_cleanup(g, force_free);
/*
* If we leak objects we can either BUG() out or just WARN(). In general
 * it doesn't make sense to BUG() here since leaking a few objects
* won't crash the kernel but it can be helpful for development.
*
* If neither flag is set then we just silently do nothing.
*/
if (count > 0) {
if (flags & NVGPU_KMEM_FINI_WARN) {
WARN(1, "Letting %d allocs leak!!\n", count);
} else if (flags & NVGPU_KMEM_FINI_BUG) {
nvgpu_err(g, "Letting %d allocs leak!!", count);
BUG();
}
}
}
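/*
 * A minimal usage sketch for the flags handled above, assuming a debug
 * teardown path that wants leaked allocs dumped and force-freed but only
 * WARN()ed about. example_kmem_teardown() is a hypothetical caller used
 * purely for illustration.
 */
#if 0
static void example_kmem_teardown(struct gk20a *g)
{
	nvgpu_kmem_fini(g, NVGPU_KMEM_FINI_DUMP_ALLOCS |
			   NVGPU_KMEM_FINI_FORCE_CLEANUP |
			   NVGPU_KMEM_FINI_WARN);
}
#endif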
int nvgpu_kmem_init(struct gk20a *g)
{
int err;
g->vmallocs = kzalloc(sizeof(*g->vmallocs), GFP_KERNEL);
g->kmallocs = kzalloc(sizeof(*g->kmallocs), GFP_KERNEL);
if (!g->vmallocs || !g->kmallocs) {
err = -ENOMEM;
goto fail;
}
g->vmallocs->name = "vmalloc";
g->kmallocs->name = "kmalloc";
g->vmallocs->allocs = NULL;
g->kmallocs->allocs = NULL;
nvgpu_mutex_init(&g->vmallocs->lock);
nvgpu_mutex_init(&g->kmallocs->lock);
g->vmallocs->min_alloc = PAGE_SIZE;
g->kmallocs->min_alloc = KMALLOC_MIN_SIZE;
/*
* This needs to go after all the other initialization since they use
* the nvgpu_kzalloc() API.
*/
g->vmallocs->allocs_cache = nvgpu_kmem_cache_create(g,
sizeof(struct nvgpu_mem_alloc));
g->kmallocs->allocs_cache = nvgpu_kmem_cache_create(g,
sizeof(struct nvgpu_mem_alloc));
if (!g->vmallocs->allocs_cache || !g->kmallocs->allocs_cache) {
err = -ENOMEM;
if (g->vmallocs->allocs_cache)
nvgpu_kmem_cache_destroy(g->vmallocs->allocs_cache);
if (g->kmallocs->allocs_cache)
nvgpu_kmem_cache_destroy(g->kmallocs->allocs_cache);
goto fail;
}
return 0;
fail:
if (g->vmallocs)
kfree(g->vmallocs);
if (g->kmallocs)
kfree(g->kmallocs);
return err;
}
#else /* !CONFIG_NVGPU_TRACK_MEM_USAGE */
int nvgpu_kmem_init(struct gk20a *g)
{
return 0;
}
void nvgpu_kmem_fini(struct gk20a *g, int flags)
{
}
#endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */
struct nvgpu_kmem_cache *nvgpu_kmem_cache_create(struct gk20a *g, size_t size)
{
struct nvgpu_kmem_cache *cache =
nvgpu_kzalloc(g, sizeof(struct nvgpu_kmem_cache));
if (!cache)
return NULL;
cache->g = g;
snprintf(cache->name, sizeof(cache->name),
"nvgpu-cache-0x%p-%d-%d", g, (int)size,
atomic_inc_return(&kmem_cache_id));
cache->cache = kmem_cache_create(cache->name,
size, size, 0, NULL);
if (!cache->cache) {
nvgpu_kfree(g, cache);
return NULL;
}
return cache;
}
void nvgpu_kmem_cache_destroy(struct nvgpu_kmem_cache *cache)
{
struct gk20a *g = cache->g;
kmem_cache_destroy(cache->cache);
nvgpu_kfree(g, cache);
}
void *nvgpu_kmem_cache_alloc(struct nvgpu_kmem_cache *cache)
{
return kmem_cache_alloc(cache->cache, GFP_KERNEL);
}
void nvgpu_kmem_cache_free(struct nvgpu_kmem_cache *cache, void *ptr)
{
kmem_cache_free(cache->cache, ptr);
}
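/*
 * A minimal usage sketch for the cache helpers above, assuming a caller that
 * owns the cache lifetime. struct example_obj and example_cache_demo() are
 * hypothetical names used purely for illustration.
 */
#if 0
struct example_obj {
	int payload;
};

static int example_cache_demo(struct gk20a *g)
{
	struct nvgpu_kmem_cache *cache;
	struct example_obj *obj;

	cache = nvgpu_kmem_cache_create(g, sizeof(struct example_obj));
	if (!cache)
		return -ENOMEM;

	obj = nvgpu_kmem_cache_alloc(cache);
	if (obj)
		nvgpu_kmem_cache_free(cache, obj);

	/* Destroy only once no objects from this cache remain in use. */
	nvgpu_kmem_cache_destroy(cache);
	return 0;
}
#endif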

View File

@@ -0,0 +1,105 @@
/*
* Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __KMEM_PRIV_H__
#define __KMEM_PRIV_H__
#include <nvgpu/rbtree.h>
#include <nvgpu/lock.h>
struct seq_file;
#define __pstat(s, fmt, msg...) \
do { \
if (s) \
seq_printf(s, fmt, ##msg); \
else \
pr_info(fmt, ##msg); \
} while (0)
#define MAX_STACK_TRACE 20
/*
* Linux specific version of the nvgpu_kmem_cache struct. This type is
* completely opaque to the rest of the driver.
*/
struct nvgpu_kmem_cache {
struct gk20a *g;
struct kmem_cache *cache;
/*
* Memory to hold the kmem_cache unique name. Only necessary on our
* k3.10 kernel when not using the SLUB allocator but it's easier to
* just carry this on to newer kernels.
*/
char name[128];
};
#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
struct nvgpu_mem_alloc {
struct nvgpu_mem_alloc_tracker *owner;
void *ip;
#ifdef __NVGPU_SAVE_KALLOC_STACK_TRACES
unsigned long stack[MAX_STACK_TRACE];
int stack_length;
#endif
u64 addr;
unsigned long size;
unsigned long real_size;
struct nvgpu_rbtree_node allocs_entry;
};
static inline struct nvgpu_mem_alloc *
nvgpu_mem_alloc_from_rbtree_node(struct nvgpu_rbtree_node *node)
{
return (struct nvgpu_mem_alloc *)
((uintptr_t)node - offsetof(struct nvgpu_mem_alloc, allocs_entry));
};
/*
* Linux specific tracking of vmalloc, kmalloc, etc.
*/
struct nvgpu_mem_alloc_tracker {
const char *name;
struct nvgpu_kmem_cache *allocs_cache;
struct nvgpu_rbtree_node *allocs;
struct nvgpu_mutex lock;
u64 bytes_alloced;
u64 bytes_freed;
u64 bytes_alloced_real;
u64 bytes_freed_real;
u64 nr_allocs;
u64 nr_frees;
unsigned long min_alloc;
unsigned long max_alloc;
};
void nvgpu_lock_tracker(struct nvgpu_mem_alloc_tracker *tracker);
void nvgpu_unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker);
void kmem_print_mem_alloc(struct gk20a *g,
struct nvgpu_mem_alloc *alloc,
struct seq_file *s);
#endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */
#endif /* __KMEM_PRIV_H__ */

View File

@@ -0,0 +1,132 @@
/*
* Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/kernel.h>
#include <linux/device.h>
#include <nvgpu/log.h>
#include "gk20a/gk20a.h"
#include "platform_gk20a.h"
#include "os_linux.h"
/*
* Define a length for log buffers. This is the buffer that the 'fmt, ...' part
 * of __nvgpu_log_msg() and __nvgpu_log_dbg() print into. This buffer lives on
 * the stack so it must not be overly large, since kernel stack space is
 * limited, but we don't want it to be too restrictive either.
*/
#define LOG_BUFFER_LENGTH 160
/*
* Annoying quirk of Linux: this has to be a string literal since the printk()
* function and friends use the preprocessor to concatenate stuff to the start
* of this string when printing.
*/
#define LOG_FMT "nvgpu: %s %33s:%-4d [%s] %s\n"
static const char *log_types[] = {
"ERR",
"WRN",
"DBG",
"INFO",
};
int nvgpu_log_mask_enabled(struct gk20a *g, u64 log_mask)
{
return !!(g->log_mask & log_mask);
}
static inline const char *nvgpu_log_name(struct gk20a *g)
{
return dev_name(dev_from_gk20a(g));
}
#ifdef CONFIG_GK20A_TRACE_PRINTK
static void __nvgpu_trace_printk_log(u32 trace, const char *gpu_name,
const char *func_name, int line,
const char *log_type, const char *log)
{
trace_printk(LOG_FMT, gpu_name, func_name, line, log_type, log);
}
#endif
static void __nvgpu_really_print_log(u32 trace, const char *gpu_name,
const char *func_name, int line,
enum nvgpu_log_type type, const char *log)
{
const char *name = gpu_name ? gpu_name : "";
const char *log_type = log_types[type];
#ifdef CONFIG_GK20A_TRACE_PRINTK
if (trace)
return __nvgpu_trace_printk_log(trace, name, func_name,
line, log_type, log);
#endif
switch (type) {
case NVGPU_DEBUG:
/*
* We could use pr_debug() here but we control debug enablement
* separately from the Linux kernel. Perhaps this is a bug in
* nvgpu.
*/
pr_info(LOG_FMT, name, func_name, line, log_type, log);
break;
case NVGPU_INFO:
pr_info(LOG_FMT, name, func_name, line, log_type, log);
break;
case NVGPU_WARNING:
pr_warn(LOG_FMT, name, func_name, line, log_type, log);
break;
case NVGPU_ERROR:
pr_err(LOG_FMT, name, func_name, line, log_type, log);
break;
}
}
__attribute__((format (printf, 5, 6)))
void __nvgpu_log_msg(struct gk20a *g, const char *func_name, int line,
enum nvgpu_log_type type, const char *fmt, ...)
{
char log[LOG_BUFFER_LENGTH];
va_list args;
va_start(args, fmt);
vsnprintf(log, LOG_BUFFER_LENGTH, fmt, args);
va_end(args);
__nvgpu_really_print_log(0, g ? nvgpu_log_name(g) : "",
func_name, line, type, log);
}
__attribute__((format (printf, 5, 6)))
void __nvgpu_log_dbg(struct gk20a *g, u64 log_mask,
const char *func_name, int line,
const char *fmt, ...)
{
char log[LOG_BUFFER_LENGTH];
va_list args;
if ((log_mask & g->log_mask) == 0)
return;
va_start(args, fmt);
vsnprintf(log, LOG_BUFFER_LENGTH, fmt, args);
va_end(args);
__nvgpu_really_print_log(g->log_trace, nvgpu_log_name(g),
func_name, line, NVGPU_DEBUG, log);
}
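/*
 * A sketch of how the public logging macros are assumed to wrap the two entry
 * points above, passing __func__ and __LINE__ so LOG_FMT can name the call
 * site. The example_* macro names are hypothetical; the real macros live in
 * <nvgpu/log.h>, not in this file.
 */
#if 0
#define example_nvgpu_log(g, mask, fmt, args...) \
	__nvgpu_log_dbg(g, mask, __func__, __LINE__, fmt, ##args)

#define example_nvgpu_err(g, fmt, args...) \
	__nvgpu_log_msg(g, __func__, __LINE__, NVGPU_ERROR, fmt, ##args)
#endif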

View File

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,32 @@
/*
* Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#ifndef __NVGPU_COMMON_LINUX_MODULE_H__
#define __NVGPU_COMMON_LINUX_MODULE_H__
struct gk20a;
struct device;
struct nvgpu_os_linux;
int gk20a_pm_finalize_poweron(struct device *dev);
int nvgpu_finalize_poweron_linux(struct nvgpu_os_linux *l);
void gk20a_remove_support(struct gk20a *g);
void gk20a_driver_start_unload(struct gk20a *g);
int nvgpu_quiesce(struct gk20a *g);
int nvgpu_remove(struct device *dev, struct class *class);
void nvgpu_free_irq(struct gk20a *g);
struct device_node *nvgpu_get_node(struct gk20a *g);
void __iomem *nvgpu_ioremap_resource(struct platform_device *dev, int i,
struct resource **out);
extern struct class nvgpu_class;
#endif

View File

@@ -0,0 +1,62 @@
/*
* Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <nvgpu/types.h>
#include <nvgpu/hw/gv11b/hw_usermode_gv11b.h>
#include "os_linux.h"
/*
* Locks out the driver from accessing GPU registers. This prevents access to
 * these registers after the GPU has been clock or power gated. This should help
* find annoying bugs where register reads and writes are silently dropped
* after the GPU has been turned off. On older chips these reads and writes can
* also lock the entire CPU up.
*/
void nvgpu_lockout_usermode_registers(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
l->usermode_regs = NULL;
}
/*
 * Undoes nvgpu_lockout_usermode_registers().
*/
void nvgpu_restore_usermode_registers(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
l->usermode_regs = l->usermode_regs_saved;
}
void nvgpu_remove_usermode_support(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
if (l->usermode_regs) {
l->usermode_regs = NULL;
}
}
void nvgpu_init_usermode_support(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
l->usermode_regs = l->regs + usermode_cfg0_r();
l->usermode_regs_saved = l->usermode_regs;
}

View File

@@ -0,0 +1,27 @@
/*
* Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __NVGPU_MODULE_T19X_H__
#define __NVGPU_MODULE_T19X_H__
struct gk20a;
void nvgpu_init_usermode_support(struct gk20a *g);
void nvgpu_remove_usermode_support(struct gk20a *g);
void nvgpu_lockout_usermode_registers(struct gk20a *g);
void nvgpu_restore_usermode_registers(struct gk20a *g);
#endif

View File

@@ -0,0 +1,613 @@
/*
* Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <nvgpu/dma.h>
#include <nvgpu/gmmu.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/page_allocator.h>
#include <nvgpu/log.h>
#include <nvgpu/bug.h>
#include <nvgpu/enabled.h>
#include <nvgpu/kmem.h>
#include <nvgpu/vidmem.h>
#include <nvgpu/linux/dma.h>
#include <nvgpu/linux/vidmem.h>
#include <linux/vmalloc.h>
#include "os_linux.h"
#include "gk20a/gk20a.h"
#include "gk20a/mm_gk20a.h"
#include "platform_gk20a.h"
static u64 __nvgpu_sgl_phys(struct gk20a *g, struct nvgpu_sgl *sgl)
{
struct device *dev = dev_from_gk20a(g);
struct gk20a_platform *platform = gk20a_get_platform(dev);
u64 ipa = sg_phys((struct scatterlist *)sgl);
if (platform->phys_addr)
return platform->phys_addr(g, ipa);
return ipa;
}
int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem)
{
void *cpu_va;
pgprot_t prot = nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ?
PAGE_KERNEL :
pgprot_writecombine(PAGE_KERNEL);
if (mem->aperture != APERTURE_SYSMEM)
return 0;
/*
* WAR for bug 2040115: we already will always have a coherent vmap()
* for all sysmem buffers. The prot settings are left alone since
* eventually this should be deleted.
*/
if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
return 0;
/*
* A CPU mapping is implicitly made for all SYSMEM DMA allocations that
* don't have NVGPU_DMA_NO_KERNEL_MAPPING. Thus we don't need to make
* another CPU mapping.
*/
if (!(mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING))
return 0;
if (WARN_ON(mem->cpu_va)) {
nvgpu_warn(g, "nested");
return -EBUSY;
}
cpu_va = vmap(mem->priv.pages,
PAGE_ALIGN(mem->size) >> PAGE_SHIFT,
0, prot);
if (WARN_ON(!cpu_va))
return -ENOMEM;
mem->cpu_va = cpu_va;
return 0;
}
void nvgpu_mem_end(struct gk20a *g, struct nvgpu_mem *mem)
{
if (mem->aperture != APERTURE_SYSMEM)
return;
/*
* WAR for bug 2040115: skip this since the map will be taken care of
* during the free in the DMA API.
*/
if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
return;
/*
* Similar to nvgpu_mem_begin() we don't need to unmap the CPU mapping
* already made by the DMA API.
*/
if (!(mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING))
return;
vunmap(mem->cpu_va);
mem->cpu_va = NULL;
}
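/*
 * A minimal sketch of the begin/access/end pattern implemented above, assuming
 * a SYSMEM nvgpu_mem allocated elsewhere with NVGPU_DMA_NO_KERNEL_MAPPING.
 * example_mem_access() and its buf argument are hypothetical.
 */
#if 0
static int example_mem_access(struct gk20a *g, struct nvgpu_mem *buf)
{
	int err = nvgpu_mem_begin(g, buf);

	if (err)
		return err;

	/* CPU accesses are valid between begin() and end(). */
	nvgpu_mem_wr32(g, buf, 0, 0xcafe);
	(void)nvgpu_mem_rd32(g, buf, 0);

	nvgpu_mem_end(g, buf);
	return 0;
}
#endif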
static void pramin_access_batch_rd_n(struct gk20a *g, u32 start, u32 words, u32 **arg)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
u32 r = start, *dest_u32 = *arg;
if (!l->regs) {
__gk20a_warn_on_no_regs();
return;
}
while (words--) {
*dest_u32++ = gk20a_readl(g, r);
r += sizeof(u32);
}
*arg = dest_u32;
}
u32 nvgpu_mem_rd32(struct gk20a *g, struct nvgpu_mem *mem, u32 w)
{
u32 data = 0;
if (mem->aperture == APERTURE_SYSMEM) {
u32 *ptr = mem->cpu_va;
WARN_ON(!ptr);
data = ptr[w];
#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x", ptr + w, data);
#endif
} else if (mem->aperture == APERTURE_VIDMEM) {
u32 value;
u32 *p = &value;
nvgpu_pramin_access_batched(g, mem, w * sizeof(u32),
sizeof(u32), pramin_access_batch_rd_n, &p);
data = value;
} else {
WARN_ON("Accessing unallocated nvgpu_mem");
}
return data;
}
u32 nvgpu_mem_rd(struct gk20a *g, struct nvgpu_mem *mem, u32 offset)
{
WARN_ON(offset & 3);
return nvgpu_mem_rd32(g, mem, offset / sizeof(u32));
}
void nvgpu_mem_rd_n(struct gk20a *g, struct nvgpu_mem *mem,
u32 offset, void *dest, u32 size)
{
WARN_ON(offset & 3);
WARN_ON(size & 3);
if (mem->aperture == APERTURE_SYSMEM) {
u8 *src = (u8 *)mem->cpu_va + offset;
WARN_ON(!mem->cpu_va);
memcpy(dest, src, size);
#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
if (size)
nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x ... [%d bytes]",
				src, *(u32 *)dest, size);
#endif
} else if (mem->aperture == APERTURE_VIDMEM) {
u32 *dest_u32 = dest;
nvgpu_pramin_access_batched(g, mem, offset, size,
pramin_access_batch_rd_n, &dest_u32);
} else {
WARN_ON("Accessing unallocated nvgpu_mem");
}
}
static void pramin_access_batch_wr_n(struct gk20a *g, u32 start, u32 words, u32 **arg)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
u32 r = start, *src_u32 = *arg;
if (!l->regs) {
__gk20a_warn_on_no_regs();
return;
}
while (words--) {
writel_relaxed(*src_u32++, l->regs + r);
r += sizeof(u32);
}
*arg = src_u32;
}
void nvgpu_mem_wr32(struct gk20a *g, struct nvgpu_mem *mem, u32 w, u32 data)
{
if (mem->aperture == APERTURE_SYSMEM) {
u32 *ptr = mem->cpu_va;
WARN_ON(!ptr);
#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x", ptr + w, data);
#endif
ptr[w] = data;
} else if (mem->aperture == APERTURE_VIDMEM) {
u32 value = data;
u32 *p = &value;
nvgpu_pramin_access_batched(g, mem, w * sizeof(u32),
sizeof(u32), pramin_access_batch_wr_n, &p);
if (!mem->skip_wmb)
wmb();
} else {
WARN_ON("Accessing unallocated nvgpu_mem");
}
}
void nvgpu_mem_wr(struct gk20a *g, struct nvgpu_mem *mem, u32 offset, u32 data)
{
WARN_ON(offset & 3);
nvgpu_mem_wr32(g, mem, offset / sizeof(u32), data);
}
void nvgpu_mem_wr_n(struct gk20a *g, struct nvgpu_mem *mem, u32 offset,
void *src, u32 size)
{
WARN_ON(offset & 3);
WARN_ON(size & 3);
if (mem->aperture == APERTURE_SYSMEM) {
u8 *dest = (u8 *)mem->cpu_va + offset;
WARN_ON(!mem->cpu_va);
#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
if (size)
nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x ... [%d bytes]",
				dest, *(u32 *)src, size);
#endif
memcpy(dest, src, size);
} else if (mem->aperture == APERTURE_VIDMEM) {
u32 *src_u32 = src;
nvgpu_pramin_access_batched(g, mem, offset, size,
pramin_access_batch_wr_n, &src_u32);
if (!mem->skip_wmb)
wmb();
} else {
WARN_ON("Accessing unallocated nvgpu_mem");
}
}
static void pramin_access_batch_set(struct gk20a *g, u32 start, u32 words, u32 **arg)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
u32 r = start, repeat = **arg;
if (!l->regs) {
__gk20a_warn_on_no_regs();
return;
}
while (words--) {
writel_relaxed(repeat, l->regs + r);
r += sizeof(u32);
}
}
void nvgpu_memset(struct gk20a *g, struct nvgpu_mem *mem, u32 offset,
u32 c, u32 size)
{
WARN_ON(offset & 3);
WARN_ON(size & 3);
WARN_ON(c & ~0xff);
c &= 0xff;
if (mem->aperture == APERTURE_SYSMEM) {
u8 *dest = (u8 *)mem->cpu_va + offset;
WARN_ON(!mem->cpu_va);
#ifdef CONFIG_TEGRA_SIMULATION_PLATFORM
if (size)
nvgpu_log(g, gpu_dbg_mem, " %p = 0x%x [times %d]",
dest, c, size);
#endif
memset(dest, c, size);
} else if (mem->aperture == APERTURE_VIDMEM) {
u32 repeat_value = c | (c << 8) | (c << 16) | (c << 24);
u32 *p = &repeat_value;
nvgpu_pramin_access_batched(g, mem, offset, size,
pramin_access_batch_set, &p);
if (!mem->skip_wmb)
wmb();
} else {
WARN_ON("Accessing unallocated nvgpu_mem");
}
}
/*
* Obtain a SYSMEM address from a Linux SGL. This should eventually go away
* and/or become private to this file once all bad usages of Linux SGLs are
* cleaned up in the driver.
*/
u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl)
{
if (nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG) ||
!nvgpu_iommuable(g))
return g->ops.mm.gpu_phys_addr(g, NULL,
__nvgpu_sgl_phys(g, (struct nvgpu_sgl *)sgl));
if (sg_dma_address(sgl) == 0)
return g->ops.mm.gpu_phys_addr(g, NULL,
__nvgpu_sgl_phys(g, (struct nvgpu_sgl *)sgl));
if (sg_dma_address(sgl) == DMA_ERROR_CODE)
return 0;
return nvgpu_mem_iommu_translate(g, sg_dma_address(sgl));
}
/*
* Obtain the address the GPU should use from the %mem assuming this is a SYSMEM
* allocation.
*/
static u64 nvgpu_mem_get_addr_sysmem(struct gk20a *g, struct nvgpu_mem *mem)
{
return nvgpu_mem_get_addr_sgl(g, mem->priv.sgt->sgl);
}
/*
* Return the base address of %mem. Handles whether this is a VIDMEM or SYSMEM
* allocation.
*
* Note: this API does not make sense to use for _VIDMEM_ buffers with greater
* than one scatterlist chunk. If there's more than one scatterlist chunk then
* the buffer will not be contiguous. As such the base address probably isn't
* very useful. This is true for SYSMEM as well, if there's no IOMMU.
*
* However! It _is_ OK to use this on discontiguous sysmem buffers _if_ there's
* an IOMMU present and enabled for the GPU.
*/
u64 nvgpu_mem_get_addr(struct gk20a *g, struct nvgpu_mem *mem)
{
struct nvgpu_page_alloc *alloc;
if (mem->aperture == APERTURE_SYSMEM)
return nvgpu_mem_get_addr_sysmem(g, mem);
/*
* Otherwise get the vidmem address.
*/
alloc = mem->vidmem_alloc;
/* This API should not be used with > 1 chunks */
WARN_ON(alloc->nr_chunks != 1);
return alloc->base;
}
/*
* This should only be used on contiguous buffers regardless of whether
* there's an IOMMU present/enabled. This applies to both SYSMEM and
* VIDMEM.
*/
u64 nvgpu_mem_get_phys_addr(struct gk20a *g, struct nvgpu_mem *mem)
{
/*
* For a VIDMEM buf, this is identical to simply get_addr() so just fall
* back to that.
*/
if (mem->aperture == APERTURE_VIDMEM)
return nvgpu_mem_get_addr(g, mem);
return __nvgpu_sgl_phys(g, (struct nvgpu_sgl *)mem->priv.sgt->sgl);
}
/*
* Be careful how you use this! You are responsible for correctly freeing this
* memory.
*/
int nvgpu_mem_create_from_mem(struct gk20a *g,
struct nvgpu_mem *dest, struct nvgpu_mem *src,
int start_page, int nr_pages)
{
int ret;
u64 start = start_page * PAGE_SIZE;
u64 size = nr_pages * PAGE_SIZE;
dma_addr_t new_iova;
if (src->aperture != APERTURE_SYSMEM)
return -EINVAL;
/* Some silly things a caller might do... */
if (size > src->size)
return -EINVAL;
if ((start + size) > src->size)
return -EINVAL;
dest->mem_flags = src->mem_flags | NVGPU_MEM_FLAG_SHADOW_COPY;
dest->aperture = src->aperture;
dest->skip_wmb = src->skip_wmb;
dest->size = size;
/*
* Re-use the CPU mapping only if the mapping was made by the DMA API.
*
* Bug 2040115: the DMA API wrapper makes the mapping that we should
* re-use.
*/
if (!(src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) ||
nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
dest->cpu_va = src->cpu_va + (PAGE_SIZE * start_page);
dest->priv.pages = src->priv.pages + start_page;
dest->priv.flags = src->priv.flags;
new_iova = sg_dma_address(src->priv.sgt->sgl) ?
sg_dma_address(src->priv.sgt->sgl) + start : 0;
/*
* Make a new SG table that is based only on the subset of pages that
* is passed to us. This table gets freed by the dma free routines.
*/
if (src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING)
ret = nvgpu_get_sgtable_from_pages(g, &dest->priv.sgt,
src->priv.pages + start_page,
new_iova, size);
else
ret = nvgpu_get_sgtable(g, &dest->priv.sgt, dest->cpu_va,
new_iova, size);
return ret;
}
int __nvgpu_mem_create_from_pages(struct gk20a *g, struct nvgpu_mem *dest,
struct page **pages, int nr_pages)
{
struct sg_table *sgt;
struct page **our_pages =
nvgpu_kmalloc(g, sizeof(struct page *) * nr_pages);
if (!our_pages)
return -ENOMEM;
memcpy(our_pages, pages, sizeof(struct page *) * nr_pages);
if (nvgpu_get_sgtable_from_pages(g, &sgt, pages, 0,
nr_pages * PAGE_SIZE)) {
nvgpu_kfree(g, our_pages);
return -ENOMEM;
}
/*
* If we are making an SGT from physical pages we can be reasonably
* certain that this should bypass the SMMU - thus we set the DMA (aka
* IOVA) address to 0. This tells the GMMU mapping code to not make a
* mapping directed to the SMMU.
*/
sg_dma_address(sgt->sgl) = 0;
dest->mem_flags = __NVGPU_MEM_FLAG_NO_DMA;
dest->aperture = APERTURE_SYSMEM;
dest->skip_wmb = 0;
dest->size = PAGE_SIZE * nr_pages;
dest->priv.flags = 0;
dest->priv.pages = our_pages;
dest->priv.sgt = sgt;
return 0;
}
#ifdef CONFIG_TEGRA_GK20A_NVHOST
int __nvgpu_mem_create_from_phys(struct gk20a *g, struct nvgpu_mem *dest,
u64 src_phys, int nr_pages)
{
struct page **pages =
nvgpu_kmalloc(g, sizeof(struct page *) * nr_pages);
int i, ret = 0;
if (!pages)
return -ENOMEM;
for (i = 0; i < nr_pages; i++)
pages[i] = phys_to_page(src_phys + PAGE_SIZE * i);
ret = __nvgpu_mem_create_from_pages(g, dest, pages, nr_pages);
nvgpu_kfree(g, pages);
return ret;
}
#endif
static struct nvgpu_sgl *nvgpu_mem_linux_sgl_next(struct nvgpu_sgl *sgl)
{
return (struct nvgpu_sgl *)sg_next((struct scatterlist *)sgl);
}
static u64 nvgpu_mem_linux_sgl_phys(struct gk20a *g, struct nvgpu_sgl *sgl)
{
return (u64)__nvgpu_sgl_phys(g, sgl);
}
static u64 nvgpu_mem_linux_sgl_dma(struct nvgpu_sgl *sgl)
{
return (u64)sg_dma_address((struct scatterlist *)sgl);
}
static u64 nvgpu_mem_linux_sgl_length(struct nvgpu_sgl *sgl)
{
return (u64)((struct scatterlist *)sgl)->length;
}
static u64 nvgpu_mem_linux_sgl_gpu_addr(struct gk20a *g,
struct nvgpu_sgl *sgl,
struct nvgpu_gmmu_attrs *attrs)
{
if (sg_dma_address((struct scatterlist *)sgl) == 0)
return g->ops.mm.gpu_phys_addr(g, attrs,
__nvgpu_sgl_phys(g, sgl));
if (sg_dma_address((struct scatterlist *)sgl) == DMA_ERROR_CODE)
return 0;
return nvgpu_mem_iommu_translate(g,
sg_dma_address((struct scatterlist *)sgl));
}
static bool nvgpu_mem_linux_sgt_iommuable(struct gk20a *g,
struct nvgpu_sgt *sgt)
{
if (nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG))
return false;
return true;
}
static void nvgpu_mem_linux_sgl_free(struct gk20a *g, struct nvgpu_sgt *sgt)
{
/*
* Free this SGT. All we do is free the passed SGT. The actual Linux
* SGT/SGL needs to be freed separately.
*/
nvgpu_kfree(g, sgt);
}
static const struct nvgpu_sgt_ops nvgpu_linux_sgt_ops = {
.sgl_next = nvgpu_mem_linux_sgl_next,
.sgl_phys = nvgpu_mem_linux_sgl_phys,
.sgl_dma = nvgpu_mem_linux_sgl_dma,
.sgl_length = nvgpu_mem_linux_sgl_length,
.sgl_gpu_addr = nvgpu_mem_linux_sgl_gpu_addr,
.sgt_iommuable = nvgpu_mem_linux_sgt_iommuable,
.sgt_free = nvgpu_mem_linux_sgl_free,
};
static struct nvgpu_sgt *__nvgpu_mem_get_sgl_from_vidmem(
struct gk20a *g,
struct scatterlist *linux_sgl)
{
struct nvgpu_page_alloc *vidmem_alloc;
vidmem_alloc = nvgpu_vidmem_get_page_alloc(linux_sgl);
if (!vidmem_alloc)
return NULL;
return &vidmem_alloc->sgt;
}
struct nvgpu_sgt *nvgpu_linux_sgt_create(struct gk20a *g, struct sg_table *sgt)
{
struct nvgpu_sgt *nvgpu_sgt;
struct scatterlist *linux_sgl = sgt->sgl;
if (nvgpu_addr_is_vidmem_page_alloc(sg_dma_address(linux_sgl)))
return __nvgpu_mem_get_sgl_from_vidmem(g, linux_sgl);
nvgpu_sgt = nvgpu_kzalloc(g, sizeof(*nvgpu_sgt));
if (!nvgpu_sgt)
return NULL;
nvgpu_log(g, gpu_dbg_sgl, "Making Linux SGL!");
nvgpu_sgt->sgl = (struct nvgpu_sgl *)linux_sgl;
nvgpu_sgt->ops = &nvgpu_linux_sgt_ops;
return nvgpu_sgt;
}
struct nvgpu_sgt *nvgpu_sgt_create_from_mem(struct gk20a *g,
struct nvgpu_mem *mem)
{
return nvgpu_linux_sgt_create(g, mem->priv.sgt);
}
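/*
 * A sketch of walking an nvgpu_sgt produced by nvgpu_linux_sgt_create() via
 * the ops table above. Direct ->ops calls are shown only for illustration;
 * example_sgt_total_length() is a hypothetical helper.
 */
#if 0
static u64 example_sgt_total_length(struct nvgpu_sgt *sgt)
{
	struct nvgpu_sgl *sgl;
	u64 total = 0;

	/* Sum the length of every chunk in the scatter list. */
	for (sgl = sgt->sgl; sgl; sgl = sgt->ops->sgl_next(sgl))
		total += sgt->ops->sgl_length(sgl);

	return total;
}
#endif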

View File

@@ -0,0 +1,294 @@
/*
* Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/nvhost.h>
#include <linux/nvhost_t194.h>
#include <linux/nvhost_ioctl.h>
#include <linux/of_platform.h>
#include <nvgpu/nvhost.h>
#include "nvhost_priv.h"
#include "gk20a/gk20a.h"
#include "os_linux.h"
#include "module.h"
int nvgpu_get_nvhost_dev(struct gk20a *g)
{
struct device_node *np = nvgpu_get_node(g);
struct platform_device *host1x_pdev = NULL;
const __be32 *host1x_ptr;
host1x_ptr = of_get_property(np, "nvidia,host1x", NULL);
if (host1x_ptr) {
struct device_node *host1x_node =
of_find_node_by_phandle(be32_to_cpup(host1x_ptr));
host1x_pdev = of_find_device_by_node(host1x_node);
if (!host1x_pdev) {
nvgpu_warn(g, "host1x device not available");
return -EPROBE_DEFER;
}
} else {
if (g->has_syncpoints) {
nvgpu_warn(g, "host1x reference not found. assuming no syncpoints support");
g->has_syncpoints = false;
}
return 0;
}
g->nvhost_dev = nvgpu_kzalloc(g, sizeof(struct nvgpu_nvhost_dev));
if (!g->nvhost_dev)
return -ENOMEM;
g->nvhost_dev->host1x_pdev = host1x_pdev;
return 0;
}
void nvgpu_free_nvhost_dev(struct gk20a *g)
{
nvgpu_kfree(g, g->nvhost_dev);
}
int nvgpu_nvhost_module_busy_ext(
struct nvgpu_nvhost_dev *nvhost_dev)
{
return nvhost_module_busy_ext(nvhost_dev->host1x_pdev);
}
void nvgpu_nvhost_module_idle_ext(
struct nvgpu_nvhost_dev *nvhost_dev)
{
nvhost_module_idle_ext(nvhost_dev->host1x_pdev);
}
void nvgpu_nvhost_debug_dump_device(
struct nvgpu_nvhost_dev *nvhost_dev)
{
nvhost_debug_dump_device(nvhost_dev->host1x_pdev);
}
const char *nvgpu_nvhost_syncpt_get_name(
struct nvgpu_nvhost_dev *nvhost_dev, int id)
{
return nvhost_syncpt_get_name(nvhost_dev->host1x_pdev, id);
}
bool nvgpu_nvhost_syncpt_is_valid_pt_ext(
struct nvgpu_nvhost_dev *nvhost_dev, u32 id)
{
return nvhost_syncpt_is_valid_pt_ext(nvhost_dev->host1x_pdev, id);
}
int nvgpu_nvhost_syncpt_is_expired_ext(
struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 thresh)
{
return nvhost_syncpt_is_expired_ext(nvhost_dev->host1x_pdev,
id, thresh);
}
u32 nvgpu_nvhost_syncpt_incr_max_ext(
struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 incrs)
{
return nvhost_syncpt_incr_max_ext(nvhost_dev->host1x_pdev, id, incrs);
}
int nvgpu_nvhost_intr_register_notifier(
struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 thresh,
void (*callback)(void *, int), void *private_data)
{
return nvhost_intr_register_notifier(nvhost_dev->host1x_pdev,
id, thresh,
callback, private_data);
}
void nvgpu_nvhost_syncpt_set_min_eq_max_ext(
struct nvgpu_nvhost_dev *nvhost_dev, u32 id)
{
nvhost_syncpt_set_min_eq_max_ext(nvhost_dev->host1x_pdev, id);
}
void nvgpu_nvhost_syncpt_put_ref_ext(
struct nvgpu_nvhost_dev *nvhost_dev, u32 id)
{
nvhost_syncpt_put_ref_ext(nvhost_dev->host1x_pdev, id);
}
u32 nvgpu_nvhost_get_syncpt_host_managed(
struct nvgpu_nvhost_dev *nvhost_dev,
u32 param, const char *syncpt_name)
{
return nvhost_get_syncpt_host_managed(nvhost_dev->host1x_pdev,
param, syncpt_name);
}
u32 nvgpu_nvhost_get_syncpt_client_managed(
struct nvgpu_nvhost_dev *nvhost_dev,
const char *syncpt_name)
{
return nvhost_get_syncpt_client_managed(nvhost_dev->host1x_pdev,
syncpt_name);
}
int nvgpu_nvhost_syncpt_wait_timeout_ext(
struct nvgpu_nvhost_dev *nvhost_dev, u32 id,
u32 thresh, u32 timeout, u32 *value, struct timespec *ts)
{
return nvhost_syncpt_wait_timeout_ext(nvhost_dev->host1x_pdev,
id, thresh, timeout, value, ts);
}
int nvgpu_nvhost_syncpt_read_ext_check(
struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 *val)
{
return nvhost_syncpt_read_ext_check(nvhost_dev->host1x_pdev, id, val);
}
u32 nvgpu_nvhost_syncpt_read_maxval(
struct nvgpu_nvhost_dev *nvhost_dev, u32 id)
{
return nvhost_syncpt_read_maxval(nvhost_dev->host1x_pdev, id);
}
void nvgpu_nvhost_syncpt_set_safe_state(
struct nvgpu_nvhost_dev *nvhost_dev, u32 id)
{
u32 val;
/*
* Add large number of increments to current value
* so that all waiters on this syncpoint are released
*
* We don't expect any case where more than 0x10000 increments
* are pending
*/
val = nvhost_syncpt_read_minval(nvhost_dev->host1x_pdev, id);
val += 0x10000;
nvhost_syncpt_set_minval(nvhost_dev->host1x_pdev, id, val);
nvhost_syncpt_set_maxval(nvhost_dev->host1x_pdev, id, val);
}
int nvgpu_nvhost_create_symlink(struct gk20a *g)
{
struct device *dev = dev_from_gk20a(g);
int err = 0;
if (g->nvhost_dev &&
(dev->parent != &g->nvhost_dev->host1x_pdev->dev)) {
err = sysfs_create_link(&g->nvhost_dev->host1x_pdev->dev.kobj,
&dev->kobj,
dev_name(dev));
}
return err;
}
void nvgpu_nvhost_remove_symlink(struct gk20a *g)
{
struct device *dev = dev_from_gk20a(g);
if (g->nvhost_dev &&
(dev->parent != &g->nvhost_dev->host1x_pdev->dev)) {
sysfs_remove_link(&g->nvhost_dev->host1x_pdev->dev.kobj,
dev_name(dev));
}
}
#ifdef CONFIG_SYNC
u32 nvgpu_nvhost_sync_pt_id(struct sync_pt *pt)
{
return nvhost_sync_pt_id(pt);
}
u32 nvgpu_nvhost_sync_pt_thresh(struct sync_pt *pt)
{
return nvhost_sync_pt_thresh(pt);
}
struct sync_fence *nvgpu_nvhost_sync_fdget(int fd)
{
return nvhost_sync_fdget(fd);
}
int nvgpu_nvhost_sync_num_pts(struct sync_fence *fence)
{
return nvhost_sync_num_pts(fence);
}
struct sync_fence *nvgpu_nvhost_sync_create_fence(
struct nvgpu_nvhost_dev *nvhost_dev,
u32 id, u32 thresh, const char *name)
{
struct nvhost_ctrl_sync_fence_info pt = {
.id = id,
.thresh = thresh,
};
return nvhost_sync_create_fence(nvhost_dev->host1x_pdev, &pt, 1, name);
}
#endif /* CONFIG_SYNC */
#ifdef CONFIG_TEGRA_T19X_GRHOST
int nvgpu_nvhost_syncpt_unit_interface_get_aperture(
struct nvgpu_nvhost_dev *nvhost_dev,
u64 *base, size_t *size)
{
return nvhost_syncpt_unit_interface_get_aperture(
nvhost_dev->host1x_pdev, (phys_addr_t *)base, size);
}
u32 nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(u32 syncpt_id)
{
return nvhost_syncpt_unit_interface_get_byte_offset(syncpt_id);
}
int nvgpu_nvhost_syncpt_init(struct gk20a *g)
{
int err = 0;
if (!g->has_syncpoints)
return -ENOSYS;
err = nvgpu_get_nvhost_dev(g);
if (err) {
nvgpu_err(g, "host1x device not available");
g->has_syncpoints = false;
return -ENOSYS;
}
err = nvgpu_nvhost_syncpt_unit_interface_get_aperture(
g->nvhost_dev,
&g->syncpt_unit_base,
&g->syncpt_unit_size);
if (err) {
nvgpu_err(g, "Failed to get syncpt interface");
g->has_syncpoints = false;
return -ENOSYS;
}
g->syncpt_size =
nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(1);
nvgpu_info(g, "syncpt_unit_base %llx syncpt_unit_size %zx size %x\n",
g->syncpt_unit_base, g->syncpt_unit_size,
g->syncpt_size);
return 0;
}
#endif

View File

@@ -0,0 +1,24 @@
/*
* Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __NVGPU_NVHOST_PRIV_H__
#define __NVGPU_NVHOST_PRIV_H__
struct nvgpu_nvhost_dev {
struct platform_device *host1x_pdev;
};
#endif /* __NVGPU_NVHOST_PRIV_H__ */

View File

@@ -0,0 +1,106 @@
/*
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <gk20a/gk20a.h>
#include <nvgpu/nvlink.h>
#include <nvgpu/enabled.h>
#include "module.h"
#ifdef CONFIG_TEGRA_NVLINK
int nvgpu_nvlink_read_dt_props(struct gk20a *g)
{
struct device_node *np;
struct nvlink_device *ndev = g->nvlink.priv;
u32 local_dev_id;
u32 local_link_id;
u32 remote_dev_id;
u32 remote_link_id;
bool is_master;
/* Parse DT */
np = nvgpu_get_node(g);
if (!np)
goto fail;
np = of_get_child_by_name(np, "nvidia,nvlink");
if (!np)
goto fail;
np = of_get_child_by_name(np, "endpoint");
if (!np)
goto fail;
/* Parse DT structure to detect endpoint topology */
of_property_read_u32(np, "local_dev_id", &local_dev_id);
of_property_read_u32(np, "local_link_id", &local_link_id);
of_property_read_u32(np, "remote_dev_id", &remote_dev_id);
of_property_read_u32(np, "remote_link_id", &remote_link_id);
is_master = of_property_read_bool(np, "is_master");
/* Check that we are in dGPU mode */
if (local_dev_id != NVLINK_ENDPT_GV100) {
nvgpu_err(g, "Local nvlink device is not dGPU");
return -EINVAL;
}
ndev->is_master = is_master;
ndev->device_id = local_dev_id;
ndev->link.link_id = local_link_id;
ndev->link.remote_dev_info.device_id = remote_dev_id;
ndev->link.remote_dev_info.link_id = remote_link_id;
return 0;
fail:
nvgpu_info(g, "nvlink endpoint not found or invaling in DT");
return -ENODEV;
}
#endif /* CONFIG_TEGRA_NVLINK */
void nvgpu_mss_nvlink_init_credits(struct gk20a *g)
{
/* MSS_NVLINK_1_BASE */
void __iomem *soc1 = ioremap(0x01f20010, 4096);
/* MSS_NVLINK_2_BASE */
void __iomem *soc2 = ioremap(0x01f40010, 4096);
/* MSS_NVLINK_3_BASE */
void __iomem *soc3 = ioremap(0x01f60010, 4096);
/* MSS_NVLINK_4_BASE */
void __iomem *soc4 = ioremap(0x01f80010, 4096);
u32 val;
nvgpu_log(g, gpu_dbg_info, "init nvlink soc credits");
val = readl_relaxed(soc1);
writel_relaxed(val, soc1);
val = readl_relaxed(soc1 + 4);
writel_relaxed(val, soc1 + 4);
val = readl_relaxed(soc2);
writel_relaxed(val, soc2);
val = readl_relaxed(soc2 + 4);
writel_relaxed(val, soc2 + 4);
val = readl_relaxed(soc3);
writel_relaxed(val, soc3);
val = readl_relaxed(soc3 + 4);
writel_relaxed(val, soc3 + 4);
val = readl_relaxed(soc4);
writel_relaxed(val, soc4);
val = readl_relaxed(soc4 + 4);
writel_relaxed(val, soc4 + 4);
}

View File

@@ -0,0 +1,79 @@
/*
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <nvgpu/types.h>
#include <nvgpu/os_fence.h>
#include <nvgpu/linux/os_fence_android.h>
#include "gk20a/gk20a.h"
#include "../drivers/staging/android/sync.h"
inline struct sync_fence *nvgpu_get_sync_fence(struct nvgpu_os_fence *s)
{
struct sync_fence *fence = (struct sync_fence *)s->priv;
return fence;
}
static void nvgpu_os_fence_clear(struct nvgpu_os_fence *fence_out)
{
fence_out->priv = NULL;
fence_out->g = NULL;
fence_out->ops = NULL;
}
void nvgpu_os_fence_init(struct nvgpu_os_fence *fence_out,
struct gk20a *g, const struct nvgpu_os_fence_ops *fops,
struct sync_fence *fence)
{
fence_out->g = g;
fence_out->ops = fops;
fence_out->priv = (void *)fence;
}
void nvgpu_os_fence_android_drop_ref(struct nvgpu_os_fence *s)
{
struct sync_fence *fence = nvgpu_get_sync_fence(s);
sync_fence_put(fence);
nvgpu_os_fence_clear(s);
}
void nvgpu_os_fence_android_install_fd(struct nvgpu_os_fence *s, int fd)
{
struct sync_fence *fence = nvgpu_get_sync_fence(s);
sync_fence_get(fence);
sync_fence_install(fence, fd);
}
int nvgpu_os_fence_fdget(struct nvgpu_os_fence *fence_out,
struct channel_gk20a *c, int fd)
{
int err = -ENOSYS;
#ifdef CONFIG_TEGRA_GK20A_NVHOST
err = nvgpu_os_fence_syncpt_fdget(fence_out, c, fd);
#endif
if (err)
err = nvgpu_os_fence_sema_fdget(fence_out, c, fd);
if (err)
nvgpu_err(c->g, "error obtaining fence from fd %d", fd);
return err;
}
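/*
 * A sketch of how a caller is assumed to drive the fence obtained above:
 * program the wait commands through the ops table, then drop the reference.
 * example_program_waits(), its wait_cmd argument and the max-wait value of 32
 * are hypothetical.
 */
#if 0
static int example_program_waits(struct channel_gk20a *c, int fd,
				 struct priv_cmd_entry *wait_cmd)
{
	struct nvgpu_os_fence fence;
	int err = nvgpu_os_fence_fdget(&fence, c, fd);

	if (err)
		return err;

	err = fence.ops->program_waits(&fence, wait_cmd, c, 32);
	fence.ops->drop_ref(&fence);
	return err;
}
#endif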

View File

@@ -0,0 +1,111 @@
/*
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <nvgpu/errno.h>
#include <nvgpu/types.h>
#include <nvgpu/os_fence.h>
#include <nvgpu/linux/os_fence_android.h>
#include <nvgpu/semaphore.h>
#include "gk20a/channel_sync_gk20a.h"
#include "gk20a/mm_gk20a.h"
#include "sync_sema_android.h"
#include "../drivers/staging/android/sync.h"
int nvgpu_os_fence_sema_wait_gen_cmd(struct nvgpu_os_fence *s,
struct priv_cmd_entry *wait_cmd,
struct channel_gk20a *c,
int max_wait_cmds)
{
int err;
int wait_cmd_size;
int num_wait_cmds;
int i;
struct nvgpu_semaphore *sema;
struct sync_fence *sync_fence = nvgpu_get_sync_fence(s);
wait_cmd_size = c->g->ops.fifo.get_sema_wait_cmd_size();
num_wait_cmds = sync_fence->num_fences;
if (num_wait_cmds == 0)
return 0;
if (max_wait_cmds && num_wait_cmds > max_wait_cmds)
return -EINVAL;
err = gk20a_channel_alloc_priv_cmdbuf(c,
wait_cmd_size * num_wait_cmds,
wait_cmd);
if (err) {
nvgpu_err(c->g, "not enough priv cmd buffer space");
return err;
}
for (i = 0; i < num_wait_cmds; i++) {
struct fence *f = sync_fence->cbs[i].sync_pt;
struct sync_pt *pt = sync_pt_from_fence(f);
sema = gk20a_sync_pt_sema(pt);
gk20a_channel_gen_sema_wait_cmd(c, sema, wait_cmd,
wait_cmd_size, i);
}
return 0;
}
static const struct nvgpu_os_fence_ops sema_ops = {
.program_waits = nvgpu_os_fence_sema_wait_gen_cmd,
.drop_ref = nvgpu_os_fence_android_drop_ref,
.install_fence = nvgpu_os_fence_android_install_fd,
};
int nvgpu_os_fence_sema_create(
struct nvgpu_os_fence *fence_out,
struct channel_gk20a *c,
struct nvgpu_semaphore *sema)
{
struct sync_fence *fence;
fence = gk20a_sync_fence_create(c, sema, "f-gk20a-0x%04x",
nvgpu_semaphore_gpu_ro_va(sema));
if (!fence) {
nvgpu_err(c->g, "error constructing new fence: f-gk20a-0x%04x",
(u32)nvgpu_semaphore_gpu_ro_va(sema));
return -ENOMEM;
}
nvgpu_os_fence_init(fence_out, c->g, &sema_ops, fence);
return 0;
}
int nvgpu_os_fence_sema_fdget(struct nvgpu_os_fence *fence_out,
struct channel_gk20a *c, int fd)
{
struct sync_fence *fence = gk20a_sync_fence_fdget(fd);
if (!fence)
return -EINVAL;
nvgpu_os_fence_init(fence_out, c->g, &sema_ops, fence);
return 0;
}

View File

@@ -0,0 +1,121 @@
/*
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <nvgpu/errno.h>
#include <nvgpu/types.h>
#include <nvgpu/os_fence.h>
#include <nvgpu/linux/os_fence_android.h>
#include <nvgpu/nvhost.h>
#include <nvgpu/atomic.h>
#include "gk20a/gk20a.h"
#include "gk20a/channel_gk20a.h"
#include "gk20a/channel_sync_gk20a.h"
#include "gk20a/mm_gk20a.h"
#include "../drivers/staging/android/sync.h"
int nvgpu_os_fence_syncpt_wait_gen_cmd(struct nvgpu_os_fence *s,
struct priv_cmd_entry *wait_cmd,
struct channel_gk20a *c,
int max_wait_cmds)
{
int err;
int wait_cmd_size;
int num_wait_cmds;
int i;
u32 wait_id;
struct sync_pt *pt;
struct sync_fence *sync_fence = (struct sync_fence *)s->priv;
if (max_wait_cmds && sync_fence->num_fences > max_wait_cmds)
return -EINVAL;
/* validate syncpt ids */
for (i = 0; i < sync_fence->num_fences; i++) {
pt = sync_pt_from_fence(sync_fence->cbs[i].sync_pt);
wait_id = nvgpu_nvhost_sync_pt_id(pt);
if (!wait_id || !nvgpu_nvhost_syncpt_is_valid_pt_ext(
c->g->nvhost_dev, wait_id)) {
return -EINVAL;
}
}
num_wait_cmds = nvgpu_nvhost_sync_num_pts(sync_fence);
if (num_wait_cmds == 0)
return 0;
wait_cmd_size = c->g->ops.fifo.get_syncpt_wait_cmd_size();
err = gk20a_channel_alloc_priv_cmdbuf(c,
wait_cmd_size * num_wait_cmds, wait_cmd);
if (err) {
nvgpu_err(c->g,
"not enough priv cmd buffer space");
return err;
}
for (i = 0; i < sync_fence->num_fences; i++) {
struct fence *f = sync_fence->cbs[i].sync_pt;
struct sync_pt *pt = sync_pt_from_fence(f);
u32 wait_id = nvgpu_nvhost_sync_pt_id(pt);
u32 wait_value = nvgpu_nvhost_sync_pt_thresh(pt);
err = gk20a_channel_gen_syncpt_wait_cmd(c, wait_id, wait_value,
wait_cmd, wait_cmd_size, i, true);
}
WARN_ON(i != num_wait_cmds);
return 0;
}
static const struct nvgpu_os_fence_ops syncpt_ops = {
.program_waits = nvgpu_os_fence_syncpt_wait_gen_cmd,
.drop_ref = nvgpu_os_fence_android_drop_ref,
.install_fence = nvgpu_os_fence_android_install_fd,
};
int nvgpu_os_fence_syncpt_create(
struct nvgpu_os_fence *fence_out, struct channel_gk20a *c,
struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 thresh)
{
struct sync_fence *fence = nvgpu_nvhost_sync_create_fence(
nvhost_dev, id, thresh, "fence");
if (!fence) {
nvgpu_err(c->g, "error constructing fence %s", "fence");
return -ENOMEM;
}
nvgpu_os_fence_init(fence_out, c->g, &syncpt_ops, fence);
return 0;
}
int nvgpu_os_fence_syncpt_fdget(struct nvgpu_os_fence *fence_out,
struct channel_gk20a *c, int fd)
{
struct sync_fence *fence = nvgpu_nvhost_sync_fdget(fd);
if (!fence)
return -ENOMEM;
nvgpu_os_fence_init(fence_out, c->g, &syncpt_ops, fence);
return 0;
}

View File

@@ -0,0 +1,166 @@
/*
* Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef NVGPU_OS_LINUX_H
#define NVGPU_OS_LINUX_H
#include <linux/cdev.h>
#include <linux/iommu.h>
#include <linux/hashtable.h>
#include "gk20a/gk20a.h"
#include "cde.h"
#include "sched.h"
struct nvgpu_os_linux_ops {
struct {
void (*get_program_numbers)(struct gk20a *g,
u32 block_height_log2,
u32 shader_parameter,
int *hprog, int *vprog);
bool (*need_scatter_buffer)(struct gk20a *g);
int (*populate_scatter_buffer)(struct gk20a *g,
struct sg_table *sgt,
size_t surface_size,
void *scatter_buffer_ptr,
size_t scatter_buffer_size);
} cde;
};
struct nvgpu_os_linux {
struct gk20a g;
struct device *dev;
struct {
struct cdev cdev;
struct device *node;
} channel;
struct {
struct cdev cdev;
struct device *node;
} ctrl;
struct {
struct cdev cdev;
struct device *node;
} as_dev;
struct {
struct cdev cdev;
struct device *node;
} dbg;
struct {
struct cdev cdev;
struct device *node;
} prof;
struct {
struct cdev cdev;
struct device *node;
} tsg;
struct {
struct cdev cdev;
struct device *node;
} ctxsw;
struct {
struct cdev cdev;
struct device *node;
} sched;
dev_t cdev_region;
struct devfreq *devfreq;
struct device_dma_parameters dma_parms;
atomic_t hw_irq_stall_count;
atomic_t hw_irq_nonstall_count;
struct nvgpu_cond sw_irq_stall_last_handled_wq;
atomic_t sw_irq_stall_last_handled;
atomic_t nonstall_ops;
struct nvgpu_cond sw_irq_nonstall_last_handled_wq;
atomic_t sw_irq_nonstall_last_handled;
struct work_struct nonstall_fn_work;
struct workqueue_struct *nonstall_work_queue;
struct resource *reg_mem;
void __iomem *regs;
void __iomem *regs_saved;
struct resource *bar1_mem;
void __iomem *bar1;
void __iomem *bar1_saved;
void __iomem *usermode_regs;
void __iomem *usermode_regs_saved;
struct nvgpu_os_linux_ops ops;
#ifdef CONFIG_DEBUG_FS
struct dentry *debugfs;
struct dentry *debugfs_alias;
struct dentry *debugfs_ltc_enabled;
struct dentry *debugfs_timeouts_enabled;
struct dentry *debugfs_gr_idle_timeout_default;
struct dentry *debugfs_disable_bigpage;
struct dentry *debugfs_gr_default_attrib_cb_size;
struct dentry *debugfs_timeslice_low_priority_us;
struct dentry *debugfs_timeslice_medium_priority_us;
struct dentry *debugfs_timeslice_high_priority_us;
struct dentry *debugfs_runlist_interleave;
struct dentry *debugfs_allocators;
struct dentry *debugfs_xve;
struct dentry *debugfs_kmem;
struct dentry *debugfs_hal;
struct dentry *debugfs_force_preemption_cilp;
struct dentry *debugfs_force_preemption_gfxp;
struct dentry *debugfs_dump_ctxsw_stats;
#endif
DECLARE_HASHTABLE(ecc_sysfs_stats_htable, 5);
struct gk20a_cde_app cde_app;
struct rw_semaphore busy_lock;
struct gk20a_sched_ctrl sched_ctrl;
bool init_done;
};
static inline struct nvgpu_os_linux *nvgpu_os_linux_from_gk20a(struct gk20a *g)
{
return container_of(g, struct nvgpu_os_linux, g);
}
static inline struct device *dev_from_gk20a(struct gk20a *g)
{
return nvgpu_os_linux_from_gk20a(g)->dev;
}
#define INTERFACE_NAME "nvhost%s-gpu"
#endif

View File

@@ -0,0 +1,26 @@
/*
* Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#include <nvgpu/os_sched.h>
#include <linux/sched.h>
int nvgpu_current_tid(struct gk20a *g)
{
return current->pid;
}
int nvgpu_current_pid(struct gk20a *g)
{
return current->tgid;
}

View File

@@ -0,0 +1,861 @@
/*
* Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/pm_runtime.h>
#include <linux/of_platform.h>
#include <linux/of_address.h>
#include <nvgpu/nvhost.h>
#include <nvgpu/nvgpu_common.h>
#include <nvgpu/kmem.h>
#include <nvgpu/enabled.h>
#include <nvgpu/nvlink.h>
#include <nvgpu/soc.h>
#include "gk20a/gk20a.h"
#include "clk/clk.h"
#include "clk/clk_mclk.h"
#include "module.h"
#include "intr.h"
#include "sysfs.h"
#include "os_linux.h"
#include "platform_gk20a.h"
#include <nvgpu/sim.h>
#include "pci.h"
#include "pci_usermode.h"
#include "os_linux.h"
#include "driver_common.h"
#define PCI_INTERFACE_NAME "card-%s%%s"
static int nvgpu_pci_tegra_probe(struct device *dev)
{
return 0;
}
static int nvgpu_pci_tegra_remove(struct device *dev)
{
struct gk20a *g = get_gk20a(dev);
if (g->ops.gr.remove_gr_sysfs)
g->ops.gr.remove_gr_sysfs(g);
return 0;
}
static bool nvgpu_pci_tegra_is_railgated(struct device *pdev)
{
return false;
}
static long nvgpu_pci_clk_round_rate(struct device *dev, unsigned long rate)
{
long ret = (long)rate;
if (rate == UINT_MAX)
ret = BOOT_GPC2CLK_MHZ * 1000000UL;
return ret;
}
static struct gk20a_platform nvgpu_pci_device[] = {
{ /* DEVICE=0x1c35 */
/* ptimer src frequency in hz */
.ptimer_src_freq = 31250000,
.probe = nvgpu_pci_tegra_probe,
.remove = nvgpu_pci_tegra_remove,
/* power management configuration */
.railgate_delay_init = 500,
.can_railgate_init = false,
.can_elpg_init = true,
.enable_elpg = true,
.enable_elcg = false,
.enable_slcg = true,
.enable_blcg = true,
.enable_mscg = true,
.can_slcg = true,
.can_blcg = true,
.can_elcg = true,
.disable_aspm = true,
/* power management callbacks */
.is_railgated = nvgpu_pci_tegra_is_railgated,
.clk_round_rate = nvgpu_pci_clk_round_rate,
.ch_wdt_timeout_ms = 7000,
.honors_aperture = true,
.dma_mask = DMA_BIT_MASK(40),
.vbios_min_version = 0x86063000,
.hardcode_sw_threshold = true,
.ina3221_dcb_index = 0,
.ina3221_i2c_address = 0x84,
.ina3221_i2c_port = 0x2,
},
{ /* DEVICE=0x1c36 */
/* ptimer src frequency in hz */
.ptimer_src_freq = 31250000,
.probe = nvgpu_pci_tegra_probe,
.remove = nvgpu_pci_tegra_remove,
/* power management configuration */
.railgate_delay_init = 500,
.can_railgate_init = false,
.can_elpg_init = true,
.enable_elpg = true,
.enable_elcg = false,
.enable_slcg = true,
.enable_blcg = true,
.enable_mscg = true,
.can_slcg = true,
.can_blcg = true,
.can_elcg = true,
.disable_aspm = true,
/* power management callbacks */
.is_railgated = nvgpu_pci_tegra_is_railgated,
.clk_round_rate = nvgpu_pci_clk_round_rate,
.ch_wdt_timeout_ms = 7000,
.honors_aperture = true,
.dma_mask = DMA_BIT_MASK(40),
.vbios_min_version = 0x86062d00,
.hardcode_sw_threshold = true,
.ina3221_dcb_index = 0,
.ina3221_i2c_address = 0x84,
.ina3221_i2c_port = 0x2,
},
{ /* DEVICE=0x1c37 */
/* ptimer src frequency in hz */
.ptimer_src_freq = 31250000,
.probe = nvgpu_pci_tegra_probe,
.remove = nvgpu_pci_tegra_remove,
/* power management configuration */
.railgate_delay_init = 500,
.can_railgate_init = false,
.can_elpg_init = true,
.enable_elpg = true,
.enable_elcg = false,
.enable_slcg = true,
.enable_blcg = true,
.enable_mscg = true,
.can_slcg = true,
.can_blcg = true,
.can_elcg = true,
.disable_aspm = true,
/* power management callbacks */
.is_railgated = nvgpu_pci_tegra_is_railgated,
.clk_round_rate = nvgpu_pci_clk_round_rate,
.ch_wdt_timeout_ms = 7000,
.honors_aperture = true,
.dma_mask = DMA_BIT_MASK(40),
.vbios_min_version = 0x86063000,
.hardcode_sw_threshold = true,
.ina3221_dcb_index = 0,
.ina3221_i2c_address = 0x84,
.ina3221_i2c_port = 0x2,
},
{ /* DEVICE=0x1c75 */
/* ptimer src frequency in hz */
.ptimer_src_freq = 31250000,
.probe = nvgpu_pci_tegra_probe,
.remove = nvgpu_pci_tegra_remove,
/* power management configuration */
.railgate_delay_init = 500,
.can_railgate_init = false,
.can_elpg_init = true,
.enable_elpg = true,
.enable_elcg = false,
.enable_slcg = true,
.enable_blcg = true,
.enable_mscg = true,
.can_slcg = true,
.can_blcg = true,
.can_elcg = true,
.disable_aspm = true,
/* power management callbacks */
.is_railgated = nvgpu_pci_tegra_is_railgated,
.clk_round_rate = nvgpu_pci_clk_round_rate,
.ch_wdt_timeout_ms = 7000,
.honors_aperture = true,
.dma_mask = DMA_BIT_MASK(40),
.vbios_min_version = 0x86065300,
.hardcode_sw_threshold = false,
.ina3221_dcb_index = 1,
.ina3221_i2c_address = 0x80,
.ina3221_i2c_port = 0x1,
},
{ /* DEVICE=PG503 SKU 201 */
/* ptimer src frequency in hz */
.ptimer_src_freq = 31250000,
.probe = nvgpu_pci_tegra_probe,
.remove = nvgpu_pci_tegra_remove,
/* power management configuration */
.railgate_delay_init = 500,
.can_railgate_init = false,
.can_elpg_init = false,
.enable_elpg = false,
.enable_elcg = false,
.enable_slcg = false,
.enable_blcg = false,
.enable_mscg = false,
.can_slcg = false,
.can_blcg = false,
.can_elcg = false,
.disable_aspm = true,
/* power management callbacks */
.is_railgated = nvgpu_pci_tegra_is_railgated,
.clk_round_rate = nvgpu_pci_clk_round_rate,
.ch_wdt_timeout_ms = 7000,
.honors_aperture = true,
.dma_mask = DMA_BIT_MASK(40),
.vbios_min_version = 0x88001e00,
.hardcode_sw_threshold = false,
.run_preos = true,
},
{ /* DEVICE=PG503 SKU 200 ES */
/* ptimer src frequency in hz */
.ptimer_src_freq = 31250000,
.probe = nvgpu_pci_tegra_probe,
.remove = nvgpu_pci_tegra_remove,
/* power management configuration */
.railgate_delay_init = 500,
.can_railgate_init = false,
.can_elpg_init = false,
.enable_elpg = false,
.enable_elcg = false,
.enable_slcg = false,
.enable_blcg = false,
.enable_mscg = false,
.can_slcg = false,
.can_blcg = false,
.can_elcg = false,
.disable_aspm = true,
/* power management callbacks */
.is_railgated = nvgpu_pci_tegra_is_railgated,
.clk_round_rate = nvgpu_pci_clk_round_rate,
.ch_wdt_timeout_ms = 7000,
.honors_aperture = true,
.dma_mask = DMA_BIT_MASK(40),
.vbios_min_version = 0x88001e00,
.hardcode_sw_threshold = false,
.run_preos = true,
},
{
/* ptimer src frequency in hz */
.ptimer_src_freq = 31250000,
.probe = nvgpu_pci_tegra_probe,
.remove = nvgpu_pci_tegra_remove,
/* power management configuration */
.railgate_delay_init = 500,
.can_railgate_init = false,
.can_elpg_init = false,
.enable_elpg = false,
.enable_elcg = false,
.enable_slcg = false,
.enable_blcg = false,
.enable_mscg = false,
.can_slcg = false,
.can_blcg = false,
.can_elcg = false,
.disable_aspm = true,
/* power management callbacks */
.is_railgated = nvgpu_pci_tegra_is_railgated,
.clk_round_rate = nvgpu_pci_clk_round_rate,
.ch_wdt_timeout_ms = 7000,
.honors_aperture = true,
.dma_mask = DMA_BIT_MASK(40),
.vbios_min_version = 0x88000126,
.hardcode_sw_threshold = false,
.run_preos = true,
.has_syncpoints = true,
},
{ /* SKU250 */
/* ptimer src frequency in hz */
.ptimer_src_freq = 31250000,
.probe = nvgpu_pci_tegra_probe,
.remove = nvgpu_pci_tegra_remove,
/* power management configuration */
.railgate_delay_init = 500,
.can_railgate_init = false,
.can_elpg_init = false,
.enable_elpg = false,
.enable_elcg = true,
.enable_slcg = true,
.enable_blcg = true,
.enable_mscg = false,
.can_slcg = true,
.can_blcg = true,
.can_elcg = true,
.disable_aspm = true,
/* power management callbacks */
.is_railgated = nvgpu_pci_tegra_is_railgated,
.clk_round_rate = nvgpu_pci_clk_round_rate,
.ch_wdt_timeout_ms = 7000,
.honors_aperture = true,
.dma_mask = DMA_BIT_MASK(40),
.vbios_min_version = 0x1,
.hardcode_sw_threshold = false,
.run_preos = true,
.has_syncpoints = true,
},
{ /* SKU 0x1e3f */
/* ptimer src frequency in hz */
.ptimer_src_freq = 31250000,
.probe = nvgpu_pci_tegra_probe,
.remove = nvgpu_pci_tegra_remove,
/* power management configuration */
.railgate_delay_init = 500,
.can_railgate_init = false,
.can_elpg_init = false,
.enable_elpg = false,
.enable_elcg = false,
.enable_slcg = false,
.enable_blcg = false,
.enable_mscg = false,
.can_slcg = false,
.can_blcg = false,
.can_elcg = false,
.disable_aspm = true,
/* power management callbacks */
.is_railgated = nvgpu_pci_tegra_is_railgated,
.clk_round_rate = nvgpu_pci_clk_round_rate,
/*
* WAR: PCIE X1 is very slow, set to very high value till nvlink is up
*/
.ch_wdt_timeout_ms = 30000,
.honors_aperture = true,
.dma_mask = DMA_BIT_MASK(40),
.vbios_min_version = 0x1,
.hardcode_sw_threshold = false,
.unified_memory = false,
},
{ /* 0x1eba */
/* ptimer src frequency in hz */
.ptimer_src_freq = 31250000,
.probe = nvgpu_pci_tegra_probe,
.remove = nvgpu_pci_tegra_remove,
/* power management configuration */
.railgate_delay_init = 500,
.can_railgate_init = false,
.can_elpg_init = false,
.enable_elpg = false,
.enable_elcg = false,
.enable_slcg = false,
.enable_blcg = false,
.enable_mscg = false,
.can_slcg = false,
.can_blcg = false,
.can_elcg = false,
.disable_aspm = true,
/* power management callbacks */
.is_railgated = nvgpu_pci_tegra_is_railgated,
.clk_round_rate = nvgpu_pci_clk_round_rate,
.ch_wdt_timeout_ms = 7000,
.honors_aperture = true,
.dma_mask = DMA_BIT_MASK(40),
.vbios_min_version = 0x90040109,
.hardcode_sw_threshold = false,
.has_syncpoints = true,
},
};
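/*
 * PCI match table. The driver_data value in each entry below is an index
 * into the nvgpu_pci_device[] array above; nvgpu_pci_probe() bounds-checks
 * that index before copying the selected platform data.
 */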
static struct pci_device_id nvgpu_pci_table[] = {
{
PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1c35),
.class = PCI_BASE_CLASS_DISPLAY << 16,
.class_mask = 0xff << 16,
.driver_data = 0,
},
{
PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1c36),
.class = PCI_BASE_CLASS_DISPLAY << 16,
.class_mask = 0xff << 16,
.driver_data = 1,
},
{
PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1c37),
.class = PCI_BASE_CLASS_DISPLAY << 16,
.class_mask = 0xff << 16,
.driver_data = 2,
},
{
PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1c75),
.class = PCI_BASE_CLASS_DISPLAY << 16,
.class_mask = 0xff << 16,
.driver_data = 3,
},
{
PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1db1),
.class = PCI_BASE_CLASS_DISPLAY << 16,
.class_mask = 0xff << 16,
.driver_data = 4,
},
{
PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1db0),
.class = PCI_BASE_CLASS_DISPLAY << 16,
.class_mask = 0xff << 16,
.driver_data = 5,
},
{
PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1dbe),
.class = PCI_BASE_CLASS_DISPLAY << 16,
.class_mask = 0xff << 16,
.driver_data = 6,
},
{
PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1df1),
.class = PCI_BASE_CLASS_DISPLAY << 16,
.class_mask = 0xff << 16,
.driver_data = 7,
},
{
PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1e3f),
.class = PCI_BASE_CLASS_DISPLAY << 16,
.class_mask = 0xff << 16,
.driver_data = 8,
},
{
PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, 0x1eba),
.class = PCI_BASE_CLASS_DISPLAY << 16,
.class_mask = 0xff << 16,
.driver_data = 9,
},
{}
};
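/*
 * Hard IRQ handler: services the stalling and non-stalling interrupt trees
 * and re-arms MSI when enabled. Stalling-interrupt work that needs process
 * context is deferred to the threaded handler below via IRQ_WAKE_THREAD.
 */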
static irqreturn_t nvgpu_pci_isr(int irq, void *dev_id)
{
struct gk20a *g = dev_id;
irqreturn_t ret_stall;
irqreturn_t ret_nonstall;
ret_stall = nvgpu_intr_stall(g);
ret_nonstall = nvgpu_intr_nonstall(g);
#if defined(CONFIG_PCI_MSI)
/* Send MSI EOI */
if (g->ops.xve.rearm_msi && g->msi_enabled)
g->ops.xve.rearm_msi(g);
#endif
return (ret_stall == IRQ_NONE) ? ret_nonstall : IRQ_WAKE_THREAD;
}
static irqreturn_t nvgpu_pci_intr_thread(int irq, void *dev_id)
{
struct gk20a *g = dev_id;
return nvgpu_intr_thread_stall(g);
}
static int nvgpu_pci_init_support(struct pci_dev *pdev)
{
int err = 0;
struct gk20a *g = get_gk20a(&pdev->dev);
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
l->regs = ioremap(pci_resource_start(pdev, 0),
pci_resource_len(pdev, 0));
/* ioremap() returns NULL on failure, not an ERR_PTR value */
if (!l->regs) {
nvgpu_err(g, "failed to remap gk20a registers");
err = -ENOMEM;
goto fail;
}
l->bar1 = ioremap(pci_resource_start(pdev, 1),
pci_resource_len(pdev, 1));
if (!l->bar1) {
nvgpu_err(g, "failed to remap gk20a bar1");
err = -ENOMEM;
goto fail;
}
err = nvgpu_init_sim_support_linux_pci(g);
if (err)
goto fail;
err = nvgpu_init_sim_support_pci(g);
if (err)
goto fail_sim;
nvgpu_pci_init_usermode_support(l);
return 0;
fail_sim:
nvgpu_remove_sim_support_linux_pci(g);
fail:
if (l->regs) {
iounmap(l->regs);
l->regs = NULL;
}
if (l->bar1) {
iounmap(l->bar1);
l->bar1 = NULL;
}
return err;
}
static char *nvgpu_pci_devnode(struct device *dev, umode_t *mode)
{
if (mode)
*mode = S_IRUGO | S_IWUGO;
return kasprintf(GFP_KERNEL, "nvgpu-pci/%s", dev_name(dev));
}
static struct class nvgpu_pci_class = {
.owner = THIS_MODULE,
.name = "nvidia-pci-gpu",
.devnode = nvgpu_pci_devnode,
};
#ifdef CONFIG_PM
static int nvgpu_pci_pm_runtime_resume(struct device *dev)
{
return gk20a_pm_finalize_poweron(dev);
}
static int nvgpu_pci_pm_runtime_suspend(struct device *dev)
{
return 0;
}
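/*
 * Runtime PM and system sleep share the same callbacks: resume re-runs the
 * poweron finalization, while suspend is currently a no-op.
 */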
static const struct dev_pm_ops nvgpu_pci_pm_ops = {
.runtime_resume = nvgpu_pci_pm_runtime_resume,
.runtime_suspend = nvgpu_pci_pm_runtime_suspend,
.resume = nvgpu_pci_pm_runtime_resume,
.suspend = nvgpu_pci_pm_runtime_suspend,
};
#endif
static int nvgpu_pci_pm_init(struct device *dev)
{
#ifdef CONFIG_PM
struct gk20a *g = get_gk20a(dev);
if (!g->can_railgate) {
pm_runtime_disable(dev);
} else {
if (g->railgate_delay)
pm_runtime_set_autosuspend_delay(dev,
g->railgate_delay);
/*
* Runtime PM for PCI devices is disabled by default,
* so we need to enable it first
*/
pm_runtime_use_autosuspend(dev);
pm_runtime_put_noidle(dev);
pm_runtime_allow(dev);
}
#endif
return 0;
}
static int nvgpu_pci_probe(struct pci_dev *pdev,
const struct pci_device_id *pent)
{
struct gk20a_platform *platform = NULL;
struct nvgpu_os_linux *l;
struct gk20a *g;
int err;
char nodefmt[64];
struct device_node *np;
/* make sure driver_data is a sane index */
if (pent->driver_data >= sizeof(nvgpu_pci_device) /
sizeof(nvgpu_pci_device[0])) {
return -EINVAL;
}
l = kzalloc(sizeof(*l), GFP_KERNEL);
if (!l) {
dev_err(&pdev->dev, "couldn't allocate gk20a support");
return -ENOMEM;
}
hash_init(l->ecc_sysfs_stats_htable);
g = &l->g;
nvgpu_init_gk20a(g);
nvgpu_kmem_init(g);
/* Allocate memory to hold platform data */
platform = (struct gk20a_platform *)nvgpu_kzalloc(g,
sizeof(struct gk20a_platform));
if (!platform) {
dev_err(&pdev->dev, "couldn't allocate platform data");
err = -ENOMEM;
goto err_free_l;
}
/* Copy the detected device data into the allocated platform space */
memcpy((void *)platform, (void *)&nvgpu_pci_device[pent->driver_data],
sizeof(struct gk20a_platform));
pci_set_drvdata(pdev, platform);
err = nvgpu_init_enabled_flags(g);
if (err)
goto err_free_platform;
platform->g = g;
l->dev = &pdev->dev;
np = nvgpu_get_node(g);
if (of_dma_is_coherent(np)) {
__nvgpu_set_enabled(g, NVGPU_USE_COHERENT_SYSMEM, true);
__nvgpu_set_enabled(g, NVGPU_SUPPORT_IO_COHERENCE, true);
}
err = pci_enable_device(pdev);
if (err)
goto err_free_platform;
pci_set_master(pdev);
g->pci_vendor_id = pdev->vendor;
g->pci_device_id = pdev->device;
g->pci_subsystem_vendor_id = pdev->subsystem_vendor;
g->pci_subsystem_device_id = pdev->subsystem_device;
g->pci_class = (pdev->class >> 8) & 0xFFFFU; /* only base and sub class */
g->pci_revision = pdev->revision;
g->ina3221_dcb_index = platform->ina3221_dcb_index;
g->ina3221_i2c_address = platform->ina3221_i2c_address;
g->ina3221_i2c_port = platform->ina3221_i2c_port;
g->hardcode_sw_threshold = platform->hardcode_sw_threshold;
#if defined(CONFIG_PCI_MSI)
err = pci_enable_msi(pdev);
if (err) {
nvgpu_err(g,
"MSI could not be enabled, falling back to legacy");
g->msi_enabled = false;
} else
g->msi_enabled = true;
#endif
g->irq_stall = pdev->irq;
g->irq_nonstall = pdev->irq;
if (g->irq_stall < 0) {
err = -ENXIO;
goto err_disable_msi;
}
err = devm_request_threaded_irq(&pdev->dev,
g->irq_stall,
nvgpu_pci_isr,
nvgpu_pci_intr_thread,
#if defined(CONFIG_PCI_MSI)
g->msi_enabled ? 0 :
#endif
IRQF_SHARED, "nvgpu", g);
if (err) {
nvgpu_err(g,
"failed to request irq @ %d", g->irq_stall);
goto err_disable_msi;
}
disable_irq(g->irq_stall);
err = nvgpu_pci_init_support(pdev);
if (err)
goto err_free_irq;
if (strchr(dev_name(&pdev->dev), '%')) {
nvgpu_err(g, "illegal character in device name");
err = -EINVAL;
goto err_free_irq;
}
snprintf(nodefmt, sizeof(nodefmt),
PCI_INTERFACE_NAME, dev_name(&pdev->dev));
err = nvgpu_probe(g, "gpu_pci", nodefmt, &nvgpu_pci_class);
if (err)
goto err_free_irq;
err = nvgpu_pci_pm_init(&pdev->dev);
if (err) {
nvgpu_err(g, "pm init failed");
goto err_free_irq;
}
err = nvgpu_nvlink_probe(g);
/*
* ENODEV is a legal error here: it simply means there is no NVLINK.
* Any other error is fatal.
*/
if (err) {
if (err != -ENODEV) {
nvgpu_err(g, "fatal error probing nvlink, bailing out");
goto err_free_irq;
}
/* Enable Semaphore SHIM on nvlink only for now. */
__nvgpu_set_enabled(g, NVGPU_SUPPORT_NVLINK, false);
g->has_syncpoints = false;
} else {
err = nvgpu_nvhost_syncpt_init(g);
if (err) {
if (err != -ENOSYS) {
nvgpu_err(g, "syncpt init failed");
goto err_free_irq;
}
}
}
g->mm.has_physical_mode = false;
return 0;
err_free_irq:
nvgpu_free_irq(g);
err_disable_msi:
#if defined(CONFIG_PCI_MSI)
if (g->msi_enabled)
pci_disable_msi(pdev);
#endif
err_free_platform:
nvgpu_kfree(g, platform);
err_free_l:
kfree(l);
return err;
}
static void nvgpu_pci_remove(struct pci_dev *pdev)
{
struct gk20a *g = get_gk20a(&pdev->dev);
struct device *dev = dev_from_gk20a(g);
int err;
/* no support yet for unbind if DGPU is in VGPU mode */
if (gk20a_gpu_is_virtual(dev))
return;
nvgpu_nvlink_remove(g);
gk20a_driver_start_unload(g);
err = nvgpu_quiesce(g);
/* TODO: handle failure to idle */
WARN(err, "gpu failed to idle during driver removal");
nvgpu_free_irq(g);
nvgpu_remove(dev, &nvgpu_pci_class);
#if defined(CONFIG_PCI_MSI)
if (g->msi_enabled)
pci_disable_msi(pdev);
else {
/* In the MSI case the IRQ line is not shared, so it does not
* need to be re-enabled here.
*/
enable_irq(g->irq_stall);
}
#endif
/* Clear the gk20a back-pointer, then free the platform data */
gk20a_get_platform(&pdev->dev)->g = NULL;
nvgpu_kfree(g, gk20a_get_platform(&pdev->dev));
gk20a_put(g);
}
static struct pci_driver nvgpu_pci_driver = {
.name = "nvgpu",
.id_table = nvgpu_pci_table,
.probe = nvgpu_pci_probe,
.remove = nvgpu_pci_remove,
#ifdef CONFIG_PM
.driver.pm = &nvgpu_pci_pm_ops,
#endif
};
int __init nvgpu_pci_init(void)
{
int ret;
ret = class_register(&nvgpu_pci_class);
if (ret)
return ret;
return pci_register_driver(&nvgpu_pci_driver);
}
void __exit nvgpu_pci_exit(void)
{
pci_unregister_driver(&nvgpu_pci_driver);
class_unregister(&nvgpu_pci_class);
}

View File

@@ -0,0 +1,27 @@
/*
* Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef NVGPU_PCI_H
#define NVGPU_PCI_H
#ifdef CONFIG_GK20A_PCI
int nvgpu_pci_init(void);
void nvgpu_pci_exit(void);
#else
static inline int nvgpu_pci_init(void) { return 0; }
static inline void nvgpu_pci_exit(void) {}
#endif
#endif

View File

@@ -0,0 +1,24 @@
/*
* Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#include <nvgpu/types.h>
#include <nvgpu/hw/gv11b/hw_usermode_gv11b.h>
#include "os_linux.h"
void nvgpu_pci_init_usermode_support(struct nvgpu_os_linux *l)
{
l->usermode_regs = l->regs + usermode_cfg0_r();
l->usermode_regs_saved = l->usermode_regs;
}

View File

@@ -0,0 +1,23 @@
/*
* Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __NVGPU_PCI_USERMODE_H__
#define __NVGPU_PCI_USERMODE_H__
struct nvgpu_os_linux;
void nvgpu_pci_init_usermode_support(struct nvgpu_os_linux *l);
#endif

View File

@@ -0,0 +1,269 @@
/*
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/hashtable.h>
#include <nvgpu/kmem.h>
#include <nvgpu/bug.h>
#include <nvgpu/hashtable.h>
#include "os_linux.h"
#include "gk20a/gk20a.h"
#include "platform_gk20a.h"
#include "platform_gk20a_tegra.h"
#include "platform_gp10b.h"
#include "platform_gp10b_tegra.h"
#include "platform_ecc_sysfs.h"
static u32 gen_ecc_hash_key(char *str)
{
int i = 0;
u32 hash_key = 0x811c9dc5;
while (str[i]) {
hash_key *= 0x1000193;
hash_key ^= (u32)(str[i]);
i++;
}
return hash_key;
}
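/*
 * sysfs show callback shared by all ECC stat files. The attribute name
 * encodes the hardware unit, e.g. "ltc<N>_lts<M>_<stat>", "ltc<N>_<stat>",
 * "gpc0_tpc<N>_<stat>", "gpc<N>_<stat>" or "eng<N>_<stat>"; the unit index
 * is parsed back out of the name to select the right counter.
 */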
static ssize_t ecc_stat_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
const char *ecc_stat_full_name = attr->attr.name;
const char *ecc_stat_base_name;
unsigned int hw_unit;
unsigned int subunit;
struct gk20a_ecc_stat *ecc_stat;
u32 hash_key;
struct gk20a *g = get_gk20a(dev);
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
if (sscanf(ecc_stat_full_name, "ltc%u_lts%u", &hw_unit,
&subunit) == 2) {
ecc_stat_base_name = &(ecc_stat_full_name[strlen("ltc0_lts0_")]);
hw_unit = g->gr.slices_per_ltc * hw_unit + subunit;
} else if (sscanf(ecc_stat_full_name, "ltc%u", &hw_unit) == 1) {
ecc_stat_base_name = &(ecc_stat_full_name[strlen("ltc0_")]);
} else if (sscanf(ecc_stat_full_name, "gpc0_tpc%u", &hw_unit) == 1) {
ecc_stat_base_name = &(ecc_stat_full_name[strlen("gpc0_tpc0_")]);
} else if (sscanf(ecc_stat_full_name, "gpc%u", &hw_unit) == 1) {
ecc_stat_base_name = &(ecc_stat_full_name[strlen("gpc0_")]);
} else if (sscanf(ecc_stat_full_name, "eng%u", &hw_unit) == 1) {
ecc_stat_base_name = &(ecc_stat_full_name[strlen("eng0_")]);
} else {
return snprintf(buf,
PAGE_SIZE,
"Error: Invalid ECC stat name!\n");
}
hash_key = gen_ecc_hash_key((char *)ecc_stat_base_name);
hash_for_each_possible(l->ecc_sysfs_stats_htable,
ecc_stat,
hash_node,
hash_key) {
if (hw_unit >= ecc_stat->count)
continue;
if (!strcmp(ecc_stat_full_name, ecc_stat->names[hw_unit]))
return snprintf(buf, PAGE_SIZE, "%u\n", ecc_stat->counters[hw_unit]);
}
return snprintf(buf, PAGE_SIZE, "Error: No ECC stat found!\n");
}
int nvgpu_gr_ecc_stat_create(struct device *dev,
int is_l2, char *ecc_stat_name,
struct gk20a_ecc_stat *ecc_stat)
{
struct gk20a *g = get_gk20a(dev);
char *ltc_unit_name = "ltc";
char *gr_unit_name = "gpc0_tpc";
char *lts_unit_name = "lts";
int num_hw_units = 0;
int num_subunits = 0;
if (is_l2 == 1)
num_hw_units = g->ltc_count;
else if (is_l2 == 2) {
num_hw_units = g->ltc_count;
num_subunits = g->gr.slices_per_ltc;
} else
num_hw_units = g->gr.tpc_count;
return nvgpu_ecc_stat_create(dev, num_hw_units, num_subunits,
is_l2 ? ltc_unit_name : gr_unit_name,
num_subunits ? lts_unit_name : NULL,
ecc_stat_name,
ecc_stat);
}
int nvgpu_ecc_stat_create(struct device *dev,
int num_hw_units, int num_subunits,
char *ecc_unit_name, char *ecc_subunit_name,
char *ecc_stat_name,
struct gk20a_ecc_stat *ecc_stat)
{
int error = 0;
struct gk20a *g = get_gk20a(dev);
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
int hw_unit = 0;
int subunit = 0;
int element = 0;
u32 hash_key = 0;
struct device_attribute *dev_attr_array;
int num_elements = num_subunits ? num_subunits * num_hw_units :
num_hw_units;
/* Allocate arrays */
dev_attr_array = nvgpu_kzalloc(g, sizeof(struct device_attribute) *
num_elements);
ecc_stat->counters = nvgpu_kzalloc(g, sizeof(u32) * num_elements);
ecc_stat->names = nvgpu_kzalloc(g, sizeof(char *) * num_elements);
for (hw_unit = 0; hw_unit < num_elements; hw_unit++) {
ecc_stat->names[hw_unit] = nvgpu_kzalloc(g, sizeof(char) *
ECC_STAT_NAME_MAX_SIZE);
}
ecc_stat->count = num_elements;
if (num_subunits) {
for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) {
for (subunit = 0; subunit < num_subunits; subunit++) {
element = hw_unit*num_subunits + subunit;
snprintf(ecc_stat->names[element],
ECC_STAT_NAME_MAX_SIZE,
"%s%d_%s%d_%s",
ecc_unit_name,
hw_unit,
ecc_subunit_name,
subunit,
ecc_stat_name);
sysfs_attr_init(&dev_attr_array[element].attr);
dev_attr_array[element].attr.name =
ecc_stat->names[element];
dev_attr_array[element].attr.mode =
VERIFY_OCTAL_PERMISSIONS(S_IRUGO);
dev_attr_array[element].show = ecc_stat_show;
dev_attr_array[element].store = NULL;
/* Create sysfs file */
error |= device_create_file(dev,
&dev_attr_array[element]);
}
}
} else {
for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) {
/* Fill in struct device_attribute members */
snprintf(ecc_stat->names[hw_unit],
ECC_STAT_NAME_MAX_SIZE,
"%s%d_%s",
ecc_unit_name,
hw_unit,
ecc_stat_name);
sysfs_attr_init(&dev_attr_array[hw_unit].attr);
dev_attr_array[hw_unit].attr.name =
ecc_stat->names[hw_unit];
dev_attr_array[hw_unit].attr.mode =
VERIFY_OCTAL_PERMISSIONS(S_IRUGO);
dev_attr_array[hw_unit].show = ecc_stat_show;
dev_attr_array[hw_unit].store = NULL;
/* Create sysfs file */
error |= device_create_file(dev,
&dev_attr_array[hw_unit]);
}
}
/* Add hash table entry */
hash_key = gen_ecc_hash_key(ecc_stat_name);
hash_add(l->ecc_sysfs_stats_htable,
&ecc_stat->hash_node,
hash_key);
ecc_stat->attr_array = dev_attr_array;
return error;
}
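/*
 * Illustrative call (the stat name is hypothetical): create one read-only
 * file per LTC/LTS slice pair, named "ltc0_lts0_<stat>", "ltc0_lts1_<stat>",
 * and so on:
 *
 *	err = nvgpu_ecc_stat_create(dev, g->ltc_count, g->gr.slices_per_ltc,
 *				    "ltc", "lts", "ecc_sec_count", &stat);
 */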
void nvgpu_gr_ecc_stat_remove(struct device *dev,
int is_l2, struct gk20a_ecc_stat *ecc_stat)
{
struct gk20a *g = get_gk20a(dev);
int num_hw_units = 0;
int num_subunits = 0;
if (is_l2 == 1)
num_hw_units = g->ltc_count;
else if (is_l2 == 2) {
num_hw_units = g->ltc_count;
num_subunits = g->gr.slices_per_ltc;
} else
num_hw_units = g->gr.tpc_count;
nvgpu_ecc_stat_remove(dev, num_hw_units, num_subunits, ecc_stat);
}
void nvgpu_ecc_stat_remove(struct device *dev,
int num_hw_units, int num_subunits,
struct gk20a_ecc_stat *ecc_stat)
{
struct gk20a *g = get_gk20a(dev);
struct device_attribute *dev_attr_array = ecc_stat->attr_array;
int hw_unit = 0;
int subunit = 0;
int element = 0;
int num_elements = num_subunits ? num_subunits * num_hw_units :
num_hw_units;
/* Remove sysfs files */
if (num_subunits) {
for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++) {
for (subunit = 0; subunit < num_subunits; subunit++) {
element = hw_unit * num_subunits + subunit;
device_remove_file(dev,
&dev_attr_array[element]);
}
}
} else {
for (hw_unit = 0; hw_unit < num_hw_units; hw_unit++)
device_remove_file(dev, &dev_attr_array[hw_unit]);
}
/* Remove hash table entry */
hash_del(&ecc_stat->hash_node);
/* Free arrays */
nvgpu_kfree(g, ecc_stat->counters);
for (hw_unit = 0; hw_unit < num_elements; hw_unit++)
nvgpu_kfree(g, ecc_stat->names[hw_unit]);
nvgpu_kfree(g, ecc_stat->names);
nvgpu_kfree(g, dev_attr_array);
}

View File

@@ -0,0 +1,37 @@
/*
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _NVGPU_PLATFORM_SYSFS_H_
#define _NVGPU_PLATFORM_SYSFS_H_
#include "gp10b/gr_gp10b.h"
#define ECC_STAT_NAME_MAX_SIZE 100
int nvgpu_gr_ecc_stat_create(struct device *dev,
int is_l2, char *ecc_stat_name,
struct gk20a_ecc_stat *ecc_stat);
int nvgpu_ecc_stat_create(struct device *dev,
int num_hw_units, int num_subunits,
char *ecc_unit_name, char *ecc_subunit_name,
char *ecc_stat_name,
struct gk20a_ecc_stat *ecc_stat);
void nvgpu_gr_ecc_stat_remove(struct device *dev,
int is_l2, struct gk20a_ecc_stat *ecc_stat);
void nvgpu_ecc_stat_remove(struct device *dev,
int num_hw_units, int num_subunits,
struct gk20a_ecc_stat *ecc_stat);
#endif

View File

@@ -0,0 +1,317 @@
/*
* GK20A Platform (SoC) Interface
*
* Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#ifndef _GK20A_PLATFORM_H_
#define _GK20A_PLATFORM_H_
#include <linux/device.h>
#include <nvgpu/lock.h>
#include "gk20a/gk20a.h"
#define GK20A_CLKS_MAX 4
struct gk20a;
struct channel_gk20a;
struct gr_ctx_buffer_desc;
struct gk20a_scale_profile;
struct secure_page_buffer {
void (*destroy)(struct gk20a *, struct secure_page_buffer *);
size_t size;
dma_addr_t phys;
size_t used;
};
struct gk20a_platform {
/* Populated by the gk20a driver before probing the platform. */
struct gk20a *g;
/* Should be populated at probe. */
bool can_railgate_init;
/* Should be populated at probe. */
bool can_elpg_init;
/* Should be populated at probe. */
bool has_syncpoints;
/* channel limit after which to start aggressive sync destroy */
unsigned int aggressive_sync_destroy_thresh;
/* flag to set sync destroy aggressiveness */
bool aggressive_sync_destroy;
/* set if ASPM should be disabled on boot; only makes sense for PCI */
bool disable_aspm;
/* Set if the platform can unify the small/large address spaces. */
bool unify_address_spaces;
/* Clock configuration is stored here. Platform probe is responsible
* for filling this data. */
struct clk *clk[GK20A_CLKS_MAX];
int num_clks;
int maxmin_clk_id;
#ifdef CONFIG_RESET_CONTROLLER
/* Reset control for device */
struct reset_control *reset_control;
#endif
/* Delay before rail gated */
int railgate_delay_init;
/* init value for slowdown factor */
u8 ldiv_slowdown_factor_init;
/* Second Level Clock Gating: true = enable false = disable */
bool enable_slcg;
/* Block Level Clock Gating: true = enable false = disable */
bool enable_blcg;
/* Engine Level Clock Gating: true = enable false = disable */
bool enable_elcg;
/* Should be populated at probe. */
bool can_slcg;
/* Should be populated at probe. */
bool can_blcg;
/* Should be populated at probe. */
bool can_elcg;
/* Engine Level Power Gating: true = enable false = disable */
bool enable_elpg;
/* Adaptive ELPG: true = enable false = disable */
bool enable_aelpg;
/* PMU Perfmon: true = enable false = disable */
bool enable_perfmon;
/* Memory System Clock Gating: true = enable false = disable */
bool enable_mscg;
/* Timeout for per-channel watchdog (in mS) */
u32 ch_wdt_timeout_ms;
/* Disable big page support */
bool disable_bigpage;
/*
* gk20a_do_idle() API can take GPU either into rail gate or CAR reset
* This flag can be used to force CAR reset case instead of rail gate
*/
bool force_reset_in_do_idle;
/* guest/vm id, needed for IPA to PA translation */
int vmid;
/* Initialize the platform interface of the gk20a driver.
*
* The platform implementation of this function must
* - set the power and clocks of the gk20a device to a known
* state, and
* - populate the gk20a_platform structure (a pointer to the
* structure can be obtained by calling gk20a_get_platform).
*
* After this function is finished, the driver will initialise
* pm runtime and genpd based on the platform configuration.
*/
int (*probe)(struct device *dev);
/* Second stage initialisation - called once all power management
* initialisations are done.
*/
int (*late_probe)(struct device *dev);
/* Remove device after power management has been done
*/
int (*remove)(struct device *dev);
/* Poweron platform dependencies */
int (*busy)(struct device *dev);
/* Powerdown platform dependencies */
void (*idle)(struct device *dev);
/* Preallocated VPR buffer for kernel */
size_t secure_buffer_size;
struct secure_page_buffer secure_buffer;
/* Device is going to be suspended */
int (*suspend)(struct device *);
/* Called to turn off the device */
int (*railgate)(struct device *dev);
/* Called to turn on the device */
int (*unrailgate)(struct device *dev);
struct nvgpu_mutex railgate_lock;
/* Called to check state of device */
bool (*is_railgated)(struct device *dev);
/* get supported frequency list */
int (*get_clk_freqs)(struct device *pdev,
unsigned long **freqs, int *num_freqs);
/* clk related supported functions */
long (*clk_round_rate)(struct device *dev,
unsigned long rate);
/* Called to register GPCPLL with common clk framework */
int (*clk_register)(struct gk20a *g);
/* platform specific scale init quirks */
void (*initscale)(struct device *dev);
/* Postscale callback is called after frequency change */
void (*postscale)(struct device *dev,
unsigned long freq);
/* Pre callback is called before frequency change */
void (*prescale)(struct device *dev);
/* Devfreq governor name. If scaling is enabled, we request
* this governor to be used in scaling */
const char *devfreq_governor;
/* Quality of service notifier callback. If this is set, the scaling
* routines will register a callback to Qos. Each time we receive
* a new value, this callback gets called. */
int (*qos_notify)(struct notifier_block *nb,
unsigned long n, void *p);
/* Called as part of debug dump. If the gpu gets hung, this function
* is responsible for delivering all necessary debug data of other
* hw units which may interact with the gpu without direct supervision
* of the CPU.
*/
void (*dump_platform_dependencies)(struct device *dev);
/* Defined when SMMU stage-2 is enabled, and we need to use physical
* addresses (not IPA). This is the case for GV100 nvlink in HV+L
* configuration, when dGPU is in pass-through mode.
*/
u64 (*phys_addr)(struct gk20a *g, u64 ipa);
/* Callbacks to assert/deassert GPU reset */
int (*reset_assert)(struct device *dev);
int (*reset_deassert)(struct device *dev);
struct clk *clk_reset;
struct dvfs_rail *gpu_rail;
bool virtual_dev;
#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
void *vgpu_priv;
#endif
/* source frequency for ptimer in hz */
u32 ptimer_src_freq;
#ifdef CONFIG_NVGPU_SUPPORT_CDE
bool has_cde;
#endif
/* soc name for finding firmware files */
const char *soc_name;
/* false if vidmem aperture actually points to sysmem */
bool honors_aperture;
/* unified or split memory with separate vidmem? */
bool unified_memory;
/*
* DMA mask for Linux (both coherent and non-coherent mappings). If not
* set, defaults to 0x3ffffffff (i.e. a 34-bit mask).
*/
u64 dma_mask;
/* minimum supported VBIOS version */
u32 vbios_min_version;
/* true if we run preos microcode on this board */
bool run_preos;
/* true if we need to program sw threshold for
* power limits
*/
bool hardcode_sw_threshold;
/* i2c device index, port and address for INA3221 */
u32 ina3221_dcb_index;
u32 ina3221_i2c_address;
u32 ina3221_i2c_port;
/* stream id to use */
u32 ltc_streamid;
/* scaling rate */
unsigned long cached_rate;
};
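/*
 * Convenience accessors: the gk20a_platform pointer is stored in the struct
 * device's drvdata at probe time, so it (and the gk20a it points to) can be
 * recovered from a bare struct device anywhere in the driver.
 */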
static inline struct gk20a_platform *gk20a_get_platform(
struct device *dev)
{
return (struct gk20a_platform *)dev_get_drvdata(dev);
}
#ifdef CONFIG_TEGRA_GK20A
extern struct gk20a_platform gm20b_tegra_platform;
extern struct gk20a_platform gp10b_tegra_platform;
extern struct gk20a_platform gv11b_tegra_platform;
#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
extern struct gk20a_platform vgpu_tegra_platform;
extern struct gk20a_platform gv11b_vgpu_tegra_platform;
#endif
#endif
int gk20a_tegra_busy(struct device *dev);
void gk20a_tegra_idle(struct device *dev);
void gk20a_tegra_debug_dump(struct device *pdev);
static inline struct gk20a *get_gk20a(struct device *dev)
{
return gk20a_get_platform(dev)->g;
}
static inline struct gk20a *gk20a_from_dev(struct device *dev)
{
if (!dev)
return NULL;
return ((struct gk20a_platform *)dev_get_drvdata(dev))->g;
}
static inline bool gk20a_gpu_is_virtual(struct device *dev)
{
struct gk20a_platform *platform = dev_get_drvdata(dev);
return platform->virtual_dev;
}
static inline int support_gk20a_pmu(struct device *dev)
{
if (IS_ENABLED(CONFIG_GK20A_PMU)) {
/* gPMU is not supported for vgpu */
return !gk20a_gpu_is_virtual(dev);
}
return 0;
}
#endif

View File

@@ -0,0 +1,957 @@
/*
* GK20A Tegra Platform Interface
*
* Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#include <linux/clkdev.h>
#include <linux/of_platform.h>
#include <linux/debugfs.h>
#include <linux/platform_data/tegra_edp.h>
#include <linux/delay.h>
#include <uapi/linux/nvgpu.h>
#include <linux/dma-buf.h>
#include <linux/dma-attrs.h>
#include <linux/nvmap.h>
#include <linux/reset.h>
#if defined(CONFIG_TEGRA_DVFS)
#include <linux/tegra_soctherm.h>
#endif
#include <linux/platform/tegra/common.h>
#include <linux/platform/tegra/mc.h>
#include <linux/clk/tegra.h>
#if defined(CONFIG_COMMON_CLK)
#include <soc/tegra/tegra-dvfs.h>
#endif
#ifdef CONFIG_TEGRA_BWMGR
#include <linux/platform/tegra/emc_bwmgr.h>
#endif
#include <linux/platform/tegra/tegra_emc.h>
#include <soc/tegra/chip-id.h>
#include <nvgpu/kmem.h>
#include <nvgpu/bug.h>
#include <nvgpu/enabled.h>
#include <nvgpu/nvhost.h>
#include <nvgpu/linux/dma.h>
#include "gk20a/gk20a.h"
#include "gm20b/clk_gm20b.h"
#include "scale.h"
#include "platform_gk20a.h"
#include "clk.h"
#include "os_linux.h"
#include "../../../arch/arm/mach-tegra/iomap.h"
#include <soc/tegra/pmc.h>
#define TEGRA_GK20A_BW_PER_FREQ 32
#define TEGRA_GM20B_BW_PER_FREQ 64
#define TEGRA_DDR3_BW_PER_FREQ 16
#define TEGRA_DDR4_BW_PER_FREQ 16
#define MC_CLIENT_GPU 34
#define PMC_GPU_RG_CNTRL_0 0x2d4
#ifdef CONFIG_COMMON_CLK
#define GPU_RAIL_NAME "vdd-gpu"
#else
#define GPU_RAIL_NAME "vdd_gpu"
#endif
extern struct device tegra_vpr_dev;
#ifdef CONFIG_TEGRA_BWMGR
struct gk20a_emc_params {
unsigned long bw_ratio;
unsigned long freq_last_set;
struct tegra_bwmgr_client *bwmgr_cl;
};
#else
struct gk20a_emc_params {
unsigned long bw_ratio;
unsigned long freq_last_set;
};
#endif
#define MHZ_TO_HZ(x) ((x) * 1000000)
#define HZ_TO_MHZ(x) ((x) / 1000000)
static void gk20a_tegra_secure_page_destroy(struct gk20a *g,
struct secure_page_buffer *secure_buffer)
{
DEFINE_DMA_ATTRS(attrs);
dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, __DMA_ATTR(attrs));
dma_free_attrs(&tegra_vpr_dev, secure_buffer->size,
(void *)(uintptr_t)secure_buffer->phys,
secure_buffer->phys, __DMA_ATTR(attrs));
secure_buffer->destroy = NULL;
}
static int gk20a_tegra_secure_alloc(struct gk20a *g,
struct gr_ctx_buffer_desc *desc,
size_t size)
{
struct device *dev = dev_from_gk20a(g);
struct gk20a_platform *platform = dev_get_drvdata(dev);
struct secure_page_buffer *secure_buffer = &platform->secure_buffer;
dma_addr_t phys;
struct sg_table *sgt;
struct page *page;
int err = 0;
size_t aligned_size = PAGE_ALIGN(size);
if (nvgpu_mem_is_valid(&desc->mem))
return 0;
/* We ran out of preallocated memory */
if (secure_buffer->used + aligned_size > secure_buffer->size) {
nvgpu_err(platform->g, "failed to alloc %zu bytes of VPR, %zu/%zu used",
size, secure_buffer->used, secure_buffer->size);
return -ENOMEM;
}
phys = secure_buffer->phys + secure_buffer->used;
sgt = nvgpu_kzalloc(platform->g, sizeof(*sgt));
if (!sgt) {
nvgpu_err(platform->g, "failed to allocate memory");
return -ENOMEM;
}
err = sg_alloc_table(sgt, 1, GFP_KERNEL);
if (err) {
nvgpu_err(platform->g, "failed to allocate sg_table");
goto fail_sgt;
}
page = phys_to_page(phys);
sg_set_page(sgt->sgl, page, size, 0);
/* This bypasses SMMU for VPR during gmmu_map. */
sg_dma_address(sgt->sgl) = 0;
desc->destroy = NULL;
desc->mem.priv.sgt = sgt;
desc->mem.size = size;
desc->mem.aperture = APERTURE_SYSMEM;
secure_buffer->used += aligned_size;
return err;
fail_sgt:
nvgpu_kfree(platform->g, sgt);
return err;
}
/*
* gk20a_tegra_get_emc_rate()
*
* This function returns the minimum emc clock based on gpu frequency
*/
static unsigned long gk20a_tegra_get_emc_rate(struct gk20a *g,
struct gk20a_emc_params *emc_params)
{
unsigned long gpu_freq, gpu_fmax_at_vmin;
unsigned long emc_rate, emc_scale;
gpu_freq = clk_get_rate(g->clk.tegra_clk);
gpu_fmax_at_vmin = tegra_dvfs_get_fmax_at_vmin_safe_t(
clk_get_parent(g->clk.tegra_clk));
/* When scaling emc, account for the gpu load when the
* gpu frequency is less than or equal to fmax@vmin. */
if (gpu_freq <= gpu_fmax_at_vmin)
emc_scale = min(g->pmu.load_avg, g->emc3d_ratio);
else
emc_scale = g->emc3d_ratio;
emc_rate =
(HZ_TO_MHZ(gpu_freq) * emc_params->bw_ratio * emc_scale) / 1000;
return MHZ_TO_HZ(emc_rate);
}
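/*
 * Illustrative arithmetic with assumed values: gpu_freq = 614 MHz,
 * bw_ratio = 4 (GM20B over DDR3/DDR4) and emc_scale = 750 (treated here as
 * a per-mille load factor) give emc_rate = 614 * 4 * 750 / 1000 = 1842 MHz.
 */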
/*
* gk20a_tegra_prescale(profile, freq)
*
* This function informs EDP about changed constraints.
*/
static void gk20a_tegra_prescale(struct device *dev)
{
struct gk20a *g = get_gk20a(dev);
u32 avg = 0;
nvgpu_pmu_load_norm(g, &avg);
tegra_edp_notify_gpu_load(avg, clk_get_rate(g->clk.tegra_clk));
}
/*
* gk20a_tegra_calibrate_emc()
*
*/
static void gk20a_tegra_calibrate_emc(struct device *dev,
struct gk20a_emc_params *emc_params)
{
enum tegra_chipid cid = tegra_get_chip_id();
long gpu_bw, emc_bw;
/* store gpu bw based on soc */
switch (cid) {
case TEGRA210:
gpu_bw = TEGRA_GM20B_BW_PER_FREQ;
break;
case TEGRA124:
case TEGRA132:
gpu_bw = TEGRA_GK20A_BW_PER_FREQ;
break;
default:
gpu_bw = 0;
break;
}
/* TODO detect DDR type.
* Okay for now since DDR3 and DDR4 have the same BW ratio */
emc_bw = TEGRA_DDR3_BW_PER_FREQ;
/* Calculate the bandwidth ratio of gpu_freq <-> emc_freq
* NOTE the ratio must come out as an integer */
emc_params->bw_ratio = (gpu_bw / emc_bw);
}
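/*
 * With the constants above this yields bw_ratio = 64/16 = 4 for GM20B
 * (TEGRA210) and 32/16 = 2 for GK20A (TEGRA124/132).
 */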
#ifdef CONFIG_TEGRA_BWMGR
#ifdef CONFIG_TEGRA_DVFS
static void gm20b_bwmgr_set_rate(struct gk20a_platform *platform, bool enb)
{
struct gk20a_scale_profile *profile = platform->g->scale_profile;
struct gk20a_emc_params *params;
unsigned long rate;
if (!profile || !profile->private_data)
return;
params = (struct gk20a_emc_params *)profile->private_data;
rate = (enb) ? params->freq_last_set : 0;
tegra_bwmgr_set_emc(params->bwmgr_cl, rate, TEGRA_BWMGR_SET_EMC_FLOOR);
}
#endif
static void gm20b_tegra_postscale(struct device *dev, unsigned long freq)
{
struct gk20a_platform *platform = dev_get_drvdata(dev);
struct gk20a_scale_profile *profile = platform->g->scale_profile;
struct gk20a_emc_params *emc_params;
unsigned long emc_rate;
if (!profile || !profile->private_data)
return;
emc_params = profile->private_data;
emc_rate = gk20a_tegra_get_emc_rate(get_gk20a(dev), emc_params);
if (emc_rate > tegra_bwmgr_get_max_emc_rate())
emc_rate = tegra_bwmgr_get_max_emc_rate();
emc_params->freq_last_set = emc_rate;
if (platform->is_railgated && platform->is_railgated(dev))
return;
tegra_bwmgr_set_emc(emc_params->bwmgr_cl, emc_rate,
TEGRA_BWMGR_SET_EMC_FLOOR);
}
#endif
#if defined(CONFIG_TEGRA_DVFS)
/*
* gk20a_tegra_is_railgated()
*
* Check status of gk20a power rail
*/
static bool gk20a_tegra_is_railgated(struct device *dev)
{
struct gk20a *g = get_gk20a(dev);
struct gk20a_platform *platform = dev_get_drvdata(dev);
bool ret = false;
if (!nvgpu_is_enabled(g, NVGPU_IS_FMODEL))
ret = !tegra_dvfs_is_rail_up(platform->gpu_rail);
return ret;
}
/*
* gm20b_tegra_railgate()
*
* Gate (disable) gm20b power rail
*/
static int gm20b_tegra_railgate(struct device *dev)
{
struct gk20a *g = get_gk20a(dev);
struct gk20a_platform *platform = dev_get_drvdata(dev);
int ret = 0;
if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL) ||
!tegra_dvfs_is_rail_up(platform->gpu_rail))
return 0;
tegra_mc_flush(MC_CLIENT_GPU);
udelay(10);
/* enable clamp */
tegra_pmc_writel_relaxed(0x1, PMC_GPU_RG_CNTRL_0);
tegra_pmc_readl(PMC_GPU_RG_CNTRL_0);
udelay(10);
platform->reset_assert(dev);
udelay(10);
/*
* GPCPLL is already disabled before entering this function; reference
* clocks are enabled until now - disable them just before rail gating
*/
clk_disable_unprepare(platform->clk_reset);
clk_disable_unprepare(platform->clk[0]);
clk_disable_unprepare(platform->clk[1]);
if (platform->clk[3])
clk_disable_unprepare(platform->clk[3]);
udelay(10);
tegra_soctherm_gpu_tsens_invalidate(1);
if (tegra_dvfs_is_rail_up(platform->gpu_rail)) {
ret = tegra_dvfs_rail_power_down(platform->gpu_rail);
if (ret)
goto err_power_off;
} else
pr_info("No GPU regulator?\n");
#ifdef CONFIG_TEGRA_BWMGR
gm20b_bwmgr_set_rate(platform, false);
#endif
return 0;
err_power_off:
nvgpu_err(platform->g, "Could not railgate GPU");
return ret;
}
/*
* gm20b_tegra_unrailgate()
*
* Ungate (enable) gm20b power rail
*/
static int gm20b_tegra_unrailgate(struct device *dev)
{
struct gk20a_platform *platform = dev_get_drvdata(dev);
struct gk20a *g = platform->g;
int ret = 0;
bool first = false;
if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL))
return 0;
ret = tegra_dvfs_rail_power_up(platform->gpu_rail);
if (ret)
return ret;
#ifdef CONFIG_TEGRA_BWMGR
gm20b_bwmgr_set_rate(platform, true);
#endif
tegra_soctherm_gpu_tsens_invalidate(0);
if (!platform->clk_reset) {
platform->clk_reset = clk_get(dev, "gpu_gate");
if (IS_ERR(platform->clk_reset)) {
nvgpu_err(g, "fail to get gpu reset clk");
goto err_clk_on;
}
}
if (!first) {
ret = clk_prepare_enable(platform->clk_reset);
if (ret) {
nvgpu_err(g, "could not turn on gpu_gate");
goto err_clk_on;
}
ret = clk_prepare_enable(platform->clk[0]);
if (ret) {
nvgpu_err(g, "could not turn on gpu pll");
goto err_clk_on;
}
ret = clk_prepare_enable(platform->clk[1]);
if (ret) {
nvgpu_err(g, "could not turn on pwr clock");
goto err_clk_on;
}
if (platform->clk[3]) {
ret = clk_prepare_enable(platform->clk[3]);
if (ret) {
nvgpu_err(g, "could not turn on fuse clock");
goto err_clk_on;
}
}
}
udelay(10);
platform->reset_assert(dev);
udelay(10);
tegra_pmc_writel_relaxed(0, PMC_GPU_RG_CNTRL_0);
tegra_pmc_readl(PMC_GPU_RG_CNTRL_0);
udelay(10);
clk_disable(platform->clk_reset);
platform->reset_deassert(dev);
clk_enable(platform->clk_reset);
/* Flush MC after boot/railgate/SC7 */
tegra_mc_flush(MC_CLIENT_GPU);
udelay(10);
tegra_mc_flush_done(MC_CLIENT_GPU);
udelay(10);
return 0;
err_clk_on:
tegra_dvfs_rail_power_down(platform->gpu_rail);
return ret;
}
#endif
static struct {
char *name;
unsigned long default_rate;
} tegra_gk20a_clocks[] = {
{"gpu_ref", UINT_MAX},
{"pll_p_out5", 204000000},
{"emc", UINT_MAX},
{"fuse", UINT_MAX},
};
/*
* gk20a_tegra_get_clocks()
*
* This function finds clocks in tegra platform and populates
* the clock information to gk20a platform data.
*/
static int gk20a_tegra_get_clocks(struct device *dev)
{
struct gk20a_platform *platform = dev_get_drvdata(dev);
char devname[16];
unsigned int i;
int ret = 0;
BUG_ON(GK20A_CLKS_MAX < ARRAY_SIZE(tegra_gk20a_clocks));
snprintf(devname, sizeof(devname), "tegra_%s", dev_name(dev));
platform->num_clks = 0;
for (i = 0; i < ARRAY_SIZE(tegra_gk20a_clocks); i++) {
long rate = tegra_gk20a_clocks[i].default_rate;
struct clk *c;
c = clk_get_sys(devname, tegra_gk20a_clocks[i].name);
if (IS_ERR(c)) {
ret = PTR_ERR(c);
goto err_get_clock;
}
rate = clk_round_rate(c, rate);
clk_set_rate(c, rate);
platform->clk[i] = c;
if (i == 0)
platform->cached_rate = rate;
}
platform->num_clks = i;
return 0;
err_get_clock:
while (i--)
clk_put(platform->clk[i]);
return ret;
}
#if defined(CONFIG_RESET_CONTROLLER) && defined(CONFIG_COMMON_CLK)
static int gm20b_tegra_reset_assert(struct device *dev)
{
struct gk20a_platform *platform = gk20a_get_platform(dev);
if (!platform->reset_control) {
WARN(1, "Reset control not initialized\n");
return -ENOSYS;
}
return reset_control_assert(platform->reset_control);
}
static int gm20b_tegra_reset_deassert(struct device *dev)
{
struct gk20a_platform *platform = gk20a_get_platform(dev);
if (!platform->reset_control) {
WARN(1, "Reset control not initialized\n");
return -ENOSYS;
}
return reset_control_deassert(platform->reset_control);
}
#endif
static void gk20a_tegra_scale_init(struct device *dev)
{
struct gk20a_platform *platform = gk20a_get_platform(dev);
struct gk20a_scale_profile *profile = platform->g->scale_profile;
struct gk20a_emc_params *emc_params;
struct gk20a *g = platform->g;
if (!profile)
return;
if (profile->private_data)
return;
emc_params = nvgpu_kzalloc(platform->g, sizeof(*emc_params));
if (!emc_params)
return;
emc_params->freq_last_set = -1;
gk20a_tegra_calibrate_emc(dev, emc_params);
#ifdef CONFIG_TEGRA_BWMGR
emc_params->bwmgr_cl = tegra_bwmgr_register(TEGRA_BWMGR_CLIENT_GPU);
if (!emc_params->bwmgr_cl) {
nvgpu_log_info(g, "%s Missing GPU BWMGR client\n", __func__);
return;
}
#endif
profile->private_data = emc_params;
}
static void gk20a_tegra_scale_exit(struct device *dev)
{
struct gk20a_platform *platform = dev_get_drvdata(dev);
struct gk20a_scale_profile *profile = platform->g->scale_profile;
struct gk20a_emc_params *emc_params;
if (!profile)
return;
emc_params = profile->private_data;
#ifdef CONFIG_TEGRA_BWMGR
tegra_bwmgr_unregister(emc_params->bwmgr_cl);
#endif
nvgpu_kfree(platform->g, profile->private_data);
}
void gk20a_tegra_debug_dump(struct device *dev)
{
#ifdef CONFIG_TEGRA_GK20A_NVHOST
struct gk20a_platform *platform = gk20a_get_platform(dev);
struct gk20a *g = platform->g;
if (g->nvhost_dev)
nvgpu_nvhost_debug_dump_device(g->nvhost_dev);
#endif
}
int gk20a_tegra_busy(struct device *dev)
{
#ifdef CONFIG_TEGRA_GK20A_NVHOST
struct gk20a_platform *platform = gk20a_get_platform(dev);
struct gk20a *g = platform->g;
if (g->nvhost_dev)
return nvgpu_nvhost_module_busy_ext(g->nvhost_dev);
#endif
return 0;
}
void gk20a_tegra_idle(struct device *dev)
{
#ifdef CONFIG_TEGRA_GK20A_NVHOST
struct gk20a_platform *platform = gk20a_get_platform(dev);
struct gk20a *g = platform->g;
if (g->nvhost_dev)
nvgpu_nvhost_module_idle_ext(g->nvhost_dev);
#endif
}
int gk20a_tegra_init_secure_alloc(struct gk20a_platform *platform)
{
struct gk20a *g = platform->g;
struct secure_page_buffer *secure_buffer = &platform->secure_buffer;
DEFINE_DMA_ATTRS(attrs);
dma_addr_t iova;
if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL))
return 0;
dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, __DMA_ATTR(attrs));
(void)dma_alloc_attrs(&tegra_vpr_dev, platform->secure_buffer_size, &iova,
GFP_KERNEL, __DMA_ATTR(attrs));
/* Some platforms disable VPR. In that case VPR allocations always
* fail. Just disable VPR usage in nvgpu in that case. */
if (dma_mapping_error(&tegra_vpr_dev, iova))
return 0;
secure_buffer->size = platform->secure_buffer_size;
secure_buffer->phys = iova;
secure_buffer->destroy = gk20a_tegra_secure_page_destroy;
g->ops.secure_alloc = gk20a_tegra_secure_alloc;
__nvgpu_set_enabled(g, NVGPU_SUPPORT_VPR, true);
return 0;
}
#ifdef CONFIG_COMMON_CLK
static struct clk *gk20a_clk_get(struct gk20a *g)
{
if (!g->clk.tegra_clk) {
struct clk *clk;
char clk_dev_id[32];
struct device *dev = dev_from_gk20a(g);
snprintf(clk_dev_id, 32, "tegra_%s", dev_name(dev));
clk = clk_get_sys(clk_dev_id, "gpu");
if (IS_ERR(clk)) {
nvgpu_err(g, "fail to get tegra gpu clk %s/gpu\n",
clk_dev_id);
return NULL;
}
g->clk.tegra_clk = clk;
}
return g->clk.tegra_clk;
}
static int gm20b_clk_prepare_ops(struct clk_hw *hw)
{
struct clk_gk20a *clk = to_clk_gk20a(hw);
return gm20b_clk_prepare(clk);
}
static void gm20b_clk_unprepare_ops(struct clk_hw *hw)
{
struct clk_gk20a *clk = to_clk_gk20a(hw);
gm20b_clk_unprepare(clk);
}
static int gm20b_clk_is_prepared_ops(struct clk_hw *hw)
{
struct clk_gk20a *clk = to_clk_gk20a(hw);
return gm20b_clk_is_prepared(clk);
}
static unsigned long gm20b_recalc_rate_ops(struct clk_hw *hw, unsigned long parent_rate)
{
struct clk_gk20a *clk = to_clk_gk20a(hw);
return gm20b_recalc_rate(clk, parent_rate);
}
static int gm20b_gpcclk_set_rate_ops(struct clk_hw *hw, unsigned long rate,
unsigned long parent_rate)
{
struct clk_gk20a *clk = to_clk_gk20a(hw);
return gm20b_gpcclk_set_rate(clk, rate, parent_rate);
}
static long gm20b_round_rate_ops(struct clk_hw *hw, unsigned long rate,
unsigned long *parent_rate)
{
struct clk_gk20a *clk = to_clk_gk20a(hw);
return gm20b_round_rate(clk, rate, parent_rate);
}
static const struct clk_ops gm20b_clk_ops = {
.prepare = gm20b_clk_prepare_ops,
.unprepare = gm20b_clk_unprepare_ops,
.is_prepared = gm20b_clk_is_prepared_ops,
.recalc_rate = gm20b_recalc_rate_ops,
.set_rate = gm20b_gpcclk_set_rate_ops,
.round_rate = gm20b_round_rate_ops,
};
static int gm20b_register_gpcclk(struct gk20a *g)
{
const char *parent_name = "pllg_ref";
struct clk_gk20a *clk = &g->clk;
struct clk_init_data init;
struct clk *c;
int err = 0;
/* make sure the clock is available */
if (!gk20a_clk_get(g))
return -ENOSYS;
err = gm20b_init_clk_setup_sw(g);
if (err)
return err;
init.name = "gpcclk";
init.ops = &gm20b_clk_ops;
init.parent_names = &parent_name;
init.num_parents = 1;
init.flags = 0;
/* Data in .init is copied by clk_register(), so stack variable OK */
clk->hw.init = &init;
c = clk_register(dev_from_gk20a(g), &clk->hw);
if (IS_ERR(c)) {
nvgpu_err(g, "Failed to register GPCPLL clock");
return -EINVAL;
}
clk->g = g;
clk_register_clkdev(c, "gpcclk", "gpcclk");
return err;
}
#endif /* CONFIG_COMMON_CLK */
static int gk20a_tegra_probe(struct device *dev)
{
struct gk20a_platform *platform = dev_get_drvdata(dev);
struct device_node *np = dev->of_node;
bool joint_xpu_rail = false;
int ret;
struct gk20a *g = platform->g;
#ifdef CONFIG_COMMON_CLK
/* DVFS is not guaranteed to be initialized at the time of probe on
* kernels with Common Clock Framework enabled.
*/
if (!platform->gpu_rail) {
platform->gpu_rail = tegra_dvfs_get_rail_by_name(GPU_RAIL_NAME);
if (!platform->gpu_rail) {
nvgpu_log_info(g, "deferring probe no gpu_rail");
return -EPROBE_DEFER;
}
}
if (!tegra_dvfs_is_rail_ready(platform->gpu_rail)) {
nvgpu_log_info(g, "deferring probe gpu_rail not ready");
return -EPROBE_DEFER;
}
#endif
#ifdef CONFIG_TEGRA_GK20A_NVHOST
ret = nvgpu_get_nvhost_dev(platform->g);
if (ret)
return ret;
#endif
#ifdef CONFIG_OF
joint_xpu_rail = of_property_read_bool(of_chosen,
"nvidia,tegra-joint_xpu_rail");
#endif
if (joint_xpu_rail) {
nvgpu_log_info(g, "XPU rails are joint\n");
platform->g->can_railgate = false;
}
platform->g->clk.gpc_pll.id = GK20A_GPC_PLL;
if (tegra_get_chip_id() == TEGRA210) {
/* WAR for bug 1547668: Disable railgating and scaling
irrespective of platform data if the rework was not made. */
np = of_find_node_by_path("/gpu-dvfs-rework");
if (!(np && of_device_is_available(np))) {
platform->devfreq_governor = "";
dev_warn(dev, "board does not support scaling");
}
platform->g->clk.gpc_pll.id = GM20B_GPC_PLL_B1;
if (tegra_chip_get_revision() > TEGRA210_REVISION_A04p)
platform->g->clk.gpc_pll.id = GM20B_GPC_PLL_C1;
}
if (tegra_get_chip_id() == TEGRA132)
platform->soc_name = "tegra13x";
gk20a_tegra_get_clocks(dev);
nvgpu_linux_init_clk_support(platform->g);
ret = gk20a_tegra_init_secure_alloc(platform);
if (ret)
return ret;
if (platform->clk_register) {
ret = platform->clk_register(platform->g);
if (ret)
return ret;
}
return 0;
}
static int gk20a_tegra_late_probe(struct device *dev)
{
return 0;
}
static int gk20a_tegra_remove(struct device *dev)
{
/* deinitialise tegra specific scaling quirks */
gk20a_tegra_scale_exit(dev);
#ifdef CONFIG_TEGRA_GK20A_NVHOST
nvgpu_free_nvhost_dev(get_gk20a(dev));
#endif
return 0;
}
static int gk20a_tegra_suspend(struct device *dev)
{
tegra_edp_notify_gpu_load(0, 0);
return 0;
}
#if defined(CONFIG_COMMON_CLK)
static long gk20a_round_clk_rate(struct device *dev, unsigned long rate)
{
struct gk20a_platform *platform = gk20a_get_platform(dev);
struct gk20a *g = platform->g;
/* make sure the clock is available */
if (!gk20a_clk_get(g))
return rate;
return clk_round_rate(clk_get_parent(g->clk.tegra_clk), rate);
}
static int gk20a_clk_get_freqs(struct device *dev,
unsigned long **freqs, int *num_freqs)
{
struct gk20a_platform *platform = gk20a_get_platform(dev);
struct gk20a *g = platform->g;
/* make sure the clock is available */
if (!gk20a_clk_get(g))
return -ENOSYS;
return tegra_dvfs_get_freqs(clk_get_parent(g->clk.tegra_clk),
freqs, num_freqs);
}
#endif
struct gk20a_platform gm20b_tegra_platform = {
.has_syncpoints = true,
.aggressive_sync_destroy_thresh = 64,
/* power management configuration */
.railgate_delay_init = 500,
.can_railgate_init = true,
.can_elpg_init = true,
.enable_slcg = true,
.enable_blcg = true,
.enable_elcg = true,
.can_slcg = true,
.can_blcg = true,
.can_elcg = true,
.enable_elpg = true,
.enable_aelpg = true,
.enable_perfmon = true,
.ptimer_src_freq = 19200000,
.force_reset_in_do_idle = false,
.ch_wdt_timeout_ms = 5000,
.probe = gk20a_tegra_probe,
.late_probe = gk20a_tegra_late_probe,
.remove = gk20a_tegra_remove,
/* power management callbacks */
.suspend = gk20a_tegra_suspend,
#if defined(CONFIG_TEGRA_DVFS)
.railgate = gm20b_tegra_railgate,
.unrailgate = gm20b_tegra_unrailgate,
.is_railgated = gk20a_tegra_is_railgated,
#endif
.busy = gk20a_tegra_busy,
.idle = gk20a_tegra_idle,
#if defined(CONFIG_RESET_CONTROLLER) && defined(CONFIG_COMMON_CLK)
.reset_assert = gm20b_tegra_reset_assert,
.reset_deassert = gm20b_tegra_reset_deassert,
#else
.reset_assert = gk20a_tegra_reset_assert,
.reset_deassert = gk20a_tegra_reset_deassert,
#endif
#if defined(CONFIG_COMMON_CLK)
.clk_round_rate = gk20a_round_clk_rate,
.get_clk_freqs = gk20a_clk_get_freqs,
#endif
#ifdef CONFIG_COMMON_CLK
.clk_register = gm20b_register_gpcclk,
#endif
/* frequency scaling configuration */
.initscale = gk20a_tegra_scale_init,
.prescale = gk20a_tegra_prescale,
#ifdef CONFIG_TEGRA_BWMGR
.postscale = gm20b_tegra_postscale,
#endif
.devfreq_governor = "nvhost_podgov",
.qos_notify = gk20a_scale_qos_notify,
.dump_platform_dependencies = gk20a_tegra_debug_dump,
#ifdef CONFIG_NVGPU_SUPPORT_CDE
.has_cde = true,
#endif
.soc_name = "tegra21x",
.unified_memory = true,
.dma_mask = DMA_BIT_MASK(34),
.secure_buffer_size = 335872,
};

View File

@@ -0,0 +1,23 @@
/*
* GK20A Platform (SoC) Interface
*
* Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#ifndef _NVGPU_PLATFORM_GK20A_TEGRA_H_
#define _NVGPU_PLATFORM_GK20A_TEGRA_H_
struct gk20a_platform;
int gk20a_tegra_init_secure_alloc(struct gk20a_platform *platform);
#endif

View File

@@ -0,0 +1,39 @@
/*
* GP10B Platform (SoC) Interface
*
* Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef _GP10B_PLATFORM_H_
#define _GP10B_PLATFORM_H_
struct device;
int gp10b_tegra_get_clocks(struct device *dev);
int gp10b_tegra_reset_assert(struct device *dev);
int gp10b_tegra_reset_deassert(struct device *dev);
void gp10b_tegra_scale_init(struct device *dev);
long gp10b_round_clk_rate(struct device *dev, unsigned long rate);
int gp10b_clk_get_freqs(struct device *dev,
unsigned long **freqs, int *num_freqs);
void gp10b_tegra_prescale(struct device *dev);
void gp10b_tegra_postscale(struct device *pdev, unsigned long freq);
#endif

View File

@@ -0,0 +1,607 @@
/*
* GP10B Tegra Platform Interface
*
* Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#include <linux/of_platform.h>
#include <linux/debugfs.h>
#include <linux/dma-buf.h>
#include <linux/nvmap.h>
#include <linux/reset.h>
#include <linux/platform/tegra/emc_bwmgr.h>
#include <uapi/linux/nvgpu.h>
#include <soc/tegra/tegra_bpmp.h>
#include <soc/tegra/tegra_powergate.h>
#include <soc/tegra/tegra-bpmp-dvfs.h>
#include <dt-bindings/memory/tegra-swgroup.h>
#include <nvgpu/kmem.h>
#include <nvgpu/bug.h>
#include <nvgpu/enabled.h>
#include <nvgpu/hashtable.h>
#include <nvgpu/nvhost.h>
#include "os_linux.h"
#include "clk.h"
#include "gk20a/gk20a.h"
#include "platform_gk20a.h"
#include "platform_ecc_sysfs.h"
#include "platform_gk20a_tegra.h"
#include "platform_gp10b.h"
#include "platform_gp10b_tegra.h"
#include "scale.h"
/* Select every GP10B_FREQ_SELECT_STEP'th frequency from h/w table */
#define GP10B_FREQ_SELECT_STEP 8
/* Max number of freq supported in h/w */
#define GP10B_MAX_SUPPORTED_FREQS 120
static unsigned long
gp10b_freq_table[GP10B_MAX_SUPPORTED_FREQS / GP10B_FREQ_SELECT_STEP];
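/*
 * Bandwidth demand per unit of GPU clock vs. bandwidth provided per unit
 * of DDR4 clock; their ratio converts a GPU frequency into the EMC
 * frequency floor requested in gp10b_tegra_postscale().
 */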
#define TEGRA_GP10B_BW_PER_FREQ 64
#define TEGRA_DDR4_BW_PER_FREQ 16
#define EMC_BW_RATIO (TEGRA_GP10B_BW_PER_FREQ / TEGRA_DDR4_BW_PER_FREQ)
#define GPCCLK_INIT_RATE 1000000000
static struct {
char *name;
unsigned long default_rate;
} tegra_gp10b_clocks[] = {
{"gpu", GPCCLK_INIT_RATE},
{"gpu_sys", 204000000} };
/*
* gp10b_tegra_get_clocks()
*
* This function finds the clocks in the Tegra platform and populates
* the gp10b platform data with the clock information.
*/
int gp10b_tegra_get_clocks(struct device *dev)
{
struct gk20a_platform *platform = dev_get_drvdata(dev);
unsigned int i;
platform->num_clks = 0;
for (i = 0; i < ARRAY_SIZE(tegra_gp10b_clocks); i++) {
long rate = tegra_gp10b_clocks[i].default_rate;
struct clk *c;
c = clk_get(dev, tegra_gp10b_clocks[i].name);
if (IS_ERR(c)) {
nvgpu_err(platform->g, "cannot get clock %s",
tegra_gp10b_clocks[i].name);
} else {
clk_set_rate(c, rate);
platform->clk[i] = c;
if (i == 0)
platform->cached_rate = rate;
}
}
platform->num_clks = i;
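/* record the BPMP DVFS clock id of the GPU clock; it is used later
 * for min/max frequency requests */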
if (platform->clk[0]) {
i = tegra_bpmp_dvfs_get_clk_id(dev->of_node,
tegra_gp10b_clocks[0].name);
if (i > 0)
platform->maxmin_clk_id = i;
}
return 0;
}
void gp10b_tegra_scale_init(struct device *dev)
{
struct gk20a_platform *platform = gk20a_get_platform(dev);
struct gk20a_scale_profile *profile = platform->g->scale_profile;
struct tegra_bwmgr_client *bwmgr_handle;
if (!profile)
return;
if ((struct tegra_bwmgr_client *)profile->private_data)
return;
bwmgr_handle = tegra_bwmgr_register(TEGRA_BWMGR_CLIENT_GPU);
if (!bwmgr_handle)
return;
profile->private_data = (void *)bwmgr_handle;
}
static void gp10b_tegra_scale_exit(struct device *dev)
{
struct gk20a_platform *platform = gk20a_get_platform(dev);
struct gk20a_scale_profile *profile = platform->g->scale_profile;
if (profile)
tegra_bwmgr_unregister(
(struct tegra_bwmgr_client *)profile->private_data);
}
static int gp10b_tegra_probe(struct device *dev)
{
struct gk20a_platform *platform = dev_get_drvdata(dev);
int ret;
#ifdef CONFIG_TEGRA_GK20A_NVHOST
ret = nvgpu_get_nvhost_dev(platform->g);
if (ret)
return ret;
#endif
ret = gk20a_tegra_init_secure_alloc(platform);
if (ret)
return ret;
platform->disable_bigpage = !device_is_iommuable(dev);
platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close
= false;
platform->g->gr.ctx_vars.force_preemption_gfxp = false;
platform->g->gr.ctx_vars.force_preemption_cilp = false;
gp10b_tegra_get_clocks(dev);
nvgpu_linux_init_clk_support(platform->g);
return 0;
}
static int gp10b_tegra_late_probe(struct device *dev)
{
return 0;
}
static int gp10b_tegra_remove(struct device *dev)
{
struct gk20a *g = get_gk20a(dev);
if (g->ops.gr.remove_gr_sysfs)
g->ops.gr.remove_gr_sysfs(g);
/* deinitialise tegra specific scaling quirks */
gp10b_tegra_scale_exit(dev);
#ifdef CONFIG_TEGRA_GK20A_NVHOST
nvgpu_free_nvhost_dev(get_gk20a(dev));
#endif
return 0;
}
static bool gp10b_tegra_is_railgated(struct device *dev)
{
bool ret = false;
if (tegra_bpmp_running())
ret = !tegra_powergate_is_powered(TEGRA186_POWER_DOMAIN_GPU);
return ret;
}
static int gp10b_tegra_railgate(struct device *dev)
{
struct gk20a_platform *platform = gk20a_get_platform(dev);
struct gk20a_scale_profile *profile = platform->g->scale_profile;
/* remove emc frequency floor */
if (profile)
tegra_bwmgr_set_emc(
(struct tegra_bwmgr_client *)profile->private_data,
0, TEGRA_BWMGR_SET_EMC_FLOOR);
if (tegra_bpmp_running() &&
tegra_powergate_is_powered(TEGRA186_POWER_DOMAIN_GPU)) {
int i;
for (i = 0; i < platform->num_clks; i++) {
if (platform->clk[i])
clk_disable_unprepare(platform->clk[i]);
}
tegra_powergate_partition(TEGRA186_POWER_DOMAIN_GPU);
}
return 0;
}
static int gp10b_tegra_unrailgate(struct device *dev)
{
int ret = 0;
struct gk20a_platform *platform = gk20a_get_platform(dev);
struct gk20a_scale_profile *profile = platform->g->scale_profile;
if (tegra_bpmp_running()) {
int i;
ret = tegra_unpowergate_partition(TEGRA186_POWER_DOMAIN_GPU);
for (i = 0; i < platform->num_clks; i++) {
if (platform->clk[i])
clk_prepare_enable(platform->clk[i]);
}
}
/* to start with, set emc frequency floor to max rate */
if (profile)
tegra_bwmgr_set_emc(
(struct tegra_bwmgr_client *)profile->private_data,
tegra_bwmgr_get_max_emc_rate(),
TEGRA_BWMGR_SET_EMC_FLOOR);
return ret;
}
static int gp10b_tegra_suspend(struct device *dev)
{
return 0;
}
int gp10b_tegra_reset_assert(struct device *dev)
{
struct gk20a_platform *platform = gk20a_get_platform(dev);
int ret = 0;
if (!platform->reset_control)
return -EINVAL;
ret = reset_control_assert(platform->reset_control);
return ret;
}
int gp10b_tegra_reset_deassert(struct device *dev)
{
struct gk20a_platform *platform = gk20a_get_platform(dev);
int ret = 0;
if (!platform->reset_control)
return -EINVAL;
ret = reset_control_deassert(platform->reset_control);
return ret;
}
void gp10b_tegra_prescale(struct device *dev)
{
struct gk20a *g = get_gk20a(dev);
u32 avg = 0;
nvgpu_log_fn(g, " ");
nvgpu_pmu_load_norm(g, &avg);
nvgpu_log_fn(g, "done");
}
void gp10b_tegra_postscale(struct device *pdev,
unsigned long freq)
{
struct gk20a_platform *platform = gk20a_get_platform(pdev);
struct gk20a_scale_profile *profile = platform->g->scale_profile;
struct gk20a *g = get_gk20a(pdev);
unsigned long emc_rate;
nvgpu_log_fn(g, " ");
if (profile && !platform->is_railgated(pdev)) {
unsigned long emc_scale;
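/* no EMC floor is needed at the lowest GPU frequency; otherwise scale
 * the request by the emc3d ratio (normalized by the /1000 below) */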
if (freq <= gp10b_freq_table[0])
emc_scale = 0;
else
emc_scale = g->emc3d_ratio;
emc_rate = (freq * EMC_BW_RATIO * emc_scale) / 1000;
if (emc_rate > tegra_bwmgr_get_max_emc_rate())
emc_rate = tegra_bwmgr_get_max_emc_rate();
tegra_bwmgr_set_emc(
(struct tegra_bwmgr_client *)profile->private_data,
emc_rate, TEGRA_BWMGR_SET_EMC_FLOOR);
}
nvgpu_log_fn(g, "done");
}
long gp10b_round_clk_rate(struct device *dev, unsigned long rate)
{
struct gk20a *g = get_gk20a(dev);
struct gk20a_scale_profile *profile = g->scale_profile;
unsigned long *freq_table = profile->devfreq_profile.freq_table;
int max_states = profile->devfreq_profile.max_state;
int i;
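/* return the smallest supported frequency that satisfies the request,
 * or the highest table entry if the request exceeds them all */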
for (i = 0; i < max_states; ++i)
if (freq_table[i] >= rate)
return freq_table[i];
return freq_table[max_states - 1];
}
int gp10b_clk_get_freqs(struct device *dev,
unsigned long **freqs, int *num_freqs)
{
struct gk20a_platform *platform = gk20a_get_platform(dev);
struct gk20a *g = platform->g;
unsigned long max_rate;
unsigned long new_rate = 0, prev_rate = 0;
int i = 0, freq_counter = 0;
max_rate = clk_round_rate(platform->clk[0], (UINT_MAX - 1));
/*
* Walk the h/w frequency table, select only every
* GP10B_FREQ_SELECT_STEP'th frequency, and always add the
* MAX frequency as the last entry
*/
for (; i < GP10B_MAX_SUPPORTED_FREQS; ++i) {
prev_rate = new_rate;
new_rate = clk_round_rate(platform->clk[0], prev_rate + 1);
if (i % GP10B_FREQ_SELECT_STEP == 0 ||
new_rate == max_rate) {
gp10b_freq_table[freq_counter++] = new_rate;
if (new_rate == max_rate)
break;
}
}
WARN_ON(i == GP10B_MAX_SUPPORTED_FREQS);
/* Fill freq table */
*freqs = gp10b_freq_table;
*num_freqs = freq_counter;
nvgpu_log_info(g, "min rate: %ld max rate: %ld num_of_freq %d\n",
gp10b_freq_table[0], max_rate, *num_freqs);
return 0;
}
struct gk20a_platform gp10b_tegra_platform = {
.has_syncpoints = true,
/* power management configuration */
.railgate_delay_init = 500,
/* ldiv slowdown factor */
.ldiv_slowdown_factor_init = SLOWDOWN_FACTOR_FPDIV_BY16,
/* power management configuration */
.can_railgate_init = true,
.enable_elpg = true,
.can_elpg_init = true,
.enable_blcg = true,
.enable_slcg = true,
.enable_elcg = true,
.can_slcg = true,
.can_blcg = true,
.can_elcg = true,
.enable_aelpg = true,
.enable_perfmon = true,
/* ptimer src frequency in hz */
.ptimer_src_freq = 31250000,
.ch_wdt_timeout_ms = 5000,
.probe = gp10b_tegra_probe,
.late_probe = gp10b_tegra_late_probe,
.remove = gp10b_tegra_remove,
/* power management callbacks */
.suspend = gp10b_tegra_suspend,
.railgate = gp10b_tegra_railgate,
.unrailgate = gp10b_tegra_unrailgate,
.is_railgated = gp10b_tegra_is_railgated,
.busy = gk20a_tegra_busy,
.idle = gk20a_tegra_idle,
.dump_platform_dependencies = gk20a_tegra_debug_dump,
#ifdef CONFIG_NVGPU_SUPPORT_CDE
.has_cde = true,
#endif
.clk_round_rate = gp10b_round_clk_rate,
.get_clk_freqs = gp10b_clk_get_freqs,
/* frequency scaling configuration */
.initscale = gp10b_tegra_scale_init,
.prescale = gp10b_tegra_prescale,
.postscale = gp10b_tegra_postscale,
.devfreq_governor = "nvhost_podgov",
.qos_notify = gk20a_scale_qos_notify,
.reset_assert = gp10b_tegra_reset_assert,
.reset_deassert = gp10b_tegra_reset_deassert,
.force_reset_in_do_idle = false,
.soc_name = "tegra18x",
.unified_memory = true,
.dma_mask = DMA_BIT_MASK(36),
.ltc_streamid = TEGRA_SID_GPUB,
.secure_buffer_size = 401408,
};
void gr_gp10b_create_sysfs(struct gk20a *g)
{
int error = 0;
struct device *dev = dev_from_gk20a(g);
/* This stat creation function is called on GR init. GR can get
initialized multiple times but we only need to create the ECC
stats once. Therefore, add the following check to avoid
creating duplicate stat sysfs nodes. */
if (g->ecc.gr.sm_lrf_single_err_count.counters != NULL)
return;
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"sm_lrf_ecc_single_err_count",
&g->ecc.gr.sm_lrf_single_err_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"sm_lrf_ecc_double_err_count",
&g->ecc.gr.sm_lrf_double_err_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"sm_shm_ecc_sec_count",
&g->ecc.gr.sm_shm_sec_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"sm_shm_ecc_sed_count",
&g->ecc.gr.sm_shm_sed_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"sm_shm_ecc_ded_count",
&g->ecc.gr.sm_shm_ded_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"tex_ecc_total_sec_pipe0_count",
&g->ecc.gr.tex_total_sec_pipe0_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"tex_ecc_total_ded_pipe0_count",
&g->ecc.gr.tex_total_ded_pipe0_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"tex_ecc_unique_sec_pipe0_count",
&g->ecc.gr.tex_unique_sec_pipe0_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"tex_ecc_unique_ded_pipe0_count",
&g->ecc.gr.tex_unique_ded_pipe0_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"tex_ecc_total_sec_pipe1_count",
&g->ecc.gr.tex_total_sec_pipe1_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"tex_ecc_total_ded_pipe1_count",
&g->ecc.gr.tex_total_ded_pipe1_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"tex_ecc_unique_sec_pipe1_count",
&g->ecc.gr.tex_unique_sec_pipe1_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"tex_ecc_unique_ded_pipe1_count",
&g->ecc.gr.tex_unique_ded_pipe1_count);
error |= nvgpu_gr_ecc_stat_create(dev,
2,
"ecc_sec_count",
&g->ecc.ltc.l2_sec_count);
error |= nvgpu_gr_ecc_stat_create(dev,
2,
"ecc_ded_count",
&g->ecc.ltc.l2_ded_count);
if (error)
dev_err(dev, "Failed to create sysfs attributes!\n");
}
void gr_gp10b_remove_sysfs(struct gk20a *g)
{
struct device *dev = dev_from_gk20a(g);
if (!g->ecc.gr.sm_lrf_single_err_count.counters)
return;
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.sm_lrf_single_err_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.sm_lrf_double_err_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.sm_shm_sec_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.sm_shm_sed_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.sm_shm_ded_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.tex_total_sec_pipe0_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.tex_total_ded_pipe0_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.tex_unique_sec_pipe0_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.tex_unique_ded_pipe0_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.tex_total_sec_pipe1_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.tex_total_ded_pipe1_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.tex_unique_sec_pipe1_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.tex_unique_ded_pipe1_count);
nvgpu_gr_ecc_stat_remove(dev,
2,
&g->ecc.ltc.l2_sec_count);
nvgpu_gr_ecc_stat_remove(dev,
2,
&g->ecc.ltc.l2_ded_count);
}

View File

@@ -0,0 +1,23 @@
/*
* Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _PLATFORM_GP10B_TEGRA_H_
#define _PLATFORM_GP10B_TEGRA_H_
#include "gp10b/gr_gp10b.h"
#include "platform_ecc_sysfs.h"
#endif

View File

@@ -0,0 +1,588 @@
/*
* GV11B Tegra Platform Interface
*
* Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/of_platform.h>
#include <linux/debugfs.h>
#include <linux/dma-buf.h>
#include <linux/nvmap.h>
#include <linux/reset.h>
#include <linux/hashtable.h>
#include <linux/clk.h>
#include <linux/platform/tegra/emc_bwmgr.h>
#include <nvgpu/nvhost.h>
#include <uapi/linux/nvgpu.h>
#include <soc/tegra/tegra_bpmp.h>
#include <soc/tegra/tegra_powergate.h>
#include "gk20a/gk20a.h"
#include "platform_gk20a.h"
#include "clk.h"
#include "scale.h"
#include "platform_gp10b.h"
#include "platform_gp10b_tegra.h"
#include "platform_ecc_sysfs.h"
#include "os_linux.h"
#include "platform_gk20a_tegra.h"
#include "gv11b/gr_gv11b.h"
static void gv11b_tegra_scale_exit(struct device *dev)
{
struct gk20a_platform *platform = gk20a_get_platform(dev);
struct gk20a_scale_profile *profile = platform->g->scale_profile;
if (profile)
tegra_bwmgr_unregister(
(struct tegra_bwmgr_client *)profile->private_data);
}
static int gv11b_tegra_probe(struct device *dev)
{
struct gk20a_platform *platform = dev_get_drvdata(dev);
int err;
err = nvgpu_nvhost_syncpt_init(platform->g);
if (err) {
if (err != -ENOSYS)
return err;
}
err = gk20a_tegra_init_secure_alloc(platform);
if (err)
return err;
platform->disable_bigpage = !device_is_iommuable(dev);
platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close
= false;
platform->g->gr.ctx_vars.force_preemption_gfxp = false;
platform->g->gr.ctx_vars.force_preemption_cilp = false;
gp10b_tegra_get_clocks(dev);
nvgpu_linux_init_clk_support(platform->g);
return 0;
}
static int gv11b_tegra_late_probe(struct device *dev)
{
return 0;
}
static int gv11b_tegra_remove(struct device *dev)
{
struct gk20a *g = get_gk20a(dev);
if (g->ops.gr.remove_gr_sysfs)
g->ops.gr.remove_gr_sysfs(g);
gv11b_tegra_scale_exit(dev);
#ifdef CONFIG_TEGRA_GK20A_NVHOST
nvgpu_free_nvhost_dev(get_gk20a(dev));
#endif
return 0;
}
static bool gv11b_tegra_is_railgated(struct device *dev)
{
bool ret = false;
#ifdef TEGRA194_POWER_DOMAIN_GPU
struct gk20a *g = get_gk20a(dev);
if (tegra_bpmp_running()) {
nvgpu_log(g, gpu_dbg_info, "bpmp running");
ret = !tegra_powergate_is_powered(TEGRA194_POWER_DOMAIN_GPU);
nvgpu_log(g, gpu_dbg_info, "railgated? %s", ret ? "yes" : "no");
} else {
nvgpu_log(g, gpu_dbg_info, "bpmp not running");
}
#endif
return ret;
}
static int gv11b_tegra_railgate(struct device *dev)
{
#ifdef TEGRA194_POWER_DOMAIN_GPU
struct gk20a_platform *platform = gk20a_get_platform(dev);
struct gk20a_scale_profile *profile = platform->g->scale_profile;
struct gk20a *g = get_gk20a(dev);
int i;
/* remove emc frequency floor */
if (profile)
tegra_bwmgr_set_emc(
(struct tegra_bwmgr_client *)profile->private_data,
0, TEGRA_BWMGR_SET_EMC_FLOOR);
if (tegra_bpmp_running()) {
nvgpu_log(g, gpu_dbg_info, "bpmp running");
if (!tegra_powergate_is_powered(TEGRA194_POWER_DOMAIN_GPU)) {
nvgpu_log(g, gpu_dbg_info, "powergate is not powered");
return 0;
}
nvgpu_log(g, gpu_dbg_info, "clk_disable_unprepare");
for (i = 0; i < platform->num_clks; i++) {
if (platform->clk[i])
clk_disable_unprepare(platform->clk[i]);
}
nvgpu_log(g, gpu_dbg_info, "powergate_partition");
tegra_powergate_partition(TEGRA194_POWER_DOMAIN_GPU);
} else {
nvgpu_log(g, gpu_dbg_info, "bpmp not running");
}
#endif
return 0;
}
static int gv11b_tegra_unrailgate(struct device *dev)
{
int ret = 0;
#ifdef TEGRA194_POWER_DOMAIN_GPU
struct gk20a_platform *platform = gk20a_get_platform(dev);
struct gk20a *g = get_gk20a(dev);
struct gk20a_scale_profile *profile = platform->g->scale_profile;
int i;
if (tegra_bpmp_running()) {
nvgpu_log(g, gpu_dbg_info, "bpmp running");
ret = tegra_unpowergate_partition(TEGRA194_POWER_DOMAIN_GPU);
if (ret) {
nvgpu_log(g, gpu_dbg_info,
"unpowergate partition failed");
return ret;
}
nvgpu_log(g, gpu_dbg_info, "clk_prepare_enable");
for (i = 0; i < platform->num_clks; i++) {
if (platform->clk[i])
clk_prepare_enable(platform->clk[i]);
}
} else {
nvgpu_log(g, gpu_dbg_info, "bpmp not running");
}
/* to start with, set emc frequency floor to max rate */
if (profile)
tegra_bwmgr_set_emc(
(struct tegra_bwmgr_client *)profile->private_data,
tegra_bwmgr_get_max_emc_rate(),
TEGRA_BWMGR_SET_EMC_FLOOR);
#endif
return ret;
}
static int gv11b_tegra_suspend(struct device *dev)
{
return 0;
}
struct gk20a_platform gv11b_tegra_platform = {
.has_syncpoints = true,
/* ptimer src frequency in hz */
.ptimer_src_freq = 31250000,
.ch_wdt_timeout_ms = 5000,
.probe = gv11b_tegra_probe,
.late_probe = gv11b_tegra_late_probe,
.remove = gv11b_tegra_remove,
.railgate_delay_init = 500,
.can_railgate_init = true,
.can_slcg = true,
.can_blcg = true,
.can_elcg = true,
.enable_slcg = true,
.enable_blcg = true,
.enable_elcg = true,
.enable_perfmon = true,
/* power management configuration */
.enable_elpg = true,
.can_elpg_init = true,
.enable_aelpg = true,
/* power management callbacks */
.suspend = gv11b_tegra_suspend,
.railgate = gv11b_tegra_railgate,
.unrailgate = gv11b_tegra_unrailgate,
.is_railgated = gv11b_tegra_is_railgated,
.busy = gk20a_tegra_busy,
.idle = gk20a_tegra_idle,
.clk_round_rate = gp10b_round_clk_rate,
.get_clk_freqs = gp10b_clk_get_freqs,
/* frequency scaling configuration */
.initscale = gp10b_tegra_scale_init,
.prescale = gp10b_tegra_prescale,
.postscale = gp10b_tegra_postscale,
.devfreq_governor = "nvhost_podgov",
.qos_notify = gk20a_scale_qos_notify,
.dump_platform_dependencies = gk20a_tegra_debug_dump,
.soc_name = "tegra19x",
.honors_aperture = true,
.unified_memory = true,
.dma_mask = DMA_BIT_MASK(36),
.reset_assert = gp10b_tegra_reset_assert,
.reset_deassert = gp10b_tegra_reset_deassert,
.secure_buffer_size = 667648,
};
void gr_gv11b_create_sysfs(struct gk20a *g)
{
struct device *dev = dev_from_gk20a(g);
int error = 0;
/* This stat creation function is called on GR init. GR can get
initialized multiple times but we only need to create the ECC
stats once. Therefore, add the following check to avoid
creating duplicate stat sysfs nodes. */
if (g->ecc.gr.sm_l1_tag_corrected_err_count.counters != NULL)
return;
gr_gp10b_create_sysfs(g);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"sm_l1_tag_ecc_corrected_err_count",
&g->ecc.gr.sm_l1_tag_corrected_err_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"sm_l1_tag_ecc_uncorrected_err_count",
&g->ecc.gr.sm_l1_tag_uncorrected_err_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"sm_cbu_ecc_corrected_err_count",
&g->ecc.gr.sm_cbu_corrected_err_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"sm_cbu_ecc_uncorrected_err_count",
&g->ecc.gr.sm_cbu_uncorrected_err_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"sm_l1_data_ecc_corrected_err_count",
&g->ecc.gr.sm_l1_data_corrected_err_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"sm_l1_data_ecc_uncorrected_err_count",
&g->ecc.gr.sm_l1_data_uncorrected_err_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"sm_icache_ecc_corrected_err_count",
&g->ecc.gr.sm_icache_corrected_err_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"sm_icache_ecc_uncorrected_err_count",
&g->ecc.gr.sm_icache_uncorrected_err_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"gcc_l15_ecc_corrected_err_count",
&g->ecc.gr.gcc_l15_corrected_err_count);
error |= nvgpu_gr_ecc_stat_create(dev,
0,
"gcc_l15_ecc_uncorrected_err_count",
&g->ecc.gr.gcc_l15_uncorrected_err_count);
error |= nvgpu_ecc_stat_create(dev,
g->ltc_count,
0,
"ltc",
NULL,
"l2_cache_uncorrected_err_count",
&g->ecc.ltc.l2_cache_uncorrected_err_count);
error |= nvgpu_ecc_stat_create(dev,
g->ltc_count,
0,
"ltc",
NULL,
"l2_cache_corrected_err_count",
&g->ecc.ltc.l2_cache_corrected_err_count);
error |= nvgpu_ecc_stat_create(dev,
1,
0,
"gpc",
NULL,
"fecs_ecc_uncorrected_err_count",
&g->ecc.gr.fecs_uncorrected_err_count);
error |= nvgpu_ecc_stat_create(dev,
1,
0,
"gpc",
NULL,
"fecs_ecc_corrected_err_count",
&g->ecc.gr.fecs_corrected_err_count);
error |= nvgpu_ecc_stat_create(dev,
g->gr.gpc_count,
0,
"gpc",
NULL,
"gpccs_ecc_uncorrected_err_count",
&g->ecc.gr.gpccs_uncorrected_err_count);
error |= nvgpu_ecc_stat_create(dev,
g->gr.gpc_count,
0,
"gpc",
NULL,
"gpccs_ecc_corrected_err_count",
&g->ecc.gr.gpccs_corrected_err_count);
error |= nvgpu_ecc_stat_create(dev,
g->gr.gpc_count,
0,
"gpc",
NULL,
"mmu_l1tlb_ecc_uncorrected_err_count",
&g->ecc.gr.mmu_l1tlb_uncorrected_err_count);
error |= nvgpu_ecc_stat_create(dev,
g->gr.gpc_count,
0,
"gpc",
NULL,
"mmu_l1tlb_ecc_corrected_err_count",
&g->ecc.gr.mmu_l1tlb_corrected_err_count);
error |= nvgpu_ecc_stat_create(dev,
1,
0,
"eng",
NULL,
"mmu_l2tlb_ecc_uncorrected_err_count",
&g->ecc.fb.mmu_l2tlb_uncorrected_err_count);
error |= nvgpu_ecc_stat_create(dev,
1,
0,
"eng",
NULL,
"mmu_l2tlb_ecc_corrected_err_count",
&g->ecc.fb.mmu_l2tlb_corrected_err_count);
error |= nvgpu_ecc_stat_create(dev,
1,
0,
"eng",
NULL,
"mmu_hubtlb_ecc_uncorrected_err_count",
&g->ecc.fb.mmu_hubtlb_uncorrected_err_count);
error |= nvgpu_ecc_stat_create(dev,
1,
0,
"eng",
NULL,
"mmu_hubtlb_ecc_corrected_err_count",
&g->ecc.fb.mmu_hubtlb_corrected_err_count);
error |= nvgpu_ecc_stat_create(dev,
1,
0,
"eng",
NULL,
"mmu_fillunit_ecc_uncorrected_err_count",
&g->ecc.fb.mmu_fillunit_uncorrected_err_count);
error |= nvgpu_ecc_stat_create(dev,
1,
0,
"eng",
NULL,
"mmu_fillunit_ecc_corrected_err_count",
&g->ecc.fb.mmu_fillunit_corrected_err_count);
error |= nvgpu_ecc_stat_create(dev,
1,
0,
"eng",
NULL,
"pmu_ecc_uncorrected_err_count",
&g->ecc.pmu.pmu_uncorrected_err_count);
error |= nvgpu_ecc_stat_create(dev,
1,
0,
"eng",
NULL,
"pmu_ecc_corrected_err_count",
&g->ecc.pmu.pmu_corrected_err_count);
if (error)
dev_err(dev, "Failed to create gv11b sysfs attributes!\n");
}
void gr_gv11b_remove_sysfs(struct gk20a *g)
{
struct device *dev = dev_from_gk20a(g);
if (!g->ecc.gr.sm_l1_tag_corrected_err_count.counters)
return;
gr_gp10b_remove_sysfs(g);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.sm_l1_tag_corrected_err_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.sm_l1_tag_uncorrected_err_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.sm_cbu_corrected_err_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.sm_cbu_uncorrected_err_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.sm_l1_data_corrected_err_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.sm_l1_data_uncorrected_err_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.sm_icache_corrected_err_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.sm_icache_uncorrected_err_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.gcc_l15_corrected_err_count);
nvgpu_gr_ecc_stat_remove(dev,
0,
&g->ecc.gr.gcc_l15_uncorrected_err_count);
nvgpu_ecc_stat_remove(dev,
g->ltc_count,
0,
&g->ecc.ltc.l2_cache_uncorrected_err_count);
nvgpu_ecc_stat_remove(dev,
g->ltc_count,
0,
&g->ecc.ltc.l2_cache_corrected_err_count);
nvgpu_ecc_stat_remove(dev,
1,
0,
&g->ecc.gr.fecs_uncorrected_err_count);
nvgpu_ecc_stat_remove(dev,
1,
0,
&g->ecc.gr.fecs_corrected_err_count);
nvgpu_ecc_stat_remove(dev,
g->gr.gpc_count,
0,
&g->ecc.gr.gpccs_uncorrected_err_count);
nvgpu_ecc_stat_remove(dev,
g->gr.gpc_count,
0,
&g->ecc.gr.gpccs_corrected_err_count);
nvgpu_ecc_stat_remove(dev,
g->gr.gpc_count,
0,
&g->ecc.gr.mmu_l1tlb_uncorrected_err_count);
nvgpu_ecc_stat_remove(dev,
g->gr.gpc_count,
0,
&g->ecc.gr.mmu_l1tlb_corrected_err_count);
nvgpu_ecc_stat_remove(dev,
1,
0,
&g->ecc.fb.mmu_l2tlb_uncorrected_err_count);
nvgpu_ecc_stat_remove(dev,
1,
0,
&g->ecc.fb.mmu_l2tlb_corrected_err_count);
nvgpu_ecc_stat_remove(dev,
1,
0,
&g->ecc.fb.mmu_hubtlb_uncorrected_err_count);
nvgpu_ecc_stat_remove(dev,
1,
0,
&g->ecc.fb.mmu_hubtlb_corrected_err_count);
nvgpu_ecc_stat_remove(dev,
1,
0,
&g->ecc.fb.mmu_fillunit_uncorrected_err_count);
nvgpu_ecc_stat_remove(dev,
1,
0,
&g->ecc.fb.mmu_fillunit_corrected_err_count);
nvgpu_ecc_stat_remove(dev,
1,
0,
&g->ecc.pmu.pmu_uncorrected_err_count);
nvgpu_ecc_stat_remove(dev,
1,
0,
&g->ecc.pmu.pmu_corrected_err_count);
}

View File

@@ -0,0 +1,39 @@
/*
* Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#include <nvgpu/rwsem.h>
void nvgpu_rwsem_init(struct nvgpu_rwsem *rwsem)
{
init_rwsem(&rwsem->rwsem);
}
void nvgpu_rwsem_up_read(struct nvgpu_rwsem *rwsem)
{
up_read(&rwsem->rwsem);
}
void nvgpu_rwsem_down_read(struct nvgpu_rwsem *rwsem)
{
down_read(&rwsem->rwsem);
}
void nvgpu_rwsem_up_write(struct nvgpu_rwsem *rwsem)
{
up_write(&rwsem->rwsem);
}
void nvgpu_rwsem_down_write(struct nvgpu_rwsem *rwsem)
{
down_write(&rwsem->rwsem);
}

View File

@@ -0,0 +1,428 @@
/*
* gk20a clock scaling profile
*
* Copyright (c) 2013-2017, NVIDIA Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/devfreq.h>
#include <linux/export.h>
#include <soc/tegra/chip-id.h>
#include <linux/pm_qos.h>
#include <governor.h>
#include <nvgpu/kmem.h>
#include <nvgpu/log.h>
#include "gk20a/gk20a.h"
#include "platform_gk20a.h"
#include "scale.h"
#include "os_linux.h"
/*
* gk20a_scale_qos_notify()
*
* This function is called when a QoS frequency requirement for the device
* has changed. The function calls the postscaling callback if it is defined.
*/
#if defined(CONFIG_COMMON_CLK)
int gk20a_scale_qos_notify(struct notifier_block *nb,
unsigned long n, void *p)
{
struct gk20a_scale_profile *profile =
container_of(nb, struct gk20a_scale_profile,
qos_notify_block);
struct gk20a *g = get_gk20a(profile->dev);
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct devfreq *devfreq = l->devfreq;
if (!devfreq)
return NOTIFY_OK;
mutex_lock(&devfreq->lock);
/* check for pm_qos min and max frequency requirement */
profile->qos_min_freq =
(unsigned long)pm_qos_read_min_bound(PM_QOS_GPU_FREQ_BOUNDS) * 1000UL;
profile->qos_max_freq =
(unsigned long)pm_qos_read_max_bound(PM_QOS_GPU_FREQ_BOUNDS) * 1000UL;
if (profile->qos_min_freq > profile->qos_max_freq) {
nvgpu_err(g,
"QoS: setting invalid limit, min_freq=%lu max_freq=%lu",
profile->qos_min_freq, profile->qos_max_freq);
profile->qos_min_freq = profile->qos_max_freq;
}
update_devfreq(devfreq);
mutex_unlock(&devfreq->lock);
return NOTIFY_OK;
}
#else
int gk20a_scale_qos_notify(struct notifier_block *nb,
unsigned long n, void *p)
{
struct gk20a_scale_profile *profile =
container_of(nb, struct gk20a_scale_profile,
qos_notify_block);
struct gk20a_platform *platform = dev_get_drvdata(profile->dev);
struct gk20a *g = get_gk20a(profile->dev);
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
unsigned long freq;
if (!platform->postscale)
return NOTIFY_OK;
/* get the frequency requirement. if devfreq is enabled, check if it
* has higher demand than qos */
freq = platform->clk_round_rate(profile->dev,
(u32)pm_qos_read_min_bound(PM_QOS_GPU_FREQ_BOUNDS));
if (l->devfreq)
freq = max(l->devfreq->previous_freq, freq);
/* Update gpu load because we may scale the emc target
* if the gpu load changed. */
nvgpu_pmu_load_update(g);
platform->postscale(profile->dev, freq);
return NOTIFY_OK;
}
#endif
/*
* gk20a_scale_make_freq_table(profile)
*
* This function initialises the frequency table for the given device profile
*/
static int gk20a_scale_make_freq_table(struct gk20a_scale_profile *profile)
{
struct gk20a_platform *platform = dev_get_drvdata(profile->dev);
int num_freqs, err;
unsigned long *freqs;
if (platform->get_clk_freqs) {
/* get gpu frequency table */
err = platform->get_clk_freqs(profile->dev, &freqs,
&num_freqs);
if (err)
return -ENOSYS;
} else
return -ENOSYS;
profile->devfreq_profile.freq_table = (unsigned long *)freqs;
profile->devfreq_profile.max_state = num_freqs;
return 0;
}
/*
* gk20a_scale_target(dev, *freq, flags)
*
* This function scales the clock
*/
static int gk20a_scale_target(struct device *dev, unsigned long *freq,
u32 flags)
{
struct gk20a_platform *platform = dev_get_drvdata(dev);
struct gk20a *g = platform->g;
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct gk20a_scale_profile *profile = g->scale_profile;
struct devfreq *devfreq = l->devfreq;
unsigned long local_freq = *freq;
unsigned long rounded_rate;
unsigned long min_freq = 0, max_freq = 0;
/*
* Calculate floor and cap frequency values
*
* Policy :
* We have two APIs to clip the frequency
* 1. devfreq
* 2. pm_qos
*
* To calculate floor (min) freq, we select MAX of floor frequencies
* requested from both APIs
* To get cap (max) freq, we select MIN of max frequencies
*
* In case we have conflict (min_freq > max_freq) after above
* steps, we ensure that max_freq wins over min_freq
*/
min_freq = max_t(u32, devfreq->min_freq, profile->qos_min_freq);
max_freq = min_t(u32, devfreq->max_freq, profile->qos_max_freq);
if (min_freq > max_freq)
min_freq = max_freq;
/* Clip requested frequency */
if (local_freq < min_freq)
local_freq = min_freq;
if (local_freq > max_freq)
local_freq = max_freq;
/* set the final frequency */
rounded_rate = platform->clk_round_rate(dev, local_freq);
/* Check for duplicate request */
if (rounded_rate == g->last_freq)
return 0;
if (g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK) == rounded_rate)
*freq = rounded_rate;
else {
g->ops.clk.set_rate(g, CTRL_CLK_DOMAIN_GPCCLK, rounded_rate);
*freq = g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK);
}
g->last_freq = *freq;
/* postscale will only scale emc (dram clock) if evaluating
* gk20a_tegra_get_emc_rate() produces a new or different emc
* target because the load and/or gpufreq has changed */
if (platform->postscale)
platform->postscale(dev, rounded_rate);
return 0;
}
/*
* update_load_estimate_gpmu(dev)
*
* Update the load estimate using the gpmu. The gpmu value is normalised
* over the time elapsed since the previous query.
*/
static void update_load_estimate_gpmu(struct device *dev)
{
struct gk20a *g = get_gk20a(dev);
struct gk20a_scale_profile *profile = g->scale_profile;
unsigned long dt;
u32 busy_time;
ktime_t t;
t = ktime_get();
dt = ktime_us_delta(t, profile->last_event_time);
profile->dev_stat.total_time = dt;
profile->last_event_time = t;
nvgpu_pmu_load_norm(g, &busy_time);
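/* the PMU reports load normalized to 1000; convert it into busy time
 * over the elapsed interval */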
profile->dev_stat.busy_time = (busy_time * dt) / 1000;
}
/*
* gk20a_scale_suspend(dev)
*
* This function informs devfreq of suspend
*/
void gk20a_scale_suspend(struct device *dev)
{
struct gk20a *g = get_gk20a(dev);
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct devfreq *devfreq = l->devfreq;
if (!devfreq)
return;
devfreq_suspend_device(devfreq);
}
/*
* gk20a_scale_resume(dev)
*
* This function informs devfreq of resume
*/
void gk20a_scale_resume(struct device *dev)
{
struct gk20a *g = get_gk20a(dev);
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct devfreq *devfreq = l->devfreq;
if (!devfreq)
return;
g->last_freq = 0;
devfreq_resume_device(devfreq);
}
/*
* gk20a_scale_get_dev_status(dev, *stat)
*
* This function queries the current device status.
*/
static int gk20a_scale_get_dev_status(struct device *dev,
struct devfreq_dev_status *stat)
{
struct gk20a *g = get_gk20a(dev);
struct gk20a_scale_profile *profile = g->scale_profile;
struct gk20a_platform *platform = dev_get_drvdata(dev);
/* update the software shadow */
nvgpu_pmu_load_update(g);
/* inform edp about new constraint */
if (platform->prescale)
platform->prescale(dev);
/* Make sure there are correct values for the current frequency */
profile->dev_stat.current_frequency =
g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK);
/* Update load estimate */
update_load_estimate_gpmu(dev);
/* Copy the contents of the current device status */
*stat = profile->dev_stat;
/* Finally, clear out the local values */
profile->dev_stat.total_time = 0;
profile->dev_stat.busy_time = 0;
return 0;
}
/*
* get_cur_freq(struct device *dev, unsigned long *freq)
*
* This function gets the current GPU clock rate.
*/
static int get_cur_freq(struct device *dev, unsigned long *freq)
{
struct gk20a *g = get_gk20a(dev);
*freq = g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_GPCCLK);
return 0;
}
/*
* gk20a_scale_init(dev)
*/
void gk20a_scale_init(struct device *dev)
{
struct gk20a_platform *platform = dev_get_drvdata(dev);
struct gk20a *g = platform->g;
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct gk20a_scale_profile *profile;
int err;
if (g->scale_profile)
return;
if (!platform->devfreq_governor && !platform->qos_notify)
return;
profile = nvgpu_kzalloc(g, sizeof(*profile));
if (!profile)
return;
profile->dev = dev;
profile->dev_stat.busy = false;
/* Create frequency table */
err = gk20a_scale_make_freq_table(profile);
if (err || !profile->devfreq_profile.max_state)
goto err_get_freqs;
profile->qos_min_freq = 0;
profile->qos_max_freq = UINT_MAX;
/* Store the device profile so we can access it if the devfreq
* governor init needs it */
g->scale_profile = profile;
if (platform->devfreq_governor) {
struct devfreq *devfreq;
profile->devfreq_profile.initial_freq =
profile->devfreq_profile.freq_table[0];
profile->devfreq_profile.target = gk20a_scale_target;
profile->devfreq_profile.get_dev_status =
gk20a_scale_get_dev_status;
profile->devfreq_profile.get_cur_freq = get_cur_freq;
profile->devfreq_profile.polling_ms = 25;
devfreq = devfreq_add_device(dev,
&profile->devfreq_profile,
platform->devfreq_governor, NULL);
if (IS_ERR(devfreq))
devfreq = NULL;
l->devfreq = devfreq;
}
/* Should we register QoS callback for this device? */
if (platform->qos_notify) {
profile->qos_notify_block.notifier_call =
platform->qos_notify;
pm_qos_add_min_notifier(PM_QOS_GPU_FREQ_BOUNDS,
&profile->qos_notify_block);
pm_qos_add_max_notifier(PM_QOS_GPU_FREQ_BOUNDS,
&profile->qos_notify_block);
}
return;
err_get_freqs:
nvgpu_kfree(g, profile);
}
void gk20a_scale_exit(struct device *dev)
{
struct gk20a_platform *platform = dev_get_drvdata(dev);
struct gk20a *g = platform->g;
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
int err;
if (platform->qos_notify) {
pm_qos_remove_min_notifier(PM_QOS_GPU_FREQ_BOUNDS,
&g->scale_profile->qos_notify_block);
pm_qos_remove_max_notifier(PM_QOS_GPU_FREQ_BOUNDS,
&g->scale_profile->qos_notify_block);
}
if (platform->devfreq_governor) {
err = devfreq_remove_device(l->devfreq);
l->devfreq = NULL;
}
nvgpu_kfree(g, g->scale_profile);
g->scale_profile = NULL;
}
/*
* gk20a_scale_hw_init(dev)
*
* Initialize hardware portion of the device
*/
void gk20a_scale_hw_init(struct device *dev)
{
struct gk20a_platform *platform = dev_get_drvdata(dev);
struct gk20a_scale_profile *profile = platform->g->scale_profile;
/* make sure that scaling has been initialised */
if (!profile)
return;
profile->dev_stat.total_time = 0;
profile->last_event_time = ktime_get();
}

View File

@@ -0,0 +1,66 @@
/*
* gk20a clock scaling profile
*
* Copyright (c) 2013-2016, NVIDIA Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef GK20A_SCALE_H
#define GK20A_SCALE_H
#include <linux/devfreq.h>
struct clk;
struct gk20a_scale_profile {
struct device *dev;
ktime_t last_event_time;
struct devfreq_dev_profile devfreq_profile;
struct devfreq_dev_status dev_stat;
struct notifier_block qos_notify_block;
unsigned long qos_min_freq;
unsigned long qos_max_freq;
void *private_data;
};
/* Initialization and de-initialization for module */
void gk20a_scale_init(struct device *);
void gk20a_scale_exit(struct device *);
void gk20a_scale_hw_init(struct device *dev);
#if defined(CONFIG_GK20A_DEVFREQ)
/*
* call when performing submit to notify scaling mechanism that the module is
* in use
*/
void gk20a_scale_notify_busy(struct device *);
void gk20a_scale_notify_idle(struct device *);
void gk20a_scale_suspend(struct device *);
void gk20a_scale_resume(struct device *);
int gk20a_scale_qos_notify(struct notifier_block *nb,
unsigned long n, void *p);
#else
static inline void gk20a_scale_notify_busy(struct device *dev) {}
static inline void gk20a_scale_notify_idle(struct device *dev) {}
static inline void gk20a_scale_suspend(struct device *dev) {}
static inline void gk20a_scale_resume(struct device *dev) {}
static inline int gk20a_scale_qos_notify(struct notifier_block *nb,
unsigned long n, void *p)
{
return -ENOSYS;
}
#endif
#endif

View File

@@ -0,0 +1,676 @@
/*
* Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <asm/barrier.h>
#include <linux/wait.h>
#include <linux/uaccess.h>
#include <linux/poll.h>
#include <uapi/linux/nvgpu.h>
#include <nvgpu/kmem.h>
#include <nvgpu/log.h>
#include <nvgpu/bug.h>
#include <nvgpu/barrier.h>
#include "gk20a/gk20a.h"
#include "gk20a/gr_gk20a.h"
#include "sched.h"
#include "os_linux.h"
#include "ioctl_tsg.h"
#include <nvgpu/hw/gk20a/hw_ctxsw_prog_gk20a.h>
#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
ssize_t gk20a_sched_dev_read(struct file *filp, char __user *buf,
size_t size, loff_t *off)
{
struct gk20a_sched_ctrl *sched = filp->private_data;
struct gk20a *g = sched->g;
struct nvgpu_sched_event_arg event = { 0 };
int err;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched,
"filp=%p buf=%p size=%zu", filp, buf, size);
if (size < sizeof(event))
return -EINVAL;
size = sizeof(event);
nvgpu_mutex_acquire(&sched->status_lock);
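/* wait until a status event is pending, unless the file was opened
 * with O_NONBLOCK */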
while (!sched->status) {
nvgpu_mutex_release(&sched->status_lock);
if (filp->f_flags & O_NONBLOCK)
return -EAGAIN;
err = NVGPU_COND_WAIT_INTERRUPTIBLE(&sched->readout_wq,
sched->status, 0);
if (err)
return err;
nvgpu_mutex_acquire(&sched->status_lock);
}
event.reserved = 0;
event.status = sched->status;
if (copy_to_user(buf, &event, size)) {
nvgpu_mutex_release(&sched->status_lock);
return -EFAULT;
}
sched->status = 0;
nvgpu_mutex_release(&sched->status_lock);
return size;
}
unsigned int gk20a_sched_dev_poll(struct file *filp, poll_table *wait)
{
struct gk20a_sched_ctrl *sched = filp->private_data;
struct gk20a *g = sched->g;
unsigned int mask = 0;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, " ");
nvgpu_mutex_acquire(&sched->status_lock);
poll_wait(filp, &sched->readout_wq.wq, wait);
if (sched->status)
mask |= POLLIN | POLLRDNORM;
nvgpu_mutex_release(&sched->status_lock);
return mask;
}
static int gk20a_sched_dev_ioctl_get_tsgs(struct gk20a_sched_ctrl *sched,
struct nvgpu_sched_get_tsgs_args *arg)
{
struct gk20a *g = sched->g;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "size=%u buffer=%llx",
arg->size, arg->buffer);
if ((arg->size < sched->bitmap_size) || (!arg->buffer)) {
arg->size = sched->bitmap_size;
return -ENOSPC;
}
nvgpu_mutex_acquire(&sched->status_lock);
if (copy_to_user((void __user *)(uintptr_t)arg->buffer,
sched->active_tsg_bitmap, sched->bitmap_size)) {
nvgpu_mutex_release(&sched->status_lock);
return -EFAULT;
}
nvgpu_mutex_release(&sched->status_lock);
return 0;
}
static int gk20a_sched_dev_ioctl_get_recent_tsgs(struct gk20a_sched_ctrl *sched,
struct nvgpu_sched_get_tsgs_args *arg)
{
struct gk20a *g = sched->g;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "size=%u buffer=%llx",
arg->size, arg->buffer);
if ((arg->size < sched->bitmap_size) || (!arg->buffer)) {
arg->size = sched->bitmap_size;
return -ENOSPC;
}
nvgpu_mutex_acquire(&sched->status_lock);
if (copy_to_user((void __user *)(uintptr_t)arg->buffer,
sched->recent_tsg_bitmap, sched->bitmap_size)) {
nvgpu_mutex_release(&sched->status_lock);
return -EFAULT;
}
memset(sched->recent_tsg_bitmap, 0, sched->bitmap_size);
nvgpu_mutex_release(&sched->status_lock);
return 0;
}
static int gk20a_sched_dev_ioctl_get_tsgs_by_pid(struct gk20a_sched_ctrl *sched,
struct nvgpu_sched_get_tsgs_by_pid_args *arg)
{
struct gk20a *g = sched->g;
struct fifo_gk20a *f = &g->fifo;
struct tsg_gk20a *tsg;
u64 *bitmap;
unsigned int tsgid;
/* pid at user level corresponds to kernel tgid */
pid_t tgid = (pid_t)arg->pid;
int err = 0;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "pid=%d size=%u buffer=%llx",
(pid_t)arg->pid, arg->size, arg->buffer);
if ((arg->size < sched->bitmap_size) || (!arg->buffer)) {
arg->size = sched->bitmap_size;
return -ENOSPC;
}
bitmap = nvgpu_kzalloc(sched->g, sched->bitmap_size);
if (!bitmap)
return -ENOMEM;
nvgpu_mutex_acquire(&sched->status_lock);
for (tsgid = 0; tsgid < f->num_channels; tsgid++) {
if (NVGPU_SCHED_ISSET(tsgid, sched->active_tsg_bitmap)) {
tsg = &f->tsg[tsgid];
if (tsg->tgid == tgid)
NVGPU_SCHED_SET(tsgid, bitmap);
}
}
nvgpu_mutex_release(&sched->status_lock);
if (copy_to_user((void __user *)(uintptr_t)arg->buffer,
bitmap, sched->bitmap_size))
err = -EFAULT;
nvgpu_kfree(sched->g, bitmap);
return err;
}
static int gk20a_sched_dev_ioctl_get_params(struct gk20a_sched_ctrl *sched,
struct nvgpu_sched_tsg_get_params_args *arg)
{
struct gk20a *g = sched->g;
struct fifo_gk20a *f = &g->fifo;
struct tsg_gk20a *tsg;
u32 tsgid = arg->tsgid;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid);
if (tsgid >= f->num_channels)
return -EINVAL;
nvgpu_speculation_barrier();
tsg = &f->tsg[tsgid];
if (!nvgpu_ref_get_unless_zero(&tsg->refcount))
return -ENXIO;
arg->pid = tsg->tgid; /* kernel tgid corresponds to user pid */
arg->runlist_interleave = tsg->interleave_level;
arg->timeslice = tsg->timeslice_us;
arg->graphics_preempt_mode =
tsg->gr_ctx.graphics_preempt_mode;
arg->compute_preempt_mode =
tsg->gr_ctx.compute_preempt_mode;
nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release);
return 0;
}
static int gk20a_sched_dev_ioctl_tsg_set_timeslice(
struct gk20a_sched_ctrl *sched,
struct nvgpu_sched_tsg_timeslice_args *arg)
{
struct gk20a *g = sched->g;
struct fifo_gk20a *f = &g->fifo;
struct tsg_gk20a *tsg;
u32 tsgid = arg->tsgid;
int err;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid);
if (tsgid >= f->num_channels)
return -EINVAL;
nvgpu_speculation_barrier();
tsg = &f->tsg[tsgid];
if (!nvgpu_ref_get_unless_zero(&tsg->refcount))
return -ENXIO;
err = gk20a_busy(g);
if (err)
goto done;
err = gk20a_tsg_set_timeslice(tsg, arg->timeslice);
gk20a_idle(g);
done:
nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release);
return err;
}
static int gk20a_sched_dev_ioctl_tsg_set_runlist_interleave(
struct gk20a_sched_ctrl *sched,
struct nvgpu_sched_tsg_runlist_interleave_args *arg)
{
struct gk20a *g = sched->g;
struct fifo_gk20a *f = &g->fifo;
struct tsg_gk20a *tsg;
u32 tsgid = arg->tsgid;
int err;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid);
if (tsgid >= f->num_channels)
return -EINVAL;
nvgpu_speculation_barrier();
tsg = &f->tsg[tsgid];
if (!nvgpu_ref_get_unless_zero(&tsg->refcount))
return -ENXIO;
err = gk20a_busy(g);
if (err)
goto done;
err = gk20a_tsg_set_runlist_interleave(tsg, arg->runlist_interleave);
gk20a_idle(g);
done:
nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release);
return err;
}
static int gk20a_sched_dev_ioctl_lock_control(struct gk20a_sched_ctrl *sched)
{
struct gk20a *g = sched->g;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, " ");
nvgpu_mutex_acquire(&sched->control_lock);
sched->control_locked = true;
nvgpu_mutex_release(&sched->control_lock);
return 0;
}
static int gk20a_sched_dev_ioctl_unlock_control(struct gk20a_sched_ctrl *sched)
{
struct gk20a *g = sched->g;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, " ");
nvgpu_mutex_acquire(&sched->control_lock);
sched->control_locked = false;
nvgpu_mutex_release(&sched->control_lock);
return 0;
}
static int gk20a_sched_dev_ioctl_get_api_version(struct gk20a_sched_ctrl *sched,
struct nvgpu_sched_api_version_args *args)
{
struct gk20a *g = sched->g;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, " ");
args->version = NVGPU_SCHED_API_VERSION;
return 0;
}
static int gk20a_sched_dev_ioctl_get_tsg(struct gk20a_sched_ctrl *sched,
struct nvgpu_sched_tsg_refcount_args *arg)
{
struct gk20a *g = sched->g;
struct fifo_gk20a *f = &g->fifo;
struct tsg_gk20a *tsg;
u32 tsgid = arg->tsgid;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid);
if (tsgid >= f->num_channels)
return -EINVAL;
nvgpu_speculation_barrier();
tsg = &f->tsg[tsgid];
if (!nvgpu_ref_get_unless_zero(&tsg->refcount))
return -ENXIO;
nvgpu_mutex_acquire(&sched->status_lock);
if (NVGPU_SCHED_ISSET(tsgid, sched->ref_tsg_bitmap)) {
nvgpu_warn(g, "tsgid=%d already referenced", tsgid);
/* unlock status_lock as nvgpu_ioctl_tsg_release locks it */
nvgpu_mutex_release(&sched->status_lock);
nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release);
return -ENXIO;
}
/* keep reference on TSG, will be released on
* NVGPU_SCHED_IOCTL_PUT_TSG ioctl, or close
*/
NVGPU_SCHED_SET(tsgid, sched->ref_tsg_bitmap);
nvgpu_mutex_release(&sched->status_lock);
return 0;
}
static int gk20a_sched_dev_ioctl_put_tsg(struct gk20a_sched_ctrl *sched,
struct nvgpu_sched_tsg_refcount_args *arg)
{
struct gk20a *g = sched->g;
struct fifo_gk20a *f = &g->fifo;
struct tsg_gk20a *tsg;
u32 tsgid = arg->tsgid;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsgid);
if (tsgid >= f->num_channels)
return -EINVAL;
nvgpu_speculation_barrier();
nvgpu_mutex_acquire(&sched->status_lock);
if (!NVGPU_SCHED_ISSET(tsgid, sched->ref_tsg_bitmap)) {
nvgpu_mutex_release(&sched->status_lock);
nvgpu_warn(g, "tsgid=%d not previously referenced", tsgid);
return -ENXIO;
}
NVGPU_SCHED_CLR(tsgid, sched->ref_tsg_bitmap);
nvgpu_mutex_release(&sched->status_lock);
tsg = &f->tsg[tsgid];
nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release);
return 0;
}
int gk20a_sched_dev_open(struct inode *inode, struct file *filp)
{
struct nvgpu_os_linux *l = container_of(inode->i_cdev,
struct nvgpu_os_linux, sched.cdev);
struct gk20a *g;
struct gk20a_sched_ctrl *sched;
int err = 0;
g = gk20a_get(&l->g);
if (!g)
return -ENODEV;
sched = &l->sched_ctrl;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "g=%p", g);
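/* power the GPU on once if needed so that the scheduler software
 * state gets initialized */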
if (!sched->sw_ready) {
err = gk20a_busy(g);
if (err)
goto free_ref;
gk20a_idle(g);
}
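/* only one user of the scheduler control node is allowed at a time */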
if (!nvgpu_mutex_tryacquire(&sched->busy_lock)) {
err = -EBUSY;
goto free_ref;
}
memcpy(sched->recent_tsg_bitmap, sched->active_tsg_bitmap,
sched->bitmap_size);
memset(sched->ref_tsg_bitmap, 0, sched->bitmap_size);
filp->private_data = sched;
nvgpu_log(g, gpu_dbg_sched, "filp=%p sched=%p", filp, sched);
free_ref:
if (err)
gk20a_put(g);
return err;
}
long gk20a_sched_dev_ioctl(struct file *filp, unsigned int cmd,
unsigned long arg)
{
struct gk20a_sched_ctrl *sched = filp->private_data;
struct gk20a *g = sched->g;
u8 buf[NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE];
int err = 0;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "nr=%d", _IOC_NR(cmd));
if ((_IOC_TYPE(cmd) != NVGPU_SCHED_IOCTL_MAGIC) ||
(_IOC_NR(cmd) == 0) ||
(_IOC_NR(cmd) > NVGPU_SCHED_IOCTL_LAST) ||
(_IOC_SIZE(cmd) > NVGPU_SCHED_IOCTL_MAX_ARG_SIZE))
return -EINVAL;
memset(buf, 0, sizeof(buf));
if (_IOC_DIR(cmd) & _IOC_WRITE) {
if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
return -EFAULT;
}
switch (cmd) {
case NVGPU_SCHED_IOCTL_GET_TSGS:
err = gk20a_sched_dev_ioctl_get_tsgs(sched,
(struct nvgpu_sched_get_tsgs_args *)buf);
break;
case NVGPU_SCHED_IOCTL_GET_RECENT_TSGS:
err = gk20a_sched_dev_ioctl_get_recent_tsgs(sched,
(struct nvgpu_sched_get_tsgs_args *)buf);
break;
case NVGPU_SCHED_IOCTL_GET_TSGS_BY_PID:
err = gk20a_sched_dev_ioctl_get_tsgs_by_pid(sched,
(struct nvgpu_sched_get_tsgs_by_pid_args *)buf);
break;
case NVGPU_SCHED_IOCTL_TSG_GET_PARAMS:
err = gk20a_sched_dev_ioctl_get_params(sched,
(struct nvgpu_sched_tsg_get_params_args *)buf);
break;
case NVGPU_SCHED_IOCTL_TSG_SET_TIMESLICE:
err = gk20a_sched_dev_ioctl_tsg_set_timeslice(sched,
(struct nvgpu_sched_tsg_timeslice_args *)buf);
break;
case NVGPU_SCHED_IOCTL_TSG_SET_RUNLIST_INTERLEAVE:
err = gk20a_sched_dev_ioctl_tsg_set_runlist_interleave(sched,
(struct nvgpu_sched_tsg_runlist_interleave_args *)buf);
break;
case NVGPU_SCHED_IOCTL_LOCK_CONTROL:
err = gk20a_sched_dev_ioctl_lock_control(sched);
break;
case NVGPU_SCHED_IOCTL_UNLOCK_CONTROL:
err = gk20a_sched_dev_ioctl_unlock_control(sched);
break;
case NVGPU_SCHED_IOCTL_GET_API_VERSION:
err = gk20a_sched_dev_ioctl_get_api_version(sched,
(struct nvgpu_sched_api_version_args *)buf);
break;
case NVGPU_SCHED_IOCTL_GET_TSG:
err = gk20a_sched_dev_ioctl_get_tsg(sched,
(struct nvgpu_sched_tsg_refcount_args *)buf);
break;
case NVGPU_SCHED_IOCTL_PUT_TSG:
err = gk20a_sched_dev_ioctl_put_tsg(sched,
(struct nvgpu_sched_tsg_refcount_args *)buf);
break;
default:
nvgpu_log_info(g, "unrecognized gpu ioctl cmd: 0x%x", cmd);
err = -ENOTTY;
}
/* Some ioctls like NVGPU_SCHED_IOCTL_GET_TSGS might be called on
* purpose with NULL buffer and/or zero size to discover TSG bitmap
* size. We need to update user arguments in this case too, even
* if we return an error.
*/
if ((!err || (err == -ENOSPC)) && (_IOC_DIR(cmd) & _IOC_READ)) {
if (copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)))
err = -EFAULT;
}
return err;
}
int gk20a_sched_dev_release(struct inode *inode, struct file *filp)
{
struct gk20a_sched_ctrl *sched = filp->private_data;
struct gk20a *g = sched->g;
struct fifo_gk20a *f = &g->fifo;
struct tsg_gk20a *tsg;
unsigned int tsgid;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "sched: %p", sched);
/* release any reference to TSGs */
for (tsgid = 0; tsgid < f->num_channels; tsgid++) {
if (NVGPU_SCHED_ISSET(tsgid, sched->ref_tsg_bitmap)) {
tsg = &f->tsg[tsgid];
nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release);
}
}
/* unlock control */
nvgpu_mutex_acquire(&sched->control_lock);
sched->control_locked = false;
nvgpu_mutex_release(&sched->control_lock);
nvgpu_mutex_release(&sched->busy_lock);
gk20a_put(g);
return 0;
}
void gk20a_sched_ctrl_tsg_added(struct gk20a *g, struct tsg_gk20a *tsg)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct gk20a_sched_ctrl *sched = &l->sched_ctrl;
int err;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid);
if (!sched->sw_ready) {
err = gk20a_busy(g);
if (err) {
WARN_ON(err);
return;
}
gk20a_idle(g);
}
nvgpu_mutex_acquire(&sched->status_lock);
NVGPU_SCHED_SET(tsg->tsgid, sched->active_tsg_bitmap);
NVGPU_SCHED_SET(tsg->tsgid, sched->recent_tsg_bitmap);
sched->status |= NVGPU_SCHED_STATUS_TSG_OPEN;
nvgpu_mutex_release(&sched->status_lock);
nvgpu_cond_signal_interruptible(&sched->readout_wq);
}
void gk20a_sched_ctrl_tsg_removed(struct gk20a *g, struct tsg_gk20a *tsg)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct gk20a_sched_ctrl *sched = &l->sched_ctrl;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid);
nvgpu_mutex_acquire(&sched->status_lock);
NVGPU_SCHED_CLR(tsg->tsgid, sched->active_tsg_bitmap);
/* clear recent_tsg_bitmap as well: if app manager did not
* notice that TSG was previously added, no need to notify it
* if the TSG has been released in the meantime. If the
* TSG gets reallocated, app manager will be notified as usual.
*/
NVGPU_SCHED_CLR(tsg->tsgid, sched->recent_tsg_bitmap);
/* do not set event_pending, we only want to notify app manager
* when TSGs are added, so that it can apply sched params
*/
nvgpu_mutex_release(&sched->status_lock);
}
int gk20a_sched_ctrl_init(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct gk20a_sched_ctrl *sched = &l->sched_ctrl;
struct fifo_gk20a *f = &g->fifo;
int err;
if (sched->sw_ready)
return 0;
sched->g = g;
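/* one bit per channel/TSG, rounded up to whole 64-bit words, in bytes */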
sched->bitmap_size = roundup(f->num_channels, 64) / 8;
sched->status = 0;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "g=%p sched=%p size=%zu",
g, sched, sched->bitmap_size);
sched->active_tsg_bitmap = nvgpu_kzalloc(g, sched->bitmap_size);
if (!sched->active_tsg_bitmap)
return -ENOMEM;
sched->recent_tsg_bitmap = nvgpu_kzalloc(g, sched->bitmap_size);
if (!sched->recent_tsg_bitmap) {
err = -ENOMEM;
goto free_active;
}
sched->ref_tsg_bitmap = nvgpu_kzalloc(g, sched->bitmap_size);
if (!sched->ref_tsg_bitmap) {
err = -ENOMEM;
goto free_recent;
}
nvgpu_cond_init(&sched->readout_wq);
err = nvgpu_mutex_init(&sched->status_lock);
if (err)
goto free_ref;
err = nvgpu_mutex_init(&sched->control_lock);
if (err)
goto free_status_lock;
err = nvgpu_mutex_init(&sched->busy_lock);
if (err)
goto free_control_lock;
sched->sw_ready = true;
return 0;
free_control_lock:
nvgpu_mutex_destroy(&sched->control_lock);
free_status_lock:
nvgpu_mutex_destroy(&sched->status_lock);
free_ref:
nvgpu_kfree(g, sched->ref_tsg_bitmap);
free_recent:
nvgpu_kfree(g, sched->recent_tsg_bitmap);
free_active:
nvgpu_kfree(g, sched->active_tsg_bitmap);
return err;
}
void gk20a_sched_ctrl_cleanup(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct gk20a_sched_ctrl *sched = &l->sched_ctrl;
nvgpu_kfree(g, sched->active_tsg_bitmap);
nvgpu_kfree(g, sched->recent_tsg_bitmap);
nvgpu_kfree(g, sched->ref_tsg_bitmap);
sched->active_tsg_bitmap = NULL;
sched->recent_tsg_bitmap = NULL;
sched->ref_tsg_bitmap = NULL;
nvgpu_mutex_destroy(&sched->status_lock);
nvgpu_mutex_destroy(&sched->control_lock);
nvgpu_mutex_destroy(&sched->busy_lock);
sched->sw_ready = false;
}

View File

@@ -0,0 +1,55 @@
/*
* Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __NVGPU_SCHED_H
#define __NVGPU_SCHED_H
struct gk20a;
struct gpu_ops;
struct tsg_gk20a;
struct poll_table_struct;
struct gk20a_sched_ctrl {
struct gk20a *g;
struct nvgpu_mutex control_lock;
bool control_locked;
bool sw_ready;
struct nvgpu_mutex status_lock;
struct nvgpu_mutex busy_lock;
u64 status;
size_t bitmap_size;
u64 *active_tsg_bitmap;
u64 *recent_tsg_bitmap;
u64 *ref_tsg_bitmap;
struct nvgpu_cond readout_wq;
};
int gk20a_sched_dev_release(struct inode *inode, struct file *filp);
int gk20a_sched_dev_open(struct inode *inode, struct file *filp);
long gk20a_sched_dev_ioctl(struct file *, unsigned int, unsigned long);
ssize_t gk20a_sched_dev_read(struct file *, char __user *, size_t, loff_t *);
unsigned int gk20a_sched_dev_poll(struct file *, struct poll_table_struct *);
void gk20a_sched_ctrl_tsg_added(struct gk20a *, struct tsg_gk20a *);
void gk20a_sched_ctrl_tsg_removed(struct gk20a *, struct tsg_gk20a *);
int gk20a_sched_ctrl_init(struct gk20a *);
void gk20a_sched_ctrl_cleanup(struct gk20a *g);
#endif /* __NVGPU_SCHED_H */


@@ -0,0 +1,95 @@
/*
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/io.h>
#include <linux/highmem.h>
#include <linux/platform_device.h>
#include <nvgpu/log.h>
#include <nvgpu/linux/vm.h>
#include <nvgpu/bitops.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/dma.h>
#include <nvgpu/soc.h>
#include <nvgpu/hw_sim.h>
#include <nvgpu/sim.h>
#include "gk20a/gk20a.h"
#include "platform_gk20a.h"
#include "os_linux.h"
#include "module.h"
void sim_writel(struct sim_nvgpu *sim, u32 r, u32 v)
{
struct sim_nvgpu_linux *sim_linux =
container_of(sim, struct sim_nvgpu_linux, sim);
writel(v, sim_linux->regs + r);
}
u32 sim_readl(struct sim_nvgpu *sim, u32 r)
{
struct sim_nvgpu_linux *sim_linux =
container_of(sim, struct sim_nvgpu_linux, sim);
return readl(sim_linux->regs + r);
}
void nvgpu_remove_sim_support_linux(struct gk20a *g)
{
struct sim_nvgpu_linux *sim_linux;
if (!g->sim)
return;
sim_linux = container_of(g->sim, struct sim_nvgpu_linux, sim);
if (sim_linux->regs) {
sim_writel(g->sim, sim_config_r(), sim_config_mode_disabled_v());
iounmap(sim_linux->regs);
sim_linux->regs = NULL;
}
nvgpu_kfree(g, sim_linux);
g->sim = NULL;
}
int nvgpu_init_sim_support_linux(struct gk20a *g,
struct platform_device *dev)
{
struct sim_nvgpu_linux *sim_linux;
int err = -ENOMEM;
if (!nvgpu_platform_is_simulation(g))
return 0;
sim_linux = nvgpu_kzalloc(g, sizeof(*sim_linux));
if (!sim_linux)
return err;
g->sim = &sim_linux->sim;
g->sim->g = g;
sim_linux->regs = nvgpu_ioremap_resource(dev,
GK20A_SIM_IORESOURCE_MEM,
&sim_linux->reg_mem);
if (IS_ERR(sim_linux->regs)) {
nvgpu_err(g, "failed to remap gk20a sim regs");
err = PTR_ERR(sim_linux->regs);
goto fail;
}
sim_linux->remove_support_linux = nvgpu_remove_sim_support_linux;
return 0;
fail:
nvgpu_remove_sim_support_linux(g);
return err;
}


@@ -0,0 +1,91 @@
/*
* Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/io.h>
#include <linux/highmem.h>
#include <linux/platform_device.h>
#include <nvgpu/log.h>
#include <nvgpu/linux/vm.h>
#include <nvgpu/bitops.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/dma.h>
#include <nvgpu/hw_sim_pci.h>
#include <nvgpu/sim.h>
#include "gk20a/gk20a.h"
#include "os_linux.h"
#include "module.h"
static bool _nvgpu_pci_is_simulation(struct gk20a *g, u32 sim_base)
{
u32 cfg;
bool is_simulation = false;
cfg = nvgpu_readl(g, sim_base + sim_config_r());
if (sim_config_mode_v(cfg) == sim_config_mode_enabled_v())
is_simulation = true;
return is_simulation;
}
void nvgpu_remove_sim_support_linux_pci(struct gk20a *g)
{
struct sim_nvgpu_linux *sim_linux;
bool is_simulation;
is_simulation = _nvgpu_pci_is_simulation(g, sim_r());
if (!is_simulation) {
return;
}
if (!g->sim) {
nvgpu_warn(g, "sim_gk20a not allocated");
return;
}
sim_linux = container_of(g->sim, struct sim_nvgpu_linux, sim);
if (sim_linux->regs) {
sim_writel(g->sim, sim_config_r(), sim_config_mode_disabled_v());
sim_linux->regs = NULL;
}
nvgpu_kfree(g, sim_linux);
g->sim = NULL;
}
int nvgpu_init_sim_support_linux_pci(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct sim_nvgpu_linux *sim_linux;
int err = -ENOMEM;
bool is_simulation;
is_simulation = _nvgpu_pci_is_simulation(g, sim_r());
__nvgpu_set_enabled(g, NVGPU_IS_FMODEL, is_simulation);
if (!is_simulation)
return 0;
sim_linux = nvgpu_kzalloc(g, sizeof(*sim_linux));
if (!sim_linux)
return err;
g->sim = &sim_linux->sim;
g->sim->g = g;
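/* on PCI, the SIM registers sit inside the already mapped register aperture (l->regs), at offset sim_r() */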
sim_linux->regs = l->regs + sim_r();
sim_linux->remove_support_linux = nvgpu_remove_sim_support_linux_pci;
return 0;
}


@@ -0,0 +1,122 @@
/*
* Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#include <soc/tegra/chip-id.h>
#include <soc/tegra/fuse.h>
#include <soc/tegra/tegra_bpmp.h>
#ifdef CONFIG_TEGRA_HV_MANAGER
#include <soc/tegra/virt/syscalls.h>
#endif
#include <nvgpu/soc.h>
#include "os_linux.h"
#include "platform_gk20a.h"
bool nvgpu_platform_is_silicon(struct gk20a *g)
{
return tegra_platform_is_silicon();
}
bool nvgpu_platform_is_simulation(struct gk20a *g)
{
return tegra_platform_is_vdk();
}
bool nvgpu_platform_is_fpga(struct gk20a *g)
{
return tegra_platform_is_fpga();
}
bool nvgpu_is_hypervisor_mode(struct gk20a *g)
{
return is_tegra_hypervisor_mode();
}
bool nvgpu_is_bpmp_running(struct gk20a *g)
{
return tegra_bpmp_running();
}
bool nvgpu_is_soc_t194_a01(struct gk20a *g)
{
return ((tegra_get_chip_id() == TEGRA194 &&
tegra_chip_get_revision() == TEGRA194_REVISION_A01) ?
true : false);
}
#ifdef CONFIG_TEGRA_HV_MANAGER
/* When nvlink is enabled on dGPU, we need to use physical memory addresses.
* There is no SMMU translation. However, the device initially enumerates as a
* PCIe device. As such, when allocating memory for this PCIe device, the DMA
* framework ends up allocating memory using SMMU (if enabled in device tree).
* As a result, when we switch to nvlink, we need to use underlying physical
* addresses, even if memory mappings exist in SMMU.
* In addition, when stage-2 SMMU translation is enabled (for instance when HV
* is enabled), the addresses we get from dma_alloc are IPAs. We need to
* convert them to PA.
*/
static u64 nvgpu_tegra_hv_ipa_pa(struct gk20a *g, u64 ipa)
{
struct device *dev = dev_from_gk20a(g);
struct gk20a_platform *platform = gk20a_get_platform(dev);
struct hyp_ipa_pa_info info;
int err;
u64 pa = 0ULL;
err = hyp_read_ipa_pa_info(&info, platform->vmid, ipa);
if (err < 0) {
/* WAR for bug 2096877
* hyp_read_ipa_pa_info only looks up RAM mappings.
* assume one to one IPA:PA mapping for syncpt aperture
*/
u64 start = g->syncpt_unit_base;
u64 end = g->syncpt_unit_base + g->syncpt_unit_size;
if ((ipa >= start) && (ipa < end)) {
pa = ipa;
nvgpu_log(g, gpu_dbg_map_v,
"ipa=%llx vmid=%d -> pa=%llx (SYNCPT)\n",
ipa, platform->vmid, pa);
} else {
nvgpu_err(g, "ipa=%llx translation failed vmid=%u err=%d",
ipa, platform->vmid, err);
}
} else {
pa = info.base + info.offset;
nvgpu_log(g, gpu_dbg_map_v,
"ipa=%llx vmid=%d -> pa=%llx "
"base=%llx offset=%llx size=%llx\n",
ipa, platform->vmid, pa, info.base,
info.offset, info.size);
}
return pa;
}
#endif
int nvgpu_init_soc_vars(struct gk20a *g)
{
#ifdef CONFIG_TEGRA_HV_MANAGER
struct device *dev = dev_from_gk20a(g);
struct gk20a_platform *platform = gk20a_get_platform(dev);
int err;
if (nvgpu_is_hypervisor_mode(g)) {
err = hyp_read_gid(&platform->vmid);
if (err) {
nvgpu_err(g, "failed to read vmid");
return err;
}
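/* when running under the hypervisor, route DMA address translation through the IPA->PA helper above */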
platform->phys_addr = nvgpu_tegra_hv_ipa_pa;
}
#endif
return 0;
}


@@ -0,0 +1,419 @@
/*
* Semaphore Sync Framework Integration
*
* Copyright (c) 2017-2018, NVIDIA Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/module.h>
#include <nvgpu/lock.h>
#include <nvgpu/kmem.h>
#include <nvgpu/semaphore.h>
#include <nvgpu/bug.h>
#include <nvgpu/kref.h>
#include "../linux/channel.h"
#include "../drivers/staging/android/sync.h"
#include "sync_sema_android.h"
static const struct sync_timeline_ops gk20a_sync_timeline_ops;
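/*
* max is the threshold most recently handed out to a sync point on this
* timeline; min is the highest threshold known to have signaled.
*/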
struct gk20a_sync_timeline {
struct sync_timeline obj;
u32 max;
u32 min;
};
/**
* The sync framework dups pts when merging fences. We share a single
* refcounted gk20a_sync_pt for each duped pt.
*/
struct gk20a_sync_pt {
struct gk20a *g;
struct nvgpu_ref refcount;
u32 thresh;
struct nvgpu_semaphore *sema;
struct gk20a_sync_timeline *obj;
/*
* Use a spin lock here since it will have better performance
* than a mutex - there should be very little contention on this
* lock.
*/
struct nvgpu_spinlock lock;
};
struct gk20a_sync_pt_inst {
struct sync_pt pt;
struct gk20a_sync_pt *shared;
};
/**
* Compares sync pt values a and b, both of which will trigger either before
* or after ref (i.e. a and b trigger before ref, or a and b trigger after
* ref). Supplying ref allows us to handle wrapping correctly.
*
* Returns -1 if a < b (a triggers before b)
* 0 if a = b (a and b trigger at the same time)
* 1 if a > b (b triggers before a)
*/
static int __gk20a_sync_pt_compare_ref(
u32 ref,
u32 a,
u32 b)
{
/*
* We normalize both a and b by subtracting ref from them.
* Denote the normalized values by a_n and b_n. Note that because
* of wrapping, a_n and/or b_n may be negative.
*
* The normalized values a_n and b_n satisfy:
* - a positive value triggers before a negative value
* - a smaller positive value triggers before a greater positive value
* - a smaller negative value (greater in absolute value) triggers
* before a greater negative value (smaller in absolute value).
*
* Thus we can just stick to unsigned arithmetic and compare
* (u32)a_n to (u32)b_n.
*
* Just to reiterate the possible cases:
*
* 1A) ...ref..a....b....
* 1B) ...ref..b....a....
* 2A) ...b....ref..a.... b_n < 0
* 2B) ...a....ref..b.... a_n > 0
* 3A) ...a....b....ref.. a_n < 0, b_n < 0
* 3B) ...b....a....ref.. a_n < 0, b_n < 0
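*
* Hypothetical worked example (not from the original code): with
* ref = 0xfffffff0, a = 0x00000004 and b = 0xfffffff8, we get
* a_n = 0x14 and b_n = 0x08, so b triggers before a and this
* function returns 1, even though a < b as plain integers.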
*/
u32 a_n = a - ref;
u32 b_n = b - ref;
if (a_n < b_n)
return -1;
else if (a_n > b_n)
return 1;
else
return 0;
}
static struct gk20a_sync_pt *to_gk20a_sync_pt(struct sync_pt *pt)
{
struct gk20a_sync_pt_inst *pti =
container_of(pt, struct gk20a_sync_pt_inst, pt);
return pti->shared;
}
static struct gk20a_sync_timeline *to_gk20a_timeline(struct sync_timeline *obj)
{
if (WARN_ON(obj->ops != &gk20a_sync_timeline_ops))
return NULL;
return (struct gk20a_sync_timeline *)obj;
}
static void gk20a_sync_pt_free_shared(struct nvgpu_ref *ref)
{
struct gk20a_sync_pt *pt =
container_of(ref, struct gk20a_sync_pt, refcount);
struct gk20a *g = pt->g;
if (pt->sema)
nvgpu_semaphore_put(pt->sema);
nvgpu_kfree(g, pt);
}
static struct gk20a_sync_pt *gk20a_sync_pt_create_shared(
struct gk20a *g,
struct gk20a_sync_timeline *obj,
struct nvgpu_semaphore *sema)
{
struct gk20a_sync_pt *shared;
shared = nvgpu_kzalloc(g, sizeof(*shared));
if (!shared)
return NULL;
nvgpu_ref_init(&shared->refcount);
shared->g = g;
shared->obj = obj;
shared->sema = sema;
shared->thresh = ++obj->max; /* sync framework has a lock */
nvgpu_spinlock_init(&shared->lock);
nvgpu_semaphore_get(sema);
return shared;
}
static struct sync_pt *gk20a_sync_pt_create_inst(
struct gk20a *g,
struct gk20a_sync_timeline *obj,
struct nvgpu_semaphore *sema)
{
struct gk20a_sync_pt_inst *pti;
pti = (struct gk20a_sync_pt_inst *)
sync_pt_create(&obj->obj, sizeof(*pti));
if (!pti)
return NULL;
pti->shared = gk20a_sync_pt_create_shared(g, obj, sema);
if (!pti->shared) {
sync_pt_free(&pti->pt);
return NULL;
}
return &pti->pt;
}
static void gk20a_sync_pt_free_inst(struct sync_pt *sync_pt)
{
struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
if (pt)
nvgpu_ref_put(&pt->refcount, gk20a_sync_pt_free_shared);
}
static struct sync_pt *gk20a_sync_pt_dup_inst(struct sync_pt *sync_pt)
{
struct gk20a_sync_pt_inst *pti;
struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
pti = (struct gk20a_sync_pt_inst *)
sync_pt_create(&pt->obj->obj, sizeof(*pti));
if (!pti)
return NULL;
pti->shared = pt;
nvgpu_ref_get(&pt->refcount);
return &pti->pt;
}
/*
* This function may be called concurrently on the same sync_pt, so a
* lock is needed to protect access to the sync_pt's internal data
* structures, which are modified as a side effect of calling it.
*/
static int gk20a_sync_pt_has_signaled(struct sync_pt *sync_pt)
{
struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
struct gk20a_sync_timeline *obj = pt->obj;
bool signaled = true;
nvgpu_spinlock_acquire(&pt->lock);
if (!pt->sema)
goto done;
/* Acquired == not released yet == active == not signaled. */
signaled = !nvgpu_semaphore_is_acquired(pt->sema);
if (signaled) {
/* Update min if necessary. */
if (__gk20a_sync_pt_compare_ref(obj->max, pt->thresh,
obj->min) == 1)
obj->min = pt->thresh;
/* Release the semaphore to the pool. */
nvgpu_semaphore_put(pt->sema);
pt->sema = NULL;
}
done:
nvgpu_spinlock_release(&pt->lock);
return signaled;
}
static int gk20a_sync_pt_compare(struct sync_pt *a, struct sync_pt *b)
{
bool a_expired;
bool b_expired;
struct gk20a_sync_pt *pt_a = to_gk20a_sync_pt(a);
struct gk20a_sync_pt *pt_b = to_gk20a_sync_pt(b);
if (WARN_ON(pt_a->obj != pt_b->obj))
return 0;
/* Early out */
if (a == b)
return 0;
a_expired = gk20a_sync_pt_has_signaled(a);
b_expired = gk20a_sync_pt_has_signaled(b);
if (a_expired && !b_expired) {
/* Easy, a was earlier */
return -1;
} else if (!a_expired && b_expired) {
/* Easy, b was earlier */
return 1;
}
/* Both a and b are expired (trigger before min) or not
* expired (trigger after min), so we can use min
* as a reference value for __gk20a_sync_pt_compare_ref.
*/
return __gk20a_sync_pt_compare_ref(pt_a->obj->min,
pt_a->thresh, pt_b->thresh);
}
static u32 gk20a_sync_timeline_current(struct gk20a_sync_timeline *obj)
{
return obj->min;
}
static void gk20a_sync_timeline_value_str(struct sync_timeline *timeline,
char *str, int size)
{
struct gk20a_sync_timeline *obj =
(struct gk20a_sync_timeline *)timeline;
snprintf(str, size, "%d", gk20a_sync_timeline_current(obj));
}
static void gk20a_sync_pt_value_str_for_sema(struct gk20a_sync_pt *pt,
char *str, int size)
{
struct nvgpu_semaphore *s = pt->sema;
snprintf(str, size, "S: pool=%d [v=%u,r_v=%u]",
s->location.pool->page_idx,
nvgpu_semaphore_get_value(s),
nvgpu_semaphore_read(s));
}
static void gk20a_sync_pt_value_str(struct sync_pt *sync_pt, char *str,
int size)
{
struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
if (pt->sema) {
gk20a_sync_pt_value_str_for_sema(pt, str, size);
return;
}
snprintf(str, size, "%d", pt->thresh);
}
static const struct sync_timeline_ops gk20a_sync_timeline_ops = {
.driver_name = "nvgpu_semaphore",
.dup = gk20a_sync_pt_dup_inst,
.has_signaled = gk20a_sync_pt_has_signaled,
.compare = gk20a_sync_pt_compare,
.free_pt = gk20a_sync_pt_free_inst,
.timeline_value_str = gk20a_sync_timeline_value_str,
.pt_value_str = gk20a_sync_pt_value_str,
};
/* Public API */
struct sync_fence *gk20a_sync_fence_fdget(int fd)
{
struct sync_fence *fence = sync_fence_fdget(fd);
int i;
if (!fence)
return NULL;
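/* accept the fence only if every sync point in it belongs to an nvgpu timeline */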
for (i = 0; i < fence->num_fences; i++) {
struct fence *pt = fence->cbs[i].sync_pt;
struct sync_pt *spt = sync_pt_from_fence(pt);
struct sync_timeline *t;
if (spt == NULL) {
sync_fence_put(fence);
return NULL;
}
t = sync_pt_parent(spt);
if (t->ops != &gk20a_sync_timeline_ops) {
sync_fence_put(fence);
return NULL;
}
}
return fence;
}
struct nvgpu_semaphore *gk20a_sync_pt_sema(struct sync_pt *spt)
{
struct gk20a_sync_pt *pt = to_gk20a_sync_pt(spt);
struct nvgpu_semaphore *sema;
nvgpu_spinlock_acquire(&pt->lock);
sema = pt->sema;
if (sema)
nvgpu_semaphore_get(sema);
nvgpu_spinlock_release(&pt->lock);
return sema;
}
void gk20a_sync_timeline_signal(struct sync_timeline *timeline)
{
sync_timeline_signal(timeline, 0);
}
void gk20a_sync_timeline_destroy(struct sync_timeline *timeline)
{
sync_timeline_destroy(timeline);
}
struct sync_timeline *gk20a_sync_timeline_create(
const char *name)
{
struct gk20a_sync_timeline *obj;
obj = (struct gk20a_sync_timeline *)
sync_timeline_create(&gk20a_sync_timeline_ops,
sizeof(struct gk20a_sync_timeline),
name);
if (!obj)
return NULL;
obj->max = 0;
obj->min = 0;
return &obj->obj;
}
struct sync_fence *gk20a_sync_fence_create(
struct channel_gk20a *c,
struct nvgpu_semaphore *sema,
const char *fmt, ...)
{
char name[30];
va_list args;
struct sync_pt *pt;
struct sync_fence *fence;
struct gk20a *g = c->g;
struct nvgpu_channel_linux *os_channel_priv = c->os_priv;
struct nvgpu_os_fence_framework *fence_framework = NULL;
struct gk20a_sync_timeline *timeline = NULL;
fence_framework = &os_channel_priv->fence_framework;
timeline = to_gk20a_timeline(fence_framework->timeline);
pt = gk20a_sync_pt_create_inst(g, timeline, sema);
if (pt == NULL)
return NULL;
va_start(args, fmt);
vsnprintf(name, sizeof(name), fmt, args);
va_end(args);
fence = sync_fence_create(name, pt);
if (fence == NULL) {
sync_pt_free(pt);
return NULL;
}
return fence;
}

Some files were not shown because too many files have changed in this diff.