Open source GPL/LGPL release

This commit is contained in:
svcmobrel-release
2025-12-19 15:25:44 -08:00
commit 9fc87a7ec7
2261 changed files with 576825 additions and 0 deletions

View File

@@ -0,0 +1,24 @@
/*
* Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <nvgpu/bsearch.h>
#include <linux/bsearch.h>
void *nvgpu_bsearch(const void *key, const void *base, size_t nitems, size_t size,
int (*compar)(const void *a, const void *b))
{
return bsearch(key, base, nitems, size, compar);
}

View File

File diff suppressed because it is too large

View File

@@ -0,0 +1,327 @@
/*
* GK20A color decompression engine support
*
* Copyright (c) 2014-2019, NVIDIA Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _CDE_GK20A_H_
#define _CDE_GK20A_H_
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/list.h>
#include <nvgpu/lock.h>
#include <linux/kobject.h>
#include <linux/workqueue.h>
#define MAX_CDE_BUFS 10
#define MAX_CDE_PARAMS 64
#define MAX_CDE_USER_PARAMS 40
#define MAX_CDE_ARRAY_ENTRIES 9
/*
 * The size of the context ring buffer that is dedicated for handling cde
 * jobs. Re-using a context (=channel) for a different cde job forces a cpu
 * wait on the previous job submitted to that channel, so increasing this
 * value reduces the likelihood of stalls.
 */
#define NUM_CDE_CONTEXTS 4
struct dma_buf;
struct device;
struct nvgpu_os_linux;
struct gk20a;
struct nvgpu_fence_type;
struct nvgpu_channel_fence;
struct nvgpu_channel;
struct vm_gk20a;
struct nvgpu_gpfifo_entry;
struct nvgpu_user_fence;
/*
 * This element defines a buffer that is allocated and mapped into the gpu
 * address space. data_byte_offset defines the beginning of the buffer data
 * inside the firmware image. num_bytes defines the size of the buffer in
 * bytes.
 *
 * If data_byte_offset is zero, we allocate an empty buffer.
 */
struct gk20a_cde_hdr_buf {
u64 data_byte_offset;
u64 num_bytes;
};
/*
 * This element defines a constant patch applied to a buffer. It computes
 * the physical address of <source_buf> + source_byte_offset and folds that
 * address into the patched value as per:
 *    value = (current_value & ~mask) | ((address << shift) & mask).
 *
 * The type field defines the register size as:
 *    0 = u32,
 *    1 = u64 (little endian),
 *    2 = u64 (big endian)
 */
struct gk20a_cde_hdr_replace {
u32 target_buf;
u32 source_buf;
s32 shift;
u32 type;
u64 target_byte_offset;
u64 source_byte_offset;
u64 mask;
};
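/*
 * Illustrative sketch, not part of the original header: how the patch
 * formula above would be applied for the u32 case, assuming (per the
 * signed shift field) that a negative shift means a right shift. The
 * helper name is hypothetical.
 */
static inline u32 gk20a_cde_patch_value_u32(u32 current_value, u64 address,
					    s32 shift, u64 mask)
{
	u64 shifted = (shift >= 0) ? (address << shift) : (address >> -shift);

	return (u32)((current_value & ~mask) | (shifted & mask));
}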
enum {
TYPE_PARAM_TYPE_U32 = 0,
TYPE_PARAM_TYPE_U64_LITTLE,
TYPE_PARAM_TYPE_U64_BIG
};
/*
 * This element defines a runtime patch applied to a buffer. Parameter ids
 * below 1024 are reserved for special usage as follows:
 *    0 = comptags_per_cacheline,
 *    1 = slices_per_fbp,
 *    2 = num_fbps,
 *    3 = source buffer first page offset,
 *    4 = source buffer block height log2,
 *    5 = backing store memory address,
 *    6 = destination memory address,
 *    7 = destination size (bytes),
 *    8 = backing store size (bytes),
 *    9 = cache line size.
 *
 * Parameter ids of 1024 and above are user-specified, i.e. they determine
 * where a parameter coming from user space should be placed in the buffers,
 * what its type is, etc.
 *
 * Once the value is available, data_offset is added to it.
 *
 * The value address is then folded into the patched value as per:
 *    value = (current_value & ~mask) | ((address << shift) & mask).
 *
 * The type field defines the register size as:
 *    0 = u32,
 *    1 = u64 (little endian),
 *    2 = u64 (big endian)
 */
struct gk20a_cde_hdr_param {
u32 id;
u32 target_buf;
s32 shift;
u32 type;
s64 data_offset;
u64 target_byte_offset;
u64 mask;
};
enum {
TYPE_PARAM_COMPTAGS_PER_CACHELINE = 0,
TYPE_PARAM_GPU_CONFIGURATION,
TYPE_PARAM_FIRSTPAGEOFFSET,
TYPE_PARAM_NUMPAGES,
TYPE_PARAM_BACKINGSTORE,
TYPE_PARAM_DESTINATION,
TYPE_PARAM_DESTINATION_SIZE,
TYPE_PARAM_BACKINGSTORE_SIZE,
TYPE_PARAM_SOURCE_SMMU_ADDR,
TYPE_PARAM_BACKINGSTORE_BASE_HW,
TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE,
TYPE_PARAM_SCATTERBUFFER,
TYPE_PARAM_SCATTERBUFFER_SIZE,
NUM_RESERVED_PARAMS = 1024,
};
/*
* This header element defines a command. The op field determines whether the
* element is defining an init (0) or convert command (1). data_byte_offset
* denotes the beginning address of command elements in the file.
*/
struct gk20a_cde_hdr_command {
u32 op;
u32 num_entries;
u64 data_byte_offset;
};
enum {
TYPE_BUF_COMMAND_INIT = 0,
TYPE_BUF_COMMAND_CONVERT,
TYPE_BUF_COMMAND_NOOP
};
/*
 * A command element defines one entry inside the push buffer. target_buf
 * identifies the buffer that holds the pushbuffer entries, target_byte_offset
 * gives the offset inside that buffer, and num_bytes the length of the entry
 * in bytes.
 */
struct gk20a_cde_cmd_elem {
u32 target_buf;
u32 padding;
u64 target_byte_offset;
u64 num_bytes;
};
/*
* This element is used for storing a small array of data.
*/
enum {
ARRAY_PROGRAM_OFFSET = 0,
ARRAY_REGISTER_COUNT,
ARRAY_LAUNCH_COMMAND,
NUM_CDE_ARRAYS
};
struct gk20a_cde_hdr_array {
u32 id;
u32 data[MAX_CDE_ARRAY_ENTRIES];
};
/*
 * The following defines a single header element. Each element has a type
 * field and one of the data structures below, selected by that type.
 */
struct gk20a_cde_hdr_elem {
u32 type;
u32 padding;
union {
struct gk20a_cde_hdr_buf buf;
struct gk20a_cde_hdr_replace replace;
struct gk20a_cde_hdr_param param;
u32 required_class;
struct gk20a_cde_hdr_command command;
struct gk20a_cde_hdr_array array;
};
};
enum {
TYPE_BUF = 0,
TYPE_REPLACE,
TYPE_PARAM,
TYPE_REQUIRED_CLASS,
TYPE_COMMAND,
TYPE_ARRAY
};
struct gk20a_cde_param {
u32 id;
u32 padding;
u64 value;
};
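/*
 * Illustrative sketch, not part of the original header: filling one
 * user-specified parameter for gk20a_cde_convert(), assuming (per the
 * comment above) that user parameter ids start at NUM_RESERVED_PARAMS.
 * The helper name is hypothetical.
 */
static inline void gk20a_cde_set_user_param(struct gk20a_cde_param *p,
					    u32 user_index, u64 value)
{
	p->id = NUM_RESERVED_PARAMS + user_index;
	p->padding = 0;
	p->value = value;
}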
struct gk20a_cde_ctx {
struct nvgpu_os_linux *l;
struct device *dev;
/* channel related data */
struct nvgpu_channel *ch;
struct nvgpu_tsg *tsg;
struct vm_gk20a *vm;
/* buf converter configuration */
struct nvgpu_mem mem[MAX_CDE_BUFS];
unsigned int num_bufs;
/* buffer patching params (where should patching be done) */
struct gk20a_cde_hdr_param params[MAX_CDE_PARAMS];
unsigned int num_params;
/* storage for user space parameter values */
u32 user_param_values[MAX_CDE_USER_PARAMS];
u32 surf_param_offset;
u32 surf_param_lines;
u64 surf_vaddr;
u64 compbit_vaddr;
u64 compbit_size;
u64 scatterbuffer_vaddr;
u64 scatterbuffer_size;
u64 backing_store_vaddr;
struct nvgpu_gpfifo_entry *init_convert_cmd;
int init_cmd_num_entries;
struct nvgpu_gpfifo_entry *convert_cmd;
int convert_cmd_num_entries;
struct kobj_attribute attr;
bool init_cmd_executed;
struct nvgpu_list_node list;
bool is_temporary;
bool in_use;
struct delayed_work ctx_deleter_work;
};
static inline struct gk20a_cde_ctx *
gk20a_cde_ctx_from_list(struct nvgpu_list_node *node)
{
return (struct gk20a_cde_ctx *)
((uintptr_t)node - offsetof(struct gk20a_cde_ctx, list));
};
struct gk20a_cde_app {
bool initialised;
struct nvgpu_mutex mutex;
struct nvgpu_list_node free_contexts;
struct nvgpu_list_node used_contexts;
unsigned int ctx_count;
unsigned int ctx_usecount;
unsigned int ctx_count_top;
u32 firmware_version;
u32 arrays[NUM_CDE_ARRAYS][MAX_CDE_ARRAY_ENTRIES];
u32 shader_parameter;
};
void gk20a_cde_destroy(struct nvgpu_os_linux *l);
void gk20a_cde_suspend(struct nvgpu_os_linux *l);
int gk20a_init_cde_support(struct nvgpu_os_linux *l);
int gk20a_cde_reload(struct nvgpu_os_linux *l);
int gk20a_cde_convert(struct nvgpu_os_linux *l,
struct dma_buf *compbits_buf,
u64 compbits_byte_offset,
u64 scatterbuffer_byte_offset,
struct nvgpu_channel_fence *fence,
u32 __flags, struct gk20a_cde_param *params,
int num_params, struct nvgpu_fence_type **fence_out);
int gk20a_prepare_compressible_read(
struct nvgpu_os_linux *l, u32 buffer_fd, u32 request, u64 offset,
u64 compbits_hoffset, u64 compbits_voffset,
u64 scatterbuffer_offset,
u32 width, u32 height, u32 block_height_log2,
u32 submit_flags, struct nvgpu_channel_fence *fence,
u32 *valid_compbits, u32 *zbc_color,
struct nvgpu_user_fence *fence_out);
int gk20a_mark_compressible_write(
struct gk20a *g, u32 buffer_fd, u32 valid_compbits, u64 offset,
u32 zbc_color);
int nvgpu_cde_init_ops(struct nvgpu_os_linux *l);
#endif

View File

@@ -0,0 +1,53 @@
/*
* GM20B CDE
*
* Copyright (c) 2015-2019, NVIDIA Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <nvgpu/gk20a.h>
#include "cde_gm20b.h"
enum programs {
PROG_HPASS = 0,
PROG_VPASS_LARGE = 1,
PROG_VPASS_SMALL = 2,
PROG_HPASS_DEBUG = 3,
PROG_VPASS_LARGE_DEBUG = 4,
PROG_VPASS_SMALL_DEBUG = 5,
PROG_PASSTHROUGH = 6,
};
void gm20b_cde_get_program_numbers(struct gk20a *g,
u32 block_height_log2,
u32 shader_parameter,
int *hprog_out, int *vprog_out)
{
int hprog = PROG_HPASS;
int vprog = (block_height_log2 >= 2) ?
PROG_VPASS_LARGE : PROG_VPASS_SMALL;
if (shader_parameter == 1) {
hprog = PROG_PASSTHROUGH;
vprog = PROG_PASSTHROUGH;
} else if (shader_parameter == 2) {
hprog = PROG_HPASS_DEBUG;
vprog = (block_height_log2 >= 2) ?
PROG_VPASS_LARGE_DEBUG :
PROG_VPASS_SMALL_DEBUG;
}
*hprog_out = hprog;
*vprog_out = vprog;
}

View File

@@ -0,0 +1,27 @@
/*
* GM20B CDE
*
* Copyright (c) 2015-2019, NVIDIA Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _NVHOST_GM20B_CDE
#define _NVHOST_GM20B_CDE
void gm20b_cde_get_program_numbers(struct gk20a *g,
u32 block_height_log2,
u32 shader_parameter,
int *hprog_out, int *vprog_out);
#endif

View File

@@ -0,0 +1,147 @@
/*
* GP10B CDE
*
* Copyright (c) 2015-2019, NVIDIA Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <nvgpu/log.h>
#include <nvgpu/dma.h>
#include <nvgpu/gk20a.h>
#include "cde_gp10b.h"
enum gp10b_programs {
GP10B_PROG_HPASS = 0,
GP10B_PROG_HPASS_4K = 1,
GP10B_PROG_VPASS = 2,
GP10B_PROG_VPASS_4K = 3,
GP10B_PROG_HPASS_DEBUG = 4,
GP10B_PROG_HPASS_4K_DEBUG = 5,
GP10B_PROG_VPASS_DEBUG = 6,
GP10B_PROG_VPASS_4K_DEBUG = 7,
GP10B_PROG_PASSTHROUGH = 8,
};
void gp10b_cde_get_program_numbers(struct gk20a *g,
u32 block_height_log2,
u32 shader_parameter,
int *hprog_out, int *vprog_out)
{
int hprog, vprog;
if (shader_parameter == 1) {
hprog = GP10B_PROG_PASSTHROUGH;
vprog = GP10B_PROG_PASSTHROUGH;
} else {
hprog = GP10B_PROG_HPASS;
vprog = GP10B_PROG_VPASS;
if (shader_parameter == 2) {
hprog = GP10B_PROG_HPASS_DEBUG;
vprog = GP10B_PROG_VPASS_DEBUG;
}
if (!nvgpu_iommuable(g)) {
if (!g->mm.disable_bigpage) {
			nvgpu_warn(g,
				"big pages cannot be used without an IOMMU");
}
hprog |= 1;
vprog |= 1;
}
}
*hprog_out = hprog;
*vprog_out = vprog;
}
bool gp10b_need_scatter_buffer(struct gk20a *g)
{
return !nvgpu_iommuable(g);
}
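/*
 * parity() below returns the even/odd parity of its 32-bit argument: the
 * word is XOR-folded down to a 4-bit value, and the constant 0x6996 acts
 * as a 16-entry lookup table whose bit i holds the parity of nibble i.
 */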
static u8 parity(u32 a)
{
a ^= a>>16u;
a ^= a>>8u;
a ^= a>>4u;
a &= 0xfu;
return (0x6996u >> a) & 1u;
}
int gp10b_populate_scatter_buffer(struct gk20a *g,
struct sg_table *sgt,
size_t surface_size,
void *scatter_buffer_ptr,
size_t scatter_buffer_size)
{
/* map scatter buffer to CPU VA and fill it */
const u32 page_size_log2 = 12;
const u32 page_size = 1 << page_size_log2;
const u32 page_size_shift = page_size_log2 - 7u;
/* 0011 1111 1111 1111 1111 1110 0100 1000 */
const u32 getSliceMaskGP10B = 0x3ffffe48;
u8 *scatter_buffer = scatter_buffer_ptr;
size_t i;
struct scatterlist *sg = NULL;
u8 d = 0;
size_t page = 0;
size_t pages_left;
surface_size = round_up(surface_size, page_size);
pages_left = surface_size >> page_size_log2;
if ((pages_left >> 3) > scatter_buffer_size)
return -ENOMEM;
for_each_sg(sgt->sgl, sg, sgt->nents, i) {
unsigned int j;
u64 surf_pa = sg_phys(sg);
unsigned int n = (int)(sg->length >> page_size_log2);
nvgpu_log(g, gpu_dbg_cde, "surfPA=0x%llx + %d pages", surf_pa, n);
for (j=0; j < n && pages_left > 0; j++, surf_pa += page_size) {
u32 addr = (((u32)(surf_pa>>7)) & getSliceMaskGP10B) >> page_size_shift;
u8 scatter_bit = parity(addr);
u8 bit = page & 7;
d |= scatter_bit << bit;
if (bit == 7) {
scatter_buffer[page >> 3] = d;
d = 0;
}
++page;
--pages_left;
}
if (pages_left == 0)
break;
}
/* write the last byte in case the number of pages is not divisible by 8 */
if ((page & 7) != 0)
scatter_buffer[page >> 3] = d;
if (nvgpu_log_mask_enabled(g, gpu_dbg_cde)) {
nvgpu_log(g, gpu_dbg_cde, "scatterBuffer content:");
for (i = 0; i < page >> 3; i++) {
nvgpu_log(g, gpu_dbg_cde, " %x", scatter_buffer[i]);
}
}
return 0;
}
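/*
 * Illustrative sketch, not part of the original file: the minimum scatter
 * buffer size implied by the code above -- one bit per 4 KB page of the
 * page-aligned surface, packed into bytes. The helper name is hypothetical.
 */
static inline size_t gp10b_scatter_buffer_min_size(size_t surface_size)
{
	/* number of 4 KB pages, rounded up */
	size_t pages = (surface_size + 4095UL) >> 12;

	/* one bit per page, rounded up to whole bytes */
	return (pages + 7UL) >> 3;
}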

View File

@@ -0,0 +1,34 @@
/*
* GP10B CDE
*
* Copyright (c) 2015-2019, NVIDIA Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _NVHOST_GP10B_CDE
#define _NVHOST_GP10B_CDE
#include "os_linux.h"
void gp10b_cde_get_program_numbers(struct gk20a *g,
u32 block_height_log2,
u32 shader_parameter,
int *hprog_out, int *vprog_out);
bool gp10b_need_scatter_buffer(struct gk20a *g);
int gp10b_populate_scatter_buffer(struct gk20a *g,
struct sg_table *sgt,
size_t surface_size,
void *scatter_buffer_ptr,
size_t scatter_buffer_size);
#endif

View File

@@ -0,0 +1,111 @@
/*
* Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef NVGPU_LINUX_CHANNEL_H
#define NVGPU_LINUX_CHANNEL_H
#include <linux/workqueue.h>
#include <linux/dma-buf.h>
#include <nvgpu/types.h>
struct nvgpu_channel;
struct nvgpu_gpfifo;
struct nvgpu_submit_gpfifo_args;
struct nvgpu_channel_fence;
struct nvgpu_fence_type;
struct nvgpu_swprofile;
struct nvgpu_os_linux;
struct nvgpu_cdev;
struct sync_fence;
struct sync_timeline;
struct nvgpu_channel_completion_cb {
/*
* Signal channel owner via a callback, if set, in job cleanup with
* schedule_work. Means that something finished on the channel (perhaps
* more than one job).
*/
void (*fn)(struct nvgpu_channel *, void *);
void *user_data;
/* Make access to the two above atomic */
struct nvgpu_spinlock lock;
/* Per-channel async work task, cannot reschedule itself */
struct work_struct work;
};
struct nvgpu_error_notifier {
struct dma_buf *dmabuf;
void *vaddr;
struct nvgpu_notification *notification;
struct nvgpu_mutex mutex;
};
/*
* channel-global data for sync fences created from the hardware
* synchronization primitive in each particular channel.
*/
struct nvgpu_os_fence_framework {
#if defined(CONFIG_NVGPU_SYNCFD_ANDROID)
struct sync_timeline *timeline;
#elif defined(CONFIG_NVGPU_SYNCFD_STABLE)
u64 context;
bool exists;
#endif
};
struct nvgpu_usermode_bufs_linux {
/*
* Common low level info of these is stored in nvgpu_mems in
* channel_gk20a; these hold lifetimes for the actual dmabuf and its
* dma mapping.
*/
struct nvgpu_usermode_buf_linux {
struct dma_buf *dmabuf;
struct dma_buf_attachment *attachment;
struct sg_table *sgt;
} gpfifo, userd;
};
struct nvgpu_channel_linux {
struct nvgpu_channel *ch;
struct nvgpu_os_fence_framework fence_framework;
struct nvgpu_channel_completion_cb completion_cb;
struct nvgpu_error_notifier error_notifier;
struct dma_buf *cyclestate_buffer_handler;
struct nvgpu_usermode_bufs_linux usermode;
struct nvgpu_cdev *cdev;
};
u32 nvgpu_submit_gpfifo_user_flags_to_common_flags(u32 user_flags);
int nvgpu_channel_init_support_linux(struct nvgpu_os_linux *l);
void nvgpu_channel_remove_support_linux(struct nvgpu_os_linux *l);
/* Deprecated. Use fences in new code. */
struct nvgpu_channel *gk20a_open_new_channel_with_cb(struct gk20a *g,
void (*update_fn)(struct nvgpu_channel *, void *),
void *update_fn_data,
u32 runlist_id,
bool is_privileged_channel);
#endif

View File

@@ -0,0 +1,26 @@
/*
* Copyright (c) 2021, NVIDIA Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <nvgpu/cic.h>
#include <nvgpu/types.h>
struct gk20a;
int nvgpu_cic_report_err_safety_services(struct gk20a *g,
void *err_info, size_t err_size, bool is_critical)
{
return 0;
}

View File

@@ -0,0 +1,307 @@
/*
* Linux clock support
*
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/clk.h>
#ifdef CONFIG_TEGRA_DVFS
#include <soc/tegra/tegra-dvfs.h>
#endif /* CONFIG_TEGRA_DVFS */
#ifdef CONFIG_NV_TEGRA_BPMP
#include <soc/tegra/tegra-bpmp-dvfs.h>
#endif /* CONFIG_NV_TEGRA_BPMP */
#include <nvgpu/pmu/clk/clk.h>
#include "clk.h"
#include "os_linux.h"
#include "platform_gk20a.h"
#include <nvgpu/gk20a.h>
#include <nvgpu/clk_arb.h>
#define HZ_TO_MHZ(x) ((x) / 1000000)
static unsigned long nvgpu_linux_clk_get_rate(struct gk20a *g, u32 api_domain)
{
struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g));
unsigned long ret;
switch (api_domain) {
case CTRL_CLK_DOMAIN_SYSCLK:
case CTRL_CLK_DOMAIN_GPCCLK:
if (g->clk.tegra_clk)
ret = clk_get_rate(g->clk.tegra_clk);
else
ret = clk_get_rate(platform->clk[0]);
break;
case CTRL_CLK_DOMAIN_PWRCLK:
ret = clk_get_rate(platform->clk[1]);
break;
default:
nvgpu_err(g, "unknown clock: %u", api_domain);
ret = 0;
break;
}
return ret;
}
static int nvgpu_linux_clk_set_rate(struct gk20a *g,
u32 api_domain, unsigned long rate)
{
struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g));
int ret;
switch (api_domain) {
case CTRL_CLK_DOMAIN_GPCCLK:
if (g->clk.tegra_clk)
ret = clk_set_rate(g->clk.tegra_clk, rate);
else
ret = clk_set_rate(platform->clk[0], rate);
break;
case CTRL_CLK_DOMAIN_PWRCLK:
ret = clk_set_rate(platform->clk[1], rate);
break;
default:
nvgpu_err(g, "unknown clock: %u", api_domain);
ret = -EINVAL;
break;
}
return ret;
}
static unsigned long nvgpu_linux_get_fmax_at_vmin_safe(struct gk20a *g)
{
struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g));
#ifdef CONFIG_TEGRA_DVFS
	/*
	 * On Tegra platforms with a GPCPLL bus (gbus), the GPU tegra_clk clock
	 * exposed to the frequency governor is a shared user on the gbus. The
	 * gbus itself can be accessed as the GPU clock parent and incorporates
	 * the DVFS related data.
	 */
if (g->clk.tegra_clk)
return tegra_dvfs_get_fmax_at_vmin_safe_t(
g->clk.tegra_clk_parent);
#endif
if (platform->maxmin_clk_id) {
#ifdef CONFIG_NV_TEGRA_BPMP
return tegra_bpmp_dvfs_get_fmax_at_vmin(
platform->maxmin_clk_id);
#endif
}
return 0;
}
static u32 nvgpu_linux_get_ref_clock_rate(struct gk20a *g)
{
struct clk *c;
c = clk_get_sys("gpu_ref", "gpu_ref");
if (IS_ERR(c)) {
nvgpu_err(g, "failed to get GPCPLL reference clock");
return 0;
}
return clk_get_rate(c);
}
static int nvgpu_linux_predict_mv_at_hz_cur_tfloor(struct clk_gk20a *clk,
unsigned long rate)
{
#ifdef CONFIG_TEGRA_DVFS
return tegra_dvfs_predict_mv_at_hz_cur_tfloor(
clk->tegra_clk_parent, rate);
#else
return -EINVAL;
#endif
}
static unsigned long nvgpu_linux_get_maxrate(struct gk20a *g, u32 api_domain)
{
int ret;
u16 min_mhz, max_mhz;
switch (api_domain) {
case CTRL_CLK_DOMAIN_GPCCLK:
#ifdef CONFIG_TEGRA_DVFS
ret = tegra_dvfs_get_maxrate(g->clk.tegra_clk_parent);
#else
ret = 0;
#endif
/* If dvfs not supported */
if (ret == 0) {
int err = nvgpu_clk_arb_get_arbiter_clk_range(g,
NVGPU_CLK_DOMAIN_GPCCLK,
&min_mhz, &max_mhz);
if (err == 0) {
ret = max_mhz * 1000000L;
}
}
break;
default:
nvgpu_err(g, "unknown clock: %u", api_domain);
ret = 0;
break;
}
return ret;
}
/*
 * This API returns the list of frequencies supported by the iGPU.
 * Set *num_points to 0 to query the number of available points, which is
 * returned through *num_points itself. If *num_points is non-zero, the
 * caller-provided freqs array is filled and must hold at least *num_points
 * entries.
 */
static int nvgpu_linux_clk_get_f_points(struct gk20a *g,
u32 api_domain, u32 *num_points, u16 *freqs)
{
struct device *dev = dev_from_gk20a(g);
struct gk20a_platform *platform = gk20a_get_platform(dev);
unsigned long *gpu_freq_table;
int ret = 0;
int num_supported_freq = 0;
u32 i;
switch (api_domain) {
case CTRL_CLK_DOMAIN_GPCCLK:
ret = platform->get_clk_freqs(dev, &gpu_freq_table,
&num_supported_freq);
if (ret) {
return ret;
}
if (num_points == NULL) {
return -EINVAL;
}
if (*num_points != 0U) {
if (freqs == NULL || (*num_points > (u32)num_supported_freq)) {
return -EINVAL;
}
}
if (*num_points == 0) {
*num_points = num_supported_freq;
} else {
for (i = 0; i < *num_points; i++) {
freqs[i] = HZ_TO_MHZ(gpu_freq_table[i]);
}
}
break;
default:
nvgpu_err(g, "unknown clock: %u", api_domain);
ret = -EINVAL;
break;
}
return ret;
}
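/*
 * Illustrative usage sketch, not part of the original file: the two-call
 * pattern described above, invoked through the HAL pointer installed below
 * in nvgpu_linux_init_clk_support(). Error handling is trimmed and the
 * function name is hypothetical.
 */
static int nvgpu_example_query_gpc_freqs(struct gk20a *g)
{
	u32 num_points = 0;
	u16 *freqs;
	int err;

	/* first call: with *num_points == 0, only the count is returned */
	err = g->ops.clk.clk_domain_get_f_points(g, CTRL_CLK_DOMAIN_GPCCLK,
						 &num_points, NULL);
	if (err != 0)
		return err;

	freqs = kcalloc(num_points, sizeof(*freqs), GFP_KERNEL);
	if (freqs == NULL)
		return -ENOMEM;

	/* second call: fill the caller-provided array (values in MHz) */
	err = g->ops.clk.clk_domain_get_f_points(g, CTRL_CLK_DOMAIN_GPCCLK,
						 &num_points, freqs);

	kfree(freqs);
	return err;
}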
static int nvgpu_clk_get_range(struct gk20a *g, u32 api_domain,
u16 *min_mhz, u16 *max_mhz)
{
struct device *dev = dev_from_gk20a(g);
struct gk20a_platform *platform = gk20a_get_platform(dev);
unsigned long *freqs;
int num_freqs;
int ret;
switch (api_domain) {
case CTRL_CLK_DOMAIN_GPCCLK:
ret = platform->get_clk_freqs(dev, &freqs, &num_freqs);
if (!ret) {
*min_mhz = HZ_TO_MHZ(freqs[0]);
*max_mhz = HZ_TO_MHZ(freqs[num_freqs - 1]);
}
break;
default:
nvgpu_err(g, "unknown clock: %u", api_domain);
ret = -EINVAL;
break;
}
return ret;
}
/*
 * rate_target is passed in Hz; rounded_rate is returned in Hz.
 */
static int nvgpu_clk_get_round_rate(struct gk20a *g,
u32 api_domain, unsigned long rate_target,
unsigned long *rounded_rate)
{
struct device *dev = dev_from_gk20a(g);
struct gk20a_platform *platform = gk20a_get_platform(dev);
unsigned long *freqs;
int num_freqs;
int i, ret = 0;
switch (api_domain) {
case CTRL_CLK_DOMAIN_GPCCLK:
ret = platform->get_clk_freqs(dev, &freqs, &num_freqs);
for (i = 0; i < num_freqs; ++i) {
if (freqs[i] >= rate_target) {
*rounded_rate = freqs[i];
return 0;
}
}
*rounded_rate = freqs[num_freqs - 1];
break;
default:
nvgpu_err(g, "unknown clock: %u", api_domain);
ret = -EINVAL;
break;
}
return ret;
}
static int nvgpu_linux_prepare_enable(struct clk_gk20a *clk)
{
return clk_prepare_enable(clk->tegra_clk);
}
static void nvgpu_linux_disable_unprepare(struct clk_gk20a *clk)
{
clk_disable_unprepare(clk->tegra_clk);
}
void nvgpu_linux_init_clk_support(struct gk20a *g)
{
g->ops.clk.get_rate = nvgpu_linux_clk_get_rate;
g->ops.clk.set_rate = nvgpu_linux_clk_set_rate;
g->ops.clk.get_fmax_at_vmin_safe = nvgpu_linux_get_fmax_at_vmin_safe;
g->ops.clk.get_ref_clock_rate = nvgpu_linux_get_ref_clock_rate;
g->ops.clk.predict_mv_at_hz_cur_tfloor = nvgpu_linux_predict_mv_at_hz_cur_tfloor;
g->ops.clk.get_maxrate = nvgpu_linux_get_maxrate;
g->ops.clk.prepare_enable = nvgpu_linux_prepare_enable;
g->ops.clk.disable_unprepare = nvgpu_linux_disable_unprepare;
g->ops.clk.clk_domain_get_f_points = nvgpu_linux_clk_get_f_points;
g->ops.clk.get_clk_range = nvgpu_clk_get_range;
g->ops.clk.clk_get_round_rate = nvgpu_clk_get_round_rate;
g->ops.clk.measure_freq = nvgpu_clk_measure_freq;
}

View File

@@ -0,0 +1,22 @@
/*
* Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef NVGPU_COMMON_LINUX_CLK_H
#define NVGPU_COMMON_LINUX_CLK_H
struct gk20a;
void nvgpu_linux_init_clk_support(struct gk20a *g);
#endif

View File

@@ -0,0 +1,148 @@
/*
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/dma-buf.h>
#include <nvgpu/comptags.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/linux/vm.h>
#include "dmabuf_priv.h"
void gk20a_get_comptags(struct nvgpu_os_buffer *buf,
struct gk20a_comptags *comptags)
{
struct gk20a_dmabuf_priv *priv = gk20a_dma_buf_get_drvdata(buf->dmabuf,
buf->dev);
if (!comptags)
return;
if (!priv) {
(void) memset(comptags, 0, sizeof(*comptags));
return;
}
nvgpu_mutex_acquire(&priv->lock);
*comptags = priv->comptags;
nvgpu_mutex_release(&priv->lock);
}
int gk20a_alloc_or_get_comptags(struct gk20a *g,
struct nvgpu_os_buffer *buf,
struct gk20a_comptag_allocator *allocator,
struct gk20a_comptags *comptags)
{
int ret = 0;
struct gk20a_dmabuf_priv *priv = NULL;
u32 offset;
int err;
u64 ctag_granularity;
u32 lines;
ret = gk20a_dmabuf_alloc_drvdata(buf->dmabuf, buf->dev);
if (ret) {
nvgpu_err(g, "error allocating comptags priv data");
return ret;
}
priv = gk20a_dma_buf_get_drvdata(buf->dmabuf, buf->dev);
if (!priv)
return -ENOSYS;
nvgpu_mutex_acquire(&priv->lock);
if (priv->comptags.allocated) {
/*
* already allocated
*/
*comptags = priv->comptags;
err = 0;
goto exit_locked;
}
ctag_granularity = g->ops.fb.compression_page_size(g);
lines = DIV_ROUND_UP_ULL(buf->dmabuf->size, ctag_granularity);
	/* 0-sized buffer? Shouldn't occur, but let's check anyway. */
if (lines < 1) {
err = -EINVAL;
goto exit_locked;
}
/* store the allocator so we can use it when we free the ctags */
priv->comptag_allocator = allocator;
err = gk20a_comptaglines_alloc(allocator, &offset, lines);
if (!err) {
priv->comptags.offset = offset;
priv->comptags.lines = lines;
priv->comptags.needs_clear = true;
} else {
priv->comptags.offset = 0;
priv->comptags.lines = 0;
priv->comptags.needs_clear = false;
}
	/*
	 * We don't report an error here if comptag allocation failed. The
	 * caller will simply fall back to incompressible kinds. It would
	 * not be safe to re-allocate comptags anyway on successive calls,
	 * as that would break map aliasing.
	 */
err = 0;
priv->comptags.allocated = true;
*comptags = priv->comptags;
exit_locked:
nvgpu_mutex_release(&priv->lock);
return err;
}
bool gk20a_comptags_start_clear(struct nvgpu_os_buffer *buf)
{
struct gk20a_dmabuf_priv *priv = gk20a_dma_buf_get_drvdata(buf->dmabuf,
buf->dev);
bool clear_started = false;
if (priv) {
nvgpu_mutex_acquire(&priv->lock);
clear_started = priv->comptags.needs_clear;
if (!clear_started)
nvgpu_mutex_release(&priv->lock);
}
return clear_started;
}
void gk20a_comptags_finish_clear(struct nvgpu_os_buffer *buf,
bool clear_successful)
{
struct gk20a_dmabuf_priv *priv = gk20a_dma_buf_get_drvdata(buf->dmabuf,
buf->dev);
if (priv) {
if (clear_successful)
priv->comptags.needs_clear = false;
nvgpu_mutex_release(&priv->lock);
}
}

View File

@@ -0,0 +1,67 @@
/*
* Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/wait.h>
#include <linux/sched.h>
#include <nvgpu/cond.h>
int nvgpu_cond_init(struct nvgpu_cond *cond)
{
init_waitqueue_head(&cond->wq);
cond->initialized = true;
return 0;
}
void nvgpu_cond_destroy(struct nvgpu_cond *cond)
{
cond->initialized = false;
}
void nvgpu_cond_signal(struct nvgpu_cond *cond)
{
BUG_ON(!cond->initialized);
wake_up(&cond->wq);
}
void nvgpu_cond_signal_interruptible(struct nvgpu_cond *cond)
{
BUG_ON(!cond->initialized);
wake_up_interruptible(&cond->wq);
}
int nvgpu_cond_broadcast(struct nvgpu_cond *cond)
{
if (!cond->initialized)
return -EINVAL;
wake_up_all(&cond->wq);
return 0;
}
int nvgpu_cond_broadcast_interruptible(struct nvgpu_cond *cond)
{
if (!cond->initialized)
return -EINVAL;
wake_up_interruptible_all(&cond->wq);
return 0;
}
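/*
 * Illustrative usage sketch, not part of the original file, using only the
 * functions defined above: initialize a condition, wake waiters, tear it
 * down. The corresponding wait helpers live elsewhere in nvgpu.
 */
static void nvgpu_cond_example(struct nvgpu_cond *cond)
{
	if (nvgpu_cond_init(cond) != 0)
		return;

	/* ... some other thread blocks on this condition ... */

	nvgpu_cond_signal(cond);		/* wake a single waiter */
	(void) nvgpu_cond_broadcast(cond);	/* wake all waiters */

	nvgpu_cond_destroy(cond);
}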

View File

@@ -0,0 +1,499 @@
/*
* Copyright (C) 2017-2021, NVIDIA Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "debug_cde.h"
#include "debug_ce.h"
#include "debug_fifo.h"
#include "debug_gr.h"
#include "debug_allocator.h"
#include "debug_kmem.h"
#include "debug_pmu.h"
#include "debug_sched.h"
#include "debug_hal.h"
#include "debug_xve.h"
#include "debug_ltc.h"
#include "debug_bios.h"
#include "os_linux.h"
#include "platform_gk20a.h"
#include <nvgpu/gk20a.h>
#include <nvgpu/power_features/pg.h>
#include <nvgpu/nvgpu_init.h>
#include <nvgpu/tsg.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/uaccess.h>
#include <nvgpu/debug.h>
unsigned int gk20a_debug_trace_cmdbuf;
static inline void gk20a_debug_write_printk(void *ctx, const char *str)
{
struct gk20a *g = ctx;
nvgpu_dbg_dump_impl(g, str);
}
static inline void gk20a_debug_write_to_seqfile(void *ctx, const char *str)
{
seq_printf((struct seq_file *)ctx, "%s\n", str);
}
void gk20a_debug_output(struct nvgpu_debug_context *o, const char *fmt, ...)
{
va_list args;
va_start(args, fmt);
vsnprintf(o->buf, sizeof(o->buf), fmt, args);
va_end(args);
o->fn(o->ctx, o->buf);
}
void gk20a_debug_show_dump(struct gk20a *g, struct nvgpu_debug_context *o)
{
nvgpu_channel_debug_dump_all(g, o);
g->ops.pbdma.dump_status(g, o);
g->ops.engine_status.dump_engine_status(g, o);
}
static int gk20a_gr_dump_regs(struct gk20a *g,
struct nvgpu_debug_context *o)
{
if (g->ops.gr.dump_gr_regs)
nvgpu_pg_elpg_protected_call(g, g->ops.gr.dump_gr_regs(g, o));
return 0;
}
void gk20a_gr_debug_dump(struct gk20a *g)
{
struct nvgpu_debug_context o = {
.fn = gk20a_debug_write_printk,
.ctx = g,
};
gk20a_gr_dump_regs(g, &o);
}
static int gk20a_gr_debug_show(struct seq_file *s, void *unused)
{
struct device *dev = s->private;
struct gk20a *g = gk20a_get_platform(dev)->g;
struct nvgpu_debug_context o = {
.fn = gk20a_debug_write_to_seqfile,
.ctx = s,
};
int err;
err = gk20a_busy(g);
if (err) {
nvgpu_err(g, "failed to power on gpu: %d", err);
return -EINVAL;
}
gk20a_gr_dump_regs(g, &o);
gk20a_idle(g);
return 0;
}
void gk20a_debug_dump(struct gk20a *g)
{
struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g));
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct nvgpu_debug_context o = {
.fn = gk20a_debug_write_printk,
.ctx = g,
};
/* HAL only initialized after 1st power-on */
if (g->ops.debug.show_dump)
g->ops.debug.show_dump(g, &o);
if (platform->dump_platform_dependencies && l->enable_platform_dbg)
platform->dump_platform_dependencies(dev_from_gk20a(g));
}
static int gk20a_debug_show(struct seq_file *s, void *unused)
{
struct device *dev = s->private;
struct nvgpu_debug_context o = {
.fn = gk20a_debug_write_to_seqfile,
.ctx = s,
};
struct gk20a *g;
int err;
g = gk20a_get_platform(dev)->g;
err = gk20a_busy(g);
if (err) {
nvgpu_err(g, "failed to power on gpu: %d", err);
return -EFAULT;
}
/* HAL only initialized after 1st power-on */
if (g->ops.debug.show_dump)
g->ops.debug.show_dump(g, &o);
gk20a_idle(g);
return 0;
}
static int gk20a_gr_debug_open(struct inode *inode, struct file *file)
{
return single_open(file, gk20a_gr_debug_show, inode->i_private);
}
static int gk20a_debug_open(struct inode *inode, struct file *file)
{
return single_open(file, gk20a_debug_show, inode->i_private);
}
static const struct file_operations gk20a_gr_debug_fops = {
.open = gk20a_gr_debug_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static const struct file_operations gk20a_debug_fops = {
.open = gk20a_debug_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static ssize_t disable_bigpage_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos)
{
char buf[3];
struct gk20a *g = file->private_data;
if (g->mm.disable_bigpage)
buf[0] = 'Y';
else
buf[0] = 'N';
buf[1] = '\n';
buf[2] = 0x00;
return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
}
static ssize_t disable_bigpage_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos)
{
char buf[32];
int buf_size;
bool bv;
struct gk20a *g = file->private_data;
int err;
buf_size = min(count, (sizeof(buf)-1));
if (copy_from_user(buf, user_buf, buf_size))
return -EFAULT;
if (strtobool(buf, &bv) == 0) {
g->mm.disable_bigpage = bv;
err = nvgpu_init_gpu_characteristics(g);
if (err != 0) {
nvgpu_err(g, "failed to init GPU characteristics");
return -ENOSYS;
}
}
return count;
}
static struct file_operations disable_bigpage_fops = {
.open = simple_open,
.read = disable_bigpage_read,
.write = disable_bigpage_write,
};
static int railgate_residency_show(struct seq_file *s, void *data)
{
struct gk20a *g = s->private;
struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g));
unsigned long time_since_last_state_transition_ms;
unsigned long total_rail_gate_time_ms;
unsigned long total_rail_ungate_time_ms;
if (platform->is_railgated(dev_from_gk20a(g))) {
time_since_last_state_transition_ms =
jiffies_to_msecs(jiffies -
g->pstats.last_rail_gate_complete);
total_rail_ungate_time_ms = g->pstats.total_rail_ungate_time_ms;
total_rail_gate_time_ms =
g->pstats.total_rail_gate_time_ms +
time_since_last_state_transition_ms;
} else {
time_since_last_state_transition_ms =
jiffies_to_msecs(jiffies -
g->pstats.last_rail_ungate_complete);
total_rail_gate_time_ms = g->pstats.total_rail_gate_time_ms;
total_rail_ungate_time_ms =
g->pstats.total_rail_ungate_time_ms +
time_since_last_state_transition_ms;
}
seq_printf(s, "Time with Rails Gated: %lu ms\n"
"Time with Rails UnGated: %lu ms\n"
"Total railgating cycles: %lu\n",
total_rail_gate_time_ms,
total_rail_ungate_time_ms,
g->pstats.railgating_cycle_count - 1);
return 0;
}
static int railgate_residency_open(struct inode *inode, struct file *file)
{
return single_open(file, railgate_residency_show, inode->i_private);
}
static const struct file_operations railgate_residency_fops = {
.open = railgate_residency_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static int gk20a_railgating_debugfs_init(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct dentry *d;
if (!g->is_virtual) {
d = debugfs_create_file(
"railgate_residency", S_IRUGO|S_IWUSR, l->debugfs, g,
&railgate_residency_fops);
if (!d)
return -ENOMEM;
}
return 0;
}
static ssize_t timeouts_enabled_read(struct file *file,
char __user *user_buf, size_t count, loff_t *ppos)
{
char buf[3];
struct gk20a *g = file->private_data;
if (nvgpu_is_timeouts_enabled(g))
buf[0] = 'Y';
else
buf[0] = 'N';
buf[1] = '\n';
buf[2] = 0x00;
return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
}
static ssize_t timeouts_enabled_write(struct file *file,
const char __user *user_buf, size_t count, loff_t *ppos)
{
char buf[3];
int buf_size;
bool timeouts_enabled;
struct gk20a *g = file->private_data;
buf_size = min(count, (sizeof(buf)-1));
if (copy_from_user(buf, user_buf, buf_size))
return -EFAULT;
if (strtobool(buf, &timeouts_enabled) == 0) {
nvgpu_mutex_acquire(&g->dbg_sessions_lock);
if (timeouts_enabled == false) {
/* requesting to disable timeouts */
if (g->timeouts_disabled_by_user == false) {
nvgpu_atomic_inc(&g->timeouts_disabled_refcount);
g->timeouts_disabled_by_user = true;
}
} else {
/* requesting to enable timeouts */
if (g->timeouts_disabled_by_user == true) {
nvgpu_atomic_dec(&g->timeouts_disabled_refcount);
g->timeouts_disabled_by_user = false;
}
}
nvgpu_mutex_release(&g->dbg_sessions_lock);
}
return count;
}
static const struct file_operations timeouts_enabled_fops = {
.open = simple_open,
.read = timeouts_enabled_read,
.write = timeouts_enabled_write,
};
static ssize_t dbg_tsg_timeslice_max_read(struct file *file,
char __user *user_buf, size_t count, loff_t *ppos)
{
	char buf[16];
	struct gk20a *g = file->private_data;
	int len;
	/* report the current limit as a decimal string rather than raw bytes */
	len = snprintf(buf, sizeof(buf), "%u\n", g->tsg_dbg_timeslice_max_us);
	return simple_read_from_buffer(user_buf, count, ppos, buf, len);
}
static ssize_t dbg_tsg_timeslice_max_write(struct file *file,
const char __user *user_buf, size_t count, loff_t *ppos)
{
char buf[10];
int buf_size;
unsigned int val = 0;
struct gk20a *g = file->private_data;
unsigned int max_hw_timeslice_us = g->ops.runlist.get_tsg_max_timeslice();
(void) memset(buf, 0, sizeof(buf));
buf_size = min(count, (sizeof(buf)-1));
if (copy_from_user(buf, user_buf, buf_size))
return -EFAULT;
if (kstrtouint(buf, 10, &val) < 0)
return -EINVAL;
if (val < NVGPU_TSG_TIMESLICE_MIN_US ||
val > max_hw_timeslice_us)
return -EINVAL;
g->tsg_dbg_timeslice_max_us = val;
return count;
}
static const struct file_operations dbg_tsg_timeslice_max_fops = {
.open = simple_open,
.read = dbg_tsg_timeslice_max_read,
.write = dbg_tsg_timeslice_max_write,
};
void gk20a_debug_init(struct gk20a *g, const char *debugfs_symlink)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct device *dev = dev_from_gk20a(g);
l->debugfs = debugfs_create_dir(dev_name(dev), NULL);
if (!l->debugfs)
return;
if (debugfs_symlink)
l->debugfs_alias =
debugfs_create_symlink(debugfs_symlink,
NULL, dev_name(dev));
debugfs_create_file("status", S_IRUGO, l->debugfs,
dev, &gk20a_debug_fops);
debugfs_create_file("gr_status", S_IRUGO, l->debugfs,
dev, &gk20a_gr_debug_fops);
debugfs_create_u32("trace_cmdbuf", S_IRUGO|S_IWUSR,
l->debugfs, &gk20a_debug_trace_cmdbuf);
debugfs_create_u32("ch_wdt_init_limit_ms", S_IRUGO|S_IWUSR,
l->debugfs, &g->ch_wdt_init_limit_ms);
debugfs_create_bool("disable_syncpoints", S_IRUGO|S_IWUSR,
l->debugfs, &l->disable_syncpoints);
debugfs_create_bool("enable_platform_dbg", S_IRUGO|S_IWUSR,
l->debugfs, &l->enable_platform_dbg);
/* New debug logging API. */
debugfs_create_u64("log_mask", S_IRUGO|S_IWUSR,
l->debugfs, &g->log_mask);
debugfs_create_u32("log_trace", S_IRUGO|S_IWUSR,
l->debugfs, &g->log_trace);
l->debugfs_ltc_enabled =
debugfs_create_bool("ltc_enabled", S_IRUGO|S_IWUSR,
l->debugfs,
&g->mm.ltc_enabled_target);
debugfs_create_u32("poll_timeout_default_ms", S_IRUGO|S_IWUSR,
l->debugfs, &g->poll_timeout_default);
l->debugfs_timeouts_enabled =
debugfs_create_file("timeouts_enabled",
S_IRUGO|S_IWUSR,
l->debugfs,
g,
&timeouts_enabled_fops);
l->debugfs_disable_bigpage =
debugfs_create_file("disable_bigpage",
S_IRUGO|S_IWUSR,
l->debugfs,
g,
&disable_bigpage_fops);
debugfs_create_u32("tsg_timeslice_low_priority_us", S_IRUGO|S_IWUSR,
l->debugfs, &g->tsg_timeslice_low_priority_us);
debugfs_create_u32("tsg_timeslice_medium_priority_us", S_IRUGO|S_IWUSR,
l->debugfs,
&g->tsg_timeslice_medium_priority_us);
debugfs_create_u32("tsg_timeslice_high_priority_us", S_IRUGO|S_IWUSR,
l->debugfs, &g->tsg_timeslice_high_priority_us);
l->debugfs_dbg_tsg_timeslice_max_us =
debugfs_create_file("max_dbg_tsg_timeslice_us",
S_IRUGO|S_IWUSR,
l->debugfs, g,
&dbg_tsg_timeslice_max_fops);
l->debugfs_runlist_interleave =
debugfs_create_bool("runlist_interleave",
S_IRUGO|S_IWUSR,
l->debugfs,
&g->runlist_interleave);
gr_gk20a_debugfs_init(g);
gk20a_pmu_debugfs_init(g);
gk20a_railgating_debugfs_init(g);
#ifdef CONFIG_NVGPU_SUPPORT_CDE
gk20a_cde_debugfs_init(g);
#endif
if (!g->is_virtual)
nvgpu_ce_debugfs_init(g);
nvgpu_alloc_debugfs_init(g);
nvgpu_hal_debugfs_init(g);
gk20a_fifo_debugfs_init(g);
gk20a_sched_debugfs_init(g);
#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
nvgpu_kmem_debugfs_init(g);
#endif
nvgpu_ltc_debugfs_init(g);
#ifdef CONFIG_NVGPU_DGPU
if (g->pci_vendor_id) {
nvgpu_xve_debugfs_init(g);
nvgpu_bios_debugfs_init(g);
}
#endif
}
void gk20a_debug_deinit(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
if (!l->debugfs)
return;
debugfs_remove_recursive(l->debugfs);
debugfs_remove(l->debugfs_alias);
}

View File

@@ -0,0 +1,69 @@
/*
* Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#include "debug_allocator.h"
#include "os_linux.h"
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <nvgpu/allocator.h>
static int __alloc_show(struct seq_file *s, void *unused)
{
struct nvgpu_allocator *a = s->private;
nvgpu_alloc_print_stats(a, s, 1);
return 0;
}
static int __alloc_open(struct inode *inode, struct file *file)
{
return single_open(file, __alloc_show, inode->i_private);
}
static const struct file_operations __alloc_fops = {
.open = __alloc_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
void nvgpu_init_alloc_debug(struct gk20a *g, struct nvgpu_allocator *a)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
if (!l->debugfs_allocators)
return;
a->debugfs_entry = debugfs_create_file(a->name, S_IRUGO,
l->debugfs_allocators,
a, &__alloc_fops);
}
void nvgpu_fini_alloc_debug(struct nvgpu_allocator *a)
{
}
void nvgpu_alloc_debugfs_init(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
l->debugfs_allocators = debugfs_create_dir("allocators", l->debugfs);
if (IS_ERR_OR_NULL(l->debugfs_allocators)) {
l->debugfs_allocators = NULL;
return;
}
}

View File

@@ -0,0 +1,21 @@
/*
* Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#ifndef __NVGPU_DEBUG_ALLOCATOR_H__
#define __NVGPU_DEBUG_ALLOCATOR_H__
struct gk20a;
void nvgpu_alloc_debugfs_init(struct gk20a *g);
#endif /* __NVGPU_DEBUG_ALLOCATOR_H__ */

View File

@@ -0,0 +1,69 @@
/*
* Copyright (C) 2018-2019 NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#include <nvgpu/types.h>
#include <nvgpu/nvgpu_init.h>
#include "debug_bios.h"
#include "os_linux.h"
#include <linux/debugfs.h>
#include <linux/uaccess.h>
static int bios_version_show(struct seq_file *s, void *unused)
{
struct gk20a *g = s->private;
int err;
err = gk20a_busy(g);
if (err != 0) {
return err;
}
seq_printf(s, "Version %02X.%02X.%02X.%02X.%02X\n",
(g->bios->vbios_version >> 24) & 0xFF,
(g->bios->vbios_version >> 16) & 0xFF,
(g->bios->vbios_version >> 8) & 0xFF,
(g->bios->vbios_version >> 0) & 0xFF,
(g->bios->vbios_oem_version) & 0xFF);
gk20a_idle(g);
return 0;
}
static int bios_version_open(struct inode *inode, struct file *file)
{
return single_open(file, bios_version_show, inode->i_private);
}
static const struct file_operations bios_version_fops = {
.open = bios_version_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
int nvgpu_bios_debugfs_init(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct dentry *gpu_root = l->debugfs;
debugfs_create_file("bios", S_IRUGO,
gpu_root, g,
&bios_version_fops);
return 0;
}

View File

@@ -0,0 +1,21 @@
/*
* Copyright (C) 2018 NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#ifndef __NVGPU_DEBUG_BIOS_H__
#define __NVGPU_DEBUG_BIOS_H__
struct gk20a;
int nvgpu_bios_debugfs_init(struct gk20a *g);
#endif /* __NVGPU_DEBUG_BIOS_H__ */

View File

@@ -0,0 +1,53 @@
/*
* Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#include "debug_cde.h"
#include "platform_gk20a.h"
#include "os_linux.h"
#include <linux/debugfs.h>
static ssize_t gk20a_cde_reload_write(struct file *file,
const char __user *userbuf, size_t count, loff_t *ppos)
{
struct nvgpu_os_linux *l = file->private_data;
gk20a_cde_reload(l);
return count;
}
static const struct file_operations gk20a_cde_reload_fops = {
.open = simple_open,
.write = gk20a_cde_reload_write,
};
void gk20a_cde_debugfs_init(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g));
if (!platform->has_cde)
return;
debugfs_create_u32("cde_parameter", S_IWUSR | S_IRUGO,
l->debugfs, &l->cde_app.shader_parameter);
debugfs_create_u32("cde_ctx_count", S_IWUSR | S_IRUGO,
l->debugfs, &l->cde_app.ctx_count);
debugfs_create_u32("cde_ctx_usecount", S_IWUSR | S_IRUGO,
l->debugfs, &l->cde_app.ctx_usecount);
debugfs_create_u32("cde_ctx_count_top", S_IWUSR | S_IRUGO,
l->debugfs, &l->cde_app.ctx_count_top);
debugfs_create_file("reload_cde_firmware", S_IWUSR, l->debugfs,
l, &gk20a_cde_reload_fops);
}

View File

@@ -0,0 +1,21 @@
/*
* Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#ifndef __NVGPU_DEBUG_CDE_H__
#define __NVGPU_DEBUG_CDE_H__
struct gk20a;
void gk20a_cde_debugfs_init(struct gk20a *g);
#endif /* __NVGPU_DEBUG_CDE_H__ */

View File

@@ -0,0 +1,34 @@
/*
* Copyright (C) 2017-2019 NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#include "debug_ce.h"
#include "os_linux.h"
#include <nvgpu/ce_app.h>
#include <common/ce/ce_priv.h>
#include <linux/debugfs.h>
void nvgpu_ce_debugfs_init(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
debugfs_create_u32("ce_app_ctx_count", S_IWUSR | S_IRUGO,
l->debugfs, &g->ce_app->ctx_count);
debugfs_create_u32("ce_app_state", S_IWUSR | S_IRUGO,
l->debugfs, &g->ce_app->app_state);
debugfs_create_u32("ce_app_next_ctx_id", S_IWUSR | S_IRUGO,
l->debugfs, &g->ce_app->next_ctx_id);
}

View File

@@ -0,0 +1,21 @@
/*
* Copyright (C) 2017-2019 NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#ifndef __NVGPU_DEBUG_CE_H__
#define __NVGPU_DEBUG_CE_H__
struct gk20a;
void nvgpu_ce_debugfs_init(struct gk20a *g);
#endif /* __NVGPU_DEBUG_CE_H__ */

View File

@@ -0,0 +1,277 @@
/*
* Copyright (C) 2017-2020 NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#include <linux/uaccess.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <nvgpu/io.h>
#include <nvgpu/clk_arb.h>
#include <nvgpu/pmu/clk/clk.h>
#include "hal/clk/clk_gm20b.h"
#include "os_linux.h"
#include "platform_gk20a.h"
static int rate_get(void *data, u64 *val)
{
struct gk20a *g = (struct gk20a *)data;
struct clk_gk20a *clk = &g->clk;
*val = (u64)rate_gpc2clk_to_gpu(clk->gpc_pll.freq);
return 0;
}
static int rate_set(void *data, u64 val)
{
struct gk20a *g = (struct gk20a *)data;
if (nvgpu_clk_arb_has_active_req(g))
return 0;
return g->ops.clk.set_rate(g, CTRL_CLK_DOMAIN_GPCCLK, (u32)val);
}
DEFINE_SIMPLE_ATTRIBUTE(rate_fops, rate_get, rate_set, "%llu\n");
static int pll_reg_show(struct seq_file *s, void *data)
{
struct gk20a *g = s->private;
struct nvgpu_clk_pll_debug_data d;
u32 reg, m, n, pl, f;
int err = 0;
if (g->ops.clk.get_pll_debug_data) {
err = g->ops.clk.get_pll_debug_data(g, &d);
if (err)
return err;
} else {
return -EINVAL;
}
seq_printf(s, "bypassctrl = %s, ",
d.trim_sys_bypassctrl_val ? "bypass" : "vco");
seq_printf(s, "sel_vco = %s, ",
d.trim_sys_sel_vco_val ? "vco" : "bypass");
seq_printf(s, "cfg = 0x%x : %s : %s : %s\n", d.trim_sys_gpcpll_cfg_val,
d.trim_sys_gpcpll_cfg_enabled ? "enabled" : "disabled",
d.trim_sys_gpcpll_cfg_locked ? "locked" : "unlocked",
d.trim_sys_gpcpll_cfg_sync_on ? "sync_on" : "sync_off");
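	/*
	 * GPCPLL output = ref_clk * N / (M * PL-divider); the GPU core clock
	 * is gpc2clk / 2, which is why f/2 is printed below.
	 */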
reg = d.trim_sys_gpcpll_coeff_val;
m = d.trim_sys_gpcpll_coeff_mdiv;
n = d.trim_sys_gpcpll_coeff_ndiv;
pl = d.trim_sys_gpcpll_coeff_pldiv;
f = g->clk.gpc_pll.clk_in * n / (m * nvgpu_pl_to_div(pl));
seq_printf(s, "coef = 0x%x : m = %u : n = %u : pl = %u", reg, m, n, pl);
seq_printf(s, " : pll_f(gpu_f) = %u(%u) kHz\n", f, f/2);
seq_printf(s, "dvfs0 = 0x%x : d = %u : dmax = %u : doffs = %u\n",
d.trim_sys_gpcpll_dvfs0_val,
d.trim_sys_gpcpll_dvfs0_dfs_coeff,
d.trim_sys_gpcpll_dvfs0_dfs_det_max,
d.trim_sys_gpcpll_dvfs0_dfs_dc_offset);
return 0;
}
static int pll_reg_open(struct inode *inode, struct file *file)
{
return single_open(file, pll_reg_show, inode->i_private);
}
static const struct file_operations pll_reg_fops = {
.open = pll_reg_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static int pll_reg_raw_show(struct seq_file *s, void *data)
{
struct gk20a *g = s->private;
struct nvgpu_clk_pll_debug_data d;
u32 reg;
int err = 0;
if (g->ops.clk.get_pll_debug_data) {
err = g->ops.clk.get_pll_debug_data(g, &d);
if (err)
return err;
} else {
return -EINVAL;
}
seq_puts(s, "GPCPLL REGISTERS:\n");
for (reg = d.trim_sys_gpcpll_cfg_reg;
reg < d.trim_sys_gpcpll_dvfs2_reg;
reg += sizeof(u32))
seq_printf(s, "[0x%02x] = 0x%08x\n", reg, gk20a_readl(g, reg));
reg = d.trim_bcast_gpcpll_dvfs2_reg;
if (reg)
seq_printf(s, "[0x%02x] = 0x%08x\n", reg, gk20a_readl(g, reg));
seq_puts(s, "\nGPC CLK OUT REGISTERS:\n");
seq_printf(s, "[0x%02x] = 0x%08x\n", d.trim_sys_sel_vco_reg,
d.trim_sys_sel_vco_val);
seq_printf(s, "[0x%02x] = 0x%08x\n", d.trim_sys_gpc2clk_out_reg,
d.trim_sys_gpc2clk_out_val);
seq_printf(s, "[0x%02x] = 0x%08x\n", d.trim_sys_bypassctrl_reg,
d.trim_sys_bypassctrl_val);
return 0;
}
static int pll_reg_raw_open(struct inode *inode, struct file *file)
{
return single_open(file, pll_reg_raw_show, inode->i_private);
}
static ssize_t pll_reg_raw_write(struct file *file,
const char __user *userbuf, size_t count, loff_t *ppos)
{
struct gk20a *g = file->f_path.dentry->d_inode->i_private;
char buf[80];
u32 reg, val;
int err = 0;
if (sizeof(buf) <= count)
return -EINVAL;
if (copy_from_user(buf, userbuf, count))
return -EFAULT;
/* terminate buffer and trim - white spaces may be appended
* at the end when invoked from shell command line */
buf[count] = '\0';
strim(buf);
if (sscanf(buf, "[0x%x] = 0x%x", &reg, &val) != 2)
return -EINVAL;
if (g->ops.clk.pll_reg_write)
err = g->ops.clk.pll_reg_write(g, reg, val);
else
err = -EINVAL;
return err < 0 ? err : count;
}
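/*
* Writes to the pll_reg_raw node are parsed as "[<hex offset>] = <hex value>",
* the same format pll_reg_raw_show() prints, e.g. an illustrative pair:
*
*   echo "[0x00137000] = 0x00000001" > pll_reg_raw
*
* Anything that does not match the sscanf() pattern above returns -EINVAL.
* Note that gm20b_clk_init_debugfs() creates the node S_IRUGO, so the mode
* may need to be relaxed before writing.
*/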
static const struct file_operations pll_reg_raw_fops = {
.open = pll_reg_raw_open,
.read = seq_read,
.write = pll_reg_raw_write,
.llseek = seq_lseek,
.release = single_release,
};
static int monitor_get(void *data, u64 *val)
{
struct gk20a *g = (struct gk20a *)data;
int err = 0;
if (g->ops.clk.get_gpcclk_clock_counter)
err = g->ops.clk.get_gpcclk_clock_counter(&g->clk, val);
else
err = -EINVAL;
return err;
}
DEFINE_SIMPLE_ATTRIBUTE(monitor_fops, monitor_get, NULL, "%llu\n");
static int voltage_get(void *data, u64 *val)
{
struct gk20a *g = (struct gk20a *)data;
int err = 0;
if (g->ops.clk.get_voltage)
err = g->ops.clk.get_voltage(&g->clk, val);
else
err = -EINVAL;
return err;
}
DEFINE_SIMPLE_ATTRIBUTE(voltage_fops, voltage_get, NULL, "%llu\n");
static int pll_param_show(struct seq_file *s, void *data)
{
struct pll_parms *gpc_pll_params = gm20b_get_gpc_pll_parms();
seq_printf(s, "ADC offs = %d uV, ADC slope = %d uV, VCO ctrl = 0x%x\n",
gpc_pll_params->uvdet_offs, gpc_pll_params->uvdet_slope,
gpc_pll_params->vco_ctrl);
return 0;
}
static int pll_param_open(struct inode *inode, struct file *file)
{
return single_open(file, pll_param_show, inode->i_private);
}
static const struct file_operations pll_param_fops = {
.open = pll_param_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
int gm20b_clk_init_debugfs(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct dentry *d;
if (!l->debugfs)
return -EINVAL;
d = debugfs_create_file(
"rate", S_IRUGO|S_IWUSR, l->debugfs, g, &rate_fops);
if (!d)
goto err_out;
d = debugfs_create_file(
"pll_reg", S_IRUGO, l->debugfs, g, &pll_reg_fops);
if (!d)
goto err_out;
d = debugfs_create_file("pll_reg_raw",
S_IRUGO, l->debugfs, g, &pll_reg_raw_fops);
if (!d)
goto err_out;
d = debugfs_create_file(
"monitor", S_IRUGO, l->debugfs, g, &monitor_fops);
if (!d)
goto err_out;
d = debugfs_create_file(
"voltage", S_IRUGO, l->debugfs, g, &voltage_fops);
if (!d)
goto err_out;
d = debugfs_create_file(
"pll_param", S_IRUGO, l->debugfs, g, &pll_param_fops);
if (!d)
goto err_out;
debugfs_create_u32("pll_na_mode", S_IRUGO, l->debugfs,
(u32 *)&g->clk.gpc_pll.mode);
debugfs_create_u32("fmax2x_at_vmin_safe_t", S_IRUGO,
l->debugfs, (u32 *)&g->clk.dvfs_safe_max_freq);
return 0;
err_out:
pr_err("%s: Failed to make debugfs node\n", __func__);
return -ENOMEM;
}

View File

@@ -0,0 +1,29 @@
/*
* Copyright (c) 2018, NVIDIA Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __DEBUG_CLK_GM20B_H
#define __DEBUG_CLK_GM20B_H
#ifdef CONFIG_DEBUG_FS
int gm20b_clk_init_debugfs(struct gk20a *g);
#else
static inline int gm20b_clk_init_debugfs(struct gk20a *g)
{
return 0;
}
#endif
#endif

View File

@@ -0,0 +1,144 @@
/*
* Copyright (c) 2018-2020, NVIDIA Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include "os_linux.h"
#include <nvgpu/boardobjgrp_e32.h>
#include <nvgpu/boardobjgrp_e255.h>
#include <nvgpu/pmu/clk/clk.h>
#include <nvgpu/pmu/volt.h>
/* Dependency of this include will be removed in a further CL */
#include "../../common/pmu/boardobj/boardobj.h"
#include "hal/clk/clk_tu104.h"
void nvgpu_clk_arb_pstate_change_lock(struct gk20a *g, bool lock);
static int tu104_get_rate_show(void *data, u64 *val)
{
struct namemap_cfg *c = (struct namemap_cfg *)data;
struct gk20a *g = c->g;
if (!g->ops.clk.get_rate_cntr)
return -EINVAL;
*val = c->is_counter ? (u64)c->scale * g->ops.clk.get_rate_cntr(g, c) :
0 /* TODO PLL read */;
return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(get_rate_fops, tu104_get_rate_show, NULL, "%llu\n");
static int vftable_show(struct seq_file *s, void *unused)
{
struct gk20a *g = s->private;
int status;
u8 index;
u32 voltage_min_uv, voltage_step_size_uv;
u32 gpcclk_clkmhz = 0, gpcclk_voltuv = 0;
voltage_min_uv = nvgpu_pmu_clk_fll_get_lut_step_size(g->pmu->clk_pmu);
voltage_step_size_uv = nvgpu_pmu_clk_fll_get_lut_step_size(g->pmu->clk_pmu);
for (index = 0; index < CTRL_CLK_LUT_NUM_ENTRIES_GV10x; index++) {
gpcclk_voltuv = voltage_min_uv + index * voltage_step_size_uv;
status = nvgpu_clk_domain_volt_to_freq(g, 0, &gpcclk_clkmhz,
&gpcclk_voltuv, CTRL_VOLT_DOMAIN_LOGIC);
if (status != 0) {
nvgpu_err(g, "Failed to get freq for requested volt");
return status;
}
seq_printf(s, "Voltage: %duV Frequency: %dMHz\n",
gpcclk_voltuv, gpcclk_clkmhz);
}
return 0;
}
static int vftable_open(struct inode *inode, struct file *file)
{
return single_open(file, vftable_show, inode->i_private);
}
static const struct file_operations vftable_fops = {
.open = vftable_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
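/*
* Reading clocks/vftable walks the GPC LUT entries and prints one
* "Voltage: <uV> Frequency: <MHz>" line per entry, as produced by
* vftable_show() above. Example (the path prefix is an assumption):
*
*   cat /sys/kernel/debug/<gpu>/clocks/vftable
*/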
static int tu104_change_seq_time(void *data, u64 *val)
{
struct gk20a *g = (struct gk20a *)data;
s64 readval;
if (!g->ops.clk.get_change_seq_time)
return -EINVAL;
g->ops.clk.get_change_seq_time(g, &readval);
*val = (u64)readval;
return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(change_seq_fops, tu104_change_seq_time, NULL, "%llu\n");
int tu104_clk_init_debugfs(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct dentry *gpu_root = l->debugfs;
struct dentry *clocks_root, *clk_freq_ctlr_root;
struct dentry *d;
unsigned int i;
clocks_root = debugfs_create_dir("clocks", gpu_root);
if (clocks_root == NULL)
return -ENOMEM;
clk_freq_ctlr_root = debugfs_create_dir("clk_freq_ctlr", gpu_root);
if (clk_freq_ctlr_root == NULL)
return -ENOMEM;
d = debugfs_create_file("change_seq_time_us", S_IRUGO, clocks_root,
g, &change_seq_fops);
nvgpu_log(g, gpu_dbg_info, "g=%p", g);
for (i = 0; i < g->clk.namemap_num; i++) {
if (g->clk.clk_namemap[i].is_enable) {
d = debugfs_create_file(
g->clk.clk_namemap[i].name,
S_IRUGO,
clocks_root,
&g->clk.clk_namemap[i],
&get_rate_fops);
if (!d)
goto err_out;
}
}
d = debugfs_create_file("vftable", S_IRUGO,
clocks_root, g, &vftable_fops);
if (!d)
goto err_out;
return 0;
err_out:
pr_err("%s: Failed to make debugfs node\n", __func__);
debugfs_remove_recursive(clocks_root);
return -ENOMEM;
}

View File

@@ -0,0 +1,29 @@
/*
* Copyright (c) 2018, NVIDIA Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __DEBUG_CLK_TU104_H
#define __DEBUG_CLK_TU104_H
#ifdef CONFIG_DEBUG_FS
int tu104_clk_init_debugfs(struct gk20a *g);
#else
static inline int tu104_clk_init_debugfs(struct gk20a *g)
{
return 0;
}
#endif
#endif

View File

@@ -0,0 +1,169 @@
/*
* Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/debugfs.h>
#include <nvgpu/gr/fecs_trace.h>
#include <nvgpu/nvgpu_init.h>
#include "os_linux.h"
/*
* The sequence iterator functions. We simply use the count of the
* next line as our internal position.
*/
static void *gk20a_fecs_trace_debugfs_ring_seq_start(
struct seq_file *s, loff_t *pos)
{
if (*pos >= GK20A_FECS_TRACE_NUM_RECORDS)
return NULL;
return pos;
}
static void *gk20a_fecs_trace_debugfs_ring_seq_next(
struct seq_file *s, void *v, loff_t *pos)
{
++(*pos);
if (*pos >= GK20A_FECS_TRACE_NUM_RECORDS)
return NULL;
return pos;
}
static void gk20a_fecs_trace_debugfs_ring_seq_stop(
struct seq_file *s, void *v)
{
}
static int gk20a_fecs_trace_debugfs_ring_seq_show(
struct seq_file *s, void *v)
{
loff_t *pos = (loff_t *) v;
struct gk20a *g = *(struct gk20a **)s->private;
struct nvgpu_fecs_trace_record *r =
nvgpu_gr_fecs_trace_get_record(g, *pos);
int i;
const u32 invalid_tag =
g->ops.gr.ctxsw_prog.hw_get_ts_tag_invalid_timestamp();
u32 tag;
u64 timestamp;
seq_printf(s, "record #%lld (%p)\n", *pos, r);
seq_printf(s, "\tmagic_lo=%08x\n", r->magic_lo);
seq_printf(s, "\tmagic_hi=%08x\n", r->magic_hi);
if (nvgpu_gr_fecs_trace_is_valid_record(g, r)) {
seq_printf(s, "\tcontext_ptr=%08x\n", r->context_ptr);
seq_printf(s, "\tcontext_id=%08x\n", r->context_id);
seq_printf(s, "\tnew_context_ptr=%08x\n", r->new_context_ptr);
seq_printf(s, "\tnew_context_id=%08x\n", r->new_context_id);
for (i = 0; i < nvgpu_gr_fecs_trace_num_ts(g); i++) {
tag = g->ops.gr.ctxsw_prog.hw_get_ts_tag(r->ts[i]);
if (tag == invalid_tag)
continue;
timestamp = g->ops.gr.ctxsw_prog.hw_record_ts_timestamp(r->ts[i]);
timestamp <<= GK20A_FECS_TRACE_PTIMER_SHIFT;
seq_printf(s, "\ttag=%02x timestamp=%012llx\n", tag, timestamp);
}
}
return 0;
}
/*
* Tie them all together into a set of seq_operations.
*/
static const struct seq_operations gk20a_fecs_trace_debugfs_ring_seq_ops = {
.start = gk20a_fecs_trace_debugfs_ring_seq_start,
.next = gk20a_fecs_trace_debugfs_ring_seq_next,
.stop = gk20a_fecs_trace_debugfs_ring_seq_stop,
.show = gk20a_fecs_trace_debugfs_ring_seq_show
};
/*
* Time to set up the file operations for our debugfs file. In this case,
* all we need is an open function which sets up the sequence ops.
*/
static int gk20a_ctxsw_debugfs_ring_open(struct inode *inode,
struct file *file)
{
struct gk20a **p;
p = __seq_open_private(file, &gk20a_fecs_trace_debugfs_ring_seq_ops,
sizeof(struct gk20a *));
if (!p)
return -ENOMEM;
*p = (struct gk20a *)inode->i_private;
return 0;
}
/*
* The file operations structure contains our open function along with
* a set of the canned seq_ ops.
*/
static const struct file_operations gk20a_fecs_trace_debugfs_ring_fops = {
.owner = THIS_MODULE,
.open = gk20a_ctxsw_debugfs_ring_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release_private
};
static int gk20a_fecs_trace_debugfs_read(void *arg, u64 *val)
{
struct gk20a *g = (struct gk20a *)arg;
int err = gk20a_busy(g);
if (err != 0) {
return err;
}
*val = g->ops.gr.fecs_trace.get_read_index(g);
gk20a_idle(g);
return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(gk20a_fecs_trace_debugfs_read_fops,
gk20a_fecs_trace_debugfs_read, NULL, "%llu\n");
static int gk20a_fecs_trace_debugfs_write(void *arg, u64 *val)
{
struct gk20a *g = (struct gk20a *)arg;
int err = gk20a_busy(g);
if (err != 0) {
return err;
}
*val = g->ops.gr.fecs_trace.get_write_index(g);
gk20a_idle(g);
return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(gk20a_fecs_trace_debugfs_write_fops,
gk20a_fecs_trace_debugfs_write, NULL, "%llu\n");
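/*
* nvgpu_fecs_trace_init_debugfs() below exposes three views of the FECS
* context-switch trace state:
*   ctxsw_trace_read  - current read index (simple attribute above)
*   ctxsw_trace_write - current write index (simple attribute above)
*   ctxsw_trace_ring  - dump of all GK20A_FECS_TRACE_NUM_RECORDS records via
*                       the seq_file iterator defined at the top of this file
* Example (the path prefix is an assumption):
*   cat /sys/kernel/debug/<gpu>/ctxsw_trace_ring
*/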
int nvgpu_fecs_trace_init_debugfs(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
debugfs_create_file("ctxsw_trace_read", 0600, l->debugfs, g,
&gk20a_fecs_trace_debugfs_read_fops);
debugfs_create_file("ctxsw_trace_write", 0600, l->debugfs, g,
&gk20a_fecs_trace_debugfs_write_fops);
debugfs_create_file("ctxsw_trace_ring", 0600, l->debugfs, g,
&gk20a_fecs_trace_debugfs_ring_fops);
return 0;
}

View File

@@ -0,0 +1,30 @@
/*
* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef LINUX_DEBUG_FECS_TRACE_H
#define LINUX_DEBUG_FECS_TRACE_H
struct gk20a;
#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_NVGPU_FECS_TRACE)
int nvgpu_fecs_trace_init_debugfs(struct gk20a *g);
#else
static inline int nvgpu_fecs_trace_init_debugfs(struct gk20a *g)
{
return 0;
}
#endif
#endif

View File

@@ -0,0 +1,160 @@
/*
* Copyright (C) 2017-2021 NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include "debug_fifo.h"
#include "swprofile_debugfs.h"
#include "os_linux.h"
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <nvgpu/timers.h>
#include <nvgpu/channel.h>
#include <nvgpu/gr/ctx.h>
#include <nvgpu/engines.h>
#include <nvgpu/device.h>
#include <nvgpu/runlist.h>
static void *gk20a_fifo_sched_debugfs_seq_start(
struct seq_file *s, loff_t *pos)
{
struct gk20a *g = s->private;
struct nvgpu_fifo *f = &g->fifo;
if (*pos >= f->num_channels)
return NULL;
return &f->channel[*pos];
}
static void *gk20a_fifo_sched_debugfs_seq_next(
struct seq_file *s, void *v, loff_t *pos)
{
struct gk20a *g = s->private;
struct nvgpu_fifo *f = &g->fifo;
++(*pos);
if (*pos >= f->num_channels)
return NULL;
return &f->channel[*pos];
}
static void gk20a_fifo_sched_debugfs_seq_stop(
struct seq_file *s, void *v)
{
}
static int gk20a_fifo_sched_debugfs_seq_show(
struct seq_file *s, void *v)
{
struct gk20a *g = s->private;
struct nvgpu_fifo *f = &g->fifo;
struct nvgpu_channel *ch = v;
struct nvgpu_tsg *tsg = NULL;
const struct nvgpu_device *dev;
struct nvgpu_runlist *runlist;
u32 runlist_id;
int ret = SEQ_SKIP;
dev = nvgpu_device_get(g, NVGPU_DEVTYPE_GRAPHICS, 0);
nvgpu_assert(dev != NULL);
runlist_id = dev->runlist_id;
runlist = f->runlists[runlist_id];
if (ch == f->channel) {
seq_puts(s, "chid tsgid pid timeslice timeout interleave graphics_preempt compute_preempt\n");
seq_puts(s, " (usecs) (msecs)\n");
ret = 0;
}
if (!test_bit(ch->chid, runlist->active_channels))
return ret;
if (nvgpu_channel_get(ch)) {
tsg = nvgpu_tsg_from_ch(ch);
if (tsg)
seq_printf(s, "%-8d %-8d %-8d %-9d %-8d %-10d %-8d %-8d\n",
ch->chid,
ch->tsgid,
ch->tgid,
tsg->timeslice_us,
ch->ctxsw_timeout_max_ms,
tsg->interleave_level,
nvgpu_gr_ctx_get_graphics_preemption_mode(tsg->gr_ctx),
nvgpu_gr_ctx_get_compute_preemption_mode(tsg->gr_ctx));
nvgpu_channel_put(ch);
}
return 0;
}
static const struct seq_operations gk20a_fifo_sched_debugfs_seq_ops = {
.start = gk20a_fifo_sched_debugfs_seq_start,
.next = gk20a_fifo_sched_debugfs_seq_next,
.stop = gk20a_fifo_sched_debugfs_seq_stop,
.show = gk20a_fifo_sched_debugfs_seq_show
};
static int gk20a_fifo_sched_debugfs_open(struct inode *inode,
struct file *file)
{
struct gk20a *g = inode->i_private;
int err;
err = seq_open(file, &gk20a_fifo_sched_debugfs_seq_ops);
if (err)
return err;
nvgpu_log(g, gpu_dbg_info, "i_private=%p", inode->i_private);
((struct seq_file *)file->private_data)->private = inode->i_private;
return 0;
}
/*
* The file operations structure contains our open function along with
* a set of the canned seq_ ops.
*/
static const struct file_operations gk20a_fifo_sched_debugfs_fops = {
.owner = THIS_MODULE,
.open = gk20a_fifo_sched_debugfs_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release
};
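/*
* Reading fifo/sched iterates every channel and prints one row per channel
* that is active on the graphics runlist, using the columns emitted by
* gk20a_fifo_sched_debugfs_seq_show() above. Example (the path prefix is an
* assumption):
*
*   cat /sys/kernel/debug/<gpu>/fifo/sched
*/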
void gk20a_fifo_debugfs_init(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct dentry *gpu_root = l->debugfs;
struct dentry *fifo_root;
fifo_root = debugfs_create_dir("fifo", gpu_root);
if (IS_ERR_OR_NULL(fifo_root))
return;
nvgpu_log(g, gpu_dbg_info, "g=%p", g);
debugfs_create_file("sched", 0600, fifo_root, g,
&gk20a_fifo_sched_debugfs_fops);
nvgpu_debugfs_swprofile_init(g, fifo_root, &g->fifo.kickoff_profiler,
"kickoff_profiler");
nvgpu_debugfs_swprofile_init(g, fifo_root, &g->fifo.recovery_profiler,
"recovery_profiler");
nvgpu_debugfs_swprofile_init(g, fifo_root, &g->fifo.eng_reset_profiler,
"eng_reset_profiler");
}

View File

@@ -0,0 +1,21 @@
/*
* Copyright (C) 2017-2020 NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#ifndef __NVGPU_DEBUG_FIFO_H__
#define __NVGPU_DEBUG_FIFO_H__
struct gk20a;
void gk20a_fifo_debugfs_init(struct gk20a *g);
#endif /* __NVGPU_DEBUG_FIFO_H__ */

View File

@@ -0,0 +1,244 @@
/*
* Copyright (C) 2017-2020 NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/gr/ctx.h>
#include "common/gr/ctx_priv.h"
#include "common/gr/gr_priv.h"
#include "debug_gr.h"
#include "os_linux.h"
#include <linux/uaccess.h>
#include <linux/debugfs.h>
static int gr_default_attrib_cb_size_show(struct seq_file *s, void *data)
{
struct gk20a *g = s->private;
/* HAL might not be initialized yet */
if (g->ops.gr.init.get_attrib_cb_default_size == NULL)
return -EFAULT;
seq_printf(s, "%u\n", g->ops.gr.init.get_attrib_cb_default_size(g));
return 0;
}
static int gr_default_attrib_cb_size_open(struct inode *inode,
struct file *file)
{
return single_open(file, gr_default_attrib_cb_size_show,
inode->i_private);
}
static const struct file_operations gr_default_attrib_cb_size_fops = {
.open = gr_default_attrib_cb_size_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static ssize_t force_preemption_gfxp_read(struct file *file,
char __user *user_buf, size_t count, loff_t *ppos)
{
char buf[3];
struct gk20a *g = file->private_data;
if (g->gr->gr_ctx_desc == NULL) {
return -EFAULT;
}
if (g->gr->gr_ctx_desc->force_preemption_gfxp) {
buf[0] = 'Y';
} else {
buf[0] = 'N';
}
buf[1] = '\n';
buf[2] = 0x00;
return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
}
static ssize_t force_preemption_gfxp_write(struct file *file,
const char __user *user_buf, size_t count, loff_t *ppos)
{
char buf[32];
int buf_size;
bool val;
struct gk20a *g = file->private_data;
if (g->gr->gr_ctx_desc == NULL) {
return -EFAULT;
}
buf_size = min(count, (sizeof(buf)-1));
if (copy_from_user(buf, user_buf, buf_size)) {
return -EFAULT;
}
if (strtobool(buf, &val) == 0) {
g->gr->gr_ctx_desc->force_preemption_gfxp = val;
}
return count;
}
static const struct file_operations force_preemption_gfxp_fops = {
.open = simple_open,
.read = force_preemption_gfxp_read,
.write = force_preemption_gfxp_write,
};
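/*
* Usage sketch for the force_preemption_gfxp node (the path prefix is an
* assumption); strtobool() accepts y/Y/1 and n/N/0:
*
*   echo Y > /sys/kernel/debug/<gpu>/force_preemption_gfxp
*   cat /sys/kernel/debug/<gpu>/force_preemption_gfxp    # prints Y or N
*
* The force_preemption_cilp and dump_ctxsw_stats_on_channel_close nodes
* below follow the same Y/N convention.
*/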
static ssize_t force_preemption_cilp_read(struct file *file,
char __user *user_buf, size_t count, loff_t *ppos)
{
char buf[3];
struct gk20a *g = file->private_data;
if (g->gr->gr_ctx_desc == NULL) {
return -EFAULT;
}
if (g->gr->gr_ctx_desc->force_preemption_cilp) {
buf[0] = 'Y';
} else {
buf[0] = 'N';
}
buf[1] = '\n';
buf[2] = 0x00;
return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
}
static ssize_t force_preemption_cilp_write(struct file *file,
const char __user *user_buf, size_t count, loff_t *ppos)
{
char buf[32];
int buf_size;
bool val;
struct gk20a *g = file->private_data;
if (g->gr->gr_ctx_desc == NULL) {
return -EFAULT;
}
buf_size = min(count, (sizeof(buf)-1));
if (copy_from_user(buf, user_buf, buf_size)) {
return -EFAULT;
}
if (strtobool(buf, &val) == 0) {
g->gr->gr_ctx_desc->force_preemption_cilp = val;
}
return count;
}
static const struct file_operations force_preemption_cilp_fops = {
.open = simple_open,
.read = force_preemption_cilp_read,
.write = force_preemption_cilp_write,
};
static ssize_t dump_ctxsw_stats_on_channel_close_read(struct file *file,
char __user *user_buf, size_t count, loff_t *ppos)
{
char buf[3];
struct gk20a *g = file->private_data;
if (g->gr->gr_ctx_desc == NULL) {
return -EFAULT;
}
if (g->gr->gr_ctx_desc->dump_ctxsw_stats_on_channel_close) {
buf[0] = 'Y';
} else {
buf[0] = 'N';
}
buf[1] = '\n';
buf[2] = 0x00;
return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
}
static ssize_t dump_ctxsw_stats_on_channel_close_write(struct file *file,
const char __user *user_buf, size_t count, loff_t *ppos)
{
char buf[32];
int buf_size;
bool val;
struct gk20a *g = file->private_data;
if (g->gr->gr_ctx_desc == NULL) {
return -EFAULT;
}
buf_size = min(count, (sizeof(buf)-1));
if (copy_from_user(buf, user_buf, buf_size)) {
return -EFAULT;
}
if (strtobool(buf, &val) == 0) {
g->gr->gr_ctx_desc->dump_ctxsw_stats_on_channel_close = val;
}
return count;
}
static const struct file_operations dump_ctxsw_stats_on_channel_close_fops = {
.open = simple_open,
.read = dump_ctxsw_stats_on_channel_close_read,
.write = dump_ctxsw_stats_on_channel_close_write,
};
int gr_gk20a_debugfs_init(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct dentry *d;
d = debugfs_create_file(
"gr_default_attrib_cb_size", S_IRUGO, l->debugfs, g,
&gr_default_attrib_cb_size_fops);
if (!d)
return -ENOMEM;
d = debugfs_create_file(
"force_preemption_gfxp", S_IRUGO|S_IWUSR, l->debugfs, g,
&force_preemption_gfxp_fops);
if (!d)
return -ENOMEM;
d = debugfs_create_file(
"force_preemption_cilp", S_IRUGO|S_IWUSR, l->debugfs, g,
&force_preemption_cilp_fops);
if (!d)
return -ENOMEM;
if (!g->is_virtual) {
d = debugfs_create_file(
"dump_ctxsw_stats_on_channel_close", S_IRUGO|S_IWUSR,
l->debugfs, g,
&dump_ctxsw_stats_on_channel_close_fops);
if (!d)
return -ENOMEM;
}
return 0;
}

View File

@@ -0,0 +1,21 @@
/*
* Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#ifndef __NVGPU_DEBUG_GR_H__
#define __NVGPU_DEBUG_GR_H__
struct gk20a;
int gr_gk20a_debugfs_init(struct gk20a *g);
#endif /* __NVGPU_DEBUG_GR_H__ */

View File

@@ -0,0 +1,95 @@
/*
* Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#include "debug_hal.h"
#include "os_linux.h"
#include <linux/debugfs.h>
#include <linux/seq_file.h>
/* Format and print a single function pointer to the specified seq_file. */
static void __hal_print_op(struct seq_file *s, void *op_ptr)
{
seq_printf(s, "%pF\n", op_ptr);
}
/*
* Prints an array of function pointer addresses in op_ptrs to the
* specified seq_file
*/
static void __hal_print_ops(struct seq_file *s, void **op_ptrs, int num_ops)
{
int i;
for (i = 0; i < num_ops; i++)
__hal_print_op(s, op_ptrs[i]);
}
/*
* Show file operation, which generates content of the file once. Prints a list
* of gpu operations as defined by gops and the corresponding function pointer
* destination addresses. Relies on no compiler reordering of struct fields and
* assumption that all members are function pointers.
*/
static int __hal_show(struct seq_file *s, void *unused)
{
struct gpu_ops *gops = s->private;
__hal_print_ops(s, (void **)gops, sizeof(*gops) / sizeof(void *));
return 0;
}
static int __hal_open(struct inode *inode, struct file *file)
{
return single_open(file, __hal_show, inode->i_private);
}
static const struct file_operations __hal_fops = {
.open = __hal_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
void nvgpu_hal_debugfs_fini(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
if (l->debugfs_hal != NULL)
debugfs_remove_recursive(l->debugfs_hal);
}
void nvgpu_hal_debugfs_init(struct gk20a *g)
{
struct dentry *d;
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
if (!l->debugfs)
return;
l->debugfs_hal = debugfs_create_dir("hal", l->debugfs);
if (IS_ERR_OR_NULL(l->debugfs_hal)) {
l->debugfs_hal = NULL;
return;
}
/* Pass along reference to the gpu_ops struct as private data */
d = debugfs_create_file("gops", S_IRUGO, l->debugfs_hal,
&g->ops, &__hal_fops);
if (!d) {
nvgpu_err(g, "%s: Failed to make debugfs node\n", __func__);
debugfs_remove_recursive(l->debugfs_hal);
return;
}
}

View File

@@ -0,0 +1,22 @@
/*
* Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#ifndef __NVGPU_DEBUG_HAL_H__
#define __NVGPU_DEBUG_HAL_H__
struct gk20a;
void nvgpu_hal_debugfs_fini(struct gk20a *g);
void nvgpu_hal_debugfs_init(struct gk20a *g);
#endif /* __NVGPU_DEBUG_HAL_H__ */

View File

@@ -0,0 +1,312 @@
/*
* Copyright (C) 2018 NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include "os_linux.h"
#include "debug_kmem.h"
#include "kmem_priv.h"
/**
* __to_human_readable_bytes - Determine suffix for passed size.
*
* @bytes - Number of bytes to generate a suffix for.
* @hr_bytes [out] - The human readable number of bytes.
* @hr_suffix [out] - The suffix for the HR number of bytes.
*
* Computes a human readable decomposition of the passed number of bytes. The
* suffix for the bytes is passed back through the @hr_suffix pointer. The right
* number of bytes is then passed back in @hr_bytes. This returns the following
* ranges:
*
* 0 - 1023 B
* 1 - 1023 KB
* 1 - 1023 MB
* 1 - 1023 GB
* 1 - 1023 TB
* 1 - ... PB
*/
static void __to_human_readable_bytes(u64 bytes, u64 *hr_bytes,
const char **hr_suffix)
{
static const char *suffixes[] =
{ "B", "KB", "MB", "GB", "TB", "PB" };
u64 suffix_ind = 0;
while (suffix_ind < ARRAY_SIZE(suffixes) && bytes >= 1024) {
bytes >>= 10;
suffix_ind++;
}
/*
* Handle case where bytes > 1023PB.
*/
suffix_ind = suffix_ind < ARRAY_SIZE(suffixes) ?
suffix_ind : ARRAY_SIZE(suffixes) - 1;
*hr_bytes = bytes;
*hr_suffix = suffixes[suffix_ind];
}
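/*
* Worked example for the decomposition above (illustrative values): 1536
* bytes shifts down once (1536 >> 10 == 1), so *hr_bytes becomes 1 and
* *hr_suffix becomes "KB". The remainder is intentionally dropped, i.e. the
* result is a truncated order-of-magnitude figure rather than an exact one.
*/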
/**
* print_hr_bytes - Print human readable bytes
*
* @s - A seq_file to print to. May be NULL.
* @msg - A message to print before the bytes.
* @bytes - Number of bytes.
*
* Print @msg followed by the human readable decomposition of the passed number
* of bytes.
*
* If @s is NULL then the prints will be made to the kernel log.
*/
static void print_hr_bytes(struct seq_file *s, const char *msg, u64 bytes)
{
u64 hr_bytes;
const char *hr_suffix;
__to_human_readable_bytes(bytes, &hr_bytes, &hr_suffix);
__pstat(s, "%s%lld %s\n", msg, hr_bytes, hr_suffix);
}
/**
* print_histogram - Build a histogram of the memory usage.
*
* @tracker The tracker to pull data from.
* @s A seq_file to dump info into.
*/
static void print_histogram(struct nvgpu_mem_alloc_tracker *tracker,
struct seq_file *s)
{
int i;
u64 pot_min, pot_max;
u64 nr_buckets;
unsigned int *buckets;
unsigned int total_allocs;
struct nvgpu_rbtree_node *node;
static const char histogram_line[] =
"++++++++++++++++++++++++++++++++++++++++";
/*
* pot_min is essentially a round down to the nearest power of 2. This
* is the start of the histogram. pot_max is just a round up to the
* nearest power of two. Each histogram bucket is one power of two so
* the histogram buckets are exponential.
*/
pot_min = (u64)rounddown_pow_of_two(tracker->min_alloc);
pot_max = (u64)roundup_pow_of_two(tracker->max_alloc);
nr_buckets = __ffs(pot_max) - __ffs(pot_min);
buckets = kzalloc(sizeof(*buckets) * nr_buckets, GFP_KERNEL);
if (!buckets) {
__pstat(s, "OOM: could not allocate bucket storage!?\n");
return;
}
/*
* Iterate across all of the allocs and determine what bucket they
* should go in. Round the size down to the nearest power of two to
* find the right bucket.
*/
nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
while (node) {
int b;
u64 bucket_min;
struct nvgpu_mem_alloc *alloc =
nvgpu_mem_alloc_from_rbtree_node(node);
bucket_min = (u64)rounddown_pow_of_two(alloc->size);
if (bucket_min < tracker->min_alloc)
bucket_min = tracker->min_alloc;
b = __ffs(bucket_min) - __ffs(pot_min);
/*
* Handle the one case where there's an alloc exactly as big as
* the maximum bucket size of the largest bucket. Most of the
* buckets have an inclusive minimum and exclusive maximum. But
* the largest bucket needs to have an _inclusive_ maximum as
* well.
*/
if (b == (int)nr_buckets)
b--;
buckets[b]++;
nvgpu_rbtree_enum_next(&node, node);
}
total_allocs = 0;
for (i = 0; i < (int)nr_buckets; i++)
total_allocs += buckets[i];
__pstat(s, "Alloc histogram:\n");
/*
* Actually compute the histogram lines.
*/
for (i = 0; i < (int)nr_buckets; i++) {
char this_line[sizeof(histogram_line) + 1];
u64 line_length;
u64 hr_bytes;
const char *hr_suffix;
(void) memset(this_line, 0, sizeof(this_line));
/*
* Compute the normalized line length. Can't use floating point
* so we will just multiply everything by 1000 and use fixed
* point.
*/
line_length = (1000 * buckets[i]) / total_allocs;
line_length *= sizeof(histogram_line);
line_length /= 1000;
(void) memset(this_line, '+', line_length);
__to_human_readable_bytes((u64)1 << (__ffs(pot_min) + i),
&hr_bytes, &hr_suffix);
__pstat(s, " [%-4lld %-4lld] %-2s %5u | %s\n",
hr_bytes, hr_bytes << 1,
hr_suffix, buckets[i], this_line);
}
}
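/*
* Worked example for the fixed-point bar scaling in print_histogram() above
* (illustrative numbers): with buckets[i] == 5 and total_allocs == 20,
* line_length = ((1000 * 5) / 20) * sizeof(histogram_line) / 1000
*             = 250 * 41 / 1000 = 10,
* so that bucket's bar is drawn with ten '+' characters.
*/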
/**
* nvgpu_kmem_print_stats - Print kmem tracking stats.
*
* @tracker The tracker to pull data from.
* @s A seq_file to dump info into.
*
* Print stats from a tracker. If @s is non-null then seq_printf() will be
* used with @s. Otherwise the stats are pr_info()ed.
*/
void nvgpu_kmem_print_stats(struct nvgpu_mem_alloc_tracker *tracker,
struct seq_file *s)
{
nvgpu_lock_tracker(tracker);
__pstat(s, "Mem tracker: %s\n\n", tracker->name);
__pstat(s, "Basic Stats:\n");
__pstat(s, " Number of allocs %lld\n",
tracker->nr_allocs);
__pstat(s, " Number of frees %lld\n",
tracker->nr_frees);
print_hr_bytes(s, " Smallest alloc ", tracker->min_alloc);
print_hr_bytes(s, " Largest alloc ", tracker->max_alloc);
print_hr_bytes(s, " Bytes allocated ", tracker->bytes_alloced);
print_hr_bytes(s, " Bytes freed ", tracker->bytes_freed);
print_hr_bytes(s, " Bytes allocated (real) ",
tracker->bytes_alloced_real);
print_hr_bytes(s, " Bytes freed (real) ",
tracker->bytes_freed_real);
__pstat(s, "\n");
print_histogram(tracker, s);
nvgpu_unlock_tracker(tracker);
}
static int __kmem_tracking_show(struct seq_file *s, void *unused)
{
struct nvgpu_mem_alloc_tracker *tracker = s->private;
nvgpu_kmem_print_stats(tracker, s);
return 0;
}
static int __kmem_tracking_open(struct inode *inode, struct file *file)
{
return single_open(file, __kmem_tracking_show, inode->i_private);
}
static const struct file_operations __kmem_tracking_fops = {
.open = __kmem_tracking_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static int __kmem_traces_dump_tracker(struct gk20a *g,
struct nvgpu_mem_alloc_tracker *tracker,
struct seq_file *s)
{
struct nvgpu_rbtree_node *node;
nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
while (node) {
struct nvgpu_mem_alloc *alloc =
nvgpu_mem_alloc_from_rbtree_node(node);
kmem_print_mem_alloc(g, alloc, s);
nvgpu_rbtree_enum_next(&node, node);
}
return 0;
}
static int __kmem_traces_show(struct seq_file *s, void *unused)
{
struct gk20a *g = s->private;
nvgpu_lock_tracker(g->vmallocs);
seq_puts(s, "Outstanding vmallocs:\n");
__kmem_traces_dump_tracker(g, g->vmallocs, s);
seq_puts(s, "\n");
nvgpu_unlock_tracker(g->vmallocs);
nvgpu_lock_tracker(g->kmallocs);
seq_puts(s, "Outstanding kmallocs:\n");
__kmem_traces_dump_tracker(g, g->kmallocs, s);
nvgpu_unlock_tracker(g->kmallocs);
return 0;
}
static int __kmem_traces_open(struct inode *inode, struct file *file)
{
return single_open(file, __kmem_traces_show, inode->i_private);
}
static const struct file_operations __kmem_traces_fops = {
.open = __kmem_traces_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
void nvgpu_kmem_debugfs_init(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct dentry *node;
l->debugfs_kmem = debugfs_create_dir("kmem_tracking", l->debugfs);
if (IS_ERR_OR_NULL(l->debugfs_kmem))
return;
node = debugfs_create_file(g->vmallocs->name, S_IRUGO,
l->debugfs_kmem,
g->vmallocs, &__kmem_tracking_fops);
node = debugfs_create_file(g->kmallocs->name, S_IRUGO,
l->debugfs_kmem,
g->kmallocs, &__kmem_tracking_fops);
node = debugfs_create_file("traces", S_IRUGO,
l->debugfs_kmem,
g, &__kmem_traces_fops);
}

View File

@@ -0,0 +1,23 @@
/*
* Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#ifndef __NVGPU_DEBUG_KMEM_H__
#define __NVGPU_DEBUG_KMEM_H__
struct gk20a;
#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
void nvgpu_kmem_debugfs_init(struct gk20a *g);
#endif
#endif /* __NVGPU_DEBUG_KMEM_H__ */

View File

@@ -0,0 +1,95 @@
/*
* Copyright (C) 2018-2019 NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#include "debug_ltc.h"
#include "os_linux.h"
#include <nvgpu/gk20a.h>
#include <nvgpu/nvgpu_init.h>
#include <linux/debugfs.h>
#include <linux/uaccess.h>
static ssize_t ltc_intr_illegal_compstat_read(struct file *file,
char __user *user_buf, size_t count, loff_t *ppos)
{
char buf[3];
struct gk20a *g = file->private_data;
if (g->ltc_intr_en_illegal_compstat)
buf[0] = 'Y';
else
buf[0] = 'N';
buf[1] = '\n';
buf[2] = 0x00;
return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
}
static ssize_t ltc_intr_illegal_compstat_write(struct file *file,
const char __user *user_buf, size_t count, loff_t *ppos)
{
char buf[3];
int buf_size;
bool intr_illegal_compstat_enabled;
struct gk20a *g = file->private_data;
int err;
if (!g->ops.ltc.intr.en_illegal_compstat)
return -EINVAL;
buf_size = min(count, (sizeof(buf)-1));
if (copy_from_user(buf, user_buf, buf_size))
return -EFAULT;
err = gk20a_busy(g);
if (err)
return err;
if (strtobool(buf, &intr_illegal_compstat_enabled) == 0) {
g->ops.ltc.intr.en_illegal_compstat(g,
intr_illegal_compstat_enabled);
g->ltc_intr_en_illegal_compstat = intr_illegal_compstat_enabled;
}
gk20a_idle(g);
return buf_size;
}
static const struct file_operations ltc_intr_illegal_compstat_fops = {
.open = simple_open,
.read = ltc_intr_illegal_compstat_read,
.write = ltc_intr_illegal_compstat_write,
};
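/*
* Usage sketch for the ltc/intr_illegal_compstat_enable node created below
* (the path prefix is an assumption):
*
*   echo Y > /sys/kernel/debug/<gpu>/ltc/intr_illegal_compstat_enable
*   cat /sys/kernel/debug/<gpu>/ltc/intr_illegal_compstat_enable
*
* The write path takes a gk20a_busy() reference so the HAL call runs with
* the GPU powered on; the read path reports the cached
* g->ltc_intr_en_illegal_compstat flag.
*/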
int nvgpu_ltc_debugfs_init(struct gk20a *g)
{
struct dentry *d;
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct dentry *gpu_root = l->debugfs;
l->debugfs_ltc = debugfs_create_dir("ltc", gpu_root);
if (IS_ERR_OR_NULL(l->debugfs_ltc))
return -ENODEV;
/* Debug fs node to enable/disable illegal_compstat */
d = debugfs_create_file("intr_illegal_compstat_enable", 0600,
l->debugfs_ltc, g,
&ltc_intr_illegal_compstat_fops);
if (!d)
return -ENOMEM;
return 0;
}

View File

@@ -0,0 +1,21 @@
/*
* Copyright (C) 2018 NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#ifndef __NVGPU_DEBUG_LTC_H__
#define __NVGPU_DEBUG_LTC_H__
struct gk20a;
int nvgpu_ltc_debugfs_init(struct gk20a *g);
#endif /* __NVGPU_DEBUG_LTC_H__ */

View File

@@ -0,0 +1,104 @@
/*
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/debugfs.h>
#include <nvgpu/pmu/pmgr.h>
#include "os_linux.h"
static int pmgr_pwr_devices_get_power_u64(void *data, u64 *p)
{
struct gk20a *g = (struct gk20a *)data;
int err;
u32 val;
err = pmgr_pwr_devices_get_power(g, &val);
*p = val;
return err;
}
static int pmgr_pwr_devices_get_current_u64(void *data, u64 *p)
{
struct gk20a *g = (struct gk20a *)data;
int err;
u32 val;
err = pmgr_pwr_devices_get_current(g, &val);
*p = val;
return err;
}
static int pmgr_pwr_devices_get_voltage_u64(void *data, u64 *p)
{
struct gk20a *g = (struct gk20a *)data;
int err;
u32 val;
err = pmgr_pwr_devices_get_voltage(g, &val);
*p = val;
return err;
}
DEFINE_SIMPLE_ATTRIBUTE(
pmgr_power_ctrl_fops, pmgr_pwr_devices_get_power_u64, NULL, "%llu\n");
DEFINE_SIMPLE_ATTRIBUTE(
pmgr_current_ctrl_fops, pmgr_pwr_devices_get_current_u64, NULL, "%llu\n");
DEFINE_SIMPLE_ATTRIBUTE(
pmgr_voltage_ctrl_fops, pmgr_pwr_devices_get_voltage_u64, NULL, "%llu\n");
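/*
* The three simple attributes above back the read-only "power", "current"
* and "voltage" files created in pmgr_debugfs_init() below; each read
* forwards to the matching pmgr_pwr_devices_get_*() query and widens the
* 32-bit result to the u64 expected by the "%llu" format.
*/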
static void pmgr_debugfs_init(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct dentry *dbgentry;
dbgentry = debugfs_create_file(
"power", S_IRUGO, l->debugfs, g, &pmgr_power_ctrl_fops);
if (!dbgentry)
nvgpu_err(g, "debugfs entry create failed for power");
dbgentry = debugfs_create_file(
"current", S_IRUGO, l->debugfs, g, &pmgr_current_ctrl_fops);
if (!dbgentry)
nvgpu_err(g, "debugfs entry create failed for current");
dbgentry = debugfs_create_file(
"voltage", S_IRUGO, l->debugfs, g, &pmgr_voltage_ctrl_fops);
if (!dbgentry)
nvgpu_err(g, "debugfs entry create failed for voltage");
}
int nvgpu_pmgr_init_debugfs_linux(struct nvgpu_os_linux *l)
{
struct gk20a *g = &l->g;
int ret = 0;
if (!nvgpu_is_enabled(g, NVGPU_PMU_PSTATE))
return ret;
if (!g->ops.clk.support_pmgr_domain)
return ret;
pmgr_debugfs_init(g);
return ret;
}

View File

@@ -0,0 +1,28 @@
/*
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __LINUX_DEBUG_PMGR_H
#define __LINUX_DEBUG_PMGR_H
#ifdef CONFIG_DEBUG_FS
int nvgpu_pmgr_init_debugfs_linux(struct nvgpu_os_linux *l);
#else
static inline int nvgpu_pmgr_init_debugfs_linux(struct nvgpu_os_linux *l)
{
return 0;
}
#endif
#endif

View File

@@ -0,0 +1,504 @@
/*
* Copyright (C) 2018-2019, NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#include <nvgpu/enabled.h>
#include <nvgpu/pmu/pmu_perfmon.h>
#include <nvgpu/pmu/debug.h>
#include <nvgpu/pmu/pmu_pg.h>
#include <nvgpu/pmu/fw.h>
#include <nvgpu/nvgpu_init.h>
#include "debug_pmu.h"
#include "os_linux.h"
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/uaccess.h>
static int lpwr_debug_show(struct seq_file *s, void *data)
{
struct gk20a *g = s->private;
struct nvgpu_pmu *pmu = g->pmu;
if (!g->can_elpg) {
return 0;
}
if (pmu->pg->engines_feature_list &&
pmu->pg->engines_feature_list(g,
PMU_PG_ELPG_ENGINE_ID_GRAPHICS) !=
NVGPU_PMU_GR_FEATURE_MASK_POWER_GATING) {
seq_printf(s, "PSTATE: %u\n"
"RPPG Enabled: %u\n"
"RPPG ref count: %u\n"
"RPPG state: %u\n"
"MSCG Enabled: %u\n"
"MSCG pstate state: %u\n"
"MSCG transition state: %u\n",
g->ops.clk_arb.get_current_pstate(g),
g->elpg_enabled, g->pmu->pg->elpg_refcnt,
g->pmu->pg->elpg_stat, g->mscg_enabled,
g->pmu->pg->mscg_stat, g->pmu->pg->mscg_transition_state);
} else {
seq_printf(s, "ELPG Enabled: %u\n"
"ELPG ref count: %u\n"
"ELPG state: %u\n",
g->elpg_enabled, g->pmu->pg->elpg_refcnt,
g->pmu->pg->elpg_stat);
}
return 0;
}
static int lpwr_debug_open(struct inode *inode, struct file *file)
{
return single_open(file, lpwr_debug_show, inode->i_private);
}
static const struct file_operations lpwr_debug_fops = {
.open = lpwr_debug_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static int mscg_stat_show(struct seq_file *s, void *data)
{
struct gk20a *g = s->private;
u64 total_ingating, total_ungating, residency, divisor, dividend;
struct pmu_pg_stats_data pg_stat_data = { 0 };
int err;
/* Don't unnecessarily power on the device */
if (nvgpu_is_powered_on(g)) {
err = gk20a_busy(g);
if (err)
return err;
nvgpu_pmu_get_pg_stats(g,
PMU_PG_ELPG_ENGINE_ID_MS, &pg_stat_data);
gk20a_idle(g);
}
total_ingating = g->pg_ingating_time_us +
(u64)pg_stat_data.ingating_time;
total_ungating = g->pg_ungating_time_us +
(u64)pg_stat_data.ungating_time;
divisor = total_ingating + total_ungating;
/* We compute the residency on a scale of 1000 */
dividend = total_ingating * 1000;
if (divisor)
residency = div64_u64(dividend, divisor);
else
residency = 0;
seq_printf(s,
"Time in MSCG: %llu us\n"
"Time out of MSCG: %llu us\n"
"MSCG residency ratio: %llu\n"
"MSCG Entry Count: %u\n"
"MSCG Avg Entry latency %u\n"
"MSCG Avg Exit latency %u\n",
total_ingating, total_ungating,
residency, pg_stat_data.gating_cnt,
pg_stat_data.avg_entry_latency_us,
pg_stat_data.avg_exit_latency_us);
return 0;
}
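/*
* Worked example for the scale-of-1000 residency above (illustrative
* numbers): with 9000 us gated and 1000 us ungated,
* residency = (9000 * 1000) / (9000 + 1000) = 900, i.e. 90.0% of the
* sampled time was spent in MSCG. elpg_stat_show() below uses the same
* computation for ELPG.
*/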
static int mscg_stat_open(struct inode *inode, struct file *file)
{
return single_open(file, mscg_stat_show, inode->i_private);
}
static const struct file_operations mscg_stat_fops = {
.open = mscg_stat_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static int mscg_transitions_show(struct seq_file *s, void *data)
{
struct gk20a *g = s->private;
struct pmu_pg_stats_data pg_stat_data = { 0 };
u32 total_gating_cnt;
int err;
if (nvgpu_is_powered_on(g)) {
err = gk20a_busy(g);
if (err)
return err;
nvgpu_pmu_get_pg_stats(g,
PMU_PG_ELPG_ENGINE_ID_MS, &pg_stat_data);
gk20a_idle(g);
}
total_gating_cnt = g->pg_gating_cnt + pg_stat_data.gating_cnt;
seq_printf(s, "%u\n", total_gating_cnt);
return 0;
}
static int mscg_transitions_open(struct inode *inode, struct file *file)
{
return single_open(file, mscg_transitions_show, inode->i_private);
}
static const struct file_operations mscg_transitions_fops = {
.open = mscg_transitions_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static int elpg_stat_show(struct seq_file *s, void *data)
{
struct gk20a *g = s->private;
struct pmu_pg_stats_data pg_stat_data = { 0 };
u64 total_ingating, total_ungating, residency, divisor, dividend;
int err;
/* Don't unnecessarily power on the device */
if (nvgpu_is_powered_on(g)) {
err = gk20a_busy(g);
if (err)
return err;
nvgpu_pmu_get_pg_stats(g,
PMU_PG_ELPG_ENGINE_ID_GRAPHICS, &pg_stat_data);
gk20a_idle(g);
}
total_ingating = g->pg_ingating_time_us +
(u64)pg_stat_data.ingating_time;
total_ungating = g->pg_ungating_time_us +
(u64)pg_stat_data.ungating_time;
divisor = total_ingating + total_ungating;
/* We compute the residency on a scale of 1000 */
dividend = total_ingating * 1000;
if (divisor)
residency = div64_u64(dividend, divisor);
else
residency = 0;
seq_printf(s,
"Time in ELPG: %llu us\n"
"Time out of ELPG: %llu us\n"
"ELPG residency ratio: %llu\n"
"ELPG Entry Count: %u\n"
"ELPG Avg Entry latency %u us\n"
"ELPG Avg Exit latency %u us\n",
total_ingating, total_ungating,
residency, pg_stat_data.gating_cnt,
pg_stat_data.avg_entry_latency_us,
pg_stat_data.avg_exit_latency_us);
return 0;
}
static int elpg_stat_open(struct inode *inode, struct file *file)
{
return single_open(file, elpg_stat_show, inode->i_private);
}
static const struct file_operations elpg_stat_fops = {
.open = elpg_stat_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static int elpg_transitions_show(struct seq_file *s, void *data)
{
struct gk20a *g = s->private;
struct pmu_pg_stats_data pg_stat_data = { 0 };
u32 total_gating_cnt;
int err;
if (nvgpu_is_powered_on(g)) {
err = gk20a_busy(g);
if (err)
return err;
nvgpu_pmu_get_pg_stats(g,
PMU_PG_ELPG_ENGINE_ID_GRAPHICS, &pg_stat_data);
gk20a_idle(g);
}
total_gating_cnt = g->pg_gating_cnt + pg_stat_data.gating_cnt;
seq_printf(s, "%u\n", total_gating_cnt);
return 0;
}
static int elpg_transitions_open(struct inode *inode, struct file *file)
{
return single_open(file, elpg_transitions_show, inode->i_private);
}
static const struct file_operations elpg_transitions_fops = {
.open = elpg_transitions_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
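/*
* falc_trace_show() below decodes the PMU RTOS trace buffer copied out of
* pmu->trace_buf. The record layout assumed here: each 0x40-byte record
* starts with an index word followed by argument words, with the format
* string beginning at byte offset 20; hex specifiers located in the string
* by nvgpu_find_hex_in_string() are substituted with the recorded argument
* words as the record is printed.
*/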
static int falc_trace_show(struct seq_file *s, void *data)
{
struct gk20a *g = s->private;
struct nvgpu_pmu *pmu = g->pmu;
u32 i = 0, j = 0, k, l, m;
char part_str[40];
void *tracebuffer;
char *trace;
u32 *trace1;
/* allocate system memory to copy pmu trace buffer */
tracebuffer = nvgpu_kzalloc(g, PMU_RTOS_TRACE_BUFSIZE);
if (tracebuffer == NULL)
return -ENOMEM;
/* read pmu traces into system memory buffer */
nvgpu_mem_rd_n(g, &pmu->trace_buf,
0, tracebuffer, PMU_RTOS_TRACE_BUFSIZE);
trace = (char *)tracebuffer;
trace1 = (u32 *)tracebuffer;
for (i = 0; i < PMU_RTOS_TRACE_BUFSIZE; i += 0x40) {
for (j = 0; j < 0x40; j++)
if (trace1[(i / 4) + j])
break;
if (j == 0x40)
break;
seq_printf(s, "Index %x: ", trace1[(i / 4)]);
l = 0;
m = 0;
while (nvgpu_find_hex_in_string((trace+i+20+m), g, &k)) {
if (k >= 40)
break;
(void) strncpy(part_str, (trace+i+20+m), k);
part_str[k] = 0;
seq_printf(s, "%s0x%x", part_str,
trace1[(i / 4) + 1 + l]);
l++;
m += k + 2;
}
seq_printf(s, "%s", (trace+i+20+m));
}
nvgpu_kfree(g, tracebuffer);
return 0;
}
static int falc_trace_open(struct inode *inode, struct file *file)
{
return single_open(file, falc_trace_show, inode->i_private);
}
static const struct file_operations falc_trace_fops = {
.open = falc_trace_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static int perfmon_events_enable_show(struct seq_file *s, void *data)
{
struct gk20a *g = s->private;
seq_printf(s, "%u\n",
nvgpu_pmu_perfmon_get_sampling_enable_status(g->pmu) ? 1 : 0);
return 0;
}
static int perfmon_events_enable_open(struct inode *inode, struct file *file)
{
return single_open(file, perfmon_events_enable_show, inode->i_private);
}
static ssize_t perfmon_events_enable_write(struct file *file,
const char __user *userbuf, size_t count, loff_t *ppos)
{
struct seq_file *s = file->private_data;
struct gk20a *g = s->private;
unsigned long val = 0;
char buf[40];
int buf_size;
int err;
bool status;
(void) memset(buf, 0, sizeof(buf));
buf_size = min(count, (sizeof(buf)-1));
if (copy_from_user(buf, userbuf, buf_size))
return -EFAULT;
if (kstrtoul(buf, 10, &val) < 0)
return -EINVAL;
/* Don't turn on gk20a unnecessarily */
if (nvgpu_is_powered_on(g)) {
err = gk20a_busy(g);
if (err)
return err;
if (val && !nvgpu_pmu_perfmon_get_sampling_enable_status(g->pmu)
&& nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) {
nvgpu_pmu_perfmon_set_sampling_enable_status(g->pmu,
true);
nvgpu_pmu_perfmon_start_sample(g, g->pmu,
g->pmu->pmu_perfmon);
} else if (!val
&& nvgpu_pmu_perfmon_get_sampling_enable_status(g->pmu)
&& nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) {
nvgpu_pmu_perfmon_set_sampling_enable_status(g->pmu,
false);
nvgpu_pmu_perfmon_stop_sample(g, g->pmu,
g->pmu->pmu_perfmon);
}
gk20a_idle(g);
} else {
status = val ? true : false;
nvgpu_pmu_perfmon_set_sampling_enable_status(g->pmu, status);
}
return count;
}
static const struct file_operations perfmon_events_enable_fops = {
.open = perfmon_events_enable_open,
.read = seq_read,
.write = perfmon_events_enable_write,
.llseek = seq_lseek,
.release = single_release,
};
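/*
* Usage sketch for the perfmon sampling nodes (the path prefix is an
* assumption):
*
*   echo 1 > /sys/kernel/debug/<gpu>/perfmon_events_enable   # start sampling
*   echo 0 > /sys/kernel/debug/<gpu>/perfmon_events_enable   # stop sampling
*   cat /sys/kernel/debug/<gpu>/perfmon_events_count         # events so far
*
* gk20a_pmu_debugfs_init() below creates perfmon_events_enable S_IRUGO, so
* the mode may need to be relaxed before writing. When the GPU is powered
* off, only the cached enable status is updated.
*/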
static int perfmon_events_count_show(struct seq_file *s, void *data)
{
struct gk20a *g = s->private;
seq_printf(s, "%llu\n", nvgpu_pmu_perfmon_get_events_count(g->pmu));
return 0;
}
static int perfmon_events_count_open(struct inode *inode, struct file *file)
{
return single_open(file, perfmon_events_count_show, inode->i_private);
}
static const struct file_operations perfmon_events_count_fops = {
.open = perfmon_events_count_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static int security_show(struct seq_file *s, void *data)
{
struct gk20a *g = s->private;
seq_printf(s, "%d\n", nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY));
return 0;
}
static int security_open(struct inode *inode, struct file *file)
{
return single_open(file, security_show, inode->i_private);
}
static const struct file_operations security_fops = {
.open = security_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
int gk20a_pmu_debugfs_init(struct gk20a *g)
{
struct dentry *d;
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
d = debugfs_create_file(
"lpwr_debug", S_IRUGO|S_IWUSR, l->debugfs, g,
&lpwr_debug_fops);
if (!d)
goto err_out;
d = debugfs_create_file(
"mscg_residency", S_IRUGO|S_IWUSR, l->debugfs, g,
&mscg_stat_fops);
if (!d)
goto err_out;
d = debugfs_create_file(
"mscg_transitions", S_IRUGO, l->debugfs, g,
&mscg_transitions_fops);
if (!d)
goto err_out;
d = debugfs_create_file(
"elpg_residency", S_IRUGO|S_IWUSR, l->debugfs, g,
&elpg_stat_fops);
if (!d)
goto err_out;
d = debugfs_create_file(
"elpg_transitions", S_IRUGO, l->debugfs, g,
&elpg_transitions_fops);
if (!d)
goto err_out;
d = debugfs_create_file(
"pmu_security", S_IRUGO, l->debugfs, g,
&security_fops);
if (!d)
goto err_out;
/* No access to PMU if virtual */
if (!g->is_virtual) {
d = debugfs_create_file(
"falc_trace", S_IRUGO, l->debugfs, g,
&falc_trace_fops);
if (!d)
goto err_out;
d = debugfs_create_file(
"perfmon_events_enable", S_IRUGO, l->debugfs, g,
&perfmon_events_enable_fops);
if (!d)
goto err_out;
d = debugfs_create_file(
"perfmon_events_count", S_IRUGO, l->debugfs, g,
&perfmon_events_count_fops);
if (!d)
goto err_out;
}
return 0;
err_out:
pr_err("%s: Failed to make debugfs node\n", __func__);
return -ENOMEM;
}

View File

@@ -0,0 +1,21 @@
/*
* Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#ifndef __NVGPU_DEBUG_PMU_H__
#define __NVGPU_DEBUG_PMU_H__
struct gk20a;
int gk20a_pmu_debugfs_init(struct gk20a *g);
#endif /* __NVGPU_DEBUG_PMU_H__ */

View File

@@ -0,0 +1,51 @@
/*
* Copyright (c) 2019-2020, NVIDIA Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/debugfs.h>
#include "os_linux.h"
#include "include/nvgpu/bios.h"
#include <nvgpu/pmu/clk/clk.h>
#include <nvgpu/pmu/perf.h>
static int get_s_param_info(void *data, u64 *val)
{
struct gk20a *g = (struct gk20a *)data;
int status = 0;
status = nvgpu_pmu_perf_vfe_get_s_param(g, val);
if (status != 0) {
nvgpu_err(g, "Vfe_var get s_param failed");
return status;
}
return status;
}
DEFINE_SIMPLE_ATTRIBUTE(s_param_fops, get_s_param_info, NULL, "%llu\n");
int nvgpu_s_param_init_debugfs(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct dentry *dbgentry;
dbgentry = debugfs_create_file(
"s_param", S_IRUGO, l->debugfs, g, &s_param_fops);
if (!dbgentry) {
pr_err("%s: Failed to make debugfs node\n", __func__);
return -ENOMEM;
}
return 0;
}
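/*
 * Note on the pattern above (a sketch, not driver code): the kernel's
 * DEFINE_SIMPLE_ATTRIBUTE(fops, get, set, fmt) macro builds file operations
 * whose read path calls get(data, &val) and formats val with fmt; passing
 * NULL for set makes the node effectively read-only. Minimal usage with
 * hypothetical names:
 *
 *	static int foo_get(void *data, u64 *val) { *val = 42; return 0; }
 *	DEFINE_SIMPLE_ATTRIBUTE(foo_fops, foo_get, NULL, "%llu\n");
 *	debugfs_create_file("foo", S_IRUGO, parent, data, &foo_fops);
 */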

View File

@@ -0,0 +1,31 @@
/*
* Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __DEBUG_S_PARAM_TU104_H
#define __DEBUG_S_PARAM_TU104_H
#include <nvgpu/gk20a.h>
#ifdef CONFIG_DEBUG_FS
int nvgpu_s_param_init_debugfs(struct gk20a *g);
#else
static inline int nvgpu_s_param_init_debugfs(struct gk20a *g)
{
return 0;
}
#endif
#endif

View File

@@ -0,0 +1,80 @@
/*
* Copyright (C) 2017-2020 NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#include "debug_sched.h"
#include "os_linux.h"
#include <nvgpu/nvgpu_init.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
static int gk20a_sched_debugfs_show(struct seq_file *s, void *unused)
{
struct gk20a *g = s->private;
struct nvgpu_sched_ctrl *sched = &g->sched_ctrl;
bool sched_busy = true;
int n = sched->bitmap_size / sizeof(u64);
int i;
int err;
err = gk20a_busy(g);
if (err)
return err;
if (nvgpu_mutex_tryacquire(&sched->busy_lock)) {
sched_busy = false;
nvgpu_mutex_release(&sched->busy_lock);
}
seq_printf(s, "control_locked=%d\n", sched->control_locked);
seq_printf(s, "busy=%d\n", sched_busy);
seq_printf(s, "bitmap_size=%zu\n", sched->bitmap_size);
nvgpu_mutex_acquire(&sched->status_lock);
seq_puts(s, "active_tsg_bitmap\n");
for (i = 0; i < n; i++)
seq_printf(s, "\t0x%016llx\n", sched->active_tsg_bitmap[i]);
seq_puts(s, "recent_tsg_bitmap\n");
for (i = 0; i < n; i++)
seq_printf(s, "\t0x%016llx\n", sched->recent_tsg_bitmap[i]);
nvgpu_mutex_release(&sched->status_lock);
gk20a_idle(g);
return 0;
}
static int gk20a_sched_debugfs_open(struct inode *inode, struct file *file)
{
return single_open(file, gk20a_sched_debugfs_show, inode->i_private);
}
static const struct file_operations gk20a_sched_debugfs_fops = {
.open = gk20a_sched_debugfs_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
void gk20a_sched_debugfs_init(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
debugfs_create_file("sched_ctrl", S_IRUGO, l->debugfs,
g, &gk20a_sched_debugfs_fops);
}

View File

@@ -0,0 +1,21 @@
/*
* Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#ifndef __NVGPU_DEBUG_SCHED_H__
#define __NVGPU_DEBUG_SCHED_H__
struct gk20a;
void gk20a_sched_debugfs_init(struct gk20a *g);
#endif /* __NVGPU_DEBUG_SCHED_H__ */

View File

@@ -0,0 +1,47 @@
/*
* Copyright (c) 2019-2020, NVIDIA Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/debugfs.h>
#include <nvgpu/pmu/therm.h>
#include "os_linux.h"
static int therm_get_internal_sensor_curr_temp(void *data, u64 *val)
{
struct gk20a *g = (struct gk20a *)data;
u32 readval;
int err;
err = nvgpu_pmu_therm_channel_get_curr_temp(g, &readval);
if (!err)
*val = readval;
return err;
}
DEFINE_SIMPLE_ATTRIBUTE(therm_ctrl_fops, therm_get_internal_sensor_curr_temp, NULL, "%llu\n");
int tu104_therm_init_debugfs(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct dentry *dbgentry;
dbgentry = debugfs_create_file(
"temp", S_IRUGO, l->debugfs, g, &therm_ctrl_fops);
if (!dbgentry)
nvgpu_err(g, "debugfs entry create failed for therm_curr_temp");
return 0;
}

View File

@@ -0,0 +1,29 @@
/*
* Copyright (c) 2018, NVIDIA Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __DEBUG_THERM_GP106_H
#define __DEBUG_THERM_GP106_H
#ifdef CONFIG_DEBUG_FS
int tu104_therm_init_debugfs(struct gk20a *g);
#else
static inline int tu104_therm_init_debugfs(struct gk20a *g)
{
return 0;
}
#endif
#endif

View File

@@ -0,0 +1,93 @@
/*
* Copyright (c) 2019-2020, NVIDIA Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/debugfs.h>
#include "os_linux.h"
#include <nvgpu/pmu/volt.h>
static int get_curr_voltage(void *data, u64 *val)
{
struct gk20a *g = (struct gk20a *)data;
u32 readval;
int err;
err = nvgpu_pmu_volt_get_curr_volt_ps35(g, &readval);
if (!err)
*val = readval;
return err;
}
DEFINE_SIMPLE_ATTRIBUTE(curr_volt_ctrl_fops, get_curr_voltage, NULL, "%llu\n");
static int get_min_voltage(void *data, u64 *val)
{
struct gk20a *g = (struct gk20a *)data;
u32 minval, maxval;
int err;
err = nvgpu_pmu_volt_get_vmin_vmax_ps35(g, &minval, &maxval);
if (!err)
*val = minval;
return err;
}
DEFINE_SIMPLE_ATTRIBUTE(min_volt_ctrl_fops, get_min_voltage, NULL, "%llu\n");
static int get_max_voltage(void *data, u64 *val)
{
struct gk20a *g = (struct gk20a *)data;
u32 minval, maxval;
int err;
err = nvgpu_pmu_volt_get_vmin_vmax_ps35(g, &minval, &maxval);
if (!err)
*val = maxval;
return err;
}
DEFINE_SIMPLE_ATTRIBUTE(max_volt_ctrl_fops, get_max_voltage, NULL, "%llu\n");
int nvgpu_volt_init_debugfs(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct dentry *dbgentry, *volt_root;
volt_root = debugfs_create_dir("volt", l->debugfs);
dbgentry = debugfs_create_file(
"current_voltage", S_IRUGO, volt_root, g, &curr_volt_ctrl_fops);
if (!dbgentry) {
pr_err("%s: Failed to make debugfs node\n", __func__);
return -ENOMEM;
}
dbgentry = debugfs_create_file("minimum_voltage",
S_IRUGO, volt_root, g, &min_volt_ctrl_fops);
if (!dbgentry) {
pr_err("%s: Failed to make debugfs node\n", __func__);
return -ENOMEM;
}
dbgentry = debugfs_create_file("maximum_voltage",
S_IRUGO, volt_root, g, &max_volt_ctrl_fops);
if (!dbgentry) {
pr_err("%s: Failed to make debugfs node\n", __func__);
return -ENOMEM;
}
return 0;
}

View File

@@ -0,0 +1,29 @@
/*
* Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __DEBUG_VOLT_H
#define __DEBUG_VOLT_H
#ifdef CONFIG_DEBUG_FS
int nvgpu_volt_init_debugfs(struct gk20a *g);
#else
static inline int nvgpu_volt_init_debugfs(struct gk20a *g)
{
return 0;
}
#endif
#endif

View File

@@ -0,0 +1,177 @@
/*
* Copyright (C) 2018 NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#include <nvgpu/types.h>
#include <nvgpu/xve.h>
#include <nvgpu/timers.h>
#include "debug_xve.h"
#include "os_linux.h"
#include <linux/debugfs.h>
#include <linux/uaccess.h>
static ssize_t xve_link_speed_write(struct file *filp,
const char __user *buff,
size_t len, loff_t *off)
{
struct gk20a *g = ((struct seq_file *)filp->private_data)->private;
char kbuff[16];
u32 buff_size, check_len;
u32 link_speed = 0;
int ret;
buff_size = min_t(size_t, 16, len);
(void) memset(kbuff, 0, 16);
if (copy_from_user(kbuff, buff, buff_size))
return -EFAULT;
check_len = strlen("Gen1");
if (strncmp(kbuff, "Gen1", check_len) == 0)
link_speed = GPU_XVE_SPEED_2P5;
else if (strncmp(kbuff, "Gen2", check_len) == 0)
link_speed = GPU_XVE_SPEED_5P0;
else if (strncmp(kbuff, "Gen3", check_len) == 0)
link_speed = GPU_XVE_SPEED_8P0;
else
nvgpu_err(g, "%s: Unknown PCIe speed: %s",
__func__, kbuff);
if (!link_speed)
return -EINVAL;
/* Brief pause... To help rate limit this. */
nvgpu_msleep(250);
/*
* And actually set the speed. Yay.
*/
ret = g->ops.xve.set_speed(g, link_speed);
if (ret)
return ret;
return len;
}
static int xve_link_speed_show(struct seq_file *s, void *unused)
{
struct gk20a *g = s->private;
u32 speed;
int err;
err = g->ops.xve.get_speed(g, &speed);
if (err)
return err;
seq_printf(s, "Current PCIe speed:\n %s\n", xve_speed_to_str(speed));
return 0;
}
static int xve_link_speed_open(struct inode *inode, struct file *file)
{
return single_open(file, xve_link_speed_show, inode->i_private);
}
static const struct file_operations xve_link_speed_fops = {
.open = xve_link_speed_open,
.read = seq_read,
.write = xve_link_speed_write,
.llseek = seq_lseek,
.release = single_release,
};
static int xve_available_speeds_show(struct seq_file *s, void *unused)
{
struct gk20a *g = s->private;
u32 available_speeds;
g->ops.xve.available_speeds(g, &available_speeds);
seq_puts(s, "Available PCIe bus speeds:\n");
if (available_speeds & GPU_XVE_SPEED_2P5)
seq_puts(s, " Gen1\n");
if (available_speeds & GPU_XVE_SPEED_5P0)
seq_puts(s, " Gen2\n");
if (available_speeds & GPU_XVE_SPEED_8P0)
seq_puts(s, " Gen3\n");
return 0;
}
static int xve_available_speeds_open(struct inode *inode, struct file *file)
{
return single_open(file, xve_available_speeds_show, inode->i_private);
}
static const struct file_operations xve_available_speeds_fops = {
.open = xve_available_speeds_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
static int xve_link_control_status_show(struct seq_file *s, void *unused)
{
struct gk20a *g = s->private;
u32 link_status;
link_status = g->ops.xve.get_link_control_status(g);
seq_printf(s, "0x%08x\n", link_status);
return 0;
}
static int xve_link_control_status_open(struct inode *inode, struct file *file)
{
return single_open(file, xve_link_control_status_show, inode->i_private);
}
static const struct file_operations xve_link_control_status_fops = {
.open = xve_link_control_status_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
int nvgpu_xve_debugfs_init(struct gk20a *g)
{
int err = -ENODEV;
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct dentry *gpu_root = l->debugfs;
l->debugfs_xve = debugfs_create_dir("xve", gpu_root);
if (IS_ERR_OR_NULL(l->debugfs_xve))
goto fail;
/*
* These are just debug nodes. If they fail to get made it's not worth
* worrying the higher level SW.
*/
debugfs_create_file("link_speed", S_IRUGO,
l->debugfs_xve, g,
&xve_link_speed_fops);
debugfs_create_file("available_speeds", S_IRUGO,
l->debugfs_xve, g,
&xve_available_speeds_fops);
debugfs_create_file("link_control_status", S_IRUGO,
l->debugfs_xve, g,
&xve_link_control_status_fops);
err = 0;
fail:
return err;
}

View File

@@ -0,0 +1,21 @@
/*
* Copyright (C) 2017 NVIDIA Corporation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#ifndef __NVGPU_DEBUG_XVE_H__
#define __NVGPU_DEBUG_XVE_H__
struct gk20a;
int nvgpu_xve_debugfs_init(struct gk20a *g);
#endif /* __NVGPU_DEBUG_XVE_H__ */

View File

@@ -0,0 +1,315 @@
/*
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/device.h>
#include <linux/dma-buf.h>
#include <linux/scatterlist.h>
#include <nvgpu/comptags.h>
#include <nvgpu/enabled.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/linux/vm.h>
#include <nvgpu/bug.h>
#include <nvgpu/fence.h>
#include <nvgpu/vm.h>
#include "platform_gk20a.h"
#include "dmabuf_priv.h"
#include "os_linux.h"
#include "dmabuf_vidmem.h"
void gk20a_mm_delete_priv(struct gk20a_dmabuf_priv *priv);
enum nvgpu_aperture gk20a_dmabuf_aperture(struct gk20a *g,
struct dma_buf *dmabuf)
{
#ifdef CONFIG_NVGPU_DGPU
struct gk20a *buf_owner = nvgpu_vidmem_buf_owner(dmabuf);
bool unified_memory = nvgpu_is_enabled(g, NVGPU_MM_UNIFIED_MEMORY);
if (buf_owner == NULL) {
/* Not nvgpu-allocated, assume system memory */
return APERTURE_SYSMEM;
} else if ((buf_owner == g) && unified_memory) {
/* Looks like our video memory, but this gpu doesn't support
* it. Warn about a bug and bail out */
nvgpu_do_assert_print(g,
"dmabuf is our vidmem but we don't have local vidmem");
return APERTURE_INVALID;
} else if (buf_owner != g) {
/* Someone else's vidmem */
return APERTURE_INVALID;
} else {
/* Yay, buf_owner == g */
return APERTURE_VIDMEM;
}
#else
return APERTURE_SYSMEM;
#endif
}
static struct gk20a_dmabuf_priv *dma_buf_ops_to_gk20a_priv(
struct dma_buf_ops *ops)
{
struct gk20a_dmabuf_priv *priv = container_of(ops,
struct gk20a_dmabuf_priv, local_ops);
return priv;
}
static void nvgpu_dma_buf_release(struct dma_buf *dmabuf)
{
struct gk20a_dmabuf_priv *priv = NULL;
struct nvgpu_os_linux *l = NULL;
priv = dma_buf_ops_to_gk20a_priv((struct dma_buf_ops *)dmabuf->ops);
if (priv != NULL) {
l = nvgpu_os_linux_from_gk20a(priv->g);
} else {
BUG();
return;
}
/* remove this entry from the global tracking list */
nvgpu_mutex_acquire(&l->dmabuf_priv_list_lock);
gk20a_mm_delete_priv(priv);
nvgpu_mutex_release(&l->dmabuf_priv_list_lock);
dmabuf->ops->release(dmabuf);
}
static int gk20a_dma_buf_set_drvdata(struct dma_buf *dmabuf, struct device *device,
struct gk20a_dmabuf_priv *priv)
{
nvgpu_mutex_acquire(&priv->lock);
priv->dmabuf = dmabuf;
mutex_lock(&dmabuf->lock);
priv->previous_ops = dmabuf->ops;
/*
* Make a copy of the original ops struct and then update the
* release pointer
*/
priv->local_ops = *(dmabuf->ops);
priv->local_ops.release = nvgpu_dma_buf_release;
dmabuf->ops = &priv->local_ops;
mutex_unlock(&dmabuf->lock);
nvgpu_mutex_release(&priv->lock);
return 0;
}
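/*
 * Sketch (not part of the driver) of the interception pattern used above:
 * keep a writable copy of the exporter's const ops table inside a wrapper,
 * override one callback, and recover the wrapper from the ops pointer with
 * container_of() when that callback fires. All names below are hypothetical.
 *
 *	struct wrapper {
 *		struct dma_buf_ops local_ops;		// writable copy
 *		const struct dma_buf_ops *orig_ops;	// saved original
 *	};
 *
 *	static void wrapped_release(struct dma_buf *buf)
 *	{
 *		struct wrapper *w = container_of(buf->ops,
 *				struct wrapper, local_ops);
 *		// per-buffer teardown goes here
 *		w->orig_ops->release(buf);		// chain to the exporter
 *	}
 *
 *	// install: w->local_ops = *buf->ops; w->orig_ops = buf->ops;
 *	//          w->local_ops.release = wrapped_release;
 *	//          buf->ops = &w->local_ops;
 */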
static struct gk20a_dmabuf_priv *gk20a_dmabuf_priv_from_list(
struct nvgpu_list_node *node)
{
return container_of(node, struct gk20a_dmabuf_priv, list);
}
struct gk20a_dmabuf_priv *gk20a_dma_buf_get_drvdata(
struct dma_buf *dmabuf, struct device *device)
{
struct gk20a_dmabuf_priv *priv = NULL;
mutex_lock(&dmabuf->lock);
if (dmabuf->ops->release == nvgpu_dma_buf_release) {
priv = dma_buf_ops_to_gk20a_priv((struct dma_buf_ops *)dmabuf->ops);
}
mutex_unlock(&dmabuf->lock);
return priv;
}
struct sg_table *nvgpu_mm_pin(struct device *dev,
struct dma_buf *dmabuf, struct dma_buf_attachment **attachment)
{
struct gk20a *g = get_gk20a(dev);
struct dma_buf_attachment *attach = NULL;
struct sg_table *sgt = NULL;
attach = dma_buf_attach(dmabuf, dev);
if (IS_ERR(attach)) {
nvgpu_err(g, "Failed to attach dma_buf (err = %ld)!",
PTR_ERR(attach));
return ERR_CAST(attach);
}
sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
if (IS_ERR(sgt)) {
dma_buf_detach(dmabuf, attach);
nvgpu_err(g, "Failed to map attachment (err = %ld)!",
PTR_ERR(sgt));
return ERR_CAST(sgt);
}
*attachment = attach;
return sgt;
}
void nvgpu_mm_unpin(struct device *dev,
struct dma_buf *dmabuf,
struct dma_buf_attachment *attachment,
struct sg_table *sgt)
{
dma_buf_unmap_attachment(attachment, sgt, DMA_BIDIRECTIONAL);
dma_buf_detach(dmabuf, attachment);
}
/* This function must be called after acquiring the global level
* dmabuf_priv_list_lock.
*/
void gk20a_mm_delete_priv(struct gk20a_dmabuf_priv *priv)
{
struct gk20a_buffer_state *s, *s_tmp;
struct gk20a *g;
struct dma_buf *dmabuf;
if (!priv)
return;
g = priv->g;
dmabuf = priv->dmabuf;
if (priv->comptags.allocated && priv->comptags.lines) {
WARN_ON(!priv->comptag_allocator);
gk20a_comptaglines_free(priv->comptag_allocator,
priv->comptags.offset,
priv->comptags.lines);
}
/* Free buffer states */
nvgpu_list_for_each_entry_safe(s, s_tmp, &priv->states,
gk20a_buffer_state, list) {
nvgpu_user_fence_release(&s->fence);
nvgpu_list_del(&s->list);
nvgpu_kfree(g, s);
}
/* The original pointer to dma_buf_ops is always put back here */
mutex_lock(&dmabuf->lock);
dmabuf->ops = priv->previous_ops;
mutex_unlock(&dmabuf->lock);
/* Remove this entry from the global tracking list */
nvgpu_list_del(&priv->list);
nvgpu_kfree(g, priv);
}
void gk20a_dma_buf_priv_list_clear(struct nvgpu_os_linux *l)
{
struct gk20a_dmabuf_priv *priv, *priv_next;
nvgpu_mutex_acquire(&l->dmabuf_priv_list_lock);
nvgpu_list_for_each_entry_safe(priv, priv_next, &l->dmabuf_priv_list,
gk20a_dmabuf_priv, list) {
gk20a_mm_delete_priv(priv);
}
nvgpu_mutex_release(&l->dmabuf_priv_list_lock);
}
int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev)
{
struct gk20a *g = gk20a_get_platform(dev)->g;
struct gk20a_dmabuf_priv *priv;
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
priv = gk20a_dma_buf_get_drvdata(dmabuf, dev);
if (likely(priv))
return 0;
nvgpu_mutex_acquire(&g->mm.priv_lock);
priv = gk20a_dma_buf_get_drvdata(dmabuf, dev);
if (priv)
goto priv_exist_or_err;
priv = nvgpu_kzalloc(g, sizeof(*priv));
if (!priv) {
priv = ERR_PTR(-ENOMEM);
goto priv_exist_or_err;
}
nvgpu_mutex_init(&priv->lock);
nvgpu_init_list_node(&priv->states);
priv->g = g;
gk20a_dma_buf_set_drvdata(dmabuf, dev, priv);
nvgpu_init_list_node(&priv->list);
/* Append this priv to the global tracker */
nvgpu_mutex_acquire(&l->dmabuf_priv_list_lock);
nvgpu_list_add_tail(&l->dmabuf_priv_list, &priv->list);
nvgpu_mutex_release(&l->dmabuf_priv_list_lock);
priv_exist_or_err:
nvgpu_mutex_release(&g->mm.priv_lock);
if (IS_ERR(priv))
return -ENOMEM;
return 0;
}
int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct gk20a *g,
u64 offset, struct gk20a_buffer_state **state)
{
int err = 0;
struct gk20a_dmabuf_priv *priv;
struct gk20a_buffer_state *s;
struct device *dev = dev_from_gk20a(g);
if (offset >= (u64)dmabuf->size) {
nvgpu_do_assert();
return -EINVAL;
}
err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev);
if (err)
return err;
priv = gk20a_dma_buf_get_drvdata(dmabuf, dev);
if (!priv) {
nvgpu_do_assert();
return -ENOSYS;
}
nvgpu_mutex_acquire(&priv->lock);
nvgpu_list_for_each_entry(s, &priv->states, gk20a_buffer_state, list)
if (s->offset == offset)
goto out;
/* State not found, create state. */
s = nvgpu_kzalloc(g, sizeof(*s));
if (!s) {
err = -ENOMEM;
goto out;
}
s->offset = offset;
nvgpu_init_list_node(&s->list);
nvgpu_mutex_init(&s->lock);
nvgpu_list_add_tail(&s->list, &priv->states);
out:
nvgpu_mutex_release(&priv->lock);
if (!err)
*state = s;
return err;
}

View File

@@ -0,0 +1,120 @@
/*
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef NVGPU_DMABUF_PRIV_H
#define NVGPU_DMABUF_PRIV_H
#include <linux/dma-buf.h>
#include <nvgpu/comptags.h>
#include <nvgpu/list.h>
#include <nvgpu/lock.h>
#include <nvgpu/gmmu.h>
#include <nvgpu/user_fence.h>
struct sg_table;
struct dma_buf;
struct dma_buf_attachment;
struct device;
struct nvgpu_os_linux;
struct gk20a;
struct gk20a_buffer_state {
struct nvgpu_list_node list;
/* The valid compbits and the fence must be changed atomically. */
struct nvgpu_mutex lock;
/*
* Offset of the surface within the dma-buf whose state is
* described by this struct (one dma-buf can contain multiple
* surfaces with different states).
*/
size_t offset;
/* A bitmask of valid sets of compbits (0 = uncompressed). */
u32 valid_compbits;
/* The ZBC color used on this buffer. */
u32 zbc_color;
/*
* This struct reflects the compression state of the associated buffer
* when this fence signals. The fence is provided to userspace via
* struct nvgpu_gpu_prepare_compressible_read_args.
*/
struct nvgpu_user_fence fence;
};
static inline struct gk20a_buffer_state *
gk20a_buffer_state_from_list(struct nvgpu_list_node *node)
{
return (struct gk20a_buffer_state *)
((uintptr_t)node - offsetof(struct gk20a_buffer_state, list));
};
struct gk20a_dmabuf_priv {
struct nvgpu_mutex lock;
struct gk20a *g;
struct gk20a_comptag_allocator *comptag_allocator;
struct gk20a_comptags comptags;
struct nvgpu_list_node states;
u64 buffer_id;
/* Used for retrieving the associated dmabuf from the priv */
struct dma_buf *dmabuf;
/* 'dma_buf->ops' points to a constant struct; it is redirected to this
 * local copy for the remaining lifetime of the dma-buf, i.e. until the
 * driver is shut down or the last reference to the dma_buf instance is
 * put. The local copy replaces the 'release' callback with nvgpu's
 * custom release handler, which frees the priv structure and restores
 * the original ops pointer belonging to the 'producer' of the dma_buf.
 */
struct dma_buf_ops local_ops;
/* Store a copy of the original ops for later restoration */
const struct dma_buf_ops *previous_ops;
/* list node for tracking the dmabuf_priv instances per gpu */
struct nvgpu_list_node list;
};
struct sg_table *nvgpu_mm_pin(struct device *dev,
struct dma_buf *dmabuf,
struct dma_buf_attachment **attachment);
void nvgpu_mm_unpin(struct device *dev,
struct dma_buf *dmabuf,
struct dma_buf_attachment *attachment,
struct sg_table *sgt);
void gk20a_mm_delete_priv(struct gk20a_dmabuf_priv *priv);
int gk20a_dmabuf_alloc_drvdata(struct dma_buf *dmabuf, struct device *dev);
int gk20a_dmabuf_get_state(struct dma_buf *dmabuf, struct gk20a *g,
u64 offset, struct gk20a_buffer_state **state);
void gk20a_dma_buf_priv_list_clear(struct nvgpu_os_linux *l);
struct gk20a_dmabuf_priv *gk20a_dma_buf_get_drvdata(
struct dma_buf *dmabuf, struct device *device);
#endif

View File

@@ -0,0 +1,259 @@
/*
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/version.h>
#include <linux/scatterlist.h>
#include <linux/dma-direction.h>
#include <linux/dma-buf.h>
#include <uapi/linux/nvgpu.h>
#ifdef CONFIG_NVGPU_USE_TEGRA_ALLOC_FD
#include <linux/platform/tegra/tegra_fd.h>
#endif
#include <nvgpu/dma.h>
#include <nvgpu/enabled.h>
#include <nvgpu/vidmem.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/page_allocator.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/nvgpu_init.h>
#include <nvgpu/linux/vm.h>
#include <nvgpu/linux/dma.h>
#include "dmabuf_vidmem.h"
bool nvgpu_addr_is_vidmem_page_alloc(u64 addr)
{
return !!(addr & 1ULL);
}
/* This constant string is used to determine if the dmabuf belongs
* to nvgpu.
*/
static const char exporter_name[] = "nvgpu";
void nvgpu_vidmem_set_page_alloc(struct scatterlist *sgl, u64 addr)
{
/* set bit 0 to indicate vidmem allocation */
sg_dma_address(sgl) = (addr | 1ULL);
}
struct nvgpu_page_alloc *nvgpu_vidmem_get_page_alloc(struct scatterlist *sgl)
{
u64 addr;
addr = sg_dma_address(sgl);
if (nvgpu_addr_is_vidmem_page_alloc(addr))
addr = addr & ~1ULL;
else
WARN_ON(1);
return (struct nvgpu_page_alloc *)(uintptr_t)addr;
}
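/*
 * Sketch (not part of the driver): the tagging above relies on
 * struct nvgpu_page_alloc pointers being at least 2-byte aligned, which
 * leaves bit 0 free to mark "this scatterlist DMA address is really a
 * vidmem allocation handle". Round trip of the scheme:
 *
 *	u64 tagged = (u64)(uintptr_t)alloc | 1ULL;			// tag
 *	bool is_vidmem = nvgpu_addr_is_vidmem_page_alloc(tagged);	// test
 *	struct nvgpu_page_alloc *p =
 *		(struct nvgpu_page_alloc *)(uintptr_t)(tagged & ~1ULL);	// untag
 */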
static struct sg_table *gk20a_vidbuf_map_dma_buf(
struct dma_buf_attachment *attach, enum dma_data_direction dir)
{
struct nvgpu_vidmem_buf *buf = attach->dmabuf->priv;
return buf->mem->priv.sgt;
}
static void gk20a_vidbuf_unmap_dma_buf(struct dma_buf_attachment *attach,
struct sg_table *sgt,
enum dma_data_direction dir)
{
}
#if LINUX_VERSION_CODE <= KERNEL_VERSION(5, 5, 0)
static void *gk20a_vidbuf_kmap(struct dma_buf *dmabuf, unsigned long page_num)
{
WARN_ON("Not supported");
return NULL;
}
#endif
#if LINUX_VERSION_CODE <= KERNEL_VERSION(4, 16, 0)
static void *gk20a_vidbuf_kmap_atomic(struct dma_buf *dmabuf,
unsigned long page_num)
{
WARN_ON("Not supported");
return NULL;
}
#endif
static int gk20a_vidbuf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
{
return -EINVAL;
}
static void gk20a_vidbuf_release(struct dma_buf *dmabuf)
{
struct nvgpu_vidmem_buf *buf = dmabuf->priv;
struct nvgpu_vidmem_linux *linux_buf = buf->priv;
struct gk20a *g = buf->g;
vidmem_dbg(g, "Releasing Linux VIDMEM buf: dmabuf=0x%p size=%zuKB",
dmabuf, buf->mem->size >> 10);
nvgpu_kfree(g, linux_buf);
nvgpu_vidmem_buf_free(g, buf);
nvgpu_put(g);
}
static struct dma_buf_ops gk20a_vidbuf_ops = {
.map_dma_buf = gk20a_vidbuf_map_dma_buf,
.unmap_dma_buf = gk20a_vidbuf_unmap_dma_buf,
.release = gk20a_vidbuf_release,
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 0)
#if LINUX_VERSION_CODE <= KERNEL_VERSION(4, 16, 0)
.map_atomic = gk20a_vidbuf_kmap_atomic,
#endif
#if LINUX_VERSION_CODE <= KERNEL_VERSION(5, 5, 0)
.map = gk20a_vidbuf_kmap,
#endif
#else
.kmap_atomic = gk20a_vidbuf_kmap_atomic,
.kmap = gk20a_vidbuf_kmap,
#endif
.mmap = gk20a_vidbuf_mmap,
};
static struct dma_buf *gk20a_vidbuf_export(struct nvgpu_vidmem_buf *buf)
{
DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
exp_info.priv = buf;
exp_info.ops = &gk20a_vidbuf_ops;
exp_info.size = buf->mem->size;
exp_info.flags = O_RDWR;
exp_info.exp_name = exporter_name;
return dma_buf_export(&exp_info);
}
struct gk20a *nvgpu_vidmem_buf_owner(struct dma_buf *dmabuf)
{
struct nvgpu_vidmem_buf *buf = dmabuf->priv;
if (dmabuf->exp_name != exporter_name) {
return NULL;
}
return buf->g;
}
int nvgpu_vidmem_export_linux(struct gk20a *g, size_t bytes)
{
struct nvgpu_vidmem_buf *buf = NULL;
struct nvgpu_vidmem_linux *priv;
int err, fd;
/*
* This ref is released when the dma_buf is closed.
*/
if (!nvgpu_get(g))
return -ENODEV;
vidmem_dbg(g, "Allocating vidmem buf: %zu bytes", bytes);
priv = nvgpu_kzalloc(g, sizeof(*priv));
if (!priv) {
err = -ENOMEM;
goto fail;
}
err = nvgpu_vidmem_user_alloc(g, bytes, &buf);
if (err != 0) {
goto fail;
}
priv->dmabuf = gk20a_vidbuf_export(buf);
if (IS_ERR(priv->dmabuf)) {
err = PTR_ERR(priv->dmabuf);
goto fail;
}
buf->priv = priv;
#ifdef CONFIG_NVGPU_USE_TEGRA_ALLOC_FD
fd = tegra_alloc_fd(current->files, 1024, O_RDWR);
#else
fd = get_unused_fd_flags(O_RDWR);
#endif
if (fd < 0) {
/* ->release frees what we have done */
dma_buf_put(priv->dmabuf);
return fd;
}
/* fclose() on this drops one ref, freeing the dma buf */
fd_install(fd, priv->dmabuf->file);
vidmem_dbg(g, "Alloced Linux VIDMEM buf: dmabuf=0x%p size=%zuKB",
priv->dmabuf, buf->mem->size >> 10);
return fd;
fail:
nvgpu_vidmem_buf_free(g, buf);
nvgpu_kfree(g, priv);
nvgpu_put(g);
vidmem_dbg(g, "Failed to alloc Linux VIDMEM buf: %d", err);
return err;
}
int nvgpu_vidmem_buf_access_memory(struct gk20a *g, struct dma_buf *dmabuf,
void *buffer, u64 offset, u64 size, u32 cmd)
{
struct nvgpu_vidmem_buf *vidmem_buf;
struct nvgpu_mem *mem;
int err = 0;
if (gk20a_dmabuf_aperture(g, dmabuf) != APERTURE_VIDMEM)
return -EINVAL;
vidmem_buf = dmabuf->priv;
mem = vidmem_buf->mem;
nvgpu_speculation_barrier();
switch (cmd) {
case NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_READ:
nvgpu_mem_rd_n(g, mem, offset, buffer, size);
break;
case NVGPU_DBG_GPU_IOCTL_ACCESS_FB_MEMORY_CMD_WRITE:
nvgpu_mem_wr_n(g, mem, offset, buffer, size);
break;
default:
err = -EINVAL;
}
return err;
}
void nvgpu_mem_free_vidmem_alloc(struct gk20a *g, struct nvgpu_mem *vidmem)
{
nvgpu_free(vidmem->allocator,
(u64)nvgpu_vidmem_get_page_alloc(vidmem->priv.sgt->sgl));
nvgpu_free_sgtable(g, &vidmem->priv.sgt);
}

View File

@@ -0,0 +1,45 @@
/*
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef NVGPU_DMABUF_VIDMEM_H
#define NVGPU_DMABUF_VIDMEM_H
#ifdef CONFIG_NVGPU_DGPU
#include <nvgpu/types.h>
struct dma_buf;
struct gk20a;
struct scatterlist;
struct gk20a *nvgpu_vidmem_buf_owner(struct dma_buf *dmabuf);
int nvgpu_vidmem_export_linux(struct gk20a *g, size_t bytes);
void nvgpu_vidmem_set_page_alloc(struct scatterlist *sgl, u64 addr);
struct nvgpu_page_alloc *nvgpu_vidmem_get_page_alloc(struct scatterlist *sgl);
int nvgpu_vidmem_buf_access_memory(struct gk20a *g, struct dma_buf *dmabuf,
void *buffer, u64 offset, u64 size, u32 cmd);
struct nvgpu_vidmem_linux {
struct dma_buf *dmabuf;
void *dmabuf_priv;
void (*dmabuf_priv_delete)(void *);
};
#endif
#endif

View File

@@ -0,0 +1,365 @@
/*
* Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/reboot.h>
#include <nvgpu/errata.h>
#include <linux/dma-mapping.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/pm_runtime.h>
#include <uapi/linux/nvgpu.h>
#include <nvgpu/defaults.h>
#include <nvgpu/kmem.h>
#include <nvgpu/nvgpu_common.h>
#include <nvgpu/soc.h>
#include <nvgpu/bug.h>
#include <nvgpu/enabled.h>
#include <nvgpu/debug.h>
#include <nvgpu/sizes.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/regops.h>
#include <nvgpu/tsg.h>
#include <nvgpu/gr/gr.h>
#include "platform_gk20a.h"
#include "module.h"
#include "os_linux.h"
#include "sysfs.h"
#include "ioctl.h"
#define EMC3D_DEFAULT_RATIO 750
void nvgpu_kernel_restart(void *cmd)
{
kernel_restart(cmd);
}
static void nvgpu_init_vars(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct device *dev = dev_from_gk20a(g);
struct gk20a_platform *platform = dev_get_drvdata(dev);
nvgpu_cond_init(&g->mc.sw_irq_stall_last_handled_cond);
nvgpu_cond_init(&g->mc.sw_irq_nonstall_last_handled_cond);
init_rwsem(&l->busy_lock);
nvgpu_rwsem_init(&g->deterministic_busy);
nvgpu_spinlock_init(&g->mc.enable_lock);
nvgpu_spinlock_init(&g->power_spinlock);
nvgpu_spinlock_init(&g->mc.intr_lock);
nvgpu_mutex_init(&platform->railgate_lock);
nvgpu_mutex_init(&g->dbg_sessions_lock);
nvgpu_mutex_init(&g->client_lock);
nvgpu_mutex_init(&g->power_lock);
nvgpu_mutex_init(&g->tpc_pg_lock);
nvgpu_mutex_init(&g->clk_arb_enable_lock);
nvgpu_mutex_init(&g->cg_pg_lock);
#if defined(CONFIG_NVGPU_CYCLESTATS)
nvgpu_mutex_init(&g->cs_lock);
#endif
/* Init the clock req count to 0 */
nvgpu_atomic_set(&g->clk_arb_global_nr, 0);
nvgpu_mutex_init(&l->ctrl_privs_lock);
nvgpu_init_list_node(&l->ctrl_privs);
g->regs_saved = g->regs;
g->bar1_saved = g->bar1;
g->emc3d_ratio = EMC3D_DEFAULT_RATIO;
/* Set DMA parameters to allow larger sgt lists */
dev->dma_parms = &l->dma_parms;
dma_set_max_seg_size(dev, UINT_MAX);
/*
* A default of 16GB is the largest supported DMA size that is
* acceptable to all currently supported Tegra SoCs.
*/
if (!platform->dma_mask)
platform->dma_mask = DMA_BIT_MASK(34);
dma_set_mask(dev, platform->dma_mask);
dma_set_coherent_mask(dev, platform->dma_mask);
dma_set_seg_boundary(dev, platform->dma_mask);
nvgpu_init_list_node(&g->profiler_objects);
nvgpu_init_list_node(&g->boardobj_head);
nvgpu_init_list_node(&g->boardobjgrp_head);
nvgpu_set_enabled(g, NVGPU_HAS_SYNCPOINTS, platform->has_syncpoints);
}
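/*
 * Note on the fallback mask above (worked out, not driver code):
 * DMA_BIT_MASK(34) addresses 2^34 bytes, i.e. 16 GiB, which is the
 * "default of 16GB" the comment refers to:
 *
 *	DMA_BIT_MASK(34) == (1ULL << 34) - 1 == 0x3ffffffff
 */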
static void nvgpu_init_max_comptag(struct gk20a *g)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 0, 0)
nvgpu_log_info(g, "total ram pages : %lu", totalram_pages());
#else
nvgpu_log_info(g, "total ram pages : %lu", totalram_pages);
#endif
g->max_comptag_mem = totalram_size_in_mb;
}
static void nvgpu_init_timeout(struct gk20a *g)
{
struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g));
g->timeouts_disabled_by_user = false;
nvgpu_atomic_set(&g->timeouts_disabled_refcount, 0);
if (nvgpu_platform_is_silicon(g)) {
g->poll_timeout_default = NVGPU_DEFAULT_POLL_TIMEOUT_MS;
} else if (nvgpu_platform_is_fpga(g)) {
g->poll_timeout_default = NVGPU_DEFAULT_FPGA_TIMEOUT_MS;
} else {
g->poll_timeout_default = (u32)ULONG_MAX;
}
g->ch_wdt_init_limit_ms = platform->ch_wdt_init_limit_ms;
g->ctxsw_timeout_period_ms = CTXSW_TIMEOUT_PERIOD_MS;
}
static void nvgpu_init_timeslice(struct gk20a *g)
{
g->runlist_interleave = true;
g->tsg_timeslice_low_priority_us =
NVGPU_TSG_TIMESLICE_LOW_PRIORITY_US;
g->tsg_timeslice_medium_priority_us =
NVGPU_TSG_TIMESLICE_MEDIUM_PRIORITY_US;
g->tsg_timeslice_high_priority_us =
NVGPU_TSG_TIMESLICE_HIGH_PRIORITY_US;
g->tsg_timeslice_min_us = NVGPU_TSG_TIMESLICE_MIN_US;
g->tsg_timeslice_max_us = NVGPU_TSG_TIMESLICE_MAX_US;
g->tsg_dbg_timeslice_max_us = NVGPU_TSG_DBG_TIMESLICE_MAX_US_DEFAULT;
}
static void nvgpu_init_pm_vars(struct gk20a *g)
{
struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g));
u32 i = 0;
/*
* Set up initial power settings. For non-silicon platforms, disable
* power features and for silicon platforms, read from platform data
*/
g->slcg_enabled =
nvgpu_platform_is_silicon(g) ? platform->enable_slcg : false;
g->blcg_enabled =
nvgpu_platform_is_silicon(g) ? platform->enable_blcg : false;
g->elcg_enabled =
nvgpu_platform_is_silicon(g) ? platform->enable_elcg : false;
nvgpu_set_enabled(g, NVGPU_GPU_CAN_ELCG,
nvgpu_platform_is_silicon(g) ? platform->can_elcg : false);
nvgpu_set_enabled(g, NVGPU_GPU_CAN_SLCG,
nvgpu_platform_is_silicon(g) ? platform->can_slcg : false);
nvgpu_set_enabled(g, NVGPU_GPU_CAN_BLCG,
nvgpu_platform_is_silicon(g) ? platform->can_blcg : false);
g->aggressive_sync_destroy_thresh = platform->aggressive_sync_destroy_thresh;
#ifdef CONFIG_NVGPU_SUPPORT_CDE
g->has_cde = platform->has_cde;
#endif
g->ptimer_src_freq = platform->ptimer_src_freq;
nvgpu_set_enabled(g, NVGPU_CAN_RAILGATE, platform->can_railgate_init);
g->can_tpc_powergate = platform->can_tpc_powergate;
for (i = 0; i < MAX_TPC_PG_CONFIGS; i++)
g->valid_tpc_mask[i] = platform->valid_tpc_mask[i];
g->ldiv_slowdown_factor = platform->ldiv_slowdown_factor_init;
/* if default delay is not set, set default delay to 500msec */
if (platform->railgate_delay_init)
g->railgate_delay = platform->railgate_delay_init;
else
g->railgate_delay = NVGPU_DEFAULT_RAILGATE_IDLE_TIMEOUT;
g->support_ls_pmu = support_gk20a_pmu(dev_from_gk20a(g));
if (g->support_ls_pmu) {
g->elpg_enabled =
nvgpu_platform_is_silicon(g) ? platform->enable_elpg : false;
g->aelpg_enabled =
nvgpu_platform_is_silicon(g) ? platform->enable_aelpg : false;
g->mscg_enabled =
nvgpu_platform_is_silicon(g) ? platform->enable_mscg : false;
g->can_elpg =
nvgpu_platform_is_silicon(g) ? platform->can_elpg_init : false;
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
g->can_elpg = false;
}
nvgpu_set_enabled(g, NVGPU_PMU_PERFMON, platform->enable_perfmon);
/* ELPG feature enable is SW pre-requisite for ELPG_MS */
if (g->elpg_enabled) {
nvgpu_set_enabled(g, NVGPU_ELPG_MS_ENABLED,
platform->enable_elpg_ms);
}
}
nvgpu_set_enabled(g, NVGPU_SUPPORT_ASPM, !platform->disable_aspm);
#ifdef CONFIG_NVGPU_SIM
if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
nvgpu_set_enabled(g, NVGPU_PMU_PSTATE, false);
} else
#endif
{
nvgpu_set_enabled(g, NVGPU_PMU_PSTATE, platform->pstate);
}
}
static void nvgpu_init_vbios_vars(struct gk20a *g)
{
struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g));
nvgpu_set_enabled(g, NVGPU_PMU_RUN_PREOS, platform->run_preos);
}
static void nvgpu_init_ltc_vars(struct gk20a *g)
{
struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g));
g->ltc_streamid = platform->ltc_streamid;
}
static void nvgpu_init_mm_vars(struct gk20a *g)
{
struct gk20a_platform *platform = dev_get_drvdata(dev_from_gk20a(g));
g->mm.disable_bigpage = platform->disable_bigpage;
nvgpu_set_enabled(g, NVGPU_MM_HONORS_APERTURE,
platform->honors_aperture);
nvgpu_set_enabled(g, NVGPU_MM_UNIFIED_MEMORY,
platform->unified_memory);
nvgpu_set_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES,
platform->unify_address_spaces);
nvgpu_set_errata(g, NVGPU_ERRATA_MM_FORCE_128K_PMU_VM,
platform->force_128K_pmu_vm);
nvgpu_mutex_init(&g->mm.tlb_lock);
nvgpu_mutex_init(&g->mm.priv_lock);
}
int nvgpu_probe(struct gk20a *g,
const char *debugfs_symlink)
{
struct device *dev = dev_from_gk20a(g);
struct gk20a_platform *platform = dev_get_drvdata(dev);
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
int err = 0;
nvgpu_init_vars(g);
nvgpu_init_max_comptag(g);
nvgpu_init_timeout(g);
nvgpu_init_timeslice(g);
nvgpu_init_pm_vars(g);
nvgpu_init_vbios_vars(g);
nvgpu_init_ltc_vars(g);
err = nvgpu_init_soc_vars(g);
if (err) {
nvgpu_err(g, "init soc vars failed");
return err;
}
/* Initialize the platform interface. */
err = platform->probe(dev);
if (err) {
if (err == -EPROBE_DEFER)
nvgpu_info(g, "platform probe failed");
else
nvgpu_err(g, "platform probe failed");
return err;
}
nvgpu_init_mm_vars(g);
err = gk20a_power_node_init(dev);
if (err) {
nvgpu_err(g, "power_node creation failed");
return err;
}
/*
* TODO: remove the following condition along with the legacy
* nodes.
*/
if (platform->platform_chip_id == TEGRA_210) {
err = gk20a_user_nodes_init(dev);
if (err)
return err;
l->dev_nodes_created = true;
}
/*
* Note that for runtime suspend to work the clocks have to be setup
* which happens in the probe call above. Hence the driver resume
* is done here and not in gk20a_pm_init.
*/
pm_runtime_get_sync(dev);
if (platform->late_probe) {
err = platform->late_probe(dev);
if (err) {
nvgpu_err(g, "late probe failed");
return err;
}
}
pm_runtime_put_sync_autosuspend(dev);
nvgpu_create_sysfs(dev);
gk20a_debug_init(g, debugfs_symlink);
#ifdef CONFIG_NVGPU_DEBUGGER
g->dbg_regops_tmp_buf = nvgpu_kzalloc(g, SZ_4K);
if (!g->dbg_regops_tmp_buf) {
nvgpu_err(g, "couldn't allocate regops tmp buf");
return -ENOMEM;
}
g->dbg_regops_tmp_buf_ops =
SZ_4K / sizeof(g->dbg_regops_tmp_buf[0]);
#endif
g->remove_support = gk20a_remove_support;
nvgpu_ref_init(&g->refcount);
return 0;
}
static void nvgpu_free_gk20a(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
g->probe_done = false;
kfree(l);
}
void nvgpu_init_gk20a(struct gk20a *g)
{
g->gfree = nvgpu_free_gk20a;
}

View File

@@ -0,0 +1,27 @@
/*
* Copyright (c) 2016-2017, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef NVGPU_LINUX_DRIVER_COMMON
#define NVGPU_LINUX_DRIVER_COMMON
struct gk20a;
int nvgpu_probe(struct gk20a *g,
const char *debugfs_symlink);
void nvgpu_init_gk20a(struct gk20a *g);
#endif

View File

@@ -0,0 +1,29 @@
/*
* Copyright (c) 2018, NVIDIA Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <nvgpu/dt.h>
#include <linux/of.h>
#include "os_linux.h"
int nvgpu_dt_read_u32_index(struct gk20a *g, const char *name,
u32 index, u32 *value)
{
struct device *dev = dev_from_gk20a(g);
struct device_node *np = dev->of_node;
return of_property_read_u32_index(np, name, index, value);
}

View File

@@ -0,0 +1,91 @@
/*
* Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <nvgpu/ecc.h>
#include <nvgpu/gk20a.h>
#include "os_linux.h"
int nvgpu_ecc_sysfs_init(struct gk20a *g)
{
struct device *dev = dev_from_gk20a(g);
struct nvgpu_ecc *ecc = &g->ecc;
struct dev_ext_attribute *attr;
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct nvgpu_ecc_stat *stat;
int i = 0, err = 0;
nvgpu_mutex_acquire(&ecc->stats_lock);
attr = nvgpu_kzalloc(g, sizeof(*attr) * ecc->stats_count);
if (!attr) {
nvgpu_mutex_release(&ecc->stats_lock);
return -ENOMEM;
}
nvgpu_list_for_each_entry(stat,
&ecc->stats_list, nvgpu_ecc_stat, node) {
if (i >= ecc->stats_count) {
err = -EINVAL;
nvgpu_err(g, "stats_list longer than stats_count %d",
ecc->stats_count);
break;
}
sysfs_attr_init(&attr[i].attr.attr);
attr[i].attr.attr.name = stat->name;
attr[i].attr.attr.mode = VERIFY_OCTAL_PERMISSIONS(S_IRUGO);
attr[i].var = &stat->counter;
attr[i].attr.show = device_show_int;
err = device_create_file(dev, &attr[i].attr);
if (err) {
nvgpu_err(g, "sysfs node create failed for %s\n",
stat->name);
break;
}
i++;
}
nvgpu_mutex_release(&ecc->stats_lock);
if (err) {
while (i-- > 0)
device_remove_file(dev, &attr[i].attr);
nvgpu_kfree(g, attr);
return err;
}
l->ecc_attrs = attr;
return 0;
}
void nvgpu_ecc_sysfs_remove(struct gk20a *g)
{
struct device *dev = dev_from_gk20a(g);
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct nvgpu_ecc *ecc = &g->ecc;
int i;
nvgpu_mutex_acquire(&ecc->stats_lock);
for (i = 0; i < ecc->stats_count; i++)
device_remove_file(dev, &l->ecc_attrs[i].attr);
nvgpu_mutex_release(&ecc->stats_lock);
nvgpu_kfree(g, l->ecc_attrs);
l->ecc_attrs = NULL;
}

View File

@@ -0,0 +1,797 @@
/*
* Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/wait.h>
#include <linux/ktime.h>
#include <linux/uaccess.h>
#include <linux/poll.h>
#include <nvgpu/trace.h>
#include <uapi/linux/nvgpu.h>
#include <nvgpu/kmem.h>
#include <nvgpu/log.h>
#include <nvgpu/atomic.h>
#include <nvgpu/barrier.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/channel.h>
#include <nvgpu/enabled.h>
#include <nvgpu/gr/fecs_trace.h>
#include <nvgpu/string.h>
#include <nvgpu/nvgpu_init.h>
#include "platform_gk20a.h"
#include "os_linux.h"
#include "fecs_trace_linux.h"
#include "ioctl.h"
/* Userland-facing FIFO (one global + eventually one per VM) */
struct gk20a_ctxsw_dev {
struct gk20a *g;
struct nvgpu_ctxsw_ring_header *hdr;
struct nvgpu_gpu_ctxsw_trace_entry *ents;
struct nvgpu_gpu_ctxsw_trace_filter filter;
bool write_enabled;
struct nvgpu_cond readout_wq;
size_t size;
u32 num_ents;
nvgpu_atomic_t vma_ref;
struct nvgpu_mutex write_lock;
};
struct gk20a_ctxsw_trace {
struct gk20a_ctxsw_dev devs[GK20A_CTXSW_TRACE_NUM_DEVS];
};
static inline int ring_is_empty(struct nvgpu_ctxsw_ring_header *hdr)
{
return (hdr->write_idx == hdr->read_idx);
}
static inline int ring_is_full(struct nvgpu_ctxsw_ring_header *hdr)
{
return ((hdr->write_idx + 1) % hdr->num_ents) == hdr->read_idx;
}
static inline int ring_len(struct nvgpu_ctxsw_ring_header *hdr)
{
return (hdr->write_idx - hdr->read_idx) % hdr->num_ents;
}
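/*
 * Sketch (not part of the driver): the ring above reserves one slot so
 * that full and empty stay distinguishable. With num_ents == 4:
 *
 *	write_idx == read_idx                    // empty, 0 entries
 *	(write_idx + 1) % num_ents == read_idx   // full, at most 3 entries
 *
 * A producer advances write_idx the same way (modulo the wrap style)
 * gk20a_ctxsw_dev_read() advances read_idx:
 *
 *	hdr->write_idx = (hdr->write_idx + 1) % hdr->num_ents;
 */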
static void nvgpu_set_ctxsw_trace_entry(
struct nvgpu_ctxsw_trace_entry *entry_dst,
struct nvgpu_gpu_ctxsw_trace_entry *entry_src)
{
entry_dst->tag = entry_src->tag;
entry_dst->vmid = entry_src->vmid;
entry_dst->seqno = entry_src->seqno;
entry_dst->context_id = entry_src->context_id;
entry_dst->pid = entry_src->pid;
entry_dst->timestamp = entry_src->timestamp;
}
ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf, size_t size,
loff_t *off)
{
struct gk20a_ctxsw_dev *dev = filp->private_data;
struct gk20a *g = dev->g;
struct nvgpu_ctxsw_ring_header *hdr = dev->hdr;
struct nvgpu_ctxsw_trace_entry __user *entry =
(struct nvgpu_ctxsw_trace_entry *) buf;
struct nvgpu_ctxsw_trace_entry user_entry;
size_t copied = 0;
int err;
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw,
"filp=%p buf=%p size=%zu", filp, buf, size);
nvgpu_mutex_acquire(&dev->write_lock);
while (ring_is_empty(hdr)) {
nvgpu_mutex_release(&dev->write_lock);
if (filp->f_flags & O_NONBLOCK)
return -EAGAIN;
err = NVGPU_COND_WAIT_INTERRUPTIBLE(&dev->readout_wq,
!ring_is_empty(hdr), 0U);
if (err)
return err;
nvgpu_mutex_acquire(&dev->write_lock);
}
while (size >= sizeof(struct nvgpu_gpu_ctxsw_trace_entry)) {
if (ring_is_empty(hdr))
break;
nvgpu_set_ctxsw_trace_entry(&user_entry,
&dev->ents[hdr->read_idx]);
if (copy_to_user(entry, &user_entry,
sizeof(*entry))) {
nvgpu_mutex_release(&dev->write_lock);
return -EFAULT;
}
hdr->read_idx++;
if (hdr->read_idx >= hdr->num_ents)
hdr->read_idx = 0;
entry++;
copied += sizeof(*entry);
size -= sizeof(*entry);
}
nvgpu_log(g, gpu_dbg_ctxsw, "copied=%zu read_idx=%d", copied,
hdr->read_idx);
*off = hdr->read_idx;
nvgpu_mutex_release(&dev->write_lock);
return copied;
}
static int gk20a_ctxsw_dev_ioctl_trace_enable(struct gk20a_ctxsw_dev *dev)
{
struct gk20a *g = dev->g;
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "trace enabled");
nvgpu_mutex_acquire(&dev->write_lock);
dev->write_enabled = true;
nvgpu_mutex_release(&dev->write_lock);
dev->g->ops.gr.fecs_trace.enable(dev->g);
return 0;
}
static int gk20a_ctxsw_dev_ioctl_trace_disable(struct gk20a_ctxsw_dev *dev)
{
struct gk20a *g = dev->g;
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "trace disabled");
dev->g->ops.gr.fecs_trace.disable(dev->g);
nvgpu_mutex_acquire(&dev->write_lock);
dev->write_enabled = false;
nvgpu_mutex_release(&dev->write_lock);
return 0;
}
static int gk20a_ctxsw_dev_alloc_buffer(struct gk20a_ctxsw_dev *dev,
size_t *size)
{
struct gk20a *g = dev->g;
void *buf;
int err;
if ((dev->write_enabled) || (nvgpu_atomic_read(&dev->vma_ref)))
return -EBUSY;
if (dev->hdr) {
g->ops.gr.fecs_trace.free_user_buffer(g);
dev->hdr = NULL;
}
err = g->ops.gr.fecs_trace.alloc_user_buffer(g, &buf, size);
if (err)
return err;
dev->hdr = buf;
dev->ents = (struct nvgpu_gpu_ctxsw_trace_entry *) (dev->hdr + 1);
dev->size = *size;
dev->num_ents = dev->hdr->num_ents;
nvgpu_log(g, gpu_dbg_ctxsw, "size=%zu hdr=%p ents=%p num_ents=%d",
dev->size, dev->hdr, dev->ents, dev->hdr->num_ents);
return 0;
}
int nvgpu_gr_fecs_trace_ring_alloc(struct gk20a *g,
void **buf, size_t *size)
{
struct nvgpu_ctxsw_ring_header *hdr;
*size = round_up(*size, NVGPU_CPU_PAGE_SIZE);
hdr = vmalloc_user(*size);
if (!hdr)
return -ENOMEM;
hdr->magic = NVGPU_CTXSW_RING_HEADER_MAGIC;
hdr->version = NVGPU_CTXSW_RING_HEADER_VERSION;
hdr->num_ents = (*size - sizeof(struct nvgpu_ctxsw_ring_header))
/ sizeof(struct nvgpu_gpu_ctxsw_trace_entry);
hdr->ent_size = sizeof(struct nvgpu_gpu_ctxsw_trace_entry);
hdr->drop_count = 0;
hdr->read_idx = 0;
hdr->write_idx = 0;
hdr->write_seqno = 0;
*buf = hdr;
return 0;
}
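/*
 * Sketch (not part of the driver): the buffer allocated above is a single
 * contiguous vmalloc_user() area with the header first and the entry array
 * immediately behind it, which is why consumers derive the entries by
 * pointer arithmetic on the header:
 *
 *	struct nvgpu_ctxsw_ring_header *hdr = buf;
 *	struct nvgpu_gpu_ctxsw_trace_entry *ents =
 *		(struct nvgpu_gpu_ctxsw_trace_entry *)(hdr + 1);
 *	// valid slots are ents[0] .. ents[hdr->num_ents - 1]
 */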
int nvgpu_gr_fecs_trace_ring_free(struct gk20a *g)
{
struct gk20a_ctxsw_dev *dev = &g->ctxsw_trace->devs[0];
nvgpu_vfree(g, dev->hdr);
return 0;
}
static int gk20a_ctxsw_dev_ioctl_ring_setup(struct gk20a_ctxsw_dev *dev,
struct nvgpu_ctxsw_ring_setup_args *args)
{
struct gk20a *g = dev->g;
size_t size = args->size;
int ret;
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "size=%zu", size);
if (size > GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE)
return -EINVAL;
nvgpu_mutex_acquire(&dev->write_lock);
ret = gk20a_ctxsw_dev_alloc_buffer(dev, &size);
nvgpu_mutex_release(&dev->write_lock);
args->size = size;
return ret;
}
static void nvgpu_set_ctxsw_trace_filter_args(
struct nvgpu_gpu_ctxsw_trace_filter *filter_dst,
struct nvgpu_ctxsw_trace_filter *filter_src)
{
nvgpu_memcpy((u8 *)filter_dst->tag_bits, (u8 *)filter_src->tag_bits,
(NVGPU_CTXSW_FILTER_SIZE + 63) / 64);
}
static void nvgpu_get_ctxsw_trace_filter_args(
struct nvgpu_ctxsw_trace_filter *filter_dst,
struct nvgpu_gpu_ctxsw_trace_filter *filter_src)
{
nvgpu_memcpy((u8 *)filter_dst->tag_bits, (u8 *)filter_src->tag_bits,
(NVGPU_CTXSW_FILTER_SIZE + 63) / 64);
}
static int gk20a_ctxsw_dev_ioctl_set_filter(struct gk20a_ctxsw_dev *dev,
struct nvgpu_ctxsw_trace_filter_args *args)
{
struct gk20a *g = dev->g;
nvgpu_mutex_acquire(&dev->write_lock);
nvgpu_set_ctxsw_trace_filter_args(&dev->filter, &args->filter);
nvgpu_mutex_release(&dev->write_lock);
if (g->ops.gr.fecs_trace.set_filter)
g->ops.gr.fecs_trace.set_filter(g, &dev->filter);
return 0;
}
static int gk20a_ctxsw_dev_ioctl_get_filter(struct gk20a_ctxsw_dev *dev,
struct nvgpu_ctxsw_trace_filter_args *args)
{
nvgpu_mutex_acquire(&dev->write_lock);
nvgpu_get_ctxsw_trace_filter_args(&args->filter, &dev->filter);
nvgpu_mutex_release(&dev->write_lock);
return 0;
}
static int gk20a_ctxsw_dev_ioctl_poll(struct gk20a_ctxsw_dev *dev)
{
struct gk20a *g = dev->g;
int err;
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " ");
err = gk20a_busy(g);
if (err)
return err;
if (g->ops.gr.fecs_trace.flush)
err = g->ops.gr.fecs_trace.flush(g);
if (likely(!err))
err = g->ops.gr.fecs_trace.poll(g);
gk20a_idle(g);
return err;
}
int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp)
{
struct gk20a *g;
struct gk20a_ctxsw_trace *trace;
struct gk20a_ctxsw_dev *dev;
int err;
size_t size;
u32 n;
/* only one VM for now */
const int vmid = 0;
struct nvgpu_cdev *cdev;
cdev = container_of(inode->i_cdev, struct nvgpu_cdev, cdev);
g = nvgpu_get_gk20a_from_cdev(cdev);
g = nvgpu_get(g);
if (!g)
return -ENODEV;
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE)) {
nvgpu_put(g);
return -ENODEV;
}
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p", g);
err = gk20a_busy(g);
if (err)
goto free_ref;
trace = g->ctxsw_trace;
if (!trace) {
err = -ENODEV;
goto idle;
}
/* Allow only one user for this device */
dev = &trace->devs[vmid];
nvgpu_mutex_acquire(&dev->write_lock);
if (dev->hdr) {
err = -EBUSY;
goto done;
}
/* By default, allocate ring buffer big enough to accommodate
* FECS records with default event filter */
/* enable all traces by default */
NVGPU_CTXSW_FILTER_SET_ALL(&dev->filter);
if (g->ops.gr.fecs_trace.set_filter)
g->ops.gr.fecs_trace.set_filter(g, &dev->filter);
/* compute max number of entries generated with this filter */
n = g->ops.gr.fecs_trace.max_entries(g, &dev->filter);
size = sizeof(struct nvgpu_ctxsw_ring_header) +
n * sizeof(struct nvgpu_gpu_ctxsw_trace_entry);
nvgpu_log(g, gpu_dbg_ctxsw, "size=%zu entries=%d ent_size=%zu",
size, n, sizeof(struct nvgpu_gpu_ctxsw_trace_entry));
err = gk20a_ctxsw_dev_alloc_buffer(dev, &size);
if (!err) {
filp->private_data = dev;
nvgpu_log(g, gpu_dbg_ctxsw, "filp=%p dev=%p size=%zu",
filp, dev, size);
}
done:
nvgpu_mutex_release(&dev->write_lock);
idle:
gk20a_idle(g);
free_ref:
if (err)
nvgpu_put(g);
return err;
}
int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp)
{
struct gk20a_ctxsw_dev *dev = filp->private_data;
struct gk20a *g = dev->g;
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "dev: %p", dev);
nvgpu_mutex_acquire(&dev->write_lock);
if (dev->write_enabled) {
dev->write_enabled = false;
g->ops.gr.fecs_trace.disable(g);
}
nvgpu_mutex_release(&dev->write_lock);
if (dev->hdr) {
dev->g->ops.gr.fecs_trace.free_user_buffer(dev->g);
dev->hdr = NULL;
}
nvgpu_put(g);
return 0;
}
long gk20a_ctxsw_dev_ioctl(struct file *filp, unsigned int cmd,
unsigned long arg)
{
struct gk20a_ctxsw_dev *dev = filp->private_data;
struct gk20a *g = dev->g;
u8 buf[NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE];
int err = 0;
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "nr=%d", _IOC_NR(cmd));
if ((_IOC_TYPE(cmd) != NVGPU_CTXSW_IOCTL_MAGIC) ||
(_IOC_NR(cmd) == 0) ||
(_IOC_NR(cmd) > NVGPU_CTXSW_IOCTL_LAST) ||
(_IOC_SIZE(cmd) > NVGPU_CTXSW_IOCTL_MAX_ARG_SIZE))
return -EINVAL;
(void) memset(buf, 0, sizeof(buf));
if (_IOC_DIR(cmd) & _IOC_WRITE) {
if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd)))
return -EFAULT;
}
switch (cmd) {
case NVGPU_CTXSW_IOCTL_TRACE_ENABLE:
err = gk20a_ctxsw_dev_ioctl_trace_enable(dev);
break;
case NVGPU_CTXSW_IOCTL_TRACE_DISABLE:
err = gk20a_ctxsw_dev_ioctl_trace_disable(dev);
break;
case NVGPU_CTXSW_IOCTL_RING_SETUP:
err = gk20a_ctxsw_dev_ioctl_ring_setup(dev,
(struct nvgpu_ctxsw_ring_setup_args *) buf);
break;
case NVGPU_CTXSW_IOCTL_SET_FILTER:
err = gk20a_ctxsw_dev_ioctl_set_filter(dev,
(struct nvgpu_ctxsw_trace_filter_args *) buf);
break;
case NVGPU_CTXSW_IOCTL_GET_FILTER:
err = gk20a_ctxsw_dev_ioctl_get_filter(dev,
(struct nvgpu_ctxsw_trace_filter_args *) buf);
break;
case NVGPU_CTXSW_IOCTL_POLL:
err = gk20a_ctxsw_dev_ioctl_poll(dev);
break;
default:
dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x",
cmd);
err = -ENOTTY;
}
if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd));
return err;
}
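/*
 * Userspace usage sketch (illustrative only, not part of the driver).
 * The device node name assumes the legacy "nvhost-<name>-gpu" scheme
 * produced by nvgpu_devnode(); ioctl numbers come from
 * uapi/linux/nvgpu.h. open() already allocates a default-sized ring,
 * so NVGPU_CTXSW_IOCTL_RING_SETUP is only needed to change the size.
 *
 *	int fd = open("/dev/nvhost-ctxsw-gpu", O_RDWR);
 *
 *	ioctl(fd, NVGPU_CTXSW_IOCTL_TRACE_ENABLE);
 *	... poll(fd)/read(fd), or mmap(fd) the ring directly ...
 *	ioctl(fd, NVGPU_CTXSW_IOCTL_TRACE_DISABLE);
 *	close(fd);
 */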
unsigned int gk20a_ctxsw_dev_poll(struct file *filp, poll_table *wait)
{
struct gk20a_ctxsw_dev *dev = filp->private_data;
struct gk20a *g = dev->g;
struct nvgpu_ctxsw_ring_header *hdr = dev->hdr;
unsigned int mask = 0;
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, " ");
nvgpu_mutex_acquire(&dev->write_lock);
poll_wait(filp, &dev->readout_wq.wq, wait);
if (!ring_is_empty(hdr))
mask |= POLLIN | POLLRDNORM;
nvgpu_mutex_release(&dev->write_lock);
return mask;
}
static void gk20a_ctxsw_dev_vma_open(struct vm_area_struct *vma)
{
struct gk20a_ctxsw_dev *dev = vma->vm_private_data;
struct gk20a *g = dev->g;
nvgpu_atomic_inc(&dev->vma_ref);
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d",
nvgpu_atomic_read(&dev->vma_ref));
}
static void gk20a_ctxsw_dev_vma_close(struct vm_area_struct *vma)
{
struct gk20a_ctxsw_dev *dev = vma->vm_private_data;
struct gk20a *g = dev->g;
nvgpu_atomic_dec(&dev->vma_ref);
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d",
nvgpu_atomic_read(&dev->vma_ref));
}
static struct vm_operations_struct gk20a_ctxsw_dev_vma_ops = {
.open = gk20a_ctxsw_dev_vma_open,
.close = gk20a_ctxsw_dev_vma_close,
};
void nvgpu_gr_fecs_trace_get_mmap_buffer_info(struct gk20a *g,
void **mmapaddr, size_t *mmapsize)
{
*mmapaddr = g->ctxsw_trace->devs[0].hdr;
*mmapsize = 0;
}
int gk20a_ctxsw_dev_mmap(struct file *filp, struct vm_area_struct *vma)
{
struct gk20a_ctxsw_dev *dev = filp->private_data;
struct gk20a *g = dev->g;
size_t mmapsize = 0;
void *mmapaddr;
int ret;
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "vm_start=%lx vm_end=%lx",
vma->vm_start, vma->vm_end);
dev->g->ops.gr.fecs_trace.get_mmap_user_buffer_info(dev->g,
&mmapaddr, &mmapsize);
if (mmapsize) {
unsigned long size = 0;
unsigned long vsize = vma->vm_end - vma->vm_start;
size = min(mmapsize, vsize);
size = round_up(size, NVGPU_CPU_PAGE_SIZE);
ret = remap_pfn_range(vma, vma->vm_start,
(unsigned long) mmapaddr,
size,
vma->vm_page_prot);
} else {
ret = remap_vmalloc_range(vma, mmapaddr, 0);
}
if (likely(!ret)) {
vma->vm_private_data = dev;
vma->vm_ops = &gk20a_ctxsw_dev_vma_ops;
vma->vm_ops->open(vma);
}
return ret;
}
static int gk20a_ctxsw_init_devs(struct gk20a *g)
{
struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
struct gk20a_ctxsw_dev *dev = trace->devs;
int i;
for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) {
dev->g = g;
dev->hdr = NULL;
dev->write_enabled = false;
nvgpu_cond_init(&dev->readout_wq);
nvgpu_mutex_init(&dev->write_lock);
nvgpu_atomic_set(&dev->vma_ref, 0);
dev++;
}
return 0;
}
int gk20a_ctxsw_trace_init(struct gk20a *g)
{
struct gk20a_ctxsw_trace *trace = g->ctxsw_trace;
int err;
nvgpu_log(g, gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p trace=%p", g, trace);
/* if tracing is not supported, skip this */
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_FECS_CTXSW_TRACE))
return 0;
if (likely(trace))
return 0;
trace = nvgpu_kzalloc(g, sizeof(*trace));
if (unlikely(!trace))
return -ENOMEM;
g->ctxsw_trace = trace;
err = gk20a_ctxsw_init_devs(g);
if (err)
goto fail;
err = g->ops.gr.fecs_trace.init(g);
if (unlikely(err))
goto fail;
return 0;
fail:
(void) memset(&g->ops.gr.fecs_trace, 0, sizeof(g->ops.gr.fecs_trace));
nvgpu_kfree(g, trace);
g->ctxsw_trace = NULL;
return err;
}
void gk20a_ctxsw_trace_cleanup(struct gk20a *g)
{
struct gk20a_ctxsw_trace *trace;
struct gk20a_ctxsw_dev *dev;
int i;
if (!g->ctxsw_trace)
return;
trace = g->ctxsw_trace;
dev = trace->devs;
for (i = 0; i < GK20A_CTXSW_TRACE_NUM_DEVS; i++) {
nvgpu_mutex_destroy(&dev->write_lock);
dev++;
}
nvgpu_kfree(g, g->ctxsw_trace);
g->ctxsw_trace = NULL;
g->ops.gr.fecs_trace.deinit(g);
}
int nvgpu_gr_fecs_trace_write_entry(struct gk20a *g,
struct nvgpu_gpu_ctxsw_trace_entry *entry)
{
struct nvgpu_ctxsw_ring_header *hdr;
struct gk20a_ctxsw_dev *dev;
int ret = 0;
const char *reason;
u32 write_idx;
if (!g->ctxsw_trace)
return 0;
if (unlikely(entry->vmid >= GK20A_CTXSW_TRACE_NUM_DEVS))
return -ENODEV;
dev = &g->ctxsw_trace->devs[entry->vmid];
hdr = dev->hdr;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_ctxsw,
"dev=%p hdr=%p", dev, hdr);
nvgpu_mutex_acquire(&dev->write_lock);
if (unlikely(!hdr)) {
/* device has been released */
ret = -ENODEV;
goto done;
}
write_idx = hdr->write_idx;
if (write_idx >= dev->num_ents) {
nvgpu_err(dev->g,
"write_idx=%u out of range [0..%u]",
write_idx, dev->num_ents);
ret = -ENOSPC;
reason = "write_idx out of range";
goto disable;
}
entry->seqno = hdr->write_seqno++;
if (!dev->write_enabled) {
ret = -EBUSY;
reason = "write disabled";
goto drop;
}
if (unlikely(ring_is_full(hdr))) {
ret = -ENOSPC;
reason = "user fifo full";
goto drop;
}
if (!NVGPU_GPU_CTXSW_FILTER_ISSET(entry->tag, &dev->filter)) {
reason = "filtered out";
goto filter;
}
nvgpu_log(g, gpu_dbg_ctxsw,
"seqno=%d context_id=%08x pid=%lld tag=%x timestamp=%llx",
entry->seqno, entry->context_id, entry->pid,
entry->tag, entry->timestamp);
dev->ents[write_idx] = *entry;
/* ensure record is written before updating write index */
nvgpu_smp_wmb();
write_idx++;
if (unlikely(write_idx >= hdr->num_ents))
write_idx = 0;
hdr->write_idx = write_idx;
nvgpu_log(g, gpu_dbg_ctxsw, "added: read=%d write=%d len=%d",
hdr->read_idx, hdr->write_idx, ring_len(hdr));
nvgpu_mutex_release(&dev->write_lock);
return ret;
disable:
g->ops.gr.fecs_trace.disable(g);
drop:
hdr->drop_count++;
filter:
nvgpu_log(g, gpu_dbg_ctxsw,
"dropping seqno=%d context_id=%08x pid=%lld "
"tag=%x time=%llx (%s)",
entry->seqno, entry->context_id, entry->pid,
entry->tag, entry->timestamp, reason);
done:
nvgpu_mutex_release(&dev->write_lock);
return ret;
}
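/*
 * Consumer-side sketch (illustrative only, not part of the driver).
 * It assumes the mmap() layout implied by gk20a_ctxsw_dev_open(): the
 * ring header at the start of the mapping, immediately followed by the
 * entry array. Field names match the structs used above; a read
 * barrier is needed to pair with the nvgpu_smp_wmb() on the write side.
 *
 *	struct nvgpu_ctxsw_ring_header *hdr = map;
 *	struct nvgpu_gpu_ctxsw_trace_entry *ents = (void *)(hdr + 1);
 *	u32 idx = hdr->read_idx;
 *
 *	while (idx != hdr->write_idx) {
 *		struct nvgpu_gpu_ctxsw_trace_entry e = ents[idx];
 *
 *		... consume e.seqno, e.tag, e.pid, e.timestamp ...
 *		if (++idx >= hdr->num_ents)
 *			idx = 0;
 *	}
 *	hdr->read_idx = idx;
 */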
void nvgpu_gr_fecs_trace_wake_up(struct gk20a *g, int vmid)
{
struct gk20a_ctxsw_dev *dev;
if (!g->ctxsw_trace)
return;
dev = &g->ctxsw_trace->devs[vmid];
nvgpu_cond_signal_interruptible(&dev->readout_wq);
}
void nvgpu_gr_fecs_trace_add_tsg_reset(struct gk20a *g, struct nvgpu_tsg *tsg)
{
struct nvgpu_gpu_ctxsw_trace_entry entry = {
.vmid = 0,
.tag = NVGPU_CTXSW_TAG_ENGINE_RESET,
.context_id = 0,
.pid = tsg->tgid,
};
if (!g->ctxsw_trace)
return;
g->ops.ptimer.read_ptimer(g, &entry.timestamp);
nvgpu_gr_fecs_trace_write_entry(g, &entry);
nvgpu_gr_fecs_trace_wake_up(g, 0);
#ifdef CONFIG_NVGPU_TRACE
trace_gk20a_channel_reset(~0, tsg->tsgid);
#endif
}
/*
* Convert Linux nvgpu ctxsw tags of the form NVGPU_CTXSW_TAG_*
* into common nvgpu ctxsw tags of the form NVGPU_GPU_CTXSW_TAG_*.
*/
u8 nvgpu_gpu_ctxsw_tags_to_common_tags(u8 tags)
{
switch (tags) {
case NVGPU_CTXSW_TAG_SOF:
return NVGPU_GPU_CTXSW_TAG_SOF;
case NVGPU_CTXSW_TAG_CTXSW_REQ_BY_HOST:
return NVGPU_GPU_CTXSW_TAG_CTXSW_REQ_BY_HOST;
case NVGPU_CTXSW_TAG_FE_ACK:
return NVGPU_GPU_CTXSW_TAG_FE_ACK;
case NVGPU_CTXSW_TAG_FE_ACK_WFI:
return NVGPU_GPU_CTXSW_TAG_FE_ACK_WFI;
case NVGPU_CTXSW_TAG_FE_ACK_GFXP:
return NVGPU_GPU_CTXSW_TAG_FE_ACK_GFXP;
case NVGPU_CTXSW_TAG_FE_ACK_CTAP:
return NVGPU_GPU_CTXSW_TAG_FE_ACK_CTAP;
case NVGPU_CTXSW_TAG_FE_ACK_CILP:
return NVGPU_GPU_CTXSW_TAG_FE_ACK_CILP;
case NVGPU_CTXSW_TAG_SAVE_END:
return NVGPU_GPU_CTXSW_TAG_SAVE_END;
case NVGPU_CTXSW_TAG_RESTORE_START:
return NVGPU_GPU_CTXSW_TAG_RESTORE_START;
case NVGPU_CTXSW_TAG_CONTEXT_START:
return NVGPU_GPU_CTXSW_TAG_CONTEXT_START;
case NVGPU_CTXSW_TAG_ENGINE_RESET:
return NVGPU_GPU_CTXSW_TAG_ENGINE_RESET;
case NVGPU_CTXSW_TAG_INVALID_TIMESTAMP:
return NVGPU_GPU_CTXSW_TAG_INVALID_TIMESTAMP;
}
WARN_ON(1);
return tags;
}

View File

@@ -0,0 +1,47 @@
/*
* Copyright (c) 2017-2020, NVIDIA Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef NVGPU_FECS_TRACE_LINUX_H
#define NVGPU_FECS_TRACE_LINUX_H
#include <nvgpu/types.h>
#define GK20A_CTXSW_TRACE_NUM_DEVS 1
#define GK20A_CTXSW_TRACE_MAX_VM_RING_SIZE (128*NVGPU_CPU_PAGE_SIZE)
struct file;
struct inode;
struct gk20a;
struct nvgpu_tsg;
struct nvgpu_channel;
struct vm_area_struct;
struct poll_table_struct;
int gk20a_ctxsw_trace_init(struct gk20a *g);
void gk20a_ctxsw_trace_cleanup(struct gk20a *g);
int gk20a_ctxsw_dev_mmap(struct file *filp, struct vm_area_struct *vma);
int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp);
int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp);
long gk20a_ctxsw_dev_ioctl(struct file *filp,
unsigned int cmd, unsigned long arg);
ssize_t gk20a_ctxsw_dev_read(struct file *filp, char __user *buf,
size_t size, loff_t *offs);
unsigned int gk20a_ctxsw_dev_poll(struct file *filp,
struct poll_table_struct *pts);
#endif /* NVGPU_FECS_TRACE_LINUX_H */

View File

@@ -0,0 +1,118 @@
/*
* Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/firmware.h>
#include <nvgpu/kmem.h>
#include <nvgpu/bug.h>
#include <nvgpu/firmware.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/string.h>
#include "platform_gk20a.h"
#include "os_linux.h"
static const struct firmware *do_request_firmware(struct device *dev,
const char *prefix, const char *fw_name, u32 flags)
{
const struct firmware *fw;
char *fw_path = NULL;
int path_len, err;
if (prefix) {
path_len = strlen(prefix) + strlen(fw_name);
path_len += 2; /* for the path separator and zero terminator */
fw_path = nvgpu_kzalloc(get_gk20a(dev),
sizeof(*fw_path) * path_len);
if (!fw_path)
return NULL;
(void) sprintf(fw_path, "%s/%s", prefix, fw_name);
fw_name = fw_path;
}
if (flags & NVGPU_REQUEST_FIRMWARE_NO_WARN)
err = request_firmware_direct(&fw, fw_name, dev);
else
err = request_firmware(&fw, fw_name, dev);
nvgpu_kfree(get_gk20a(dev), fw_path);
if (err)
return NULL;
return fw;
}
/* This is a simple wrapper around request_firmware that takes 'fw_name' and
* applies an IP specific relative path prefix to it. The caller is
* responsible for calling nvgpu_release_firmware later. */
struct nvgpu_firmware *nvgpu_request_firmware(struct gk20a *g,
const char *fw_name,
u32 flags)
{
struct device *dev = dev_from_gk20a(g);
struct nvgpu_firmware *fw;
const struct firmware *linux_fw;
/* current->fs is NULL when calling from SYS_EXIT.
 * Add a check here to prevent a crash in request_firmware. */
if (!current->fs || !fw_name)
return NULL;
fw = nvgpu_kzalloc(g, sizeof(*fw));
if (!fw)
return NULL;
linux_fw = do_request_firmware(dev, g->name, fw_name, flags);
#ifdef CONFIG_TEGRA_GK20A
/* TO BE REMOVED - Support loading from legacy SOC specific path. */
if (!linux_fw && !(flags & NVGPU_REQUEST_FIRMWARE_NO_SOC)) {
struct gk20a_platform *platform = gk20a_get_platform(dev);
linux_fw = do_request_firmware(dev,
platform->soc_name, fw_name, flags);
}
#endif
if (!linux_fw)
goto err;
fw->data = nvgpu_kmalloc(g, linux_fw->size);
if (!fw->data)
goto err_release;
nvgpu_memcpy((u8 *)fw->data, (u8 *)linux_fw->data, linux_fw->size);
fw->size = linux_fw->size;
release_firmware(linux_fw);
return fw;
err_release:
release_firmware(linux_fw);
err:
nvgpu_kfree(g, fw);
return NULL;
}
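/*
 * Caller-side sketch (illustrative only): typical request/use/release
 * pattern for this wrapper. The firmware file name "fecs.bin" is just
 * an example, not a name taken from this file.
 *
 *	struct nvgpu_firmware *fw = nvgpu_request_firmware(g, "fecs.bin", 0);
 *
 *	if (fw == NULL)
 *		return -ENOENT;
 *	... consume fw->data / fw->size ...
 *	nvgpu_release_firmware(g, fw);
 */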
void nvgpu_release_firmware(struct gk20a *g, struct nvgpu_firmware *fw)
{
if (!fw)
return;
nvgpu_kfree(g, fw->data);
nvgpu_kfree(g, fw);
}

View File

@@ -0,0 +1,82 @@
/*
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#include <nvgpu/fuse.h>
#include <nvgpu/linux/soc_fuse.h>
#include <soc/tegra/fuse.h>
int nvgpu_tegra_get_gpu_speedo_id(struct gk20a *g, int *id)
{
*id = tegra_sku_info.gpu_speedo_id;
return 0;
}
int nvgpu_tegra_fuse_read_reserved_calib(struct gk20a *g, u32 *val)
{
return tegra_fuse_readl(FUSE_RESERVED_CALIB0_0, val);
}
int nvgpu_tegra_fuse_read_gcplex_config_fuse(struct gk20a *g, u32 *val)
{
return tegra_fuse_readl(FUSE_GCPLEX_CONFIG_FUSE_0, val);
}
int nvgpu_tegra_fuse_read_per_device_identifier(struct gk20a *g, u64 *pdi)
{
u32 lo = 0U;
u32 hi = 0U;
int err;
err = tegra_fuse_readl(FUSE_PDI0, &lo);
if (err)
return err;
err = tegra_fuse_readl(FUSE_PDI1, &hi);
if (err)
return err;
*pdi = ((u64)lo) | (((u64)hi) << 32);
return 0;
}
#ifdef CONFIG_NVGPU_TEGRA_FUSE
/*
* Use tegra_fuse_control_read/write() APIs for fuse offsets up to 0x100
* Use tegra_fuse_readl/writel() APIs for fuse offsets above 0x100
*/
void nvgpu_tegra_fuse_write_bypass(struct gk20a *g, u32 val)
{
tegra_fuse_control_write(val, FUSE_FUSEBYPASS_0);
}
void nvgpu_tegra_fuse_write_access_sw(struct gk20a *g, u32 val)
{
tegra_fuse_control_write(val, FUSE_WRITE_ACCESS_SW_0);
}
void nvgpu_tegra_fuse_write_opt_gpu_tpc0_disable(struct gk20a *g, u32 val)
{
tegra_fuse_writel(val, FUSE_OPT_GPU_TPC0_DISABLE_0);
}
void nvgpu_tegra_fuse_write_opt_gpu_tpc1_disable(struct gk20a *g, u32 val)
{
tegra_fuse_writel(val, FUSE_OPT_GPU_TPC1_DISABLE_0);
}
#endif /* CONFIG_NVGPU_TEGRA_FUSE */

View File

@@ -0,0 +1,48 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <nvgpu/trace.h>
#include <nvgpu/gk20a.h>
void nvgpu_trace_intr_thread_stall_start(struct gk20a *g)
{
#ifdef CONFIG_NVGPU_TRACE
trace_mc_gk20a_intr_thread_stall(g->name);
#endif
}
void nvgpu_trace_intr_thread_stall_done(struct gk20a *g)
{
#ifdef CONFIG_NVGPU_TRACE
trace_mc_gk20a_intr_thread_stall_done(g->name);
#endif
}
void nvgpu_trace_intr_stall_start(struct gk20a *g)
{
#ifdef CONFIG_NVGPU_TRACE
trace_mc_gk20a_intr_stall(g->name);
#endif
}
void nvgpu_trace_intr_stall_done(struct gk20a *g)
{
#ifdef CONFIG_NVGPU_TRACE
trace_mc_gk20a_intr_stall_done(g->name);
#endif
}

View File

@@ -0,0 +1,26 @@
/*
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#include <nvgpu/io.h>
#include <nvgpu/types.h>
#include <nvgpu/gk20a.h>
#include "os_linux.h"
void nvgpu_usermode_writel(struct gk20a *g, u32 r, u32 v)
{
uintptr_t reg = g->usermode_regs + (r - g->ops.usermode.base(g));
nvgpu_os_writel_relaxed(v, reg);
nvgpu_log(g, gpu_dbg_reg, "usermode r=0x%x v=0x%x", r, v);
}

View File

@@ -0,0 +1,724 @@
/*
* NVGPU IOCTLs
*
* Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/file.h>
#include <linux/slab.h>
#include <nvgpu/nvgpu_common.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/mig.h>
#include <nvgpu/grmgr.h>
#include <nvgpu/nvgpu_init.h>
#include "ioctl_channel.h"
#include "ioctl_ctrl.h"
#include "ioctl_as.h"
#include "ioctl_tsg.h"
#include "ioctl_dbg.h"
#include "ioctl_prof.h"
#include "power_ops.h"
#include "ioctl.h"
#include "module.h"
#include "os_linux.h"
#include "fecs_trace_linux.h"
#include "platform_gk20a.h"
const struct file_operations gk20a_power_node_ops = {
.owner = THIS_MODULE,
.release = gk20a_power_release,
.open = gk20a_power_open,
.read = gk20a_power_read,
.write = gk20a_power_write,
};
const struct file_operations gk20a_channel_ops = {
.owner = THIS_MODULE,
.release = gk20a_channel_release,
.open = gk20a_channel_open,
#ifdef CONFIG_COMPAT
.compat_ioctl = gk20a_channel_ioctl,
#endif
.unlocked_ioctl = gk20a_channel_ioctl,
};
static const struct file_operations gk20a_ctrl_ops = {
.owner = THIS_MODULE,
.release = gk20a_ctrl_dev_release,
.open = gk20a_ctrl_dev_open,
.unlocked_ioctl = gk20a_ctrl_dev_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = gk20a_ctrl_dev_ioctl,
#endif
.mmap = gk20a_ctrl_dev_mmap,
};
static const struct file_operations gk20a_dbg_ops = {
.owner = THIS_MODULE,
.release = gk20a_dbg_gpu_dev_release,
.open = gk20a_dbg_gpu_dev_open,
.unlocked_ioctl = gk20a_dbg_gpu_dev_ioctl,
.poll = gk20a_dbg_gpu_dev_poll,
#ifdef CONFIG_COMPAT
.compat_ioctl = gk20a_dbg_gpu_dev_ioctl,
#endif
};
const struct file_operations gk20a_as_ops = {
.owner = THIS_MODULE,
.release = gk20a_as_dev_release,
.open = gk20a_as_dev_open,
#ifdef CONFIG_COMPAT
.compat_ioctl = gk20a_as_dev_ioctl,
#endif
.unlocked_ioctl = gk20a_as_dev_ioctl,
};
/*
* Note: a different 'open' is used to trigger handling of the profiler
* session. Most of the code is shared between the two paths; if handling
* both in the same path ever gets too tangled, they can be separated
* cleanly.
*/
static const struct file_operations gk20a_prof_ops = {
.owner = THIS_MODULE,
.release = gk20a_dbg_gpu_dev_release,
.open = gk20a_prof_gpu_dev_open,
.unlocked_ioctl = gk20a_dbg_gpu_dev_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = gk20a_dbg_gpu_dev_ioctl,
#endif
};
static const struct file_operations gk20a_prof_dev_ops = {
.owner = THIS_MODULE,
.release = nvgpu_prof_fops_release,
.open = nvgpu_prof_dev_fops_open,
.unlocked_ioctl = nvgpu_prof_fops_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = nvgpu_prof_fops_ioctl,
#endif
};
static const struct file_operations gk20a_prof_ctx_ops = {
.owner = THIS_MODULE,
.release = nvgpu_prof_fops_release,
.open = nvgpu_prof_ctx_fops_open,
.unlocked_ioctl = nvgpu_prof_fops_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = nvgpu_prof_fops_ioctl,
#endif
};
const struct file_operations gk20a_tsg_ops = {
.owner = THIS_MODULE,
.release = nvgpu_ioctl_tsg_dev_release,
.open = nvgpu_ioctl_tsg_dev_open,
#ifdef CONFIG_COMPAT
.compat_ioctl = nvgpu_ioctl_tsg_dev_ioctl,
#endif
.unlocked_ioctl = nvgpu_ioctl_tsg_dev_ioctl,
};
#ifdef CONFIG_NVGPU_FECS_TRACE
static const struct file_operations gk20a_ctxsw_ops = {
.owner = THIS_MODULE,
.release = gk20a_ctxsw_dev_release,
.open = gk20a_ctxsw_dev_open,
#ifdef CONFIG_COMPAT
.compat_ioctl = gk20a_ctxsw_dev_ioctl,
#endif
.unlocked_ioctl = gk20a_ctxsw_dev_ioctl,
.poll = gk20a_ctxsw_dev_poll,
.read = gk20a_ctxsw_dev_read,
.mmap = gk20a_ctxsw_dev_mmap,
};
#endif
static const struct file_operations gk20a_sched_ops = {
.owner = THIS_MODULE,
.release = gk20a_sched_dev_release,
.open = gk20a_sched_dev_open,
#ifdef CONFIG_COMPAT
.compat_ioctl = gk20a_sched_dev_ioctl,
#endif
.unlocked_ioctl = gk20a_sched_dev_ioctl,
.poll = gk20a_sched_dev_poll,
.read = gk20a_sched_dev_read,
};
struct nvgpu_dev_node {
/* Device node name */
char name[20];
/* file operations for device */
const struct file_operations *fops;
/* If node should be created for physical instance in MIG mode */
bool mig_physical_node;
};
static const struct nvgpu_dev_node dev_node_list[] = {
{"power", &gk20a_power_node_ops, false },
{"as", &gk20a_as_ops, false },
{"channel", &gk20a_channel_ops, false },
{"ctrl", &gk20a_ctrl_ops, true },
#if defined(CONFIG_NVGPU_FECS_TRACE)
{"ctxsw", &gk20a_ctxsw_ops, false },
#endif
{"dbg", &gk20a_dbg_ops, false },
{"prof", &gk20a_prof_ops, false },
{"prof-ctx", &gk20a_prof_ctx_ops, false },
{"prof-dev", &gk20a_prof_dev_ops, false },
{"sched", &gk20a_sched_ops, false },
{"tsg", &gk20a_tsg_ops, false },
};
static char *nvgpu_devnode(const char *cdev_name)
{
/* Special case to maintain legacy names */
if (strcmp(cdev_name, "channel") == 0) {
return kasprintf(GFP_KERNEL, "nvhost-gpu");
}
return kasprintf(GFP_KERNEL, "nvhost-%s-gpu", cdev_name);
}
static char *nvgpu_pci_devnode(struct device *dev, umode_t *mode)
{
if (mode) {
*mode = S_IRUSR | S_IWUSR;
}
/* Special case to maintain legacy names */
if (strcmp(dev_name(dev), "channel") == 0) {
return kasprintf(GFP_KERNEL, "nvgpu-pci/card-%s",
dev_name(dev->parent));
}
return kasprintf(GFP_KERNEL, "nvgpu-pci/card-%s-%s",
dev_name(dev->parent), dev_name(dev));
}
static char *nvgpu_devnode_v2(struct device *dev, umode_t *mode)
{
if (mode) {
*mode = S_IRUSR | S_IWUSR;
}
return kasprintf(GFP_KERNEL, "nvgpu/igpu0/%s", dev_name(dev));
}
static char *nvgpu_pci_devnode_v2(struct device *dev, umode_t *mode)
{
if (mode) {
*mode = S_IRUSR | S_IWUSR;
}
return kasprintf(GFP_KERNEL, "nvgpu/dgpu-%s/%s", dev_name(dev->parent),
dev_name(dev));
}
static char *nvgpu_mig_fgpu_devnode(struct device *dev, umode_t *mode)
{
struct nvgpu_cdev_class_priv_data *priv_data;
if (mode) {
*mode = S_IRUSR | S_IWUSR;
}
priv_data = dev_get_drvdata(dev);
if (priv_data->pci) {
return kasprintf(GFP_KERNEL, "nvgpu/dgpu-%s/fgpu-%u-%u/%s",
dev_name(dev->parent), priv_data->major_instance_id,
priv_data->minor_instance_id, dev_name(dev));
}
return kasprintf(GFP_KERNEL, "nvgpu/igpu0/fgpu-%u-%u/%s",
priv_data->major_instance_id,
priv_data->minor_instance_id, dev_name(dev));
}
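/*
 * Resulting /dev layout sketch (illustrative; exact names depend on
 * dev_name() of the parent device and on the cdev names in
 * dev_node_list):
 *
 *	legacy iGPU:  /dev/nvhost-ctrl-gpu, /dev/nvhost-gpu (channel), ...
 *	legacy dGPU:  /dev/nvgpu-pci/card-<parent>-ctrl, ...
 *	v2 iGPU:      /dev/nvgpu/igpu0/ctrl, ...
 *	v2 dGPU:      /dev/nvgpu/dgpu-<parent>/ctrl, ...
 *	MIG:          /dev/nvgpu/igpu0/fgpu-<major>-<minor>/<node> (or the
 *	              dgpu-<parent> prefix on PCI)
 */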
static int gk20a_create_device(
struct device *dev, int devno,
const char *cdev_name,
struct cdev *cdev, struct device **out,
const struct file_operations *ops,
struct nvgpu_class *class)
{
struct device *subdev;
int err;
struct gk20a *g = gk20a_from_dev(dev);
const char *device_name = NULL;
nvgpu_log_fn(g, " ");
cdev_init(cdev, ops);
cdev->owner = THIS_MODULE;
err = cdev_add(cdev, devno, 1);
if (err) {
dev_err(dev, "failed to add %s cdev\n", cdev_name);
return err;
}
if (class->class->devnode == NULL) {
device_name = nvgpu_devnode(cdev_name);
}
subdev = device_create(class->class, dev, devno,
class->priv_data ? class->priv_data : NULL,
device_name ? device_name : cdev_name);
if (IS_ERR(subdev)) {
err = PTR_ERR(subdev);
cdev_del(cdev);
dev_err(dev, "failed to create %s device for %s\n",
cdev_name, dev_name(dev));
return err;
}
if (device_name != NULL) {
kfree(device_name);
}
*out = subdev;
return 0;
}
void gk20a_remove_devices_and_classes(struct gk20a *g, bool power_node)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct nvgpu_cdev *cdev, *n;
struct nvgpu_class *class, *p;
nvgpu_list_for_each_entry_safe(cdev, n, &l->cdev_list_head, nvgpu_cdev, list_entry) {
class = cdev->class;
if (class->power_node != power_node)
continue;
nvgpu_list_del(&cdev->list_entry);
device_destroy(nvgpu_class_get_class(cdev->class), cdev->cdev.dev);
cdev_del(&cdev->cdev);
nvgpu_kfree(g, cdev);
}
nvgpu_list_for_each_entry_safe(class, p, &l->class_list_head, nvgpu_class, list_entry) {
if (class->power_node != power_node)
continue;
nvgpu_list_del(&class->list_entry);
class_destroy(class->class);
nvgpu_kfree(g, class);
}
}
void gk20a_power_node_deinit(struct device *dev)
{
struct gk20a *g = gk20a_from_dev(dev);
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
gk20a_remove_devices_and_classes(g, true);
if (l->power_cdev_region) {
unregister_chrdev_region(l->power_cdev_region, l->power_cdevs);
}
}
void gk20a_user_nodes_deinit(struct device *dev)
{
struct gk20a *g = gk20a_from_dev(dev);
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
gk20a_remove_devices_and_classes(g, false);
if (l->cdev_region) {
unregister_chrdev_region(l->cdev_region, l->num_cdevs);
l->num_cdevs = 0;
}
l->dev_nodes_created = false;
}
static struct nvgpu_class *nvgpu_create_class(struct gk20a *g, const char *class_name)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct nvgpu_class *class;
class = nvgpu_kzalloc(g, sizeof(*class));
if (class == NULL) {
return NULL;
}
class->class = class_create(THIS_MODULE, class_name);
if (IS_ERR(class->class)) {
nvgpu_err(g, "failed to create class");
nvgpu_kfree(g, class);
return NULL;
}
nvgpu_init_list_node(&class->list_entry);
nvgpu_list_add_tail(&class->list_entry, &l->class_list_head);
return class;
}
/*
* GPU instance information in MIG mode should be fetched from
* common.grmgr unit. But instance information is populated during GPU
* poweron and device nodes are enumerated during probe.
*
* Handle this temporarily by adding static information for the case
* where the GPU is partitioned into two instances. In the long term,
* this will need to be handled with design changes.
*
* This static information should be removed once instance information
* is fetched from common.grmgr unit.
*/
struct nvgpu_mig_static_info {
enum nvgpu_mig_gpu_instance_type instance_type;
u32 major_instance_id;
u32 minor_instance_id;
};
static int nvgpu_prepare_mig_dev_node_class_list(struct gk20a *g, u32 *num_classes)
{
u32 class_count = 0U;
struct nvgpu_class *class;
u32 i;
u32 num_instances;
struct nvgpu_cdev_class_priv_data *priv_data;
num_instances = g->mig.num_gpu_instances;
/*
* TODO: i = 0 needs to be added after the ctrl node fixup.
*/
for (i = 1U; i < num_instances; i++) {
priv_data = nvgpu_kzalloc(g, sizeof(*priv_data));
if (priv_data == NULL) {
return -ENOMEM;
}
snprintf(priv_data->class_name, sizeof(priv_data->class_name),
"nvidia%s-gpu-fgpu%u",
(g->pci_class != 0U) ? "-pci" : "", i);
class = nvgpu_create_class(g, priv_data->class_name);
if (class == NULL) {
kfree(priv_data);
return -ENOMEM;
}
class_count++;
class->class->devnode = nvgpu_mig_fgpu_devnode;
priv_data->major_instance_id = g->mig.gpu_instance[i].gpu_instance_id;
priv_data->minor_instance_id = g->mig.gpu_instance[i].gr_syspipe.gr_syspipe_id;
class->instance_type = NVGPU_MIG_TYPE_MIG;
class->priv_data = priv_data;
priv_data->local_instance_id = i;
priv_data->pci = (g->pci_class != 0U);
}
*num_classes = class_count;
return 0;
}
static int nvgpu_prepare_default_dev_node_class_list(struct gk20a *g,
u32 *num_classes, bool power_node)
{
struct nvgpu_class *class;
u32 count = 0U;
if (g->pci_class != 0U) {
if (power_node) {
class = nvgpu_create_class(g, "nvidia-pci-gpu-power");
} else {
class = nvgpu_create_class(g, "nvidia-pci-gpu");
}
if (class == NULL) {
return -ENOMEM;
}
class->class->devnode = nvgpu_pci_devnode;
count++;
} else {
if (power_node) {
class = nvgpu_create_class(g, "nvidia-gpu-power");
} else {
class = nvgpu_create_class(g, "nvidia-gpu");
}
if (class == NULL) {
return -ENOMEM;
}
class->class->devnode = NULL;
count++;
}
if (power_node) {
class->power_node = true;
}
/*
* V2 device node names hierarchy.
* This hierarchy will replace above hierarchy in second phase.
* Both legacy and V2 device node hierarchies will co-exist until then.
*/
if (g->pci_class != 0U) {
if (power_node) {
class = nvgpu_create_class(g, "nvidia-pci-gpu-v2-power");
} else {
class = nvgpu_create_class(g, "nvidia-pci-gpu-v2");
}
if (class == NULL) {
return -ENOMEM;
}
class->class->devnode = nvgpu_pci_devnode_v2;
count++;
} else {
if (power_node) {
class = nvgpu_create_class(g, "nvidia-gpu-v2-power");
} else {
class = nvgpu_create_class(g, "nvidia-gpu-v2");
}
if (class == NULL) {
return -ENOMEM;
}
class->class->devnode = nvgpu_devnode_v2;
count++;
}
if (power_node) {
class->power_node = true;
}
*num_classes = count;
return 0;
}
static int nvgpu_prepare_dev_node_class_list(struct gk20a *g, u32 *num_classes,
bool power_node)
{
int err;
if ((!power_node) && nvgpu_grmgr_is_multi_gr_enabled(g)) {
err = nvgpu_prepare_mig_dev_node_class_list(g, num_classes);
} else {
err = nvgpu_prepare_default_dev_node_class_list(g, num_classes, power_node);
}
return err;
}
static bool check_valid_dev_node(struct gk20a *g, struct nvgpu_class *class,
const struct nvgpu_dev_node *node)
{
if (nvgpu_grmgr_is_multi_gr_enabled(g)) {
if ((class->instance_type == NVGPU_MIG_TYPE_PHYSICAL) &&
!node->mig_physical_node) {
return false;
}
}
return true;
}
static bool check_valid_class(struct gk20a *g, struct nvgpu_class *class)
{
if (class->power_node) {
return false;
}
if (nvgpu_grmgr_is_multi_gr_enabled(g)) {
if ((class->instance_type == NVGPU_MIG_TYPE_PHYSICAL)) {
return false;
}
}
return true;
}
int gk20a_power_node_init(struct device *dev)
{
int err;
dev_t devno;
struct gk20a *g = gk20a_from_dev(dev);
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct nvgpu_class *class;
u32 total_cdevs;
u32 num_classes;
struct nvgpu_cdev *cdev;
if (!l->cdev_list_init_done) {
nvgpu_init_list_node(&l->cdev_list_head);
nvgpu_init_list_node(&l->class_list_head);
l->cdev_list_init_done = true;
}
err = nvgpu_prepare_dev_node_class_list(g, &num_classes, true);
if (err != 0) {
return err;
}
total_cdevs = num_classes;
err = alloc_chrdev_region(&devno, 0, total_cdevs, dev_name(dev));
if (err) {
dev_err(dev, "failed to allocate devno\n");
goto fail;
}
l->power_cdev_region = devno;
nvgpu_list_for_each_entry(class, &l->class_list_head, nvgpu_class, list_entry) {
cdev = nvgpu_kzalloc(g, sizeof(*cdev));
if (cdev == NULL) {
dev_err(dev, "failed to allocate cdev\n");
goto fail;
}
/*
* dev_node_list[0] is the power node used to power on
* the GPU.
*/
err = gk20a_create_device(dev, devno++,
dev_node_list[0].name,
&cdev->cdev, &cdev->node,
dev_node_list[0].fops,
class);
if (err) {
goto fail;
}
cdev->class = class;
nvgpu_init_list_node(&cdev->list_entry);
nvgpu_list_add(&cdev->list_entry, &l->cdev_list_head);
}
l->power_cdevs = total_cdevs;
return 0;
fail:
gk20a_power_node_deinit(dev);
return err;
}
int gk20a_user_nodes_init(struct device *dev)
{
int err;
dev_t devno;
struct gk20a *g = gk20a_from_dev(dev);
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct nvgpu_class *class;
u32 num_cdevs, total_cdevs;
u32 num_classes;
struct nvgpu_cdev *cdev;
u32 cdev_index;
if (!l->cdev_list_init_done) {
nvgpu_init_list_node(&l->cdev_list_head);
nvgpu_init_list_node(&l->class_list_head);
l->cdev_list_init_done = true;
}
err = nvgpu_prepare_dev_node_class_list(g, &num_classes, false);
if (err != 0) {
return err;
}
num_cdevs = sizeof(dev_node_list) / sizeof(dev_node_list[0]);
if (nvgpu_grmgr_is_multi_gr_enabled(g)) {
/*
* The MIG physical node needs only the ctrl node.
* We need to add total_cdevs + 1 once the ctrl node is enabled.
*/
total_cdevs = (num_cdevs - 1) * (num_classes - 1);
} else {
/*
* As the power node is already created, we need to
* reduce the device count by one.
*/
total_cdevs = (num_cdevs - 1) * num_classes;
}
err = alloc_chrdev_region(&devno, 0, total_cdevs, dev_name(dev));
if (err) {
dev_err(dev, "failed to allocate devno\n");
goto fail;
}
l->cdev_region = devno;
nvgpu_list_for_each_entry(class, &l->class_list_head, nvgpu_class, list_entry) {
if (!check_valid_class(g, class)) {
continue;
}
/*
* As we created the power node with power class already, the
* index is starting from one.
*/
for (cdev_index = 1; cdev_index < num_cdevs; cdev_index++) {
if (!check_valid_dev_node(g, class, &dev_node_list[cdev_index])) {
continue;
}
cdev = nvgpu_kzalloc(g, sizeof(*cdev));
if (cdev == NULL) {
dev_err(dev, "failed to allocate cdev\n");
goto fail;
}
err = gk20a_create_device(dev, devno++,
dev_node_list[cdev_index].name,
&cdev->cdev, &cdev->node,
dev_node_list[cdev_index].fops,
class);
if (err) {
goto fail;
}
cdev->class = class;
nvgpu_init_list_node(&cdev->list_entry);
nvgpu_list_add(&cdev->list_entry, &l->cdev_list_head);
}
}
l->num_cdevs = total_cdevs;
return 0;
fail:
gk20a_user_nodes_deinit(dev);
return err;
}
struct gk20a *nvgpu_get_gk20a_from_cdev(struct nvgpu_cdev *cdev)
{
return get_gk20a(cdev->node->parent);
}
u32 nvgpu_get_gpu_instance_id_from_cdev(struct gk20a *g, struct nvgpu_cdev *cdev)
{
struct nvgpu_cdev_class_priv_data *priv_data;
if (nvgpu_grmgr_is_multi_gr_enabled(g)) {
priv_data = dev_get_drvdata(cdev->node);
return priv_data->local_instance_id;
}
return 0;
}

View File

@@ -0,0 +1,76 @@
/*
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#ifndef __NVGPU_IOCTL_H__
#define __NVGPU_IOCTL_H__
#include <linux/cdev.h>
#include <nvgpu/types.h>
#include <nvgpu/list.h>
struct device;
struct class;
struct nvgpu_class {
struct class *class;
struct nvgpu_list_node list_entry;
struct nvgpu_cdev_class_priv_data *priv_data;
enum nvgpu_mig_gpu_instance_type instance_type;
bool power_node;
};
static inline struct class *nvgpu_class_get_class(struct nvgpu_class *class)
{
return class->class;
}
struct nvgpu_cdev {
struct cdev cdev;
struct device *node;
struct nvgpu_class *class;
struct nvgpu_list_node list_entry;
};
static inline struct nvgpu_cdev *
nvgpu_cdev_from_list_entry(struct nvgpu_list_node *node)
{
return (struct nvgpu_cdev *)
((uintptr_t)node - offsetof(struct nvgpu_cdev, list_entry));
};
struct nvgpu_cdev_class_priv_data {
char class_name[64];
u32 local_instance_id;
u32 major_instance_id;
u32 minor_instance_id;
bool pci;
};
static inline struct nvgpu_class *
nvgpu_class_from_list_entry(struct nvgpu_list_node *node)
{
return (struct nvgpu_class *)
((uintptr_t)node - offsetof(struct nvgpu_class, list_entry));
};
int gk20a_user_nodes_init(struct device *dev);
int gk20a_power_node_init(struct device *dev);
void gk20a_user_nodes_deinit(struct device *dev);
void gk20a_power_node_deinit(struct device *dev);
struct gk20a *nvgpu_get_gk20a_from_cdev(struct nvgpu_cdev *cdev);
u32 nvgpu_get_gpu_instance_id_from_cdev(struct gk20a *g, struct nvgpu_cdev *cdev);
#endif

View File

@@ -0,0 +1,481 @@
/*
* GK20A Address Spaces
*
* Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#include <linux/cdev.h>
#include <linux/uaccess.h>
#include <linux/fs.h>
#include <nvgpu/trace.h>
#include <uapi/linux/nvgpu.h>
#include <nvgpu/gmmu.h>
#include <nvgpu/mm.h>
#include <nvgpu/vm_area.h>
#include <nvgpu/log2.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/nvgpu_init.h>
#include <nvgpu/channel.h>
#include <nvgpu/nvhost.h>
#include <nvgpu/linux/vm.h>
#include "platform_gk20a.h"
#include "ioctl_as.h"
#include "ioctl_channel.h"
#include "ioctl.h"
#include "os_linux.h"
static u32 gk20a_as_translate_as_alloc_space_flags(struct gk20a *g, u32 flags)
{
u32 core_flags = 0;
if (flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET)
core_flags |= NVGPU_VM_AREA_ALLOC_FIXED_OFFSET;
if (flags & NVGPU_AS_ALLOC_SPACE_FLAGS_SPARSE)
core_flags |= NVGPU_VM_AREA_ALLOC_SPARSE;
return core_flags;
}
static int gk20a_as_ioctl_bind_channel(
struct gk20a_as_share *as_share,
struct nvgpu_as_bind_channel_args *args)
{
int err = 0;
struct nvgpu_channel *ch;
struct gk20a *g = gk20a_from_vm(as_share->vm);
nvgpu_log_fn(g, " ");
ch = nvgpu_channel_get_from_file(args->channel_fd);
if (!ch)
return -EINVAL;
if (nvgpu_channel_as_bound(ch)) {
err = -EINVAL;
goto out;
}
/* this will set nvgpu_channel->vm */
err = ch->g->ops.mm.vm_bind_channel(as_share->vm, ch);
out:
nvgpu_channel_put(ch);
return err;
}
static int gk20a_as_ioctl_alloc_space(
struct gk20a_as_share *as_share,
struct nvgpu_as_alloc_space_args *args)
{
struct gk20a *g = gk20a_from_vm(as_share->vm);
nvgpu_log_fn(g, " ");
return nvgpu_vm_area_alloc(as_share->vm, args->pages, args->page_size,
&args->o_a.offset,
gk20a_as_translate_as_alloc_space_flags(g,
args->flags));
}
static int gk20a_as_ioctl_free_space(
struct gk20a_as_share *as_share,
struct nvgpu_as_free_space_args *args)
{
struct gk20a *g = gk20a_from_vm(as_share->vm);
nvgpu_log_fn(g, " ");
return nvgpu_vm_area_free(as_share->vm, args->offset);
}
static int gk20a_as_ioctl_map_buffer_ex(
struct gk20a_as_share *as_share,
struct nvgpu_as_map_buffer_ex_args *args)
{
struct gk20a *g = gk20a_from_vm(as_share->vm);
nvgpu_log_fn(g, " ");
/* unsupported, direct kind control must be used */
if (!(args->flags & NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL)) {
struct gk20a *g = as_share->vm->mm->g;
nvgpu_log_info(g, "Direct kind control must be requested");
return -EINVAL;
}
return nvgpu_vm_map_buffer(as_share->vm, args->dmabuf_fd,
&args->offset, args->flags,
args->page_size,
args->compr_kind,
args->incompr_kind,
args->buffer_offset,
args->mapping_size,
NULL);
}
static int gk20a_as_ioctl_unmap_buffer(
struct gk20a_as_share *as_share,
struct nvgpu_as_unmap_buffer_args *args)
{
struct gk20a *g = gk20a_from_vm(as_share->vm);
nvgpu_log_fn(g, " ");
nvgpu_vm_unmap(as_share->vm, args->offset, NULL);
return 0;
}
static int gk20a_as_ioctl_map_buffer_batch(
struct gk20a_as_share *as_share,
struct nvgpu_as_map_buffer_batch_args *args)
{
struct gk20a *g = gk20a_from_vm(as_share->vm);
u32 i;
int err = 0;
struct nvgpu_as_unmap_buffer_args __user *user_unmap_args =
(struct nvgpu_as_unmap_buffer_args __user *)(uintptr_t)
args->unmaps;
struct nvgpu_as_map_buffer_ex_args __user *user_map_args =
(struct nvgpu_as_map_buffer_ex_args __user *)(uintptr_t)
args->maps;
struct vm_gk20a_mapping_batch batch;
nvgpu_log_fn(g, " ");
if (args->num_unmaps > NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT ||
args->num_maps > NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT)
return -EINVAL;
nvgpu_vm_mapping_batch_start(&batch);
for (i = 0; i < args->num_unmaps; ++i) {
struct nvgpu_as_unmap_buffer_args unmap_args;
if (copy_from_user(&unmap_args, &user_unmap_args[i],
sizeof(unmap_args))) {
err = -EFAULT;
break;
}
nvgpu_vm_unmap(as_share->vm, unmap_args.offset, &batch);
}
nvgpu_speculation_barrier();
if (err) {
nvgpu_vm_mapping_batch_finish(as_share->vm, &batch);
args->num_unmaps = i;
args->num_maps = 0;
return err;
}
for (i = 0; i < args->num_maps; ++i) {
s16 compressible_kind;
s16 incompressible_kind;
struct nvgpu_as_map_buffer_ex_args map_args;
(void) memset(&map_args, 0, sizeof(map_args));
if (copy_from_user(&map_args, &user_map_args[i],
sizeof(map_args))) {
err = -EFAULT;
break;
}
if (map_args.flags &
NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL) {
compressible_kind = map_args.compr_kind;
incompressible_kind = map_args.incompr_kind;
} else {
/* direct kind control must be used */
err = -EINVAL;
break;
}
err = nvgpu_vm_map_buffer(
as_share->vm, map_args.dmabuf_fd,
&map_args.offset, map_args.flags, map_args.page_size,
compressible_kind, incompressible_kind,
map_args.buffer_offset,
map_args.mapping_size,
&batch);
if (err)
break;
}
nvgpu_vm_mapping_batch_finish(as_share->vm, &batch);
if (err)
args->num_maps = i;
/* note: args->num_unmaps will be unmodified, which is ok
* since all unmaps are done */
return err;
}
static int gk20a_as_ioctl_get_va_regions(
struct gk20a_as_share *as_share,
struct nvgpu_as_get_va_regions_args *args)
{
unsigned int i;
unsigned int write_entries;
struct nvgpu_as_va_region __user *user_region_ptr;
struct vm_gk20a *vm = as_share->vm;
struct gk20a *g = gk20a_from_vm(vm);
unsigned int page_sizes = GMMU_PAGE_SIZE_KERNEL;
nvgpu_log_fn(g, " ");
if (!vm->big_pages)
page_sizes--;
write_entries = args->buf_size / sizeof(struct nvgpu_as_va_region);
if (write_entries > page_sizes)
write_entries = page_sizes;
user_region_ptr =
(struct nvgpu_as_va_region __user *)(uintptr_t)args->buf_addr;
for (i = 0; i < write_entries; ++i) {
struct nvgpu_as_va_region region;
struct nvgpu_allocator *vma = vm->vma[i];
(void) memset(&region, 0, sizeof(struct nvgpu_as_va_region));
region.page_size = vm->gmmu_page_sizes[i];
region.offset = nvgpu_alloc_base(vma);
/* No __aeabi_uldivmod() on some platforms... */
region.pages = (nvgpu_alloc_end(vma) -
nvgpu_alloc_base(vma)) >> ilog2(region.page_size);
if (copy_to_user(user_region_ptr + i, &region, sizeof(region)))
return -EFAULT;
}
args->buf_size =
page_sizes * sizeof(struct nvgpu_as_va_region);
return 0;
}
static int nvgpu_as_ioctl_get_sync_ro_map(
struct gk20a_as_share *as_share,
struct nvgpu_as_get_sync_ro_map_args *args)
{
#ifdef CONFIG_TEGRA_GK20A_NVHOST
struct vm_gk20a *vm = as_share->vm;
struct gk20a *g = gk20a_from_vm(vm);
u64 base_gpuva;
u32 sync_size;
u32 num_syncpoints;
int err = 0;
if (g->ops.sync.syncpt.get_sync_ro_map == NULL)
return -EINVAL;
if (!nvgpu_has_syncpoints(g))
return -EINVAL;
err = g->ops.sync.syncpt.get_sync_ro_map(vm, &base_gpuva, &sync_size,
&num_syncpoints);
if (err)
return err;
args->base_gpuva = base_gpuva;
args->sync_size = sync_size;
args->num_syncpoints = num_syncpoints;
return err;
#else
return -EINVAL;
#endif
}
static int nvgpu_as_ioctl_mapping_modify(
struct gk20a_as_share *as_share,
struct nvgpu_as_mapping_modify_args *args)
{
struct gk20a *g = gk20a_from_vm(as_share->vm);
nvgpu_log_fn(g, " ");
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MAPPING_MODIFY)) {
return -ENOTTY;
}
return nvgpu_vm_mapping_modify(as_share->vm,
args->compr_kind,
args->incompr_kind,
args->map_address,
args->buffer_offset,
args->buffer_size);
}
int gk20a_as_dev_open(struct inode *inode, struct file *filp)
{
struct gk20a_as_share *as_share;
struct gk20a *g;
struct mm_gk20a *mm;
int err;
struct nvgpu_cdev *cdev;
u32 big_page_size;
cdev = container_of(inode->i_cdev, struct nvgpu_cdev, cdev);
g = nvgpu_get_gk20a_from_cdev(cdev);
mm = &g->mm;
big_page_size = g->ops.mm.gmmu.get_default_big_page_size();
nvgpu_log_fn(g, " ");
err = gk20a_as_alloc_share(g,
big_page_size,
NVGPU_AS_ALLOC_UNIFIED_VA,
U64(big_page_size) << U64(10),
mm->channel.user_size,
0ULL, &as_share);
if (err) {
nvgpu_log_fn(g, "failed to alloc share");
return err;
}
filp->private_data = as_share;
return 0;
}
int gk20a_as_dev_release(struct inode *inode, struct file *filp)
{
struct gk20a_as_share *as_share = filp->private_data;
if (!as_share)
return 0;
return gk20a_as_release_share(as_share);
}
long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
int err = 0;
struct gk20a_as_share *as_share = filp->private_data;
struct gk20a *g = gk20a_from_as(as_share->as);
u8 buf[NVGPU_AS_IOCTL_MAX_ARG_SIZE];
nvgpu_log_fn(g, "start %d", _IOC_NR(cmd));
if ((_IOC_TYPE(cmd) != NVGPU_AS_IOCTL_MAGIC) ||
(_IOC_NR(cmd) == 0) ||
(_IOC_NR(cmd) > NVGPU_AS_IOCTL_LAST) ||
(_IOC_SIZE(cmd) > NVGPU_AS_IOCTL_MAX_ARG_SIZE))
return -EINVAL;
(void) memset(buf, 0, sizeof(buf));
if (_IOC_DIR(cmd) & _IOC_WRITE) {
if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
return -EFAULT;
}
err = gk20a_busy(g);
if (err)
return err;
nvgpu_speculation_barrier();
switch (cmd) {
case NVGPU_AS_IOCTL_BIND_CHANNEL:
#ifdef CONFIG_NVGPU_TRACE
trace_gk20a_as_ioctl_bind_channel(g->name);
#endif
err = gk20a_as_ioctl_bind_channel(as_share,
(struct nvgpu_as_bind_channel_args *)buf);
break;
case NVGPU32_AS_IOCTL_ALLOC_SPACE:
{
struct nvgpu32_as_alloc_space_args *args32 =
(struct nvgpu32_as_alloc_space_args *)buf;
struct nvgpu_as_alloc_space_args args;
args.pages = args32->pages;
args.page_size = args32->page_size;
args.flags = args32->flags;
args.o_a.offset = args32->o_a.offset;
#ifdef CONFIG_NVGPU_TRACE
trace_gk20a_as_ioctl_alloc_space(g->name);
#endif
err = gk20a_as_ioctl_alloc_space(as_share, &args);
args32->o_a.offset = args.o_a.offset;
break;
}
case NVGPU_AS_IOCTL_ALLOC_SPACE:
#ifdef CONFIG_NVGPU_TRACE
trace_gk20a_as_ioctl_alloc_space(g->name);
#endif
err = gk20a_as_ioctl_alloc_space(as_share,
(struct nvgpu_as_alloc_space_args *)buf);
break;
case NVGPU_AS_IOCTL_FREE_SPACE:
#ifdef CONFIG_NVGPU_TRACE
trace_gk20a_as_ioctl_free_space(g->name);
#endif
err = gk20a_as_ioctl_free_space(as_share,
(struct nvgpu_as_free_space_args *)buf);
break;
case NVGPU_AS_IOCTL_MAP_BUFFER_EX:
#ifdef CONFIG_NVGPU_TRACE
trace_gk20a_as_ioctl_map_buffer(g->name);
#endif
err = gk20a_as_ioctl_map_buffer_ex(as_share,
(struct nvgpu_as_map_buffer_ex_args *)buf);
break;
case NVGPU_AS_IOCTL_UNMAP_BUFFER:
#ifdef CONFIG_NVGPU_TRACE
trace_gk20a_as_ioctl_unmap_buffer(g->name);
#endif
err = gk20a_as_ioctl_unmap_buffer(as_share,
(struct nvgpu_as_unmap_buffer_args *)buf);
break;
case NVGPU_AS_IOCTL_GET_VA_REGIONS:
#ifdef CONFIG_NVGPU_TRACE
trace_gk20a_as_ioctl_get_va_regions(g->name);
#endif
err = gk20a_as_ioctl_get_va_regions(as_share,
(struct nvgpu_as_get_va_regions_args *)buf);
break;
case NVGPU_AS_IOCTL_MAP_BUFFER_BATCH:
err = gk20a_as_ioctl_map_buffer_batch(as_share,
(struct nvgpu_as_map_buffer_batch_args *)buf);
break;
case NVGPU_AS_IOCTL_GET_SYNC_RO_MAP:
err = nvgpu_as_ioctl_get_sync_ro_map(as_share,
(struct nvgpu_as_get_sync_ro_map_args *)buf);
break;
case NVGPU_AS_IOCTL_MAPPING_MODIFY:
err = nvgpu_as_ioctl_mapping_modify(as_share,
(struct nvgpu_as_mapping_modify_args *)buf);
break;
default:
err = -ENOTTY;
break;
}
gk20a_idle(g);
if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
if (copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)))
err = -EFAULT;
return err;
}
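/*
 * Userspace usage sketch (illustrative only, not part of the driver).
 * The device node assumes the legacy "nvhost-as-gpu" name; argument
 * structs and ioctl numbers come from uapi/linux/nvgpu.h. The kind
 * values are device-specific and left as placeholders here.
 *
 *	int as_fd = open("/dev/nvhost-as-gpu", O_RDWR);
 *	struct nvgpu_as_bind_channel_args bind = { .channel_fd = ch_fd };
 *	struct nvgpu_as_map_buffer_ex_args map = {
 *		.dmabuf_fd = buf_fd,
 *		.flags = NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL,
 *		.compr_kind = ...,
 *		.incompr_kind = ...,
 *	};
 *	struct nvgpu_as_unmap_buffer_args unmap;
 *
 *	ioctl(as_fd, NVGPU_AS_IOCTL_BIND_CHANNEL, &bind);
 *	ioctl(as_fd, NVGPU_AS_IOCTL_MAP_BUFFER_EX, &map);
 *	... map.offset now holds the GPU virtual address ...
 *	unmap.offset = map.offset;
 *	ioctl(as_fd, NVGPU_AS_IOCTL_UNMAP_BUFFER, &unmap);
 *	close(as_fd);
 */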

View File

@@ -0,0 +1,30 @@
/*
* GK20A Address Spaces
*
* Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#ifndef __NVGPU_COMMON_LINUX_AS_H__
#define __NVGPU_COMMON_LINUX_AS_H__
struct inode;
struct file;
/* MAP_BUFFER_BATCH_LIMIT: the upper limit for num_unmaps and
* num_maps */
#define NVGPU_IOCTL_AS_MAP_BUFFER_BATCH_LIMIT 256
/* struct file_operations driver interface */
int gk20a_as_dev_open(struct inode *inode, struct file *filp);
int gk20a_as_dev_release(struct inode *inode, struct file *filp);
long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
#endif

View File

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,61 @@
/*
* Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#ifndef __NVGPU_IOCTL_CHANNEL_H__
#define __NVGPU_IOCTL_CHANNEL_H__
#include <linux/fs.h>
#include <nvgpu/cyclestats_snapshot.h>
struct inode;
struct file;
struct gk20a;
struct nvgpu_channel_open_args;
struct nvgpu_channel;
struct nvgpu_cdev;
struct gk20a_cs_snapshot_client_linux {
struct gk20a_cs_snapshot_client cs_client;
u32 dmabuf_fd;
struct dma_buf *dma_handler;
};
struct nvgpu_channel *nvgpu_channel_get_from_file(int fd);
int gk20a_channel_open(struct inode *inode, struct file *filp);
int gk20a_channel_release(struct inode *inode, struct file *filp);
long gk20a_channel_ioctl(struct file *filp,
unsigned int cmd, unsigned long arg);
int gk20a_channel_open_ioctl(struct gk20a *g, struct nvgpu_cdev *cdev,
struct nvgpu_channel_open_args *args);
int gk20a_channel_cycle_stats(struct nvgpu_channel *ch, int dmabuf_fd);
void gk20a_channel_free_cycle_stats_buffer(struct nvgpu_channel *ch);
int gk20a_attach_cycle_stats_snapshot(struct nvgpu_channel *ch,
u32 dmabuf_fd,
u32 perfmon_id_count,
u32 *perfmon_id_start);
int gk20a_flush_cycle_stats_snapshot(struct nvgpu_channel *ch);
int gk20a_channel_free_cycle_stats_snapshot(struct nvgpu_channel *ch);
extern const struct file_operations gk20a_channel_ops;
u32 nvgpu_get_common_runlist_level(u32 level);
u32 nvgpu_get_ioctl_graphics_preempt_mode_flags(u32 graphics_preempt_mode_flags);
u32 nvgpu_get_ioctl_compute_preempt_mode_flags(u32 compute_preempt_mode_flags);
u32 nvgpu_get_ioctl_graphics_preempt_mode(u32 graphics_preempt_mode);
u32 nvgpu_get_ioctl_compute_preempt_mode(u32 compute_preempt_mode);
#endif

View File

@@ -0,0 +1,574 @@
/*
* Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/cdev.h>
#include <linux/file.h>
#include <linux/anon_inodes.h>
#include <linux/uaccess.h>
#include <linux/poll.h>
#ifdef CONFIG_DEBUG_FS
#include <linux/debugfs.h>
#endif
#include <uapi/linux/nvgpu.h>
#include <nvgpu/bitops.h>
#include <nvgpu/lock.h>
#include <nvgpu/kmem.h>
#include <nvgpu/atomic.h>
#include <nvgpu/bug.h>
#include <nvgpu/kref.h>
#include <nvgpu/log.h>
#include <nvgpu/barrier.h>
#include <nvgpu/cond.h>
#include <nvgpu/list.h>
#include <nvgpu/clk_arb.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/enabled.h>
#include <nvgpu/pmu/pmu_pstate.h>
#include <nvgpu/pmu/volt.h>
#include <nvgpu/pmu/lpwr.h>
#include <nvgpu/pmu/clk/clk.h>
#ifdef CONFIG_DEBUG_FS
#include "os_linux.h"
#endif
static int nvgpu_clk_arb_release_completion_dev(struct inode *inode,
struct file *filp)
{
struct nvgpu_clk_dev *dev = filp->private_data;
struct nvgpu_clk_session *session = dev->session;
clk_arb_dbg(session->g, " ");
/* This is done to account for the extra refcount taken in
* nvgpu_clk_arb_commit_request_fd when device events are not supported (iGPU)
*/
if (!nvgpu_is_enabled(session->g, NVGPU_SUPPORT_DEVICE_EVENTS)) {
nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
}
nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session);
nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
return 0;
}
static inline unsigned int nvgpu_convert_poll_mask(unsigned int nvgpu_poll_mask)
{
unsigned int poll_mask = 0;
if (nvgpu_poll_mask & NVGPU_POLLIN)
poll_mask |= POLLIN;
if (nvgpu_poll_mask & NVGPU_POLLPRI)
poll_mask |= POLLPRI;
if (nvgpu_poll_mask & NVGPU_POLLOUT)
poll_mask |= POLLOUT;
if (nvgpu_poll_mask & NVGPU_POLLRDNORM)
poll_mask |= POLLRDNORM;
if (nvgpu_poll_mask & NVGPU_POLLHUP)
poll_mask |= POLLHUP;
return poll_mask;
}
static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait)
{
struct nvgpu_clk_dev *dev = filp->private_data;
clk_arb_dbg(dev->session->g, " ");
poll_wait(filp, &dev->readout_wq.wq, wait);
return nvgpu_convert_poll_mask(nvgpu_atomic_xchg(&dev->poll_mask, 0));
}
void nvgpu_clk_arb_event_post_event(struct nvgpu_clk_dev *dev)
{
nvgpu_cond_broadcast_interruptible(&dev->readout_wq);
}
static int nvgpu_clk_arb_release_event_dev(struct inode *inode,
struct file *filp)
{
struct nvgpu_clk_dev *dev = filp->private_data;
struct nvgpu_clk_session *session = dev->session;
struct nvgpu_clk_arb *arb;
arb = session->g->clk_arb;
clk_arb_dbg(session->g, " ");
if (arb) {
nvgpu_spinlock_acquire(&arb->users_lock);
nvgpu_list_del(&dev->link);
nvgpu_spinlock_release(&arb->users_lock);
nvgpu_clk_notification_queue_free(arb->g, &dev->queue);
}
nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session);
nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
return 0;
}
static inline u32 nvgpu_convert_gpu_event(u32 nvgpu_event)
{
u32 nvgpu_gpu_event;
switch (nvgpu_event) {
case NVGPU_EVENT_VF_UPDATE:
nvgpu_gpu_event = NVGPU_GPU_EVENT_VF_UPDATE;
break;
case NVGPU_EVENT_ALARM_TARGET_VF_NOT_POSSIBLE:
nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_TARGET_VF_NOT_POSSIBLE;
break;
case NVGPU_EVENT_ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE:
nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_LOCAL_TARGET_VF_NOT_POSSIBLE;
break;
case NVGPU_EVENT_ALARM_CLOCK_ARBITER_FAILED:
nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_CLOCK_ARBITER_FAILED;
break;
case NVGPU_EVENT_ALARM_VF_TABLE_UPDATE_FAILED:
nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_VF_TABLE_UPDATE_FAILED;
break;
case NVGPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD:
nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD;
break;
case NVGPU_EVENT_ALARM_POWER_ABOVE_THRESHOLD:
nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_POWER_ABOVE_THRESHOLD;
break;
case NVGPU_EVENT_ALARM_GPU_LOST:
nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_GPU_LOST;
break;
default:
/* Control shouldn't come here */
nvgpu_gpu_event = NVGPU_GPU_EVENT_ALARM_GPU_LOST + 1;
break;
}
return nvgpu_gpu_event;
}
static inline u32 __pending_event(struct nvgpu_clk_dev *dev,
struct nvgpu_gpu_event_info *info) {
u32 tail, head;
u32 events = 0;
struct nvgpu_clk_notification *p_notif;
tail = nvgpu_atomic_read(&dev->queue.tail);
head = nvgpu_atomic_read(&dev->queue.head);
head = (tail - head) < dev->queue.size ? head : tail - dev->queue.size;
if (WRAPGTEQ(tail, head) && info) {
head++;
p_notif = &dev->queue.clk_q_notifications[
head % dev->queue.size];
events = p_notif->clk_notification;
info->event_id = ffs(events) - 1;
info->timestamp = p_notif->timestamp;
nvgpu_atomic_set(&dev->queue.head, head);
}
return events;
}
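/*
 * Worked example of the head/tail handling above (illustrative): head
 * and tail are free-running counters and slots are addressed modulo
 * queue.size. With queue.size = 4, head = 1 and tail = 7, six events
 * are pending but only four slots exist, so head is first advanced to
 * tail - size = 3 (events 2 and 3 were overwritten), then incremented
 * to 4, and the notification in slot 4 % queue.size is returned.
 */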
static ssize_t nvgpu_clk_arb_read_event_dev(struct file *filp, char __user *buf,
size_t size, loff_t *off)
{
struct nvgpu_clk_dev *dev = filp->private_data;
struct nvgpu_gpu_event_info info;
ssize_t err;
clk_arb_dbg(dev->session->g,
"filp=%p, buf=%p, size=%zu", filp, buf, size);
if ((size - *off) < sizeof(info))
return 0;
(void) memset(&info, 0, sizeof(info));
/* Get the oldest event from the queue */
while (!__pending_event(dev, &info)) {
if (filp->f_flags & O_NONBLOCK)
return -EAGAIN;
err = NVGPU_COND_WAIT_INTERRUPTIBLE(&dev->readout_wq,
__pending_event(dev, &info), 0U);
if (err)
return err;
if (info.timestamp)
break;
}
if (copy_to_user(buf + *off, &info, sizeof(info)))
return -EFAULT;
return sizeof(info);
}
static int nvgpu_clk_arb_set_event_filter(struct nvgpu_clk_dev *dev,
struct nvgpu_gpu_set_event_filter_args *args)
{
struct gk20a *g = dev->session->g;
u32 mask;
nvgpu_log(g, gpu_dbg_fn, " ");
if (args->flags)
return -EINVAL;
if (args->size != 1)
return -EINVAL;
if (copy_from_user(&mask, (void __user *) args->buffer,
args->size * sizeof(u32)))
return -EFAULT;
/* update alarm mask */
nvgpu_atomic_set(&dev->enabled_mask, mask);
return 0;
}
static long nvgpu_clk_arb_ioctl_event_dev(struct file *filp, unsigned int cmd,
unsigned long arg)
{
struct nvgpu_clk_dev *dev = filp->private_data;
struct gk20a *g = dev->session->g;
u8 buf[NVGPU_EVENT_IOCTL_MAX_ARG_SIZE];
int err = 0;
nvgpu_log(g, gpu_dbg_fn, "nr=%d", _IOC_NR(cmd));
if ((_IOC_TYPE(cmd) != NVGPU_EVENT_IOCTL_MAGIC) || (_IOC_NR(cmd) == 0)
|| (_IOC_NR(cmd) > NVGPU_EVENT_IOCTL_LAST)
|| (_IOC_SIZE(cmd) > NVGPU_EVENT_IOCTL_MAX_ARG_SIZE))
return -EINVAL;
(void) memset(buf, 0, sizeof(buf));
if (_IOC_DIR(cmd) & _IOC_WRITE) {
if (copy_from_user(buf, (void __user *) arg, _IOC_SIZE(cmd)))
return -EFAULT;
}
switch (cmd) {
case NVGPU_EVENT_IOCTL_SET_FILTER:
err = nvgpu_clk_arb_set_event_filter(dev,
(struct nvgpu_gpu_set_event_filter_args *)buf);
break;
default:
nvgpu_warn(g, "unrecognized event ioctl cmd: 0x%x", cmd);
err = -ENOTTY;
}
if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
err = copy_to_user((void __user *) arg, buf, _IOC_SIZE(cmd));
return err;
}
static const struct file_operations completion_dev_ops = {
.owner = THIS_MODULE,
.release = nvgpu_clk_arb_release_completion_dev,
.poll = nvgpu_clk_arb_poll_dev,
};
static const struct file_operations event_dev_ops = {
.owner = THIS_MODULE,
.release = nvgpu_clk_arb_release_event_dev,
.poll = nvgpu_clk_arb_poll_dev,
.read = nvgpu_clk_arb_read_event_dev,
#ifdef CONFIG_COMPAT
.compat_ioctl = nvgpu_clk_arb_ioctl_event_dev,
#endif
.unlocked_ioctl = nvgpu_clk_arb_ioctl_event_dev,
};
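/*
 * A rough sketch of how an event fd served by this table is typically
 * consumed from userspace (illustrative only; how the fd is obtained from
 * the control node, the filter_args setup and the handle_event() helper
 * are assumptions, and error handling is omitted):
 *
 *	struct nvgpu_gpu_event_info info;
 *	struct pollfd pfd = { .fd = event_fd, .events = POLLIN | POLLPRI };
 *
 *	// optionally narrow the alarm mask first:
 *	// ioctl(event_fd, NVGPU_EVENT_IOCTL_SET_FILTER, &filter_args);
 *
 *	while (poll(&pfd, 1, -1) > 0) {
 *		if (read(event_fd, &info, sizeof(info)) == sizeof(info))
 *			handle_event(info.event_id, info.timestamp);
 *	}
 */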
static int nvgpu_clk_arb_install_fd(struct gk20a *g,
struct nvgpu_clk_session *session,
const struct file_operations *fops,
struct nvgpu_clk_dev **_dev)
{
struct file *file;
int fd;
int err;
int status;
char name[64];
struct nvgpu_clk_dev *dev;
clk_arb_dbg(g, " ");
dev = nvgpu_kzalloc(g, sizeof(*dev));
if (!dev)
return -ENOMEM;
status = nvgpu_clk_notification_queue_alloc(g, &dev->queue,
DEFAULT_EVENT_NUMBER);
if (status < 0) {
err = status;
goto fail;
}
fd = get_unused_fd_flags(O_RDWR);
if (fd < 0) {
err = fd;
goto fail;
}
(void) snprintf(name, sizeof(name), "%s-clk-fd%d", g->name, fd);
file = anon_inode_getfile(name, fops, dev, O_RDWR);
if (IS_ERR(file)) {
err = PTR_ERR(file);
goto fail_fd;
}
fd_install(fd, file);
nvgpu_cond_init(&dev->readout_wq);
nvgpu_atomic_set(&dev->poll_mask, 0);
dev->session = session;
nvgpu_ref_init(&dev->refcount);
nvgpu_ref_get(&session->refcount);
*_dev = dev;
return fd;
fail_fd:
put_unused_fd(fd);
fail:
nvgpu_kfree(g, dev);
return err;
}
int nvgpu_clk_arb_install_event_fd(struct gk20a *g,
struct nvgpu_clk_session *session, int *event_fd, u32 alarm_mask)
{
struct nvgpu_clk_arb *arb = g->clk_arb;
struct nvgpu_clk_dev *dev;
int fd;
clk_arb_dbg(g, " ");
fd = nvgpu_clk_arb_install_fd(g, session, &event_dev_ops, &dev);
if (fd < 0)
return fd;
/* TODO: the alarm mask needs to be set to a default value to prevent
 * failures of legacy tests. This will be removed once the sanity tests
 * are updated.
 */
if (alarm_mask)
nvgpu_atomic_set(&dev->enabled_mask, alarm_mask);
else
nvgpu_atomic_set(&dev->enabled_mask, EVENT(VF_UPDATE));
dev->arb_queue_head = nvgpu_atomic_read(&arb->notification_queue.head);
nvgpu_spinlock_acquire(&arb->users_lock);
nvgpu_list_add_tail(&dev->link, &arb->users);
nvgpu_spinlock_release(&arb->users_lock);
*event_fd = fd;
return 0;
}
int nvgpu_clk_arb_install_request_fd(struct gk20a *g,
struct nvgpu_clk_session *session, int *request_fd)
{
struct nvgpu_clk_dev *dev;
int fd;
clk_arb_dbg(g, " ");
fd = nvgpu_clk_arb_install_fd(g, session, &completion_dev_ops, &dev);
if (fd < 0)
return fd;
*request_fd = fd;
return 0;
}
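/*
 * A request fd represents one pending frequency request. The expected
 * call order, pieced together from the helpers in this file (the ioctl
 * wrappers that reach these helpers are not shown here), is roughly:
 *
 *	nvgpu_clk_arb_install_request_fd(g, session, &fd);
 *	nvgpu_clk_arb_set_session_target_mhz(session, fd,
 *			NVGPU_CLK_DOMAIN_GPCCLK, target_mhz);
 *	nvgpu_clk_arb_commit_request_fd(g, session, fd);
 *
 * Committing queues the request on the session's target list and kicks
 * the arbiter worker, which applies the new targets asynchronously.
 */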
int nvgpu_clk_arb_commit_request_fd(struct gk20a *g,
struct nvgpu_clk_session *session, int request_fd)
{
struct nvgpu_clk_arb *arb = g->clk_arb;
struct nvgpu_clk_dev *dev;
struct fd fd;
int err = 0;
clk_arb_dbg(g, " ");
fd = fdget(request_fd);
if (!fd.file)
return -EINVAL;
if (fd.file->f_op != &completion_dev_ops) {
err = -EINVAL;
goto fdput_fd;
}
dev = (struct nvgpu_clk_dev *) fd.file->private_data;
if (!dev || dev->session != session) {
err = -EINVAL;
goto fdput_fd;
}
clk_arb_dbg(g, "requested target = %u\n",
(u32)dev->gpc2clk_target_mhz);
nvgpu_atomic_inc(&g->clk_arb_global_nr);
nvgpu_ref_get(&dev->refcount);
nvgpu_spinlock_acquire(&session->session_lock);
nvgpu_list_add(&dev->node, &session->targets);
nvgpu_spinlock_release(&session->session_lock);
nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item);
fdput_fd:
fdput(fd);
return err;
}
int nvgpu_clk_arb_set_session_target_mhz(struct nvgpu_clk_session *session,
int request_fd, u32 api_domain, u16 target_mhz)
{
struct nvgpu_clk_dev *dev;
struct fd fd;
int err = 0;
clk_arb_dbg(session->g,
"domain=0x%08x target_mhz=%u", api_domain, target_mhz);
fd = fdget(request_fd);
if (!fd.file)
return -EINVAL;
if (fd.file->f_op != &completion_dev_ops) {
err = -EINVAL;
goto fdput_fd;
}
dev = fd.file->private_data;
if (!dev || dev->session != session) {
err = -EINVAL;
goto fdput_fd;
}
switch (api_domain) {
case NVGPU_CLK_DOMAIN_MCLK:
dev->mclk_target_mhz = target_mhz;
break;
case NVGPU_CLK_DOMAIN_GPCCLK:
dev->gpc2clk_target_mhz = target_mhz;
break;
default:
err = -EINVAL;
}
fdput_fd:
fdput(fd);
return err;
}
u32 nvgpu_clk_arb_get_arbiter_clk_domains(struct gk20a *g)
{
u32 clk_domains = g->ops.clk_arb.get_arbiter_clk_domains(g);
u32 api_domains = 0;
if (clk_domains & CTRL_CLK_DOMAIN_GPCCLK)
api_domains |= BIT(NVGPU_GPU_CLK_DOMAIN_GPCCLK);
if (clk_domains & CTRL_CLK_DOMAIN_MCLK)
api_domains |= BIT(NVGPU_GPU_CLK_DOMAIN_MCLK);
return api_domains;
}
#ifdef CONFIG_DEBUG_FS
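/*
 * debugfs node "arb_stats": dumps the clock-switch latency statistics
 * gathered by the arbiter. The show callback below prints the number of
 * transitions, the max/min/avg switch time in usec, and a standard
 * deviation derived as int_sqrt(switch_std / num) (switch_std is assumed
 * here to be an accumulated sum of squared deviations).
 */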
static int nvgpu_clk_arb_stats_show(struct seq_file *s, void *unused)
{
struct gk20a *g = s->private;
struct nvgpu_clk_arb *arb = g->clk_arb;
struct nvgpu_clk_arb_debug *debug;
u64 num;
s64 tmp, avg, std, max, min;
debug = READ_ONCE(arb->debug);
/* Make copy of structure and ensure no reordering */
nvgpu_smp_rmb();
if (!debug)
return -EINVAL;
std = debug->switch_std;
avg = debug->switch_avg;
max = debug->switch_max;
min = debug->switch_min;
num = debug->switch_num;
tmp = std;
do_div(tmp, num);
seq_printf(s, "Number of transitions: %lld\n",
num);
seq_printf(s, "max / min : %lld / %lld usec\n",
max, min);
seq_printf(s, "avg / std : %lld / %ld usec\n",
avg, int_sqrt(tmp));
return 0;
}
static int nvgpu_clk_arb_stats_open(struct inode *inode, struct file *file)
{
return single_open(file, nvgpu_clk_arb_stats_show, inode->i_private);
}
static const struct file_operations nvgpu_clk_arb_stats_fops = {
.open = nvgpu_clk_arb_stats_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
int nvgpu_clk_arb_debugfs_init(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct dentry *gpu_root = l->debugfs;
struct dentry *d;
nvgpu_log(g, gpu_dbg_info, "g=%p", g);
d = debugfs_create_file(
"arb_stats",
S_IRUGO,
gpu_root,
g,
&nvgpu_clk_arb_stats_fops);
if (!d)
return -ENOMEM;
return 0;
}
#endif

View File

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,27 @@
/*
* Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __NVGPU_IOCTL_CTRL_H__
#define __NVGPU_IOCTL_CTRL_H__
int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp);
int gk20a_ctrl_dev_release(struct inode *inode, struct file *filp);
long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
int gk20a_ctrl_dev_mmap(struct file *filp, struct vm_area_struct *vma);
void nvgpu_hide_usermode_for_poweroff(struct gk20a *g);
void nvgpu_restore_usermode_for_poweron(struct gk20a *g);
#endif

View File

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,43 @@
/*
* Tegra GK20A GPU Debugger Driver
*
* Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef DBG_GPU_IOCTL_GK20A_H
#define DBG_GPU_IOCTL_GK20A_H
struct inode;
struct file;
typedef struct poll_table_struct poll_table;
/* NVGPU_DBG_GPU_IOCTL_REG_OPS: the upper limit for the number
* of regops */
#define NVGPU_IOCTL_DBG_REG_OPS_LIMIT 1024
/* module debug driver interface */
int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp);
int gk20a_dbg_gpu_dev_open(struct inode *inode, struct file *filp);
long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
unsigned int gk20a_dbg_gpu_dev_poll(struct file *filep, poll_table *wait);
/* used by profiler driver interface */
int gk20a_prof_gpu_dev_open(struct inode *inode, struct file *filp);
u32 nvgpu_get_regops_op_values_common(u32 regops_op);
u32 nvgpu_get_regops_status_values_common(u32 regops_status);
u32 nvgpu_get_regops_op_values_linux(u32 regops_op);
u32 nvgpu_get_regops_status_values_linux(u32 regops_status);
#endif

View File

@@ -0,0 +1,871 @@
/*
* Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/cdev.h>
#include <linux/uaccess.h>
#include <linux/dma-buf.h>
#include <uapi/linux/nvgpu.h>
#include <nvgpu/kmem.h>
#include <nvgpu/log.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/nvgpu_init.h>
#include <nvgpu/profiler.h>
#include <nvgpu/regops.h>
#include <nvgpu/perfbuf.h>
#include <nvgpu/pm_reservation.h>
#include <nvgpu/tsg.h>
#include <nvgpu/fb.h>
#include "platform_gk20a.h"
#include "os_linux.h"
#include "ioctl_prof.h"
#include "ioctl_dbg.h"
#include "ioctl_tsg.h"
#include "ioctl.h"
/** @cond DOXYGEN_SHOULD_SKIP_THIS */
#if defined(CONFIG_NVGPU_NON_FUSA) && defined(CONFIG_NVGPU_NEXT)
#include "os/linux/nvgpu_next_ioctl_prof.h"
#endif
/** @endcond DOXYGEN_SHOULD_SKIP_THIS */
#include <nvgpu/gr/gr_utils.h>
#include <nvgpu/gr/gr_instances.h>
#include <nvgpu/grmgr.h>
#define NVGPU_PROF_UMD_COPY_WINDOW_SIZE SZ_4K
struct nvgpu_profiler_object_priv {
struct nvgpu_profiler_object *prof;
struct gk20a *g;
/*
* Staging buffer to hold regops copied from userspace.
 * Regops are stored in struct nvgpu_profiler_reg_op format. This
 * struct was added for the new profiler design and is a trimmed-down
 * version of the legacy regop struct nvgpu_dbg_reg_op.
 *
 * Struct nvgpu_profiler_reg_op is an OS-specific struct and cannot
 * be used in common nvgpu code.
*/
struct nvgpu_profiler_reg_op *regops_umd_copy_buf;
/*
* Staging buffer to execute regops in common code.
* Regops are stored in struct nvgpu_dbg_reg_op which is defined
* in common code.
*
 * Regops in struct nvgpu_profiler_reg_op must first be converted
 * to this format, and this buffer is then passed on for regops
 * execution.
*/
struct nvgpu_dbg_reg_op *regops_staging_buf;
/*
 * dmabuf handle of the buffer that stores the number of bytes available
 * in the PMA buffer (only valid if the PMA stream resource was reserved
 * successfully).
*/
struct dma_buf *pma_bytes_available_buffer_dmabuf;
};
static void nvgpu_prof_free_pma_stream_priv_data(struct nvgpu_profiler_object_priv *priv);
static int nvgpu_prof_fops_open(struct gk20a *g, struct file *filp,
enum nvgpu_profiler_pm_reservation_scope scope,
u32 gpu_instance_id)
{
struct nvgpu_profiler_object_priv *prof_priv;
struct nvgpu_profiler_object *prof;
u32 num_regops;
int err;
nvgpu_log(g, gpu_dbg_prof, "Request to open profiler session with scope %u",
scope);
prof_priv = nvgpu_kzalloc(g, sizeof(*prof_priv));
if (prof_priv == NULL) {
return -ENOMEM;
}
err = nvgpu_profiler_alloc(g, &prof, scope, gpu_instance_id);
if (err != 0) {
goto free_priv;
}
prof_priv->g = g;
prof_priv->prof = prof;
filp->private_data = prof_priv;
prof_priv->regops_umd_copy_buf = nvgpu_kzalloc(g,
NVGPU_PROF_UMD_COPY_WINDOW_SIZE);
if (prof_priv->regops_umd_copy_buf == NULL) {
err = -ENOMEM;
goto free_prof;
}
num_regops = NVGPU_PROF_UMD_COPY_WINDOW_SIZE /
sizeof(prof_priv->regops_umd_copy_buf[0]);
prof_priv->regops_staging_buf = nvgpu_kzalloc(g,
num_regops * sizeof(prof_priv->regops_staging_buf[0]));
if (prof_priv->regops_staging_buf == NULL) {
err = -ENOMEM;
goto free_umd_buf;
}
nvgpu_log(g, gpu_dbg_prof,
"Profiler session with scope %u created successfully with profiler handle %u",
scope, prof->prof_handle);
return 0;
free_umd_buf:
nvgpu_kfree(g, prof_priv->regops_umd_copy_buf);
free_prof:
nvgpu_profiler_free(prof);
free_priv:
nvgpu_kfree(g, prof_priv);
return err;
}
int nvgpu_prof_dev_fops_open(struct inode *inode, struct file *filp)
{
struct gk20a *g;
int err;
struct nvgpu_cdev *cdev;
u32 gpu_instance_id;
cdev = container_of(inode->i_cdev, struct nvgpu_cdev, cdev);
g = nvgpu_get_gk20a_from_cdev(cdev);
gpu_instance_id = nvgpu_get_gpu_instance_id_from_cdev(g, cdev);
g = nvgpu_get(g);
if (!g) {
return -ENODEV;
}
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_PROFILER_V2_DEVICE)) {
nvgpu_err(g, "Profiler V2 not supported");
nvgpu_put(g);
return -EINVAL;
}
err = nvgpu_prof_fops_open(g, filp,
NVGPU_PROFILER_PM_RESERVATION_SCOPE_DEVICE,
gpu_instance_id);
if (err != 0) {
nvgpu_put(g);
}
return err;
}
int nvgpu_prof_ctx_fops_open(struct inode *inode, struct file *filp)
{
struct gk20a *g;
int err;
struct nvgpu_cdev *cdev;
u32 gpu_instance_id;
cdev = container_of(inode->i_cdev, struct nvgpu_cdev, cdev);
g = nvgpu_get_gk20a_from_cdev(cdev);
gpu_instance_id = nvgpu_get_gpu_instance_id_from_cdev(g, cdev);
g = nvgpu_get(g);
if (!g) {
return -ENODEV;
}
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_PROFILER_V2_CONTEXT)) {
nvgpu_put(g);
return -EINVAL;
}
err = nvgpu_prof_fops_open(g, filp,
NVGPU_PROFILER_PM_RESERVATION_SCOPE_CONTEXT,
gpu_instance_id);
if (err != 0) {
nvgpu_put(g);
}
return err;
}
int nvgpu_prof_fops_release(struct inode *inode, struct file *filp)
{
struct nvgpu_profiler_object_priv *prof_priv = filp->private_data;
struct nvgpu_profiler_object *prof = prof_priv->prof;
struct gk20a *g = prof_priv->g;
nvgpu_log(g, gpu_dbg_prof,
"Request to close profiler session with scope %u and profiler handle %u",
prof->scope, prof->prof_handle);
nvgpu_prof_free_pma_stream_priv_data(prof_priv);
nvgpu_profiler_free(prof);
nvgpu_kfree(g, prof_priv->regops_umd_copy_buf);
nvgpu_kfree(g, prof_priv->regops_staging_buf);
nvgpu_kfree(g, prof_priv);
nvgpu_put(g);
nvgpu_log(g, gpu_dbg_prof, "Profiler session closed successfully");
return 0;
}
static int nvgpu_prof_ioctl_bind_context(struct nvgpu_profiler_object *prof,
struct nvgpu_profiler_bind_context_args *args)
{
int tsg_fd = args->tsg_fd;
struct nvgpu_tsg *tsg;
struct gk20a *g = prof->g;
if (prof->context_init) {
nvgpu_err(g, "Context info is already initialized");
return -EINVAL;
}
if (tsg_fd < 0) {
if (prof->scope == NVGPU_PROFILER_PM_RESERVATION_SCOPE_DEVICE) {
prof->context_init = true;
return 0;
}
return -EINVAL;
}
tsg = nvgpu_tsg_get_from_file(tsg_fd);
if (tsg == NULL) {
nvgpu_err(g, "invalid TSG fd %d", tsg_fd);
return -EINVAL;
}
return nvgpu_profiler_bind_context(prof, tsg);
}
static int nvgpu_prof_ioctl_unbind_context(struct nvgpu_profiler_object *prof)
{
return nvgpu_profiler_unbind_context(prof);
}
static int nvgpu_prof_ioctl_get_pm_resource_type(u32 resource,
enum nvgpu_profiler_pm_resource_type *pm_resource)
{
switch (resource) {
case NVGPU_PROFILER_PM_RESOURCE_ARG_HWPM_LEGACY:
*pm_resource = NVGPU_PROFILER_PM_RESOURCE_TYPE_HWPM_LEGACY;
return 0;
case NVGPU_PROFILER_PM_RESOURCE_ARG_SMPC:
*pm_resource = NVGPU_PROFILER_PM_RESOURCE_TYPE_SMPC;
return 0;
default:
break;
}
return -EINVAL;
}
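/*
 * Reservation rules enforced below: a DEVICE-scope session may request a
 * context-switched reservation only when a TSG is bound, and a global
 * (non-ctxsw) SMPC reservation only when NVGPU_SUPPORT_SMPC_GLOBAL_MODE
 * is enabled; a CONTEXT-scope session always requires a bound TSG and its
 * reservations are always context-switched.
 */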
static int nvgpu_prof_ioctl_reserve_pm_resource(struct nvgpu_profiler_object *prof,
struct nvgpu_profiler_reserve_pm_resource_args *args)
{
enum nvgpu_profiler_pm_resource_type pm_resource;
struct gk20a *g = prof->g;
bool flag_ctxsw;
int err;
if (!prof->context_init) {
nvgpu_err(g, "Context info not initialized");
return -EINVAL;
}
err = nvgpu_prof_ioctl_get_pm_resource_type(args->resource,
&pm_resource);
if (err) {
nvgpu_err(prof->g, "invalid resource %u", args->resource);
return err;
}
flag_ctxsw = ((args->flags & NVGPU_PROFILER_RESERVE_PM_RESOURCE_ARG_FLAG_CTXSW) != 0);
switch (prof->scope) {
case NVGPU_PROFILER_PM_RESERVATION_SCOPE_DEVICE:
if (flag_ctxsw && (prof->tsg == NULL)) {
nvgpu_err(g, "Context must be bound to enable context switch");
return -EINVAL;
}
if (!flag_ctxsw && (pm_resource == NVGPU_PROFILER_PM_RESOURCE_TYPE_SMPC)
&& !nvgpu_is_enabled(g, NVGPU_SUPPORT_SMPC_GLOBAL_MODE)) {
nvgpu_err(g, "SMPC global mode not supported");
return -EINVAL;
}
if (flag_ctxsw) {
prof->ctxsw[pm_resource] = true;
} else {
prof->ctxsw[pm_resource] = false;
}
break;
case NVGPU_PROFILER_PM_RESERVATION_SCOPE_CONTEXT:
if (prof->tsg == NULL) {
nvgpu_err(g, "Context must be bound for context session");
return -EINVAL;
}
prof->ctxsw[pm_resource] = true;
break;
default:
return -EINVAL;
}
err = nvgpu_profiler_pm_resource_reserve(prof, pm_resource);
if (err) {
return err;
}
return 0;
}
static int nvgpu_prof_ioctl_release_pm_resource(struct nvgpu_profiler_object *prof,
struct nvgpu_profiler_release_pm_resource_args *args)
{
enum nvgpu_profiler_pm_resource_type pm_resource;
int err;
err = nvgpu_prof_ioctl_get_pm_resource_type(args->resource,
&pm_resource);
if (err) {
return err;
}
err = nvgpu_profiler_pm_resource_release(prof, pm_resource);
if (err) {
return err;
}
prof->ctxsw[pm_resource] = false;
return 0;
}
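/*
 * PMA stream setup, in order: nvgpu_profiler_alloc_pma_stream() (expected
 * to reserve the PMA stream resource and set up the perfbuf VM, given the
 * rollback path below), then map the "bytes available" buffer at a fixed
 * GPU VA so the VA fits in a 32-bit HW register, validate and map the PMA
 * record buffer itself, and finally vmap the "bytes available" buffer so
 * the byte count can be read from the CPU. Mappings are rolled back in
 * reverse order on failure.
 */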
static int nvgpu_prof_ioctl_alloc_pma_stream(struct nvgpu_profiler_object_priv *priv,
struct nvgpu_profiler_alloc_pma_stream_args *args)
{
struct nvgpu_profiler_object *prof = priv->prof;
struct gk20a *g = prof->g;
struct mm_gk20a *mm = &g->mm;
u64 pma_bytes_available_buffer_offset;
struct dma_buf *pma_dmabuf;
struct dma_buf *pma_bytes_available_dmabuf;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 11, 0)
struct dma_buf_map map;
#endif
void *cpuva;
u32 pma_buffer_size;
int err;
nvgpu_log(g, gpu_dbg_prof, "Request to setup PMA stream for handle %u",
prof->prof_handle);
if (prof->pma_buffer_va != 0U) {
nvgpu_err(g, "PMA stream already initialized");
return -EINVAL;
}
err = nvgpu_profiler_alloc_pma_stream(prof);
if (err != 0) {
nvgpu_err(g, "failed to init PMA stream");
return err;
}
/*
 * The PMA available-bytes buffer GPU_VA needs to fit in a 32-bit
 * register, hence a fixed GPU_VA is used to map it.
*/
pma_bytes_available_buffer_offset = mm->perfbuf.pma_bytes_available_buffer_gpu_va;
err = nvgpu_vm_map_buffer(mm->perfbuf.vm, args->pma_bytes_available_buffer_fd,
&pma_bytes_available_buffer_offset,
NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET, SZ_4K, 0, 0,
0, 0, NULL);
if (err != 0) {
nvgpu_err(g, "failed to map available bytes buffer");
goto err_put_vm;
}
/*
 * The size register is 32 bits wide in HW; ensure the requested size
 * does not exceed that.
 */
if (args->pma_buffer_map_size >= (1ULL << 32U)) {
err = -EINVAL;
nvgpu_err(g, "pma_buffer_map_size does not fit in 32 bits");
goto err_unmap_bytes_available;
}
pma_buffer_size = nvgpu_safe_cast_u64_to_u32(args->pma_buffer_map_size);
/*
* Validate that the pma buffer is large enough.
*/
pma_dmabuf = dma_buf_get(args->pma_buffer_fd);
if (IS_ERR(pma_dmabuf)) {
err = -EINVAL;
nvgpu_err(g, "failed to get pma buffer FD");
goto err_unmap_bytes_available;
}
if (pma_dmabuf->size < pma_buffer_size) {
err = -EINVAL;
nvgpu_err(g, "pma_dmabuf is not large enough");
goto err_dma_buf_put_pma;
}
err = nvgpu_vm_map_buffer(mm->perfbuf.vm, args->pma_buffer_fd,
&args->pma_buffer_offset, 0, SZ_4K, 0, 0,
0, 0, NULL);
if (err != 0) {
nvgpu_err(g, "failed to map PMA buffer");
goto err_dma_buf_put_pma;
}
pma_bytes_available_dmabuf = dma_buf_get(args->pma_bytes_available_buffer_fd);
if (IS_ERR(pma_bytes_available_dmabuf)) {
err = -EINVAL;
nvgpu_err(g, "failed to get available bytes buffer FD");
goto err_unmap_pma;
}
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 11, 0)
err = dma_buf_vmap(pma_bytes_available_dmabuf, &map);
cpuva = err ? NULL : map.vaddr;
#else
cpuva = dma_buf_vmap(pma_bytes_available_dmabuf);
#endif
if (cpuva == NULL) {
err = -ENOMEM;
nvgpu_err(g, "failed to vmap available bytes buffer FD");
goto err_dma_buf_put_pma_bytes_available;
}
prof->pma_buffer_va = args->pma_buffer_offset;
prof->pma_buffer_size = pma_buffer_size;
prof->pma_bytes_available_buffer_va = pma_bytes_available_buffer_offset;
prof->pma_bytes_available_buffer_cpuva = cpuva;
priv->pma_bytes_available_buffer_dmabuf = pma_bytes_available_dmabuf;
nvgpu_log(g, gpu_dbg_prof, "PMA stream initialized for profiler handle %u, 0x%llx 0x%x 0x%llx",
prof->prof_handle, prof->pma_buffer_va, prof->pma_buffer_size,
prof->pma_bytes_available_buffer_va);
args->pma_buffer_va = args->pma_buffer_offset;
/* Decrement pma_dmabuf ref count as we already mapped it. */
dma_buf_put(pma_dmabuf);
return 0;
err_dma_buf_put_pma_bytes_available:
dma_buf_put(pma_bytes_available_dmabuf);
err_unmap_pma:
nvgpu_vm_unmap(mm->perfbuf.vm, args->pma_buffer_offset, NULL);
err_dma_buf_put_pma:
dma_buf_put(pma_dmabuf);
err_unmap_bytes_available:
nvgpu_vm_unmap(mm->perfbuf.vm, pma_bytes_available_buffer_offset, NULL);
err_put_vm:
nvgpu_perfbuf_deinit_vm(g);
nvgpu_profiler_pm_resource_release(prof,
NVGPU_PROFILER_PM_RESOURCE_TYPE_PMA_STREAM);
return err;
}
static void nvgpu_prof_free_pma_stream_priv_data(struct nvgpu_profiler_object_priv *priv)
{
struct nvgpu_profiler_object *prof = priv->prof;
struct gk20a *g = prof->g;
struct mm_gk20a *mm = &g->mm;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 11, 0)
struct dma_buf_map map;
#endif
if (priv->pma_bytes_available_buffer_dmabuf == NULL) {
return;
}
nvgpu_vm_unmap(mm->perfbuf.vm, prof->pma_bytes_available_buffer_va, NULL);
prof->pma_bytes_available_buffer_va = 0U;
nvgpu_vm_unmap(mm->perfbuf.vm, prof->pma_buffer_va, NULL);
prof->pma_buffer_va = 0U;
prof->pma_buffer_size = 0U;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 11, 0)
dma_buf_map_set_vaddr(&map, prof->pma_bytes_available_buffer_cpuva);
dma_buf_vunmap(priv->pma_bytes_available_buffer_dmabuf, &map);
#else
dma_buf_vunmap(priv->pma_bytes_available_buffer_dmabuf,
prof->pma_bytes_available_buffer_cpuva);
#endif
dma_buf_put(priv->pma_bytes_available_buffer_dmabuf);
priv->pma_bytes_available_buffer_dmabuf = NULL;
prof->pma_bytes_available_buffer_cpuva = NULL;
}
static int nvgpu_prof_ioctl_free_pma_stream(struct nvgpu_profiler_object_priv *priv)
{
struct nvgpu_profiler_object *prof = priv->prof;
struct gk20a *g = prof->g;
nvgpu_log(g, gpu_dbg_prof, "Request to free PMA stream for handle %u",
prof->prof_handle);
if (prof->pma_buffer_va == 0U) {
nvgpu_err(g, "PMA stream not initialized");
return -EINVAL;
}
if (prof->bound) {
nvgpu_err(g, "PM resources are bound, cannot free PMA");
return -EINVAL;
}
nvgpu_prof_free_pma_stream_priv_data(priv);
nvgpu_profiler_free_pma_stream(prof);
nvgpu_log(g, gpu_dbg_prof, "Request to free PMA stream for handle %u completed",
prof->prof_handle);
return 0;
}
static int nvgpu_prof_ioctl_bind_pm_resources(struct nvgpu_profiler_object *prof)
{
return nvgpu_profiler_bind_pm_resources(prof);
}
static int nvgpu_prof_ioctl_unbind_pm_resources(struct nvgpu_profiler_object *prof)
{
return nvgpu_profiler_unbind_pm_resources(prof);
}
static void nvgpu_prof_get_regops_staging_data(struct nvgpu_profiler_reg_op *in,
struct nvgpu_dbg_reg_op *out, u32 num_ops)
{
u32 i;
for (i = 0; i < num_ops; i++) {
out[i].op = nvgpu_get_regops_op_values_common(in[i].op);
out[i].type = 0U; /* Selected based on per-resource ctxsw flags */
out[i].status = nvgpu_get_regops_status_values_common(in[i].status);
out[i].quad = 0U;
out[i].group_mask = 0U;
out[i].sub_group_mask = 0U;
out[i].offset = in[i].offset;
out[i].value_lo = u64_lo32(in[i].value);
out[i].value_hi = u64_hi32(in[i].value);
out[i].and_n_mask_lo = u64_lo32(in[i].and_n_mask);
out[i].and_n_mask_hi = u64_hi32(in[i].and_n_mask);
}
}
static void nvgpu_prof_get_regops_linux_data(struct nvgpu_dbg_reg_op *in,
struct nvgpu_profiler_reg_op *out, u32 num_ops)
{
u32 i;
for (i = 0; i < num_ops; i++) {
out[i].op = nvgpu_get_regops_op_values_linux(in[i].op);
out[i].status = nvgpu_get_regops_status_values_linux(in[i].status);
out[i].offset = in[i].offset;
out[i].value = hi32_lo32_to_u64(in[i].value_hi, in[i].value_lo);
out[i].and_n_mask = hi32_lo32_to_u64(in[i].and_n_mask_hi, in[i].and_n_mask_lo);
}
}
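/*
 * EXEC_REG_OPS processes the user regop array in fixed-size windows: at
 * most NVGPU_PROF_UMD_COPY_WINDOW_SIZE (4K) bytes worth of struct
 * nvgpu_profiler_reg_op are copied in, converted to the common
 * struct nvgpu_dbg_reg_op staging format, executed, converted back and
 * copied out before the next fragment is handled. For example, if
 * sizeof(struct nvgpu_profiler_reg_op) were 32 bytes (an illustrative
 * value only), each window would hold 128 ops and a 300-op request would
 * be processed as fragments of 128, 128 and 44 ops.
 */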
static int nvgpu_prof_ioctl_exec_reg_ops(struct nvgpu_profiler_object_priv *priv,
struct nvgpu_profiler_exec_reg_ops_args *args)
{
struct nvgpu_profiler_object *prof = priv->prof;
struct gk20a *g = prof->g;
struct nvgpu_tsg *tsg = prof->tsg;
u32 num_regops_in_copy_buf = NVGPU_PROF_UMD_COPY_WINDOW_SIZE /
sizeof(priv->regops_umd_copy_buf[0]);
u32 ops_offset = 0;
u32 flags = 0U;
bool all_passed = true;
int err;
u32 gr_instance_id =
nvgpu_grmgr_get_gr_instance_id(g, prof->gpu_instance_id);
nvgpu_log(g, gpu_dbg_prof,
"REG_OPS for handle %u: count=%u mode=%u flags=0x%x",
prof->prof_handle, args->count, args->mode, args->flags);
if (args->count == 0) {
return -EINVAL;
}
if (args->count > NVGPU_IOCTL_DBG_REG_OPS_LIMIT) {
nvgpu_err(g, "regops limit exceeded");
return -EINVAL;
}
if (!prof->bound) {
nvgpu_err(g, "PM resources are not bound to profiler");
return -EINVAL;
}
err = gk20a_busy(g);
if (err != 0) {
nvgpu_err(g, "failed to poweron");
return -EINVAL;
}
if (args->mode == NVGPU_PROFILER_EXEC_REG_OPS_ARG_MODE_CONTINUE_ON_ERROR) {
flags |= NVGPU_REG_OP_FLAG_MODE_CONTINUE_ON_ERROR;
} else {
flags |= NVGPU_REG_OP_FLAG_MODE_ALL_OR_NONE;
}
while (ops_offset < args->count) {
const u32 num_ops =
min(args->count - ops_offset, num_regops_in_copy_buf);
const u64 fragment_size =
num_ops * sizeof(priv->regops_umd_copy_buf[0]);
void __user *const user_fragment =
(void __user *)(uintptr_t)
(args->ops +
ops_offset * sizeof(priv->regops_umd_copy_buf[0]));
nvgpu_log(g, gpu_dbg_prof, "Regops fragment: start_op=%u ops=%u",
ops_offset, num_ops);
if (copy_from_user(priv->regops_umd_copy_buf,
user_fragment, fragment_size)) {
nvgpu_err(g, "copy_from_user failed!");
err = -EFAULT;
break;
}
nvgpu_prof_get_regops_staging_data(
priv->regops_umd_copy_buf,
priv->regops_staging_buf, num_ops);
if (args->mode == NVGPU_PROFILER_EXEC_REG_OPS_ARG_MODE_CONTINUE_ON_ERROR) {
flags &= ~NVGPU_REG_OP_FLAG_ALL_PASSED;
}
err = nvgpu_gr_exec_with_err_for_instance(g, gr_instance_id,
nvgpu_regops_exec(g, tsg, prof,
priv->regops_staging_buf, num_ops,
&flags));
if (err) {
nvgpu_err(g, "regop execution failed");
break;
}
if (ops_offset == 0) {
if (flags & NVGPU_REG_OP_FLAG_DIRECT_OPS) {
args->flags |=
NVGPU_PROFILER_EXEC_REG_OPS_ARG_FLAG_DIRECT_OPS;
}
}
if (args->mode == NVGPU_PROFILER_EXEC_REG_OPS_ARG_MODE_CONTINUE_ON_ERROR) {
if ((flags & NVGPU_REG_OP_FLAG_ALL_PASSED) == 0) {
all_passed = false;
}
}
nvgpu_prof_get_regops_linux_data(
priv->regops_staging_buf,
priv->regops_umd_copy_buf, num_ops);
if (copy_to_user(user_fragment,
priv->regops_umd_copy_buf,
fragment_size)) {
nvgpu_err(g, "copy_to_user failed!");
err = -EFAULT;
break;
}
ops_offset += num_ops;
}
if (args->mode == NVGPU_PROFILER_EXEC_REG_OPS_ARG_MODE_CONTINUE_ON_ERROR
&& all_passed && (err == 0)) {
args->flags |= NVGPU_PROFILER_EXEC_REG_OPS_ARG_FLAG_ALL_PASSED;
}
nvgpu_log(g, gpu_dbg_prof,
"REG_OPS for handle %u complete: count=%u mode=%u flags=0x%x err=%d",
prof->prof_handle, args->count, args->mode, args->flags, err);
gk20a_idle(g);
return err;
}
static int nvgpu_prof_ioctl_pma_stream_update_get_put(struct nvgpu_profiler_object *prof,
struct nvgpu_profiler_pma_stream_update_get_put_args *args)
{
bool update_bytes_available = args->flags &
NVGPU_PROFILER_PMA_STREAM_UPDATE_GET_PUT_ARG_FLAG_UPDATE_AVAILABLE_BYTES;
bool wait = args->flags &
NVGPU_PROFILER_PMA_STREAM_UPDATE_GET_PUT_ARG_FLAG_WAIT_FOR_UPDATE;
bool update_put_ptr = args->flags &
NVGPU_PROFILER_PMA_STREAM_UPDATE_GET_PUT_ARG_FLAG_RETURN_PUT_PTR;
struct gk20a *g = prof->g;
bool overflowed;
int err;
nvgpu_log(g, gpu_dbg_prof,
"Update PMA stream request %u: flags = 0x%x bytes_consumed=%llu",
prof->prof_handle, args->flags, args->bytes_consumed);
if (!prof->reserved[NVGPU_PROFILER_PM_RESOURCE_TYPE_PMA_STREAM]) {
nvgpu_err(g, "PMA stream resource not reserved");
return -EINVAL;
}
err = nvgpu_perfbuf_update_get_put(prof->g, args->bytes_consumed,
update_bytes_available ? &args->bytes_available : NULL,
prof->pma_bytes_available_buffer_cpuva, wait,
update_put_ptr ? &args->put_ptr : NULL,
&overflowed);
if (err != 0) {
return err;
}
if (overflowed) {
args->flags |=
NVGPU_PROFILER_PMA_STREAM_UPDATE_GET_PUT_ARG_FLAG_OVERFLOW_TRIGGERED;
}
nvgpu_log(g, gpu_dbg_prof,
"Update PMA stream request %u complete: flags = 0x%x"
"bytes_available=%llu put_ptr=%llu",
prof->prof_handle, args->flags, args->bytes_available, args->put_ptr);
return 0;
}
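/*
 * Top-level profiler ioctl dispatcher. A typical session, pieced together
 * from the handlers above (no ordering beyond the checks in each handler
 * is implied), looks roughly like:
 *
 *	BIND_CONTEXT               attach a TSG (optional for device scope)
 *	RESERVE_PM_RESOURCE        reserve HWPM legacy / SMPC resources
 *	ALLOC_PMA_STREAM           map PMA record + bytes-available buffers
 *	BIND_PM_RESOURCES          bind the reserved resources
 *	EXEC_REG_OPS,
 *	PMA_STREAM_UPDATE_GET_PUT  profile
 *	UNBIND_PM_RESOURCES, FREE_PMA_STREAM, RELEASE_PM_RESOURCE,
 *	UNBIND_CONTEXT             tear down (or simply close the fd)
 */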
long nvgpu_prof_fops_ioctl(struct file *filp, unsigned int cmd,
unsigned long arg)
{
struct nvgpu_profiler_object_priv *prof_priv = filp->private_data;
struct nvgpu_profiler_object *prof = prof_priv->prof;
struct gk20a *g = prof_priv->g;
u8 __maybe_unused buf[NVGPU_PROFILER_IOCTL_MAX_ARG_SIZE];
int err = 0;
u32 gr_instance_id =
nvgpu_grmgr_get_gr_instance_id(g, prof->gpu_instance_id);
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
"gpu_instance_id [%u] gr_instance_id [%u]",
prof->gpu_instance_id, gr_instance_id);
nvgpu_assert(prof->gpu_instance_id < g->mig.num_gpu_instances);
nvgpu_assert(gr_instance_id < g->num_gr_instances);
if ((_IOC_TYPE(cmd) != NVGPU_PROFILER_IOCTL_MAGIC) ||
(_IOC_NR(cmd) == 0) ||
(_IOC_NR(cmd) > NVGPU_PROFILER_IOCTL_LAST) ||
(_IOC_SIZE(cmd) > NVGPU_PROFILER_IOCTL_MAX_ARG_SIZE)) {
return -EINVAL;
}
(void) memset(buf, 0, sizeof(buf));
if (_IOC_DIR(cmd) & _IOC_WRITE) {
if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd))) {
return -EFAULT;
}
}
nvgpu_log(g, gpu_dbg_prof, "Profiler handle %u received IOCTL cmd %u",
prof->prof_handle, cmd);
nvgpu_mutex_acquire(&prof->ioctl_lock);
nvgpu_speculation_barrier();
switch (cmd) {
case NVGPU_PROFILER_IOCTL_BIND_CONTEXT:
err = nvgpu_prof_ioctl_bind_context(prof,
(struct nvgpu_profiler_bind_context_args *)buf);
break;
case NVGPU_PROFILER_IOCTL_UNBIND_CONTEXT:
err = nvgpu_prof_ioctl_unbind_context(prof);
break;
case NVGPU_PROFILER_IOCTL_RESERVE_PM_RESOURCE:
err = nvgpu_prof_ioctl_reserve_pm_resource(prof,
(struct nvgpu_profiler_reserve_pm_resource_args *)buf);
break;
case NVGPU_PROFILER_IOCTL_RELEASE_PM_RESOURCE:
err = nvgpu_prof_ioctl_release_pm_resource(prof,
(struct nvgpu_profiler_release_pm_resource_args *)buf);
break;
case NVGPU_PROFILER_IOCTL_BIND_PM_RESOURCES:
err = nvgpu_prof_ioctl_bind_pm_resources(prof);
break;
case NVGPU_PROFILER_IOCTL_UNBIND_PM_RESOURCES:
err = nvgpu_prof_ioctl_unbind_pm_resources(prof);
break;
case NVGPU_PROFILER_IOCTL_ALLOC_PMA_STREAM:
err = nvgpu_prof_ioctl_alloc_pma_stream(prof_priv,
(struct nvgpu_profiler_alloc_pma_stream_args *)buf);
break;
case NVGPU_PROFILER_IOCTL_FREE_PMA_STREAM:
err = nvgpu_prof_ioctl_free_pma_stream(prof_priv);
break;
case NVGPU_PROFILER_IOCTL_EXEC_REG_OPS:
err = nvgpu_prof_ioctl_exec_reg_ops(prof_priv,
(struct nvgpu_profiler_exec_reg_ops_args *)buf);
break;
case NVGPU_PROFILER_IOCTL_PMA_STREAM_UPDATE_GET_PUT:
err = nvgpu_prof_ioctl_pma_stream_update_get_put(prof,
(struct nvgpu_profiler_pma_stream_update_get_put_args *)buf);
break;
default:
#if defined(CONFIG_NVGPU_NON_FUSA) && defined(CONFIG_NVGPU_NEXT)
err = nvgpu_next_prof_fops_ioctl(prof, cmd, (void *)buf);
#else
nvgpu_err(g, "unrecognized profiler ioctl cmd: 0x%x", cmd);
err = -ENOTTY;
#endif
break;
}
nvgpu_mutex_release(&prof->ioctl_lock);
if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
err = copy_to_user((void __user *)arg,
buf, _IOC_SIZE(cmd));
nvgpu_log(g, gpu_dbg_prof, "Profiler handle %u IOCTL err = %d",
prof->prof_handle, err);
return err;
}

View File

@@ -0,0 +1,29 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef LINUX_IOCTL_PROF_H
#define LINUX_IOCTL_PROF_H
#include <nvgpu/types.h>
struct inode;
struct file;
int nvgpu_prof_dev_fops_open(struct inode *inode, struct file *filp);
int nvgpu_prof_ctx_fops_open(struct inode *inode, struct file *filp);
int nvgpu_prof_fops_release(struct inode *inode, struct file *filp);
long nvgpu_prof_fops_ioctl(struct file *filp, unsigned int cmd,
unsigned long arg);
#endif /* LINUX_IOCTL_PROF_H */

View File

@@ -0,0 +1,957 @@
/*
* Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/cdev.h>
#include <linux/uaccess.h>
#include <linux/poll.h>
#include <uapi/linux/nvgpu.h>
#include <linux/anon_inodes.h>
#include <nvgpu/kmem.h>
#include <nvgpu/log.h>
#include <nvgpu/os_sched.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/gr/config.h>
#include <nvgpu/gr/gr.h>
#include <nvgpu/gr/gr_instances.h>
#include <nvgpu/gr/gr_utils.h>
#include <nvgpu/channel.h>
#include <nvgpu/tsg.h>
#include <nvgpu/fifo.h>
#include <nvgpu/nvgpu_init.h>
#include <nvgpu/grmgr.h>
#include <nvgpu/ltc.h>
#include "platform_gk20a.h"
#include "ioctl_tsg.h"
#include "ioctl_channel.h"
#include "ioctl.h"
#include "os_linux.h"
struct tsg_private {
struct gk20a *g;
struct nvgpu_tsg *tsg;
struct nvgpu_cdev *cdev;
};
extern const struct file_operations gk20a_tsg_ops;
struct nvgpu_tsg *nvgpu_tsg_get_from_file(int fd)
{
struct nvgpu_tsg *tsg;
struct tsg_private *priv;
struct file *f = fget(fd);
if (!f) {
return NULL;
}
if (f->f_op != &gk20a_tsg_ops) {
fput(f);
return NULL;
}
priv = (struct tsg_private *)f->private_data;
tsg = priv->tsg;
fput(f);
return tsg;
}
static int nvgpu_tsg_bind_channel_fd(struct nvgpu_tsg *tsg, int ch_fd)
{
struct nvgpu_channel *ch;
int err;
ch = nvgpu_channel_get_from_file(ch_fd);
if (!ch)
return -EINVAL;
err = nvgpu_tsg_bind_channel(tsg, ch);
nvgpu_channel_put(ch);
return err;
}
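/*
 * Extended bind: on top of the plain channel bind above, this variant
 * optionally latches a per-TSG active TPC count for dynamic TPC power
 * gating (validated against the max TPC count and only accepted on the
 * first bind), assigns the requested subcontext id (bounded by the GPU
 * instance's max VEID count), and routes all ASYNC subcontexts to
 * runqueue 1 before binding the channel to the TSG.
 */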
static int gk20a_tsg_ioctl_bind_channel_ex(struct gk20a *g,
struct tsg_private *priv, struct nvgpu_tsg_bind_channel_ex_args *arg)
{
struct nvgpu_tsg *tsg = priv->tsg;
struct nvgpu_sched_ctrl *sched = &g->sched_ctrl;
struct nvgpu_channel *ch;
struct nvgpu_gr_config *gr_config = nvgpu_gr_get_config_ptr(g);
u32 max_subctx_count;
u32 gpu_instance_id;
int err = 0;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid);
nvgpu_mutex_acquire(&sched->control_lock);
if (sched->control_locked) {
err = -EPERM;
goto mutex_release;
}
err = gk20a_busy(g);
if (err) {
nvgpu_err(g, "failed to power on gpu");
goto mutex_release;
}
ch = nvgpu_channel_get_from_file(arg->channel_fd);
if (!ch) {
err = -EINVAL;
goto idle;
}
if (arg->tpc_pg_enabled && (!tsg->tpc_num_initialized)) {
if ((arg->num_active_tpcs >
nvgpu_gr_config_get_max_tpc_count(gr_config)) ||
!(arg->num_active_tpcs)) {
nvgpu_err(g, "Invalid num of active TPCs");
err = -EINVAL;
goto ch_put;
}
tsg->tpc_num_initialized = true;
tsg->num_active_tpcs = arg->num_active_tpcs;
tsg->tpc_pg_enabled = true;
} else {
tsg->tpc_pg_enabled = false;
nvgpu_log(g, gpu_dbg_info, "dynamic TPC-PG not enabled");
}
gpu_instance_id = nvgpu_get_gpu_instance_id_from_cdev(g, priv->cdev);
nvgpu_assert(gpu_instance_id < g->mig.num_gpu_instances);
max_subctx_count = nvgpu_grmgr_get_gpu_instance_max_veid_count(g, gpu_instance_id);
if (arg->subcontext_id < max_subctx_count) {
ch->subctx_id = arg->subcontext_id;
} else {
err = -EINVAL;
goto ch_put;
}
nvgpu_log(g, gpu_dbg_info, "channel id : %d : subctx: %d",
ch->chid, ch->subctx_id);
/* Use runqueue selector 1 for all ASYNC ids */
if (ch->subctx_id > CHANNEL_INFO_VEID0)
ch->runqueue_sel = 1;
err = nvgpu_tsg_bind_channel(tsg, ch);
ch_put:
nvgpu_channel_put(ch);
idle:
gk20a_idle(g);
mutex_release:
nvgpu_mutex_release(&sched->control_lock);
return err;
}
static int nvgpu_tsg_unbind_channel_fd(struct nvgpu_tsg *tsg, int ch_fd)
{
struct nvgpu_channel *ch;
int err = 0;
ch = nvgpu_channel_get_from_file(ch_fd);
if (!ch) {
return -EINVAL;
}
if (tsg != nvgpu_tsg_from_ch(ch)) {
err = -EINVAL;
goto out;
}
err = nvgpu_tsg_unbind_channel(tsg, ch, false);
if (err == -EAGAIN) {
goto out;
}
/*
 * Mark the channel unserviceable: a channel unbound from its TSG has
 * no context of its own, so it cannot serve any further jobs.
*/
nvgpu_channel_set_unserviceable(ch);
out:
nvgpu_channel_put(ch);
return err;
}
#ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL
static int gk20a_tsg_get_event_data_from_id(struct nvgpu_tsg *tsg,
unsigned int event_id,
struct gk20a_event_id_data **event_id_data)
{
struct gk20a_event_id_data *local_event_id_data;
bool event_found = false;
nvgpu_mutex_acquire(&tsg->event_id_list_lock);
nvgpu_list_for_each_entry(local_event_id_data, &tsg->event_id_list,
gk20a_event_id_data, event_id_node) {
if (local_event_id_data->event_id == event_id) {
event_found = true;
break;
}
}
nvgpu_mutex_release(&tsg->event_id_list_lock);
if (event_found) {
*event_id_data = local_event_id_data;
return 0;
} else {
return -1;
}
}
/*
 * Convert a common event_id of the form NVGPU_EVENT_ID_* to the
 * Linux-specific NVGPU_IOCTL_CHANNEL_EVENT_ID_* form used in IOCTLs.
*/
static u32 nvgpu_event_id_to_ioctl_channel_event_id(
enum nvgpu_event_id_type event_id)
{
switch (event_id) {
case NVGPU_EVENT_ID_BPT_INT:
return NVGPU_IOCTL_CHANNEL_EVENT_ID_BPT_INT;
case NVGPU_EVENT_ID_BPT_PAUSE:
return NVGPU_IOCTL_CHANNEL_EVENT_ID_BPT_PAUSE;
case NVGPU_EVENT_ID_BLOCKING_SYNC:
return NVGPU_IOCTL_CHANNEL_EVENT_ID_BLOCKING_SYNC;
case NVGPU_EVENT_ID_CILP_PREEMPTION_STARTED:
return NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_STARTED;
case NVGPU_EVENT_ID_CILP_PREEMPTION_COMPLETE:
return NVGPU_IOCTL_CHANNEL_EVENT_ID_CILP_PREEMPTION_COMPLETE;
case NVGPU_EVENT_ID_GR_SEMAPHORE_WRITE_AWAKEN:
return NVGPU_IOCTL_CHANNEL_EVENT_ID_GR_SEMAPHORE_WRITE_AWAKEN;
case NVGPU_EVENT_ID_MAX:
return NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX;
}
return NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX;
}
void nvgpu_tsg_post_event_id(struct nvgpu_tsg *tsg,
enum nvgpu_event_id_type event_id)
{
struct gk20a_event_id_data *channel_event_id_data;
u32 channel_event_id;
int err = 0;
struct gk20a *g = tsg->g;
channel_event_id = nvgpu_event_id_to_ioctl_channel_event_id(event_id);
if (event_id >= NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX)
return;
err = gk20a_tsg_get_event_data_from_id(tsg, channel_event_id,
&channel_event_id_data);
if (err)
return;
nvgpu_mutex_acquire(&channel_event_id_data->lock);
nvgpu_log_info(g,
"posting event for event_id=%d on tsg=%d\n",
channel_event_id, tsg->tsgid);
channel_event_id_data->event_posted = true;
nvgpu_cond_broadcast_interruptible(&channel_event_id_data->event_id_wq);
nvgpu_mutex_release(&channel_event_id_data->lock);
}
static unsigned int gk20a_event_id_poll(struct file *filep, poll_table *wait)
{
unsigned int mask = 0;
struct gk20a_event_id_data *event_id_data = filep->private_data;
struct gk20a *g = event_id_data->g;
u32 event_id = event_id_data->event_id;
struct nvgpu_tsg *tsg = g->fifo.tsg + event_id_data->id;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_info, " ");
poll_wait(filep, &event_id_data->event_id_wq.wq, wait);
nvgpu_mutex_acquire(&event_id_data->lock);
if (event_id_data->event_posted) {
nvgpu_log_info(g,
"found pending event_id=%d on TSG=%d\n",
event_id, tsg->tsgid);
mask = (POLLPRI | POLLIN);
event_id_data->event_posted = false;
}
nvgpu_mutex_release(&event_id_data->lock);
return mask;
}
static int gk20a_event_id_release(struct inode *inode, struct file *filp)
{
struct gk20a_event_id_data *event_id_data = filp->private_data;
struct gk20a *g;
struct nvgpu_tsg *tsg;
if (event_id_data == NULL)
return -EINVAL;
g = event_id_data->g;
tsg = g->fifo.tsg + event_id_data->id;
nvgpu_mutex_acquire(&tsg->event_id_list_lock);
nvgpu_list_del(&event_id_data->event_id_node);
nvgpu_mutex_release(&tsg->event_id_list_lock);
nvgpu_mutex_destroy(&event_id_data->lock);
nvgpu_put(g);
nvgpu_kfree(g, event_id_data);
filp->private_data = NULL;
return 0;
}
const struct file_operations gk20a_event_id_ops = {
.owner = THIS_MODULE,
.poll = gk20a_event_id_poll,
.release = gk20a_event_id_release,
};
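/*
 * TSG event-id fds: NVGPU_IOCTL_TSG_EVENT_ID_CTRL with the ENABLE cmd
 * creates an anon-inode fd backed by gk20a_event_id_ops and tied to one
 * event id on this TSG. When that event is posted, pollers on the fd are
 * woken with POLLPRI | POLLIN, and the "posted" flag is consumed by the
 * first poll that observes it.
 */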
static int gk20a_tsg_event_id_enable(struct nvgpu_tsg *tsg,
int event_id,
int *fd)
{
int err = 0;
int local_fd;
struct file *file;
char name[64];
struct gk20a_event_id_data *event_id_data;
struct gk20a *g;
g = nvgpu_get(tsg->g);
if (!g)
return -ENODEV;
err = gk20a_tsg_get_event_data_from_id(tsg,
event_id, &event_id_data);
if (err == 0) {
/* The event is already enabled */
err = -EINVAL;
goto free_ref;
}
err = get_unused_fd_flags(O_RDWR);
if (err < 0)
goto free_ref;
local_fd = err;
(void) snprintf(name, sizeof(name), "nvgpu-event%d-fd%d",
event_id, local_fd);
event_id_data = nvgpu_kzalloc(tsg->g, sizeof(*event_id_data));
if (!event_id_data) {
err = -ENOMEM;
goto clean_up;
}
event_id_data->g = g;
event_id_data->id = tsg->tsgid;
event_id_data->event_id = event_id;
nvgpu_cond_init(&event_id_data->event_id_wq);
nvgpu_mutex_init(&event_id_data->lock);
nvgpu_init_list_node(&event_id_data->event_id_node);
file = anon_inode_getfile(name, &gk20a_event_id_ops,
event_id_data, O_RDWR);
if (IS_ERR(file)) {
err = PTR_ERR(file);
goto clean_up_free;
}
nvgpu_mutex_acquire(&tsg->event_id_list_lock);
nvgpu_list_add_tail(&event_id_data->event_id_node, &tsg->event_id_list);
nvgpu_mutex_release(&tsg->event_id_list_lock);
fd_install(local_fd, file);
*fd = local_fd;
return 0;
clean_up_free:
nvgpu_kfree(g, event_id_data);
clean_up:
put_unused_fd(local_fd);
free_ref:
nvgpu_put(g);
return err;
}
static int gk20a_tsg_event_id_ctrl(struct gk20a *g, struct nvgpu_tsg *tsg,
struct nvgpu_event_id_ctrl_args *args)
{
int err = 0;
int fd = -1;
if (args->event_id >= NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX)
return -EINVAL;
nvgpu_speculation_barrier();
switch (args->cmd) {
case NVGPU_IOCTL_CHANNEL_EVENT_ID_CMD_ENABLE:
err = gk20a_tsg_event_id_enable(tsg, args->event_id, &fd);
if (!err)
args->event_fd = fd;
break;
default:
nvgpu_err(tsg->g, "unrecognized tsg event id cmd: 0x%x",
args->cmd);
err = -EINVAL;
break;
}
return err;
}
#endif /* CONFIG_NVGPU_CHANNEL_TSG_CONTROL */
int nvgpu_ioctl_tsg_open(struct gk20a *g, struct nvgpu_cdev *cdev,
struct file *filp)
{
struct tsg_private *priv;
struct nvgpu_tsg *tsg;
struct device *dev;
int err;
g = nvgpu_get(g);
if (!g)
return -ENODEV;
dev = dev_from_gk20a(g);
nvgpu_log(g, gpu_dbg_fn, "tsg: %s", dev_name(dev));
priv = nvgpu_kmalloc(g, sizeof(*priv));
if (!priv) {
err = -ENOMEM;
goto free_ref;
}
err = gk20a_busy(g);
if (err) {
nvgpu_err(g, "failed to power on, %d", err);
goto free_mem;
}
tsg = nvgpu_tsg_open(g, nvgpu_current_pid(g));
gk20a_idle(g);
if (!tsg) {
err = -ENOMEM;
goto free_mem;
}
priv->g = g;
priv->tsg = tsg;
priv->cdev = cdev;
filp->private_data = priv;
gk20a_sched_ctrl_tsg_added(g, tsg);
return 0;
free_mem:
nvgpu_kfree(g, priv);
free_ref:
nvgpu_put(g);
return err;
}
int nvgpu_ioctl_tsg_dev_open(struct inode *inode, struct file *filp)
{
struct gk20a *g;
int ret;
struct nvgpu_cdev *cdev;
cdev = container_of(inode->i_cdev, struct nvgpu_cdev, cdev);
g = nvgpu_get_gk20a_from_cdev(cdev);
nvgpu_log_fn(g, " ");
ret = gk20a_busy(g);
if (ret) {
nvgpu_err(g, "failed to power on, %d", ret);
return ret;
}
ret = nvgpu_ioctl_tsg_open(g, cdev, filp);
gk20a_idle(g);
nvgpu_log_fn(g, "done");
return ret;
}
void nvgpu_ioctl_tsg_release(struct nvgpu_ref *ref)
{
struct nvgpu_tsg *tsg = container_of(ref, struct nvgpu_tsg, refcount);
struct gk20a *g = tsg->g;
gk20a_sched_ctrl_tsg_removed(g, tsg);
nvgpu_tsg_release(ref);
nvgpu_put(g);
}
int nvgpu_ioctl_tsg_dev_release(struct inode *inode, struct file *filp)
{
struct tsg_private *priv = filp->private_data;
struct nvgpu_tsg *tsg;
if (!priv) {
/* open failed, never got a tsg for this file */
return 0;
}
tsg = priv->tsg;
nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release);
nvgpu_kfree(tsg->g, priv);
return 0;
}
static int gk20a_tsg_ioctl_set_runlist_interleave(struct gk20a *g,
struct nvgpu_tsg *tsg, struct nvgpu_runlist_interleave_args *arg)
{
struct nvgpu_sched_ctrl *sched = &g->sched_ctrl;
u32 level = arg->level;
int err;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid);
nvgpu_mutex_acquire(&sched->control_lock);
if (sched->control_locked) {
err = -EPERM;
goto done;
}
err = gk20a_busy(g);
if (err) {
nvgpu_err(g, "failed to power on gpu");
goto done;
}
level = nvgpu_get_common_runlist_level(level);
err = nvgpu_tsg_set_interleave(tsg, level);
gk20a_idle(g);
done:
nvgpu_mutex_release(&sched->control_lock);
return err;
}
static int gk20a_tsg_ioctl_set_timeslice(struct gk20a *g,
struct nvgpu_tsg *tsg, struct nvgpu_timeslice_args *arg)
{
struct nvgpu_sched_ctrl *sched = &g->sched_ctrl;
int err;
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_sched, "tsgid=%u", tsg->tsgid);
nvgpu_mutex_acquire(&sched->control_lock);
if (sched->control_locked) {
err = -EPERM;
goto done;
}
err = gk20a_busy(g);
if (err) {
nvgpu_err(g, "failed to power on gpu");
goto done;
}
err = g->ops.tsg.set_timeslice(tsg, arg->timeslice_us);
gk20a_idle(g);
done:
nvgpu_mutex_release(&sched->control_lock);
return err;
}
static int gk20a_tsg_ioctl_get_timeslice(struct gk20a *g,
struct nvgpu_tsg *tsg, struct nvgpu_timeslice_args *arg)
{
arg->timeslice_us = nvgpu_tsg_get_timeslice(tsg);
return 0;
}
static int gk20a_tsg_ioctl_read_single_sm_error_state(struct gk20a *g,
u32 gpu_instance_id,
struct nvgpu_tsg *tsg,
struct nvgpu_tsg_read_single_sm_error_state_args *args)
{
struct nvgpu_tsg_sm_error_state *sm_error_state;
struct nvgpu_tsg_sm_error_state_record sm_error_state_record;
u32 sm_id;
int err = 0;
struct nvgpu_gr_config *gr_config;
gr_config = nvgpu_gr_get_gpu_instance_config_ptr(g, gpu_instance_id);
sm_id = args->sm_id;
if (sm_id >= nvgpu_gr_config_get_no_of_sm(gr_config)) {
return -EINVAL;
}
nvgpu_speculation_barrier();
sm_error_state = tsg->sm_error_states + sm_id;
sm_error_state_record.global_esr =
sm_error_state->hww_global_esr;
sm_error_state_record.warp_esr =
sm_error_state->hww_warp_esr;
sm_error_state_record.warp_esr_pc =
sm_error_state->hww_warp_esr_pc;
sm_error_state_record.global_esr_report_mask =
sm_error_state->hww_global_esr_report_mask;
sm_error_state_record.warp_esr_report_mask =
sm_error_state->hww_warp_esr_report_mask;
if (args->record_size > 0) {
size_t write_size = sizeof(*sm_error_state);
nvgpu_speculation_barrier();
if (write_size > args->record_size)
write_size = args->record_size;
nvgpu_mutex_acquire(&g->dbg_sessions_lock);
err = copy_to_user((void __user *)(uintptr_t)
args->record_mem,
&sm_error_state_record,
write_size);
nvgpu_mutex_release(&g->dbg_sessions_lock);
if (err) {
nvgpu_err(g, "copy_to_user failed!");
return err;
}
args->record_size = write_size;
}
return 0;
}
static int nvgpu_gpu_ioctl_set_l2_max_ways_evict_last(
struct gk20a *g, u32 gpu_instance_id, struct nvgpu_tsg *tsg,
struct nvgpu_tsg_l2_max_ways_evict_last_args *args)
{
int err;
u32 gr_instance_id =
nvgpu_grmgr_get_gr_instance_id(g, gpu_instance_id);
nvgpu_mutex_acquire(&g->dbg_sessions_lock);
if (g->ops.ltc.set_l2_max_ways_evict_last) {
err = nvgpu_gr_exec_with_err_for_instance(g, gr_instance_id,
g->ops.ltc.set_l2_max_ways_evict_last(g, tsg,
args->max_ways));
} else {
err = -ENOSYS;
}
nvgpu_mutex_release(&g->dbg_sessions_lock);
return err;
}
static int nvgpu_gpu_ioctl_get_l2_max_ways_evict_last(
struct gk20a *g, u32 gpu_instance_id, struct nvgpu_tsg *tsg,
struct nvgpu_tsg_l2_max_ways_evict_last_args *args)
{
int err;
u32 gr_instance_id =
nvgpu_grmgr_get_gr_instance_id(g, gpu_instance_id);
nvgpu_mutex_acquire(&g->dbg_sessions_lock);
if (g->ops.ltc.get_l2_max_ways_evict_last) {
err = nvgpu_gr_exec_with_err_for_instance(g, gr_instance_id,
g->ops.ltc.get_l2_max_ways_evict_last(g, tsg,
&args->max_ways));
} else {
err = -ENOSYS;
}
nvgpu_mutex_release(&g->dbg_sessions_lock);
return err;
}
static u32 nvgpu_translate_l2_sector_promotion_flag(struct gk20a *g, u32 flag)
{
u32 promotion_flag = NVGPU_L2_SECTOR_PROMOTE_FLAG_INVALID;
switch (flag) {
case NVGPU_GPU_IOCTL_TSG_L2_SECTOR_PROMOTE_FLAG_NONE:
promotion_flag = NVGPU_L2_SECTOR_PROMOTE_FLAG_NONE;
break;
case NVGPU_GPU_IOCTL_TSG_L2_SECTOR_PROMOTE_FLAG_64B:
promotion_flag = NVGPU_L2_SECTOR_PROMOTE_FLAG_64B;
break;
case NVGPU_GPU_IOCTL_TSG_L2_SECTOR_PROMOTE_FLAG_128B:
promotion_flag = NVGPU_L2_SECTOR_PROMOTE_FLAG_128B;
break;
default:
nvgpu_err(g, "invalid sector promotion flag(%d)",
flag);
break;
}
return promotion_flag;
}
static int nvgpu_gpu_ioctl_set_l2_sector_promotion(struct gk20a *g,
u32 gpu_instance_id, struct nvgpu_tsg *tsg,
struct nvgpu_tsg_set_l2_sector_promotion_args *args)
{
u32 promotion_flag = 0U;
int err = 0;
u32 gr_instance_id =
nvgpu_grmgr_get_gr_instance_id(g, gpu_instance_id);
/*
 * L2 sector promotion is a perf feature, so return silently without
 * an error if it is not supported.
*/
if (g->ops.ltc.set_l2_sector_promotion == NULL) {
return 0;
}
promotion_flag =
nvgpu_translate_l2_sector_promotion_flag(g,
args->promotion_flag);
if (promotion_flag ==
NVGPU_L2_SECTOR_PROMOTE_FLAG_INVALID) {
return -EINVAL;
}
err = gk20a_busy(g);
if (err) {
nvgpu_err(g, "failed to power on gpu");
return err;
}
err = nvgpu_gr_exec_with_err_for_instance(g, gr_instance_id,
g->ops.ltc.set_l2_sector_promotion(g, tsg,
promotion_flag));
gk20a_idle(g);
return err;
}
long nvgpu_ioctl_tsg_dev_ioctl(struct file *filp, unsigned int cmd,
unsigned long arg)
{
struct tsg_private *priv = filp->private_data;
struct nvgpu_tsg *tsg = priv->tsg;
struct gk20a *g = tsg->g;
u8 __maybe_unused buf[NVGPU_TSG_IOCTL_MAX_ARG_SIZE];
int err = 0;
u32 gpu_instance_id, gr_instance_id;
nvgpu_log_fn(g, "start %d", _IOC_NR(cmd));
if ((_IOC_TYPE(cmd) != NVGPU_TSG_IOCTL_MAGIC) ||
(_IOC_NR(cmd) == 0) ||
(_IOC_NR(cmd) > NVGPU_TSG_IOCTL_LAST) ||
(_IOC_SIZE(cmd) > NVGPU_TSG_IOCTL_MAX_ARG_SIZE))
return -EINVAL;
(void) memset(buf, 0, sizeof(buf));
if (_IOC_DIR(cmd) & _IOC_WRITE) {
if (copy_from_user(buf, (void __user *)arg, _IOC_SIZE(cmd)))
return -EFAULT;
}
if (!g->sw_ready) {
err = gk20a_busy(g);
if (err)
return err;
gk20a_idle(g);
}
gpu_instance_id = nvgpu_get_gpu_instance_id_from_cdev(g, priv->cdev);
nvgpu_assert(gpu_instance_id < g->mig.num_gpu_instances);
gr_instance_id = nvgpu_grmgr_get_gr_instance_id(g, gpu_instance_id);
nvgpu_assert(gr_instance_id < g->num_gr_instances);
switch (cmd) {
case NVGPU_TSG_IOCTL_BIND_CHANNEL:
{
int ch_fd = *(int *)buf;
if (ch_fd < 0) {
err = -EINVAL;
break;
}
err = nvgpu_tsg_bind_channel_fd(tsg, ch_fd);
break;
}
case NVGPU_TSG_IOCTL_BIND_CHANNEL_EX:
{
err = gk20a_tsg_ioctl_bind_channel_ex(g, priv,
(struct nvgpu_tsg_bind_channel_ex_args *)buf);
break;
}
case NVGPU_TSG_IOCTL_UNBIND_CHANNEL:
{
int ch_fd = *(int *)buf;
if (ch_fd < 0) {
err = -EINVAL;
break;
}
err = gk20a_busy(g);
if (err) {
nvgpu_err(g,
"failed to host gk20a for ioctl cmd: 0x%x", cmd);
break;
}
err = nvgpu_tsg_unbind_channel_fd(tsg, ch_fd);
gk20a_idle(g);
break;
}
case NVGPU_IOCTL_TSG_ENABLE:
{
err = gk20a_busy(g);
if (err) {
nvgpu_err(g,
"failed to host gk20a for ioctl cmd: 0x%x", cmd);
return err;
}
g->ops.tsg.enable(tsg);
gk20a_idle(g);
break;
}
case NVGPU_IOCTL_TSG_DISABLE:
{
err = gk20a_busy(g);
if (err) {
nvgpu_err(g,
"failed to host gk20a for ioctl cmd: 0x%x", cmd);
return err;
}
g->ops.tsg.disable(tsg);
gk20a_idle(g);
break;
}
case NVGPU_IOCTL_TSG_PREEMPT:
{
err = gk20a_busy(g);
if (err) {
nvgpu_err(g,
"failed to host gk20a for ioctl cmd: 0x%x", cmd);
return err;
}
/* preempt TSG */
err = g->ops.fifo.preempt_tsg(g, tsg);
gk20a_idle(g);
break;
}
#ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL
case NVGPU_IOCTL_TSG_EVENT_ID_CTRL:
{
err = gk20a_tsg_event_id_ctrl(g, tsg,
(struct nvgpu_event_id_ctrl_args *)buf);
break;
}
#endif
case NVGPU_IOCTL_TSG_SET_RUNLIST_INTERLEAVE:
err = gk20a_tsg_ioctl_set_runlist_interleave(g, tsg,
(struct nvgpu_runlist_interleave_args *)buf);
break;
case NVGPU_IOCTL_TSG_SET_TIMESLICE:
{
err = gk20a_tsg_ioctl_set_timeslice(g, tsg,
(struct nvgpu_timeslice_args *)buf);
break;
}
case NVGPU_IOCTL_TSG_GET_TIMESLICE:
{
err = gk20a_tsg_ioctl_get_timeslice(g, tsg,
(struct nvgpu_timeslice_args *)buf);
break;
}
case NVGPU_TSG_IOCTL_READ_SINGLE_SM_ERROR_STATE:
{
err = gk20a_tsg_ioctl_read_single_sm_error_state(g, gpu_instance_id, tsg,
(struct nvgpu_tsg_read_single_sm_error_state_args *)buf);
break;
}
case NVGPU_TSG_IOCTL_SET_L2_MAX_WAYS_EVICT_LAST:
{
err = gk20a_busy(g);
if (err) {
nvgpu_err(g,
"failed to power on gpu for ioctl cmd: 0x%x", cmd);
break;
}
err = nvgpu_gpu_ioctl_set_l2_max_ways_evict_last(g,
gpu_instance_id, tsg,
(struct nvgpu_tsg_l2_max_ways_evict_last_args *)buf);
gk20a_idle(g);
break;
}
case NVGPU_TSG_IOCTL_GET_L2_MAX_WAYS_EVICT_LAST:
{
err = gk20a_busy(g);
if (err) {
nvgpu_err(g,
"failed to power on gpu for ioctl cmd: 0x%x", cmd);
break;
}
err = nvgpu_gpu_ioctl_get_l2_max_ways_evict_last(g,
gpu_instance_id, tsg,
(struct nvgpu_tsg_l2_max_ways_evict_last_args *)buf);
gk20a_idle(g);
break;
}
case NVGPU_TSG_IOCTL_SET_L2_SECTOR_PROMOTION:
{
err = nvgpu_gpu_ioctl_set_l2_sector_promotion(g,
gpu_instance_id, tsg,
(struct nvgpu_tsg_set_l2_sector_promotion_args *)buf);
break;
}
default:
nvgpu_err(g, "unrecognized tsg gpu ioctl cmd: 0x%x",
cmd);
err = -ENOTTY;
break;
}
if ((err == 0) && (_IOC_DIR(cmd) & _IOC_READ))
err = copy_to_user((void __user *)arg,
buf, _IOC_SIZE(cmd));
return err;
}

View File

@@ -0,0 +1,31 @@
/*
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#ifndef NVGPU_IOCTL_TSG_H
#define NVGPU_IOCTL_TSG_H
struct inode;
struct file;
struct gk20a;
struct nvgpu_ref;
struct nvgpu_cdev;
struct nvgpu_tsg *nvgpu_tsg_get_from_file(int fd);
int nvgpu_ioctl_tsg_dev_release(struct inode *inode, struct file *filp);
int nvgpu_ioctl_tsg_dev_open(struct inode *inode, struct file *filp);
int nvgpu_ioctl_tsg_open(struct gk20a *g, struct nvgpu_cdev *cdev, struct file *filp);
long nvgpu_ioctl_tsg_dev_ioctl(struct file *filp,
unsigned int cmd, unsigned long arg);
void nvgpu_ioctl_tsg_release(struct nvgpu_ref *ref);
#endif

View File

@@ -0,0 +1,655 @@
/*
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/vmalloc.h>
#include <linux/stacktrace.h>
#include <nvgpu/lock.h>
#include <nvgpu/kmem.h>
#include <nvgpu/atomic.h>
#include <nvgpu/bug.h>
#include <nvgpu/gk20a.h>
#include "kmem_priv.h"
/*
* Statically declared because this needs to be shared across all nvgpu driver
* instances. This makes sure that all kmem caches are _definitely_ uniquely
* named.
*/
static atomic_t kmem_cache_id;
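/*
 * nvgpu_big_alloc_impl() picks the underlying allocator by size: anything
 * larger than one CPU page goes to vmalloc(), everything else to kmalloc().
 * A minimal usage sketch via the nvgpu_big_[mz]alloc() wrappers mentioned
 * below (the table name and entry count are purely illustrative):
 *
 *   u32 *table = nvgpu_big_zalloc(g, num_entries * sizeof(*table));
 *   if (table == NULL)
 *           return -ENOMEM;
 *   ...
 *   nvgpu_big_free(g, table);
 */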
void *nvgpu_big_alloc_impl(struct gk20a *g, size_t size, bool clear)
{
void *p;
if (size > NVGPU_CPU_PAGE_SIZE) {
if (clear)
p = nvgpu_vzalloc(g, size);
else
p = nvgpu_vmalloc(g, size);
} else {
if (clear)
p = nvgpu_kzalloc(g, size);
else
p = nvgpu_kmalloc(g, size);
}
return p;
}
void nvgpu_big_free(struct gk20a *g, void *p)
{
/*
* This will have to be fixed eventually. Allocs that use
* nvgpu_big_[mz]alloc() will need to remember the size of the alloc
* when freeing.
*/
if (is_vmalloc_addr(p))
nvgpu_vfree(g, p);
else
nvgpu_kfree(g, p);
}
void *nvgpu_kmalloc_impl(struct gk20a *g, size_t size, void *ip)
{
void *alloc;
#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
alloc = __nvgpu_track_kmalloc(g, size, ip);
#else
alloc = kmalloc(size, GFP_KERNEL);
#endif
kmem_dbg(g, "kmalloc: size=%-6ld addr=0x%p gfp=0x%08x",
size, alloc, GFP_KERNEL);
return alloc;
}
void *nvgpu_kzalloc_impl(struct gk20a *g, size_t size, void *ip)
{
void *alloc;
#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
alloc = __nvgpu_track_kzalloc(g, size, ip);
#else
alloc = kzalloc(size, GFP_KERNEL);
#endif
kmem_dbg(g, "kzalloc: size=%-6ld addr=0x%p gfp=0x%08x",
size, alloc, GFP_KERNEL);
return alloc;
}
void *nvgpu_kcalloc_impl(struct gk20a *g, size_t n, size_t size, void *ip)
{
void *alloc;
#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
alloc = __nvgpu_track_kcalloc(g, n, size, ip);
#else
alloc = kcalloc(n, size, GFP_KERNEL);
#endif
kmem_dbg(g, "kcalloc: size=%-6ld addr=0x%p gfp=0x%08x",
n * size, alloc, GFP_KERNEL);
return alloc;
}
void *nvgpu_vmalloc_impl(struct gk20a *g, unsigned long size, void *ip)
{
void *alloc;
#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
alloc = __nvgpu_track_vmalloc(g, size, ip);
#else
alloc = vmalloc(size);
#endif
kmem_dbg(g, "vmalloc: size=%-6ld addr=0x%p", size, alloc);
return alloc;
}
void *nvgpu_vzalloc_impl(struct gk20a *g, unsigned long size, void *ip)
{
void *alloc;
#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
alloc = __nvgpu_track_vzalloc(g, size, ip);
#else
alloc = vzalloc(size);
#endif
kmem_dbg(g, "vzalloc: size=%-6ld addr=0x%p", size, alloc);
return alloc;
}
void nvgpu_kfree_impl(struct gk20a *g, void *addr)
{
kmem_dbg(g, "kfree: addr=0x%p", addr);
#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
__nvgpu_track_kfree(g, addr);
#else
kfree(addr);
#endif
}
void nvgpu_vfree_impl(struct gk20a *g, void *addr)
{
kmem_dbg(g, "vfree: addr=0x%p", addr);
#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
__nvgpu_track_vfree(g, addr);
#else
vfree(addr);
#endif
}
#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
void nvgpu_lock_tracker(struct nvgpu_mem_alloc_tracker *tracker)
{
nvgpu_mutex_acquire(&tracker->lock);
}
void nvgpu_unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker)
{
nvgpu_mutex_release(&tracker->lock);
}
void kmem_print_mem_alloc(struct gk20a *g,
struct nvgpu_mem_alloc *alloc,
struct seq_file *s)
{
#ifdef NVGPU_SAVE_KALLOC_STACK_TRACES
int i;
__pstat(s, "nvgpu-alloc: addr=0x%llx size=%ld\n",
alloc->addr, alloc->size);
for (i = 0; i < alloc->stack_length; i++)
__pstat(s, " %3d [<%p>] %pS\n", i,
(void *)alloc->stack[i],
(void *)alloc->stack[i]);
__pstat(s, "\n");
#else
__pstat(s, "nvgpu-alloc: addr=0x%llx size=%ld src=%pF\n",
alloc->addr, alloc->size, alloc->ip);
#endif
}
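/*
 * Each outstanding allocation is stored in an rbtree keyed by the
 * [addr, addr + size) interval, so that a later free (or a leak dump) can
 * recover the nvgpu_mem_alloc record from nothing but the address.
 * nvgpu_add_alloc() and nvgpu_rem_alloc() below are thin wrappers around
 * that tree.
 */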
static int nvgpu_add_alloc(struct nvgpu_mem_alloc_tracker *tracker,
struct nvgpu_mem_alloc *alloc)
{
alloc->allocs_entry.key_start = alloc->addr;
alloc->allocs_entry.key_end = alloc->addr + alloc->size;
nvgpu_rbtree_insert(&alloc->allocs_entry, &tracker->allocs);
return 0;
}
static struct nvgpu_mem_alloc *nvgpu_rem_alloc(
struct nvgpu_mem_alloc_tracker *tracker, u64 alloc_addr)
{
struct nvgpu_mem_alloc *alloc;
struct nvgpu_rbtree_node *node = NULL;
nvgpu_rbtree_search(alloc_addr, &node, tracker->allocs);
if (!node)
return NULL;
alloc = nvgpu_mem_alloc_from_rbtree_node(node);
nvgpu_rbtree_unlink(node, &tracker->allocs);
return alloc;
}
static int __nvgpu_save_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker,
unsigned long size, unsigned long real_size,
u64 addr, void *ip)
{
int ret;
struct nvgpu_mem_alloc *alloc;
#ifdef NVGPU_SAVE_KALLOC_STACK_TRACES
struct stack_trace stack_trace;
#endif
alloc = kzalloc(sizeof(*alloc), GFP_KERNEL);
if (!alloc)
return -ENOMEM;
alloc->owner = tracker;
alloc->size = size;
alloc->real_size = real_size;
alloc->addr = addr;
alloc->ip = ip;
#ifdef NVGPU_SAVE_KALLOC_STACK_TRACES
stack_trace.max_entries = MAX_STACK_TRACE;
stack_trace.nr_entries = 0;
stack_trace.entries = alloc->stack;
/*
* This 4 here skips the 2 function calls that happen for all traced
* allocs due to nvgpu:
*
* __nvgpu_save_kmem_alloc+0x7c/0x128
* __nvgpu_track_kzalloc+0xcc/0xf8
*
* And the function calls that get made by the stack trace code itself.
* If the stack trace saving code changes, this will likely have to change
* as well.
*/
stack_trace.skip = 4;
save_stack_trace(&stack_trace);
alloc->stack_length = stack_trace.nr_entries;
#endif
nvgpu_lock_tracker(tracker);
tracker->bytes_alloced += size;
tracker->bytes_alloced_real += real_size;
tracker->nr_allocs++;
/* Keep track of this for building a histogram later on. */
if (tracker->max_alloc < size)
tracker->max_alloc = size;
if (tracker->min_alloc > size)
tracker->min_alloc = size;
ret = nvgpu_add_alloc(tracker, alloc);
if (ret) {
WARN(1, "Duplicate alloc??? 0x%llx\n", addr);
kfree(alloc);
nvgpu_unlock_tracker(tracker);
return ret;
}
nvgpu_unlock_tracker(tracker);
return 0;
}
static int __nvgpu_free_kmem_alloc(struct nvgpu_mem_alloc_tracker *tracker,
u64 addr)
{
struct nvgpu_mem_alloc *alloc;
nvgpu_lock_tracker(tracker);
alloc = nvgpu_rem_alloc(tracker, addr);
if (!alloc) {
nvgpu_unlock_tracker(tracker);
WARN(1, "Possible double-free detected: 0x%llx!\n", addr);
return -EINVAL;
}
(void) memset((void *)alloc->addr, 0, alloc->size);
tracker->nr_frees++;
tracker->bytes_freed += alloc->size;
tracker->bytes_freed_real += alloc->real_size;
nvgpu_unlock_tracker(tracker);
return 0;
}
static void __nvgpu_check_valloc_size(unsigned long size)
{
WARN(size < NVGPU_CPU_PAGE_SIZE, "Alloc smaller than page size! (%lu)!\n", size);
}
static void __nvgpu_check_kalloc_size(size_t size)
{
WARN(size > NVGPU_CPU_PAGE_SIZE, "Alloc larger than page size! (%zu)!\n", size);
}
void *__nvgpu_track_vmalloc(struct gk20a *g, unsigned long size,
void *ip)
{
void *alloc = vmalloc(size);
if (!alloc)
return NULL;
__nvgpu_check_valloc_size(size);
/*
* Ignore the return message. If this fails let's not cause any issues
* for the rest of the driver.
*/
__nvgpu_save_kmem_alloc(g->vmallocs, size, roundup_pow_of_two(size),
(u64)(uintptr_t)alloc, ip);
return alloc;
}
void *__nvgpu_track_vzalloc(struct gk20a *g, unsigned long size,
void *ip)
{
void *alloc = vzalloc(size);
if (!alloc)
return NULL;
__nvgpu_check_valloc_size(size);
/*
* Ignore the return message. If this fails let's not cause any issues
* for the rest of the driver.
*/
__nvgpu_save_kmem_alloc(g->vmallocs, size, roundup_pow_of_two(size),
(u64)(uintptr_t)alloc, ip);
return alloc;
}
void *__nvgpu_track_kmalloc(struct gk20a *g, size_t size, void *ip)
{
void *alloc = kmalloc(size, GFP_KERNEL);
if (!alloc)
return NULL;
__nvgpu_check_kalloc_size(size);
__nvgpu_save_kmem_alloc(g->kmallocs, size, roundup_pow_of_two(size),
(u64)(uintptr_t)alloc, ip);
return alloc;
}
void *__nvgpu_track_kzalloc(struct gk20a *g, size_t size, void *ip)
{
void *alloc = kzalloc(size, GFP_KERNEL);
if (!alloc)
return NULL;
__nvgpu_check_kalloc_size(size);
__nvgpu_save_kmem_alloc(g->kmallocs, size, roundup_pow_of_two(size),
(u64)(uintptr_t)alloc, ip);
return alloc;
}
void *__nvgpu_track_kcalloc(struct gk20a *g, size_t n, size_t size,
void *ip)
{
void *alloc = kcalloc(n, size, GFP_KERNEL);
if (!alloc)
return NULL;
__nvgpu_check_kalloc_size(n * size);
__nvgpu_save_kmem_alloc(g->kmallocs, n * size,
roundup_pow_of_two(n * size),
(u64)(uintptr_t)alloc, ip);
return alloc;
}
void __nvgpu_track_vfree(struct gk20a *g, void *addr)
{
/*
* Often it is accepted practice to pass NULL pointers into free
* functions to save code.
*/
if (!addr)
return;
__nvgpu_free_kmem_alloc(g->vmallocs, (u64)(uintptr_t)addr);
vfree(addr);
}
void __nvgpu_track_kfree(struct gk20a *g, void *addr)
{
if (!addr)
return;
__nvgpu_free_kmem_alloc(g->kmallocs, (u64)(uintptr_t)addr);
kfree(addr);
}
static int __do_check_for_outstanding_allocs(
struct gk20a *g,
struct nvgpu_mem_alloc_tracker *tracker,
const char *type, bool silent)
{
struct nvgpu_rbtree_node *node;
int count = 0;
nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
while (node) {
struct nvgpu_mem_alloc *alloc =
nvgpu_mem_alloc_from_rbtree_node(node);
if (!silent)
kmem_print_mem_alloc(g, alloc, NULL);
count++;
nvgpu_rbtree_enum_next(&node, node);
}
return count;
}
/**
* check_for_outstanding_allocs - Count and display outstanding allocs
*
* @g - The GPU.
* @silent - If set don't print anything about the allocs.
*
* Dump (or just count) the number of allocations left outstanding.
*/
static int check_for_outstanding_allocs(struct gk20a *g, bool silent)
{
int count = 0;
count += __do_check_for_outstanding_allocs(g, g->kmallocs, "kmalloc",
silent);
count += __do_check_for_outstanding_allocs(g, g->vmallocs, "vmalloc",
silent);
return count;
}
static void do_nvgpu_kmem_cleanup(struct nvgpu_mem_alloc_tracker *tracker,
void (*force_free_func)(const void *))
{
struct nvgpu_rbtree_node *node;
nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
while (node) {
struct nvgpu_mem_alloc *alloc =
nvgpu_mem_alloc_from_rbtree_node(node);
if (force_free_func)
force_free_func((void *)alloc->addr);
nvgpu_rbtree_unlink(node, &tracker->allocs);
kfree(alloc);
nvgpu_rbtree_enum_start(0, &node, tracker->allocs);
}
}
/**
* nvgpu_kmem_cleanup - Cleanup the kmem tracking
*
* @g - The GPU.
* @force_free - If set will also free leaked objects if possible.
*
* Clean up all of the allocs made by the nvgpu_kmem tracking code. If
* @force_free is non-zero then the allocations made by nvgpu are also freed.
* This is risky, though, as it is possible that the memory is still in use by
* other parts of the GPU driver that are not aware this has happened.
*
* In theory it should be fine if the GPU driver has been deinitialized and
* there are no bugs in that code. However, if there are any bugs in that code
* then they could manifest as odd crashes an indeterminate amount of time
* in the future. So use @force_free at your own risk.
*/
static void nvgpu_kmem_cleanup(struct gk20a *g, bool force_free)
{
do_nvgpu_kmem_cleanup(g->kmallocs, force_free ? kfree : NULL);
do_nvgpu_kmem_cleanup(g->vmallocs, force_free ? vfree : NULL);
}
void nvgpu_kmem_fini(struct gk20a *g, int flags)
{
int count;
bool silent, force_free;
if (!flags)
return;
silent = !(flags & NVGPU_KMEM_FINI_DUMP_ALLOCS);
force_free = !!(flags & NVGPU_KMEM_FINI_FORCE_CLEANUP);
count = check_for_outstanding_allocs(g, silent);
nvgpu_kmem_cleanup(g, force_free);
/*
* If we leak objects we can either BUG() out or just WARN(). In general
* it doesn't make sense to BUG() here since leaking a few objects
* won't crash the kernel but it can be helpful for development.
*
* If neither flag is set then we just silently do nothing.
*/
if (count > 0) {
if (flags & NVGPU_KMEM_FINI_WARN) {
WARN(1, "Letting %d allocs leak!!\n", count);
} else if (flags & NVGPU_KMEM_FINI_BUG) {
nvgpu_err(g, "Letting %d allocs leak!!", count);
BUG();
}
}
}
int nvgpu_kmem_init(struct gk20a *g)
{
int err;
g->vmallocs = kzalloc(sizeof(*g->vmallocs), GFP_KERNEL);
g->kmallocs = kzalloc(sizeof(*g->kmallocs), GFP_KERNEL);
if (!g->vmallocs || !g->kmallocs) {
err = -ENOMEM;
goto fail;
}
g->vmallocs->name = "vmalloc";
g->kmallocs->name = "kmalloc";
g->vmallocs->allocs = NULL;
g->kmallocs->allocs = NULL;
nvgpu_mutex_init(&g->vmallocs->lock);
nvgpu_mutex_init(&g->kmallocs->lock);
g->vmallocs->min_alloc = NVGPU_CPU_PAGE_SIZE;
g->kmallocs->min_alloc = KMALLOC_MIN_SIZE;
/*
* This needs to go after all the other initialization since they use
* the nvgpu_kzalloc() API.
*/
g->vmallocs->allocs_cache = nvgpu_kmem_cache_create(g,
sizeof(struct nvgpu_mem_alloc));
g->kmallocs->allocs_cache = nvgpu_kmem_cache_create(g,
sizeof(struct nvgpu_mem_alloc));
if (!g->vmallocs->allocs_cache || !g->kmallocs->allocs_cache) {
err = -ENOMEM;
if (g->vmallocs->allocs_cache)
nvgpu_kmem_cache_destroy(g->vmallocs->allocs_cache);
if (g->kmallocs->allocs_cache)
nvgpu_kmem_cache_destroy(g->kmallocs->allocs_cache);
goto fail;
}
return 0;
fail:
if (g->vmallocs)
kfree(g->vmallocs);
if (g->kmallocs)
kfree(g->kmallocs);
return err;
}
#else /* !CONFIG_NVGPU_TRACK_MEM_USAGE */
int nvgpu_kmem_init(struct gk20a *g)
{
return 0;
}
void nvgpu_kmem_fini(struct gk20a *g, int flags)
{
}
#endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */
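/*
 * The nvgpu_kmem_cache_* wrappers below map directly onto the kernel's
 * kmem_cache API but generate a unique cache name per instance. A minimal
 * usage sketch (the struct name is hypothetical, for illustration only):
 *
 *   struct nvgpu_kmem_cache *cache =
 *           nvgpu_kmem_cache_create(g, sizeof(struct my_entry));
 *   struct my_entry *e = cache ? nvgpu_kmem_cache_alloc(cache) : NULL;
 *   ...
 *   nvgpu_kmem_cache_free(cache, e);
 *   nvgpu_kmem_cache_destroy(cache);
 */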
struct nvgpu_kmem_cache *nvgpu_kmem_cache_create(struct gk20a *g, size_t size)
{
struct nvgpu_kmem_cache *cache =
nvgpu_kzalloc(g, sizeof(struct nvgpu_kmem_cache));
if (!cache)
return NULL;
cache->g = g;
(void) snprintf(cache->name, sizeof(cache->name),
"nvgpu-cache-0x%p-%d-%d", g, (int)size,
atomic_inc_return(&kmem_cache_id));
cache->cache = kmem_cache_create(cache->name,
size, size, 0, NULL);
if (!cache->cache) {
nvgpu_kfree(g, cache);
return NULL;
}
return cache;
}
void nvgpu_kmem_cache_destroy(struct nvgpu_kmem_cache *cache)
{
struct gk20a *g = cache->g;
kmem_cache_destroy(cache->cache);
nvgpu_kfree(g, cache);
}
void *nvgpu_kmem_cache_alloc(struct nvgpu_kmem_cache *cache)
{
return kmem_cache_alloc(cache->cache, GFP_KERNEL);
}
void nvgpu_kmem_cache_free(struct nvgpu_kmem_cache *cache, void *ptr)
{
kmem_cache_free(cache->cache, ptr);
}

View File

@@ -0,0 +1,105 @@
/*
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __KMEM_PRIV_H__
#define __KMEM_PRIV_H__
#include <nvgpu/rbtree.h>
#include <nvgpu/lock.h>
struct seq_file;
#define __pstat(s, fmt, msg...) \
do { \
if (s) \
seq_printf(s, fmt, ##msg); \
else \
pr_info(fmt, ##msg); \
} while (false)
#define MAX_STACK_TRACE 20
/*
* Linux specific version of the nvgpu_kmem_cache struct. This type is
* completely opaque to the rest of the driver.
*/
struct nvgpu_kmem_cache {
struct gk20a *g;
struct kmem_cache *cache;
/*
* Memory to hold the kmem_cache unique name. Only necessary on our
* k3.10 kernel when not using the SLUB allocator but it's easier to
* just carry this on to newer kernels.
*/
char name[128];
};
#ifdef CONFIG_NVGPU_TRACK_MEM_USAGE
struct nvgpu_mem_alloc {
struct nvgpu_mem_alloc_tracker *owner;
void *ip;
#ifdef NVGPU_SAVE_KALLOC_STACK_TRACES
unsigned long stack[MAX_STACK_TRACE];
int stack_length;
#endif
u64 addr;
unsigned long size;
unsigned long real_size;
struct nvgpu_rbtree_node allocs_entry;
};
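/*
 * Open-coded equivalent of container_of(): recover the nvgpu_mem_alloc that
 * embeds a given rbtree node by subtracting the member offset.
 */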
static inline struct nvgpu_mem_alloc *
nvgpu_mem_alloc_from_rbtree_node(struct nvgpu_rbtree_node *node)
{
return (struct nvgpu_mem_alloc *)
((uintptr_t)node - offsetof(struct nvgpu_mem_alloc, allocs_entry));
};
/*
* Linux specific tracking of vmalloc, kmalloc, etc.
*/
struct nvgpu_mem_alloc_tracker {
const char *name;
struct nvgpu_kmem_cache *allocs_cache;
struct nvgpu_rbtree_node *allocs;
struct nvgpu_mutex lock;
u64 bytes_alloced;
u64 bytes_freed;
u64 bytes_alloced_real;
u64 bytes_freed_real;
u64 nr_allocs;
u64 nr_frees;
unsigned long min_alloc;
unsigned long max_alloc;
};
void nvgpu_lock_tracker(struct nvgpu_mem_alloc_tracker *tracker);
void nvgpu_unlock_tracker(struct nvgpu_mem_alloc_tracker *tracker);
void kmem_print_mem_alloc(struct gk20a *g,
struct nvgpu_mem_alloc *alloc,
struct seq_file *s);
#endif /* CONFIG_NVGPU_TRACK_MEM_USAGE */
#endif /* __KMEM_PRIV_H__ */

View File

@@ -0,0 +1,714 @@
/*
* Copyright (c) 2017-2021, NVIDIA Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <nvgpu/enabled.h>
#include <nvgpu/debug.h>
#include <nvgpu/error_notifier.h>
#include <nvgpu/os_sched.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/channel.h>
#include <nvgpu/dma.h>
#include <nvgpu/fence.h>
#include <nvgpu/grmgr.h>
/*
* This is required for nvgpu_vm_find_buf() which is used in the tracing
* code. Once we can get and access userspace buffers without requiring
* direct dma_buf usage this can be removed.
*/
#include <nvgpu/linux/vm.h>
#include "channel.h"
#include "ioctl_channel.h"
#include "ioctl.h"
#include "os_linux.h"
#include "dmabuf_priv.h"
#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
#include <linux/uaccess.h>
#include <linux/dma-buf.h>
#include <nvgpu/trace.h>
#include <uapi/linux/nvgpu.h>
#include "sync_sema_android.h"
#include "sync_sema_dma.h"
#include <nvgpu/linux/os_fence_dma.h>
u32 nvgpu_submit_gpfifo_user_flags_to_common_flags(u32 user_flags)
{
u32 flags = 0;
if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT)
flags |= NVGPU_SUBMIT_FLAGS_FENCE_WAIT;
if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
flags |= NVGPU_SUBMIT_FLAGS_FENCE_GET;
if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_HW_FORMAT)
flags |= NVGPU_SUBMIT_FLAGS_HW_FORMAT;
if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)
flags |= NVGPU_SUBMIT_FLAGS_SYNC_FENCE;
if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI)
flags |= NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI;
if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING)
flags |= NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING;
return flags;
}
/*
* API to convert error_notifiers in common code and of the form
* NVGPU_ERR_NOTIFIER_* into Linux specific error_notifiers exposed to user
* space and of the form NVGPU_CHANNEL_*
*/
static u32 nvgpu_error_notifier_to_channel_notifier(u32 error_notifier)
{
switch (error_notifier) {
case NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT:
return NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT;
case NVGPU_ERR_NOTIFIER_GR_ERROR_SW_METHOD:
return NVGPU_CHANNEL_GR_ERROR_SW_METHOD;
case NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY:
return NVGPU_CHANNEL_GR_ERROR_SW_NOTIFY;
case NVGPU_ERR_NOTIFIER_GR_EXCEPTION:
return NVGPU_CHANNEL_GR_EXCEPTION;
case NVGPU_ERR_NOTIFIER_GR_SEMAPHORE_TIMEOUT:
return NVGPU_CHANNEL_GR_SEMAPHORE_TIMEOUT;
case NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY:
return NVGPU_CHANNEL_GR_ILLEGAL_NOTIFY;
case NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT:
return NVGPU_CHANNEL_FIFO_ERROR_MMU_ERR_FLT;
case NVGPU_ERR_NOTIFIER_PBDMA_ERROR:
return NVGPU_CHANNEL_PBDMA_ERROR;
case NVGPU_ERR_NOTIFIER_FECS_ERR_UNIMP_FIRMWARE_METHOD:
return NVGPU_CHANNEL_FECS_ERR_UNIMP_FIRMWARE_METHOD;
case NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR:
return NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR;
case NVGPU_ERR_NOTIFIER_PBDMA_PUSHBUFFER_CRC_MISMATCH:
return NVGPU_CHANNEL_PBDMA_PUSHBUFFER_CRC_MISMATCH;
}
pr_warn("%s: invalid error_notifier requested %u\n", __func__, error_notifier);
return error_notifier;
}
/**
* nvgpu_set_err_notifier_locked()
* Should be called with the channel's error_notifier mutex held
*
* error should be of the form NVGPU_ERR_NOTIFIER_*
*/
void nvgpu_set_err_notifier_locked(struct nvgpu_channel *ch, u32 error)
{
struct nvgpu_channel_linux *priv = ch->os_priv;
error = nvgpu_error_notifier_to_channel_notifier(error);
if (priv->error_notifier.dmabuf) {
struct nvgpu_notification *notification =
priv->error_notifier.notification;
struct timespec64 time_data;
u64 nsec;
ktime_get_real_ts64(&time_data);
nsec = time_data.tv_sec * 1000000000u + time_data.tv_nsec;
notification->time_stamp.nanoseconds[0] =
(u32)nsec;
notification->time_stamp.nanoseconds[1] =
(u32)(nsec >> 32);
notification->info32 = error;
notification->status = 0xffff;
nvgpu_err(ch->g,
"error notifier set to %d for ch %d", error, ch->chid);
}
}
/* error should be of the form NVGPU_ERR_NOTIFIER_* */
void nvgpu_set_err_notifier(struct nvgpu_channel *ch, u32 error)
{
struct nvgpu_channel_linux *priv = ch->os_priv;
nvgpu_mutex_acquire(&priv->error_notifier.mutex);
nvgpu_set_err_notifier_locked(ch, error);
nvgpu_mutex_release(&priv->error_notifier.mutex);
}
void nvgpu_set_err_notifier_if_empty(struct nvgpu_channel *ch, u32 error)
{
struct nvgpu_channel_linux *priv = ch->os_priv;
nvgpu_mutex_acquire(&priv->error_notifier.mutex);
if (priv->error_notifier.dmabuf) {
struct nvgpu_notification *notification =
priv->error_notifier.notification;
/* Don't overwrite error flag if it is already set */
if (notification->status != 0xffff)
nvgpu_set_err_notifier_locked(ch, error);
}
nvgpu_mutex_release(&priv->error_notifier.mutex);
}
/* error_notifier should be of the form NVGPU_ERR_NOTIFIER_* */
bool nvgpu_is_err_notifier_set(struct nvgpu_channel *ch, u32 error_notifier)
{
struct nvgpu_channel_linux *priv = ch->os_priv;
bool notifier_set = false;
error_notifier = nvgpu_error_notifier_to_channel_notifier(error_notifier);
nvgpu_mutex_acquire(&priv->error_notifier.mutex);
if (priv->error_notifier.dmabuf) {
struct nvgpu_notification *notification =
priv->error_notifier.notification;
u32 err = notification->info32;
if (err == error_notifier)
notifier_set = true;
}
nvgpu_mutex_release(&priv->error_notifier.mutex);
return notifier_set;
}
static void gk20a_channel_update_runcb_fn(struct work_struct *work)
{
struct nvgpu_channel_completion_cb *completion_cb =
container_of(work, struct nvgpu_channel_completion_cb, work);
struct nvgpu_channel_linux *priv =
container_of(completion_cb,
struct nvgpu_channel_linux, completion_cb);
struct nvgpu_channel *ch = priv->ch;
void (*fn)(struct nvgpu_channel *, void *);
void *user_data;
nvgpu_spinlock_acquire(&completion_cb->lock);
fn = completion_cb->fn;
user_data = completion_cb->user_data;
nvgpu_spinlock_release(&completion_cb->lock);
if (fn)
fn(ch, user_data);
}
static void nvgpu_channel_work_completion_init(struct nvgpu_channel *ch)
{
struct nvgpu_channel_linux *priv = ch->os_priv;
priv->completion_cb.fn = NULL;
priv->completion_cb.user_data = NULL;
nvgpu_spinlock_init(&priv->completion_cb.lock);
INIT_WORK(&priv->completion_cb.work, gk20a_channel_update_runcb_fn);
}
static void nvgpu_channel_work_completion_clear(struct nvgpu_channel *ch)
{
struct nvgpu_channel_linux *priv = ch->os_priv;
nvgpu_spinlock_acquire(&priv->completion_cb.lock);
priv->completion_cb.fn = NULL;
priv->completion_cb.user_data = NULL;
nvgpu_spinlock_release(&priv->completion_cb.lock);
cancel_work_sync(&priv->completion_cb.work);
}
static void nvgpu_channel_work_completion_signal(struct nvgpu_channel *ch)
{
struct nvgpu_channel_linux *priv = ch->os_priv;
if (priv->completion_cb.fn)
schedule_work(&priv->completion_cb.work);
}
static void nvgpu_channel_work_completion_cancel_sync(struct nvgpu_channel *ch)
{
struct nvgpu_channel_linux *priv = ch->os_priv;
if (priv->completion_cb.fn)
cancel_work_sync(&priv->completion_cb.work);
}
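/*
 * Open a new channel and register a completion callback that is run from a
 * workqueue whenever a job on the channel completes. A rough usage sketch for
 * a kernel-internal user (the callback, data pointer and runlist_id here are
 * illustrative, not taken from a real caller):
 *
 *   static void my_update_cb(struct nvgpu_channel *ch, void *data)
 *   {
 *           ...
 *   }
 *
 *   ch = gk20a_open_new_channel_with_cb(g, my_update_cb, my_data,
 *                                       runlist_id, false);
 *   if (ch == NULL)
 *           return -ENOMEM;
 */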
struct nvgpu_channel *gk20a_open_new_channel_with_cb(struct gk20a *g,
void (*update_fn)(struct nvgpu_channel *, void *),
void *update_fn_data,
u32 runlist_id,
bool is_privileged_channel)
{
struct nvgpu_channel *ch;
struct nvgpu_channel_linux *priv;
ch = nvgpu_channel_open_new(g, runlist_id, is_privileged_channel,
nvgpu_current_pid(g), nvgpu_current_tid(g));
if (ch) {
priv = ch->os_priv;
nvgpu_spinlock_acquire(&priv->completion_cb.lock);
priv->completion_cb.fn = update_fn;
priv->completion_cb.user_data = update_fn_data;
nvgpu_spinlock_release(&priv->completion_cb.lock);
}
return ch;
}
static void nvgpu_channel_open_linux(struct nvgpu_channel *ch)
{
}
static void nvgpu_channel_close_linux(struct nvgpu_channel *ch, bool force)
{
nvgpu_channel_work_completion_clear(ch);
#if defined(CONFIG_NVGPU_CYCLESTATS)
gk20a_channel_free_cycle_stats_buffer(ch);
gk20a_channel_free_cycle_stats_snapshot(ch);
#endif
}
static int nvgpu_channel_alloc_linux(struct gk20a *g, struct nvgpu_channel *ch)
{
struct nvgpu_channel_linux *priv;
priv = nvgpu_kzalloc(g, sizeof(*priv));
if (!priv)
return -ENOMEM;
ch->os_priv = priv;
priv->ch = ch;
#ifndef CONFIG_NVGPU_SYNCFD_NONE
ch->has_os_fence_framework_support = true;
#endif
nvgpu_mutex_init(&priv->error_notifier.mutex);
nvgpu_channel_work_completion_init(ch);
return 0;
}
static void nvgpu_channel_free_linux(struct gk20a *g, struct nvgpu_channel *ch)
{
struct nvgpu_channel_linux *priv = ch->os_priv;
nvgpu_mutex_destroy(&priv->error_notifier.mutex);
nvgpu_kfree(g, priv);
ch->os_priv = NULL;
#ifndef CONFIG_NVGPU_SYNCFD_NONE
ch->has_os_fence_framework_support = false;
#endif
}
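/*
 * Two sync-fd backends can be compiled in: the legacy Android sync timeline
 * (CONFIG_NVGPU_SYNCFD_ANDROID) and the dma-fence based one
 * (CONFIG_NVGPU_SYNCFD_STABLE). The per-channel framework state below holds
 * either a timeline (named from the fmt/args passed in) or a dma-fence
 * context.
 */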
static int nvgpu_channel_init_os_fence_framework(struct nvgpu_channel *ch,
const char *fmt, ...)
{
struct nvgpu_channel_linux *priv = ch->os_priv;
struct nvgpu_os_fence_framework *fence_framework;
char name[30];
va_list args;
fence_framework = &priv->fence_framework;
va_start(args, fmt);
(void) vsnprintf(name, sizeof(name), fmt, args);
va_end(args);
#if defined(CONFIG_NVGPU_SYNCFD_ANDROID)
fence_framework->timeline = gk20a_sync_timeline_create(name);
if (!fence_framework->timeline)
return -EINVAL;
#elif defined(CONFIG_NVGPU_SYNCFD_STABLE)
fence_framework->context = nvgpu_sync_dma_context_create();
fence_framework->exists = true;
#endif
return 0;
}
static void nvgpu_channel_signal_os_fence_framework(struct nvgpu_channel *ch,
struct nvgpu_fence_type *fence)
{
struct nvgpu_channel_linux *priv = ch->os_priv;
struct nvgpu_os_fence_framework *fence_framework;
#if defined(CONFIG_NVGPU_SYNCFD_STABLE)
struct dma_fence *f;
#endif
fence_framework = &priv->fence_framework;
#if defined(CONFIG_NVGPU_SYNCFD_ANDROID)
gk20a_sync_timeline_signal(fence_framework->timeline);
#elif defined(CONFIG_NVGPU_SYNCFD_STABLE)
/*
* This is not a good example of how to use the fence type. Don't touch
* the priv data. This is os-specific code for the fence unit.
*/
f = nvgpu_get_dma_fence(&fence->priv.os_fence);
/*
* Sometimes the post fence of a job isn't a file. It can be a raw
* semaphore for kernel-internal tracking, or a raw syncpoint for
* internal tracking or for exposing to user.
*/
if (f != NULL) {
nvgpu_sync_dma_signal(f);
}
#endif
}
static void nvgpu_channel_destroy_os_fence_framework(struct nvgpu_channel *ch)
{
struct nvgpu_channel_linux *priv = ch->os_priv;
struct nvgpu_os_fence_framework *fence_framework;
fence_framework = &priv->fence_framework;
#if defined(CONFIG_NVGPU_SYNCFD_ANDROID)
gk20a_sync_timeline_destroy(fence_framework->timeline);
fence_framework->timeline = NULL;
#elif defined(CONFIG_NVGPU_SYNCFD_STABLE)
/* fence_framework->context cannot be freed, see linux/dma-fence.h */
fence_framework->exists = false;
#endif
}
static bool nvgpu_channel_fence_framework_exists(struct nvgpu_channel *ch)
{
struct nvgpu_channel_linux *priv = ch->os_priv;
struct nvgpu_os_fence_framework *fence_framework;
fence_framework = &priv->fence_framework;
#if defined(CONFIG_NVGPU_SYNCFD_ANDROID)
return (fence_framework->timeline != NULL);
#elif defined(CONFIG_NVGPU_SYNCFD_STABLE)
return fence_framework->exists;
#else
return false;
#endif
}
static int nvgpu_channel_copy_user_gpfifo(struct nvgpu_gpfifo_entry *dest,
struct nvgpu_gpfifo_userdata userdata, u32 start, u32 length)
{
struct nvgpu_gpfifo_entry __user *user_gpfifo = userdata.entries;
unsigned long n;
n = copy_from_user(dest, user_gpfifo + start,
length * sizeof(struct nvgpu_gpfifo_entry));
return n == 0 ? 0 : -EFAULT;
}
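/*
 * Pin a userspace-provided dma-buf and wrap it in an nvgpu_mem so it can be
 * used as a usermode submit buffer (GPFIFO or USERD). The sg_table is only
 * borrowed from the attachment, so the caller is responsible for unpinning
 * and dropping the dma-buf reference on teardown (see
 * nvgpu_os_channel_free_usermode_buffers() below).
 */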
int nvgpu_usermode_buf_from_dmabuf(struct gk20a *g, int dmabuf_fd,
struct nvgpu_mem *mem, struct nvgpu_usermode_buf_linux *buf)
{
struct device *dev = dev_from_gk20a(g);
struct dma_buf *dmabuf;
struct sg_table *sgt;
struct dma_buf_attachment *attachment;
int err;
dmabuf = dma_buf_get(dmabuf_fd);
if (IS_ERR(dmabuf)) {
return PTR_ERR(dmabuf);
}
if (gk20a_dmabuf_aperture(g, dmabuf) == APERTURE_INVALID) {
err = -EINVAL;
goto put_dmabuf;
}
sgt = nvgpu_mm_pin(dev, dmabuf, &attachment);
if (IS_ERR(sgt)) {
nvgpu_err(g, "Failed to pin dma_buf!");
err = PTR_ERR(sgt);
goto put_dmabuf;
}
buf->dmabuf = dmabuf;
buf->attachment = attachment;
buf->sgt = sgt;
/*
* This mem is unmapped and freed in a common path; for Linux, we'll
* also need to unref the dmabuf stuff (above) but the sgt here is only
* borrowed, so it cannot be freed by nvgpu_mem_*.
*/
mem->mem_flags = NVGPU_MEM_FLAG_FOREIGN_SGT;
mem->aperture = APERTURE_SYSMEM;
mem->skip_wmb = 0;
mem->size = dmabuf->size;
mem->priv.flags = 0;
mem->priv.pages = NULL;
mem->priv.sgt = sgt;
return 0;
put_dmabuf:
dma_buf_put(dmabuf);
return err;
}
void nvgpu_os_channel_free_usermode_buffers(struct nvgpu_channel *c)
{
struct nvgpu_channel_linux *priv = c->os_priv;
struct gk20a *g = c->g;
struct device *dev = dev_from_gk20a(g);
if (priv->usermode.gpfifo.dmabuf != NULL) {
nvgpu_mm_unpin(dev, priv->usermode.gpfifo.dmabuf,
priv->usermode.gpfifo.attachment,
priv->usermode.gpfifo.sgt);
dma_buf_put(priv->usermode.gpfifo.dmabuf);
priv->usermode.gpfifo.dmabuf = NULL;
}
if (priv->usermode.userd.dmabuf != NULL) {
nvgpu_mm_unpin(dev, priv->usermode.userd.dmabuf,
priv->usermode.userd.attachment,
priv->usermode.userd.sgt);
dma_buf_put(priv->usermode.userd.dmabuf);
priv->usermode.userd.dmabuf = NULL;
}
}
static int nvgpu_channel_alloc_usermode_buffers(struct nvgpu_channel *c,
struct nvgpu_setup_bind_args *args)
{
struct nvgpu_channel_linux *priv = c->os_priv;
struct gk20a *g = c->g;
struct device *dev = dev_from_gk20a(g);
size_t gpfifo_size;
int err;
if (args->gpfifo_dmabuf_fd == 0 || args->userd_dmabuf_fd == 0) {
return -EINVAL;
}
if (args->gpfifo_dmabuf_offset != 0 ||
args->userd_dmabuf_offset != 0) {
/* TODO - not yet supported */
return -EINVAL;
}
err = nvgpu_usermode_buf_from_dmabuf(g, args->gpfifo_dmabuf_fd,
&c->usermode_gpfifo, &priv->usermode.gpfifo);
if (err < 0) {
return err;
}
gpfifo_size = max_t(u32, SZ_4K,
args->num_gpfifo_entries *
nvgpu_get_gpfifo_entry_size());
if (c->usermode_gpfifo.size < gpfifo_size) {
err = -EINVAL;
goto free_gpfifo;
}
c->usermode_gpfifo.gpu_va = nvgpu_gmmu_map(c->vm, &c->usermode_gpfifo,
c->usermode_gpfifo.size, 0, gk20a_mem_flag_none,
false, c->usermode_gpfifo.aperture);
if (c->usermode_gpfifo.gpu_va == 0) {
err = -ENOMEM;
goto unmap_free_gpfifo;
}
err = nvgpu_usermode_buf_from_dmabuf(g, args->userd_dmabuf_fd,
&c->usermode_userd, &priv->usermode.userd);
if (err < 0) {
goto unmap_free_gpfifo;
}
args->work_submit_token = g->ops.usermode.doorbell_token(c);
return 0;
unmap_free_gpfifo:
nvgpu_dma_unmap_free(c->vm, &c->usermode_gpfifo);
free_gpfifo:
nvgpu_mm_unpin(dev, priv->usermode.gpfifo.dmabuf,
priv->usermode.gpfifo.attachment,
priv->usermode.gpfifo.sgt);
dma_buf_put(priv->usermode.gpfifo.dmabuf);
priv->usermode.gpfifo.dmabuf = NULL;
return err;
}
int nvgpu_channel_init_support_linux(struct nvgpu_os_linux *l)
{
struct gk20a *g = &l->g;
struct nvgpu_fifo *f = &g->fifo;
int chid;
int err;
for (chid = 0; chid < (int)f->num_channels; chid++) {
struct nvgpu_channel *ch = &f->channel[chid];
err = nvgpu_channel_alloc_linux(g, ch);
if (err)
goto err_clean;
}
g->os_channel.open = nvgpu_channel_open_linux;
g->os_channel.close = nvgpu_channel_close_linux;
g->os_channel.work_completion_signal =
nvgpu_channel_work_completion_signal;
g->os_channel.work_completion_cancel_sync =
nvgpu_channel_work_completion_cancel_sync;
g->os_channel.os_fence_framework_inst_exists =
nvgpu_channel_fence_framework_exists;
g->os_channel.init_os_fence_framework =
nvgpu_channel_init_os_fence_framework;
g->os_channel.signal_os_fence_framework =
nvgpu_channel_signal_os_fence_framework;
g->os_channel.destroy_os_fence_framework =
nvgpu_channel_destroy_os_fence_framework;
g->os_channel.copy_user_gpfifo =
nvgpu_channel_copy_user_gpfifo;
g->os_channel.alloc_usermode_buffers =
nvgpu_channel_alloc_usermode_buffers;
g->os_channel.free_usermode_buffers =
nvgpu_os_channel_free_usermode_buffers;
return 0;
err_clean:
for (; chid >= 0; chid--) {
struct nvgpu_channel *ch = &f->channel[chid];
nvgpu_channel_free_linux(g, ch);
}
return err;
}
void nvgpu_channel_remove_support_linux(struct nvgpu_os_linux *l)
{
struct gk20a *g = &l->g;
struct nvgpu_fifo *f = &g->fifo;
unsigned int chid;
for (chid = 0; chid < f->num_channels; chid++) {
struct nvgpu_channel *ch = &f->channel[chid];
nvgpu_channel_free_linux(g, ch);
}
g->os_channel.os_fence_framework_inst_exists = NULL;
g->os_channel.init_os_fence_framework = NULL;
g->os_channel.signal_os_fence_framework = NULL;
g->os_channel.destroy_os_fence_framework = NULL;
}
u32 nvgpu_channel_get_max_subctx_count(struct nvgpu_channel *ch)
{
struct nvgpu_channel_linux *priv = ch->os_priv;
struct gk20a *g = ch->g;
u32 gpu_instance_id;
if (priv->cdev == NULL) {
/* CE channels reserved by nvgpu do not have cdev pointer */
return nvgpu_grmgr_get_gpu_instance_max_veid_count(g, 0U);
}
gpu_instance_id = nvgpu_get_gpu_instance_id_from_cdev(g, priv->cdev);
nvgpu_assert(gpu_instance_id < g->mig.num_gpu_instances);
return nvgpu_grmgr_get_gpu_instance_max_veid_count(g, gpu_instance_id);
}
#ifdef CONFIG_DEBUG_FS
static void trace_write_pushbuffer(struct nvgpu_channel *c,
struct nvgpu_gpfifo_entry *g)
{
void *mem = NULL;
unsigned int words;
u64 offset;
struct dma_buf *dmabuf = NULL;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 11, 0)
struct dma_buf_map map;
#endif
if (gk20a_debug_trace_cmdbuf) {
u64 gpu_va = (u64)g->entry0 |
(u64)((u64)pbdma_gp_entry1_get_hi_v(g->entry1) << 32);
int err;
words = pbdma_gp_entry1_length_v(g->entry1);
err = nvgpu_vm_find_buf(c->vm, gpu_va, &dmabuf, &offset);
if (!err) {
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 11, 0)
err = dma_buf_vmap(dmabuf, &map);
mem = err ? NULL : map.vaddr;
#else
mem = dma_buf_vmap(dmabuf);
#endif
}
}
if (mem) {
#ifdef CONFIG_NVGPU_TRACE
u32 i;
/*
* Write in batches of 128 as there seems to be a limit
* of how much you can output to ftrace at once.
*/
for (i = 0; i < words; i += 128U) {
trace_gk20a_push_cmdbuf(
c->g->name,
0,
min(words - i, 128U),
offset + i * sizeof(u32),
mem);
}
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 11, 0)
dma_buf_vunmap(dmabuf, &map);
#else
dma_buf_vunmap(dmabuf, mem);
#endif
}
}
void trace_write_pushbuffers(struct nvgpu_channel *c, u32 count)
{
struct nvgpu_gpfifo_entry *gp = c->gpfifo.mem.cpu_va;
u32 n = c->gpfifo.entry_num;
u32 start = c->gpfifo.put;
u32 i;
if (!gk20a_debug_trace_cmdbuf)
return;
if (!gp)
return;
for (i = 0; i < count; i++)
trace_write_pushbuffer(c, &gp[(start + i) % n]);
}
#endif

View File

@@ -0,0 +1,612 @@
/*
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/iommu.h>
#include <nvgpu/log.h>
#include <nvgpu/dma.h>
#include <nvgpu/lock.h>
#include <nvgpu/bug.h>
#include <nvgpu/gmmu.h>
#include <nvgpu/kmem.h>
#include <nvgpu/enabled.h>
#include <nvgpu/vidmem.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/nvgpu_sgt.h>
#include <nvgpu/linux/dma.h>
#include "platform_gk20a.h"
#include "os_linux.h"
#include "dmabuf_vidmem.h"
/*
* Enough to hold all the possible flags in string form. When a new flag is
* added it must be added here as well!!
*/
#define NVGPU_DMA_STR_SIZE \
sizeof("NO_KERNEL_MAPPING PHYSICALLY_ADDRESSED")
/*
* This function can't fail. It will always at minimum memset() the buf which
* is assumed to be able to hold at least %NVGPU_DMA_STR_SIZE bytes.
*/
void nvgpu_dma_flags_to_str(struct gk20a *g, unsigned long flags, char *buf)
{
int bytes_available = NVGPU_DMA_STR_SIZE;
memset(buf, 0, NVGPU_DMA_STR_SIZE);
#define APPEND_FLAG(flag, str_flag) \
do { \
if (flags & flag) { \
strncat(buf, str_flag, bytes_available); \
bytes_available -= strlen(str_flag); \
} \
} while (false)
APPEND_FLAG(NVGPU_DMA_NO_KERNEL_MAPPING, "NO_KERNEL_MAPPING ");
APPEND_FLAG(NVGPU_DMA_PHYSICALLY_ADDRESSED, "PHYSICALLY_ADDRESSED");
#undef APPEND_FLAG
}
/**
* __dma_dbg - Debug print for DMA allocs and frees.
*
* @g - The GPU.
* @size - The requested size of the alloc (size_t).
* @flags - The flags (unsigned long).
* @type - A string describing the type (i.e: sysmem or vidmem).
* @what - A string with 'alloc' or 'free'.
*
* @flags is the DMA flags. If there are none or it doesn't make sense to print
* flags just pass 0.
*
* Please use dma_dbg_alloc() and dma_dbg_free() instead of this function.
*/
static void __dma_dbg(struct gk20a *g, size_t size, unsigned long flags,
const char *type, const char *what,
const char *func, int line)
{
char flags_str[NVGPU_DMA_STR_SIZE];
/*
* Don't bother making the flags_str if debugging is not enabled.
*/
if (!nvgpu_log_mask_enabled(g, gpu_dbg_dma))
return;
nvgpu_dma_flags_to_str(g, flags, flags_str);
nvgpu_log_dbg_impl(g, gpu_dbg_dma,
func, line,
"DMA %s: [%s] size=%-7zu "
"aligned=%-7zu total=%-10llukB %s",
what, type,
size, PAGE_ALIGN(size),
g->dma_memory_used >> 10,
flags_str);
}
static void nvgpu_dma_print_err(struct gk20a *g, size_t size,
const char *type, const char *what,
unsigned long flags)
{
char flags_str[NVGPU_DMA_STR_SIZE];
nvgpu_dma_flags_to_str(g, flags, flags_str);
nvgpu_info(g,
"DMA %s FAILED: [%s] size=%-7zu "
"aligned=%-7zu flags:%s",
what, type,
size, PAGE_ALIGN(size), flags_str);
}
#define dma_dbg_alloc(g, size, flags, type) \
__dma_dbg(g, size, flags, type, "alloc", __func__, __LINE__)
#define dma_dbg_free(g, size, flags, type) \
__dma_dbg(g, size, flags, type, "free", __func__, __LINE__)
/*
* For after the DMA alloc is done.
*/
#define __dma_dbg_done(g, size, type, what) \
nvgpu_log(g, gpu_dbg_dma, \
"DMA %s: [%s] size=%-7zu Done!", \
what, type, size);
#define dma_dbg_alloc_done(g, size, type) \
__dma_dbg_done(g, size, type, "alloc")
#define dma_dbg_free_done(g, size, type) \
__dma_dbg_done(g, size, type, "free")
#if defined(CONFIG_NVGPU_DGPU)
static u64 __nvgpu_dma_alloc(struct nvgpu_allocator *allocator, u64 at,
size_t size)
{
u64 addr = 0;
if (at)
addr = nvgpu_alloc_fixed(allocator, at, size, 0);
else
addr = nvgpu_alloc(allocator, size);
return addr;
}
#endif
/**
* nvgpu_dma_alloc_no_iommu()/nvgpu_dma_free_no_iommu() are for use cases
* where memory can be physically non-contiguous even though the GPU is not
* behind an IOMMU, because the GPU accesses the memory over NVLINK and lets
* the GMMU fully control the mapping.
*/
static void __nvgpu_dma_free_no_iommu(struct page **pages,
int max, bool big_array)
{
int i;
for (i = 0; i < max; i++)
if (pages[i])
__free_pages(pages[i], 0);
if (big_array)
vfree(pages);
else
kfree(pages);
}
static void *nvgpu_dma_alloc_no_iommu(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t gfps)
{
int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
unsigned int array_size = count * sizeof(struct page *);
struct page **pages;
int i = 0;
if (array_size <= NVGPU_CPU_PAGE_SIZE)
pages = kzalloc(array_size, GFP_KERNEL);
else
pages = vzalloc(array_size);
if (!pages)
return NULL;
gfps |= __GFP_HIGHMEM | __GFP_NOWARN;
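/*
 * Grab the largest order that still fits the remaining count, falling back
 * to smaller orders on failure, then split_page() so every entry in pages[]
 * is an order-0 page that can be freed individually later.
 */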
while (count) {
int j, order = __fls(count);
pages[i] = alloc_pages(gfps, order);
while (!pages[i] && order)
pages[i] = alloc_pages(gfps, --order);
if (!pages[i])
goto error;
if (order) {
split_page(pages[i], order);
j = 1 << order;
while (--j)
pages[i + j] = pages[i] + j;
}
memset(page_address(pages[i]), 0, NVGPU_CPU_PAGE_SIZE << order);
i += 1 << order;
count -= 1 << order;
}
*dma_handle = __pfn_to_phys(page_to_pfn(pages[0]));
return (void *)pages;
error:
__nvgpu_dma_free_no_iommu(pages, i, array_size > NVGPU_CPU_PAGE_SIZE);
return NULL;
}
static void nvgpu_dma_free_no_iommu(size_t size, void *vaddr)
{
int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
unsigned int array_size = count * sizeof(struct page *);
struct page **pages = vaddr;
WARN_ON(!pages);
__nvgpu_dma_free_no_iommu(pages, count, array_size > NVGPU_CPU_PAGE_SIZE);
}
/* Check if IOMMU is available and if GPU uses it */
#define nvgpu_uses_iommu(g) \
(nvgpu_iommuable(g) && !nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG))
static void nvgpu_dma_flags_to_attrs(struct gk20a *g, unsigned long *attrs,
unsigned long flags)
{
if (flags & NVGPU_DMA_NO_KERNEL_MAPPING)
*attrs |= DMA_ATTR_NO_KERNEL_MAPPING;
if (flags & NVGPU_DMA_PHYSICALLY_ADDRESSED && !nvgpu_uses_iommu(g))
*attrs |= DMA_ATTR_FORCE_CONTIGUOUS;
}
/*
* When the GPU uses NVLINK instead of an IOMMU, memory can be non-contiguous
* if the NVGPU_DMA_PHYSICALLY_ADDRESSED flag is not set. In that case the
* driver needs to map the page list into the kernel after allocation.
*/
#define nvgpu_nvlink_non_contig(g, flags) \
(nvgpu_is_enabled(g, NVGPU_MM_BYPASSES_IOMMU) && \
!(flags & NVGPU_DMA_PHYSICALLY_ADDRESSED))
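/*
 * Sysmem allocation takes one of three shapes depending on the flags and the
 * IOMMU/NVLINK configuration:
 *
 *  1. NVLINK, non-contiguous: pages are allocated directly (no-IOMMU path)
 *     and vmapped afterwards for a CPU mapping.
 *  2. NVGPU_DMA_NO_KERNEL_MAPPING: dma_alloc_attrs() returns a page list and
 *     no CPU mapping is created.
 *  3. Otherwise: dma_alloc_attrs() returns a kernel virtual address in cpu_va.
 *
 * A minimal usage sketch from a caller's point of view (the size and flags
 * are chosen purely for illustration):
 *
 *   struct nvgpu_mem mem = { };
 *   int err = nvgpu_dma_alloc_flags_sys(g, NVGPU_DMA_NO_KERNEL_MAPPING,
 *                                       SZ_64K, &mem);
 *   if (err != 0)
 *           return err;
 *   ...
 *   nvgpu_dma_free_sys(g, &mem);
 */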
int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags,
size_t size, struct nvgpu_mem *mem)
{
struct device *d = dev_from_gk20a(g);
gfp_t gfps = GFP_KERNEL|__GFP_ZERO;
dma_addr_t iova;
unsigned long dma_attrs = 0;
void *alloc_ret;
int err;
if (nvgpu_mem_is_valid(mem)) {
nvgpu_warn(g, "memory leak !!");
WARN_ON(1);
}
/*
* Before the debug print so we see this in the total. But during
* cleanup in the fail path this has to be subtracted.
*/
g->dma_memory_used += PAGE_ALIGN(size);
dma_dbg_alloc(g, size, flags, "sysmem");
/*
* Save the old size but for actual allocation purposes the size is
* going to be page aligned.
*/
mem->size = size;
size = PAGE_ALIGN(size);
nvgpu_dma_flags_to_attrs(g, &dma_attrs, flags);
if (nvgpu_nvlink_non_contig(g, flags))
alloc_ret = nvgpu_dma_alloc_no_iommu(d, size, &iova, gfps);
else
alloc_ret = dma_alloc_attrs(d, size, &iova, gfps, dma_attrs);
if (!alloc_ret) {
err = -ENOMEM;
goto print_dma_err;
}
if (nvgpu_nvlink_non_contig(g, flags) ||
flags & NVGPU_DMA_NO_KERNEL_MAPPING) {
mem->priv.pages = alloc_ret;
err = nvgpu_get_sgtable_from_pages(g, &mem->priv.sgt,
mem->priv.pages,
iova, size);
} else {
mem->cpu_va = alloc_ret;
err = nvgpu_get_sgtable_attrs(g, &mem->priv.sgt, mem->cpu_va,
iova, size, flags);
}
if (err)
goto fail_free_dma;
/* Map the page list from the non-contiguous allocation */
if (nvgpu_nvlink_non_contig(g, flags)) {
mem->cpu_va = vmap(mem->priv.pages, size >> PAGE_SHIFT,
0, PAGE_KERNEL);
if (!mem->cpu_va) {
err = -ENOMEM;
goto fail_free_sgt;
}
}
mem->aligned_size = size;
mem->aperture = APERTURE_SYSMEM;
mem->priv.flags = flags;
dma_dbg_alloc_done(g, mem->size, "sysmem");
return 0;
fail_free_sgt:
nvgpu_free_sgtable(g, &mem->priv.sgt);
fail_free_dma:
dma_free_attrs(d, size, alloc_ret, iova, dma_attrs);
mem->cpu_va = NULL;
mem->priv.sgt = NULL;
mem->size = 0;
g->dma_memory_used -= mem->aligned_size;
print_dma_err:
nvgpu_dma_print_err(g, size, "sysmem", "alloc", flags);
return err;
}
#if defined(CONFIG_NVGPU_DGPU)
int nvgpu_dma_alloc_flags_vid_at(struct gk20a *g, unsigned long flags,
size_t size, struct nvgpu_mem *mem, u64 at)
{
u64 addr;
int err;
struct nvgpu_allocator *vidmem_alloc = g->mm.vidmem.cleared ?
&g->mm.vidmem.allocator :
&g->mm.vidmem.bootstrap_allocator;
u64 before_pending;
if (nvgpu_mem_is_valid(mem)) {
nvgpu_warn(g, "memory leak !!");
WARN_ON(1);
}
dma_dbg_alloc(g, size, flags, "vidmem");
mem->size = size;
size = PAGE_ALIGN(size);
if (!nvgpu_alloc_initialized(&g->mm.vidmem.allocator)) {
err = -ENOSYS;
goto print_dma_err;
}
/*
* Our own allocator doesn't have any flags yet, and we can't
* kernel-map these, so require explicit flags.
*/
WARN_ON(flags != NVGPU_DMA_NO_KERNEL_MAPPING);
nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex);
before_pending = atomic64_read(&g->mm.vidmem.bytes_pending.atomic_var);
addr = __nvgpu_dma_alloc(vidmem_alloc, at, size);
nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex);
if (!addr) {
/*
* If memory is known to be freed soon, let the user know that
* it may be available after a while.
*/
if (before_pending) {
return -EAGAIN;
} else {
err = -ENOMEM;
goto print_dma_err;
}
}
if (at)
mem->mem_flags |= NVGPU_MEM_FLAG_FIXED;
mem->priv.sgt = nvgpu_kzalloc(g, sizeof(struct sg_table));
if (!mem->priv.sgt) {
err = -ENOMEM;
goto fail_physfree;
}
err = sg_alloc_table(mem->priv.sgt, 1, GFP_KERNEL);
if (err)
goto fail_kfree;
nvgpu_vidmem_set_page_alloc(mem->priv.sgt->sgl, addr);
sg_set_page(mem->priv.sgt->sgl, NULL, size, 0);
mem->aligned_size = size;
mem->aperture = APERTURE_VIDMEM;
mem->vidmem_alloc = (struct nvgpu_page_alloc *)(uintptr_t)addr;
mem->allocator = vidmem_alloc;
mem->priv.flags = flags;
nvgpu_init_list_node(&mem->clear_list_entry);
dma_dbg_alloc_done(g, mem->size, "vidmem");
return 0;
fail_kfree:
nvgpu_kfree(g, mem->priv.sgt);
fail_physfree:
nvgpu_free(&g->mm.vidmem.allocator, addr);
mem->size = 0;
print_dma_err:
nvgpu_dma_print_err(g, size, "vidmem", "alloc", flags);
return err;
}
#endif
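/*
 * Free path for sysmem buffers: this mirrors the three allocation shapes in
 * nvgpu_dma_alloc_flags_sys(). Non-contiguous NVLINK allocations are
 * vunmapped and their page list freed; everything else goes back through
 * dma_free_attrs(). "phys" nvgpu_mems (NVGPU_MEM_FLAG_NO_DMA) only carry a
 * kernel-allocated sg table, so just that metadata is freed.
 */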
void nvgpu_dma_free_sys(struct gk20a *g, struct nvgpu_mem *mem)
{
struct device *d = dev_from_gk20a(g);
unsigned long dma_attrs = 0;
g->dma_memory_used -= mem->aligned_size;
dma_dbg_free(g, mem->size, mem->priv.flags, "sysmem");
if (!(mem->mem_flags & NVGPU_MEM_FLAG_SHADOW_COPY) &&
!(mem->mem_flags & NVGPU_MEM_FLAG_NO_DMA) &&
(mem->cpu_va || mem->priv.pages)) {
void *cpu_addr = mem->cpu_va;
/* These two use pages pointer instead of cpu_va */
if (nvgpu_nvlink_non_contig(g, mem->priv.flags) ||
mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING)
cpu_addr = mem->priv.pages;
if (nvgpu_nvlink_non_contig(g, mem->priv.flags)) {
vunmap(mem->cpu_va);
nvgpu_dma_free_no_iommu(mem->aligned_size, cpu_addr);
} else {
nvgpu_dma_flags_to_attrs(g, &dma_attrs,
mem->priv.flags);
dma_free_attrs(d, mem->aligned_size, cpu_addr,
sg_dma_address(mem->priv.sgt->sgl),
dma_attrs);
}
mem->cpu_va = NULL;
mem->priv.pages = NULL;
}
/*
* When this flag is set this means we are freeing a "phys" nvgpu_mem.
* To handle this just nvgpu_kfree() the nvgpu_sgt and nvgpu_sgl.
*/
if (mem->mem_flags & NVGPU_MEM_FLAG_NO_DMA) {
nvgpu_kfree(g, mem->phys_sgt->sgl);
nvgpu_kfree(g, mem->phys_sgt);
}
if ((mem->mem_flags & NVGPU_MEM_FLAG_FOREIGN_SGT) == 0 &&
mem->priv.sgt != NULL) {
nvgpu_free_sgtable(g, &mem->priv.sgt);
}
dma_dbg_free_done(g, mem->size, "sysmem");
mem->size = 0;
mem->aligned_size = 0;
mem->aperture = APERTURE_INVALID;
}
void nvgpu_dma_free_vid(struct gk20a *g, struct nvgpu_mem *mem)
{
#if defined(CONFIG_NVGPU_DGPU)
size_t mem_size = mem->size;
dma_dbg_free(g, mem->size, mem->priv.flags, "vidmem");
/* Sanity check - only this supported when allocating. */
WARN_ON(mem->priv.flags != NVGPU_DMA_NO_KERNEL_MAPPING);
if (mem->mem_flags & NVGPU_MEM_FLAG_USER_MEM) {
int err = nvgpu_vidmem_clear_list_enqueue(g, mem);
/*
* If there's an error here then that means we can't clear the
* vidmem. That's too bad; however, we still own the nvgpu_mem
* buf so we have to free that.
*
* We don't need to worry about the vidmem allocator itself
* since when that gets cleaned up in the driver shutdown path
* all the outstanding allocs are force freed.
*/
if (err)
nvgpu_kfree(g, mem);
} else {
nvgpu_memset(g, mem, 0, 0, mem->aligned_size);
nvgpu_free(mem->allocator,
(u64)nvgpu_vidmem_get_page_alloc(mem->priv.sgt->sgl));
nvgpu_free_sgtable(g, &mem->priv.sgt);
mem->size = 0;
mem->aligned_size = 0;
mem->aperture = APERTURE_INVALID;
}
dma_dbg_free_done(g, mem_size, "vidmem");
#endif
}
int nvgpu_get_sgtable_attrs(struct gk20a *g, struct sg_table **sgt,
void *cpuva, u64 iova, size_t size, unsigned long flags)
{
int err = 0;
struct sg_table *tbl;
unsigned long dma_attrs = 0;
tbl = nvgpu_kzalloc(g, sizeof(struct sg_table));
if (!tbl) {
err = -ENOMEM;
goto fail;
}
nvgpu_dma_flags_to_attrs(g, &dma_attrs, flags);
err = dma_get_sgtable_attrs(dev_from_gk20a(g), tbl, cpuva, iova,
size, dma_attrs);
if (err)
goto fail;
sg_dma_address(tbl->sgl) = iova;
*sgt = tbl;
return 0;
fail:
if (tbl)
nvgpu_kfree(g, tbl);
return err;
}
int nvgpu_get_sgtable(struct gk20a *g, struct sg_table **sgt,
void *cpuva, u64 iova, size_t size)
{
return nvgpu_get_sgtable_attrs(g, sgt, cpuva, iova, size, 0);
}
int nvgpu_get_sgtable_from_pages(struct gk20a *g, struct sg_table **sgt,
struct page **pages, u64 iova, size_t size)
{
int err = 0;
struct sg_table *tbl;
tbl = nvgpu_kzalloc(g, sizeof(struct sg_table));
if (!tbl) {
err = -ENOMEM;
goto fail;
}
err = sg_alloc_table_from_pages(tbl, pages,
DIV_ROUND_UP(size, NVGPU_CPU_PAGE_SIZE),
0, size, GFP_KERNEL);
if (err)
goto fail;
sg_dma_address(tbl->sgl) = iova;
*sgt = tbl;
return 0;
fail:
if (tbl)
nvgpu_kfree(g, tbl);
return err;
}
void nvgpu_free_sgtable(struct gk20a *g, struct sg_table **sgt)
{
sg_free_table(*sgt);
nvgpu_kfree(g, *sgt);
*sgt = NULL;
}
bool nvgpu_iommuable(struct gk20a *g)
{
#ifdef CONFIG_TEGRA_GK20A
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
struct device *dev = l->dev;
/*
* Check against the nvgpu device to see if it's been marked as
* IOMMU'able.
*/
if (iommu_get_domain_for_dev(dev) == NULL)
return false;
#endif
return true;
}

View File

@@ -0,0 +1,31 @@
/*
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#include <nvgpu/io.h>
#include "os_linux.h"
u32 nvgpu_os_readl(uintptr_t addr)
{
return readl((void __iomem *)addr);
}
void nvgpu_os_writel(u32 v, uintptr_t addr)
{
writel(v, (void __iomem *)addr);
}
void nvgpu_os_writel_relaxed(u32 v, uintptr_t addr)
{
writel_relaxed(v, (void __iomem *)addr);
}

View File

@@ -0,0 +1,138 @@
/*
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/kernel.h>
#include <linux/device.h>
#include <nvgpu/linux/log.h>
#include <nvgpu/gk20a.h>
#include "platform_gk20a.h"
#include "os_linux.h"
/*
* Length of the log buffer. This is the stack buffer that the 'fmt, ...' part
* of nvgpu_log_msg_impl() and nvgpu_log_dbg_impl() is formatted into. Since it
* lives on the stack it must not be overly large (kernel stack space is
* limited), but it should not be so small that messages get truncated either.
*/
#define LOG_BUFFER_LENGTH 160
/*
* Annoying quirk of Linux: this has to be a string literal since the printk()
* function and friends use the preprocessor to concatenate stuff to the start
* of this string when printing.
*/
#define LOG_FMT "nvgpu: %s %33s:%-4d [%s] %s\n"
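/*
 * With LOG_FMT above, a printed line looks roughly like the example below:
 * device name, function padded to 33 characters, line number, severity tag,
 * then the message (all values here are made up for illustration):
 *
 *   nvgpu: 17000000.gpu            some_function:42   [DBG] some message
 */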
static const char *log_types[] = {
"ERR",
"WRN",
"DBG",
"INFO",
};
static inline const char *nvgpu_log_name(struct gk20a *g)
{
return dev_from_gk20a(g) == NULL ? "" : dev_name(dev_from_gk20a(g));
}
#ifdef CONFIG_GK20A_TRACE_PRINTK
static void __nvgpu_trace_printk_log(u32 trace, const char *gpu_name,
const char *func_name, int line,
const char *log_type, const char *log)
{
trace_printk(LOG_FMT, gpu_name, func_name, line, log_type, log);
}
#endif
static void __nvgpu_really_print_log(u32 trace, const char *gpu_name,
const char *func_name, int line,
enum nvgpu_log_type type, const char *log)
{
const char *name = gpu_name ? gpu_name : "";
const char *log_type = log_types[type];
#ifdef CONFIG_GK20A_TRACE_PRINTK
if (trace)
return __nvgpu_trace_printk_log(trace, name, func_name,
line, log_type, log);
#endif
switch (type) {
case NVGPU_DEBUG:
/*
* We could use pr_debug() here but we control debug enablement
* separately from the Linux kernel. Perhaps this is a bug in
* nvgpu.
*/
pr_info(LOG_FMT, name, func_name, line, log_type, log);
break;
case NVGPU_INFO:
pr_info(LOG_FMT, name, func_name, line, log_type, log);
break;
case NVGPU_WARNING:
pr_warn(LOG_FMT, name, func_name, line, log_type, log);
break;
case NVGPU_ERROR:
pr_err(LOG_FMT, name, func_name, line, log_type, log);
break;
default:
break;
}
}
__attribute__((format (printf, 5, 6)))
void nvgpu_log_msg_impl(struct gk20a *g, const char *func_name, int line,
enum nvgpu_log_type type, const char *fmt, ...)
{
char log[LOG_BUFFER_LENGTH];
va_list args;
va_start(args, fmt);
(void) vsnprintf(log, LOG_BUFFER_LENGTH, fmt, args);
va_end(args);
__nvgpu_really_print_log(0, g ? nvgpu_log_name(g) : "",
func_name, line, type, log);
}
__attribute__((format (printf, 5, 6)))
void nvgpu_log_dbg_impl(struct gk20a *g, u64 log_mask,
const char *func_name, int line,
const char *fmt, ...)
{
char log[LOG_BUFFER_LENGTH];
va_list args;
if ((log_mask & g->log_mask) == 0)
return;
va_start(args, fmt);
(void) vsnprintf(log, LOG_BUFFER_LENGTH, fmt, args);
va_end(args);
__nvgpu_really_print_log(g->log_trace, g ? nvgpu_log_name(g) : "",
func_name, line, NVGPU_DEBUG, log);
}
void nvgpu_dbg_dump_impl(struct gk20a *g, const char *str)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 0)
pr_err("__%s__ %s", g->name, str);
#else
pr_err("__%s__ %s\n", g->name, str);
#endif
}

View File

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,37 @@
/*
* Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#ifndef __NVGPU_COMMON_LINUX_MODULE_H__
#define __NVGPU_COMMON_LINUX_MODULE_H__
struct gk20a;
struct device;
struct platform_device;
struct nvgpu_os_linux;
int gk20a_pm_finalize_poweron(struct device *dev);
int nvgpu_finalize_poweron_linux(struct nvgpu_os_linux *l);
void gk20a_remove_support(struct gk20a *g);
void gk20a_driver_start_unload(struct gk20a *g);
int nvgpu_quiesce(struct gk20a *g);
int nvgpu_remove(struct device *dev);
int nvgpu_wait_for_gpu_idle(struct gk20a *g);
void nvgpu_free_irq(struct gk20a *g);
struct device_node *nvgpu_get_node(struct gk20a *g);
void __iomem *nvgpu_devm_ioremap_resource(struct platform_device *dev, int i,
struct resource **out);
void __iomem *nvgpu_devm_ioremap(struct device *dev, resource_size_t offset,
resource_size_t size);
u64 nvgpu_resource_addr(struct platform_device *dev, int i);
extern struct class nvgpu_class;
void gk20a_init_linux_characteristics(struct gk20a *g);
#endif

View File

@@ -0,0 +1,61 @@
/*
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <nvgpu/types.h>
#include "os_linux.h"
/*
* Locks out the driver from accessing GPU registers. This prevents access to
 * these registers after the GPU has been clock or power gated. This should help
* find annoying bugs where register reads and writes are silently dropped
* after the GPU has been turned off. On older chips these reads and writes can
* also lock the entire CPU up.
*/
void nvgpu_lockout_usermode_registers(struct gk20a *g)
{
g->usermode_regs = 0U;
}
/*
 * Undoes nvgpu_lockout_usermode_registers().
*/
void nvgpu_restore_usermode_registers(struct gk20a *g)
{
g->usermode_regs = g->usermode_regs_saved;
}
void nvgpu_remove_usermode_support(struct gk20a *g)
{
if (g->usermode_regs) {
g->usermode_regs = 0U;
}
}
void nvgpu_init_usermode_support(struct gk20a *g)
{
if (g->ops.usermode.base == NULL) {
return;
}
if (g->usermode_regs == 0U) {
g->usermode_regs = g->regs + g->ops.usermode.bus_base(g);
g->usermode_regs_saved = g->usermode_regs;
}
g->usermode_regs_bus_addr = g->regs_bus_addr +
g->ops.usermode.bus_base(g);
}
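/*
 * Illustrative sketch (compiled out): callers are expected to treat a zero
 * g->usermode_regs as "locked out" and skip the MMIO access entirely. The
 * helper name below is hypothetical.
 */
#if 0
static void example_usermode_access(struct gk20a *g)
{
	if (g->usermode_regs == 0U) {
		/* GPU is clock/power gated; any access would be unsafe. */
		return;
	}

	/* ... perform the register access through g->usermode_regs ... */
}
#endif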

View File

@@ -0,0 +1,27 @@
/*
* Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __NVGPU_MODULE_T19X_H__
#define __NVGPU_MODULE_T19X_H__
struct gk20a;
void nvgpu_init_usermode_support(struct gk20a *g);
void nvgpu_remove_usermode_support(struct gk20a *g);
void nvgpu_lockout_usermode_registers(struct gk20a *g);
void nvgpu_restore_usermode_registers(struct gk20a *g);
#endif

View File

@@ -0,0 +1,326 @@
/*
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <nvgpu/dma.h>
#include <nvgpu/gmmu.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/page_allocator.h>
#include <nvgpu/log.h>
#include <nvgpu/bug.h>
#include <nvgpu/enabled.h>
#include <nvgpu/kmem.h>
#include <nvgpu/vidmem.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/string.h>
#include <nvgpu/nvgpu_sgt.h>
#include <nvgpu/nvgpu_sgt_os.h>
#include <nvgpu/linux/dma.h>
#include <linux/vmalloc.h>
#include <linux/dma-mapping.h>
#include "os_linux.h"
#include "dmabuf_vidmem.h"
#include "platform_gk20a.h"
#ifndef DMA_ERROR_CODE
#define DMA_ERROR_CODE DMA_MAPPING_ERROR
#endif
static u64 __nvgpu_sgl_ipa(struct gk20a *g, void *sgl)
{
return sg_phys((struct scatterlist *)sgl);
}
static u64 __nvgpu_sgl_phys(struct gk20a *g, void *sgl)
{
struct device *dev = dev_from_gk20a(g);
struct gk20a_platform *platform = gk20a_get_platform(dev);
u64 ipa = sg_phys((struct scatterlist *)sgl);
if (platform->phys_addr)
return platform->phys_addr(g, ipa, NULL);
return ipa;
}
/*
* Obtain a SYSMEM address from a Linux SGL. This should eventually go away
* and/or become private to this file once all bad usages of Linux SGLs are
* cleaned up in the driver.
*/
u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl)
{
if (nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG) ||
!nvgpu_iommuable(g))
return g->ops.mm.gmmu.gpu_phys_addr(g, NULL,
__nvgpu_sgl_phys(g, (void *)sgl));
if (sg_dma_address(sgl) == 0)
return g->ops.mm.gmmu.gpu_phys_addr(g, NULL,
__nvgpu_sgl_phys(g, (void *)sgl));
if (sg_dma_address(sgl) == DMA_ERROR_CODE)
return 0;
return nvgpu_mem_iommu_translate(g, sg_dma_address(sgl));
}
/*
* Obtain the address the GPU should use from the %mem assuming this is a SYSMEM
* allocation.
*/
static u64 nvgpu_mem_get_addr_sysmem(struct gk20a *g, struct nvgpu_mem *mem)
{
return nvgpu_mem_get_addr_sgl(g, mem->priv.sgt->sgl);
}
/*
* Return the base address of %mem. Handles whether this is a VIDMEM or SYSMEM
* allocation.
*
* Note: this API does not make sense to use for _VIDMEM_ buffers with greater
* than one scatterlist chunk. If there's more than one scatterlist chunk then
* the buffer will not be contiguous. As such the base address probably isn't
* very useful. This is true for SYSMEM as well, if there's no IOMMU.
*
* However! It _is_ OK to use this on discontiguous sysmem buffers _if_ there's
* an IOMMU present and enabled for the GPU.
*/
u64 nvgpu_mem_get_addr(struct gk20a *g, struct nvgpu_mem *mem)
{
#ifdef CONFIG_NVGPU_DGPU
struct nvgpu_page_alloc *alloc;
if (mem->aperture == APERTURE_SYSMEM)
return nvgpu_mem_get_addr_sysmem(g, mem);
/*
* Otherwise get the vidmem address.
*/
alloc = mem->vidmem_alloc;
/* This API should not be used with > 1 chunks */
WARN_ON(alloc->nr_chunks != 1);
return alloc->base;
#else
if (mem->aperture == APERTURE_SYSMEM)
return nvgpu_mem_get_addr_sysmem(g, mem);
return 0;
#endif
}
/*
* This should only be used on contiguous buffers regardless of whether
* there's an IOMMU present/enabled. This applies to both SYSMEM and
* VIDMEM.
*/
u64 nvgpu_mem_get_phys_addr(struct gk20a *g, struct nvgpu_mem *mem)
{
#ifdef CONFIG_NVGPU_DGPU
/*
* For a VIDMEM buf, this is identical to simply get_addr() so just fall
* back to that.
*/
if (mem->aperture == APERTURE_VIDMEM)
return nvgpu_mem_get_addr(g, mem);
#endif
return __nvgpu_sgl_phys(g, (void *)mem->priv.sgt->sgl);
}
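/*
 * Illustrative sketch (compiled out): the distinction between the two helpers
 * above for a SYSMEM buffer. The buffer and helper names are hypothetical.
 */
#if 0
static void example_mem_addrs(struct gk20a *g, struct nvgpu_mem *mem)
{
	/* Address the GPU should use; the IOMMU (IOVA) address when mapped. */
	u64 gpu_addr = nvgpu_mem_get_addr(g, mem);

	/* Raw physical/IPA base; only meaningful for contiguous buffers. */
	u64 phys_addr = nvgpu_mem_get_phys_addr(g, mem);

	nvgpu_log(g, gpu_dbg_info, "gpu 0x%llx phys 0x%llx",
		  gpu_addr, phys_addr);
}
#endif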
/*
* Be careful how you use this! You are responsible for correctly freeing this
* memory.
*/
int nvgpu_mem_create_from_mem(struct gk20a *g,
struct nvgpu_mem *dest, struct nvgpu_mem *src,
u64 start_page, size_t nr_pages)
{
int ret;
u64 start = start_page * NVGPU_CPU_PAGE_SIZE;
u64 size = nr_pages * NVGPU_CPU_PAGE_SIZE;
dma_addr_t new_iova;
if (src->aperture != APERTURE_SYSMEM)
return -EINVAL;
/* Some silly things a caller might do... */
if (size > src->size)
return -EINVAL;
if ((start + size) > src->size)
return -EINVAL;
dest->mem_flags = src->mem_flags | NVGPU_MEM_FLAG_SHADOW_COPY;
dest->aperture = src->aperture;
dest->skip_wmb = src->skip_wmb;
dest->size = size;
/* Re-use the CPU mapping only if the mapping was made by the DMA API */
if (!(src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING))
dest->cpu_va = src->cpu_va + (NVGPU_CPU_PAGE_SIZE * start_page);
dest->priv.pages = src->priv.pages + start_page;
dest->priv.flags = src->priv.flags;
new_iova = sg_dma_address(src->priv.sgt->sgl) ?
sg_dma_address(src->priv.sgt->sgl) + start : 0;
/*
* Make a new SG table that is based only on the subset of pages that
* is passed to us. This table gets freed by the dma free routines.
*/
if (src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING)
ret = nvgpu_get_sgtable_from_pages(g, &dest->priv.sgt,
src->priv.pages + start_page,
new_iova, size);
else
ret = nvgpu_get_sgtable(g, &dest->priv.sgt, dest->cpu_va,
new_iova, size);
return ret;
}
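/*
 * Illustrative sketch (compiled out): carving a two-page window out of an
 * existing SYSMEM allocation, starting at page 4. The window shares the
 * parent's pages, so the parent must outlive it, and the caller remains
 * responsible for freeing the window correctly (see the comment above).
 */
#if 0
static int example_make_window(struct gk20a *g, struct nvgpu_mem *parent,
			       struct nvgpu_mem *window)
{
	return nvgpu_mem_create_from_mem(g, window, parent, 4ULL, 2);
}
#endif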
static void *nvgpu_mem_linux_sgl_next(void *sgl)
{
return (void *)sg_next((struct scatterlist *)sgl);
}
static u64 nvgpu_mem_linux_sgl_ipa(struct gk20a *g, void *sgl)
{
return __nvgpu_sgl_ipa(g, sgl);
}
static u64 nvgpu_mem_linux_sgl_ipa_to_pa(struct gk20a *g,
void *sgl, u64 ipa, u64 *pa_len)
{
struct device *dev = dev_from_gk20a(g);
struct gk20a_platform *platform = gk20a_get_platform(dev);
if (platform->phys_addr)
return platform->phys_addr(g, ipa, pa_len);
return ipa;
}
static u64 nvgpu_mem_linux_sgl_phys(struct gk20a *g, void *sgl)
{
return (u64)__nvgpu_sgl_phys(g, sgl);
}
static u64 nvgpu_mem_linux_sgl_dma(void *sgl)
{
return (u64)sg_dma_address((struct scatterlist *)sgl);
}
static u64 nvgpu_mem_linux_sgl_length(void *sgl)
{
return (u64)((struct scatterlist *)sgl)->length;
}
static u64 nvgpu_mem_linux_sgl_gpu_addr(struct gk20a *g,
void *sgl,
struct nvgpu_gmmu_attrs *attrs)
{
if (sg_dma_address((struct scatterlist *)sgl) == 0)
return g->ops.mm.gmmu.gpu_phys_addr(g, attrs,
__nvgpu_sgl_phys(g, sgl));
if (sg_dma_address((struct scatterlist *)sgl) == DMA_ERROR_CODE)
return 0;
return nvgpu_mem_iommu_translate(g,
sg_dma_address((struct scatterlist *)sgl));
}
static bool nvgpu_mem_linux_sgt_iommuable(struct gk20a *g,
struct nvgpu_sgt *sgt)
{
if (nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG))
return false;
return true;
}
static void nvgpu_mem_linux_sgl_free(struct gk20a *g, struct nvgpu_sgt *sgt)
{
/*
* Free this SGT. All we do is free the passed SGT. The actual Linux
* SGT/SGL needs to be freed separately.
*/
nvgpu_kfree(g, sgt);
}
static const struct nvgpu_sgt_ops nvgpu_linux_sgt_ops = {
.sgl_next = nvgpu_mem_linux_sgl_next,
.sgl_phys = nvgpu_mem_linux_sgl_phys,
.sgl_ipa = nvgpu_mem_linux_sgl_ipa,
.sgl_ipa_to_pa = nvgpu_mem_linux_sgl_ipa_to_pa,
.sgl_dma = nvgpu_mem_linux_sgl_dma,
.sgl_length = nvgpu_mem_linux_sgl_length,
.sgl_gpu_addr = nvgpu_mem_linux_sgl_gpu_addr,
.sgt_iommuable = nvgpu_mem_linux_sgt_iommuable,
.sgt_free = nvgpu_mem_linux_sgl_free,
};
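/*
 * Illustrative sketch (compiled out): walking an nvgpu_sgt chunk by chunk
 * through the ops table defined above. The driver normally goes through the
 * generic nvgpu_sgt helpers; direct ops calls are shown here only to make the
 * abstraction concrete.
 */
#if 0
static u64 example_sgt_total_length(struct nvgpu_sgt *sgt)
{
	void *sgl;
	u64 total = 0;

	for (sgl = sgt->sgl; sgl != NULL; sgl = sgt->ops->sgl_next(sgl))
		total += sgt->ops->sgl_length(sgl);

	return total;
}
#endif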
#ifdef CONFIG_NVGPU_DGPU
static struct nvgpu_sgt *__nvgpu_mem_get_sgl_from_vidmem(
struct gk20a *g,
struct scatterlist *linux_sgl)
{
struct nvgpu_page_alloc *vidmem_alloc;
vidmem_alloc = nvgpu_vidmem_get_page_alloc(linux_sgl);
if (!vidmem_alloc)
return NULL;
return &vidmem_alloc->sgt;
}
#endif
struct nvgpu_sgt *nvgpu_linux_sgt_create(struct gk20a *g, struct sg_table *sgt)
{
struct nvgpu_sgt *nvgpu_sgt;
struct scatterlist *linux_sgl = sgt->sgl;
#ifdef CONFIG_NVGPU_DGPU
if (nvgpu_addr_is_vidmem_page_alloc(sg_dma_address(linux_sgl)))
return __nvgpu_mem_get_sgl_from_vidmem(g, linux_sgl);
#endif
nvgpu_sgt = nvgpu_kzalloc(g, sizeof(*nvgpu_sgt));
if (!nvgpu_sgt)
return NULL;
nvgpu_log(g, gpu_dbg_sgl, "Making Linux SGL!");
nvgpu_sgt->sgl = (void *)linux_sgl;
nvgpu_sgt->ops = &nvgpu_linux_sgt_ops;
return nvgpu_sgt;
}
struct nvgpu_sgt *nvgpu_sgt_os_create_from_mem(struct gk20a *g,
struct nvgpu_mem *mem)
{
return nvgpu_linux_sgt_create(g, mem->priv.sgt);
}

View File

@@ -0,0 +1,224 @@
/*
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/nvhost.h>
#include <linux/nvhost_t194.h>
#include <linux/dma-mapping.h>
#include <uapi/linux/nvhost_ioctl.h>
#include <linux/of_platform.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/os_fence.h>
#include <nvgpu/os_fence_syncpts.h>
#include <nvgpu/nvhost.h>
#include <nvgpu/enabled.h>
#include <nvgpu/dma.h>
#include "nvhost_priv.h"
#include "os_linux.h"
#include "module.h"
int nvgpu_get_nvhost_dev(struct gk20a *g)
{
struct device_node *np = nvgpu_get_node(g);
struct platform_device *host1x_pdev = NULL;
const __be32 *host1x_ptr;
host1x_ptr = of_get_property(np, "nvidia,host1x", NULL);
if (host1x_ptr) {
struct device_node *host1x_node =
of_find_node_by_phandle(be32_to_cpup(host1x_ptr));
host1x_pdev = of_find_device_by_node(host1x_node);
if (!host1x_pdev) {
nvgpu_warn(g, "host1x device not available");
return -EPROBE_DEFER;
}
} else {
if (nvgpu_has_syncpoints(g)) {
nvgpu_warn(g, "host1x reference not found. assuming no syncpoints support");
nvgpu_set_enabled(g, NVGPU_HAS_SYNCPOINTS, false);
}
return 0;
}
g->nvhost = nvgpu_kzalloc(g, sizeof(struct nvgpu_nvhost_dev));
if (!g->nvhost)
return -ENOMEM;
g->nvhost->host1x_pdev = host1x_pdev;
return 0;
}
int nvgpu_nvhost_module_busy_ext(
struct nvgpu_nvhost_dev *nvhost_dev)
{
return nvhost_module_busy_ext(nvhost_dev->host1x_pdev);
}
void nvgpu_nvhost_module_idle_ext(
struct nvgpu_nvhost_dev *nvhost_dev)
{
nvhost_module_idle_ext(nvhost_dev->host1x_pdev);
}
void nvgpu_nvhost_debug_dump_device(
struct nvgpu_nvhost_dev *nvhost_dev)
{
nvhost_debug_dump_device(nvhost_dev->host1x_pdev);
}
const char *nvgpu_nvhost_syncpt_get_name(
struct nvgpu_nvhost_dev *nvhost_dev, int id)
{
return nvhost_syncpt_get_name(nvhost_dev->host1x_pdev, id);
}
bool nvgpu_nvhost_syncpt_is_valid_pt_ext(
struct nvgpu_nvhost_dev *nvhost_dev, u32 id)
{
return nvhost_syncpt_is_valid_pt_ext(nvhost_dev->host1x_pdev, id);
}
bool nvgpu_nvhost_syncpt_is_expired_ext(
struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 thresh)
{
return nvhost_syncpt_is_expired_ext(nvhost_dev->host1x_pdev,
id, thresh);
}
int nvgpu_nvhost_intr_register_notifier(
struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 thresh,
void (*callback)(void *, int), void *private_data)
{
return nvhost_intr_register_notifier(nvhost_dev->host1x_pdev,
id, thresh,
callback, private_data);
}
void nvgpu_nvhost_syncpt_set_minval(
struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 val)
{
nvhost_syncpt_set_minval(nvhost_dev->host1x_pdev, id, val);
}
void nvgpu_nvhost_syncpt_put_ref_ext(
struct nvgpu_nvhost_dev *nvhost_dev, u32 id)
{
nvhost_syncpt_put_ref_ext(nvhost_dev->host1x_pdev, id);
}
u32 nvgpu_nvhost_get_syncpt_client_managed(
struct nvgpu_nvhost_dev *nvhost_dev,
const char *syncpt_name)
{
return nvhost_get_syncpt_client_managed(nvhost_dev->host1x_pdev,
syncpt_name);
}
int nvgpu_nvhost_syncpt_wait_timeout_ext(
struct nvgpu_nvhost_dev *nvhost_dev, u32 id,
u32 thresh, u32 timeout, u32 waiter_index)
{
return nvhost_syncpt_wait_timeout_ext(nvhost_dev->host1x_pdev,
id, thresh, timeout, NULL, NULL);
}
int nvgpu_nvhost_syncpt_read_ext_check(
struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 *val)
{
return nvhost_syncpt_read_ext_check(nvhost_dev->host1x_pdev, id, val);
}
void nvgpu_nvhost_syncpt_set_safe_state(
struct nvgpu_nvhost_dev *nvhost_dev, u32 id)
{
u32 val = 0;
int err;
/*
* Add large number of increments to current value
* so that all waiters on this syncpoint are released
*
* We don't expect any case where more than 0x10000 increments
* are pending
*/
err = nvhost_syncpt_read_ext_check(nvhost_dev->host1x_pdev,
id, &val);
if (err != 0) {
pr_err("%s: syncpt id read failed, cannot reset for safe state",
__func__);
} else {
val += 0x10000;
nvhost_syncpt_set_minval(nvhost_dev->host1x_pdev, id, val);
}
}
int nvgpu_nvhost_get_syncpt_aperture(
struct nvgpu_nvhost_dev *nvhost_dev,
u64 *base, size_t *size)
{
return nvhost_syncpt_unit_interface_get_aperture(
nvhost_dev->host1x_pdev, (phys_addr_t *)base, size);
}
u32 nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(struct gk20a *g,
u32 syncpt_id)
{
return nvhost_syncpt_unit_interface_get_byte_offset(syncpt_id);
}
int nvgpu_nvhost_fence_install(struct nvhost_fence *fence, int fd)
{
return nvhost_fence_install(fence, fd);
}
struct nvhost_fence *nvgpu_nvhost_fence_get(int fd)
{
return nvhost_fence_get(fd);
}
void nvgpu_nvhost_fence_put(struct nvhost_fence *fence)
{
nvhost_fence_put(fence);
}
void nvgpu_nvhost_fence_dup(struct nvhost_fence *fence)
{
nvhost_fence_dup(fence);
}
struct nvhost_fence *nvgpu_nvhost_fence_create(struct platform_device *pdev,
struct nvhost_ctrl_sync_fence_info *pts,
u32 num_pts, const char *name)
{
return nvhost_fence_create(pdev, pts, num_pts, name);
}
u32 nvgpu_nvhost_fence_num_pts(struct nvhost_fence *fence)
{
return nvhost_fence_num_pts(fence);
}
int nvgpu_nvhost_fence_foreach_pt(struct nvhost_fence *fence,
int (*iter)(struct nvhost_ctrl_sync_fence_info, void *),
void *data)
{
return nvhost_fence_foreach_pt(fence, iter, data);
}

View File

@@ -0,0 +1,154 @@
/*
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/dma-mapping.h>
#include <linux/of_platform.h>
#include <linux/slab.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/nvhost.h>
#include <nvgpu/dma.h>
#include "nvhost_priv.h"
#include "os_linux.h"
#include "module.h"
void nvgpu_free_nvhost_dev(struct gk20a *g)
{
if (nvgpu_iommuable(g) && !nvgpu_is_enabled(g, NVGPU_SUPPORT_NVLINK)) {
struct device *dev = dev_from_gk20a(g);
struct nvgpu_mem *mem = &g->syncpt_mem;
dma_unmap_sg_attrs(dev, mem->priv.sgt->sgl, 1,
DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
sg_free_table(mem->priv.sgt);
nvgpu_kfree(g, mem->priv.sgt);
}
nvgpu_kfree(g, g->nvhost);
}
bool nvgpu_has_syncpoints(struct gk20a *g)
{
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
return nvgpu_is_enabled(g, NVGPU_HAS_SYNCPOINTS) &&
!l->disable_syncpoints;
}
int nvgpu_nvhost_create_symlink(struct gk20a *g)
{
struct device *dev = dev_from_gk20a(g);
int err = 0;
if (g->nvhost &&
(dev->parent != &g->nvhost->host1x_pdev->dev)) {
err = sysfs_create_link(&g->nvhost->host1x_pdev->dev.kobj,
&dev->kobj,
dev_name(dev));
}
return err;
}
void nvgpu_nvhost_remove_symlink(struct gk20a *g)
{
struct device *dev = dev_from_gk20a(g);
if (g->nvhost &&
(dev->parent != &g->nvhost->host1x_pdev->dev)) {
sysfs_remove_link(&g->nvhost->host1x_pdev->dev.kobj,
dev_name(dev));
}
}
int nvgpu_nvhost_syncpt_init(struct gk20a *g)
{
int err = 0;
struct nvgpu_mem *mem = &g->syncpt_mem;
if (!nvgpu_has_syncpoints(g))
return -ENOSYS;
err = nvgpu_get_nvhost_dev(g);
if (err) {
nvgpu_err(g, "host1x device not available");
err = -ENOSYS;
goto fail_sync;
}
err = nvgpu_nvhost_get_syncpt_aperture(
g->nvhost,
&g->syncpt_unit_base,
&g->syncpt_unit_size);
if (err) {
nvgpu_err(g, "Failed to get syncpt interface");
err = -ENOSYS;
goto fail_sync;
}
/*
* If IOMMU is enabled, create iova for syncpt region. This iova is then
* used to create nvgpu_mem for syncpt by nvgpu_mem_create_from_phys.
 * For the entire syncpt shim, a read-only mapping of the full iova range is
 * used, while a given syncpt gets a read-write mapping of only part of the
 * range. Instead of creating another variable to store the sgt,
* g->syncpt_mem's priv field is used which later on is needed for
* freeing the mapping in deinit.
*/
if (nvgpu_iommuable(g) && !nvgpu_is_enabled(g, NVGPU_SUPPORT_NVLINK)) {
struct device *dev = dev_from_gk20a(g);
struct scatterlist *sg;
mem->priv.sgt = nvgpu_kzalloc(g, sizeof(struct sg_table));
if (!mem->priv.sgt) {
err = -ENOMEM;
goto fail_sync;
}
err = sg_alloc_table(mem->priv.sgt, 1, GFP_KERNEL);
if (err) {
err = -ENOMEM;
goto fail_kfree;
}
sg = mem->priv.sgt->sgl;
sg_set_page(sg, phys_to_page(g->syncpt_unit_base),
g->syncpt_unit_size, 0);
err = dma_map_sg_attrs(dev, sg, 1,
DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
/* dma_map_sg_attrs returns 0 on errors */
if (err == 0) {
nvgpu_err(g, "iova creation for syncpoint failed");
err = -ENOMEM;
goto fail_sgt;
}
g->syncpt_unit_base = sg_dma_address(sg);
}
g->syncpt_size =
nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(g, 1);
nvgpu_info(g, "syncpt_unit_base %llx syncpt_unit_size %zx size %x\n",
g->syncpt_unit_base, g->syncpt_unit_size,
g->syncpt_size);
return 0;
fail_sgt:
sg_free_table(mem->priv.sgt);
fail_kfree:
nvgpu_kfree(g, mem->priv.sgt);
fail_sync:
nvgpu_set_enabled(g, NVGPU_HAS_SYNCPOINTS, false);
return err;
}

View File

@@ -0,0 +1,416 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/dma-fence.h>
#include <linux/file.h>
#include <linux/host1x-next.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/sync_file.h>
#include <nvgpu/os_fence.h>
#include <nvgpu/os_fence_syncpts.h>
#include <nvgpu/nvhost.h>
#include <nvgpu/gk20a.h>
#include "nvhost_priv.h"
#define TEGRA194_SYNCPT_PAGE_SIZE 0x1000
#define TEGRA194_SYNCPT_SHIM_BASE 0x60000000
#define TEGRA194_SYNCPT_SHIM_SIZE 0x00400000
static const struct of_device_id host1x_match[] = {
{ .compatible = "nvidia,tegra186-host1x", },
{ .compatible = "nvidia,tegra194-host1x", },
{},
};
int nvgpu_get_nvhost_dev(struct gk20a *g)
{
struct platform_device *host1x_pdev;
struct device_node *np;
np = of_find_matching_node(NULL, host1x_match);
if (!np) {
nvgpu_warn(g, "Failed to find host1x, syncpt support disabled");
nvgpu_set_enabled(g, NVGPU_HAS_SYNCPOINTS, false);
return 0;
}
host1x_pdev = of_find_device_by_node(np);
if (!host1x_pdev) {
nvgpu_warn(g, "host1x device not available");
return -EPROBE_DEFER;
}
g->nvhost = nvgpu_kzalloc(g, sizeof(struct nvgpu_nvhost_dev));
if (!g->nvhost)
return -ENOMEM;
g->nvhost->host1x_pdev = host1x_pdev;
return 0;
}
int nvgpu_nvhost_module_busy_ext(struct nvgpu_nvhost_dev *nvhost_dev)
{
return 0;
}
void nvgpu_nvhost_module_idle_ext(struct nvgpu_nvhost_dev *nvhost_dev) { }
void nvgpu_nvhost_debug_dump_device(struct nvgpu_nvhost_dev *nvhost_dev) { }
const char *nvgpu_nvhost_syncpt_get_name(struct nvgpu_nvhost_dev *nvhost_dev,
int id)
{
return NULL;
}
bool nvgpu_nvhost_syncpt_is_valid_pt_ext(struct nvgpu_nvhost_dev *nvhost_dev,
u32 id)
{
struct host1x_syncpt *sp;
struct host1x *host1x;
host1x = platform_get_drvdata(nvhost_dev->host1x_pdev);
if (WARN_ON(!host1x))
return false;
sp = host1x_syncpt_get_by_id_noref(host1x, id);
if (!sp)
return false;
return true;
}
bool nvgpu_nvhost_syncpt_is_expired_ext(struct nvgpu_nvhost_dev *nvhost_dev,
u32 id, u32 thresh)
{
struct host1x_syncpt *sp;
struct host1x *host1x;
host1x = platform_get_drvdata(nvhost_dev->host1x_pdev);
if (WARN_ON(!host1x))
return true;
sp = host1x_syncpt_get_by_id_noref(host1x, id);
if (WARN_ON(!sp))
return true;
if (host1x_syncpt_wait(sp, thresh, 0, NULL))
return false;
return true;
}
struct nvgpu_host1x_cb {
struct dma_fence_cb cb;
void (*notifier)(void *, int);
void *notifier_data;
};
static void nvgpu_host1x_cb_func(struct dma_fence *f, struct dma_fence_cb *cb)
{
struct nvgpu_host1x_cb *host1x_cb;
host1x_cb = container_of(cb, struct nvgpu_host1x_cb, cb);
host1x_cb->notifier(host1x_cb->notifier_data, 0);
dma_fence_put(f);
kfree(host1x_cb);
}
int nvgpu_nvhost_intr_register_notifier(struct nvgpu_nvhost_dev *nvhost_dev,
u32 id, u32 thresh,
void (*notifier)(void *, int),
void *notifier_data)
{
struct dma_fence *fence;
struct nvgpu_host1x_cb *cb;
struct host1x_syncpt *sp;
struct host1x *host1x;
int err;
host1x = platform_get_drvdata(nvhost_dev->host1x_pdev);
if (!host1x)
return -ENODEV;
sp = host1x_syncpt_get_by_id_noref(host1x, id);
if (!sp)
return -EINVAL;
fence = host1x_fence_create(sp, thresh);
if (IS_ERR(fence)) {
pr_err("error %d during construction of fence!",
(int)PTR_ERR(fence));
return PTR_ERR(fence);
}
cb = kzalloc(sizeof(*cb), GFP_KERNEL);
if (!cb)
return -ENOMEM;
cb->notifier = notifier;
cb->notifier_data = notifier_data;
err = dma_fence_add_callback(fence, &cb->cb, nvgpu_host1x_cb_func);
if (err < 0) {
dma_fence_put(fence);
kfree(cb);
}
return err;
}
void nvgpu_nvhost_syncpt_set_minval(struct nvgpu_nvhost_dev *nvhost_dev,
u32 id, u32 val)
{
struct host1x_syncpt *sp;
struct host1x *host1x;
u32 cur;
host1x = platform_get_drvdata(nvhost_dev->host1x_pdev);
if (WARN_ON(!host1x))
return;
sp = host1x_syncpt_get_by_id_noref(host1x, id);
if (WARN_ON(!sp))
return;
cur = host1x_syncpt_read(sp);
while (cur++ != val)
host1x_syncpt_incr(sp);
}
void nvgpu_nvhost_syncpt_put_ref_ext(struct nvgpu_nvhost_dev *nvhost_dev,
u32 id)
{
struct host1x_syncpt *sp;
struct host1x *host1x;
host1x = platform_get_drvdata(nvhost_dev->host1x_pdev);
if (WARN_ON(!host1x))
return;
sp = host1x_syncpt_get_by_id_noref(host1x, id);
if (WARN_ON(!sp))
return;
host1x_syncpt_put(sp);
}
u32 nvgpu_nvhost_get_syncpt_client_managed(struct nvgpu_nvhost_dev *nvhost_dev,
const char *syncpt_name)
{
struct host1x_syncpt *sp;
struct host1x *host1x;
host1x = platform_get_drvdata(nvhost_dev->host1x_pdev);
if (!host1x)
return 0;
sp = host1x_syncpt_alloc(host1x, HOST1X_SYNCPT_CLIENT_MANAGED,
syncpt_name);
if (!sp)
return 0;
return host1x_syncpt_id(sp);
}
int nvgpu_nvhost_syncpt_wait_timeout_ext(struct nvgpu_nvhost_dev *nvhost_dev,
u32 id, u32 thresh, u32 timeout,
u32 waiter_index)
{
struct host1x_syncpt *sp;
struct host1x *host1x;
host1x = platform_get_drvdata(nvhost_dev->host1x_pdev);
if (!host1x)
return -ENODEV;
sp = host1x_syncpt_get_by_id_noref(host1x, id);
if (!sp)
return -EINVAL;
return host1x_syncpt_wait(sp, thresh, timeout, NULL);
}
int nvgpu_nvhost_syncpt_read_ext_check(struct nvgpu_nvhost_dev *nvhost_dev,
u32 id, u32 *val)
{
struct host1x_syncpt *sp;
struct host1x *host1x;
host1x = platform_get_drvdata(nvhost_dev->host1x_pdev);
if (!host1x)
return -ENODEV;
sp = host1x_syncpt_get_by_id_noref(host1x, id);
if (!sp)
return -EINVAL;
*val = host1x_syncpt_read(sp);
return 0;
}
void nvgpu_nvhost_syncpt_set_safe_state(struct nvgpu_nvhost_dev *nvhost_dev,
u32 id)
{
struct host1x_syncpt *sp;
struct host1x *host1x;
u32 val, cur;
host1x = platform_get_drvdata(nvhost_dev->host1x_pdev);
if (WARN_ON(!host1x))
return;
/*
* Add large number of increments to current value
* so that all waiters on this syncpoint are released
*/
sp = host1x_syncpt_get_by_id_noref(host1x, id);
if (WARN_ON(!sp))
return;
cur = host1x_syncpt_read(sp);
val = cur + 1000;
while (cur++ != val)
host1x_syncpt_incr(sp);
}
int nvgpu_nvhost_get_syncpt_aperture(struct nvgpu_nvhost_dev *nvhost_dev,
u64 *base, size_t *size)
{
struct device_node *np = nvhost_dev->host1x_pdev->dev.of_node;
if (of_device_is_compatible(np, "nvidia,tegra194-host1x")) {
*base = TEGRA194_SYNCPT_SHIM_BASE;
*size = TEGRA194_SYNCPT_SHIM_SIZE;
return 0;
}
return -ENOTSUPP;
}
u32 nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(struct gk20a *g,
u32 syncpt_id)
{
struct platform_device *host1x_pdev = g->nvhost->host1x_pdev;
struct device_node *np = host1x_pdev->dev.of_node;
if (of_device_is_compatible(np, "nvidia,tegra194-host1x"))
return syncpt_id * TEGRA194_SYNCPT_PAGE_SIZE;
return 0;
}
int nvgpu_nvhost_fence_install(struct nvhost_fence *fence, int fd)
{
struct dma_fence *f = (struct dma_fence *)fence;
struct sync_file *file = sync_file_create(f);
if (!file)
return -ENOMEM;
dma_fence_get(f);
fd_install(fd, file->file);
return 0;
}
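/*
 * Illustrative sketch (compiled out): the expected caller pattern around
 * nvgpu_nvhost_fence_install(). The fd is reserved first and only becomes
 * visible to user space once the sync file has been attached to it. The
 * O_CLOEXEC flag and helper name are assumptions about caller policy.
 */
#if 0
static int example_export_fence(struct nvhost_fence *fence)
{
	int err;
	int fd = get_unused_fd_flags(O_CLOEXEC);

	if (fd < 0)
		return fd;

	err = nvgpu_nvhost_fence_install(fence, fd);
	if (err) {
		put_unused_fd(fd);
		return err;
	}

	return fd;
}
#endif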
void nvgpu_nvhost_fence_put(struct nvhost_fence *fence)
{
dma_fence_put((struct dma_fence *)fence);
}
void nvgpu_nvhost_fence_dup(struct nvhost_fence *fence)
{
dma_fence_get((struct dma_fence *)fence);
}
struct nvhost_fence *nvgpu_nvhost_fence_create(struct platform_device *pdev,
struct nvhost_ctrl_sync_fence_info *pts,
u32 num_pts, const char *name)
{
struct host1x_syncpt *sp;
struct host1x *host1x;
if (num_pts != 1)
return ERR_PTR(-EINVAL);
host1x = platform_get_drvdata(pdev);
if (!host1x)
return ERR_PTR(-ENODEV);
sp = host1x_syncpt_get_by_id_noref(host1x, pts->id);
if (WARN_ON(!sp))
return ERR_PTR(-EINVAL);
return (struct nvhost_fence *)host1x_fence_create(sp, pts->thresh);
}
struct nvhost_fence *nvgpu_nvhost_fence_get(int fd)
{
return (struct nvhost_fence *)sync_file_get_fence(fd);
}
u32 nvgpu_nvhost_fence_num_pts(struct nvhost_fence *fence)
{
struct dma_fence_array *array;
array = to_dma_fence_array((struct dma_fence *)fence);
if (!array)
return 1;
return array->num_fences;
}
int nvgpu_nvhost_fence_foreach_pt(struct nvhost_fence *fence,
int (*iter)(struct nvhost_ctrl_sync_fence_info, void *),
void *data)
{
struct nvhost_ctrl_sync_fence_info info;
struct dma_fence_array *array;
int i, err;
array = to_dma_fence_array((struct dma_fence *)fence);
if (!array) {
err = host1x_fence_extract((struct dma_fence *)fence, &info.id,
&info.thresh);
if (err)
return err;
return iter(info, data);
}
for (i = 0; i < array->num_fences; ++i) {
err = host1x_fence_extract(array->fences[i], &info.id,
&info.thresh);
if (err)
return err;
err = iter(info, data);
if (err)
return err;
}
return 0;
}
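/*
 * Illustrative sketch (compiled out): a foreach_pt callback that simply
 * counts the sync points backing a fence; "data" carries the running count.
 */
#if 0
static int example_count_pt(struct nvhost_ctrl_sync_fence_info info,
			    void *data)
{
	(*(u32 *)data)++;
	return 0;
}

static u32 example_fence_pt_count(struct nvhost_fence *fence)
{
	u32 count = 0;

	(void) nvgpu_nvhost_fence_foreach_pt(fence, example_count_pt, &count);
	return count;
}
#endif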

View File

@@ -0,0 +1,39 @@
/*
* Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __NVGPU_NVHOST_PRIV_H__
#define __NVGPU_NVHOST_PRIV_H__
#include <nvgpu/os_fence_syncpts.h>
struct nvhost_fence;
struct nvgpu_nvhost_dev {
struct platform_device *host1x_pdev;
};
int nvgpu_nvhost_fence_install(struct nvhost_fence *f, int fd);
struct nvhost_fence *nvgpu_nvhost_fence_get(int fd);
void nvgpu_nvhost_fence_put(struct nvhost_fence *f);
void nvgpu_nvhost_fence_dup(struct nvhost_fence *f);
struct nvhost_fence *nvgpu_nvhost_fence_create(struct platform_device *pdev,
struct nvhost_ctrl_sync_fence_info *pts,
u32 num_pts, const char *name);
u32 nvgpu_nvhost_fence_num_pts(struct nvhost_fence *fence);
int nvgpu_nvhost_fence_foreach_pt(struct nvhost_fence *fence,
int (*iter)(struct nvhost_ctrl_sync_fence_info, void *),
void *data);
#endif /* __NVGPU_NVHOST_PRIV_H__ */

View File

@@ -0,0 +1,312 @@
/*
* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/mutex.h>
#include <linux/io.h>
#ifdef CONFIG_NVGPU_NVLINK
#include <nvlink/common/tegra-nvlink.h>
#endif
#include <nvgpu/gk20a.h>
#include <nvgpu/nvlink.h>
#include <nvgpu/nvlink_minion.h>
#include <nvgpu/enabled.h>
#include <nvgpu/firmware.h>
#ifdef CONFIG_NVGPU_NVLINK
int nvgpu_nvlink_enumerate(struct gk20a *g)
{
struct nvlink_device *ndev = (struct nvlink_device *) g->nvlink.priv;
if (!ndev)
return -ENODEV;
return nvlink_enumerate(ndev);
}
int nvgpu_nvlink_train(struct gk20a *g, u32 link_id, bool from_off)
{
struct nvlink_device *ndev = (struct nvlink_device *) g->nvlink.priv;
if (!ndev)
return -ENODEV;
/* Check if the link is connected */
if (!g->nvlink.links[link_id].remote_info.is_connected)
return -ENODEV;
if (from_off)
return nvlink_transition_intranode_conn_off_to_safe(ndev);
return nvlink_train_intranode_conn_safe_to_hs(ndev);
}
void nvgpu_nvlink_free_minion_used_mem(struct gk20a *g,
struct nvgpu_firmware *nvgpu_minion_fw)
{
struct nvlink_device *ndev = (struct nvlink_device *) g->nvlink.priv;
struct minion_hdr *minion_hdr = &ndev->minion_hdr;
nvgpu_kfree(g, minion_hdr->app_code_offsets);
nvgpu_kfree(g, minion_hdr->app_code_sizes);
nvgpu_kfree(g, minion_hdr->app_data_offsets);
nvgpu_kfree(g, minion_hdr->app_data_sizes);
if (nvgpu_minion_fw) {
nvgpu_release_firmware(g, nvgpu_minion_fw);
ndev->minion_img = NULL;
}
}
/*
* Load minion FW
*/
int nvgpu_nvlink_minion_load_ucode(struct gk20a *g,
struct nvgpu_firmware *nvgpu_minion_fw)
{
int err = 0;
struct nvlink_device *ndev = (struct nvlink_device *) g->nvlink.priv;
struct minion_hdr *minion_hdr = &ndev->minion_hdr;
u32 data_idx = 0;
u32 app = 0;
nvgpu_log_fn(g, " ");
/* Read ucode header */
minion_hdr->os_code_offset = nvgpu_nvlink_minion_extract_word(
nvgpu_minion_fw,
data_idx);
data_idx += 4;
minion_hdr->os_code_size = nvgpu_nvlink_minion_extract_word(
nvgpu_minion_fw,
data_idx);
data_idx += 4;
minion_hdr->os_data_offset = nvgpu_nvlink_minion_extract_word(
nvgpu_minion_fw,
data_idx);
data_idx += 4;
minion_hdr->os_data_size = nvgpu_nvlink_minion_extract_word(
nvgpu_minion_fw,
data_idx);
data_idx += 4;
minion_hdr->num_apps = nvgpu_nvlink_minion_extract_word(
nvgpu_minion_fw,
data_idx);
data_idx += 4;
nvgpu_log(g, gpu_dbg_nvlink,
"MINION Ucode Header Info:");
nvgpu_log(g, gpu_dbg_nvlink,
"-------------------------");
nvgpu_log(g, gpu_dbg_nvlink,
" - OS Code Offset = %u", minion_hdr->os_code_offset);
nvgpu_log(g, gpu_dbg_nvlink,
" - OS Code Size = %u", minion_hdr->os_code_size);
nvgpu_log(g, gpu_dbg_nvlink,
" - OS Data Offset = %u", minion_hdr->os_data_offset);
nvgpu_log(g, gpu_dbg_nvlink,
" - OS Data Size = %u", minion_hdr->os_data_size);
nvgpu_log(g, gpu_dbg_nvlink,
" - Num Apps = %u", minion_hdr->num_apps);
/* Allocate offset/size arrays for all the ucode apps */
minion_hdr->app_code_offsets = nvgpu_kcalloc(g,
minion_hdr->num_apps,
sizeof(u32));
if (!minion_hdr->app_code_offsets) {
nvgpu_err(g, "Couldn't allocate MINION app_code_offsets array");
return -ENOMEM;
}
minion_hdr->app_code_sizes = nvgpu_kcalloc(g,
minion_hdr->num_apps,
sizeof(u32));
if (!minion_hdr->app_code_sizes) {
nvgpu_err(g, "Couldn't allocate MINION app_code_sizes array");
return -ENOMEM;
}
minion_hdr->app_data_offsets = nvgpu_kcalloc(g,
minion_hdr->num_apps,
sizeof(u32));
if (!minion_hdr->app_data_offsets) {
nvgpu_err(g, "Couldn't allocate MINION app_data_offsets array");
return -ENOMEM;
}
minion_hdr->app_data_sizes = nvgpu_kcalloc(g,
minion_hdr->num_apps,
sizeof(u32));
if (!minion_hdr->app_data_sizes) {
nvgpu_err(g, "Couldn't allocate MINION app_data_sizes array");
return -ENOMEM;
}
/* Get app code offsets and sizes */
for (app = 0; app < minion_hdr->num_apps; app++) {
minion_hdr->app_code_offsets[app] =
nvgpu_nvlink_minion_extract_word(
nvgpu_minion_fw,
data_idx);
data_idx += 4;
minion_hdr->app_code_sizes[app] =
nvgpu_nvlink_minion_extract_word(
nvgpu_minion_fw,
data_idx);
data_idx += 4;
nvgpu_log(g, gpu_dbg_nvlink,
" - App Code:");
nvgpu_log(g, gpu_dbg_nvlink,
" - App #%d: Code Offset = %u, Code Size = %u",
app,
minion_hdr->app_code_offsets[app],
minion_hdr->app_code_sizes[app]);
}
/* Get app data offsets and sizes */
for (app = 0; app < minion_hdr->num_apps; app++) {
minion_hdr->app_data_offsets[app] =
nvgpu_nvlink_minion_extract_word(
nvgpu_minion_fw,
data_idx);
data_idx += 4;
minion_hdr->app_data_sizes[app] =
nvgpu_nvlink_minion_extract_word(
nvgpu_minion_fw,
data_idx);
data_idx += 4;
nvgpu_log(g, gpu_dbg_nvlink,
" - App Data:");
nvgpu_log(g, gpu_dbg_nvlink,
" - App #%d: Data Offset = %u, Data Size = %u",
app,
minion_hdr->app_data_offsets[app],
minion_hdr->app_data_sizes[app]);
}
minion_hdr->ovl_offset = nvgpu_nvlink_minion_extract_word(
nvgpu_minion_fw,
data_idx);
data_idx += 4;
minion_hdr->ovl_size = nvgpu_nvlink_minion_extract_word(
nvgpu_minion_fw,
data_idx);
data_idx += 4;
ndev->minion_img = &(nvgpu_minion_fw->data[data_idx]);
minion_hdr->ucode_data_size = nvgpu_minion_fw->size - data_idx;
nvgpu_log(g, gpu_dbg_nvlink,
" - Overlay Offset = %u", minion_hdr->ovl_offset);
nvgpu_log(g, gpu_dbg_nvlink,
" - Overlay Size = %u", minion_hdr->ovl_size);
nvgpu_log(g, gpu_dbg_nvlink,
" - Ucode Data Size = %u", minion_hdr->ucode_data_size);
/* Copy Non Secure IMEM code */
nvgpu_falcon_copy_to_imem(&g->minion_flcn, 0,
(u8 *)&ndev->minion_img[minion_hdr->os_code_offset],
minion_hdr->os_code_size, 0, false,
GET_IMEM_TAG(minion_hdr->os_code_offset));
/* Copy Non Secure DMEM code */
nvgpu_falcon_copy_to_dmem(&g->minion_flcn, 0,
(u8 *)&ndev->minion_img[minion_hdr->os_data_offset],
minion_hdr->os_data_size, 0);
/* Load the apps securely */
for (app = 0; app < minion_hdr->num_apps; app++) {
u32 app_code_start = minion_hdr->app_code_offsets[app];
u32 app_code_size = minion_hdr->app_code_sizes[app];
u32 app_data_start = minion_hdr->app_data_offsets[app];
u32 app_data_size = minion_hdr->app_data_sizes[app];
if (app_code_size)
nvgpu_falcon_copy_to_imem(&g->minion_flcn,
app_code_start,
(u8 *)&ndev->minion_img[app_code_start],
app_code_size, 0, true,
GET_IMEM_TAG(app_code_start));
if (app_data_size)
nvgpu_falcon_copy_to_dmem(&g->minion_flcn,
app_data_start,
(u8 *)&ndev->minion_img[app_data_start],
app_data_size, 0);
}
return err;
}
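/*
 * For reference, the parser above implies the following sequential layout of
 * 32-bit words at the start of the MINION firmware image (all offsets are in
 * bytes relative to ndev->minion_img, i.e. to the ucode data that follows the
 * header). This is a sketch inferred from the code, not a type used by the
 * driver:
 *
 *   os_code_offset, os_code_size, os_data_offset, os_data_size, num_apps,
 *   { app_code_offset, app_code_size }   repeated num_apps times,
 *   { app_data_offset, app_data_size }   repeated num_apps times,
 *   ovl_offset, ovl_size,
 *   <ucode data>
 */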
void nvgpu_mss_nvlink_init_credits(struct gk20a *g)
{
/* MSS_NVLINK_1_BASE */
void __iomem *soc1 = ioremap(0x01f20010, 4096);
/* MSS_NVLINK_2_BASE */
void __iomem *soc2 = ioremap(0x01f40010, 4096);
/* MSS_NVLINK_3_BASE */
void __iomem *soc3 = ioremap(0x01f60010, 4096);
/* MSS_NVLINK_4_BASE */
void __iomem *soc4 = ioremap(0x01f80010, 4096);
u32 val;
nvgpu_log(g, gpu_dbg_info, "init nvlink soc credits");
val = readl_relaxed(soc1);
writel_relaxed(val, soc1);
val = readl_relaxed(soc1 + 4);
writel_relaxed(val, soc1 + 4);
val = readl_relaxed(soc2);
writel_relaxed(val, soc2);
val = readl_relaxed(soc2 + 4);
writel_relaxed(val, soc2 + 4);
val = readl_relaxed(soc3);
writel_relaxed(val, soc3);
val = readl_relaxed(soc3 + 4);
writel_relaxed(val, soc3 + 4);
val = readl_relaxed(soc4);
writel_relaxed(val, soc4);
val = readl_relaxed(soc4 + 4);
writel_relaxed(val, soc4 + 4);
}
#endif /* CONFIG_NVGPU_NVLINK */
int nvgpu_nvlink_deinit(struct gk20a *g)
{
#ifdef CONFIG_NVGPU_NVLINK
struct nvlink_device *ndev = g->nvlink.priv;
int err;
if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_NVLINK))
return -ENODEV;
err = nvlink_shutdown(ndev);
if (err) {
nvgpu_err(g, "failed to shut down nvlink");
return err;
}
nvgpu_nvlink_remove(g);
return 0;
#endif
return -ENODEV;
}

View File

@@ -0,0 +1,22 @@
/*
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef NVGPU_OS_LINUX_NVLINK_H
struct gk20a;
int nvgpu_nvlink_deinit(struct gk20a *g);
#endif

View File

@@ -0,0 +1,462 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/mutex.h>
#ifdef CONFIG_NVGPU_NVLINK
#include <nvlink/common/tegra-nvlink.h>
#endif
#include <nvgpu/gk20a.h>
#include <nvgpu/nvlink.h>
#include <nvgpu/enabled.h>
#include "module.h"
#include <nvgpu/nvlink_probe.h>
#include <nvgpu/nvlink_device_reginit.h>
#include <nvgpu/nvlink_link_mode_transitions.h>
#ifdef CONFIG_NVGPU_NVLINK
int nvgpu_nvlink_read_dt_props(struct gk20a *g)
{
struct device_node *np;
struct nvlink_device *ndev = g->nvlink.priv;
u32 local_dev_id;
u32 local_link_id;
u32 remote_dev_id;
u32 remote_link_id;
bool is_master;
/* Parse DT */
np = nvgpu_get_node(g);
if (!np)
goto fail;
np = of_get_child_by_name(np, "nvidia,nvlink");
if (!np)
goto fail;
np = of_get_child_by_name(np, "endpoint");
if (!np)
goto fail;
/* Parse DT structure to detect endpoint topology */
of_property_read_u32(np, "local_dev_id", &local_dev_id);
of_property_read_u32(np, "local_link_id", &local_link_id);
of_property_read_u32(np, "remote_dev_id", &remote_dev_id);
of_property_read_u32(np, "remote_link_id", &remote_link_id);
is_master = of_property_read_bool(np, "is_master");
/* Check that we are in dGPU mode */
if (local_dev_id != NVLINK_ENDPT_GV100) {
nvgpu_err(g, "Local nvlink device is not dGPU");
return -EINVAL;
}
ndev->is_master = is_master;
ndev->device_id = local_dev_id;
ndev->link.link_id = local_link_id;
ndev->link.remote_dev_info.device_id = remote_dev_id;
ndev->link.remote_dev_info.link_id = remote_link_id;
return 0;
fail:
	nvgpu_info(g, "nvlink endpoint not found or invalid in DT");
return -ENODEV;
}
static int nvgpu_nvlink_ops_early_init(struct nvlink_device *ndev)
{
struct gk20a *g = (struct gk20a *) ndev->priv;
return nvgpu_nvlink_early_init(g);
}
static int nvgpu_nvlink_ops_link_early_init(struct nvlink_device *ndev)
{
struct gk20a *g = (struct gk20a *) ndev->priv;
return nvgpu_nvlink_link_early_init(g);
}
static int nvgpu_nvlink_ops_interface_init(struct nvlink_device *ndev)
{
struct gk20a *g = (struct gk20a *) ndev->priv;
return nvgpu_nvlink_interface_init(g);
}
static int nvgpu_nvlink_ops_interface_disable(struct nvlink_device *ndev)
{
struct gk20a *g = (struct gk20a *) ndev->priv;
return nvgpu_nvlink_interface_disable(g);
}
static int nvgpu_nvlink_ops_dev_shutdown(struct nvlink_device *ndev)
{
struct gk20a *g = (struct gk20a *) ndev->priv;
return nvgpu_nvlink_dev_shutdown(g);
}
static int nvgpu_nvlink_ops_reg_init(struct nvlink_device *ndev)
{
struct gk20a *g = (struct gk20a *) ndev->priv;
return nvgpu_nvlink_reg_init(g);
}
static u32 nvgpu_nvlink_ops_get_link_mode(struct nvlink_device *ndev)
{
struct gk20a *g = (struct gk20a *) ndev->priv;
enum nvgpu_nvlink_link_mode mode;
mode = nvgpu_nvlink_get_link_mode(g);
switch (mode) {
case nvgpu_nvlink_link_off:
return NVLINK_LINK_OFF;
case nvgpu_nvlink_link_hs:
return NVLINK_LINK_HS;
case nvgpu_nvlink_link_safe:
return NVLINK_LINK_SAFE;
case nvgpu_nvlink_link_fault:
return NVLINK_LINK_FAULT;
case nvgpu_nvlink_link_rcvy_ac:
return NVLINK_LINK_RCVY_AC;
case nvgpu_nvlink_link_rcvy_sw:
return NVLINK_LINK_RCVY_SW;
case nvgpu_nvlink_link_rcvy_rx:
return NVLINK_LINK_RCVY_RX;
case nvgpu_nvlink_link_detect:
return NVLINK_LINK_DETECT;
case nvgpu_nvlink_link_reset:
return NVLINK_LINK_RESET;
case nvgpu_nvlink_link_enable_pm:
return NVLINK_LINK_ENABLE_PM;
case nvgpu_nvlink_link_disable_pm:
return NVLINK_LINK_DISABLE_PM;
case nvgpu_nvlink_link_disable_err_detect:
return NVLINK_LINK_DISABLE_ERR_DETECT;
case nvgpu_nvlink_link_lane_disable:
return NVLINK_LINK_LANE_DISABLE;
case nvgpu_nvlink_link_lane_shutdown:
return NVLINK_LINK_LANE_SHUTDOWN;
default:
nvgpu_log(g, gpu_dbg_info | gpu_dbg_nvlink,
"unsupported mode %u", mode);
}
return NVLINK_LINK_OFF;
}
static u32 nvgpu_nvlink_ops_get_link_state(struct nvlink_device *ndev)
{
struct gk20a *g = (struct gk20a *) ndev->priv;
return nvgpu_nvlink_get_link_state(g);
}
static int nvgpu_nvlink_ops_set_link_mode(struct nvlink_device *ndev, u32 mode)
{
struct gk20a *g = (struct gk20a *) ndev->priv;
enum nvgpu_nvlink_link_mode mode_sw;
switch (mode) {
case NVLINK_LINK_OFF:
mode_sw = nvgpu_nvlink_link_off;
break;
case NVLINK_LINK_HS:
mode_sw = nvgpu_nvlink_link_hs;
break;
case NVLINK_LINK_SAFE:
mode_sw = nvgpu_nvlink_link_safe;
break;
case NVLINK_LINK_FAULT:
mode_sw = nvgpu_nvlink_link_fault;
break;
case NVLINK_LINK_RCVY_AC:
mode_sw = nvgpu_nvlink_link_rcvy_ac;
break;
case NVLINK_LINK_RCVY_SW:
mode_sw = nvgpu_nvlink_link_rcvy_sw;
break;
case NVLINK_LINK_RCVY_RX:
mode_sw = nvgpu_nvlink_link_rcvy_rx;
break;
case NVLINK_LINK_DETECT:
mode_sw = nvgpu_nvlink_link_detect;
break;
case NVLINK_LINK_RESET:
mode_sw = nvgpu_nvlink_link_reset;
break;
case NVLINK_LINK_ENABLE_PM:
mode_sw = nvgpu_nvlink_link_enable_pm;
break;
case NVLINK_LINK_DISABLE_PM:
mode_sw = nvgpu_nvlink_link_disable_pm;
break;
case NVLINK_LINK_DISABLE_ERR_DETECT:
mode_sw = nvgpu_nvlink_link_disable_err_detect;
break;
case NVLINK_LINK_LANE_DISABLE:
mode_sw = nvgpu_nvlink_link_lane_disable;
break;
case NVLINK_LINK_LANE_SHUTDOWN:
mode_sw = nvgpu_nvlink_link_lane_shutdown;
break;
default:
mode_sw = nvgpu_nvlink_link_off;
}
return nvgpu_nvlink_set_link_mode(g, mode_sw);
}
static void nvgpu_nvlink_ops_get_tx_sublink_state(struct nvlink_device *ndev,
u32 *tx_sublink_state)
{
struct gk20a *g = (struct gk20a *) ndev->priv;
return nvgpu_nvlink_get_tx_sublink_state(g, tx_sublink_state);
}
static void nvgpu_nvlink_ops_get_rx_sublink_state(struct nvlink_device *ndev,
u32 *rx_sublink_state)
{
struct gk20a *g = (struct gk20a *) ndev->priv;
return nvgpu_nvlink_get_rx_sublink_state(g, rx_sublink_state);
}
static u32 nvgpu_nvlink_ops_get_sublink_mode(struct nvlink_device *ndev,
bool is_rx_sublink)
{
struct gk20a *g = (struct gk20a *) ndev->priv;
enum nvgpu_nvlink_sublink_mode mode;
mode = nvgpu_nvlink_get_sublink_mode(g, is_rx_sublink);
switch (mode) {
case nvgpu_nvlink_sublink_tx_hs:
return NVLINK_TX_HS;
case nvgpu_nvlink_sublink_tx_off:
return NVLINK_TX_OFF;
case nvgpu_nvlink_sublink_tx_single_lane:
return NVLINK_TX_SINGLE_LANE;
case nvgpu_nvlink_sublink_tx_safe:
return NVLINK_TX_SAFE;
case nvgpu_nvlink_sublink_tx_enable_pm:
return NVLINK_TX_ENABLE_PM;
case nvgpu_nvlink_sublink_tx_disable_pm:
return NVLINK_TX_DISABLE_PM;
case nvgpu_nvlink_sublink_tx_common:
return NVLINK_TX_COMMON;
case nvgpu_nvlink_sublink_tx_common_disable:
return NVLINK_TX_COMMON_DISABLE;
case nvgpu_nvlink_sublink_tx_data_ready:
return NVLINK_TX_DATA_READY;
case nvgpu_nvlink_sublink_tx_prbs_en:
return NVLINK_TX_PRBS_EN;
case nvgpu_nvlink_sublink_rx_hs:
return NVLINK_RX_HS;
case nvgpu_nvlink_sublink_rx_enable_pm:
return NVLINK_RX_ENABLE_PM;
case nvgpu_nvlink_sublink_rx_disable_pm:
return NVLINK_RX_DISABLE_PM;
case nvgpu_nvlink_sublink_rx_single_lane:
return NVLINK_RX_SINGLE_LANE;
case nvgpu_nvlink_sublink_rx_safe:
return NVLINK_RX_SAFE;
case nvgpu_nvlink_sublink_rx_off:
return NVLINK_RX_OFF;
case nvgpu_nvlink_sublink_rx_rxcal:
return NVLINK_RX_RXCAL;
default:
nvgpu_log(g, gpu_dbg_nvlink, "Unsupported mode: %u", mode);
break;
}
if (is_rx_sublink)
return NVLINK_RX_OFF;
return NVLINK_TX_OFF;
}
static int nvgpu_nvlink_ops_set_sublink_mode(struct nvlink_device *ndev,
bool is_rx_sublink, u32 mode)
{
struct gk20a *g = (struct gk20a *) ndev->priv;
enum nvgpu_nvlink_sublink_mode mode_sw;
if (!is_rx_sublink) {
switch (mode) {
case NVLINK_TX_HS:
mode_sw = nvgpu_nvlink_sublink_tx_hs;
break;
case NVLINK_TX_ENABLE_PM:
mode_sw = nvgpu_nvlink_sublink_tx_enable_pm;
break;
case NVLINK_TX_DISABLE_PM:
mode_sw = nvgpu_nvlink_sublink_tx_disable_pm;
break;
case NVLINK_TX_SINGLE_LANE:
mode_sw = nvgpu_nvlink_sublink_tx_single_lane;
break;
case NVLINK_TX_SAFE:
mode_sw = nvgpu_nvlink_sublink_tx_safe;
break;
case NVLINK_TX_OFF:
mode_sw = nvgpu_nvlink_sublink_tx_off;
break;
case NVLINK_TX_COMMON:
mode_sw = nvgpu_nvlink_sublink_tx_common;
break;
case NVLINK_TX_COMMON_DISABLE:
mode_sw = nvgpu_nvlink_sublink_tx_common_disable;
break;
case NVLINK_TX_DATA_READY:
mode_sw = nvgpu_nvlink_sublink_tx_data_ready;
break;
case NVLINK_TX_PRBS_EN:
mode_sw = nvgpu_nvlink_sublink_tx_prbs_en;
break;
default:
return -EINVAL;
}
} else {
switch (mode) {
case NVLINK_RX_HS:
mode_sw = nvgpu_nvlink_sublink_rx_hs;
break;
case NVLINK_RX_ENABLE_PM:
mode_sw = nvgpu_nvlink_sublink_rx_enable_pm;
break;
case NVLINK_RX_DISABLE_PM:
mode_sw = nvgpu_nvlink_sublink_rx_disable_pm;
break;
case NVLINK_RX_SINGLE_LANE:
mode_sw = nvgpu_nvlink_sublink_rx_single_lane;
break;
case NVLINK_RX_SAFE:
mode_sw = nvgpu_nvlink_sublink_rx_safe;
break;
case NVLINK_RX_OFF:
mode_sw = nvgpu_nvlink_sublink_rx_off;
break;
case NVLINK_RX_RXCAL:
mode_sw = nvgpu_nvlink_sublink_rx_rxcal;
break;
default:
return -EINVAL;
}
}
return nvgpu_nvlink_set_sublink_mode(g, is_rx_sublink, mode_sw);
}
int nvgpu_nvlink_setup_ndev(struct gk20a *g)
{
struct nvlink_device *ndev;
/* Allocating structures */
ndev = nvgpu_kzalloc(g, sizeof(struct nvlink_device));
if (!ndev) {
nvgpu_err(g, "OOM while allocating nvlink device struct");
return -ENOMEM;
}
ndev->priv = (void *) g;
g->nvlink.priv = (void *) ndev;
return 0;
}
int nvgpu_nvlink_init_ops(struct gk20a *g)
{
struct nvlink_device *ndev = (struct nvlink_device *) g->nvlink.priv;
if (!ndev)
return -EINVAL;
/* Fill in device struct */
ndev->dev_ops.dev_early_init = nvgpu_nvlink_ops_early_init;
ndev->dev_ops.dev_interface_init = nvgpu_nvlink_ops_interface_init;
ndev->dev_ops.dev_reg_init = nvgpu_nvlink_ops_reg_init;
ndev->dev_ops.dev_interface_disable =
nvgpu_nvlink_ops_interface_disable;
ndev->dev_ops.dev_shutdown = nvgpu_nvlink_ops_dev_shutdown;
/* Fill in the link struct */
ndev->link.device_id = ndev->device_id;
ndev->link.mode = NVLINK_LINK_OFF;
ndev->link.is_sl_supported = false;
ndev->link.link_ops.get_link_mode = nvgpu_nvlink_ops_get_link_mode;
ndev->link.link_ops.set_link_mode = nvgpu_nvlink_ops_set_link_mode;
ndev->link.link_ops.get_sublink_mode =
nvgpu_nvlink_ops_get_sublink_mode;
ndev->link.link_ops.set_sublink_mode =
nvgpu_nvlink_ops_set_sublink_mode;
ndev->link.link_ops.get_link_state = nvgpu_nvlink_ops_get_link_state;
ndev->link.link_ops.get_tx_sublink_state =
nvgpu_nvlink_ops_get_tx_sublink_state;
ndev->link.link_ops.get_rx_sublink_state =
nvgpu_nvlink_ops_get_rx_sublink_state;
ndev->link.link_ops.link_early_init =
nvgpu_nvlink_ops_link_early_init;
return 0;
}
int nvgpu_nvlink_register_device(struct gk20a *g)
{
struct nvlink_device *ndev = (struct nvlink_device *) g->nvlink.priv;
if (!ndev)
return -ENODEV;
return nvlink_register_device(ndev);
}
int nvgpu_nvlink_unregister_device(struct gk20a *g)
{
struct nvlink_device *ndev = (struct nvlink_device *) g->nvlink.priv;
if (!ndev)
return -ENODEV;
return nvlink_unregister_device(ndev);
}
int nvgpu_nvlink_register_link(struct gk20a *g)
{
struct nvlink_device *ndev = (struct nvlink_device *) g->nvlink.priv;
if (!ndev)
return -ENODEV;
return nvlink_register_link(&ndev->link);
}
int nvgpu_nvlink_unregister_link(struct gk20a *g)
{
struct nvlink_device *ndev = (struct nvlink_device *) g->nvlink.priv;
if (!ndev)
return -ENODEV;
return nvlink_unregister_link(&ndev->link);
}
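/*
 * Illustrative sketch (compiled out): one plausible probe-time ordering of the
 * helpers in this file. DT properties are read before the ops are wired up so
 * that ndev->device_id is valid when the link struct is filled in; the actual
 * driver probe path may differ.
 */
#if 0
static int example_nvlink_setup(struct gk20a *g)
{
	int err;

	err = nvgpu_nvlink_setup_ndev(g);
	if (err)
		return err;

	err = nvgpu_nvlink_read_dt_props(g);
	if (err)
		return err;

	err = nvgpu_nvlink_init_ops(g);
	if (err)
		return err;

	err = nvgpu_nvlink_register_device(g);
	if (err)
		return err;

	return nvgpu_nvlink_register_link(g);
}
#endif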
#endif /* CONFIG_NVGPU_NVLINK */

View File

@@ -0,0 +1,77 @@
/*
* Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <nvgpu/types.h>
#include <nvgpu/os_fence.h>
#include <nvgpu/linux/os_fence_android.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/channel.h>
#include <nvgpu/nvhost.h>
#include "os_fence_priv.h"
#include "../drivers/staging/android/sync.h"
inline struct sync_fence *nvgpu_get_sync_fence(struct nvgpu_os_fence *s)
{
struct sync_fence *fence = (struct sync_fence *)s->priv;
return fence;
}
void nvgpu_os_fence_android_drop_ref(struct nvgpu_os_fence *s)
{
struct sync_fence *fence = nvgpu_get_sync_fence(s);
sync_fence_put(fence);
nvgpu_os_fence_clear(s);
}
int nvgpu_os_fence_android_install_fd(struct nvgpu_os_fence *s, int fd)
{
struct sync_fence *fence = nvgpu_get_sync_fence(s);
sync_fence_get(fence);
sync_fence_install(fence, fd);
return 0;
}
void nvgpu_os_fence_android_dup(struct nvgpu_os_fence *s)
{
struct sync_fence *fence = nvgpu_get_sync_fence(s);
sync_fence_get(fence);
}
int nvgpu_os_fence_fdget(struct nvgpu_os_fence *fence_out,
struct nvgpu_channel *c, int fd)
{
int err = -ENOSYS;
#ifdef CONFIG_TEGRA_GK20A_NVHOST
if (nvgpu_has_syncpoints(c->g)) {
err = nvgpu_os_fence_syncpt_fdget(fence_out, c, fd);
}
#endif
if (err)
err = nvgpu_os_fence_sema_fdget(fence_out, c, fd);
if (err)
nvgpu_err(c->g, "error obtaining fence from fd %d", fd);
return err;
}

Some files were not shown because too many files have changed in this diff