linux-nvgpu/drivers/gpu/nvgpu/vgpu/vgpu.c
Deepak Nibade 6090a8a7ee gpu: nvgpu: move debugfs code to linux module
Since all debugfs code is Linux-specific, remove it from common
code and move it to the Linux module.

Debugfs code is now divided into the following module-specific
files:

common/linux/debug.c
common/linux/debug_cde.c
common/linux/debug_ce.c
common/linux/debug_fifo.c
common/linux/debug_gr.c
common/linux/debug_mm.c
common/linux/debug_allocator.c
common/linux/debug_kmem.c
common/linux/debug_pmu.c
common/linux/debug_sched.c

Add corresponding header files for the above modules, and compile
all of these files only if CONFIG_DEBUG_FS is set.

Some more details of the changes made:

- Move and rename gk20a/debug_gk20a.c to common/linux/debug.c
- Move and rename gk20a/debug_gk20a.h to include/nvgpu/debug.h

- Remove gm20b/debug_gm20b.c and gm20b/debug_gm20b.h and call
  gk20a_init_debug_ops() directly from gm20b_init_hal()

- Update all debug APIs to receive a struct gk20a pointer as
  parameter instead of a struct device pointer (see the sketch
  after this list)
- Update API gk20a_dmabuf_get_state() to receive struct gk20a
  pointer instead of struct device

- Include <nvgpu/debug.h> explicitly in all files where debug
  operations are used
- Remove "gk20a/platform_gk20a.h" include from HAL files
  which no longer need this include

- Add new API gk20a_debug_deinit() to deinitialize debugfs
  and call it from gk20a_remove()
- Move API gk20a_debug_dump_all_channel_status_ramfc() to
  gk20a/fifo_gk20a.c
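
A minimal sketch of that signature change, using a hypothetical helper
name (the real debug entry points live in common/linux/debug.c and
include/nvgpu/debug.h); callers that only have a struct device now
resolve the gk20a pointer once instead of having the helper do it:

  /* before: debug helpers were keyed off the Linux device */
  void gk20a_debug_show_status(struct device *dev, struct gk20a_debug_output *o);

  /* after: they take the chip context directly */
  void gk20a_debug_show_status(struct gk20a *g, struct gk20a_debug_output *o);

  /* call sites convert once, e.g. via get_gk20a(dev) */
  gk20a_debug_show_status(get_gk20a(dev), o);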

Jira NVGPU-62

Change-Id: I076975d3d7f669bdbe9212fa33d98529377feeb6
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1488902
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Bharat Nihalani <bnihalani@nvidia.com>
2017-06-02 06:53:35 -07:00


/*
 * Virtualized GPU
 *
 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */
#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include <linux/pm_runtime.h>
#include <linux/pm_qos.h>
#include <soc/tegra/chip-id.h>
#include <nvgpu/kmem.h>
#include <nvgpu/bug.h>
#include <nvgpu/enabled.h>
#include <nvgpu/debug.h>
#include "vgpu/vgpu.h"
#include "vgpu/fecs_trace_vgpu.h"
#include "gk20a/hal_gk20a.h"
#include "gk20a/ctxsw_trace_gk20a.h"
#include "gk20a/tsg_gk20a.h"
#include "gk20a/gk20a_scale.h"
#include "gk20a/channel_gk20a.h"
#include "gm20b/hal_gm20b.h"
#include <nvgpu/hw/gk20a/hw_mc_gk20a.h>
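/*
 * Communication with the vgpu server goes through tegra_gr_comm queues.
 * The helpers below create and tear down the client context and perform a
 * synchronous command/response exchange on the CMD queue.
 */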
static inline int vgpu_comm_init(struct platform_device *pdev)
{
size_t queue_sizes[] = { TEGRA_VGPU_QUEUE_SIZES };
return tegra_gr_comm_init(pdev, TEGRA_GR_COMM_CTX_CLIENT, 3,
queue_sizes, TEGRA_VGPU_QUEUE_CMD,
ARRAY_SIZE(queue_sizes));
}
static inline void vgpu_comm_deinit(void)
{
size_t queue_sizes[] = { TEGRA_VGPU_QUEUE_SIZES };
tegra_gr_comm_deinit(TEGRA_GR_COMM_CTX_CLIENT, TEGRA_VGPU_QUEUE_CMD,
ARRAY_SIZE(queue_sizes));
}
int vgpu_comm_sendrecv(struct tegra_vgpu_cmd_msg *msg, size_t size_in,
size_t size_out)
{
void *handle;
size_t size = size_in;
void *data = msg;
int err;
err = tegra_gr_comm_sendrecv(TEGRA_GR_COMM_CTX_CLIENT,
tegra_gr_comm_get_server_vmid(),
TEGRA_VGPU_QUEUE_CMD, &handle, &data, &size);
if (!err) {
WARN_ON(size < size_out);
memcpy(msg, data, size_out);
tegra_gr_comm_release(handle);
}
return err;
}
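/*
 * Establish the session with the vgpu server. Returns the opaque handle
 * used by all subsequent virtualized GPU commands, or 0 on failure.
 */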
static u64 vgpu_connect(void)
{
struct tegra_vgpu_cmd_msg msg;
struct tegra_vgpu_connect_params *p = &msg.params.connect;
int err;
msg.cmd = TEGRA_VGPU_CMD_CONNECT;
p->module = TEGRA_VGPU_MODULE_GPU;
err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
return (err || msg.ret) ? 0 : p->handle;
}
int vgpu_get_attribute(u64 handle, u32 attrib, u32 *value)
{
struct tegra_vgpu_cmd_msg msg;
struct tegra_vgpu_attrib_params *p = &msg.params.attrib;
int err;
msg.cmd = TEGRA_VGPU_CMD_GET_ATTRIBUTE;
msg.handle = handle;
p->attrib = attrib;
err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
if (err || msg.ret)
return -1;
*value = p->value;
return 0;
}
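/*
 * Post a server-reported event to the target channel or TSG, after
 * validating the channel id and event id.
 */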
static void vgpu_handle_channel_event(struct gk20a *g,
struct tegra_vgpu_channel_event_info *info)
{
if (info->id >= g->fifo.num_channels ||
info->event_id >= NVGPU_IOCTL_CHANNEL_EVENT_ID_MAX) {
nvgpu_err(g, "invalid channel event");
return;
}
if (info->is_tsg) {
struct tsg_gk20a *tsg = &g->fifo.tsg[info->id];
gk20a_tsg_event_id_post_event(tsg, info->event_id);
} else {
struct channel_gk20a *ch = &g->fifo.channel[info->id];
if (!gk20a_channel_get(ch)) {
nvgpu_err(g, "invalid channel %d for event %d",
(int)info->id, (int)info->event_id);
return;
}
gk20a_channel_event_id_post_event(ch, info->event_id);
gk20a_channel_put(ch);
}
}
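/*
 * Interrupt handler thread: drain the INTR queue from the server and
 * dispatch each message to the matching ISR until an ABORT event is
 * received, then idle until nvgpu_thread_stop() is called.
 */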
static int vgpu_intr_thread(void *dev_id)
{
struct gk20a *g = dev_id;
struct vgpu_priv_data *priv = vgpu_get_priv_data(g);
while (true) {
struct tegra_vgpu_intr_msg *msg;
u32 sender;
void *handle;
size_t size;
int err;
err = tegra_gr_comm_recv(TEGRA_GR_COMM_CTX_CLIENT,
TEGRA_VGPU_QUEUE_INTR, &handle,
(void **)&msg, &size, &sender);
if (err == -ETIME)
continue;
if (WARN_ON(err))
continue;
if (msg->event == TEGRA_VGPU_EVENT_ABORT) {
tegra_gr_comm_release(handle);
break;
}
switch (msg->event) {
case TEGRA_VGPU_EVENT_INTR:
if (msg->unit == TEGRA_VGPU_INTR_GR)
vgpu_gr_isr(g, &msg->info.gr_intr);
else if (msg->unit == TEGRA_VGPU_NONSTALL_INTR_GR)
vgpu_gr_nonstall_isr(g,
&msg->info.gr_nonstall_intr);
else if (msg->unit == TEGRA_VGPU_INTR_FIFO)
vgpu_fifo_isr(g, &msg->info.fifo_intr);
else if (msg->unit == TEGRA_VGPU_NONSTALL_INTR_FIFO)
vgpu_fifo_nonstall_isr(g,
&msg->info.fifo_nonstall_intr);
else if (msg->unit == TEGRA_VGPU_NONSTALL_INTR_CE2)
vgpu_ce2_nonstall_isr(g,
&msg->info.ce2_nonstall_intr);
break;
case TEGRA_VGPU_EVENT_FECS_TRACE:
vgpu_fecs_trace_data_update(g);
break;
case TEGRA_VGPU_EVENT_CHANNEL:
vgpu_handle_channel_event(g, &msg->info.channel_event);
break;
case TEGRA_VGPU_EVENT_SM_ESR:
vgpu_gr_handle_sm_esr_event(g, &msg->info.sm_esr);
break;
default:
nvgpu_err(g, "unknown event %u", msg->event);
break;
}
tegra_gr_comm_release(handle);
}
while (!nvgpu_thread_should_stop(&priv->intr_handler))
msleep(10);
return 0;
}
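/*
 * Tear down per-unit support, ask the interrupt thread to abort by
 * posting an ABORT event to ourselves, and unmap BAR1.
 */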
static void vgpu_remove_support(struct gk20a *g)
{
struct vgpu_priv_data *priv = vgpu_get_priv_data_from_dev(g->dev);
struct tegra_vgpu_intr_msg msg;
int err;
if (g->dbg_regops_tmp_buf)
nvgpu_kfree(g, g->dbg_regops_tmp_buf);
if (g->pmu.remove_support)
g->pmu.remove_support(&g->pmu);
if (g->gr.remove_support)
g->gr.remove_support(&g->gr);
if (g->fifo.remove_support)
g->fifo.remove_support(&g->fifo);
if (g->mm.remove_support)
g->mm.remove_support(&g->mm);
msg.event = TEGRA_VGPU_EVENT_ABORT;
err = tegra_gr_comm_send(TEGRA_GR_COMM_CTX_CLIENT,
TEGRA_GR_COMM_ID_SELF, TEGRA_VGPU_QUEUE_INTR,
&msg, sizeof(msg));
WARN_ON(err);
nvgpu_thread_stop(&priv->intr_handler);
/* free mappings to registers, etc*/
if (g->bar1) {
iounmap(g->bar1);
g->bar1 = NULL;
}
}
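/* Cache platform-data tunables in struct gk20a and set up early locks and lists. */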
static void vgpu_init_vars(struct gk20a *g, struct gk20a_platform *platform)
{
nvgpu_mutex_init(&g->poweroff_lock);
g->regs_saved = g->regs;
g->bar1_saved = g->bar1;
nvgpu_init_list_node(&g->pending_sema_waits);
nvgpu_raw_spinlock_init(&g->pending_sema_waits_lock);
g->aggressive_sync_destroy = platform->aggressive_sync_destroy;
g->aggressive_sync_destroy_thresh = platform->aggressive_sync_destroy_thresh;
g->has_syncpoints = platform->has_syncpoints;
g->ptimer_src_freq = platform->ptimer_src_freq;
g->can_railgate = platform->can_railgate_init;
g->railgate_delay = platform->railgate_delay_init;
__nvgpu_set_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES,
platform->unify_address_spaces);
}
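/* Map BAR1 and initialize the debugger/regops state shared with native gk20a. */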
static int vgpu_init_support(struct platform_device *pdev)
{
struct resource *r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
struct gk20a *g = get_gk20a(&pdev->dev);
int err = 0;
if (!r) {
dev_err(dev_from_gk20a(g), "faield to get gk20a bar1\n");
err = -ENXIO;
goto fail;
}
g->bar1 = devm_ioremap_resource(&pdev->dev, r);
if (IS_ERR(g->bar1)) {
dev_err(dev_from_gk20a(g), "failed to remap gk20a bar1\n");
err = PTR_ERR(g->bar1);
goto fail;
}
g->bar1_mem = r;
nvgpu_mutex_init(&g->dbg_sessions_lock);
nvgpu_mutex_init(&g->client_lock);
nvgpu_init_list_node(&g->profiler_objects);
g->dbg_regops_tmp_buf = nvgpu_kzalloc(g, SZ_4K);
if (!g->dbg_regops_tmp_buf) {
dev_err(g->dev, "couldn't allocate regops tmp buf");
return -ENOMEM;
}
g->dbg_regops_tmp_buf_ops =
SZ_4K / sizeof(g->dbg_regops_tmp_buf[0]);
g->remove_support = vgpu_remove_support;
return 0;
fail:
vgpu_remove_support(g);
return err;
}
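/* Runtime-PM suspend path: suspend all channels before marking the GPU as powered off. */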
int vgpu_pm_prepare_poweroff(struct device *dev)
{
struct gk20a *g = get_gk20a(dev);
int ret = 0;
gk20a_dbg_fn("");
if (!g->power_on)
return 0;
ret = gk20a_channel_suspend(g);
if (ret)
return ret;
g->power_on = false;
return ret;
}
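/* Chip identification comes from the constants cached from the server, not from hardware registers. */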
static void vgpu_detect_chip(struct gk20a *g)
{
struct nvgpu_gpu_characteristics *gpu = &g->gpu_characteristics;
struct vgpu_priv_data *priv = vgpu_get_priv_data(g);
gpu->arch = priv->constants.arch;
gpu->impl = priv->constants.impl;
gpu->rev = priv->constants.rev;
gk20a_dbg_info("arch: %x, impl: %x, rev: %x\n",
g->gpu_characteristics.arch,
g->gpu_characteristics.impl,
g->gpu_characteristics.rev);
}
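/* Fill in the characteristics reported to userspace, masking features the virtual GPU does not support. */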
static int vgpu_init_gpu_characteristics(struct gk20a *g)
{
struct vgpu_priv_data *priv = vgpu_get_priv_data(g);
int err;
gk20a_dbg_fn("");
err = gk20a_init_gpu_characteristics(g);
if (err)
return err;
g->gpu_characteristics.max_freq = priv->constants.max_freq;
g->gpu_characteristics.map_buffer_batch_limit = 0;
/* features vgpu does not support */
g->gpu_characteristics.flags &= ~NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS;
g->gpu_characteristics.flags &= ~NVGPU_GPU_FLAGS_SUPPORT_MAP_COMPBITS;
return 0;
}
static int vgpu_read_ptimer(struct gk20a *g, u64 *value)
{
struct tegra_vgpu_cmd_msg msg = {0};
struct tegra_vgpu_read_ptimer_params *p = &msg.params.read_ptimer;
int err;
gk20a_dbg_fn("");
msg.cmd = TEGRA_VGPU_CMD_READ_PTIMER;
msg.handle = vgpu_get_handle(g);
err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
err = err ? err : msg.ret;
if (!err)
*value = p->time;
else
nvgpu_err(g, "vgpu read ptimer failed, err=%d", err);
return err;
}
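/* Install the vgpu implementations of the HAL ops shared by all supported chips. */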
void vgpu_init_hal_common(struct gk20a *g)
{
struct gpu_ops *gops = &g->ops;
vgpu_init_fifo_ops(gops);
vgpu_init_gr_ops(gops);
vgpu_init_ltc_ops(gops);
vgpu_init_mm_ops(gops);
vgpu_init_debug_ops(gops);
vgpu_init_dbg_session_ops(gops);
vgpu_init_fecs_trace_ops(gops);
vgpu_init_tsg_ops(gops);
#if defined(CONFIG_GK20A_CYCLE_STATS)
vgpu_init_css_ops(gops);
#endif
gops->chip_init_gpu_characteristics = vgpu_init_gpu_characteristics;
gops->bus.read_ptimer = vgpu_read_ptimer;
}
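/* Select the chip-specific vgpu HAL from the arch/impl id reported by the server. */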
static int vgpu_init_hal(struct gk20a *g)
{
u32 ver = g->gpu_characteristics.arch + g->gpu_characteristics.impl;
int err;
switch (ver) {
case GK20A_GPUID_GK20A:
gk20a_dbg_info("gk20a detected");
err = vgpu_gk20a_init_hal(g);
break;
case GK20A_GPUID_GM20B:
gk20a_dbg_info("gm20b detected");
err = vgpu_gm20b_init_hal(g);
break;
case NVGPU_GPUID_GP10B:
gk20a_dbg_info("gp10b detected");
err = vgpu_gp10b_init_hal(g);
break;
default:
nvgpu_err(g, "no support for %x", ver);
err = -ENODEV;
break;
}
return err;
}
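/* Runtime-PM resume path: detect the chip, install the HAL, and bring up mm, fifo and gr support. */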
int vgpu_pm_finalize_poweron(struct device *dev)
{
struct gk20a *g = get_gk20a(dev);
int err;
gk20a_dbg_fn("");
if (g->power_on)
return 0;
g->power_on = true;
vgpu_detect_chip(g);
err = vgpu_init_hal(g);
if (err)
goto done;
if (g->ops.ltc.init_fs_state)
g->ops.ltc.init_fs_state(g);
err = vgpu_init_mm_support(g);
if (err) {
nvgpu_err(g, "failed to init gk20a mm");
goto done;
}
err = vgpu_init_fifo_support(g);
if (err) {
nvgpu_err(g, "failed to init gk20a fifo");
goto done;
}
err = vgpu_init_gr_support(g);
if (err) {
nvgpu_err(g, "failed to init gk20a gr");
goto done;
}
err = g->ops.chip_init_gpu_characteristics(g);
if (err) {
nvgpu_err(g, "failed to init gk20a gpu characteristics");
goto done;
}
gk20a_ctxsw_trace_init(g);
gk20a_sched_ctrl_init(g);
gk20a_channel_resume(g);
done:
return err;
}
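/* PM QoS notifier: forward the new maximum GPU frequency bound to the server as a clock rate request. */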
static int vgpu_qos_notify(struct notifier_block *nb,
unsigned long n, void *data)
{
struct gk20a_scale_profile *profile =
container_of(nb, struct gk20a_scale_profile,
qos_notify_block);
struct gk20a *g = get_gk20a(profile->dev);
struct tegra_vgpu_cmd_msg msg = {};
struct tegra_vgpu_gpu_clk_rate_params *p = &msg.params.gpu_clk_rate;
u32 max_freq;
int err;
gk20a_dbg_fn("");
max_freq = (u32)pm_qos_read_max_bound(PM_QOS_GPU_FREQ_BOUNDS);
msg.cmd = TEGRA_VGPU_CMD_SET_GPU_CLK_RATE;
msg.handle = vgpu_get_handle_from_dev(profile->dev);
p->rate = max_freq;
err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
err = err ? err : msg.ret;
if (err)
nvgpu_err(g, "%s failed, err=%d", __func__, err);
return NOTIFY_OK; /* let the remaining notifiers in the chain run */
}
static int vgpu_pm_qos_init(struct device *dev)
{
struct gk20a *g = get_gk20a(dev);
struct gk20a_scale_profile *profile;
profile = nvgpu_kzalloc(g, sizeof(*profile));
if (!profile)
return -ENOMEM;
profile->dev = dev;
profile->qos_notify_block.notifier_call = vgpu_qos_notify;
g->scale_profile = profile;
pm_qos_add_max_notifier(PM_QOS_GPU_FREQ_BOUNDS,
&profile->qos_notify_block);
return 0;
}
static void vgpu_pm_qos_remove(struct device *dev)
{
struct gk20a *g = get_gk20a(dev);
pm_qos_remove_max_notifier(PM_QOS_GPU_FREQ_BOUNDS,
&g->scale_profile->qos_notify_block);
nvgpu_kfree(g, g->scale_profile);
g->scale_profile = NULL;
}
static int vgpu_pm_init(struct device *dev)
{
int err = 0;
gk20a_dbg_fn("");
__pm_runtime_disable(dev, false);
err = vgpu_pm_qos_init(dev);
if (err)
return err;
return err;
}
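/* Fetch the static GPU configuration from the server and cache it in priv->constants, with a bounds check on the GPC/TPC counts. */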
static int vgpu_get_constants(struct gk20a *g)
{
struct tegra_vgpu_cmd_msg msg = {};
struct tegra_vgpu_constants_params *p = &msg.params.constants;
struct vgpu_priv_data *priv = vgpu_get_priv_data(g);
int err;
gk20a_dbg_fn("");
msg.cmd = TEGRA_VGPU_CMD_GET_CONSTANTS;
msg.handle = vgpu_get_handle(g);
err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
err = err ? err : msg.ret;
if (unlikely(err)) {
nvgpu_err(g, "%s failed, err=%d", __func__, err);
return err;
}
if (unlikely(p->gpc_count > TEGRA_VGPU_MAX_GPC_COUNT ||
p->max_tpc_per_gpc_count > TEGRA_VGPU_MAX_TPC_COUNT_PER_GPC)) {
nvgpu_err(g, "gpc_count %d max_tpc_per_gpc %d overflow",
(int)p->gpc_count, (int)p->max_tpc_per_gpc_count);
return -EINVAL;
}
priv->constants = *p;
return 0;
}
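/* Platform driver probe: allocate struct gk20a, connect to the vgpu server and start the interrupt handler thread. */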
int vgpu_probe(struct platform_device *pdev)
{
struct gk20a *gk20a;
int err;
struct device *dev = &pdev->dev;
struct gk20a_platform *platform = gk20a_get_platform(dev);
struct vgpu_priv_data *priv;
if (!platform) {
dev_err(dev, "no platform data\n");
return -ENODATA;
}
gk20a_dbg_fn("");
gk20a = kzalloc(sizeof(struct gk20a), GFP_KERNEL);
if (!gk20a) {
dev_err(dev, "couldn't allocate gk20a support");
return -ENOMEM;
}
nvgpu_kmem_init(gk20a);
err = nvgpu_init_enabled_flags(gk20a);
if (err) {
kfree(gk20a);
return err;
}
gk20a->dev = dev;
if (tegra_platform_is_linsim() || tegra_platform_is_vdk())
__nvgpu_set_enabled(gk20a, NVGPU_IS_FMODEL, true);
gk20a->is_virtual = true;
priv = nvgpu_kzalloc(gk20a, sizeof(*priv));
if (!priv) {
kfree(gk20a);
return -ENOMEM;
}
platform->g = gk20a;
platform->vgpu_priv = priv;
err = gk20a_user_init(dev, INTERFACE_NAME, &nvgpu_class);
if (err)
return err;
vgpu_init_support(pdev);
vgpu_init_vars(gk20a, platform);
init_rwsem(&gk20a->busy_lock);
nvgpu_spinlock_init(&gk20a->mc_enable_lock);
gk20a->ch_wdt_timeout_ms = platform->ch_wdt_timeout_ms;
/* Initialize the platform interface. */
err = platform->probe(dev);
if (err) {
dev_err(dev, "platform probe failed");
return err;
}
err = vgpu_pm_init(dev);
if (err) {
dev_err(dev, "pm init failed");
return err;
}
if (platform->late_probe) {
err = platform->late_probe(dev);
if (err) {
dev_err(dev, "late probe failed");
return err;
}
}
err = vgpu_comm_init(pdev);
if (err) {
dev_err(dev, "failed to init comm interface\n");
return -ENOSYS;
}
priv->virt_handle = vgpu_connect();
if (!priv->virt_handle) {
dev_err(dev, "failed to connect to server node\n");
vgpu_comm_deinit();
return -ENOSYS;
}
err = vgpu_get_constants(gk20a);
if (err) {
vgpu_comm_deinit();
return err;
}
err = nvgpu_thread_create(&priv->intr_handler, gk20a,
vgpu_intr_thread, "gk20a");
if (err)
return err;
gk20a_debug_init(gk20a, "gpu.0");
/* Set DMA parameters to allow larger sgt lists */
dev->dma_parms = &gk20a->dma_parms;
dma_set_max_seg_size(dev, UINT_MAX);
gk20a->gr_idle_timeout_default =
CONFIG_GK20A_DEFAULT_TIMEOUT;
gk20a->timeouts_enabled = true;
vgpu_create_sysfs(dev);
gk20a_init_gr(gk20a);
kref_init(&gk20a->refcount);
return 0;
}
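/* Platform driver remove: undo probe in reverse order and drop the final reference on struct gk20a. */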
int vgpu_remove(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct gk20a *g = get_gk20a(dev);
gk20a_dbg_fn("");
vgpu_pm_qos_remove(dev);
if (g->remove_support)
g->remove_support(g);
vgpu_comm_deinit();
gk20a_sched_ctrl_cleanup(g);
gk20a_user_deinit(dev, &nvgpu_class);
vgpu_remove_sysfs(dev);
gk20a_get_platform(dev)->g = NULL;
gk20a_put(g);
return 0;
}