Mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git, synced 2025-12-22 17:36:20 +03:00
In order to perform timestamp correlation for FECS traces, we need to collect CPU / GPU timestamp samples. In the virtualization case, a guest can get GPU timestamps by using read_ptimer. However, if the CPU timestamp is read on the guest side and the GPU timestamp is read on the vm-server side, the round trip introduces latency that creates an artificial offset in the GPU timestamps (~2 us on average). For better CPU / GPU timestamp correlation, add a command to collect all timestamps on the vm-server side.

Bug 1900475

Change-Id: Idfdc6ae4c16c501dc5e00053a5b75932c55148d6
Signed-off-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-on: http://git-master/r/1472447
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Aparna Das <aparnad@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
185 lines · 4.8 KiB · C
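The point of the change above is where the timestamp pairs are taken: if the CPU clock is read in the guest while the GPU PTIMER is read by the vm-server, the round trip adds an artificial offset of roughly 2 us, so both reads should happen on the vm-server side, back to back. The sketch below only illustrates that pairing idea; read_cpu_ns() and read_gpu_ptimer() are hypothetical placeholders, not nvgpu functions, and the real driver exposes this through the get_timestamps_zipper HAL operation wired up in the file below. A correlation offset can then be fitted from the collected pairs, for example by averaging gpu_ns - cpu_ns.

/*
 * Minimal sketch, assuming hypothetical clock helpers: collect CPU/GPU
 * timestamp pairs on the same side, so each pair is separated only by the
 * cost of two back-to-back reads rather than a guest <-> vm-server round
 * trip.
 */
#include <stdint.h>

struct ts_pair {
        uint64_t cpu_ns;        /* CPU timestamp, in nanoseconds */
        uint64_t gpu_ns;        /* GPU PTIMER timestamp, in nanoseconds */
};

/* Placeholder clock sources; not part of the nvgpu API. */
extern uint64_t read_cpu_ns(void);
extern uint64_t read_gpu_ptimer(void);

static void sample_timestamps_zipper(struct ts_pair *pairs, unsigned int count)
{
        unsigned int i;

        for (i = 0; i < count; i++) {
                /* Read both clocks as close together as possible. */
                pairs[i].cpu_ns = read_cpu_ns();
                pairs[i].gpu_ns = read_gpu_ptimer();
        }
}
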
/*
 * drivers/video/tegra/host/gk20a/hal_gk20a.c
 *
 * GK20A Tegra HAL interface.
 *
 * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */

#include "hal_gk20a.h"
#include "ltc_gk20a.h"
#include "fb_gk20a.h"
#include "ctrl_gk20a.h"
#include "gk20a.h"
#include "gk20a_gating_reglist.h"
#include "channel_gk20a.h"
#include "gr_ctx_gk20a.h"
#include "fecs_trace_gk20a.h"
#include "mm_gk20a.h"
#include "mc_gk20a.h"
#include "pmu_gk20a.h"
#include "clk_gk20a.h"
#include "regops_gk20a.h"
#include "therm_gk20a.h"
#include "tsg_gk20a.h"
#include "dbg_gpu_gk20a.h"
#include "css_gr_gk20a.h"

#include <nvgpu/hw/gk20a/hw_proj_gk20a.h>

/*
 * Ops that are known statically for GK20A; the remaining HAL entries are
 * filled in by gk20a_init_hal() below.
 */
static struct gpu_ops gk20a_ops = {
        .clock_gating = {
                .slcg_gr_load_gating_prod =
                        gr_gk20a_slcg_gr_load_gating_prod,
                .slcg_perf_load_gating_prod =
                        gr_gk20a_slcg_perf_load_gating_prod,
                .slcg_ltc_load_gating_prod =
                        ltc_gk20a_slcg_ltc_load_gating_prod,
                .blcg_gr_load_gating_prod =
                        gr_gk20a_blcg_gr_load_gating_prod,
                .pg_gr_load_gating_prod =
                        gr_gk20a_pg_gr_load_gating_prod,
                .slcg_therm_load_gating_prod =
                        gr_gk20a_slcg_therm_load_gating_prod,
        },
};

/*
 * Resolve a GPU_LIT_* identifier to the GK20A-specific constant generated
 * in hw_proj_gk20a.h.
 */
static int gk20a_get_litter_value(struct gk20a *g, int value)
{
        int ret = EINVAL;
        switch (value) {
        case GPU_LIT_NUM_GPCS:
                ret = proj_scal_litter_num_gpcs_v();
                break;
        case GPU_LIT_NUM_PES_PER_GPC:
                ret = proj_scal_litter_num_pes_per_gpc_v();
                break;
        case GPU_LIT_NUM_ZCULL_BANKS:
                ret = proj_scal_litter_num_zcull_banks_v();
                break;
        case GPU_LIT_NUM_TPC_PER_GPC:
                ret = proj_scal_litter_num_tpc_per_gpc_v();
                break;
        case GPU_LIT_NUM_FBPS:
                ret = proj_scal_litter_num_fbps_v();
                break;
        case GPU_LIT_GPC_BASE:
                ret = proj_gpc_base_v();
                break;
        case GPU_LIT_GPC_STRIDE:
                ret = proj_gpc_stride_v();
                break;
        case GPU_LIT_GPC_SHARED_BASE:
                ret = proj_gpc_shared_base_v();
                break;
        case GPU_LIT_TPC_IN_GPC_BASE:
                ret = proj_tpc_in_gpc_base_v();
                break;
        case GPU_LIT_TPC_IN_GPC_STRIDE:
                ret = proj_tpc_in_gpc_stride_v();
                break;
        case GPU_LIT_TPC_IN_GPC_SHARED_BASE:
                ret = proj_tpc_in_gpc_shared_base_v();
                break;
        case GPU_LIT_PPC_IN_GPC_BASE:
                ret = proj_ppc_in_gpc_base_v();
                break;
        case GPU_LIT_PPC_IN_GPC_STRIDE:
                ret = proj_ppc_in_gpc_stride_v();
                break;
        case GPU_LIT_PPC_IN_GPC_SHARED_BASE:
                ret = proj_ppc_in_gpc_shared_base_v();
                break;
        case GPU_LIT_ROP_BASE:
                ret = proj_rop_base_v();
                break;
        case GPU_LIT_ROP_STRIDE:
                ret = proj_rop_stride_v();
                break;
        case GPU_LIT_ROP_SHARED_BASE:
                ret = proj_rop_shared_base_v();
                break;
        case GPU_LIT_HOST_NUM_ENGINES:
                ret = proj_host_num_engines_v();
                break;
        case GPU_LIT_HOST_NUM_PBDMA:
                ret = proj_host_num_pbdma_v();
                break;
        case GPU_LIT_LTC_STRIDE:
                ret = proj_ltc_stride_v();
                break;
        case GPU_LIT_LTS_STRIDE:
                ret = proj_lts_stride_v();
                break;
        /* GK20A does not have a FBPA unit, despite what's listed in the
         * hw headers or read back through NV_PTOP_SCAL_NUM_FBPAS,
         * so hardcode all values to 0.
         */
        case GPU_LIT_NUM_FBPAS:
        case GPU_LIT_FBPA_STRIDE:
        case GPU_LIT_FBPA_BASE:
        case GPU_LIT_FBPA_SHARED_BASE:
                ret = 0;
                break;
        default:
                gk20a_err(dev_from_gk20a(g), "Missing definition %d", value);
                BUG();
                break;
        }

        return ret;
}

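/*
 * Editorial example, not part of the original file: litter values are
 * typically consumed as "base + stride * index" when addressing per-unit
 * registers. The helper below is hypothetical and compiled out; it only
 * illustrates that pattern.
 */
#if 0	/* illustration only */
static u32 example_gpc_reg_offset(struct gk20a *g, u32 gpc_index, u32 reg)
{
        u32 gpc_base = g->ops.get_litter_value(g, GPU_LIT_GPC_BASE);
        u32 gpc_stride = g->ops.get_litter_value(g, GPU_LIT_GPC_STRIDE);

        /* offset of @reg within the register space of GPC @gpc_index */
        return reg + gpc_base + gpc_stride * gpc_index;
}
#endif
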
/*
 * Install the GK20A HAL: copy the static ops, let each unit hook up its
 * ops, and report the supported class IDs in the GPU characteristics.
 */
int gk20a_init_hal(struct gk20a *g)
{
        struct gpu_ops *gops = &g->ops;
        struct nvgpu_gpu_characteristics *c = &g->gpu_characteristics;

        *gops = gk20a_ops;
        gops->privsecurity = 0;
        gops->securegpccs = 0;
        gops->pmupstate = false;
        gk20a_init_mc(gops);
        gk20a_init_ltc(gops);
        gk20a_init_gr_ops(gops);
        gk20a_init_fecs_trace_ops(gops);
        gk20a_init_fb(gops);
        gk20a_init_fifo(gops);
        gk20a_init_ce2(gops);
        gk20a_init_gr_ctx(gops);
        gk20a_init_mm(gops);
        gk20a_init_pmu_ops(gops);
        gk20a_init_clk_ops(gops);
        gk20a_init_regops(gops);
        gk20a_init_debug_ops(gops);
        gk20a_init_dbg_session_ops(gops);
        gk20a_init_therm_ops(gops);
        gk20a_init_tsg_ops(gops);
#if defined(CONFIG_GK20A_CYCLE_STATS)
        gk20a_init_css_ops(gops);
#endif
        gops->name = "gk20a";
        gops->chip_init_gpu_characteristics = gk20a_init_gpu_characteristics;
        gops->get_litter_value = gk20a_get_litter_value;
        gops->read_ptimer = gk20a_read_ptimer;
        gops->bus.get_timestamps_zipper = gk20a_get_timestamps_zipper;

        c->twod_class = FERMI_TWOD_A;
        c->threed_class = KEPLER_C;
        c->compute_class = KEPLER_COMPUTE_A;
        c->gpfifo_class = KEPLER_CHANNEL_GPFIFO_C;
        c->inline_to_memory_class = KEPLER_INLINE_TO_MEMORY_A;
        c->dma_copy_class = KEPLER_DMA_COPY_A;

        return 0;
}