mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-23 01:50:07 +03:00
gpu: nvgpu: GPU characteristics enhancement
New members are added to nvgpu_gpu_characteristics to export more information, required especially by CUDA tools. Change-Id: I907f3bcbd272405a13f47ef6236bc2cff01c6c80 Signed-off-by: Sujeet Baranwal <sbaranwal@nvidia.com> Reviewed-on: http://git-master/r/679202 Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
This commit is contained in:
committed by
Dan Willemsen
parent
895675e1d5
commit
2155dfeaba
@@ -496,6 +496,28 @@ static int nvgpu_gpu_ioctl_has_any_exception(
|
||||
return err;
|
||||
}
|
||||
|
||||
static int gk20a_ctrl_get_num_vsms(struct gk20a *g,
|
||||
struct nvgpu_gpu_num_vsms *args)
|
||||
{
|
||||
struct gr_gk20a *gr = &g->gr;
|
||||
args->num_vsms = gr->no_of_sm;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int gk20a_ctrl_vsm_mapping(struct gk20a *g,
|
||||
struct nvgpu_gpu_vsms_mapping *args)
|
||||
{
|
||||
int err = 0;
|
||||
struct gr_gk20a *gr = &g->gr;
|
||||
size_t write_size = gr->no_of_sm * sizeof(struct sm_info);
|
||||
|
||||
err = copy_to_user((void __user *)(uintptr_t)
|
||||
args->vsms_map_buf_addr,
|
||||
gr->sm_to_cluster, write_size);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
||||
{
|
||||
struct platform_device *dev = filp->private_data;
|
||||
@@ -705,6 +727,16 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
|
||||
(struct nvgpu_gpu_tpc_exception_en_status_args *)buf);
|
||||
break;
|
||||
|
||||
case NVGPU_GPU_IOCTL_NUM_VSMS:
|
||||
err = gk20a_ctrl_get_num_vsms(g,
|
||||
(struct nvgpu_gpu_num_vsms *)buf);
|
||||
break;
|
||||
case NVGPU_GPU_IOCTL_VSMS_MAPPING:
|
||||
err = gk20a_ctrl_vsm_mapping(g,
|
||||
(struct nvgpu_gpu_vsms_mapping *)buf);
|
||||
break;
|
||||
|
||||
|
||||
default:
|
||||
dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd);
|
||||
err = -ENOTTY;
|
||||
|
||||
@@ -1927,6 +1927,8 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
|
||||
gpu->on_board_video_memory_size = 0; /* integrated GPU */
|
||||
|
||||
gpu->num_gpc = g->gr.gpc_count;
|
||||
gpu->max_gpc_count = g->gr.gpc_count;
|
||||
|
||||
gpu->num_tpc_per_gpc = g->gr.max_tpc_per_gpc_count;
|
||||
|
||||
gpu->bus_type = NVGPU_GPU_BUS_TYPE_AXI; /* always AXI for now */
|
||||
@@ -1962,9 +1964,15 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
|
||||
gpu->dbg_gpu_ioctl_nr_last = NVGPU_DBG_GPU_IOCTL_LAST;
|
||||
gpu->ioctl_channel_nr_last = NVGPU_IOCTL_CHANNEL_LAST;
|
||||
gpu->as_ioctl_nr_last = NVGPU_AS_IOCTL_LAST;
|
||||
|
||||
gpu->gpu_va_bit_count = 40;
|
||||
|
||||
memcpy(gpu->chipname, g->ops.name, strlen(g->ops.name));
|
||||
gpu->max_fbps_count = g->ops.gr.get_max_fbps_count(g);
|
||||
gpu->fbp_en_mask = g->ops.gr.get_fbp_en_mask(g);
|
||||
gpu->max_ltc_per_fbp = g->ops.gr.get_max_ltc_per_fbp(g);
|
||||
gpu->max_lts_per_ltc = g->ops.gr.get_max_lts_per_ltc(g);
|
||||
g->ops.gr.get_rop_l2_en_mask(g);
|
||||
|
||||
gpu->reserved = 0;
|
||||
|
||||
return 0;
|
||||
|
||||
@@ -167,6 +167,11 @@ struct gpu_ops {
|
||||
struct gk20a_debug_output *o);
|
||||
int (*update_pc_sampling)(struct channel_gk20a *ch,
|
||||
bool enable);
|
||||
u32 (*get_max_fbps_count)(struct gk20a *g);
|
||||
u32 (*get_fbp_en_mask)(struct gk20a *g);
|
||||
u32 (*get_max_ltc_per_fbp)(struct gk20a *g);
|
||||
u32 (*get_max_lts_per_ltc)(struct gk20a *g);
|
||||
u32* (*get_rop_l2_en_mask)(struct gk20a *g);
|
||||
} gr;
|
||||
const char *name;
|
||||
struct {
|
||||
|
||||
@@ -3066,6 +3066,8 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
|
||||
tmp = gk20a_readl(g, top_num_fbps_r());
|
||||
gr->max_fbps_count = top_num_fbps_value_v(tmp);
|
||||
|
||||
gr->fbp_en_mask = g->ops.gr.get_fbp_en_mask(g);
|
||||
|
||||
tmp = gk20a_readl(g, top_tpc_per_gpc_r());
|
||||
gr->max_tpc_per_gpc_count = top_tpc_per_gpc_value_v(tmp);
|
||||
|
||||
@@ -7313,6 +7315,41 @@ static u32 gr_gk20a_pagepool_default_size(struct gk20a *g)
|
||||
return gr_scc_pagepool_total_pages_hwmax_value_v();
|
||||
}
|
||||
|
||||
/*
 * Read the register at top_num_fbps_r() and return the field that
 * top_num_fbps_value_v() extracts from it.
 */
static u32 gr_gk20a_get_max_fbps_count(struct gk20a *g)
{
	u32 num_fbps_reg = gk20a_readl(g, top_num_fbps_r());

	return top_num_fbps_value_v(num_fbps_reg);
}
|
||||
|
||||
|
||||
/*
 * Read the register at top_fs_status_fbp_r() and return its cluster field
 * (the low 16 bits, per top_fs_status_fbp_cluster_v()).
 */
static u32 gr_gk20a_get_fbp_en_mask(struct gk20a *g)
{
	u32 fs_status = gk20a_readl(g, top_fs_status_fbp_r());

	return top_fs_status_fbp_cluster_v(fs_status);
}
|
||||
|
||||
/*
 * gk20a value for the get_max_ltc_per_fbp op: always returns 1 and does not
 * touch @g. Installed in gops->gr by gk20a_init_gr_ops().
 */
static u32 gr_gk20a_get_max_ltc_per_fbp(struct gk20a *g)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
 * gk20a value for the get_max_lts_per_ltc op: always returns 1 and does not
 * touch @g. Installed in gops->gr by gk20a_init_gr_ops().
 */
static u32 gr_gk20a_get_max_lts_per_ltc(struct gk20a *g)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
 * gk20a implementation of the get_rop_l2_en_mask op: there is no mask to
 * report, so it returns NULL and leaves @g untouched. Callers must be
 * prepared for a NULL result.
 */
u32 *gr_gk20a_rop_l2_en_mask(struct gk20a *g)
|
||||
{
|
||||
/* gk20a doesn't have rop_l2_en_mask */
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
|
||||
static int gr_gk20a_dump_gr_status_regs(struct gk20a *g,
|
||||
struct gk20a_debug_output *o)
|
||||
{
|
||||
@@ -7470,5 +7507,9 @@ void gk20a_init_gr_ops(struct gpu_ops *gops)
|
||||
gops->gr.alloc_gr_ctx = gr_gk20a_alloc_gr_ctx;
|
||||
gops->gr.free_gr_ctx = gr_gk20a_free_gr_ctx;
|
||||
gops->gr.dump_gr_regs = gr_gk20a_dump_gr_status_regs;
|
||||
gops->gr.get_max_fbps_count = gr_gk20a_get_max_fbps_count;
|
||||
gops->gr.get_fbp_en_mask = gr_gk20a_get_fbp_en_mask;
|
||||
gops->gr.get_max_ltc_per_fbp = gr_gk20a_get_max_ltc_per_fbp;
|
||||
gops->gr.get_max_lts_per_ltc = gr_gk20a_get_max_lts_per_ltc;
|
||||
gops->gr.get_rop_l2_en_mask = gr_gk20a_rop_l2_en_mask;
|
||||
}
|
||||
|
||||
|
||||
@@ -295,6 +295,7 @@ struct gr_gk20a {
|
||||
#ifdef CONFIG_ARCH_TEGRA_18x_SOC
|
||||
struct gr_t18x t18x;
|
||||
#endif
|
||||
u32 fbp_en_mask;
|
||||
u32 no_of_sm;
|
||||
struct sm_info *sm_to_cluster;
|
||||
};
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2012-2014, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2012-2015, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
@@ -138,4 +138,28 @@ static inline u32 top_device_info_entry_enum_v(void)
|
||||
{
|
||||
return 0x00000002;
|
||||
}
|
||||
/*
 * Returns the constant 0x00022548. gr_gk20a_get_fbp_en_mask() passes it to
 * gk20a_readl() as the register to read.
 */
static inline u32 top_fs_status_fbp_r(void)
|
||||
{
|
||||
return 0x00022548;
|
||||
}
|
||||
/*
 * Extracts bits 15:0 of @r. gr_gk20a_get_fbp_en_mask() applies it to the
 * value read from top_fs_status_fbp_r().
 */
static inline u32 top_fs_status_fbp_cluster_v(u32 r)
|
||||
{
|
||||
return (r >> 0) & 0xffff;
|
||||
}
|
||||
/*
 * Returns the constant 0x00000000.
 * NOTE(review): presumably the cluster field value meaning "enabled" --
 * confirm against the hardware manual.
 */
static inline u32 top_fs_status_fbp_cluster_enable_v(void)
|
||||
{
|
||||
return 0x00000000;
|
||||
}
|
||||
/*
 * Returns the constant 0x0.
 * NOTE(review): presumably the "enabled" value already positioned within the
 * register -- confirm against the hardware manual.
 */
static inline u32 top_fs_status_fbp_cluster_enable_f(void)
|
||||
{
|
||||
return 0x0;
|
||||
}
|
||||
/*
 * Returns the constant 0x00000001.
 * NOTE(review): presumably the cluster field value meaning "disabled" --
 * confirm against the hardware manual.
 */
static inline u32 top_fs_status_fbp_cluster_disable_v(void)
|
||||
{
|
||||
return 0x00000001;
|
||||
}
|
||||
/*
 * Returns the constant 0x1.
 * NOTE(review): presumably the "disabled" value already positioned within the
 * register -- confirm against the hardware manual.
 */
static inline u32 top_fs_status_fbp_cluster_disable_f(void)
|
||||
{
|
||||
return 0x1;
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#include "hw_gr_gm20b.h"
|
||||
#include "hw_fifo_gm20b.h"
|
||||
#include "hw_fb_gm20b.h"
|
||||
#include "hw_top_gm20b.h"
|
||||
#include "hw_proj_gm20b.h"
|
||||
#include "hw_ctxsw_prog_gm20b.h"
|
||||
#include "hw_fuse_gm20b.h"
|
||||
@@ -975,6 +976,52 @@ static int gr_gm20b_update_pc_sampling(struct channel_gk20a *c,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Read the register at fuse_status_opt_fbio_r() and return its data field
 * (the low 16 bits, per fuse_status_opt_fbio_data_v()).
 */
static u32 gr_gm20b_get_fbp_en_mask(struct gk20a *g)
{
	u32 fbio_status = gk20a_readl(g, fuse_status_opt_fbio_r());

	return fuse_status_opt_fbio_data_v(fbio_status);
}
|
||||
|
||||
/*
 * Read the register at top_ltc_per_fbp_r() and return its value field
 * (the low 5 bits, per top_ltc_per_fbp_value_v()).
 */
static u32 gr_gm20b_get_max_ltc_per_fbp(struct gk20a *g)
{
	u32 ltc_reg = gk20a_readl(g, top_ltc_per_fbp_r());

	return top_ltc_per_fbp_value_v(ltc_reg);
}
|
||||
|
||||
/*
 * Read the register at top_slices_per_ltc_r() and return its value field
 * (the low 5 bits, per top_slices_per_ltc_value_v()).
 */
static u32 gr_gm20b_get_max_lts_per_ltc(struct gk20a *g)
{
	u32 slices_reg = gk20a_readl(g, top_slices_per_ltc_r());

	return top_slices_per_ltc_value_v(slices_reg);
}
|
||||
|
||||
u32 *gr_gm20b_rop_l2_en_mask(struct gk20a *g)
|
||||
{
|
||||
struct nvgpu_gpu_characteristics *gpu = &g->gpu_characteristics;
|
||||
u32 i, tmp, max_fbps_count;
|
||||
tmp = gk20a_readl(g, top_num_fbps_r());
|
||||
max_fbps_count = top_num_fbps_value_v(tmp);
|
||||
|
||||
/* mask of Rop_L2 for each FBP */
|
||||
for (i = 0; i < max_fbps_count; i++)
|
||||
gpu->rop_l2_en_mask[i] = fuse_status_opt_rop_l2_fbp_r(i);
|
||||
|
||||
return gpu->rop_l2_en_mask;
|
||||
}
|
||||
|
||||
/*
 * Read the register at top_num_fbps_r() and return its value field
 * (the low 5 bits, per top_num_fbps_value_v()).
 */
static u32 gr_gm20b_get_max_fbps_count(struct gk20a *g)
{
	u32 num_fbps_reg = gk20a_readl(g, top_num_fbps_r());

	return top_num_fbps_value_v(num_fbps_reg);
}
|
||||
|
||||
void gm20b_init_gr(struct gpu_ops *gops)
|
||||
{
|
||||
gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu;
|
||||
@@ -1020,4 +1067,9 @@ void gm20b_init_gr(struct gpu_ops *gops)
|
||||
gr_gm20b_update_ctxsw_preemption_mode;
|
||||
gops->gr.dump_gr_regs = gr_gm20b_dump_gr_status_regs;
|
||||
gops->gr.update_pc_sampling = gr_gm20b_update_pc_sampling;
|
||||
gops->gr.get_fbp_en_mask = gr_gm20b_get_fbp_en_mask;
|
||||
gops->gr.get_max_ltc_per_fbp = gr_gm20b_get_max_ltc_per_fbp;
|
||||
gops->gr.get_max_lts_per_ltc = gr_gm20b_get_max_lts_per_ltc;
|
||||
gops->gr.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask;
|
||||
gops->gr.get_max_fbps_count = gr_gm20b_get_max_fbps_count;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
@@ -98,4 +98,24 @@ static inline u32 fuse_ctrl_opt_ram_svop_pdp_override_data_no_f(void)
|
||||
{
|
||||
return 0x0;
|
||||
}
|
||||
/*
 * Returns the constant 0x00021c14. gr_gm20b_get_fbp_en_mask() passes it to
 * gk20a_readl() as the register to read.
 */
static inline u32 fuse_status_opt_fbio_r(void)
|
||||
{
|
||||
return 0x00021c14;
|
||||
}
|
||||
/*
 * Keeps the low 16 bits of @v and shifts them left by 0, i.e. the result is
 * @v & 0xffff.
 */
static inline u32 fuse_status_opt_fbio_data_f(u32 v)
|
||||
{
|
||||
return (v & 0xffff) << 0;
|
||||
}
|
||||
/*
 * Returns 0xffff, the same 16-bit mask that fuse_status_opt_fbio_data_f()
 * and fuse_status_opt_fbio_data_v() apply.
 */
static inline u32 fuse_status_opt_fbio_data_m(void)
|
||||
{
|
||||
return 0xffff << 0;
|
||||
}
|
||||
/*
 * Extracts bits 15:0 of @r. gr_gm20b_get_fbp_en_mask() applies it to the
 * value read from fuse_status_opt_fbio_r().
 */
static inline u32 fuse_status_opt_fbio_data_v(u32 r)
|
||||
{
|
||||
return (r >> 0) & 0xffff;
|
||||
}
|
||||
/*
 * Returns 0x00021d70 + 4 * @i: one 4-byte-spaced value per index.
 * NOTE(review): presumably the offset of the ROP/L2 fuse status register for
 * FBP @i, to be passed to gk20a_readl() -- confirm against the hardware
 * manual. No range check is done on @i.
 */
static inline u32 fuse_status_opt_rop_l2_fbp_r(u32 i)
|
||||
{
|
||||
return 0x00021d70 + i*4;
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
@@ -74,6 +74,22 @@ static inline u32 top_num_fbps_value_v(u32 r)
|
||||
{
|
||||
return (r >> 0) & 0x1f;
|
||||
}
|
||||
/*
 * Returns the constant 0x00022450. gr_gm20b_get_max_ltc_per_fbp() passes it
 * to gk20a_readl() as the register to read.
 */
static inline u32 top_ltc_per_fbp_r(void)
|
||||
{
|
||||
return 0x00022450;
|
||||
}
|
||||
/*
 * Extracts bits 4:0 of @r. gr_gm20b_get_max_ltc_per_fbp() applies it to the
 * value read from top_ltc_per_fbp_r().
 */
static inline u32 top_ltc_per_fbp_value_v(u32 r)
|
||||
{
|
||||
return (r >> 0) & 0x1f;
|
||||
}
|
||||
/*
 * Returns the constant 0x0002245c. gr_gm20b_get_max_lts_per_ltc() passes it
 * to gk20a_readl() as the register to read.
 */
static inline u32 top_slices_per_ltc_r(void)
|
||||
{
|
||||
return 0x0002245c;
|
||||
}
|
||||
/*
 * Extracts bits 4:0 of @r. gr_gm20b_get_max_lts_per_ltc() applies it to the
 * value read from top_slices_per_ltc_r().
 */
static inline u32 top_slices_per_ltc_value_v(u32 r)
|
||||
{
|
||||
return (r >> 0) & 0x1f;
|
||||
}
|
||||
static inline u32 top_num_ltcs_r(void)
|
||||
{
|
||||
return 0x00022454;
|
||||
|
||||
@@ -111,7 +111,6 @@ struct nvgpu_gpu_characteristics {
|
||||
__u32 arch;
|
||||
__u32 impl;
|
||||
__u32 rev;
|
||||
|
||||
__u32 num_gpc;
|
||||
|
||||
__u64 L2_cache_size; /* bytes */
|
||||
@@ -153,9 +152,22 @@ struct nvgpu_gpu_characteristics {
|
||||
__s16 as_ioctl_nr_last;
|
||||
|
||||
__u8 gpu_va_bit_count;
|
||||
|
||||
__u8 reserved;
|
||||
|
||||
__u32 max_fbps_count;
|
||||
__u32 fbp_en_mask;
|
||||
__u32 max_ltc_per_fbp;
|
||||
__u32 max_lts_per_ltc;
|
||||
__u32 max_tex_per_tpc;
|
||||
__u32 max_gpc_count;
|
||||
/* mask of Rop_L2 for each FBP */
|
||||
__u32 rop_l2_en_mask[2];
|
||||
|
||||
|
||||
__u8 chipname[8];
|
||||
|
||||
|
||||
|
||||
/* Notes:
|
||||
- This struct can be safely appended with new fields. However, always
|
||||
keep the structure size multiple of 8 and make sure that the binary
|
||||
@@ -282,6 +294,15 @@ struct nvgpu_gpu_tpc_exception_en_status_args {
|
||||
__u64 tpc_exception_en_sm_mask;
|
||||
};
|
||||
|
||||
/* Argument of NVGPU_GPU_IOCTL_NUM_VSMS. */
struct nvgpu_gpu_num_vsms {
|
||||
/* out: set by gk20a_ctrl_get_num_vsms() to gr->no_of_sm */
__u32 num_vsms;
|
||||
/* not written by gk20a_ctrl_get_num_vsms() */
__u32 reserved;
|
||||
};
|
||||
|
||||
/* Argument of NVGPU_GPU_IOCTL_VSMS_MAPPING. */
struct nvgpu_gpu_vsms_mapping {
|
||||
/*
 * in: user-space address of the destination buffer.
 * gk20a_ctrl_vsm_mapping() copies gr->no_of_sm * sizeof(struct sm_info)
 * bytes to it; no buffer length is passed, so the buffer must be at least
 * that large.
 */
__u64 vsms_map_buf_addr;
|
||||
};
|
||||
|
||||
#define NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE \
|
||||
_IOR(NVGPU_GPU_IOCTL_MAGIC, 1, struct nvgpu_gpu_zcull_get_ctx_size_args)
|
||||
#define NVGPU_GPU_IOCTL_ZCULL_GET_INFO \
|
||||
@@ -316,9 +337,13 @@ struct nvgpu_gpu_tpc_exception_en_status_args {
|
||||
_IOWR(NVGPU_GPU_IOCTL_MAGIC, 16, struct nvgpu_gpu_wait_pause_args)
|
||||
#define NVGPU_GPU_IOCTL_GET_TPC_EXCEPTION_EN_STATUS \
|
||||
_IOWR(NVGPU_GPU_IOCTL_MAGIC, 17, struct nvgpu_gpu_tpc_exception_en_status_args)
|
||||
#define NVGPU_GPU_IOCTL_NUM_VSMS \
|
||||
_IOWR(NVGPU_GPU_IOCTL_MAGIC, 18, struct nvgpu_gpu_num_vsms)
|
||||
#define NVGPU_GPU_IOCTL_VSMS_MAPPING \
|
||||
_IOWR(NVGPU_GPU_IOCTL_MAGIC, 19, struct nvgpu_gpu_vsms_mapping)
|
||||
|
||||
#define NVGPU_GPU_IOCTL_LAST \
|
||||
_IOC_NR(NVGPU_GPU_IOCTL_GET_TPC_EXCEPTION_EN_STATUS)
|
||||
_IOC_NR(NVGPU_GPU_IOCTL_VSMS_MAPPING)
|
||||
#define NVGPU_GPU_IOCTL_MAX_ARG_SIZE \
|
||||
sizeof(struct nvgpu_gpu_prepare_compressible_read_args)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user