mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-23 01:50:07 +03:00
gpu: nvgpu: gk20a: Fix FBP/L2 masks, add GET_FBP_L2_MASKS
Fix FBP and ROP_L2 enable masks for Maxwell+. Deprecate rop_l2_en_mask in GPU characteristics by adding _DEPRECATED postfix. The array is too small to hold ROP_L2 enable masks for desktop GPUs. Add NVGPU_GPU_IOCTL_GET_FBP_L2_MASKS to expose the ROP_L2 masks for userspace. Bug 200136909 Bug 200241845 Change-Id: I5ad5a5c09f3962ebb631b8d6e7a2f9df02f75ac7 Signed-off-by: Sami Kiminki <skiminki@nvidia.com> Reviewed-on: http://git-master/r/1245294 (cherry picked from commit 0823b33e59defec341ea7919dae4e5f73a36d256) Reviewed-on: http://git-master/r/1249883 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
cc4208a278
commit
f329e674f4
@@ -246,6 +246,30 @@ static int gk20a_ctrl_get_tpc_masks(struct gk20a *g,
|
|||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int gk20a_ctrl_get_fbp_l2_masks(
|
||||||
|
struct gk20a *g, struct nvgpu_gpu_get_fbp_l2_masks_args *args)
|
||||||
|
{
|
||||||
|
struct gr_gk20a *gr = &g->gr;
|
||||||
|
int err = 0;
|
||||||
|
const u32 fbp_l2_mask_size = sizeof(u32) * gr->max_fbps_count;
|
||||||
|
|
||||||
|
if (args->mask_buf_size > 0) {
|
||||||
|
size_t write_size = fbp_l2_mask_size;
|
||||||
|
|
||||||
|
if (write_size > args->mask_buf_size)
|
||||||
|
write_size = args->mask_buf_size;
|
||||||
|
|
||||||
|
err = copy_to_user((void __user *)(uintptr_t)
|
||||||
|
args->mask_buf_addr,
|
||||||
|
gr->fbp_rop_l2_en_mask, write_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (err == 0)
|
||||||
|
args->mask_buf_size = fbp_l2_mask_size;
|
||||||
|
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
static int nvgpu_gpu_ioctl_l2_fb_ops(struct gk20a *g,
|
static int nvgpu_gpu_ioctl_l2_fb_ops(struct gk20a *g,
|
||||||
struct nvgpu_gpu_l2_fb_args *args)
|
struct nvgpu_gpu_l2_fb_args *args)
|
||||||
{
|
{
|
||||||
@@ -936,6 +960,10 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
|
|||||||
err = gk20a_ctrl_get_tpc_masks(g,
|
err = gk20a_ctrl_get_tpc_masks(g,
|
||||||
(struct nvgpu_gpu_get_tpc_masks_args *)buf);
|
(struct nvgpu_gpu_get_tpc_masks_args *)buf);
|
||||||
break;
|
break;
|
||||||
|
case NVGPU_GPU_IOCTL_GET_FBP_L2_MASKS:
|
||||||
|
err = gk20a_ctrl_get_fbp_l2_masks(g,
|
||||||
|
(struct nvgpu_gpu_get_fbp_l2_masks_args *)buf);
|
||||||
|
break;
|
||||||
case NVGPU_GPU_IOCTL_OPEN_CHANNEL:
|
case NVGPU_GPU_IOCTL_OPEN_CHANNEL:
|
||||||
/* this arg type here, but ..gpu_open_channel_args in nvgpu.h
|
/* this arg type here, but ..gpu_open_channel_args in nvgpu.h
|
||||||
* for consistency - they are the same */
|
* for consistency - they are the same */
|
||||||
|
|||||||
@@ -3257,6 +3257,7 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr)
|
|||||||
kfree(gr->sm_to_cluster);
|
kfree(gr->sm_to_cluster);
|
||||||
kfree(gr->gpc_skip_mask);
|
kfree(gr->gpc_skip_mask);
|
||||||
kfree(gr->map_tiles);
|
kfree(gr->map_tiles);
|
||||||
|
kfree(gr->fbp_rop_l2_en_mask);
|
||||||
gr->gpc_tpc_count = NULL;
|
gr->gpc_tpc_count = NULL;
|
||||||
gr->gpc_zcb_count = NULL;
|
gr->gpc_zcb_count = NULL;
|
||||||
gr->gpc_ppc_count = NULL;
|
gr->gpc_ppc_count = NULL;
|
||||||
@@ -3266,6 +3267,7 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr)
|
|||||||
gr->pes_tpc_mask[1] = NULL;
|
gr->pes_tpc_mask[1] = NULL;
|
||||||
gr->gpc_skip_mask = NULL;
|
gr->gpc_skip_mask = NULL;
|
||||||
gr->map_tiles = NULL;
|
gr->map_tiles = NULL;
|
||||||
|
gr->fbp_rop_l2_en_mask = NULL;
|
||||||
|
|
||||||
gr->ctx_vars.valid = false;
|
gr->ctx_vars.valid = false;
|
||||||
kfree(gr->ctx_vars.ucode.fecs.inst.l);
|
kfree(gr->ctx_vars.ucode.fecs.inst.l);
|
||||||
@@ -3336,6 +3338,11 @@ static int gr_gk20a_init_gr_config(struct gk20a *g, struct gr_gk20a *gr)
|
|||||||
|
|
||||||
gr->fbp_en_mask = g->ops.gr.get_fbp_en_mask(g);
|
gr->fbp_en_mask = g->ops.gr.get_fbp_en_mask(g);
|
||||||
|
|
||||||
|
gr->fbp_rop_l2_en_mask =
|
||||||
|
kzalloc(gr->max_fbps_count * sizeof(u32), GFP_KERNEL);
|
||||||
|
if (!gr->fbp_rop_l2_en_mask)
|
||||||
|
goto clean_up;
|
||||||
|
|
||||||
tmp = gk20a_readl(g, top_tpc_per_gpc_r());
|
tmp = gk20a_readl(g, top_tpc_per_gpc_r());
|
||||||
gr->max_tpc_per_gpc_count = top_tpc_per_gpc_value_v(tmp);
|
gr->max_tpc_per_gpc_count = top_tpc_per_gpc_value_v(tmp);
|
||||||
|
|
||||||
|
|||||||
@@ -351,6 +351,7 @@ struct gr_gk20a {
|
|||||||
struct gr_t18x t18x;
|
struct gr_t18x t18x;
|
||||||
#endif
|
#endif
|
||||||
u32 fbp_en_mask;
|
u32 fbp_en_mask;
|
||||||
|
u32 *fbp_rop_l2_en_mask;
|
||||||
u32 no_of_sm;
|
u32 no_of_sm;
|
||||||
struct sm_info *sm_to_cluster;
|
struct sm_info *sm_to_cluster;
|
||||||
struct nvgpu_dbg_gpu_sm_error_state_record *sm_error_states;
|
struct nvgpu_dbg_gpu_sm_error_state_record *sm_error_states;
|
||||||
|
|||||||
@@ -1028,8 +1028,15 @@ static int gr_gm20b_update_pc_sampling(struct channel_gk20a *c,
|
|||||||
static u32 gr_gm20b_get_fbp_en_mask(struct gk20a *g)
|
static u32 gr_gm20b_get_fbp_en_mask(struct gk20a *g)
|
||||||
{
|
{
|
||||||
u32 fbp_en_mask, opt_fbio;
|
u32 fbp_en_mask, opt_fbio;
|
||||||
|
u32 tmp, max_fbps_count;
|
||||||
|
|
||||||
|
tmp = gk20a_readl(g, top_num_fbps_r());
|
||||||
|
max_fbps_count = top_num_fbps_value_v(tmp);
|
||||||
|
|
||||||
opt_fbio = gk20a_readl(g, fuse_status_opt_fbio_r());
|
opt_fbio = gk20a_readl(g, fuse_status_opt_fbio_r());
|
||||||
fbp_en_mask = fuse_status_opt_fbio_data_v(opt_fbio);
|
fbp_en_mask =
|
||||||
|
((1 << max_fbps_count) - 1) ^
|
||||||
|
fuse_status_opt_fbio_data_v(opt_fbio);
|
||||||
return fbp_en_mask;
|
return fbp_en_mask;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1051,16 +1058,22 @@ static u32 gr_gm20b_get_max_lts_per_ltc(struct gk20a *g)
|
|||||||
|
|
||||||
static u32 *gr_gm20b_rop_l2_en_mask(struct gk20a *g)
|
static u32 *gr_gm20b_rop_l2_en_mask(struct gk20a *g)
|
||||||
{
|
{
|
||||||
struct nvgpu_gpu_characteristics *gpu = &g->gpu_characteristics;
|
struct gr_gk20a *gr = &g->gr;
|
||||||
u32 i, tmp, max_fbps_count;
|
u32 i, tmp, max_fbps_count, max_ltc_per_fbp;
|
||||||
|
u32 rop_l2_all_en;
|
||||||
|
|
||||||
tmp = gk20a_readl(g, top_num_fbps_r());
|
tmp = gk20a_readl(g, top_num_fbps_r());
|
||||||
max_fbps_count = top_num_fbps_value_v(tmp);
|
max_fbps_count = top_num_fbps_value_v(tmp);
|
||||||
|
max_ltc_per_fbp = gr_gm20b_get_max_ltc_per_fbp(g);
|
||||||
|
rop_l2_all_en = (1 << max_ltc_per_fbp) - 1;
|
||||||
|
|
||||||
/* mask of Rop_L2 for each FBP */
|
/* mask of Rop_L2 for each FBP */
|
||||||
for (i = 0; i < max_fbps_count; i++)
|
for (i = 0; i < max_fbps_count; i++) {
|
||||||
gpu->rop_l2_en_mask[i] = fuse_status_opt_rop_l2_fbp_r(i);
|
tmp = gk20a_readl(g, fuse_status_opt_rop_l2_fbp_r(i));
|
||||||
|
gr->fbp_rop_l2_en_mask[i] = rop_l2_all_en ^ tmp;
|
||||||
|
}
|
||||||
|
|
||||||
return gpu->rop_l2_en_mask;
|
return gr->fbp_rop_l2_en_mask;
|
||||||
}
|
}
|
||||||
|
|
||||||
static u32 gr_gm20b_get_max_fbps_count(struct gk20a *g)
|
static u32 gr_gm20b_get_max_fbps_count(struct gk20a *g)
|
||||||
|
|||||||
@@ -171,7 +171,7 @@ struct nvgpu_gpu_characteristics {
|
|||||||
__u32 max_tex_per_tpc;
|
__u32 max_tex_per_tpc;
|
||||||
__u32 max_gpc_count;
|
__u32 max_gpc_count;
|
||||||
/* mask of Rop_L2 for each FBP */
|
/* mask of Rop_L2 for each FBP */
|
||||||
__u32 rop_l2_en_mask[2];
|
__u32 rop_l2_en_mask_DEPRECATED[2];
|
||||||
|
|
||||||
|
|
||||||
__u8 chipname[8];
|
__u8 chipname[8];
|
||||||
@@ -522,6 +522,22 @@ struct nvgpu_gpu_get_memory_state_args {
|
|||||||
__u64 reserved[4];
|
__u64 reserved[4];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct nvgpu_gpu_get_fbp_l2_masks_args {
|
||||||
|
/* [in] L2 mask buffer size reserved by userspace. Should be
|
||||||
|
at least sizeof(__u32) * fls(fbp_en_mask) to receive LTC
|
||||||
|
mask for each FBP.
|
||||||
|
[out] full kernel buffer size
|
||||||
|
*/
|
||||||
|
__u32 mask_buf_size;
|
||||||
|
__u32 reserved;
|
||||||
|
|
||||||
|
/* [in] pointer to L2 mask buffer. It will receive one
|
||||||
|
32-bit L2 mask per FBP or 0 if FBP is not enabled or
|
||||||
|
not present. This parameter is ignored if
|
||||||
|
mask_buf_size is 0. */
|
||||||
|
__u64 mask_buf_addr;
|
||||||
|
};
|
||||||
|
|
||||||
#define NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE \
|
#define NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE \
|
||||||
_IOR(NVGPU_GPU_IOCTL_MAGIC, 1, struct nvgpu_gpu_zcull_get_ctx_size_args)
|
_IOR(NVGPU_GPU_IOCTL_MAGIC, 1, struct nvgpu_gpu_zcull_get_ctx_size_args)
|
||||||
#define NVGPU_GPU_IOCTL_ZCULL_GET_INFO \
|
#define NVGPU_GPU_IOCTL_ZCULL_GET_INFO \
|
||||||
@@ -583,8 +599,10 @@ struct nvgpu_gpu_get_memory_state_args {
|
|||||||
#define NVGPU_GPU_IOCTL_GET_MEMORY_STATE \
|
#define NVGPU_GPU_IOCTL_GET_MEMORY_STATE \
|
||||||
_IOWR(NVGPU_GPU_IOCTL_MAGIC, 33, \
|
_IOWR(NVGPU_GPU_IOCTL_MAGIC, 33, \
|
||||||
struct nvgpu_gpu_get_memory_state_args)
|
struct nvgpu_gpu_get_memory_state_args)
|
||||||
|
#define NVGPU_GPU_IOCTL_GET_FBP_L2_MASKS \
|
||||||
|
_IOWR(NVGPU_GPU_IOCTL_MAGIC, 38, struct nvgpu_gpu_get_fbp_l2_masks_args)
|
||||||
#define NVGPU_GPU_IOCTL_LAST \
|
#define NVGPU_GPU_IOCTL_LAST \
|
||||||
_IOC_NR(NVGPU_GPU_IOCTL_GET_MEMORY_STATE)
|
_IOC_NR(NVGPU_GPU_IOCTL_GET_FBP_L2_MASKS)
|
||||||
#define NVGPU_GPU_IOCTL_MAX_ARG_SIZE \
|
#define NVGPU_GPU_IOCTL_MAX_ARG_SIZE \
|
||||||
sizeof(struct nvgpu_gpu_get_cpu_time_correlation_info_args)
|
sizeof(struct nvgpu_gpu_get_cpu_time_correlation_info_args)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user