diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index af834b02e..a1078b102 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -1130,6 +1130,7 @@ void gr_gm20b_init_cyclestats(struct gk20a *g)
 		NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS;
 	g->gpu_characteristics.flags |=
 		NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS_SNAPSHOT;
+	g->gpu_characteristics.max_css_buffer_size = 0xffffffff;
 #else
 	(void)g;
 #endif
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index b6f5a4cdc..1e81796e6 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -289,7 +289,7 @@ static const struct gpu_ops gp106_ops = {
 		.get_max_fbps_count = gr_gm20b_get_max_fbps_count,
 		.init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info,
 		.wait_empty = gr_gp10b_wait_empty,
-		.init_cyclestats = gr_gp10b_init_cyclestats,
+		.init_cyclestats = gr_gm20b_init_cyclestats,
 		.set_sm_debug_mode = gr_gk20a_set_sm_debug_mode,
 		.enable_cde_in_fecs = gr_gm20b_enable_cde_in_fecs,
 		.bpt_reg_info = gr_gm20b_bpt_reg_info,
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 813b8891f..a01cfbfac 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -1615,18 +1615,6 @@ int gr_gp10b_init_fs_state(struct gk20a *g)
 	return gr_gm20b_init_fs_state(g);
 }
 
-void gr_gp10b_init_cyclestats(struct gk20a *g)
-{
-#if defined(CONFIG_GK20A_CYCLE_STATS)
-	g->gpu_characteristics.flags |=
-		NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS;
-	g->gpu_characteristics.flags |=
-		NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS_SNAPSHOT;
-#else
-	(void)g;
-#endif
-}
-
 void gr_gp10b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
 {
 	nvgpu_tegra_fuse_write_bypass(g, 0x1);
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
index 9ddc03750..a537f1478 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
@@ -115,7 +115,6 @@ void gr_gp10b_commit_global_bundle_cb(struct gk20a *g,
 					struct channel_ctx_gk20a *ch_ctx,
 					u64 addr, u64 size, bool patch);
 int gr_gp10b_load_smid_config(struct gk20a *g);
-void gr_gp10b_init_cyclestats(struct gk20a *g);
 void gr_gp10b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index);
 void gr_gp10b_get_access_map(struct gk20a *g,
 				   u32 **whitelist, int *num_entries);
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index 98e143f0a..7b5cc2ac2 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -243,7 +243,7 @@ static const struct gpu_ops gp10b_ops = {
 		.get_max_fbps_count = gr_gm20b_get_max_fbps_count,
 		.init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info,
 		.wait_empty = gr_gp10b_wait_empty,
-		.init_cyclestats = gr_gp10b_init_cyclestats,
+		.init_cyclestats = gr_gm20b_init_cyclestats,
 		.set_sm_debug_mode = gr_gk20a_set_sm_debug_mode,
 		.enable_cde_in_fecs = gr_gm20b_enable_cde_in_fecs,
 		.bpt_reg_info = gr_gm20b_bpt_reg_info,
diff --git a/drivers/gpu/nvgpu/vgpu/css_vgpu.c b/drivers/gpu/nvgpu/vgpu/css_vgpu.c
index bcb01fac2..266ce8714 100644
--- a/drivers/gpu/nvgpu/vgpu/css_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/css_vgpu.c
@@ -33,39 +33,88 @@
 
 static struct tegra_hv_ivm_cookie *css_cookie;
 
-static int vgpu_css_init_snapshot_buffer(struct gr_gk20a *gr)
+/*
+ * Look up the "mempool-css" phandle in the device tree node of @g and
+ * reserve the hypervisor mempool it names.
+ *
+ * Returns the reserved cookie, or an ERR_PTR() on failure (never NULL).
+ */
+static struct tegra_hv_ivm_cookie *vgpu_css_reserve_mempool(struct gk20a *g)
 {
-	struct gk20a *g = gr->g;
 	struct device *dev = dev_from_gk20a(g);
-	struct gk20a_cs_snapshot *data = gr->cs_data;
 	struct device_node *np = dev->of_node;
 	struct of_phandle_args args;
 	struct device_node *hv_np;
-	void *buf = NULL;
+	struct tegra_hv_ivm_cookie *cookie;
 	u32 mempool;
 	int err;
 
+	err = of_parse_phandle_with_fixed_args(np,
+			"mempool-css", 1, 0, &args);
+	if (err) {
+		nvgpu_err(g, "dt missing mempool-css");
+		return ERR_PTR(err);
+	}
+
+	hv_np = args.np;
+	mempool = args.args[0];
+	cookie = tegra_hv_mempool_reserve(hv_np, mempool);
+	if (IS_ERR_OR_NULL(cookie)) {
+		nvgpu_err(g, "mempool %u reserve failed", mempool);
+		return ERR_PTR(-EINVAL);
+	}
+	return cookie;
+}
+
+/*
+ * Report the size of the cyclestats snapshot mempool as a u32.
+ *
+ * If the mempool is already reserved, the cached cookie is used; otherwise
+ * the mempool is reserved only long enough to read its size. Returns 0 when
+ * the mempool cannot be reserved.
+ */
+u32 vgpu_css_get_buffer_size(struct gk20a *g)
+{
+	struct tegra_hv_ivm_cookie *cookie;
+	u32 size;
+
+	nvgpu_log_fn(g, " ");
+
+	if (css_cookie) {
+		nvgpu_log_info(g, "buffer size = %llu", css_cookie->size);
+		return (u32)css_cookie->size;
+	}
+
+	cookie = vgpu_css_reserve_mempool(g);
+	if (IS_ERR(cookie))
+		return 0;
+
+	size = cookie->size;
+
+	tegra_hv_mempool_unreserve(cookie);
+	nvgpu_log_info(g, "buffer size = %u", size);
+	return size;
+}
+
+static int vgpu_css_init_snapshot_buffer(struct gr_gk20a *gr)
+{
+	struct gk20a *g = gr->g;
+	struct gk20a_cs_snapshot *data = gr->cs_data;
+	void *buf = NULL;
+	int err;
+
 	gk20a_dbg_fn("");
 
 	if (data->hw_snapshot)
 		return 0;
 
-	err = of_parse_phandle_with_fixed_args(np,
-			"mempool-css", 1, 0, &args);
-	if (err) {
-		nvgpu_info(g, "dt missing mempool-css");
-		goto fail;
-	}
-
-	hv_np = args.np;
-	mempool = args.args[0];
-	css_cookie = tegra_hv_mempool_reserve(hv_np, mempool);
-	if (IS_ERR(css_cookie)) {
-		nvgpu_info(g,
-			"mempool %u reserve failed", mempool);
-		err = -EINVAL;
-		goto fail;
-	}
+	css_cookie = vgpu_css_reserve_mempool(g);
+	if (IS_ERR(css_cookie)) {
+		err = PTR_ERR(css_cookie);
+		/* get_buffer_size derefs any non-NULL cookie */
+		css_cookie = NULL;
+		return err;
+	}
 
 	/* Make sure buffer size is large enough */
 	if (css_cookie->size < CSS_MIN_HW_SNAPSHOT_SIZE) {
@@ -89,8 +138,8 @@ static int vgpu_css_init_snapshot_buffer(struct gr_gk20a *gr)
 	memset(data->hw_snapshot, 0xff, css_cookie->size);
 	return 0;
 fail:
-	if (!IS_ERR_OR_NULL(css_cookie))
-		tegra_hv_mempool_unreserve(css_cookie);
+	tegra_hv_mempool_unreserve(css_cookie);
+	css_cookie = NULL;
 	return err;
 }
 
@@ -105,6 +154,7 @@ void vgpu_css_release_snapshot_buffer(struct gr_gk20a *gr)
 	data->hw_snapshot = NULL;
 
 	tegra_hv_mempool_unreserve(css_cookie);
+	css_cookie = NULL;
 
 	gk20a_dbg_info("cyclestats(vgpu): buffer for snapshots released\n");
 }
diff --git a/drivers/gpu/nvgpu/vgpu/css_vgpu.h b/drivers/gpu/nvgpu/vgpu/css_vgpu.h
index b61c9862e..8c92d571e 100644
--- a/drivers/gpu/nvgpu/vgpu/css_vgpu.h
+++ b/drivers/gpu/nvgpu/vgpu/css_vgpu.h
@@ -36,5 +36,5 @@ int vgpu_css_detach(struct channel_gk20a *ch,
 		struct gk20a_cs_snapshot_client *cs_client);
 int vgpu_css_enable_snapshot_buffer(struct channel_gk20a *ch,
 				struct gk20a_cs_snapshot_client *cs_client);
-
+u32 vgpu_css_get_buffer_size(struct gk20a *g);
 #endif
diff --git a/drivers/gpu/nvgpu/vgpu/gm20b/vgpu_gr_gm20b.c b/drivers/gpu/nvgpu/vgpu/gm20b/vgpu_gr_gm20b.c
index 06962a86b..06cc2a4ac 100644
--- a/drivers/gpu/nvgpu/vgpu/gm20b/vgpu_gr_gm20b.c
+++ b/drivers/gpu/nvgpu/vgpu/gm20b/vgpu_gr_gm20b.c
@@ -21,8 +21,28 @@
  */
 
 #include "gk20a/gk20a.h"
+#include "gk20a/css_gr_gk20a.h"
+#include "vgpu/css_vgpu.h"
 #include "vgpu_gr_gm20b.h"
 
+/*
+ * Advertise cyclestats support and publish the snapshot buffer size limit,
+ * which for vgpu is whatever vgpu_css_get_buffer_size() reports.
+ */
+void vgpu_gr_gm20b_init_cyclestats(struct gk20a *g)
+{
+#if defined(CONFIG_GK20A_CYCLE_STATS)
+	g->gpu_characteristics.flags |=
+		NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS;
+	g->gpu_characteristics.flags |=
+		NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS_SNAPSHOT;
+	g->gpu_characteristics.max_css_buffer_size =
+		vgpu_css_get_buffer_size(g);
+#else
+	(void)g;
+#endif
+}
+
 int vgpu_gm20b_init_fs_state(struct gk20a *g)
 {
 	struct gr_gk20a *gr = &g->gr;
diff --git a/drivers/gpu/nvgpu/vgpu/gm20b/vgpu_gr_gm20b.h b/drivers/gpu/nvgpu/vgpu/gm20b/vgpu_gr_gm20b.h
index 993a8f3da..a0a895796 100644
--- a/drivers/gpu/nvgpu/vgpu/gm20b/vgpu_gr_gm20b.h
+++ b/drivers/gpu/nvgpu/vgpu/gm20b/vgpu_gr_gm20b.h
@@ -25,6 +25,7 @@
 
 #include "gk20a/gk20a.h"
 
+void vgpu_gr_gm20b_init_cyclestats(struct gk20a *g);
 int vgpu_gm20b_init_fs_state(struct gk20a *g);
 
 #endif
diff --git a/drivers/gpu/nvgpu/vgpu/gm20b/vgpu_hal_gm20b.c b/drivers/gpu/nvgpu/vgpu/gm20b/vgpu_hal_gm20b.c
index 38d07ee20..81bcdc21c 100644
--- a/drivers/gpu/nvgpu/vgpu/gm20b/vgpu_hal_gm20b.c
+++ b/drivers/gpu/nvgpu/vgpu/gm20b/vgpu_hal_gm20b.c
@@ -130,7 +130,7 @@ static const struct gpu_ops vgpu_gm20b_ops = {
 		.get_max_fbps_count = vgpu_gr_get_max_fbps_count,
 		.init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info,
 		.wait_empty = gr_gk20a_wait_idle,
-		.init_cyclestats = gr_gm20b_init_cyclestats,
+		.init_cyclestats = vgpu_gr_gm20b_init_cyclestats,
 		.set_sm_debug_mode = vgpu_gr_set_sm_debug_mode,
 		.enable_cde_in_fecs = gr_gm20b_enable_cde_in_fecs,
 		.bpt_reg_info = gr_gm20b_bpt_reg_info,
diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c
index 320aa4a5b..04a7349a3 100644
--- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c
+++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c
@@ -143,7 +143,7 @@ static const struct gpu_ops vgpu_gp10b_ops = {
 		.get_max_fbps_count = vgpu_gr_get_max_fbps_count,
 		.init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info,
 		.wait_empty = gr_gp10b_wait_empty,
-		.init_cyclestats = gr_gp10b_init_cyclestats,
+		.init_cyclestats = vgpu_gr_gm20b_init_cyclestats,
 		.set_sm_debug_mode = vgpu_gr_set_sm_debug_mode,
 		.enable_cde_in_fecs = gr_gm20b_enable_cde_in_fecs,
 		.bpt_reg_info = gr_gm20b_bpt_reg_info,
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index 9c883a93b..29541031f 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -245,7 +245,9 @@ struct nvgpu_gpu_characteristics {
 	__u32 reserved1;
 
 	__s16 event_ioctl_nr_last;
-	__u16 pad[3];
+	__u16 pad;
+
+	__u32 max_css_buffer_size;
 
 	/* Notes:
 	   - This struct can be safely appended with new fields. However, always