gpu: nvgpu: add max_css_buffer_size characteristic

Add max_css_buffer_size to gpu characteristics. In the virtual case, the size of the cycle stats snapshot buffer is constrained by the size of the mempool shared between the guest OS and the RM server, so tools need a way to query the maximum allowed size. In the native case, we return 0xffffffff to indicate that the buffer size is unbounded (subject to memory availability); in the virtual case, we return the size of the mempool. Also collapse the native init_cyclestats functions into a single version, as each chip had an identical copy of the code. JIRA ESRM-54 Bug 200296210 Change-Id: I71764d32c6e71a0d101bd40f274eaa4bea3e5b11 Signed-off-by: Peter Daifuku <pdaifuku@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1578930 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
2025-12-22 17:36:20 +03:00 · 2017-10-13 17:06:30 -07:00
parent 0dcf0ede81
commit 6bf40e5237
12 changed files with 79 additions and 41 deletions
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -1130,6 +1130,7 @@ void gr_gm20b_init_cyclestats(struct gk20a *g)
 		NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS;
 	g->gpu_characteristics.flags |=
 		NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS_SNAPSHOT;
+	g->gpu_characteristics.max_css_buffer_size = 0xffffffff;
 #else
 	(void)g;
 #endif
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -289,7 +289,7 @@ static const struct gpu_ops gp106_ops = {
 		.get_max_fbps_count = gr_gm20b_get_max_fbps_count,
 		.init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info,
 		.wait_empty = gr_gp10b_wait_empty,
-		.init_cyclestats = gr_gp10b_init_cyclestats,
+		.init_cyclestats = gr_gm20b_init_cyclestats,
 		.set_sm_debug_mode = gr_gk20a_set_sm_debug_mode,
 		.enable_cde_in_fecs = gr_gm20b_enable_cde_in_fecs,
 		.bpt_reg_info = gr_gm20b_bpt_reg_info,
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -1615,18 +1615,6 @@ int gr_gp10b_init_fs_state(struct gk20a *g)
 	return gr_gm20b_init_fs_state(g);
 }

-void gr_gp10b_init_cyclestats(struct gk20a *g)
-{
-#if defined(CONFIG_GK20A_CYCLE_STATS)
-	g->gpu_characteristics.flags |=
-		NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS;
-	g->gpu_characteristics.flags |=
-		NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS_SNAPSHOT;
-#else
-	(void)g;
-#endif
-}
-
 void gr_gp10b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index)
 {
 	nvgpu_tegra_fuse_write_bypass(g, 0x1);
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
@@ -115,7 +115,6 @@ void gr_gp10b_commit_global_bundle_cb(struct gk20a *g,
 					    struct channel_ctx_gk20a *ch_ctx,
 					    u64 addr, u64 size, bool patch);
 int gr_gp10b_load_smid_config(struct gk20a *g);
-void gr_gp10b_init_cyclestats(struct gk20a *g);
 void gr_gp10b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index);
 void gr_gp10b_get_access_map(struct gk20a *g,
 				   u32 **whitelist, int *num_entries);
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -243,7 +243,7 @@ static const struct gpu_ops gp10b_ops = {
 		.get_max_fbps_count = gr_gm20b_get_max_fbps_count,
 		.init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info,
 		.wait_empty = gr_gp10b_wait_empty,
-		.init_cyclestats = gr_gp10b_init_cyclestats,
+		.init_cyclestats = gr_gm20b_init_cyclestats,
 		.set_sm_debug_mode = gr_gk20a_set_sm_debug_mode,
 		.enable_cde_in_fecs = gr_gm20b_enable_cde_in_fecs,
 		.bpt_reg_info = gr_gm20b_bpt_reg_info,
--- a/drivers/gpu/nvgpu/vgpu/css_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/css_vgpu.c
@@ -33,39 +33,71 @@

 static struct tegra_hv_ivm_cookie *css_cookie;

-static int vgpu_css_init_snapshot_buffer(struct gr_gk20a *gr)
+static struct tegra_hv_ivm_cookie *vgpu_css_reserve_mempool(struct gk20a *g)
 {
-	struct gk20a *g = gr->g;
 	struct device *dev = dev_from_gk20a(g);
-	struct gk20a_cs_snapshot *data = gr->cs_data;
 	struct device_node *np = dev->of_node;
 	struct of_phandle_args args;
 	struct device_node *hv_np;
-	void *buf = NULL;
+	struct tegra_hv_ivm_cookie *cookie;
 	u32 mempool;
 	int err;

+	err = of_parse_phandle_with_fixed_args(np,
+			"mempool-css", 1, 0, &args);
+	if (err) {
+		nvgpu_err(g, "dt missing mempool-css");
+		return ERR_PTR(err);
+	}
+
+	hv_np = args.np;
+	mempool = args.args[0];
+	cookie = tegra_hv_mempool_reserve(hv_np, mempool);
+	if (IS_ERR_OR_NULL(cookie)) {
+		nvgpu_err(g, "mempool  %u reserve failed", mempool);
+		return ERR_PTR(-EINVAL);
+	}
+	return cookie;
+}
+
+u32 vgpu_css_get_buffer_size(struct gk20a *g)
+{
+	struct tegra_hv_ivm_cookie *cookie;
+	u32 size;
+
+	nvgpu_log_fn(g, " ");
+	/* Mempool is already held for the snapshot buffer: report its size. */
+	if (css_cookie) {
+		nvgpu_log_info(g, "buffer size = %llu", css_cookie->size);
+		return (u32)css_cookie->size;
+	}
+	/* Else reserve it just to read the size; check the local cookie. */
+	cookie = vgpu_css_reserve_mempool(g);
+	if (IS_ERR(cookie))
+		return 0;
+
+	size = cookie->size;
+
+	tegra_hv_mempool_unreserve(cookie);
+	nvgpu_log_info(g, "buffer size = %u", size);
+	return size;
+}
+
+static int vgpu_css_init_snapshot_buffer(struct gr_gk20a *gr)
+{
+	struct gk20a *g = gr->g;
+	struct gk20a_cs_snapshot *data = gr->cs_data;
+	void *buf = NULL;
+	int err;
+
 	gk20a_dbg_fn("");

 	if (data->hw_snapshot)
 		return 0;

-	err = of_parse_phandle_with_fixed_args(np,
-			"mempool-css", 1, 0, &args);
-	if (err) {
-		nvgpu_info(g, "dt missing mempool-css");
-		goto fail;
-	}
-
-	hv_np = args.np;
-	mempool = args.args[0];
-	css_cookie = tegra_hv_mempool_reserve(hv_np, mempool);
-	if (IS_ERR(css_cookie)) {
-		nvgpu_info(g,
-			"mempool  %u reserve failed", mempool);
-		err = -EINVAL;
-		goto fail;
-	}
+	css_cookie = vgpu_css_reserve_mempool(g);
+	if (IS_ERR(css_cookie))
+		return PTR_ERR(css_cookie);

 	/* Make sure buffer size is large enough */
 	if (css_cookie->size < CSS_MIN_HW_SNAPSHOT_SIZE) {
@@ -89,8 +121,8 @@ static int vgpu_css_init_snapshot_buffer(struct gr_gk20a *gr)
 	memset(data->hw_snapshot, 0xff, css_cookie->size);
 	return 0;
 fail:
-	if (!IS_ERR_OR_NULL(css_cookie))
 	tegra_hv_mempool_unreserve(css_cookie);
+	css_cookie = NULL;
 	return err;
 }

@@ -105,6 +137,7 @@ void vgpu_css_release_snapshot_buffer(struct gr_gk20a *gr)
 	data->hw_snapshot = NULL;

 	tegra_hv_mempool_unreserve(css_cookie);
+	css_cookie = NULL;

 	gk20a_dbg_info("cyclestats(vgpu): buffer for snapshots released\n");
 }
--- a/drivers/gpu/nvgpu/vgpu/css_vgpu.h
+++ b/drivers/gpu/nvgpu/vgpu/css_vgpu.h
@@ -36,5 +36,5 @@ int vgpu_css_detach(struct channel_gk20a *ch,
 		struct gk20a_cs_snapshot_client *cs_client);
 int vgpu_css_enable_snapshot_buffer(struct channel_gk20a *ch,
 				struct gk20a_cs_snapshot_client *cs_client);
-
+u32 vgpu_css_get_buffer_size(struct gk20a *g);
 #endif
--- a/drivers/gpu/nvgpu/vgpu/gm20b/vgpu_gr_gm20b.c
+++ b/drivers/gpu/nvgpu/vgpu/gm20b/vgpu_gr_gm20b.c
@@ -21,8 +21,22 @@
 */

 #include "gk20a/gk20a.h"
+#include "gk20a/css_gr_gk20a.h"
+#include "vgpu/css_vgpu.h"
 #include "vgpu_gr_gm20b.h"

+void vgpu_gr_gm20b_init_cyclestats(struct gk20a *g)
+{
+#if defined(CONFIG_GK20A_CYCLE_STATS)
+	g->gpu_characteristics.flags |=
+		NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS;
+	g->gpu_characteristics.flags |=
+		NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS_SNAPSHOT;
+	g->gpu_characteristics.max_css_buffer_size =
+						vgpu_css_get_buffer_size(g);
+#endif
+}
+
 int vgpu_gm20b_init_fs_state(struct gk20a *g)
 {
 	struct gr_gk20a *gr = &g->gr;
--- a/drivers/gpu/nvgpu/vgpu/gm20b/vgpu_gr_gm20b.h
+++ b/drivers/gpu/nvgpu/vgpu/gm20b/vgpu_gr_gm20b.h
@@ -25,6 +25,7 @@

 #include "gk20a/gk20a.h"

+void vgpu_gr_gm20b_init_cyclestats(struct gk20a *g);
 int vgpu_gm20b_init_fs_state(struct gk20a *g);

 #endif
--- a/drivers/gpu/nvgpu/vgpu/gm20b/vgpu_hal_gm20b.c
+++ b/drivers/gpu/nvgpu/vgpu/gm20b/vgpu_hal_gm20b.c
@@ -130,7 +130,7 @@ static const struct gpu_ops vgpu_gm20b_ops = {
 		.get_max_fbps_count = vgpu_gr_get_max_fbps_count,
 		.init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info,
 		.wait_empty = gr_gk20a_wait_idle,
-		.init_cyclestats = gr_gm20b_init_cyclestats,
+		.init_cyclestats = vgpu_gr_gm20b_init_cyclestats,
 		.set_sm_debug_mode = vgpu_gr_set_sm_debug_mode,
 		.enable_cde_in_fecs = gr_gm20b_enable_cde_in_fecs,
 		.bpt_reg_info = gr_gm20b_bpt_reg_info,
--- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c
+++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_hal_gp10b.c
@@ -143,7 +143,7 @@ static const struct gpu_ops vgpu_gp10b_ops = {
 		.get_max_fbps_count = vgpu_gr_get_max_fbps_count,
 		.init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info,
 		.wait_empty = gr_gp10b_wait_empty,
-		.init_cyclestats = gr_gp10b_init_cyclestats,
+		.init_cyclestats = vgpu_gr_gm20b_init_cyclestats,
 		.set_sm_debug_mode = vgpu_gr_set_sm_debug_mode,
 		.enable_cde_in_fecs = gr_gm20b_enable_cde_in_fecs,
 		.bpt_reg_info = gr_gm20b_bpt_reg_info,
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -245,7 +245,9 @@ struct nvgpu_gpu_characteristics {
 	__u32 reserved1;

 	__s16 event_ioctl_nr_last;
-	__u16 pad[3];
+	__u16 pad;
+
+	__u32 max_css_buffer_size;

 	/* Notes:
 	   - This struct can be safely appended with new fields. However, always