gpu: nvgpu: GPU characteristics additions

Add the following information to the GPU characteristics: available
big page sizes, support indicators for sync fence FDs and cycle stats,
GPC mask, SM version, SM SPA version and warp count, and IOCTL
interface levels. Also, add a new IOCTL to fetch the per-GPC TPC
masks.

Bug 1551769
Bug 1558186

Change-Id: I8a47d882645f29c7bf0c8f74334ebf47240e41de
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: http://git-master/r/562904
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Author:    Sami Kiminki <skiminki@nvidia.com>
Date:      2014-10-24 20:40:57 +03:00
Committer: Dan Willemsen
Parent:    2c5fdd1c8a
Commit:    d11fbfe7b1
9 changed files with 172 additions and 6 deletions

View File

@@ -221,6 +221,30 @@ clean_up:
        return err;
}

static int gk20a_ctrl_get_tpc_masks(struct gk20a *g,
                                    struct nvgpu_gpu_get_tpc_masks_args *args)
{
        struct gr_gk20a *gr = &g->gr;
        int err = 0;
        const u32 gpc_tpc_mask_size = sizeof(u32) * gr->gpc_count;

        if (args->mask_buf_size > 0) {
                size_t write_size = gpc_tpc_mask_size;

                if (write_size > args->mask_buf_size)
                        write_size = args->mask_buf_size;

                err = copy_to_user((void __user *)(uintptr_t)
                                   args->mask_buf_addr,
                                   gr->gpc_tpc_mask, write_size);
        }

        if (err == 0)
                args->mask_buf_size = gpc_tpc_mask_size;

        return err;
}

long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
        struct platform_device *dev = filp->private_data;
@@ -390,6 +414,10 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
                err = gk20a_ctrl_open_tsg(g,
                        (struct nvgpu_gpu_open_tsg_args *)buf);
                break;

        case NVGPU_GPU_IOCTL_GET_TPC_MASKS:
                err = gk20a_ctrl_get_tpc_masks(g,
                        (struct nvgpu_gpu_get_tpc_masks_args *)buf);
                break;

        default:
                dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd);
                err = -ENOTTY;
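
For context, a minimal user-space sketch of the new request (not part of this
change). The handler above copies at most mask_buf_size bytes and always
reports the full kernel buffer size, so one call with mask_buf_size == 0
yields the required allocation. The header path, error handling and helper
name are assumptions.

        /* Hypothetical usage sketch; not part of this change. Assumes the
         * nvgpu uapi header is installed as <linux/nvgpu.h> and that ctrl_fd
         * is an already-opened GPU control node. */
        #include <stdint.h>
        #include <stdlib.h>
        #include <sys/ioctl.h>
        #include <linux/nvgpu.h>

        static uint32_t *query_tpc_masks(int ctrl_fd, uint32_t *n_gpcs)
        {
                struct nvgpu_gpu_get_tpc_masks_args args = { 0 };
                uint32_t *masks;

                /* Pass 1: mask_buf_size == 0, kernel only reports the size. */
                if (ioctl(ctrl_fd, NVGPU_GPU_IOCTL_GET_TPC_MASKS, &args) != 0)
                        return NULL;

                masks = calloc(1, args.mask_buf_size);
                if (!masks)
                        return NULL;

                /* Pass 2: receive one 32-bit TPC mask per GPC. */
                args.mask_buf_addr = (uintptr_t)masks;
                if (ioctl(ctrl_fd, NVGPU_GPU_IOCTL_GET_TPC_MASKS, &args) != 0) {
                        free(masks);
                        return NULL;
                }

                *n_gpcs = args.mask_buf_size / sizeof(uint32_t);
                return masks;
        }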

View File

@@ -1774,13 +1774,33 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
        gpu->compression_page_size = g->mm.pmu.vm.compression_page_size;
        gpu->pde_coverage_bit_count = g->mm.pmu.vm.pde_stride_shift;

        gpu->available_big_page_sizes = gpu->big_page_size;
        if (g->ops.mm.get_big_page_sizes)
                gpu->available_big_page_sizes |= g->ops.mm.get_big_page_sizes();

        gpu->flags = NVGPU_GPU_FLAGS_SUPPORT_PARTIAL_MAPPINGS
                | NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS
                | NVGPU_GPU_FLAGS_SUPPORT_SYNC_FENCE_FDS;

        if (IS_ENABLED(CONFIG_TEGRA_GK20A) &&
            gk20a_platform_has_syncpoints(g->dev))
                gpu->flags |= NVGPU_GPU_FLAGS_HAS_SYNCPOINTS;

        if (IS_ENABLED(CONFIG_GK20A_CYCLE_STATS))
                gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS;

        gpu->gpc_mask = 1;

        g->ops.gr.detect_sm_arch(g);

        gpu->gpu_ioctl_nr_last = NVGPU_GPU_IOCTL_LAST;
        gpu->tsg_ioctl_nr_last = NVGPU_TSG_IOCTL_LAST;
        gpu->dbg_gpu_ioctl_nr_last = NVGPU_DBG_GPU_IOCTL_LAST;
        gpu->ioctl_channel_nr_last = NVGPU_IOCTL_CHANNEL_LAST;
        gpu->as_ioctl_nr_last = NVGPU_AS_IOCTL_LAST;
        gpu->gpu_va_bit_count = 40;

        gpu->reserved = 0;

        return 0;
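
A hedged user-space sketch of how the new characteristics fields might be
consumed. It relies on the pre-existing NVGPU_GPU_IOCTL_GET_CHARACTERISTICS
request; the buffer field names follow the existing nvgpu uapi and are not
part of this change.

        /* Illustrative only; assumes an already-opened GPU control fd. */
        #include <stdint.h>
        #include <stdio.h>
        #include <sys/ioctl.h>
        #include <linux/nvgpu.h>

        static void print_new_caps(int ctrl_fd)
        {
                struct nvgpu_gpu_characteristics chars = { 0 };
                struct nvgpu_gpu_get_characteristics req = {
                        .gpu_characteristics_buf_size = sizeof(chars),
                        .gpu_characteristics_buf_addr = (uintptr_t)&chars,
                };

                if (ioctl(ctrl_fd, NVGPU_GPU_IOCTL_GET_CHARACTERISTICS, &req) != 0)
                        return;

                printf("SM version 0x%x, SPA version 0x%x, %u warps per SM\n",
                       chars.sm_arch_sm_version, chars.sm_arch_spa_version,
                       chars.sm_arch_warp_count);
                printf("GPC mask 0x%x\n", chars.gpc_mask);

                if (chars.flags & NVGPU_GPU_FLAGS_SUPPORT_SYNC_FENCE_FDS)
                        printf("sync fence fds supported\n");
                if (chars.flags & NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS)
                        printf("cycle stats supported\n");
        }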

View File

@@ -137,6 +137,7 @@ struct gpu_ops {
                                struct gr_zcull_info *zcull_params);
                bool (*is_tpc_addr)(u32 addr);
                u32 (*get_tpc_num)(u32 addr);
                void (*detect_sm_arch)(struct gk20a *g);
        } gr;
        const char *name;
        struct {
@@ -304,7 +305,8 @@ struct gpu_ops {
                void (*l2_flush)(struct gk20a *g, bool invalidate);
                void (*tlb_invalidate)(struct vm_gk20a *vm);
                void (*set_big_page_size)(struct gk20a *g,
                                          void *inst_ptr, int size);
                u32 (*get_big_page_sizes)(void);
        } mm;
        struct {
                int (*prepare_ucode)(struct gk20a *g);

View File

@@ -3490,6 +3490,27 @@ int gr_gk20a_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr,
        return 0;
}

static void gr_gk20a_detect_sm_arch(struct gk20a *g)
{
        u32 v = gk20a_readl(g, gr_gpc0_tpc0_sm_arch_r());
        u32 raw_version = gr_gpc0_tpc0_sm_arch_spa_version_v(v);
        u32 version = 0;

        if (raw_version == gr_gpc0_tpc0_sm_arch_spa_version_smkepler_lp_v())
                version = 0x320; /* SM 3.2 */
        else
                gk20a_err(dev_from_gk20a(g), "Unknown SM version 0x%x\n",
                          raw_version);

        /* on Kepler, SM version == SPA version */
        g->gpu_characteristics.sm_arch_spa_version = version;
        g->gpu_characteristics.sm_arch_sm_version = version;
        g->gpu_characteristics.sm_arch_warp_count =
                gr_gpc0_tpc0_sm_arch_warp_count_v(v);
}

static int gr_gk20a_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr,
                                  struct zbc_entry *color_val, u32 index)
{
@@ -7328,5 +7349,6 @@ void gk20a_init_gr_ops(struct gpu_ops *gops)
        gops->gr.get_zcull_info = gr_gk20a_get_zcull_info;
        gops->gr.is_tpc_addr = gr_gk20a_is_tpc_addr;
        gops->gr.get_tpc_num = gr_gk20a_get_tpc_num;
        gops->gr.detect_sm_arch = gr_gk20a_detect_sm_arch;
}

View File

@@ -1886,6 +1886,22 @@ static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_f(u32 v)
{
        return (v & 0xffff) << 0;
}
static inline u32 gr_gpc0_tpc0_sm_arch_r(void)
{
        return 0x0050469c;
}
static inline u32 gr_gpc0_tpc0_sm_arch_warp_count_v(u32 r)
{
        return (r >> 0) & 0xff;
}
static inline u32 gr_gpc0_tpc0_sm_arch_spa_version_v(u32 r)
{
        return (r >> 8) & 0xf;
}
static inline u32 gr_gpc0_tpc0_sm_arch_spa_version_smkepler_lp_v(void)
{
        return 0x0000000c;
}
static inline u32 gr_gpc0_ppc0_pes_vsc_strem_r(void)
{
        return 0x00503018;
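
A short worked decode using the accessors above. The raw register value is
hypothetical and only illustrates the field layout (warp count in bits 7:0,
SPA version starting at bit 8).

        /* Hypothetical raw SM_ARCH readout, for illustration only. */
        u32 v = 0x00000c40;
        u32 warps = gr_gpc0_tpc0_sm_arch_warp_count_v(v);  /* 0xc40 & 0xff = 64 warps */
        u32 spa = gr_gpc0_tpc0_sm_arch_spa_version_v(v);   /* (0xc40 >> 8) & 0xf = 0xc */

        /* 0xc matches gr_gpc0_tpc0_sm_arch_spa_version_smkepler_lp_v(), which
         * gr_gk20a_detect_sm_arch() maps to SM version 0x320 (SM 3.2). */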

View File

@@ -748,6 +748,18 @@ static int gr_gm20b_load_ctxsw_ucode(struct gk20a *g)
#endif

static void gr_gm20b_detect_sm_arch(struct gk20a *g)
{
        u32 v = gk20a_readl(g, gr_gpc0_tpc0_sm_arch_r());

        g->gpu_characteristics.sm_arch_spa_version =
                gr_gpc0_tpc0_sm_arch_spa_version_v(v);
        g->gpu_characteristics.sm_arch_sm_version =
                gr_gpc0_tpc0_sm_arch_sm_version_v(v);
        g->gpu_characteristics.sm_arch_warp_count =
                gr_gpc0_tpc0_sm_arch_warp_count_v(v);
}

void gm20b_init_gr(struct gpu_ops *gops)
{
        gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu;
@@ -781,4 +793,5 @@ void gm20b_init_gr(struct gpu_ops *gops)
        gops->gr.get_zcull_info = gr_gk20a_get_zcull_info;
        gops->gr.is_tpc_addr = gr_gm20b_is_tpc_addr;
        gops->gr.get_tpc_num = gr_gm20b_get_tpc_num;
        gops->gr.detect_sm_arch = gr_gm20b_detect_sm_arch;
}

View File

@@ -1878,6 +1878,22 @@ static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_f(u32 v)
{
        return (v & 0xffff) << 0;
}
static inline u32 gr_gpc0_tpc0_sm_arch_r(void)
{
        return 0x0050469c;
}
static inline u32 gr_gpc0_tpc0_sm_arch_warp_count_v(u32 r)
{
        return (r >> 0) & 0xff;
}
static inline u32 gr_gpc0_tpc0_sm_arch_spa_version_v(u32 r)
{
        return (r >> 8) & 0xfff;
}
static inline u32 gr_gpc0_tpc0_sm_arch_sm_version_v(u32 r)
{
        return (r >> 20) & 0xfff;
}
static inline u32 gr_gpc0_ppc0_pes_vsc_strem_r(void)
{
        return 0x00503018;
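
For the Maxwell layout above (warp count 7:0, SPA version 19:8, SM version
31:20), a hedged decode with a hypothetical raw value; the numbers below are
illustrative only, not actual hardware readouts.

        /* Hypothetical raw SM_ARCH readout, for illustration only. */
        u32 v = 0x53035040;
        u32 warps = gr_gpc0_tpc0_sm_arch_warp_count_v(v);  /* v & 0xff = 0x40 */
        u32 spa = gr_gpc0_tpc0_sm_arch_spa_version_v(v);   /* (v >> 8) & 0xfff = 0x350 */
        u32 sm = gr_gpc0_tpc0_sm_arch_sm_version_v(v);     /* (v >> 20) & 0xfff = 0x530 */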

View File

@@ -280,6 +280,11 @@ static void gm20b_mm_set_big_page_size(struct gk20a *g,
        gk20a_dbg_fn("done");
}

u32 gm20b_mm_get_big_page_sizes(void)
{
        return SZ_64K | SZ_128K;
}

void gm20b_init_mm(struct gpu_ops *gops)
{
        gops->mm.set_sparse = gm20b_vm_put_sparse;
@@ -295,4 +300,5 @@ void gm20b_init_mm(struct gpu_ops *gops)
        gops->mm.l2_flush = gk20a_mm_l2_flush;
        gops->mm.tlb_invalidate = gk20a_mm_tlb_invalidate;
        gops->mm.set_big_page_size = gm20b_mm_set_big_page_size;
        gops->mm.get_big_page_sizes = gm20b_mm_get_big_page_sizes;
}
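
A hedged user-space sketch of consuming available_big_page_sizes, which
gk20a_init_gpu_characteristics() seeds with the default big page size and
ORs with the per-chip mask returned above. 'chars' is assumed to be a
struct nvgpu_gpu_characteristics already filled via
NVGPU_GPU_IOCTL_GET_CHARACTERISTICS.

        /* Illustrative fragment only; bit N set means page size 2^N bytes. */
        unsigned int n;

        for (n = 0; n < 32; n++)
                if (chars.available_big_page_sizes & (1u << n))
                        printf("big page size %u KiB available\n",
                               (1u << n) / 1024);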

View File

@@ -100,6 +100,10 @@ struct nvgpu_gpu_zbc_query_table_args {
#define NVGPU_GPU_FLAGS_SUPPORT_PARTIAL_MAPPINGS (1 << 1)
/* MAP_BUFFER_EX with sparse allocations */
#define NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS (1 << 2)
/* sync fence FDs are available in, e.g., submit_gpfifo */
#define NVGPU_GPU_FLAGS_SUPPORT_SYNC_FENCE_FDS (1 << 3)
/* NVGPU_IOCTL_CHANNEL_CYCLE_STATS is available */
#define NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS (1 << 4)

struct nvgpu_gpu_characteristics {
        __u32 arch;
@@ -111,14 +115,18 @@ struct nvgpu_gpu_characteristics {
        __u64 L2_cache_size; /* bytes */
        __u64 on_board_video_memory_size; /* bytes */

        __u32 num_tpc_per_gpc; /* the architectural maximum */
        __u32 bus_type;

        __u32 big_page_size; /* the default big page size */
        __u32 compression_page_size;
        __u32 pde_coverage_bit_count;

        /* bit N set ==> big page size 2^N is available in
           NVGPU_GPU_IOCTL_ALLOC_AS. The default big page size is
           always available regardless of this field. */
        __u32 available_big_page_sizes;

        __u64 flags;
@@ -129,6 +137,23 @@ struct nvgpu_gpu_characteristics {
        __u32 inline_to_memory_class;
        __u32 dma_copy_class;

        __u32 gpc_mask; /* enabled GPCs */

        __u32 sm_arch_sm_version; /* sm version */
        __u32 sm_arch_spa_version; /* sm instruction set */
        __u32 sm_arch_warp_count;

        /* IOCTL interface levels by service. -1 if not supported */
        __s16 gpu_ioctl_nr_last;
        __s16 tsg_ioctl_nr_last;
        __s16 dbg_gpu_ioctl_nr_last;
        __s16 ioctl_channel_nr_last;
        __s16 as_ioctl_nr_last;
        __u8 gpu_va_bit_count;
        __u8 reserved;

        /* Notes:
           - This struct can be safely appended with new fields. However, always
             keep the structure size multiple of 8 and make sure that the binary
@@ -197,6 +222,22 @@ struct nvgpu_gpu_open_tsg_args {
        __u32 reserved; /* must be zero */
};

struct nvgpu_gpu_get_tpc_masks_args {
        /* [in]  TPC mask buffer size reserved by userspace. Should be
                 at least sizeof(__u32) * fls(gpc_mask) to receive TPC
                 mask for each GPC.
           [out] full kernel buffer size
        */
        __u32 mask_buf_size;
        __u32 reserved;

        /* [in]  pointer to TPC mask buffer. It will receive one
                 32-bit TPC mask per GPC or 0 if GPC is not enabled or
                 not present. This parameter is ignored if
                 mask_buf_size is 0. */
        __u64 mask_buf_addr;
};

#define NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE \
        _IOR(NVGPU_GPU_IOCTL_MAGIC, 1, struct nvgpu_gpu_zcull_get_ctx_size_args)
#define NVGPU_GPU_IOCTL_ZCULL_GET_INFO \
@@ -215,9 +256,11 @@ struct nvgpu_gpu_open_tsg_args {
        _IOWR(NVGPU_GPU_IOCTL_MAGIC, 8, struct nvgpu_alloc_as_args)
#define NVGPU_GPU_IOCTL_OPEN_TSG \
        _IOWR(NVGPU_GPU_IOCTL_MAGIC, 9, struct nvgpu_gpu_open_tsg_args)
#define NVGPU_GPU_IOCTL_GET_TPC_MASKS \
        _IOWR(NVGPU_GPU_IOCTL_MAGIC, 10, struct nvgpu_gpu_get_tpc_masks_args)

#define NVGPU_GPU_IOCTL_LAST \
        _IOC_NR(NVGPU_GPU_IOCTL_GET_TPC_MASKS)

#define NVGPU_GPU_IOCTL_MAX_ARG_SIZE \
        sizeof(struct nvgpu_gpu_prepare_compressible_read_args)
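
A hedged sketch of the run-time feature check the interface levels enable:
compare the level reported by the kernel against the request number this
header was compiled with. 'chars' is assumed to come from
NVGPU_GPU_IOCTL_GET_CHARACTERISTICS; a value of -1 means the service is not
supported at all.

        /* Illustrative fragment only. */
        if (chars.gpu_ioctl_nr_last >= (int)_IOC_NR(NVGPU_GPU_IOCTL_GET_TPC_MASKS))
                printf("kernel implements NVGPU_GPU_IOCTL_GET_TPC_MASKS\n");
        else
                printf("GET_TPC_MASKS not supported by this kernel\n");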