diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
index 938318443..3bcbdfd94 100644
--- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
@@ -221,6 +221,30 @@ clean_up:
 	return err;
 }
 
+static int gk20a_ctrl_get_tpc_masks(struct gk20a *g,
+		struct nvgpu_gpu_get_tpc_masks_args *args)
+{
+	struct gr_gk20a *gr = &g->gr;
+	int err = 0;
+	const u32 gpc_tpc_mask_size = sizeof(u32) * gr->gpc_count;
+
+	if (args->mask_buf_size > 0) {
+		size_t write_size = gpc_tpc_mask_size;
+
+		if (write_size > args->mask_buf_size)
+			write_size = args->mask_buf_size;
+
+		if (copy_to_user((void __user *)(uintptr_t)args->mask_buf_addr,
+				 gr->gpc_tpc_mask, write_size))
+			err = -EFAULT;
+	}
+
+	if (err == 0)
+		args->mask_buf_size = gpc_tpc_mask_size;
+
+	return err;
+}
+
 long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	struct platform_device *dev = filp->private_data;
@@ -390,6 +414,10 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
 		err = gk20a_ctrl_open_tsg(g,
 			(struct nvgpu_gpu_open_tsg_args *)buf);
 		break;
+	case NVGPU_GPU_IOCTL_GET_TPC_MASKS:
+		err = gk20a_ctrl_get_tpc_masks(g,
+			(struct nvgpu_gpu_get_tpc_masks_args *)buf);
+		break;
 	default:
 		dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd);
 		err = -ENOTTY;
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index ef0f6a8c5..1bd1c8986 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -1774,13 +1774,33 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
 	gpu->compression_page_size = g->mm.pmu.vm.compression_page_size;
 	gpu->pde_coverage_bit_count = g->mm.pmu.vm.pde_stride_shift;
 
+	gpu->available_big_page_sizes = gpu->big_page_size;
+	if (g->ops.mm.get_big_page_sizes)
+		gpu->available_big_page_sizes |= g->ops.mm.get_big_page_sizes();
+
 	gpu->flags = NVGPU_GPU_FLAGS_SUPPORT_PARTIAL_MAPPINGS
-		| NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS;
+		| NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS
+		| NVGPU_GPU_FLAGS_SUPPORT_SYNC_FENCE_FDS;
 
 	if (IS_ENABLED(CONFIG_TEGRA_GK20A) &&
 	    gk20a_platform_has_syncpoints(g->dev))
 		gpu->flags |= NVGPU_GPU_FLAGS_HAS_SYNCPOINTS;
 
+	if (IS_ENABLED(CONFIG_GK20A_CYCLE_STATS))
+		gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS;
+
+	gpu->gpc_mask = 1;
+
+	g->ops.gr.detect_sm_arch(g);
+
+	gpu->gpu_ioctl_nr_last = NVGPU_GPU_IOCTL_LAST;
+	gpu->tsg_ioctl_nr_last = NVGPU_TSG_IOCTL_LAST;
+	gpu->dbg_gpu_ioctl_nr_last = NVGPU_DBG_GPU_IOCTL_LAST;
+	gpu->ioctl_channel_nr_last = NVGPU_IOCTL_CHANNEL_LAST;
+	gpu->as_ioctl_nr_last = NVGPU_AS_IOCTL_LAST;
+
+	gpu->gpu_va_bit_count = 40;
+
 	gpu->reserved = 0;
 
 	return 0;
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index a56614ab0..3f070a587 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -137,6 +137,7 @@ struct gpu_ops {
 				    struct gr_zcull_info *zcull_params);
 		bool (*is_tpc_addr)(u32 addr);
 		u32 (*get_tpc_num)(u32 addr);
+		void (*detect_sm_arch)(struct gk20a *g);
 	} gr;
 	const char *name;
 	struct {
@@ -304,7 +305,8 @@ struct gpu_ops {
 		void (*l2_flush)(struct gk20a *g, bool invalidate);
 		void (*tlb_invalidate)(struct vm_gk20a *vm);
 		void (*set_big_page_size)(struct gk20a *g,
-				void *inst_ptr, int size);
+					  void *inst_ptr, int size);
+		u32 (*get_big_page_sizes)(void);
 	} mm;
 	struct {
 		int (*prepare_ucode)(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 2c62c790f..da257cd4e 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -3490,6 +3490,27 @@ int gr_gk20a_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr,
 	return 0;
 }
 
+static void gr_gk20a_detect_sm_arch(struct gk20a *g)
+{
+	u32 v = gk20a_readl(g, gr_gpc0_tpc0_sm_arch_r());
+
+	u32 raw_version = gr_gpc0_tpc0_sm_arch_spa_version_v(v);
+	u32 version = 0;
+
+	if (raw_version == gr_gpc0_tpc0_sm_arch_spa_version_smkepler_lp_v())
+		version = 0x320; /* SM 3.2 */
+	else
+		gk20a_err(dev_from_gk20a(g), "Unknown SM version 0x%x\n",
+			  raw_version);
+
+	/* on Kepler, SM version == SPA version */
+	g->gpu_characteristics.sm_arch_spa_version = version;
+	g->gpu_characteristics.sm_arch_sm_version = version;
+
+	g->gpu_characteristics.sm_arch_warp_count =
+		gr_gpc0_tpc0_sm_arch_warp_count_v(v);
+}
+
 static int gr_gk20a_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr,
 				  struct zbc_entry *color_val, u32 index)
 {
@@ -7328,5 +7349,6 @@ void gk20a_init_gr_ops(struct gpu_ops *gops)
 	gops->gr.get_zcull_info = gr_gk20a_get_zcull_info;
 	gops->gr.is_tpc_addr = gr_gk20a_is_tpc_addr;
 	gops->gr.get_tpc_num = gr_gk20a_get_tpc_num;
+	gops->gr.detect_sm_arch = gr_gk20a_detect_sm_arch;
 }
 
diff --git a/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h
index 3b16df583..f89bb2a40 100644
--- a/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h
@@ -1886,6 +1886,22 @@ static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_f(u32 v)
 {
 	return (v & 0xffff) << 0;
 }
+static inline u32 gr_gpc0_tpc0_sm_arch_r(void)
+{
+	return 0x0050469c;
+}
+static inline u32 gr_gpc0_tpc0_sm_arch_warp_count_v(u32 r)
+{
+	return (r >> 0) & 0xff;
+}
+static inline u32 gr_gpc0_tpc0_sm_arch_spa_version_v(u32 r)
+{
+	return (r >> 8) & 0xf;
+}
+static inline u32 gr_gpc0_tpc0_sm_arch_spa_version_smkepler_lp_v(void)
+{
+	return 0x0000000c;
+}
 static inline u32 gr_gpc0_ppc0_pes_vsc_strem_r(void)
 {
 	return 0x00503018;
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index d40e9d524..8f0561814 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -748,6 +748,18 @@ static int gr_gm20b_load_ctxsw_ucode(struct gk20a *g)
 
 #endif
 
+static void gr_gm20b_detect_sm_arch(struct gk20a *g)
+{
+	u32 v = gk20a_readl(g, gr_gpc0_tpc0_sm_arch_r());
+
+	g->gpu_characteristics.sm_arch_spa_version =
+		gr_gpc0_tpc0_sm_arch_spa_version_v(v);
+	g->gpu_characteristics.sm_arch_sm_version =
+		gr_gpc0_tpc0_sm_arch_sm_version_v(v);
+	g->gpu_characteristics.sm_arch_warp_count =
+		gr_gpc0_tpc0_sm_arch_warp_count_v(v);
+}
+
 void gm20b_init_gr(struct gpu_ops *gops)
 {
 	gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu;
@@ -781,4 +793,5 @@ void gm20b_init_gr(struct gpu_ops *gops)
 	gops->gr.get_zcull_info = gr_gk20a_get_zcull_info;
 	gops->gr.is_tpc_addr = gr_gm20b_is_tpc_addr;
 	gops->gr.get_tpc_num = gr_gm20b_get_tpc_num;
+	gops->gr.detect_sm_arch = gr_gm20b_detect_sm_arch;
 }
diff --git a/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h
index 0dae58966..8e4308a31 100644
--- a/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h
@@ -1878,6 +1878,22 @@ static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_f(u32 v)
 {
 	return (v & 0xffff) << 0;
 }
+static inline u32 gr_gpc0_tpc0_sm_arch_r(void)
+{
+	return 0x0050469c;
+}
+static inline u32 gr_gpc0_tpc0_sm_arch_warp_count_v(u32 r)
+{
+	return (r >> 0) & 0xff;
+}
+static inline u32 gr_gpc0_tpc0_sm_arch_spa_version_v(u32 r)
+{
+	return (r >> 8) & 0xfff;
+}
+static inline u32 gr_gpc0_tpc0_sm_arch_sm_version_v(u32 r)
+{
+	return (r >> 20) & 0xfff;
+}
 static inline u32 gr_gpc0_ppc0_pes_vsc_strem_r(void)
 {
 	return 0x00503018;
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
index 030701b97..678ef4fda 100644
--- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
@@ -280,6 +280,11 @@ static void gm20b_mm_set_big_page_size(struct gk20a *g,
 	gk20a_dbg_fn("done");
 }
 
+u32 gm20b_mm_get_big_page_sizes(void)
+{
+	return SZ_64K | SZ_128K;
+}
+
 void gm20b_init_mm(struct gpu_ops *gops)
 {
 	gops->mm.set_sparse = gm20b_vm_put_sparse;
@@ -295,4 +300,5 @@ void gm20b_init_mm(struct gpu_ops *gops)
 	gops->mm.l2_flush = gk20a_mm_l2_flush;
 	gops->mm.tlb_invalidate = gk20a_mm_tlb_invalidate;
 	gops->mm.set_big_page_size = gm20b_mm_set_big_page_size;
+	gops->mm.get_big_page_sizes = gm20b_mm_get_big_page_sizes;
 }
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index 6bde9d415..42673820c 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -100,6 +100,10 @@ struct nvgpu_gpu_zbc_query_table_args {
 #define NVGPU_GPU_FLAGS_SUPPORT_PARTIAL_MAPPINGS	(1 << 1)
 /* MAP_BUFFER_EX with sparse allocations */
 #define NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS		(1 << 2)
+/* sync fence FDs are available in, e.g., submit_gpfifo */
+#define NVGPU_GPU_FLAGS_SUPPORT_SYNC_FENCE_FDS		(1 << 3)
+/* NVGPU_IOCTL_CHANNEL_CYCLE_STATS is available */
+#define NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS		(1 << 4)
 
 struct nvgpu_gpu_characteristics {
 	__u32 arch;
@@ -111,14 +115,18 @@ struct nvgpu_gpu_characteristics {
 	__u64 L2_cache_size;               /* bytes */
 	__u64 on_board_video_memory_size;  /* bytes */
 
-	__u32 num_tpc_per_gpc;
+	__u32 num_tpc_per_gpc; /* the architectural maximum */
 	__u32 bus_type;
 
-	__u32 big_page_size;
+	__u32 big_page_size; /* the default big page size */
 	__u32 compression_page_size;
 
 	__u32 pde_coverage_bit_count;
-	__u32 reserved;
+
+	/* bit N set ==> big page size 2^N is available in
+	   NVGPU_GPU_IOCTL_ALLOC_AS. The default big page size is
+	   always available regardless of this field. */
+	__u32 available_big_page_sizes;
 
 	__u64 flags;
 
@@ -129,6 +137,23 @@ struct nvgpu_gpu_characteristics {
 	__u32 inline_to_memory_class;
 	__u32 dma_copy_class;
 
+	__u32 gpc_mask; /* enabled GPCs */
+
+	__u32 sm_arch_sm_version; /* sm version */
+	__u32 sm_arch_spa_version; /* sm instruction set */
+	__u32 sm_arch_warp_count;
+
+	/* IOCTL interface levels by service. -1 if not supported */
+	__s16 gpu_ioctl_nr_last;
+	__s16 tsg_ioctl_nr_last;
+	__s16 dbg_gpu_ioctl_nr_last;
+	__s16 ioctl_channel_nr_last;
+	__s16 as_ioctl_nr_last;
+
+	__u8 gpu_va_bit_count;
+
+	__u8 reserved;
+
 	/* Notes:
 	   - This struct can be safely appended with new fields. However, always
 	     keep the structure size multiple of 8 and make sure that the binary
@@ -197,6 +222,22 @@ struct nvgpu_gpu_open_tsg_args {
 	__u32 reserved;			/* must be zero */
 };
 
+struct nvgpu_gpu_get_tpc_masks_args {
+	/* [in]  TPC mask buffer size reserved by userspace. Should be
+		 at least sizeof(__u32) * fls(gpc_mask) to receive TPC
+		 mask for each GPC.
+	   [out] full kernel buffer size
+	*/
+	__u32 mask_buf_size;
+	__u32 reserved;
+
+	/* [in]  pointer to TPC mask buffer. It will receive one
+		 32-bit TPC mask per GPC or 0 if GPC is not enabled or
+		 not present. This parameter is ignored if
+		 mask_buf_size is 0. */
+	__u64 mask_buf_addr;
+};
+
 #define NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE \
 	_IOR(NVGPU_GPU_IOCTL_MAGIC, 1, struct nvgpu_gpu_zcull_get_ctx_size_args)
 #define NVGPU_GPU_IOCTL_ZCULL_GET_INFO \
@@ -215,9 +256,11 @@ struct nvgpu_gpu_open_tsg_args {
 	_IOWR(NVGPU_GPU_IOCTL_MAGIC, 8, struct nvgpu_alloc_as_args)
 #define NVGPU_GPU_IOCTL_OPEN_TSG \
 	_IOWR(NVGPU_GPU_IOCTL_MAGIC, 9, struct nvgpu_gpu_open_tsg_args)
+#define NVGPU_GPU_IOCTL_GET_TPC_MASKS \
+	_IOWR(NVGPU_GPU_IOCTL_MAGIC, 10, struct nvgpu_gpu_get_tpc_masks_args)
 
 #define NVGPU_GPU_IOCTL_LAST \
-	_IOC_NR(NVGPU_GPU_IOCTL_OPEN_TSG)
+	_IOC_NR(NVGPU_GPU_IOCTL_GET_TPC_MASKS)
 
 #define NVGPU_GPU_IOCTL_MAX_ARG_SIZE \
 	sizeof(struct nvgpu_gpu_prepare_compressible_read_args)
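
The new ioctl follows the common two-call size negotiation: a first call with mask_buf_size == 0 skips the copy and reports the full buffer size (one __u32 per GPC), and a second call with a suitably sized buffer retrieves the masks. A minimal userspace sketch of that pattern follows; the /dev/nvhost-ctrl-gpu node name and the bare-bones error handling are illustrative assumptions, not part of this patch.

/* Two-call usage of NVGPU_GPU_IOCTL_GET_TPC_MASKS (sketch). */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

int main(void)
{
	struct nvgpu_gpu_get_tpc_masks_args args = { 0 };
	uint32_t *masks;
	uint32_t i, n;
	int fd = open("/dev/nvhost-ctrl-gpu", O_RDWR); /* assumed node name */

	if (fd < 0)
		return 1;

	/* First call: mask_buf_size == 0, so the kernel writes nothing
	 * and returns the full buffer size in mask_buf_size. */
	if (ioctl(fd, NVGPU_GPU_IOCTL_GET_TPC_MASKS, &args) < 0)
		return 1;

	n = args.mask_buf_size / sizeof(uint32_t); /* one mask per GPC */
	masks = calloc(n, sizeof(uint32_t));

	/* Second call: the kernel copies at most mask_buf_size bytes of
	 * per-GPC TPC masks into the buffer. */
	args.mask_buf_addr = (uintptr_t)masks;
	if (ioctl(fd, NVGPU_GPU_IOCTL_GET_TPC_MASKS, &args) < 0)
		return 1;

	for (i = 0; i < n; i++)
		printf("GPC %u: TPC mask 0x%x\n", i, masks[i]);

	free(masks);
	close(fd);
	return 0;
}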
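The available_big_page_sizes field works because every supported big page size is a power of two: a size doubles as its own bit in the mask. That is why gm20b_mm_get_big_page_sizes() can return SZ_64K | SZ_128K (bits 16 and 17) and gk20a_init_gpu_characteristics() can seed the mask with the default big_page_size directly. A small sketch of decoding the mask on the userspace side, per the "bit N set ==> 2^N available" comment in the header:

#include <stdio.h>
#include <stdint.h>

static void print_big_page_sizes(uint32_t available)
{
	unsigned int n;

	/* Bit n set means a big page size of 2^n bytes is accepted by
	 * NVGPU_GPU_IOCTL_ALLOC_AS. */
	for (n = 0; n < 32; n++)
		if (available & (1u << n))
			printf("big page size %u KiB available\n",
			       (1u << n) / 1024);
}

int main(void)
{
	/* Example value: what a gm20b would report, 64K as the default
	 * plus 128K from gops->mm.get_big_page_sizes(). */
	print_big_page_sizes((1u << 16) | (1u << 17));
	return 0;
}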
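On gk20a the register's SPA version field is only four bits, so gr_gk20a_detect_sm_arch() translates the known smkepler_lp value (0xc) into 0x320 by hand; the inline comment suggests one hex digit per version component (SM 3.2). Assuming that encoding holds for values reported in sm_arch_sm_version, a userspace decoder could look like the following sketch:

#include <stdio.h>
#include <stdint.h>

/* Decode an 0xMm0-style version number. The digit layout is an
 * assumption inferred from the 0x320 == "SM 3.2" constant in
 * gr_gk20a_detect_sm_arch(), not something the uapi guarantees. */
static void print_sm_version(uint32_t sm_arch_sm_version)
{
	printf("SM %u.%u\n",
	       (sm_arch_sm_version >> 8) & 0xf,
	       (sm_arch_sm_version >> 4) & 0xf);
}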
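The *_ioctl_nr_last fields let userspace probe the interface level once, instead of issuing an ioctl and special-casing -ENOTTY. A sketch, assuming the characteristics struct was already fetched through NVGPU_GPU_IOCTL_GET_CHARACTERISTICS (not shown in this patch):

#include <stdbool.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

static bool gpu_ioctl_supported(const struct nvgpu_gpu_characteristics *chars,
				unsigned int cmd)
{
	/* -1 means the service is absent; otherwise the kernel handles
	 * every request number up to and including gpu_ioctl_nr_last
	 * (with this patch, NVGPU_GPU_IOCTL_LAST == 10). */
	return chars->gpu_ioctl_nr_last >= 0 &&
	       _IOC_NR(cmd) <= (unsigned int)chars->gpu_ioctl_nr_last;
}

/* Usage: gpu_ioctl_supported(&chars, NVGPU_GPU_IOCTL_GET_TPC_MASKS)
 * returns true exactly when the kernel is new enough for this patch. */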