diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
index 938318443..3bcbdfd94 100644
--- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
@@ -221,6 +221,30 @@ clean_up:
 	return err;
 }
 
+static int gk20a_ctrl_get_tpc_masks(struct gk20a *g,
+		struct nvgpu_gpu_get_tpc_masks_args *args)
+{
+	struct gr_gk20a *gr = &g->gr;
+	int err = 0;
+	const u32 gpc_tpc_mask_size = sizeof(u32) * gr->gpc_count;
+
+	if (args->mask_buf_size > 0) {
+		size_t write_size = gpc_tpc_mask_size;
+
+		if (write_size > args->mask_buf_size)
+			write_size = args->mask_buf_size;
+
+		if (copy_to_user((void __user *)(uintptr_t)args->mask_buf_addr,
+				 gr->gpc_tpc_mask, write_size))
+			err = -EFAULT;
+	}
+
+	if (err == 0)
+		args->mask_buf_size = gpc_tpc_mask_size;
+
+	return err;
+}
+
 long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	struct platform_device *dev = filp->private_data;
@@ -390,6 +414,10 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
 		err = gk20a_ctrl_open_tsg(g,
 			(struct nvgpu_gpu_open_tsg_args *)buf);
 		break;
+	case NVGPU_GPU_IOCTL_GET_TPC_MASKS:
+		err = gk20a_ctrl_get_tpc_masks(g,
+			(struct nvgpu_gpu_get_tpc_masks_args *)buf);
+		break;
 	default:
 		dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd);
 		err = -ENOTTY;
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index ef0f6a8c5..1bd1c8986 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -1774,13 +1774,33 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
 	gpu->compression_page_size = g->mm.pmu.vm.compression_page_size;
 	gpu->pde_coverage_bit_count = g->mm.pmu.vm.pde_stride_shift;
 
+	gpu->available_big_page_sizes = gpu->big_page_size;
+	if (g->ops.mm.get_big_page_sizes)
+		gpu->available_big_page_sizes |= g->ops.mm.get_big_page_sizes();
+
 	gpu->flags = NVGPU_GPU_FLAGS_SUPPORT_PARTIAL_MAPPINGS
-		| NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS;
+		| NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS
+		| NVGPU_GPU_FLAGS_SUPPORT_SYNC_FENCE_FDS;
 
 	if (IS_ENABLED(CONFIG_TEGRA_GK20A) &&
 	    gk20a_platform_has_syncpoints(g->dev))
 		gpu->flags |= NVGPU_GPU_FLAGS_HAS_SYNCPOINTS;
 
+	if (IS_ENABLED(CONFIG_GK20A_CYCLE_STATS))
+		gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS;
+
+	gpu->gpc_mask = 1;
+
+	g->ops.gr.detect_sm_arch(g);
+
+	gpu->gpu_ioctl_nr_last = NVGPU_GPU_IOCTL_LAST;
+	gpu->tsg_ioctl_nr_last = NVGPU_TSG_IOCTL_LAST;
+	gpu->dbg_gpu_ioctl_nr_last = NVGPU_DBG_GPU_IOCTL_LAST;
+	gpu->ioctl_channel_nr_last = NVGPU_IOCTL_CHANNEL_LAST;
+	gpu->as_ioctl_nr_last = NVGPU_AS_IOCTL_LAST;
+
+	gpu->gpu_va_bit_count = 40;
+
 	gpu->reserved = 0;
 
 	return 0;
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index a56614ab0..3f070a587 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -137,6 +137,7 @@ struct gpu_ops {
 				    struct gr_zcull_info *zcull_params);
 		bool (*is_tpc_addr)(u32 addr);
 		u32 (*get_tpc_num)(u32 addr);
+		void (*detect_sm_arch)(struct gk20a *g);
 	} gr;
 	const char *name;
 	struct {
@@ -304,7 +305,8 @@ struct gpu_ops {
 		void (*l2_flush)(struct gk20a *g, bool invalidate);
 		void (*tlb_invalidate)(struct vm_gk20a *vm);
 		void (*set_big_page_size)(struct gk20a *g,
-				void *inst_ptr, int size);
+					  void *inst_ptr, int size);
+		u32 (*get_big_page_sizes)(void);
 	} mm;
 	struct {
 		int (*prepare_ucode)(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 2c62c790f..da257cd4e 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -3490,6 +3490,27 @@ int gr_gk20a_get_zcull_info(struct gk20a *g, struct gr_gk20a *gr,
 	return 0;
 }
 
+static void gr_gk20a_detect_sm_arch(struct gk20a *g)
+{
+	u32 v = gk20a_readl(g, gr_gpc0_tpc0_sm_arch_r());
+
+	u32 raw_version = gr_gpc0_tpc0_sm_arch_spa_version_v(v);
+	u32 version = 0;
+
+	if (raw_version == gr_gpc0_tpc0_sm_arch_spa_version_smkepler_lp_v())
+		version = 0x320; /* SM 3.2 */
+	else
+		gk20a_err(dev_from_gk20a(g), "Unknown SM version 0x%x\n",
+			  raw_version);
+
+	/* on Kepler, SM version == SPA version */
+	g->gpu_characteristics.sm_arch_spa_version = version;
+	g->gpu_characteristics.sm_arch_sm_version = version;
+
+	g->gpu_characteristics.sm_arch_warp_count =
+		gr_gpc0_tpc0_sm_arch_warp_count_v(v);
+}
+
 static int gr_gk20a_add_zbc_color(struct gk20a *g, struct gr_gk20a *gr,
 				  struct zbc_entry *color_val, u32 index)
 {
@@ -7328,5 +7349,6 @@ void gk20a_init_gr_ops(struct gpu_ops *gops)
 	gops->gr.get_zcull_info = gr_gk20a_get_zcull_info;
 	gops->gr.is_tpc_addr = gr_gk20a_is_tpc_addr;
 	gops->gr.get_tpc_num = gr_gk20a_get_tpc_num;
+	gops->gr.detect_sm_arch = gr_gk20a_detect_sm_arch;
 }
 
diff --git a/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h
index 3b16df583..f89bb2a40 100644
--- a/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/hw_gr_gk20a.h
@@ -1886,6 +1886,22 @@ static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_f(u32 v)
 {
 	return (v & 0xffff) << 0;
 }
+static inline u32 gr_gpc0_tpc0_sm_arch_r(void)
+{
+	return 0x0050469c;
+}
+static inline u32 gr_gpc0_tpc0_sm_arch_warp_count_v(u32 r)
+{
+	return (r >> 0) & 0xff;
+}
+static inline u32 gr_gpc0_tpc0_sm_arch_spa_version_v(u32 r)
+{
+	return (r >> 8) & 0xf;
+}
+static inline u32 gr_gpc0_tpc0_sm_arch_spa_version_smkepler_lp_v(void)
+{
+	return 0x0000000c;
+}
 static inline u32 gr_gpc0_ppc0_pes_vsc_strem_r(void)
 {
 	return 0x00503018;
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index d40e9d524..8f0561814 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -748,6 +748,18 @@ static int gr_gm20b_load_ctxsw_ucode(struct gk20a *g)
 
 #endif
 
+static void gr_gm20b_detect_sm_arch(struct gk20a *g)
+{
+	u32 v = gk20a_readl(g, gr_gpc0_tpc0_sm_arch_r());
+
+	g->gpu_characteristics.sm_arch_spa_version =
+		gr_gpc0_tpc0_sm_arch_spa_version_v(v);
+	g->gpu_characteristics.sm_arch_sm_version =
+		gr_gpc0_tpc0_sm_arch_sm_version_v(v);
+	g->gpu_characteristics.sm_arch_warp_count =
+		gr_gpc0_tpc0_sm_arch_warp_count_v(v);
+}
+
 void gm20b_init_gr(struct gpu_ops *gops)
 {
 	gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu;
@@ -781,4 +793,5 @@ void gm20b_init_gr(struct gpu_ops *gops)
 	gops->gr.get_zcull_info = gr_gk20a_get_zcull_info;
 	gops->gr.is_tpc_addr = gr_gm20b_is_tpc_addr;
 	gops->gr.get_tpc_num = gr_gm20b_get_tpc_num;
+	gops->gr.detect_sm_arch = gr_gm20b_detect_sm_arch;
 }
diff --git a/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h
index 0dae58966..8e4308a31 100644
--- a/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/hw_gr_gm20b.h
@@ -1878,6 +1878,22 @@ static inline u32 gr_gpc0_tpc0_sm_cfg_sm_id_f(u32 v)
 {
 	return (v & 0xffff) << 0;
 }
+static inline u32 gr_gpc0_tpc0_sm_arch_r(void)
+{
+	return 0x0050469c;
+}
+static inline u32 gr_gpc0_tpc0_sm_arch_warp_count_v(u32 r)
+{
+	return (r >> 0) & 0xff;
+}
+static inline u32 gr_gpc0_tpc0_sm_arch_spa_version_v(u32 r)
+{
+	return (r >> 8) & 0xfff;
+}
+static inline u32 gr_gpc0_tpc0_sm_arch_sm_version_v(u32 r)
+{
+	return (r >> 20) & 0xfff;
+}
 static inline u32 gr_gpc0_ppc0_pes_vsc_strem_r(void)
 {
 	return 0x00503018;
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
index 030701b97..678ef4fda 100644
--- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
@@ -280,6 +280,11 @@ static void gm20b_mm_set_big_page_size(struct gk20a *g,
 	gk20a_dbg_fn("done");
 }
 
+u32 gm20b_mm_get_big_page_sizes(void)
+{
+	return SZ_64K | SZ_128K;
+}
+
 void gm20b_init_mm(struct gpu_ops *gops)
 {
 	gops->mm.set_sparse = gm20b_vm_put_sparse;
@@ -295,4 +300,5 @@ void gm20b_init_mm(struct gpu_ops *gops)
 	gops->mm.l2_flush = gk20a_mm_l2_flush;
 	gops->mm.tlb_invalidate = gk20a_mm_tlb_invalidate;
 	gops->mm.set_big_page_size = gm20b_mm_set_big_page_size;
+	gops->mm.get_big_page_sizes = gm20b_mm_get_big_page_sizes;
 }
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index 6bde9d415..42673820c 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -100,6 +100,10 @@ struct nvgpu_gpu_zbc_query_table_args {
 #define NVGPU_GPU_FLAGS_SUPPORT_PARTIAL_MAPPINGS	(1 << 1)
 /* MAP_BUFFER_EX with sparse allocations */
 #define NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS		(1 << 2)
+/* sync fence FDs are available in, e.g., submit_gpfifo */
+#define NVGPU_GPU_FLAGS_SUPPORT_SYNC_FENCE_FDS		(1 << 3)
+/* NVGPU_IOCTL_CHANNEL_CYCLE_STATS is available */
+#define NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS		(1 << 4)
 
 struct nvgpu_gpu_characteristics {
 	__u32 arch;
@@ -111,14 +115,18 @@ struct nvgpu_gpu_characteristics {
 	__u64 L2_cache_size;               /* bytes */
 	__u64 on_board_video_memory_size;  /* bytes */
 
-	__u32 num_tpc_per_gpc;
+	__u32 num_tpc_per_gpc; /* the architectural maximum */
 	__u32 bus_type;
 
-	__u32 big_page_size;
+	__u32 big_page_size; /* the default big page size */
 	__u32 compression_page_size;
 
 	__u32 pde_coverage_bit_count;
-	__u32 reserved;
+
+	/* bit N set ==> big page size 2^N is available in
+	   NVGPU_GPU_IOCTL_ALLOC_AS. The default big page size is
+	   always available regardless of this field. */
+	__u32 available_big_page_sizes;
 
 	__u64 flags;
 
@@ -129,6 +137,23 @@ struct nvgpu_gpu_characteristics {
 	__u32 inline_to_memory_class;
 	__u32 dma_copy_class;
 
+	__u32 gpc_mask; /* enabled GPCs */
+
+	__u32 sm_arch_sm_version; /* sm version */
+	__u32 sm_arch_spa_version; /* sm instruction set */
+	__u32 sm_arch_warp_count;
+
+	/* IOCTL interface levels by service. -1 if not supported */
+	__s16 gpu_ioctl_nr_last;
+	__s16 tsg_ioctl_nr_last;
+	__s16 dbg_gpu_ioctl_nr_last;
+	__s16 ioctl_channel_nr_last;
+	__s16 as_ioctl_nr_last;
+
+	__u8 gpu_va_bit_count;
+
+	__u8 reserved;
+
 	/* Notes:
 	   - This struct can be safely appended with new fields. However, always
 	     keep the structure size multiple of 8 and make sure that the binary
@@ -197,6 +222,22 @@ struct nvgpu_gpu_open_tsg_args {
 	__u32 reserved;			/* must be zero */
 };
 
+struct nvgpu_gpu_get_tpc_masks_args {
+	/* [in]  TPC mask buffer size reserved by userspace. Should be
+		 at least sizeof(__u32) * fls(gpc_mask) to receive TPC
+		 mask for each GPC.
+	   [out] full kernel buffer size
+	*/
+	__u32 mask_buf_size;
+	__u32 reserved;
+
+	/* [in]  pointer to TPC mask buffer. It will receive one
+		 32-bit TPC mask per GPC or 0 if GPC is not enabled or
+		 not present. This parameter is ignored if
+		 mask_buf_size is 0. */
+	__u64 mask_buf_addr;
+};
+
 #define NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE \
 	_IOR(NVGPU_GPU_IOCTL_MAGIC, 1, struct nvgpu_gpu_zcull_get_ctx_size_args)
 #define NVGPU_GPU_IOCTL_ZCULL_GET_INFO \
@@ -215,9 +256,11 @@ struct nvgpu_gpu_open_tsg_args {
 	_IOWR(NVGPU_GPU_IOCTL_MAGIC, 8, struct nvgpu_alloc_as_args)
 #define NVGPU_GPU_IOCTL_OPEN_TSG \
 	_IOWR(NVGPU_GPU_IOCTL_MAGIC, 9, struct nvgpu_gpu_open_tsg_args)
+#define NVGPU_GPU_IOCTL_GET_TPC_MASKS \
+	_IOWR(NVGPU_GPU_IOCTL_MAGIC, 10, struct nvgpu_gpu_get_tpc_masks_args)
 
 #define NVGPU_GPU_IOCTL_LAST \
-	_IOC_NR(NVGPU_GPU_IOCTL_OPEN_TSG)
+	_IOC_NR(NVGPU_GPU_IOCTL_GET_TPC_MASKS)
 
 #define NVGPU_GPU_IOCTL_MAX_ARG_SIZE \
 	sizeof(struct nvgpu_gpu_prepare_compressible_read_args)
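
The new ioctl follows the common two-call size negotiation: a first call with mask_buf_size == 0 skips the copy and reports the full buffer size (one __u32 per GPC), and a second call with a suitably sized buffer retrieves the masks. A minimal userspace sketch of that pattern follows; the /dev/nvhost-ctrl-gpu node name and the bare-bones error handling are illustrative assumptions, not part of this patch.

/* Two-call usage of NVGPU_GPU_IOCTL_GET_TPC_MASKS (sketch). */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

int main(void)
{
	struct nvgpu_gpu_get_tpc_masks_args args = { 0 };
	uint32_t *masks;
	uint32_t i, n;
	int fd = open("/dev/nvhost-ctrl-gpu", O_RDWR); /* assumed node name */

	if (fd < 0)
		return 1;

	/* First call: mask_buf_size == 0, so the kernel writes nothing
	 * and returns the full buffer size in mask_buf_size. */
	if (ioctl(fd, NVGPU_GPU_IOCTL_GET_TPC_MASKS, &args) < 0)
		return 1;

	n = args.mask_buf_size / sizeof(uint32_t); /* one mask per GPC */
	masks = calloc(n, sizeof(uint32_t));

	/* Second call: the kernel copies at most mask_buf_size bytes of
	 * per-GPC TPC masks into the buffer. */
	args.mask_buf_addr = (uintptr_t)masks;
	if (ioctl(fd, NVGPU_GPU_IOCTL_GET_TPC_MASKS, &args) < 0)
		return 1;

	for (i = 0; i < n; i++)
		printf("GPC %u: TPC mask 0x%x\n", i, masks[i]);

	free(masks);
	close(fd);
	return 0;
}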
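The available_big_page_sizes field works because every supported big page size is a power of two: a size doubles as its own bit in the mask. That is why gm20b_mm_get_big_page_sizes() can return SZ_64K | SZ_128K (bits 16 and 17) and gk20a_init_gpu_characteristics() can seed the mask with the default big_page_size directly. A small sketch of decoding the mask on the userspace side, per the "bit N set ==> 2^N available" comment in the header:

#include <stdio.h>
#include <stdint.h>

static void print_big_page_sizes(uint32_t available)
{
	unsigned int n;

	/* Bit n set means a big page size of 2^n bytes is accepted by
	 * NVGPU_GPU_IOCTL_ALLOC_AS. */
	for (n = 0; n < 32; n++)
		if (available & (1u << n))
			printf("big page size %u KiB available\n",
			       (1u << n) / 1024);
}

int main(void)
{
	/* Example value: what a gm20b would report, 64K as the default
	 * plus 128K from gops->mm.get_big_page_sizes(). */
	print_big_page_sizes((1u << 16) | (1u << 17));
	return 0;
}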
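On gk20a the register's SPA version field is only four bits, so gr_gk20a_detect_sm_arch() translates the known smkepler_lp value (0xc) into 0x320 by hand; the inline comment suggests one hex digit per version component (SM 3.2). Assuming that encoding holds for values reported in sm_arch_sm_version, a userspace decoder could look like the following sketch:

#include <stdio.h>
#include <stdint.h>

/* Decode an 0xMm0-style version number. The digit layout is an
 * assumption inferred from the 0x320 == "SM 3.2" constant in
 * gr_gk20a_detect_sm_arch(), not something the uapi guarantees. */
static void print_sm_version(uint32_t sm_arch_sm_version)
{
	printf("SM %u.%u\n",
	       (sm_arch_sm_version >> 8) & 0xf,
	       (sm_arch_sm_version >> 4) & 0xf);
}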
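The *_ioctl_nr_last fields let userspace probe the interface level once, instead of issuing an ioctl and special-casing -ENOTTY. A sketch, assuming the characteristics struct was already fetched through NVGPU_GPU_IOCTL_GET_CHARACTERISTICS (not shown in this patch):

#include <stdbool.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

static bool gpu_ioctl_supported(const struct nvgpu_gpu_characteristics *chars,
				unsigned int cmd)
{
	/* -1 means the service is absent; otherwise the kernel handles
	 * every request number up to and including gpu_ioctl_nr_last
	 * (with this patch, NVGPU_GPU_IOCTL_LAST == 10). */
	return chars->gpu_ioctl_nr_last >= 0 &&
	       _IOC_NR(cmd) <= (unsigned int)chars->gpu_ioctl_nr_last;
}

/* Usage: gpu_ioctl_supported(&chars, NVGPU_GPU_IOCTL_GET_TPC_MASKS)
 * returns true exactly when the kernel is new enough for this patch. */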