diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
index 5ca7c8069..74d83a7d2 100644
--- a/drivers/gpu/nvgpu/gk20a/as_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
@@ -37,8 +37,8 @@ static void release_as_share_id(struct gk20a_as *as, int id)
 	return;
 }
 
-static int gk20a_as_alloc_share(struct gk20a_as *as,
-				struct gk20a_as_share **out)
+int gk20a_as_alloc_share(struct gk20a_as *as,
+			 u32 flags, struct gk20a_as_share **out)
 {
 	struct gk20a *g = gk20a_from_as(as);
 	struct gk20a_as_share *as_share;
@@ -56,7 +56,7 @@ static int gk20a_as_alloc_share(struct gk20a_as *as,
 	as_share->ref_cnt.counter = 1;
 
 	/* this will set as_share->vm. */
-	err = g->ops.mm.vm_alloc_share(as_share);
+	err = g->ops.mm.vm_alloc_share(as_share, flags);
 	if (err)
 		goto failed;
 
@@ -186,7 +186,7 @@ int gk20a_as_dev_open(struct inode *inode, struct file *filp)
 		return err;
 	}
 
-	err = gk20a_as_alloc_share(&g->as, &as_share);
+	err = gk20a_as_alloc_share(&g->as, 0, &as_share);
 	if (err) {
 		gk20a_dbg_fn("failed to alloc share");
 		gk20a_put_client(g);
diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.h b/drivers/gpu/nvgpu/gk20a/as_gk20a.h
index 457678ce1..166000a83 100644
--- a/drivers/gpu/nvgpu/gk20a/as_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.h
@@ -42,5 +42,7 @@ int gk20a_as_release_share(struct gk20a_as_share *as_share);
 int gk20a_as_dev_open(struct inode *inode, struct file *filp);
 int gk20a_as_dev_release(struct inode *inode, struct file *filp);
 long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+int gk20a_as_alloc_share(struct gk20a_as *as,
+			 u32 flags, struct gk20a_as_share **out);
 
 #endif
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 0e8eb497a..bcc05079b 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -119,6 +119,10 @@ int channel_gk20a_commit_va(struct channel_gk20a *c)
 	gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
 		ram_in_adr_limit_hi_f(u64_hi32(c->vm->va_limit)));
 
+	if (c->g->ops.mm.set_big_page_size)
+		c->g->ops.mm.set_big_page_size(c->g, inst_ptr,
+			c->vm->gmmu_page_sizes[gmmu_page_size_big]);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
index ca587d006..6969a3a7f 100644
--- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
@@ -16,6 +16,8 @@
 
 #include
 #include
+#include <linux/anon_inodes.h>
+#include <linux/file.h>
 #include
 
 #include "gk20a.h"
@@ -148,6 +150,53 @@ static int gk20a_ctrl_mark_compressible_write(
 	return ret;
 }
 
+static int gk20a_ctrl_alloc_as(
+		struct gk20a *g,
+		struct nvgpu_alloc_as_args *args)
+{
+	struct platform_device *dev = g->dev;
+	struct gk20a_as_share *as_share;
+	int err;
+	int fd;
+	struct file *file;
+	char *name;
+
+	err = get_unused_fd_flags(O_RDWR);
+	if (err < 0)
+		return err;
+	fd = err;
+
+	name = kasprintf(GFP_KERNEL, "nvhost-%s-fd%d",
+			dev_name(&dev->dev), fd);
+
+	file = anon_inode_getfile(name, g->as.cdev.ops, NULL, O_RDWR);
+	kfree(name);
+	if (IS_ERR(file)) {
+		err = PTR_ERR(file);
+		goto clean_up;
+	}
+	fd_install(fd, file);
+
+	err = gk20a_get_client(g);
+	if (err)
+		goto clean_up;
+
+	err = gk20a_as_alloc_share(&g->as, args->big_page_size, &as_share);
+	if (err)
+		goto clean_up_client;
+
+	file->private_data = as_share;
+
+	args->as_fd = fd;
+	return 0;
+
+clean_up_client:
+	gk20a_put_client(g);
+clean_up:
+	put_unused_fd(fd);
+	return err;
+}
+
 long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	struct platform_device *dev = filp->private_data;
@@ -309,6 +358,10 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
 		err = gk20a_ctrl_mark_compressible_write(g,
 			(struct nvgpu_gpu_mark_compressible_write_args *)buf);
 		break;
+	case NVGPU_GPU_IOCTL_ALLOC_AS:
+		err = gk20a_ctrl_alloc_as(g,
+			(struct nvgpu_alloc_as_args *)buf);
+		break;
 	default:
 		dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd);
 		err = -ENOTTY;
diff --git a/drivers/gpu/nvgpu/gk20a/fb_gk20a.c b/drivers/gpu/nvgpu/gk20a/fb_gk20a.c
index 52f2db4d9..d5b3fd877 100644
--- a/drivers/gpu/nvgpu/gk20a/fb_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fb_gk20a.c
@@ -18,6 +18,7 @@
 #include "gk20a.h"
 #include "kind_gk20a.h"
 #include "hw_mc_gk20a.h"
+#include "hw_fb_gk20a.h"
 
 static void fb_gk20a_reset(struct gk20a *g)
 {
@@ -29,9 +30,22 @@ static void fb_gk20a_reset(struct gk20a *g)
 		| mc_enable_hub_enabled_f());
 }
 
+static void gk20a_fb_set_mmu_page_size(struct gk20a *g)
+{
+	/* set large page size in fb */
+	u32 fb_mmu_ctrl = gk20a_readl(g, fb_mmu_ctrl_r());
+
+	fb_mmu_ctrl = (fb_mmu_ctrl &
+		~fb_mmu_ctrl_vm_pg_size_f(~0x0)) |
+		fb_mmu_ctrl_vm_pg_size_128kb_f();
+
+	gk20a_writel(g, fb_mmu_ctrl_r(), fb_mmu_ctrl);
+}
+
 void gk20a_init_fb(struct gpu_ops *gops)
 {
 	gops->fb.reset = fb_gk20a_reset;
+	gops->fb.set_mmu_page_size = gk20a_fb_set_mmu_page_size;
 	gk20a_init_uncompressed_kind_map();
 	gk20a_init_kind_attr();
 }
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 8ebf67118..04a4cf66e 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -141,6 +141,7 @@ struct gpu_ops {
 		void (*reset)(struct gk20a *g);
 		void (*init_uncompressed_kind_map)(struct gk20a *g);
 		void (*init_kind_attr)(struct gk20a *g);
+		void (*set_mmu_page_size)(struct gk20a *g);
 	} fb;
 	struct {
 		void (*slcg_bus_load_gating_prod)(struct gk20a *g, bool prod);
@@ -291,13 +292,16 @@ struct gpu_ops {
 			bool va_allocated,
 			int rw_flag);
 		void (*vm_remove)(struct vm_gk20a *vm);
-		int (*vm_alloc_share)(struct gk20a_as_share *as_share);
+		int (*vm_alloc_share)(struct gk20a_as_share *as_share,
+				u32 flags);
 		int (*vm_bind_channel)(struct gk20a_as_share *as_share,
 				struct channel_gk20a *ch);
 		int (*fb_flush)(struct gk20a *g);
 		void (*l2_invalidate)(struct gk20a *g);
 		void (*l2_flush)(struct gk20a *g, bool invalidate);
 		void (*tlb_invalidate)(struct vm_gk20a *vm);
+		void (*set_big_page_size)(struct gk20a *g,
+				void *inst_ptr, int size);
 	} mm;
 	struct {
 		int (*prepare_ucode)(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 70f4294b2..e7fdb336e 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -327,17 +327,7 @@ static int gk20a_init_mm_setup_hw(struct gk20a *g)
 
 	gk20a_dbg_fn("");
 
-	/* set large page size in fb
-	 * note this is very early on, can we defer it ? */
-	{
-		u32 fb_mmu_ctrl = gk20a_readl(g, fb_mmu_ctrl_r());
-
-		fb_mmu_ctrl = (fb_mmu_ctrl &
-			~fb_mmu_ctrl_vm_pg_size_f(~0x0)) |
-			fb_mmu_ctrl_vm_pg_size_128kb_f();
-
-		gk20a_writel(g, fb_mmu_ctrl_r(), fb_mmu_ctrl);
-	}
+	g->ops.fb.set_mmu_page_size(g);
 
 	inst_pa = (u32)(inst_pa >> bar1_instance_block_shift_gk20a());
 	gk20a_dbg_info("bar1 inst block ptr: 0x%08x", (u32)inst_pa);
@@ -2173,6 +2163,7 @@ void gk20a_vm_put(struct vm_gk20a *vm)
 
 static int gk20a_init_vm(struct mm_gk20a *mm,
 		struct vm_gk20a *vm,
+		u32 big_page_size,
 		u64 low_hole,
 		u64 aperture_size,
 		bool big_pages,
@@ -2184,7 +2175,7 @@ static int gk20a_init_vm(struct mm_gk20a *mm,
 	size_t vma_size;
 
 	/* note: keep the page sizes sorted lowest to highest here */
-	u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, SZ_128K };
+	u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, big_page_size };
 
 	vm->mm = mm;
 
@@ -2331,7 +2322,7 @@ clean_up_pdes:
 }
 
 /* address space interfaces for the gk20a module */
-int gk20a_vm_alloc_share(struct gk20a_as_share *as_share)
+int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size)
 {
 	struct gk20a_as *as = as_share->as;
 	struct gk20a *g = gk20a_from_as(as);
@@ -2351,8 +2342,15 @@ int gk20a_vm_alloc_share(struct gk20a_as_share *as_share)
 	vm->enable_ctag = true;
 
 	snprintf(name, sizeof(name), "gk20a_as_%d", as_share->id);
-	err = gk20a_init_vm(mm, vm,
-		SZ_128K << 10, mm->channel.size, true, name);
+
+	if (big_page_size && !g->ops.mm.set_big_page_size)
+		return -EINVAL;
+	if (big_page_size == 0)
+		big_page_size =
+			gk20a_get_platform(g->dev)->default_big_page_size;
+
+	err = gk20a_init_vm(mm, vm, big_page_size, big_page_size << 10,
+			mm->channel.size, true, name);
 
 	return 0;
 }
@@ -2709,10 +2707,12 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
 	struct device *d = dev_from_gk20a(g);
 	struct inst_desc *inst_block = &mm->bar1.inst_block;
 	dma_addr_t iova;
+	u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size;
 
 	mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20;
 	gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size);
-	gk20a_init_vm(mm, vm, SZ_4K, mm->bar1.aperture_size, false, "bar1");
+	gk20a_init_vm(mm, vm, big_page_size, SZ_4K,
+		mm->bar1.aperture_size, false, "bar1");
 
 	gk20a_dbg_info("pde pa=0x%llx",
 			(u64)gk20a_mm_iova_addr(vm->pdes.sgt->sgl));
@@ -2761,6 +2761,9 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
 	gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
 		ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit)));
 
+	if (g->ops.mm.set_big_page_size)
+		g->ops.mm.set_big_page_size(g, inst_ptr, big_page_size);
+
 	gk20a_dbg_info("bar1 inst block ptr: %08llx", (u64)inst_pa);
 	return 0;
 
@@ -2789,11 +2792,12 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm)
 	struct device *d = dev_from_gk20a(g);
 	struct inst_desc *inst_block = &mm->pmu.inst_block;
 	dma_addr_t iova;
+	u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size;
 
 	mm->pmu.aperture_size = GK20A_PMU_VA_SIZE;
 	gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size);
 
-	gk20a_init_vm(mm, vm,
+	gk20a_init_vm(mm, vm, big_page_size,
 		SZ_128K << 10, GK20A_PMU_VA_SIZE, false, "system");
 
 	gk20a_dbg_info("pde pa=0x%llx",
@@ -2842,6 +2846,9 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm)
 	gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
 		ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit)));
 
+	if (g->ops.mm.set_big_page_size)
+		g->ops.mm.set_big_page_size(g, inst_ptr, big_page_size);
+
 	return 0;
 
 clean_up_inst_block:
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index b28daef78..3f7042eec 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -512,7 +512,7 @@ int gk20a_vm_free_va(struct vm_gk20a *vm,
 /* vm-as interface */
 struct nvgpu_as_alloc_space_args;
 struct nvgpu_as_free_space_args;
-int gk20a_vm_alloc_share(struct gk20a_as_share *as_share);
+int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 flags);
 int gk20a_vm_release_share(struct gk20a_as_share *as_share);
 int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
 			 struct nvgpu_as_alloc_space_args *args);
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h
index ce0209fb5..aada1537e 100644
--- a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h
@@ -76,6 +76,9 @@ struct gk20a_platform {
 	/* Adaptative ELPG: true = enable flase = disable */
 	bool enable_aelpg;
 
+	/* Default big page size 64K or 128K */
+	u32 default_big_page_size;
+
 	/* Initialize the platform interface of the gk20a driver.
 	 *
 	 * The platform implementation of this function must
diff --git a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
index 5513ea432..ccbf932ff 100644
--- a/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
+++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a_tegra.c
@@ -443,6 +443,7 @@ struct gk20a_platform t132_gk20a_tegra_platform = {
 	.enable_elpg = true,
 	.enable_aelpg = true,
 
+	.default_big_page_size = SZ_128K,
 	.probe = gk20a_tegra_probe,
 	.late_probe = gk20a_tegra_late_probe,
 
@@ -480,6 +481,8 @@ struct gk20a_platform gk20a_tegra_platform = {
 	.enable_elpg = true,
 	.enable_aelpg = true,
 
+	.default_big_page_size = SZ_128K,
+
 	.probe = gk20a_tegra_probe,
 	.late_probe = gk20a_tegra_late_probe,
 
@@ -517,6 +520,8 @@ struct gk20a_platform gm20b_tegra_platform = {
 	.enable_elpg = true,
 	.enable_aelpg = true,
 
+	.default_big_page_size = SZ_128K,
+
 	.probe = gk20a_tegra_probe,
 	.late_probe = gk20a_tegra_late_probe,
 
diff --git a/drivers/gpu/nvgpu/gm20b/fb_gm20b.c b/drivers/gpu/nvgpu/gm20b/fb_gm20b.c
index 34ad6418d..a2aa81d89 100644
--- a/drivers/gpu/nvgpu/gm20b/fb_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/fb_gm20b.c
@@ -82,9 +82,18 @@ void gm20b_init_kind_attr(void)
 	}
 }
 
+static void gm20b_fb_set_mmu_page_size(struct gk20a *g)
+{
+	/* set large page size in fb */
+	u32 fb_mmu_ctrl = gk20a_readl(g, fb_mmu_ctrl_r());
+	fb_mmu_ctrl |= fb_mmu_ctrl_use_pdb_big_page_size_true_f();
+	gk20a_writel(g, fb_mmu_ctrl_r(), fb_mmu_ctrl);
+}
+
 void gm20b_init_fb(struct gpu_ops *gops)
 {
 	gops->fb.init_fs_state = fb_gm20b_init_fs_state;
+	gops->fb.set_mmu_page_size = gm20b_fb_set_mmu_page_size;
 	gm20b_init_uncompressed_kind_map();
 	gm20b_init_kind_attr();
 }
diff --git a/drivers/gpu/nvgpu/gm20b/hw_fb_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_fb_gm20b.h
index 817e4fc4d..7655d2a33 100644
--- a/drivers/gpu/nvgpu/gm20b/hw_fb_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/hw_fb_gm20b.h
@@ -66,6 +66,10 @@ static inline u32 fb_mmu_ctrl_vm_pg_size_128kb_f(void)
 {
 	return 0x0;
 }
+static inline u32 fb_mmu_ctrl_vm_pg_size_64kb_f(void)
+{
+	return 0x1;
+}
 static inline u32 fb_mmu_ctrl_pri_fifo_empty_v(u32 r)
 {
 	return (r >> 15) & 0x1;
@@ -78,6 +82,18 @@ static inline u32 fb_mmu_ctrl_pri_fifo_space_v(u32 r)
 {
 	return (r >> 16) & 0xff;
 }
+static inline u32 fb_mmu_ctrl_use_pdb_big_page_size_v(u32 r)
+{
+	return (r >> 11) & 0x1;
+}
+static inline u32 fb_mmu_ctrl_use_pdb_big_page_size_true_f(void)
+{
+	return 0x800;
+}
+static inline u32 fb_mmu_ctrl_use_pdb_big_page_size_false_f(void)
+{
+	return 0x0;
+}
 static inline u32 fb_priv_mmu_phy_secure_r(void)
 {
 	return 0x00100ce4;
diff --git a/drivers/gpu/nvgpu/gm20b/hw_ram_gm20b.h b/drivers/gpu/nvgpu/gm20b/hw_ram_gm20b.h
index 6debecdac..2e1df1d4d 100644
--- a/drivers/gpu/nvgpu/gm20b/hw_ram_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/hw_ram_gm20b.h
@@ -78,6 +78,26 @@ static inline u32 ram_in_page_dir_base_vol_true_f(void)
 {
 	return 0x4;
 }
+static inline u32 ram_in_big_page_size_f(u32 v)
+{
+	return (v & 0x1) << 11;
+}
+static inline u32 ram_in_big_page_size_m(void)
+{
+	return 0x1 << 11;
+}
+static inline u32 ram_in_big_page_size_w(void)
+{
+	return 128;
+}
+static inline u32 ram_in_big_page_size_128kb_f(void)
+{
+	return 0x0;
+}
+static inline u32 ram_in_big_page_size_64kb_f(void)
+{
+	return 0x800;
+}
 static inline u32 ram_in_page_dir_base_lo_f(u32 v)
 {
 	return (v & 0xfffff) << 12;
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
index b4622c0b0..13e7859f9 100644
--- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
@@ -19,6 +19,7 @@
 #include "hw_gmmu_gm20b.h"
 #include "hw_fb_gm20b.h"
 #include "hw_gr_gm20b.h"
+#include "hw_ram_gm20b.h"
 
 static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm,
 				enum gmmu_pgsz_gk20a pgsz_idx,
@@ -259,6 +260,25 @@ bool gm20b_mm_mmu_debug_mode_enabled(struct gk20a *g)
 		gr_gpcs_pri_mmu_debug_ctrl_debug_enabled_v();
 }
 
+void gm20b_mm_set_big_page_size(struct gk20a *g, void *inst_ptr, int size)
+{
+	u32 val;
+
+	gk20a_dbg_fn("");
+
+	gk20a_dbg_info("big page size %d\n", size);
+	val = gk20a_mem_rd32(inst_ptr, ram_in_big_page_size_w());
+	val &= ~ram_in_big_page_size_m();
+
+	if (size == SZ_64K)
+		val |= ram_in_big_page_size_64kb_f();
+	else
+		val |= ram_in_big_page_size_128kb_f();
+
+	gk20a_mem_wr32(inst_ptr, ram_in_big_page_size_w(), val);
+	gk20a_dbg_fn("done");
+}
+
 void gm20b_init_mm(struct gpu_ops *gops)
 {
 	gops->mm.set_sparse = gm20b_vm_put_sparse;
@@ -273,4 +293,5 @@ void gm20b_init_mm(struct gpu_ops *gops)
 	gops->mm.l2_invalidate = gk20a_mm_l2_invalidate;
 	gops->mm.l2_flush = gk20a_mm_l2_flush;
 	gops->mm.tlb_invalidate = gk20a_mm_tlb_invalidate;
+	gops->mm.set_big_page_size = gm20b_mm_set_big_page_size;
 }
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index 694c497cb..c9e50b36a 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -175,6 +175,12 @@ struct nvgpu_gpu_mark_compressible_write_args {
 	__u32 reserved[3];	/* must be zero */
 };
 
+struct nvgpu_alloc_as_args {
+	__u32 big_page_size;
+	__s32 as_fd;
+	__u64 reserved;		/* must be zero */
+};
+
 #define NVGPU_GPU_IOCTL_ZCULL_GET_CTX_SIZE \
 	_IOR(NVGPU_GPU_IOCTL_MAGIC, 1, struct nvgpu_gpu_zcull_get_ctx_size_args)
 #define NVGPU_GPU_IOCTL_ZCULL_GET_INFO \
@@ -189,9 +195,11 @@ struct nvgpu_gpu_mark_compressible_write_args {
 	_IOWR(NVGPU_GPU_IOCTL_MAGIC, 6, struct nvgpu_gpu_prepare_compressible_read_args)
 #define NVGPU_GPU_IOCTL_MARK_COMPRESSIBLE_WRITE \
 	_IOWR(NVGPU_GPU_IOCTL_MAGIC, 7, struct nvgpu_gpu_mark_compressible_write_args)
+#define NVGPU_GPU_IOCTL_ALLOC_AS \
+	_IOWR(NVGPU_GPU_IOCTL_MAGIC, 8, struct nvgpu_alloc_as_args)
 #define NVGPU_GPU_IOCTL_LAST \
-	_IOC_NR(NVGPU_GPU_IOCTL_MARK_COMPRESSIBLE_WRITE)
+	_IOC_NR(NVGPU_GPU_IOCTL_ALLOC_AS)
 
 #define NVGPU_GPU_IOCTL_MAX_ARG_SIZE \
 	sizeof(struct nvgpu_gpu_prepare_compressible_read_args)
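
For context, below is a minimal userspace sketch of how the new ioctl might be exercised once this patch is applied. Only NVGPU_GPU_IOCTL_ALLOC_AS and struct nvgpu_alloc_as_args come from the patch itself; the control-node path /dev/nvhost-ctrl-gpu, the 64 KB request and the error handling are illustrative assumptions. Per gk20a_vm_alloc_share() above, a big_page_size of 0 selects the platform's default_big_page_size, while a non-zero value is rejected with -EINVAL on chips that do not implement the mm.set_big_page_size hook (only gm20b wires it up here).

/* Hypothetical usage sketch for NVGPU_GPU_IOCTL_ALLOC_AS.
 * Assumptions: the gk20a control node is /dev/nvhost-ctrl-gpu and the
 * target chip supports 64 KB big pages (gm20b in this patch).
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

int main(void)
{
	struct nvgpu_alloc_as_args args;
	int ctrl_fd;

	ctrl_fd = open("/dev/nvhost-ctrl-gpu", O_RDWR);	/* assumed node name */
	if (ctrl_fd < 0) {
		perror("open");
		return 1;
	}

	memset(&args, 0, sizeof(args));
	args.big_page_size = 64 * 1024;	/* 0 would pick the platform default */

	if (ioctl(ctrl_fd, NVGPU_GPU_IOCTL_ALLOC_AS, &args) < 0) {
		perror("NVGPU_GPU_IOCTL_ALLOC_AS");
		close(ctrl_fd);
		return 1;
	}

	/* args.as_fd now refers to a new address space; it carries the same
	 * file ops as the AS device node, so the existing AS ioctls
	 * (space alloc, channel bind, ...) can be issued on it.
	 */
	printf("allocated address-space fd %d\n", args.as_fd);

	close(args.as_fd);
	close(ctrl_fd);
	return 0;
}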