diff --git a/drivers/gpu/nvgpu/include/nvgpu/linux/vm.h b/drivers/gpu/nvgpu/include/nvgpu/linux/vm.h
index 97b8334bf..6f3beaa96 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/linux/vm.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/linux/vm.h
@@ -54,8 +54,9 @@ struct nvgpu_mapped_buf_priv {
 /* NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL must be set */
 int nvgpu_vm_map_linux(struct vm_gk20a *vm,
 		       struct dma_buf *dmabuf,
-		       u64 offset_align,
+		       u64 map_addr,
 		       u32 flags,
+		       u32 page_size,
 		       s16 compr_kind,
 		       s16 incompr_kind,
 		       int rw_flag,
@@ -71,8 +72,9 @@ int nvgpu_vm_map_linux(struct vm_gk20a *vm,
  */
 int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
 			int dmabuf_fd,
-			u64 *offset_align,
+			u64 *map_addr,
 			u32 flags, /* NVGPU_AS_MAP_BUFFER_FLAGS_ */
+			u32 page_size,
 			s16 compr_kind,
 			s16 incompr_kind,
 			u64 buffer_offset,
diff --git a/drivers/gpu/nvgpu/os/linux/cde.c b/drivers/gpu/nvgpu/os/linux/cde.c
index 66a804036..052a1d21d 100644
--- a/drivers/gpu/nvgpu/os/linux/cde.c
+++ b/drivers/gpu/nvgpu/os/linux/cde.c
@@ -975,6 +975,30 @@ static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct nvgpu_os_linux *l
 	return cde_ctx;
 }
 
+static u32 gk20a_cde_mapping_page_size(struct vm_gk20a *vm,
+				       u32 map_offset, u32 map_size)
+{
+	struct gk20a *g = gk20a_from_vm(vm);
+
+	/*
+	 * To be simple we will just make the map size depend on the
+	 * iommu'ability of the driver. If there's an IOMMU we can rely on
+	 * buffers being contiguous. If not, then we'll use 4k pages since we
+	 * know that will work for any buffer.
+	 */
+	if (!nvgpu_iommuable(g))
+		return SZ_4K;
+
+	/*
+	 * If map size or offset is not 64K aligned then use small pages.
+	 */
+	if (map_size & (vm->big_page_size - 1) ||
+	    map_offset & (vm->big_page_size - 1))
+		return SZ_4K;
+
+	return vm->big_page_size;
+}
+
 int gk20a_cde_convert(struct nvgpu_os_linux *l,
 		      struct dma_buf *compbits_scatter_buf,
 		      u64 compbits_byte_offset,
@@ -1071,7 +1095,10 @@ __releases(&l->cde_app->mutex)
 	err = nvgpu_vm_map_linux(cde_ctx->vm, compbits_scatter_buf, 0,
 				 NVGPU_VM_MAP_CACHEABLE |
 				 NVGPU_VM_MAP_DIRECT_KIND_CTRL,
-				 NVGPU_KIND_INVALID,
+				 gk20a_cde_mapping_page_size(cde_ctx->vm,
+							     map_offset,
+							     map_size),
+				 NV_KIND_INVALID,
 				 compbits_kind, /* incompressible kind */
 				 gk20a_mem_flag_none,
 				 map_offset, map_size,
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_as.c b/drivers/gpu/nvgpu/os/linux/ioctl_as.c
index 47f612cca..5eb9802fc 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_as.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_as.c
@@ -111,6 +111,7 @@ static int gk20a_as_ioctl_map_buffer_ex(
 
 	return nvgpu_vm_map_buffer(as_share->vm, args->dmabuf_fd,
 				   &args->offset, args->flags,
+				   args->page_size,
 				   args->compr_kind,
 				   args->incompr_kind,
 				   args->buffer_offset,
@@ -201,7 +202,7 @@ static int gk20a_as_ioctl_map_buffer_batch(
 
 		err = nvgpu_vm_map_buffer(
 			as_share->vm, map_args.dmabuf_fd,
-			&map_args.offset, map_args.flags,
+			&map_args.offset, map_args.flags, map_args.page_size,
 			compressible_kind, incompressible_kind,
 			map_args.buffer_offset,
 			map_args.mapping_size,
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
index 76ff25c0c..938e0abd2 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
@@ -1372,10 +1372,11 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 			args->dmabuf_fd,
 			&args->offset, 0,
+			SZ_4K,
+			0,
 			0,
 			0,
 			0,
-			args->mapping_size,
 			NULL);
 	if (err)
 		goto err_remove_vm;
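The gk20a_cde_mapping_page_size() helper above picks the page size purely from IOMMU availability and 64K alignment of the map offset and size. Below is a minimal standalone sketch of that same selection rule, for reference only; the pick_page_size() name and the hard-coded 64K big-page value are illustrative and not part of the patch.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SZ_4K          0x1000u
#define BIG_PAGE_SIZE  0x10000u  /* illustrative: assumes vm->big_page_size is 64K */

/*
 * Same rule as gk20a_cde_mapping_page_size(): fall back to 4K pages when
 * there is no IOMMU (no contiguity guarantee) or when either the map
 * offset or the map size is not big-page aligned.
 */
static uint32_t pick_page_size(bool iommuable, uint32_t map_offset,
			       uint32_t map_size)
{
	if (!iommuable)
		return SZ_4K;

	if ((map_size & (BIG_PAGE_SIZE - 1)) ||
	    (map_offset & (BIG_PAGE_SIZE - 1)))
		return SZ_4K;

	return BIG_PAGE_SIZE;
}

int main(void)
{
	/* 64K-aligned offset and size with an IOMMU -> big pages */
	printf("%#x\n", (unsigned)pick_page_size(true, 0x20000, 0x40000));  /* 0x10000 */
	/* unaligned size -> small pages */
	printf("%#x\n", (unsigned)pick_page_size(true, 0x20000, 0x41000));  /* 0x1000 */
	/* no IOMMU -> always small pages */
	printf("%#x\n", (unsigned)pick_page_size(false, 0x20000, 0x40000)); /* 0x1000 */
	return 0;
}

Only the fully 64K-aligned, IOMMU-backed case selects big pages; everything else falls back to 4K, matching the conservative default the patch takes for CDE mappings.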
diff --git a/drivers/gpu/nvgpu/os/linux/vm.c b/drivers/gpu/nvgpu/os/linux/vm.c
index baa775154..eb9ca8fda 100644
--- a/drivers/gpu/nvgpu/os/linux/vm.c
+++ b/drivers/gpu/nvgpu/os/linux/vm.c
@@ -175,8 +175,9 @@ struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm,
 
 int nvgpu_vm_map_linux(struct vm_gk20a *vm,
 		       struct dma_buf *dmabuf,
-		       u64 offset_align,
+		       u64 map_addr,
 		       u32 flags,
+		       u32 page_size,
 		       s16 compr_kind,
 		       s16 incompr_kind,
 		       int rw_flag,
@@ -192,12 +193,8 @@ int nvgpu_vm_map_linux(struct vm_gk20a *vm,
 	struct nvgpu_sgt *nvgpu_sgt = NULL;
 	struct nvgpu_mapped_buf *mapped_buffer = NULL;
 	struct dma_buf_attachment *attachment;
-	u64 map_addr = 0ULL;
 	int err = 0;
 
-	if (flags & NVGPU_VM_MAP_FIXED_OFFSET)
-		map_addr = offset_align;
-
 	sgt = gk20a_mm_pin(dev, dmabuf, &attachment);
 	if (IS_ERR(sgt)) {
 		nvgpu_warn(g, "Failed to pin dma_buf!");
@@ -253,8 +250,9 @@ clean_up:
 
 int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
 			int dmabuf_fd,
-			u64 *offset_align,
+			u64 *map_addr,
 			u32 flags, /*NVGPU_AS_MAP_BUFFER_FLAGS_*/
+			u32 page_size,
 			s16 compr_kind,
 			s16 incompr_kind,
 			u64 buffer_offset,
@@ -274,8 +272,28 @@ int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
 		return PTR_ERR(dmabuf);
 	}
 
+	/*
+	 * For regular maps we do not accept either an input address or a
+	 * buffer_offset.
+	 */
+	if (!(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) &&
+	    (buffer_offset || *map_addr)) {
+		nvgpu_err(g,
+			  "Regular map with addr/buf offset is not supported!");
+		return -EINVAL;
+	}
+
+	/*
+	 * Map size is always buffer size for non fixed mappings. As such map
+	 * size should be left as zero by userspace for non-fixed maps.
+	 */
+	if (mapping_size && !(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) {
+		nvgpu_err(g, "map_size && non-fixed-mapping!");
+		return -EINVAL;
+	}
+
 	/* verify that we're not overflowing the buffer, i.e.
-	 * (buffer_offset + mapping_size)> dmabuf->size.
+	 * (buffer_offset + mapping_size) > dmabuf->size.
 	 *
 	 * Since buffer_offset + mapping_size could overflow, first check
 	 * that mapping size < dmabuf_size, at which point we can subtract
@@ -284,7 +302,7 @@ int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
 	if ((mapping_size > dmabuf->size) ||
 	    (buffer_offset > (dmabuf->size - mapping_size))) {
 		nvgpu_err(g,
-			  "buf size %llx < (offset(%llx) + map_size(%llx))\n",
+			  "buf size %llx < (offset(%llx) + map_size(%llx))",
 			  (u64)dmabuf->size, buffer_offset, mapping_size);
 		dma_buf_put(dmabuf);
 		return -EINVAL;
 	}
@@ -296,8 +314,9 @@ int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
 		return err;
 	}
 
-	err = nvgpu_vm_map_linux(vm, dmabuf, *offset_align,
+	err = nvgpu_vm_map_linux(vm, dmabuf, *map_addr,
 				 nvgpu_vm_translate_linux_flags(g, flags),
+				 page_size,
 				 compr_kind, incompr_kind,
 				 gk20a_mem_flag_none,
 				 buffer_offset,
@@ -306,7 +325,7 @@ int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
 				 &ret_va);
 
 	if (!err)
-		*offset_align = ret_va;
+		*map_addr = ret_va;
 	else
 		dma_buf_put(dmabuf);
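The two checks added to nvgpu_vm_map_buffer() above reject an input GPU VA, a buffer offset, or an explicit mapping size on non-fixed maps, since such maps always cover the whole dma-buf at a driver-chosen address. The following is a minimal sketch of that validation in isolation; the validate_map_args() helper and the flag value are illustrative stand-ins for the real NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET from the nvgpu uapi header.

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative flag value; the real one lives in the nvgpu uapi header. */
#define MAP_FLAGS_FIXED_OFFSET  (1u << 2)

/*
 * Mirrors the new up-front checks in nvgpu_vm_map_buffer(): a non-fixed
 * ("regular") map must not supply a GPU VA, a buffer offset, or an
 * explicit mapping size.
 */
static int validate_map_args(uint32_t flags, uint64_t map_addr,
			     uint64_t buffer_offset, uint64_t mapping_size)
{
	if (!(flags & MAP_FLAGS_FIXED_OFFSET) && (buffer_offset || map_addr))
		return -EINVAL;

	if (mapping_size && !(flags & MAP_FLAGS_FIXED_OFFSET))
		return -EINVAL;

	return 0;
}

int main(void)
{
	/* regular map: address, offset and size all zero -> accepted */
	printf("%d\n", validate_map_args(0, 0, 0, 0));                /* 0 */
	/* regular map with an explicit size -> rejected */
	printf("%d\n", validate_map_args(0, 0, 0, 0x10000));          /* -22 */
	/* fixed map may pass an address, offset and size */
	printf("%d\n", validate_map_args(MAP_FLAGS_FIXED_OFFSET,
					 0x100000, 0x1000, 0x10000)); /* 0 */
	return 0;
}

This matches the perfbuf change in ioctl_dbg.c, which now passes 0 for mapping_size because the perfbuf map is a non-fixed mapping.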