Files
linux-nvgpu/drivers/gpu/nvgpu/os/linux/cde.h
Konsta Hölttä ca1f93bdd7 gpu: nvgpu: add user fence type
Decouple the fence information needed for providing submit postfences to
userspace by adding a separate type for that and using it to pass fence
data to ioctls.

The data in struct nvgpu_fence_type is used in various places:

- job tracking needs to know when a post fence is expired
- job submitters within the driver (vidmem clears) need to be able to
  wait for these fences
- userspace needs the fence as an id, value pair or as a file descriptor
  created from an os fence

To keep object lifetimes strict, start decoupling the os fence data out
of struct nvgpu_fence_type: delete nvgpu_fence_install_fd() and add
nvgpu_fence_extract_user() to return a struct nvgpu_user_fence that
contains only the necessary information. Storing the os fence in job
tracking metadata is legacy code and not useful. Passing the os fence
from where it's created through the whole submit path inside this
combined fence type has been convenient, though.

The internally stored cde job fence in dmabuf compression metadata is
still nvgpu_fence_type to keep this patch simple.

Jira NVGPU-5248

Change-Id: I75b7da676fb6aa083828f888c55571bbf7645ef3
Signed-off-by: Konsta Hölttä <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2359064
Reviewed-by: automaticguardword <automaticguardword@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-misra <svc-mobile-misra@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
2020-12-15 14:13:28 -06:00

328 lines
7.9 KiB
C

/*
* GK20A color decompression engine support
*
* Copyright (c) 2014-2019, NVIDIA Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _CDE_GK20A_H_
#define _CDE_GK20A_H_
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/list.h>
#include <nvgpu/lock.h>
#include <linux/kobject.h>
#include <linux/workqueue.h>
/* Capacity of the mem[] buffer array in struct gk20a_cde_ctx. */
#define MAX_CDE_BUFS 10
/* Capacity of the params[] patch descriptor array in struct gk20a_cde_ctx. */
#define MAX_CDE_PARAMS 64
/* Capacity of the user_param_values[] array in struct gk20a_cde_ctx. */
#define MAX_CDE_USER_PARAMS 40
/*
* Number of u32 entries in one small data array: data[] in
* struct gk20a_cde_hdr_array and each row of arrays[][] in
* struct gk20a_cde_app.
*/
#define MAX_CDE_ARRAY_ENTRIES 9
/*
* The size of the context ring buffer that is dedicated for handling cde
* jobs. Re-using a context (=channel) for a different cde job forces a cpu
* wait on the previous job to that channel, so increasing this value
* reduces the likelihood of stalls.
*/
#define NUM_CDE_CONTEXTS 4
/*
* Forward declarations. This header only refers to these types through
* pointers, so their full definitions are not needed here.
*/
struct dma_buf;
struct device;
struct nvgpu_os_linux;
struct gk20a;
struct nvgpu_fence_type;
struct nvgpu_channel_fence;
struct nvgpu_channel;
struct vm_gk20a;
struct nvgpu_gpfifo_entry;
struct nvgpu_user_fence;
/*
* This element defines a buffer that is allocated and mapped into gpu address
* space. data_byte_offset defines the beginning of the buffer inside the
* firmware. num_bytes defines how many bytes the firmware contains.
*
* If data_byte_offset is zero, we allocate an empty buffer.
*/
struct gk20a_cde_hdr_buf {
/* start of the buffer contents inside the firmware; zero = empty buffer */
u64 data_byte_offset;
/* number of bytes the firmware contains for this buffer */
u64 num_bytes;
};
/*
* This element defines a constant patching in buffers. It basically
* computes physical address to <source_buf>+source_byte_offset. The
* address is then modified into patch value as per:
* value = (current_value & ~mask) | (address << shift) & mask .
*
* The type field defines the register size as:
* 0=u32,
* 1=u64 (little endian),
* 2=u64 (big endian)
* These numbers equal the TYPE_PARAM_TYPE_* enumerators in this header.
*/
struct gk20a_cde_hdr_replace {
/* NOTE(review): presumably the buffer that receives the patch - confirm */
u32 target_buf;
/* buffer whose address (plus source_byte_offset) is patched in */
u32 source_buf;
/* shift applied to the address in the formula above */
s32 shift;
/* register size selector, see the list above */
u32 type;
/* NOTE(review): presumably the patch location inside target_buf - confirm */
u64 target_byte_offset;
/* offset added to the source_buf address */
u64 source_byte_offset;
/* bits of the current value that are replaced, per the formula above */
u64 mask;
};
/*
 * Register size selectors; the numeric values are the ones listed for the
 * 'type' field in the patch descriptor comments (0=u32, 1=u64 little endian,
 * 2=u64 big endian).
 */
enum {
	TYPE_PARAM_TYPE_U32        = 0,
	TYPE_PARAM_TYPE_U64_LITTLE = 1,
	TYPE_PARAM_TYPE_U64_BIG    = 2,
};
/*
* This element defines a runtime patching in buffers. Parameters with id from
* 0 to 1024 are reserved for special usage as follows:
* 0 = comptags_per_cacheline,
* 1 = slices_per_fbp,
* 2 = num_fbps
* 3 = source buffer first page offset
* 4 = source buffer block height log2
* 5 = backing store memory address
* 6 = destination memory address
* 7 = destination size (bytes)
* 8 = backing store size (bytes)
* 9 = cache line size
*
* NOTE(review): the list above does not line up with the TYPE_PARAM_*
* enumerators declared after this struct (e.g. id 1 there is
* TYPE_PARAM_GPU_CONFIGURATION) - confirm which one is current.
*
* Parameters above id 1024 are user-specified, i.e. they determine where
* parameters from user space should be placed in buffers, what their type
* is, etc.
*
* Once the value is available, we add data_offset to the value.
*
* The value address is then modified into patch value as per:
* value = (current_value & ~mask) | (address << shift) & mask .
*
* The type field defines the register size as:
* 0=u32,
* 1=u64 (little endian),
* 2=u64 (big endian)
*/
struct gk20a_cde_hdr_param {
/* parameter id; see the reserved / user-specified split described above */
u32 id;
/* NOTE(review): presumably the buffer that receives the patch - confirm */
u32 target_buf;
/* shift applied in the formula above */
s32 shift;
/* register size selector, see the list above */
u32 type;
/* signed offset added to the parameter value once it is available */
s64 data_offset;
/* NOTE(review): presumably the patch location inside target_buf - confirm */
u64 target_byte_offset;
/* bits of the current value that are replaced, per the formula above */
u64 mask;
};
/*
 * Parameter ids with a fixed meaning. NUM_RESERVED_PARAMS (1024) closes the
 * reserved id range described for struct gk20a_cde_hdr_param.
 */
enum {
	TYPE_PARAM_COMPTAGS_PER_CACHELINE         = 0,
	TYPE_PARAM_GPU_CONFIGURATION              = 1,
	TYPE_PARAM_FIRSTPAGEOFFSET                = 2,
	TYPE_PARAM_NUMPAGES                       = 3,
	TYPE_PARAM_BACKINGSTORE                   = 4,
	TYPE_PARAM_DESTINATION                    = 5,
	TYPE_PARAM_DESTINATION_SIZE               = 6,
	TYPE_PARAM_BACKINGSTORE_SIZE              = 7,
	TYPE_PARAM_SOURCE_SMMU_ADDR               = 8,
	TYPE_PARAM_BACKINGSTORE_BASE_HW           = 9,
	TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE = 10,
	TYPE_PARAM_SCATTERBUFFER                  = 11,
	TYPE_PARAM_SCATTERBUFFER_SIZE             = 12,
	NUM_RESERVED_PARAMS                       = 1024,
};
/*
* This header element defines a command. The op field determines whether the
* element is defining an init (0) or convert command (1). data_byte_offset
* denotes the beginning address of command elements in the file.
*
* The op numbers equal TYPE_BUF_COMMAND_INIT and TYPE_BUF_COMMAND_CONVERT
* declared in this header.
*/
struct gk20a_cde_hdr_command {
/* command kind: 0 = init, 1 = convert */
u32 op;
/* NOTE(review): presumably the number of command elements - confirm */
u32 num_entries;
/* beginning address of the command elements in the file */
u64 data_byte_offset;
};
/*
 * Command kinds; 0 and 1 are the init and convert values documented for the
 * op field of struct gk20a_cde_hdr_command.
 */
enum {
	TYPE_BUF_COMMAND_INIT    = 0,
	TYPE_BUF_COMMAND_CONVERT = 1,
	TYPE_BUF_COMMAND_NOOP    = 2,
};
/*
* This command element defines one entry inside the push buffer. target_buf
* defines the buffer including the pushbuffer entries, target_byte_offset the
* offset inside the buffer and num_bytes the number of words in the buffer.
*
* NOTE(review): the field is named num_bytes but is described as a word
* count - confirm the unit against the code that consumes it.
*/
struct gk20a_cde_cmd_elem {
/* buffer that holds the pushbuffer entries */
u32 target_buf;
/* NOTE(review): presumably unused filler for 64-bit alignment - confirm */
u32 padding;
/* offset inside target_buf */
u64 target_byte_offset;
/* size of the entry; see the unit note above */
u64 num_bytes;
};
/*
* This element is used for storing a small array of data.
*/
/*
 * Ids of the small data arrays. NUM_CDE_ARRAYS is left implicit so that it
 * always follows the last id; it sizes arrays[][] in struct gk20a_cde_app.
 */
enum {
	ARRAY_PROGRAM_OFFSET = 0,
	ARRAY_REGISTER_COUNT = 1,
	ARRAY_LAUNCH_COMMAND = 2,
	NUM_CDE_ARRAYS
};
/* Header payload carrying one small array of MAX_CDE_ARRAY_ENTRIES words. */
struct gk20a_cde_hdr_array {
/* NOTE(review): presumably one of the ARRAY_* ids - confirm */
u32 id;
/* array contents */
u32 data[MAX_CDE_ARRAY_ENTRIES];
};
/*
* Following defines a single header element. Each element has a type and
* one of the payload structures below, overlaid in an anonymous union.
*
* NOTE(review): type is presumably one of TYPE_BUF..TYPE_ARRAY, which are
* declared in the same order as the union members - confirm.
*/
struct gk20a_cde_hdr_elem {
/* element kind; decides which union member is meaningful */
u32 type;
/* NOTE(review): presumably unused filler for 64-bit alignment - confirm */
u32 padding;
union {
struct gk20a_cde_hdr_buf buf;
struct gk20a_cde_hdr_replace replace;
struct gk20a_cde_hdr_param param;
u32 required_class;
struct gk20a_cde_hdr_command command;
struct gk20a_cde_hdr_array array;
};
};
/*
 * Header element kinds, numbered in the same order as the payload members of
 * the union in struct gk20a_cde_hdr_elem.
 */
enum {
	TYPE_BUF            = 0,
	TYPE_REPLACE        = 1,
	TYPE_PARAM          = 2,
	TYPE_REQUIRED_CLASS = 3,
	TYPE_COMMAND        = 4,
	TYPE_ARRAY          = 5,
};
/*
* One id/value pair; an array of these is passed to gk20a_cde_convert()
* together with num_params.
*/
struct gk20a_cde_param {
/* parameter id */
u32 id;
/* NOTE(review): presumably unused filler for 64-bit alignment - confirm */
u32 padding;
/* parameter value */
u64 value;
};
/*
* State of one cde context. Per the NUM_CDE_CONTEXTS comment in this header
* a context corresponds to a channel.
*/
struct gk20a_cde_ctx {
struct nvgpu_os_linux *l;
struct device *dev;
/* channel related data */
struct nvgpu_channel *ch;
struct nvgpu_tsg *tsg;
struct vm_gk20a *vm;
/* buf converter configuration */
struct nvgpu_mem mem[MAX_CDE_BUFS];
/* NOTE(review): presumably the number of mem[] entries in use - confirm */
unsigned int num_bufs;
/* buffer patching params (where should patching be done) */
struct gk20a_cde_hdr_param params[MAX_CDE_PARAMS];
/* NOTE(review): presumably the number of params[] entries in use - confirm */
unsigned int num_params;
/* storage for user space parameter values */
u32 user_param_values[MAX_CDE_USER_PARAMS];
u32 surf_param_offset;
u32 surf_param_lines;
u64 surf_vaddr;
u64 compbit_vaddr;
u64 compbit_size;
u64 scatterbuffer_vaddr;
u64 scatterbuffer_size;
u64 backing_store_vaddr;
/* gpfifo entries for the init and convert commands, with entry counts */
struct nvgpu_gpfifo_entry *init_convert_cmd;
int init_cmd_num_entries;
struct nvgpu_gpfifo_entry *convert_cmd;
int convert_cmd_num_entries;
struct kobj_attribute attr;
bool init_cmd_executed;
/* list linkage; gk20a_cde_ctx_from_list() maps a node back to its context */
struct nvgpu_list_node list;
bool is_temporary;
bool in_use;
/* NOTE(review): presumably schedules deferred deletion of the ctx - confirm */
struct delayed_work ctx_deleter_work;
};
/*
 * Map a list node back to the gk20a_cde_ctx that embeds it.
 *
 * @node must point at the 'list' member of a struct gk20a_cde_ctx; the
 * address of the containing structure is obtained by subtracting that
 * member's offset from the node address.
 *
 * Returns the enclosing context.
 */
static inline struct gk20a_cde_ctx *
gk20a_cde_ctx_from_list(struct nvgpu_list_node *node)
{
	return (struct gk20a_cde_ctx *)
		((uintptr_t)node - offsetof(struct gk20a_cde_ctx, list));
}
/*
* Holds the free and used context lists, the context counters, the firmware
* version and the NUM_CDE_ARRAYS small data arrays.
*/
struct gk20a_cde_app {
bool initialised;
/* NOTE(review): presumably guards the lists and counters below - confirm */
struct nvgpu_mutex mutex;
/* NOTE(review): presumably linked through gk20a_cde_ctx.list - confirm */
struct nvgpu_list_node free_contexts;
struct nvgpu_list_node used_contexts;
unsigned int ctx_count;
unsigned int ctx_usecount;
unsigned int ctx_count_top;
u32 firmware_version;
/* one row of MAX_CDE_ARRAY_ENTRIES words per array id below NUM_CDE_ARRAYS */
u32 arrays[NUM_CDE_ARRAYS][MAX_CDE_ARRAY_ENTRIES];
u32 shader_parameter;
};
/*
* CDE entry points. Their definitions are not part of this header, so the
* notes below only restate what the declarations show; see the implementation
* for the exact contracts.
*/
/* NOTE(review): presumably teardown, suspend, init and reload of cde - confirm */
void gk20a_cde_destroy(struct nvgpu_os_linux *l);
void gk20a_cde_suspend(struct nvgpu_os_linux *l);
int gk20a_init_cde_support(struct nvgpu_os_linux *l);
int gk20a_cde_reload(struct nvgpu_os_linux *l);
/*
* Takes a compbits dma_buf with byte offsets for the compbits and the
* scatterbuffer, a channel fence, flags and num_params entries of params.
* fence_out is a pointer to a struct nvgpu_fence_type pointer
* (NOTE(review): presumably filled in with the job's fence - confirm).
*/
int gk20a_cde_convert(struct nvgpu_os_linux *l,
struct dma_buf *compbits_buf,
u64 compbits_byte_offset,
u64 scatterbuffer_byte_offset,
struct nvgpu_channel_fence *fence,
u32 __flags, struct gk20a_cde_param *params,
int num_params, struct nvgpu_fence_type **fence_out);
/*
* The buffer is identified by buffer_fd. valid_compbits, zbc_color and
* fence_out are pointer parameters (NOTE(review): presumably outputs -
* confirm); fence_out uses struct nvgpu_user_fence rather than
* struct nvgpu_fence_type.
*/
int gk20a_prepare_compressible_read(
struct nvgpu_os_linux *l, u32 buffer_fd, u32 request, u64 offset,
u64 compbits_hoffset, u64 compbits_voffset,
u64 scatterbuffer_offset,
u32 width, u32 height, u32 block_height_log2,
u32 submit_flags, struct nvgpu_channel_fence *fence,
u32 *valid_compbits, u32 *zbc_color,
struct nvgpu_user_fence *fence_out);
/* Unlike the functions above, this one takes struct gk20a directly. */
int gk20a_mark_compressible_write(
struct gk20a *g, u32 buffer_fd, u32 valid_compbits, u64 offset,
u32 zbc_color);
int nvgpu_cde_init_ops(struct nvgpu_os_linux *l);
#endif