mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-24 10:34:43 +03:00
gpu: nvgpu: Reduce structure padding waste
The gk20a_init_fifo_setup_sw_common() function allocates memory of
channel_gk20a and tsg_gk20a structures for all 512 channels:
Size Caller Module Pages Type
749568 __nvgpu_vzalloc+0x28/0x78 [nvgpu] pages=182 vmalloc
602112 __nvgpu_vzalloc+0x28/0x78 [nvgpu] pages=146 vmalloc
This change simply reorganizes the member definitions in those two
structures to reduce padding waste. After this change:
Size Caller Module Pages Type
733184 __nvgpu_vzalloc+0x28/0x78 [nvgpu] pages=178 vmalloc
585728 __nvgpu_vzalloc+0x28/0x78 [nvgpu] pages=142 vmalloc
In summary, it saves 8 pages, i.e. 32 KB of memory.
Bug 2327574
Bug 2284925
Change-Id: I06693e0fef516a145b48dd3a05d756c0feaf3ba5
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1803358
Reviewed-by: svc-misra-checker <svc-misra-checker@nvidia.com>
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
d5473e225d
commit
52305f0514
@@ -197,7 +197,6 @@ struct channel_gk20a {
|
||||
struct nvgpu_list_node free_chs;
|
||||
|
||||
struct nvgpu_spinlock ref_obtain_lock;
|
||||
bool referenceable;
|
||||
nvgpu_atomic_t ref_count;
|
||||
struct nvgpu_cond ref_count_dec_wq;
|
||||
#if GK20A_CHANNEL_REFCOUNT_TRACKING
|
||||
@@ -214,19 +213,14 @@ struct channel_gk20a {
|
||||
|
||||
struct nvgpu_semaphore_int *hw_sema;
|
||||
|
||||
int chid;
|
||||
nvgpu_atomic_t bound;
|
||||
bool vpr;
|
||||
bool deterministic;
|
||||
/* deterministic, but explicitly idle and submits disallowed */
|
||||
bool deterministic_railgate_allowed;
|
||||
bool cde;
|
||||
bool usermode_submit_enabled;
|
||||
|
||||
int chid;
|
||||
int tsgid;
|
||||
pid_t pid;
|
||||
pid_t tgid;
|
||||
struct nvgpu_mutex ioctl_lock;
|
||||
|
||||
int tsgid;
|
||||
struct nvgpu_list_node ch_entry; /* channel's entry in TSG */
|
||||
|
||||
struct channel_gk20a_joblist joblist;
|
||||
@@ -242,16 +236,11 @@ struct channel_gk20a {
|
||||
u64 userd_iova;
|
||||
u64 userd_gpu_va;
|
||||
|
||||
u32 obj_class; /* we support only one obj per channel */
|
||||
|
||||
struct priv_cmd_queue priv_cmd_q;
|
||||
|
||||
struct nvgpu_cond notifier_wq;
|
||||
struct nvgpu_cond semaphore_wq;
|
||||
|
||||
u32 timeout_accumulated_ms;
|
||||
u32 timeout_gpfifo_get;
|
||||
|
||||
/* kernel watchdog to kill stuck jobs */
|
||||
struct channel_gk20a_timeout timeout;
|
||||
|
||||
@@ -271,32 +260,43 @@ struct channel_gk20a {
|
||||
struct nvgpu_mutex dbg_s_lock;
|
||||
struct nvgpu_list_node dbg_s_list;
|
||||
|
||||
bool has_timedout;
|
||||
u32 timeout_ms_max;
|
||||
bool timeout_debug_dump;
|
||||
|
||||
struct nvgpu_mutex sync_lock;
|
||||
struct gk20a_channel_sync *sync;
|
||||
struct gk20a_channel_sync *user_sync;
|
||||
|
||||
bool has_os_fence_framework_support;
|
||||
|
||||
#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
|
||||
u64 virt_ctx;
|
||||
#endif
|
||||
|
||||
u32 runlist_id;
|
||||
|
||||
bool is_privileged_channel;
|
||||
u32 subctx_id;
|
||||
u32 runqueue_sel;
|
||||
|
||||
struct ctx_header_desc ctx_header;
|
||||
|
||||
/* Any operating system specific data. */
|
||||
void *os_priv;
|
||||
|
||||
u32 obj_class; /* we support only one obj per channel */
|
||||
|
||||
u32 timeout_accumulated_ms;
|
||||
u32 timeout_gpfifo_get;
|
||||
|
||||
u32 subctx_id;
|
||||
u32 runqueue_sel;
|
||||
|
||||
u32 timeout_ms_max;
|
||||
u32 runlist_id;
|
||||
|
||||
bool mmu_nack_handled;
|
||||
bool has_timedout;
|
||||
bool referenceable;
|
||||
bool vpr;
|
||||
bool deterministic;
|
||||
/* deterministic, but explicitly idle and submits disallowed */
|
||||
bool deterministic_railgate_allowed;
|
||||
bool cde;
|
||||
bool usermode_submit_enabled;
|
||||
bool timeout_debug_dump;
|
||||
bool has_os_fence_framework_support;
|
||||
|
||||
bool is_privileged_channel;
|
||||
};
|
||||
|
||||
static inline struct channel_gk20a *
|
||||
|
||||
@@ -453,7 +453,6 @@ struct nvgpu_gr_ctx {
|
||||
|
||||
u32 graphics_preempt_mode;
|
||||
u32 compute_preempt_mode;
|
||||
bool boosted_ctx;
|
||||
|
||||
struct nvgpu_mem preempt_ctxsw_buffer;
|
||||
struct nvgpu_mem spill_ctxsw_buffer;
|
||||
@@ -462,11 +461,12 @@ struct nvgpu_gr_ctx {
|
||||
u32 ctx_id;
|
||||
bool ctx_id_valid;
|
||||
bool cilp_preempt_pending;
|
||||
bool boosted_ctx;
|
||||
bool golden_img_loaded;
|
||||
|
||||
#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
|
||||
u64 virt_ctx;
|
||||
#endif
|
||||
bool golden_img_loaded;
|
||||
|
||||
struct patch_desc patch_ctx;
|
||||
struct zcull_ctx_desc zcull_ctx;
|
||||
|
||||
@@ -42,34 +42,33 @@ struct tsg_gk20a *tsg_gk20a_from_ch(struct channel_gk20a *ch);
|
||||
struct tsg_gk20a {
|
||||
struct gk20a *g;
|
||||
|
||||
bool in_use;
|
||||
int tsgid;
|
||||
struct vm_gk20a *vm;
|
||||
struct nvgpu_mem *eng_method_buffers;
|
||||
|
||||
|
||||
struct nvgpu_gr_ctx gr_ctx;
|
||||
struct nvgpu_ref refcount;
|
||||
|
||||
struct nvgpu_list_node ch_list;
|
||||
int num_active_channels;
|
||||
struct nvgpu_list_node event_id_list;
|
||||
struct nvgpu_rwsem ch_list_lock;
|
||||
struct nvgpu_mutex event_id_list_lock;
|
||||
int num_active_channels;
|
||||
|
||||
unsigned int timeslice_us;
|
||||
unsigned int timeslice_timeout;
|
||||
unsigned int timeslice_scale;
|
||||
|
||||
struct vm_gk20a *vm;
|
||||
|
||||
u32 interleave_level;
|
||||
|
||||
struct nvgpu_list_node event_id_list;
|
||||
struct nvgpu_mutex event_id_list_lock;
|
||||
int tsgid;
|
||||
|
||||
u32 runlist_id;
|
||||
pid_t tgid;
|
||||
struct nvgpu_mem *eng_method_buffers;
|
||||
u32 num_active_tpcs;
|
||||
u8 tpc_pg_enabled;
|
||||
bool tpc_num_initialized;
|
||||
bool in_use;
|
||||
|
||||
struct nvgpu_gr_ctx gr_ctx;
|
||||
};
|
||||
|
||||
int gk20a_enable_tsg(struct tsg_gk20a *tsg);
|
||||
|
||||
Reference in New Issue
Block a user