gpu: nvgpu: Reduce structure padding waste

The gk20a_init_fifo_setup_sw_common() function allocates memory of
channel_gk20a and tsg_gk20a structures for all 512 channels:
    Size   Caller                    Module  Pages     Type
    749568 __nvgpu_vzalloc+0x28/0x78 [nvgpu] pages=182 vmalloc
    602112 __nvgpu_vzalloc+0x28/0x78 [nvgpu] pages=146 vmalloc

This change simply reorganizes the member definitions in those two
structures to reduce padding waste. After this change:
    Size   Caller                    Module  Pages     Type
    733184 __nvgpu_vzalloc+0x28/0x78 [nvgpu] pages=178 vmalloc
    585728 __nvgpu_vzalloc+0x28/0x78 [nvgpu] pages=142 vmalloc

In summary, it saves 8 pages, i.e. 32 KB of memory.

Bug 2327574
Bug 2284925

Change-Id: I06693e0fef516a145b48dd3a05d756c0feaf3ba5
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1803358
Reviewed-by: svc-misra-checker <svc-misra-checker@nvidia.com>
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Nicolin Chen
2018-08-13 20:22:56 -07:00
committed by mobile promotions
parent d5473e225d
commit 52305f0514
3 changed files with 37 additions and 38 deletions

View File

@@ -197,7 +197,6 @@ struct channel_gk20a {
struct nvgpu_list_node free_chs;
struct nvgpu_spinlock ref_obtain_lock;
bool referenceable;
nvgpu_atomic_t ref_count;
struct nvgpu_cond ref_count_dec_wq;
#if GK20A_CHANNEL_REFCOUNT_TRACKING
@@ -214,19 +213,14 @@ struct channel_gk20a {
struct nvgpu_semaphore_int *hw_sema;
int chid;
nvgpu_atomic_t bound;
bool vpr;
bool deterministic;
/* deterministic, but explicitly idle and submits disallowed */
bool deterministic_railgate_allowed;
bool cde;
bool usermode_submit_enabled;
int chid;
int tsgid;
pid_t pid;
pid_t tgid;
struct nvgpu_mutex ioctl_lock;
int tsgid;
struct nvgpu_list_node ch_entry; /* channel's entry in TSG */
struct channel_gk20a_joblist joblist;
@@ -242,16 +236,11 @@ struct channel_gk20a {
u64 userd_iova;
u64 userd_gpu_va;
u32 obj_class; /* we support only one obj per channel */
struct priv_cmd_queue priv_cmd_q;
struct nvgpu_cond notifier_wq;
struct nvgpu_cond semaphore_wq;
u32 timeout_accumulated_ms;
u32 timeout_gpfifo_get;
/* kernel watchdog to kill stuck jobs */
struct channel_gk20a_timeout timeout;
@@ -271,32 +260,43 @@ struct channel_gk20a {
struct nvgpu_mutex dbg_s_lock;
struct nvgpu_list_node dbg_s_list;
bool has_timedout;
u32 timeout_ms_max;
bool timeout_debug_dump;
struct nvgpu_mutex sync_lock;
struct gk20a_channel_sync *sync;
struct gk20a_channel_sync *user_sync;
bool has_os_fence_framework_support;
#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
u64 virt_ctx;
#endif
u32 runlist_id;
bool is_privileged_channel;
u32 subctx_id;
u32 runqueue_sel;
struct ctx_header_desc ctx_header;
/* Any operating system specific data. */
void *os_priv;
u32 obj_class; /* we support only one obj per channel */
u32 timeout_accumulated_ms;
u32 timeout_gpfifo_get;
u32 subctx_id;
u32 runqueue_sel;
u32 timeout_ms_max;
u32 runlist_id;
bool mmu_nack_handled;
bool has_timedout;
bool referenceable;
bool vpr;
bool deterministic;
/* deterministic, but explicitly idle and submits disallowed */
bool deterministic_railgate_allowed;
bool cde;
bool usermode_submit_enabled;
bool timeout_debug_dump;
bool has_os_fence_framework_support;
bool is_privileged_channel;
};
static inline struct channel_gk20a *

View File

@@ -453,7 +453,6 @@ struct nvgpu_gr_ctx {
u32 graphics_preempt_mode;
u32 compute_preempt_mode;
bool boosted_ctx;
struct nvgpu_mem preempt_ctxsw_buffer;
struct nvgpu_mem spill_ctxsw_buffer;
@@ -462,11 +461,12 @@ struct nvgpu_gr_ctx {
u32 ctx_id;
bool ctx_id_valid;
bool cilp_preempt_pending;
bool boosted_ctx;
bool golden_img_loaded;
#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
u64 virt_ctx;
#endif
bool golden_img_loaded;
struct patch_desc patch_ctx;
struct zcull_ctx_desc zcull_ctx;

View File

@@ -42,34 +42,33 @@ struct tsg_gk20a *tsg_gk20a_from_ch(struct channel_gk20a *ch);
struct tsg_gk20a {
struct gk20a *g;
bool in_use;
int tsgid;
struct vm_gk20a *vm;
struct nvgpu_mem *eng_method_buffers;
struct nvgpu_gr_ctx gr_ctx;
struct nvgpu_ref refcount;
struct nvgpu_list_node ch_list;
int num_active_channels;
struct nvgpu_list_node event_id_list;
struct nvgpu_rwsem ch_list_lock;
struct nvgpu_mutex event_id_list_lock;
int num_active_channels;
unsigned int timeslice_us;
unsigned int timeslice_timeout;
unsigned int timeslice_scale;
struct vm_gk20a *vm;
u32 interleave_level;
struct nvgpu_list_node event_id_list;
struct nvgpu_mutex event_id_list_lock;
int tsgid;
u32 runlist_id;
pid_t tgid;
struct nvgpu_mem *eng_method_buffers;
u32 num_active_tpcs;
u8 tpc_pg_enabled;
bool tpc_num_initialized;
bool in_use;
struct nvgpu_gr_ctx gr_ctx;
};
int gk20a_enable_tsg(struct tsg_gk20a *tsg);