gpu: nvgpu: Reduce structure padding waste

The gk20a_init_fifo_setup_sw_common() function allocates memory of
channel_gk20a and tsg_gk20a structures for all 512 channels:
    Size   Caller                    Module  Pages     Type
    749568 __nvgpu_vzalloc+0x28/0x78 [nvgpu] pages=182 vmalloc
    602112 __nvgpu_vzalloc+0x28/0x78 [nvgpu] pages=146 vmalloc

This change simply reorganizes the member definitions in those two
structures to reduce padding waste. After this change:
    Size   Caller                    Module  Pages     Type
    733184 __nvgpu_vzalloc+0x28/0x78 [nvgpu] pages=178 vmalloc
    585728 __nvgpu_vzalloc+0x28/0x78 [nvgpu] pages=142 vmalloc

In summary, it saves 8 pages, i.e. 32 KB of memory.

Bug 2327574
Bug 2284925

Change-Id: I06693e0fef516a145b48dd3a05d756c0feaf3ba5
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1803358
Reviewed-by: svc-misra-checker <svc-misra-checker@nvidia.com>
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Nicolin Chen
2018-08-13 20:22:56 -07:00
committed by mobile promotions
parent d5473e225d
commit 52305f0514
3 changed files with 37 additions and 38 deletions

View File

@@ -197,7 +197,6 @@ struct channel_gk20a {
struct nvgpu_list_node free_chs;
struct nvgpu_spinlock ref_obtain_lock;
bool referenceable;
nvgpu_atomic_t ref_count;
struct nvgpu_cond ref_count_dec_wq;
#if GK20A_CHANNEL_REFCOUNT_TRACKING
@@ -214,19 +213,14 @@ struct channel_gk20a {
struct nvgpu_semaphore_int *hw_sema;
int chid;
nvgpu_atomic_t bound;
bool vpr;
bool deterministic;
/* deterministic, but explicitly idle and submits disallowed */
bool deterministic_railgate_allowed;
bool cde;
bool usermode_submit_enabled;
int chid;
int tsgid;
pid_t pid;
pid_t tgid;
struct nvgpu_mutex ioctl_lock;
int tsgid;
struct nvgpu_list_node ch_entry; /* channel's entry in TSG */
struct channel_gk20a_joblist joblist;
@@ -242,16 +236,11 @@ struct channel_gk20a {
u64 userd_iova;
u64 userd_gpu_va;
u32 obj_class; /* we support only one obj per channel */
struct priv_cmd_queue priv_cmd_q;
struct nvgpu_cond notifier_wq;
struct nvgpu_cond semaphore_wq;
u32 timeout_accumulated_ms;
u32 timeout_gpfifo_get;
/* kernel watchdog to kill stuck jobs */
struct channel_gk20a_timeout timeout;
@@ -271,32 +260,43 @@ struct channel_gk20a {
struct nvgpu_mutex dbg_s_lock;
struct nvgpu_list_node dbg_s_list;
bool has_timedout;
u32 timeout_ms_max;
bool timeout_debug_dump;
struct nvgpu_mutex sync_lock;
struct gk20a_channel_sync *sync;
struct gk20a_channel_sync *user_sync;
bool has_os_fence_framework_support;
#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
u64 virt_ctx;
#endif
u32 runlist_id;
bool is_privileged_channel;
u32 subctx_id;
u32 runqueue_sel;
struct ctx_header_desc ctx_header;
/* Any operating system specific data. */
void *os_priv;
u32 obj_class; /* we support only one obj per channel */
u32 timeout_accumulated_ms;
u32 timeout_gpfifo_get;
u32 subctx_id;
u32 runqueue_sel;
u32 timeout_ms_max;
u32 runlist_id;
bool mmu_nack_handled;
bool has_timedout;
bool referenceable;
bool vpr;
bool deterministic;
/* deterministic, but explicitly idle and submits disallowed */
bool deterministic_railgate_allowed;
bool cde;
bool usermode_submit_enabled;
bool timeout_debug_dump;
bool has_os_fence_framework_support;
bool is_privileged_channel;
};
static inline struct channel_gk20a *

View File

@@ -453,7 +453,6 @@ struct nvgpu_gr_ctx {
u32 graphics_preempt_mode;
u32 compute_preempt_mode;
bool boosted_ctx;
struct nvgpu_mem preempt_ctxsw_buffer;
struct nvgpu_mem spill_ctxsw_buffer;
@@ -462,11 +461,12 @@ struct nvgpu_gr_ctx {
u32 ctx_id;
bool ctx_id_valid;
bool cilp_preempt_pending;
bool boosted_ctx;
bool golden_img_loaded;
#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
u64 virt_ctx;
#endif
bool golden_img_loaded;
struct patch_desc patch_ctx;
struct zcull_ctx_desc zcull_ctx;

View File

@@ -42,34 +42,33 @@ struct tsg_gk20a *tsg_gk20a_from_ch(struct channel_gk20a *ch);
struct tsg_gk20a {
struct gk20a *g;
bool in_use;
int tsgid;
struct vm_gk20a *vm;
struct nvgpu_mem *eng_method_buffers;
struct nvgpu_gr_ctx gr_ctx;
struct nvgpu_ref refcount;
struct nvgpu_list_node ch_list;
int num_active_channels;
struct nvgpu_list_node event_id_list;
struct nvgpu_rwsem ch_list_lock;
struct nvgpu_mutex event_id_list_lock;
int num_active_channels;
unsigned int timeslice_us;
unsigned int timeslice_timeout;
unsigned int timeslice_scale;
struct vm_gk20a *vm;
u32 interleave_level;
struct nvgpu_list_node event_id_list;
struct nvgpu_mutex event_id_list_lock;
int tsgid;
u32 runlist_id;
pid_t tgid;
struct nvgpu_mem *eng_method_buffers;
u32 num_active_tpcs;
u8 tpc_pg_enabled;
bool tpc_num_initialized;
bool in_use;
struct nvgpu_gr_ctx gr_ctx;
};
int gk20a_enable_tsg(struct tsg_gk20a *tsg);