gpu: nvgpu: allow skipping pramin barriers
A wmb() next to each gk20a_mem_wr32() via PRAMIN may be overly cautious,
so support skipping these barriers for performance in cases where they
are not necessary: for example, when the caller issues one explicit
barrier after a batch of writes.
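
As a rough illustration of that caller-side pattern (fill_words() is a
hypothetical helper, not driver code; struct gk20a, struct mem_desc,
gk20a_mem_wr32() and wmb() are the driver/kernel names used in this
commit), a caller batching its own writes could opt out of the
per-write barriers and order the whole batch once:

/*
 * Sketch only: with skip_wmb set, each PRAMIN-backed gk20a_mem_wr32()
 * below elides its trailing wmb(), and the single explicit wmb() at
 * the end orders the entire batch of writes.
 */
static void fill_words(struct gk20a *g, struct mem_desc *mem, u32 words)
{
	u32 i;

	mem->skip_wmb = true;
	for (i = 0; i < words; i++)
		gk20a_mem_wr32(g, mem, i, 0);
	wmb();
}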
Also, move those optional wmb()s in gk20a_mem_{wr_n,memset} out of the
per-batch subloops, which may run multiple times, so that the barrier
is done once at the end of the whole internally batched write.
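
For contrast, a compressed before/after view of the barrier placement
(paraphrased from the hunks below, not the literal driver code):

/* Before: every per-window subloop ended with its own barrier. */
	while (words--) {
		writel_relaxed(*src_u32++, g->regs + r);
		r += sizeof(u32);
	}
	wmb();	/* ran once per batch, i.e. possibly many times */

/* After: subloops are barrier-free; the writer issues at most one
 * barrier after the whole batched access. */
	pramin_access_batched(g, mem, offset, size,
			pramin_access_batch_wr_n, &src_u32);
	if (!mem->skip_wmb)
		wmb();	/* at most once per gk20a_mem_wr_n() call */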
Jira DNVGPU-23
Change-Id: I61ee65418335863110bca6f036b2e883b048c5c2
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1225149
(cherry picked from commit d2c40327d1995f76e8ab9cb4cd8c76407dabc6de)
Reviewed-on: http://git-master/r/1227474
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
commit a8e260bc8d
parent 718af968f0
committed by mobile promotions

@@ -234,12 +234,6 @@ static inline void pramin_access_batch_wr_n(struct gk20a *g, u32 start,
 		r += sizeof(u32);
 	}
 
-	/*
-	 * Barrier moved here from gk20a_writel in the loop. The writes don't
-	 * have to be ordered.
-	 */
-	wmb();
-
 	*arg = src_u32;
 }
 
@@ -252,12 +246,6 @@ static inline void pramin_access_batch_set(struct gk20a *g, u32 start,
 		writel_relaxed(repeat, g->regs + r);
 		r += sizeof(u32);
 	}
-
-	/*
-	 * Barrier moved here from gk20a_writel in the loop. The writes don't
-	 * have to be ordered.
-	 */
-	wmb();
 }
 
 u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w)
@@ -336,6 +324,8 @@ void gk20a_mem_wr32(struct gk20a *g, struct mem_desc *mem, u32 w, u32 data)
 
 		pramin_access_batched(g, mem, w * sizeof(u32), sizeof(u32),
 				pramin_access_batch_wr_n, &p);
+		if (!mem->skip_wmb)
+			wmb();
 	} else {
 		WARN_ON("Accessing unallocated mem_desc");
 	}
@@ -368,6 +358,8 @@ void gk20a_mem_wr_n(struct gk20a *g, struct mem_desc *mem, u32 offset,
 
 		pramin_access_batched(g, mem, offset, size,
 				pramin_access_batch_wr_n, &src_u32);
+		if (!mem->skip_wmb)
+			wmb();
 	} else {
 		WARN_ON("Accessing unallocated mem_desc");
 	}
@@ -398,6 +390,8 @@ void gk20a_memset(struct gk20a *g, struct mem_desc *mem, u32 offset,
 
 		pramin_access_batched(g, mem, offset, size,
 				pramin_access_batch_set, &p);
+		if (!mem->skip_wmb)
+			wmb();
 	} else {
 		WARN_ON("Accessing unallocated mem_desc");
 	}
@@ -74,6 +74,7 @@ struct mem_desc {
 	bool user_mem; /* vidmem only */
 	struct gk20a_allocator *allocator; /* vidmem only */
 	struct list_head clear_list_entry; /* vidmem only */
+	bool skip_wmb;
 };
 
 struct mem_desc_sub {