gpu: nvgpu: allow skipping pramin barriers

A wmb() next to each gk20a_mem_wr32() via PRAMIN may be overly careful,
so support not inserting these barriers for performance, in cases where
they are not necessary, where the caller would do an explicit barrier
after a bunch of writes.

Also, move those optional wmb()s to be done at the end of the whole
internally batched write for gk20a_mem_wr_n() and gk20a_memset(), out of
the per-batch subloops that may run multiple times.

Jira DNVGPU-23

Change-Id: I61ee65418335863110bca6f036b2e883b048c5c2
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1225149
(cherry picked from commit d2c40327d1995f76e8ab9cb4cd8c76407dabc6de)
Reviewed-on: http://git-master/r/1227474
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Konsta Holtta
2016-09-12 12:37:30 +03:00
committed by mobile promotions
parent 718af968f0
commit a8e260bc8d
2 changed files with 7 additions and 12 deletions

View File

@@ -234,12 +234,6 @@ static inline void pramin_access_batch_wr_n(struct gk20a *g, u32 start,
r += sizeof(u32);
}
/*
* Barrier moved here from gk20a_writel in the loop. The writes don't
* have to be ordered.
*/
wmb();
*arg = src_u32;
}
@@ -252,12 +246,6 @@ static inline void pramin_access_batch_set(struct gk20a *g, u32 start,
writel_relaxed(repeat, g->regs + r);
r += sizeof(u32);
}
/*
* Barrier moved here from gk20a_writel in the loop. The writes don't
* have to be ordered.
*/
wmb();
}
u32 gk20a_mem_rd32(struct gk20a *g, struct mem_desc *mem, u32 w)
@@ -336,6 +324,8 @@ void gk20a_mem_wr32(struct gk20a *g, struct mem_desc *mem, u32 w, u32 data)
pramin_access_batched(g, mem, w * sizeof(u32), sizeof(u32),
pramin_access_batch_wr_n, &p);
if (!mem->skip_wmb)
wmb();
} else {
WARN_ON("Accessing unallocated mem_desc");
}
@@ -368,6 +358,8 @@ void gk20a_mem_wr_n(struct gk20a *g, struct mem_desc *mem, u32 offset,
pramin_access_batched(g, mem, offset, size,
pramin_access_batch_wr_n, &src_u32);
if (!mem->skip_wmb)
wmb();
} else {
WARN_ON("Accessing unallocated mem_desc");
}
@@ -398,6 +390,8 @@ void gk20a_memset(struct gk20a *g, struct mem_desc *mem, u32 offset,
pramin_access_batched(g, mem, offset, size,
pramin_access_batch_set, &p);
if (!mem->skip_wmb)
wmb();
} else {
WARN_ON("Accessing unallocated mem_desc");
}

View File

@@ -74,6 +74,7 @@ struct mem_desc {
bool user_mem; /* vidmem only */
struct gk20a_allocator *allocator; /* vidmem only */
struct list_head clear_list_entry; /* vidmem only */
bool skip_wmb;
};
struct mem_desc_sub {