gpu: nvgpu: FIFO sched fixes

Miscellaneous fixes for the sched code:

1. Make sure get_addr() on an SGL respects the use-phys flag
   (NVGPU_MM_USE_PHYSICAL_SG), since the runlist needs physical
   addresses when NVLINK is in use.
2. Ensure the runlist is contiguous: since the runlist memory is
   not virtually addressed, the buffer must be physically contiguous.
3. Use all 64 bits of the runlist address in the runlist base addr
   register (and related fields).

JIRA EVLR-2333

Change-Id: Id4fd5ba4665d3e35ff1d6ca78dea6b58894a9a9a
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1654667
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: Thomas Fleury <tfleury@nvidia.com>
Tested-by: Thomas Fleury <tfleury@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Commit: 71f53272b2 (parent a885f682d6)
Author: Alex Waterman
Date:   2018-02-08 18:25:47 -08:00
Committed by: mobile promotions
3 changed files with 12 additions and 7 deletions


@@ -316,7 +316,8 @@ void nvgpu_memset(struct gk20a *g, struct nvgpu_mem *mem, u32 offset,
  */
 u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl)
 {
-	if (!nvgpu_iommuable(g))
+	if (nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG) ||
+	    !nvgpu_iommuable(g))
 		return g->ops.mm.gpu_phys_addr(g, NULL, sg_phys(sgl));
 	if (sg_dma_address(sgl) == 0)
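
For illustration, a minimal standalone sketch of the address-selection rule
this hunk enforces; the struct and helper names below are hypothetical
stand-ins, not the real nvgpu types. When the use-phys flag is set, or the
GPU is not behind an IOMMU, the CPU-physical address must be returned rather
than the IOMMU (DMA) address:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Stub model of the decision in nvgpu_mem_get_addr_sgl(); these types
 * and helpers are illustrative, not the real nvgpu definitions. */
struct sgl {
	uint64_t phys_addr;	/* CPU-physical address of the segment */
	uint64_t dma_addr;	/* IOMMU (IOVA) address, 0 if unmapped */
};

static uint64_t get_addr(const struct sgl *sgl, bool use_physical_sg,
			 bool iommuable)
{
	/* Buffers consumed directly by the GPU (e.g. over NVLINK) need
	 * physical addresses even when an IOMMU is available. */
	if (use_physical_sg || !iommuable)
		return sgl->phys_addr;
	return sgl->dma_addr;
}

int main(void)
{
	struct sgl s = { .phys_addr = 0x80001000ULL, .dma_addr = 0x1000ULL };

	/* With the flag set, the physical address wins despite the IOMMU. */
	printf("%#llx\n", (unsigned long long)get_addr(&s, true, true));
	printf("%#llx\n", (unsigned long long)get_addr(&s, false, true));
	return 0;
}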


@@ -28,6 +28,7 @@
 #include <nvgpu/dma.h>
 #include <nvgpu/timers.h>
 #include <nvgpu/semaphore.h>
+#include <nvgpu/enabled.h>
 #include <nvgpu/kmem.h>
 #include <nvgpu/log.h>
 #include <nvgpu/soc.h>
@@ -666,11 +667,13 @@ static void fifo_engine_exception_status(struct gk20a *g,
 static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
 {
 	struct fifo_runlist_info_gk20a *runlist;
+	struct fifo_engine_info_gk20a *engine_info;
 	unsigned int runlist_id;
 	u32 i;
 	size_t runlist_size;
 	u32 active_engine_id, pbdma_id, engine_id;
-	struct fifo_engine_info_gk20a *engine_info;
+	int flags = nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG) ?
+		NVGPU_DMA_FORCE_CONTIGUOUS : 0;
 
 	nvgpu_log_fn(g, " ");
@@ -705,8 +708,9 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
 			f->num_runlist_entries, runlist_size);
 
 	for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
-		int err = nvgpu_dma_alloc_sys(g, runlist_size,
-					      &runlist->mem[i]);
+		int err = nvgpu_dma_alloc_flags_sys(g, flags,
+						    runlist_size,
+						    &runlist->mem[i]);
 		if (err) {
 			nvgpu_err(g, "memory allocation failed");
 			goto clean_up_runlist;
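
A minimal standalone sketch of the allocation-flag selection these two hunks
add; the flag value and helper names are hypothetical stand-ins for the
NVGPU_DMA_* flags and nvgpu_dma_alloc_flags_sys(). Because the hardware walks
the runlist by physical address, with no virtual mapping to hide gaps, the
buffer is forced contiguous whenever physical scatter-gather addressing is in
effect:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Illustrative flag value; the real NVGPU_DMA_* flags live in the nvgpu
 * DMA headers. */
#define DMA_FORCE_CONTIGUOUS 0x1

struct mem { int flags; size_t size; };

/* Hypothetical stand-in for nvgpu_dma_alloc_flags_sys(); it only records
 * the request here. */
static int dma_alloc_flags_sys(int flags, size_t size, struct mem *out)
{
	out->flags = flags;
	out->size = size;
	return 0;
}

static int alloc_runlist_buffer(bool use_physical_sg, size_t runlist_size,
				struct mem *m)
{
	/* No virtual addressing over the runlist: if the GPU consumes
	 * physical addresses, the backing pages must be contiguous. */
	int flags = use_physical_sg ? DMA_FORCE_CONTIGUOUS : 0;

	return dma_alloc_flags_sys(flags, runlist_size, m);
}

int main(void)
{
	struct mem m;

	alloc_runlist_buffer(true, 4096, &m);
	printf("flags=%#x size=%zu\n", m.flags, m.size);
	return 0;
}

The flag is conditional rather than unconditional because, when an IOMMU
provides a contiguous virtual window, physically discontiguous pages remain
acceptable.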


@@ -742,13 +742,13 @@ void gr_gk20a_ctx_patch_write(struct gk20a *g,
 static u32 fecs_current_ctx_data(struct gk20a *g, struct nvgpu_mem *inst_block)
 {
-	u32 ptr = u64_lo32(nvgpu_inst_block_addr(g, inst_block)
-			   >> ram_in_base_shift_v());
+	u64 ptr = nvgpu_inst_block_addr(g, inst_block) >>
+		ram_in_base_shift_v();
 	u32 aperture = nvgpu_aperture_mask(g, inst_block,
 			gr_fecs_current_ctx_target_sys_mem_ncoh_f(),
 			gr_fecs_current_ctx_target_vid_mem_f());
 
-	return gr_fecs_current_ctx_ptr_f(ptr) | aperture |
+	return gr_fecs_current_ctx_ptr_f(u64_lo32(ptr)) | aperture |
 		gr_fecs_current_ctx_valid_f(1);
 }
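
Finally, a minimal standalone sketch of the width discipline this last hunk
adopts: carry the instance-block address as a u64 through the base shift and
truncate to 32 bits only when packing the register field. The shift value and
the field packer below are assumptions for illustration, not the real
gr_fecs_current_ctx_ptr_f() or ram_in_base_shift_v():

#include <stdint.h>
#include <stdio.h>

#define RAM_IN_BASE_SHIFT 12	/* assumption: 4 KiB-aligned instance blocks */

static inline uint32_t u64_lo32(uint64_t v) { return (uint32_t)v; }

/* Hypothetical field packer standing in for gr_fecs_current_ctx_ptr_f(). */
static inline uint32_t ctx_ptr_f(uint32_t v) { return v & 0x0fffffffU; }

int main(void)
{
	/* Example instance-block address; shifting while still 64 bits
	 * wide keeps every address bit until the final field insertion. */
	uint64_t inst_addr = 0x0000001fff000000ULL;
	uint64_t ptr = inst_addr >> RAM_IN_BASE_SHIFT;
	uint32_t reg = ctx_ptr_f(u64_lo32(ptr));

	printf("ptr=%#llx reg=%#x\n", (unsigned long long)ptr, reg);
	return 0;
}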