From b269aae9f2ec824ec041a50ad80f4522b7ad18b6 Mon Sep 17 00:00:00 2001 From: Tejal Kudav Date: Wed, 12 Aug 2020 12:35:15 +0000 Subject: [PATCH] gpu: nvgpu: correct usage of pbdma_id The pbdma_id field stored in struct nvgpu_device is a bitmask and not a bit position as implied by the name. This field is incorrectly used as a bit position in nvgpu_engine_disable_activity(), causing PRI timeout errors during the iGPU and dGPU shutdown path. PRI timeout errors: nvgpu: 17000000.gv11b gk20a_ptimer_isr:54 [ERR] PRI timeout: ADR 0x0000308c READ DATA 0x00000000 Here the pbdma_id stored in struct nvgpu_device for runlist_0 on gv11b is 0x3 (bitmask corresponding to PBDMA_0 and PBDMA_1). nvgpu_engine_disable_activity() interprets this as PBDMA_3 and adds an incorrect offset to access the PBDMA_STATUS register, causing the PRI error. Modify nvgpu_engine_disable_activity() to treat pbdma_id as a bitmask and loop through the set bits. JIRA NVGPU-5991 Change-Id: Iaffb974cddaa375a329e70f3b5903b9ef2a222c4 Signed-off-by: Tejal Kudav Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2397954 Tested-by: mobile promotions Reviewed-by: mobile promotions --- drivers/gpu/nvgpu/common/fifo/engines.c | 50 +++++++++++++++---------- 1 file changed, 30 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/nvgpu/common/fifo/engines.c b/drivers/gpu/nvgpu/common/fifo/engines.c index dde8fc6fe..04bb783a8 100644 --- a/drivers/gpu/nvgpu/common/fifo/engines.c +++ b/drivers/gpu/nvgpu/common/fifo/engines.c @@ -240,6 +240,10 @@ int nvgpu_engine_disable_activity(struct gk20a *g, struct nvgpu_channel *ch = NULL; struct nvgpu_engine_status_info engine_status; struct nvgpu_pbdma_status_info pbdma_status; + unsigned long runlist_served_pbdmas; + unsigned long bit; + u32 pbdma_id; + struct nvgpu_fifo *f = &g->fifo; nvgpu_log_fn(g, " "); @@ -259,28 +263,34 @@ int nvgpu_engine_disable_activity(struct gk20a *g, nvgpu_runlist_set_state(g, BIT32(dev->runlist_id), RUNLIST_DISABLED); - /* chid from pbdma status */ - 
g->ops.pbdma_status.read_pbdma_status_info(g, - dev->pbdma_id, - &pbdma_status); - if (nvgpu_pbdma_status_is_chsw_valid(&pbdma_status) || - nvgpu_pbdma_status_is_chsw_save(&pbdma_status)) { - pbdma_chid = pbdma_status.id; - } else if (nvgpu_pbdma_status_is_chsw_load(&pbdma_status) || - nvgpu_pbdma_status_is_chsw_switch(&pbdma_status)) { - pbdma_chid = pbdma_status.next_id; - } else { - /* Nothing to do here */ - } + runlist_served_pbdmas = f->runlist_info[dev->runlist_id]->pbdma_bitmask; - if (pbdma_chid != NVGPU_INVALID_CHANNEL_ID) { - ch = nvgpu_channel_from_id(g, pbdma_chid); - if (ch != NULL) { - err = g->ops.fifo.preempt_channel(g, ch); - nvgpu_channel_put(ch); + for_each_set_bit(bit, &runlist_served_pbdmas, + nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA)) { + pbdma_id = U32(bit); + /* chid from pbdma status */ + g->ops.pbdma_status.read_pbdma_status_info(g, + pbdma_id, + &pbdma_status); + if (nvgpu_pbdma_status_is_chsw_valid(&pbdma_status) || + nvgpu_pbdma_status_is_chsw_save(&pbdma_status)) { + pbdma_chid = pbdma_status.id; + } else if (nvgpu_pbdma_status_is_chsw_load(&pbdma_status) || + nvgpu_pbdma_status_is_chsw_switch(&pbdma_status)) { + pbdma_chid = pbdma_status.next_id; + } else { + /* Nothing to do here */ } - if (err != 0) { - goto clean_up; + + if (pbdma_chid != NVGPU_INVALID_CHANNEL_ID) { + ch = nvgpu_channel_from_id(g, pbdma_chid); + if (ch != NULL) { + err = g->ops.fifo.preempt_channel(g, ch); + nvgpu_channel_put(ch); + } + if (err != 0) { + goto clean_up; + } } }