From cd4b2f642cb41a104239f5ad960cc27d5e01d0bb Mon Sep 17 00:00:00 2001 From: Konsta Holtta Date: Tue, 12 Feb 2019 13:05:18 +0200 Subject: [PATCH] gpu: nvgpu: add HAL for reading ccsr_channel Refactor read accesses to the ccsr_channel register for channel state to be done via a channel HAL op for all chips. A new op called read_state is added for this; information needed by other units is collected in a new struct nvgpu_channel_hw_state. Jira NVGPU-1307 Change-Id: Iff9385c08e17ac086d97f5771a54b56b2727e3c4 Signed-off-by: Konsta Holtta Reviewed-on: https://git-master.nvidia.com/r/2017266 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/common/fifo/channel_gk20a.c | 45 ++++++++++++ drivers/gpu/nvgpu/common/fifo/channel_gk20a.h | 3 + drivers/gpu/nvgpu/common/fifo/tsg.c | 15 ++-- drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 69 +++---------------- drivers/gpu/nvgpu/gk20a/fifo_gk20a.h | 6 +- drivers/gpu/nvgpu/gm20b/fifo_gm20b.c | 4 +- drivers/gpu/nvgpu/gm20b/hal_gm20b.c | 1 + drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 1 + drivers/gpu/nvgpu/gv100/hal_gv100.c | 1 + drivers/gpu/nvgpu/gv11b/fifo_gv11b.c | 14 ++-- drivers/gpu/nvgpu/gv11b/hal_gv11b.c | 1 + drivers/gpu/nvgpu/include/nvgpu/fifo.h | 11 ++- drivers/gpu/nvgpu/include/nvgpu/gk20a.h | 3 + drivers/gpu/nvgpu/tu104/hal_tu104.c | 1 + 14 files changed, 92 insertions(+), 83 deletions(-) diff --git a/drivers/gpu/nvgpu/common/fifo/channel_gk20a.c b/drivers/gpu/nvgpu/common/fifo/channel_gk20a.c index c108c8f04..9e1340ac0 100644 --- a/drivers/gpu/nvgpu/common/fifo/channel_gk20a.c +++ b/drivers/gpu/nvgpu/common/fifo/channel_gk20a.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include "channel_gk20a.h" @@ -58,3 +59,47 @@ void gk20a_channel_unbind(struct channel_gk20a *ch) ccsr_channel_inst_bind_false_f()); } } + +/* ccsr_channel_status_v is four bits long */ +static const char * const ccsr_chan_status_str[] = { + "idle", + "pending", + "pending_ctx_reload", + "pending_acquire", + "pending_acq_ctx_reload", + "on_pbdma", + "on_pbdma_and_eng", + "on_eng", + "on_eng_pending_acquire", + "on_eng_pending", + "on_pbdma_ctx_reload", + "on_pbdma_and_eng_ctx_reload", + "on_eng_ctx_reload", + "on_eng_pending_ctx_reload", + "on_eng_pending_acq_ctx_reload", + "N/A", +}; + +void gk20a_channel_read_state(struct gk20a *g, struct channel_gk20a *ch, + struct nvgpu_channel_hw_state *state) +{ + u32 reg = gk20a_readl(g, ccsr_channel_r(ch->chid)); + u32 status_v = ccsr_channel_status_v(reg); + + state->next = ccsr_channel_next_v(reg) == ccsr_channel_next_true_v(); + state->enabled = ccsr_channel_enable_v(reg) == + ccsr_channel_enable_in_use_v(); + state->ctx_reload = + status_v == ccsr_channel_status_pending_ctx_reload_v() || + status_v == ccsr_channel_status_pending_acq_ctx_reload_v() || + status_v == ccsr_channel_status_on_pbdma_ctx_reload_v() || + status_v == ccsr_channel_status_on_pbdma_and_eng_ctx_reload_v() || + status_v == ccsr_channel_status_on_eng_ctx_reload_v() || + status_v == ccsr_channel_status_on_eng_pending_ctx_reload_v() || + status_v == ccsr_channel_status_on_eng_pending_acq_ctx_reload_v(); + state->busy = ccsr_channel_busy_v(reg) == ccsr_channel_busy_true_v(); + state->pending_acquire = + status_v == ccsr_channel_status_pending_acquire_v() || + status_v == ccsr_channel_status_on_eng_pending_acquire_v(); + state->status_string = ccsr_chan_status_str[status_v]; +} diff --git a/drivers/gpu/nvgpu/common/fifo/channel_gk20a.h b/drivers/gpu/nvgpu/common/fifo/channel_gk20a.h index 02348b8e2..812643afb 100644 --- a/drivers/gpu/nvgpu/common/fifo/channel_gk20a.h +++ b/drivers/gpu/nvgpu/common/fifo/channel_gk20a.h @@ -25,9 +25,12 @@ struct channel_gk20a; struct gk20a; +struct nvgpu_channel_hw_state; void gk20a_channel_enable(struct channel_gk20a *ch); void gk20a_channel_disable(struct channel_gk20a *ch); void gk20a_channel_unbind(struct channel_gk20a *ch); +void gk20a_channel_read_state(struct gk20a *g, struct channel_gk20a *ch, + struct nvgpu_channel_hw_state *state); #endif /* FIFO_CHANNEL_GK20A_H */ diff --git a/drivers/gpu/nvgpu/common/fifo/tsg.c b/drivers/gpu/nvgpu/common/fifo/tsg.c index 4bfc5b867..87dd58dd5 100644 --- a/drivers/gpu/nvgpu/common/fifo/tsg.c +++ b/drivers/gpu/nvgpu/common/fifo/tsg.c @@ -43,7 +43,6 @@ int gk20a_enable_tsg(struct tsg_gk20a *tsg) { struct gk20a *g = tsg->g; struct channel_gk20a *ch; - bool is_next, is_ctx_reload; gk20a_tsg_disable_sched(g, tsg); @@ -54,19 +53,21 @@ int gk20a_enable_tsg(struct tsg_gk20a *tsg) */ nvgpu_rwsem_down_read(&tsg->ch_list_lock); nvgpu_list_for_each_entry(ch, &tsg->ch_list, channel_gk20a, ch_entry) { - is_next = gk20a_fifo_channel_status_is_next(g, ch->chid); - is_ctx_reload = gk20a_fifo_channel_status_is_ctx_reload(g, ch->chid); + struct nvgpu_channel_hw_state hw_state; - if (is_next || is_ctx_reload) { + g->ops.channel.read_state(g, ch, &hw_state); + + if (hw_state.next || hw_state.ctx_reload) { g->ops.channel.enable(ch); } } nvgpu_list_for_each_entry(ch, &tsg->ch_list, channel_gk20a, ch_entry) { - is_next = gk20a_fifo_channel_status_is_next(g, ch->chid); - is_ctx_reload = gk20a_fifo_channel_status_is_ctx_reload(g, ch->chid); + struct nvgpu_channel_hw_state hw_state; - if (is_next || is_ctx_reload) { + g->ops.channel.read_state(g, ch, &hw_state); + + if (hw_state.next || hw_state.ctx_reload) { continue; } diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index 540b5453d..33fc9a821 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c @@ -57,7 +57,6 @@ #include #include -#include #include #include @@ -1760,8 +1759,11 @@ int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch, int gk20a_fifo_tsg_unbind_channel_verify_status(struct channel_gk20a *ch) { struct gk20a *g = ch->g; + struct nvgpu_channel_hw_state hw_state; - if (gk20a_fifo_channel_status_is_next(g, ch->chid)) { + g->ops.channel.read_state(g, ch, &hw_state); + + if (hw_state.next) { nvgpu_err(g, "Channel %d to be removed from TSG %d has NEXT set!", ch->chid, ch->tsgid); return -EINVAL; @@ -2845,24 +2847,6 @@ u32 gk20a_fifo_get_pbdma_signature(struct gk20a *g) return pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f(); } -static const char * const ccsr_chan_status_str[] = { - "idle", - "pending", - "pending_ctx_reload", - "pending_acquire", - "pending_acq_ctx_reload", - "on_pbdma", - "on_pbdma_and_eng", - "on_eng", - "on_eng_pending_acquire", - "on_eng_pending", - "on_pbdma_ctx_reload", - "on_pbdma_and_eng_ctx_reload", - "on_eng_ctx_reload", - "on_eng_pending_ctx_reload", - "on_eng_pending_acq_ctx_reload", -}; - static const char * const pbdma_chan_eng_ctx_status_str[] = { "invalid", "valid", @@ -2878,15 +2862,6 @@ static const char * const not_found_str[] = { "NOT FOUND" }; -const char *gk20a_decode_ccsr_chan_status(u32 index) -{ - if (index >= ARRAY_SIZE(ccsr_chan_status_str)) { - return not_found_str[0]; - } else { - return ccsr_chan_status_str[index]; - } -} - const char *gk20a_decode_pbdma_chan_eng_ctx_status(u32 index) { if (index >= ARRAY_SIZE(pbdma_chan_eng_ctx_status_str)) { @@ -2896,34 +2871,13 @@ const char *gk20a_decode_pbdma_chan_eng_ctx_status(u32 index) } } -bool gk20a_fifo_channel_status_is_next(struct gk20a *g, u32 chid) -{ - u32 channel = gk20a_readl(g, ccsr_channel_r(chid)); - - return ccsr_channel_next_v(channel) == ccsr_channel_next_true_v(); -} - -bool gk20a_fifo_channel_status_is_ctx_reload(struct gk20a *g, u32 chid) -{ - u32 channel = gk20a_readl(g, ccsr_channel_r(chid)); - u32 status = ccsr_channel_status_v(channel); - - return (status == ccsr_channel_status_pending_ctx_reload_v() || - status == ccsr_channel_status_pending_acq_ctx_reload_v() || - status == ccsr_channel_status_on_pbdma_ctx_reload_v() || - status == ccsr_channel_status_on_pbdma_and_eng_ctx_reload_v() || - status == ccsr_channel_status_on_eng_ctx_reload_v() || - status == ccsr_channel_status_on_eng_pending_ctx_reload_v() || - status == ccsr_channel_status_on_eng_pending_acq_ctx_reload_v()); -} - void gk20a_capture_channel_ram_dump(struct gk20a *g, struct channel_gk20a *ch, struct nvgpu_channel_dump_info *info) { struct nvgpu_mem *mem = &ch->inst_block; - info->channel_reg = gk20a_readl(g, ccsr_channel_r(ch->chid)); + g->ops.channel.read_state(g, ch, &info->hw_state); info->inst.pb_top_level_get = nvgpu_mem_rd32_pair(g, mem, ram_fc_pb_top_level_get_w(), @@ -2959,11 +2913,8 @@ void gk20a_dump_channel_status_ramfc(struct gk20a *g, struct gk20a_debug_output *o, struct nvgpu_channel_dump_info *info) { - u32 status; u32 syncpointa, syncpointb; - status = ccsr_channel_status_v(info->channel_reg); - syncpointa = info->inst.syncpointa; syncpointb = info->inst.syncpointb; @@ -2974,11 +2925,9 @@ void gk20a_dump_channel_status_ramfc(struct gk20a *g, info->refs, info->deterministic ? "yes" : "no"); gk20a_debug_output(o, " In use: %-3s busy: %-3s status: %s", - (ccsr_channel_enable_v(info->channel_reg) == - ccsr_channel_enable_in_use_v()) ? "yes" : "no", - (ccsr_channel_busy_v(info->channel_reg) == - ccsr_channel_busy_true_v()) ? "yes" : "no", - gk20a_decode_ccsr_chan_status(status)); + info->hw_state.enabled ? "yes" : "no", + info->hw_state.busy ? "yes" : "no", + info->hw_state.status_string); gk20a_debug_output(o, " TOP %016llx" " PUT %016llx GET %016llx", @@ -3014,7 +2963,7 @@ void gk20a_dump_channel_status_ramfc(struct gk20a *g, && (pbdma_syncpointb_wait_switch_v(syncpointb) == pbdma_syncpointb_wait_switch_en_v())) { gk20a_debug_output(o, "%s on syncpt %u (%s) val %u", - (status == 3 || status == 8) ? "Waiting" : "Waited", + info->hw_state.pending_acquire ? "Waiting" : "Waited", pbdma_syncpointb_syncpt_index_v(syncpointb), nvgpu_nvhost_syncpt_get_name(g->nvhost_dev, pbdma_syncpointb_syncpt_index_v(syncpointb)), diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h index 5ac7fb275..5d76913bf 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h @@ -25,6 +25,7 @@ #define FIFO_GK20A_H #include +#include struct gk20a_debug_output; struct mmu_fault_info; @@ -217,7 +218,7 @@ struct nvgpu_channel_dump_info { int pid; int refs; bool deterministic; - u32 channel_reg; + struct nvgpu_channel_hw_state hw_state; struct { u64 pb_top_level_get; u64 pb_put; @@ -355,11 +356,8 @@ void gk20a_dump_pbdma_status(struct gk20a *g, struct gk20a_debug_output *o); void gk20a_dump_eng_status(struct gk20a *g, struct gk20a_debug_output *o); -const char *gk20a_decode_ccsr_chan_status(u32 index); const char *gk20a_decode_pbdma_chan_eng_ctx_status(u32 index); -bool gk20a_fifo_channel_status_is_next(struct gk20a *g, u32 chid); -bool gk20a_fifo_channel_status_is_ctx_reload(struct gk20a *g, u32 chid); int gk20a_fifo_tsg_unbind_channel_verify_status(struct channel_gk20a *ch); struct channel_gk20a *gk20a_refch_from_inst_ptr(struct gk20a *g, u64 inst_ptr); diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c index e5f8e5b85..be02e9173 100644 --- a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c @@ -166,9 +166,11 @@ void gm20b_fifo_tsg_verify_status_ctx_reload(struct channel_gk20a *ch) struct gk20a *g = ch->g; struct tsg_gk20a *tsg = &g->fifo.tsg[ch->tsgid]; struct channel_gk20a *temp_ch; + struct nvgpu_channel_hw_state hw_state; /* If CTX_RELOAD is set on a channel, move it to some other channel */ - if (gk20a_fifo_channel_status_is_ctx_reload(ch->g, ch->chid)) { + g->ops.channel.read_state(g, ch, &hw_state); + if (hw_state.ctx_reload) { nvgpu_rwsem_down_read(&tsg->ch_list_lock); nvgpu_list_for_each_entry(temp_ch, &tsg->ch_list, channel_gk20a, ch_entry) { if (temp_ch->chid != ch->chid) { diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c index f5fe6a3b7..290e2f913 100644 --- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c @@ -577,6 +577,7 @@ static const struct gpu_ops gm20b_ops = { .enable = gk20a_channel_enable, .disable = gk20a_channel_disable, .count = gm20b_channel_count, + .read_state = gk20a_channel_read_state, }, .netlist = { .get_netlist_name = gm20b_netlist_get_name, diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index 659f8c0a7..986637a4a 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c @@ -630,6 +630,7 @@ static const struct gpu_ops gp10b_ops = { .enable = gk20a_channel_enable, .disable = gk20a_channel_disable, .count = gm20b_channel_count, + .read_state = gk20a_channel_read_state, }, .netlist = { .get_netlist_name = gp10b_netlist_get_name, diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index 2ae925be4..d04db0cc3 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c @@ -799,6 +799,7 @@ static const struct gpu_ops gv100_ops = { .enable = gk20a_channel_enable, .disable = gk20a_channel_disable, .count = gv100_channel_count, + .read_state = gk20a_channel_read_state, }, .netlist = { .get_netlist_name = gv100_netlist_get_name, diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c index a04df66e7..fd9c36638 100644 --- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c @@ -223,7 +223,7 @@ void gv11b_capture_channel_ram_dump(struct gk20a *g, { struct nvgpu_mem *mem = &ch->inst_block; - info->channel_reg = gk20a_readl(g, ccsr_channel_r(ch->chid)); + g->ops.channel.read_state(g, ch, &info->hw_state); info->inst.pb_top_level_get = nvgpu_mem_rd32_pair(g, mem, ram_fc_pb_top_level_get_w(), @@ -255,10 +255,6 @@ void gv11b_dump_channel_status_ramfc(struct gk20a *g, struct gk20a_debug_output *o, struct nvgpu_channel_dump_info *info) { - u32 status; - - status = ccsr_channel_status_v(info->channel_reg); - gk20a_debug_output(o, "%d-%s, TSG: %u, pid %d, refs: %d%s: ", info->chid, g->name, @@ -267,11 +263,9 @@ void gv11b_dump_channel_status_ramfc(struct gk20a *g, info->refs, info->deterministic ? ", deterministic" : ""); gk20a_debug_output(o, "channel status: %s in use %s %s\n", - (ccsr_channel_enable_v(info->channel_reg) == - ccsr_channel_enable_in_use_v()) ? "" : "not", - gk20a_decode_ccsr_chan_status(status), - (ccsr_channel_busy_v(info->channel_reg) == - ccsr_channel_busy_true_v()) ? "busy" : "not busy"); + info->hw_state.enabled ? "" : "not", + info->hw_state.status_string, + info->hw_state.busy ? "busy" : "not busy"); gk20a_debug_output(o, "RAMFC : TOP: %016llx PUT: %016llx GET: %016llx " "FETCH: %016llx\n" diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index 241e78d2a..5266ba663 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c @@ -758,6 +758,7 @@ static const struct gpu_ops gv11b_ops = { .enable = gk20a_channel_enable, .disable = gk20a_channel_disable, .count = gv11b_channel_count, + .read_state = gk20a_channel_read_state, }, .netlist = { .get_netlist_name = gv11b_netlist_get_name, diff --git a/drivers/gpu/nvgpu/include/nvgpu/fifo.h b/drivers/gpu/nvgpu/include/nvgpu/fifo.h index 338ca2739..575e4e1d3 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/fifo.h +++ b/drivers/gpu/nvgpu/include/nvgpu/fifo.h @@ -35,4 +35,13 @@ #define RC_TYPE_FORCE_RESET 7U #define RC_TYPE_SCHED_ERR 8U -#endif /* NVGPU_FIFO_COMMON_H */ \ No newline at end of file +struct nvgpu_channel_hw_state { + bool enabled; + bool next; + bool ctx_reload; + bool busy; + bool pending_acquire; + const char *status_string; +}; + +#endif /* NVGPU_FIFO_COMMON_H */ diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index 90189c540..5d258500b 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -63,6 +63,7 @@ struct nvgpu_sgt; struct nvgpu_sgl; struct nvgpu_device_info; struct nvgpu_gr_subctx; +struct nvgpu_channel_hw_state; #include #include @@ -907,6 +908,8 @@ struct gpu_ops { void (*enable)(struct channel_gk20a *ch); void (*disable)(struct channel_gk20a *ch); u32 (*count)(struct gk20a *g); + void (*read_state)(struct gk20a *g, struct channel_gk20a *ch, + struct nvgpu_channel_hw_state *state); } channel; struct pmu_v { u32 (*get_pmu_cmdline_args_size)(struct nvgpu_pmu *pmu); diff --git a/drivers/gpu/nvgpu/tu104/hal_tu104.c b/drivers/gpu/nvgpu/tu104/hal_tu104.c index c12303591..df25c5bdb 100644 --- a/drivers/gpu/nvgpu/tu104/hal_tu104.c +++ b/drivers/gpu/nvgpu/tu104/hal_tu104.c @@ -830,6 +830,7 @@ static const struct gpu_ops tu104_ops = { .enable = gk20a_channel_enable, .disable = gk20a_channel_disable, .count = gv100_channel_count, + .read_state = gk20a_channel_read_state, }, .netlist = { .get_netlist_name = tu104_netlist_get_name,