mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 17:36:20 +03:00
gpu: nvgpu: add HAL for reading ccsr_channel
Refactor read accesses to the ccsr_channel register for channel state to be done via a channel HAL op for all chips. A new op called read_state is added for this; information needed by other units is collected in a new struct nvgpu_channel_hw_state. Jira NVGPU-1307 Change-Id: Iff9385c08e17ac086d97f5771a54b56b2727e3c4 Signed-off-by: Konsta Holtta <kholtta@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/2017266 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
335e4f1839
commit
cd4b2f642c
@@ -25,6 +25,7 @@
|
||||
#include <nvgpu/atomic.h>
|
||||
#include <nvgpu/io.h>
|
||||
#include <nvgpu/barrier.h>
|
||||
#include <nvgpu/bug.h>
|
||||
#include <nvgpu/gk20a.h>
|
||||
|
||||
#include "channel_gk20a.h"
|
||||
@@ -58,3 +59,47 @@ void gk20a_channel_unbind(struct channel_gk20a *ch)
|
||||
ccsr_channel_inst_bind_false_f());
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Human-readable names for the ccsr_channel status field, indexed directly
 * by the value returned from ccsr_channel_status_v().
 *
 * ccsr_channel_status_v is four bits long, so the table has 16 entries and
 * gk20a_channel_read_state() indexes it without a bounds check. The final
 * "N/A" entry appears to be filler for the one value (15) that has no named
 * status -- confirm against the hardware headers before relying on it.
 */
|
||||
static const char * const ccsr_chan_status_str[] = {
|
||||
"idle",
|
||||
"pending",
|
||||
"pending_ctx_reload",
|
||||
"pending_acquire",
|
||||
"pending_acq_ctx_reload",
|
||||
"on_pbdma",
|
||||
"on_pbdma_and_eng",
|
||||
"on_eng",
|
||||
"on_eng_pending_acquire",
|
||||
"on_eng_pending",
|
||||
"on_pbdma_ctx_reload",
|
||||
"on_pbdma_and_eng_ctx_reload",
|
||||
"on_eng_ctx_reload",
|
||||
"on_eng_pending_ctx_reload",
|
||||
"on_eng_pending_acq_ctx_reload",
|
||||
"N/A",
|
||||
};
|
||||
|
||||
/*
 * Read the ccsr_channel register of channel ch->chid once and decode it
 * into *state.
 *
 * next, enabled and busy are taken from their own register fields;
 * ctx_reload and pending_acquire are derived from the status field;
 * status_string is set to the matching entry of ccsr_chan_status_str.
 * Every member of *state is written, so the caller does not need to
 * initialize it beforehand.
 */
void gk20a_channel_read_state(struct gk20a *g, struct channel_gk20a *ch,
|
||||
struct nvgpu_channel_hw_state *state)
|
||||
{
|
||||
/* Single register read; all fields below are decoded from this snapshot. */
u32 reg = gk20a_readl(g, ccsr_channel_r(ch->chid));
|
||||
u32 status_v = ccsr_channel_status_v(reg);
|
||||
|
||||
state->next = ccsr_channel_next_v(reg) == ccsr_channel_next_true_v();
|
||||
state->enabled = ccsr_channel_enable_v(reg) ==
|
||||
ccsr_channel_enable_in_use_v();
|
||||
/* True for any of the status values whose name ends in ctx_reload. */
state->ctx_reload =
|
||||
status_v == ccsr_channel_status_pending_ctx_reload_v() ||
|
||||
status_v == ccsr_channel_status_pending_acq_ctx_reload_v() ||
|
||||
status_v == ccsr_channel_status_on_pbdma_ctx_reload_v() ||
|
||||
status_v == ccsr_channel_status_on_pbdma_and_eng_ctx_reload_v() ||
|
||||
status_v == ccsr_channel_status_on_eng_ctx_reload_v() ||
|
||||
status_v == ccsr_channel_status_on_eng_pending_ctx_reload_v() ||
|
||||
status_v == ccsr_channel_status_on_eng_pending_acq_ctx_reload_v();
|
||||
state->busy = ccsr_channel_busy_v(reg) == ccsr_channel_busy_true_v();
|
||||
/* Only the two plain *_pending_acquire statuses count here. */
state->pending_acquire =
|
||||
status_v == ccsr_channel_status_pending_acquire_v() ||
|
||||
status_v == ccsr_channel_status_on_eng_pending_acquire_v();
|
||||
/*
 * No bounds check: the table is sized for a four-bit status field
 * (16 entries). NOTE(review): this relies on ccsr_channel_status_v()
 * never returning more than 15 -- confirm against the hw header.
 */
state->status_string = ccsr_chan_status_str[status_v];
|
||||
}
|
||||
|
||||
@@ -25,9 +25,12 @@
|
||||
|
||||
struct channel_gk20a;
|
||||
struct gk20a;
|
||||
struct nvgpu_channel_hw_state;
|
||||
|
||||
void gk20a_channel_enable(struct channel_gk20a *ch);
|
||||
void gk20a_channel_disable(struct channel_gk20a *ch);
|
||||
void gk20a_channel_unbind(struct channel_gk20a *ch);
|
||||
void gk20a_channel_read_state(struct gk20a *g, struct channel_gk20a *ch,
|
||||
struct nvgpu_channel_hw_state *state);
|
||||
|
||||
#endif /* FIFO_CHANNEL_GK20A_H */
|
||||
|
||||
@@ -43,7 +43,6 @@ int gk20a_enable_tsg(struct tsg_gk20a *tsg)
|
||||
{
|
||||
struct gk20a *g = tsg->g;
|
||||
struct channel_gk20a *ch;
|
||||
bool is_next, is_ctx_reload;
|
||||
|
||||
gk20a_tsg_disable_sched(g, tsg);
|
||||
|
||||
@@ -54,19 +53,21 @@ int gk20a_enable_tsg(struct tsg_gk20a *tsg)
|
||||
*/
|
||||
nvgpu_rwsem_down_read(&tsg->ch_list_lock);
|
||||
nvgpu_list_for_each_entry(ch, &tsg->ch_list, channel_gk20a, ch_entry) {
|
||||
is_next = gk20a_fifo_channel_status_is_next(g, ch->chid);
|
||||
is_ctx_reload = gk20a_fifo_channel_status_is_ctx_reload(g, ch->chid);
|
||||
struct nvgpu_channel_hw_state hw_state;
|
||||
|
||||
if (is_next || is_ctx_reload) {
|
||||
g->ops.channel.read_state(g, ch, &hw_state);
|
||||
|
||||
if (hw_state.next || hw_state.ctx_reload) {
|
||||
g->ops.channel.enable(ch);
|
||||
}
|
||||
}
|
||||
|
||||
nvgpu_list_for_each_entry(ch, &tsg->ch_list, channel_gk20a, ch_entry) {
|
||||
is_next = gk20a_fifo_channel_status_is_next(g, ch->chid);
|
||||
is_ctx_reload = gk20a_fifo_channel_status_is_ctx_reload(g, ch->chid);
|
||||
struct nvgpu_channel_hw_state hw_state;
|
||||
|
||||
if (is_next || is_ctx_reload) {
|
||||
g->ops.channel.read_state(g, ch, &hw_state);
|
||||
|
||||
if (hw_state.next || hw_state.ctx_reload) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
@@ -57,7 +57,6 @@
|
||||
|
||||
#include <nvgpu/hw/gk20a/hw_fifo_gk20a.h>
|
||||
#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
|
||||
#include <nvgpu/hw/gk20a/hw_ccsr_gk20a.h>
|
||||
#include <nvgpu/hw/gk20a/hw_ram_gk20a.h>
|
||||
#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
|
||||
|
||||
@@ -1760,8 +1759,11 @@ int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch,
|
||||
int gk20a_fifo_tsg_unbind_channel_verify_status(struct channel_gk20a *ch)
|
||||
{
|
||||
struct gk20a *g = ch->g;
|
||||
struct nvgpu_channel_hw_state hw_state;
|
||||
|
||||
if (gk20a_fifo_channel_status_is_next(g, ch->chid)) {
|
||||
g->ops.channel.read_state(g, ch, &hw_state);
|
||||
|
||||
if (hw_state.next) {
|
||||
nvgpu_err(g, "Channel %d to be removed from TSG %d has NEXT set!",
|
||||
ch->chid, ch->tsgid);
|
||||
return -EINVAL;
|
||||
@@ -2845,24 +2847,6 @@ u32 gk20a_fifo_get_pbdma_signature(struct gk20a *g)
|
||||
return pbdma_signature_hw_valid_f() | pbdma_signature_sw_zero_f();
|
||||
}
|
||||
|
||||
/*
 * Names for the ccsr_channel status values, indexed by status value.
 * The table has 15 entries; lookups go through
 * gk20a_decode_ccsr_chan_status(), which bounds-checks the index.
 */
static const char * const ccsr_chan_status_str[] = {
|
||||
"idle",
|
||||
"pending",
|
||||
"pending_ctx_reload",
|
||||
"pending_acquire",
|
||||
"pending_acq_ctx_reload",
|
||||
"on_pbdma",
|
||||
"on_pbdma_and_eng",
|
||||
"on_eng",
|
||||
"on_eng_pending_acquire",
|
||||
"on_eng_pending",
|
||||
"on_pbdma_ctx_reload",
|
||||
"on_pbdma_and_eng_ctx_reload",
|
||||
"on_eng_ctx_reload",
|
||||
"on_eng_pending_ctx_reload",
|
||||
"on_eng_pending_acq_ctx_reload",
|
||||
};
|
||||
|
||||
static const char * const pbdma_chan_eng_ctx_status_str[] = {
|
||||
"invalid",
|
||||
"valid",
|
||||
@@ -2878,15 +2862,6 @@ static const char * const not_found_str[] = {
|
||||
"NOT FOUND"
|
||||
};
|
||||
|
||||
/*
 * Translate a ccsr_channel status value into its name.
 *
 * Returns the ccsr_chan_status_str entry for index, or not_found_str[0]
 * when index is past the end of that table. The returned string is a
 * static constant.
 */
const char *gk20a_decode_ccsr_chan_status(u32 index)
|
||||
{
|
||||
if (index >= ARRAY_SIZE(ccsr_chan_status_str)) {
|
||||
return not_found_str[0];
|
||||
} else {
|
||||
return ccsr_chan_status_str[index];
|
||||
}
|
||||
}
|
||||
|
||||
const char *gk20a_decode_pbdma_chan_eng_ctx_status(u32 index)
|
||||
{
|
||||
if (index >= ARRAY_SIZE(pbdma_chan_eng_ctx_status_str)) {
|
||||
@@ -2896,34 +2871,13 @@ const char *gk20a_decode_pbdma_chan_eng_ctx_status(u32 index)
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Read the ccsr_channel register for chid and report whether its next
 * field equals the "true" value. Performs one register read per call.
 */
bool gk20a_fifo_channel_status_is_next(struct gk20a *g, u32 chid)
|
||||
{
|
||||
u32 channel = gk20a_readl(g, ccsr_channel_r(chid));
|
||||
|
||||
return ccsr_channel_next_v(channel) == ccsr_channel_next_true_v();
|
||||
}
|
||||
|
||||
/*
 * Read the ccsr_channel register for chid and report whether its status
 * field holds any of the seven *_ctx_reload status values. Performs one
 * register read per call.
 */
bool gk20a_fifo_channel_status_is_ctx_reload(struct gk20a *g, u32 chid)
|
||||
{
|
||||
u32 channel = gk20a_readl(g, ccsr_channel_r(chid));
|
||||
u32 status = ccsr_channel_status_v(channel);
|
||||
|
||||
return (status == ccsr_channel_status_pending_ctx_reload_v() ||
|
||||
status == ccsr_channel_status_pending_acq_ctx_reload_v() ||
|
||||
status == ccsr_channel_status_on_pbdma_ctx_reload_v() ||
|
||||
status == ccsr_channel_status_on_pbdma_and_eng_ctx_reload_v() ||
|
||||
status == ccsr_channel_status_on_eng_ctx_reload_v() ||
|
||||
status == ccsr_channel_status_on_eng_pending_ctx_reload_v() ||
|
||||
status == ccsr_channel_status_on_eng_pending_acq_ctx_reload_v());
|
||||
}
|
||||
|
||||
void gk20a_capture_channel_ram_dump(struct gk20a *g,
|
||||
struct channel_gk20a *ch,
|
||||
struct nvgpu_channel_dump_info *info)
|
||||
{
|
||||
struct nvgpu_mem *mem = &ch->inst_block;
|
||||
|
||||
info->channel_reg = gk20a_readl(g, ccsr_channel_r(ch->chid));
|
||||
g->ops.channel.read_state(g, ch, &info->hw_state);
|
||||
|
||||
info->inst.pb_top_level_get = nvgpu_mem_rd32_pair(g, mem,
|
||||
ram_fc_pb_top_level_get_w(),
|
||||
@@ -2959,11 +2913,8 @@ void gk20a_dump_channel_status_ramfc(struct gk20a *g,
|
||||
struct gk20a_debug_output *o,
|
||||
struct nvgpu_channel_dump_info *info)
|
||||
{
|
||||
u32 status;
|
||||
u32 syncpointa, syncpointb;
|
||||
|
||||
status = ccsr_channel_status_v(info->channel_reg);
|
||||
|
||||
syncpointa = info->inst.syncpointa;
|
||||
syncpointb = info->inst.syncpointb;
|
||||
|
||||
@@ -2974,11 +2925,9 @@ void gk20a_dump_channel_status_ramfc(struct gk20a *g,
|
||||
info->refs,
|
||||
info->deterministic ? "yes" : "no");
|
||||
gk20a_debug_output(o, " In use: %-3s busy: %-3s status: %s",
|
||||
(ccsr_channel_enable_v(info->channel_reg) ==
|
||||
ccsr_channel_enable_in_use_v()) ? "yes" : "no",
|
||||
(ccsr_channel_busy_v(info->channel_reg) ==
|
||||
ccsr_channel_busy_true_v()) ? "yes" : "no",
|
||||
gk20a_decode_ccsr_chan_status(status));
|
||||
info->hw_state.enabled ? "yes" : "no",
|
||||
info->hw_state.busy ? "yes" : "no",
|
||||
info->hw_state.status_string);
|
||||
gk20a_debug_output(o,
|
||||
" TOP %016llx"
|
||||
" PUT %016llx GET %016llx",
|
||||
@@ -3014,7 +2963,7 @@ void gk20a_dump_channel_status_ramfc(struct gk20a *g,
|
||||
&& (pbdma_syncpointb_wait_switch_v(syncpointb) ==
|
||||
pbdma_syncpointb_wait_switch_en_v())) {
|
||||
gk20a_debug_output(o, "%s on syncpt %u (%s) val %u",
|
||||
(status == 3 || status == 8) ? "Waiting" : "Waited",
|
||||
info->hw_state.pending_acquire ? "Waiting" : "Waited",
|
||||
pbdma_syncpointb_syncpt_index_v(syncpointb),
|
||||
nvgpu_nvhost_syncpt_get_name(g->nvhost_dev,
|
||||
pbdma_syncpointb_syncpt_index_v(syncpointb)),
|
||||
|
||||
@@ -25,6 +25,7 @@
|
||||
#define FIFO_GK20A_H
|
||||
|
||||
#include <nvgpu/kref.h>
|
||||
#include <nvgpu/fifo.h>
|
||||
|
||||
struct gk20a_debug_output;
|
||||
struct mmu_fault_info;
|
||||
@@ -217,7 +218,7 @@ struct nvgpu_channel_dump_info {
|
||||
int pid;
|
||||
int refs;
|
||||
bool deterministic;
|
||||
u32 channel_reg;
|
||||
struct nvgpu_channel_hw_state hw_state;
|
||||
struct {
|
||||
u64 pb_top_level_get;
|
||||
u64 pb_put;
|
||||
@@ -355,11 +356,8 @@ void gk20a_dump_pbdma_status(struct gk20a *g,
|
||||
struct gk20a_debug_output *o);
|
||||
void gk20a_dump_eng_status(struct gk20a *g,
|
||||
struct gk20a_debug_output *o);
|
||||
const char *gk20a_decode_ccsr_chan_status(u32 index);
|
||||
const char *gk20a_decode_pbdma_chan_eng_ctx_status(u32 index);
|
||||
|
||||
bool gk20a_fifo_channel_status_is_next(struct gk20a *g, u32 chid);
|
||||
bool gk20a_fifo_channel_status_is_ctx_reload(struct gk20a *g, u32 chid);
|
||||
int gk20a_fifo_tsg_unbind_channel_verify_status(struct channel_gk20a *ch);
|
||||
|
||||
struct channel_gk20a *gk20a_refch_from_inst_ptr(struct gk20a *g, u64 inst_ptr);
|
||||
|
||||
@@ -166,9 +166,11 @@ void gm20b_fifo_tsg_verify_status_ctx_reload(struct channel_gk20a *ch)
|
||||
struct gk20a *g = ch->g;
|
||||
struct tsg_gk20a *tsg = &g->fifo.tsg[ch->tsgid];
|
||||
struct channel_gk20a *temp_ch;
|
||||
struct nvgpu_channel_hw_state hw_state;
|
||||
|
||||
/* If CTX_RELOAD is set on a channel, move it to some other channel */
|
||||
if (gk20a_fifo_channel_status_is_ctx_reload(ch->g, ch->chid)) {
|
||||
g->ops.channel.read_state(g, ch, &hw_state);
|
||||
if (hw_state.ctx_reload) {
|
||||
nvgpu_rwsem_down_read(&tsg->ch_list_lock);
|
||||
nvgpu_list_for_each_entry(temp_ch, &tsg->ch_list, channel_gk20a, ch_entry) {
|
||||
if (temp_ch->chid != ch->chid) {
|
||||
|
||||
@@ -577,6 +577,7 @@ static const struct gpu_ops gm20b_ops = {
|
||||
.enable = gk20a_channel_enable,
|
||||
.disable = gk20a_channel_disable,
|
||||
.count = gm20b_channel_count,
|
||||
.read_state = gk20a_channel_read_state,
|
||||
},
|
||||
.netlist = {
|
||||
.get_netlist_name = gm20b_netlist_get_name,
|
||||
|
||||
@@ -630,6 +630,7 @@ static const struct gpu_ops gp10b_ops = {
|
||||
.enable = gk20a_channel_enable,
|
||||
.disable = gk20a_channel_disable,
|
||||
.count = gm20b_channel_count,
|
||||
.read_state = gk20a_channel_read_state,
|
||||
},
|
||||
.netlist = {
|
||||
.get_netlist_name = gp10b_netlist_get_name,
|
||||
|
||||
@@ -799,6 +799,7 @@ static const struct gpu_ops gv100_ops = {
|
||||
.enable = gk20a_channel_enable,
|
||||
.disable = gk20a_channel_disable,
|
||||
.count = gv100_channel_count,
|
||||
.read_state = gk20a_channel_read_state,
|
||||
},
|
||||
.netlist = {
|
||||
.get_netlist_name = gv100_netlist_get_name,
|
||||
|
||||
@@ -223,7 +223,7 @@ void gv11b_capture_channel_ram_dump(struct gk20a *g,
|
||||
{
|
||||
struct nvgpu_mem *mem = &ch->inst_block;
|
||||
|
||||
info->channel_reg = gk20a_readl(g, ccsr_channel_r(ch->chid));
|
||||
g->ops.channel.read_state(g, ch, &info->hw_state);
|
||||
|
||||
info->inst.pb_top_level_get = nvgpu_mem_rd32_pair(g, mem,
|
||||
ram_fc_pb_top_level_get_w(),
|
||||
@@ -255,10 +255,6 @@ void gv11b_dump_channel_status_ramfc(struct gk20a *g,
|
||||
struct gk20a_debug_output *o,
|
||||
struct nvgpu_channel_dump_info *info)
|
||||
{
|
||||
u32 status;
|
||||
|
||||
status = ccsr_channel_status_v(info->channel_reg);
|
||||
|
||||
gk20a_debug_output(o, "%d-%s, TSG: %u, pid %d, refs: %d%s: ",
|
||||
info->chid,
|
||||
g->name,
|
||||
@@ -267,11 +263,9 @@ void gv11b_dump_channel_status_ramfc(struct gk20a *g,
|
||||
info->refs,
|
||||
info->deterministic ? ", deterministic" : "");
|
||||
gk20a_debug_output(o, "channel status: %s in use %s %s\n",
|
||||
(ccsr_channel_enable_v(info->channel_reg) ==
|
||||
ccsr_channel_enable_in_use_v()) ? "" : "not",
|
||||
gk20a_decode_ccsr_chan_status(status),
|
||||
(ccsr_channel_busy_v(info->channel_reg) ==
|
||||
ccsr_channel_busy_true_v()) ? "busy" : "not busy");
|
||||
info->hw_state.enabled ? "" : "not",
|
||||
info->hw_state.status_string,
|
||||
info->hw_state.busy ? "busy" : "not busy");
|
||||
gk20a_debug_output(o,
|
||||
"RAMFC : TOP: %016llx PUT: %016llx GET: %016llx "
|
||||
"FETCH: %016llx\n"
|
||||
|
||||
@@ -758,6 +758,7 @@ static const struct gpu_ops gv11b_ops = {
|
||||
.enable = gk20a_channel_enable,
|
||||
.disable = gk20a_channel_disable,
|
||||
.count = gv11b_channel_count,
|
||||
.read_state = gk20a_channel_read_state,
|
||||
},
|
||||
.netlist = {
|
||||
.get_netlist_name = gv11b_netlist_get_name,
|
||||
|
||||
@@ -35,4 +35,13 @@
|
||||
#define RC_TYPE_FORCE_RESET 7U
|
||||
#define RC_TYPE_SCHED_ERR 8U
|
||||
|
||||
/*
 * Decoded hardware state of one channel, filled in by the
 * channel.read_state HAL op. The field descriptions below follow
 * gk20a_channel_read_state(), the implementation every chip in this
 * change uses.
 */
struct nvgpu_channel_hw_state {
|
||||
/* ccsr_channel enable field reads as "in use". */
bool enabled;
|
||||
/* ccsr_channel next field reads as true. */
bool next;
|
||||
/* Status field holds one of the *_ctx_reload values. */
bool ctx_reload;
|
||||
/* ccsr_channel busy field reads as true. */
bool busy;
|
||||
/* Status is pending_acquire or on_eng_pending_acquire. */
bool pending_acquire;
|
||||
/*
 * Name of the raw status value, for debug output. In the gk20a
 * implementation this points into a static table, so it must not
 * be freed or modified.
 */
const char *status_string;
|
||||
};
|
||||
|
||||
#endif /* NVGPU_FIFO_COMMON_H */
|
||||
@@ -63,6 +63,7 @@ struct nvgpu_sgt;
|
||||
struct nvgpu_sgl;
|
||||
struct nvgpu_device_info;
|
||||
struct nvgpu_gr_subctx;
|
||||
struct nvgpu_channel_hw_state;
|
||||
|
||||
#include <nvgpu/lock.h>
|
||||
#include <nvgpu/thread.h>
|
||||
@@ -907,6 +908,8 @@ struct gpu_ops {
|
||||
void (*enable)(struct channel_gk20a *ch);
|
||||
void (*disable)(struct channel_gk20a *ch);
|
||||
u32 (*count)(struct gk20a *g);
|
||||
void (*read_state)(struct gk20a *g, struct channel_gk20a *ch,
|
||||
struct nvgpu_channel_hw_state *state);
|
||||
} channel;
|
||||
struct pmu_v {
|
||||
u32 (*get_pmu_cmdline_args_size)(struct nvgpu_pmu *pmu);
|
||||
|
||||
@@ -830,6 +830,7 @@ static const struct gpu_ops tu104_ops = {
|
||||
.enable = gk20a_channel_enable,
|
||||
.disable = gk20a_channel_disable,
|
||||
.count = gv100_channel_count,
|
||||
.read_state = gk20a_channel_read_state,
|
||||
},
|
||||
.netlist = {
|
||||
.get_netlist_name = tu104_netlist_get_name,
|
||||
|
||||
Reference in New Issue
Block a user