diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index 6de264f43..b9cba434b 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c @@ -900,12 +900,6 @@ static int gk20a_pm_finalize_poweron(struct device *dev) goto done; } - err = gk20a_init_pmu_setup_hw2(g); - if (err) { - gk20a_err(dev, "failed to init gk20a pmu_hw2"); - goto done; - } - err = gk20a_init_therm_support(g); if (err) { gk20a_err(dev, "failed to init gk20a therm"); @@ -1471,6 +1465,8 @@ static int gk20a_probe(struct platform_device *dev) cfb_add_device(&dev->dev); #endif + gk20a_init_gr(gk20a); + return 0; } diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 3d70afd58..b23bb540b 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -3534,7 +3534,7 @@ clean_up: return ret; } -static void gr_gk20a_pmu_save_zbc(struct gk20a *g, u32 entries) +void gr_gk20a_pmu_save_zbc(struct gk20a *g, u32 entries) { struct fifo_gk20a *f = &g->fifo; struct fifo_engine_info_gk20a *gr_info = @@ -4569,9 +4569,19 @@ int gk20a_init_gr_support(struct gk20a *g) if (err) return err; + /* GR is inialized, signal possible waiters */ + g->gr.initialized = true; + wake_up(&g->gr.init_wq); + return 0; } +/* Wait until GR is initialized */ +void gk20a_gr_wait_initialized(struct gk20a *g) +{ + wait_event(g->gr.init_wq, g->gr.initialized); +} + #define NVA297_SET_ALPHA_CIRCULAR_BUFFER_SIZE 0x02dc #define NVA297_SET_CIRCULAR_BUFFER_SIZE 0x1280 #define NVA297_SET_SHADER_EXCEPTIONS 0x1528 @@ -5550,6 +5560,8 @@ int gk20a_gr_suspend(struct gk20a *g) gk20a_gr_flush_channel_tlb(&g->gr); + g->gr.initialized = false; + gk20a_dbg_fn("done"); return ret; } @@ -6816,7 +6828,12 @@ void gr_gk20a_commit_global_pagepool(struct gk20a *g, gr_pd_pagepool_valid_true_f(), patch); } -void gk20a_init_gr(struct gpu_ops *gops) +void gk20a_init_gr(struct gk20a *g) +{ + init_waitqueue_head(&g->gr.init_wq); +} + +void gk20a_init_gr_ops(struct gpu_ops *gops) { gops->gr.access_smpc_reg = gr_gk20a_access_smpc_reg; gops->gr.bundle_cb_defaults = gr_gk20a_bundle_cb_defaults; diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 9e0883d6c..05c27ffd8 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h @@ -204,7 +204,9 @@ struct gr_gk20a { #define GR_NETLIST_STATIC_A 'A' int netlist; + wait_queue_head_t init_wq; int initialized; + u32 num_fbps; u32 comptags_per_cacheline; @@ -313,9 +315,11 @@ struct gk20a_ctxsw_bootloader_desc { }; struct gpu_ops; -void gk20a_init_gr(struct gpu_ops *gops); +void gk20a_init_gr(struct gk20a *g); +void gk20a_init_gr_ops(struct gpu_ops *gops); int gk20a_init_gr_support(struct gk20a *g); int gk20a_gr_reset(struct gk20a *g); +void gk20a_gr_wait_initialized(struct gk20a *g); int gk20a_init_gr_channel(struct channel_gk20a *ch_gk20a); @@ -356,14 +360,17 @@ int gr_gk20a_fecs_set_reglist_virtual_addr(struct gk20a *g, u64 pmu_va); void gr_gk20a_init_elcg_mode(struct gk20a *g, u32 mode, u32 engine); void gr_gk20a_init_blcg_mode(struct gk20a *g, u32 mode, u32 engine); +void gr_gk20a_pmu_save_zbc(struct gk20a *g, u32 entries); + /* sm */ bool gk20a_gr_sm_debugger_attached(struct gk20a *g); #define gr_gk20a_elpg_protected_call(g, func) \ ({ \ - int err; \ + int err = 0; \ if (support_gk20a_pmu()) \ - gk20a_pmu_disable_elpg(g); \ + err = gk20a_pmu_disable_elpg(g); \ + if (err) return err; \ err = func; \ if (support_gk20a_pmu()) \ gk20a_pmu_enable_elpg(g); \ diff --git a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c index de6c37133..66bc47a9e 100644 --- a/drivers/gpu/nvgpu/gk20a/hal_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/hal_gk20a.c @@ -43,7 +43,7 @@ int gk20a_init_hal(struct gpu_ops *gops) { *gops = gk20a_ops; gk20a_init_ltc(gops); - gk20a_init_gr(gops); + gk20a_init_gr_ops(gops); gk20a_init_fb(gops); gk20a_init_fifo(gops); gk20a_init_gr_ctx(gops); diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c index bccd4100f..b784b9a63 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c @@ -28,6 +28,7 @@ #include #include "gk20a.h" +#include "gr_gk20a.h" #include "hw_mc_gk20a.h" #include "hw_pwr_gk20a.h" #include "hw_top_gk20a.h" @@ -40,8 +41,7 @@ static void pmu_dump_falcon_stats(struct pmu_gk20a *pmu); static int gk20a_pmu_get_elpg_residency_gating(struct gk20a *g, u32 *ingating_time, u32 *ungating_time, u32 *gating_cnt); -static void gk20a_init_pmu_setup_hw2_workqueue(struct work_struct *work); -static void pmu_save_zbc(struct gk20a *g, u32 entries); +static void pmu_setup_hw(struct work_struct *work); static void ap_callback_init_and_enable_ctrl( struct gk20a *g, struct pmu_msg *msg, void *param, u32 seq_desc, u32 status); @@ -1582,8 +1582,7 @@ int gk20a_init_pmu_setup_sw(struct gk20a *g) pmu->ucode_image = (u32 *)((u8 *)pmu->desc + pmu->desc->descriptor_size); - - INIT_WORK(&pmu->pg_init, gk20a_init_pmu_setup_hw2_workqueue); + INIT_WORK(&pmu->pg_init, pmu_setup_hw); dma_set_attr(DMA_ATTR_READ_ONLY, &attrs); pmu->ucode.cpuva = dma_alloc_attrs(d, GK20A_PMU_UCODE_SIZE_MAX, @@ -1607,7 +1606,6 @@ int gk20a_init_pmu_setup_sw(struct gk20a *g) } pmu->seq_buf.iova = iova; - init_waitqueue_head(&pmu->pg_wq); err = gk20a_get_sgtable(d, &sgt_pmu_ucode, pmu->ucode.cpuva, @@ -1668,6 +1666,8 @@ int gk20a_init_pmu_setup_sw(struct gk20a *g) gk20a_free_sgtable(&sgt_pmu_ucode); gk20a_free_sgtable(&sgt_seq_buf); + pmu->sw_ready = true; + skip_init: mutex_init(&pmu->elpg_mutex); mutex_init(&pmu->isr_mutex); @@ -1741,7 +1741,7 @@ static void pmu_handle_pg_buf_config_msg(struct gk20a *g, struct pmu_msg *msg, } pmu->buf_loaded = (eng_buf_stat->status == PMU_PG_MSG_ENG_BUF_LOADED); - wake_up(&pmu->pg_wq); + schedule_work(&pmu->pg_init); } int gk20a_init_pmu_setup_hw1(struct gk20a *g) @@ -1781,15 +1781,37 @@ int gk20a_init_pmu_setup_hw1(struct gk20a *g) static int gk20a_aelpg_init(struct gk20a *g); static int gk20a_aelpg_init_and_enable(struct gk20a *g, u8 ctrl_id); +static void pmu_setup_hw_load_zbc(struct gk20a *g); +static void pmu_setup_hw_enable_elpg(struct gk20a *g); -static void gk20a_init_pmu_setup_hw2_workqueue(struct work_struct *work) +static void pmu_setup_hw(struct work_struct *work) { struct pmu_gk20a *pmu = container_of(work, struct pmu_gk20a, pg_init); struct gk20a *g = pmu->g; - gk20a_init_pmu_setup_hw2(g); + + switch (pmu->pmu_state) { + case PMU_STATE_ELPG_BOOTED: + gk20a_dbg_pmu("elpg booted"); + gk20a_init_pmu_bind_fecs(g); + break; + case PMU_STATE_LOADING_PG_BUF: + gk20a_dbg_pmu("loaded pg buf"); + pmu_setup_hw_load_zbc(g); + break; + case PMU_STATE_LOADING_ZBC: + gk20a_dbg_pmu("loaded zbc"); + pmu_setup_hw_enable_elpg(g); + break; + case PMU_STATE_STARTED: + gk20a_dbg_pmu("PMU booted"); + break; + default: + gk20a_dbg_pmu("invalid state"); + break; + } } -int gk20a_init_pmu_setup_hw2(struct gk20a *g) +int gk20a_init_pmu_bind_fecs(struct gk20a *g) { struct pmu_gk20a *pmu = &g->pmu; struct mm_gk20a *mm = &g->mm; @@ -1797,19 +1819,15 @@ int gk20a_init_pmu_setup_hw2(struct gk20a *g) struct device *d = dev_from_gk20a(g); struct pmu_cmd cmd; u32 desc; - long remain; int err; - bool status; u32 size; struct sg_table *sgt_pg_buf; dma_addr_t iova; gk20a_dbg_fn(""); - if (!support_gk20a_pmu()) - return 0; - size = 0; + gk20a_gr_wait_initialized(g); err = gr_gk20a_fecs_get_reglist_img_size(g, &size); if (err) { gk20a_err(dev_from_gk20a(g), @@ -1817,14 +1835,13 @@ int gk20a_init_pmu_setup_hw2(struct gk20a *g) return err; } - if (!pmu->sw_ready) { + if (!pmu->pg_buf.cpuva) { pmu->pg_buf.cpuva = dma_alloc_coherent(d, size, &iova, GFP_KERNEL); if (!pmu->pg_buf.cpuva) { gk20a_err(d, "failed to allocate memory\n"); - err = -ENOMEM; - goto err; + return -ENOMEM; } pmu->pg_buf.iova = iova; @@ -1853,30 +1870,6 @@ int gk20a_init_pmu_setup_hw2(struct gk20a *g) gk20a_free_sgtable(&sgt_pg_buf); } - /* - * This is the actual point at which sw setup is complete, so set the - * sw_ready flag here. - */ - pmu->sw_ready = true; - - /* TBD: acquire pmu hw mutex */ - - /* TBD: post reset again? */ - - /* PMU_INIT message handler will send PG_INIT */ - remain = wait_event_timeout( - pmu->pg_wq, - (status = (pmu->elpg_ready && - pmu->stat_dmem_offset != 0 && - pmu->elpg_stat == PMU_ELPG_STAT_OFF)), - msecs_to_jiffies(gk20a_get_gr_idle_timeout(g))); - if (status == 0) { - gk20a_err(dev_from_gk20a(g), - "PG_INIT_ACK failed, remaining timeout : 0x%lx", remain); - pmu_dump_falcon_stats(pmu); - return -EBUSY; - } - err = gr_gk20a_fecs_set_reglist_bind_inst(g, mm->pmu.inst_block.cpu_pa); if (err) { gk20a_err(dev_from_gk20a(g), @@ -1906,17 +1899,24 @@ int gk20a_init_pmu_setup_hw2(struct gk20a *g) gk20a_dbg_pmu("cmd post PMU_PG_CMD_ID_ENG_BUF_LOAD PMU_PGENG_GR_BUFFER_IDX_FECS"); gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ, pmu_handle_pg_buf_config_msg, pmu, &desc, ~0); + pmu->pmu_state = PMU_STATE_LOADING_PG_BUF; + return err; - remain = wait_event_timeout( - pmu->pg_wq, - pmu->buf_loaded, - msecs_to_jiffies(gk20a_get_gr_idle_timeout(g))); - if (!pmu->buf_loaded) { - gk20a_err(dev_from_gk20a(g), - "PGENG FECS buffer load failed, remaining timeout : 0x%lx", - remain); - return -EBUSY; - } +err_free_sgtable: + gk20a_free_sgtable(&sgt_pg_buf); +err_free_pg_buf: + dma_free_coherent(d, size, + pmu->pg_buf.cpuva, pmu->pg_buf.iova); + pmu->pg_buf.cpuva = NULL; + pmu->pg_buf.iova = 0; + return err; +} + +static void pmu_setup_hw_load_zbc(struct gk20a *g) +{ + struct pmu_gk20a *pmu = &g->pmu; + struct pmu_cmd cmd; + u32 desc; memset(&cmd, 0, sizeof(struct pmu_cmd)); cmd.hdr.unit_id = PMU_UNIT_PG; @@ -1933,17 +1933,12 @@ int gk20a_init_pmu_setup_hw2(struct gk20a *g) gk20a_dbg_pmu("cmd post PMU_PG_CMD_ID_ENG_BUF_LOAD PMU_PGENG_GR_BUFFER_IDX_ZBC"); gk20a_pmu_cmd_post(g, &cmd, NULL, NULL, PMU_COMMAND_QUEUE_LPQ, pmu_handle_pg_buf_config_msg, pmu, &desc, ~0); + pmu->pmu_state = PMU_STATE_LOADING_ZBC; +} - remain = wait_event_timeout( - pmu->pg_wq, - pmu->buf_loaded, - msecs_to_jiffies(gk20a_get_gr_idle_timeout(g))); - if (!pmu->buf_loaded) { - gk20a_err(dev_from_gk20a(g), - "PGENG ZBC buffer load failed, remaining timeout 0x%lx", - remain); - return -EBUSY; - } +static void pmu_setup_hw_enable_elpg(struct gk20a *g) +{ + struct pmu_gk20a *pmu = &g->pmu; /* * FIXME: To enable ELPG, we increase the PMU ext2priv timeout unit to @@ -1954,17 +1949,11 @@ int gk20a_init_pmu_setup_hw2(struct gk20a *g) gk20a_writel(g, 0x10a164, 0x109ff); pmu->initialized = true; - - /* - * We can't guarantee that gr code to enable ELPG will be - * invoked, so we explicitly call disable-enable here - * to enable elpg. - */ - gk20a_pmu_disable_elpg(g); + pmu->pmu_state = PMU_STATE_STARTED; pmu->zbc_ready = true; /* Save zbc table after PMU is initialized. */ - pmu_save_zbc(g, 0xf); + gr_gk20a_pmu_save_zbc(g, 0xf); if (g->elpg_enabled) gk20a_pmu_enable_elpg(g); @@ -1976,18 +1965,6 @@ int gk20a_init_pmu_setup_hw2(struct gk20a *g) gk20a_aelpg_init(g); gk20a_aelpg_init_and_enable(g, PMU_AP_CTRL_ID_GRAPHICS); } - - return 0; - - err_free_sgtable: - gk20a_free_sgtable(&sgt_pg_buf); - err_free_pg_buf: - dma_free_coherent(d, size, - pmu->pg_buf.cpuva, pmu->pg_buf.iova); - pmu->pg_buf.cpuva = NULL; - pmu->pg_buf.iova = 0; - err: - return err; } int gk20a_init_pmu_support(struct gk20a *g) @@ -2036,18 +2013,17 @@ static void pmu_handle_pg_elpg_msg(struct gk20a *g, struct pmu_msg *msg, switch (elpg_msg->msg) { case PMU_PG_ELPG_MSG_INIT_ACK: gk20a_dbg_pmu("INIT_PG is acknowledged from PMU"); - pmu->elpg_ready = true; - wake_up(&pmu->pg_wq); break; case PMU_PG_ELPG_MSG_ALLOW_ACK: gk20a_dbg_pmu("ALLOW is acknowledged from PMU"); pmu->elpg_stat = PMU_ELPG_STAT_ON; - wake_up(&pmu->pg_wq); break; case PMU_PG_ELPG_MSG_DISALLOW_ACK: gk20a_dbg_pmu("DISALLOW is acknowledged from PMU"); pmu->elpg_stat = PMU_ELPG_STAT_OFF; - wake_up(&pmu->pg_wq); + if (pmu->pmu_state == PMU_STATE_STARTING) + pmu->pmu_state = PMU_STATE_ELPG_BOOTED; + schedule_work(&pmu->pg_init); break; default: gk20a_err(dev_from_gk20a(g), @@ -2074,7 +2050,6 @@ static void pmu_handle_pg_stat_msg(struct gk20a *g, struct pmu_msg *msg, case PMU_PG_STAT_MSG_RESP_DMEM_OFFSET: gk20a_dbg_pmu("ALLOC_DMEM_OFFSET is acknowledged from PMU"); pmu->stat_dmem_offset = msg->msg.pg.stat.data; - wake_up(&pmu->pg_wq); break; default: break; @@ -2131,7 +2106,7 @@ static int pmu_init_powergating(struct pmu_gk20a *pmu) /* disallow ELPG initially PMU ucode requires a disallow cmd before allow cmd */ - pmu->elpg_stat = PMU_ELPG_STAT_ON; /* set for wait_event PMU_ELPG_STAT_OFF */ + pmu->elpg_stat = PMU_ELPG_STAT_OFF; /* set for wait_event PMU_ELPG_STAT_OFF */ memset(&cmd, 0, sizeof(struct pmu_cmd)); cmd.hdr.unit_id = PMU_UNIT_PG; cmd.hdr.size = PMU_CMD_HDR_SIZE + sizeof(struct pmu_pg_cmd_elpg_cmd); @@ -2144,7 +2119,9 @@ static int pmu_init_powergating(struct pmu_gk20a *pmu) pmu_handle_pg_elpg_msg, pmu, &seq, ~0); /* start with elpg disabled until first enable call */ - pmu->elpg_refcnt = 1; + pmu->elpg_refcnt = 0; + + pmu->pmu_state = PMU_STATE_STARTING; return 0; } @@ -2482,7 +2459,7 @@ static void pmu_handle_zbc_msg(struct gk20a *g, struct pmu_msg *msg, pmu->zbc_save_done = 1; } -static void pmu_save_zbc(struct gk20a *g, u32 entries) +void gk20a_pmu_save_zbc(struct gk20a *g, u32 entries) { struct pmu_gk20a *pmu = &g->pmu; struct pmu_cmd cmd; @@ -2508,12 +2485,6 @@ static void pmu_save_zbc(struct gk20a *g, u32 entries) gk20a_err(dev_from_gk20a(g), "ZBC save timeout"); } -void gk20a_pmu_save_zbc(struct gk20a *g, u32 entries) -{ - if (g->pmu.zbc_ready) - pmu_save_zbc(g, entries); -} - static int pmu_perfmon_start_sampling(struct pmu_gk20a *pmu) { struct gk20a *g = pmu->g; @@ -3195,9 +3166,6 @@ int gk20a_pmu_enable_elpg(struct gk20a *g) gk20a_dbg_fn(""); - if (!pmu->elpg_ready || !pmu->initialized) - goto exit; - mutex_lock(&pmu->elpg_mutex); pmu->elpg_refcnt++; @@ -3225,7 +3193,6 @@ int gk20a_pmu_enable_elpg(struct gk20a *g) exit_unlock: mutex_unlock(&pmu->elpg_mutex); -exit: gk20a_dbg_fn("done"); return ret; } @@ -3239,9 +3206,6 @@ int gk20a_pmu_disable_elpg(struct gk20a *g) gk20a_dbg_fn(""); - if (!pmu->elpg_ready || !pmu->initialized) - return 0; - mutex_lock(&pmu->elpg_mutex); pmu->elpg_refcnt--; @@ -3353,10 +3317,10 @@ int gk20a_pmu_destroy(struct gk20a *g) g->pg_gating_cnt += gating_cnt; pmu_enable(pmu, false); + pmu->pmu_state = PMU_STATE_OFF; pmu->pmu_ready = false; pmu->perfmon_ready = false; pmu->zbc_ready = false; - pmu->elpg_ready = false; gk20a_dbg_fn("done"); return 0; diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h index 8aaea33f7..2843d4834 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h @@ -987,6 +987,14 @@ struct pmu_pg_stats { #define PMU_FALCON_REG_RSVD2 (31) #define PMU_FALCON_REG_SIZE (32) +/* Choices for pmu_state */ +#define PMU_STATE_OFF 0 /* PMU is off */ +#define PMU_STATE_STARTING 1 /* PMU is booting */ +#define PMU_STATE_ELPG_BOOTED 2 /* ELPG is initialized */ +#define PMU_STATE_LOADING_PG_BUF 3 /* Loading PG buf */ +#define PMU_STATE_LOADING_ZBC 4 /* Loading ZBC buf */ +#define PMU_STATE_STARTED 5 /* Fully unitialized */ + struct pmu_gk20a { struct gk20a *g; @@ -1022,9 +1030,9 @@ struct pmu_gk20a { u32 stat_dmem_offset; - bool elpg_ready; u32 elpg_stat; - wait_queue_head_t pg_wq; + + int pmu_state; #define PMU_ELPG_ENABLE_ALLOW_DELAY_MSEC 1 /* msec */ struct work_struct pg_init; @@ -1053,7 +1061,7 @@ struct pmu_gk20a { }; int gk20a_init_pmu_support(struct gk20a *g); -int gk20a_init_pmu_setup_hw2(struct gk20a *g); +int gk20a_init_pmu_bind_fecs(struct gk20a *g); void gk20a_pmu_isr(struct gk20a *g);