diff --git a/drivers/gpu/nvgpu/clk/clk.c b/drivers/gpu/nvgpu/clk/clk.c index ef0834f49..bec5fad13 100644 --- a/drivers/gpu/nvgpu/clk/clk.c +++ b/drivers/gpu/nvgpu/clk/clk.c @@ -255,7 +255,7 @@ static int get_regime_id(struct gk20a *g, u32 domain, u32 *regimeid) return -EINVAL; } -int clk_program_fllclks(struct gk20a *g, struct change_fll_clk *fllclk) +int clk_program_fll_clks(struct gk20a *g, struct change_fll_clk *fllclk) { int status = -EINVAL; struct clk_domain *pdomain; @@ -277,8 +277,6 @@ int clk_program_fllclks(struct gk20a *g, struct change_fll_clk *fllclk) if (fllclk->clkmhz == 0) return -EINVAL; - mutex_lock(&pclk->changeclkmutex); - setfllclk.voltuv = fllclk->voltuv; setfllclk.gpc2clkmhz = fllclk->clkmhz; @@ -376,63 +374,6 @@ int clk_program_fllclks(struct gk20a *g, struct change_fll_clk *fllclk) if (status) goto done; done: - mutex_unlock(&pclk->changeclkmutex); - return status; -} - -int clk_set_boot_fll_clk(struct gk20a *g) -{ - int status; - struct change_fll_clk bootfllclk; - u16 gpc2clk_clkmhz = BOOT_GPC2CLK_MHZ; - u32 gpc2clk_voltuv = 0; - u32 gpc2clk_voltuv_sram = 0; - u16 mclk_clkmhz = BOOT_MCLK_MHZ; - u32 mclk_voltuv = 0; - u32 mclk_voltuv_sram = 0; - u32 voltuv = 0; - u32 voltuv_sram = 0; - - mutex_init(&g->clk_pmu.changeclkmutex); - status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK, - &gpc2clk_clkmhz, &gpc2clk_voltuv, CTRL_VOLT_DOMAIN_LOGIC); - if (status) - return status; - status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK, - &gpc2clk_clkmhz, &gpc2clk_voltuv_sram, CTRL_VOLT_DOMAIN_SRAM); - if (status) - return status; - status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK, - &mclk_clkmhz, &mclk_voltuv, CTRL_VOLT_DOMAIN_LOGIC); - if (status) - return status; - status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK, - &mclk_clkmhz, &mclk_voltuv_sram, CTRL_VOLT_DOMAIN_SRAM); - if (status) - return status; - - voltuv = ((gpc2clk_voltuv) > (mclk_voltuv)) ? (gpc2clk_voltuv) - : (mclk_voltuv); - - voltuv_sram = ((gpc2clk_voltuv_sram) > (mclk_voltuv_sram)) ? 
- (gpc2clk_voltuv_sram) : (mclk_voltuv_sram); - - status = volt_set_voltage(g, voltuv, voltuv_sram); - if (status) - gk20a_err(dev_from_gk20a(g), - "attempt to set boot voltage failed %d %d", - voltuv, voltuv_sram); - - bootfllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPC2CLK; - bootfllclk.clkmhz = gpc2clk_clkmhz; - bootfllclk.voltuv = voltuv; - status = clk_program_fllclks(g, &bootfllclk); - if (status) - gk20a_err(dev_from_gk20a(g), "attempt to set boot gpc2clk failed"); - status = g->clk_pmu.clk_mclk.change(g, DEFAULT_BOOT_MCLK_SPEED); - if (status) - gk20a_err(dev_from_gk20a(g), "attempt to set boot mclk failed"); - return status; } diff --git a/drivers/gpu/nvgpu/clk/clk.h b/drivers/gpu/nvgpu/clk/clk.h index a0b88dcb3..d0e821738 100644 --- a/drivers/gpu/nvgpu/clk/clk.h +++ b/drivers/gpu/nvgpu/clk/clk.h @@ -119,6 +119,5 @@ u32 clk_domain_get_f_points( u32 *fpointscount, u16 *freqpointsinmhz ); -int clk_set_boot_fll_clk(struct gk20a *g); int clk_program_fll_clks(struct gk20a *g, struct change_fll_clk *fllclk); #endif diff --git a/drivers/gpu/nvgpu/clk/clk_arb.c b/drivers/gpu/nvgpu/clk/clk_arb.c index 98b7cb5f4..f868100b2 100644 --- a/drivers/gpu/nvgpu/clk/clk_arb.c +++ b/drivers/gpu/nvgpu/clk/clk_arb.c @@ -18,9 +18,17 @@ #include #include #include +#include #include "clk/clk_arb.h" + +#define MAX_F_POINTS 127 + +#ifdef CONFIG_DEBUG_FS +static int nvgpu_clk_arb_debugfs_init(struct gk20a *g); +#endif + static int nvgpu_clk_arb_release_event_dev(struct inode *inode, struct file *filp); static int nvgpu_clk_arb_release_completion_dev(struct inode *inode, @@ -28,21 +36,57 @@ static int nvgpu_clk_arb_release_completion_dev(struct inode *inode, static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait); static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work); +static void nvgpu_clk_arb_run_vftable_cb(struct work_struct *work); +static int nvgpu_clk_arb_update_vftable(struct nvgpu_clk_arb *); + +struct nvgpu_clk_vf_point { + u16 mhz; + u32 uvolt; + u32 uvolt_sram; +}; struct nvgpu_clk_arb { - struct mutex req_lock; - struct mutex users_lock; + spinlock_t sessions_lock; + spinlock_t users_lock; + spinlock_t req_lock; + struct list_head users; + struct list_head sessions; struct list_head requests; - u64 gpc2clk_current_hz; - u64 gpc2clk_target_hz; - u64 gpc2clk_default_hz; - u64 mclk_current_hz; - u64 mclk_target_hz; - u64 mclk_default_hz; - atomic_t usercount; + struct gk20a *g; + spinlock_t data_lock; + spinlock_t vf_lock; + + u16 gpc2clk_actual_mhz; + u16 gpc2clk_default_mhz; + + u16 mclk_actual_mhz; + u16 mclk_default_mhz; + u32 voltuv_actual; + struct work_struct update_fn_work; + struct work_struct vftable_fn_work; + wait_queue_head_t vftable_wq; + + u16 *mclk_f_points; + bool vftable_set; + + struct nvgpu_clk_vf_point *mclk_vf_points; + u32 mclk_f_numpoints; + u16 *gpc2clk_f_points; + u32 gpc2clk_f_numpoints; + struct nvgpu_clk_vf_point *gpc2clk_vf_points; + +#ifdef CONFIG_DEBUG_FS + struct mutex debug_lock; + s64 switch_max; + s64 switch_min; + u64 switch_num; + s64 switch_avg; + s64 switch_std; + bool debugfs_set; +#endif }; @@ -51,15 +95,20 @@ struct nvgpu_clk_dev { struct list_head link; wait_queue_head_t readout_wq; atomic_t poll_mask; + u16 gpc2clk_target_mhz; + u16 mclk_target_mhz; }; struct nvgpu_clk_session { bool zombie; struct gk20a *g; struct kref refcount; + struct list_head link; + struct list_head targets; - u64 gpc2clk_target_hz; - u64 mclk_target_hz; + spinlock_t target_lock; + u16 gpc2clk_target_mhz; + u16 mclk_target_mhz; }; static const struct 
file_operations completion_dev_ops = { @@ -77,7 +126,7 @@ static const struct file_operations event_dev_ops = { int nvgpu_clk_arb_init_arbiter(struct gk20a *g) { struct nvgpu_clk_arb *arb; - u64 default_hz; + u16 default_mhz; int err; gk20a_dbg_fn(""); @@ -86,39 +135,104 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g) return 0; arb = kzalloc(sizeof(struct nvgpu_clk_arb), GFP_KERNEL); - if (!arb) - return -ENOMEM; + if (!arb) { + err = -ENOMEM; + goto init_fail; + } + + arb->gpc2clk_f_numpoints = MAX_F_POINTS; + arb->mclk_f_numpoints = MAX_F_POINTS; + + arb->gpc2clk_f_points = kcalloc(MAX_F_POINTS, sizeof(u16), GFP_KERNEL); + if (!arb->gpc2clk_f_points) { + err = -ENOMEM; + goto init_fail; + } + + arb->mclk_f_points = kcalloc(MAX_F_POINTS, sizeof(u16), GFP_KERNEL); + if (!arb->mclk_f_points) { + err = -ENOMEM; + goto init_fail; + } + + arb->gpc2clk_vf_points = kcalloc(MAX_F_POINTS, + sizeof(struct nvgpu_clk_vf_point), GFP_KERNEL); + if (!arb->gpc2clk_vf_points) { + err = -ENOMEM; + goto init_fail; + } + + arb->mclk_vf_points = kcalloc(MAX_F_POINTS, + sizeof(struct nvgpu_clk_vf_point), GFP_KERNEL); + if (!arb->mclk_vf_points) { + err = -ENOMEM; + goto init_fail; + } g->clk_arb = arb; + arb->g = g; - mutex_init(&arb->req_lock); - mutex_init(&arb->users_lock); + spin_lock_init(&arb->sessions_lock); + spin_lock_init(&arb->users_lock); + spin_lock_init(&arb->req_lock); + spin_lock_init(&arb->data_lock); + spin_lock_init(&arb->vf_lock); err = g->ops.clk_arb.get_arbiter_clk_default(g, - NVGPU_GPU_CLK_DOMAIN_MCLK, &default_hz); - if (err) - return -EINVAL; + NVGPU_GPU_CLK_DOMAIN_MCLK, &default_mhz); + if (err) { + err = -EINVAL; + goto init_fail; + } - arb->mclk_target_hz = default_hz; - arb->mclk_current_hz = default_hz; - arb->mclk_default_hz = default_hz; + arb->mclk_default_mhz = default_mhz; err = g->ops.clk_arb.get_arbiter_clk_default(g, - NVGPU_GPU_CLK_DOMAIN_GPC2CLK, &default_hz); - if (err) - return -EINVAL; + NVGPU_GPU_CLK_DOMAIN_GPC2CLK, &default_mhz); + if (err) { + err = -EINVAL; + goto init_fail; + } - arb->gpc2clk_target_hz = default_hz; - arb->gpc2clk_current_hz = default_hz; - arb->gpc2clk_default_hz = default_hz; - - atomic_set(&arb->usercount, 0); + arb->gpc2clk_default_mhz = default_mhz; INIT_LIST_HEAD(&arb->users); + INIT_LIST_HEAD(&arb->sessions); INIT_LIST_HEAD(&arb->requests); + + init_waitqueue_head(&arb->vftable_wq); + + INIT_WORK(&arb->vftable_fn_work, nvgpu_clk_arb_run_vftable_cb); + INIT_WORK(&arb->update_fn_work, nvgpu_clk_arb_run_arbiter_cb); +#ifdef CONFIG_DEBUG_FS + mutex_init(&arb->debug_lock); + if (!arb->debugfs_set) { + if (nvgpu_clk_arb_debugfs_init(g)) + arb->debugfs_set = true; + } +#endif + err = nvgpu_clk_arb_update_vftable(arb); + if (err < 0) + goto init_fail; + + /* Schedule first run */ + schedule_work(&arb->update_fn_work); + return 0; + +init_fail: + + kfree(arb->gpc2clk_f_points); + kfree(arb->gpc2clk_vf_points); + + kfree(arb->mclk_f_points); + kfree(arb->mclk_vf_points); + + kfree(arb); + + return err; } void nvgpu_clk_arb_cleanup_arbiter(struct gk20a *g) @@ -170,6 +284,7 @@ static int nvgpu_clk_arb_install_fd(struct gk20a *g, fail: kfree(dev); put_unused_fd(fd); + return err; } @@ -190,12 +305,16 @@ int nvgpu_clk_arb_init_session(struct gk20a *g, session->g = g; kref_init(&session->refcount); - - atomic_inc(&arb->usercount); + spin_lock_init(&session->target_lock); session->zombie = false; - session->mclk_target_hz = arb->mclk_default_hz; - session->gpc2clk_target_hz = arb->gpc2clk_default_hz; + session->mclk_target_mhz = arb->mclk_default_mhz; 
+	session->gpc2clk_target_mhz = arb->gpc2clk_default_mhz;
+	INIT_LIST_HEAD(&session->targets);
+
+	spin_lock(&arb->sessions_lock);
+	list_add_tail(&session->link, &arb->sessions);
+	spin_unlock(&arb->sessions_lock);
 
 	*_session = session;
 
@@ -206,8 +325,15 @@ void nvgpu_clk_arb_free_session(struct kref *refcount)
 {
 	struct nvgpu_clk_session *session = container_of(refcount,
 			struct nvgpu_clk_session, refcount);
+	struct nvgpu_clk_arb *arb = session->g->clk_arb;
+	gk20a_dbg_fn("");
+
+	spin_lock(&arb->sessions_lock);
+	list_del(&session->link);
+	spin_unlock(&arb->sessions_lock);
 	kfree(session);
 }
 
 void nvgpu_clk_arb_release_session(struct gk20a *g,
@@ -215,12 +341,12 @@ void nvgpu_clk_arb_release_session(struct gk20a *g,
 {
 	struct nvgpu_clk_arb *arb = g->clk_arb;
 
+	gk20a_dbg_fn("");
+
 	session->zombie = true;
 	kref_put(&session->refcount, nvgpu_clk_arb_free_session);
-	/* schedule arbiter if no more user */
-	if (!atomic_dec_and_test(&arb->usercount))
-		schedule_work(&arb->update_fn_work);
+	schedule_work(&arb->update_fn_work);
 }
 
 int nvgpu_clk_arb_install_event_fd(struct gk20a *g,
@@ -230,19 +356,155 @@ int nvgpu_clk_arb_install_event_fd(struct gk20a *g,
 	struct nvgpu_clk_dev *dev;
 	int fd;
 
+	gk20a_dbg_fn("");
+
 	fd = nvgpu_clk_arb_install_fd(g, session, &event_dev_ops, &dev);
 	if (fd < 0)
 		return fd;
 
-	mutex_lock(&arb->users_lock);
+	spin_lock(&arb->users_lock);
 	list_add_tail(&dev->link, &arb->users);
-	mutex_unlock(&arb->users_lock);
+	spin_unlock(&arb->users_lock);
 
 	*event_fd = fd;
 
 	return 0;
 }
 
+int nvgpu_clk_arb_install_request_fd(struct gk20a *g,
+		struct nvgpu_clk_session *session, int *request_fd)
+{
+	struct nvgpu_clk_dev *dev;
+	int fd;
+
+	gk20a_dbg_fn("");
+
+	fd = nvgpu_clk_arb_install_fd(g, session, &completion_dev_ops, &dev);
+	if (fd < 0)
+		return fd;
+
+	*request_fd = fd;
+
+	return 0;
+}
+
+static int nvgpu_clk_arb_update_vftable(struct nvgpu_clk_arb *arb)
+{
+	struct gk20a *g = arb->g;
+
+	int i;
+	int status = 0;
+	u32 gpc2clk_voltuv = 0, mclk_voltuv = 0;
+	u32 gpc2clk_voltuv_sram = 0, mclk_voltuv_sram = 0;
+
+	/* the flag must be visible in all threads */
+	mb();
+	ACCESS_ONCE(arb->vftable_set) = false;
+
+	spin_lock(&arb->vf_lock);
+
+	if (clk_domain_get_f_points(arb->g, NVGPU_GPU_CLK_DOMAIN_GPC2CLK,
+		&arb->gpc2clk_f_numpoints, arb->gpc2clk_f_points) < 0) {
+		gk20a_err(dev_from_gk20a(g),
+			"failed to fetch GPC2CLK frequency points");
+		goto exit_vftable;
+	}
+	if (clk_domain_get_f_points(arb->g, NVGPU_GPU_CLK_DOMAIN_MCLK,
+		&arb->mclk_f_numpoints, arb->mclk_f_points) < 0) {
+		gk20a_err(dev_from_gk20a(g),
+			"failed to fetch MCLK frequency points");
+		goto exit_vftable;
+	}
+
+	memset(arb->mclk_vf_points, 0,
+		arb->mclk_f_numpoints*sizeof(struct nvgpu_clk_vf_point));
+	memset(arb->gpc2clk_vf_points, 0,
+		arb->gpc2clk_f_numpoints*sizeof(struct nvgpu_clk_vf_point));
+
+	for (i = 0; i < arb->mclk_f_numpoints; i++) {
+		arb->mclk_vf_points[i].mhz = arb->mclk_f_points[i];
+		mclk_voltuv = mclk_voltuv_sram = 0;
+
+		status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK,
+			&arb->mclk_vf_points[i].mhz, &mclk_voltuv,
+			CTRL_VOLT_DOMAIN_LOGIC);
+		if (status < 0) {
+			gk20a_err(dev_from_gk20a(g),
+				"failed to get MCLK LOGIC voltage");
+			goto exit_vftable;
+		}
+		status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_MCLK,
+			&arb->mclk_vf_points[i].mhz, &mclk_voltuv_sram,
+			CTRL_VOLT_DOMAIN_SRAM);
+		if (status < 0) {
+			gk20a_err(dev_from_gk20a(g),
+				"failed to get MCLK SRAM voltage");
+			goto exit_vftable;
+		}
+
+		arb->mclk_vf_points[i].uvolt = mclk_voltuv;
+		arb->mclk_vf_points[i].uvolt_sram = mclk_voltuv_sram;
+	}
+
+	for (i = 0; i < arb->gpc2clk_f_numpoints; i++) {
+		arb->gpc2clk_vf_points[i].mhz = arb->gpc2clk_f_points[i];
+		gpc2clk_voltuv = gpc2clk_voltuv_sram = 0;
+
+		status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK,
+			&arb->gpc2clk_vf_points[i].mhz, &gpc2clk_voltuv,
+			CTRL_VOLT_DOMAIN_LOGIC);
+		if (status < 0) {
+			gk20a_err(dev_from_gk20a(g),
+				"failed to get GPC2CLK LOGIC voltage");
+			goto exit_vftable;
+		}
+		status = clk_domain_get_f_or_v(g, CTRL_CLK_DOMAIN_GPC2CLK,
+			&arb->gpc2clk_vf_points[i].mhz, &gpc2clk_voltuv_sram,
+			CTRL_VOLT_DOMAIN_SRAM);
+		if (status < 0) {
+			gk20a_err(dev_from_gk20a(g),
+				"failed to get GPC2CLK SRAM voltage");
+			goto exit_vftable;
+		}
+
+		arb->gpc2clk_vf_points[i].uvolt = gpc2clk_voltuv;
+		arb->gpc2clk_vf_points[i].uvolt_sram = gpc2clk_voltuv_sram;
+	}
+
+	/* make flag visible when all data has resolved in the tables */
+	wmb();
+	ACCESS_ONCE(arb->vftable_set) = true;
+
+	wake_up(&arb->vftable_wq);
+exit_vftable:
+
+	spin_unlock(&arb->vf_lock);
+
+	return status;
+}
+
+void nvgpu_clk_arb_schedule_vftable_update(struct gk20a *g)
+{
+	struct nvgpu_clk_arb *arb = g->clk_arb;
+
+	ACCESS_ONCE(arb->vftable_set) = false;
+	/* Disable the flag in case arbiter gets scheduled first */
+	mb();
+
+	schedule_work(&arb->vftable_fn_work);
+	schedule_work(&arb->update_fn_work);
+}
+
+static void nvgpu_clk_arb_run_vftable_cb(struct work_struct *work)
+{
+	struct nvgpu_clk_arb *arb =
+		container_of(work, struct nvgpu_clk_arb, vftable_fn_work);
+
+	nvgpu_clk_arb_update_vftable(arb);
+}
+
 static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 {
 	struct nvgpu_clk_arb *arb =
@@ -250,67 +512,270 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 	struct nvgpu_clk_session *session;
 	struct nvgpu_clk_dev *dev;
 	struct nvgpu_clk_dev *tmp;
+	struct gk20a *g = arb->g;
 
-	mutex_lock(&arb->req_lock);
+	struct change_fll_clk fllclk;
+	u32 gpc2clk_voltuv = 0, mclk_voltuv = 0;
+	u32 gpc2clk_voltuv_sram = 0, mclk_voltuv_sram = 0;
 
-	arb->mclk_target_hz = arb->mclk_default_hz;
-	arb->gpc2clk_target_hz = arb->gpc2clk_default_hz;
+	u32 voltuv, voltuv_sram;
 
-	list_for_each_entry(dev, &arb->requests, link) {
-		session = dev->session;
+	int status;
+
+	/* Temporary variables for checking target frequency */
+	u16 gpc2clk_target, mclk_target;
+
+	/* iteration index */
+	u32 index;
+
+#ifdef CONFIG_DEBUG_FS
+	u64 t0, t1;
+#endif
+
+	gk20a_dbg_fn("");
+
+#ifdef CONFIG_DEBUG_FS
+	g->ops.read_ptimer(g, &t0);
+#endif
+
+	/* Only one arbiter should be running */
+	gpc2clk_target = 0;
+	mclk_target = 0;
+
+	spin_lock(&arb->sessions_lock);
+	list_for_each_entry(session, &arb->sessions, link) {
 		if (!session->zombie) {
-			/* TODO: arbiter policy. For now last request wins */
+			spin_lock(&arb->req_lock);
+			spin_lock(&session->target_lock);
-			arb->mclk_target_hz = session->mclk_target_hz;
-			arb->gpc2clk_target_hz = session->gpc2clk_target_hz;
+			mclk_target = mclk_target > session->mclk_target_mhz ?
+				mclk_target : session->mclk_target_mhz;
+
+			gpc2clk_target =
+				gpc2clk_target > session->gpc2clk_target_mhz ?
+				gpc2clk_target : session->gpc2clk_target_mhz;
+			/* Move processed requests to notification list */
+			list_for_each_entry_safe(dev, tmp, &session->targets,
+					link) {
+				list_del_init(&dev->link);
+				list_add_tail(&dev->link, &arb->requests);
+			}
+			spin_unlock(&session->target_lock);
+			spin_unlock(&arb->req_lock);
+
+		}
+	}
+	spin_unlock(&arb->sessions_lock);
+
+	gpc2clk_target = (gpc2clk_target > 0) ? gpc2clk_target :
+		arb->gpc2clk_actual_mhz ? gpc2clk_target :
+		arb->gpc2clk_default_mhz;
+
+	mclk_target = (mclk_target > 0) ? mclk_target :
+		arb->mclk_actual_mhz ? mclk_target :
+		arb->mclk_default_mhz;
+
+	if (!gpc2clk_target && !mclk_target) {
+		mclk_target = arb->mclk_default_mhz;
+		gpc2clk_target = arb->gpc2clk_default_mhz;
+	}
+
+	if (!gpc2clk_target)
+		gpc2clk_target = arb->gpc2clk_actual_mhz;
+
+	do {
+		/* Check that the table is set */
+		mb();
+		wait_event(arb->vftable_wq, arb->vftable_set);
+	} while (!ACCESS_ONCE(arb->vftable_set));
+
+	spin_lock(&arb->vf_lock);
+	/* round up the freq requests */
+	for (index = 0; index < arb->gpc2clk_f_numpoints; index++) {
+		if (arb->gpc2clk_vf_points[index].mhz >= gpc2clk_target) {
+			gpc2clk_target = arb->gpc2clk_vf_points[index].mhz;
+			gpc2clk_voltuv = arb->gpc2clk_vf_points[index].uvolt;
+			gpc2clk_voltuv_sram =
+				arb->gpc2clk_vf_points[index].uvolt_sram;
+			break;
 		}
 	}
 
-	/* TODO: loop up higher or equal VF points */
+	if (index == arb->gpc2clk_f_numpoints) {
+		gpc2clk_target = arb->gpc2clk_vf_points[index].mhz;
+		gpc2clk_voltuv = arb->gpc2clk_vf_points[index].uvolt;
+		gpc2clk_voltuv_sram =
+			arb->gpc2clk_vf_points[index].uvolt_sram;
+	}
 
-	arb->mclk_current_hz = arb->mclk_target_hz;
-	arb->gpc2clk_current_hz = arb->gpc2clk_target_hz;
+	if (!mclk_target)
+		mclk_target = arb->mclk_actual_mhz;
 
-	/* TODO: actually program the clocks */
+	for (index = 0; index < arb->mclk_f_numpoints; index++) {
+		if (arb->mclk_vf_points[index].mhz >= mclk_target) {
+			mclk_target = arb->mclk_vf_points[index].mhz;
+			mclk_voltuv = arb->mclk_vf_points[index].uvolt;
+			mclk_voltuv_sram =
+				arb->mclk_vf_points[index].uvolt_sram;
+			break;
+		}
+	}
+	if (index == arb->mclk_f_numpoints) {
+		mclk_target = arb->mclk_vf_points[index].mhz;
+		mclk_voltuv = arb->mclk_vf_points[index].uvolt;
+		mclk_voltuv_sram =
+			arb->mclk_vf_points[index].uvolt_sram;
+	}
+	spin_unlock(&arb->vf_lock);
+	/* Program clocks */
+	/* A change in either mclk or gpc2clk may require a change in voltage */
+	if ((arb->gpc2clk_actual_mhz == gpc2clk_target) &&
+		(arb->mclk_actual_mhz == mclk_target)) {
+		goto exit_arb;
+	}
+
+	voltuv = gpc2clk_voltuv > mclk_voltuv ? gpc2clk_voltuv : mclk_voltuv;
+	voltuv_sram = gpc2clk_voltuv_sram > mclk_voltuv_sram ?
+ gpc2clk_voltuv_sram : mclk_voltuv_sram; + + /* if voltage ascends we do: + * (1) FLL change + * (2) Voltage change + * (3) MCLK change + * If it goes down + * (1) MCLK change + * (2) Voltage change + * (3) FLL change + */ + + /* descending */ + if (voltuv <= arb->voltuv_actual) { + status = g->clk_pmu.clk_mclk.change(g, mclk_target); + if (status < 0) + goto exit_arb; + + status = volt_set_voltage(g, voltuv, voltuv_sram); + if (status < 0) + goto exit_arb; + + fllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPC2CLK; + fllclk.clkmhz = gpc2clk_target; + fllclk.voltuv = voltuv; + status = clk_program_fll_clks(g, &fllclk); + if (status < 0) + goto exit_arb; + } else { + fllclk.api_clk_domain = CTRL_CLK_DOMAIN_GPC2CLK; + fllclk.clkmhz = gpc2clk_target; + fllclk.voltuv = voltuv; + status = clk_program_fll_clks(g, &fllclk); + if (status < 0) + goto exit_arb; + + status = volt_set_voltage(g, voltuv, voltuv_sram); + if (status < 0) + goto exit_arb; + + status = g->clk_pmu.clk_mclk.change(g, mclk_target); + if (status < 0) + goto exit_arb; + } + + spin_lock(&arb->data_lock); + arb->gpc2clk_actual_mhz = gpc2clk_target; + arb->mclk_actual_mhz = mclk_target; + arb->voltuv_actual = voltuv; + /* Make changes visible to other threads */ + wmb(); + + spin_unlock(&arb->data_lock); + +#ifdef CONFIG_DEBUG_FS + g->ops.read_ptimer(g, &t1); + arb->switch_num++; + + mutex_lock(&arb->debug_lock); + if (arb->switch_num == 1) { + arb->switch_max = arb->switch_min = + arb->switch_avg = (t1-t0)/1000; + arb->switch_std = 0; + } else { + s64 prev_avg; + u64 curr = (t1-t0)/1000; + + arb->switch_max = curr > arb->switch_max ? + curr : arb->switch_max; + arb->switch_min = arb->switch_min ? + (curr < arb->switch_min ? + curr : arb->switch_min) : curr; + prev_avg = arb->switch_avg; + arb->switch_avg = (curr + + (arb->switch_avg * (arb->switch_num-1))) / + arb->switch_num; + arb->switch_std += + (curr - arb->switch_avg) * (curr - prev_avg); + } + mutex_unlock(&arb->debug_lock); + +#endif + +exit_arb: + + spin_lock(&arb->req_lock); /* notify completion for all requests */ list_for_each_entry_safe(dev, tmp, &arb->requests, link) { atomic_set(&dev->poll_mask, POLLIN | POLLRDNORM); wake_up_interruptible(&dev->readout_wq); list_del_init(&dev->link); } - mutex_unlock(&arb->req_lock); + spin_unlock(&arb->req_lock); /* notify event for all users */ - mutex_lock(&arb->users_lock); + spin_lock(&arb->users_lock); list_for_each_entry(dev, &arb->users, link) { atomic_set(&dev->poll_mask, POLLIN | POLLRDNORM); wake_up_interruptible(&dev->readout_wq); } - mutex_unlock(&arb->users_lock); - + spin_unlock(&arb->users_lock); } -int nvgpu_clk_arb_apply_session_constraints(struct gk20a *g, - struct nvgpu_clk_session *session, int *completion_fd) +int nvgpu_clk_arb_commit_request_fd(struct gk20a *g, + struct nvgpu_clk_session *session, int request_fd) { struct nvgpu_clk_arb *arb = g->clk_arb; struct nvgpu_clk_dev *dev; - int fd; + struct fd fd; + int err = 0; - fd = nvgpu_clk_arb_install_fd(g, session, &completion_dev_ops, &dev); - if (fd < 0) - return fd; + gk20a_dbg_fn(""); - *completion_fd = fd; + fd = fdget(request_fd); - mutex_lock(&arb->req_lock); - list_add_tail(&dev->link, &arb->requests); - mutex_unlock(&arb->req_lock); + if (!fd.file) + return -EINVAL; + + dev = (struct nvgpu_clk_dev *) fd.file->private_data; + + if (!dev || dev->session != session) { + err = -EINVAL; + goto fdput_fd; + } + spin_lock(&session->target_lock); + session->mclk_target_mhz = dev->mclk_target_mhz ? 
dev->mclk_target_mhz : + session->mclk_target_mhz; + session->gpc2clk_target_mhz = dev->gpc2clk_target_mhz ? + dev->gpc2clk_target_mhz : + session->gpc2clk_target_mhz; + + list_add_tail(&dev->link, &session->targets); + spin_unlock(&session->target_lock); schedule_work(&arb->update_fn_work); - return 0; +fdput_fd: + fdput(fd); + return err; } static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait) @@ -328,11 +793,22 @@ static int nvgpu_clk_arb_release_completion_dev(struct inode *inode, { struct nvgpu_clk_dev *dev = filp->private_data; struct nvgpu_clk_session *session = dev->session; + struct nvgpu_clk_arb *arb; + + arb = session->g->clk_arb; gk20a_dbg_fn(""); + spin_lock(&arb->req_lock); + spin_lock(&session->target_lock); + if (!list_empty(&dev->link)) + list_del_init(&dev->link); + spin_unlock(&session->target_lock); + spin_unlock(&arb->req_lock); + kref_put(&session->refcount, nvgpu_clk_arb_free_session); kfree(dev); + return 0; } @@ -341,94 +817,123 @@ static int nvgpu_clk_arb_release_event_dev(struct inode *inode, { struct nvgpu_clk_dev *dev = filp->private_data; struct nvgpu_clk_session *session = dev->session; - struct nvgpu_clk_arb *arb = session->g->clk_arb; + struct nvgpu_clk_arb *arb; + + arb = session->g->clk_arb; gk20a_dbg_fn(""); - mutex_lock(&arb->users_lock); - list_del_init(&dev->link); - mutex_unlock(&arb->users_lock); + spin_lock(&arb->users_lock); + list_del(&dev->link); + spin_unlock(&arb->users_lock); kref_put(&session->refcount, nvgpu_clk_arb_free_session); kfree(dev); + return 0; } -int nvgpu_clk_arb_set_session_target_hz(struct nvgpu_clk_session *session, - u32 api_domain, u64 target_hz) +int nvgpu_clk_arb_set_session_target_mhz(struct nvgpu_clk_session *session, + int request_fd, u32 api_domain, u16 target_mhz) { + struct nvgpu_clk_dev *dev; + struct fd fd; + int err = 0; - gk20a_dbg_fn("domain=0x%08x target_hz=%llu", api_domain, target_hz); + gk20a_dbg_fn("domain=0x%08x target_mhz=%u", api_domain, target_mhz); + + fd = fdget(request_fd); + + if (!fd.file) + return -EINVAL; + + dev = fd.file->private_data; + if (!dev || dev->session != session) { + err = -EINVAL; + goto fdput_fd; + } switch (api_domain) { case NVGPU_GPU_CLK_DOMAIN_MCLK: - session->mclk_target_hz = target_hz; - return 0; + dev->mclk_target_mhz = target_mhz; + break; case NVGPU_GPU_CLK_DOMAIN_GPC2CLK: - session->gpc2clk_target_hz = target_hz; - return 0; + dev->gpc2clk_target_mhz = target_mhz; + break; default: - return -EINVAL; + err = -EINVAL; } + +fdput_fd: + fdput(fd); + return err; } -int nvgpu_clk_arb_get_session_target_hz(struct nvgpu_clk_session *session, - u32 api_domain, u64 *freq_hz) +int nvgpu_clk_arb_get_session_target_mhz(struct nvgpu_clk_session *session, + u32 api_domain, u16 *freq_mhz) { + int err = 0; + + spin_lock(&session->target_lock); + switch (api_domain) { case NVGPU_GPU_CLK_DOMAIN_MCLK: - *freq_hz = session->mclk_target_hz; - return 0; + *freq_mhz = session->mclk_target_mhz; + break; case NVGPU_GPU_CLK_DOMAIN_GPC2CLK: - *freq_hz = session->gpc2clk_target_hz; - return 0; + *freq_mhz = session->gpc2clk_target_mhz; + break; default: - *freq_hz = 0; - return -EINVAL; + *freq_mhz = 0; + err = -EINVAL; } + + spin_unlock(&session->target_lock); + return err; } -int nvgpu_clk_arb_get_arbiter_actual_hz(struct gk20a *g, - u32 api_domain, u64 *freq_hz) +int nvgpu_clk_arb_get_arbiter_actual_mhz(struct gk20a *g, + u32 api_domain, u16 *freq_mhz) { struct nvgpu_clk_arb *arb = g->clk_arb; int err = 0; - mutex_lock(&arb->req_lock); + spin_lock(&arb->data_lock); + 
switch (api_domain) { case NVGPU_GPU_CLK_DOMAIN_MCLK: - *freq_hz = arb->mclk_current_hz; + *freq_mhz = arb->mclk_actual_mhz; break; case NVGPU_GPU_CLK_DOMAIN_GPC2CLK: - *freq_hz = arb->gpc2clk_current_hz; + *freq_mhz = arb->gpc2clk_actual_mhz; break; default: - *freq_hz = 0; + *freq_mhz = 0; err = -EINVAL; } - mutex_unlock(&arb->req_lock); + spin_unlock(&arb->data_lock); return err; } -int nvgpu_clk_arb_get_arbiter_effective_hz(struct gk20a *g, - u32 api_domain, u64 *freq_hz) +int nvgpu_clk_arb_get_arbiter_effective_mhz(struct gk20a *g, + u32 api_domain, u16 *freq_mhz) { /* TODO: measure clocks from counters */ - return nvgpu_clk_arb_get_arbiter_actual_hz(g, api_domain, freq_hz); + return nvgpu_clk_arb_get_arbiter_actual_mhz(g, api_domain, freq_mhz); } int nvgpu_clk_arb_get_arbiter_clk_range(struct gk20a *g, u32 api_domain, - u64 *min_hz, u64 *max_hz) + u16 *min_mhz, u16 *max_mhz) { return g->ops.clk_arb.get_arbiter_clk_range(g, api_domain, - min_hz, max_hz); + min_mhz, max_mhz); } u32 nvgpu_clk_arb_get_arbiter_clk_domains(struct gk20a *g) @@ -441,3 +946,67 @@ int nvgpu_clk_arb_get_arbiter_clk_f_points(struct gk20a *g, { return (int)clk_domain_get_f_points(g, api_domain, max_points, fpoints); } + +#ifdef CONFIG_DEBUG_FS +static int nvgpu_clk_arb_stats_show(struct seq_file *s, void *unused) +{ + struct gk20a *g = s->private; + struct nvgpu_clk_arb *arb = g->clk_arb; + u64 num; + s64 tmp, avg, std, max, min; + + /* Make copy of structure to reduce time with lock held */ + mutex_lock(&arb->debug_lock); + std = arb->switch_std; + avg = arb->switch_avg; + max = arb->switch_max; + min = arb->switch_min; + num = arb->switch_num; + mutex_unlock(&arb->debug_lock); + + tmp = std; + do_div(tmp, num); + seq_printf(s, "Number of transitions: %lld\n", + num); + seq_printf(s, "max / min : %lld / %lld usec\n", + max, min); + seq_printf(s, "avg / std : %lld / %ld usec\n", + avg, int_sqrt(tmp)); + + return 0; +} + +static int nvgpu_clk_arb_stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, nvgpu_clk_arb_stats_show, inode->i_private); +} + +static const struct file_operations nvgpu_clk_arb_stats_fops = { + .open = nvgpu_clk_arb_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + + +static int nvgpu_clk_arb_debugfs_init(struct gk20a *g) +{ + struct gk20a_platform *platform = dev_get_drvdata(g->dev); + + struct dentry *gpu_root = platform->debugfs; + struct dentry *d; + + gk20a_dbg(gpu_dbg_info, "g=%p", g); + + d = debugfs_create_file( + "arb_stats", + S_IRUGO, + gpu_root, + g, + &nvgpu_clk_arb_stats_fops); + if (!d) + return -ENOMEM; + + return 0; +} +#endif diff --git a/drivers/gpu/nvgpu/clk/clk_arb.h b/drivers/gpu/nvgpu/clk/clk_arb.h index 957493693..717cca9bf 100644 --- a/drivers/gpu/nvgpu/clk/clk_arb.h +++ b/drivers/gpu/nvgpu/clk/clk_arb.h @@ -22,13 +22,13 @@ struct nvgpu_clk_session; int nvgpu_clk_arb_init_arbiter(struct gk20a *g); int nvgpu_clk_arb_get_arbiter_clk_range(struct gk20a *g, u32 api_domain, - u64 *min_hz, u64 *max_hz); + u16 *min_mhz, u16 *max_mhz); -int nvgpu_clk_arb_get_arbiter_actual_hz(struct gk20a *g, - u32 api_domain, u64 *actual_hz); +int nvgpu_clk_arb_get_arbiter_actual_mhz(struct gk20a *g, + u32 api_domain, u16 *actual_mhz); -int nvgpu_clk_arb_get_arbiter_effective_hz(struct gk20a *g, - u32 api_domain, u64 *actual_hz); +int nvgpu_clk_arb_get_arbiter_effective_mhz(struct gk20a *g, + u32 api_domain, u16 *effective_mhz); int nvgpu_clk_arb_get_arbiter_clk_f_points(struct gk20a *g, u32 api_domain, u32 *max_points, u16 
*fpoints);
@@ -46,19 +46,21 @@ int nvgpu_clk_arb_init_session(struct gk20a *g,
 void nvgpu_clk_arb_release_session(struct gk20a *g,
 		struct nvgpu_clk_session *session);
 
-int nvgpu_clk_arb_apply_session_constraints(struct gk20a *g,
-		struct nvgpu_clk_session *session, int *completion_fd);
+int nvgpu_clk_arb_commit_request_fd(struct gk20a *g,
+		struct nvgpu_clk_session *session, int request_fd);
 
-int nvgpu_clk_arb_set_session_target_hz(struct nvgpu_clk_session *session,
-		u32 api_domain, u64 target_hz);
+int nvgpu_clk_arb_set_session_target_mhz(struct nvgpu_clk_session *session,
+		int fd, u32 api_domain, u16 target_mhz);
 
-int nvgpu_clk_arb_get_session_target_hz(struct nvgpu_clk_session *session,
-		u32 api_domain, u64 *target_hz);
+int nvgpu_clk_arb_get_session_target_mhz(struct nvgpu_clk_session *session,
+		u32 api_domain, u16 *target_mhz);
 
 int nvgpu_clk_arb_install_event_fd(struct gk20a *g,
 		struct nvgpu_clk_session *session, int *event_fd);
 
+int nvgpu_clk_arb_install_request_fd(struct gk20a *g,
+		struct nvgpu_clk_session *session, int *request_fd);
-
+void nvgpu_clk_arb_schedule_vftable_update(struct gk20a *g);
 
 #endif /* _CLK_ARB_H_ */
diff --git a/drivers/gpu/nvgpu/clk/clk_mclk.c b/drivers/gpu/nvgpu/clk/clk_mclk.c
index 86f4ff6dd..6ad6c0544 100644
--- a/drivers/gpu/nvgpu/clk/clk_mclk.c
+++ b/drivers/gpu/nvgpu/clk/clk_mclk.c
@@ -2222,7 +2222,7 @@ int clk_mclkseq_init_mclk_gddr5(struct gk20a *g)
 	return 0;
 }
 
-int clk_mclkseq_change_mclk_gddr5(struct gk20a *g, enum gk20a_mclk_speed speed)
+int clk_mclkseq_change_mclk_gddr5(struct gk20a *g, u16 val)
 {
 	struct clk_mclk_state *mclk;
 	struct pmu_payload payload = { {0} };
@@ -2236,6 +2236,7 @@ int clk_mclkseq_change_mclk_gddr5(struct gk20a *g, enum gk20a_mclk_speed speed)
 #ifdef CONFIG_DEBUG_FS
 	u64 t0, t1;
 #endif
+	enum gk20a_mclk_speed speed;
 
 	gk20a_dbg_info("");
 
@@ -2246,6 +2247,13 @@
 	if (!mclk->init)
 		goto exit_status;
 
+	/* TODO this should be done according to VBIOS tables */
+
+	speed = (val <= MCLK_LOW_SPEED_LIMIT) ? gk20a_mclk_low_speed :
+		(val <= MCLK_MID_SPEED_LIMIT) ? gk20a_mclk_mid_speed :
+			gk20a_mclk_high_speed;
+
+
 	if (speed == mclk->speed)
 		goto exit_status;
 
@@ -2374,20 +2382,13 @@ exit_status:
 #ifdef CONFIG_DEBUG_FS
 static int mclk_debug_speed_set(void *data, u64 val)
 {
-	enum gk20a_mclk_speed speed;
 	struct gk20a *g = (struct gk20a *) data;
 	struct clk_mclk_state *mclk;
 
 	mclk = &g->clk_pmu.clk_mclk;
 
-	/* TODO thia should be done according to VBIOS tables */
-
-	speed = (val <= MCLK_LOW_SPEED_LIMIT) ? gk20a_mclk_low_speed :
-		(val <= MCLK_MID_SPEED_LIMIT) ?
gk20a_mclk_mid_speed : - gk20a_mclk_high_speed; - if (mclk->change) - return mclk->change(g, speed); + return mclk->change(g, (u16) val); return 0; } diff --git a/drivers/gpu/nvgpu/clk/clk_mclk.h b/drivers/gpu/nvgpu/clk/clk_mclk.h index 9d193c96e..e3e6c1ee7 100644 --- a/drivers/gpu/nvgpu/clk/clk_mclk.h +++ b/drivers/gpu/nvgpu/clk/clk_mclk.h @@ -22,9 +22,12 @@ enum gk20a_mclk_speed { gk20a_mclk_high_speed, }; -#define DEFAULT_BOOT_MCLK_SPEED gk20a_mclk_high_speed #define MCLK_LOW_SPEED_LIMIT 405 #define MCLK_MID_SPEED_LIMIT 810 +#define MCLK_HIGH_SPEED_LIMIT 3003 + +#define DEFAULT_BOOT_MCLK_SPEED MCLK_HIGH_SPEED_LIMIT + struct clk_mclk_state { enum gk20a_mclk_speed speed; struct mutex mclk_mutex; @@ -32,7 +35,7 @@ struct clk_mclk_state { bool init; /* function pointers */ - int (*change)(struct gk20a *g, enum gk20a_mclk_speed speed); + int (*change)(struct gk20a *g, u16 val); #ifdef CONFIG_DEBUG_FS s64 switch_max; @@ -45,7 +48,6 @@ struct clk_mclk_state { }; int clk_mclkseq_init_mclk_gddr5(struct gk20a *g); -int clk_mclkseq_change_mclk_gddr5(struct gk20a *g, - enum gk20a_mclk_speed speed); +int clk_mclkseq_change_mclk_gddr5(struct gk20a *g, u16 val); #endif diff --git a/drivers/gpu/nvgpu/gp106/clk_arb_gp106.c b/drivers/gpu/nvgpu/gp106/clk_arb_gp106.c index 112cb588a..d1cbb32b5 100644 --- a/drivers/gpu/nvgpu/gp106/clk_arb_gp106.c +++ b/drivers/gpu/nvgpu/gp106/clk_arb_gp106.c @@ -23,7 +23,7 @@ static u32 gp106_get_arbiter_clk_domains(struct gk20a *g) } static int gp106_get_arbiter_clk_range(struct gk20a *g, u32 api_domain, - u64 *min_hz, u64 *max_hz) + u16 *min_mhz, u16 *max_mhz) { enum nv_pmu_clk_clkwhich clkwhich; struct clk_set_info *p0_info; @@ -52,14 +52,14 @@ static int gp106_get_arbiter_clk_range(struct gk20a *g, u32 api_domain, if (!p0_info) return -EINVAL; - *min_hz = (u64)(p5_info->min_mhz) * (u64)MHZ; - *max_hz = (u64)(p0_info->max_mhz) * (u64)MHZ; + *min_mhz = p5_info->min_mhz; + *max_mhz = p0_info->max_mhz; return 0; } static int gp106_get_arbiter_clk_default(struct gk20a *g, u32 api_domain, - u64 *default_hz) + u16 *default_mhz) { enum nv_pmu_clk_clkwhich clkwhich; struct clk_set_info *p0_info; @@ -82,7 +82,7 @@ static int gp106_get_arbiter_clk_default(struct gk20a *g, u32 api_domain, if (!p0_info) return -EINVAL; - *default_hz = (u64)p0_info->max_mhz * (u64)MHZ; + *default_mhz = p0_info->max_mhz; return 0; } diff --git a/drivers/gpu/nvgpu/pstate/pstate.c b/drivers/gpu/nvgpu/pstate/pstate.c index 0dc152012..f01b52adf 100644 --- a/drivers/gpu/nvgpu/pstate/pstate.c +++ b/drivers/gpu/nvgpu/pstate/pstate.c @@ -153,10 +153,6 @@ int gk20a_init_pstate_pmu_support(struct gk20a *g) if (err) return err; - err = clk_set_boot_fll_clk(g); - if (err) - return err; - err = pmgr_domain_pmu_setup(g); return err; }
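
Note on the new arbiter callback in clk_arb.c: nvgpu_clk_arb_run_arbiter_cb() orders its three programming steps by whether the arbitrated voltage rises or falls relative to arb->voltuv_actual. The standalone sketch below illustrates only that ordering rule; program_mclk(), program_fll() and set_voltage() are hypothetical stand-ins for clk_mclk.change(), clk_program_fll_clks() and volt_set_voltage(), not driver APIs.

/* Minimal user-space sketch of the ordering rule, under the
 * assumptions stated above; not part of the patch. */
#include <stdio.h>

static int set_voltage(unsigned int uv)    { printf("volt    -> %u uV\n", uv); return 0; }
static int program_fll(unsigned int mhz)   { printf("gpc2clk -> %u MHz\n", mhz); return 0; }
static int program_mclk(unsigned int mhz)  { printf("mclk    -> %u MHz\n", mhz); return 0; }

static int arbiter_program(unsigned int gpc2clk_mhz, unsigned int mclk_mhz,
			   unsigned int voltuv, unsigned int voltuv_actual)
{
	int status;

	if (voltuv <= voltuv_actual) {
		/* voltage is not rising: MCLK, then voltage, then the FLL */
		status = program_mclk(mclk_mhz);
		if (status < 0)
			return status;
		status = set_voltage(voltuv);
		if (status < 0)
			return status;
		status = program_fll(gpc2clk_mhz);
	} else {
		/* voltage is rising: FLL, then voltage, then MCLK */
		status = program_fll(gpc2clk_mhz);
		if (status < 0)
			return status;
		status = set_voltage(voltuv);
		if (status < 0)
			return status;
		status = program_mclk(mclk_mhz);
	}
	return status;
}

int main(void)
{
	/* example: targets that need a higher voltage than is currently set */
	return arbiter_program(2600, 3003, 900000, 800000) < 0;
}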
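The debugfs statistics kept by nvgpu_clk_arb_run_arbiter_cb() maintain a running mean plus an accumulator of (curr - new_avg) * (curr - prev_avg), so nvgpu_clk_arb_stats_show() only needs one division and an integer square root at read time. A small user-space sketch of the same incremental update follows; the names switch_stats and switch_stats_add are illustrative only, and the sketch is not part of the patch.

/* Running min/max/mean and variance accumulator, mirroring the update
 * done under arb->debug_lock in the patch; link with -lm. */
#include <math.h>
#include <stdio.h>

struct switch_stats {
	long long num;
	long long min_us, max_us;
	long long avg_us;	/* running mean, integer arithmetic */
	long long m2;		/* sum of (x - new_avg) * (x - prev_avg) */
};

static void switch_stats_add(struct switch_stats *s, long long curr_us)
{
	long long prev_avg;

	if (++s->num == 1) {
		s->min_us = s->max_us = s->avg_us = curr_us;
		s->m2 = 0;
		return;
	}
	if (curr_us > s->max_us)
		s->max_us = curr_us;
	if (curr_us < s->min_us)
		s->min_us = curr_us;

	prev_avg = s->avg_us;
	s->avg_us = (curr_us + s->avg_us * (s->num - 1)) / s->num;
	s->m2 += (curr_us - s->avg_us) * (curr_us - prev_avg);
}

int main(void)
{
	struct switch_stats s = { 0 };
	long long samples[] = { 120, 95, 140, 110 };
	int i;

	for (i = 0; i < 4; i++)
		switch_stats_add(&s, samples[i]);

	/* std is reported as sqrt(m2 / num), matching the debugfs reader */
	printf("n=%lld min=%lld max=%lld avg=%lld std~=%.1f usec\n",
	       s.num, s.min_us, s.max_us, s.avg_us,
	       sqrt((double)s.m2 / s.num));
	return 0;
}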