gpu: nvgpu: ioctls for clock controls

Add ioctls to query clock ranges and VF points.
Add ioctls to set the target frequency and get the actual frequency, in MHz.

Jira DNVGPU-125

Change-Id: I7639789bb15eabd8c98adc468201dba3a6e19ade
Signed-off-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-on: http://git-master/r/1223473
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
(cherry picked from commit 5e635ae34221c99a739321bcfc1418db56c1051d)
Reviewed-on: http://git-master/r/1243107
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Author: Thomas Fleury
Date: 2016-09-13 14:25:28 -07:00
Committed by: mobile promotions
parent 2109478311
commit 05805ec65b
4 changed files with 520 additions and 3 deletions

@@ -19,6 +19,7 @@
#include <linux/file.h>
#include <linux/anon_inodes.h>
#include <linux/nvgpu.h>
#include <linux/bitops.h>
#include <uapi/linux/nvgpu.h>
#include <linux/delay.h>
@@ -29,25 +30,55 @@
#include "hw_gr_gk20a.h" #include "hw_gr_gk20a.h"
#include "hw_fb_gk20a.h" #include "hw_fb_gk20a.h"
#include "hw_timer_gk20a.h" #include "hw_timer_gk20a.h"
#include "clk/clk_arb.h"
struct gk20a_ctrl_priv {
struct device *dev;
struct nvgpu_clk_session *clk_session;
};
int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp)
{
struct gk20a *g;
struct gk20a_ctrl_priv *priv;
int err;
gk20a_dbg_fn("");
g = container_of(inode->i_cdev,
struct gk20a, ctrl.cdev);
priv = kzalloc(sizeof(struct gk20a_ctrl_priv), GFP_KERNEL);
if (!priv)
return -ENOMEM;
filp->private_data = priv;
priv->dev = g->dev;
if (!g->gr.sw_ready) {
err = gk20a_busy(g->dev);
if (err)
return err;
gk20a_idle(g->dev);
}
return nvgpu_clk_arb_init_session(g, &priv->clk_session);
}
int gk20a_ctrl_dev_release(struct inode *inode, struct file *filp)
{
struct gk20a_ctrl_priv *priv = filp->private_data;
struct gk20a *g = gk20a_from_dev(priv->dev);
struct nvgpu_clk_session *clk_session = priv->clk_session;
gk20a_dbg_fn("");
if (clk_session)
nvgpu_clk_arb_cleanup_session(g, clk_session);
kfree(priv);
return 0;
}
@@ -789,9 +820,284 @@ static int nvgpu_gpu_get_memory_state(struct gk20a *g,
return err;
}
static int nvgpu_gpu_clk_get_vf_points(struct gk20a *g,
struct gk20a_ctrl_priv *priv,
struct nvgpu_gpu_clk_vf_points_args *args)
{
struct nvgpu_gpu_clk_vf_point clk_point;
struct nvgpu_gpu_clk_vf_point __user *entry;
struct nvgpu_clk_session *session = priv->clk_session;
u32 clk_domains = 0;
int err;
u16 last_mhz;
u16 *fpoints;
u32 i;
u32 max_points = 0;
u32 num_points = 0;
u16 min_mhz;
u16 max_mhz;
gk20a_dbg_fn("");
if (!session || args->flags)
return -EINVAL;
clk_domains = nvgpu_clk_arb_get_arbiter_clk_domains(g);
args->num_entries = 0;
if ((args->clk_domain & clk_domains) == 0)
return -EINVAL;
err = nvgpu_clk_arb_get_arbiter_clk_f_points(g,
args->clk_domain, &max_points, NULL);
if (err)
return err;
if (!args->max_entries) {
args->max_entries = max_points;
return 0;
}
if (args->max_entries < max_points)
return -EINVAL;
err = nvgpu_clk_arb_get_arbiter_clk_range(g, args->clk_domain,
&min_mhz, &max_mhz);
if (err)
return err;
fpoints = kcalloc(max_points, sizeof(u16), GFP_KERNEL);
if (!fpoints)
return -ENOMEM;
err = nvgpu_clk_arb_get_arbiter_clk_f_points(g,
args->clk_domain, &max_points, fpoints);
if (err)
goto fail;
entry = (struct nvgpu_gpu_clk_vf_point __user *)
(uintptr_t)args->clk_vf_point_entries;
last_mhz = 0;
num_points = 0;
for (i = 0; (i < max_points) && !err; i++) {
/* filter out duplicate frequencies */
if (fpoints[i] == last_mhz)
continue;
/* filter out out-of-range frequencies */
if ((fpoints[i] < min_mhz) || (fpoints[i] > max_mhz))
continue;
last_mhz = fpoints[i];
clk_point.freq_mhz = fpoints[i];
err = copy_to_user((void __user *)entry, &clk_point,
sizeof(clk_point));
num_points++;
entry++;
}
args->num_entries = num_points;
fail:
kfree(fpoints);
return err;
}
static int nvgpu_gpu_clk_get_range(struct gk20a *g,
struct gk20a_ctrl_priv *priv,
struct nvgpu_gpu_clk_range_args *args)
{
struct nvgpu_gpu_clk_range clk_range;
struct nvgpu_gpu_clk_range __user *entry;
struct nvgpu_clk_session *session = priv->clk_session;
u32 clk_domains = 0;
u32 num_domains;
int bit;
u16 min_mhz, max_mhz;
int err;
gk20a_dbg_fn("");
if (!session || args->flags)
return -EINVAL;
args->num_entries = 0;
clk_domains = nvgpu_clk_arb_get_arbiter_clk_domains(g);
num_domains = hweight_long(clk_domains);
if (!args->max_entries) {
args->max_entries = num_domains;
return 0;
}
if (args->max_entries < num_domains)
return -EINVAL;
entry = (struct nvgpu_gpu_clk_range __user *)
(uintptr_t)args->clk_range_entries;
memset(&clk_range, 0, sizeof(clk_range));
while (clk_domains) {
bit = ffs(clk_domains) - 1;
clk_range.clk_domain = BIT(bit);
err = nvgpu_clk_arb_get_arbiter_clk_range(g,
clk_range.clk_domain, &min_mhz, &max_mhz);
if (err)
return err;
clk_range.min_mhz = min_mhz;
clk_range.max_mhz = max_mhz;
err = copy_to_user(entry, &clk_range, sizeof(clk_range));
if (err)
return -EFAULT;
entry++;
clk_domains &= ~BIT(bit);
}
args->num_entries = num_domains;
return 0;
}
static int nvgpu_gpu_clk_set_info(struct gk20a *g,
struct gk20a_ctrl_priv *priv,
struct nvgpu_gpu_clk_set_info_args *args)
{
struct nvgpu_gpu_clk_info clk_info;
struct nvgpu_gpu_clk_info __user *entry;
struct nvgpu_clk_session *session = priv->clk_session;
u32 clk_domains = 0;
u32 i;
int fd;
gk20a_dbg_fn("");
if (!session || args->flags)
return -EINVAL;
clk_domains = nvgpu_clk_arb_get_arbiter_clk_domains(g);
if (!clk_domains)
return -EINVAL;
fd = nvgpu_clk_arb_install_session_fd(g, session);
if (fd < 0)
return fd;
entry = (struct nvgpu_gpu_clk_info __user *)
(uintptr_t)args->clk_info_entries;
for (i = 0; i < args->num_entries; i++, entry++) {
if (copy_from_user(&clk_info, entry, sizeof(clk_info)))
return -EFAULT;
if ((clk_info.clk_domain & clk_domains) != clk_info.clk_domain)
return -EINVAL;
if (hweight_long(clk_info.clk_domain) != 1)
return -EINVAL;
}
entry = (struct nvgpu_gpu_clk_info __user *)
(uintptr_t)args->clk_info_entries;
for (i = 0; i < args->num_entries; i++, entry++) {
if (copy_from_user(&clk_info, (void __user *)entry,
sizeof(clk_info)))
return -EFAULT;
nvgpu_clk_arb_set_session_target_mhz(session,
clk_info.clk_domain, clk_info.target_mhz);
}
nvgpu_clk_arb_apply_session_constraints(g, session);
args->req_nr = nvgpu_clk_arb_get_session_req_nr(g, session);
args->fd = fd;
return 0;
}
static int nvgpu_gpu_clk_get_info(struct gk20a *g,
struct gk20a_ctrl_priv *priv,
struct nvgpu_gpu_clk_get_info_args *args)
{
struct nvgpu_gpu_clk_info clk_info;
struct nvgpu_gpu_clk_info __user *entry;
struct nvgpu_clk_session *session = priv->clk_session;
u32 clk_domains = 0;
u32 num_domains;
u16 actual_mhz;
u16 target_mhz;
int err;
u32 i;
gk20a_dbg_fn("");
if (!session || args->flags)
return -EINVAL;
clk_domains = nvgpu_clk_arb_get_arbiter_clk_domains(g);
if (!clk_domains)
return -EINVAL;
args->last_req_nr = nvgpu_clk_arb_get_arbiter_req_nr(g);
num_domains = hweight_long(clk_domains);
if (!args->num_entries) {
args->num_entries = num_domains;
return 0;
}
entry = (struct nvgpu_gpu_clk_info __user *)
(uintptr_t)args->clk_info_entries;
for (i = 0; i < args->num_entries; i++, entry++) {
if (copy_from_user(&clk_info, (void __user *)entry,
sizeof(clk_info)))
return -EFAULT;
err = nvgpu_clk_arb_get_arbiter_actual_mhz(g,
clk_info.clk_domain, &actual_mhz);
if (err)
return err;
err = nvgpu_clk_arb_get_session_target_mhz(session,
clk_info.clk_domain, &target_mhz);
if (err)
return err;
clk_info.actual_mhz = actual_mhz;
clk_info.target_mhz = target_mhz;
err = copy_to_user((void __user *)entry, &clk_info,
sizeof(clk_info));
if (err)
return -EFAULT;
}
return 0;
}
long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
struct gk20a_ctrl_priv *priv = filp->private_data;
struct device *dev = priv->dev;
struct gk20a *g = get_gk20a(dev);
struct nvgpu_gpu_zcull_get_ctx_size_args *get_ctx_size_args;
struct nvgpu_gpu_zcull_get_info_args *get_info_args;
@@ -1050,6 +1356,26 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
(struct nvgpu_gpu_get_memory_state_args *)buf);
break;
case NVGPU_GPU_IOCTL_CLK_GET_RANGE:
err = nvgpu_gpu_clk_get_range(g, priv,
(struct nvgpu_gpu_clk_range_args *)buf);
break;
case NVGPU_GPU_IOCTL_CLK_GET_VF_POINTS:
err = nvgpu_gpu_clk_get_vf_points(g, priv,
(struct nvgpu_gpu_clk_vf_points_args *)buf);
break;
case NVGPU_GPU_IOCTL_CLK_SET_INFO:
err = nvgpu_gpu_clk_set_info(g, priv,
(struct nvgpu_gpu_clk_set_info_args *)buf);
break;
case NVGPU_GPU_IOCTL_CLK_GET_INFO:
err = nvgpu_gpu_clk_get_info(g, priv,
(struct nvgpu_gpu_clk_get_info_args *)buf);
break;
default:
dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd);
err = -ENOTTY;

@@ -974,6 +974,12 @@ int gk20a_pm_finalize_poweron(struct device *dev)
}
#endif
err = nvgpu_clk_arb_init_arbiter(g);
if (err) {
gk20a_err(dev, "failed to init clk arb");
goto done;
}
if (g->ops.pmu.is_pmu_supported(g)) {
err = gk20a_init_pmu_support(g);
if (err) {
@@ -1644,6 +1650,8 @@ static int __exit gk20a_remove(struct platform_device *pdev)
if (platform->has_ce)
gk20a_ce_destroy(g);
nvgpu_clk_arb_cleanup_arbiter(g);
gk20a_user_deinit(dev, &nvgpu_class);
debugfs_remove_recursive(platform->debugfs);

@@ -56,6 +56,7 @@ struct acr_desc;
#include "gm206/bios_gm206.h" #include "gm206/bios_gm206.h"
#ifdef CONFIG_ARCH_TEGRA_18x_SOC #ifdef CONFIG_ARCH_TEGRA_18x_SOC
#include "clk/clk.h" #include "clk/clk.h"
#include "clk/clk_arb.h"
#include "perf/perf.h" #include "perf/perf.h"
#include "pmgr/pmgr.h" #include "pmgr/pmgr.h"
#include "therm/thrm.h" #include "therm/thrm.h"
@@ -632,6 +633,13 @@ struct gpu_ops {
int (*suspend_clk_support)(struct gk20a *g);
u32 (*get_crystal_clk_hz)(struct gk20a *g);
} clk;
struct {
u32 (*get_arbiter_clk_domains)(struct gk20a *g);
int (*get_arbiter_clk_range)(struct gk20a *g, u32 api_domain,
u16 *min_mhz, u16 *max_mhz);
int (*get_arbiter_clk_default)(struct gk20a *g, u32 api_domain,
u16 *default_mhz);
} clk_arb;
bool privsecurity;
bool securegpccs;
bool pmupstate;
@@ -956,6 +964,8 @@ struct gk20a {
struct nvgpu_bios bios;
struct debugfs_blob_wrapper bios_blob;
struct nvgpu_clk_arb *clk_arb;
struct gk20a_ce_app ce_app;
/* PCI device identifier */

@@ -510,6 +510,171 @@ struct nvgpu_gpu_alloc_vidmem_args {
};
};
#define NVGPU_GPU_CLK_DOMAIN_MCLK (0x00000010)
#define NVGPU_GPU_CLK_DOMAIN_GPC2CLK (0x00010000)
struct nvgpu_gpu_clk_range {
/* Flags (not currently used) */
__u32 flags;
/* NVGPU_GPU_CLK_DOMAIN_* */
__u32 clk_domain;
__u32 min_mhz;
__u32 max_mhz;
};
struct nvgpu_gpu_clk_range_args {
/* Flags (not currently used) */
__u32 flags;
/* in/out: max number of entries in the clk_range_entries buffer. If zero,
NVGPU_GPU_IOCTL_CLK_GET_RANGE will return 0 and max_entries will be
set to the number of clock domains. If there are more clock domains
than max_entries, the ioctl will return -EINVAL.
*/
__u16 max_entries;
/* out: number of nvgpu_gpu_clk_range entries contained in
clk_range_entries */
__u16 num_entries;
/* in: Pointer to clock range entries in the caller's address space.
size must be >= max_entries * sizeof(struct nvgpu_gpu_clk_range)
*/
__u64 clk_range_entries;
};
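For illustration, a minimal userspace sketch of the two-pass range query this struct describes. This is an assumption-laden example, not part of the commit: ctrl_fd is taken to be an already-open gk20a control node, the header is assumed to be installed as <linux/nvgpu.h>, and query_clk_ranges is an illustrative helper name.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>   /* assumed install location of this uapi header */

/* Query min/max MHz for every arbitrated clock domain (illustrative). */
static int query_clk_ranges(int ctrl_fd)
{
    struct nvgpu_gpu_clk_range_args args;
    struct nvgpu_gpu_clk_range *ranges;
    unsigned int i;
    int err;

    /* First pass: max_entries == 0 just reports the number of domains. */
    memset(&args, 0, sizeof(args));
    err = ioctl(ctrl_fd, NVGPU_GPU_IOCTL_CLK_GET_RANGE, &args);
    if (err)
        return err;

    ranges = calloc(args.max_entries, sizeof(*ranges));
    if (!ranges)
        return -1;

    /* Second pass: the kernel fills one entry per clock domain. */
    args.clk_range_entries = (uintptr_t)ranges;
    err = ioctl(ctrl_fd, NVGPU_GPU_IOCTL_CLK_GET_RANGE, &args);
    for (i = 0; !err && i < args.num_entries; i++)
        printf("domain 0x%x: %u..%u MHz\n", ranges[i].clk_domain,
               ranges[i].min_mhz, ranges[i].max_mhz);

    free(ranges);
    return err;
}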
struct nvgpu_gpu_clk_vf_point {
/* Flags (not currently used) */
__u32 flags;
__u32 freq_mhz;
};
struct nvgpu_gpu_clk_vf_points_args {
/* in: Flags (not currently used) */
__u32 flags;
/* in: NVGPU_GPU_CLK_DOMAIN_* */
__u32 clk_domain;
/* in/out: max number of nvgpu_gpu_clk_vf_point entries in
clk_vf_point_entries. If max_entries is zero,
NVGPU_GPU_IOCTL_CLK_GET_VF_POINTS will return 0 and max_entries will
be set to the max number of VF entries for this clock domain. If
there are more VF entries than max_entries, the ioctl will return
-EINVAL.
*/
__u16 max_entries;
/* out: Number of nvgpu_gpu_clk_vf_point entries returned in
clk_vf_point_entries. Number of entries might vary depending on
thermal conditions.
*/
__u16 num_entries;
__u32 reserved;
/* in: Pointer to clock VF point entries in the caller's address space.
size must be >= max_entries * sizeof(struct nvgpu_gpu_clk_vf_point).
*/
__u64 clk_vf_point_entries;
};
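Along the same lines, a hedged sketch of the VF-point query for a single domain. It reuses the includes from the range sketch above; query_vf_points is an illustrative name, not part of the driver.

/* List the VF (frequency) points exposed for one clock domain. */
static int query_vf_points(int ctrl_fd, uint32_t domain)
{
    struct nvgpu_gpu_clk_vf_points_args args;
    struct nvgpu_gpu_clk_vf_point *points;
    unsigned int i;
    int err;

    /* First pass: learn how many VF points the domain exposes. */
    memset(&args, 0, sizeof(args));
    args.clk_domain = domain;   /* e.g. NVGPU_GPU_CLK_DOMAIN_GPC2CLK */
    err = ioctl(ctrl_fd, NVGPU_GPU_IOCTL_CLK_GET_VF_POINTS, &args);
    if (err)
        return err;

    points = calloc(args.max_entries, sizeof(*points));
    if (!points)
        return -1;

    /* Second pass: num_entries may come back smaller than max_entries,
     * e.g. when thermal conditions prune the list. */
    args.clk_vf_point_entries = (uintptr_t)points;
    err = ioctl(ctrl_fd, NVGPU_GPU_IOCTL_CLK_GET_VF_POINTS, &args);
    for (i = 0; !err && i < args.num_entries; i++)
        printf("%u MHz\n", points[i].freq_mhz);

    free(points);
    return err;
}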
struct nvgpu_gpu_clk_info {
/* Flags (not currently used) */
__u32 flags;
/* NVGPU_GPU_CLK_DOMAIN_* */
__u32 clk_domain;
/* target clock frequency for the domain in MHz. Should be
specified with a non-zero value in NVGPU_GPU_IOCTL_CLK_SET_INFO.
*/
__u32 target_mhz;
/* actual clock frequency for the domain in MHz. This value
may deviate from the desired target frequency due to PLL constraints.
Not used in NVGPU_GPU_IOCTL_CLK_SET_INFO.
*/
__u32 actual_mhz;
};
struct nvgpu_gpu_clk_get_info_args {
/* in: Flags (not currently used). */
__u32 flags;
__u16 pad0;
/* in/out: Number of clock info entries contained in clk_info_entries.
If zero, NVGPU_GPU_IOCTL_CLK_GET_INFO will return 0 and
num_entries will be set to the number of clock domains. Also,
last_req_nr will be updated, which allows checking whether a given
request has completed. If more entries are requested than there are
clock domains, the ioctl will return -EINVAL.
*/
__u16 num_entries;
/* in: Pointer to nvgpu_gpu_clk_info entries in the caller's address
space. Buffer size must be at least:
num_entries * sizeof(struct nvgpu_gpu_clk_info)
For each entry, the clk_domain to be queried should be set. Note
that clk_info_entries passed to NVGPU_GPU_IOCTL_CLK_SET_INFO
can be re-used on completion for NVGPU_GPU_IOCTL_CLK_GET_INFO.
This allows checking actual_mhz.
*/
__u64 clk_info_entries;
__u32 pad1;
/* out: sequence number of last processed request. sequence numbers
are per-user.
*/
__u32 last_req_nr;
};
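A sketch of a single-domain NVGPU_GPU_IOCTL_CLK_GET_INFO query, again with the includes from the earlier sketches assumed and read_clk_info as an illustrative name.

/* Read the current target and actual frequency for one clock domain. */
static int read_clk_info(int ctrl_fd, uint32_t domain)
{
    struct nvgpu_gpu_clk_get_info_args args;
    struct nvgpu_gpu_clk_info info;
    int err;

    memset(&info, 0, sizeof(info));
    info.clk_domain = domain;

    memset(&args, 0, sizeof(args));
    args.num_entries = 1;
    args.clk_info_entries = (uintptr_t)&info;

    err = ioctl(ctrl_fd, NVGPU_GPU_IOCTL_CLK_GET_INFO, &args);
    if (!err)
        printf("domain 0x%x: target %u MHz, actual %u MHz (last_req_nr %u)\n",
               domain, info.target_mhz, info.actual_mhz, args.last_req_nr);
    return err;
}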
struct nvgpu_gpu_clk_set_info_args {
/* in: Flags (not currently used). */
__u32 flags;
__u16 pad0;
/* Number of clock info entries contained in clk_info_entries.
Must be > 0.
*/
__u16 num_entries;
/* Pointer to clock info entries in the caller's address space. Buffer
size must be at least
num_entries * sizeof(struct nvgpu_gpu_clk_info)
*/
__u64 clk_info_entries;
/* out: File descriptor for completions and event notifications.
If the application does not close this fd after completion, the
same fd will be returned for subsequent requests (recommended).
*/
int fd;
/* out: sequence number for this request. In order to determine that
a request has completed, an application should check this sequence
number against last_req_nr from NVGPU_GPU_IOCTL_CLK_GET_INFO, using
nvgpu_clk_req_complete(req_nr, last_req_nr);
*/
__u32 req_nr;
};
static inline int nvgpu_clk_req_complete(__u32 req_nr, __u32 last_req_nr)
{
return ((long)(last_req_nr - req_nr) >= 0);
}
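Finally, a sketch of how req_nr, last_req_nr and nvgpu_clk_req_complete() are meant to fit together: submit a target with NVGPU_GPU_IOCTL_CLK_SET_INFO, then poll NVGPU_GPU_IOCTL_CLK_GET_INFO until the arbiter has processed the request. Busy-polling is used only to keep the example short; in practice the returned fd is intended for event-based completion. set_clk_and_wait is an illustrative name and the includes from the earlier sketches are assumed.

/* Request a new target frequency and wait until the arbiter applied it. */
static int set_clk_and_wait(int ctrl_fd, uint32_t domain, uint32_t target_mhz)
{
    struct nvgpu_gpu_clk_set_info_args set_args;
    struct nvgpu_gpu_clk_get_info_args get_args;
    struct nvgpu_gpu_clk_info info;
    int err;

    memset(&info, 0, sizeof(info));
    info.clk_domain = domain;
    info.target_mhz = target_mhz;

    memset(&set_args, 0, sizeof(set_args));
    set_args.num_entries = 1;
    set_args.clk_info_entries = (uintptr_t)&info;
    err = ioctl(ctrl_fd, NVGPU_GPU_IOCTL_CLK_SET_INFO, &set_args);
    if (err)
        return err;

    /* Poll until last_req_nr has caught up with our req_nr. */
    do {
        memset(&get_args, 0, sizeof(get_args));
        get_args.num_entries = 1;
        get_args.clk_info_entries = (uintptr_t)&info;
        err = ioctl(ctrl_fd, NVGPU_GPU_IOCTL_CLK_GET_INFO, &get_args);
        if (err)
            return err;
    } while (!nvgpu_clk_req_complete(set_args.req_nr, get_args.last_req_nr));

    /* Keeping set_args.fd open is recommended so later SET_INFO calls
     * reuse it; info.actual_mhz now holds the arbitrated frequency. */
    return 0;
}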
struct nvgpu_gpu_get_memory_state_args {
/*
* Current free space for this device; may change even when any
@@ -596,6 +761,14 @@ struct nvgpu_gpu_get_fbp_l2_masks_args {
#define NVGPU_GPU_IOCTL_ALLOC_VIDMEM \
_IOWR(NVGPU_GPU_IOCTL_MAGIC, 27, \
struct nvgpu_gpu_alloc_vidmem_args)
#define NVGPU_GPU_IOCTL_CLK_GET_RANGE \
_IOWR(NVGPU_GPU_IOCTL_MAGIC, 28, struct nvgpu_gpu_clk_range_args)
#define NVGPU_GPU_IOCTL_CLK_GET_VF_POINTS \
_IOWR(NVGPU_GPU_IOCTL_MAGIC, 29, struct nvgpu_gpu_clk_vf_points_args)
#define NVGPU_GPU_IOCTL_CLK_GET_INFO \
_IOWR(NVGPU_GPU_IOCTL_MAGIC, 30, struct nvgpu_gpu_clk_get_info_args)
#define NVGPU_GPU_IOCTL_CLK_SET_INFO \
_IOWR(NVGPU_GPU_IOCTL_MAGIC, 31, struct nvgpu_gpu_clk_set_info_args)
#define NVGPU_GPU_IOCTL_GET_MEMORY_STATE \
_IOWR(NVGPU_GPU_IOCTL_MAGIC, 33, \
struct nvgpu_gpu_get_memory_state_args)