gpu: nvgpu: update clock controls

Install one completion fd per SET request.
Deliver notifications on a dedicated event fd.
Change the frequency unit from MHz to Hz.
Remove sequence numbers from the dummy arbiter.
Add an effective clock type (frequency queried from counters).

Jira DNVGPU-125

Change-Id: Ica364eccdf85b188fd208f770e4eae0e9f0379e9
Signed-off-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-on: http://git-master/r/1230224
(cherry picked from commit f9b06686c090c676e60e1e137fdc9bbfc76d4843)
Reviewed-on: http://git-master/r/1243109
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
This commit is contained in:
Thomas Fleury
2016-09-30 16:11:30 -07:00
committed by mobile promotions
parent 94cea420c8
commit a8f90069e9
3 changed files with 95 additions and 78 deletions

View File

@@ -76,7 +76,7 @@ int gk20a_ctrl_dev_release(struct inode *inode, struct file *filp)
gk20a_dbg_fn(""); gk20a_dbg_fn("");
if (clk_session) if (clk_session)
nvgpu_clk_arb_cleanup_session(g, clk_session); nvgpu_clk_arb_release_session(g, clk_session);
kfree(priv); kfree(priv);
return 0; return 0;
@@ -834,6 +834,8 @@ static int nvgpu_gpu_clk_get_vf_points(struct gk20a *g,
u32 i; u32 i;
u32 max_points = 0; u32 max_points = 0;
u32 num_points = 0; u32 num_points = 0;
u64 min_hz;
u64 max_hz;
u16 min_mhz; u16 min_mhz;
u16 max_mhz; u16 max_mhz;
@@ -862,7 +864,7 @@ static int nvgpu_gpu_clk_get_vf_points(struct gk20a *g,
return -EINVAL; return -EINVAL;
err = nvgpu_clk_arb_get_arbiter_clk_range(g, args->clk_domain, err = nvgpu_clk_arb_get_arbiter_clk_range(g, args->clk_domain,
&min_mhz, &max_mhz); &min_hz, &max_hz);
if (err) if (err)
return err; return err;
@@ -879,6 +881,8 @@ static int nvgpu_gpu_clk_get_vf_points(struct gk20a *g,
(uintptr_t)args->clk_vf_point_entries; (uintptr_t)args->clk_vf_point_entries;
last_mhz = 0; last_mhz = 0;
min_mhz = (u16)(min_hz / (u64)MHZ);
max_mhz = (u16)(max_hz / (u64)MHZ);
num_points = 0; num_points = 0;
for (i = 0; (i < max_points) && !err; i++) { for (i = 0; (i < max_points) && !err; i++) {
@@ -891,7 +895,7 @@ static int nvgpu_gpu_clk_get_vf_points(struct gk20a *g,
continue; continue;
last_mhz = fpoints[i]; last_mhz = fpoints[i];
clk_point.freq_mhz = fpoints[i]; clk_point.freq_hz = (u64)fpoints[i] * (u64)MHZ;
err = copy_to_user((void __user *)entry, &clk_point, err = copy_to_user((void __user *)entry, &clk_point,
sizeof(clk_point)); sizeof(clk_point));
@@ -919,7 +923,6 @@ static int nvgpu_gpu_clk_get_range(struct gk20a *g,
u32 num_domains; u32 num_domains;
u32 i; u32 i;
int bit; int bit;
u16 min_mhz, max_mhz;
int err; int err;
gk20a_dbg_fn(""); gk20a_dbg_fn("");
@@ -963,15 +966,13 @@ static int nvgpu_gpu_clk_get_range(struct gk20a *g,
clk_domains &= ~BIT(bit); clk_domains &= ~BIT(bit);
} }
clk_range.flags = 0;
err = nvgpu_clk_arb_get_arbiter_clk_range(g, err = nvgpu_clk_arb_get_arbiter_clk_range(g,
clk_range.clk_domain, &min_mhz, &max_mhz); clk_range.clk_domain,
&clk_range.min_hz, &clk_range.max_hz);
if (err) if (err)
return err; return err;
clk_range.min_mhz = min_mhz;
clk_range.max_mhz = max_mhz;
clk_range.flags = 0;
err = copy_to_user(entry, &clk_range, sizeof(clk_range)); err = copy_to_user(entry, &clk_range, sizeof(clk_range));
if (err) if (err)
return -EFAULT; return -EFAULT;
@@ -992,7 +993,6 @@ static int nvgpu_gpu_clk_set_info(struct gk20a *g,
struct nvgpu_clk_session *session = priv->clk_session; struct nvgpu_clk_session *session = priv->clk_session;
u32 clk_domains = 0; u32 clk_domains = 0;
u32 i; u32 i;
int fd;
gk20a_dbg_fn(""); gk20a_dbg_fn("");
@@ -1003,10 +1003,6 @@ static int nvgpu_gpu_clk_set_info(struct gk20a *g,
if (!clk_domains) if (!clk_domains)
return -EINVAL; return -EINVAL;
fd = nvgpu_clk_arb_install_session_fd(g, session);
if (fd < 0)
return fd;
entry = (struct nvgpu_gpu_clk_info __user *) entry = (struct nvgpu_gpu_clk_info __user *)
(uintptr_t)args->clk_info_entries; (uintptr_t)args->clk_info_entries;
@@ -1031,16 +1027,12 @@ static int nvgpu_gpu_clk_set_info(struct gk20a *g,
sizeof(clk_info))) sizeof(clk_info)))
return -EFAULT; return -EFAULT;
nvgpu_clk_arb_set_session_target_mhz(session, nvgpu_clk_arb_set_session_target_hz(session,
clk_info.clk_domain, clk_info.target_mhz); clk_info.clk_domain, clk_info.freq_hz);
} }
nvgpu_clk_arb_apply_session_constraints(g, session); return nvgpu_clk_arb_apply_session_constraints(g, session,
&args->completion_fd);
args->req_nr = nvgpu_clk_arb_get_session_req_nr(g, session);
args->fd = fd;
return 0;
} }
@@ -1053,8 +1045,6 @@ static int nvgpu_gpu_clk_get_info(struct gk20a *g,
struct nvgpu_clk_session *session = priv->clk_session; struct nvgpu_clk_session *session = priv->clk_session;
u32 clk_domains = 0; u32 clk_domains = 0;
u32 num_domains; u32 num_domains;
u16 actual_mhz;
u16 target_mhz;
u32 i; u32 i;
int err; int err;
int bit; int bit;
@@ -1064,8 +1054,6 @@ static int nvgpu_gpu_clk_get_info(struct gk20a *g,
if (!session) if (!session)
return -EINVAL; return -EINVAL;
args->last_req_nr = nvgpu_clk_arb_get_arbiter_req_nr(g);
if (!args->flags) { if (!args->flags) {
clk_domains = nvgpu_clk_arb_get_arbiter_clk_domains(g); clk_domains = nvgpu_clk_arb_get_arbiter_clk_domains(g);
num_domains = hweight_long(clk_domains); num_domains = hweight_long(clk_domains);
@@ -1100,20 +1088,29 @@ static int nvgpu_gpu_clk_get_info(struct gk20a *g,
bit = ffs(clk_domains) - 1; bit = ffs(clk_domains) - 1;
clk_info.clk_domain = BIT(bit); clk_info.clk_domain = BIT(bit);
clk_domains &= ~BIT(bit); clk_domains &= ~BIT(bit);
clk_info.clk_type = args->clk_type;
} }
err = nvgpu_clk_arb_get_arbiter_actual_mhz(g, switch (clk_info.clk_type) {
clk_info.clk_domain, &actual_mhz); case NVGPU_GPU_CLK_TYPE_TARGET:
err = nvgpu_clk_arb_get_session_target_hz(session,
clk_info.clk_domain, &clk_info.freq_hz);
break;
case NVGPU_GPU_CLK_TYPE_ACTUAL:
err = nvgpu_clk_arb_get_arbiter_actual_hz(g,
clk_info.clk_domain, &clk_info.freq_hz);
break;
case NVGPU_GPU_CLK_TYPE_EFFECTIVE:
err = nvgpu_clk_arb_get_arbiter_effective_hz(g,
clk_info.clk_domain, &clk_info.freq_hz);
break;
default:
err = -EINVAL;
break;
}
if (err) if (err)
return err; return err;
err = nvgpu_clk_arb_get_session_target_mhz(session,
clk_info.clk_domain, &target_mhz);
if (err)
return err;
clk_info.actual_mhz = actual_mhz;
clk_info.target_mhz = target_mhz;
clk_info.flags = 0; clk_info.flags = 0;
err = copy_to_user((void __user *)entry, &clk_info, err = copy_to_user((void __user *)entry, &clk_info,
@@ -1127,6 +1124,20 @@ static int nvgpu_gpu_clk_get_info(struct gk20a *g,
return 0; return 0;
} }
static int nvgpu_gpu_clk_get_event_fd(struct gk20a *g,
struct gk20a_ctrl_priv *priv,
struct nvgpu_gpu_clk_get_event_fd_args *args)
{
struct nvgpu_clk_session *session = priv->clk_session;
gk20a_dbg_fn("");
if (!session || args->flags)
return -EINVAL;
return nvgpu_clk_arb_install_event_fd(g, session, &args->event_fd);
}
long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{ {
struct gk20a_ctrl_priv *priv = filp->private_data; struct gk20a_ctrl_priv *priv = filp->private_data;
@@ -1409,6 +1420,11 @@ long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg
(struct nvgpu_gpu_clk_get_info_args *)buf); (struct nvgpu_gpu_clk_get_info_args *)buf);
break; break;
case NVGPU_GPU_IOCTL_CLK_GET_EVENT_FD:
err = nvgpu_gpu_clk_get_event_fd(g, priv,
(struct nvgpu_gpu_clk_get_event_fd_args *)buf);
break;
default: default:
dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd); dev_dbg(dev_from_gk20a(g), "unrecognized gpu ioctl cmd: 0x%x", cmd);
err = -ENOTTY; err = -ENOTTY;

View File

@@ -636,9 +636,9 @@ struct gpu_ops {
struct { struct {
u32 (*get_arbiter_clk_domains)(struct gk20a *g); u32 (*get_arbiter_clk_domains)(struct gk20a *g);
int (*get_arbiter_clk_range)(struct gk20a *g, u32 api_domain, int (*get_arbiter_clk_range)(struct gk20a *g, u32 api_domain,
u16 *min_mhz, u16 *max_mhz); u64 *min_hz, u64 *max_hz);
int (*get_arbiter_clk_default)(struct gk20a *g, u32 api_domain, int (*get_arbiter_clk_default)(struct gk20a *g, u32 api_domain,
u16 *default_mhz); u64 *default_hz);
} clk_arb; } clk_arb;
bool privsecurity; bool privsecurity;
bool securegpccs; bool securegpccs;

View File

@@ -520,8 +520,8 @@ struct nvgpu_gpu_clk_range {
/* NVGPU_GPU_CLK_DOMAIN_* */ /* NVGPU_GPU_CLK_DOMAIN_* */
__u32 clk_domain; __u32 clk_domain;
__u32 min_mhz; __u64 min_hz;
__u32 max_mhz; __u64 max_hz;
}; };
/* Request on specific clock domains */ /* Request on specific clock domains */
@@ -551,10 +551,7 @@ struct nvgpu_gpu_clk_range_args {
}; };
struct nvgpu_gpu_clk_vf_point { struct nvgpu_gpu_clk_vf_point {
__u64 freq_hz;
/* Flags (not currently used) */
__u32 flags;
__u32 freq_mhz;
}; };
struct nvgpu_gpu_clk_vf_points_args { struct nvgpu_gpu_clk_vf_points_args {
@@ -569,7 +566,7 @@ struct nvgpu_gpu_clk_vf_points_args {
clk_vf_point_entries. If max_entries is zero, clk_vf_point_entries. If max_entries is zero,
NVGPU_GPU_IOCTL_CLK_GET_VF_POINTS will return 0 and max_entries will NVGPU_GPU_IOCTL_CLK_GET_VF_POINTS will return 0 and max_entries will
be set to the max number of VF entries for this clock domain. If be set to the max number of VF entries for this clock domain. If
there are more entries than max_entires, then ioctl will return there are more entries than max_entries, then ioctl will return
-EINVAL. -EINVAL.
*/ */
__u16 max_entries; __u16 max_entries;
@@ -588,24 +585,31 @@ struct nvgpu_gpu_clk_vf_points_args {
__u64 clk_vf_point_entries; __u64 clk_vf_point_entries;
}; };
/* Target clock requested by application*/
#define NVGPU_GPU_CLK_TYPE_TARGET 1
/* Actual clock frequency for the domain.
May deviate from desired target frequency due to PLL constraints. */
#define NVGPU_GPU_CLK_TYPE_ACTUAL 2
/* Effective clock, measured from hardware */
#define NVGPU_GPU_CLK_TYPE_EFFECTIVE 3
struct nvgpu_gpu_clk_info { struct nvgpu_gpu_clk_info {
/* Flags (not currently used) */ /* Flags (not currently used) */
__u32 flags; __u16 flags;
/* NVGPU_GPU_CLK_DOMAIN_* */ /* in: When NVGPU_GPU_CLK_FLAG_SPECIFIC_DOMAINS set, indicates
the type of clock info to be returned for this entry. It is
allowed to have several entries with different clock types in
the same request (for instance query both target and actual
clocks for a given clock domain). This field is ignored for a
SET operation. */
__u16 clk_type;
/* NVGPU_GPU_CLK_DOMAIN_xxx */
__u32 clk_domain; __u32 clk_domain;
/* target clock frequency for the domain in MHz. Should be __u64 freq_hz;
specified with a non-zero value in NVGPU_GPU_IOCTL_CLK_SET_INFO.
*/
__u32 target_mhz;
/* actual clock frequency for the domain in MHz. This value
may deviate from the desired target frequency due to PLL constraints.
Not used in NVGPU_GPU_IOCTL_CLK_SET_INFO.
*/
__u32 actual_mhz;
}; };
struct nvgpu_gpu_clk_get_info_args { struct nvgpu_gpu_clk_get_info_args {
@@ -617,7 +621,11 @@ struct nvgpu_gpu_clk_get_info_args {
*/ */
__u32 flags; __u32 flags;
__u16 pad0; /* in: indicates which type of clock info to be returned (see
NVGPU_GPU_CLK_TYPE_xxx). If NVGPU_GPU_CLK_FLAG_SPECIFIC_DOMAINS
is defined, clk_type is specified in each clock info entry instead.
*/
__u16 clk_type;
/* in/out: Number of clock info entries contained in clk_info_entries. /* in/out: Number of clock info entries contained in clk_info_entries.
If zero, NVGPU_GPU_IOCTL_CLK_GET_INFO will return 0 and If zero, NVGPU_GPU_IOCTL_CLK_GET_INFO will return 0 and
@@ -639,12 +647,6 @@ struct nvgpu_gpu_clk_get_info_args {
*/ */
__u64 clk_info_entries; __u64 clk_info_entries;
__u32 pad1;
/* out: sequence number of last processed request. sequence numbers
are per-user.
*/
__u32 last_req_nr;
}; };
struct nvgpu_gpu_clk_set_info_args { struct nvgpu_gpu_clk_set_info_args {
@@ -665,24 +667,21 @@ struct nvgpu_gpu_clk_set_info_args {
*/ */
__u64 clk_info_entries; __u64 clk_info_entries;
/* out: File descriptor for completions and event notifications. /* out: File descriptor for request completion. Application can poll
If application does not close this fd after completion, then the this file descriptor to determine when the request has completed.
same fd will be returned for subsequent request (recommended). The fd must be closed afterwards.
*/ */
int fd; int completion_fd;
/* out: sequence number for this request. In order to determine that
a request has completed, an application should check this sequence
number against last_req_nr from NVGPU_GPU_IOCTL_CLK_GET_INFO, using
nvgpu_clk_req_complete(req_nr, last_req_nr);
*/
__u32 req_nr;
}; };
static inline int nvgpu_clk_req_complete(__u32 req_nr, __u32 last_req_nr) struct nvgpu_gpu_clk_get_event_fd_args {
{
return ((long)(last_req_nr - req_nr) >= 0); /* in: Flags (not currently used). */
} __u32 flags;
/* out: File descriptor for events, i.e. any clock update. */
int event_fd;
};
struct nvgpu_gpu_get_memory_state_args { struct nvgpu_gpu_get_memory_state_args {
/* /*
@@ -778,6 +777,8 @@ struct nvgpu_gpu_get_fbp_l2_masks_args {
_IOWR(NVGPU_GPU_IOCTL_MAGIC, 30, struct nvgpu_gpu_clk_get_info_args) _IOWR(NVGPU_GPU_IOCTL_MAGIC, 30, struct nvgpu_gpu_clk_get_info_args)
#define NVGPU_GPU_IOCTL_CLK_SET_INFO \ #define NVGPU_GPU_IOCTL_CLK_SET_INFO \
_IOWR(NVGPU_GPU_IOCTL_MAGIC, 31, struct nvgpu_gpu_clk_set_info_args) _IOWR(NVGPU_GPU_IOCTL_MAGIC, 31, struct nvgpu_gpu_clk_set_info_args)
#define NVGPU_GPU_IOCTL_CLK_GET_EVENT_FD \
_IOWR(NVGPU_GPU_IOCTL_MAGIC, 32, struct nvgpu_gpu_clk_get_event_fd_args)
#define NVGPU_GPU_IOCTL_GET_MEMORY_STATE \ #define NVGPU_GPU_IOCTL_GET_MEMORY_STATE \
_IOWR(NVGPU_GPU_IOCTL_MAGIC, 33, \ _IOWR(NVGPU_GPU_IOCTL_MAGIC, 33, \
struct nvgpu_gpu_get_memory_state_args) struct nvgpu_gpu_get_memory_state_args)