mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
gpu: nvgpu: Re-factor perfmon unit
- Move the perfmon unit source code to the common/pmu/perfmon/ folder.
- Separate the perfmon unit headers under include/nvgpu/pmu/pmu_perfmon.h.
- Add a new structure, nvgpu_pmu_perfmon, for the perfmon unit. As part of
  the perfmon unit refactoring, this struct gathers all perfmon unit
  variables, such as perfmon_query and perfmon_ready, into one structure.
- Use the pmu_perfmon struct to access all perfmon variables, e.g.
  pmu->pmu_perfmon->perfmon_query, pmu->pmu_perfmon->perfmon_ready, and so on.

JIRA NVGPU-1961

Change-Id: I57516c646bfb256004dd7b719e40fafd3c2a09b2
Signed-off-by: Divya Singhatwaria <dsinghatwari@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2080555
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
commit 700c16599e
parent 3c08a91de8
committed by: mobile promotions
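The heart of the change is mechanical: every flat perfmon field on struct
nvgpu_pmu becomes an access through the new, heap-allocated struct
nvgpu_pmu_perfmon. A minimal before/after sketch of the pattern (illustrative
only, not a hunk from the diff below):

    /* before: perfmon state lived directly in struct nvgpu_pmu */
    pmu->perfmon_ready = true;
    pmu->load_shadow = load / 10U;

    /* after: the same state sits behind one pointer, allocated once by
     * nvgpu_pmu_initialize_perfmon() and freed by
     * nvgpu_pmu_deinitialize_perfmon() */
    pmu->pmu_perfmon->perfmon_ready = true;
    pmu->pmu_perfmon->load_shadow = load / 10U;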
@@ -94,7 +94,7 @@ nvgpu-y += \
 	common/pmu/pmu_fw.o \
 	common/pmu/pg/pmu_pg.o \
 	common/pmu/pg/pmu_aelpg.o \
-	common/pmu/pmu_perfmon.o \
+	common/pmu/perfmon/pmu_perfmon.o \
 	common/pmu/pmu_debug.o \
 	common/pmu/pmu_gk20a.o \
 	common/pmu/pmu_gm20b.o \

@@ -133,7 +133,7 @@ srcs += common/sim.c \
 	common/pmu/pmu_fw.c \
 	common/pmu/pg/pmu_pg.c \
 	common/pmu/pg/pmu_aelpg.c \
-	common/pmu/pmu_perfmon.c \
+	common/pmu/perfmon/pmu_perfmon.c \
 	common/pmu/pmu_debug.c \
 	common/pmu/pmu_gk20a.c \
 	common/pmu/pmu_gm20b.c \

@@ -30,6 +30,7 @@
 #include <nvgpu/pmu/therm.h>
 #include <nvgpu/pmu/lsfm.h>
 #include <nvgpu/pmu/super_surface.h>
+#include <nvgpu/pmu/pmu_perfmon.h>
 
 static int pmu_payload_extract(struct nvgpu_pmu *pmu, struct pmu_sequence *seq)
 {

@@ -20,17 +20,21 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
+#include <nvgpu/pmu/pmu_perfmon.h>
+#include <nvgpu/gk20a.h>
+#include <nvgpu/falcon.h>
+#include <nvgpu/bug.h>
 #include <nvgpu/enabled.h>
 #include <nvgpu/pmu.h>
 #include <nvgpu/pmu/cmd.h>
 #include <nvgpu/log.h>
 #include <nvgpu/bug.h>
 #include <nvgpu/pmuif/nvgpu_gpmu_cmdif.h>
-#include <nvgpu/gk20a.h>
+#include <nvgpu/kmem.h>
 
 static u8 get_perfmon_id(struct nvgpu_pmu *pmu)
 {
-    struct gk20a *g = gk20a_from_pmu(pmu);
+    struct gk20a *g = pmu->g;
     u32 ver = g->params.gpu_arch + g->params.gpu_impl;
     u8 unit_id;
 
@@ -64,7 +68,7 @@ void nvgpu_pmu_perfmon_rpc_handler(struct gk20a *g, struct nvgpu_pmu *pmu,
     case NV_PMU_RPC_ID_PERFMON_T18X_INIT:
         nvgpu_pmu_dbg(g,
             "reply NV_PMU_RPC_ID_PERFMON_INIT");
-        pmu->perfmon_ready = true;
+        pmu->pmu_perfmon->perfmon_ready = true;
         break;
     case NV_PMU_RPC_ID_PERFMON_T18X_START:
         nvgpu_pmu_dbg(g,
@@ -79,8 +83,8 @@ void nvgpu_pmu_perfmon_rpc_handler(struct gk20a *g, struct nvgpu_pmu *pmu,
             "reply NV_PMU_RPC_ID_PERFMON_QUERY");
         rpc_param = (struct nv_pmu_rpc_struct_perfmon_query *)
             rpc_payload->rpc_buff;
-        pmu->load = rpc_param->sample_buffer[0];
-        pmu->perfmon_query = 1;
+        pmu->pmu_perfmon->load = rpc_param->sample_buffer[0];
+        pmu->pmu_perfmon->perfmon_query = 1;
         /* set perfmon_query to 1 after load is copied */
         break;
     default:
@@ -89,9 +93,40 @@ void nvgpu_pmu_perfmon_rpc_handler(struct gk20a *g, struct nvgpu_pmu *pmu,
     }
 }
 
+int nvgpu_pmu_initialize_perfmon(struct gk20a *g, struct nvgpu_pmu *pmu)
+{
+    if (pmu->pmu_perfmon != NULL) {
+        /* Not to allocate a new buffer after railgating
+         * is done. Use the same memory for pmu_perfmon
+         * after railgating.
+         */
+        return 0;
+
+    } else {
+        /* One-time memory allocation for pmu_perfmon */
+        pmu->pmu_perfmon = (struct nvgpu_pmu_perfmon *)(nvgpu_kzalloc(g,
+            sizeof(struct nvgpu_pmu_perfmon)));
+        if (pmu->pmu_perfmon == NULL) {
+            nvgpu_err(g, "failed to initialize perfmon");
+            return -ENOMEM;
+        }
+    }
+    return 0;
+
+}
+
+void nvgpu_pmu_deinitialize_perfmon(struct gk20a *g, struct nvgpu_pmu *pmu)
+{
+    if (pmu->pmu_perfmon == NULL) {
+        return;
+    } else {
+        nvgpu_kfree(g, pmu->pmu_perfmon);
+    }
+}
+
 int nvgpu_pmu_init_perfmon(struct nvgpu_pmu *pmu)
 {
-    struct gk20a *g = gk20a_from_pmu(pmu);
+    struct gk20a *g = pmu->g;
     struct pmu_v *pv = &g->ops.pmu_ver;
     struct pmu_cmd cmd;
     struct pmu_payload payload;
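The allocation above is deliberately idempotent so that perfmon state survives
GPU railgating. A sketch of the resulting lifecycle, with call sites taken from
the hunks later in this change:

    /* Lifecycle sketch (call sites as shown in later hunks of this change):
     *
     * nvgpu_early_init_pmu_sw()
     *     -> nvgpu_pmu_initialize_perfmon(g, pmu);  one-time nvgpu_kzalloc()
     *
     * ... railgate / unrailgate cycles ...
     *     -> nvgpu_pmu_initialize_perfmon() returns 0 early; the existing
     *        allocation (and the counters it holds) is reused as-is
     *
     * nvgpu_remove_pmu_support()
     *     -> nvgpu_pmu_deinitialize_perfmon(g, pmu);  nvgpu_kfree()
     */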
@@ -104,16 +139,16 @@ int nvgpu_pmu_init_perfmon(struct nvgpu_pmu *pmu)
 
     nvgpu_log_fn(g, " ");
 
-    pmu->perfmon_ready = false;
+    pmu->pmu_perfmon->perfmon_ready = false;
 
     g->ops.pmu.pmu_init_perfmon_counter(g);
 
-    if (pmu->sample_buffer == 0U) {
+    if (pmu->pmu_perfmon->sample_buffer == 0U) {
         tmp_addr = nvgpu_alloc(&pmu->dmem, 2U * sizeof(u16));
         nvgpu_assert(tmp_addr <= U32_MAX);
-        pmu->sample_buffer = (u32)tmp_addr;
+        pmu->pmu_perfmon->sample_buffer = (u32)tmp_addr;
     }
-    if (pmu->sample_buffer == 0U) {
+    if (pmu->pmu_perfmon->sample_buffer == 0U) {
         nvgpu_err(g, "failed to allocate perfmon sample buffer");
         return -ENOMEM;
     }
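Note that sample_buffer is not a CPU pointer: it holds an offset inside the PMU
falcon's data memory, handed out by the nvgpu_alloc() call on &pmu->dmem above,
with room for what appear to be two 16-bit samples (2U * sizeof(u16)). A sketch
of how the same offset is consumed later in this file (see the
nvgpu_pmu_load_update() hunk below):

    u32 load = 0;
    /* read the first 16-bit counter back from falcon DMEM */
    err = nvgpu_falcon_copy_from_dmem(&pmu->flcn,
        pmu->pmu_perfmon->sample_buffer, (u8 *)&load, 2 * 1, 0);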
@@ -133,10 +168,9 @@ int nvgpu_pmu_init_perfmon(struct nvgpu_pmu *pmu)
     cmd.cmd.perfmon.cmd_type = PMU_PERFMON_CMD_ID_INIT;
     /* buffer to save counter values for pmu perfmon */
     pv->perfmon_cmd_init_set_sample_buffer(&cmd.cmd.perfmon,
-        (u16)pmu->sample_buffer);
+        (u16)pmu->pmu_perfmon->sample_buffer);
     /* number of sample periods below lower threshold
      * before pmu triggers perfmon decrease event
-     * TBD: = 15
      */
     pv->perfmon_cmd_init_set_dec_cnt(&cmd.cmd.perfmon, 15);
     /* index of base counter, aka. always ticking counter */

@@ -171,7 +205,7 @@ int nvgpu_pmu_init_perfmon(struct nvgpu_pmu *pmu)
 
 int nvgpu_pmu_perfmon_start_sampling(struct nvgpu_pmu *pmu)
 {
-    struct gk20a *g = gk20a_from_pmu(pmu);
+    struct gk20a *g = pmu->g;
     struct pmu_v *pv = &g->ops.pmu_ver;
     struct pmu_cmd cmd;
     struct pmu_payload payload;

@@ -197,7 +231,7 @@ int nvgpu_pmu_perfmon_start_sampling(struct nvgpu_pmu *pmu)
     pv->perfmon_start_set_group_id(&cmd.cmd.perfmon,
         PMU_DOMAIN_GROUP_PSTATE);
     pv->perfmon_start_set_state_id(&cmd.cmd.perfmon,
-        pmu->perfmon_state_id[PMU_DOMAIN_GROUP_PSTATE]);
+        pmu->pmu_perfmon->perfmon_state_id[PMU_DOMAIN_GROUP_PSTATE]);
 
     pv->perfmon_start_set_flags(&cmd.cmd.perfmon,
         PMU_PERFMON_FLAG_ENABLE_INCREASE |

@@ -230,7 +264,7 @@ int nvgpu_pmu_perfmon_start_sampling(struct nvgpu_pmu *pmu)
 
 int nvgpu_pmu_perfmon_stop_sampling(struct nvgpu_pmu *pmu)
 {
-    struct gk20a *g = gk20a_from_pmu(pmu);
+    struct gk20a *g = pmu->g;
     struct pmu_cmd cmd;
     u64 tmp_size;
 

@@ -258,7 +292,7 @@ int nvgpu_pmu_perfmon_stop_sampling(struct nvgpu_pmu *pmu)
 
 int nvgpu_pmu_load_norm(struct gk20a *g, u32 *load)
 {
-    *load = g->pmu.load_shadow;
+    *load = g->pmu.pmu_perfmon->load_shadow;
     return 0;
 }
 

@@ -267,28 +301,27 @@ int nvgpu_pmu_load_update(struct gk20a *g)
     struct nvgpu_pmu *pmu = &g->pmu;
     u32 load = 0;
     int err = 0;
-
-    if (!pmu->perfmon_ready) {
-        pmu->load_shadow = 0;
-        pmu->load = 0;
+    if (!pmu->pmu_perfmon->perfmon_ready) {
+        pmu->pmu_perfmon->load_shadow = 0;
+        pmu->pmu_perfmon->load = 0;
         return 0;
     }
 
     if (g->ops.pmu.pmu_perfmon_get_samples_rpc != NULL) {
         nvgpu_pmu_perfmon_get_samples_rpc(pmu);
-        load = pmu->load;
+        load = pmu->pmu_perfmon->load;
     } else {
         err = nvgpu_falcon_copy_from_dmem(&pmu->flcn,
-            pmu->sample_buffer,
-            (u8 *)&load, 2 * 1, 0);
+            pmu->pmu_perfmon->sample_buffer, (u8 *)&load, 2 * 1, 0);
         if (err != 0) {
             nvgpu_err(g, "PMU falcon DMEM copy failed");
             return err;
         }
     }
 
-    pmu->load_shadow = load / 10U;
-    pmu->load_avg = (((9U*pmu->load_avg) + pmu->load_shadow) / 10U);
+    pmu->pmu_perfmon->load_shadow = load / 10U;
+    pmu->pmu_perfmon->load_avg = (((9U*pmu->pmu_perfmon->load_avg) +
+        pmu->pmu_perfmon->load_shadow) / 10U);
 
     return err;
 }
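The smoothing arithmetic in nvgpu_pmu_load_update() is untouched by the rename:
it is an exponentially weighted moving average with a 9/10 weight on history
(integer math, so results truncate). Isolated from the diff for clarity:

    /* exponentially weighted moving average, alpha = 1/10, integer math */
    load_shadow = load / 10U;
    load_avg    = ((9U * load_avg) + load_shadow) / 10U;

Each update pulls load_avg one tenth of the way toward the latest load_shadow
sample, so short spikes in GPU load are damped before consumers such as the
Tegra EMC scaling code (see the hunk near the end of this change) read it.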
@@ -365,7 +398,7 @@ void nvgpu_pmu_reset_load_counters(struct gk20a *g)
 int nvgpu_pmu_handle_perfmon_event(struct nvgpu_pmu *pmu,
         struct pmu_perfmon_msg *msg)
 {
-    struct gk20a *g = gk20a_from_pmu(pmu);
+    struct gk20a *g = pmu->g;
 
     nvgpu_log_fn(g, " ");
 

@@ -374,16 +407,16 @@ int nvgpu_pmu_handle_perfmon_event(struct nvgpu_pmu *pmu,
         nvgpu_pmu_dbg(g, "perfmon increase event: ");
         nvgpu_pmu_dbg(g, "state_id %d, ground_id %d, pct %d",
             msg->gen.state_id, msg->gen.group_id, msg->gen.data);
-        (pmu->perfmon_events_cnt)++;
+        (pmu->pmu_perfmon->perfmon_events_cnt)++;
         break;
     case PMU_PERFMON_MSG_ID_DECREASE_EVENT:
         nvgpu_pmu_dbg(g, "perfmon decrease event: ");
         nvgpu_pmu_dbg(g, "state_id %d, ground_id %d, pct %d",
             msg->gen.state_id, msg->gen.group_id, msg->gen.data);
-        (pmu->perfmon_events_cnt)++;
+        (pmu->pmu_perfmon->perfmon_events_cnt)++;
         break;
     case PMU_PERFMON_MSG_ID_INIT_EVENT:
-        pmu->perfmon_ready = true;
+        pmu->pmu_perfmon->perfmon_ready = true;
         nvgpu_pmu_dbg(g, "perfmon init event");
         break;
     default:

@@ -393,7 +426,7 @@ int nvgpu_pmu_handle_perfmon_event(struct nvgpu_pmu *pmu,
     }
 
     /* restart sampling */
-    if (pmu->perfmon_sampling_enabled) {
+    if (pmu->pmu_perfmon->perfmon_sampling_enabled) {
         return g->ops.pmu.pmu_perfmon_start_sampling(&(g->pmu));
     }
 

@@ -403,7 +436,7 @@ int nvgpu_pmu_handle_perfmon_event(struct nvgpu_pmu *pmu,
 /* Perfmon RPC */
 int nvgpu_pmu_init_perfmon_rpc(struct nvgpu_pmu *pmu)
 {
-    struct gk20a *g = gk20a_from_pmu(pmu);
+    struct gk20a *g = pmu->g;
     struct nv_pmu_rpc_struct_perfmon_init rpc;
     int status = 0;
 

@@ -414,7 +447,7 @@ int nvgpu_pmu_init_perfmon_rpc(struct nvgpu_pmu *pmu)
     nvgpu_log_fn(g, " ");
 
     (void) memset(&rpc, 0, sizeof(struct nv_pmu_rpc_struct_perfmon_init));
-    pmu->perfmon_ready = false;
+    pmu->pmu_perfmon->perfmon_ready = false;
 
     g->ops.pmu.pmu_init_perfmon_counter(g);
 

@@ -451,7 +484,7 @@ exit:
 
 int nvgpu_pmu_perfmon_start_sampling_rpc(struct nvgpu_pmu *pmu)
 {
-    struct gk20a *g = gk20a_from_pmu(pmu);
+    struct gk20a *g = pmu->g;
     struct nv_pmu_rpc_struct_perfmon_start rpc;
     int status = 0;
 

@@ -463,7 +496,7 @@ int nvgpu_pmu_perfmon_start_sampling_rpc(struct nvgpu_pmu *pmu)
 
     (void) memset(&rpc, 0, sizeof(struct nv_pmu_rpc_struct_perfmon_start));
     rpc.group_id = PMU_DOMAIN_GROUP_PSTATE;
-    rpc.state_id = pmu->perfmon_state_id[PMU_DOMAIN_GROUP_PSTATE];
+    rpc.state_id = pmu->pmu_perfmon->perfmon_state_id[PMU_DOMAIN_GROUP_PSTATE];
     rpc.flags = PMU_PERFMON_FLAG_ENABLE_INCREASE |
         PMU_PERFMON_FLAG_ENABLE_DECREASE |
         PMU_PERFMON_FLAG_CLEAR_PREV;

@@ -482,7 +515,7 @@ int nvgpu_pmu_perfmon_start_sampling_rpc(struct nvgpu_pmu *pmu)
 
 int nvgpu_pmu_perfmon_stop_sampling_rpc(struct nvgpu_pmu *pmu)
 {
-    struct gk20a *g = gk20a_from_pmu(pmu);
+    struct gk20a *g = pmu->g;
     struct nv_pmu_rpc_struct_perfmon_stop rpc;
     int status = 0;
 

@@ -505,7 +538,7 @@ int nvgpu_pmu_perfmon_stop_sampling_rpc(struct nvgpu_pmu *pmu)
 
 int nvgpu_pmu_perfmon_get_samples_rpc(struct nvgpu_pmu *pmu)
 {
-    struct gk20a *g = gk20a_from_pmu(pmu);
+    struct gk20a *g = pmu->g;
     struct nv_pmu_rpc_struct_perfmon_query rpc;
     int status = 0;
 

@@ -514,7 +547,7 @@ int nvgpu_pmu_perfmon_get_samples_rpc(struct nvgpu_pmu *pmu)
     }
 
     nvgpu_log_fn(g, " ");
-    pmu->perfmon_query = 0;
+    pmu->pmu_perfmon->perfmon_query = 0;
     (void) memset(&rpc, 0, sizeof(struct nv_pmu_rpc_struct_perfmon_query));
     /* PERFMON QUERY */
     nvgpu_pmu_dbg(g, "RPC post NV_PMU_RPC_ID_PERFMON_QUERY\n");

@@ -524,7 +557,28 @@ int nvgpu_pmu_perfmon_get_samples_rpc(struct nvgpu_pmu *pmu)
     }
 
     pmu_wait_message_cond(pmu, nvgpu_get_poll_timeout(g),
-        &pmu->perfmon_query, 1);
+        &pmu->pmu_perfmon->perfmon_query, 1);
 
     return status;
 }
+
+int nvgpu_pmu_perfmon_get_sampling_enable_status(struct nvgpu_pmu *pmu)
+{
+    return pmu->pmu_perfmon->perfmon_sampling_enabled;
+}
+
+void nvgpu_pmu_perfmon_set_sampling_enable_status(struct nvgpu_pmu *pmu,
+        bool status)
+{
+    pmu->pmu_perfmon->perfmon_sampling_enabled = status;
+}
+
+u64 nvgpu_pmu_perfmon_get_events_count(struct nvgpu_pmu *pmu)
+{
+    return pmu->pmu_perfmon->perfmon_events_cnt;
+}
+
+u32 nvgpu_pmu_perfmon_get_load_avg(struct nvgpu_pmu *pmu)
+{
+    return pmu->pmu_perfmon->load_avg;
+}
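The four accessors added at the end of the hunk above are what let OS-specific
code (the Linux debugfs and Tegra platform hunks later in this change) stop
reaching into perfmon state directly. A sketch of a caller, assuming a valid
struct gk20a *g:

    /* Illustrative only; mirrors the debugfs/platform call sites below. */
    u32 avg = nvgpu_pmu_perfmon_get_load_avg(&g->pmu);
    u64 events = nvgpu_pmu_perfmon_get_events_count(&g->pmu);

    if (!nvgpu_pmu_perfmon_get_sampling_enable_status(&g->pmu)) {
        nvgpu_pmu_perfmon_set_sampling_enable_status(&g->pmu, true);
    }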
@@ -37,6 +37,7 @@
 #include <nvgpu/nvgpu_err.h>
 #include <nvgpu/pmu/lsfm.h>
 #include <nvgpu/pmu/super_surface.h>
+#include <nvgpu/pmu/pmu_perfmon.h>
 
 static void pmu_report_error(struct gk20a *g, u32 err_type,
         u32 status, u32 pmu_err_type)

@@ -161,10 +162,6 @@ static int nvgpu_init_pmu_setup_sw(struct gk20a *g)
         goto skip_init;
     }
 
-    /* no infoRom script from vbios? */
-
-    /* TBD: sysmon subtask */
-
     err = nvgpu_pmu_mutexes_alloc(g, &pmu->mutexes);
     if (err != 0) {
         goto err;

@@ -367,7 +364,7 @@ int nvgpu_pmu_destroy(struct gk20a *g)
 
     nvgpu_pmu_state_change(g, PMU_STATE_OFF, false);
     pmu->pmu_ready = false;
-    pmu->perfmon_ready = false;
+    pmu->pmu_perfmon->perfmon_ready = false;
     pmu->pmu_pg.zbc_ready = false;
     nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false);
 

@@ -34,6 +34,7 @@
 #include <nvgpu/pmu/pstate.h>
 #include <nvgpu/pmu/volt.h>
 #include <nvgpu/pmu/clk/clk.h>
+#include <nvgpu/pmu/pmu_perfmon.h>
 #include <nvgpu/pmu/allocator.h>
 #include <nvgpu/pmu/lsfm.h>
 #include <nvgpu/pmu/super_surface.h>

@@ -61,32 +62,32 @@ static u32 pmu_perfmon_cntr_sz_v2(struct nvgpu_pmu *pmu)
 
 static void *get_perfmon_cntr_ptr_v2(struct nvgpu_pmu *pmu)
 {
-    return (void *)(&pmu->perfmon_counter_v2);
+    return (void *)(&pmu->pmu_perfmon->perfmon_counter_v2);
 }
 
 static void set_perfmon_cntr_ut_v2(struct nvgpu_pmu *pmu, u16 ut)
 {
-    pmu->perfmon_counter_v2.upper_threshold = ut;
+    pmu->pmu_perfmon->perfmon_counter_v2.upper_threshold = ut;
 }
 
 static void set_perfmon_cntr_lt_v2(struct nvgpu_pmu *pmu, u16 lt)
 {
-    pmu->perfmon_counter_v2.lower_threshold = lt;
+    pmu->pmu_perfmon->perfmon_counter_v2.lower_threshold = lt;
 }
 
 static void set_perfmon_cntr_valid_v2(struct nvgpu_pmu *pmu, u8 valid)
 {
-    pmu->perfmon_counter_v2.valid = valid;
+    pmu->pmu_perfmon->perfmon_counter_v2.valid = valid;
 }
 
 static void set_perfmon_cntr_index_v2(struct nvgpu_pmu *pmu, u8 index)
 {
-    pmu->perfmon_counter_v2.index = index;
+    pmu->pmu_perfmon->perfmon_counter_v2.index = index;
 }
 
 static void set_perfmon_cntr_group_id_v2(struct nvgpu_pmu *pmu, u8 gid)
 {
-    pmu->perfmon_counter_v2.group_id = gid;
+    pmu->pmu_perfmon->perfmon_counter_v2.group_id = gid;
 }
 
 static void set_pmu_cmdline_args_falctracedmabase_v4(struct nvgpu_pmu *pmu)

@@ -1659,6 +1660,8 @@ static void nvgpu_remove_pmu_support(struct nvgpu_pmu *pmu)
 
     nvgpu_pmu_lsfm_deinit(g, pmu, pmu->lsfm);
 
+    /* de-allocate memory space of pmu_perfmon */
+    nvgpu_pmu_deinitialize_perfmon(g, pmu);
     nvgpu_mutex_destroy(&pmu->pmu_pg.elpg_mutex);
     nvgpu_mutex_destroy(&pmu->pmu_pg.pg_mutex);
     nvgpu_mutex_destroy(&pmu->isr_mutex);

@@ -1692,7 +1695,8 @@ static int init_pmu_ucode(struct nvgpu_pmu *pmu)
     } else {
         /* secure boot ucodes's */
         nvgpu_pmu_dbg(g, "requesting PMU ucode image");
-        pmu->fw_image = nvgpu_request_firmware(g, NVGPU_PMU_UCODE_IMAGE, 0);
+        pmu->fw_image = nvgpu_request_firmware(g, NVGPU_PMU_UCODE_IMAGE,
+            0);
         if (pmu->fw_image == NULL) {
             nvgpu_err(g, "failed to load pmu ucode!!");
             err = -ENOENT;

@@ -1700,7 +1704,8 @@ static int init_pmu_ucode(struct nvgpu_pmu *pmu)
         }
 
         nvgpu_pmu_dbg(g, "requesting PMU ucode desc");
-        pmu->fw_desc = nvgpu_request_firmware(g, NVGPU_PMU_UCODE_DESC, 0);
+        pmu->fw_desc = nvgpu_request_firmware(g, NVGPU_PMU_UCODE_DESC,
+            0);
         if (pmu->fw_desc == NULL) {
             nvgpu_err(g, "failed to load pmu ucode desc!!");
             err = -ENOENT;

@@ -1779,6 +1784,12 @@ int nvgpu_early_init_pmu_sw(struct gk20a *g, struct nvgpu_pmu *pmu)
         goto init_failed;
     }
 
+    /* Allocate memory for pmu_perfmon */
+    err = nvgpu_pmu_initialize_perfmon(g, pmu);
+    if (err != 0) {
+        goto exit;
+    }
+
     err = init_pmu_ucode(pmu);
     if (err != 0) {
         goto init_failed;

@@ -42,7 +42,7 @@
 #include <nvgpu/gr/gr_intr.h>
 #include <nvgpu/gr/gr_falcon.h>
 #include <nvgpu/gr/setup.h>
-#include <nvgpu/gr/gr.h>
+#include <nvgpu/pmu/pmu_perfmon.h>
 
 #include "hal/mc/mc_gm20b.h"
 #include "hal/bus/bus_gm20b.h"

@@ -43,6 +43,7 @@
 #include <nvgpu/gr/fecs_trace.h>
 #include <nvgpu/gr/gr.h>
 #include <nvgpu/gr/gr_intr.h>
+#include <nvgpu/pmu/pmu_perfmon.h>
 
 #include "hal/mc/mc_gm20b.h"
 #include "hal/mc/mc_gp10b.h"

@@ -180,6 +180,7 @@
 #include <nvgpu/gr/setup.h>
 #include <nvgpu/gr/fecs_trace.h>
 #include <nvgpu/gr/gr.h>
+#include <nvgpu/pmu/pmu_perfmon.h>
 #include <nvgpu/gr/gr_intr.h>
 
 #include <nvgpu/hw/gv100/hw_proj_gv100.h>

@@ -26,6 +26,7 @@
 #include <nvgpu/regops.h>
 #include <nvgpu/gr/gr_falcon.h>
 #include <nvgpu/gr/gr.h>
+#include <nvgpu/pmu/pmu_perfmon.h>
 
 #include "hal/mc/mc_gm20b.h"
 #include "hal/mc/mc_gp10b.h"

@@ -214,7 +214,6 @@ struct nvgpu_pmu {
 
     struct nvgpu_pmu_lsfm *lsfm;
 
-    /* TBD: remove this if ZBC seq is fixed */
     struct nvgpu_mem seq_buf;
     struct nvgpu_mem trace_buf;
 

@@ -233,27 +232,16 @@ struct nvgpu_pmu {
 
     bool pmu_ready;
 
-    u32 perfmon_query;
-
     u32 mscg_stat;
     u32 mscg_transition_state;
 
     u32 pmu_state;
 
     struct nvgpu_pmu_pg pmu_pg;
-    union {
-        struct pmu_perfmon_counter_v2 perfmon_counter_v2;
-    };
-    u8 perfmon_state_id[PMU_DOMAIN_GROUP_NUM];
+    struct nvgpu_pmu_perfmon *pmu_perfmon;
 
     void (*remove_support)(struct nvgpu_pmu *pmu);
     bool sw_ready;
-    bool perfmon_ready;
-
-    u32 sample_buffer;
-    u32 load_shadow;
-    u32 load_avg;
-    u32 load;
 
     struct nvgpu_mutex isr_mutex;
     bool isr_enabled;

@@ -264,8 +252,7 @@ struct nvgpu_pmu {
         struct pmu_cmdline_args_v5 args_v5;
         struct pmu_cmdline_args_v6 args_v6;
     };
-    unsigned long perfmon_events_cnt;
-    bool perfmon_sampling_enabled;
+
     u32 override_done;
 };

@@ -289,26 +276,6 @@ int nvgpu_pmu_lock_acquire(struct gk20a *g, struct nvgpu_pmu *pmu,
 int nvgpu_pmu_lock_release(struct gk20a *g, struct nvgpu_pmu *pmu,
         u32 id, u32 *token);
 
-/* perfmon */
-void nvgpu_pmu_perfmon_rpc_handler(struct gk20a *g, struct nvgpu_pmu *pmu,
-        struct nv_pmu_rpc_header *rpc,
-        struct rpc_handler_payload *rpc_payload);
-int nvgpu_pmu_init_perfmon(struct nvgpu_pmu *pmu);
-int nvgpu_pmu_perfmon_start_sampling(struct nvgpu_pmu *pmu);
-int nvgpu_pmu_perfmon_stop_sampling(struct nvgpu_pmu *pmu);
-int nvgpu_pmu_perfmon_start_sampling_rpc(struct nvgpu_pmu *pmu);
-int nvgpu_pmu_perfmon_stop_sampling_rpc(struct nvgpu_pmu *pmu);
-int nvgpu_pmu_perfmon_get_samples_rpc(struct nvgpu_pmu *pmu);
-int nvgpu_pmu_handle_perfmon_event(struct nvgpu_pmu *pmu,
-        struct pmu_perfmon_msg *msg);
-int nvgpu_pmu_init_perfmon_rpc(struct nvgpu_pmu *pmu);
-int nvgpu_pmu_load_norm(struct gk20a *g, u32 *load);
-int nvgpu_pmu_load_update(struct gk20a *g);
-int nvgpu_pmu_busy_cycles_norm(struct gk20a *g, u32 *norm);
-void nvgpu_pmu_reset_load_counters(struct gk20a *g);
-void nvgpu_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles,
-        u32 *total_cycles);
-
 int nvgpu_pmu_handle_therm_event(struct nvgpu_pmu *pmu,
         struct nv_pmu_therm_msg *msg);
 
drivers/gpu/nvgpu/include/nvgpu/pmu/pmu_perfmon.h (new file, 77 lines)
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef NVGPU_PMU_PERFMON_H
+#define NVGPU_PMU_PERFMON_H
+
+//#include <nvgpu/enabled.h>
+#include <nvgpu/pmuif/nvgpu_gpmu_cmdif.h>
+#include <nvgpu/pmuif/gpmuif_perfmon.h>
+
+struct gk20a;
+struct nvgpu_pmu;
+struct pmu_perfmon_msg;
+struct rpc_handler_payload;
+struct nv_pmu_rpc_header;
+
+struct nvgpu_pmu_perfmon {
+    struct pmu_perfmon_counter_v2 perfmon_counter_v2;
+    u64 perfmon_events_cnt;
+    u32 perfmon_query;
+    u8 perfmon_state_id[PMU_DOMAIN_GROUP_NUM];
+    u32 sample_buffer;
+    u32 load_shadow;
+    u32 load_avg;
+    u32 load;
+    bool perfmon_ready;
+    bool perfmon_sampling_enabled;
+};
+
+/* perfmon */
+void nvgpu_pmu_perfmon_rpc_handler(struct gk20a *g, struct nvgpu_pmu *pmu,
+        struct nv_pmu_rpc_header *rpc,
+        struct rpc_handler_payload *rpc_payload);
+int nvgpu_pmu_initialize_perfmon(struct gk20a *g, struct nvgpu_pmu *pmu);
+void nvgpu_pmu_deinitialize_perfmon(struct gk20a *g, struct nvgpu_pmu *pmu);
+int nvgpu_pmu_init_perfmon(struct nvgpu_pmu *pmu);
+int nvgpu_pmu_perfmon_start_sampling(struct nvgpu_pmu *pmu);
+int nvgpu_pmu_perfmon_stop_sampling(struct nvgpu_pmu *pmu);
+int nvgpu_pmu_perfmon_start_sampling_rpc(struct nvgpu_pmu *pmu);
+int nvgpu_pmu_perfmon_stop_sampling_rpc(struct nvgpu_pmu *pmu);
+int nvgpu_pmu_perfmon_get_samples_rpc(struct nvgpu_pmu *pmu);
+int nvgpu_pmu_handle_perfmon_event(struct nvgpu_pmu *pmu,
+        struct pmu_perfmon_msg *msg);
+int nvgpu_pmu_init_perfmon_rpc(struct nvgpu_pmu *pmu);
+int nvgpu_pmu_load_norm(struct gk20a *g, u32 *load);
+int nvgpu_pmu_load_update(struct gk20a *g);
+int nvgpu_pmu_busy_cycles_norm(struct gk20a *g, u32 *norm);
+void nvgpu_pmu_reset_load_counters(struct gk20a *g);
+void nvgpu_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles,
+        u32 *total_cycles);
+int nvgpu_pmu_perfmon_get_sampling_enable_status(struct nvgpu_pmu *pmu);
+void nvgpu_pmu_perfmon_set_sampling_enable_status(struct nvgpu_pmu *pmu,
+        bool status);
+u64 nvgpu_pmu_perfmon_get_events_count(struct nvgpu_pmu *pmu);
+u32 nvgpu_pmu_perfmon_get_load_avg(struct nvgpu_pmu *pmu);
+
+#endif /* NVGPU_PMU_PERFMON_H */
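One type change rides along with the move: perfmon_events_cnt was declared
unsigned long in struct nvgpu_pmu but is u64 in the new struct, which is why a
debugfs hunk further down switches its format string from "%lu" to "%llu". A
minimal sketch of the updated print path (s being a seq_file, as in the
debugfs code below):

    u64 cnt = nvgpu_pmu_perfmon_get_events_count(&g->pmu);
    seq_printf(s, "%llu\n", cnt);  /* u64 prints with %llu in the kernel */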
@@ -13,6 +13,8 @@
  */
 
 #include <nvgpu/enabled.h>
+#include <nvgpu/pmu/pmu_perfmon.h>
+
 #include "debug_pmu.h"
 #include "os_linux.h"
 

@@ -314,7 +316,8 @@ static int perfmon_events_enable_show(struct seq_file *s, void *data)
 {
     struct gk20a *g = s->private;
 
-    seq_printf(s, "%u\n", g->pmu.perfmon_sampling_enabled ? 1 : 0);
+    seq_printf(s, "%u\n",
+        nvgpu_pmu_perfmon_get_sampling_enable_status(&(g->pmu)) ? 1 : 0);
     return 0;
 
 }

@@ -333,6 +336,7 @@ static ssize_t perfmon_events_enable_write(struct file *file,
     char buf[40];
     int buf_size;
     int err;
+    bool status;
 
     (void) memset(buf, 0, sizeof(buf));
     buf_size = min(count, (sizeof(buf)-1));

@@ -349,18 +353,22 @@ static ssize_t perfmon_events_enable_write(struct file *file,
     if (err)
         return err;
 
-        if (val && !g->pmu.perfmon_sampling_enabled &&
-            nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) {
-            g->pmu.perfmon_sampling_enabled = true;
+        if (val && !nvgpu_pmu_perfmon_get_sampling_enable_status(&(g->pmu))
+            && nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) {
+            nvgpu_pmu_perfmon_set_sampling_enable_status(&(g->pmu),
+                true);
             g->ops.pmu.pmu_perfmon_start_sampling(&(g->pmu));
-        } else if (!val && g->pmu.perfmon_sampling_enabled &&
-            nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) {
-            g->pmu.perfmon_sampling_enabled = false;
+        } else if (!val
+            && nvgpu_pmu_perfmon_get_sampling_enable_status(&(g->pmu))
+            && nvgpu_is_enabled(g, NVGPU_PMU_PERFMON)) {
+            nvgpu_pmu_perfmon_set_sampling_enable_status(&(g->pmu),
+                false);
             g->ops.pmu.pmu_perfmon_stop_sampling(&(g->pmu));
         }
         gk20a_idle(g);
     } else {
-        g->pmu.perfmon_sampling_enabled = val ? true : false;
+        status = val ? true : false;
+        nvgpu_pmu_perfmon_set_sampling_enable_status(&(g->pmu), status);
     }
 
     return count;

@@ -378,7 +386,7 @@ static int perfmon_events_count_show(struct seq_file *s, void *data)
 {
     struct gk20a *g = s->private;
 
-    seq_printf(s, "%lu\n", g->pmu.perfmon_events_cnt);
+    seq_printf(s, "%llu\n", nvgpu_pmu_perfmon_get_events_count(&(g->pmu)));
     return 0;
 
 }

@@ -44,7 +44,7 @@
 #include <nvgpu/gk20a.h>
 #include <nvgpu/gr/global_ctx.h>
 #include <nvgpu/nvhost.h>
+#include <nvgpu/pmu/pmu_perfmon.h>
 #include <nvgpu/linux/dma.h>
 
 #include "gm20b/clk_gm20b.h"

@@ -172,7 +172,8 @@ static unsigned long gk20a_tegra_get_emc_rate(struct gk20a *g,
     /* When scaling emc, account for the gpu load when the
      * gpu frequency is less than or equal to fmax@vmin. */
     if (gpu_freq <= gpu_fmax_at_vmin)
-        emc_scale = min(g->pmu.load_avg, g->emc3d_ratio);
+        emc_scale = min(nvgpu_pmu_perfmon_get_load_avg(&(g->pmu)),
+                g->emc3d_ratio);
     else
         emc_scale = g->emc3d_ratio;
 

@@ -34,6 +34,7 @@
 #include <nvgpu/enabled.h>
 #include <nvgpu/gk20a.h>
 #include <nvgpu/nvhost.h>
+#include <nvgpu/pmu/pmu_perfmon.h>
 
 #include "os_linux.h"
 

@@ -1,7 +1,7 @@
 /*
  * gk20a clock scaling profile
  *
- * Copyright (c) 2013-2018, NVIDIA Corporation. All rights reserved.
+ * Copyright (c) 2013-2019, NVIDIA Corporation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,

@@ -27,6 +27,7 @@
 #include <nvgpu/log.h>
 #include <nvgpu/gk20a.h>
 #include <nvgpu/clk_arb.h>
+#include <nvgpu/pmu/pmu_perfmon.h>
 
 #include "platform_gk20a.h"
 #include "scale.h"

@@ -27,6 +27,7 @@
 #include <nvgpu/gr/obj_ctx.h>
 #include <nvgpu/power_features/cg.h>
 #include <nvgpu/power_features/pg.h>
+#include <nvgpu/pmu/pmu_perfmon.h>
 
 #include "os_linux.h"
 #include "sysfs.h"

@@ -199,6 +199,7 @@
 #include <nvgpu/gr/gr_falcon.h>
 #include <nvgpu/gr/gr.h>
 #include <nvgpu/gr/gr_intr.h>
+#include <nvgpu/pmu/pmu_perfmon.h>
 
 #include <nvgpu/hw/tu104/hw_proj_tu104.h>
 #include <nvgpu/hw/tu104/hw_top_tu104.h>