From 65a789698789fedafc10dc5dee6bffe0d00b3610 Mon Sep 17 00:00:00 2001 From: rmylavarapu Date: Thu, 12 Sep 2019 17:29:52 +0530 Subject: [PATCH] nvgpu: gpu: Implement PMU therm channel get status Currently nvgpu reads the temperature from the NV_THERM_I2CS_SENSOR_00 register. This approach has the following issues: 1) NV_THERM_I2CS_SENSOR_00 doesn't support fractional precision, which is POR. 2) It doesn't support negative temperatures, which are required for Auto. 3) It doesn't take into account the right POR sensor in the VFE VBIOS tables. The PMU therm channel get-status interface lets us read the current temperature from the PMU instead. NVBUG - 200549047 Change-Id: I2fb21926208876f3d3bebe3f2dee08edafedbc7d Signed-off-by: rmylavarapu Reviewed-on: https://git-master.nvidia.com/r/2196224 Reviewed-by: svc-mobile-coverity GVS: Gerrit_Virtual_Submit Reviewed-by: Vijayakumar Subbu Reviewed-by: mobile promotions Tested-by: mobile promotions --- .../pmu/super_surface/super_surface_priv.h | 2 + .../gpu/nvgpu/common/pmu/therm/thrmchannel.c | 120 ++++++++++++++++++ .../gpu/nvgpu/common/pmu/therm/thrmchannel.h | 6 + drivers/gpu/nvgpu/hal/init/hal_tu104.c | 2 - drivers/gpu/nvgpu/hal/therm/therm_tu104.c | 14 -- drivers/gpu/nvgpu/hal/therm/therm_tu104.h | 1 - drivers/gpu/nvgpu/include/nvgpu/gops_therm.h | 1 - .../include/nvgpu/pmu/pmuif/thermsensor.h | 20 +++ drivers/gpu/nvgpu/include/nvgpu/pmu/therm.h | 3 + .../gpu/nvgpu/os/linux/debug_therm_tu104.c | 6 +- drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c | 15 ++- 11 files changed, 164 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/nvgpu/common/pmu/super_surface/super_surface_priv.h b/drivers/gpu/nvgpu/common/pmu/super_surface/super_surface_priv.h index dc8fd3a8f..17dd342b4 100644 --- a/drivers/gpu/nvgpu/common/pmu/super_surface/super_surface_priv.h +++ b/drivers/gpu/nvgpu/common/pmu/super_surface/super_surface_priv.h @@ -161,6 +161,8 @@ struct nv_pmu_super_surface { therm_channel_grp_set; struct nv_pmu_therm_therm_device_boardobj_grp_set
therm_device_grp_set; + struct nv_pmu_therm_therm_channel_boardobj_grp_get_status + therm_channel_grp_get_status; } therm; struct { struct perf_change_seq_pmu_script script_curr; diff --git a/drivers/gpu/nvgpu/common/pmu/therm/thrmchannel.c b/drivers/gpu/nvgpu/common/pmu/therm/thrmchannel.c index 0e6319088..9c5eabc24 100644 --- a/drivers/gpu/nvgpu/common/pmu/therm/thrmchannel.c +++ b/drivers/gpu/nvgpu/common/pmu/therm/thrmchannel.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "thrmchannel.h" #include "thrmdev.h" @@ -125,6 +126,25 @@ static int _therm_channel_pmudata_instget(struct gk20a *g, return 0; } +static int therm_channel_pmustatus_instget(struct gk20a *g, + void *pboardobjgrppmu, struct nv_pmu_boardobj_query + **ppboardobjpmustatus, u8 idx) +{ + struct nv_pmu_therm_therm_channel_boardobj_grp_get_status *pmu_status = + (struct nv_pmu_therm_therm_channel_boardobj_grp_get_status *) + (void *)pboardobjgrppmu; + + /* Check whether the PMU boardobjgrp has a valid boardobj at this index */ + if (((u32)BIT(idx) & + pmu_status->hdr.data.super.obj_mask.super.data[0]) == 0U) { + return -EINVAL; + } + + *ppboardobjpmustatus = (struct nv_pmu_boardobj_query *) + &pmu_status->objects[idx].data.board_obj; + return 0; +} + static int devinit_get_therm_channel_table(struct gk20a *g, struct therm_channels *pthermchannelobjs) { @@ -243,6 +263,7 @@ int therm_channel_sw_setup(struct gk20a *g) /* Override the Interfaces */ pboardobjgrp->pmudatainstget = _therm_channel_pmudata_instget; + pboardobjgrp->pmustatusinstget = therm_channel_pmustatus_instget; status = devinit_get_therm_channel_table(g, pthermchannelobjs); if (status != 0) { @@ -260,7 +281,128 @@ int therm_channel_sw_setup(struct gk20a *g) goto done; } + status = BOARDOBJGRP_PMU_CMD_GRP_GET_STATUS_CONSTRUCT(g, pboardobjgrp, + therm, THERM, therm_channel, THERM_CHANNEL); + if (status != 0) { + nvgpu_err(g, + "error constructing THERM_GET_STATUS interface - 0x%x", + status); + goto done; + } + done: nvgpu_log_info(g, 
" done status %x", status); return status; } + +/* + * Copy the temperature reported by the PMU for one channel into its boardobj. + * Returns -EINVAL when the PMU object type differs from the boardobj type. + * NOTE(review): board_obj_ptr is reinterpreted as therm_channel_get_status; + * confirm the objects in this group really carry curr_temp right after the + * boardobj header, otherwise this store overwrites another field. + */ +static int therm_channel_currtemp_update(struct gk20a *g, + struct boardobj *board_obj_ptr, + struct nv_pmu_boardobj *ppmudata) +{ + struct therm_channel_get_status *therm_channel_obj; + struct nv_pmu_therm_therm_channel_boardobj_get_status *pstatus; + + nvgpu_log_info(g, " "); + + therm_channel_obj = (struct therm_channel_get_status *) + (void *)board_obj_ptr; + pstatus = (struct nv_pmu_therm_therm_channel_boardobj_get_status *) + (void *)ppmudata; + + if (pstatus->super.type != therm_channel_obj->super.type) { + nvgpu_err(g, "pmu data and boardobj type not matching"); + return -EINVAL; + } + + therm_channel_obj->curr_temp = pstatus->current_temp; + return 0; +} + +/* + * Fetch the channel status group from the PMU and refresh curr_temp of every + * channel object in the group. Returns 0 on success, or the first error + * encountered. + */ +static int therm_channel_boardobj_grp_get_status(struct gk20a *g) +{ + struct boardobjgrp *pboardobjgrp = NULL; + struct boardobjgrpmask *pboardobjgrpmask; + struct nv_pmu_boardobjgrp_super *pboardobjgrppmu; + struct boardobj *pboardobj = NULL; + struct nv_pmu_boardobj_query *pboardobjpmustatus = NULL; + int status; + u8 index; + + nvgpu_log_info(g, " "); + + pboardobjgrp = &g->pmu->therm_pmu->therm_channelobjs.super.super; + pboardobjgrpmask = &g->pmu->therm_pmu->therm_channelobjs.super.mask.super; + status = pboardobjgrp->pmugetstatus(g, pboardobjgrp, pboardobjgrpmask); + if (status != 0) { + nvgpu_err(g, "err getting boardobjs from pmu"); + return status; + } + pboardobjgrppmu = pboardobjgrp->pmu.getstatus.buf; + + BOARDOBJGRP_FOR_EACH(pboardobjgrp, struct boardobj*, pboardobj, index) { + status = pboardobjgrp->pmustatusinstget(g, + (struct nv_pmu_boardobjgrp *)(void *)pboardobjgrppmu, + &pboardobjpmustatus, index); + if (status != 0) { + nvgpu_err(g, "could not get status object instance"); + return status; + } + status = therm_channel_currtemp_update(g, pboardobj, + (struct nv_pmu_boardobj *)(void *)pboardobjpmustatus); + if (status != 0) { + nvgpu_err(g, "could not update therm_channel status"); + return status; + } + } + return 0; +} + +/* + * Refresh the therm channel status from the PMU and report, through @temp, + * the temperature of the first channel whose reading is non-zero. + * + * @temp is set to 0 when no channel has a non-zero reading, so a successful + * call never leaves the caller with an uninitialized value. + * + * Returns 0 on success, or the error from the get-status query (in which + * case @temp is left untouched). + */ +int
nvgpu_therm_channel_get_curr_temp(struct gk20a *g, u32 *temp) +{ + struct boardobjgrp *pboardobjgrp; + struct boardobj *pboardobj = NULL; + struct therm_channel_get_status *therm_channel_status = NULL; + int status; + u8 index; + + status = therm_channel_boardobj_grp_get_status(g); + if (status != 0) { + nvgpu_err(g, "therm_channel get status failed"); + return status; + } + + pboardobjgrp = &g->pmu->therm_pmu->therm_channelobjs.super.super; + *temp = 0U; + + BOARDOBJGRP_FOR_EACH(pboardobjgrp, struct boardobj*, pboardobj, index) { + therm_channel_status = (struct therm_channel_get_status *) + (void *)pboardobj; + if (therm_channel_status->curr_temp != 0U) { + *temp = therm_channel_status->curr_temp; + return status; + } + } + return status; +} diff --git a/drivers/gpu/nvgpu/common/pmu/therm/thrmchannel.h b/drivers/gpu/nvgpu/common/pmu/therm/thrmchannel.h index 5d4811a5c..c18a960ea 100644 --- a/drivers/gpu/nvgpu/common/pmu/therm/thrmchannel.h +++ b/drivers/gpu/nvgpu/common/pmu/therm/thrmchannel.h @@ -27,6 +27,7 @@ #include #include #include +#include struct gk20a; @@ -48,6 +49,11 @@ struct therm_channel_device { u8 therm_dev_prov_idx; }; +struct therm_channel_get_status { + struct boardobj super; + u32 curr_temp; /* last current_temp value copied from the PMU status */ +}; + int therm_channel_sw_setup(struct gk20a *g); #endif /* NVGPU_THERM_THRMCHANNEL_H */ diff --git a/drivers/gpu/nvgpu/hal/init/hal_tu104.c b/drivers/gpu/nvgpu/hal/init/hal_tu104.c index 9c3eeb816..cb54bff46 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_tu104.c +++ b/drivers/gpu/nvgpu/hal/init/hal_tu104.c @@ -1173,8 +1173,6 @@ static const struct gpu_ops tu104_ops = { .init_blcg_mode = gm20b_therm_init_blcg_mode, .elcg_init_idle_filters = NULL, #ifdef CONFIG_NVGPU_LS_PMU - .get_internal_sensor_curr_temp = - tu104_get_internal_sensor_curr_temp, .get_internal_sensor_limits = tu104_get_internal_sensor_limits, #endif diff --git a/drivers/gpu/nvgpu/hal/therm/therm_tu104.c b/drivers/gpu/nvgpu/hal/therm/therm_tu104.c index df1a2100a..b2aa3ad59 100644 --- 
a/drivers/gpu/nvgpu/hal/therm/therm_tu104.c +++ b/drivers/gpu/nvgpu/hal/therm/therm_tu104.c @@ -35,17 +35,3 @@ void tu104_get_internal_sensor_limits(s32 *max_24_8, s32 *min_24_8) *max_24_8 = (0x87 << 8); *min_24_8 = (((u32)-216) << 8); } - -int tu104_get_internal_sensor_curr_temp(struct gk20a *g, u32 *temp_f24_8) -{ - int err = 0; - u32 readval; - - readval = nvgpu_readl(g, therm_i2cs_sensor_00_r()); - - /* Convert from celsius to f24_8 format*/ - *temp_f24_8 = (readval << 8); - - return err; -} - diff --git a/drivers/gpu/nvgpu/hal/therm/therm_tu104.h b/drivers/gpu/nvgpu/hal/therm/therm_tu104.h index 58ed5911a..64e234d8e 100644 --- a/drivers/gpu/nvgpu/hal/therm/therm_tu104.h +++ b/drivers/gpu/nvgpu/hal/therm/therm_tu104.h @@ -30,6 +30,5 @@ struct gk20a; void tu104_get_internal_sensor_limits(s32 *max_24_8, s32 *min_24_8); -int tu104_get_internal_sensor_curr_temp(struct gk20a *g, u32 *temp_f24_8); #endif diff --git a/drivers/gpu/nvgpu/include/nvgpu/gops_therm.h b/drivers/gpu/nvgpu/include/nvgpu/gops_therm.h index 93181cdbf..75787d6e2 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gops_therm.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gops_therm.h @@ -117,7 +117,6 @@ struct gops_therm { int (*elcg_init_idle_filters)(struct gk20a *g); /** @cond DOXYGEN_SHOULD_SKIP_THIS */ - int (*get_internal_sensor_curr_temp)(struct gk20a *g, u32 *temp_f24_8); void (*get_internal_sensor_limits)(s32 *max_24_8, s32 *min_24_8); void (*throttle_enable)(struct gk20a *g, u32 val); diff --git a/drivers/gpu/nvgpu/include/nvgpu/pmu/pmuif/thermsensor.h b/drivers/gpu/nvgpu/include/nvgpu/pmu/pmuif/thermsensor.h index 77fcafafb..ae7301603 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/pmu/pmuif/thermsensor.h +++ b/drivers/gpu/nvgpu/include/nvgpu/pmu/pmuif/thermsensor.h @@ -30,6 +30,8 @@ #define NV_PMU_THERM_CMD_ID_BOARDOBJ_GRP_SET 0x0000000B #define NV_PMU_THERM_MSG_ID_BOARDOBJ_GRP_SET 0x00000008 +#define NV_PMU_THERM_CMD_ID_BOARDOBJ_GRP_GET_STATUS 0x00000001U +#define 
NV_PMU_THERM_MSG_ID_BOARDOBJ_GRP_GET_STATUS 0x00000001U struct nv_pmu_therm_therm_device_boardobjgrp_set_header { struct nv_pmu_boardobjgrp_e32 super; @@ -102,4 +104,22 @@ union nv_pmu_therm_therm_channel_boardobj_set_union { NV_PMU_BOARDOBJ_GRP_SET_MAKE_E32(therm, therm_channel); +struct nv_pmu_therm_therm_channel_boardobjgrp_get_status_header { + struct nv_pmu_boardobjgrp_e32 super; +}; + +struct nv_pmu_therm_therm_channel_boardobj_get_status /* status payload of one therm channel object */ +{ + struct nv_pmu_boardobj_query super; + u32 current_temp; /* temperature reported by the PMU; therm_channel_currtemp_update() copies it into curr_temp */ +}; + +union nv_pmu_therm_therm_channel_boardobj_get_status_union +{ + struct nv_pmu_boardobj_query board_obj; /* generic view handed out by therm_channel_pmustatus_instget() */ + struct nv_pmu_therm_therm_channel_boardobj_get_status therm_channel; +}; + +NV_PMU_BOARDOBJ_GRP_GET_STATUS_MAKE_E32(therm, therm_channel); + #endif /* NVGPU_PMUIF_THERMSENSOR_H */ diff --git a/drivers/gpu/nvgpu/include/nvgpu/pmu/therm.h b/drivers/gpu/nvgpu/include/nvgpu/pmu/therm.h index fef418aaa..2dd2caf94 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/pmu/therm.h +++ b/drivers/gpu/nvgpu/include/nvgpu/pmu/therm.h @@ -27,6 +27,8 @@ struct gk20a; struct nvgpu_pmu; struct nv_pmu_therm_msg; +struct nv_pmu_rpc_header; +struct pmu_msg; void nvgpu_pmu_handle_therm_event(struct gk20a *g, struct nvgpu_pmu *pmu, struct pmu_msg *msg, struct nv_pmu_rpc_header *rpc); @@ -39,5 +41,6 @@ int nvgpu_therm_configure_therm_alert(struct gk20a *g, struct nvgpu_pmu *pmu); #endif void nvgpu_pmu_therm_rpc_handler(struct gk20a *g, struct nvgpu_pmu *pmu, struct nv_pmu_rpc_header *rpc); +int nvgpu_therm_channel_get_curr_temp(struct gk20a *g, u32 *temp); /* returns 0 on success; *temp receives the first non-zero channel reading, if any */ #endif /* NVGPU_PMU_THREM_H */ diff --git a/drivers/gpu/nvgpu/os/linux/debug_therm_tu104.c b/drivers/gpu/nvgpu/os/linux/debug_therm_tu104.c index 9f178cb80..aaf9dc869 100644 --- a/drivers/gpu/nvgpu/os/linux/debug_therm_tu104.c +++ b/drivers/gpu/nvgpu/os/linux/debug_therm_tu104.c @@ -15,6 +15,7 @@ */ #include +#include #include "os_linux.h" @@ -24,10 +25,7 @@ static int therm_get_internal_sensor_curr_temp(void *data, u64 *val) u32
readval; int err; - if (!g->ops.therm.get_internal_sensor_curr_temp) - return -EINVAL; - - err = g->ops.therm.get_internal_sensor_curr_temp(g, &readval); + err = nvgpu_therm_channel_get_curr_temp(g, &readval); if (!err) *val = readval; diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c index a1ee2da5f..00c3fd47e 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include @@ -1512,20 +1513,28 @@ static int nvgpu_gpu_get_temperature(struct gk20a *g, nvgpu_log_fn(g, " "); +#ifdef CONFIG_NVGPU_SIM + if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) { + return 0; + } +#endif + if (args->reserved[0] || args->reserved[1] || args->reserved[2]) return -EINVAL; if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_GET_TEMPERATURE)) return -EINVAL; - if (!g->ops.therm.get_internal_sensor_curr_temp) - return -EINVAL; - err = gk20a_busy(g); if (err) return err; - err = g->ops.therm.get_internal_sensor_curr_temp(g, &temp_f24_8); + err = nvgpu_therm_channel_get_curr_temp(g, &temp_f24_8); + if (err) { + /* Balance the gk20a_busy() above before bailing out. */ + gk20a_idle(g); + return err; + } gk20a_idle(g);