mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 17:36:20 +03:00
gpu: nvgpu: Implement Thermal Alert for PG189
PG189 has multiple sensors which can raise an interrupt when the board temperature reaches a programmed threshold. This interrupt is handled in nvgpu, which provides events via clk_arb. Support is enabled for TU104 with the NVGPU_SUPPORT_DGPU_THERMAL_ALERT flag. Board-specific configuration is added to the DT and is parsed by nvgpu. Nvgpu does the following:
1. Read the GPIO line number, interrupt type, and event delay from the DT.
2. Call kernel methods to register the interrupt with the kernel.
3. Create a work queue which will process the interrupt in process context.
4. When the interrupt occurs, disable the interrupt and add work to the work queue.
5. In the work queue, post events, sleep for the delay time, and then re-enable the interrupt.
Bug 2492512 Change-Id: Ic5694fe366ca492f8afe8a67de4350e9a51af2af Signed-off-by: Abdul Salam <absalam@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/2119411 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
20fcf813dd
commit
25eb392fd1
@@ -472,8 +472,12 @@ static void nvgpu_clk_arb_schedule_alarm(struct gk20a *g, u32 alarm)
|
|||||||
|
|
||||||
void nvgpu_clk_arb_send_thermal_alarm(struct gk20a *g)
|
void nvgpu_clk_arb_send_thermal_alarm(struct gk20a *g)
|
||||||
{
|
{
|
||||||
|
struct nvgpu_clk_arb *arb = g->clk_arb;
|
||||||
|
|
||||||
|
if (arb != NULL) {
|
||||||
nvgpu_clk_arb_schedule_alarm(g,
|
nvgpu_clk_arb_schedule_alarm(g,
|
||||||
BIT32(NVGPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD));
|
BIT32(NVGPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void nvgpu_clk_arb_worker_deinit(struct gk20a *g)
|
void nvgpu_clk_arb_worker_deinit(struct gk20a *g)
|
||||||
|
|||||||
@@ -1453,6 +1453,7 @@ int tu104_init_hal(struct gk20a *g)
|
|||||||
nvgpu_set_enabled(g, NVGPU_SUPPORT_GSP_VM, true);
|
nvgpu_set_enabled(g, NVGPU_SUPPORT_GSP_VM, true);
|
||||||
nvgpu_set_enabled(g, NVGPU_SUPPORT_PMU_SUPER_SURFACE, true);
|
nvgpu_set_enabled(g, NVGPU_SUPPORT_PMU_SUPER_SURFACE, true);
|
||||||
nvgpu_set_enabled(g, NVGPU_SUPPORT_SET_CTX_MMU_DEBUG_MODE, true);
|
nvgpu_set_enabled(g, NVGPU_SUPPORT_SET_CTX_MMU_DEBUG_MODE, true);
|
||||||
|
nvgpu_set_enabled(g, NVGPU_SUPPORT_DGPU_THERMAL_ALERT, true);
|
||||||
|
|
||||||
/* for now */
|
/* for now */
|
||||||
gops->clk.support_clk_freq_controller = false;
|
gops->clk.support_clk_freq_controller = false;
|
||||||
|
|||||||
@@ -203,10 +203,14 @@ struct gk20a;
|
|||||||
|
|
||||||
/* NVGPU_GPU_IOCTL_SET_MMU_DEBUG_MODE is available */
|
/* NVGPU_GPU_IOCTL_SET_MMU_DEBUG_MODE is available */
|
||||||
#define NVGPU_SUPPORT_SET_CTX_MMU_DEBUG_MODE 78
|
#define NVGPU_SUPPORT_SET_CTX_MMU_DEBUG_MODE 78
|
||||||
|
|
||||||
|
/* DGPU Thermal Alert */
|
||||||
|
#define NVGPU_SUPPORT_DGPU_THERMAL_ALERT 79
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Must be greater than the largest bit offset in the above list.
|
* Must be greater than the largest bit offset in the above list.
|
||||||
*/
|
*/
|
||||||
#define NVGPU_MAX_ENABLED_BITS 79U
|
#define NVGPU_MAX_ENABLED_BITS 80U
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* nvgpu_is_enabled - Check if the passed flag is enabled.
|
* nvgpu_is_enabled - Check if the passed flag is enabled.
|
||||||
|
|||||||
@@ -175,7 +175,7 @@ static inline u32 __pending_event(struct nvgpu_clk_dev *dev,
|
|||||||
if (_WRAPGTEQ(tail, head) && info) {
|
if (_WRAPGTEQ(tail, head) && info) {
|
||||||
head++;
|
head++;
|
||||||
p_notif = &dev->queue.notifications[head % dev->queue.size];
|
p_notif = &dev->queue.notifications[head % dev->queue.size];
|
||||||
events |= nvgpu_convert_gpu_event(p_notif->notification);
|
events = p_notif->notification;
|
||||||
info->event_id = ffs(events) - 1;
|
info->event_id = ffs(events) - 1;
|
||||||
info->timestamp = p_notif->timestamp;
|
info->timestamp = p_notif->timestamp;
|
||||||
nvgpu_atomic_set(&dev->queue.head, head);
|
nvgpu_atomic_set(&dev->queue.head, head);
|
||||||
|
|||||||
@@ -28,6 +28,7 @@
|
|||||||
#include <linux/notifier.h>
|
#include <linux/notifier.h>
|
||||||
#include <linux/platform/tegra/common.h>
|
#include <linux/platform/tegra/common.h>
|
||||||
#include <linux/pci.h>
|
#include <linux/pci.h>
|
||||||
|
#include <linux/of_gpio.h>
|
||||||
|
|
||||||
#include <uapi/linux/nvgpu.h>
|
#include <uapi/linux/nvgpu.h>
|
||||||
#include <dt-bindings/soc/gm20b-fuse.h>
|
#include <dt-bindings/soc/gm20b-fuse.h>
|
||||||
@@ -284,6 +285,78 @@ void gk20a_init_linux_characteristics(struct gk20a *g)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void therm_alert_work_queue(struct work_struct *work)
|
||||||
|
{
|
||||||
|
|
||||||
|
struct dgpu_thermal_alert *thermal_alert =
|
||||||
|
container_of(work, struct dgpu_thermal_alert, work);
|
||||||
|
struct nvgpu_os_linux *l =
|
||||||
|
container_of(thermal_alert, struct nvgpu_os_linux,
|
||||||
|
thermal_alert);
|
||||||
|
struct gk20a *g = &l->g;
|
||||||
|
|
||||||
|
nvgpu_clk_arb_send_thermal_alarm(g);
|
||||||
|
nvgpu_msleep(l->thermal_alert.event_delay * 1000U);
|
||||||
|
enable_irq(l->thermal_alert.therm_alert_irq);
|
||||||
|
}
|
||||||
|
|
||||||
|
static irqreturn_t therm_irq(int irq, void *dev_id)
|
||||||
|
{
|
||||||
|
struct nvgpu_os_linux *l = (struct nvgpu_os_linux *)dev_id;
|
||||||
|
|
||||||
|
disable_irq_nosync(irq);
|
||||||
|
queue_work(l->thermal_alert.workqueue, &l->thermal_alert.work);
|
||||||
|
return IRQ_HANDLED;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int nvgpu_request_therm_irq(struct nvgpu_os_linux *l)
|
||||||
|
{
|
||||||
|
struct device_node *np;
|
||||||
|
int ret = 0, gpio, index = 0;
|
||||||
|
u32 irq_flags = IRQ_TYPE_NONE;
|
||||||
|
u32 event_delay = 10U;
|
||||||
|
|
||||||
|
if (l->thermal_alert.workqueue != NULL) {
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
np = of_find_node_by_name(NULL, "nvgpu");
|
||||||
|
if (!np) {
|
||||||
|
return -ENOENT;
|
||||||
|
}
|
||||||
|
|
||||||
|
gpio = of_get_named_gpio(np, "nvgpu-therm-gpios", index);
|
||||||
|
if (gpio < 0) {
|
||||||
|
nvgpu_err(&l->g, "failed to get GPIO %d ", gpio);
|
||||||
|
return gpio;
|
||||||
|
}
|
||||||
|
|
||||||
|
l->thermal_alert.therm_alert_irq = gpio_to_irq(gpio);
|
||||||
|
|
||||||
|
if (of_property_read_u32(np, "alert-interrupt-level", &irq_flags))
|
||||||
|
nvgpu_info(&l->g, "Missing interrupt-level "
|
||||||
|
"prop using %d", irq_flags);
|
||||||
|
if (of_property_read_u32(np, "alert-event-interval", &event_delay))
|
||||||
|
nvgpu_info(&l->g, "Missing event-interval "
|
||||||
|
"prop using %d seconds ", event_delay);
|
||||||
|
|
||||||
|
l->thermal_alert.event_delay = event_delay;
|
||||||
|
|
||||||
|
if (!l->thermal_alert.workqueue) {
|
||||||
|
l->thermal_alert.workqueue = alloc_workqueue("%s",
|
||||||
|
WQ_HIGHPRI, 1, "dgpu_thermal_alert");
|
||||||
|
INIT_WORK(&l->thermal_alert.work, therm_alert_work_queue);
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = devm_request_irq(l->dev, l->thermal_alert.therm_alert_irq ,
|
||||||
|
therm_irq, irq_flags, "dgpu_therm", l);
|
||||||
|
if (ret != 0) {
|
||||||
|
nvgpu_err(&l->g, "IRQ request failed");
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
int gk20a_pm_finalize_poweron(struct device *dev)
|
int gk20a_pm_finalize_poweron(struct device *dev)
|
||||||
{
|
{
|
||||||
struct gk20a *g = get_gk20a(dev);
|
struct gk20a *g = get_gk20a(dev);
|
||||||
@@ -331,6 +404,16 @@ int gk20a_pm_finalize_poweron(struct device *dev)
|
|||||||
g->sim->sim_init_late(g);
|
g->sim->sim_init_late(g);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_DGPU_THERMAL_ALERT) &&
|
||||||
|
nvgpu_platform_is_silicon(g)) {
|
||||||
|
err = nvgpu_request_therm_irq(l);
|
||||||
|
if (err) {
|
||||||
|
nvgpu_err(g, "thermal interrupt request failed %d",
|
||||||
|
err);
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
err = gk20a_finalize_poweron(g);
|
err = gk20a_finalize_poweron(g);
|
||||||
if (err)
|
if (err)
|
||||||
goto done;
|
goto done;
|
||||||
|
|||||||
@@ -62,10 +62,17 @@ struct nvgpu_os_linux_ops {
|
|||||||
} s_param;
|
} s_param;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct dgpu_thermal_alert {
|
||||||
|
struct workqueue_struct *workqueue;
|
||||||
|
struct work_struct work;
|
||||||
|
u32 therm_alert_irq;
|
||||||
|
u32 event_delay;
|
||||||
|
};
|
||||||
|
|
||||||
struct nvgpu_os_linux {
|
struct nvgpu_os_linux {
|
||||||
struct gk20a g;
|
struct gk20a g;
|
||||||
struct device *dev;
|
struct device *dev;
|
||||||
|
struct dgpu_thermal_alert thermal_alert;
|
||||||
struct {
|
struct {
|
||||||
struct cdev cdev;
|
struct cdev cdev;
|
||||||
struct device *node;
|
struct device *node;
|
||||||
|
|||||||
@@ -558,6 +558,19 @@ err_free_l:
|
|||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void nvgpu_thermal_deinit(struct gk20a *g)
|
||||||
|
{
|
||||||
|
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
|
||||||
|
struct device *dev = dev_from_gk20a(g);
|
||||||
|
|
||||||
|
devm_free_irq(dev, l->thermal_alert.therm_alert_irq, g);
|
||||||
|
if (l->thermal_alert.workqueue != NULL) {
|
||||||
|
cancel_work_sync(&l->thermal_alert.work);
|
||||||
|
destroy_workqueue(l->thermal_alert.workqueue);
|
||||||
|
l->thermal_alert.workqueue = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void nvgpu_pci_remove(struct pci_dev *pdev)
|
static void nvgpu_pci_remove(struct pci_dev *pdev)
|
||||||
{
|
{
|
||||||
struct gk20a *g = get_gk20a(&pdev->dev);
|
struct gk20a *g = get_gk20a(&pdev->dev);
|
||||||
@@ -576,6 +589,11 @@ static void nvgpu_pci_remove(struct pci_dev *pdev)
|
|||||||
|
|
||||||
gk20a_driver_start_unload(g);
|
gk20a_driver_start_unload(g);
|
||||||
|
|
||||||
|
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_DGPU_THERMAL_ALERT) &&
|
||||||
|
nvgpu_platform_is_silicon(g)) {
|
||||||
|
nvgpu_thermal_deinit(g);
|
||||||
|
}
|
||||||
|
|
||||||
err = nvgpu_quiesce(g);
|
err = nvgpu_quiesce(g);
|
||||||
/* TODO: handle failure to idle */
|
/* TODO: handle failure to idle */
|
||||||
WARN(err, "gpu failed to idle during driver removal");
|
WARN(err, "gpu failed to idle during driver removal");
|
||||||
|
|||||||
Reference in New Issue
Block a user