From 25eb392fd11f764e0e89c27d61f0506d160f4574 Mon Sep 17 00:00:00 2001 From: Abdul Salam Date: Thu, 23 May 2019 23:44:05 +0530 Subject: [PATCH] gpu: nvgpu: Implement Thermal Alert for PG189 PG189 has multiple sensors which can raise an interrupt when the board temperature reaches a programmed threshold. This interrupt is implemented in nvgpu and provides events via clk_arb. Support is enabled for TU104 with the NVGPU_SUPPORT_DGPU_THERMAL_ALERT flag. Board-specific config is added in DT, which will be parsed by nvgpu. Nvgpu does the following: 1. Read the GPIO line number, interrupt type, and event delay from DT. 2. Call kernel methods and register the interrupt with the kernel. 3. Create a work queue which will process the interrupt in process context. 4. When the interrupt occurs, disable the interrupt and add work to the work queue. 5. In the work queue, post events and sleep for the delay time, then enable the interrupt. Bug 2492512 Change-Id: Ic5694fe366ca492f8afe8a67de4350e9a51af2af Signed-off-by: Abdul Salam Reviewed-on: https://git-master.nvidia.com/r/2119411 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/common/clk_arb/clk_arb.c | 8 ++- drivers/gpu/nvgpu/hal/init/hal_tu104.c | 1 + drivers/gpu/nvgpu/include/nvgpu/enabled.h | 6 +- drivers/gpu/nvgpu/os/linux/ioctl_clk_arb.c | 2 +- drivers/gpu/nvgpu/os/linux/module.c | 83 ++++++++++++++++++++++ drivers/gpu/nvgpu/os/linux/os_linux.h | 9 ++- drivers/gpu/nvgpu/os/linux/pci.c | 18 +++++ 7 files changed, 122 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/nvgpu/common/clk_arb/clk_arb.c b/drivers/gpu/nvgpu/common/clk_arb/clk_arb.c index 760af2ee0..6f41dadd3 100644 --- a/drivers/gpu/nvgpu/common/clk_arb/clk_arb.c +++ b/drivers/gpu/nvgpu/common/clk_arb/clk_arb.c @@ -472,8 +472,12 @@ static void nvgpu_clk_arb_schedule_alarm(struct gk20a *g, u32 alarm) void nvgpu_clk_arb_send_thermal_alarm(struct gk20a *g) { - nvgpu_clk_arb_schedule_alarm(g, - BIT32(NVGPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD)); + struct nvgpu_clk_arb *arb = g->clk_arb; 
+ if (arb != NULL) { + nvgpu_clk_arb_schedule_alarm(g, + BIT32(NVGPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD)); + } } void nvgpu_clk_arb_worker_deinit(struct gk20a *g) diff --git a/drivers/gpu/nvgpu/hal/init/hal_tu104.c b/drivers/gpu/nvgpu/hal/init/hal_tu104.c index 649486173..15e8734a9 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_tu104.c +++ b/drivers/gpu/nvgpu/hal/init/hal_tu104.c @@ -1453,6 +1453,7 @@ int tu104_init_hal(struct gk20a *g) nvgpu_set_enabled(g, NVGPU_SUPPORT_GSP_VM, true); nvgpu_set_enabled(g, NVGPU_SUPPORT_PMU_SUPER_SURFACE, true); nvgpu_set_enabled(g, NVGPU_SUPPORT_SET_CTX_MMU_DEBUG_MODE, true); + nvgpu_set_enabled(g, NVGPU_SUPPORT_DGPU_THERMAL_ALERT, true); /* for now */ gops->clk.support_clk_freq_controller = false; diff --git a/drivers/gpu/nvgpu/include/nvgpu/enabled.h b/drivers/gpu/nvgpu/include/nvgpu/enabled.h index 4f2284ee6..85d8fa557 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/enabled.h +++ b/drivers/gpu/nvgpu/include/nvgpu/enabled.h @@ -203,10 +203,14 @@ struct gk20a; /* NVGPU_GPU_IOCTL_SET_MMU_DEBUG_MODE is available */ #define NVGPU_SUPPORT_SET_CTX_MMU_DEBUG_MODE 78 + +/* DGPU Thermal Alert */ +#define NVGPU_SUPPORT_DGPU_THERMAL_ALERT 79 + /* * Must be greater than the largest bit offset in the above list. */ -#define NVGPU_MAX_ENABLED_BITS 79U +#define NVGPU_MAX_ENABLED_BITS 80U /** * nvgpu_is_enabled - Check if the passed flag is enabled. 
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_clk_arb.c b/drivers/gpu/nvgpu/os/linux/ioctl_clk_arb.c index 5a97f182f..340e67f06 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_clk_arb.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_clk_arb.c @@ -175,7 +175,7 @@ static inline u32 __pending_event(struct nvgpu_clk_dev *dev, if (_WRAPGTEQ(tail, head) && info) { head++; p_notif = &dev->queue.notifications[head % dev->queue.size]; - events |= nvgpu_convert_gpu_event(p_notif->notification); + events = p_notif->notification; info->event_id = ffs(events) - 1; info->timestamp = p_notif->timestamp; nvgpu_atomic_set(&dev->queue.head, head); diff --git a/drivers/gpu/nvgpu/os/linux/module.c b/drivers/gpu/nvgpu/os/linux/module.c index d7fc9881c..18f3bccbb 100644 --- a/drivers/gpu/nvgpu/os/linux/module.c +++ b/drivers/gpu/nvgpu/os/linux/module.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -284,6 +285,78 @@ void gk20a_init_linux_characteristics(struct gk20a *g) } } +static void therm_alert_work_queue(struct work_struct *work) +{ + + struct dgpu_thermal_alert *thermal_alert = + container_of(work, struct dgpu_thermal_alert, work); + struct nvgpu_os_linux *l = + container_of(thermal_alert, struct nvgpu_os_linux, + thermal_alert); + struct gk20a *g = &l->g; + + nvgpu_clk_arb_send_thermal_alarm(g); + nvgpu_msleep(l->thermal_alert.event_delay * 1000U); + enable_irq(l->thermal_alert.therm_alert_irq); +} + +static irqreturn_t therm_irq(int irq, void *dev_id) +{ + struct nvgpu_os_linux *l = (struct nvgpu_os_linux *)dev_id; + + disable_irq_nosync(irq); + queue_work(l->thermal_alert.workqueue, &l->thermal_alert.work); + return IRQ_HANDLED; +} + +static int nvgpu_request_therm_irq(struct nvgpu_os_linux *l) +{ + struct device_node *np; + int ret = 0, gpio, index = 0; + u32 irq_flags = IRQ_TYPE_NONE; + u32 event_delay = 10U; + + if (l->thermal_alert.workqueue != NULL) { + return ret; + } + np = of_find_node_by_name(NULL, "nvgpu"); + if (!np) { + return -ENOENT; + } + + 
gpio = of_get_named_gpio(np, "nvgpu-therm-gpios", index); + if (gpio < 0) { + nvgpu_err(&l->g, "failed to get GPIO %d ", gpio); + return gpio; + } + + l->thermal_alert.therm_alert_irq = gpio_to_irq(gpio); + + if (of_property_read_u32(np, "alert-interrupt-level", &irq_flags)) + nvgpu_info(&l->g, "Missing interrupt-level " + "prop using %d", irq_flags); + if (of_property_read_u32(np, "alert-event-interval", &event_delay)) + nvgpu_info(&l->g, "Missing event-interval " + "prop using %d seconds ", event_delay); + + l->thermal_alert.event_delay = event_delay; + + if (!l->thermal_alert.workqueue) { + l->thermal_alert.workqueue = alloc_workqueue("%s", + WQ_HIGHPRI, 1, "dgpu_thermal_alert"); + INIT_WORK(&l->thermal_alert.work, therm_alert_work_queue); + } + + ret = devm_request_irq(l->dev, l->thermal_alert.therm_alert_irq , + therm_irq, irq_flags, "dgpu_therm", l); + if (ret != 0) { + nvgpu_err(&l->g, "IRQ request failed"); + } + + return ret; +} + + int gk20a_pm_finalize_poweron(struct device *dev) { struct gk20a *g = get_gk20a(dev); @@ -331,6 +404,16 @@ int gk20a_pm_finalize_poweron(struct device *dev) g->sim->sim_init_late(g); } + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_DGPU_THERMAL_ALERT) && + nvgpu_platform_is_silicon(g)) { + err = nvgpu_request_therm_irq(l); + if (err) { + nvgpu_err(g, "thermal interrupt request failed %d", + err); + goto done; + } + } + err = gk20a_finalize_poweron(g); if (err) goto done; diff --git a/drivers/gpu/nvgpu/os/linux/os_linux.h b/drivers/gpu/nvgpu/os/linux/os_linux.h index e13019ae8..bb6a926c4 100644 --- a/drivers/gpu/nvgpu/os/linux/os_linux.h +++ b/drivers/gpu/nvgpu/os/linux/os_linux.h @@ -62,10 +62,17 @@ struct nvgpu_os_linux_ops { } s_param; }; +struct dgpu_thermal_alert { + struct workqueue_struct *workqueue; + struct work_struct work; + u32 therm_alert_irq; + u32 event_delay; +}; + struct nvgpu_os_linux { struct gk20a g; struct device *dev; - + struct dgpu_thermal_alert thermal_alert; struct { struct cdev cdev; struct device *node; diff 
--git a/drivers/gpu/nvgpu/os/linux/pci.c b/drivers/gpu/nvgpu/os/linux/pci.c index 9c348a191..c7c6fdf77 100644 --- a/drivers/gpu/nvgpu/os/linux/pci.c +++ b/drivers/gpu/nvgpu/os/linux/pci.c @@ -558,6 +558,19 @@ err_free_l: return err; } +static void nvgpu_thermal_deinit(struct gk20a *g) +{ + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); + struct device *dev = dev_from_gk20a(g); + + devm_free_irq(dev, l->thermal_alert.therm_alert_irq, g); + if (l->thermal_alert.workqueue != NULL) { + cancel_work_sync(&l->thermal_alert.work); + destroy_workqueue(l->thermal_alert.workqueue); + l->thermal_alert.workqueue = NULL; + } +} + static void nvgpu_pci_remove(struct pci_dev *pdev) { struct gk20a *g = get_gk20a(&pdev->dev); @@ -576,6 +589,11 @@ static void nvgpu_pci_remove(struct pci_dev *pdev) gk20a_driver_start_unload(g); + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_DGPU_THERMAL_ALERT) && + nvgpu_platform_is_silicon(g)) { + nvgpu_thermal_deinit(g); + } + err = nvgpu_quiesce(g); /* TODO: handle failure to idle */ WARN(err, "gpu failed to idle during driver removal");