diff --git a/drivers/gpu/nvgpu/common/clk_arb/clk_arb.c b/drivers/gpu/nvgpu/common/clk_arb/clk_arb.c
index 760af2ee0..6f41dadd3 100644
--- a/drivers/gpu/nvgpu/common/clk_arb/clk_arb.c
+++ b/drivers/gpu/nvgpu/common/clk_arb/clk_arb.c
@@ -472,8 +472,13 @@ static void nvgpu_clk_arb_schedule_alarm(struct gk20a *g, u32 alarm)
 
 void nvgpu_clk_arb_send_thermal_alarm(struct gk20a *g)
 {
-	nvgpu_clk_arb_schedule_alarm(g,
-			BIT32(NVGPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD));
+	/* Skip the alarm when no clock arbiter is set on this GPU. */
+	if (g->clk_arb == NULL) {
+		return;
+	}
+
+	nvgpu_clk_arb_schedule_alarm(g,
+			BIT32(NVGPU_EVENT_ALARM_THERMAL_ABOVE_THRESHOLD));
 }
 
 void nvgpu_clk_arb_worker_deinit(struct gk20a *g)
diff --git a/drivers/gpu/nvgpu/hal/init/hal_tu104.c b/drivers/gpu/nvgpu/hal/init/hal_tu104.c
index 649486173..15e8734a9 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_tu104.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_tu104.c
@@ -1453,6 +1453,7 @@ int tu104_init_hal(struct gk20a *g)
 	nvgpu_set_enabled(g, NVGPU_SUPPORT_GSP_VM, true);
 	nvgpu_set_enabled(g, NVGPU_SUPPORT_PMU_SUPER_SURFACE, true);
 	nvgpu_set_enabled(g, NVGPU_SUPPORT_SET_CTX_MMU_DEBUG_MODE, true);
+	nvgpu_set_enabled(g, NVGPU_SUPPORT_DGPU_THERMAL_ALERT, true);
 
 	/* for now */
 	gops->clk.support_clk_freq_controller = false;
diff --git a/drivers/gpu/nvgpu/include/nvgpu/enabled.h b/drivers/gpu/nvgpu/include/nvgpu/enabled.h
index 4f2284ee6..85d8fa557 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/enabled.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/enabled.h
@@ -203,10 +203,14 @@ struct gk20a;
 
 /* NVGPU_GPU_IOCTL_SET_MMU_DEBUG_MODE is available */
 #define NVGPU_SUPPORT_SET_CTX_MMU_DEBUG_MODE	78
+
+/* DGPU Thermal Alert */
+#define NVGPU_SUPPORT_DGPU_THERMAL_ALERT	79
+
 /*
  * Must be greater than the largest bit offset in the above list.
  */
-#define NVGPU_MAX_ENABLED_BITS			79U
+#define NVGPU_MAX_ENABLED_BITS			80U
 
 /**
  * nvgpu_is_enabled - Check if the passed flag is enabled.
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_clk_arb.c b/drivers/gpu/nvgpu/os/linux/ioctl_clk_arb.c
index 5a97f182f..340e67f06 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_clk_arb.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_clk_arb.c
@@ -175,7 +175,7 @@ static inline u32 __pending_event(struct nvgpu_clk_dev *dev,
 	if (_WRAPGTEQ(tail, head) && info) {
 		head++;
 		p_notif = &dev->queue.notifications[head % dev->queue.size];
-		events |= nvgpu_convert_gpu_event(p_notif->notification);
+		events = p_notif->notification;
 		info->event_id = ffs(events) - 1;
 		info->timestamp = p_notif->timestamp;
 		nvgpu_atomic_set(&dev->queue.head, head);
diff --git a/drivers/gpu/nvgpu/os/linux/module.c b/drivers/gpu/nvgpu/os/linux/module.c
index d7fc9881c..18f3bccbb 100644
--- a/drivers/gpu/nvgpu/os/linux/module.c
+++ b/drivers/gpu/nvgpu/os/linux/module.c
@@ -28,6 +28,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 
@@ -284,6 +285,115 @@ void gk20a_init_linux_characteristics(struct gk20a *g)
 	}
 }
 
+/*
+ * Deferred half of the thermal alert: forwards the alarm to the clock
+ * arbiter, sleeps for event_delay * 1000 via nvgpu_msleep() and then
+ * re-enables the interrupt line that therm_irq() disabled.
+ */
+static void therm_alert_work_queue(struct work_struct *work)
+{
+	struct dgpu_thermal_alert *thermal_alert =
+		container_of(work, struct dgpu_thermal_alert, work);
+	struct nvgpu_os_linux *l =
+		container_of(thermal_alert, struct nvgpu_os_linux,
+				thermal_alert);
+	struct gk20a *g = &l->g;
+
+	nvgpu_clk_arb_send_thermal_alarm(g);
+	nvgpu_msleep(l->thermal_alert.event_delay * 1000U);
+	enable_irq(l->thermal_alert.therm_alert_irq);
+}
+
+/*
+ * Hard IRQ handler for the thermal alert line. Disables the line without
+ * waiting and hands the rest of the work to therm_alert_work_queue().
+ */
+static irqreturn_t therm_irq(int irq, void *dev_id)
+{
+	struct nvgpu_os_linux *l = (struct nvgpu_os_linux *)dev_id;
+
+	disable_irq_nosync(irq);
+	queue_work(l->thermal_alert.workqueue, &l->thermal_alert.work);
+	return IRQ_HANDLED;
+}
+
+/*
+ * Set up the dGPU thermal alert interrupt: read the alert GPIO and its
+ * optional properties from the "nvgpu" device-tree node, create the work
+ * queue used by therm_irq() and request the GPIO's interrupt line.
+ *
+ * Returns 0 on success or when a previous call already completed the
+ * setup, otherwise a negative errno. On failure no work queue is left
+ * behind, so a later call retries from scratch.
+ */
+static int nvgpu_request_therm_irq(struct nvgpu_os_linux *l)
+{
+	struct device_node *np;
+	int ret, gpio, irq;
+	u32 irq_flags = IRQ_TYPE_NONE;
+	u32 event_delay = 10U;
+
+	/* A live work queue means an earlier call already did the setup. */
+	if (l->thermal_alert.workqueue != NULL) {
+		return 0;
+	}
+
+	np = of_find_node_by_name(NULL, "nvgpu");
+	if (!np) {
+		return -ENOENT;
+	}
+
+	gpio = of_get_named_gpio(np, "nvgpu-therm-gpios", 0);
+	if (gpio < 0) {
+		nvgpu_err(&l->g, "failed to get GPIO %d ", gpio);
+		ret = gpio;
+		goto put_node;
+	}
+
+	/* gpio_to_irq() reports failure as a negative errno. */
+	irq = gpio_to_irq(gpio);
+	if (irq < 0) {
+		nvgpu_err(&l->g, "failed to map GPIO %d to an IRQ %d",
+				gpio, irq);
+		ret = irq;
+		goto put_node;
+	}
+	l->thermal_alert.therm_alert_irq = (u32)irq;
+
+	/* Both properties are optional; the defaults above are kept. */
+	if (of_property_read_u32(np, "alert-interrupt-level", &irq_flags))
+		nvgpu_info(&l->g, "Missing interrupt-level "
+				"prop using %d", irq_flags);
+	if (of_property_read_u32(np, "alert-event-interval", &event_delay))
+		nvgpu_info(&l->g, "Missing event-interval "
+				"prop using %d seconds ", event_delay);
+
+	l->thermal_alert.event_delay = event_delay;
+
+	l->thermal_alert.workqueue = alloc_workqueue("%s",
+				WQ_HIGHPRI, 1, "dgpu_thermal_alert");
+	if (l->thermal_alert.workqueue == NULL) {
+		nvgpu_err(&l->g, "failed to allocate thermal work queue");
+		ret = -ENOMEM;
+		goto put_node;
+	}
+	INIT_WORK(&l->thermal_alert.work, therm_alert_work_queue);
+
+	ret = devm_request_irq(l->dev, l->thermal_alert.therm_alert_irq,
+			therm_irq, irq_flags, "dgpu_therm", l);
+	if (ret != 0) {
+		nvgpu_err(&l->g, "IRQ request failed");
+		/* Drop the queue so a later call retries the setup. */
+		destroy_workqueue(l->thermal_alert.workqueue);
+		l->thermal_alert.workqueue = NULL;
+	}
+
+put_node:
+	/* of_find_node_by_name() took a reference on np; release it. */
+	of_node_put(np);
+	return ret;
+}
+
+
 int gk20a_pm_finalize_poweron(struct device *dev)
 {
 	struct gk20a *g = get_gk20a(dev);
@@ -331,6 +441,16 @@ int gk20a_pm_finalize_poweron(struct device *dev)
 		g->sim->sim_init_late(g);
 	}
 
+	if (nvgpu_is_enabled(g, NVGPU_SUPPORT_DGPU_THERMAL_ALERT) &&
+		nvgpu_platform_is_silicon(g)) {
+		err = nvgpu_request_therm_irq(l);
+		if (err) {
+			nvgpu_err(g, "thermal interrupt request failed %d",
+				err);
+			goto done;
+		}
+	}
+
 	err = gk20a_finalize_poweron(g);
 	if (err)
 		goto done;
diff --git a/drivers/gpu/nvgpu/os/linux/os_linux.h b/drivers/gpu/nvgpu/os/linux/os_linux.h
index e13019ae8..bb6a926c4 100644
--- a/drivers/gpu/nvgpu/os/linux/os_linux.h
+++ b/drivers/gpu/nvgpu/os/linux/os_linux.h
@@ -62,10 +62,17 @@ struct nvgpu_os_linux_ops {
 	} s_param;
 };
 
+struct dgpu_thermal_alert {
+	struct workqueue_struct *workqueue;
+	struct work_struct work;
+	u32 therm_alert_irq;
+	u32 event_delay;
+};
+
 struct nvgpu_os_linux {
 	struct gk20a g;
 	struct device *dev;
-
+	struct dgpu_thermal_alert thermal_alert;
 	struct {
 		struct cdev cdev;
 		struct device *node;
diff --git a/drivers/gpu/nvgpu/os/linux/pci.c b/drivers/gpu/nvgpu/os/linux/pci.c
index 9c348a191..c7c6fdf77 100644
--- a/drivers/gpu/nvgpu/os/linux/pci.c
+++ b/drivers/gpu/nvgpu/os/linux/pci.c
@@ -558,6 +558,32 @@ err_free_l:
 	return err;
 }
 
+/*
+ * Tear down the thermal alert path: release the alert IRQ and destroy
+ * the work queue that services it. Does nothing when no work queue
+ * exists.
+ */
+static void nvgpu_thermal_deinit(struct gk20a *g)
+{
+	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+	struct device *dev = dev_from_gk20a(g);
+
+	/* No work queue means the alert IRQ was never set up. */
+	if (l->thermal_alert.workqueue == NULL) {
+		return;
+	}
+
+	/* dev_id must be the cookie handed to devm_request_irq(), i.e. l. */
+	devm_free_irq(dev, l->thermal_alert.therm_alert_irq, l);
+	/*
+	 * NOTE(review): a work item already running may still call
+	 * enable_irq() on the line released above - confirm this is benign.
+	 */
+	cancel_work_sync(&l->thermal_alert.work);
+	destroy_workqueue(l->thermal_alert.workqueue);
+	l->thermal_alert.workqueue = NULL;
+}
+
 static void nvgpu_pci_remove(struct pci_dev *pdev)
 {
 	struct gk20a *g = get_gk20a(&pdev->dev);
@@ -576,6 +602,11 @@ static void nvgpu_pci_remove(struct pci_dev *pdev)
 
 	gk20a_driver_start_unload(g);
 
+	if (nvgpu_is_enabled(g, NVGPU_SUPPORT_DGPU_THERMAL_ALERT) &&
+		nvgpu_platform_is_silicon(g)) {
+		nvgpu_thermal_deinit(g);
+	}
+
 	err = nvgpu_quiesce(g);
 	/* TODO: handle failure to idle */
 	WARN(err, "gpu failed to idle during driver removal");