mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-24 10:34:43 +03:00
gpu: nvgpu: rework ptimer scale APIs
common.ptimer unit right now exposes two APIs: scale_ptimer() to scale the timer, and ptimer_scalingfactor10x() to get the scaling factor. Receiving the scaling factor is not really necessary for users of common.ptimer since it can be calculated internally in the scale_ptimer() function itself. Hence make ptimer_scalingfactor10x() static and rename the public API scale_ptimer() to nvgpu_ptimer_scale(). nvgpu_ptimer_scale() will now accept the timeout value as a parameter and return the scaled timeout value in another pointer parameter. An error code is returned if the timeout value is invalid. Jira NVGPU-6394 Change-Id: Ib882d99f6096c3af5f96eef298d713fb5e36dd87 Signed-off-by: Deepak Nibade <dnibade@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2546970 (cherry picked from commit 2da7c918efe91046818c83481664312e194ead8e) Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2551334 Tested-by: mobile promotions <svcmobile_promotions@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
b38b8a794d
commit
02943a63b4
@@ -73,6 +73,7 @@ static u32 nvgpu_runlist_append_tsg(struct gk20a *g,
|
||||
struct nvgpu_channel *ch;
|
||||
u32 count = 0;
|
||||
u32 timeslice;
|
||||
int err;
|
||||
|
||||
nvgpu_log_fn(f->g, " ");
|
||||
|
||||
@@ -87,8 +88,10 @@ static u32 nvgpu_runlist_append_tsg(struct gk20a *g,
|
||||
* timeslice is measured with PTIMER.
|
||||
* On some platforms, PTIMER is lower than 1GHz.
|
||||
*/
|
||||
timeslice = scale_ptimer(tsg->timeslice_us,
|
||||
ptimer_scalingfactor10x(g->ptimer_src_freq));
|
||||
err = nvgpu_ptimer_scale(g, tsg->timeslice_us, &timeslice);
|
||||
if (err != 0) {
|
||||
return RUNLIST_APPEND_FAILURE;
|
||||
}
|
||||
|
||||
g->ops.runlist.get_tsg_entry(tsg, *runlist_entry, timeslice);
|
||||
|
||||
|
||||
@@ -20,13 +20,42 @@
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA
|
||||
#include <nvgpu/ptimer.h>
|
||||
#include <nvgpu/timers.h>
|
||||
#include <nvgpu/gk20a.h>
|
||||
#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA
|
||||
#include <nvgpu/timers.h>
|
||||
#include <nvgpu/nvgpu_init.h>
|
||||
#include <nvgpu/power_features/cg.h>
|
||||
#endif
|
||||
|
||||
static u32 ptimer_scalingfactor10x(u32 ptimer_src_freq)
|
||||
{
|
||||
return nvgpu_safe_cast_u64_to_u32((U64(PTIMER_REF_FREQ_HZ) * U64(10))
|
||||
/ U64(ptimer_src_freq));
|
||||
}
|
||||
|
||||
int nvgpu_ptimer_scale(struct gk20a *g, u32 timeout, u32 *scaled_timeout)
|
||||
{
|
||||
u32 scale10x;
|
||||
|
||||
nvgpu_assert(g->ptimer_src_freq != 0U);
|
||||
scale10x = ptimer_scalingfactor10x(g->ptimer_src_freq);
|
||||
nvgpu_assert(scale10x != 0U);
|
||||
|
||||
if (timeout > U32_MAX / 10U) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (((timeout * 10U) % scale10x) >= (scale10x / 2U)) {
|
||||
*scaled_timeout = ((timeout * 10U) / scale10x) + 1U;
|
||||
} else {
|
||||
*scaled_timeout = (timeout * 10U) / scale10x;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA
|
||||
int nvgpu_ptimer_init(struct gk20a *g)
|
||||
{
|
||||
#if defined(CONFIG_NVGPU_NON_FUSA)
|
||||
|
||||
@@ -97,6 +97,7 @@ static void ga10b_fifo_ctxsw_timeout_disable_and_clear(struct gk20a *g,
|
||||
void ga10b_fifo_ctxsw_timeout_enable(struct gk20a *g, bool enable)
|
||||
{
|
||||
u32 timeout;
|
||||
u32 scaled_timeout;
|
||||
|
||||
nvgpu_log_fn(g, " ");
|
||||
|
||||
@@ -104,10 +105,9 @@ void ga10b_fifo_ctxsw_timeout_enable(struct gk20a *g, bool enable)
|
||||
if (nvgpu_platform_is_silicon(g)) {
|
||||
timeout = nvgpu_safe_mult_u32(
|
||||
g->ctxsw_timeout_period_ms, MS_TO_US);
|
||||
timeout = scale_ptimer(timeout,
|
||||
ptimer_scalingfactor10x(g->ptimer_src_freq));
|
||||
nvgpu_assert(nvgpu_ptimer_scale(g, timeout, &scaled_timeout) == 0);
|
||||
timeout =
|
||||
runlist_engine_ctxsw_timeout_config_period_f(timeout) |
|
||||
runlist_engine_ctxsw_timeout_config_period_f(scaled_timeout) |
|
||||
runlist_engine_ctxsw_timeout_config_detection_enabled_f();
|
||||
} else {
|
||||
timeout =
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2011-2019, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -36,13 +36,13 @@
|
||||
void gk20a_fifo_ctxsw_timeout_enable(struct gk20a *g, bool enable)
|
||||
{
|
||||
u32 timeout;
|
||||
u32 scaled_timeout;
|
||||
|
||||
if (enable) {
|
||||
timeout = g->ctxsw_timeout_period_ms * 1000U; /* in us */
|
||||
timeout = scale_ptimer(timeout,
|
||||
ptimer_scalingfactor10x(g->ptimer_src_freq));
|
||||
timeout |= fifo_eng_timeout_detection_enabled_f();
|
||||
nvgpu_writel(g, fifo_eng_timeout_r(), timeout);
|
||||
nvgpu_assert(nvgpu_ptimer_scale(g, timeout, &scaled_timeout) == 0);
|
||||
scaled_timeout |= fifo_eng_timeout_detection_enabled_f();
|
||||
nvgpu_writel(g, fifo_eng_timeout_r(), scaled_timeout);
|
||||
} else {
|
||||
timeout = nvgpu_readl(g, fifo_eng_timeout_r());
|
||||
timeout &= ~(fifo_eng_timeout_detection_enabled_f());
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -37,6 +37,7 @@
|
||||
void gv11b_fifo_ctxsw_timeout_enable(struct gk20a *g, bool enable)
|
||||
{
|
||||
u32 timeout;
|
||||
u32 scaled_timeout;
|
||||
|
||||
if (enable) {
|
||||
/* clear ctxsw timeout interrupts */
|
||||
@@ -44,10 +45,9 @@ void gv11b_fifo_ctxsw_timeout_enable(struct gk20a *g, bool enable)
|
||||
|
||||
if (nvgpu_platform_is_silicon(g)) {
|
||||
timeout = g->ctxsw_timeout_period_ms * 1000U;
|
||||
timeout = scale_ptimer(timeout,
|
||||
ptimer_scalingfactor10x(g->ptimer_src_freq));
|
||||
timeout |= fifo_eng_ctxsw_timeout_detection_enabled_f();
|
||||
nvgpu_writel(g, fifo_eng_ctxsw_timeout_r(), timeout);
|
||||
nvgpu_assert(nvgpu_ptimer_scale(g, timeout, &scaled_timeout) == 0);
|
||||
scaled_timeout |= fifo_eng_ctxsw_timeout_detection_enabled_f();
|
||||
nvgpu_writel(g, fifo_eng_ctxsw_timeout_r(), scaled_timeout);
|
||||
} else {
|
||||
timeout = nvgpu_readl(g, fifo_eng_ctxsw_timeout_r());
|
||||
nvgpu_log_info(g,
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -44,11 +44,12 @@ struct nvgpu_cpu_time_correlation_sample {
|
||||
#define PTIMER_REF_FREQ_HZ 31250000U
|
||||
|
||||
/**
|
||||
* @brief Computes the ptimer scaling factor. This API allows setting the ptimer
|
||||
* appropriately before using it to enforce different timeouts or
|
||||
* scheduling timeslices.
|
||||
* @brief Scales the ptimer based timeout value as per the ptimer scale factor.
|
||||
* Units like common.fifo use this API to scale the timeouts and
|
||||
* scheduling timeslices enforced by it using the GPU ptimer.
|
||||
*
|
||||
* @param ptimer_src_freq [in] source frequency to ptimer
|
||||
* @param timeout [in] Time value captured using ptimer reference clock
|
||||
* @param scaled_timeout [out] Scaled time value after scaling with scale factor
|
||||
*
|
||||
* 1. The ptimer has a resolution of 32 ns and so requires a reference frequency
|
||||
* of:
|
||||
@@ -59,7 +60,7 @@ struct nvgpu_cpu_time_correlation_sample {
|
||||
* 2. If the source frequency to ptimer is different than the above reference
|
||||
* frequency, we need to get the scaling factor as:
|
||||
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
* Scale_factor = ptimer_ref_freq / ptimer_src_freq
|
||||
* scale_factor = ptimer_ref_freq / ptimer_src_freq
|
||||
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
*
|
||||
* 3. The scale_factor is multiplied by 10, so that we get an additional digit
|
||||
@@ -68,53 +69,28 @@ struct nvgpu_cpu_time_correlation_sample {
|
||||
* 4. For example,
|
||||
* - On Maxwell, the ptimer source frequency is 19.2 MHz.
|
||||
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
* scaling_factor_10x = (31250000 * 10)/ 19200000 = 16
|
||||
* scale_factor = (31250000 * 10)/ 19200000 = 16
|
||||
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
* - On Volta, ptimer_source frequency = 31250000 Hz = ptimer_ref_frequency.
|
||||
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
* scaling_factor_10x = 10
|
||||
* scale_factor = 10
|
||||
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
*
|
||||
* @return Scale factor between ptimer reference and source frequency with
|
||||
* one digit decimal precision.
|
||||
*/
|
||||
static inline u32 ptimer_scalingfactor10x(u32 ptimer_src_freq)
|
||||
{
|
||||
nvgpu_assert(ptimer_src_freq != 0U);
|
||||
return nvgpu_safe_cast_u64_to_u32((U64(PTIMER_REF_FREQ_HZ) * U64(10))
|
||||
/ U64(ptimer_src_freq));
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Scales back the ptimer based timeout value as per the scale factor.
|
||||
* Units like common.fifo use this API to scale the timeouts and
|
||||
* scheduling timeslices enforced by it using the GPU ptimer.
|
||||
*
|
||||
* @param timeout [in] Time value captured using ptimer reference clock
|
||||
* @param scale10x [in] The scale factor multiplied by 10 to be used for
|
||||
* scaling the ptimer based timeout value.
|
||||
*
|
||||
* 1. When the ptimer source frequency is not same as expected ptimer reference
|
||||
* 5. When the ptimer source frequency is not same as expected ptimer reference
|
||||
* frequency, we need to scale the ptimer based time value. The scaled value
|
||||
* is calculated as follows:
|
||||
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
* Scaled value = timeout / scale_factor.
|
||||
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
*
|
||||
* 2. To retain 1 digit decimal precision, the above equation is calculated
|
||||
* after multiplication by 10.
|
||||
* 6. To retain 1 digit decimal precision, the above equation is calculated
|
||||
* after multiplication by 10. Because of that, the maximum acceptable
|
||||
* value of \a timeout is (U32_MAX / 10).
|
||||
*
|
||||
* @return Scaled \a timeout value as per \a scale10x
|
||||
* @return 0 in case of success, < 0 in case of failure.
|
||||
* @retval -EINVAL in case invalid \a timeout value is passed.
|
||||
*/
|
||||
static inline u32 scale_ptimer(u32 timeout , u32 scale10x)
|
||||
{
|
||||
nvgpu_assert(scale10x != 0U);
|
||||
if ((nvgpu_safe_mult_u32(timeout, 10U) % scale10x) >= (scale10x/2U)) {
|
||||
return ((timeout * 10U) / scale10x) + 1U;
|
||||
} else {
|
||||
return (timeout * 10U) / scale10x;
|
||||
}
|
||||
}
|
||||
int nvgpu_ptimer_scale(struct gk20a *g, u32 timeout, u32 *scaled_timeout);
|
||||
|
||||
#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA
|
||||
int nvgpu_get_timestamps_zipper(struct gk20a *g,
|
||||
|
||||
@@ -635,6 +635,7 @@ nvgpu_posix_is_fault_injection_triggered
|
||||
nvgpu_posix_probe
|
||||
nvgpu_posix_register_io
|
||||
nvgpu_pte_words
|
||||
nvgpu_ptimer_scale
|
||||
nvgpu_queue_alloc
|
||||
nvgpu_queue_free
|
||||
nvgpu_queue_in_locked
|
||||
|
||||
@@ -652,6 +652,7 @@ nvgpu_posix_is_fault_injection_triggered
|
||||
nvgpu_posix_probe
|
||||
nvgpu_posix_register_io
|
||||
nvgpu_pte_words
|
||||
nvgpu_ptimer_scale
|
||||
nvgpu_queue_alloc
|
||||
nvgpu_queue_free
|
||||
nvgpu_queue_in_locked
|
||||
|
||||
Reference in New Issue
Block a user