gpu: nvgpu: rework ptimer scale APIs

common.ptimer unit right now exposes two APIs -
scale_ptimer() to scale the timer
ptimer_scalingfactor10x() to get the scaling factor

receiving scaling factor is not really necessary for user of
common.ptimer since it can be internally calculated in scale_ptimer()
function itself.

Hence make ptimer_scalingfactor10x() static and rename public API
scale_ptimer() to nvgpu_ptimer_scale()

nvgpu_ptimer_scale() will now accept the timeout value as a parameter
and return the scaled timeout value through another pointer parameter.
An error code is returned if the timeout value is invalid.

Jira NVGPU-6394

Change-Id: Ib882d99f6096c3af5f96eef298d713fb5e36dd87
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2546970
(cherry picked from commit 2da7c918efe91046818c83481664312e194ead8e)
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2551334
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Deepak Nibade
2021-06-18 13:09:23 +05:30
committed by mobile promotions
parent b38b8a794d
commit 02943a63b4
8 changed files with 67 additions and 57 deletions

View File

@@ -73,6 +73,7 @@ static u32 nvgpu_runlist_append_tsg(struct gk20a *g,
struct nvgpu_channel *ch;
u32 count = 0;
u32 timeslice;
int err;
nvgpu_log_fn(f->g, " ");
@@ -87,8 +88,10 @@ static u32 nvgpu_runlist_append_tsg(struct gk20a *g,
* timeslice is measured with PTIMER.
* On some platforms, PTIMER is lower than 1GHz.
*/
timeslice = scale_ptimer(tsg->timeslice_us,
ptimer_scalingfactor10x(g->ptimer_src_freq));
err = nvgpu_ptimer_scale(g, tsg->timeslice_us, &timeslice);
if (err != 0) {
return RUNLIST_APPEND_FAILURE;
}
g->ops.runlist.get_tsg_entry(tsg, *runlist_entry, timeslice);

View File

@@ -20,13 +20,42 @@
* DEALINGS IN THE SOFTWARE.
*/
#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA
#include <nvgpu/ptimer.h>
#include <nvgpu/timers.h>
#include <nvgpu/gk20a.h>
#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA
#include <nvgpu/timers.h>
#include <nvgpu/nvgpu_init.h>
#include <nvgpu/power_features/cg.h>
#endif
/*
 * Compute the ptimer scale factor multiplied by 10, i.e.
 * (PTIMER_REF_FREQ_HZ * 10) / ptimer_src_freq.
 *
 * The arithmetic is done on 64-bit operands and the quotient is narrowed
 * back to u32 through nvgpu_safe_cast_u64_to_u32().
 *
 * ptimer_src_freq is used as the divisor, so the caller must make sure it
 * is non-zero before calling.
 */
static u32 ptimer_scalingfactor10x(u32 ptimer_src_freq)
{
	u32 scale10x;

	scale10x = nvgpu_safe_cast_u64_to_u32(
			(U64(PTIMER_REF_FREQ_HZ) * U64(10)) /
			U64(ptimer_src_freq));

	return scale10x;
}
/*
 * Scale a ptimer based timeout by the 10x scale factor derived from
 * g->ptimer_src_freq.
 *
 * g              - GPU driver struct; g->ptimer_src_freq is asserted to be
 *                  non-zero.
 * timeout        - value to scale; must not exceed U32_MAX / 10 because it
 *                  is multiplied by 10 before the division.
 * scaled_timeout - receives (timeout * 10) / scale10x, incremented by one
 *                  when the remainder is at least scale10x / 2. It is only
 *                  written on success.
 *
 * Returns 0 on success, -EINVAL when timeout is larger than U32_MAX / 10.
 */
int nvgpu_ptimer_scale(struct gk20a *g, u32 timeout, u32 *scaled_timeout)
{
	u32 scale10x;
	u32 timeout10x;
	u32 scaled;

	nvgpu_assert(g->ptimer_src_freq != 0U);
	scale10x = ptimer_scalingfactor10x(g->ptimer_src_freq);
	nvgpu_assert(scale10x != 0U);

	/* Reject values for which the multiplication by 10 would wrap. */
	if (timeout > U32_MAX / 10U) {
		return -EINVAL;
	}

	timeout10x = timeout * 10U;
	scaled = timeout10x / scale10x;

	/* Round up once the remainder reaches half of the scale factor. */
	if ((timeout10x % scale10x) >= (scale10x / 2U)) {
		scaled = scaled + 1U;
	}

	*scaled_timeout = scaled;

	return 0;
}
#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA
int nvgpu_ptimer_init(struct gk20a *g)
{
#if defined(CONFIG_NVGPU_NON_FUSA)

View File

@@ -97,6 +97,7 @@ static void ga10b_fifo_ctxsw_timeout_disable_and_clear(struct gk20a *g,
void ga10b_fifo_ctxsw_timeout_enable(struct gk20a *g, bool enable)
{
u32 timeout;
u32 scaled_timeout;
nvgpu_log_fn(g, " ");
@@ -104,10 +105,9 @@ void ga10b_fifo_ctxsw_timeout_enable(struct gk20a *g, bool enable)
if (nvgpu_platform_is_silicon(g)) {
timeout = nvgpu_safe_mult_u32(
g->ctxsw_timeout_period_ms, MS_TO_US);
timeout = scale_ptimer(timeout,
ptimer_scalingfactor10x(g->ptimer_src_freq));
nvgpu_assert(nvgpu_ptimer_scale(g, timeout, &scaled_timeout) == 0);
timeout =
runlist_engine_ctxsw_timeout_config_period_f(timeout) |
runlist_engine_ctxsw_timeout_config_period_f(scaled_timeout) |
runlist_engine_ctxsw_timeout_config_detection_enabled_f();
} else {
timeout =

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2011-2019, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -36,13 +36,13 @@
void gk20a_fifo_ctxsw_timeout_enable(struct gk20a *g, bool enable)
{
u32 timeout;
u32 scaled_timeout;
if (enable) {
timeout = g->ctxsw_timeout_period_ms * 1000U; /* in us */
timeout = scale_ptimer(timeout,
ptimer_scalingfactor10x(g->ptimer_src_freq));
timeout |= fifo_eng_timeout_detection_enabled_f();
nvgpu_writel(g, fifo_eng_timeout_r(), timeout);
nvgpu_assert(nvgpu_ptimer_scale(g, timeout, &scaled_timeout) == 0);
scaled_timeout |= fifo_eng_timeout_detection_enabled_f();
nvgpu_writel(g, fifo_eng_timeout_r(), scaled_timeout);
} else {
timeout = nvgpu_readl(g, fifo_eng_timeout_r());
timeout &= ~(fifo_eng_timeout_detection_enabled_f());

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -37,6 +37,7 @@
void gv11b_fifo_ctxsw_timeout_enable(struct gk20a *g, bool enable)
{
u32 timeout;
u32 scaled_timeout;
if (enable) {
/* clear ctxsw timeout interrupts */
@@ -44,10 +45,9 @@ void gv11b_fifo_ctxsw_timeout_enable(struct gk20a *g, bool enable)
if (nvgpu_platform_is_silicon(g)) {
timeout = g->ctxsw_timeout_period_ms * 1000U;
timeout = scale_ptimer(timeout,
ptimer_scalingfactor10x(g->ptimer_src_freq));
timeout |= fifo_eng_ctxsw_timeout_detection_enabled_f();
nvgpu_writel(g, fifo_eng_ctxsw_timeout_r(), timeout);
nvgpu_assert(nvgpu_ptimer_scale(g, timeout, &scaled_timeout) == 0);
scaled_timeout |= fifo_eng_ctxsw_timeout_detection_enabled_f();
nvgpu_writel(g, fifo_eng_ctxsw_timeout_r(), scaled_timeout);
} else {
timeout = nvgpu_readl(g, fifo_eng_ctxsw_timeout_r());
nvgpu_log_info(g,

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -44,11 +44,12 @@ struct nvgpu_cpu_time_correlation_sample {
#define PTIMER_REF_FREQ_HZ 31250000U
/**
* @brief Computes the ptimer scaling factor. This API allows setting the ptimer
* appropriately before using it to enforce different timeouts or
* scheduling timeslices.
* @brief Scales the ptimer based timeout value as per the ptimer scale factor.
* Units like common.fifo use this API to scale the timeouts and
* scheduling timeslices enforced by it using the GPU ptimer.
*
* @param ptimer_src_freq [in] source frequency to ptimer
* @param timeout [in] Time value captured using ptimer reference clock
* @param scaled_timeout [out] Scaled time value after scaling with scale factor
*
* 1. The ptimer has a resolution of 32 ns and so requires a reference frequency
* of:
@@ -59,7 +60,7 @@ struct nvgpu_cpu_time_correlation_sample {
* 2. If the source frequency to ptimer is different than the above reference
* frequency, we need to get the scaling factor as:
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* Scale_factor = ptimer_ref_freq / ptimer_src_freq
* scale_factor = ptimer_ref_freq / ptimer_src_freq
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*
* 3. The scale_factor is multiplied by 10, so that we get an additional digit
@@ -68,53 +69,28 @@ struct nvgpu_cpu_time_correlation_sample {
* 4. For example,
* - On Maxwell, the ptimer source frequency is 19.2 MHz.
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* scaling_factor_10x = (31250000 * 10)/ 19200000 = 16
* scale_factor = (31250000 * 10)/ 19200000 = 16
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* - On Volta, ptimer_source frequency = 31250000 Hz = ptimer_ref_frequency.
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* scaling_factor_10x = 10
* scale_factor = 10
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*
* @return Scale factor between ptimer reference and source frequency with
* one digit decimal precision.
*/
static inline u32 ptimer_scalingfactor10x(u32 ptimer_src_freq)
{
nvgpu_assert(ptimer_src_freq != 0U);
return nvgpu_safe_cast_u64_to_u32((U64(PTIMER_REF_FREQ_HZ) * U64(10))
/ U64(ptimer_src_freq));
}
/**
* @brief Scales back the ptimer based timeout value as per the scale factor.
* Units like common.fifo use this API to scale the timeouts and
* scheduling timeslices enforced by it using the GPU ptimer.
*
* @param timeout [in] Time value captured using ptimer reference clock
* @param scale10x [in] The scale factor multiplied by 10 to be used for
* scaling the ptimer based timeout value.
*
* 1. When the ptimer source frequency is not same as expected ptimer reference
* 5. When the ptimer source frequency is not same as expected ptimer reference
* frequency, we need to scale the ptimer based time value. The scaled value
* is calculated as follows:
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* Scaled value = timeout / scale_factor.
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*
* 2. To retain 1 digit decimal precision, the above equation is calculated
* after multiplication by 10.
* 6. To retain 1 digit decimal precision, the above equation is calculated
* after multiplication by 10. And because of that maximum acceptable
* value of \a timeout can be (U32_MAX / 10).
*
* @return Scaled \a timeout value as per \a scale10x
* @return 0 in case of success, < 0 in case of failure.
* @retval -EINVAL in case invalid \a timeout value is passed.
*/
static inline u32 scale_ptimer(u32 timeout , u32 scale10x)
{
nvgpu_assert(scale10x != 0U);
if ((nvgpu_safe_mult_u32(timeout, 10U) % scale10x) >= (scale10x/2U)) {
return ((timeout * 10U) / scale10x) + 1U;
} else {
return (timeout * 10U) / scale10x;
}
}
int nvgpu_ptimer_scale(struct gk20a *g, u32 timeout, u32 *scaled_timeout);
#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA
int nvgpu_get_timestamps_zipper(struct gk20a *g,

View File

@@ -635,6 +635,7 @@ nvgpu_posix_is_fault_injection_triggered
nvgpu_posix_probe
nvgpu_posix_register_io
nvgpu_pte_words
nvgpu_ptimer_scale
nvgpu_queue_alloc
nvgpu_queue_free
nvgpu_queue_in_locked

View File

@@ -652,6 +652,7 @@ nvgpu_posix_is_fault_injection_triggered
nvgpu_posix_probe
nvgpu_posix_register_io
nvgpu_pte_words
nvgpu_ptimer_scale
nvgpu_queue_alloc
nvgpu_queue_free
nvgpu_queue_in_locked