/*
 * SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef __NV_SOFTFLOAT_H__
#define __NV_SOFTFLOAT_H__

/*
 * This header file provides utility code built on top of the softfloat floating
 * point emulation library.
 */

#include "softfloat.h"
#include "nvtypes.h"
#include "platform.h"

/*
 * float32_t stores the bit pattern for a 32-bit single-precision IEEE floating
 * point value in a structure containing a uint32_t:
 *
 *   typedef struct { uint32_t v; } float32_t;
 *
 * In some cases, clients pass in a 32-bit single-precision IEEE floating
 * point value in an NvU32.
 *
 * Define functions to change the "view" between an NvU32 and a float32_t.
 */
/*
 * Reinterpret the bits of an NvU32 as a float32_t.
 *
 * No numeric conversion takes place: 'u' is stored verbatim in the 'v'
 * member of the returned structure.
 */
INLINE float32_t NvU32viewAsF32(NvU32 u)
{
    float32_t result;

    result.v = u;

    return result;
}

/*
 * Reinterpret a float32_t as an NvU32.
 *
 * No numeric conversion takes place: the 'v' member of 'f' is returned as-is.
 */
INLINE NvU32 F32viewAsNvU32(float32_t f)
{
    const NvU32 bits = f.v;

    return bits;
}

/*
 * Reinterpret a float16_t as an NvU16.
 *
 * No numeric conversion takes place: the 'v' member of 'f' is returned as-is.
 */
INLINE NvU16 F16viewAsNvU16(float16_t f)
{
    const NvU16 bits = f.v;

    return bits;
}

/*
 * Convert the value of a float32_t to an NvU16.
 *
 * The conversion requires several steps:
 *
 * - Clamp the float32_t value to the [0,NV_U16_MAX] range of NvU16.
 *
 * - Map NaN to 0.  The clamping is built on f32_lt(), which reports false
 *   whenever an operand is NaN, so a NaN input passes through both clamps
 *   unchanged and has to be handled explicitly.
 *
 * - Use softfloat to convert the float32_t to ui32, rounding towards zero.
 *
 * - Due to the steps above, the value in the ui32 is in the range of NvU16
 *   and can be safely returned as NvU16.
 *
 * Returns the converted value; out-of-range inputs saturate to 0 or
 * NV_U16_MAX, and NaN yields 0.
 */
INLINE NvU16 F32toNvU16(float32_t f)
{
    const float32_t minF32 = NvU32viewAsF32(0);
    const float32_t maxF32 = ui32_to_f32(NV_U16_MAX);
    NvU32 bits;
    NvU32 u;

    /* clamp to zero: f = (f < minF32) ? minF32 : f */
    f = f32_lt(f, minF32) ? minF32 : f;

    /* clamp to NV_U16_MAX: f = (maxF32 < f) ? maxF32 : f */
    f = f32_lt(maxF32, f) ? maxF32 : f;

    /*
     * NaN (all exponent bits set, non-zero fraction) is not caught by the
     * comparisons above.  Converting it would produce softfloat's
     * platform-specific "from NaN" result, which may lie outside the NvU16
     * range, so return 0 instead.  This check is done after the clamps so the
     * comparisons still run exactly as before for every input.
     */
    bits = F32viewAsNvU32(f);
    if (((bits & 0x7F800000U) == 0x7F800000U) &&
        ((bits & 0x007FFFFFU) != 0U)) {
        return 0;
    }

    /*
     * The "_r_minMag" in "f32_to_ui32_r_minMag" means round "to minimum
     * magnitude" (i.e., round towards zero).
     *
     * The "exact = FALSE" argument means do not raise the inexact exception
     * flag, even if the conversion is inexact.
     *
     * For more on f32_to_ui32_r_minMag() semantics, see
     * drivers/common/softfloat/doc/SoftFloat.html
     */
    u = f32_to_ui32_r_minMag(f, NV_FALSE /* exact */);
    nvAssert(u <= NV_U16_MAX);

    return (NvU16) u;
}

/*
 * Compute (a * b) + (c * d) + e using float32_t softfloat arithmetic.
 *
 * Both products are formed first, then added together, and 'e' is added to
 * that sum last.
 */
INLINE float32_t F32_AxB_plus_CxD_plus_E(
    float32_t a,
    float32_t b,
    float32_t c,
    float32_t d,
    float32_t e)
{
    const float32_t productAB = f32_mul(a, b);
    const float32_t productCD = f32_mul(c, d);
    float32_t sum = f32_add(productAB, productCD);

    sum = f32_add(sum, e);

    return sum;
}

/*
 * Compute (a * b) - (c * d) using float32_t softfloat arithmetic.
 *
 * Both products are formed first; the second is then subtracted from the
 * first.
 */
INLINE float32_t F32_AxB_minus_CxD(
    float32_t a,
    float32_t b,
    float32_t c,
    float32_t d)
{
    const float32_t productAB = f32_mul(a, b);
    const float32_t productCD = f32_mul(c, d);
    const float32_t difference = f32_sub(productAB, productCD);

    return difference;
}

/*
 * Negate a float64_t: returns a * -1.
 *
 * The negation is carried out as a softfloat multiplication by the float64_t
 * representation of -1.
 */
INLINE float64_t F64_negate(float64_t a)
{
    float64_t minusOne;
    float64_t product;

    minusOne = i32_to_f64(-1);
    product = f64_mul(minusOne, a);

    return product;
}

/*
 * Scale an unsigned integer by 'maxf' and return the result as a float16_t.
 *
 * 'unorm' is converted to float32_t, divided by 'maxf', and the quotient is
 * then converted to float16_t.
 */
INLINE float16_t nvUnormToFp16(NvU16 unorm, float32_t maxf)
{
    float32_t value = ui32_to_f32(unorm);

    value = f32_div(value, maxf);

    return f32_to_f16(value);
}

/*
 * Convert 'unorm10' to a float16_t by dividing it by 1024.0f (see
 * nvUnormToFp16()).
 *
 * NOTE(review): judging by the name, 'unorm10' is presumably a 10-bit value
 * whose maximum is 1023; with a divisor of 1024 that maximum maps to slightly
 * less than 1.0. Confirm that 1024 (rather than 1023) is intended.
 */
INLINE float16_t nvUnorm10ToFp16(NvU16 unorm10)
{
    /* 0x44800000 is the IEEE single-precision bit pattern of 1024.0f. */
    const NvU32 divisorBits = 0x44800000U;
    const float32_t divisor = NvU32viewAsF32(divisorBits);

    return nvUnormToFp16(unorm10, divisor);
}

/*
 * Return the lesser of two float32_t values.
 *
 * 'a' is returned only when f32_lt(a, b) reports true; in every other case
 * (including when the two compare equal) 'b' is returned.
 */
INLINE float32_t f32_min(float32_t a, float32_t b)
{
    if (f32_lt(a, b)) {
        return a;
    }

    return b;
}

/*
 * Return the greater of two float32_t values.
 *
 * 'b' is returned only when f32_lt(a, b) reports true; in every other case
 * (including when the two compare equal) 'a' is returned.
 */
INLINE float32_t f32_max(float32_t a, float32_t b)
{
    if (f32_lt(a, b)) {
        return b;
    }

    return a;
}

#endif /* __NV_SOFTFLOAT_H__ */