diff --git a/dag_vecMath_trig_constexpr.h b/dag_vecMath_trig_constexpr.h new file mode 100644 index 0000000..361708e --- /dev/null +++ b/dag_vecMath_trig_constexpr.h @@ -0,0 +1,175 @@ +#pragma once + +constexpr long double const_pi = 3.1415926535897932384626433832795028841972L; +constexpr long double const_half_pi = 1.5707963267948966192313216916397514420986L; + +template constexpr bool v_isnan(const T x) noexcept { return x != x; } + +constexpr float v_infinity() noexcept { return __builtin_huge_val(); } + +template constexpr bool v_isneginf(const T x) noexcept { return (x == -__builtin_huge_val()); } + +template constexpr bool v_isposinf(const T x) noexcept { return (x == __builtin_huge_val()); } + +template constexpr bool v_isinf(const T x) noexcept { return (v_isneginf(x) || v_isposinf(x)); } + +template constexpr bool v_isfinite(const T x) noexcept { return (!v_isnan(x)) && (!v_isinf(x)); } + +template constexpr T v_abs_c(const T x) noexcept { return (x == T(0) ? T(0) : (x < T(0)) ? (-x) : x); } +constexpr bool v_isodd_c(const int64_t x) noexcept { return (x & 1U) != 0; } + +namespace detail_pow_integral { +// forward declaration +template constexpr T1 pow_integral_compute(const T1 base, const T2 exp_term) noexcept; + +// integral-valued powers using method described in +// https://en.wikipedia.org/wiki/Exponentiation_by_squaring + +template constexpr T1 pow_integral_compute_recur(const T1 base, const T1 val, const T2 exp_term) noexcept { + if (exp_term > T2(1)) { + if (v_isodd_c(exp_term)) { + return pow_integral_compute_recur(base * base, val * base, exp_term / 2); + } + + return pow_integral_compute_recur(base * base, val, exp_term / 2); + } else if (exp_term == T2(1)) { + return (val * base); + } + + return val; +} + +template +constexpr T1 pow_integral_sgn_check(const T1 base, const T2 exp_term) noexcept { + return (pow_integral_compute_recur(base, T1(1), exp_term)); +} + +template constexpr T1 pow_integral_compute(const T1 base, const T2 exp_term) noexcept { + if (exp_term == T2(3)) { + return (base * base * base); + } else if (exp_term == T2(2)) { + return (base * base); + } else if (exp_term == T2(1)) { + return base; + } else if (exp_term == T2(0)) { + return T1(1); + } else if (exp_term == std::numeric_limits::min()) { + return T1(0); + } else if (exp_term == std::numeric_limits::max()) { + return v_infinity(); + } else { + return pow_integral_sgn_check(base, exp_term); + } +} + +template +constexpr T1 _pow_integral(const T1 base, const T2 exp_term) noexcept { + return pow_integral_compute(base, static_cast(exp_term)); +} + +} // namespace detail_pow_integral + +namespace detail_floor { + +template constexpr int floor_resid(const T x, const T x_whole) noexcept { return ((x < T(0)) && (x < x_whole)); } + +template constexpr T floor_int(const T x, const T x_whole) noexcept { return (x_whole - static_cast(floor_resid(x, x_whole))); } + +template constexpr T _floor(const T x) noexcept { + return (v_isnan(x) ? std::numeric_limits::quiet_NaN() : !v_isfinite(x) ? x : std::numeric_limits::min() > v_abs_c(x) ? x : floor_int(x, T(static_cast(x)))); +} + +} // namespace detail_floor + +namespace detail_tan { +// this is based on a fourth-order expansion of tan(z) using Bernoulli numbers +template constexpr T tan_series_exp_long(const T x) noexcept { + return (-1 / x // iter 1 + + (x / 3 // iter 2 + + (detail_pow_integral::_pow_integral(x, 3) / 45 // iter 3 + + (2 * detail_pow_integral::_pow_integral(x, 5) / 945 // iter 4 + + detail_pow_integral::_pow_integral(x, 7) / 4725) // iter 5 + ))); +} + +template constexpr T tan_series_exp(const T x) noexcept { + // the value tan(pi/2) is somewhat of a convention; + // technically the function is not defined at EXACTLY pi/2, + // but this is floating point pi/2 + // otherwise we use an expansion around pi/2 + return (std::numeric_limits::min() > v_abs_c(x - T(const_half_pi)) ? T(1.633124e+16) : tan_series_exp_long(x - T(const_half_pi))); +} + +template constexpr T tan_cf_recur(const T xx, const int depth, const int max_depth) noexcept { + return (depth < max_depth ? T(2 * depth - 1) - xx / tan_cf_recur(xx, depth + 1, max_depth) : T(2 * depth - 1)); +} + +template constexpr T tan_cf_main(const T x) noexcept { + return (((x > T(1.55) && x < T(1.60))) + ? tan_series_exp(x) + : x > T(1.4) ? x / tan_cf_recur(x * x, 1, 45) : x > T(1) ? x / tan_cf_recur(x * x, 1, 35) : x / tan_cf_recur(x * x, 1, 25)); +} + +template constexpr T tan_begin(const T x, const int count = 0) noexcept { + return (x > T(const_pi) ? count > 1 ? std::numeric_limits::quiet_NaN() : (tan_begin(x - T(const_pi) * detail_floor::_floor(x / T(const_pi)), count + 1)) + : tan_cf_main(x)); +} + +template constexpr T _tan(const T x) noexcept { + return (v_isnan(x) ? std::numeric_limits::quiet_NaN() : (std::numeric_limits::min() > v_abs_c(x)) ? T(0) : x < T(0) ? (-tan_begin(-x)) : tan_begin(x)); +} +} // namespace detail_tan + +namespace detail_cos { + +template constexpr T cos_impl(const T x) noexcept { return (T(1) - x * x) / (T(1) + x * x); } + +template constexpr T _cos(const T x) noexcept { + return (v_isnan(x) ? std::numeric_limits::quiet_NaN() + : (std::numeric_limits::min() > v_abs_c(x)) + ? T(1) // indistinguishable from 0 + : (std::numeric_limits::min() > v_abs_c(x - T(const_half_pi))) + ? T(0) // special cases: pi/2 and pi + : (std::numeric_limits::min() > v_abs_c(x + T(const_half_pi))) + ? T(0) + : (std::numeric_limits::min() > v_abs_c(x - T(const_pi))) + ? -T(1) + : (std::numeric_limits::min() > v_abs_c(x + T(const_pi))) ? -T(1) : cos_impl(detail_tan::_tan(x / T(2)))); +} +} // namespace detail_cos + +namespace detail_sin { +template constexpr T sin_compute(const T x) noexcept { return T(2) * x / (T(1) + x * x); } + +template constexpr T _sin(const T x) noexcept { + return (v_isnan(x) ? std::numeric_limits::quiet_NaN() + : std::numeric_limits::min() > v_abs_c(x) + ? T(0) + : std::numeric_limits::min() > v_abs_c(x - T(const_half_pi)) // special cases: pi/2 and pi + ? T(1) + : std::numeric_limits::min() > v_abs_c(x + T(const_half_pi)) + ? -T(1) + : std::numeric_limits::min() > v_abs_c(x - T(const_pi)) + ? T(0) + : std::numeric_limits::min() > v_abs_c(x + T(const_pi)) ? -T(0) : sin_compute(detail_tan::_tan(x / T(2)))); +} + +} // namespace detail_sin + +template constexpr T v_tan_c(const T x) noexcept { return detail_tan::_tan(static_cast(x)); } + constexpr vec4f v_tan_m(const vec4f a) noexcept { return vec4f{detail_tan::_tan(a.m128_f32[0]), + detail_tan::_tan(a.m128_f32[1]), + detail_tan::_tan(a.m128_f32[2]), + detail_tan::_tan(a.m128_f32[3])}; } + +template constexpr T v_cos_c(const T x) noexcept { return detail_cos::_cos(static_cast(x)); } + constexpr vec4f v_cos_m(const vec4f a) noexcept { return vec4f{detail_cos::_cos(a.m128_f32[0]), + detail_cos::_cos(a.m128_f32[1]), + detail_cos::_cos(a.m128_f32[2]), + detail_cos::_cos(a.m128_f32[3])}; } + +template constexpr T v_sin_c(const T x) noexcept { return detail_sin::_sin(static_cast(x)); } + constexpr vec4f v_sin_m(const vec4f a) noexcept { return vec4f{detail_sin::_sin(a.m128_f32[0]), + detail_sin::_sin(a.m128_f32[1]), + detail_sin::_sin(a.m128_f32[2]), + detail_sin::_sin(a.m128_f32[3])}; } \ No newline at end of file diff --git a/dag_vecMath_trig_constexpr_unittest.h b/dag_vecMath_trig_constexpr_unittest.h new file mode 100644 index 0000000..eaedc4b --- /dev/null +++ b/dag_vecMath_trig_constexpr_unittest.h @@ -0,0 +1,89 @@ +#include + +#include "dag_vecMath.h" +#include "dag_vecMath_pc_sse.h" +#include "dag_vecMath_trig_constexpr.h" +#include "dag_vecMath_trig.h" + +#pragma warning( push ) +#pragma warning( disable : 4995 ) // Allow deprecated functions, cos/sin/log etc... +#pragma warning( disable : 4220 ) // Disable warning treated as an error, for this test case only. + +#include // Using to check our compile-time math with std implementation. +void test_constexpr_trig() { +#define COMPILETIME_TEST_COMPARE_VALS(a, b, ...) \ + { \ + constexpr auto avalue = a(__VA_ARGS__); \ + const auto bvalue = b(__VA_ARGS__); \ + assert(v_abs_c(avalue - bvalue) < 0.0000001); \ + } + +#define COMPILETIME_TEST_COMPARE_MVALS(a, b, ...) \ + { \ + constexpr auto avalue = a(__VA_ARGS__); \ + const auto bvalue = b(__VA_ARGS__); \ + assert(v_abs_c(avalue.m128_f32[0] - bvalue.m128_f32[0]) < 0.00001); \ + assert(v_abs_c(avalue.m128_f32[1] - bvalue.m128_f32[1]) < 0.00001); \ + assert(v_abs_c(avalue.m128_f32[2] - bvalue.m128_f32[2]) < 0.00001); \ + assert(v_abs_c(avalue.m128_f32[3] - bvalue.m128_f32[3]) < 0.00001); \ + } + + // abs + COMPILETIME_TEST_COMPARE_VALS(v_abs_c, std::fabs, 0.0); + COMPILETIME_TEST_COMPARE_VALS(v_abs_c, std::fabs, -0.0); + COMPILETIME_TEST_COMPARE_VALS(v_abs_c, std::fabs, 1.0); + COMPILETIME_TEST_COMPARE_VALS(v_abs_c, std::fabs, -1.0); + + // cos + constexpr vec4f point0{ 0.f, 1.f, 2.f, 3.f }; + constexpr vec4f point1{ -1.5f, -1.0f, 1.0f, 1.5f }; + constexpr vec4f point2{ 0.0f, 0.0001f, 1.0001f, 1.5f }; + constexpr vec4f point3{ 1.5f, -1.5f, 0.0001f, 0.0f }; + + COMPILETIME_TEST_COMPARE_VALS(v_cos_c, std::cos, -1.5); + COMPILETIME_TEST_COMPARE_VALS(v_cos_c, std::cos, 0.0); + + // cos vec4f + auto cos_m = [](vec4f p) { return vec4f{ std::cos(p.m128_f32[0]), std::cos(p.m128_f32[1]), std::cos(p.m128_f32[2]), std::cos(p.m128_f32[3]) }; }; + COMPILETIME_TEST_COMPARE_MVALS(v_cos_m, cos_m, point1); + COMPILETIME_TEST_COMPARE_MVALS(v_cos_m, cos_m, point0); + + // tan + + COMPILETIME_TEST_COMPARE_VALS(v_tan_c, std::tan, 0.0); + COMPILETIME_TEST_COMPARE_VALS(v_tan_c, std::tan, 0.001); + COMPILETIME_TEST_COMPARE_VALS(v_tan_c, std::tan, 1.001); + COMPILETIME_TEST_COMPARE_VALS(v_tan_c, std::tan, 1.5); + COMPILETIME_TEST_COMPARE_VALS(v_tan_c, std::tan, -1.5); + + // tan vec4f + auto tan_m = [](vec4f p) { return vec4f{ std::tan(p.m128_f32[0]), std::tan(p.m128_f32[1]), std::tan(p.m128_f32[2]), std::tan(p.m128_f32[3]) }; }; + COMPILETIME_TEST_COMPARE_MVALS(v_tan_m, tan_m, point2); + COMPILETIME_TEST_COMPARE_MVALS(v_tan_m, tan_m, point3); + + + // sin + COMPILETIME_TEST_COMPARE_VALS(v_sin_c, std::sin, -1.5f); + COMPILETIME_TEST_COMPARE_VALS(v_sin_c, std::sin, 0.0f); + COMPILETIME_TEST_COMPARE_VALS(v_sin_c, std::sin, 0.001f); + COMPILETIME_TEST_COMPARE_VALS(v_sin_c, std::sin, 1.001f); + COMPILETIME_TEST_COMPARE_VALS(v_sin_c, std::sin, 1.5f); + COMPILETIME_TEST_COMPARE_VALS(v_sin_c, std::sin, 11.1f); + + // sin vec4f + auto sin_m = [](vec4f p) { return vec4f{ std::sin(p.m128_f32[0]), std::sin(p.m128_f32[1]), std::sin(p.m128_f32[2]), std::sin(p.m128_f32[3]) }; }; + COMPILETIME_TEST_COMPARE_MVALS(v_sin_m, sin_m, point0); + COMPILETIME_TEST_COMPARE_MVALS(v_sin_m, sin_m, point1); + + // last check + // rad: 0.f 1.f 2.f 3.f + // val: 0.00000000 0.841471016 0.909297407 0.141120046} + constexpr vec4f gold_sin = v_sin_m(vec4f{0.f, 1.f, 2.f, 3.f}); + assert(gold_sin.m128_f32[0] < 0.000001 + && v_abs_c(gold_sin.m128_f32[1] - 0.841471016) < 0.000001 + && v_abs_c(gold_sin.m128_f32[2] - 0.909297407) < 0.000001 + && v_abs_c(gold_sin.m128_f32[3] - 0.141120046) < 0.000001); + + printf("All tests passed: OK"); +} +#pragma warning(pop)