From 781df861c9ccc2441c08568d79275dfed6de8c47 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Tue, 2 Dec 2025 17:13:14 +0700
Subject: [PATCH 01/12] Split the new quaternion HLSL code out of the HLSL path
 tracer example

---
 .../nbl/builtin/hlsl/math/quaternions.hlsl    | 305 ++++++++++++++++++
 src/nbl/builtin/CMakeLists.txt                |   1 +
 2 files changed, 306 insertions(+)
 create mode 100644 include/nbl/builtin/hlsl/math/quaternions.hlsl
diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl
new file mode 100644
index 0000000000..834d41cb54
--- /dev/null
+++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl
@@ -0,0 +1,305 @@
+// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h
+#ifndef _NBL_BUILTIN_HLSL_MATH_QUATERNIONS_INCLUDED_
+#define _NBL_BUILTIN_HLSL_MATH_QUATERNIONS_INCLUDED_
+
+#include "nbl/builtin/hlsl/cpp_compat.hlsl"
+#include "nbl/builtin/hlsl/tgmath.hlsl"
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace math
+{
+
+template<typename T>
+struct truncated_quaternion
+{
+    using this_t = truncated_quaternion<T>;
+    using scalar_type = T;
+    using data_type = vector<T, 3>;
+
+    static this_t create()
+    {
+        this_t q;
+        q.data = data_type(0.0, 0.0, 0.0);
+        return q;
+    }
+
+    data_type data;
+};
+
+template <typename T>
+struct quaternion
+{
+    using this_t = quaternion<T>;
+    using scalar_type = T;
+    using data_type = vector<T, 4>;
+    using vector3_type = vector<T, 3>;
+    using matrix_type = matrix<T, 3, 3>;
+
+    using AsUint = typename unsigned_integer_of_size<sizeof(scalar_type)>::type;
+
+    static this_t create()
+    {
+        this_t q;
+        q.data = data_type(0.0, 0.0, 0.0, 1.0);
+        return q;
+    }
+    
+    static this_t create(scalar_type x, scalar_type y, scalar_type z, scalar_type w)
+    {
+        this_t q;
+        q.data = data_type(x, y, z, w);
+        return q;
+    }
+
+    static this_t create(NBL_CONST_REF_ARG(this_t) other)
+    {
+        return other;
+    }
+
+    // angle: Rotation angle expressed in radians.
+    // axis: Rotation axis, must be normalized.
+    static this_t create(scalar_type angle, const vector3_type axis)
+    {
+        this_t q;
+        const scalar_type sinTheta = hlsl::sin(angle * 0.5);
+        const scalar_type cosTheta = hlsl::cos(angle * 0.5);
+        q.data = data_type(axis * sinTheta, cosTheta);
+        return q;
+    }
+
+
+    static this_t create(scalar_type pitch, scalar_type yaw, scalar_type roll)
+    {
+        const scalar_type rollDiv2 = roll * scalar_type(0.5);
+        const scalar_type sr = hlsl::sin(rollDiv2);
+        const scalar_type cr = hlsl::cos(rollDiv2);
+
+        const scalar_type pitchDiv2 = pitch * scalar_type(0.5);
+        const scalar_type sp = hlsl::sin(pitchDiv2);
+        const scalar_type cp = hlsl::cos(pitchDiv2);
+
+        const scalar_type yawDiv2 = yaw * scalar_type(0.5);
+        const scalar_type sy = hlsl::sin(yawDiv2);
+        const scalar_type cy = hlsl::cos(yawDiv2);
+
+        this_t output;
+        output.data[0] = cr * sp * cy + sr * cp * sy; // x
+        output.data[1] = cr * cp * sy - sr * sp * cy; // y
+        output.data[2] = sr * cp * cy - cr * sp * sy; // z
+        output.data[3] = cr * cp * cy + sr * sp * sy; // w
+
+        return output;
+    }
+
+    static this_t create(NBL_CONST_REF_ARG(matrix_type) m)
+    {
+        const scalar_type m00 = m[0][0], m11 = m[1][1], m22 = m[2][2];
+        const scalar_type neg_m00 = bit_cast<scalar_type>(bit_cast<AsUint>(m00)^0x80000000u);
+        const scalar_type neg_m11 = bit_cast<scalar_type>(bit_cast<AsUint>(m11)^0x80000000u);
+        const scalar_type neg_m22 = bit_cast<scalar_type>(bit_cast<AsUint>(m22)^0x80000000u);
+        const data_type Qx = data_type(m00, m00, neg_m00, neg_m00);
+        const data_type Qy = data_type(m11, neg_m11, m11, neg_m11);
+        const data_type Qz = data_type(m22, neg_m22, neg_m22, m22);
+
+        const data_type tmp = hlsl::promote<data_type>(1.0) + Qx + Qy + Qz;
+        const data_type invscales = hlsl::promote<data_type>(0.5) / hlsl::sqrt(tmp);
+        const data_type scales = tmp * invscales * hlsl::promote<data_type>(0.5);
+
+        // TODO: speed this up
+        this_t retval;
+        if (tmp.x > scalar_type(0.0))
+        {
+            retval.data.x = (m[2][1] - m[1][2]) * invscales.x;
+            retval.data.y = (m[0][2] - m[2][0]) * invscales.x;
+            retval.data.z = (m[1][0] - m[0][1]) * invscales.x;
+            retval.data.w = scales.x;
+        }
+        else
+        {
+            if (tmp.y > scalar_type(0.0))
+            {
+                retval.data.x = scales.y;
+                retval.data.y = (m[0][1] + m[1][0]) * invscales.y;
+                retval.data.z = (m[2][0] + m[0][2]) * invscales.y;
+                retval.data.w = (m[2][1] - m[1][2]) * invscales.y;
+            }
+            else if (tmp.z > scalar_type(0.0))
+            {
+                retval.data.x = (m[0][1] + m[1][0]) * invscales.z;
+                retval.data.y = scales.z;
+                retval.data.z = (m[0][2] - m[2][0]) * invscales.z;
+                retval.data.w = (m[1][2] + m[2][1]) * invscales.z;
+            }
+            else
+            {
+                retval.data.x = (m[0][2] + m[2][0]) * invscales.w;
+                retval.data.y = (m[1][2] + m[2][1]) * invscales.w;
+                retval.data.z = scales.w;
+                retval.data.w = (m[1][0] - m[0][1]) * invscales.w;
+            }
+        }
+
+        retval.data = hlsl::normalize(retval.data);
+        return retval;
+    }
+
+    static this_t create(NBL_CONST_REF_ARG(truncated_quaternion<T>) first3Components)
+    {
+        this_t retval;
+        retval.data.xyz = first3Components.data;
+        retval.data.w = hlsl::sqrt(scalar_type(1.0) - hlsl::dot(first3Components.data, first3Components.data));
+        return retval;
+    }
+
+    this_t operator*(scalar_type scalar)
+    {
+        this_t output;
+        output.data = data * scalar;
+        return output;
+    }
+
+    this_t operator*(NBL_CONST_REF_ARG(this_t) other)
+    {
+        return this_t::create(
+            data.w * other.data.w - data.x * other.x - data.y * other.data.y - data.z * other.data.z,
+            data.w * other.data.x + data.x * other.w + data.y * other.data.z - data.z * other.data.y,
+            data.w * other.data.y - data.x * other.z + data.y * other.data.w + data.z * other.data.x,
+            data.w * other.data.z + data.x * other.y - data.y * other.data.x + data.z * other.data.w
+        );
+    }
+
+    static this_t lerp(const this_t start, const this_t end, const scalar_type fraction, const scalar_type totalPseudoAngle)
+    {
+        const AsUint negationMask = hlsl::bit_cast<AsUint>(totalPseudoAngle) & AsUint(0x80000000u);
+        const data_type adjEnd = hlsl::bit_cast<scalar_type>(hlsl::bit_cast<AsUint>(end.data) ^ negationMask);
+
+        this_t retval;
+        retval.data = hlsl::mix(start.data, adjEnd, fraction);
+        return retval;
+    }
+
+    static this_t lerp(const this_t start, const this_t end, const scalar_type fraction)
+    {
+        return lerp(start, end, fraction, hlsl::dot(start.data, end.data));
+    }
+
+    static scalar_type __adj_interpolant(const scalar_type angle, const scalar_type fraction, const scalar_type interpolantPrecalcTerm2, const scalar_type interpolantPrecalcTerm3)
+    {
+        const scalar_type A = scalar_type(1.0904) + angle * (scalar_type(-3.2452) + angle * (scalar_type(3.55645) - angle * scalar_type(1.43519)));
+        const scalar_type B = scalar_type(0.848013) + angle * (scalar_type(-1.06021) + angle * scalar_type(0.215638));
+        const scalar_type k = A * interpolantPrecalcTerm2 + B;
+        return fraction + interpolantPrecalcTerm3 * k;
+    }
+
+    static this_t flerp(const this_t start, const this_t end, const scalar_type fraction)
+    {
+        const scalar_type pseudoAngle = hlsl::dot(start.data,end.data);
+        const scalar_type interpolantPrecalcTerm = fraction - scalar_type(0.5);
+        const scalar_type interpolantPrecalcTerm3 = fraction * interpolantPrecalcTerm * (fraction - scalar_type(1.0));
+        const scalar_type adjFrac = __adj_interpolant(hlsl::abs(pseudoAngle),fraction,interpolantPrecalcTerm*interpolantPrecalcTerm,interpolantPrecalcTerm3);
+        
+        this_t retval = lerp(start,end,adjFrac,pseudoAngle);
+        retval.data = hlsl::normalize(retval.data);
+        return retval;
+    }
+
+    vector3_type transformVector(const vector3_type v)
+    {
+        scalar_type scale = hlsl::length(data);
+        vector3_type direction = data.xyz;
+        return v * scale + hlsl::cross(direction, v * data.w + hlsl::cross(direction, v)) * scalar_type(2.0);
+    }
+
+    matrix_type constructMatrix()
+    {
+        matrix_type mat;
+        mat[0] = data.yzx * data.ywz + data.zxy * data.zyw * vector3_type( 1.0, 1.0,-1.0);
+        mat[1] = data.yzx * data.xzw + data.zxy * data.wxz * vector3_type(-1.0, 1.0, 1.0);
+        mat[2] = data.yzx * data.wyx + data.zxy * data.xwy * vector3_type( 1.0,-1.0, 1.0);
+        mat[0][0] = scalar_type(0.5) - mat[0][0];
+        mat[1][1] = scalar_type(0.5) - mat[1][1];
+        mat[2][2] = scalar_type(0.5) - mat[2][2];
+        mat *= scalar_type(2.0);
+        return hlsl::transpose(mat);    // TODO: double check transpose?
+    }
+
+    static vector3_type slerp_delta(const vector3_type start, const vector3_type preScaledWaypoint, scalar_type cosAngleFromStart)
+    {
+        vector3_type planeNormal = hlsl::cross(start,preScaledWaypoint);
+    
+        cosAngleFromStart *= scalar_type(0.5);
+        const scalar_type sinAngle = hlsl::sqrt(scalar_type(0.5) - cosAngleFromStart);
+        const scalar_type cosAngle = hlsl::sqrt(scalar_type(0.5) + cosAngleFromStart);
+        
+        planeNormal *= sinAngle;
+        const vector3_type precompPart = hlsl::cross(planeNormal, start) * scalar_type(2.0);
+
+        return precompPart * cosAngle + hlsl::cross(planeNormal, precompPart);
+    }
+
+    this_t inverse()
+    {
+        this_t retval;
+        retval.data.x = bit_cast<scalar_type>(bit_cast<AsUint>(data.x)^0x80000000u);
+        retval.data.y = bit_cast<scalar_type>(bit_cast<AsUint>(data.y)^0x80000000u);
+        retval.data.z = bit_cast<scalar_type>(bit_cast<AsUint>(data.z)^0x80000000u);
+        retval.data.w = data.w;
+        return retval;
+    }
+
+    static this_t normalize(NBL_CONST_REF_ARG(this_t) q)
+    {
+        this_t retval;
+        retval.data = hlsl::normalize(q.data);
+        return retval;
+    }
+
+    data_type data;
+};
+
+}
+
+namespace impl
+{
+
+template<typename T>
+struct static_cast_helper<math::quaternion<T>, math::truncated_quaternion<T> >
+{
+    static inline math::quaternion<T> cast(math::truncated_quaternion<T> q)
+    {
+        return math::quaternion<T>::create(q);
+    }
+};
+
+template<typename T>
+struct static_cast_helper<math::truncated_quaternion<T>, math::quaternion<T> >
+{
+    static inline math::truncated_quaternion<T> cast(math::quaternion<T> q)
+    {
+        math::truncated_quaternion<T> t;
+        t.data.x = t.data.x;
+        t.data.y = t.data.y;
+        t.data.z = t.data.z;
+        return t;
+    }
+};
+
+template<typename T>
+struct static_cast_helper<matrix<T,3,3>, math::quaternion<T> >
+{
+    static inline matrix<T,3,3> cast(math::quaternion<T> q)
+    {
+        return q.constructMatrix();
+    }
+};
+}
+
+}
+}
+
+#endif
diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt
index e8798499f9..37c5d2e43e 100644
--- a/src/nbl/builtin/CMakeLists.txt
+++ b/src/nbl/builtin/CMakeLists.txt
@@ -225,6 +225,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/geometry.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/intutil.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/polar.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/angle_adding.hlsl")
+LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/quaternions.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/quadratic.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/cubic.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/equations/quartic.hlsl")

From 1bf0616246f047c9be399181b319e1c5611e4617 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Thu, 11 Dec 2025 15:52:35 +0700
Subject: [PATCH 02/12] Remove redundant quaternion constructors and reorganize
 the remaining ones

---
 .../nbl/builtin/hlsl/math/quaternions.hlsl    | 63 ++++++++++---------
 1 file changed, 32 insertions(+), 31 deletions(-)

diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl
index 834d41cb54..6114949572 100644
--- a/include/nbl/builtin/hlsl/math/quaternions.hlsl
+++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl
@@ -48,22 +48,10 @@ struct quaternion
         q.data = data_type(0.0, 0.0, 0.0, 1.0);
         return q;
     }
-    
-    static this_t create(scalar_type x, scalar_type y, scalar_type z, scalar_type w)
-    {
-        this_t q;
-        q.data = data_type(x, y, z, w);
-        return q;
-    }
-
-    static this_t create(NBL_CONST_REF_ARG(this_t) other)
-    {
-        return other;
-    }
 
     // angle: Rotation angle expressed in radians.
     // axis: Rotation axis, must be normalized.
-    static this_t create(scalar_type angle, const vector3_type axis)
+    static this_t create(const vector3_type axis, scalar_type angle)
     {
         this_t q;
         const scalar_type sinTheta = hlsl::sin(angle * 0.5);
@@ -72,28 +60,39 @@ struct quaternion
         return q;
     }
 
-
-    static this_t create(scalar_type pitch, scalar_type yaw, scalar_type roll)
+    template<typename U=vector<scalar_type,2> NBL_FUNC_REQUIRES(is_same_v<vector<scalar_type,2>,U>)
+    static this_t create(const U halfPitchCosSin, const U halfYawCosSin, const U halfRollCosSin)
     {
-        const scalar_type rollDiv2 = roll * scalar_type(0.5);
-        const scalar_type sr = hlsl::sin(rollDiv2);
-        const scalar_type cr = hlsl::cos(rollDiv2);
+        const scalar_type cp = halfPitchCosSin.x;
+        const scalar_type sp = halfPitchCosSin.y;
 
-        const scalar_type pitchDiv2 = pitch * scalar_type(0.5);
-        const scalar_type sp = hlsl::sin(pitchDiv2);
-        const scalar_type cp = hlsl::cos(pitchDiv2);
+        const scalar_type cy = halfYawCosSin.x;
+        const scalar_type sy = halfYawCosSin.y;
 
-        const scalar_type yawDiv2 = yaw * scalar_type(0.5);
-        const scalar_type sy = hlsl::sin(yawDiv2);
-        const scalar_type cy = hlsl::cos(yawDiv2);
+        const scalar_type cr = halfRollCosSin.x;
+        const scalar_type sr = halfRollCosSin.y;
 
-        this_t output;
-        output.data[0] = cr * sp * cy + sr * cp * sy; // x
-        output.data[1] = cr * cp * sy - sr * sp * cy; // y
-        output.data[2] = sr * cp * cy - cr * sp * sy; // z
-        output.data[3] = cr * cp * cy + sr * sp * sy; // w
+        this_t q;
+        q.data[0] = cr * sp * cy + sr * cp * sy; // x
+        q.data[1] = cr * cp * sy - sr * sp * cy; // y
+        q.data[2] = sr * cp * cy - cr * sp * sy; // z
+        q.data[3] = cr * cp * cy + sr * sp * sy; // w
 
-        return output;
+        return q;
+    }
+
+    template<typename U=scalar_type NBL_FUNC_REQUIRES(is_same_v<scalar_type,U>)
+    static this_t create(const U pitch, const U yaw, const U roll)
+    {
+        const scalar_type halfPitch = pitch * scalar_type(0.5);
+        const scalar_type halfYaw = yaw * scalar_type(0.5);
+        const scalar_type halfRoll = roll * scalar_type(0.5);
+
+        return create(
+            vector<scalar_type,2>(hlsl::cos(halfPitch), hlsl::sin(halfPitch)),
+            vector<scalar_type,2>(hlsl::cos(halfYaw), hlsl::sin(halfYaw)),
+            vector<scalar_type,2>(hlsl::cos(halfRoll), hlsl::sin(halfRoll))
+        );
     }
 
     static this_t create(NBL_CONST_REF_ARG(matrix_type) m)
@@ -165,12 +164,14 @@ struct quaternion
 
     this_t operator*(NBL_CONST_REF_ARG(this_t) other)
     {
-        return this_t::create(
+        this_t retval;
+        retval.data = data_type(
             data.w * other.data.w - data.x * other.x - data.y * other.data.y - data.z * other.data.z,
             data.w * other.data.x + data.x * other.w + data.y * other.data.z - data.z * other.data.y,
             data.w * other.data.y - data.x * other.z + data.y * other.data.w + data.z * other.data.x,
             data.w * other.data.z + data.x * other.y - data.y * other.data.x + data.z * other.data.w
         );
+        return retval;
     }
 
     static this_t lerp(const this_t start, const this_t end, const scalar_type fraction, const scalar_type totalPseudoAngle)

From 8745a33514602e3a3089f588d2988dcb027fe733 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Thu, 11 Dec 2025 16:46:12 +0700
Subject: [PATCH 03/12] Add validity checks to quaternion creation from a
 rotation matrix

---
 .../nbl/builtin/hlsl/math/quaternions.hlsl    | 75 ++++++++++++++-----
 1 file changed, 56 insertions(+), 19 deletions(-)

diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl
index 6114949572..49ad0dde96 100644
--- a/include/nbl/builtin/hlsl/math/quaternions.hlsl
+++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl
@@ -95,8 +95,43 @@ struct quaternion
         );
     }
 
-    static this_t create(NBL_CONST_REF_ARG(matrix_type) m)
+    static bool __isEqual(const scalar_type a, const scalar_type b)
     {
+        return hlsl::max(a/b, b/a) <= scalar_type(1.0 + 1e-4); // the larger of the two ratios is >= 1 whenever a and b share a sign, so compare against 1 + tolerance
+    }
+    static bool __dotIsZero(const vector3_type a, const vector3_type b)
+    {
+        const scalar_type ab = hlsl::dot(a, b);
+        return hlsl::abs(ab) <= scalar_type(1e-4);
+    }
+
+    static this_t create(NBL_CONST_REF_ARG(matrix_type) m, const bool dontAssertValidMatrix=false)
+    {
+        {
+            // only orthogonal and uniform scale mats can be converted
+            bool valid = __dotIsZero(m[0], m[1]);
+            valid = __dotIsZero(m[1], m[2]) && valid;
+            valid = __dotIsZero(m[0], m[2]) && valid;
+
+            const matrix_type m_T = hlsl::transpose(m);
+            const scalar_type dotCol0 = hlsl::dot(m_T[0],m_T[0]);
+            const scalar_type dotCol1 = hlsl::dot(m_T[1],m_T[1]);
+            const scalar_type dotCol2 = hlsl::dot(m_T[2],m_T[2]);
+            valid = __isEqual(dotCol0, dotCol1) && valid;
+            valid = __isEqual(dotCol1, dotCol2) && valid;
+            valid = __isEqual(dotCol0, dotCol2) && valid;
+
+            // test the flag first: an unbraced nested `if` captures the `else`, which kept the assert from firing when it was requested
+            if (!dontAssertValidMatrix)
+                assert(valid);
+            else if (!valid)
+            {
+                this_t retval;
+                retval.data = hlsl::promote<data_type>(bit_cast<scalar_type>(numeric_limits<scalar_type>::quiet_NaN));
+                return retval; // every component is NaN so callers that opted out of the assert can detect the invalid matrix
+            }
+        }
+
         const scalar_type m00 = m[0][0], m11 = m[1][1], m22 = m[2][2];
         const scalar_type neg_m00 = bit_cast<scalar_type>(bit_cast<AsUint>(m00)^0x80000000u);
         const scalar_type neg_m11 = bit_cast<scalar_type>(bit_cast<AsUint>(m11)^0x80000000u);
@@ -106,40 +141,42 @@ struct quaternion
         const data_type Qz = data_type(m22, neg_m22, neg_m22, m22);
 
         const data_type tmp = hlsl::promote<data_type>(1.0) + Qx + Qy + Qz;
-        const data_type invscales = hlsl::promote<data_type>(0.5) / hlsl::sqrt(tmp);
-        const data_type scales = tmp * invscales * hlsl::promote<data_type>(0.5);
 
         // TODO: speed this up
         this_t retval;
         if (tmp.x > scalar_type(0.0))
         {
-            retval.data.x = (m[2][1] - m[1][2]) * invscales.x;
-            retval.data.y = (m[0][2] - m[2][0]) * invscales.x;
-            retval.data.z = (m[1][0] - m[0][1]) * invscales.x;
-            retval.data.w = scales.x;
+            const scalar_type invscales = scalar_type(0.5) / hlsl::sqrt(tmp.x);
+            retval.data.x = (m[2][1] - m[1][2]) * invscales;
+            retval.data.y = (m[0][2] - m[2][0]) * invscales;
+            retval.data.z = (m[1][0] - m[0][1]) * invscales;
+            retval.data.w = tmp.x * invscales * scalar_type(0.5);
         }
         else
         {
             if (tmp.y > scalar_type(0.0))
             {
-                retval.data.x = scales.y;
-                retval.data.y = (m[0][1] + m[1][0]) * invscales.y;
-                retval.data.z = (m[2][0] + m[0][2]) * invscales.y;
-                retval.data.w = (m[2][1] - m[1][2]) * invscales.y;
+                const scalar_type invscales = scalar_type(0.5) / hlsl::sqrt(tmp.y);
+                retval.data.x = tmp.y * invscales * scalar_type(0.5);
+                retval.data.y = (m[0][1] + m[1][0]) * invscales;
+                retval.data.z = (m[2][0] + m[0][2]) * invscales;
+                retval.data.w = (m[2][1] - m[1][2]) * invscales;
             }
             else if (tmp.z > scalar_type(0.0))
             {
-                retval.data.x = (m[0][1] + m[1][0]) * invscales.z;
-                retval.data.y = scales.z;
-                retval.data.z = (m[0][2] - m[2][0]) * invscales.z;
-                retval.data.w = (m[1][2] + m[2][1]) * invscales.z;
+                const scalar_type invscales = scalar_type(0.5) / hlsl::sqrt(tmp.z);
+                retval.data.x = (m[0][1] + m[1][0]) * invscales;
+                retval.data.y = tmp.z * invscales * scalar_type(0.5);
+                retval.data.z = (m[0][2] - m[2][0]) * invscales;
+                retval.data.w = (m[1][2] + m[2][1]) * invscales;
             }
             else
             {
-                retval.data.x = (m[0][2] + m[2][0]) * invscales.w;
-                retval.data.y = (m[1][2] + m[2][1]) * invscales.w;
-                retval.data.z = scales.w;
-                retval.data.w = (m[1][0] - m[0][1]) * invscales.w;
+                const scalar_type invscales = scalar_type(0.5) / hlsl::sqrt(tmp.w);
+                retval.data.x = (m[0][2] + m[2][0]) * invscales;
+                retval.data.y = (m[1][2] + m[2][1]) * invscales;
+                retval.data.z = tmp.w * invscales * scalar_type(0.5);
+                retval.data.w = (m[1][0] - m[0][1]) * invscales;
             }
         }
 

From 2a8451d73fab71fe283563cbcaff631c07f181e5 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Fri, 12 Dec 2025 11:15:26 +0700
Subject: [PATCH 04/12] Move normalize and static_cast into helper
 specializations; add normalized and unnormalized lerp/flerp variants

---
 .../nbl/builtin/hlsl/math/quaternions.hlsl    | 85 ++++++++++++-------
 1 file changed, 53 insertions(+), 32 deletions(-)

diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl
index 49ad0dde96..73dc977d62 100644
--- a/include/nbl/builtin/hlsl/math/quaternions.hlsl
+++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl
@@ -184,14 +184,6 @@ struct quaternion
         return retval;
     }
 
-    static this_t create(NBL_CONST_REF_ARG(truncated_quaternion<T>) first3Components)
-    {
-        this_t retval;
-        retval.data.xyz = first3Components.data;
-        retval.data.w = hlsl::sqrt(scalar_type(1.0) - hlsl::dot(first3Components.data, first3Components.data));
-        return retval;
-    }
-
     this_t operator*(scalar_type scalar)
     {
         this_t output;
@@ -211,19 +203,26 @@ struct quaternion
         return retval;
     }
 
-    static this_t lerp(const this_t start, const this_t end, const scalar_type fraction, const scalar_type totalPseudoAngle)
+    static this_t unnormLerp(const this_t start, const this_t end, const scalar_type fraction, const scalar_type totalPseudoAngle)
     {
-        const AsUint negationMask = hlsl::bit_cast<AsUint>(totalPseudoAngle) & AsUint(0x80000000u);
-        const data_type adjEnd = hlsl::bit_cast<scalar_type>(hlsl::bit_cast<AsUint>(end.data) ^ negationMask);
+        // TODO: benchmark uint sign flip vs just *sign(totalPseudoAngle)
+        const data_type adjEnd = ieee754::flipSignIfRHSNegative<data_type>(end.data, totalPseudoAngle);
 
         this_t retval;
         retval.data = hlsl::mix(start.data, adjEnd, fraction);
         return retval;
     }
 
+    static this_t unnormLerp(const this_t start, const this_t end, const scalar_type fraction)
+    {
+        return unnormLerp(start, end, fraction, hlsl::dot(start.data, end.data));
+    }
+
     static this_t lerp(const this_t start, const this_t end, const scalar_type fraction)
     {
-        return lerp(start, end, fraction, hlsl::dot(start.data, end.data));
+        this_t retval = unnormLerp(start, end, fraction);
+        retval.data = hlsl::normalize(retval.data);
+        return retval;
     }
 
     static scalar_type __adj_interpolant(const scalar_type angle, const scalar_type fraction, const scalar_type interpolantPrecalcTerm2, const scalar_type interpolantPrecalcTerm3)
@@ -234,26 +233,32 @@ struct quaternion
         return fraction + interpolantPrecalcTerm3 * k;
     }
 
-    static this_t flerp(const this_t start, const this_t end, const scalar_type fraction)
+    static this_t unnormFlerp(const this_t start, const this_t end, const scalar_type fraction)
     {
         const scalar_type pseudoAngle = hlsl::dot(start.data,end.data);
         const scalar_type interpolantPrecalcTerm = fraction - scalar_type(0.5);
         const scalar_type interpolantPrecalcTerm3 = fraction * interpolantPrecalcTerm * (fraction - scalar_type(1.0));
         const scalar_type adjFrac = __adj_interpolant(hlsl::abs(pseudoAngle),fraction,interpolantPrecalcTerm*interpolantPrecalcTerm,interpolantPrecalcTerm3);
         
-        this_t retval = lerp(start,end,adjFrac,pseudoAngle);
+        this_t retval = unnormLerp(start,end,adjFrac,pseudoAngle);
+        return retval;
+    }
+
+    static this_t flerp(const this_t start, const this_t end, const scalar_type fraction)
+    {       
+        this_t retval = unnormFlerp(start,end,adjFrac,pseudoAngle);
         retval.data = hlsl::normalize(retval.data);
         return retval;
     }
 
-    vector3_type transformVector(const vector3_type v)
+    vector3_type transformVector(const vector3_type v, const bool assumeNoScale=false) NBL_CONST_MEMBER_FUNC
     {
-        scalar_type scale = hlsl::length(data);
+        scalar_type scale = hlsl::mix(hlsl::length(data), scalar_type(1.0), assumeNoScale);
         vector3_type direction = data.xyz;
         return v * scale + hlsl::cross(direction, v * data.w + hlsl::cross(direction, v)) * scalar_type(2.0);
     }
 
-    matrix_type constructMatrix()
+    matrix_type constructMatrix() NBL_CONST_MEMBER_FUNC
     {
         matrix_type mat;
         mat[0] = data.yzx * data.ywz + data.zxy * data.zyw * vector3_type( 1.0, 1.0,-1.0);
@@ -280,23 +285,14 @@ struct quaternion
         return precompPart * cosAngle + hlsl::cross(planeNormal, precompPart);
     }
 
-    this_t inverse()
+    this_t inverse() NBL_CONST_MEMBER_FUNC
     {
         this_t retval;
-        retval.data.x = bit_cast<scalar_type>(bit_cast<AsUint>(data.x)^0x80000000u);
-        retval.data.y = bit_cast<scalar_type>(bit_cast<AsUint>(data.y)^0x80000000u);
-        retval.data.z = bit_cast<scalar_type>(bit_cast<AsUint>(data.z)^0x80000000u);
+        retval.data.xyz = -data.xyz; // conjugate: negate this quaternion's own xyz; retval is still uninitialized at this point
         retval.data.w = data.w;
         return retval;
     }
 
-    static this_t normalize(NBL_CONST_REF_ARG(this_t) q)
-    {
-        this_t retval;
-        retval.data = hlsl::normalize(q.data);
-        return retval;
-    }
-
     data_type data;
 };
 
@@ -305,19 +301,44 @@ struct quaternion
 namespace impl
 {
 
+template<typename T>
+struct normalize_helper<math::truncated_quaternion<T> >
+{
+    static inline math::truncated_quaternion<T> __call(const math::truncated_quaternion<T> q)
+    {
+        math::truncated_quaternion<T> retval;
+        retval.data = hlsl::normalize(q.data);
+        return retval;
+    }
+}
+
+template<typename T>
+struct normalize_helper<math::quaternion<T> >
+{
+    static inline math::quaternion<T> __call(const math::quaternion<T> q)
+    {
+        math::quaternion<T> retval;
+        retval.data = hlsl::normalize(q.data);
+        return retval;
+    }
+}
+
 template<typename T>
 struct static_cast_helper<math::quaternion<T>, math::truncated_quaternion<T> >
 {
-    static inline math::quaternion<T> cast(math::truncated_quaternion<T> q)
+    static inline math::quaternion<T> cast(const math::truncated_quaternion<T> q)
     {
-        return math::quaternion<T>::create(q);
+        math::quaternion<T> retval;
+        retval.data.xyz = q.data;
+        retval.data.w = hlsl::sqrt(T(1.0) - hlsl::dot(q.data, q.data)); // T, not scalar_type: no such alias is declared in this helper; rebuilds a non-negative w assuming unit length
+        return retval;
     }
 };
 
 template<typename T>
 struct static_cast_helper<math::truncated_quaternion<T>, math::quaternion<T> >
 {
-    static inline math::truncated_quaternion<T> cast(math::quaternion<T> q)
+    static inline math::truncated_quaternion<T> cast(const math::quaternion<T> q)
     {
         math::truncated_quaternion<T> t;
         t.data.x = t.data.x;
@@ -330,7 +351,7 @@ struct static_cast_helper<math::truncated_quaternion<T>, math::quaternion<T> >
 template<typename T>
 struct static_cast_helper<matrix<T,3,3>, math::quaternion<T> >
 {
-    static inline matrix<T,3,3> cast(math::quaternion<T> q)
+    static inline matrix<T,3,3> cast(const math::quaternion<T> q)
     {
         return q.constructMatrix();
     }

From a93fa2608f608574e17937bf42bdcdc75e17e291 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Fri, 12 Dec 2025 15:39:32 +0700
Subject: [PATCH 05/12] Fix quaternion bugs in flerp, constructMatrix and the
 normalize_helper specializations

---
 include/nbl/builtin/hlsl/math/quaternions.hlsl | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl
index 73dc977d62..91ee4975e3 100644
--- a/include/nbl/builtin/hlsl/math/quaternions.hlsl
+++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl
@@ -246,7 +246,7 @@ struct quaternion
 
     static this_t flerp(const this_t start, const this_t end, const scalar_type fraction)
     {       
-        this_t retval = unnormFlerp(start,end,adjFrac,pseudoAngle);
+        this_t retval = unnormFlerp(start,end,fraction);
         retval.data = hlsl::normalize(retval.data);
         return retval;
     }
@@ -267,8 +267,10 @@ struct quaternion
         mat[0][0] = scalar_type(0.5) - mat[0][0];
         mat[1][1] = scalar_type(0.5) - mat[1][1];
         mat[2][2] = scalar_type(0.5) - mat[2][2];
-        mat *= scalar_type(2.0);
-        return hlsl::transpose(mat);    // TODO: double check transpose?
+        mat[0] = mat[0] * scalar_type(2.0);
+        mat[1] = mat[1] * scalar_type(2.0);
+        mat[2] = mat[2] * scalar_type(2.0);
+        return mat;// hlsl::transpose(mat);    // TODO: double check transpose?
     }
 
     static vector3_type slerp_delta(const vector3_type start, const vector3_type preScaledWaypoint, scalar_type cosAngleFromStart)
@@ -298,9 +300,9 @@ struct quaternion
 
 }
 
-namespace impl
-{
 
+namespace cpp_compat_intrinsics_impl
+{
 template<typename T>
 struct normalize_helper<math::truncated_quaternion<T> >
 {
@@ -310,7 +312,7 @@ struct normalize_helper<math::truncated_quaternion<T> >
         retval.data = hlsl::normalize(q.data);
         return retval;
     }
-}
+};
 
 template<typename T>
 struct normalize_helper<math::quaternion<T> >
@@ -321,8 +323,11 @@ struct normalize_helper<math::quaternion<T> >
         retval.data = hlsl::normalize(q.data);
         return retval;
     }
+};
 }
 
+namespace impl
+{
 template<typename T>
 struct static_cast_helper<math::quaternion<T>, math::truncated_quaternion<T> >
 {

From 2f33aa03cbcdfaf20df6f26c6b6ffac39fb20dfd Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Tue, 6 Jan 2026 16:26:43 +0700
Subject: [PATCH 06/12] some fixes to quaternions

---
 include/nbl/builtin/hlsl/math/quaternions.hlsl | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl
index 91ee4975e3..be3b7b8ede 100644
--- a/include/nbl/builtin/hlsl/math/quaternions.hlsl
+++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl
@@ -184,21 +184,21 @@ struct quaternion
         return retval;
     }
 
-    this_t operator*(scalar_type scalar)
+    this_t operator*(scalar_type scalar) NBL_CONST_MEMBER_FUNC
     {
         this_t output;
         output.data = data * scalar;
         return output;
     }
 
-    this_t operator*(NBL_CONST_REF_ARG(this_t) other)
+    this_t operator*(NBL_CONST_REF_ARG(this_t) other) NBL_CONST_MEMBER_FUNC
     {
         this_t retval;
         retval.data = data_type(
-            data.w * other.data.w - data.x * other.x - data.y * other.data.y - data.z * other.data.z,
-            data.w * other.data.x + data.x * other.w + data.y * other.data.z - data.z * other.data.y,
-            data.w * other.data.y - data.x * other.z + data.y * other.data.w + data.z * other.data.x,
-            data.w * other.data.z + data.x * other.y - data.y * other.data.x + data.z * other.data.w
+            data.w * other.data.x + data.x * other.data.w + data.y * other.data.z - data.z * other.data.y,
+            data.w * other.data.y - data.x * other.data.z + data.y * other.data.w + data.z * other.data.x,
+            data.w * other.data.z + data.x * other.data.y - data.y * other.data.x + data.z * other.data.w,
+            data.w * other.data.w - data.x * other.data.x - data.y * other.data.y - data.z * other.data.z
         );
         return retval;
     }
@@ -270,7 +270,7 @@ struct quaternion
         mat[0] = mat[0] * scalar_type(2.0);
         mat[1] = mat[1] * scalar_type(2.0);
         mat[2] = mat[2] * scalar_type(2.0);
-        return mat;// hlsl::transpose(mat);    // TODO: double check transpose?
+        return mat;
     }
 
     static vector3_type slerp_delta(const vector3_type start, const vector3_type preScaledWaypoint, scalar_type cosAngleFromStart)
@@ -335,7 +335,7 @@ struct static_cast_helper<math::quaternion<T>, math::truncated_quaternion<T> >
     {
         math::quaternion<T> retval;
         retval.data.xyz = q.data;
-        retval.data.w = hlsl::sqrt(scalar_type(1.0) - hlsl::dot(q.data, q.data));
+        retval.data.w = hlsl::sqrt(T(1.0) - hlsl::dot(q.data, q.data));
         return retval;
     }
 };

From a22d46ae9506112f5ba5830cb81a9b7dbe9b3f81 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Wed, 7 Jan 2026 12:08:16 +0700
Subject: [PATCH 07/12] implement quaternion slerp (might need optimizing?)

---
 .../nbl/builtin/hlsl/math/quaternions.hlsl    | 29 +++++++++++++++++--
 1 file changed, 26 insertions(+), 3 deletions(-)

diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl
index be3b7b8ede..b54e1ad619 100644
--- a/include/nbl/builtin/hlsl/math/quaternions.hlsl
+++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl
@@ -206,10 +206,10 @@ struct quaternion
     static this_t unnormLerp(const this_t start, const this_t end, const scalar_type fraction, const scalar_type totalPseudoAngle)
     {
         // TODO: benchmark uint sign flip vs just *sign(totalPseudoAngle)
-        const data_type adjEnd = ieee754::flipSignIfRHSNegative<data_type>(end.data, totalPseudoAngle);
+        const data_type adjEnd = ieee754::flipSignIfRHSNegative<data_type>(end.data, hlsl::promote<data_type>(totalPseudoAngle));
 
         this_t retval;
-        retval.data = hlsl::mix(start.data, adjEnd, fraction);
+        retval.data = hlsl::mix(start.data, adjEnd, hlsl::promote<data_type>(fraction));
         return retval;
     }
 
@@ -287,10 +287,33 @@ struct quaternion
         return precompPart * cosAngle + hlsl::cross(planeNormal, precompPart);
     }
 
+    static this_t slerp(const this_t start, const this_t end, const scalar_type fraction, const scalar_type threshold = numeric_limits<scalar_type>::epsilon)
+    {
+        const scalar_type totalPseudoAngle = hlsl::dot(start.data, end.data);
+
+        // make sure we use the short rotation
+        const scalar_type cosA = ieee754::flipSignIfRHSNegative<scalar_type>(totalPseudoAngle, totalPseudoAngle);
+        if (cosA <= (scalar_type(1.0) - threshold)) // spherical interpolation
+        {
+            this_t retval;
+
+            const scalar_type A = hlsl::acos(cosA);
+            const scalar_type sinARcp  = scalar_type(1.0) / hlsl::sqrt(scalar_type(1.0) - cosA * cosA);
+            const scalar_type sinAt = hlsl::sin(fraction * A);
+            const data_type adjEnd = ieee754::flipSignIfRHSNegative<data_type>(end.data, hlsl::promote<data_type>(totalPseudoAngle));
+            retval.data = (hlsl::sin((scalar_type(1.0) - fraction) * A) * start.data + sinAt * adjEnd) * sinARcp;
+
+            return retval;
+        }
+        else
+            return unnormLerp(start, end, fraction, totalPseudoAngle);
+            // return hlsl::normalize(unnormLerp(start, end, fraction, totalPseudoAngle));
+    }
+
     this_t inverse() NBL_CONST_MEMBER_FUNC
     {
         this_t retval;
-        retval.data.xyz = -retval.data.xyz;
+        retval.data.xyz = -data.xyz;
         retval.data.w = data.w;
         return retval;
     }

From f71cca19d9d18f99cb4265c6d183999cd618c568 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Wed, 7 Jan 2026 13:56:40 +0700
Subject: [PATCH 08/12] minor optimization to slerp

---
 include/nbl/builtin/hlsl/math/quaternions.hlsl | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl
index b54e1ad619..b7f39f19fe 100644
--- a/include/nbl/builtin/hlsl/math/quaternions.hlsl
+++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl
@@ -296,18 +296,17 @@ struct quaternion
         if (cosA <= (scalar_type(1.0) - threshold)) // spherical interpolation
         {
             this_t retval;
-
-            const scalar_type A = hlsl::acos(cosA);
             const scalar_type sinARcp  = scalar_type(1.0) / hlsl::sqrt(scalar_type(1.0) - cosA * cosA);
-            const scalar_type sinAt = hlsl::sin(fraction * A);
+            const scalar_type sinAt = hlsl::sin(fraction * hlsl::acos(cosA));
+            const scalar_type sinAt_over_sinA = sinAt*sinARcp;
+            const scalar_type scale = hlsl::sqrt(scalar_type(1.0)-sinAt*sinAt) - sinAt_over_sinA*cosA; //cosAt-cos(A)sin(tA)/sin(A) = (sin(A)cos(tA)-cos(A)sin(tA))/sin(A)
             const data_type adjEnd = ieee754::flipSignIfRHSNegative<data_type>(end.data, hlsl::promote<data_type>(totalPseudoAngle));
-            retval.data = (hlsl::sin((scalar_type(1.0) - fraction) * A) * start.data + sinAt * adjEnd) * sinARcp;
+            retval.data = scale * start.data + sinAt_over_sinA * adjEnd;
 
             return retval;
         }
         else
             return unnormLerp(start, end, fraction, totalPseudoAngle);
-            // return hlsl::normalize(unnormLerp(start, end, fraction, totalPseudoAngle));
     }
 
     this_t inverse() NBL_CONST_MEMBER_FUNC

From c39c78a8e7b8d9709ddbc9de602d6dff5573d0da Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Wed, 7 Jan 2026 15:26:01 +0700
Subject: [PATCH 09/12] fix create from rotation matrix

---
 .../nbl/builtin/hlsl/math/quaternions.hlsl    | 27 +++++++++++--------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl
index b7f39f19fe..1f720b0247 100644
--- a/include/nbl/builtin/hlsl/math/quaternions.hlsl
+++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl
@@ -140,42 +140,47 @@ struct quaternion
         const data_type Qy = data_type(m11, neg_m11, m11, neg_m11);
         const data_type Qz = data_type(m22, neg_m22, neg_m22, m22);
 
-        const data_type tmp = hlsl::promote<data_type>(1.0) + Qx + Qy + Qz;
+        // const data_type tmp = hlsl::promote<data_type>(1.0) + Qx + Qy + Qz;
+        const data_type tmp = Qx + Qy + Qz;
 
         // TODO: speed this up
         this_t retval;
         if (tmp.x > scalar_type(0.0))
         {
-            const scalar_type invscales = scalar_type(0.5) / hlsl::sqrt(tmp.x);
+            const scalar_type scales = hlsl::sqrt(tmp.x + scalar_type(1.0));
+            const scalar_type invscales = scalar_type(0.5) / scales;
             retval.data.x = (m[2][1] - m[1][2]) * invscales;
             retval.data.y = (m[0][2] - m[2][0]) * invscales;
             retval.data.z = (m[1][0] - m[0][1]) * invscales;
-            retval.data.w = tmp.x * invscales * scalar_type(0.5);
+            retval.data.w = scales * scalar_type(0.5);
         }
         else
         {
             if (tmp.y > scalar_type(0.0))
             {
-                const scalar_type invscales = scalar_type(0.5) / hlsl::sqrt(tmp.y);
-                retval.data.x = tmp.y * invscales * scalar_type(0.5);
+                const scalar_type scales = hlsl::sqrt(tmp.y + scalar_type(1.0));
+                const scalar_type invscales = scalar_type(0.5) / scales;
+                retval.data.x = scales * scalar_type(0.5);
                 retval.data.y = (m[0][1] + m[1][0]) * invscales;
                 retval.data.z = (m[2][0] + m[0][2]) * invscales;
                 retval.data.w = (m[2][1] - m[1][2]) * invscales;
             }
             else if (tmp.z > scalar_type(0.0))
             {
-                const scalar_type invscales = scalar_type(0.5) / hlsl::sqrt(tmp.z);
+                const scalar_type scales = hlsl::sqrt(tmp.z + scalar_type(1.0));
+                const scalar_type invscales = scalar_type(0.5) / scales;
                 retval.data.x = (m[0][1] + m[1][0]) * invscales;
-                retval.data.y = tmp.z * invscales * scalar_type(0.5);
-                retval.data.z = (m[0][2] - m[2][0]) * invscales;
-                retval.data.w = (m[1][2] + m[2][1]) * invscales;
+                retval.data.y = scales * scalar_type(0.5);
+                retval.data.z = (m[1][2] + m[2][1]) * invscales;
+                retval.data.w = (m[0][2] - m[2][0]) * invscales;
             }
             else
             {
-                const scalar_type invscales = scalar_type(0.5) / hlsl::sqrt(tmp.w);
+                const scalar_type scales = hlsl::sqrt(tmp.w + scalar_type(1.0));
+                const scalar_type invscales = scalar_type(0.5) / scales;
                 retval.data.x = (m[0][2] + m[2][0]) * invscales;
                 retval.data.y = (m[1][2] + m[2][1]) * invscales;
-                retval.data.z = tmp.w * invscales * scalar_type(0.5);
+                retval.data.z = scales * scalar_type(0.5);
                 retval.data.w = (m[1][0] - m[0][1]) * invscales;
             }
         }

From 0b180c88b65d37acf0ccc0817d5d5d97cd6fdf74 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Wed, 7 Jan 2026 16:20:40 +0700
Subject: [PATCH 10/12] force constructor type with requires to avoid dxc
 implicit conversions

---
 include/nbl/builtin/hlsl/math/quaternions.hlsl | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl
index 1f720b0247..4be73482bb 100644
--- a/include/nbl/builtin/hlsl/math/quaternions.hlsl
+++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl
@@ -51,12 +51,13 @@ struct quaternion
 
     // angle: Rotation angle expressed in radians.
     // axis: Rotation axis, must be normalized.
-    static this_t create(const vector3_type axis, scalar_type angle)
+    template<typename U=vector3_type, typename F=scalar_type NBL_FUNC_REQUIRES(is_same_v<vector3_type,U> && is_same_v<scalar_type,F>)
+    static this_t create(const U axis, const F angle, const F uniformScale = scalar_type(1.0))
     {
         this_t q;
         const scalar_type sinTheta = hlsl::sin(angle * 0.5);
         const scalar_type cosTheta = hlsl::cos(angle * 0.5);
-        q.data = data_type(axis * sinTheta, cosTheta);
+        q.data = data_type(axis * sinTheta, cosTheta) * uniformScale;
         return q;
     }
 
@@ -301,7 +302,7 @@ struct quaternion
         if (cosA <= (scalar_type(1.0) - threshold)) // spherical interpolation
         {
             this_t retval;
-            const scalar_type sinARcp  = scalar_type(1.0) / hlsl::sqrt(scalar_type(1.0) - cosA * cosA);
+            const scalar_type sinARcp = scalar_type(1.0) / hlsl::sqrt(scalar_type(1.0) - cosA * cosA);
             const scalar_type sinAt = hlsl::sin(fraction * hlsl::acos(cosA));
             const scalar_type sinAt_over_sinA = sinAt*sinARcp;
             const scalar_type scale = hlsl::sqrt(scalar_type(1.0)-sinAt*sinAt) - sinAt_over_sinA*cosA; //cosAt-cos(A)sin(tA)/sin(A) = (sin(A)cos(tA)-cos(A)sin(tA))/sin(A)

From de1b0d1f5aa4cb4e1b3bd73330315f776de1c108 Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Wed, 7 Jan 2026 17:13:56 +0700
Subject: [PATCH 11/12] fixes to transformVector and other minor fixes

---
 .../nbl/builtin/hlsl/math/quaternions.hlsl    | 24 ++++++++++++-------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl
index 4be73482bb..34abb042fb 100644
--- a/include/nbl/builtin/hlsl/math/quaternions.hlsl
+++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl
@@ -134,9 +134,9 @@ struct quaternion
         }
 
         const scalar_type m00 = m[0][0], m11 = m[1][1], m22 = m[2][2];
-        const scalar_type neg_m00 = bit_cast<scalar_type>(bit_cast<AsUint>(m00)^0x80000000u);
-        const scalar_type neg_m11 = bit_cast<scalar_type>(bit_cast<AsUint>(m11)^0x80000000u);
-        const scalar_type neg_m22 = bit_cast<scalar_type>(bit_cast<AsUint>(m22)^0x80000000u);
+        const scalar_type neg_m00 = -m00;
+        const scalar_type neg_m11 = -m11;
+        const scalar_type neg_m22 = -m22;
         const data_type Qx = data_type(m00, m00, neg_m00, neg_m00);
         const data_type Qy = data_type(m11, neg_m11, m11, neg_m11);
         const data_type Qz = data_type(m22, neg_m22, neg_m22, m22);
@@ -186,7 +186,7 @@ struct quaternion
             }
         }
 
-        retval.data = hlsl::normalize(retval.data);
+        retval.data = hlsl::normalize(retval.data) / hlsl::sqrt(hlsl::dot(m[0], m[0])); // restore uniform scale
         return retval;
     }
 
@@ -211,6 +211,8 @@ struct quaternion
 
     static this_t unnormLerp(const this_t start, const this_t end, const scalar_type fraction, const scalar_type totalPseudoAngle)
     {
+        assert(hlsl::length(start.data) == scalar_type(1.0));
+        assert(hlsl::length(end.data) == scalar_type(1.0));
         // TODO: benchmark uint sign flip vs just *sign(totalPseudoAngle)
         const data_type adjEnd = ieee754::flipSignIfRHSNegative<data_type>(end.data, hlsl::promote<data_type>(totalPseudoAngle));
 
@@ -241,6 +243,9 @@ struct quaternion
 
     static this_t unnormFlerp(const this_t start, const this_t end, const scalar_type fraction)
     {
+        assert(hlsl::length(start.data) == scalar_type(1.0));
+        assert(hlsl::length(end.data) == scalar_type(1.0));
+
         const scalar_type pseudoAngle = hlsl::dot(start.data,end.data);
         const scalar_type interpolantPrecalcTerm = fraction - scalar_type(0.5);
         const scalar_type interpolantPrecalcTerm3 = fraction * interpolantPrecalcTerm * (fraction - scalar_type(1.0));
@@ -259,9 +264,10 @@ struct quaternion
 
     vector3_type transformVector(const vector3_type v, const bool assumeNoScale=false) NBL_CONST_MEMBER_FUNC
     {
-        scalar_type scale = hlsl::mix(hlsl::length(data), scalar_type(1.0), assumeNoScale);
-        vector3_type direction = data.xyz;
-        return v * scale + hlsl::cross(direction, v * data.w + hlsl::cross(direction, v)) * scalar_type(2.0);
+        const scalar_type scaleRcp = scalar_type(1.0) / hlsl::sqrt(hlsl::dot(data, data));
+        const vector3_type modV = v * scalar_type(2.0) * scaleRcp;
+        const vector3_type direction = data.xyz;
+        return v / scaleRcp + hlsl::cross(direction, modV * data.w + hlsl::cross(direction, modV));
     }
 
     matrix_type constructMatrix() NBL_CONST_MEMBER_FUNC
@@ -336,8 +342,10 @@ struct normalize_helper<math::truncated_quaternion<T> >
 {
     static inline math::truncated_quaternion<T> __call(const math::truncated_quaternion<T> q)
     {
+        // q.data is only the xyz part of a unit quaternion (w is implied), so its squared length can be at most 1
+        assert(hlsl::dot(q.data, q.data) <= T(1.0));
         math::truncated_quaternion<T> retval;
-        retval.data = hlsl::normalize(q.data);
+        retval.data = q.data;   // nothing to renormalize: the implied w = sqrt(1 - dot(xyz,xyz)) already makes the full quaternion unit length
         return retval;
     }
 };

From 96ef95d82251abfcf85f194afa43b2e46982b87a Mon Sep 17 00:00:00 2001
From: keptsecret <sorchon@gmail.com>
Date: Thu, 8 Jan 2026 14:09:22 +0700
Subject: [PATCH 12/12] added matrix runtime traits for checking orthogonality,
 uniform scale

---
 .../math/linalg/matrix_runtime_traits.hlsl    | 66 +++++++++++++++++++
 .../nbl/builtin/hlsl/math/quaternions.hlsl    | 24 +------
 src/nbl/builtin/CMakeLists.txt                |  1 +
 3 files changed, 70 insertions(+), 21 deletions(-)
 create mode 100644 include/nbl/builtin/hlsl/math/linalg/matrix_runtime_traits.hlsl

diff --git a/include/nbl/builtin/hlsl/math/linalg/matrix_runtime_traits.hlsl b/include/nbl/builtin/hlsl/math/linalg/matrix_runtime_traits.hlsl
new file mode 100644
index 0000000000..fc19b2cb3e
--- /dev/null
+++ b/include/nbl/builtin/hlsl/math/linalg/matrix_runtime_traits.hlsl
@@ -0,0 +1,66 @@
+// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h
+#ifndef _NBL_BUILTIN_HLSL_MATH_LINALG_MATRIX_RUNTIME_TRAITS_INCLUDED_
+#define _NBL_BUILTIN_HLSL_MATH_LINALG_MATRIX_RUNTIME_TRAITS_INCLUDED_
+
+#include "nbl/builtin/hlsl/cpp_compat.hlsl"
+#include "nbl/builtin/hlsl/tgmath.hlsl"
+#include "nbl/builtin/hlsl/testing/relative_approx_compare.hlsl"
+#include "nbl/builtin/hlsl/concepts/matrix.hlsl"
+#include "nbl/builtin/hlsl/matrix_utils/matrix_traits.hlsl"
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace math
+{
+namespace linalg
+{
+
+// Numerically classifies a square matrix: invertible, orthogonal rows, uniformly scaled rows, orthonormal rows.
+template<typename T NBL_PRIMARY_REQUIRES(concepts::Matricial<T> && matrix_traits<T>::Square)
+struct RuntimeTraits
+{
+    using matrix_t = T;
+    using scalar_t = typename matrix_traits<T>::scalar_type;
+    NBL_CONSTEXPR_STATIC_INLINE uint16_t N = matrix_traits<T>::RowCount;
+
+    // Evaluates every trait of `m` once; the literal tolerances are forwarded to testing::relativeApproxCompare.
+    static RuntimeTraits<matrix_t> create(const matrix_t m)
+    {
+        RuntimeTraits<matrix_t> retval;
+        retval.invertible = !testing::relativeApproxCompare(hlsl::determinant(m), scalar_t(0.0), 1e-5);
+        // every distinct pair of rows must be perpendicular; testing only adjacent rows misses (0,2) and (1,3) when N==4
+        bool orthogonal = true;
+        NBL_UNROLL for (uint16_t i = 0; i < N; i++)
+        {
+            NBL_UNROLL for (uint16_t j = i+1; j < N; j++)
+                orthogonal = testing::relativeApproxCompare(hlsl::dot(m[i], m[j]), scalar_t(0.0), 1e-4) && orthogonal;
+        }
+        retval.orthogonal = orthogonal;
+        // squared row lengths: all approximately equal means uniform scale, additionally equal to one means unit rows
+        scalar_t dots[N];
+        NBL_UNROLL for (uint16_t i = 0; i < N; i++)
+            dots[i] = hlsl::dot(m[i], m[i]);
+        bool uniformScale = true;
+        NBL_UNROLL for (uint16_t i = 0; i < N-1; i++)
+            uniformScale = testing::relativeApproxCompare(dots[i], dots[i+1], 1e-4) && uniformScale;
+        retval.uniformScale = uniformScale;
+        retval.orthonormal = uniformScale && retval.orthogonal && testing::relativeApproxCompare(dots[0], scalar_t(1.0), 1e-5);
+        return retval;
+    }
+
+    bool invertible;    // determinant is not approximately zero
+    bool orthogonal;    // every pair of distinct rows has an approximately zero dot product
+    bool uniformScale;  // all rows have approximately the same squared length
+    bool orthonormal;   // orthogonal, uniformly scaled, and the first row has approximately unit squared length
+};
+
+}
+}
+}
+}
+
+#endif
diff --git a/include/nbl/builtin/hlsl/math/quaternions.hlsl b/include/nbl/builtin/hlsl/math/quaternions.hlsl
index 34abb042fb..59f2eea243 100644
--- a/include/nbl/builtin/hlsl/math/quaternions.hlsl
+++ b/include/nbl/builtin/hlsl/math/quaternions.hlsl
@@ -6,6 +6,7 @@
 
 #include "nbl/builtin/hlsl/cpp_compat.hlsl"
 #include "nbl/builtin/hlsl/tgmath.hlsl"
+#include "nbl/builtin/hlsl/math/linalg/matrix_runtime_traits.hlsl"
 
 namespace nbl
 {
@@ -96,31 +97,12 @@ struct quaternion
         );
     }
 
-    static bool __isEqual(const scalar_type a, const scalar_type b)
-    {
-        return hlsl::max(a/b, b/a) <= scalar_type(1e-4);
-    }
-    static bool __dotIsZero(const vector3_type a, const vector3_type b)
-    {
-        const scalar_type ab = hlsl::dot(a, b);
-        return hlsl::abs(ab) <= scalar_type(1e-4);
-    }
-
     static this_t create(NBL_CONST_REF_ARG(matrix_type) m, const bool dontAssertValidMatrix=false)
     {
         {
             // only orthogonal and uniform scale mats can be converted
-            bool valid = __dotIsZero(m[0], m[1]);
-            valid = __dotIsZero(m[1], m[2]) && valid;
-            valid = __dotIsZero(m[0], m[2]) && valid;
-
-            const matrix_type m_T = hlsl::transpose(m);
-            const scalar_type dotCol0 = hlsl::dot(m_T[0],m_T[0]);
-            const scalar_type dotCol1 = hlsl::dot(m_T[1],m_T[1]);
-            const scalar_type dotCol2 = hlsl::dot(m_T[2],m_T[2]);
-            valid = __isEqual(dotCol0, dotCol1) && valid;
-            valid = __isEqual(dotCol1, dotCol2) && valid;
-            valid = __isEqual(dotCol0, dotCol2) && valid;
+            linalg::RuntimeTraits<matrix_type> traits = linalg::RuntimeTraits<matrix_type>::create(m);
+            bool valid = traits.orthogonal && traits.uniformScale;
 
             if (dontAssertValidMatrix)
                 if (!valid)
diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt
index 085ed3c923..038ac2573d 100644
--- a/src/nbl/builtin/CMakeLists.txt
+++ b/src/nbl/builtin/CMakeLists.txt
@@ -223,6 +223,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/format.hlsl")
 #linear algebra
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/linalg/fast_affine.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/linalg/transform.hlsl")
+LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/linalg/matrix_runtime_traits.hlsl")
 # TODO: rename `equations` to `polynomials` probably
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/functions.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/geometry.hlsl")