Initial commit

author: daoge_cmd <3523206925@qq.com> 2026-03-01 12:16:08 +0800
committer: daoge_cmd <3523206925@qq.com> 2026-03-01 12:16:08 +0800
commit: b691c43c44ff180d10e7d4a9afc83b98551ff586 (patch)
tree: 3e9849222cbc6ba49f2f1fc6e5fe7179632c7390 /Minecraft.Client/PS3/PS3Extras/DirectX/DirectXMathMisc.inl
parent: def8cb415354ac390b7e89052a50605285f1aca9 (diff)
1 files changed, 2501 insertions, 0 deletions
diff --git a/Minecraft.Client/PS3/PS3Extras/DirectX/DirectXMathMisc.inl b/Minecraft.Client/PS3/PS3Extras/DirectX/DirectXMathMisc.inl
new file mode 100644
index 00000000..f3461e6c
--- /dev/null
+++ b/Minecraft.Client/PS3/PS3Extras/DirectX/DirectXMathMisc.inl
@@ -0,0 +1,2501 @@
+//-------------------------------------------------------------------------------------
+// DirectXMathMisc.inl -- SIMD C++ Math library
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
+// PARTICULAR PURPOSE.
+//  
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//-------------------------------------------------------------------------------------
+
+#ifdef _MSC_VER
+#pragma once
+#endif
+
+/****************************************************************************
+ *
+ * Quaternion
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+// Comparison operations
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+
+// Quaternion equality test.
+// Returns the result of comparing Q1 and Q2 with XMVector4Equal.
+inline bool XMQuaternionEqual(FXMVECTOR Q1, FXMVECTOR Q2)
+{
+    // The quaternions are handed to the 4D vector comparison unchanged.
+    const bool bEqual = XMVector4Equal(Q1, Q2);
+    return bEqual;
+}
+
+//------------------------------------------------------------------------------
+
+// Quaternion inequality test.
+// Returns the result of comparing Q1 and Q2 with XMVector4NotEqual.
+inline bool XMQuaternionNotEqual(FXMVECTOR Q1, FXMVECTOR Q2)
+{
+    // The quaternions are handed to the 4D vector comparison unchanged.
+    const bool bDiffers = XMVector4NotEqual(Q1, Q2);
+    return bDiffers;
+}
+
+//------------------------------------------------------------------------------
+
+// NaN check for a quaternion: returns whatever XMVector4IsNaN reports for Q.
+inline bool XMQuaternionIsNaN(FXMVECTOR Q)
+{
+    // Q is passed through to the 4D vector test unmodified.
+    const bool bNaN = XMVector4IsNaN(Q);
+    return bNaN;
+}
+
+//------------------------------------------------------------------------------
+
+// Infinity check for a quaternion: returns whatever XMVector4IsInfinite reports for Q.
+inline bool XMQuaternionIsInfinite(FXMVECTOR Q)
+{
+    // Q is passed through to the 4D vector test unmodified.
+    const bool bInfinite = XMVector4IsInfinite(Q);
+    return bInfinite;
+}
+
+//------------------------------------------------------------------------------
+
+// Identity test: compares Q against g_XMIdentityR3 (the constant that
+// XMQuaternionIdentity returns) using XMVector4Equal.
+inline bool XMQuaternionIsIdentity(FXMVECTOR Q)
+{
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+    const XMVECTOR vIdentity = g_XMIdentityR3.v;
+    return XMVector4Equal(Q, vIdentity);
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Computation operations
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+
+// Quaternion dot product.
+// Returns the XMVECTOR produced by XMVector4Dot(Q1, Q2).
+inline XMVECTOR XMQuaternionDot(FXMVECTOR Q1, FXMVECTOR Q2)
+{
+    // Both operands are treated as ordinary 4D vectors.
+    const XMVECTOR vDot = XMVector4Dot(Q1, Q2);
+    return vDot;
+}
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XMQuaternionMultiply
+(
+    FXMVECTOR Q1, // rotation that is applied first (see the note below)
+    FXMVECTOR Q2  // rotation that is applied second
+)
+{
+    // Returns the product Q2*Q1 (which is the concatenation of a rotation Q1 followed by the rotation Q2)
+
+    // [ (Q2.w * Q1.x) + (Q2.x * Q1.w) + (Q2.y * Q1.z) - (Q2.z * Q1.y),
+    //   (Q2.w * Q1.y) - (Q2.x * Q1.z) + (Q2.y * Q1.w) + (Q2.z * Q1.x),
+    //   (Q2.w * Q1.z) + (Q2.x * Q1.y) - (Q2.y * Q1.x) + (Q2.z * Q1.w),
+    //   (Q2.w * Q1.w) - (Q2.x * Q1.x) - (Q2.y * Q1.y) - (Q2.z * Q1.z) ]
+
+#if defined(_XM_NO_INTRINSICS_)
+    XMVECTOR Result = { // scalar path: one initializer per row of the formula above
+        (Q2.vector4_f32[3] * Q1.vector4_f32[0]) + (Q2.vector4_f32[0] * Q1.vector4_f32[3]) + (Q2.vector4_f32[1] * Q1.vector4_f32[2]) - (Q2.vector4_f32[2] * Q1.vector4_f32[1]),
+        (Q2.vector4_f32[3] * Q1.vector4_f32[1]) - (Q2.vector4_f32[0] * Q1.vector4_f32[2]) + (Q2.vector4_f32[1] * Q1.vector4_f32[3]) + (Q2.vector4_f32[2] * Q1.vector4_f32[0]),
+        (Q2.vector4_f32[3] * Q1.vector4_f32[2]) + (Q2.vector4_f32[0] * Q1.vector4_f32[1]) - (Q2.vector4_f32[1] * Q1.vector4_f32[0]) + (Q2.vector4_f32[2] * Q1.vector4_f32[3]),
+        (Q2.vector4_f32[3] * Q1.vector4_f32[3]) - (Q2.vector4_f32[0] * Q1.vector4_f32[0]) - (Q2.vector4_f32[1] * Q1.vector4_f32[1]) - (Q2.vector4_f32[2] * Q1.vector4_f32[2]) };
+    return Result;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    static const XMVECTORF32 ControlWZYX = { 1.0f,-1.0f, 1.0f,-1.0f}; // signs for the Q2.x * Q1.wzyx term
+    static const XMVECTORF32 ControlZWXY = { 1.0f, 1.0f,-1.0f,-1.0f}; // signs for the Q2.y * Q1.zwxy term
+    static const XMVECTORF32 ControlYXWZ = {-1.0f, 1.0f, 1.0f,-1.0f}; // signs for the Q2.z * Q1.yxwz term
+
+    __n64 Q2L = vget_low_f32(Q2);  // (Q2.x, Q2.y)
+    __n64 Q2H = vget_high_f32(Q2); // (Q2.z, Q2.w)
+
+    __n128 Q2X = vdupq_lane_f32( Q2L, 0 ); // Q2.x in every lane
+    __n128 Q2Y = vdupq_lane_f32( Q2L, 1 ); // Q2.y in every lane
+    __n128 Q2Z = vdupq_lane_f32( Q2H, 0 ); // Q2.z in every lane
+    __n128 vResult = vdupq_lane_f32( Q2H, 1 ); // Q2.w in every lane
+    vResult = vmulq_f32(vResult,Q1); // first term: Q2.w * Q1
+
+    // Mul by Q1WZYX
+    __n128 vTemp = vrev64q_u32(Q1); // swap within each 64-bit half: (y, x, w, z)
+    vTemp = vcombine_f32( vget_high_f32(vTemp), vget_low_f32(vTemp) ); // swap the halves: (w, z, y, x)
+    Q2X = vmulq_f32(Q2X,vTemp);
+    vResult = vmlaq_f32( vResult, Q2X, ControlWZYX ); // accumulate with the signs applied
+
+    // Mul by Q1ZWXY
+    vTemp = vrev64q_u32(vTemp); // (w, z, y, x) -> (z, w, x, y)
+    Q2Y = vmulq_f32(Q2Y,vTemp);
+    vResult = vmlaq_f32(vResult, Q2Y, ControlZWXY);
+
+    // Mul by Q1YXWZ
+    vTemp = vrev64q_u32(vTemp); // (z, w, x, y) -> (w, z, y, x)
+    vTemp = vcombine_f32(vget_high_f32(vTemp), vget_low_f32(vTemp)); // -> (y, x, w, z)
+    Q2Z = vmulq_f32(Q2Z,vTemp);
+    vResult = vmlaq_f32(vResult, Q2Z, ControlYXWZ);
+    return vResult;
+#elif defined(_XM_SSE_INTRINSICS_)
+    static const XMVECTORF32 ControlWZYX = { 1.0f,-1.0f, 1.0f,-1.0f}; // signs for the Q2.x * Q1.wzyx term
+    static const XMVECTORF32 ControlZWXY = { 1.0f, 1.0f,-1.0f,-1.0f}; // signs for the Q2.y * Q1.zwxy term
+    static const XMVECTORF32 ControlYXWZ = {-1.0f, 1.0f, 1.0f,-1.0f}; // signs for the Q2.z * Q1.yxwz term
+    // Copy to SSE registers and use as few as possible for x86
+    XMVECTOR Q2X = Q2;
+    XMVECTOR Q2Y = Q2;
+    XMVECTOR Q2Z = Q2;
+    XMVECTOR vResult = Q2;
+    // Splat with one instruction
+    vResult = XM_PERMUTE_PS(vResult,_MM_SHUFFLE(3,3,3,3));
+    Q2X = XM_PERMUTE_PS(Q2X,_MM_SHUFFLE(0,0,0,0));
+    Q2Y = XM_PERMUTE_PS(Q2Y,_MM_SHUFFLE(1,1,1,1));
+    Q2Z = XM_PERMUTE_PS(Q2Z,_MM_SHUFFLE(2,2,2,2));
+    // Retire Q1 and perform Q1*Q2W
+    vResult = _mm_mul_ps(vResult,Q1);
+    XMVECTOR Q1Shuffle = Q1;
+    // Shuffle the copies of Q1
+    Q1Shuffle = XM_PERMUTE_PS(Q1Shuffle,_MM_SHUFFLE(0,1,2,3));
+    // Mul by Q1WZYX
+    Q2X = _mm_mul_ps(Q2X,Q1Shuffle);
+    Q1Shuffle = XM_PERMUTE_PS(Q1Shuffle,_MM_SHUFFLE(2,3,0,1));
+    // Flip the signs on y and w
+    Q2X = _mm_mul_ps(Q2X,ControlWZYX);
+    // Mul by Q1ZWXY
+    Q2Y = _mm_mul_ps(Q2Y,Q1Shuffle);
+    Q1Shuffle = XM_PERMUTE_PS(Q1Shuffle,_MM_SHUFFLE(0,1,2,3));
+    // Flip the signs on z and w
+    Q2Y = _mm_mul_ps(Q2Y,ControlZWXY);
+    // Mul by Q1YXWZ
+    Q2Z = _mm_mul_ps(Q2Z,Q1Shuffle);
+    vResult = _mm_add_ps(vResult,Q2X); // Q2.w term + Q2.x term
+    // Flip the signs on x and w
+    Q2Z = _mm_mul_ps(Q2Z,ControlYXWZ);
+    Q2Y = _mm_add_ps(Q2Y,Q2Z); // Q2.y term + Q2.z term
+    vResult = _mm_add_ps(vResult,Q2Y); // sum of all four terms
+    return vResult;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Squared length of a quaternion: returns XMVector4LengthSq(Q).
+inline XMVECTOR XMQuaternionLengthSq(FXMVECTOR Q)
+{
+    // Q is measured as an ordinary 4D vector.
+    const XMVECTOR vLenSq = XMVector4LengthSq(Q);
+    return vLenSq;
+}
+
+//------------------------------------------------------------------------------
+
+// Reciprocal length of a quaternion: returns XMVector4ReciprocalLength(Q).
+inline XMVECTOR XMQuaternionReciprocalLength(FXMVECTOR Q)
+{
+    // Q is measured as an ordinary 4D vector.
+    const XMVECTOR vInvLen = XMVector4ReciprocalLength(Q);
+    return vInvLen;
+}
+
+//------------------------------------------------------------------------------
+
+// Length of a quaternion: returns XMVector4Length(Q).
+inline XMVECTOR XMQuaternionLength(FXMVECTOR Q)
+{
+    // Q is measured as an ordinary 4D vector.
+    const XMVECTOR vLen = XMVector4Length(Q);
+    return vLen;
+}
+
+//------------------------------------------------------------------------------
+
+// Estimated normalization of a quaternion: returns XMVector4NormalizeEst(Q).
+inline XMVECTOR XMQuaternionNormalizeEst(FXMVECTOR Q)
+{
+    // Q is normalized as an ordinary 4D vector by the estimate variant.
+    const XMVECTOR vUnitEst = XMVector4NormalizeEst(Q);
+    return vUnitEst;
+}
+
+//------------------------------------------------------------------------------
+
+// Normalization of a quaternion: returns XMVector4Normalize(Q).
+inline XMVECTOR XMQuaternionNormalize(FXMVECTOR Q)
+{
+    // Q is normalized as an ordinary 4D vector.
+    const XMVECTOR vUnit = XMVector4Normalize(Q);
+    return vUnit;
+}
+
+//------------------------------------------------------------------------------
+
+// Quaternion conjugate: x, y and z are negated while w is kept as is.
+inline XMVECTOR XMQuaternionConjugate(FXMVECTOR Q)
+{
+#if defined(_XM_NO_INTRINSICS_)
+    // Scalar path: negate the first three lanes one at a time.
+    const float fX = -Q.vector4_f32[0];
+    const float fY = -Q.vector4_f32[1];
+    const float fZ = -Q.vector4_f32[2];
+    const float fW = Q.vector4_f32[3];
+    XMVECTOR Flipped = { fX, fY, fZ, fW };
+    return Flipped;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    // Multiplying by (-1,-1,-1,+1) flips the sign of x, y and z only.
+    static const XMVECTORF32 SignFlipXYZ = {-1.0f,-1.0f,-1.0f,1.0f};
+    return vmulq_f32(Q, SignFlipXYZ.v );
+#elif defined(_XM_SSE_INTRINSICS_)
+    // Same sign-vector multiply, expressed with SSE.
+    static const XMVECTORF32 SignFlipXYZ = {-1.0f,-1.0f,-1.0f,1.0f};
+    return _mm_mul_ps(Q,SignFlipXYZ);
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Quaternion inverse: conjugate(Q) divided by the squared length of Q.  Where
+// the squared length is <= g_XMEpsilon the result is replaced with zero.
+inline XMVECTOR XMQuaternionInverse(FXMVECTOR Q)
+{
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+    const XMVECTOR vZero = XMVectorZero();
+
+    // Numerator and denominator of the inverse.
+    XMVECTOR vConj = XMQuaternionConjugate(Q);
+    XMVECTOR vLenSq = XMVector4LengthSq(Q);
+
+    // Mask of lanes whose squared length is too small to divide by safely.
+    XMVECTOR vTooSmall = XMVectorLessOrEqual(vLenSq, g_XMEpsilon.v);
+
+    // Divide unconditionally, then let the select overwrite the masked lanes.
+    XMVECTOR vQuotient = XMVectorDivide(vConj, vLenSq);
+
+    return XMVectorSelect(vQuotient, vZero, vTooSmall);
+
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Quaternion natural logarithm.  With theta = acos(Q.w), the x/y/z lanes of Q
+// are scaled by theta / sin(theta); where Q.w falls outside the bounds
+// +/-(1 - 0.00001) the x/y/z lanes are returned unscaled instead.
+inline XMVECTOR XMQuaternionLn(FXMVECTOR Q)
+{
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+    static const XMVECTORF32 WLimit = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f};
+
+    // x/y/z come from Q; the w lane is taken from g_XMSelect1110 itself.
+    XMVECTOR VecPart = XMVectorSelect(g_XMSelect1110.v, Q, g_XMSelect1110.v);
+    XMVECTOR WSplat = XMVectorSplatW(Q);
+
+    // Lanes where w is within the limit, i.e. where the scale factor is applied.
+    XMVECTOR WInRange = XMVectorInBounds(WSplat, WLimit.v);
+
+    // Scale factor theta / sin(theta).
+    XMVECTOR Angle = XMVectorACos(WSplat);
+    XMVECTOR Scale = XMVectorDivide(Angle, XMVectorSin(Angle));
+
+    // Keep the unscaled vector part wherever w was out of range.
+    XMVECTOR Scaled = XMVectorMultiply(VecPart, Scale);
+    return XMVectorSelect(VecPart, Scaled, WInRange);
+
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Quaternion exponential.  With theta = XMVector3Length(Q), the x/y/z lanes are
+// Q * sin(theta) / theta (or Q unscaled when theta is within g_XMEpsilon of
+// zero) and the w lane is cos(theta).
+inline XMVECTOR XMQuaternionExp(FXMVECTOR Q)
+{
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+    const XMVECTOR vZero = XMVectorZero();
+
+    XMVECTOR Angle = XMVector3Length(Q);
+
+    XMVECTOR SinA, CosA;
+    XMVectorSinCos(&SinA, &CosA, Angle);
+
+    // Scale the input by sin(theta) / theta.
+    XMVECTOR Scaled = XMVectorMultiply(Q, XMVectorDivide(SinA, Angle));
+
+    // Near theta == 0 the quotient is not used; Q is kept as it is there.
+    XMVECTOR AngleIsTiny = XMVectorNearEqual(Angle, vZero, g_XMEpsilon.v);
+    XMVECTOR XYZ = XMVectorSelect(Scaled, Q, AngleIsTiny);
+
+    // x, y, z come from the scaled vector; w comes from cos(theta).
+    return XMVectorSelect(CosA, XYZ, g_XMSelect1110.v);
+
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Spherical interpolation between Q0 and Q1 with a scalar parameter.
+// The scalar t is replicated into a vector and handed to XMQuaternionSlerpV.
+inline XMVECTOR XMQuaternionSlerp(FXMVECTOR Q0, FXMVECTOR Q1, float t)
+{
+    // XMQuaternionSlerpV asserts that all four lanes of its T argument match.
+    const XMVECTOR vT = XMVectorReplicate(t);
+
+    XMVECTOR vBlend = XMQuaternionSlerpV(Q0, Q1, vT);
+    return vBlend;
+}
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XMQuaternionSlerpV
+(
+    FXMVECTOR Q0, // quaternion weighted by sin((1 - t) * Omega) / sin(Omega)
+    FXMVECTOR Q1, // quaternion weighted by sin(t * Omega) / sin(Omega)
+    FXMVECTOR T   // interpolation parameter t; must be identical in all four lanes
+)
+{
+    assert((XMVectorGetY(T) == XMVectorGetX(T)) && (XMVectorGetZ(T) == XMVectorGetX(T)) && (XMVectorGetW(T) == XMVectorGetX(T)));
+
+    // Result = Q0 * sin((1.0 - t) * Omega) / sin(Omega) + Q1 * sin(t * Omega) / sin(Omega)
+
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+    const XMVECTORF32 OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f};
+
+    XMVECTOR CosOmega = XMQuaternionDot(Q0, Q1);
+
+    const XMVECTOR Zero = XMVectorZero();
+    XMVECTOR Control = XMVectorLess(CosOmega, Zero);
+    XMVECTOR Sign = XMVectorSelect(g_XMOne.v, g_XMNegativeOne.v, Control); // g_XMNegativeOne where the dot product is negative, g_XMOne elsewhere
+
+    CosOmega = XMVectorMultiply(CosOmega, Sign); // a negative dot product is negated here
+
+    Control = XMVectorLess(CosOmega, OneMinusEpsilon); // set where the sine-based weights are used (see the select on S0)
+
+    XMVECTOR SinOmega = XMVectorNegativeMultiplySubtract(CosOmega, CosOmega, g_XMOne.v); // NOTE(review): expected to be 1 - cos^2, as the SSE path computes explicitly
+    SinOmega = XMVectorSqrt(SinOmega);
+
+    XMVECTOR Omega = XMVectorATan2(SinOmega, CosOmega);
+
+    // Build V01 from T; presumably (1 - t, t, 0, 0), matching what the SSE path constructs -- confirm against XMVectorShiftLeft.
+    XMVECTOR SignMask = XMVectorSplatSignMask();
+    XMVECTOR V01 = XMVectorShiftLeft(T, Zero, 2);
+    SignMask = XMVectorShiftLeft(SignMask, Zero, 3);
+    V01 = XMVectorXorInt(V01, SignMask);
+    V01 = XMVectorAdd(g_XMIdentityR0.v, V01);
+
+    XMVECTOR InvSinOmega = XMVectorReciprocal(SinOmega);
+
+    XMVECTOR S0 = XMVectorMultiply(V01, Omega);
+    S0 = XMVectorSin(S0);
+    S0 = XMVectorMultiply(S0, InvSinOmega); // sin(V01 * Omega) / sin(Omega)
+
+    S0 = XMVectorSelect(V01, S0, Control); // where Control is clear the plain V01 weights are kept instead
+
+    XMVECTOR S1 = XMVectorSplatY(S0); // weight for Q1
+    S0 = XMVectorSplatX(S0);          // weight for Q0
+
+    S1 = XMVectorMultiply(S1, Sign); // carry the sign chosen for the dot product over to Q1's weight
+
+    XMVECTOR Result = XMVectorMultiply(Q0, S0);
+    Result = XMVectorMultiplyAdd(Q1, S1, Result); // Q1 * S1 + Q0 * S0
+
+    return Result;
+
+#elif defined(_XM_SSE_INTRINSICS_)
+    static const XMVECTORF32 OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f};
+    static const XMVECTORI32 SignMask2 = {0x80000000,0x00000000,0x00000000,0x00000000}; // sign bit of the x lane only
+    static const XMVECTORI32 MaskXY = {0xFFFFFFFF,0xFFFFFFFF,0x00000000,0x00000000};    // keeps x and y, clears z and w
+
+    XMVECTOR CosOmega = XMQuaternionDot(Q0, Q1);
+
+    const XMVECTOR Zero = XMVectorZero();
+    XMVECTOR Control = XMVectorLess(CosOmega, Zero);
+    XMVECTOR Sign = XMVectorSelect(g_XMOne, g_XMNegativeOne, Control); // g_XMNegativeOne where the dot product is negative, g_XMOne elsewhere
+
+    CosOmega = _mm_mul_ps(CosOmega, Sign); // a negative dot product is negated here
+
+    Control = XMVectorLess(CosOmega, OneMinusEpsilon); // set where the sine-based weights are used (see the select on S0)
+
+    XMVECTOR SinOmega = _mm_mul_ps(CosOmega,CosOmega);
+    SinOmega = _mm_sub_ps(g_XMOne,SinOmega); // g_XMOne - cos^2
+    SinOmega = _mm_sqrt_ps(SinOmega);
+
+    XMVECTOR Omega = XMVectorATan2(SinOmega, CosOmega);
+
+    XMVECTOR V01 = XM_PERMUTE_PS(T,_MM_SHUFFLE(2,3,0,1));
+    V01 = _mm_and_ps(V01,MaskXY);    // (t, t, 0, 0), since T holds t in every lane
+    V01 = _mm_xor_ps(V01,SignMask2); // (-t, t, 0, 0)
+    V01 = _mm_add_ps(g_XMIdentityR0, V01); // (1 - t, t, 0, 0), assuming g_XMIdentityR0 is (1, 0, 0, 0)
+
+    XMVECTOR S0 = _mm_mul_ps(V01, Omega);
+    S0 = XMVectorSin(S0);
+    S0 = _mm_div_ps(S0, SinOmega); // sin(V01 * Omega) / sin(Omega)
+
+    S0 = XMVectorSelect(V01, S0, Control); // where Control is clear the plain V01 weights are kept instead
+
+    XMVECTOR S1 = XMVectorSplatY(S0); // weight for Q1
+    S0 = XMVectorSplatX(S0);          // weight for Q0
+
+    S1 = _mm_mul_ps(S1, Sign); // carry the sign chosen for the dot product over to Q1's weight
+    XMVECTOR Result = _mm_mul_ps(Q0, S0);
+    S1 = _mm_mul_ps(S1, Q1);
+    Result = _mm_add_ps(Result,S1); // Q0 * S0 + Q1 * S1
+    return Result;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Spherical quadrangle interpolation with a scalar parameter.
+// The scalar t is replicated into a vector and handed to XMQuaternionSquadV
+// together with the four quaternions, which are passed through unchanged.
+inline XMVECTOR XMQuaternionSquad(FXMVECTOR Q0, FXMVECTOR Q1, FXMVECTOR Q2,
+                                  GXMVECTOR Q3, float t)
+{
+    // XMQuaternionSquadV asserts that all four lanes of its T argument match.
+    const XMVECTOR vT = XMVectorReplicate(t);
+
+    XMVECTOR vSquad = XMQuaternionSquadV(Q0, Q1, Q2, Q3, vT);
+    return vSquad;
+}
+
+//------------------------------------------------------------------------------
+
+// Spherical quadrangle interpolation with a vector parameter (same value in
+// all lanes of T, asserted): SlerpV(SlerpV(Q0,Q3,T), SlerpV(Q1,Q2,T), 2*(T - T*T)).
+inline XMVECTOR XMQuaternionSquadV
+(
+    FXMVECTOR Q0,
+    FXMVECTOR Q1,
+    FXMVECTOR Q2,
+    GXMVECTOR Q3,
+    CXMVECTOR T
+)
+{
+    assert( (XMVectorGetY(T) == XMVectorGetX(T)) && (XMVectorGetZ(T) == XMVectorGetX(T)) && (XMVectorGetW(T) == XMVectorGetX(T)) );
+
+    // Two arcs, both evaluated at T.
+    XMVECTOR Arc03 = XMQuaternionSlerpV(Q0, Q3, T);
+    XMVECTOR Arc12 = XMQuaternionSlerpV(Q1, Q2, T);
+
+    // Blend factor between the two arcs: (T - T * T) scaled by Two.
+    const XMVECTOR Two = XMVectorSplatConstant(2, 0);
+    XMVECTOR Blend = XMVectorNegativeMultiplySubtract(T, T, T);
+    Blend = XMVectorMultiply(Blend, Two);
+
+    return XMQuaternionSlerpV(Arc03, Arc12, Blend);
+}
+
+//------------------------------------------------------------------------------
+// Produces the three setup quaternions used for spherical quadrangle
+// interpolation.  Q0, Q2 and Q3 are sign-adjusted first; *pA and *pB receive
+// the two computed control quaternions and *pC receives the (possibly negated)
+// Q2.  All three output pointers must be non-null (asserted).
+_Use_decl_annotations_
+inline void XMQuaternionSquadSetup
+(
+    XMVECTOR* pA,
+    XMVECTOR* pB,
+    XMVECTOR* pC,
+    FXMVECTOR  Q0,
+    FXMVECTOR  Q1,
+    FXMVECTOR  Q2,
+    GXMVECTOR  Q3
+)
+{
+    assert(pA);
+    assert(pB);
+    assert(pC);
+
+    // Negate Q2 when |Q1 + Q2|^2 < |Q1 - Q2|^2.
+    XMVECTOR SumSq12 = XMQuaternionLengthSq(XMVectorAdd(Q1, Q2));
+    XMVECTOR DiffSq12 = XMQuaternionLengthSq(XMVectorSubtract(Q1, Q2));
+    XMVECTOR Flip2 = XMVectorLess(SumSq12, DiffSq12);
+    XMVECTOR AdjQ2 = XMVectorSelect(Q2, XMVectorNegate(Q2), Flip2);
+
+    // Same test for Q0, measured against Q1.
+    XMVECTOR SumSq01 = XMQuaternionLengthSq(XMVectorAdd(Q0, Q1));
+    XMVECTOR DiffSq01 = XMQuaternionLengthSq(XMVectorSubtract(Q0, Q1));
+    XMVECTOR Flip0 = XMVectorLess(SumSq01, DiffSq01);
+    XMVECTOR AdjQ0 = XMVectorSelect(Q0, XMVectorNegate(Q0), Flip0);
+
+    // And for Q3, measured against the (possibly negated) Q2.
+    XMVECTOR SumSq23 = XMQuaternionLengthSq(XMVectorAdd(AdjQ2, Q3));
+    XMVECTOR DiffSq23 = XMQuaternionLengthSq(XMVectorSubtract(AdjQ2, Q3));
+    XMVECTOR Flip3 = XMVectorLess(SumSq23, DiffSq23);
+    XMVECTOR AdjQ3 = XMVectorSelect(Q3, XMVectorNegate(Q3), Flip3);
+
+    XMVECTOR InvQ1 = XMQuaternionInverse(Q1);
+    XMVECTOR InvAdjQ2 = XMQuaternionInverse(AdjQ2);
+
+    // Logarithms of the neighbours, each pre-multiplied by an inverse.
+    XMVECTOR Ln10 = XMQuaternionLn(XMQuaternionMultiply(InvQ1, AdjQ0));
+    XMVECTOR Ln12 = XMQuaternionLn(XMQuaternionMultiply(InvQ1, AdjQ2));
+    XMVECTOR Ln21 = XMQuaternionLn(XMQuaternionMultiply(InvAdjQ2, Q1));
+    XMVECTOR Ln23 = XMQuaternionLn(XMQuaternionMultiply(InvAdjQ2, AdjQ3));
+
+    // Exponential of each summed pair scaled by XMVectorSplatConstant(-1, 2).
+    const XMVECTOR MinusQuarter = XMVectorSplatConstant(-1, 2);
+    XMVECTOR ExpA = XMQuaternionExp(XMVectorMultiply(XMVectorAdd(Ln10, Ln12), MinusQuarter));
+    XMVECTOR ExpB = XMQuaternionExp(XMVectorMultiply(XMVectorAdd(Ln21, Ln23), MinusQuarter));
+
+    *pA = XMQuaternionMultiply(Q1, ExpA);
+    *pB = XMQuaternionMultiply(AdjQ2, ExpB);
+    *pC = AdjQ2;
+}
+
+//------------------------------------------------------------------------------
+
+// Barycentric blend of three quaternions with scalar weights f and g.
+// With s = f + g: returns Q0 when -0.00001 < s < 0.00001, otherwise
+// Slerp(Slerp(Q0, Q1, s), Slerp(Q0, Q2, s), g / s).
+inline XMVECTOR XMQuaternionBaryCentric
+(
+    FXMVECTOR Q0,
+    FXMVECTOR Q1,
+    FXMVECTOR Q2,
+    float    f,
+    float    g
+)
+{
+    const float fSum = f + g;
+
+    // Guard: with a (near) zero sum the ratio g / s below cannot be formed.
+    const bool bDegenerate = (fSum > -0.00001f) && (fSum < 0.00001f);
+    if (bDegenerate)
+    {
+        return Q0;
+    }
+
+    XMVECTOR Edge01 = XMQuaternionSlerp(Q0, Q1, fSum);
+    XMVECTOR Edge02 = XMQuaternionSlerp(Q0, Q2, fSum);
+
+    return XMQuaternionSlerp(Edge01, Edge02, g / fSum);
+}
+
+//------------------------------------------------------------------------------
+
+// Barycentric blend of three quaternions with vector weights F and G (each
+// must hold one value in all four lanes; asserted).  With S = F + G: returns
+// Q0 when XMVector4InBounds(S, XMVectorSplatConstant(1, 16)) holds, otherwise
+// SlerpV(SlerpV(Q0, Q1, S), SlerpV(Q0, Q2, S), G * reciprocal(S)).
+inline XMVECTOR XMQuaternionBaryCentricV
+(
+    FXMVECTOR Q0,
+    FXMVECTOR Q1,
+    FXMVECTOR Q2,
+    GXMVECTOR F,
+    CXMVECTOR G
+)
+{
+    assert( (XMVectorGetY(F) == XMVectorGetX(F)) && (XMVectorGetZ(F) == XMVectorGetX(F)) && (XMVectorGetW(F) == XMVectorGetX(F)) );
+    assert( (XMVectorGetY(G) == XMVectorGetX(G)) && (XMVectorGetZ(G) == XMVectorGetX(G)) && (XMVectorGetW(G) == XMVectorGetX(G)) );
+
+    const XMVECTOR vTiny = XMVectorSplatConstant(1, 16);
+    XMVECTOR vSum = XMVectorAdd(F, G);
+
+    // Guard: a sum inside the +/- vTiny bounds is not used as a divisor.
+    if (XMVector4InBounds(vSum, vTiny))
+    {
+        return Q0;
+    }
+
+    XMVECTOR Edge01 = XMQuaternionSlerpV(Q0, Q1, vSum);
+    XMVECTOR Edge02 = XMQuaternionSlerpV(Q0, Q2, vSum);
+
+    // Second-stage parameter G / S, formed as G * reciprocal(S).
+    XMVECTOR vRatio = XMVectorMultiply(G, XMVectorReciprocal(vSum));
+
+    return XMQuaternionSlerpV(Edge01, Edge02, vRatio);
+}
+
+//------------------------------------------------------------------------------
+// Transformation operations
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XMQuaternionIdentity() // hands back the g_XMIdentityR3 constant
+{
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+    const XMVECTOR vIdentity = g_XMIdentityR3.v;
+    return vIdentity;
+#endif // no implementation is provided here for _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Quaternion from three angles given as scalars.
+// The angles are packed as <Pitch, Yaw, Roll, 0> and forwarded to
+// XMQuaternionRotationRollPitchYawFromVector.
+inline XMVECTOR XMQuaternionRotationRollPitchYaw(float Pitch, float Yaw, float Roll)
+{
+    // Lane order matters: x = Pitch, y = Yaw, z = Roll, w = 0.
+    const XMVECTOR vPacked = XMVectorSet(Pitch, Yaw, Roll, 0.0f);
+
+    XMVECTOR vRotation = XMQuaternionRotationRollPitchYawFromVector(vPacked);
+    return vRotation;
+}
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XMQuaternionRotationRollPitchYawFromVector
+(
+    FXMVECTOR Angles // <Pitch, Yaw, Roll, 0>
+)
+{
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+    static const XMVECTORF32  Sign = {1.0f, -1.0f, -1.0f, 1.0f}; // per-lane sign applied to the P1 * Y1 * R1 term
+
+    XMVECTOR HalfAngles = XMVectorMultiply(Angles, g_XMOneHalf.v); // angles scaled by g_XMOneHalf; the sines/cosines below are taken of these
+
+    XMVECTOR SinAngles, CosAngles;
+    XMVectorSinCos(&SinAngles, &CosAngles, HalfAngles);
+
+    XMVECTOR P0 = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1X, XM_PERMUTE_1X>(SinAngles, CosAngles); // (sinP, cosP, cosP, cosP)
+    XMVECTOR Y0 = XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Y>(SinAngles, CosAngles); // (cosY, sinY, cosY, cosY)
+    XMVECTOR R0 = XMVectorPermute<XM_PERMUTE_1Z, XM_PERMUTE_1Z, XM_PERMUTE_0Z, XM_PERMUTE_1Z>(SinAngles, CosAngles); // (cosR, cosR, sinR, cosR)
+    XMVECTOR P1 = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1X, XM_PERMUTE_1X>(CosAngles, SinAngles); // (cosP, sinP, sinP, sinP)
+    XMVECTOR Y1 = XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Y>(CosAngles, SinAngles); // (sinY, cosY, sinY, sinY)
+    XMVECTOR R1 = XMVectorPermute<XM_PERMUTE_1Z, XM_PERMUTE_1Z, XM_PERMUTE_0Z, XM_PERMUTE_1Z>(CosAngles, SinAngles); // (sinR, sinR, cosR, sinR)
+
+    XMVECTOR Q1 = XMVectorMultiply(P1, Sign.v);
+    XMVECTOR Q0 = XMVectorMultiply(P0, Y0);
+    Q1 = XMVectorMultiply(Q1, Y1);
+    Q0 = XMVectorMultiply(Q0, R0);               // Q0 = P0 * Y0 * R0
+    XMVECTOR Q = XMVectorMultiplyAdd(Q1, R1, Q0); // Q = (Sign * P1 * Y1) * R1 + Q0
+
+    return Q;
+
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XMQuaternionRotationNormal
+(
+    FXMVECTOR NormalAxis, // rotation axis; used as given, no normalization happens in this function
+    float    Angle        // rotation angle; the sine and cosine are taken of 0.5f * Angle
+)
+{
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+    XMVECTOR N = XMVectorSelect(g_XMOne.v, NormalAxis, g_XMSelect1110.v); // x, y, z from NormalAxis; w from g_XMOne
+
+    float SinV, CosV;
+    XMScalarSinCos(&SinV, &CosV, 0.5f * Angle);
+
+    XMVECTOR Scale = XMVectorSet( SinV, SinV, SinV, CosV ); // sine scales the axis lanes, cosine goes into w
+    return XMVectorMultiply(N, Scale);
+#elif defined(_XM_SSE_INTRINSICS_)
+    XMVECTOR N = _mm_and_ps(NormalAxis,g_XMMask3); // keep the lanes selected by g_XMMask3 (presumably x, y, z)
+    N = _mm_or_ps(N,g_XMIdentityR3);               // merge in g_XMIdentityR3 (presumably sets w to 1, mirroring the scalar path)
+    XMVECTOR Scale = _mm_set_ps1(0.5f * Angle);    // half angle in every lane
+    XMVECTOR vSine;
+    XMVECTOR vCosine;
+    XMVectorSinCos(&vSine,&vCosine,Scale);
+    Scale = _mm_and_ps(vSine,g_XMMask3);        // sine in the g_XMMask3 lanes
+    vCosine = _mm_and_ps(vCosine,g_XMMaskW);    // cosine in the g_XMMaskW lane
+    Scale = _mm_or_ps(Scale,vCosine);           // combined scale vector, mirroring (SinV, SinV, SinV, CosV) above
+    N = _mm_mul_ps(N,Scale);
+    return N;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Quaternion for a rotation of Angle about an Axis that need not be unit
+// length.  The axis must not be zero or infinite (asserted); it is normalized
+// with XMVector3Normalize and then passed to XMQuaternionRotationNormal.
+inline XMVECTOR XMQuaternionRotationAxis(FXMVECTOR Axis, float Angle)
+{
+    assert(!XMVector3Equal(Axis, XMVectorZero()));
+    assert(!XMVector3IsInfinite(Axis));
+
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+    // Normalize first so that XMQuaternionRotationNormal is given a unit axis.
+    const XMVECTOR vUnitAxis = XMVector3Normalize(Axis);
+    XMVECTOR vRotation = XMQuaternionRotationNormal(vUnitAxis, Angle);
+    return vRotation;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XMQuaternionRotationMatrix // builds a quaternion from the rotation stored in M
+(
+    CXMMATRIX M // only rows 0-2 / columns 0-2 are read by the code below
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+    XMVECTORF32 q; // all four lanes are assigned by exactly one of the four branches below
+    float r22 = M.m[2][2];
+    if (r22 <= 0.f)  // x^2 + y^2 >= z^2 + w^2
+    {
+        float dif10 = M.m[1][1] - M.m[0][0];
+        float omr22 = 1.f - r22;
+        if (dif10 <= 0.f)  // x^2 >= y^2
+        {
+            float fourXSqr = omr22 - dif10;
+            float inv4x = 0.5f / sqrtf(fourXSqr); // 0.5 / sqrt(4x^2), i.e. 1 / (4x)
+            q.f[0] = fourXSqr*inv4x;              // 4x^2 / (4x) = x
+            q.f[1] = (M.m[0][1] + M.m[1][0])*inv4x;
+            q.f[2] = (M.m[0][2] + M.m[2][0])*inv4x;
+            q.f[3] = (M.m[1][2] - M.m[2][1])*inv4x;
+        }
+        else  // y^2 >= x^2
+        {
+            float fourYSqr = omr22 + dif10;
+            float inv4y = 0.5f / sqrtf(fourYSqr);
+            q.f[0] = (M.m[0][1] + M.m[1][0])*inv4y;
+            q.f[1] = fourYSqr*inv4y;
+            q.f[2] = (M.m[1][2] + M.m[2][1])*inv4y;
+            q.f[3] = (M.m[2][0] - M.m[0][2])*inv4y;
+        }
+    }
+    else  // z^2 + w^2 >= x^2 + y^2
+    {
+        float sum10 = M.m[1][1] + M.m[0][0];
+        float opr22 = 1.f + r22;
+        if (sum10 <= 0.f)  // z^2 >= w^2
+        {
+            float fourZSqr = opr22 - sum10;
+            float inv4z = 0.5f / sqrtf(fourZSqr);
+            q.f[0] = (M.m[0][2] + M.m[2][0])*inv4z;
+            q.f[1] = (M.m[1][2] + M.m[2][1])*inv4z;
+            q.f[2] = fourZSqr*inv4z;
+            q.f[3] = (M.m[0][1] - M.m[1][0])*inv4z;
+        }
+        else  // w^2 >= z^2
+        {
+            float fourWSqr = opr22 + sum10;
+            float inv4w = 0.5f / sqrtf(fourWSqr);
+            q.f[0] = (M.m[1][2] - M.m[2][1])*inv4w;
+            q.f[1] = (M.m[2][0] - M.m[0][2])*inv4w;
+            q.f[2] = (M.m[0][1] - M.m[1][0])*inv4w;
+            q.f[3] = fourWSqr*inv4w;
+        }
+    }
+    return q.v;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    static const XMVECTORF32 XMPMMP = {+1.0f, -1.0f, -1.0f, +1.0f}; // signs applied to r00
+    static const XMVECTORF32 XMMPMP = {-1.0f, +1.0f, -1.0f, +1.0f}; // signs applied to r11 (and to xwywzw below)
+    static const XMVECTORF32 XMMMPP = {-1.0f, -1.0f, +1.0f, +1.0f}; 
+    static const XMVECTORU32 Select0110 = { XM_SELECT_0, XM_SELECT_1, XM_SELECT_1, XM_SELECT_0 };
+    static const XMVECTORU32 Select0010 = { XM_SELECT_0, XM_SELECT_0, XM_SELECT_1, XM_SELECT_0 };
+
+    XMVECTOR r0 = M.r[0];
+    XMVECTOR r1 = M.r[1];
+    XMVECTOR r2 = M.r[2];
+
+    XMVECTOR r00 = vdupq_lane_f32(vget_low_f32(r0), 0);  // diagonal element r00 in every lane
+    XMVECTOR r11 = vdupq_lane_f32(vget_low_f32(r1), 1);  // diagonal element r11 in every lane
+    XMVECTOR r22 = vdupq_lane_f32(vget_high_f32(r2), 0); // diagonal element r22 in every lane
+
+    // x^2 >= y^2 equivalent to r11 - r00 <= 0
+    XMVECTOR r11mr00 = vsubq_f32(r11, r00);
+    XMVECTOR x2gey2 = vcleq_f32(r11mr00, g_XMZero);
+
+    // z^2 >= w^2 equivalent to r11 + r00 <= 0
+    XMVECTOR r11pr00 = vaddq_f32(r11, r00);
+    XMVECTOR z2gew2 = vcleq_f32(r11pr00, g_XMZero);
+    
+    // x^2 + y^2 >= z^2 + w^2 equivalent to r22 <= 0
+    XMVECTOR x2py2gez2pw2 = vcleq_f32(r22, g_XMZero);
+
+    // (4*x^2, 4*y^2, 4*z^2, 4*w^2)
+    XMVECTOR t0 = vmulq_f32( XMPMMP, r00 );
+    XMVECTOR x2y2z2w2 = vmlaq_f32( t0, XMMPMP, r11 );
+    x2y2z2w2 = vmlaq_f32( x2y2z2w2, XMMMPP, r22 );
+    x2y2z2w2 = vaddq_f32( x2y2z2w2, g_XMOne );
+
+    // (r01, r02, r12, r11)
+    t0 = vextq_f32(r0, r0, 1);
+    XMVECTOR t1 = vextq_f32(r1, r1, 1);
+    t0 = vcombine_f32( vget_low_f32(t0), vrev64_f32( vget_low_f32( t1 ) ) );
+
+    // (r10, r20, r21, r10)
+    t1 = vextq_f32(r2, r2, 3);
+    XMVECTOR r10 = vdupq_lane_f32( vget_low_f32(r1), 0 );
+    t1 = vbslq_f32( Select0110, t1, r10 );
+
+    // (4*x*y, 4*x*z, 4*y*z, unused)
+    XMVECTOR xyxzyz = vaddq_f32(t0, t1);
+
+    // (r21, r20, r10, r10)
+    t0 = vcombine_f32( vrev64_f32( vget_low_f32(r2) ), vget_low_f32(r10) );
+
+    // (r12, r02, r01, r12)
+    XMVECTOR t2 = vcombine_f32( vrev64_f32( vget_high_f32(r0) ), vrev64_f32( vget_low_f32(r0) ) );
+    XMVECTOR t3 = vdupq_lane_f32( vget_high_f32(r1), 0 );
+    t1 = vbslq_f32( Select0110, t2, t3 );
+
+    // (4*x*w, 4*y*w, 4*z*w, unused)
+    XMVECTOR xwywzw = vsubq_f32(t0, t1);
+    xwywzw = vmulq_f32(XMMPMP, xwywzw);
+
+    // (4*x*x, 4*x*y, 4*x*z, 4*x*w)
+    t0 = vextq_f32( xyxzyz, xyxzyz, 3 );
+    t1 = vbslq_f32( Select0110, t0, x2y2z2w2 );
+    t2 = vdupq_lane_f32( vget_low_f32(xwywzw), 0 );
+    XMVECTOR tensor0 = vbslq_f32( g_XMSelect1110, t1, t2 );
+
+    // (4*y*x, 4*y*y, 4*y*z, 4*y*w)
+    t0 = vbslq_f32( g_XMSelect1011, xyxzyz, x2y2z2w2 );
+    t1 = vdupq_lane_f32( vget_low_f32(xwywzw), 1 );
+    XMVECTOR tensor1 = vbslq_f32( g_XMSelect1110, t0, t1 );
+
+    // (4*z*x, 4*z*y, 4*z*z, 4*z*w)
+    t0 = vextq_f32(xyxzyz, xyxzyz, 1);
+    t1 = vcombine_f32( vget_low_f32(t0), vrev64_f32( vget_high_f32(xwywzw) ) );
+    XMVECTOR tensor2 = vbslq_f32( Select0010, x2y2z2w2, t1 );
+
+    // (4*w*x, 4*w*y, 4*w*z, 4*w*w)
+    XMVECTOR tensor3 = vbslq_f32( g_XMSelect1110, xwywzw, x2y2z2w2 );
+
+    // Select the row of the tensor-product matrix that has the largest
+    // magnitude.
+    t0 = vbslq_f32( x2gey2, tensor0, tensor1 );       // x row or y row
+    t1 = vbslq_f32( z2gew2, tensor2, tensor3 );       // z row or w row
+    t2 = vbslq_f32( x2py2gez2pw2, t0, t1 );           // final choice between the two candidates
+
+    // Normalize the row.  No division by zero is possible because the
+    // quaternion is unit-length (and the row is a nonzero multiple of
+    // the quaternion).
+    t0 = XMVector4Length(t2);
+    return XMVectorDivide(t2, t0);
+#elif defined(_XM_SSE_INTRINSICS_)
+    static const XMVECTORF32 XMPMMP = {+1.0f, -1.0f, -1.0f, +1.0f}; // signs applied to r00
+    static const XMVECTORF32 XMMPMP = {-1.0f, +1.0f, -1.0f, +1.0f}; // signs applied to r11 (and to xwywzw below)
+    static const XMVECTORF32 XMMMPP = {-1.0f, -1.0f, +1.0f, +1.0f}; 
+
+    XMVECTOR r0 = M.r[0];  // (r00, r01, r02, 0)
+    XMVECTOR r1 = M.r[1];  // (r10, r11, r12, 0)
+    XMVECTOR r2 = M.r[2];  // (r20, r21, r22, 0)
+
+    // (r00, r00, r00, r00)
+    XMVECTOR r00 = XM_PERMUTE_PS(r0, _MM_SHUFFLE(0,0,0,0));
+    // (r11, r11, r11, r11)
+    XMVECTOR r11 = XM_PERMUTE_PS(r1, _MM_SHUFFLE(1,1,1,1));
+    // (r22, r22, r22, r22)
+    XMVECTOR r22 = XM_PERMUTE_PS(r2, _MM_SHUFFLE(2,2,2,2));
+
+    // x^2 >= y^2 equivalent to r11 - r00 <= 0
+    // (r11 - r00, r11 - r00, r11 - r00, r11 - r00)
+    XMVECTOR r11mr00 = _mm_sub_ps(r11, r00);
+    XMVECTOR x2gey2 = _mm_cmple_ps(r11mr00, g_XMZero);
+
+    // z^2 >= w^2 equivalent to r11 + r00 <= 0
+    // (r11 + r00, r11 + r00, r11 + r00, r11 + r00)
+    XMVECTOR r11pr00 = _mm_add_ps(r11, r00);
+    XMVECTOR z2gew2 = _mm_cmple_ps(r11pr00, g_XMZero);
+
+    // x^2 + y^2 >= z^2 + w^2 equivalent to r22 <= 0
+    XMVECTOR x2py2gez2pw2 = _mm_cmple_ps(r22, g_XMZero);
+
+    // (+r00, -r00, -r00, +r00)
+    XMVECTOR t0 = _mm_mul_ps(XMPMMP, r00);
+
+    // (-r11, +r11, -r11, +r11)
+    XMVECTOR t1 = _mm_mul_ps(XMMPMP, r11);
+
+    // (-r22, -r22, +r22, +r22)
+    XMVECTOR t2 = _mm_mul_ps(XMMMPP, r22);
+
+    // (4*x^2, 4*y^2, 4*z^2, 4*w^2)
+    XMVECTOR x2y2z2w2 = _mm_add_ps(t0, t1);
+    x2y2z2w2 = _mm_add_ps(t2, x2y2z2w2);
+    x2y2z2w2 = _mm_add_ps(x2y2z2w2, g_XMOne);
+
+    // (r01, r02, r12, r11)
+    t0 = _mm_shuffle_ps(r0, r1, _MM_SHUFFLE(1,2,2,1));
+    // (r10, r10, r20, r21)
+    t1 = _mm_shuffle_ps(r1, r2, _MM_SHUFFLE(1,0,0,0));
+    // (r10, r20, r21, r10)
+    t1 = XM_PERMUTE_PS(t1, _MM_SHUFFLE(1,3,2,0));
+    // (4*x*y, 4*x*z, 4*y*z, unused)
+    XMVECTOR xyxzyz = _mm_add_ps(t0, t1);
+
+    // (r21, r20, r10, r10)
+    t0 = _mm_shuffle_ps(r2, r1, _MM_SHUFFLE(0,0,0,1));
+    // (r12, r12, r02, r01)
+    t1 = _mm_shuffle_ps(r1, r0, _MM_SHUFFLE(1,2,2,2));
+    // (r12, r02, r01, r12)
+    t1 = XM_PERMUTE_PS(t1, _MM_SHUFFLE(1,3,2,0));
+    // (4*x*w, 4*y*w, 4*z*w, unused)
+    XMVECTOR xwywzw = _mm_sub_ps(t0, t1);
+    xwywzw = _mm_mul_ps(XMMPMP, xwywzw);
+
+    // (4*x^2, 4*y^2, 4*x*y, unused)
+    t0 = _mm_shuffle_ps(x2y2z2w2, xyxzyz, _MM_SHUFFLE(0,0,1,0));
+    // (4*z^2, 4*w^2, 4*z*w, unused)
+    t1 = _mm_shuffle_ps(x2y2z2w2, xwywzw, _MM_SHUFFLE(0,2,3,2));
+    // (4*x*z, 4*y*z, 4*x*w, 4*y*w)
+    t2 = _mm_shuffle_ps(xyxzyz, xwywzw, _MM_SHUFFLE(1,0,2,1));
+
+    // (4*x*x, 4*x*y, 4*x*z, 4*x*w)
+    XMVECTOR tensor0 = _mm_shuffle_ps(t0, t2, _MM_SHUFFLE(2,0,2,0));
+    // (4*y*x, 4*y*y, 4*y*z, 4*y*w)
+    XMVECTOR tensor1 = _mm_shuffle_ps(t0, t2, _MM_SHUFFLE(3,1,1,2));
+    // (4*z*x, 4*z*y, 4*z*z, 4*z*w)
+    XMVECTOR tensor2 = _mm_shuffle_ps(t2, t1, _MM_SHUFFLE(2,0,1,0));
+    // (4*w*x, 4*w*y, 4*w*z, 4*w*w)
+    XMVECTOR tensor3 = _mm_shuffle_ps(t2, t1, _MM_SHUFFLE(1,2,3,2));
+
+    // Select the row of the tensor-product matrix that has the largest
+    // magnitude.
+    t0 = _mm_and_ps(x2gey2, tensor0);
+    t1 = _mm_andnot_ps(x2gey2, tensor1);
+    t0 = _mm_or_ps(t0, t1);                  // x row or y row
+    t1 = _mm_and_ps(z2gew2, tensor2);
+    t2 = _mm_andnot_ps(z2gew2, tensor3);
+    t1 = _mm_or_ps(t1, t2);                  // z row or w row
+    t0 = _mm_and_ps(x2py2gez2pw2, t0);
+    t1 = _mm_andnot_ps(x2py2gez2pw2, t1);
+    t2 = _mm_or_ps(t0, t1);                  // final choice between the two candidates
+
+    // Normalize the row.  No division by zero is possible because the
+    // quaternion is unit-length (and the row is a nonzero multiple of
+    // the quaternion).
+    t0 = XMVector4Length(t2);
+    return _mm_div_ps(t2, t0);
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+// Conversion operations
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+// Splits quaternion Q into an axis vector and an angle.
+//   pAxis  - receives Q unchanged (it is not normalized and its w component
+//            is left in place).
+//   pAngle - receives 2 * XMScalarACos(Q.w).
+// Both output pointers must be non-null (checked with assert).
+_Use_decl_annotations_
+inline void XMQuaternionToAxisAngle
+(
+    XMVECTOR* pAxis,
+    float*    pAngle,
+    FXMVECTOR  Q
+)
+{
+    assert(pAxis);
+    assert(pAngle);
+
+    const float fHalfAngle = XMScalarACos(XMVectorGetW(Q));
+
+    *pAxis  = Q;
+    *pAngle = 2.0f * fHalfAngle;
+}
+
+/****************************************************************************
+ *
+ * Plane
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+// Comparison operations
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+
+// Compares two planes by handing their raw coefficient vectors to
+// XMVector4Equal; the planes are not normalized first.
+inline bool XMPlaneEqual(FXMVECTOR P1, FXMVECTOR P2)
+{
+    const bool bSame = XMVector4Equal(P1, P2);
+    return bSame;
+}
+
+//------------------------------------------------------------------------------
+
+// Normalizes both planes with XMPlaneNormalize and forwards the normalized
+// vectors, together with Epsilon, to XMVector4NearEqual.
+inline bool XMPlaneNearEqual(FXMVECTOR P1, FXMVECTOR P2, FXMVECTOR Epsilon)
+{
+    const XMVECTOR vFirst  = XMPlaneNormalize(P1);
+    const XMVECTOR vSecond = XMPlaneNormalize(P2);
+
+    const bool bClose = XMVector4NearEqual(vFirst, vSecond, Epsilon);
+    return bClose;
+}
+
+//------------------------------------------------------------------------------
+
+// Reports whether two planes differ, as decided by XMVector4NotEqual on the
+// raw coefficient vectors (no normalization is performed).
+inline bool XMPlaneNotEqual(FXMVECTOR P1, FXMVECTOR P2)
+{
+    const bool bDiffers = XMVector4NotEqual(P1, P2);
+    return bDiffers;
+}
+
+//------------------------------------------------------------------------------
+
+// Forwards the plane's coefficient vector to XMVector4IsNaN and returns
+// that result.
+inline bool XMPlaneIsNaN(FXMVECTOR P)
+{
+    const bool bHasNaN = XMVector4IsNaN(P);
+    return bHasNaN;
+}
+
+//------------------------------------------------------------------------------
+
+// Forwards the plane's coefficient vector to XMVector4IsInfinite and
+// returns that result.
+inline bool XMPlaneIsInfinite(FXMVECTOR P)
+{
+    const bool bHasInfinity = XMVector4IsInfinite(P);
+    return bHasInfinity;
+}
+
+//------------------------------------------------------------------------------
+// Computation operations
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+
+// Dot product of plane P with the 4D vector V, computed by XMVector4Dot
+// over all four components.
+inline XMVECTOR XMPlaneDot(FXMVECTOR P, FXMVECTOR V)
+{
+    const XMVECTOR vDot = XMVector4Dot(P, V);
+    return vDot;
+}
+
+//------------------------------------------------------------------------------
+
+// Dot product of plane P with the 3D coordinate V, treating V.w as 1:
+//
+//   Result = P[0] * V[0] + P[1] * V[1] + P[2] * V[2] + P[3]
+//
+// The body relies only on the portable XMVectorSelect / XMVector4Dot helpers,
+// so it is not wrapped in a per-platform #if.  The guard that used to be here
+// had an empty #else branch, which left the function without a return
+// statement whenever none of the listed intrinsics macros was defined.
+inline XMVECTOR XMPlaneDotCoord
+(
+    FXMVECTOR P,
+    FXMVECTOR V
+)
+{
+    // Take x, y, z from V and w from the constant 1.
+    XMVECTOR V3 = XMVectorSelect(g_XMOne.v, V, g_XMSelect1110.v);
+    return XMVector4Dot(P, V3);
+}
+
+//------------------------------------------------------------------------------
+
+// Dot product of the plane's (x, y, z) part with the 3D vector V, computed
+// by XMVector3Dot; the plane's w coefficient does not take part.
+inline XMVECTOR XMPlaneDotNormal(FXMVECTOR P, FXMVECTOR V)
+{
+    const XMVECTOR vDot = XMVector3Dot(P, V);
+    return vDot;
+}
+
+//------------------------------------------------------------------------------
+// XMPlaneNormalizeEst multiplies all four plane coefficients by an
+// estimated reciprocal length of the (x, y, z) part.  Because an estimate
+// is used, it returns QNaN on zero and infinite vectors.
+
+inline XMVECTOR XMPlaneNormalizeEst(FXMVECTOR P)
+{
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+    const XMVECTOR vInvLength = XMVector3ReciprocalLengthEst(P);
+    return XMVectorMultiply(P, vInvLength);
+
+#elif defined(_XM_SSE_INTRINSICS_)
+    // Component-wise squares: (x*x, y*y, z*z, w*w)
+    XMVECTOR vSum = _mm_mul_ps(P,P);
+    // Move y*y into lane 0 and z*z into lane 1
+    XMVECTOR vShifted = XM_PERMUTE_PS(vSum,_MM_SHUFFLE(2,1,2,1));
+    // Lane 0 = x*x + y*y
+    vSum = _mm_add_ss(vSum,vShifted);
+    // Broadcast z*z to every lane
+    vShifted = XM_PERMUTE_PS(vShifted,_MM_SHUFFLE(1,1,1,1));
+    // Lane 0 = (x*x + y*y) + z*z
+    vSum = _mm_add_ss(vSum,vShifted);
+    // Replicate the squared length across all four lanes
+    vSum = XM_PERMUTE_PS(vSum,_MM_SHUFFLE(0,0,0,0));
+    // Estimated reciprocal square root of the squared length
+    const XMVECTOR vInvLength = _mm_rsqrt_ps(vSum);
+    // Scale the plane by the estimated reciprocal length
+    return _mm_mul_ps(vInvLength,P);
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Divides all four plane coefficients by the length of the (x, y, z) part.
+//
+// Degenerate inputs are handled differently per code path:
+//  - scalar path: a zero length leaves the scale factor at 0, so every
+//    coefficient is multiplied by 0;
+//  - SSE path: the result is masked to zero when the squared length compares
+//    equal to g_XMInfinity; a zero length is not special-cased there.
+inline XMVECTOR XMPlaneNormalize(FXMVECTOR P)
+{
+#if defined(_XM_NO_INTRINSICS_)
+    const float fX = P.vector4_f32[0];
+    const float fY = P.vector4_f32[1];
+    const float fZ = P.vector4_f32[2];
+
+    // Starts out as the length of (x, y, z) and is turned into its reciprocal
+    // below unless it is zero.
+    float fScale = sqrtf((fX*fX)+(fY*fY)+(fZ*fZ));
+    if (fScale) {
+        fScale = 1.0f/fScale;
+    }
+
+    XMVECTOR vScaled = {
+        fX*fScale,
+        fY*fScale,
+        fZ*fScale,
+        P.vector4_f32[3]*fScale
+    };
+    return vScaled;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    const XMVECTOR vInvLength = XMVector3ReciprocalLength(P);
+    return XMVectorMultiply( P, vInvLength );
+#elif defined(_XM_SSE_INTRINSICS_)
+    // Squared length of (x, y, z), accumulated in lane 0 and then splatted
+    XMVECTOR vSumSq = _mm_mul_ps(P,P);
+    XMVECTOR vShifted = XM_PERMUTE_PS(vSumSq,_MM_SHUFFLE(2,1,2,1));
+    vSumSq = _mm_add_ss(vSumSq,vShifted);
+    vShifted = XM_PERMUTE_PS(vShifted,_MM_SHUFFLE(1,1,1,1));
+    vSumSq = _mm_add_ss(vSumSq,vShifted);
+    vSumSq = XM_PERMUTE_PS(vSumSq,_MM_SHUFFLE(0,0,0,0));
+    // Length in every lane
+    const XMVECTOR vLength = _mm_sqrt_ps(vSumSq);
+    // All-ones where the squared length is not infinity, all-zeros otherwise
+    const XMVECTOR vFiniteMask = _mm_cmpneq_ps(vSumSq,g_XMInfinity);
+    // Normalize by dividing through by the length
+    XMVECTOR vNormalized = _mm_div_ps(P,vLength);
+    // Force the result to zero when the squared length was infinity
+    vNormalized = _mm_and_ps(vNormalized,vFiniteMask);
+    return vNormalized;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Intersects plane P with the line through LinePoint1 and LinePoint2.
+//
+// With N = (P.x, P.y, P.z) the line parameter is
+//
+//   t = XMPlaneDotCoord(P, LinePoint1) / (N . LinePoint1 - N . LinePoint2)
+//
+// and the result is LinePoint1 + (LinePoint2 - LinePoint1) * t.  Components
+// for which the denominator is within g_XMEpsilon of zero are replaced with
+// g_XMQNaN.
+//
+// Only portable XMVector* / XMPlane* helpers are used, so the body is not
+// wrapped in a per-platform #if.  The guard that used to be here had an empty
+// #else branch, which left the function without a return statement whenever
+// none of the listed intrinsics macros was defined.
+inline XMVECTOR XMPlaneIntersectLine
+(
+    FXMVECTOR P,
+    FXMVECTOR LinePoint1,
+    FXMVECTOR LinePoint2
+)
+{
+    // Denominator: difference of the two end points projected on the normal.
+    XMVECTOR V1 = XMVector3Dot(P, LinePoint1);
+    XMVECTOR V2 = XMVector3Dot(P, LinePoint2);
+    XMVECTOR D = XMVectorSubtract(V1, V2);
+
+    // Line parameter t.
+    XMVECTOR VT = XMPlaneDotCoord(P, LinePoint1);
+    VT = XMVectorDivide(VT, D);
+
+    // LinePoint1 + (LinePoint2 - LinePoint1) * t
+    XMVECTOR Point = XMVectorSubtract(LinePoint2, LinePoint1);
+    Point = XMVectorMultiplyAdd(Point, VT, LinePoint1);
+
+    // Flag a denominator that is (nearly) zero and report QNaN there.
+    const XMVECTOR Zero = XMVectorZero();
+    XMVECTOR Control = XMVectorNearEqual(D, Zero, g_XMEpsilon.v);
+
+    return XMVectorSelect(Point, g_XMQNaN.v, Control);
+}
+
+//------------------------------------------------------------------------------
+// Computes two points on the line where planes P1 and P2 intersect.
+//
+//   V1          = cross(P2, P1)                       (line direction)
+//   LinePoint1  = (cross(P2, V1) * P1.w + cross(V1, P1) * P2.w) / |V1|^2
+//   LinePoint2  = LinePoint1 + V1
+//
+// Components for which |V1|^2 <= g_XMEpsilon are written as g_XMQNaN in both
+// outputs.  Both output pointers must be non-null (checked with assert).
+//
+// Only portable XMVector* helpers are used, so the body is not wrapped in a
+// per-platform #if.  The guard that used to be here had an empty #else
+// branch, in which the outputs were never written.
+_Use_decl_annotations_
+inline void XMPlaneIntersectPlane
+(
+    XMVECTOR* pLinePoint1,
+    XMVECTOR* pLinePoint2,
+    FXMVECTOR  P1,
+    FXMVECTOR  P2
+)
+{
+    assert(pLinePoint1);
+    assert(pLinePoint2);
+
+    // Direction of the intersection line and its squared length.
+    XMVECTOR V1 = XMVector3Cross(P2, P1);
+    XMVECTOR LengthSq = XMVector3LengthSq(V1);
+
+    // cross(P2, V1) * P1.w
+    XMVECTOR V2 = XMVector3Cross(P2, V1);
+    XMVECTOR P1W = XMVectorSplatW(P1);
+    XMVECTOR Point = XMVectorMultiply(V2, P1W);
+
+    // ... + cross(V1, P1) * P2.w
+    XMVECTOR V3 = XMVector3Cross(V1, P1);
+    XMVECTOR P2W = XMVectorSplatW(P2);
+    Point = XMVectorMultiplyAdd(V3, P2W, Point);
+
+    XMVECTOR LinePoint1 = XMVectorDivide(Point, LengthSq);
+    XMVECTOR LinePoint2 = XMVectorAdd(LinePoint1, V1);
+
+    // A (near) zero direction yields QNaN outputs.
+    XMVECTOR Control = XMVectorLessOrEqual(LengthSq, g_XMEpsilon.v);
+    *pLinePoint1 = XMVectorSelect(LinePoint1,g_XMQNaN.v, Control);
+    *pLinePoint2 = XMVectorSelect(LinePoint2,g_XMQNaN.v, Control);
+}
+
+//------------------------------------------------------------------------------
+
+// Multiplies the plane's coefficient vector by matrix M as a row vector:
+//
+//   Result = P.x * M.r[0] + P.y * M.r[1] + P.z * M.r[2] + P.w * M.r[3]
+//
+// The terms are accumulated starting from the w term, as in the original
+// implementation.
+//
+// Only portable XMVector* helpers are used, so the body is not wrapped in a
+// per-platform #if.  The guard that used to be here had an empty #else
+// branch, which left the function without a return statement whenever none
+// of the listed intrinsics macros was defined.
+inline XMVECTOR XMPlaneTransform
+(
+    FXMVECTOR P,
+    CXMMATRIX M
+)
+{
+    XMVECTOR W = XMVectorSplatW(P);
+    XMVECTOR Z = XMVectorSplatZ(P);
+    XMVECTOR Y = XMVectorSplatY(P);
+    XMVECTOR X = XMVectorSplatX(P);
+
+    XMVECTOR Result = XMVectorMultiply(W, M.r[3]);
+    Result = XMVectorMultiplyAdd(Z, M.r[2], Result);
+    Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
+    Result = XMVectorMultiplyAdd(X, M.r[0], Result);
+    return Result;
+}
+
+//------------------------------------------------------------------------------
+// Transforms a stream of planes by M.  All arguments are passed through
+// unchanged to XMVector4TransformStream (PlaneCount is used as the element
+// count) and its return value is returned.
+_Use_decl_annotations_
+inline XMFLOAT4* XMPlaneTransformStream
+(
+    XMFLOAT4*       pOutputStream,
+    size_t          OutputStride,
+    const XMFLOAT4* pInputStream,
+    size_t          InputStride,
+    size_t          PlaneCount,
+    CXMMATRIX       M
+)
+{
+    XMFLOAT4* const pResult =
+        XMVector4TransformStream(pOutputStream, OutputStride,
+                                 pInputStream, InputStride,
+                                 PlaneCount, M);
+    return pResult;
+}
+
+//------------------------------------------------------------------------------
+// Conversion operations
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+
+// Builds a plane from a point on it and its normal: x, y, z are taken from
+// Normal and w is -dot3(Point, Normal).  Normal is used as given; it is not
+// normalized here.
+inline XMVECTOR XMPlaneFromPointNormal(FXMVECTOR Point, FXMVECTOR Normal)
+{
+    const XMVECTOR vNegDot = XMVectorNegate(XMVector3Dot(Point, Normal));
+    return XMVectorSelect(vNegDot, Normal, g_XMSelect1110.v);
+}
+
+//------------------------------------------------------------------------------
+
+// Builds the plane through three points.
+//
+//   N = normalize(cross(Point1 - Point2, Point1 - Point3))
+//   Result = (N.x, N.y, N.z, -dot3(N, Point1))
+//
+// Only portable XMVector* / XMPlane* helpers are used, so the body is not
+// wrapped in a per-platform #if.  The guard that used to be here had an empty
+// #else branch, which left the function without a return statement whenever
+// none of the listed intrinsics macros was defined.
+inline XMVECTOR XMPlaneFromPoints
+(
+    FXMVECTOR Point1,
+    FXMVECTOR Point2,
+    FXMVECTOR Point3
+)
+{
+    // Two edge vectors that share Point1.
+    XMVECTOR V21 = XMVectorSubtract(Point1, Point2);
+    XMVECTOR V31 = XMVectorSubtract(Point1, Point3);
+
+    // Unit normal of the triangle.
+    XMVECTOR N = XMVector3Cross(V21, V31);
+    N = XMVector3Normalize(N);
+
+    // Plane offset so that Point1 satisfies the plane equation.
+    XMVECTOR D = XMPlaneDotNormal(N, Point1);
+    D = XMVectorNegate(D);
+
+    // x, y, z from N; w from D.
+    return XMVectorSelect(D, N, g_XMSelect1110.v);
+}
+
+/****************************************************************************
+ *
+ * Color
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+// Comparison operations
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+
+// Compares two colors by forwarding them to XMVector4Equal.
+inline bool XMColorEqual(FXMVECTOR C1, FXMVECTOR C2)
+{
+    const bool bResult = XMVector4Equal(C1, C2);
+    return bResult;
+}
+
+//------------------------------------------------------------------------------
+
+// Compares two colors by forwarding them to XMVector4NotEqual.
+inline bool XMColorNotEqual(FXMVECTOR C1, FXMVECTOR C2)
+{
+    const bool bResult = XMVector4NotEqual(C1, C2);
+    return bResult;
+}
+
+//------------------------------------------------------------------------------
+
+// Compares two colors by forwarding them to XMVector4Greater.
+inline bool XMColorGreater(FXMVECTOR C1, FXMVECTOR C2)
+{
+    const bool bResult = XMVector4Greater(C1, C2);
+    return bResult;
+}
+
+//------------------------------------------------------------------------------
+
+// Compares two colors by forwarding them to XMVector4GreaterOrEqual.
+inline bool XMColorGreaterOrEqual(FXMVECTOR C1, FXMVECTOR C2)
+{
+    const bool bResult = XMVector4GreaterOrEqual(C1, C2);
+    return bResult;
+}
+
+//------------------------------------------------------------------------------
+
+// Compares two colors by forwarding them to XMVector4Less.
+inline bool XMColorLess(FXMVECTOR C1, FXMVECTOR C2)
+{
+    const bool bResult = XMVector4Less(C1, C2);
+    return bResult;
+}
+
+//------------------------------------------------------------------------------
+
+// Compares two colors by forwarding them to XMVector4LessOrEqual.
+inline bool XMColorLessOrEqual(FXMVECTOR C1, FXMVECTOR C2)
+{
+    const bool bResult = XMVector4LessOrEqual(C1, C2);
+    return bResult;
+}
+
+//------------------------------------------------------------------------------
+
+// Forwards the color vector to XMVector4IsNaN and returns that result.
+inline bool XMColorIsNaN(FXMVECTOR C)
+{
+    const bool bHasNaN = XMVector4IsNaN(C);
+    return bHasNaN;
+}
+
+//------------------------------------------------------------------------------
+
+// Forwards the color vector to XMVector4IsInfinite and returns that result.
+inline bool XMColorIsInfinite(FXMVECTOR C)
+{
+    const bool bHasInfinity = XMVector4IsInfinite(C);
+    return bHasInfinity;
+}
+
+//------------------------------------------------------------------------------
+// Computation operations
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+
+// Returns the negative of a color: each of x, y, z becomes 1 - value, while
+// w is passed through untouched.
+inline XMVECTOR XMColorNegative(FXMVECTOR vColor)
+{
+#if defined(_XM_NO_INTRINSICS_)
+    const XMVECTORF32 vInverted = {
+        1.0f - vColor.vector4_f32[0],
+        1.0f - vColor.vector4_f32[1],
+        1.0f - vColor.vector4_f32[2],
+        vColor.vector4_f32[3]
+    };
+    return vInverted.v;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    // XOR with g_XMNegate3, then add g_XMOne3
+    const XMVECTOR vFlipped = veorq_u32(vColor,g_XMNegate3);
+    return vaddq_f32(vFlipped,g_XMOne3);
+#elif defined(_XM_SSE_INTRINSICS_)
+    // Flip the sign of x, y and z only: (-x, -y, -z, w)
+    const XMVECTOR vFlipped = _mm_xor_ps(vColor,g_XMNegate3);
+    // Adding (1, 1, 1, 0) gives (1-x, 1-y, 1-z, w)
+    return _mm_add_ps(vFlipped,g_XMOne3);
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Modulates one color by another: the result of XMVectorMultiply(C1, C2).
+inline XMVECTOR XMColorModulate(FXMVECTOR C1, FXMVECTOR C2)
+{
+    const XMVECTOR vProduct = XMVectorMultiply(C1, C2);
+    return vProduct;
+}
+
+//------------------------------------------------------------------------------
+
+// Adjusts the saturation of a color.
+//
+//   Luminance = 0.2125f * C[0] + 0.7154f * C[1] + 0.0721f * C[2]
+//   Result    = (C - Luminance) * fSaturation + Luminance      (x, y, z)
+//
+// The w component of vColor is carried over unchanged.
+inline XMVECTOR XMColorAdjustSaturation(FXMVECTOR vColor, float fSaturation)
+{
+#if defined(_XM_NO_INTRINSICS_)
+    const XMVECTORF32 gvLuminance = {0.2125f, 0.7154f, 0.0721f, 0.0f};
+
+    const float fRed   = vColor.vector4_f32[0];
+    const float fGreen = vColor.vector4_f32[1];
+    const float fBlue  = vColor.vector4_f32[2];
+
+    const float fLuma = (fRed*gvLuminance.f[0])+(fGreen*gvLuminance.f[1])+(fBlue*gvLuminance.f[2]);
+
+    XMVECTORF32 vAdjusted = {
+        ((fRed - fLuma)*fSaturation)+fLuma,
+        ((fGreen - fLuma)*fSaturation)+fLuma,
+        ((fBlue - fLuma)*fSaturation)+fLuma,
+        vColor.vector4_f32[3]};
+    return vAdjusted.v;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    static const XMVECTORF32 gvLuminance = {0.2125f, 0.7154f, 0.0721f, 0.0f};
+    const XMVECTOR vLuma = XMVector3Dot( vColor, gvLuminance );
+    const XMVECTOR vChroma = vsubq_f32(vColor, vLuma);
+    const XMVECTOR vAmount = vdupq_n_f32(fSaturation);
+    // vLuma + vChroma * vAmount
+    const XMVECTOR vAdjusted = vmlaq_f32( vLuma, vChroma, vAmount );
+    // x, y, z from the adjusted value, w from the source color
+    return vbslq_f32( g_XMSelect1110, vAdjusted, vColor );
+#elif defined(_XM_SSE_INTRINSICS_)
+    static const XMVECTORF32 gvLuminance = {0.2125f, 0.7154f, 0.0721f, 0.0f};
+    const XMVECTOR vLuma = XMVector3Dot( vColor, gvLuminance );
+    // Broadcast the saturation factor to every lane
+    const XMVECTOR vAmount = _mm_set_ps1(fSaturation);
+    // ((vColor - vLuma) * vAmount) + vLuma
+    XMVECTOR vAdjusted = _mm_sub_ps(vColor,vLuma);
+    vAdjusted = _mm_mul_ps(vAdjusted,vAmount);
+    vAdjusted = _mm_add_ps(vAdjusted,vLuma);
+    // Put the source w back: first build (adj.z, adj.z, color.z, color.w) ...
+    const XMVECTOR vZW = _mm_shuffle_ps(vAdjusted,vColor,_MM_SHUFFLE(3,2,2,2));
+    // ... then combine into (adj.x, adj.y, adj.z, color.w)
+    return _mm_shuffle_ps(vAdjusted,vZW,_MM_SHUFFLE(3,0,1,0));
+#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Adjusts the contrast of a color around the mid-point 0.5:
+//
+//   Result = (vColor - 0.5f) * fContrast + 0.5f                (x, y, z)
+//
+// The w component of vColor is carried over unchanged.
+inline XMVECTOR XMColorAdjustContrast(FXMVECTOR vColor, float fContrast)
+{
+#if defined(_XM_NO_INTRINSICS_)
+    const float fRed   = vColor.vector4_f32[0];
+    const float fGreen = vColor.vector4_f32[1];
+    const float fBlue  = vColor.vector4_f32[2];
+
+    XMVECTORF32 vAdjusted = {
+        ((fRed-0.5f) * fContrast) + 0.5f,
+        ((fGreen-0.5f) * fContrast) + 0.5f,
+        ((fBlue-0.5f) * fContrast) + 0.5f,
+        vColor.vector4_f32[3]        // w is passed through
+    };
+    return vAdjusted.v;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+    const XMVECTOR vCentered = vsubq_f32(vColor, g_XMOneHalf.v);
+    const XMVECTOR vAmount = vdupq_n_f32(fContrast);
+    // 0.5 + vCentered * vAmount
+    const XMVECTOR vAdjusted = vmlaq_f32( g_XMOneHalf.v, vCentered, vAmount );
+    // x, y, z from the adjusted value, w from the source color
+    return vbslq_f32( g_XMSelect1110, vAdjusted, vColor );
+#elif defined(_XM_SSE_INTRINSICS_)
+    // Broadcast the contrast factor to every lane
+    const XMVECTOR vAmount = _mm_set_ps1(fContrast);
+    // ((vColor - 0.5) * vAmount) + 0.5
+    XMVECTOR vAdjusted = _mm_sub_ps(vColor,g_XMOneHalf);
+    vAdjusted = _mm_mul_ps(vAdjusted,vAmount);
+    vAdjusted = _mm_add_ps(vAdjusted,g_XMOneHalf);
+    // Put the source w back: first build (adj.z, adj.z, color.z, color.w) ...
+    const XMVECTOR vZW = _mm_shuffle_ps(vAdjusted,vColor,_MM_SHUFFLE(3,2,2,2));
+    // ... then combine into (adj.x, adj.y, adj.z, color.w)
+    return _mm_shuffle_ps(vAdjusted,vZW,_MM_SHUFFLE(3,0,1,0));
+#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Converts (r, g, b, a) to (h, s, l, a).
+//
+//   l = (min + max) / 2
+//   s = d / (max + min)        when l <= 0.5
+//       d / (2 - max - min)    when l >  0.5          with d = max - min
+//   h = sector offset (0, 2 or 4, chosen by the largest channel) plus the
+//       difference of the other two channels over d, divided by 6 and
+//       wrapped by +1 when negative
+//
+// When d is below g_XMEpsilon the color is treated as achromatic and both h
+// and s are returned as 0.  The w component is passed through.
+inline XMVECTOR XMColorRGBToHSL( FXMVECTOR rgb )
+{
+    const XMVECTOR red   = XMVectorSplatX( rgb );
+    const XMVECTOR green = XMVectorSplatY( rgb );
+    const XMVECTOR blue  = XMVectorSplatZ( rgb );
+
+    const XMVECTOR lowest  = XMVectorMin( red, XMVectorMin( green, blue ) );
+    const XMVECTOR highest = XMVectorMax( red, XMVectorMax( green, blue ) );
+
+    const XMVECTOR sum       = XMVectorAdd( lowest, highest );
+    const XMVECTOR lightness = XMVectorMultiply( sum, g_XMOneHalf );
+    const XMVECTOR spread    = XMVectorSubtract( highest, lowest );
+
+    // (l, l, l, a)
+    const XMVECTOR lla = XMVectorSelect( rgb, lightness, g_XMSelect1110 );
+
+    // Achromatic: (0, 0, l, a)
+    if ( XMVector3Less( spread, g_XMEpsilon ) )
+    {
+        return XMVectorSelect( lla, g_XMZero, g_XMSelect1100 );
+    }
+
+    // Saturation: the denominator depends on which half the lightness is in.
+    XMVECTOR satDenominator = sum;
+    if ( XMVector3Greater( lightness, g_XMOneHalf ) )
+    {
+        satDenominator = XMVectorSubtract( g_XMTwo, sum );
+    }
+    const XMVECTOR saturation = XMVectorDivide( spread, satDenominator );
+
+    // Hue in sixths of a turn, selected by the dominant channel.
+    XMVECTOR hue;
+    if ( XMVector3Equal( red, highest ) )
+    {
+        hue = XMVectorDivide( XMVectorSubtract( green, blue ), spread );
+    }
+    else if ( XMVector3Equal( green, highest ) )
+    {
+        hue = XMVectorDivide( XMVectorSubtract( blue, red ), spread );
+        hue = XMVectorAdd( hue, g_XMTwo );
+    }
+    else
+    {
+        hue = XMVectorDivide( XMVectorSubtract( red, green ), spread );
+        hue = XMVectorAdd( hue, g_XMFour );
+    }
+
+    hue = XMVectorDivide( hue, g_XMSix );
+
+    if ( XMVector3Less( hue, g_XMZero ) )
+    {
+        hue = XMVectorAdd( hue, g_XMOne );
+    }
+
+    // (h, h, l, a), then (h, s, l, a)
+    const XMVECTOR hhla = XMVectorSelect( lla, hue, g_XMSelect1100 );
+    return XMVectorSelect( saturation, hhla, g_XMSelect1011 );
+}
+
+//------------------------------------------------------------------------------
+
+namespace Internal
+{
+
+// Piecewise hue-to-channel ramp used by XMColorHSLToRGB.
+//
+// h is first wrapped once (+1 when below 0, then -1 when above 1) and the
+// result t selects one of four segments:
+//   t < 1/6  ->  p + (q - p) * 6 * t
+//   t < 1/2  ->  q
+//   t < 2/3  ->  p + (q - p) * 6 * (2/3 - t)
+//   else     ->  p
+inline XMVECTOR XMColorHue2Clr( FXMVECTOR p, FXMVECTOR q, FXMVECTOR h )
+{
+    static const XMVECTORF32 oneSixth  = { 1.0f/6.0f, 1.0f/6.0f, 1.0f/6.0f, 1.0f/6.0f };
+    static const XMVECTORF32 twoThirds = { 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f };
+
+    XMVECTOR hue = h;
+
+    if ( XMVector3Less( hue, g_XMZero ) )
+    {
+        hue = XMVectorAdd( hue, g_XMOne );
+    }
+
+    if ( XMVector3Greater( hue, g_XMOne ) )
+    {
+        hue = XMVectorSubtract( hue, g_XMOne );
+    }
+
+    if ( XMVector3Less( hue, oneSixth ) )
+    {
+        // Rising edge
+        const XMVECTOR span  = XMVectorSubtract( q, p );
+        const XMVECTOR slope = XMVectorMultiply( g_XMSix, hue );
+        return XMVectorMultiplyAdd( span, slope, p );
+    }
+    else if ( XMVector3Less( hue, g_XMOneHalf ) )
+    {
+        // Plateau
+        return q;
+    }
+    else if ( XMVector3Less( hue, twoThirds ) )
+    {
+        // Falling edge
+        const XMVECTOR span  = XMVectorSubtract( q, p );
+        const XMVECTOR slope = XMVectorMultiply( g_XMSix, XMVectorSubtract( twoThirds, hue ) );
+        return XMVectorMultiplyAdd( span, slope, p );
+    }
+
+    // Floor
+    return p;
+}
+
+} // namespace Internal
+
+// Converts (h, s, l, a) to (r, g, b, a).
+//
+// When s is within g_XMEpsilon of zero the color is achromatic and
+// (l, l, l, a) is returned.  Otherwise
+//   q = l * (1 + s)        when l < 0.5
+//       l + s - l * s      otherwise
+//   p = 2 * l - q
+// and each channel comes from Internal::XMColorHue2Clr with the hue shifted
+// by +1/3 (red), 0 (green) and -1/3 (blue).  The w component is passed
+// through.
+inline XMVECTOR XMColorHSLToRGB( FXMVECTOR hsl )
+{
+    static const XMVECTORF32 oneThird = { 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f };
+
+    const XMVECTOR saturation = XMVectorSplatY( hsl );
+    const XMVECTOR lightness  = XMVectorSplatZ( hsl );
+
+    // Achromatic: every channel equals the lightness.
+    if ( XMVector3NearEqual( saturation, g_XMZero, g_XMEpsilon ) )
+    {
+        return XMVectorSelect( hsl, lightness, g_XMSelect1110 );
+    }
+
+    const XMVECTOR hue = XMVectorSplatX( hsl );
+
+    XMVECTOR upper;
+    if ( XMVector3Less( lightness, g_XMOneHalf ) )
+    {
+        upper = XMVectorMultiply( lightness, XMVectorAdd ( g_XMOne, saturation ) );
+    }
+    else
+    {
+        upper = XMVectorSubtract( XMVectorAdd( lightness, saturation ), XMVectorMultiply( lightness, saturation ) );
+    }
+
+    const XMVECTOR lower = XMVectorSubtract( XMVectorMultiply( g_XMTwo, lightness ), upper );
+
+    const XMVECTOR red   = DirectX::Internal::XMColorHue2Clr( lower, upper, XMVectorAdd( hue, oneThird ) );
+    const XMVECTOR green = DirectX::Internal::XMColorHue2Clr( lower, upper, hue );
+    const XMVECTOR blue  = DirectX::Internal::XMColorHue2Clr( lower, upper, XMVectorSubtract( hue, oneThird ) );
+
+    // (r, g, g, g) and (b, b, b, a) merge into (r, g, b, a)
+    const XMVECTOR redGreen  = XMVectorSelect( green, red, g_XMSelect1000 );
+    const XMVECTOR blueAlpha = XMVectorSelect( hsl, blue, g_XMSelect1110 );
+
+    return XMVectorSelect( blueAlpha, redGreen, g_XMSelect1100 );
+}
+
+//------------------------------------------------------------------------------
+
+// Converts (r, g, b, a) to (h, s, v, a).
+//
+//   v = max(r, g, b)
+//   s = 0 when v is within g_XMEpsilon of zero, otherwise (v - min) / v
+//   h = 0 when (v - min) is below g_XMEpsilon (achromatic); otherwise the
+//       sector offset chosen by the largest channel plus the difference of
+//       the other two channels over (v - min), divided by 6
+//
+// The w component is passed through.
+inline XMVECTOR XMColorRGBToHSV( FXMVECTOR rgb )
+{
+    const XMVECTOR red   = XMVectorSplatX( rgb );
+    const XMVECTOR green = XMVectorSplatY( rgb );
+    const XMVECTOR blue  = XMVectorSplatZ( rgb );
+
+    const XMVECTOR lowest = XMVectorMin( red, XMVectorMin( green, blue ) );
+    const XMVECTOR value  = XMVectorMax( red, XMVectorMax( green, blue ) );
+
+    const XMVECTOR spread = XMVectorSubtract( value, lowest );
+
+    XMVECTOR saturation = g_XMZero;
+    if ( !XMVector3NearEqual( value, g_XMZero, g_XMEpsilon ) )
+    {
+        saturation = XMVectorDivide( spread, value );
+    }
+
+    // Hue stays 0 for achromatic input.
+    XMVECTOR hue = g_XMZero;
+    if ( !XMVector3Less( spread, g_XMEpsilon ) )
+    {
+        if ( XMVector3Equal( red, value ) )
+        {
+            // Red is the largest channel
+            hue = XMVectorDivide( XMVectorSubtract( green, blue ), spread );
+
+            if ( XMVector3Less( green, blue ) )
+            {
+                hue = XMVectorAdd( hue, g_XMSix );
+            }
+        }
+        else if ( XMVector3Equal( green, value ) )
+        {
+            // Green is the largest channel
+            hue = XMVectorDivide( XMVectorSubtract( blue, red ), spread );
+            hue = XMVectorAdd( hue, g_XMTwo );
+        }
+        else
+        {
+            // Blue is the largest channel
+            hue = XMVectorDivide( XMVectorSubtract( red, green ), spread );
+            hue = XMVectorAdd( hue, g_XMFour );
+        }
+
+        hue = XMVectorDivide( hue, g_XMSix );
+    }
+
+    // (h, v, v, v) -> (h, v, v, a) -> (h, s, v, a)
+    const XMVECTOR hueValue      = XMVectorSelect( value, hue, g_XMSelect1000 );
+    const XMVECTOR hueValueAlpha = XMVectorSelect( rgb, hueValue, g_XMSelect1110 );
+    return XMVectorSelect( saturation, hueValueAlpha, g_XMSelect1011 );
+}
+
+//------------------------------------------------------------------------------
+
+// Converts (h, s, v, a) to (r, g, b, a).
+//
+// With i = floor(6 * h) and f = 6 * h - i:
+//   p = v * (1 - s)
+//   q = v * (1 - f * s)
+//   t = v * (1 - (1 - f) * s)
+// The sector index (i mod 6, truncated to int) picks which of v, p, q, t
+// feeds each output channel; any index other than 0..4 uses the last
+// arrangement.  The w component is passed through.
+inline XMVECTOR XMColorHSVToRGB( FXMVECTOR hsv )
+{
+    const XMVECTOR hue        = XMVectorSplatX( hsv );
+    const XMVECTOR saturation = XMVectorSplatY( hsv );
+    const XMVECTOR value      = XMVectorSplatZ( hsv );
+
+    const XMVECTOR scaledHue = XMVectorMultiply( hue, g_XMSix );
+
+    const XMVECTOR sector   = XMVectorFloor( scaledHue );
+    const XMVECTOR fraction = XMVectorSubtract( scaledHue, sector );
+
+    // p = v * (1 - s)
+    const XMVECTOR p = XMVectorMultiply( value, XMVectorSubtract( g_XMOne, saturation ) );
+
+    // q = v * (1 - f * s)
+    const XMVECTOR q = XMVectorMultiply( value, XMVectorSubtract( g_XMOne, XMVectorMultiply( fraction, saturation ) ) );
+
+    // t = v * (1 - (1 - f) * s)
+    const XMVECTOR t = XMVectorMultiply( value, XMVectorSubtract( g_XMOne, XMVectorMultiply( XMVectorSubtract( g_XMOne, fraction ), saturation ) ) );
+
+    const int sectorIndex = static_cast<int>( XMVectorGetX( XMVectorMod( sector, g_XMSix ) ) );
+
+    // Choose the source of each channel, then pack once below.
+    XMVECTOR red, green, blue;
+
+    switch (sectorIndex)
+    {
+    case 0: // rgb = vtp
+        red = value; green = t;     blue = p;
+        break;
+    case 1: // rgb = qvp
+        red = q;     green = value; blue = p;
+        break;
+    case 2: // rgb = pvt
+        red = p;     green = value; blue = t;
+        break;
+    case 3: // rgb = pqv
+        red = p;     green = q;     blue = value;
+        break;
+    case 4: // rgb = tpv
+        red = t;     green = p;     blue = value;
+        break;
+    default: // rgb = vpq
+        red = value; green = p;     blue = q;
+        break;
+    }
+
+    // (r, g, g, g) -> (r, g, b, b) -> (r, g, b, a)
+    const XMVECTOR redGreen = XMVectorSelect( green, red, g_XMSelect1000 );
+    const XMVECTOR packed   = XMVectorSelect( blue, redGreen, g_XMSelect1100 );
+
+    return XMVectorSelect( hsv, packed, g_XMSelect1110 );
+}
+
+//------------------------------------------------------------------------------
+
+// Converts (r, g, b, a) to (y, u, v, a) by transforming the color with the
+// 3x3 coefficient matrix below (rows are the contributions of r, g and b).
+// The w component is passed through.
+inline XMVECTOR XMColorRGBToYUV( FXMVECTOR rgb )
+{
+    static const XMVECTORF32 FromRed   = {  0.299f, -0.147f,  0.615f, 0.0f };
+    static const XMVECTORF32 FromGreen = {  0.587f, -0.289f, -0.515f, 0.0f };
+    static const XMVECTORF32 FromBlue  = {  0.114f,  0.436f, -0.100f, 0.0f };
+
+    const XMMATRIX toYUV( FromRed, FromGreen, FromBlue, g_XMZero );
+    const XMVECTOR yuv = XMVector3Transform( rgb, toYUV );
+
+    // x, y, z from the converted color, w from the input
+    return XMVectorSelect( rgb, yuv, g_XMSelect1110 );
+}
+
+//------------------------------------------------------------------------------
+
+// Converts (y, u, v, a) to (r, g, b, a) by transforming the color with the
+// coefficient matrix below (rows are the contributions of y, u and v; the y
+// row is g_XMOne).  The w component is passed through.
+inline XMVECTOR XMColorYUVToRGB( FXMVECTOR yuv )
+{
+    static const XMVECTORF32 FromU = {   0.0f, -0.395f, 2.032f, 0.0f };
+    static const XMVECTORF32 FromV = { 1.140f, -0.581f,   0.0f, 0.0f };
+
+    const XMMATRIX toRGB( g_XMOne, FromU, FromV, g_XMZero );
+    const XMVECTOR rgb = XMVector3Transform( yuv, toRGB );
+
+    // x, y, z from the converted color, w from the input
+    return XMVectorSelect( yuv, rgb, g_XMSelect1110 );
+}
+
+//------------------------------------------------------------------------------
+
+// HD variant of the RGB -> YUV conversion: same structure as
+// XMColorRGBToYUV but with the coefficient set below.  The w component is
+// passed through.
+inline XMVECTOR XMColorRGBToYUV_HD( FXMVECTOR rgb )
+{
+    static const XMVECTORF32 FromRed   = { 0.2126f, -0.0997f,  0.6150f, 0.0f };
+    static const XMVECTORF32 FromGreen = { 0.7152f, -0.3354f, -0.5586f, 0.0f };
+    static const XMVECTORF32 FromBlue  = { 0.0722f,  0.4351f, -0.0564f, 0.0f };
+
+    const XMMATRIX toYUV( FromRed, FromGreen, FromBlue, g_XMZero );
+    const XMVECTOR yuv = XMVector3Transform( rgb, toYUV );
+
+    // x, y, z from the converted color, w from the input
+    return XMVectorSelect( rgb, yuv, g_XMSelect1110 );
+}
+
+//------------------------------------------------------------------------------
+
+// HD variant of the YUV -> RGB conversion: same structure as
+// XMColorYUVToRGB but with the coefficient set below.  The w component is
+// passed through.
+inline XMVECTOR XMColorYUVToRGB_HD( FXMVECTOR yuv )
+{
+    static const XMVECTORF32 FromU = {    0.0f, -0.2153f, 2.1324f, 0.0f };
+    static const XMVECTORF32 FromV = { 1.2803f, -0.3806f,    0.0f, 0.0f };
+
+    const XMMATRIX toRGB( g_XMOne, FromU, FromV, g_XMZero );
+    const XMVECTOR rgb = XMVector3Transform( yuv, toRGB );
+
+    // x, y, z from the converted color, w from the input
+    return XMVectorSelect( yuv, rgb, g_XMSelect1110 );
+}
+
+//------------------------------------------------------------------------------
+
+// Converts (r, g, b, a) to (x, y, z, a): the color is transformed by the
+// coefficient matrix below and the result is then multiplied by 1/0.17697.
+// The w component is passed through.
+inline XMVECTOR XMColorRGBToXYZ( FXMVECTOR rgb )
+{
+    static const XMVECTORF32 FromRed   = { 0.4887180f, 0.1762044f, 0.0000000f, 0.0f };
+    static const XMVECTORF32 FromGreen = { 0.3106803f, 0.8129847f, 0.0102048f, 0.0f };
+    static const XMVECTORF32 FromBlue  = { 0.2006017f, 0.0108109f, 0.9897952f, 0.0f };
+    static const XMVECTORF32 Gain = { 1.f/0.17697f, 1.f/0.17697f, 1.f/0.17697f, 0.0f };
+
+    const XMMATRIX toXYZ( FromRed, FromGreen, FromBlue, g_XMZero );
+    const XMVECTOR unscaled = XMVector3Transform( rgb, toXYZ );
+    const XMVECTOR xyz = XMVectorMultiply( unscaled, Gain );
+
+    // x, y, z from the converted color, w from the input
+    return XMVectorSelect( rgb, xyz, g_XMSelect1110 );
+}
+
+// Converts (x, y, z, a) to (r, g, b, a): the input is first multiplied by
+// 0.17697 and then transformed by the coefficient matrix below.  The w
+// component is passed through.
+inline XMVECTOR XMColorXYZToRGB( FXMVECTOR xyz )
+{
+    static const XMVECTORF32 FromX = {  2.3706743f, -0.5138850f,  0.0052982f, 0.0f };
+    static const XMVECTORF32 FromY = { -0.9000405f,  1.4253036f, -0.0146949f, 0.0f };
+    static const XMVECTORF32 FromZ = { -0.4706338f,  0.0885814f,  1.0093968f, 0.0f };
+    static const XMVECTORF32 Gain = { 0.17697f, 0.17697f, 0.17697f, 0.0f };
+
+    const XMMATRIX toRGB( FromX, FromY, FromZ, g_XMZero );
+    const XMVECTOR scaled = XMVectorMultiply( xyz, Gain );
+    const XMVECTOR rgb = XMVector3Transform( scaled, toRGB );
+
+    // x, y, z from the converted color, w from the input
+    return XMVectorSelect( xyz, rgb, g_XMSelect1110 );
+}
+
+//------------------------------------------------------------------------------
+
+// Converts (x, y, z, a) to sRGB (r, g, b, a).
+//
+// The input is transformed to linear RGB with the matrix below, then each
+// channel is encoded with the sRGB transfer curve:
+//   linear <= 0.0031308 :  g_XMsrgbScale * linear
+//   linear >  0.0031308 :  g_XMsrgbA1 * pow(linear, 1/2.4) - g_XMsrgbA
+// The w component is passed through.
+inline XMVECTOR XMColorXYZToSRGB( FXMVECTOR xyz )
+{
+    static const XMVECTORF32 FromX = {  3.2406f, -0.9689f,  0.0557f, 0.0f };
+    static const XMVECTORF32 FromY = { -1.5372f,  1.8758f, -0.2040f, 0.0f };
+    static const XMVECTORF32 FromZ = { -0.4986f,  0.0415f,  1.0570f, 0.0f };
+    static const XMVECTORF32 Threshold = { 0.0031308f, 0.0031308f, 0.0031308f, 0.0f };
+    static const XMVECTORF32 Gamma     = { 1.0f/2.4f, 1.0f/2.4f, 1.0f/2.4f, 1.0f };
+
+    const XMMATRIX toLinearRGB( FromX, FromY, FromZ, g_XMZero );
+    const XMVECTOR linear = XMVector3Transform( xyz, toLinearRGB );
+
+    // Per-component mask: set where the power segment of the curve applies
+    const XMVECTOR usePower = XMVectorGreater( linear, Threshold );
+
+    // Linear segment of the curve
+    const XMVECTOR linearSegment = XMVectorMultiply( linear, g_XMsrgbScale );
+
+    // Power segment of the curve
+    const XMVECTOR raised = XMVectorPow( linear, Gamma );
+    const XMVECTOR powerSegment = XMVectorSubtract( XMVectorMultiply( g_XMsrgbA1, raised ), g_XMsrgbA );
+
+    const XMVECTOR encoded = XMVectorSelect( linearSegment, powerSegment, usePower );
+
+    // x, y, z from the encoded color, w from the input
+    return XMVectorSelect( xyz, encoded, g_XMSelect1110 );
+}
+
+//------------------------------------------------------------------------------
+
+// Converts sRGB (r, g, b, a) to (x, y, z, a).
+//
+// Each channel is first decoded with the inverse sRGB transfer curve:
+//   value <= 0.04045 :  value / g_XMsrgbScale
+//   value >  0.04045 :  pow((value + g_XMsrgbA) / g_XMsrgbA1, 2.4)
+// and the linear color is then transformed with the matrix below.  The w
+// component is passed through.
+inline XMVECTOR XMColorSRGBToXYZ( FXMVECTOR srgb )
+{
+    static const XMVECTORF32 FromRed   = { 0.4124f, 0.2126f, 0.0193f, 0.0f };
+    static const XMVECTORF32 FromGreen = { 0.3576f, 0.7152f, 0.1192f, 0.0f };
+    static const XMVECTORF32 FromBlue  = { 0.1805f, 0.0722f, 0.9505f, 0.0f };
+    static const XMVECTORF32 Threshold = { 0.04045f, 0.04045f, 0.04045f, 0.0f };
+    static const XMVECTORF32 Gamma     = { 2.4f, 2.4f, 2.4f, 1.0f };
+
+    // Per-component mask: set where the power segment of the curve applies
+    const XMVECTOR usePower = XMVectorGreater( srgb, Threshold );
+
+    // Linear segment of the curve
+    const XMVECTOR linearSegment = XMVectorDivide( srgb, g_XMsrgbScale );
+
+    // Power segment of the curve
+    const XMVECTOR normalized = XMVectorDivide( XMVectorAdd( srgb, g_XMsrgbA ), g_XMsrgbA1 );
+    const XMVECTOR powerSegment = XMVectorPow( normalized, Gamma );
+
+    const XMVECTOR linear = XMVectorSelect( linearSegment, powerSegment, usePower );
+
+    const XMMATRIX toXYZ( FromRed, FromGreen, FromBlue, g_XMZero );
+    const XMVECTOR xyz = XMVector3Transform( linear, toXYZ );
+
+    // x, y, z from the converted color, w from the input
+    return XMVectorSelect( srgb, xyz, g_XMSelect1110 );
+}
+
+/****************************************************************************
+ *
+ * Miscellaneous
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+
+// Reports whether the running CPU provides the instruction set selected for
+// this build. Builds without intrinsics always report true.
+inline bool XMVerifyCPUSupport()
+{
+#if defined(_XM_NO_INTRINSICS_)
+    // Scalar build: there is nothing to verify.
+    return true;
+#elif defined(_XM_SSE_INTRINSICS_)
+#if defined(_M_AMD64)
+    // The X64 processor model requires SSE2 support
+    return true;
+#elif defined(PF_XMMI_INSTRUCTIONS_AVAILABLE)
+    // Note that on Windows 2000 or older, SSE2 detection is not supported so this will always fail
+    // Detecting SSE2 on older versions of Windows would require using cpuid directly
+    if ( IsProcessorFeaturePresent( PF_XMMI_INSTRUCTIONS_AVAILABLE ) == 0 )
+    {
+        return false;
+    }
+    return ( IsProcessorFeaturePresent( PF_XMMI64_INSTRUCTIONS_AVAILABLE ) != 0 );
+#else
+    // Without windows.h the feature query is unavailable; report false
+    // (likely a false negative)
+    return false;
+#endif
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+#if defined(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)
+    return ( IsProcessorFeaturePresent( PF_ARM_NEON_INSTRUCTIONS_AVAILABLE ) != 0 );
+#else
+    // Without windows.h the feature query is unavailable; report false
+    // (likely a false negative)
+    return false;
+#endif
+#else
+    return true;
+#endif
+}
+
+//------------------------------------------------------------------------------
+
+// Computes the Fresnel term component-wise:
+//
+//   Result = 0.5f * (g - c)^2 / (g + c)^2 * ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1)
+//   where c = CosIncidentAngle
+//         g = sqrt(abs(c^2 + RefractionIndex^2 - 1))
+//
+// The result is clamped to [0,1]. Debug builds assert on the result of
+// XMVector4IsInfinite(CosIncidentAngle).
+inline XMVECTOR XMFresnelTerm
+(
+    FXMVECTOR CosIncidentAngle,
+    FXMVECTOR RefractionIndex
+)
+{
+    assert(!XMVector4IsInfinite(CosIncidentAngle));
+
+    // The portable path doubles as the fallback whenever the SSE path is not
+    // selected, so every build configuration returns a value instead of
+    // flowing off the end of the function.
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) || !defined(_XM_SSE_INTRINSICS_)
+
+    // G = sqrt(abs((RefractionIndex^2-1) + CosIncidentAngle^2))
+    XMVECTOR G = XMVectorMultiplyAdd(RefractionIndex, RefractionIndex, g_XMNegativeOne.v);
+    G = XMVectorMultiplyAdd(CosIncidentAngle, CosIncidentAngle, G);
+    G = XMVectorAbs(G);
+    G = XMVectorSqrt(G);
+
+    // S = g + c, D = g - c
+    XMVECTOR S = XMVectorAdd(G, CosIncidentAngle);
+    XMVECTOR D = XMVectorSubtract(G, CosIncidentAngle);
+
+    // V0 = 0.5f * (g - c)^2 / (g + c)^2
+    XMVECTOR V0 = XMVectorMultiply(D, D);
+    XMVECTOR V1 = XMVectorMultiply(S, S);
+    V1 = XMVectorReciprocal(V1);
+    V0 = XMVectorMultiply(g_XMOneHalf.v, V0);
+    V0 = XMVectorMultiply(V0, V1);
+
+    // V2 = (c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1
+    XMVECTOR V2 = XMVectorMultiplyAdd(CosIncidentAngle, S, g_XMNegativeOne.v);
+    XMVECTOR V3 = XMVectorMultiplyAdd(CosIncidentAngle, D, g_XMOne.v);
+    V2 = XMVectorMultiply(V2, V2);
+    V3 = XMVectorMultiply(V3, V3);
+    V3 = XMVectorReciprocal(V3);
+    V2 = XMVectorMultiplyAdd(V2, V3, g_XMOne.v);
+
+    XMVECTOR Result = XMVectorMultiply(V0, V2);
+
+    Result = XMVectorSaturate(Result);
+
+    return Result;
+
+#else // _XM_SSE_INTRINSICS_
+    // G = sqrt(abs((RefractionIndex^2-1) + CosIncidentAngle^2))
+    XMVECTOR G = _mm_mul_ps(RefractionIndex,RefractionIndex);
+    XMVECTOR vTemp = _mm_mul_ps(CosIncidentAngle,CosIncidentAngle);
+    G = _mm_sub_ps(G,g_XMOne);
+    vTemp = _mm_add_ps(vTemp,G);
+    // max((0-vTemp),vTemp) == abs(vTemp)
+    // The abs is needed to deal with refraction and cosine being zero
+    G = _mm_setzero_ps();
+    G = _mm_sub_ps(G,vTemp);
+    G = _mm_max_ps(G,vTemp);
+    // Last operation, the sqrt()
+    G = _mm_sqrt_ps(G);
+
+    // Calc G+C and G-C
+    XMVECTOR GAddC = _mm_add_ps(G,CosIncidentAngle);
+    XMVECTOR GSubC = _mm_sub_ps(G,CosIncidentAngle);
+    // Perform the term (0.5f *(g - c)^2) / (g + c)^2
+    XMVECTOR vResult = _mm_mul_ps(GSubC,GSubC);
+    vTemp = _mm_mul_ps(GAddC,GAddC);
+    vResult = _mm_mul_ps(vResult,g_XMOneHalf);
+    vResult = _mm_div_ps(vResult,vTemp);
+    // Perform the term ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1)
+    GAddC = _mm_mul_ps(GAddC,CosIncidentAngle);
+    GSubC = _mm_mul_ps(GSubC,CosIncidentAngle);
+    GAddC = _mm_sub_ps(GAddC,g_XMOne);
+    GSubC = _mm_add_ps(GSubC,g_XMOne);
+    GAddC = _mm_mul_ps(GAddC,GAddC);
+    GSubC = _mm_mul_ps(GSubC,GSubC);
+    GAddC = _mm_div_ps(GAddC,GSubC);
+    GAddC = _mm_add_ps(GAddC,g_XMOne);
+    // Multiply the two term parts
+    vResult = _mm_mul_ps(vResult,GAddC);
+    // Clamp to 0.0 - 1.0f
+    vResult = _mm_max_ps(vResult,g_XMZero);
+    vResult = _mm_min_ps(vResult,g_XMOne);
+    return vResult;
+#endif // portable / _XM_SSE_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+// Returns true when S1 and S2 differ by no more than Epsilon (inclusive).
+inline bool XMScalarNearEqual
+(
+    float S1,
+    float S2,
+    float Epsilon
+)
+{
+    // Written as a single comparison so a NaN difference still compares false.
+    return (Epsilon >= fabsf(S1 - S2));
+}
+
+//------------------------------------------------------------------------------
+// Wraps Angle by whole multiples of XM_2PI; the nominal result range is
+// -XM_PI <= result < XM_PI.
+inline float XMScalarModAngle
+(
+    float Angle
+)
+{
+    // Note: the modulo works on the magnitude only, to work around a
+    // precision error on numbers that are close to PI
+
+    // Shift so the target range starts at zero (0.0f to XM_2PI)
+    const float shifted = Angle + XM_PI;
+    const float magnitude = fabsf(shifted);
+
+    // Remove the whole turns; the integer cast truncates toward zero
+    const int32_t wholeTurns = (int32_t)(magnitude/XM_2PI);
+    float wrapped = magnitude - (XM_2PI * (float)wholeTurns);
+
+    // Shift back to the range of -XM_PI to XM_PI-epsilon
+    wrapped = wrapped - XM_PI;
+
+    // A negative shifted angle mirrors the wrapped result
+    return (shifted<0.0f) ? -wrapped : wrapped;
+}
+
+//------------------------------------------------------------------------------
+
+// Scalar sine of Value, evaluated with an 11-degree minimax polynomial after
+// range reduction.
+inline float XMScalarSin
+(
+    float Value
+)
+{
+    // Remove the nearest whole number of 2*pi turns so the remainder lies in
+    // [-pi,pi]. The int cast truncates toward zero, so the +/-0.5 bias picked
+    // by the sign of Value rounds to the nearest integer.
+    const float turns = XM_1DIV2PI*Value;
+    const float wholeTurns = (Value >= 0.0f)
+        ? (float)((int)(turns + 0.5f))
+        : (float)((int)(turns - 0.5f));
+    float angle = Value - XM_2PI*wholeTurns;
+
+    // Fold into [-pi/2,pi/2]; sin(pi - a) and sin(-pi - a) both equal sin(a).
+    if (angle < -XM_PIDIV2)
+    {
+        angle = -XM_PI - angle;
+    }
+    else if (angle > XM_PIDIV2)
+    {
+        angle = XM_PI - angle;
+    }
+
+    // 11-degree minimax approximation, Horner form in angle^2
+    const float a2 = angle * angle;
+    return ( ( ( ( (-2.3889859e-08f * a2 + 2.7525562e-06f) * a2
+                   - 0.00019840874f ) * a2
+                 + 0.0083333310f ) * a2
+               - 0.16666667f ) * a2
+             + 1.0f ) * angle;
+}
+
+//------------------------------------------------------------------------------
+
+// Estimated scalar sine of Value, evaluated with a 7-degree minimax
+// polynomial after range reduction.
+inline float XMScalarSinEst
+(
+    float Value
+)
+{
+    // Remove the nearest whole number of 2*pi turns so the remainder lies in
+    // [-pi,pi]. The int cast truncates toward zero, so the +/-0.5 bias picked
+    // by the sign of Value rounds to the nearest integer.
+    const float turns = XM_1DIV2PI*Value;
+    const float wholeTurns = (Value >= 0.0f)
+        ? (float)((int)(turns + 0.5f))
+        : (float)((int)(turns - 0.5f));
+    float angle = Value - XM_2PI*wholeTurns;
+
+    // Fold into [-pi/2,pi/2]; sin(pi - a) and sin(-pi - a) both equal sin(a).
+    if (angle < -XM_PIDIV2)
+    {
+        angle = -XM_PI - angle;
+    }
+    else if (angle > XM_PIDIV2)
+    {
+        angle = XM_PI - angle;
+    }
+
+    // 7-degree minimax approximation, Horner form in angle^2
+    const float a2 = angle * angle;
+    return ( ( ( -0.00018524670f * a2 + 0.0083139502f ) * a2
+               - 0.16665852f ) * a2
+             + 1.0f ) * angle;
+}
+
+//------------------------------------------------------------------------------
+
+// Scalar cosine of Value, evaluated with a 10-degree minimax polynomial
+// after range reduction.
+inline float XMScalarCos
+(
+    float Value
+)
+{
+    // Remove the nearest whole number of 2*pi turns so the remainder lies in
+    // [-pi,pi]. The int cast truncates toward zero, so the +/-0.5 bias picked
+    // by the sign of Value rounds to the nearest integer.
+    const float turns = XM_1DIV2PI*Value;
+    const float wholeTurns = (Value >= 0.0f)
+        ? (float)((int)(turns + 0.5f))
+        : (float)((int)(turns - 0.5f));
+    float angle = Value - XM_2PI*wholeTurns;
+
+    // Fold into [-pi/2,pi/2]. Folding flips the sign of the cosine, which is
+    // tracked in 'flip' and applied to the polynomial result.
+    float flip = +1.0f;
+    if (angle > XM_PIDIV2)
+    {
+        angle = XM_PI - angle;
+        flip = -1.0f;
+    }
+    else if (angle < -XM_PIDIV2)
+    {
+        angle = -XM_PI - angle;
+        flip = -1.0f;
+    }
+
+    // 10-degree minimax approximation, Horner form in angle^2
+    const float a2 = angle*angle;
+    const float poly = ( ( ( ( -2.6051615e-07f * a2 + 2.4760495e-05f ) * a2
+                             - 0.0013888378f ) * a2
+                           + 0.041666638f ) * a2
+                         - 0.5f ) * a2
+                       + 1.0f;
+    return flip*poly;
+}
+
+//------------------------------------------------------------------------------
+
+// Estimated scalar cosine of Value, evaluated with a 6-degree minimax
+// polynomial after range reduction.
+inline float XMScalarCosEst
+(
+    float Value
+)
+{
+    // Remove the nearest whole number of 2*pi turns so the remainder lies in
+    // [-pi,pi]. The int cast truncates toward zero, so the +/-0.5 bias picked
+    // by the sign of Value rounds to the nearest integer.
+    const float turns = XM_1DIV2PI*Value;
+    const float wholeTurns = (Value >= 0.0f)
+        ? (float)((int)(turns + 0.5f))
+        : (float)((int)(turns - 0.5f));
+    float angle = Value - XM_2PI*wholeTurns;
+
+    // Fold into [-pi/2,pi/2]. Folding flips the sign of the cosine, which is
+    // tracked in 'flip' and applied to the polynomial result.
+    float flip = +1.0f;
+    if (angle > XM_PIDIV2)
+    {
+        angle = XM_PI - angle;
+        flip = -1.0f;
+    }
+    else if (angle < -XM_PIDIV2)
+    {
+        angle = -XM_PI - angle;
+        flip = -1.0f;
+    }
+
+    // 6-degree minimax approximation, Horner form in angle^2
+    const float a2 = angle * angle;
+    const float poly = ( ( -0.0012712436f * a2 + 0.041493919f ) * a2
+                         - 0.49992746f ) * a2
+                       + 1.0f;
+    return flip*poly;
+}
+
+//------------------------------------------------------------------------------
+
+// Computes the sine and cosine of Value in one pass, sharing a single range
+// reduction. Both output pointers must be non-null (asserted).
+_Use_decl_annotations_
+inline void XMScalarSinCos
+(
+    float* pSin,
+    float* pCos,
+    float  Value
+)
+{
+    assert(pSin);
+    assert(pCos);
+
+    // Remove the nearest whole number of 2*pi turns so the remainder lies in
+    // [-pi,pi]. The int cast truncates toward zero, so the +/-0.5 bias picked
+    // by the sign of Value rounds to the nearest integer.
+    const float turns = XM_1DIV2PI*Value;
+    const float wholeTurns = (Value >= 0.0f)
+        ? (float)((int)(turns + 0.5f))
+        : (float)((int)(turns - 0.5f));
+    float angle = Value - XM_2PI*wholeTurns;
+
+    // Fold into [-pi/2,pi/2]. The sine is unchanged by the fold; the cosine
+    // changes sign, which is tracked in 'cosFlip'.
+    float cosFlip = +1.0f;
+    if (angle > XM_PIDIV2)
+    {
+        angle = XM_PI - angle;
+        cosFlip = -1.0f;
+    }
+    else if (angle < -XM_PIDIV2)
+    {
+        angle = -XM_PI - angle;
+        cosFlip = -1.0f;
+    }
+
+    const float a2 = angle * angle;
+
+    // Sine: 11-degree minimax approximation
+    *pSin = ( ( ( ( (-2.3889859e-08f * a2 + 2.7525562e-06f) * a2
+                    - 0.00019840874f ) * a2
+                  + 0.0083333310f ) * a2
+                - 0.16666667f ) * a2
+              + 1.0f ) * angle;
+
+    // Cosine: 10-degree minimax approximation
+    const float cosPoly = ( ( ( ( -2.6051615e-07f * a2 + 2.4760495e-05f ) * a2
+                                - 0.0013888378f ) * a2
+                              + 0.041666638f ) * a2
+                            - 0.5f ) * a2
+                          + 1.0f;
+    *pCos = cosFlip*cosPoly;
+}
+
+//------------------------------------------------------------------------------
+
+// Computes estimates of the sine and cosine of Value in one pass, sharing a
+// single range reduction. Both output pointers must be non-null (asserted).
+_Use_decl_annotations_
+inline void XMScalarSinCosEst
+(
+    float* pSin,
+    float* pCos,
+    float  Value
+)
+{
+    assert(pSin);
+    assert(pCos);
+
+    // Remove the nearest whole number of 2*pi turns so the remainder lies in
+    // [-pi,pi]. The int cast truncates toward zero, so the +/-0.5 bias picked
+    // by the sign of Value rounds to the nearest integer.
+    const float turns = XM_1DIV2PI*Value;
+    const float wholeTurns = (Value >= 0.0f)
+        ? (float)((int)(turns + 0.5f))
+        : (float)((int)(turns - 0.5f));
+    float angle = Value - XM_2PI*wholeTurns;
+
+    // Fold into [-pi/2,pi/2]. The sine is unchanged by the fold; the cosine
+    // changes sign, which is tracked in 'cosFlip'.
+    float cosFlip = +1.0f;
+    if (angle > XM_PIDIV2)
+    {
+        angle = XM_PI - angle;
+        cosFlip = -1.0f;
+    }
+    else if (angle < -XM_PIDIV2)
+    {
+        angle = -XM_PI - angle;
+        cosFlip = -1.0f;
+    }
+
+    const float a2 = angle * angle;
+
+    // Sine: 7-degree minimax approximation
+    *pSin = ( ( ( -0.00018524670f * a2 + 0.0083139502f ) * a2
+                - 0.16665852f ) * a2
+              + 1.0f ) * angle;
+
+    // Cosine: 6-degree minimax approximation
+    const float cosPoly = ( ( -0.0012712436f * a2 + 0.041493919f ) * a2
+                            - 0.49992746f ) * a2
+                          + 1.0f;
+    *pCos = cosFlip*cosPoly;
+}
+
+//------------------------------------------------------------------------------
+
+// Scalar arcsine of Value, derived from a 7-degree minimax approximation of
+// acos(|Value|). Inputs with |Value| > 1 produce the same result as +/-1.
+inline float XMScalarASin
+(
+    float Value
+)
+{
+    // Clamp input to [-1,1]: 1 - |Value| is floored at zero so the square
+    // root never receives a negative argument.
+    bool nonnegative = (Value >= 0.0f);
+    float x = fabsf(Value);
+    float omx = 1.0f - x;
+    if (omx < 0.0f)
+    {
+        omx = 0.0f;
+    }
+    // sqrtf keeps the computation in single precision, matching XMScalarACos.
+    float root = sqrtf(omx);
+
+    // 7-degree minimax approximation
+    float result = ( ( ( ( ( ( -0.0012624911f * x + 0.0066700901f ) * x - 0.0170881256f ) * x + 0.0308918810f ) * x - 0.0501743046f ) * x + 0.0889789874f ) * x - 0.2145988016f ) * x + 1.5707963050f;
+    result *= root;  // acos(|x|)
+
+    // acos(x) = pi - acos(-x) when x < 0, asin(x) = pi/2 - acos(x)
+    return (nonnegative ? XM_PIDIV2 - result : result - XM_PIDIV2);
+}
+
+//------------------------------------------------------------------------------
+
+// Estimated scalar arcsine of Value, derived from a 3-degree minimax
+// approximation of acos(|Value|). Inputs with |Value| > 1 produce the same
+// result as +/-1.
+inline float XMScalarASinEst
+(
+    float Value
+)
+{
+    // Clamp input to [-1,1]: 1 - |Value| is floored at zero so the square
+    // root never receives a negative argument.
+    bool nonnegative = (Value >= 0.0f);
+    float x = fabsf(Value);
+    float omx = 1.0f - x;
+    if (omx < 0.0f)
+    {
+        omx = 0.0f;
+    }
+    // sqrtf keeps the computation in single precision, matching XMScalarACosEst.
+    float root = sqrtf(omx);
+
+    // 3-degree minimax approximation
+    float result = ((-0.0187293f*x+0.0742610f)*x-0.2121144f)*x+1.5707288f;
+    result *= root;  // acos(|x|)
+
+    // acos(x) = pi - acos(-x) when x < 0, asin(x) = pi/2 - acos(x)
+    return (nonnegative ? XM_PIDIV2 - result : result - XM_PIDIV2);
+}
+
+//------------------------------------------------------------------------------
+
+// Scalar arccosine of Value, using a 7-degree minimax approximation of
+// acos(|Value|). Inputs with |Value| > 1 produce the same result as +/-1.
+inline float XMScalarACos
+(
+    float Value
+)
+{
+    // Anything that is not >= 0 takes the mirrored branch at the end.
+    const bool mirrored = !(Value >= 0.0f);
+    const float magnitude = fabsf(Value);
+
+    // Clamp: floor 1 - |Value| at zero before taking the square root.
+    float oneMinus = 1.0f - magnitude;
+    oneMinus = (oneMinus < 0.0f) ? 0.0f : oneMinus;
+    const float root = sqrtf(oneMinus);
+
+    // 7-degree minimax approximation, Horner form in |Value|
+    const float poly = ( ( ( ( ( ( -0.0012624911f * magnitude + 0.0066700901f ) * magnitude
+                                 - 0.0170881256f ) * magnitude
+                               + 0.0308918810f ) * magnitude
+                             - 0.0501743046f ) * magnitude
+                           + 0.0889789874f ) * magnitude
+                         - 0.2145988016f ) * magnitude
+                       + 1.5707963050f;
+    const float acosOfMagnitude = poly * root;
+
+    // acos(x) = pi - acos(-x) when x < 0
+    if (mirrored)
+    {
+        return XM_PI - acosOfMagnitude;
+    }
+    return acosOfMagnitude;
+}
+
+//------------------------------------------------------------------------------
+
+// Estimated scalar arccosine of Value, using a 3-degree minimax approximation
+// of acos(|Value|). Inputs with |Value| > 1 produce the same result as +/-1.
+inline float XMScalarACosEst
+(
+    float Value
+)
+{
+    // Anything that is not >= 0 takes the mirrored branch at the end.
+    const bool mirrored = !(Value >= 0.0f);
+    const float magnitude = fabsf(Value);
+
+    // Clamp: floor 1 - |Value| at zero before taking the square root.
+    float oneMinus = 1.0f - magnitude;
+    oneMinus = (oneMinus < 0.0f) ? 0.0f : oneMinus;
+    const float root = sqrtf(oneMinus);
+
+    // 3-degree minimax approximation, Horner form in |Value|
+    const float poly = ( ( -0.0187293f * magnitude + 0.0742610f ) * magnitude
+                         - 0.2121144f ) * magnitude
+                       + 1.5707288f;
+    const float acosOfMagnitude = poly * root;
+
+    // acos(x) = pi - acos(-x) when x < 0
+    if (mirrored)
+    {
+        return XM_PI - acosOfMagnitude;
+    }
+    return acosOfMagnitude;
+}
+
author	daoge_cmd <3523206925@qq.com>	2026-03-01 12:16:08 +0800
committer	daoge_cmd <3523206925@qq.com>	2026-03-01 12:16:08 +0800
commit	b691c43c44ff180d10e7d4a9afc83b98551ff586 (patch)
tree	3e9849222cbc6ba49f2f1fc6e5fe7179632c7390 /Minecraft.Client/PS3/PS3Extras/DirectX/DirectXMathMisc.inl
parent	def8cb415354ac390b7e89052a50605285f1aca9 (diff)