diff options
| author | daoge_cmd <3523206925@qq.com> | 2026-03-01 12:16:08 +0800 |
|---|---|---|
| committer | daoge_cmd <3523206925@qq.com> | 2026-03-01 12:16:08 +0800 |
| commit | b691c43c44ff180d10e7d4a9afc83b98551ff586 (patch) | |
| tree | 3e9849222cbc6ba49f2f1fc6e5fe7179632c7390 /Minecraft.Client/PS3/PS3Extras/DirectX/DirectXMathMisc.inl | |
| parent | def8cb415354ac390b7e89052a50605285f1aca9 (diff) | |
Initial commit
Diffstat (limited to 'Minecraft.Client/PS3/PS3Extras/DirectX/DirectXMathMisc.inl')
| -rw-r--r-- | Minecraft.Client/PS3/PS3Extras/DirectX/DirectXMathMisc.inl | 2501 |
1 files changed, 2501 insertions, 0 deletions
diff --git a/Minecraft.Client/PS3/PS3Extras/DirectX/DirectXMathMisc.inl b/Minecraft.Client/PS3/PS3Extras/DirectX/DirectXMathMisc.inl new file mode 100644 index 00000000..f3461e6c --- /dev/null +++ b/Minecraft.Client/PS3/PS3Extras/DirectX/DirectXMathMisc.inl @@ -0,0 +1,2501 @@ +//------------------------------------------------------------------------------------- +// DirectXMathMisc.inl -- SIMD C++ Math library +// +// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF +// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A +// PARTICULAR PURPOSE. +// +// Copyright (c) Microsoft Corporation. All rights reserved. +//------------------------------------------------------------------------------------- + +#ifdef _MSC_VER +#pragma once +#endif + +/**************************************************************************** + * + * Quaternion + * + ****************************************************************************/ + +//------------------------------------------------------------------------------ +// Comparison operations +//------------------------------------------------------------------------------ + +//------------------------------------------------------------------------------ + +inline bool XMQuaternionEqual +( + FXMVECTOR Q1, + FXMVECTOR Q2 +) +{ + return XMVector4Equal(Q1, Q2); +} + +//------------------------------------------------------------------------------ + +inline bool XMQuaternionNotEqual +( + FXMVECTOR Q1, + FXMVECTOR Q2 +) +{ + return XMVector4NotEqual(Q1, Q2); +} + +//------------------------------------------------------------------------------ + +inline bool XMQuaternionIsNaN +( + FXMVECTOR Q +) +{ + return XMVector4IsNaN(Q); +} + +//------------------------------------------------------------------------------ + +inline bool XMQuaternionIsInfinite +( + FXMVECTOR Q +) +{ + return XMVector4IsInfinite(Q); +} + +//------------------------------------------------------------------------------ + +inline bool XMQuaternionIsIdentity +( + FXMVECTOR Q +) +{ +#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) + return XMVector4Equal(Q, g_XMIdentityR3.v); +#else // _XM_VMX128_INTRINSICS_ +#endif // _XM_VMX128_INTRINSICS_ +} + +//------------------------------------------------------------------------------ +// Computation operations +//------------------------------------------------------------------------------ + +//------------------------------------------------------------------------------ + +inline XMVECTOR XMQuaternionDot +( + FXMVECTOR Q1, + FXMVECTOR Q2 +) +{ + return XMVector4Dot(Q1, Q2); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XMQuaternionMultiply +( + FXMVECTOR Q1, + FXMVECTOR Q2 +) +{ + // Returns the product Q2*Q1 (which is the concatenation of a rotation Q1 followed by the rotation Q2) + + // [ (Q2.w * Q1.x) + (Q2.x * Q1.w) + (Q2.y * Q1.z) - (Q2.z * Q1.y), + // (Q2.w * Q1.y) - (Q2.x * Q1.z) + (Q2.y * Q1.w) + (Q2.z * Q1.x), + // (Q2.w * Q1.z) + (Q2.x * Q1.y) - (Q2.y * Q1.x) + (Q2.z * Q1.w), + // (Q2.w * Q1.w) - (Q2.x * Q1.x) - (Q2.y * Q1.y) - (Q2.z * Q1.z) ] + +#if defined(_XM_NO_INTRINSICS_) + XMVECTOR Result = { + (Q2.vector4_f32[3] * Q1.vector4_f32[0]) + (Q2.vector4_f32[0] * Q1.vector4_f32[3]) + (Q2.vector4_f32[1] * Q1.vector4_f32[2]) - (Q2.vector4_f32[2] * Q1.vector4_f32[1]), + (Q2.vector4_f32[3] * Q1.vector4_f32[1]) - (Q2.vector4_f32[0] * Q1.vector4_f32[2]) + (Q2.vector4_f32[1] * Q1.vector4_f32[3]) + (Q2.vector4_f32[2] * Q1.vector4_f32[0]), + (Q2.vector4_f32[3] * Q1.vector4_f32[2]) + (Q2.vector4_f32[0] * Q1.vector4_f32[1]) - (Q2.vector4_f32[1] * Q1.vector4_f32[0]) + (Q2.vector4_f32[2] * Q1.vector4_f32[3]), + (Q2.vector4_f32[3] * Q1.vector4_f32[3]) - (Q2.vector4_f32[0] * Q1.vector4_f32[0]) - (Q2.vector4_f32[1] * Q1.vector4_f32[1]) - (Q2.vector4_f32[2] * Q1.vector4_f32[2]) }; + return Result; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + static const XMVECTORF32 ControlWZYX = { 1.0f,-1.0f, 1.0f,-1.0f}; + static const XMVECTORF32 ControlZWXY = { 1.0f, 1.0f,-1.0f,-1.0f}; + static const XMVECTORF32 ControlYXWZ = {-1.0f, 1.0f, 1.0f,-1.0f}; + + __n64 Q2L = vget_low_f32(Q2); + __n64 Q2H = vget_high_f32(Q2); + + __n128 Q2X = vdupq_lane_f32( Q2L, 0 ); + __n128 Q2Y = vdupq_lane_f32( Q2L, 1 ); + __n128 Q2Z = vdupq_lane_f32( Q2H, 0 ); + __n128 vResult = vdupq_lane_f32( Q2H, 1 ); + vResult = vmulq_f32(vResult,Q1); + + // Mul by Q1WZYX + __n128 vTemp = vrev64q_u32(Q1); + vTemp = vcombine_f32( vget_high_f32(vTemp), vget_low_f32(vTemp) ); + Q2X = vmulq_f32(Q2X,vTemp); + vResult = vmlaq_f32( vResult, Q2X, ControlWZYX ); + + // Mul by Q1ZWXY + vTemp = vrev64q_u32(vTemp); + Q2Y = vmulq_f32(Q2Y,vTemp); + vResult = vmlaq_f32(vResult, Q2Y, ControlZWXY); + + // Mul by Q1YXWZ + vTemp = vrev64q_u32(vTemp); + vTemp = vcombine_f32(vget_high_f32(vTemp), vget_low_f32(vTemp)); + Q2Z = vmulq_f32(Q2Z,vTemp); + vResult = vmlaq_f32(vResult, Q2Z, ControlYXWZ); + return vResult; +#elif defined(_XM_SSE_INTRINSICS_) + static const XMVECTORF32 ControlWZYX = { 1.0f,-1.0f, 1.0f,-1.0f}; + static const XMVECTORF32 ControlZWXY = { 1.0f, 1.0f,-1.0f,-1.0f}; + static const XMVECTORF32 ControlYXWZ = {-1.0f, 1.0f, 1.0f,-1.0f}; + // Copy to SSE registers and use as few as possible for x86 + XMVECTOR Q2X = Q2; + XMVECTOR Q2Y = Q2; + XMVECTOR Q2Z = Q2; + XMVECTOR vResult = Q2; + // Splat with one instruction + vResult = XM_PERMUTE_PS(vResult,_MM_SHUFFLE(3,3,3,3)); + Q2X = XM_PERMUTE_PS(Q2X,_MM_SHUFFLE(0,0,0,0)); + Q2Y = XM_PERMUTE_PS(Q2Y,_MM_SHUFFLE(1,1,1,1)); + Q2Z = XM_PERMUTE_PS(Q2Z,_MM_SHUFFLE(2,2,2,2)); + // Retire Q1 and perform Q1*Q2W + vResult = _mm_mul_ps(vResult,Q1); + XMVECTOR Q1Shuffle = Q1; + // Shuffle the copies of Q1 + Q1Shuffle = XM_PERMUTE_PS(Q1Shuffle,_MM_SHUFFLE(0,1,2,3)); + // Mul by Q1WZYX + Q2X = _mm_mul_ps(Q2X,Q1Shuffle); + Q1Shuffle = XM_PERMUTE_PS(Q1Shuffle,_MM_SHUFFLE(2,3,0,1)); + // Flip the signs on y and z + Q2X = _mm_mul_ps(Q2X,ControlWZYX); + // Mul by Q1ZWXY + Q2Y = _mm_mul_ps(Q2Y,Q1Shuffle); + Q1Shuffle = XM_PERMUTE_PS(Q1Shuffle,_MM_SHUFFLE(0,1,2,3)); + // Flip the signs on z and w + Q2Y = _mm_mul_ps(Q2Y,ControlZWXY); + // Mul by Q1YXWZ + Q2Z = _mm_mul_ps(Q2Z,Q1Shuffle); + vResult = _mm_add_ps(vResult,Q2X); + // Flip the signs on x and w + Q2Z = _mm_mul_ps(Q2Z,ControlYXWZ); + Q2Y = _mm_add_ps(Q2Y,Q2Z); + vResult = _mm_add_ps(vResult,Q2Y); + return vResult; +#else // _XM_VMX128_INTRINSICS_ +#endif // _XM_VMX128_INTRINSICS_ +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XMQuaternionLengthSq +( + FXMVECTOR Q +) +{ + return XMVector4LengthSq(Q); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XMQuaternionReciprocalLength +( + FXMVECTOR Q +) +{ + return XMVector4ReciprocalLength(Q); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XMQuaternionLength +( + FXMVECTOR Q +) +{ + return XMVector4Length(Q); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XMQuaternionNormalizeEst +( + FXMVECTOR Q +) +{ + return XMVector4NormalizeEst(Q); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XMQuaternionNormalize +( + FXMVECTOR Q +) +{ + return XMVector4Normalize(Q); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XMQuaternionConjugate +( + FXMVECTOR Q +) +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTOR Result = { + -Q.vector4_f32[0], + -Q.vector4_f32[1], + -Q.vector4_f32[2], + Q.vector4_f32[3] + }; + return Result; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + static const XMVECTORF32 NegativeOne3 = {-1.0f,-1.0f,-1.0f,1.0f}; + return vmulq_f32(Q, NegativeOne3.v ); +#elif defined(_XM_SSE_INTRINSICS_) + static const XMVECTORF32 NegativeOne3 = {-1.0f,-1.0f,-1.0f,1.0f}; + return _mm_mul_ps(Q,NegativeOne3); +#else // _XM_VMX128_INTRINSICS_ +#endif // _XM_VMX128_INTRINSICS_ +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XMQuaternionInverse +( + FXMVECTOR Q +) +{ +#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) + + const XMVECTOR Zero = XMVectorZero(); + + XMVECTOR L = XMVector4LengthSq(Q); + XMVECTOR Conjugate = XMQuaternionConjugate(Q); + + XMVECTOR Control = XMVectorLessOrEqual(L, g_XMEpsilon.v); + + XMVECTOR Result = XMVectorDivide(Conjugate, L); + + Result = XMVectorSelect(Result, Zero, Control); + + return Result; + +#else // _XM_VMX128_INTRINSICS_ +#endif // _XM_VMX128_INTRINSICS_ +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XMQuaternionLn +( + FXMVECTOR Q +) +{ +#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) + + static const XMVECTORF32 OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f}; + + XMVECTOR QW = XMVectorSplatW(Q); + XMVECTOR Q0 = XMVectorSelect(g_XMSelect1110.v, Q, g_XMSelect1110.v); + + XMVECTOR ControlW = XMVectorInBounds(QW, OneMinusEpsilon.v); + + XMVECTOR Theta = XMVectorACos(QW); + XMVECTOR SinTheta = XMVectorSin(Theta); + + XMVECTOR S = XMVectorDivide(Theta,SinTheta); + + XMVECTOR Result = XMVectorMultiply(Q0, S); + Result = XMVectorSelect(Q0, Result, ControlW); + + return Result; + +#else // _XM_VMX128_INTRINSICS_ +#endif // _XM_VMX128_INTRINSICS_ +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XMQuaternionExp +( + FXMVECTOR Q +) +{ +#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) + + XMVECTOR Theta = XMVector3Length(Q); + + XMVECTOR SinTheta, CosTheta; + XMVectorSinCos(&SinTheta, &CosTheta, Theta); + + XMVECTOR S = XMVectorDivide(SinTheta, Theta); + + XMVECTOR Result = XMVectorMultiply(Q, S); + + const XMVECTOR Zero = XMVectorZero(); + XMVECTOR Control = XMVectorNearEqual(Theta, Zero, g_XMEpsilon.v); + Result = XMVectorSelect(Result, Q, Control); + + Result = XMVectorSelect(CosTheta, Result, g_XMSelect1110.v); + + return Result; + +#else // _XM_VMX128_INTRINSICS_ +#endif // _XM_VMX128_INTRINSICS_ +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XMQuaternionSlerp +( + FXMVECTOR Q0, + FXMVECTOR Q1, + float t +) +{ + XMVECTOR T = XMVectorReplicate(t); + return XMQuaternionSlerpV(Q0, Q1, T); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XMQuaternionSlerpV +( + FXMVECTOR Q0, + FXMVECTOR Q1, + FXMVECTOR T +) +{ + assert((XMVectorGetY(T) == XMVectorGetX(T)) && (XMVectorGetZ(T) == XMVectorGetX(T)) && (XMVectorGetW(T) == XMVectorGetX(T))); + + // Result = Q0 * sin((1.0 - t) * Omega) / sin(Omega) + Q1 * sin(t * Omega) / sin(Omega) + +#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) + + const XMVECTORF32 OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f}; + + XMVECTOR CosOmega = XMQuaternionDot(Q0, Q1); + + const XMVECTOR Zero = XMVectorZero(); + XMVECTOR Control = XMVectorLess(CosOmega, Zero); + XMVECTOR Sign = XMVectorSelect(g_XMOne.v, g_XMNegativeOne.v, Control); + + CosOmega = XMVectorMultiply(CosOmega, Sign); + + Control = XMVectorLess(CosOmega, OneMinusEpsilon); + + XMVECTOR SinOmega = XMVectorNegativeMultiplySubtract(CosOmega, CosOmega, g_XMOne.v); + SinOmega = XMVectorSqrt(SinOmega); + + XMVECTOR Omega = XMVectorATan2(SinOmega, CosOmega); + + XMVECTOR SignMask = XMVectorSplatSignMask(); + XMVECTOR V01 = XMVectorShiftLeft(T, Zero, 2); + SignMask = XMVectorShiftLeft(SignMask, Zero, 3); + V01 = XMVectorXorInt(V01, SignMask); + V01 = XMVectorAdd(g_XMIdentityR0.v, V01); + + XMVECTOR InvSinOmega = XMVectorReciprocal(SinOmega); + + XMVECTOR S0 = XMVectorMultiply(V01, Omega); + S0 = XMVectorSin(S0); + S0 = XMVectorMultiply(S0, InvSinOmega); + + S0 = XMVectorSelect(V01, S0, Control); + + XMVECTOR S1 = XMVectorSplatY(S0); + S0 = XMVectorSplatX(S0); + + S1 = XMVectorMultiply(S1, Sign); + + XMVECTOR Result = XMVectorMultiply(Q0, S0); + Result = XMVectorMultiplyAdd(Q1, S1, Result); + + return Result; + +#elif defined(_XM_SSE_INTRINSICS_) + static const XMVECTORF32 OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f}; + static const XMVECTORI32 SignMask2 = {0x80000000,0x00000000,0x00000000,0x00000000}; + static const XMVECTORI32 MaskXY = {0xFFFFFFFF,0xFFFFFFFF,0x00000000,0x00000000}; + + XMVECTOR CosOmega = XMQuaternionDot(Q0, Q1); + + const XMVECTOR Zero = XMVectorZero(); + XMVECTOR Control = XMVectorLess(CosOmega, Zero); + XMVECTOR Sign = XMVectorSelect(g_XMOne, g_XMNegativeOne, Control); + + CosOmega = _mm_mul_ps(CosOmega, Sign); + + Control = XMVectorLess(CosOmega, OneMinusEpsilon); + + XMVECTOR SinOmega = _mm_mul_ps(CosOmega,CosOmega); + SinOmega = _mm_sub_ps(g_XMOne,SinOmega); + SinOmega = _mm_sqrt_ps(SinOmega); + + XMVECTOR Omega = XMVectorATan2(SinOmega, CosOmega); + + XMVECTOR V01 = XM_PERMUTE_PS(T,_MM_SHUFFLE(2,3,0,1)); + V01 = _mm_and_ps(V01,MaskXY); + V01 = _mm_xor_ps(V01,SignMask2); + V01 = _mm_add_ps(g_XMIdentityR0, V01); + + XMVECTOR S0 = _mm_mul_ps(V01, Omega); + S0 = XMVectorSin(S0); + S0 = _mm_div_ps(S0, SinOmega); + + S0 = XMVectorSelect(V01, S0, Control); + + XMVECTOR S1 = XMVectorSplatY(S0); + S0 = XMVectorSplatX(S0); + + S1 = _mm_mul_ps(S1, Sign); + XMVECTOR Result = _mm_mul_ps(Q0, S0); + S1 = _mm_mul_ps(S1, Q1); + Result = _mm_add_ps(Result,S1); + return Result; +#else // _XM_VMX128_INTRINSICS_ +#endif // _XM_VMX128_INTRINSICS_ +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XMQuaternionSquad +( + FXMVECTOR Q0, + FXMVECTOR Q1, + FXMVECTOR Q2, + GXMVECTOR Q3, + float t +) +{ + XMVECTOR T = XMVectorReplicate(t); + return XMQuaternionSquadV(Q0, Q1, Q2, Q3, T); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XMQuaternionSquadV +( + FXMVECTOR Q0, + FXMVECTOR Q1, + FXMVECTOR Q2, + GXMVECTOR Q3, + CXMVECTOR T +) +{ + assert( (XMVectorGetY(T) == XMVectorGetX(T)) && (XMVectorGetZ(T) == XMVectorGetX(T)) && (XMVectorGetW(T) == XMVectorGetX(T)) ); + + XMVECTOR TP = T; + const XMVECTOR Two = XMVectorSplatConstant(2, 0); + + XMVECTOR Q03 = XMQuaternionSlerpV(Q0, Q3, T); + XMVECTOR Q12 = XMQuaternionSlerpV(Q1, Q2, T); + + TP = XMVectorNegativeMultiplySubtract(TP, TP, TP); + TP = XMVectorMultiply(TP, Two); + + XMVECTOR Result = XMQuaternionSlerpV(Q03, Q12, TP); + + return Result; +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XMQuaternionSquadSetup +( + XMVECTOR* pA, + XMVECTOR* pB, + XMVECTOR* pC, + FXMVECTOR Q0, + FXMVECTOR Q1, + FXMVECTOR Q2, + GXMVECTOR Q3 +) +{ + assert(pA); + assert(pB); + assert(pC); + + XMVECTOR LS12 = XMQuaternionLengthSq(XMVectorAdd(Q1, Q2)); + XMVECTOR LD12 = XMQuaternionLengthSq(XMVectorSubtract(Q1, Q2)); + XMVECTOR SQ2 = XMVectorNegate(Q2); + + XMVECTOR Control1 = XMVectorLess(LS12, LD12); + SQ2 = XMVectorSelect(Q2, SQ2, Control1); + + XMVECTOR LS01 = XMQuaternionLengthSq(XMVectorAdd(Q0, Q1)); + XMVECTOR LD01 = XMQuaternionLengthSq(XMVectorSubtract(Q0, Q1)); + XMVECTOR SQ0 = XMVectorNegate(Q0); + + XMVECTOR LS23 = XMQuaternionLengthSq(XMVectorAdd(SQ2, Q3)); + XMVECTOR LD23 = XMQuaternionLengthSq(XMVectorSubtract(SQ2, Q3)); + XMVECTOR SQ3 = XMVectorNegate(Q3); + + XMVECTOR Control0 = XMVectorLess(LS01, LD01); + XMVECTOR Control2 = XMVectorLess(LS23, LD23); + + SQ0 = XMVectorSelect(Q0, SQ0, Control0); + SQ3 = XMVectorSelect(Q3, SQ3, Control2); + + XMVECTOR InvQ1 = XMQuaternionInverse(Q1); + XMVECTOR InvQ2 = XMQuaternionInverse(SQ2); + + XMVECTOR LnQ0 = XMQuaternionLn(XMQuaternionMultiply(InvQ1, SQ0)); + XMVECTOR LnQ2 = XMQuaternionLn(XMQuaternionMultiply(InvQ1, SQ2)); + XMVECTOR LnQ1 = XMQuaternionLn(XMQuaternionMultiply(InvQ2, Q1)); + XMVECTOR LnQ3 = XMQuaternionLn(XMQuaternionMultiply(InvQ2, SQ3)); + + const XMVECTOR NegativeOneQuarter = XMVectorSplatConstant(-1, 2); + + XMVECTOR ExpQ02 = XMVectorMultiply(XMVectorAdd(LnQ0, LnQ2), NegativeOneQuarter); + XMVECTOR ExpQ13 = XMVectorMultiply(XMVectorAdd(LnQ1, LnQ3), NegativeOneQuarter); + ExpQ02 = XMQuaternionExp(ExpQ02); + ExpQ13 = XMQuaternionExp(ExpQ13); + + *pA = XMQuaternionMultiply(Q1, ExpQ02); + *pB = XMQuaternionMultiply(SQ2, ExpQ13); + *pC = SQ2; +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XMQuaternionBaryCentric +( + FXMVECTOR Q0, + FXMVECTOR Q1, + FXMVECTOR Q2, + float f, + float g +) +{ + float s = f + g; + + XMVECTOR Result; + if ((s < 0.00001f) && (s > -0.00001f)) + { + Result = Q0; + } + else + { + XMVECTOR Q01 = XMQuaternionSlerp(Q0, Q1, s); + XMVECTOR Q02 = XMQuaternionSlerp(Q0, Q2, s); + + Result = XMQuaternionSlerp(Q01, Q02, g / s); + } + + return Result; +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XMQuaternionBaryCentricV +( + FXMVECTOR Q0, + FXMVECTOR Q1, + FXMVECTOR Q2, + GXMVECTOR F, + CXMVECTOR G +) +{ + assert( (XMVectorGetY(F) == XMVectorGetX(F)) && (XMVectorGetZ(F) == XMVectorGetX(F)) && (XMVectorGetW(F) == XMVectorGetX(F)) ); + assert( (XMVectorGetY(G) == XMVectorGetX(G)) && (XMVectorGetZ(G) == XMVectorGetX(G)) && (XMVectorGetW(G) == XMVectorGetX(G)) ); + + const XMVECTOR Epsilon = XMVectorSplatConstant(1, 16); + + XMVECTOR S = XMVectorAdd(F, G); + + XMVECTOR Result; + if (XMVector4InBounds(S, Epsilon)) + { + Result = Q0; + } + else + { + XMVECTOR Q01 = XMQuaternionSlerpV(Q0, Q1, S); + XMVECTOR Q02 = XMQuaternionSlerpV(Q0, Q2, S); + XMVECTOR GS = XMVectorReciprocal(S); + GS = XMVectorMultiply(G, GS); + + Result = XMQuaternionSlerpV(Q01, Q02, GS); + } + + return Result; +} + +//------------------------------------------------------------------------------ +// Transformation operations +//------------------------------------------------------------------------------ + +//------------------------------------------------------------------------------ + +inline XMVECTOR XMQuaternionIdentity() +{ +#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) + return g_XMIdentityR3.v; +#else // _XM_VMX128_INTRINSICS_ +#endif // _XM_VMX128_INTRINSICS_ +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XMQuaternionRotationRollPitchYaw +( + float Pitch, + float Yaw, + float Roll +) +{ + XMVECTOR Angles = XMVectorSet(Pitch, Yaw, Roll, 0.0f); + XMVECTOR Q = XMQuaternionRotationRollPitchYawFromVector(Angles); + return Q; +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XMQuaternionRotationRollPitchYawFromVector +( + FXMVECTOR Angles // <Pitch, Yaw, Roll, 0> +) +{ +#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) + + static const XMVECTORF32 Sign = {1.0f, -1.0f, -1.0f, 1.0f}; + + XMVECTOR HalfAngles = XMVectorMultiply(Angles, g_XMOneHalf.v); + + XMVECTOR SinAngles, CosAngles; + XMVectorSinCos(&SinAngles, &CosAngles, HalfAngles); + + XMVECTOR P0 = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1X, XM_PERMUTE_1X>(SinAngles, CosAngles); + XMVECTOR Y0 = XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Y>(SinAngles, CosAngles); + XMVECTOR R0 = XMVectorPermute<XM_PERMUTE_1Z, XM_PERMUTE_1Z, XM_PERMUTE_0Z, XM_PERMUTE_1Z>(SinAngles, CosAngles); + XMVECTOR P1 = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1X, XM_PERMUTE_1X>(CosAngles, SinAngles); + XMVECTOR Y1 = XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Y>(CosAngles, SinAngles); + XMVECTOR R1 = XMVectorPermute<XM_PERMUTE_1Z, XM_PERMUTE_1Z, XM_PERMUTE_0Z, XM_PERMUTE_1Z>(CosAngles, SinAngles); + + XMVECTOR Q1 = XMVectorMultiply(P1, Sign.v); + XMVECTOR Q0 = XMVectorMultiply(P0, Y0); + Q1 = XMVectorMultiply(Q1, Y1); + Q0 = XMVectorMultiply(Q0, R0); + XMVECTOR Q = XMVectorMultiplyAdd(Q1, R1, Q0); + + return Q; + +#else // _XM_VMX128_INTRINSICS_ +#endif // _XM_VMX128_INTRINSICS_ +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XMQuaternionRotationNormal +( + FXMVECTOR NormalAxis, + float Angle +) +{ +#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) + + XMVECTOR N = XMVectorSelect(g_XMOne.v, NormalAxis, g_XMSelect1110.v); + + float SinV, CosV; + XMScalarSinCos(&SinV, &CosV, 0.5f * Angle); + + XMVECTOR Scale = XMVectorSet( SinV, SinV, SinV, CosV ); + return XMVectorMultiply(N, Scale); +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR N = _mm_and_ps(NormalAxis,g_XMMask3); + N = _mm_or_ps(N,g_XMIdentityR3); + XMVECTOR Scale = _mm_set_ps1(0.5f * Angle); + XMVECTOR vSine; + XMVECTOR vCosine; + XMVectorSinCos(&vSine,&vCosine,Scale); + Scale = _mm_and_ps(vSine,g_XMMask3); + vCosine = _mm_and_ps(vCosine,g_XMMaskW); + Scale = _mm_or_ps(Scale,vCosine); + N = _mm_mul_ps(N,Scale); + return N; +#else // _XM_VMX128_INTRINSICS_ +#endif // _XM_VMX128_INTRINSICS_ +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XMQuaternionRotationAxis +( + FXMVECTOR Axis, + float Angle +) +{ + assert(!XMVector3Equal(Axis, XMVectorZero())); + assert(!XMVector3IsInfinite(Axis)); + +#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) + XMVECTOR Normal = XMVector3Normalize(Axis); + XMVECTOR Q = XMQuaternionRotationNormal(Normal, Angle); + return Q; +#else // _XM_VMX128_INTRINSICS_ +#endif // _XM_VMX128_INTRINSICS_ +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XMQuaternionRotationMatrix +( + CXMMATRIX M +) +{ +#if defined(_XM_NO_INTRINSICS_) + + XMVECTORF32 q; + float r22 = M.m[2][2]; + if (r22 <= 0.f) // x^2 + y^2 >= z^2 + w^2 + { + float dif10 = M.m[1][1] - M.m[0][0]; + float omr22 = 1.f - r22; + if (dif10 <= 0.f) // x^2 >= y^2 + { + float fourXSqr = omr22 - dif10; + float inv4x = 0.5f / sqrtf(fourXSqr); + q.f[0] = fourXSqr*inv4x; + q.f[1] = (M.m[0][1] + M.m[1][0])*inv4x; + q.f[2] = (M.m[0][2] + M.m[2][0])*inv4x; + q.f[3] = (M.m[1][2] - M.m[2][1])*inv4x; + } + else // y^2 >= x^2 + { + float fourYSqr = omr22 + dif10; + float inv4y = 0.5f / sqrtf(fourYSqr); + q.f[0] = (M.m[0][1] + M.m[1][0])*inv4y; + q.f[1] = fourYSqr*inv4y; + q.f[2] = (M.m[1][2] + M.m[2][1])*inv4y; + q.f[3] = (M.m[2][0] - M.m[0][2])*inv4y; + } + } + else // z^2 + w^2 >= x^2 + y^2 + { + float sum10 = M.m[1][1] + M.m[0][0]; + float opr22 = 1.f + r22; + if (sum10 <= 0.f) // z^2 >= w^2 + { + float fourZSqr = opr22 - sum10; + float inv4z = 0.5f / sqrtf(fourZSqr); + q.f[0] = (M.m[0][2] + M.m[2][0])*inv4z; + q.f[1] = (M.m[1][2] + M.m[2][1])*inv4z; + q.f[2] = fourZSqr*inv4z; + q.f[3] = (M.m[0][1] - M.m[1][0])*inv4z; + } + else // w^2 >= z^2 + { + float fourWSqr = opr22 + sum10; + float inv4w = 0.5f / sqrtf(fourWSqr); + q.f[0] = (M.m[1][2] - M.m[2][1])*inv4w; + q.f[1] = (M.m[2][0] - M.m[0][2])*inv4w; + q.f[2] = (M.m[0][1] - M.m[1][0])*inv4w; + q.f[3] = fourWSqr*inv4w; + } + } + return q.v; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + static const XMVECTORF32 XMPMMP = {+1.0f, -1.0f, -1.0f, +1.0f}; + static const XMVECTORF32 XMMPMP = {-1.0f, +1.0f, -1.0f, +1.0f}; + static const XMVECTORF32 XMMMPP = {-1.0f, -1.0f, +1.0f, +1.0f}; + static const XMVECTORU32 Select0110 = { XM_SELECT_0, XM_SELECT_1, XM_SELECT_1, XM_SELECT_0 }; + static const XMVECTORU32 Select0010 = { XM_SELECT_0, XM_SELECT_0, XM_SELECT_1, XM_SELECT_0 }; + + XMVECTOR r0 = M.r[0]; + XMVECTOR r1 = M.r[1]; + XMVECTOR r2 = M.r[2]; + + XMVECTOR r00 = vdupq_lane_f32(vget_low_f32(r0), 0); + XMVECTOR r11 = vdupq_lane_f32(vget_low_f32(r1), 1); + XMVECTOR r22 = vdupq_lane_f32(vget_high_f32(r2), 0); + + // x^2 >= y^2 equivalent to r11 - r00 <= 0 + XMVECTOR r11mr00 = vsubq_f32(r11, r00); + XMVECTOR x2gey2 = vcleq_f32(r11mr00, g_XMZero); + + // z^2 >= w^2 equivalent to r11 + r00 <= 0 + XMVECTOR r11pr00 = vaddq_f32(r11, r00); + XMVECTOR z2gew2 = vcleq_f32(r11pr00, g_XMZero); + + // x^2 + y^2 >= z^2 + w^2 equivalent to r22 <= 0 + XMVECTOR x2py2gez2pw2 = vcleq_f32(r22, g_XMZero); + + // (4*x^2, 4*y^2, 4*z^2, 4*w^2) + XMVECTOR t0 = vmulq_f32( XMPMMP, r00 ); + XMVECTOR x2y2z2w2 = vmlaq_f32( t0, XMMPMP, r11 ); + x2y2z2w2 = vmlaq_f32( x2y2z2w2, XMMMPP, r22 ); + x2y2z2w2 = vaddq_f32( x2y2z2w2, g_XMOne ); + + // (r01, r02, r12, r11) + t0 = vextq_f32(r0, r0, 1); + XMVECTOR t1 = vextq_f32(r1, r1, 1); + t0 = vcombine_f32( vget_low_f32(t0), vrev64_f32( vget_low_f32( t1 ) ) ); + + // (r10, r20, r21, r10) + t1 = vextq_f32(r2, r2, 3); + XMVECTOR r10 = vdupq_lane_f32( vget_low_f32(r1), 0 ); + t1 = vbslq_f32( Select0110, t1, r10 ); + + // (4*x*y, 4*x*z, 4*y*z, unused) + XMVECTOR xyxzyz = vaddq_f32(t0, t1); + + // (r21, r20, r10, r10) + t0 = vcombine_f32( vrev64_f32( vget_low_f32(r2) ), vget_low_f32(r10) ); + + // (r12, r02, r01, r12) + XMVECTOR t2 = vcombine_f32( vrev64_f32( vget_high_f32(r0) ), vrev64_f32( vget_low_f32(r0) ) ); + XMVECTOR t3 = vdupq_lane_f32( vget_high_f32(r1), 0 ); + t1 = vbslq_f32( Select0110, t2, t3 ); + + // (4*x*w, 4*y*w, 4*z*w, unused) + XMVECTOR xwywzw = vsubq_f32(t0, t1); + xwywzw = vmulq_f32(XMMPMP, xwywzw); + + // (4*x*x, 4*x*y, 4*x*z, 4*x*w) + t0 = vextq_f32( xyxzyz, xyxzyz, 3 ); + t1 = vbslq_f32( Select0110, t0, x2y2z2w2 ); + t2 = vdupq_lane_f32( vget_low_f32(xwywzw), 0 ); + XMVECTOR tensor0 = vbslq_f32( g_XMSelect1110, t1, t2 ); + + // (4*y*x, 4*y*y, 4*y*z, 4*y*w) + t0 = vbslq_f32( g_XMSelect1011, xyxzyz, x2y2z2w2 ); + t1 = vdupq_lane_f32( vget_low_f32(xwywzw), 1 ); + XMVECTOR tensor1 = vbslq_f32( g_XMSelect1110, t0, t1 ); + + // (4*z*x, 4*z*y, 4*z*z, 4*z*w) + t0 = vextq_f32(xyxzyz, xyxzyz, 1); + t1 = vcombine_f32( vget_low_f32(t0), vrev64_f32( vget_high_f32(xwywzw) ) ); + XMVECTOR tensor2 = vbslq_f32( Select0010, x2y2z2w2, t1 ); + + // (4*w*x, 4*w*y, 4*w*z, 4*w*w) + XMVECTOR tensor3 = vbslq_f32( g_XMSelect1110, xwywzw, x2y2z2w2 ); + + // Select the row of the tensor-product matrix that has the largest + // magnitude. + t0 = vbslq_f32( x2gey2, tensor0, tensor1 ); + t1 = vbslq_f32( z2gew2, tensor2, tensor3 ); + t2 = vbslq_f32( x2py2gez2pw2, t0, t1 ); + + // Normalize the row. No division by zero is possible because the + // quaternion is unit-length (and the row is a nonzero multiple of + // the quaternion). + t0 = XMVector4Length(t2); + return XMVectorDivide(t2, t0); +#elif defined(_XM_SSE_INTRINSICS_) + static const XMVECTORF32 XMPMMP = {+1.0f, -1.0f, -1.0f, +1.0f}; + static const XMVECTORF32 XMMPMP = {-1.0f, +1.0f, -1.0f, +1.0f}; + static const XMVECTORF32 XMMMPP = {-1.0f, -1.0f, +1.0f, +1.0f}; + + XMVECTOR r0 = M.r[0]; // (r00, r01, r02, 0) + XMVECTOR r1 = M.r[1]; // (r10, r11, r12, 0) + XMVECTOR r2 = M.r[2]; // (r20, r21, r22, 0) + + // (r00, r00, r00, r00) + XMVECTOR r00 = XM_PERMUTE_PS(r0, _MM_SHUFFLE(0,0,0,0)); + // (r11, r11, r11, r11) + XMVECTOR r11 = XM_PERMUTE_PS(r1, _MM_SHUFFLE(1,1,1,1)); + // (r22, r22, r22, r22) + XMVECTOR r22 = XM_PERMUTE_PS(r2, _MM_SHUFFLE(2,2,2,2)); + + // x^2 >= y^2 equivalent to r11 - r00 <= 0 + // (r11 - r00, r11 - r00, r11 - r00, r11 - r00) + XMVECTOR r11mr00 = _mm_sub_ps(r11, r00); + XMVECTOR x2gey2 = _mm_cmple_ps(r11mr00, g_XMZero); + + // z^2 >= w^2 equivalent to r11 + r00 <= 0 + // (r11 + r00, r11 + r00, r11 + r00, r11 + r00) + XMVECTOR r11pr00 = _mm_add_ps(r11, r00); + XMVECTOR z2gew2 = _mm_cmple_ps(r11pr00, g_XMZero); + + // x^2 + y^2 >= z^2 + w^2 equivalent to r22 <= 0 + XMVECTOR x2py2gez2pw2 = _mm_cmple_ps(r22, g_XMZero); + + // (+r00, -r00, -r00, +r00) + XMVECTOR t0 = _mm_mul_ps(XMPMMP, r00); + + // (-r11, +r11, -r11, +r11) + XMVECTOR t1 = _mm_mul_ps(XMMPMP, r11); + + // (-r22, -r22, +r22, +r22) + XMVECTOR t2 = _mm_mul_ps(XMMMPP, r22); + + // (4*x^2, 4*y^2, 4*z^2, 4*w^2) + XMVECTOR x2y2z2w2 = _mm_add_ps(t0, t1); + x2y2z2w2 = _mm_add_ps(t2, x2y2z2w2); + x2y2z2w2 = _mm_add_ps(x2y2z2w2, g_XMOne); + + // (r01, r02, r12, r11) + t0 = _mm_shuffle_ps(r0, r1, _MM_SHUFFLE(1,2,2,1)); + // (r10, r10, r20, r21) + t1 = _mm_shuffle_ps(r1, r2, _MM_SHUFFLE(1,0,0,0)); + // (r10, r20, r21, r10) + t1 = XM_PERMUTE_PS(t1, _MM_SHUFFLE(1,3,2,0)); + // (4*x*y, 4*x*z, 4*y*z, unused) + XMVECTOR xyxzyz = _mm_add_ps(t0, t1); + + // (r21, r20, r10, r10) + t0 = _mm_shuffle_ps(r2, r1, _MM_SHUFFLE(0,0,0,1)); + // (r12, r12, r02, r01) + t1 = _mm_shuffle_ps(r1, r0, _MM_SHUFFLE(1,2,2,2)); + // (r12, r02, r01, r12) + t1 = XM_PERMUTE_PS(t1, _MM_SHUFFLE(1,3,2,0)); + // (4*x*w, 4*y*w, 4*z*w, unused) + XMVECTOR xwywzw = _mm_sub_ps(t0, t1); + xwywzw = _mm_mul_ps(XMMPMP, xwywzw); + + // (4*x^2, 4*y^2, 4*x*y, unused) + t0 = _mm_shuffle_ps(x2y2z2w2, xyxzyz, _MM_SHUFFLE(0,0,1,0)); + // (4*z^2, 4*w^2, 4*z*w, unused) + t1 = _mm_shuffle_ps(x2y2z2w2, xwywzw, _MM_SHUFFLE(0,2,3,2)); + // (4*x*z, 4*y*z, 4*x*w, 4*y*w) + t2 = _mm_shuffle_ps(xyxzyz, xwywzw, _MM_SHUFFLE(1,0,2,1)); + + // (4*x*x, 4*x*y, 4*x*z, 4*x*w) + XMVECTOR tensor0 = _mm_shuffle_ps(t0, t2, _MM_SHUFFLE(2,0,2,0)); + // (4*y*x, 4*y*y, 4*y*z, 4*y*w) + XMVECTOR tensor1 = _mm_shuffle_ps(t0, t2, _MM_SHUFFLE(3,1,1,2)); + // (4*z*x, 4*z*y, 4*z*z, 4*z*w) + XMVECTOR tensor2 = _mm_shuffle_ps(t2, t1, _MM_SHUFFLE(2,0,1,0)); + // (4*w*x, 4*w*y, 4*w*z, 4*w*w) + XMVECTOR tensor3 = _mm_shuffle_ps(t2, t1, _MM_SHUFFLE(1,2,3,2)); + + // Select the row of the tensor-product matrix that has the largest + // magnitude. + t0 = _mm_and_ps(x2gey2, tensor0); + t1 = _mm_andnot_ps(x2gey2, tensor1); + t0 = _mm_or_ps(t0, t1); + t1 = _mm_and_ps(z2gew2, tensor2); + t2 = _mm_andnot_ps(z2gew2, tensor3); + t1 = _mm_or_ps(t1, t2); + t0 = _mm_and_ps(x2py2gez2pw2, t0); + t1 = _mm_andnot_ps(x2py2gez2pw2, t1); + t2 = _mm_or_ps(t0, t1); + + // Normalize the row. No division by zero is possible because the + // quaternion is unit-length (and the row is a nonzero multiple of + // the quaternion). + t0 = XMVector4Length(t2); + return _mm_div_ps(t2, t0); +#else // _XM_VMX128_INTRINSICS_ +#endif // _XM_VMX128_INTRINSICS_ +} + +//------------------------------------------------------------------------------ +// Conversion operations +//------------------------------------------------------------------------------ + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XMQuaternionToAxisAngle +( + XMVECTOR* pAxis, + float* pAngle, + FXMVECTOR Q +) +{ + assert(pAxis); + assert(pAngle); + + *pAxis = Q; + + *pAngle = 2.0f * XMScalarACos(XMVectorGetW(Q)); +} + +/**************************************************************************** + * + * Plane + * + ****************************************************************************/ + +//------------------------------------------------------------------------------ +// Comparison operations +//------------------------------------------------------------------------------ + +//------------------------------------------------------------------------------ + +inline bool XMPlaneEqual +( + FXMVECTOR P1, + FXMVECTOR P2 +) +{ + return XMVector4Equal(P1, P2); +} + +//------------------------------------------------------------------------------ + +inline bool XMPlaneNearEqual +( + FXMVECTOR P1, + FXMVECTOR P2, + FXMVECTOR Epsilon +) +{ + XMVECTOR NP1 = XMPlaneNormalize(P1); + XMVECTOR NP2 = XMPlaneNormalize(P2); + return XMVector4NearEqual(NP1, NP2, Epsilon); +} + +//------------------------------------------------------------------------------ + +inline bool XMPlaneNotEqual +( + FXMVECTOR P1, + FXMVECTOR P2 +) +{ + return XMVector4NotEqual(P1, P2); +} + +//------------------------------------------------------------------------------ + +inline bool XMPlaneIsNaN +( + FXMVECTOR P +) +{ + return XMVector4IsNaN(P); +} + +//------------------------------------------------------------------------------ + +inline bool XMPlaneIsInfinite +( + FXMVECTOR P +) +{ + return XMVector4IsInfinite(P); +} + +//------------------------------------------------------------------------------ +// Computation operations +//------------------------------------------------------------------------------ + +//------------------------------------------------------------------------------ + +inline XMVECTOR XMPlaneDot +( + FXMVECTOR P, + FXMVECTOR V +) +{ + return XMVector4Dot(P, V); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XMPlaneDotCoord +( + FXMVECTOR P, + FXMVECTOR V +) +{ + // Result = P[0] * V[0] + P[1] * V[1] + P[2] * V[2] + P[3] + +#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) + + XMVECTOR V3 = XMVectorSelect(g_XMOne.v, V, g_XMSelect1110.v); + XMVECTOR Result = XMVector4Dot(P, V3); + return Result; + +#else // _XM_VMX128_INTRINSICS_ +#endif // _XM_VMX128_INTRINSICS_ +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XMPlaneDotNormal +( + FXMVECTOR P, + FXMVECTOR V +) +{ + return XMVector3Dot(P, V); +} + +//------------------------------------------------------------------------------ +// XMPlaneNormalizeEst uses a reciprocal estimate and +// returns QNaN on zero and infinite vectors. + +inline XMVECTOR XMPlaneNormalizeEst +( + FXMVECTOR P +) +{ +#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) + + XMVECTOR Result = XMVector3ReciprocalLengthEst(P); + return XMVectorMultiply(P, Result); + +#elif defined(_XM_SSE_INTRINSICS_) + // Perform the dot product + XMVECTOR vDot = _mm_mul_ps(P,P); + // x=Dot.y, y=Dot.z + XMVECTOR vTemp = XM_PERMUTE_PS(vDot,_MM_SHUFFLE(2,1,2,1)); + // Result.x = x+y + vDot = _mm_add_ss(vDot,vTemp); + // x=Dot.z + vTemp = XM_PERMUTE_PS(vTemp,_MM_SHUFFLE(1,1,1,1)); + // Result.x = (x+y)+z + vDot = _mm_add_ss(vDot,vTemp); + // Splat x + vDot = XM_PERMUTE_PS(vDot,_MM_SHUFFLE(0,0,0,0)); + // Get the reciprocal + vDot = _mm_rsqrt_ps(vDot); + // Get the reciprocal + vDot = _mm_mul_ps(vDot,P); + return vDot; +#else // _XM_VMX128_INTRINSICS_ +#endif // _XM_VMX128_INTRINSICS_ +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XMPlaneNormalize +( + FXMVECTOR P +) +{ +#if defined(_XM_NO_INTRINSICS_) + float fLengthSq = sqrtf((P.vector4_f32[0]*P.vector4_f32[0])+(P.vector4_f32[1]*P.vector4_f32[1])+(P.vector4_f32[2]*P.vector4_f32[2])); + // Prevent divide by zero + if (fLengthSq) { + fLengthSq = 1.0f/fLengthSq; + } + { + XMVECTOR vResult = { + P.vector4_f32[0]*fLengthSq, + P.vector4_f32[1]*fLengthSq, + P.vector4_f32[2]*fLengthSq, + P.vector4_f32[3]*fLengthSq + }; + return vResult; + } +#elif defined(_XM_ARM_NEON_INTRINSICS_) + XMVECTOR vLength = XMVector3ReciprocalLength(P); + return XMVectorMultiply( P, vLength ); +#elif defined(_XM_SSE_INTRINSICS_) + // Perform the dot product on x,y and z only + XMVECTOR vLengthSq = _mm_mul_ps(P,P); + XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(2,1,2,1)); + vLengthSq = _mm_add_ss(vLengthSq,vTemp); + vTemp = XM_PERMUTE_PS(vTemp,_MM_SHUFFLE(1,1,1,1)); + vLengthSq = _mm_add_ss(vLengthSq,vTemp); + vLengthSq = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(0,0,0,0)); + // Prepare for the division + XMVECTOR vResult = _mm_sqrt_ps(vLengthSq); + // Failsafe on zero (Or epsilon) length planes + // If the length is infinity, set the elements to zero + vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity); + // Reciprocal mul to perform the normalization + vResult = _mm_div_ps(P,vResult); + // Any that are infinity, set to zero + vResult = _mm_and_ps(vResult,vLengthSq); + return vResult; +#else // _XM_VMX128_INTRINSICS_ +#endif // _XM_VMX128_INTRINSICS_ +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XMPlaneIntersectLine +( + FXMVECTOR P, + FXMVECTOR LinePoint1, + FXMVECTOR LinePoint2 +) +{ +#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) + + XMVECTOR V1 = XMVector3Dot(P, LinePoint1); + XMVECTOR V2 = XMVector3Dot(P, LinePoint2); + XMVECTOR D = XMVectorSubtract(V1, V2); + + XMVECTOR VT = XMPlaneDotCoord(P, LinePoint1); + VT = XMVectorDivide(VT, D); + + XMVECTOR Point = XMVectorSubtract(LinePoint2, LinePoint1); + Point = XMVectorMultiplyAdd(Point, VT, LinePoint1); + + const XMVECTOR Zero = XMVectorZero(); + XMVECTOR Control = XMVectorNearEqual(D, Zero, g_XMEpsilon.v); + + return XMVectorSelect(Point, g_XMQNaN.v, Control); + +#else // _XM_VMX128_INTRINSICS_ +#endif // _XM_VMX128_INTRINSICS_ +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline void XMPlaneIntersectPlane +( + XMVECTOR* pLinePoint1, + XMVECTOR* pLinePoint2, + FXMVECTOR P1, + FXMVECTOR P2 +) +{ + assert(pLinePoint1); + assert(pLinePoint2); +#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) + + XMVECTOR V1 = XMVector3Cross(P2, P1); + + XMVECTOR LengthSq = XMVector3LengthSq(V1); + + XMVECTOR V2 = XMVector3Cross(P2, V1); + + XMVECTOR P1W = XMVectorSplatW(P1); + XMVECTOR Point = XMVectorMultiply(V2, P1W); + + XMVECTOR V3 = XMVector3Cross(V1, P1); + + XMVECTOR P2W = XMVectorSplatW(P2); + Point = XMVectorMultiplyAdd(V3, P2W, Point); + + XMVECTOR LinePoint1 = XMVectorDivide(Point, LengthSq); + + XMVECTOR LinePoint2 = XMVectorAdd(LinePoint1, V1); + + XMVECTOR Control = XMVectorLessOrEqual(LengthSq, g_XMEpsilon.v); + *pLinePoint1 = XMVectorSelect(LinePoint1,g_XMQNaN.v, Control); + *pLinePoint2 = XMVectorSelect(LinePoint2,g_XMQNaN.v, Control); + +#else // _XM_VMX128_INTRINSICS_ +#endif // _XM_VMX128_INTRINSICS_ +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XMPlaneTransform +( + FXMVECTOR P, + CXMMATRIX M +) +{ +#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) + + XMVECTOR W = XMVectorSplatW(P); + XMVECTOR Z = XMVectorSplatZ(P); + XMVECTOR Y = XMVectorSplatY(P); + XMVECTOR X = XMVectorSplatX(P); + + XMVECTOR Result = XMVectorMultiply(W, M.r[3]); + Result = XMVectorMultiplyAdd(Z, M.r[2], Result); + Result = XMVectorMultiplyAdd(Y, M.r[1], Result); + Result = XMVectorMultiplyAdd(X, M.r[0], Result); + return Result; + +#else // _XM_VMX128_INTRINSICS_ +#endif // _XM_VMX128_INTRINSICS_ +} + +//------------------------------------------------------------------------------ +_Use_decl_annotations_ +inline XMFLOAT4* XMPlaneTransformStream +( + XMFLOAT4* pOutputStream, + size_t OutputStride, + const XMFLOAT4* pInputStream, + size_t InputStride, + size_t PlaneCount, + CXMMATRIX M +) +{ + return XMVector4TransformStream(pOutputStream, + OutputStride, + pInputStream, + InputStride, + PlaneCount, + M); +} + +//------------------------------------------------------------------------------ +// Conversion operations +//------------------------------------------------------------------------------ + +//------------------------------------------------------------------------------ + +inline XMVECTOR XMPlaneFromPointNormal +( + FXMVECTOR Point, + FXMVECTOR Normal +) +{ + XMVECTOR W = XMVector3Dot(Point, Normal); + W = XMVectorNegate(W); + return XMVectorSelect(W, Normal, g_XMSelect1110.v); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XMPlaneFromPoints +( + FXMVECTOR Point1, + FXMVECTOR Point2, + FXMVECTOR Point3 +) +{ +#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) + + XMVECTOR V21 = XMVectorSubtract(Point1, Point2); + XMVECTOR V31 = XMVectorSubtract(Point1, Point3); + + XMVECTOR N = XMVector3Cross(V21, V31); + N = XMVector3Normalize(N); + + XMVECTOR D = XMPlaneDotNormal(N, Point1); + D = XMVectorNegate(D); + + XMVECTOR Result = XMVectorSelect(D, N, g_XMSelect1110.v); + + return Result; + +#else // _XM_VMX128_INTRINSICS_ +#endif // _XM_VMX128_INTRINSICS_ +} + +/**************************************************************************** + * + * Color + * + ****************************************************************************/ + +//------------------------------------------------------------------------------ +// Comparison operations +//------------------------------------------------------------------------------ + +//------------------------------------------------------------------------------ + +inline bool XMColorEqual +( + FXMVECTOR C1, + FXMVECTOR C2 +) +{ + return XMVector4Equal(C1, C2); +} + +//------------------------------------------------------------------------------ + +inline bool XMColorNotEqual +( + FXMVECTOR C1, + FXMVECTOR C2 +) +{ + return XMVector4NotEqual(C1, C2); +} + +//------------------------------------------------------------------------------ + +inline bool XMColorGreater +( + FXMVECTOR C1, + FXMVECTOR C2 +) +{ + return XMVector4Greater(C1, C2); +} + +//------------------------------------------------------------------------------ + +inline bool XMColorGreaterOrEqual +( + FXMVECTOR C1, + FXMVECTOR C2 +) +{ + return XMVector4GreaterOrEqual(C1, C2); +} + +//------------------------------------------------------------------------------ + +inline bool XMColorLess +( + FXMVECTOR C1, + FXMVECTOR C2 +) +{ + return XMVector4Less(C1, C2); +} + +//------------------------------------------------------------------------------ + +inline bool XMColorLessOrEqual +( + FXMVECTOR C1, + FXMVECTOR C2 +) +{ + return XMVector4LessOrEqual(C1, C2); +} + +//------------------------------------------------------------------------------ + +inline bool XMColorIsNaN +( + FXMVECTOR C +) +{ + return XMVector4IsNaN(C); +} + +//------------------------------------------------------------------------------ + +inline bool XMColorIsInfinite +( + FXMVECTOR C +) +{ + return XMVector4IsInfinite(C); +} + +//------------------------------------------------------------------------------ +// Computation operations +//------------------------------------------------------------------------------ + +//------------------------------------------------------------------------------ + +inline XMVECTOR XMColorNegative +( + FXMVECTOR vColor +) +{ +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 vResult = { + 1.0f - vColor.vector4_f32[0], + 1.0f - vColor.vector4_f32[1], + 1.0f - vColor.vector4_f32[2], + vColor.vector4_f32[3] + }; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + XMVECTOR vTemp = veorq_u32(vColor,g_XMNegate3); + return vaddq_f32(vTemp,g_XMOne3); +#elif defined(_XM_SSE_INTRINSICS_) + // Negate only x,y and z. + XMVECTOR vTemp = _mm_xor_ps(vColor,g_XMNegate3); + // Add 1,1,1,0 to -x,-y,-z,w + return _mm_add_ps(vTemp,g_XMOne3); +#else // _XM_VMX128_INTRINSICS_ +#endif // _XM_VMX128_INTRINSICS_ +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XMColorModulate +( + FXMVECTOR C1, + FXMVECTOR C2 +) +{ + return XMVectorMultiply(C1, C2); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XMColorAdjustSaturation +( + FXMVECTOR vColor, + float fSaturation +) +{ + // Luminance = 0.2125f * C[0] + 0.7154f * C[1] + 0.0721f * C[2]; + // Result = (C - Luminance) * Saturation + Luminance; + +#if defined(_XM_NO_INTRINSICS_) + const XMVECTORF32 gvLuminance = {0.2125f, 0.7154f, 0.0721f, 0.0f}; + + float fLuminance = (vColor.vector4_f32[0]*gvLuminance.f[0])+(vColor.vector4_f32[1]*gvLuminance.f[1])+(vColor.vector4_f32[2]*gvLuminance.f[2]); + XMVECTORF32 vResult = { + ((vColor.vector4_f32[0] - fLuminance)*fSaturation)+fLuminance, + ((vColor.vector4_f32[1] - fLuminance)*fSaturation)+fLuminance, + ((vColor.vector4_f32[2] - fLuminance)*fSaturation)+fLuminance, + vColor.vector4_f32[3]}; + return vResult.v; + +#elif defined(_XM_ARM_NEON_INTRINSICS_) + static const XMVECTORF32 gvLuminance = {0.2125f, 0.7154f, 0.0721f, 0.0f}; + XMVECTOR vLuminance = XMVector3Dot( vColor, gvLuminance ); + XMVECTOR vResult = vsubq_f32(vColor, vLuminance); + XMVECTOR vSaturation = vdupq_n_f32(fSaturation); + vResult = vmlaq_f32( vLuminance, vResult, vSaturation ); + return vbslq_f32( g_XMSelect1110, vResult, vColor ); +#elif defined(_XM_SSE_INTRINSICS_) + static const XMVECTORF32 gvLuminance = {0.2125f, 0.7154f, 0.0721f, 0.0f}; + XMVECTOR vLuminance = XMVector3Dot( vColor, gvLuminance ); +// Splat fSaturation + XMVECTOR vSaturation = _mm_set_ps1(fSaturation); +// vResult = ((vColor-vLuminance)*vSaturation)+vLuminance; + XMVECTOR vResult = _mm_sub_ps(vColor,vLuminance); + vResult = _mm_mul_ps(vResult,vSaturation); + vResult = _mm_add_ps(vResult,vLuminance); +// Retain w from the source color + vLuminance = _mm_shuffle_ps(vResult,vColor,_MM_SHUFFLE(3,2,2,2)); // x = vResult.z,y = vResult.z,z = vColor.z,w=vColor.w + vResult = _mm_shuffle_ps(vResult,vLuminance,_MM_SHUFFLE(3,0,1,0)); // x = vResult.x,y = vResult.y,z = vResult.z,w=vColor.w + return vResult; +#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS) +#endif // _XM_VMX128_INTRINSICS_ +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XMColorAdjustContrast +( + FXMVECTOR vColor, + float fContrast +) +{ + // Result = (vColor - 0.5f) * fContrast + 0.5f; + +#if defined(_XM_NO_INTRINSICS_) + XMVECTORF32 vResult = { + ((vColor.vector4_f32[0]-0.5f) * fContrast) + 0.5f, + ((vColor.vector4_f32[1]-0.5f) * fContrast) + 0.5f, + ((vColor.vector4_f32[2]-0.5f) * fContrast) + 0.5f, + vColor.vector4_f32[3] // Leave W untouched + }; + return vResult.v; +#elif defined(_XM_ARM_NEON_INTRINSICS_) + XMVECTOR vResult = vsubq_f32(vColor, g_XMOneHalf.v); + XMVECTOR vContrast = vdupq_n_f32(fContrast); + vResult = vmlaq_f32( g_XMOneHalf.v, vResult, vContrast ); + return vbslq_f32( g_XMSelect1110, vResult, vColor ); +#elif defined(_XM_SSE_INTRINSICS_) + XMVECTOR vScale = _mm_set_ps1(fContrast); // Splat the scale + XMVECTOR vResult = _mm_sub_ps(vColor,g_XMOneHalf); // Subtract 0.5f from the source (Saving source) + vResult = _mm_mul_ps(vResult,vScale); // Mul by scale + vResult = _mm_add_ps(vResult,g_XMOneHalf); // Add 0.5f +// Retain w from the source color + vScale = _mm_shuffle_ps(vResult,vColor,_MM_SHUFFLE(3,2,2,2)); // x = vResult.z,y = vResult.z,z = vColor.z,w=vColor.w + vResult = _mm_shuffle_ps(vResult,vScale,_MM_SHUFFLE(3,0,1,0)); // x = vResult.x,y = vResult.y,z = vResult.z,w=vColor.w + return vResult; +#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS) +#endif // _XM_VMX128_INTRINSICS_ +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XMColorRGBToHSL( FXMVECTOR rgb ) +{ + XMVECTOR r = XMVectorSplatX( rgb ); + XMVECTOR g = XMVectorSplatY( rgb ); + XMVECTOR b = XMVectorSplatZ( rgb ); + + XMVECTOR min = XMVectorMin( r, XMVectorMin( g, b ) ); + XMVECTOR max = XMVectorMax( r, XMVectorMax( g, b ) ); + + XMVECTOR l = XMVectorMultiply( XMVectorAdd( min, max ), g_XMOneHalf ); + + XMVECTOR d = XMVectorSubtract( max, min ); + + XMVECTOR la = XMVectorSelect( rgb, l, g_XMSelect1110 ); + + if ( XMVector3Less( d, g_XMEpsilon ) ) + { + // Achromatic, assume H and S of 0 + return XMVectorSelect( la, g_XMZero, g_XMSelect1100 ); + } + else + { + XMVECTOR s, h; + + XMVECTOR d2 = XMVectorAdd( min, max ); + + if ( XMVector3Greater( l, g_XMOneHalf ) ) + { + // d / (2-max-min) + s = XMVectorDivide( d, XMVectorSubtract( g_XMTwo, d2 ) ); + } + else + { + // d / (max+min) + s = XMVectorDivide( d, d2 ); + } + + if ( XMVector3Equal( r, max ) ) + { + // Red is max + h = XMVectorDivide( XMVectorSubtract( g, b ), d ); + } + else if ( XMVector3Equal( g, max ) ) + { + // Green is max + h = XMVectorDivide( XMVectorSubtract( b, r ), d ); + h = XMVectorAdd( h, g_XMTwo ); + } + else + { + // Blue is max + h = XMVectorDivide( XMVectorSubtract( r, g ), d ); + h = XMVectorAdd( h, g_XMFour ); + } + + h = XMVectorDivide( h, g_XMSix ); + + if ( XMVector3Less( h, g_XMZero ) ) + h = XMVectorAdd( h, g_XMOne ); + + XMVECTOR lha = XMVectorSelect( la, h, g_XMSelect1100 ); + return XMVectorSelect( s, lha, g_XMSelect1011 ); + } +} + +//------------------------------------------------------------------------------ + +namespace Internal +{ + +inline XMVECTOR XMColorHue2Clr( FXMVECTOR p, FXMVECTOR q, FXMVECTOR h ) +{ + static const XMVECTORF32 oneSixth = { 1.0f/6.0f, 1.0f/6.0f, 1.0f/6.0f, 1.0f/6.0f }; + static const XMVECTORF32 twoThirds = { 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f }; + + XMVECTOR t = h; + + if ( XMVector3Less( t, g_XMZero ) ) + t = XMVectorAdd( t, g_XMOne ); + + if ( XMVector3Greater( t, g_XMOne ) ) + t = XMVectorSubtract( t, g_XMOne ); + + if ( XMVector3Less( t, oneSixth ) ) + { + // p + (q - p) * 6 * t + XMVECTOR t1 = XMVectorSubtract( q, p ); + XMVECTOR t2 = XMVectorMultiply( g_XMSix, t ); + return XMVectorMultiplyAdd( t1, t2, p ); + } + + if ( XMVector3Less( t, g_XMOneHalf ) ) + return q; + + if ( XMVector3Less( t, twoThirds ) ) + { + // p + (q - p) * 6 * (2/3 - t) + XMVECTOR t1 = XMVectorSubtract( q, p ); + XMVECTOR t2 = XMVectorMultiply( g_XMSix, XMVectorSubtract( twoThirds, t ) ); + return XMVectorMultiplyAdd( t1, t2, p ); + } + + return p; +} + +}; // namespace Internal + +inline XMVECTOR XMColorHSLToRGB( FXMVECTOR hsl ) +{ + static const XMVECTORF32 oneThird = { 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f }; + + XMVECTOR s = XMVectorSplatY( hsl ); + XMVECTOR l = XMVectorSplatZ( hsl ); + + if ( XMVector3NearEqual( s, g_XMZero, g_XMEpsilon ) ) + { + // Achromatic + return XMVectorSelect( hsl, l, g_XMSelect1110 ); + } + else + { + XMVECTOR h = XMVectorSplatX( hsl ); + + XMVECTOR q; + if ( XMVector3Less( l, g_XMOneHalf ) ) + { + q = XMVectorMultiply( l, XMVectorAdd ( g_XMOne, s ) ); + } + else + { + q = XMVectorSubtract( XMVectorAdd( l, s ), XMVectorMultiply( l, s ) ); + } + + XMVECTOR p = XMVectorSubtract( XMVectorMultiply( g_XMTwo, l ), q ); + + XMVECTOR r = DirectX::Internal::XMColorHue2Clr( p, q, XMVectorAdd( h, oneThird ) ); + XMVECTOR g = DirectX::Internal::XMColorHue2Clr( p, q, h ); + XMVECTOR b = DirectX::Internal::XMColorHue2Clr( p, q, XMVectorSubtract( h, oneThird ) ); + + XMVECTOR rg = XMVectorSelect( g, r, g_XMSelect1000 ); + XMVECTOR ba = XMVectorSelect( hsl, b, g_XMSelect1110 ); + + return XMVectorSelect( ba, rg, g_XMSelect1100 ); + } +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XMColorRGBToHSV( FXMVECTOR rgb ) +{ + XMVECTOR r = XMVectorSplatX( rgb ); + XMVECTOR g = XMVectorSplatY( rgb ); + XMVECTOR b = XMVectorSplatZ( rgb ); + + XMVECTOR min = XMVectorMin( r, XMVectorMin( g, b ) ); + XMVECTOR v = XMVectorMax( r, XMVectorMax( g, b ) ); + + XMVECTOR d = XMVectorSubtract( v, min ); + + XMVECTOR s = ( XMVector3NearEqual( v, g_XMZero, g_XMEpsilon ) ) ? g_XMZero : XMVectorDivide( d, v ); + + if ( XMVector3Less( d, g_XMEpsilon ) ) + { + // Achromatic, assume H of 0 + XMVECTOR hv = XMVectorSelect( v, g_XMZero, g_XMSelect1000 ); + XMVECTOR hva = XMVectorSelect( rgb, hv, g_XMSelect1110 ); + return XMVectorSelect( s, hva, g_XMSelect1011 ); + } + else + { + XMVECTOR h; + + if ( XMVector3Equal( r, v ) ) + { + // Red is max + h = XMVectorDivide( XMVectorSubtract( g, b ), d ); + + if ( XMVector3Less( g, b ) ) + h = XMVectorAdd( h, g_XMSix ); + } + else if ( XMVector3Equal( g, v ) ) + { + // Green is max + h = XMVectorDivide( XMVectorSubtract( b, r ), d ); + h = XMVectorAdd( h, g_XMTwo ); + } + else + { + // Blue is max + h = XMVectorDivide( XMVectorSubtract( r, g ), d ); + h = XMVectorAdd( h, g_XMFour ); + } + + h = XMVectorDivide( h, g_XMSix ); + + XMVECTOR hv = XMVectorSelect( v, h, g_XMSelect1000 ); + XMVECTOR hva = XMVectorSelect( rgb, hv, g_XMSelect1110 ); + return XMVectorSelect( s, hva, g_XMSelect1011 ); + } +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XMColorHSVToRGB( FXMVECTOR hsv ) +{ + XMVECTOR h = XMVectorSplatX( hsv ); + XMVECTOR s = XMVectorSplatY( hsv ); + XMVECTOR v = XMVectorSplatZ( hsv ); + + XMVECTOR h6 = XMVectorMultiply( h, g_XMSix ); + + XMVECTOR i = XMVectorFloor( h6 ); + XMVECTOR f = XMVectorSubtract( h6, i ); + + // p = v* (1-s) + XMVECTOR p = XMVectorMultiply( v, XMVectorSubtract( g_XMOne, s ) ); + + // q = v*(1-f*s) + XMVECTOR q = XMVectorMultiply( v, XMVectorSubtract( g_XMOne, XMVectorMultiply( f, s ) ) ); + + // t = v*(1 - (1-f)*s) + XMVECTOR t = XMVectorMultiply( v, XMVectorSubtract( g_XMOne, XMVectorMultiply( XMVectorSubtract( g_XMOne, f ), s ) ) ); + + int ii = static_cast<int>( XMVectorGetX( XMVectorMod( i, g_XMSix ) ) ); + + XMVECTOR _rgb; + + switch (ii) + { + case 0: // rgb = vtp + { + XMVECTOR vt = XMVectorSelect( t, v, g_XMSelect1000 ); + _rgb = XMVectorSelect( p, vt, g_XMSelect1100 ); + } + break; + case 1: // rgb = qvp + { + XMVECTOR qv = XMVectorSelect( v, q, g_XMSelect1000 ); + _rgb = XMVectorSelect( p, qv, g_XMSelect1100 ); + } + break; + case 2: // rgb = pvt + { + XMVECTOR pv = XMVectorSelect( v, p, g_XMSelect1000 ); + _rgb = XMVectorSelect( t, pv, g_XMSelect1100 ); + } + break; + case 3: // rgb = pqv + { + XMVECTOR pq = XMVectorSelect( q, p, g_XMSelect1000 ); + _rgb = XMVectorSelect( v, pq, g_XMSelect1100 ); + } + break; + case 4: // rgb = tpv + { + XMVECTOR tp = XMVectorSelect( p, t, g_XMSelect1000 ); + _rgb = XMVectorSelect( v, tp, g_XMSelect1100 ); + } + break; + default: // rgb = vpq + { + XMVECTOR vp = XMVectorSelect( p, v, g_XMSelect1000 ); + _rgb = XMVectorSelect( q, vp, g_XMSelect1100 ); + } + break; + } + + return XMVectorSelect( hsv, _rgb, g_XMSelect1110 ); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XMColorRGBToYUV( FXMVECTOR rgb ) +{ + static const XMVECTORF32 Scale0 = { 0.299f, -0.147f, 0.615f, 0.0f }; + static const XMVECTORF32 Scale1 = { 0.587f, -0.289f, -0.515f, 0.0f }; + static const XMVECTORF32 Scale2 = { 0.114f, 0.436f, -0.100f, 0.0f }; + + XMMATRIX M( Scale0, Scale1, Scale2, g_XMZero ); + XMVECTOR clr = XMVector3Transform( rgb, M ); + + return XMVectorSelect( rgb, clr, g_XMSelect1110 ); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XMColorYUVToRGB( FXMVECTOR yuv ) +{ + static const XMVECTORF32 Scale1 = { 0.0f, -0.395f, 2.032f, 0.0f }; + static const XMVECTORF32 Scale2 = { 1.140f, -0.581f, 0.0f, 0.0f }; + + XMMATRIX M( g_XMOne, Scale1, Scale2, g_XMZero ); + XMVECTOR clr = XMVector3Transform( yuv, M ); + + return XMVectorSelect( yuv, clr, g_XMSelect1110 ); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XMColorRGBToYUV_HD( FXMVECTOR rgb ) +{ + static const XMVECTORF32 Scale0 = { 0.2126f, -0.0997f, 0.6150f, 0.0f }; + static const XMVECTORF32 Scale1 = { 0.7152f, -0.3354f, -0.5586f, 0.0f }; + static const XMVECTORF32 Scale2 = { 0.0722f, 0.4351f, -0.0564f, 0.0f }; + + XMMATRIX M( Scale0, Scale1, Scale2, g_XMZero ); + XMVECTOR clr = XMVector3Transform( rgb, M ); + + return XMVectorSelect( rgb, clr, g_XMSelect1110 ); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XMColorYUVToRGB_HD( FXMVECTOR yuv ) +{ + static const XMVECTORF32 Scale1 = { 0.0f, -0.2153f, 2.1324f, 0.0f }; + static const XMVECTORF32 Scale2 = { 1.2803f, -0.3806f, 0.0f, 0.0f }; + + XMMATRIX M( g_XMOne, Scale1, Scale2, g_XMZero ); + XMVECTOR clr = XMVector3Transform( yuv, M ); + + return XMVectorSelect( yuv, clr, g_XMSelect1110 ); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XMColorRGBToXYZ( FXMVECTOR rgb ) +{ + static const XMVECTORF32 Scale0 = { 0.4887180f, 0.1762044f, 0.0000000f, 0.0f }; + static const XMVECTORF32 Scale1 = { 0.3106803f, 0.8129847f, 0.0102048f, 0.0f }; + static const XMVECTORF32 Scale2 = { 0.2006017f, 0.0108109f, 0.9897952f, 0.0f }; + static const XMVECTORF32 Scale = { 1.f/0.17697f, 1.f/0.17697f, 1.f/0.17697f, 0.0f }; + + XMMATRIX M( Scale0, Scale1, Scale2, g_XMZero ); + XMVECTOR clr = XMVectorMultiply( XMVector3Transform( rgb, M ), Scale ); + + return XMVectorSelect( rgb, clr, g_XMSelect1110 ); +} + +inline XMVECTOR XMColorXYZToRGB( FXMVECTOR xyz ) +{ + static const XMVECTORF32 Scale0 = { 2.3706743f, -0.5138850f, 0.0052982f, 0.0f }; + static const XMVECTORF32 Scale1 = { -0.9000405f, 1.4253036f, -0.0146949f, 0.0f }; + static const XMVECTORF32 Scale2 = { -0.4706338f, 0.0885814f, 1.0093968f, 0.0f }; + static const XMVECTORF32 Scale = { 0.17697f, 0.17697f, 0.17697f, 0.0f }; + + XMMATRIX M( Scale0, Scale1, Scale2, g_XMZero ); + XMVECTOR clr = XMVector3Transform( XMVectorMultiply( xyz, Scale ), M ); + + return XMVectorSelect( xyz, clr, g_XMSelect1110 ); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XMColorXYZToSRGB( FXMVECTOR xyz ) +{ + static const XMVECTORF32 Scale0 = { 3.2406f, -0.9689f, 0.0557f, 0.0f }; + static const XMVECTORF32 Scale1 = { -1.5372f, 1.8758f, -0.2040f, 0.0f }; + static const XMVECTORF32 Scale2 = { -0.4986f, 0.0415f, 1.0570f, 0.0f }; + static const XMVECTORF32 Cutoff = { 0.0031308f, 0.0031308f, 0.0031308f, 0.0f }; + static const XMVECTORF32 Exp = { 1.0f/2.4f, 1.0f/2.4f, 1.0f/2.4f, 1.0f }; + + XMMATRIX M( Scale0, Scale1, Scale2, g_XMZero ); + XMVECTOR lclr = XMVector3Transform( xyz, M ); + + XMVECTOR sel = XMVectorGreater( lclr, Cutoff ); + + // clr = 12.92 * lclr for lclr <= 0.0031308f + XMVECTOR smallC = XMVectorMultiply( lclr, g_XMsrgbScale ); + + // clr = (1+a)*pow(lclr, 1/2.4) - a for lclr > 0.0031308 (where a = 0.055) + XMVECTOR largeC = XMVectorSubtract( XMVectorMultiply( g_XMsrgbA1, XMVectorPow( lclr, Exp ) ), g_XMsrgbA ); + + XMVECTOR clr = XMVectorSelect( smallC, largeC, sel ); + + return XMVectorSelect( xyz, clr, g_XMSelect1110 ); +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XMColorSRGBToXYZ( FXMVECTOR srgb ) +{ + static const XMVECTORF32 Scale0 = { 0.4124f, 0.2126f, 0.0193f, 0.0f }; + static const XMVECTORF32 Scale1 = { 0.3576f, 0.7152f, 0.1192f, 0.0f }; + static const XMVECTORF32 Scale2 = { 0.1805f, 0.0722f, 0.9505f, 0.0f }; + static const XMVECTORF32 Cutoff = { 0.04045f, 0.04045f, 0.04045f, 0.0f }; + static const XMVECTORF32 Exp = { 2.4f, 2.4f, 2.4f, 1.0f }; + + XMVECTOR sel = XMVectorGreater( srgb, Cutoff ); + + // lclr = clr / 12.92 + XMVECTOR smallC = XMVectorDivide( srgb, g_XMsrgbScale ); + + // lclr = pow( (clr + a) / (1+a), 2.4 ) + XMVECTOR largeC = XMVectorPow( XMVectorDivide( XMVectorAdd( srgb, g_XMsrgbA ), g_XMsrgbA1 ), Exp ); + + XMVECTOR lclr = XMVectorSelect( smallC, largeC, sel ); + + XMMATRIX M( Scale0, Scale1, Scale2, g_XMZero ); + XMVECTOR clr = XMVector3Transform( lclr, M ); + + return XMVectorSelect( srgb, clr, g_XMSelect1110 ); +} + +/**************************************************************************** + * + * Miscellaneous + * + ****************************************************************************/ + +//------------------------------------------------------------------------------ + +inline bool XMVerifyCPUSupport() +{ +#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) +#if defined(_M_AMD64) + // The X64 processor model requires SSE2 support + return true; +#elif defined(PF_XMMI_INSTRUCTIONS_AVAILABLE) + // Note that on Windows 2000 or older, SSE2 detection is not supported so this will always fail + // Detecting SSE2 on older versions of Windows would require using cpuid directly + return ( IsProcessorFeaturePresent( PF_XMMI_INSTRUCTIONS_AVAILABLE ) != 0 && IsProcessorFeaturePresent( PF_XMMI64_INSTRUCTIONS_AVAILABLE ) != 0 ); +#else + // If windows.h is not included, we return false (likely a false negative) + return false; +#endif +#elif defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) +#ifdef PF_ARM_NEON_INSTRUCTIONS_AVAILABLE + return ( IsProcessorFeaturePresent( PF_ARM_NEON_INSTRUCTIONS_AVAILABLE ) != 0 ); +#else + // If windows.h is not included, we return false (likely a false negative) + return false; +#endif +#else + return true; +#endif +} + +//------------------------------------------------------------------------------ + +inline XMVECTOR XMFresnelTerm +( + FXMVECTOR CosIncidentAngle, + FXMVECTOR RefractionIndex +) +{ + assert(!XMVector4IsInfinite(CosIncidentAngle)); + + // Result = 0.5f * (g - c)^2 / (g + c)^2 * ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1) where + // c = CosIncidentAngle + // g = sqrt(c^2 + RefractionIndex^2 - 1) + +#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) + + XMVECTOR G = XMVectorMultiplyAdd(RefractionIndex, RefractionIndex, g_XMNegativeOne.v); + G = XMVectorMultiplyAdd(CosIncidentAngle, CosIncidentAngle, G); + G = XMVectorAbs(G); + G = XMVectorSqrt(G); + + XMVECTOR S = XMVectorAdd(G, CosIncidentAngle); + XMVECTOR D = XMVectorSubtract(G, CosIncidentAngle); + + XMVECTOR V0 = XMVectorMultiply(D, D); + XMVECTOR V1 = XMVectorMultiply(S, S); + V1 = XMVectorReciprocal(V1); + V0 = XMVectorMultiply(g_XMOneHalf.v, V0); + V0 = XMVectorMultiply(V0, V1); + + XMVECTOR V2 = XMVectorMultiplyAdd(CosIncidentAngle, S, g_XMNegativeOne.v); + XMVECTOR V3 = XMVectorMultiplyAdd(CosIncidentAngle, D, g_XMOne.v); + V2 = XMVectorMultiply(V2, V2); + V3 = XMVectorMultiply(V3, V3); + V3 = XMVectorReciprocal(V3); + V2 = XMVectorMultiplyAdd(V2, V3, g_XMOne.v); + + XMVECTOR Result = XMVectorMultiply(V0, V2); + + Result = XMVectorSaturate(Result); + + return Result; + +#elif defined(_XM_SSE_INTRINSICS_) + // G = sqrt(abs((RefractionIndex^2-1) + CosIncidentAngle^2)) + XMVECTOR G = _mm_mul_ps(RefractionIndex,RefractionIndex); + XMVECTOR vTemp = _mm_mul_ps(CosIncidentAngle,CosIncidentAngle); + G = _mm_sub_ps(G,g_XMOne); + vTemp = _mm_add_ps(vTemp,G); + // max((0-vTemp),vTemp) == abs(vTemp) + // The abs is needed to deal with refraction and cosine being zero + G = _mm_setzero_ps(); + G = _mm_sub_ps(G,vTemp); + G = _mm_max_ps(G,vTemp); + // Last operation, the sqrt() + G = _mm_sqrt_ps(G); + + // Calc G-C and G+C + XMVECTOR GAddC = _mm_add_ps(G,CosIncidentAngle); + XMVECTOR GSubC = _mm_sub_ps(G,CosIncidentAngle); + // Perform the term (0.5f *(g - c)^2) / (g + c)^2 + XMVECTOR vResult = _mm_mul_ps(GSubC,GSubC); + vTemp = _mm_mul_ps(GAddC,GAddC); + vResult = _mm_mul_ps(vResult,g_XMOneHalf); + vResult = _mm_div_ps(vResult,vTemp); + // Perform the term ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1) + GAddC = _mm_mul_ps(GAddC,CosIncidentAngle); + GSubC = _mm_mul_ps(GSubC,CosIncidentAngle); + GAddC = _mm_sub_ps(GAddC,g_XMOne); + GSubC = _mm_add_ps(GSubC,g_XMOne); + GAddC = _mm_mul_ps(GAddC,GAddC); + GSubC = _mm_mul_ps(GSubC,GSubC); + GAddC = _mm_div_ps(GAddC,GSubC); + GAddC = _mm_add_ps(GAddC,g_XMOne); + // Multiply the two term parts + vResult = _mm_mul_ps(vResult,GAddC); + // Clamp to 0.0 - 1.0f + vResult = _mm_max_ps(vResult,g_XMZero); + vResult = _mm_min_ps(vResult,g_XMOne); + return vResult; +#else // _XM_VMX128_INTRINSICS_ +#endif // _XM_VMX128_INTRINSICS_ +} + +//------------------------------------------------------------------------------ + +inline bool XMScalarNearEqual +( + float S1, + float S2, + float Epsilon +) +{ + float Delta = S1 - S2; + return (fabsf(Delta) <= Epsilon); +} + +//------------------------------------------------------------------------------ +// Modulo the range of the given angle such that -XM_PI <= Angle < XM_PI +inline float XMScalarModAngle +( + float Angle +) +{ + // Note: The modulo is performed with unsigned math only to work + // around a precision error on numbers that are close to PI + + // Normalize the range from 0.0f to XM_2PI + Angle = Angle + XM_PI; + // Perform the modulo, unsigned + float fTemp = fabsf(Angle); + fTemp = fTemp - (XM_2PI * (float)((int32_t)(fTemp/XM_2PI))); + // Restore the number to the range of -XM_PI to XM_PI-epsilon + fTemp = fTemp - XM_PI; + // If the modulo'd value was negative, restore negation + if (Angle<0.0f) { + fTemp = -fTemp; + } + return fTemp; +} + +//------------------------------------------------------------------------------ + +inline float XMScalarSin +( + float Value +) +{ + // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder. + float quotient = XM_1DIV2PI*Value; + if (Value >= 0.0f) + { + quotient = (float)((int)(quotient + 0.5f)); + } + else + { + quotient = (float)((int)(quotient - 0.5f)); + } + float y = Value - XM_2PI*quotient; + + // Map y to [-pi/2,pi/2] with sin(y) = sin(Value). + if (y > XM_PIDIV2) + { + y = XM_PI - y; + } + else if (y < -XM_PIDIV2) + { + y = -XM_PI - y; + } + + // 11-degree minimax approximation + float y2 = y * y; + return ( ( ( ( (-2.3889859e-08f * y2 + 2.7525562e-06f) * y2 - 0.00019840874f ) * y2 + 0.0083333310f ) * y2 - 0.16666667f ) * y2 + 1.0f ) * y; +} + +//------------------------------------------------------------------------------ + +inline float XMScalarSinEst +( + float Value +) +{ + // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder. + float quotient = XM_1DIV2PI*Value; + if (Value >= 0.0f) + { + quotient = (float)((int)(quotient + 0.5f)); + } + else + { + quotient = (float)((int)(quotient - 0.5f)); + } + float y = Value - XM_2PI*quotient; + + // Map y to [-pi/2,pi/2] with sin(y) = sin(Value). + if (y > XM_PIDIV2) + { + y = XM_PI - y; + } + else if (y < -XM_PIDIV2) + { + y = -XM_PI - y; + } + + // 7-degree minimax approximation + float y2 = y * y; + return ( ( ( -0.00018524670f * y2 + 0.0083139502f ) * y2 - 0.16665852f ) * y2 + 1.0f ) * y; +} + +//------------------------------------------------------------------------------ + +inline float XMScalarCos +( + float Value +) +{ + // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder. + float quotient = XM_1DIV2PI*Value; + if (Value >= 0.0f) + { + quotient = (float)((int)(quotient + 0.5f)); + } + else + { + quotient = (float)((int)(quotient - 0.5f)); + } + float y = Value - XM_2PI*quotient; + + // Map y to [-pi/2,pi/2] with cos(y) = sign*cos(x). + float sign; + if (y > XM_PIDIV2) + { + y = XM_PI - y; + sign = -1.0f; + } + else if (y < -XM_PIDIV2) + { + y = -XM_PI - y; + sign = -1.0f; + } + else + { + sign = +1.0f; + } + + // 10-degree minimax approximation + float y2 = y*y; + float p = ( ( ( ( -2.6051615e-07f * y2 + 2.4760495e-05f ) * y2 - 0.0013888378f ) * y2 + 0.041666638f ) * y2 - 0.5f ) * y2 + 1.0f; + return sign*p; +} + +//------------------------------------------------------------------------------ + +inline float XMScalarCosEst +( + float Value +) +{ + // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder. + float quotient = XM_1DIV2PI*Value; + if (Value >= 0.0f) + { + quotient = (float)((int)(quotient + 0.5f)); + } + else + { + quotient = (float)((int)(quotient - 0.5f)); + } + float y = Value - XM_2PI*quotient; + + // Map y to [-pi/2,pi/2] with cos(y) = sign*cos(x). + float sign; + if (y > XM_PIDIV2) + { + y = XM_PI - y; + sign = -1.0f; + } + else if (y < -XM_PIDIV2) + { + y = -XM_PI - y; + sign = -1.0f; + } + else + { + sign = +1.0f; + } + + // 6-degree minimax approximation + float y2 = y * y; + float p = ( ( -0.0012712436f * y2 + 0.041493919f ) * y2 - 0.49992746f ) * y2 + 1.0f; + return sign*p; +} + +//------------------------------------------------------------------------------ + +_Use_decl_annotations_ +inline void XMScalarSinCos +( + float* pSin, + float* pCos, + float Value +) +{ + assert(pSin); + assert(pCos); + + // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder. + float quotient = XM_1DIV2PI*Value; + if (Value >= 0.0f) + { + quotient = (float)((int)(quotient + 0.5f)); + } + else + { + quotient = (float)((int)(quotient - 0.5f)); + } + float y = Value - XM_2PI*quotient; + + // Map y to [-pi/2,pi/2] with sin(y) = sin(Value). + float sign; + if (y > XM_PIDIV2) + { + y = XM_PI - y; + sign = -1.0f; + } + else if (y < -XM_PIDIV2) + { + y = -XM_PI - y; + sign = -1.0f; + } + else + { + sign = +1.0f; + } + + float y2 = y * y; + + // 11-degree minimax approximation + *pSin = ( ( ( ( (-2.3889859e-08f * y2 + 2.7525562e-06f) * y2 - 0.00019840874f ) * y2 + 0.0083333310f ) * y2 - 0.16666667f ) * y2 + 1.0f ) * y; + + // 10-degree minimax approximation + float p = ( ( ( ( -2.6051615e-07f * y2 + 2.4760495e-05f ) * y2 - 0.0013888378f ) * y2 + 0.041666638f ) * y2 - 0.5f ) * y2 + 1.0f; + *pCos = sign*p; +} + +//------------------------------------------------------------------------------ + +_Use_decl_annotations_ +inline void XMScalarSinCosEst +( + float* pSin, + float* pCos, + float Value +) +{ + assert(pSin); + assert(pCos); + + // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder. + float quotient = XM_1DIV2PI*Value; + if (Value >= 0.0f) + { + quotient = (float)((int)(quotient + 0.5f)); + } + else + { + quotient = (float)((int)(quotient - 0.5f)); + } + float y = Value - XM_2PI*quotient; + + // Map y to [-pi/2,pi/2] with sin(y) = sin(Value). + float sign; + if (y > XM_PIDIV2) + { + y = XM_PI - y; + sign = -1.0f; + } + else if (y < -XM_PIDIV2) + { + y = -XM_PI - y; + sign = -1.0f; + } + else + { + sign = +1.0f; + } + + float y2 = y * y; + + // 7-degree minimax approximation + *pSin = ( ( ( -0.00018524670f * y2 + 0.0083139502f ) * y2 - 0.16665852f ) * y2 + 1.0f ) * y; + + // 6-degree minimax approximation + float p = ( ( -0.0012712436f * y2 + 0.041493919f ) * y2 - 0.49992746f ) * y2 + 1.0f; + *pCos = sign*p; +} + +//------------------------------------------------------------------------------ + +inline float XMScalarASin +( + float Value +) +{ + // Clamp input to [-1,1]. + bool nonnegative = (Value >= 0.0f); + float x = fabsf(Value); + float omx = 1.0f - x; + if (omx < 0.0f) + { + omx = 0.0f; + } + float root = sqrt(omx); + + // 7-degree minimax approximation + float result = ( ( ( ( ( ( -0.0012624911f * x + 0.0066700901f ) * x - 0.0170881256f ) * x + 0.0308918810f ) * x - 0.0501743046f ) * x + 0.0889789874f ) * x - 0.2145988016f ) * x + 1.5707963050f; + result *= root; // acos(|x|) + + // acos(x) = pi - acos(-x) when x < 0, asin(x) = pi/2 - acos(x) + return (nonnegative ? XM_PIDIV2 - result : result - XM_PIDIV2); +} + +//------------------------------------------------------------------------------ + +inline float XMScalarASinEst +( + float Value +) +{ + // Clamp input to [-1,1]. + bool nonnegative = (Value >= 0.0f); + float x = fabsf(Value); + float omx = 1.0f - x; + if (omx < 0.0f) + { + omx = 0.0f; + } + float root = sqrt(omx); + + // 3-degree minimax approximation + float result = ((-0.0187293f*x+0.0742610f)*x-0.2121144f)*x+1.5707288f; + result *= root; // acos(|x|) + + // acos(x) = pi - acos(-x) when x < 0, asin(x) = pi/2 - acos(x) + return (nonnegative ? XM_PIDIV2 - result : result - XM_PIDIV2); +} + +//------------------------------------------------------------------------------ + +inline float XMScalarACos +( + float Value +) +{ + // Clamp input to [-1,1]. + bool nonnegative = (Value >= 0.0f); + float x = fabsf(Value); + float omx = 1.0f - x; + if (omx < 0.0f) + { + omx = 0.0f; + } + float root = sqrtf(omx); + + // 7-degree minimax approximation + float result = ( ( ( ( ( ( -0.0012624911f * x + 0.0066700901f ) * x - 0.0170881256f ) * x + 0.0308918810f ) * x - 0.0501743046f ) * x + 0.0889789874f ) * x - 0.2145988016f ) * x + 1.5707963050f; + result *= root; + + // acos(x) = pi - acos(-x) when x < 0 + return (nonnegative ? result : XM_PI - result); +} + +//------------------------------------------------------------------------------ + +inline float XMScalarACosEst +( + float Value +) +{ + // Clamp input to [-1,1]. + bool nonnegative = (Value >= 0.0f); + float x = fabsf(Value); + float omx = 1.0f - x; + if (omx < 0.0f) + { + omx = 0.0f; + } + float root = sqrtf(omx); + + // 3-degree minimax approximation + float result = ( ( -0.0187293f * x + 0.0742610f ) * x - 0.2121144f ) * x + 1.5707288f; + result *= root; + + // acos(x) = pi - acos(-x) when x < 0 + return (nonnegative ? result : XM_PI - result); +} + |
