Diffstat (limited to 'Minecraft.Client/PS3/PS3Extras/DirectX/DirectXMathMatrix.inl')
-rw-r--r--  Minecraft.Client/PS3/PS3Extras/DirectX/DirectXMathMatrix.inl  3414
1 file changed, 3414 insertions, 0 deletions
diff --git a/Minecraft.Client/PS3/PS3Extras/DirectX/DirectXMathMatrix.inl b/Minecraft.Client/PS3/PS3Extras/DirectX/DirectXMathMatrix.inl
new file mode 100644
index 00000000..d665d333
--- /dev/null
+++ b/Minecraft.Client/PS3/PS3Extras/DirectX/DirectXMathMatrix.inl
@@ -0,0 +1,3414 @@
+//-------------------------------------------------------------------------------------
+// DirectXMathMatrix.inl -- SIMD C++ Math library
+//
+// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
+// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
+// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
+// PARTICULAR PURPOSE.
+//
+// Copyright (c) Microsoft Corporation. All rights reserved.
+//-------------------------------------------------------------------------------------
+
+#ifdef _MSC_VER
+#pragma once
+#endif
+
+/****************************************************************************
+ *
+ * Matrix
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+// Comparison operations
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+
+// Return true if any entry in the matrix is NaN
+inline bool XMMatrixIsNaN
+(
+ CXMMATRIX M
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+ size_t i = 16;
+ const uint32_t *pWork = (const uint32_t *)(&M.m[0][0]);
+ do {
+ // Fetch value into integer unit
+ uint32_t uTest = pWork[0];
+ // Remove sign
+ uTest &= 0x7FFFFFFFU;
+ // NaN is 0x7F800001 through 0x7FFFFFFF inclusive
+ uTest -= 0x7F800001U;
+ if (uTest<0x007FFFFFU) {
+ break; // NaN found
+ }
+ ++pWork; // Next entry
+ } while (--i);
+ return (i!=0); // i == 0 if nothing matched
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ // Load in registers
+ XMVECTOR vX = M.r[0];
+ XMVECTOR vY = M.r[1];
+ XMVECTOR vZ = M.r[2];
+ XMVECTOR vW = M.r[3];
+ // Test themselves to check for NaN
+ vX = vmvnq_u32(vceqq_f32(vX, vX));
+ vY = vmvnq_u32(vceqq_f32(vY, vY));
+ vZ = vmvnq_u32(vceqq_f32(vZ, vZ));
+ vW = vmvnq_u32(vceqq_f32(vW, vW));
+ // Or all the results
+ vX = vorrq_u32(vX,vZ);
+ vY = vorrq_u32(vY,vW);
+ vX = vorrq_u32(vX,vY);
+ // If any tested true, return true
+ int8x8x2_t vTemp = vzip_u8(vget_low_u8(vX), vget_high_u8(vX));
+ vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
+ uint32_t r = vget_lane_u32(vTemp.val[1], 1);
+ return (r != 0);
+#elif defined(_XM_SSE_INTRINSICS_)
+ // Load in registers
+ XMVECTOR vX = M.r[0];
+ XMVECTOR vY = M.r[1];
+ XMVECTOR vZ = M.r[2];
+ XMVECTOR vW = M.r[3];
+ // Test themselves to check for NaN
+ vX = _mm_cmpneq_ps(vX,vX);
+ vY = _mm_cmpneq_ps(vY,vY);
+ vZ = _mm_cmpneq_ps(vZ,vZ);
+ vW = _mm_cmpneq_ps(vW,vW);
+ // Or all the results
+ vX = _mm_or_ps(vX,vZ);
+ vY = _mm_or_ps(vY,vW);
+ vX = _mm_or_ps(vX,vY);
+ // If any tested true, return true
+ return (_mm_movemask_ps(vX)!=0);
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
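+
+// Illustrative usage (editorial addition, not part of the original source):
+// operations such as inverting a singular matrix can produce NaN, so a debug
+// build might validate the result; 'someMatrix' is hypothetical.
+//
+//   XMVECTOR det;
+//   XMMATRIX inv = XMMatrixInverse(&det, someMatrix);
+//   assert(!XMMatrixIsNaN(inv));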
+
+//------------------------------------------------------------------------------
+
+// Return true if any entry in the matrix is +/-INF
+inline bool XMMatrixIsInfinite
+(
+ CXMMATRIX M
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+ size_t i = 16;
+ const uint32_t *pWork = (const uint32_t *)(&M.m[0][0]);
+ do {
+ // Fetch value into integer unit
+ uint32_t uTest = pWork[0];
+ // Remove sign
+ uTest &= 0x7FFFFFFFU;
+ // INF is 0x7F800000
+ if (uTest==0x7F800000U) {
+ break; // INF found
+ }
+ ++pWork; // Next entry
+ } while (--i);
+ return (i!=0); // i == 0 if nothing matched
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ // Mask off the sign bits
+ XMVECTOR vTemp1 = vandq_u32(M.r[0],g_XMAbsMask);
+ XMVECTOR vTemp2 = vandq_u32(M.r[1],g_XMAbsMask);
+ XMVECTOR vTemp3 = vandq_u32(M.r[2],g_XMAbsMask);
+ XMVECTOR vTemp4 = vandq_u32(M.r[3],g_XMAbsMask);
+ // Compare to infinity
+ vTemp1 = vceqq_f32(vTemp1,g_XMInfinity);
+ vTemp2 = vceqq_f32(vTemp2,g_XMInfinity);
+ vTemp3 = vceqq_f32(vTemp3,g_XMInfinity);
+ vTemp4 = vceqq_f32(vTemp4,g_XMInfinity);
+ // Or the answers together
+ vTemp1 = vorrq_u32(vTemp1,vTemp2);
+ vTemp3 = vorrq_u32(vTemp3,vTemp4);
+ vTemp1 = vorrq_u32(vTemp1,vTemp3);
+ // If any are infinity, the corresponding lanes are all 1s.
+ int8x8x2_t vTemp = vzip_u8(vget_low_u8(vTemp1), vget_high_u8(vTemp1));
+ vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
+ uint32_t r = vget_lane_u32(vTemp.val[1], 1);
+ return (r != 0);
+#elif defined(_XM_SSE_INTRINSICS_)
+ // Mask off the sign bits
+ XMVECTOR vTemp1 = _mm_and_ps(M.r[0],g_XMAbsMask);
+ XMVECTOR vTemp2 = _mm_and_ps(M.r[1],g_XMAbsMask);
+ XMVECTOR vTemp3 = _mm_and_ps(M.r[2],g_XMAbsMask);
+ XMVECTOR vTemp4 = _mm_and_ps(M.r[3],g_XMAbsMask);
+ // Compare to infinity
+ vTemp1 = _mm_cmpeq_ps(vTemp1,g_XMInfinity);
+ vTemp2 = _mm_cmpeq_ps(vTemp2,g_XMInfinity);
+ vTemp3 = _mm_cmpeq_ps(vTemp3,g_XMInfinity);
+ vTemp4 = _mm_cmpeq_ps(vTemp4,g_XMInfinity);
+ // Or the answers together
+ vTemp1 = _mm_or_ps(vTemp1,vTemp2);
+ vTemp3 = _mm_or_ps(vTemp3,vTemp4);
+ vTemp1 = _mm_or_ps(vTemp1,vTemp3);
+ // If any are infinity, the signs are true.
+ return (_mm_movemask_ps(vTemp1)!=0);
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
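+
+// Editorial sketch of the bit tests used above, assuming IEEE-754 binary32
+// (helper names are hypothetical): after masking off the sign bit, a float
+// is NaN when it compares above 0x7F800000 (all-ones exponent, non-zero
+// mantissa) and +/-INF when it equals 0x7F800000 exactly.
+//
+//   inline bool IsNaNBits(uint32_t u) { return (u & 0x7FFFFFFFU) >  0x7F800000U; }
+//   inline bool IsInfBits(uint32_t u) { return (u & 0x7FFFFFFFU) == 0x7F800000U; }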
+
+//------------------------------------------------------------------------------
+
+// Return true if the XMMatrix is equal to identity
+inline bool XMMatrixIsIdentity
+(
+ CXMMATRIX M
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+ // Use the integer pipeline to reduce branching to a minimum
+ const uint32_t *pWork = (const uint32_t*)(&M.m[0][0]);
+ // XOR with the bit pattern of 1.0f so an exact 1.0f entry becomes zero
+ uint32_t uOne = pWork[0]^0x3F800000U;
+ // Or all the 0.0f entries together
+ uint32_t uZero = pWork[1];
+ uZero |= pWork[2];
+ uZero |= pWork[3];
+ // 2nd row
+ uZero |= pWork[4];
+ uOne |= pWork[5]^0x3F800000U;
+ uZero |= pWork[6];
+ uZero |= pWork[7];
+ // 3rd row
+ uZero |= pWork[8];
+ uZero |= pWork[9];
+ uOne |= pWork[10]^0x3F800000U;
+ uZero |= pWork[11];
+ // 4th row
+ uZero |= pWork[12];
+ uZero |= pWork[13];
+ uZero |= pWork[14];
+ uOne |= pWork[15]^0x3F800000U;
+ // If all the zero entries are zero, then uZero==0
+ uZero &= 0x7FFFFFFF; // Allow -0.0f
+ // If all 1.0f entries are 1.0f, then uOne==0
+ uOne |= uZero;
+ return (uOne==0);
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ XMVECTOR vTemp1 = vceqq_f32(M.r[0],g_XMIdentityR0);
+ XMVECTOR vTemp2 = vceqq_f32(M.r[1],g_XMIdentityR1);
+ XMVECTOR vTemp3 = vceqq_f32(M.r[2],g_XMIdentityR2);
+ XMVECTOR vTemp4 = vceqq_f32(M.r[3],g_XMIdentityR3);
+ vTemp1 = vandq_u32(vTemp1,vTemp2);
+ vTemp3 = vandq_u32(vTemp3,vTemp4);
+ vTemp1 = vandq_u32(vTemp1,vTemp3);
+ int8x8x2_t vTemp = vzip_u8(vget_low_u8(vTemp1), vget_high_u8(vTemp1));
+ vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
+ uint32_t r = vget_lane_u32(vTemp.val[1], 1);
+ return ( r == 0xFFFFFFFFU );
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMVECTOR vTemp1 = _mm_cmpeq_ps(M.r[0],g_XMIdentityR0);
+ XMVECTOR vTemp2 = _mm_cmpeq_ps(M.r[1],g_XMIdentityR1);
+ XMVECTOR vTemp3 = _mm_cmpeq_ps(M.r[2],g_XMIdentityR2);
+ XMVECTOR vTemp4 = _mm_cmpeq_ps(M.r[3],g_XMIdentityR3);
+ vTemp1 = _mm_and_ps(vTemp1,vTemp2);
+ vTemp3 = _mm_and_ps(vTemp3,vTemp4);
+ vTemp1 = _mm_and_ps(vTemp1,vTemp3);
+ return (_mm_movemask_ps(vTemp1)==0x0f);
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
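+
+// Illustrative usage (editorial addition): skipping work when a transform is
+// exactly identity; 'world' and 'v' are hypothetical.
+//
+//   if (!XMMatrixIsIdentity(world))
+//       v = XMVector3TransformCoord(v, world);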
+
+//------------------------------------------------------------------------------
+// Computation operations
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+// Perform a 4x4 matrix multiply by a 4x4 matrix
+inline XMMATRIX XMMatrixMultiply
+(
+ CXMMATRIX M1,
+ CXMMATRIX M2
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+ XMMATRIX mResult;
+ // Cache the invariants in registers
+ float x = M1.m[0][0];
+ float y = M1.m[0][1];
+ float z = M1.m[0][2];
+ float w = M1.m[0][3];
+ // Perform the operation on the first row
+ mResult.m[0][0] = (M2.m[0][0]*x)+(M2.m[1][0]*y)+(M2.m[2][0]*z)+(M2.m[3][0]*w);
+ mResult.m[0][1] = (M2.m[0][1]*x)+(M2.m[1][1]*y)+(M2.m[2][1]*z)+(M2.m[3][1]*w);
+ mResult.m[0][2] = (M2.m[0][2]*x)+(M2.m[1][2]*y)+(M2.m[2][2]*z)+(M2.m[3][2]*w);
+ mResult.m[0][3] = (M2.m[0][3]*x)+(M2.m[1][3]*y)+(M2.m[2][3]*z)+(M2.m[3][3]*w);
+ // Repeat for all the other rows
+ x = M1.m[1][0];
+ y = M1.m[1][1];
+ z = M1.m[1][2];
+ w = M1.m[1][3];
+ mResult.m[1][0] = (M2.m[0][0]*x)+(M2.m[1][0]*y)+(M2.m[2][0]*z)+(M2.m[3][0]*w);
+ mResult.m[1][1] = (M2.m[0][1]*x)+(M2.m[1][1]*y)+(M2.m[2][1]*z)+(M2.m[3][1]*w);
+ mResult.m[1][2] = (M2.m[0][2]*x)+(M2.m[1][2]*y)+(M2.m[2][2]*z)+(M2.m[3][2]*w);
+ mResult.m[1][3] = (M2.m[0][3]*x)+(M2.m[1][3]*y)+(M2.m[2][3]*z)+(M2.m[3][3]*w);
+ x = M1.m[2][0];
+ y = M1.m[2][1];
+ z = M1.m[2][2];
+ w = M1.m[2][3];
+ mResult.m[2][0] = (M2.m[0][0]*x)+(M2.m[1][0]*y)+(M2.m[2][0]*z)+(M2.m[3][0]*w);
+ mResult.m[2][1] = (M2.m[0][1]*x)+(M2.m[1][1]*y)+(M2.m[2][1]*z)+(M2.m[3][1]*w);
+ mResult.m[2][2] = (M2.m[0][2]*x)+(M2.m[1][2]*y)+(M2.m[2][2]*z)+(M2.m[3][2]*w);
+ mResult.m[2][3] = (M2.m[0][3]*x)+(M2.m[1][3]*y)+(M2.m[2][3]*z)+(M2.m[3][3]*w);
+ x = M1.m[3][0];
+ y = M1.m[3][1];
+ z = M1.m[3][2];
+ w = M1.m[3][3];
+ mResult.m[3][0] = (M2.m[0][0]*x)+(M2.m[1][0]*y)+(M2.m[2][0]*z)+(M2.m[3][0]*w);
+ mResult.m[3][1] = (M2.m[0][1]*x)+(M2.m[1][1]*y)+(M2.m[2][1]*z)+(M2.m[3][1]*w);
+ mResult.m[3][2] = (M2.m[0][2]*x)+(M2.m[1][2]*y)+(M2.m[2][2]*z)+(M2.m[3][2]*w);
+ mResult.m[3][3] = (M2.m[0][3]*x)+(M2.m[1][3]*y)+(M2.m[2][3]*z)+(M2.m[3][3]*w);
+ return mResult;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ XMMATRIX mResult;
+ __n64 VL = vget_low_f32( M1.r[0] );
+ __n64 VH = vget_high_f32( M1.r[0] );
+ // Splat the component X,Y,Z then W
+ XMVECTOR vX = vdupq_lane_f32(VL, 0);
+ XMVECTOR vY = vdupq_lane_f32(VL, 1);
+ XMVECTOR vZ = vdupq_lane_f32(VH, 0);
+ XMVECTOR vW = vdupq_lane_f32(VH, 1);
+ // Perform the operation on the first row
+ vX = vmulq_f32(vX,M2.r[0]);
+ vY = vmulq_f32(vY,M2.r[1]);
+ vZ = vmlaq_f32(vX,vZ,M2.r[2]);
+ vW = vmlaq_f32(vY,vW,M2.r[3]);
+ mResult.r[0] = vaddq_f32( vZ, vW );
+ // Repeat for the other 3 rows
+ VL = vget_low_f32( M1.r[1] );
+ VH = vget_high_f32( M1.r[1] );
+ vX = vdupq_lane_f32(VL, 0);
+ vY = vdupq_lane_f32(VL, 1);
+ vZ = vdupq_lane_f32(VH, 0);
+ vW = vdupq_lane_f32(VH, 1);
+ vX = vmulq_f32(vX,M2.r[0]);
+ vY = vmulq_f32(vY,M2.r[1]);
+ vZ = vmlaq_f32(vX,vZ,M2.r[2]);
+ vW = vmlaq_f32(vY,vW,M2.r[3]);
+ mResult.r[1] = vaddq_f32( vZ, vW );
+ VL = vget_low_f32( M1.r[2] );
+ VH = vget_high_f32( M1.r[2] );
+ vX = vdupq_lane_f32(VL, 0);
+ vY = vdupq_lane_f32(VL, 1);
+ vZ = vdupq_lane_f32(VH, 0);
+ vW = vdupq_lane_f32(VH, 1);
+ vX = vmulq_f32(vX,M2.r[0]);
+ vY = vmulq_f32(vY,M2.r[1]);
+ vZ = vmlaq_f32(vX,vZ,M2.r[2]);
+ vW = vmlaq_f32(vY,vW,M2.r[3]);
+ mResult.r[2] = vaddq_f32( vZ, vW );
+ VL = vget_low_f32( M1.r[3] );
+ VH = vget_high_f32( M1.r[3] );
+ vX = vdupq_lane_f32(VL, 0);
+ vY = vdupq_lane_f32(VL, 1);
+ vZ = vdupq_lane_f32(VH, 0);
+ vW = vdupq_lane_f32(VH, 1);
+ vX = vmulq_f32(vX,M2.r[0]);
+ vY = vmulq_f32(vY,M2.r[1]);
+ vZ = vmlaq_f32(vX,vZ,M2.r[2]);
+ vW = vmlaq_f32(vY,vW,M2.r[3]);
+ mResult.r[3] = vaddq_f32( vZ, vW );
+ return mResult;
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMMATRIX mResult;
+ // Use vW to hold the original row
+ XMVECTOR vW = M1.r[0];
+ // Splat the component X,Y,Z then W
+ XMVECTOR vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
+ XMVECTOR vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
+ XMVECTOR vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
+ vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
+ // Perform the operation on the first row
+ vX = _mm_mul_ps(vX,M2.r[0]);
+ vY = _mm_mul_ps(vY,M2.r[1]);
+ vZ = _mm_mul_ps(vZ,M2.r[2]);
+ vW = _mm_mul_ps(vW,M2.r[3]);
+ // Perform a binary add to reduce cumulative errors
+ vX = _mm_add_ps(vX,vZ);
+ vY = _mm_add_ps(vY,vW);
+ vX = _mm_add_ps(vX,vY);
+ mResult.r[0] = vX;
+ // Repeat for the other 3 rows
+ vW = M1.r[1];
+ vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
+ vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
+ vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
+ vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
+ vX = _mm_mul_ps(vX,M2.r[0]);
+ vY = _mm_mul_ps(vY,M2.r[1]);
+ vZ = _mm_mul_ps(vZ,M2.r[2]);
+ vW = _mm_mul_ps(vW,M2.r[3]);
+ vX = _mm_add_ps(vX,vZ);
+ vY = _mm_add_ps(vY,vW);
+ vX = _mm_add_ps(vX,vY);
+ mResult.r[1] = vX;
+ vW = M1.r[2];
+ vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
+ vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
+ vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
+ vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
+ vX = _mm_mul_ps(vX,M2.r[0]);
+ vY = _mm_mul_ps(vY,M2.r[1]);
+ vZ = _mm_mul_ps(vZ,M2.r[2]);
+ vW = _mm_mul_ps(vW,M2.r[3]);
+ vX = _mm_add_ps(vX,vZ);
+ vY = _mm_add_ps(vY,vW);
+ vX = _mm_add_ps(vX,vY);
+ mResult.r[2] = vX;
+ vW = M1.r[3];
+ vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
+ vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
+ vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
+ vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
+ vX = _mm_mul_ps(vX,M2.r[0]);
+ vY = _mm_mul_ps(vY,M2.r[1]);
+ vZ = _mm_mul_ps(vZ,M2.r[2]);
+ vW = _mm_mul_ps(vW,M2.r[3]);
+ vX = _mm_add_ps(vX,vZ);
+ vY = _mm_add_ps(vY,vW);
+ vX = _mm_add_ps(vX,vY);
+ mResult.r[3] = vX;
+ return mResult;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
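+
+// Editorial note: DirectXMath uses row-major matrices with row vectors, so
+// transforms compose left to right in application order. For example, to
+// scale, then rotate, then translate (S, R, T are hypothetical):
+//
+//   XMMATRIX world = XMMatrixMultiply(XMMatrixMultiply(S, R), T);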
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixMultiplyTranspose
+(
+ CXMMATRIX M1,
+ CXMMATRIX M2
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+ XMMATRIX mResult;
+ // Cache the invariants in registers
+ float x = M2.m[0][0];
+ float y = M2.m[1][0];
+ float z = M2.m[2][0];
+ float w = M2.m[3][0];
+ // Perform the operation on the first row
+ mResult.m[0][0] = (M1.m[0][0]*x)+(M1.m[0][1]*y)+(M1.m[0][2]*z)+(M1.m[0][3]*w);
+ mResult.m[0][1] = (M1.m[1][0]*x)+(M1.m[1][1]*y)+(M1.m[1][2]*z)+(M1.m[1][3]*w);
+ mResult.m[0][2] = (M1.m[2][0]*x)+(M1.m[2][1]*y)+(M1.m[2][2]*z)+(M1.m[2][3]*w);
+ mResult.m[0][3] = (M1.m[3][0]*x)+(M1.m[3][1]*y)+(M1.m[3][2]*z)+(M1.m[3][3]*w);
+ // Repeat for all the other rows
+ x = M2.m[0][1];
+ y = M2.m[1][1];
+ z = M2.m[2][1];
+ w = M2.m[3][1];
+ mResult.m[1][0] = (M1.m[0][0]*x)+(M1.m[0][1]*y)+(M1.m[0][2]*z)+(M1.m[0][3]*w);
+ mResult.m[1][1] = (M1.m[1][0]*x)+(M1.m[1][1]*y)+(M1.m[1][2]*z)+(M1.m[1][3]*w);
+ mResult.m[1][2] = (M1.m[2][0]*x)+(M1.m[2][1]*y)+(M1.m[2][2]*z)+(M1.m[2][3]*w);
+ mResult.m[1][3] = (M1.m[3][0]*x)+(M1.m[3][1]*y)+(M1.m[3][2]*z)+(M1.m[3][3]*w);
+ x = M2.m[0][2];
+ y = M2.m[1][2];
+ z = M2.m[2][2];
+ w = M2.m[3][2];
+ mResult.m[2][0] = (M1.m[0][0]*x)+(M1.m[0][1]*y)+(M1.m[0][2]*z)+(M1.m[0][3]*w);
+ mResult.m[2][1] = (M1.m[1][0]*x)+(M1.m[1][1]*y)+(M1.m[1][2]*z)+(M1.m[1][3]*w);
+ mResult.m[2][2] = (M1.m[2][0]*x)+(M1.m[2][1]*y)+(M1.m[2][2]*z)+(M1.m[2][3]*w);
+ mResult.m[2][3] = (M1.m[3][0]*x)+(M1.m[3][1]*y)+(M1.m[3][2]*z)+(M1.m[3][3]*w);
+ x = M2.m[0][3];
+ y = M2.m[1][3];
+ z = M2.m[2][3];
+ w = M2.m[3][3];
+ mResult.m[3][0] = (M1.m[0][0]*x)+(M1.m[0][1]*y)+(M1.m[0][2]*z)+(M1.m[0][3]*w);
+ mResult.m[3][1] = (M1.m[1][0]*x)+(M1.m[1][1]*y)+(M1.m[1][2]*z)+(M1.m[1][3]*w);
+ mResult.m[3][2] = (M1.m[2][0]*x)+(M1.m[2][1]*y)+(M1.m[2][2]*z)+(M1.m[2][3]*w);
+ mResult.m[3][3] = (M1.m[3][0]*x)+(M1.m[3][1]*y)+(M1.m[3][2]*z)+(M1.m[3][3]*w);
+ return mResult;
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ __n64 VL = vget_low_f32( M1.r[0] );
+ __n64 VH = vget_high_f32( M1.r[0] );
+ // Splat the component X,Y,Z then W
+ XMVECTOR vX = vdupq_lane_f32(VL, 0);
+ XMVECTOR vY = vdupq_lane_f32(VL, 1);
+ XMVECTOR vZ = vdupq_lane_f32(VH, 0);
+ XMVECTOR vW = vdupq_lane_f32(VH, 1);
+ // Perform the operation on the first row
+ vX = vmulq_f32(vX,M2.r[0]);
+ vY = vmulq_f32(vY,M2.r[1]);
+ vZ = vmlaq_f32(vX,vZ,M2.r[2]);
+ vW = vmlaq_f32(vY,vW,M2.r[3]);
+ __n128 r0 = vaddq_f32( vZ, vW );
+ // Repeat for the other 3 rows
+ VL = vget_low_f32( M1.r[1] );
+ VH = vget_high_f32( M1.r[1] );
+ vX = vdupq_lane_f32(VL, 0);
+ vY = vdupq_lane_f32(VL, 1);
+ vZ = vdupq_lane_f32(VH, 0);
+ vW = vdupq_lane_f32(VH, 1);
+ vX = vmulq_f32(vX,M2.r[0]);
+ vY = vmulq_f32(vY,M2.r[1]);
+ vZ = vmlaq_f32(vX,vZ,M2.r[2]);
+ vW = vmlaq_f32(vY,vW,M2.r[3]);
+ __n128 r1 = vaddq_f32( vZ, vW );
+ VL = vget_low_f32( M1.r[2] );
+ VH = vget_high_f32( M1.r[2] );
+ vX = vdupq_lane_f32(VL, 0);
+ vY = vdupq_lane_f32(VL, 1);
+ vZ = vdupq_lane_f32(VH, 0);
+ vW = vdupq_lane_f32(VH, 1);
+ vX = vmulq_f32(vX,M2.r[0]);
+ vY = vmulq_f32(vY,M2.r[1]);
+ vZ = vmlaq_f32(vX,vZ,M2.r[2]);
+ vW = vmlaq_f32(vY,vW,M2.r[3]);
+ __n128 r2 = vaddq_f32( vZ, vW );
+ VL = vget_low_f32( M1.r[3] );
+ VH = vget_high_f32( M1.r[3] );
+ vX = vdupq_lane_f32(VL, 0);
+ vY = vdupq_lane_f32(VL, 1);
+ vZ = vdupq_lane_f32(VH, 0);
+ vW = vdupq_lane_f32(VH, 1);
+ vX = vmulq_f32(vX,M2.r[0]);
+ vY = vmulq_f32(vY,M2.r[1]);
+ vZ = vmlaq_f32(vX,vZ,M2.r[2]);
+ vW = vmlaq_f32(vY,vW,M2.r[3]);
+ __n128 r3 = vaddq_f32( vZ, vW );
+
+ // Transpose result
+ float32x4x2_t P0 = vzipq_f32( r0, r2 );
+ float32x4x2_t P1 = vzipq_f32( r1, r3 );
+
+ float32x4x2_t T0 = vzipq_f32( P0.val[0], P1.val[0] );
+ float32x4x2_t T1 = vzipq_f32( P0.val[1], P1.val[1] );
+
+ XMMATRIX mResult;
+ mResult.r[0] = T0.val[0];
+ mResult.r[1] = T0.val[1];
+ mResult.r[2] = T1.val[0];
+ mResult.r[3] = T1.val[1];
+ return mResult;
+#elif defined(_XM_SSE_INTRINSICS_)
+ // Use vW to hold the original row
+ XMVECTOR vW = M1.r[0];
+ // Splat the component X,Y,Z then W
+ XMVECTOR vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
+ XMVECTOR vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
+ XMVECTOR vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
+ vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
+ // Perform the operation on the first row
+ vX = _mm_mul_ps(vX,M2.r[0]);
+ vY = _mm_mul_ps(vY,M2.r[1]);
+ vZ = _mm_mul_ps(vZ,M2.r[2]);
+ vW = _mm_mul_ps(vW,M2.r[3]);
+ // Perform a binary add to reduce cumulative errors
+ vX = _mm_add_ps(vX,vZ);
+ vY = _mm_add_ps(vY,vW);
+ vX = _mm_add_ps(vX,vY);
+ __m128 r0 = vX;
+ // Repeat for the other 3 rows
+ vW = M1.r[1];
+ vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
+ vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
+ vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
+ vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
+ vX = _mm_mul_ps(vX,M2.r[0]);
+ vY = _mm_mul_ps(vY,M2.r[1]);
+ vZ = _mm_mul_ps(vZ,M2.r[2]);
+ vW = _mm_mul_ps(vW,M2.r[3]);
+ vX = _mm_add_ps(vX,vZ);
+ vY = _mm_add_ps(vY,vW);
+ vX = _mm_add_ps(vX,vY);
+ __m128 r1 = vX;
+ vW = M1.r[2];
+ vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
+ vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
+ vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
+ vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
+ vX = _mm_mul_ps(vX,M2.r[0]);
+ vY = _mm_mul_ps(vY,M2.r[1]);
+ vZ = _mm_mul_ps(vZ,M2.r[2]);
+ vW = _mm_mul_ps(vW,M2.r[3]);
+ vX = _mm_add_ps(vX,vZ);
+ vY = _mm_add_ps(vY,vW);
+ vX = _mm_add_ps(vX,vY);
+ __m128 r2 = vX;
+ vW = M1.r[3];
+ vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
+ vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
+ vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
+ vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
+ vX = _mm_mul_ps(vX,M2.r[0]);
+ vY = _mm_mul_ps(vY,M2.r[1]);
+ vZ = _mm_mul_ps(vZ,M2.r[2]);
+ vW = _mm_mul_ps(vW,M2.r[3]);
+ vX = _mm_add_ps(vX,vZ);
+ vY = _mm_add_ps(vY,vW);
+ vX = _mm_add_ps(vX,vY);
+ __m128 r3 = vX;
+
+ // x.x,x.y,y.x,y.y
+ XMVECTOR vTemp1 = _mm_shuffle_ps(r0,r1,_MM_SHUFFLE(1,0,1,0));
+ // x.z,x.w,y.z,y.w
+ XMVECTOR vTemp3 = _mm_shuffle_ps(r0,r1,_MM_SHUFFLE(3,2,3,2));
+ // z.x,z.y,w.x,w.y
+ XMVECTOR vTemp2 = _mm_shuffle_ps(r2,r3,_MM_SHUFFLE(1,0,1,0));
+ // z.z,z.w,w.z,w.w
+ XMVECTOR vTemp4 = _mm_shuffle_ps(r2,r3,_MM_SHUFFLE(3,2,3,2));
+
+ XMMATRIX mResult;
+ // x.x,y.x,z.x,w.x
+ mResult.r[0] = _mm_shuffle_ps(vTemp1, vTemp2,_MM_SHUFFLE(2,0,2,0));
+ // x.y,y.y,z.y,w.y
+ mResult.r[1] = _mm_shuffle_ps(vTemp1, vTemp2,_MM_SHUFFLE(3,1,3,1));
+ // x.z,y.z,z.z,w.z
+ mResult.r[2] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(2,0,2,0));
+ // x.w,y.w,z.w,w.w
+ mResult.r[3] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(3,1,3,1));
+ return mResult;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
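+
+// Editorial note: XMMatrixMultiplyTranspose(M1, M2) computes
+// XMMatrixTranspose(XMMatrixMultiply(M1, M2)) in one pass. A typical use is
+// feeding HLSL constant buffers, which default to column-major order
+// ('world' and 'viewProj' are hypothetical):
+//
+//   XMMATRIX wvpT = XMMatrixMultiplyTranspose(world, viewProj);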
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixTranspose
+(
+ CXMMATRIX M
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ // Original matrix:
+ //
+ // m00 m01 m02 m03
+ // m10 m11 m12 m13
+ // m20 m21 m22 m23
+ // m30 m31 m32 m33
+
+ XMMATRIX P;
+ P.r[0] = XMVectorMergeXY(M.r[0], M.r[2]); // m00 m20 m01 m21
+ P.r[1] = XMVectorMergeXY(M.r[1], M.r[3]); // m10 m30 m11 m31
+ P.r[2] = XMVectorMergeZW(M.r[0], M.r[2]); // m02 m22 m03 m23
+ P.r[3] = XMVectorMergeZW(M.r[1], M.r[3]); // m12 m32 m13 m33
+
+ XMMATRIX MT;
+ MT.r[0] = XMVectorMergeXY(P.r[0], P.r[1]); // m00 m10 m20 m30
+ MT.r[1] = XMVectorMergeZW(P.r[0], P.r[1]); // m01 m11 m21 m31
+ MT.r[2] = XMVectorMergeXY(P.r[2], P.r[3]); // m02 m12 m22 m32
+ MT.r[3] = XMVectorMergeZW(P.r[2], P.r[3]); // m03 m13 m23 m33
+ return MT;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ float32x4x2_t P0 = vzipq_f32( M.r[0], M.r[2] );
+ float32x4x2_t P1 = vzipq_f32( M.r[1], M.r[3] );
+
+ float32x4x2_t T0 = vzipq_f32( P0.val[0], P1.val[0] );
+ float32x4x2_t T1 = vzipq_f32( P0.val[1], P1.val[1] );
+
+ XMMATRIX mResult;
+ mResult.r[0] = T0.val[0];
+ mResult.r[1] = T0.val[1];
+ mResult.r[2] = T1.val[0];
+ mResult.r[3] = T1.val[1];
+ return mResult;
+#elif defined(_XM_SSE_INTRINSICS_)
+ // x.x,x.y,y.x,y.y
+ XMVECTOR vTemp1 = _mm_shuffle_ps(M.r[0],M.r[1],_MM_SHUFFLE(1,0,1,0));
+ // x.z,x.w,y.z,y.w
+ XMVECTOR vTemp3 = _mm_shuffle_ps(M.r[0],M.r[1],_MM_SHUFFLE(3,2,3,2));
+ // z.x,z.y,w.x,w.y
+ XMVECTOR vTemp2 = _mm_shuffle_ps(M.r[2],M.r[3],_MM_SHUFFLE(1,0,1,0));
+ // z.z,z.w,w.z,w.w
+ XMVECTOR vTemp4 = _mm_shuffle_ps(M.r[2],M.r[3],_MM_SHUFFLE(3,2,3,2));
+ XMMATRIX mResult;
+
+ // x.x,y.x,z.x,w.x
+ mResult.r[0] = _mm_shuffle_ps(vTemp1, vTemp2,_MM_SHUFFLE(2,0,2,0));
+ // x.y,y.y,z.y,w.y
+ mResult.r[1] = _mm_shuffle_ps(vTemp1, vTemp2,_MM_SHUFFLE(3,1,3,1));
+ // x.z,y.z,z.z,w.z
+ mResult.r[2] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(2,0,2,0));
+ // x.w,y.w,z.w,w.w
+ mResult.r[3] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(3,1,3,1));
+ return mResult;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
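+
+// Editorial note: the merge/zip passes above are the classic 4x4 SIMD
+// transpose. On SSE the same effect is available through the
+// _MM_TRANSPOSE4_PS macro from <xmmintrin.h>:
+//
+//   __m128 r0 = M.r[0], r1 = M.r[1], r2 = M.r[2], r3 = M.r[3];
+//   _MM_TRANSPOSE4_PS(r0, r1, r2, r3);   // r0..r3 now hold the columns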
+
+//------------------------------------------------------------------------------
+// Return the inverse and the determinant of a 4x4 matrix
+_Use_decl_annotations_
+inline XMMATRIX XMMatrixInverse
+(
+ XMVECTOR* pDeterminant,
+ CXMMATRIX M
+)
+{
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+ XMMATRIX MT = XMMatrixTranspose(M);
+
+ XMVECTOR V0[4], V1[4];
+ V0[0] = XMVectorSwizzle<XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(MT.r[2]);
+ V1[0] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_W>(MT.r[3]);
+ V0[1] = XMVectorSwizzle<XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(MT.r[0]);
+ V1[1] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_W>(MT.r[1]);
+ V0[2] = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_0Z, XM_PERMUTE_1X, XM_PERMUTE_1Z>(MT.r[2], MT.r[0]);
+ V1[2] = XMVectorPermute<XM_PERMUTE_0Y, XM_PERMUTE_0W, XM_PERMUTE_1Y, XM_PERMUTE_1W>(MT.r[3], MT.r[1]);
+
+ XMVECTOR D0 = XMVectorMultiply(V0[0], V1[0]);
+ XMVECTOR D1 = XMVectorMultiply(V0[1], V1[1]);
+ XMVECTOR D2 = XMVectorMultiply(V0[2], V1[2]);
+
+ V0[0] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_W>(MT.r[2]);
+ V1[0] = XMVectorSwizzle<XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(MT.r[3]);
+ V0[1] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_W>(MT.r[0]);
+ V1[1] = XMVectorSwizzle<XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(MT.r[1]);
+ V0[2] = XMVectorPermute<XM_PERMUTE_0Y, XM_PERMUTE_0W, XM_PERMUTE_1Y, XM_PERMUTE_1W>(MT.r[2], MT.r[0]);
+ V1[2] = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_0Z, XM_PERMUTE_1X, XM_PERMUTE_1Z>(MT.r[3], MT.r[1]);
+
+ D0 = XMVectorNegativeMultiplySubtract(V0[0], V1[0], D0);
+ D1 = XMVectorNegativeMultiplySubtract(V0[1], V1[1], D1);
+ D2 = XMVectorNegativeMultiplySubtract(V0[2], V1[2], D2);
+
+ V0[0] = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_Y>(MT.r[1]);
+ V1[0] = XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_0W, XM_PERMUTE_0X>(D0, D2);
+ V0[1] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_X>(MT.r[0]);
+ V1[1] = XMVectorPermute<XM_PERMUTE_0W, XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_0Z>(D0, D2);
+ V0[2] = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_Y>(MT.r[3]);
+ V1[2] = XMVectorPermute<XM_PERMUTE_1W, XM_PERMUTE_0Y, XM_PERMUTE_0W, XM_PERMUTE_0X>(D1, D2);
+ V0[3] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_X>(MT.r[2]);
+ V1[3] = XMVectorPermute<XM_PERMUTE_0W, XM_PERMUTE_1W, XM_PERMUTE_0Y, XM_PERMUTE_0Z>(D1, D2);
+
+ XMVECTOR C0 = XMVectorMultiply(V0[0], V1[0]);
+ XMVECTOR C2 = XMVectorMultiply(V0[1], V1[1]);
+ XMVECTOR C4 = XMVectorMultiply(V0[2], V1[2]);
+ XMVECTOR C6 = XMVectorMultiply(V0[3], V1[3]);
+
+ V0[0] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Y, XM_SWIZZLE_Z>(MT.r[1]);
+ V1[0] = XMVectorPermute<XM_PERMUTE_0W, XM_PERMUTE_0X, XM_PERMUTE_0Y, XM_PERMUTE_1X>(D0, D2);
+ V0[1] = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Y>(MT.r[0]);
+ V1[1] = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_0Y, XM_PERMUTE_1X, XM_PERMUTE_0X>(D0, D2);
+ V0[2] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Y, XM_SWIZZLE_Z>(MT.r[3]);
+ V1[2] = XMVectorPermute<XM_PERMUTE_0W, XM_PERMUTE_0X, XM_PERMUTE_0Y, XM_PERMUTE_1Z>(D1, D2);
+ V0[3] = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Y>(MT.r[2]);
+ V1[3] = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_0Y, XM_PERMUTE_1Z, XM_PERMUTE_0X>(D1, D2);
+
+ C0 = XMVectorNegativeMultiplySubtract(V0[0], V1[0], C0);
+ C2 = XMVectorNegativeMultiplySubtract(V0[1], V1[1], C2);
+ C4 = XMVectorNegativeMultiplySubtract(V0[2], V1[2], C4);
+ C6 = XMVectorNegativeMultiplySubtract(V0[3], V1[3], C6);
+
+ V0[0] = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_W, XM_SWIZZLE_X>(MT.r[1]);
+ V1[0] = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_1Y, XM_PERMUTE_1X, XM_PERMUTE_0Z>(D0, D2);
+ V0[1] = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_Z>(MT.r[0]);
+ V1[1] = XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0X, XM_PERMUTE_0W, XM_PERMUTE_1X>(D0, D2);
+ V0[2] = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_W, XM_SWIZZLE_X>(MT.r[3]);
+ V1[2] = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_1W, XM_PERMUTE_1Z, XM_PERMUTE_0Z>(D1, D2);
+ V0[3] = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_Z>(MT.r[2]);
+ V1[3] = XMVectorPermute<XM_PERMUTE_1W, XM_PERMUTE_0X, XM_PERMUTE_0W, XM_PERMUTE_1Z>(D1, D2);
+
+ XMVECTOR C1 = XMVectorNegativeMultiplySubtract(V0[0], V1[0], C0);
+ C0 = XMVectorMultiplyAdd(V0[0], V1[0], C0);
+ XMVECTOR C3 = XMVectorMultiplyAdd(V0[1], V1[1], C2);
+ C2 = XMVectorNegativeMultiplySubtract(V0[1], V1[1], C2);
+ XMVECTOR C5 = XMVectorNegativeMultiplySubtract(V0[2], V1[2], C4);
+ C4 = XMVectorMultiplyAdd(V0[2], V1[2], C4);
+ XMVECTOR C7 = XMVectorMultiplyAdd(V0[3], V1[3], C6);
+ C6 = XMVectorNegativeMultiplySubtract(V0[3], V1[3], C6);
+
+ XMMATRIX R;
+ R.r[0] = XMVectorSelect(C0, C1, g_XMSelect0101.v);
+ R.r[1] = XMVectorSelect(C2, C3, g_XMSelect0101.v);
+ R.r[2] = XMVectorSelect(C4, C5, g_XMSelect0101.v);
+ R.r[3] = XMVectorSelect(C6, C7, g_XMSelect0101.v);
+
+ XMVECTOR Determinant = XMVector4Dot(R.r[0], MT.r[0]);
+
+ if (pDeterminant != NULL)
+ *pDeterminant = Determinant;
+
+ XMVECTOR Reciprocal = XMVectorReciprocal(Determinant);
+
+ XMMATRIX Result;
+ Result.r[0] = XMVectorMultiply(R.r[0], Reciprocal);
+ Result.r[1] = XMVectorMultiply(R.r[1], Reciprocal);
+ Result.r[2] = XMVectorMultiply(R.r[2], Reciprocal);
+ Result.r[3] = XMVectorMultiply(R.r[3], Reciprocal);
+ return Result;
+
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMMATRIX MT = XMMatrixTranspose(M);
+ XMVECTOR V00 = XM_PERMUTE_PS(MT.r[2],_MM_SHUFFLE(1,1,0,0));
+ XMVECTOR V10 = XM_PERMUTE_PS(MT.r[3],_MM_SHUFFLE(3,2,3,2));
+ XMVECTOR V01 = XM_PERMUTE_PS(MT.r[0],_MM_SHUFFLE(1,1,0,0));
+ XMVECTOR V11 = XM_PERMUTE_PS(MT.r[1],_MM_SHUFFLE(3,2,3,2));
+ XMVECTOR V02 = _mm_shuffle_ps(MT.r[2], MT.r[0],_MM_SHUFFLE(2,0,2,0));
+ XMVECTOR V12 = _mm_shuffle_ps(MT.r[3], MT.r[1],_MM_SHUFFLE(3,1,3,1));
+
+ XMVECTOR D0 = _mm_mul_ps(V00,V10);
+ XMVECTOR D1 = _mm_mul_ps(V01,V11);
+ XMVECTOR D2 = _mm_mul_ps(V02,V12);
+
+ V00 = XM_PERMUTE_PS(MT.r[2],_MM_SHUFFLE(3,2,3,2));
+ V10 = XM_PERMUTE_PS(MT.r[3],_MM_SHUFFLE(1,1,0,0));
+ V01 = XM_PERMUTE_PS(MT.r[0],_MM_SHUFFLE(3,2,3,2));
+ V11 = XM_PERMUTE_PS(MT.r[1],_MM_SHUFFLE(1,1,0,0));
+ V02 = _mm_shuffle_ps(MT.r[2],MT.r[0],_MM_SHUFFLE(3,1,3,1));
+ V12 = _mm_shuffle_ps(MT.r[3],MT.r[1],_MM_SHUFFLE(2,0,2,0));
+
+ V00 = _mm_mul_ps(V00,V10);
+ V01 = _mm_mul_ps(V01,V11);
+ V02 = _mm_mul_ps(V02,V12);
+ D0 = _mm_sub_ps(D0,V00);
+ D1 = _mm_sub_ps(D1,V01);
+ D2 = _mm_sub_ps(D2,V02);
+ // V11 = D0Y,D0W,D2Y,D2Y
+ V11 = _mm_shuffle_ps(D0,D2,_MM_SHUFFLE(1,1,3,1));
+ V00 = XM_PERMUTE_PS(MT.r[1], _MM_SHUFFLE(1,0,2,1));
+ V10 = _mm_shuffle_ps(V11,D0,_MM_SHUFFLE(0,3,0,2));
+ V01 = XM_PERMUTE_PS(MT.r[0], _MM_SHUFFLE(0,1,0,2));
+ V11 = _mm_shuffle_ps(V11,D0,_MM_SHUFFLE(2,1,2,1));
+ // V13 = D1Y,D1W,D2W,D2W
+ XMVECTOR V13 = _mm_shuffle_ps(D1,D2,_MM_SHUFFLE(3,3,3,1));
+ V02 = XM_PERMUTE_PS(MT.r[3], _MM_SHUFFLE(1,0,2,1));
+ V12 = _mm_shuffle_ps(V13,D1,_MM_SHUFFLE(0,3,0,2));
+ XMVECTOR V03 = XM_PERMUTE_PS(MT.r[2],_MM_SHUFFLE(0,1,0,2));
+ V13 = _mm_shuffle_ps(V13,D1,_MM_SHUFFLE(2,1,2,1));
+
+ XMVECTOR C0 = _mm_mul_ps(V00,V10);
+ XMVECTOR C2 = _mm_mul_ps(V01,V11);
+ XMVECTOR C4 = _mm_mul_ps(V02,V12);
+ XMVECTOR C6 = _mm_mul_ps(V03,V13);
+
+ // V11 = D0X,D0Y,D2X,D2X
+ V11 = _mm_shuffle_ps(D0,D2,_MM_SHUFFLE(0,0,1,0));
+ V00 = XM_PERMUTE_PS(MT.r[1], _MM_SHUFFLE(2,1,3,2));
+ V10 = _mm_shuffle_ps(D0,V11,_MM_SHUFFLE(2,1,0,3));
+ V01 = XM_PERMUTE_PS(MT.r[0], _MM_SHUFFLE(1,3,2,3));
+ V11 = _mm_shuffle_ps(D0,V11,_MM_SHUFFLE(0,2,1,2));
+ // V13 = D1X,D1Y,D2Z,D2Z
+ V13 = _mm_shuffle_ps(D1,D2,_MM_SHUFFLE(2,2,1,0));
+ V02 = XM_PERMUTE_PS(MT.r[3], _MM_SHUFFLE(2,1,3,2));
+ V12 = _mm_shuffle_ps(D1,V13,_MM_SHUFFLE(2,1,0,3));
+ V03 = XM_PERMUTE_PS(MT.r[2],_MM_SHUFFLE(1,3,2,3));
+ V13 = _mm_shuffle_ps(D1,V13,_MM_SHUFFLE(0,2,1,2));
+
+ V00 = _mm_mul_ps(V00,V10);
+ V01 = _mm_mul_ps(V01,V11);
+ V02 = _mm_mul_ps(V02,V12);
+ V03 = _mm_mul_ps(V03,V13);
+ C0 = _mm_sub_ps(C0,V00);
+ C2 = _mm_sub_ps(C2,V01);
+ C4 = _mm_sub_ps(C4,V02);
+ C6 = _mm_sub_ps(C6,V03);
+
+ V00 = XM_PERMUTE_PS(MT.r[1],_MM_SHUFFLE(0,3,0,3));
+ // V10 = D0Z,D0Z,D2X,D2Y
+ V10 = _mm_shuffle_ps(D0,D2,_MM_SHUFFLE(1,0,2,2));
+ V10 = XM_PERMUTE_PS(V10,_MM_SHUFFLE(0,2,3,0));
+ V01 = XM_PERMUTE_PS(MT.r[0],_MM_SHUFFLE(2,0,3,1));
+ // V11 = D0X,D0W,D2X,D2Y
+ V11 = _mm_shuffle_ps(D0,D2,_MM_SHUFFLE(1,0,3,0));
+ V11 = XM_PERMUTE_PS(V11,_MM_SHUFFLE(2,1,0,3));
+ V02 = XM_PERMUTE_PS(MT.r[3],_MM_SHUFFLE(0,3,0,3));
+ // V12 = D1Z,D1Z,D2Z,D2W
+ V12 = _mm_shuffle_ps(D1,D2,_MM_SHUFFLE(3,2,2,2));
+ V12 = XM_PERMUTE_PS(V12,_MM_SHUFFLE(0,2,3,0));
+ V03 = XM_PERMUTE_PS(MT.r[2],_MM_SHUFFLE(2,0,3,1));
+ // V13 = D1X,D1W,D2Z,D2W
+ V13 = _mm_shuffle_ps(D1,D2,_MM_SHUFFLE(3,2,3,0));
+ V13 = XM_PERMUTE_PS(V13,_MM_SHUFFLE(2,1,0,3));
+
+ V00 = _mm_mul_ps(V00,V10);
+ V01 = _mm_mul_ps(V01,V11);
+ V02 = _mm_mul_ps(V02,V12);
+ V03 = _mm_mul_ps(V03,V13);
+ XMVECTOR C1 = _mm_sub_ps(C0,V00);
+ C0 = _mm_add_ps(C0,V00);
+ XMVECTOR C3 = _mm_add_ps(C2,V01);
+ C2 = _mm_sub_ps(C2,V01);
+ XMVECTOR C5 = _mm_sub_ps(C4,V02);
+ C4 = _mm_add_ps(C4,V02);
+ XMVECTOR C7 = _mm_add_ps(C6,V03);
+ C6 = _mm_sub_ps(C6,V03);
+
+ C0 = _mm_shuffle_ps(C0,C1,_MM_SHUFFLE(3,1,2,0));
+ C2 = _mm_shuffle_ps(C2,C3,_MM_SHUFFLE(3,1,2,0));
+ C4 = _mm_shuffle_ps(C4,C5,_MM_SHUFFLE(3,1,2,0));
+ C6 = _mm_shuffle_ps(C6,C7,_MM_SHUFFLE(3,1,2,0));
+ C0 = XM_PERMUTE_PS(C0,_MM_SHUFFLE(3,1,2,0));
+ C2 = XM_PERMUTE_PS(C2,_MM_SHUFFLE(3,1,2,0));
+ C4 = XM_PERMUTE_PS(C4,_MM_SHUFFLE(3,1,2,0));
+ C6 = XM_PERMUTE_PS(C6,_MM_SHUFFLE(3,1,2,0));
+ // Get the determinant
+ XMVECTOR vTemp = XMVector4Dot(C0,MT.r[0]);
+ if (pDeterminant != NULL)
+ *pDeterminant = vTemp;
+ vTemp = _mm_div_ps(g_XMOne,vTemp);
+ XMMATRIX mResult;
+ mResult.r[0] = _mm_mul_ps(C0,vTemp);
+ mResult.r[1] = _mm_mul_ps(C2,vTemp);
+ mResult.r[2] = _mm_mul_ps(C4,vTemp);
+ mResult.r[3] = _mm_mul_ps(C6,vTemp);
+ return mResult;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
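+
+// Illustrative usage (editorial addition): the routine divides by the
+// determinant without checking it, so callers should reject (near-)singular
+// input themselves; 'm' is hypothetical and the 1e-6f tolerance is an
+// arbitrary example.
+//
+//   XMVECTOR det;
+//   XMMATRIX inv = XMMatrixInverse(&det, m);
+//   if (fabsf(XMVectorGetX(det)) < 1e-6f) { /* not reliably invertible */ }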
+
+//------------------------------------------------------------------------------
+
+inline XMVECTOR XMMatrixDeterminant
+(
+ CXMMATRIX M
+)
+{
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+ static const XMVECTORF32 Sign = {1.0f, -1.0f, 1.0f, -1.0f};
+
+ XMVECTOR V0 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_X>(M.r[2]);
+ XMVECTOR V1 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(M.r[3]);
+ XMVECTOR V2 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_X>(M.r[2]);
+ XMVECTOR V3 = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_Z>(M.r[3]);
+ XMVECTOR V4 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(M.r[2]);
+ XMVECTOR V5 = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_Z>(M.r[3]);
+
+ XMVECTOR P0 = XMVectorMultiply(V0, V1);
+ XMVECTOR P1 = XMVectorMultiply(V2, V3);
+ XMVECTOR P2 = XMVectorMultiply(V4, V5);
+
+ V0 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(M.r[2]);
+ V1 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_X>(M.r[3]);
+ V2 = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_Z>(M.r[2]);
+ V3 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_X>(M.r[3]);
+ V4 = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_Z>(M.r[2]);
+ V5 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(M.r[3]);
+
+ P0 = XMVectorNegativeMultiplySubtract(V0, V1, P0);
+ P1 = XMVectorNegativeMultiplySubtract(V2, V3, P1);
+ P2 = XMVectorNegativeMultiplySubtract(V4, V5, P2);
+
+ V0 = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_Z>(M.r[1]);
+ V1 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(M.r[1]);
+ V2 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_X>(M.r[1]);
+
+ XMVECTOR S = XMVectorMultiply(M.r[0], Sign.v);
+ XMVECTOR R = XMVectorMultiply(V0, P0);
+ R = XMVectorNegativeMultiplySubtract(V1, P1, R);
+ R = XMVectorMultiplyAdd(V2, P2, R);
+
+ return XMVector4Dot(S, R);
+
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
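+
+// Editorial note: the code above is a cofactor expansion along row 0: the
+// P0-P2 products hold 2x2 minors built from rows 2 and 3, R accumulates the
+// 3x3 cofactors using row 1, and Sign applies the alternating (+,-,+,-)
+// pattern before the final dot product with row 0.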
+
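+// Editorial note: XM3RANKDECOMPOSE orders three values, setting (a) to the
+// index of the largest of (x, y, z), (b) to the middle and (c) to the
+// smallest; XMMatrixDecompose uses it to rebuild degenerate basis axes from
+// the most reliable ones first.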
+#define XM3RANKDECOMPOSE(a, b, c, x, y, z) \
+ if((x) < (y)) \
+ { \
+ if((y) < (z)) \
+ { \
+ (a) = 2; \
+ (b) = 1; \
+ (c) = 0; \
+ } \
+ else \
+ { \
+ (a) = 1; \
+ \
+ if((x) < (z)) \
+ { \
+ (b) = 2; \
+ (c) = 0; \
+ } \
+ else \
+ { \
+ (b) = 0; \
+ (c) = 2; \
+ } \
+ } \
+ } \
+ else \
+ { \
+ if((x) < (z)) \
+ { \
+ (a) = 2; \
+ (b) = 0; \
+ (c) = 1; \
+ } \
+ else \
+ { \
+ (a) = 0; \
+ \
+ if((y) < (z)) \
+ { \
+ (b) = 2; \
+ (c) = 1; \
+ } \
+ else \
+ { \
+ (b) = 1; \
+ (c) = 2; \
+ } \
+ } \
+ }
+
+#define XM3_DECOMP_EPSILON 0.0001f
+
+_Use_decl_annotations_
+inline bool XMMatrixDecompose
+(
+ XMVECTOR *outScale,
+ XMVECTOR *outRotQuat,
+ XMVECTOR *outTrans,
+ CXMMATRIX M
+)
+{
+ static const XMVECTOR *pvCanonicalBasis[3] = {
+ &g_XMIdentityR0.v,
+ &g_XMIdentityR1.v,
+ &g_XMIdentityR2.v
+ };
+
+ assert( outScale != NULL );
+ assert( outRotQuat != NULL );
+ assert( outTrans != NULL );
+
+ // Get the translation
+ outTrans[0] = M.r[3];
+
+ XMVECTOR *ppvBasis[3];
+ XMMATRIX matTemp;
+ ppvBasis[0] = &matTemp.r[0];
+ ppvBasis[1] = &matTemp.r[1];
+ ppvBasis[2] = &matTemp.r[2];
+
+ matTemp.r[0] = M.r[0];
+ matTemp.r[1] = M.r[1];
+ matTemp.r[2] = M.r[2];
+ matTemp.r[3] = g_XMIdentityR3.v;
+
+ float *pfScales = (float *)outScale;
+
+ size_t a, b, c;
+ XMVectorGetXPtr(&pfScales[0],XMVector3Length(ppvBasis[0][0]));
+ XMVectorGetXPtr(&pfScales[1],XMVector3Length(ppvBasis[1][0]));
+ XMVectorGetXPtr(&pfScales[2],XMVector3Length(ppvBasis[2][0]));
+ pfScales[3] = 0.f;
+
+ XM3RANKDECOMPOSE(a, b, c, pfScales[0], pfScales[1], pfScales[2])
+
+ if(pfScales[a] < XM3_DECOMP_EPSILON)
+ {
+ ppvBasis[a][0] = pvCanonicalBasis[a][0];
+ }
+ ppvBasis[a][0] = XMVector3Normalize(ppvBasis[a][0]);
+
+ if(pfScales[b] < XM3_DECOMP_EPSILON)
+ {
+ size_t aa, bb, cc;
+ float fAbsX, fAbsY, fAbsZ;
+
+ fAbsX = fabsf(XMVectorGetX(ppvBasis[a][0]));
+ fAbsY = fabsf(XMVectorGetY(ppvBasis[a][0]));
+ fAbsZ = fabsf(XMVectorGetZ(ppvBasis[a][0]));
+
+ XM3RANKDECOMPOSE(aa, bb, cc, fAbsX, fAbsY, fAbsZ)
+
+ ppvBasis[b][0] = XMVector3Cross(ppvBasis[a][0],pvCanonicalBasis[cc][0]);
+ }
+
+ ppvBasis[b][0] = XMVector3Normalize(ppvBasis[b][0]);
+
+ if(pfScales[c] < XM3_DECOMP_EPSILON)
+ {
+ ppvBasis[c][0] = XMVector3Cross(ppvBasis[a][0],ppvBasis[b][0]);
+ }
+
+ ppvBasis[c][0] = XMVector3Normalize(ppvBasis[c][0]);
+
+ float fDet = XMVectorGetX(XMMatrixDeterminant(matTemp));
+
+ // use Cramer's rule to check the handedness of the coordinate system
+ if(fDet < 0.0f)
+ {
+ // switch coordinate system by negating the scale and inverting the basis vector on the x-axis
+ pfScales[a] = -pfScales[a];
+ ppvBasis[a][0] = XMVectorNegate(ppvBasis[a][0]);
+
+ fDet = -fDet;
+ }
+
+ fDet -= 1.0f;
+ fDet *= fDet;
+
+ if(XM3_DECOMP_EPSILON < fDet)
+ {
+ // Non-SRT matrix encountered
+ return false;
+ }
+
+ // generate the quaternion from the matrix
+ outRotQuat[0] = XMQuaternionRotationMatrix(matTemp);
+ return true;
+}
+
+#undef XM3_DECOMP_EPSILON
+#undef XM3RANKDECOMPOSE
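+
+// Illustrative usage (editorial addition): round-tripping an SRT world
+// matrix; 'world' is hypothetical.
+//
+//   XMVECTOR s, r, t;
+//   if (XMMatrixDecompose(&s, &r, &t, world))
+//   {
+//       XMMATRIX rebuilt = XMMatrixScalingFromVector(s)
+//                        * XMMatrixRotationQuaternion(r)
+//                        * XMMatrixTranslationFromVector(t);
+//   }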
+
+//------------------------------------------------------------------------------
+// Transformation operations
+//------------------------------------------------------------------------------
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixIdentity()
+{
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+ XMMATRIX M;
+ M.r[0] = g_XMIdentityR0.v;
+ M.r[1] = g_XMIdentityR1.v;
+ M.r[2] = g_XMIdentityR2.v;
+ M.r[3] = g_XMIdentityR3.v;
+ return M;
+
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixSet
+(
+ float m00, float m01, float m02, float m03,
+ float m10, float m11, float m12, float m13,
+ float m20, float m21, float m22, float m23,
+ float m30, float m31, float m32, float m33
+)
+{
+ XMMATRIX M;
+#if defined(_XM_NO_INTRINSICS_)
+ M.m[0][0] = m00; M.m[0][1] = m01; M.m[0][2] = m02; M.m[0][3] = m03;
+ M.m[1][0] = m10; M.m[1][1] = m11; M.m[1][2] = m12; M.m[1][3] = m13;
+ M.m[2][0] = m20; M.m[2][1] = m21; M.m[2][2] = m22; M.m[2][3] = m23;
+ M.m[3][0] = m30; M.m[3][1] = m31; M.m[3][2] = m32; M.m[3][3] = m33;
+#else
+ M.r[0] = XMVectorSet(m00, m01, m02, m03);
+ M.r[1] = XMVectorSet(m10, m11, m12, m13);
+ M.r[2] = XMVectorSet(m20, m21, m22, m23);
+ M.r[3] = XMVectorSet(m30, m31, m32, m33);
+#endif
+ return M;
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixTranslation
+(
+ float OffsetX,
+ float OffsetY,
+ float OffsetZ
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMMATRIX M;
+ M.m[0][0] = 1.0f;
+ M.m[0][1] = 0.0f;
+ M.m[0][2] = 0.0f;
+ M.m[0][3] = 0.0f;
+
+ M.m[1][0] = 0.0f;
+ M.m[1][1] = 1.0f;
+ M.m[1][2] = 0.0f;
+ M.m[1][3] = 0.0f;
+
+ M.m[2][0] = 0.0f;
+ M.m[2][1] = 0.0f;
+ M.m[2][2] = 1.0f;
+ M.m[2][3] = 0.0f;
+
+ M.m[3][0] = OffsetX;
+ M.m[3][1] = OffsetY;
+ M.m[3][2] = OffsetZ;
+ M.m[3][3] = 1.0f;
+ return M;
+
+#elif defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+ XMMATRIX M;
+ M.r[0] = g_XMIdentityR0.v;
+ M.r[1] = g_XMIdentityR1.v;
+ M.r[2] = g_XMIdentityR2.v;
+ M.r[3] = XMVectorSet(OffsetX, OffsetY, OffsetZ, 1.f );
+ return M;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixTranslationFromVector
+(
+ FXMVECTOR Offset
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMMATRIX M;
+ M.m[0][0] = 1.0f;
+ M.m[0][1] = 0.0f;
+ M.m[0][2] = 0.0f;
+ M.m[0][3] = 0.0f;
+
+ M.m[1][0] = 0.0f;
+ M.m[1][1] = 1.0f;
+ M.m[1][2] = 0.0f;
+ M.m[1][3] = 0.0f;
+
+ M.m[2][0] = 0.0f;
+ M.m[2][1] = 0.0f;
+ M.m[2][2] = 1.0f;
+ M.m[2][3] = 0.0f;
+
+ M.m[3][0] = Offset.vector4_f32[0];
+ M.m[3][1] = Offset.vector4_f32[1];
+ M.m[3][2] = Offset.vector4_f32[2];
+ M.m[3][3] = 1.0f;
+ return M;
+
+#elif defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+ XMMATRIX M;
+ M.r[0] = g_XMIdentityR0.v;
+ M.r[1] = g_XMIdentityR1.v;
+ M.r[2] = g_XMIdentityR2.v;
+ M.r[3] = XMVectorSelect( g_XMIdentityR3.v, Offset, g_XMSelect1110.v );
+ return M;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixScaling
+(
+ float ScaleX,
+ float ScaleY,
+ float ScaleZ
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMMATRIX M;
+ M.m[0][0] = ScaleX;
+ M.m[0][1] = 0.0f;
+ M.m[0][2] = 0.0f;
+ M.m[0][3] = 0.0f;
+
+ M.m[1][0] = 0.0f;
+ M.m[1][1] = ScaleY;
+ M.m[1][2] = 0.0f;
+ M.m[1][3] = 0.0f;
+
+ M.m[2][0] = 0.0f;
+ M.m[2][1] = 0.0f;
+ M.m[2][2] = ScaleZ;
+ M.m[2][3] = 0.0f;
+
+ M.m[3][0] = 0.0f;
+ M.m[3][1] = 0.0f;
+ M.m[3][2] = 0.0f;
+ M.m[3][3] = 1.0f;
+ return M;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ const XMVECTOR Zero = vdupq_n_f32(0);
+ XMMATRIX M;
+ M.r[0] = vsetq_lane_f32( ScaleX, Zero, 0 );
+ M.r[1] = vsetq_lane_f32( ScaleY, Zero, 1 );
+ M.r[2] = vsetq_lane_f32( ScaleZ, Zero, 2 );
+ M.r[3] = g_XMIdentityR3.v;
+ return M;
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMMATRIX M;
+ M.r[0] = _mm_set_ps( 0, 0, 0, ScaleX );
+ M.r[1] = _mm_set_ps( 0, 0, ScaleY, 0 );
+ M.r[2] = _mm_set_ps( 0, ScaleZ, 0, 0 );
+ M.r[3] = g_XMIdentityR3.v;
+ return M;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixScalingFromVector
+(
+ FXMVECTOR Scale
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ XMMATRIX M;
+ M.m[0][0] = Scale.vector4_f32[0];
+ M.m[0][1] = 0.0f;
+ M.m[0][2] = 0.0f;
+ M.m[0][3] = 0.0f;
+
+ M.m[1][0] = 0.0f;
+ M.m[1][1] = Scale.vector4_f32[1];
+ M.m[1][2] = 0.0f;
+ M.m[1][3] = 0.0f;
+
+ M.m[2][0] = 0.0f;
+ M.m[2][1] = 0.0f;
+ M.m[2][2] = Scale.vector4_f32[2];
+ M.m[2][3] = 0.0f;
+
+ M.m[3][0] = 0.0f;
+ M.m[3][1] = 0.0f;
+ M.m[3][2] = 0.0f;
+ M.m[3][3] = 1.0f;
+ return M;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ XMMATRIX M;
+ M.r[0] = vandq_u32(Scale,g_XMMaskX);
+ M.r[1] = vandq_u32(Scale,g_XMMaskY);
+ M.r[2] = vandq_u32(Scale,g_XMMaskZ);
+ M.r[3] = g_XMIdentityR3.v;
+ return M;
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMMATRIX M;
+ M.r[0] = _mm_and_ps(Scale,g_XMMaskX);
+ M.r[1] = _mm_and_ps(Scale,g_XMMaskY);
+ M.r[2] = _mm_and_ps(Scale,g_XMMaskZ);
+ M.r[3] = g_XMIdentityR3.v;
+ return M;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixRotationX
+(
+ float Angle
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ float fSinAngle;
+ float fCosAngle;
+ XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);
+
+ XMMATRIX M;
+ M.m[0][0] = 1.0f;
+ M.m[0][1] = 0.0f;
+ M.m[0][2] = 0.0f;
+ M.m[0][3] = 0.0f;
+
+ M.m[1][0] = 0.0f;
+ M.m[1][1] = fCosAngle;
+ M.m[1][2] = fSinAngle;
+ M.m[1][3] = 0.0f;
+
+ M.m[2][0] = 0.0f;
+ M.m[2][1] = -fSinAngle;
+ M.m[2][2] = fCosAngle;
+ M.m[2][3] = 0.0f;
+
+ M.m[3][0] = 0.0f;
+ M.m[3][1] = 0.0f;
+ M.m[3][2] = 0.0f;
+ M.m[3][3] = 1.0f;
+ return M;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ float fSinAngle;
+ float fCosAngle;
+ XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);
+
+ const XMVECTOR Zero = vdupq_n_f32(0);
+
+ XMVECTOR T1 = vsetq_lane_f32( fCosAngle, Zero, 1 );
+ T1 = vsetq_lane_f32( fSinAngle, T1, 2 );
+
+ XMVECTOR T2 = vsetq_lane_f32( -fSinAngle, Zero, 1 );
+ T2 = vsetq_lane_f32( fCosAngle, T2, 2 );
+
+ XMMATRIX M;
+ M.r[0] = g_XMIdentityR0.v;
+ M.r[1] = T1;
+ M.r[2] = T2;
+ M.r[3] = g_XMIdentityR3.v;
+ return M;
+#elif defined(_XM_SSE_INTRINSICS_)
+ float SinAngle;
+ float CosAngle;
+ XMScalarSinCos(&SinAngle, &CosAngle, Angle);
+
+ XMVECTOR vSin = _mm_set_ss(SinAngle);
+ XMVECTOR vCos = _mm_set_ss(CosAngle);
+ // x = 0,y = cos,z = sin, w = 0
+ vCos = _mm_shuffle_ps(vCos,vSin,_MM_SHUFFLE(3,0,0,3));
+ XMMATRIX M;
+ M.r[0] = g_XMIdentityR0;
+ M.r[1] = vCos;
+ // x = 0,y = sin,z = cos, w = 0
+ vCos = XM_PERMUTE_PS(vCos,_MM_SHUFFLE(3,1,2,0));
+ // x = 0,y = -sin,z = cos, w = 0
+ vCos = _mm_mul_ps(vCos,g_XMNegateY);
+ M.r[2] = vCos;
+ M.r[3] = g_XMIdentityR3;
+ return M;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
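+
+// Editorial note: XMMatrixRotationX/Y/Z take angles in radians, measured
+// clockwise when looking along the rotation axis toward the origin (the
+// left-handed, row-vector convention). For example:
+//
+//   XMMATRIX rx = XMMatrixRotationX(XMConvertToRadians(90.0f));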
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixRotationY
+(
+ float Angle
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ float fSinAngle;
+ float fCosAngle;
+ XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);
+
+ XMMATRIX M;
+ M.m[0][0] = fCosAngle;
+ M.m[0][1] = 0.0f;
+ M.m[0][2] = -fSinAngle;
+ M.m[0][3] = 0.0f;
+
+ M.m[1][0] = 0.0f;
+ M.m[1][1] = 1.0f;
+ M.m[1][2] = 0.0f;
+ M.m[1][3] = 0.0f;
+
+ M.m[2][0] = fSinAngle;
+ M.m[2][1] = 0.0f;
+ M.m[2][2] = fCosAngle;
+ M.m[2][3] = 0.0f;
+
+ M.m[3][0] = 0.0f;
+ M.m[3][1] = 0.0f;
+ M.m[3][2] = 0.0f;
+ M.m[3][3] = 1.0f;
+ return M;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ float fSinAngle;
+ float fCosAngle;
+ XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);
+
+ const XMVECTOR Zero = vdupq_n_f32(0);
+
+ XMVECTOR T0 = vsetq_lane_f32( fCosAngle, Zero, 0 );
+ T0 = vsetq_lane_f32( -fSinAngle, T0, 2 );
+
+ XMVECTOR T2 = vsetq_lane_f32( fSinAngle, Zero, 0 );
+ T2 = vsetq_lane_f32( fCosAngle, T2, 2 );
+
+ XMMATRIX M;
+ M.r[0] = T0;
+ M.r[1] = g_XMIdentityR1.v;
+ M.r[2] = T2;
+ M.r[3] = g_XMIdentityR3.v;
+ return M;
+#elif defined(_XM_SSE_INTRINSICS_)
+ float SinAngle;
+ float CosAngle;
+ XMScalarSinCos(&SinAngle, &CosAngle, Angle);
+
+ XMVECTOR vSin = _mm_set_ss(SinAngle);
+ XMVECTOR vCos = _mm_set_ss(CosAngle);
+ // x = sin,y = 0,z = cos, w = 0
+ vSin = _mm_shuffle_ps(vSin,vCos,_MM_SHUFFLE(3,0,3,0));
+ XMMATRIX M;
+ M.r[2] = vSin;
+ M.r[1] = g_XMIdentityR1;
+ // x = cos,y = 0,z = sin, w = 0
+ vSin = XM_PERMUTE_PS(vSin,_MM_SHUFFLE(3,0,1,2));
+ // x = cos,y = 0,z = -sin, w = 0
+ vSin = _mm_mul_ps(vSin,g_XMNegateZ);
+ M.r[0] = vSin;
+ M.r[3] = g_XMIdentityR3;
+ return M;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixRotationZ
+(
+ float Angle
+)
+{
+#if defined(_XM_NO_INTRINSICS_)
+
+ float fSinAngle;
+ float fCosAngle;
+ XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);
+
+ XMMATRIX M;
+ M.m[0][0] = fCosAngle;
+ M.m[0][1] = fSinAngle;
+ M.m[0][2] = 0.0f;
+ M.m[0][3] = 0.0f;
+
+ M.m[1][0] = -fSinAngle;
+ M.m[1][1] = fCosAngle;
+ M.m[1][2] = 0.0f;
+ M.m[1][3] = 0.0f;
+
+ M.m[2][0] = 0.0f;
+ M.m[2][1] = 0.0f;
+ M.m[2][2] = 1.0f;
+ M.m[2][3] = 0.0f;
+
+ M.m[3][0] = 0.0f;
+ M.m[3][1] = 0.0f;
+ M.m[3][2] = 0.0f;
+ M.m[3][3] = 1.0f;
+ return M;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ float fSinAngle;
+ float fCosAngle;
+ XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);
+
+ const XMVECTOR Zero = vdupq_n_f32(0);
+
+ XMVECTOR T0 = vsetq_lane_f32( fCosAngle, Zero, 0 );
+ T0 = vsetq_lane_f32( fSinAngle, T0, 1 );
+
+ XMVECTOR T1 = vsetq_lane_f32( -fSinAngle, Zero, 0 );
+ T1 = vsetq_lane_f32( fCosAngle, T1, 1 );
+
+ XMMATRIX M;
+ M.r[0] = T0;
+ M.r[1] = T1;
+ M.r[2] = g_XMIdentityR2.v;
+ M.r[3] = g_XMIdentityR3.v;
+ return M;
+#elif defined(_XM_SSE_INTRINSICS_)
+ float SinAngle;
+ float CosAngle;
+ XMScalarSinCos(&SinAngle, &CosAngle, Angle);
+
+ XMVECTOR vSin = _mm_set_ss(SinAngle);
+ XMVECTOR vCos = _mm_set_ss(CosAngle);
+ // x = cos,y = sin,z = 0, w = 0
+ vCos = _mm_unpacklo_ps(vCos,vSin);
+ XMMATRIX M;
+ M.r[0] = vCos;
+ // x = sin,y = cos,z = 0, w = 0
+ vCos = XM_PERMUTE_PS(vCos,_MM_SHUFFLE(3,2,0,1));
+ // x = cos,y = -sin,z = 0, w = 0
+ vCos = _mm_mul_ps(vCos,g_XMNegateX);
+ M.r[1] = vCos;
+ M.r[2] = g_XMIdentityR2;
+ M.r[3] = g_XMIdentityR3;
+ return M;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixRotationRollPitchYaw
+(
+ float Pitch,
+ float Yaw,
+ float Roll
+)
+{
+ XMVECTOR Angles = XMVectorSet(Pitch, Yaw, Roll, 0.0f);
+ return XMMatrixRotationRollPitchYawFromVector(Angles);
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixRotationRollPitchYawFromVector
+(
+ FXMVECTOR Angles // <Pitch, Yaw, Roll, undefined>
+)
+{
+ XMVECTOR Q = XMQuaternionRotationRollPitchYawFromVector(Angles);
+ return XMMatrixRotationQuaternion(Q);
+}
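+
+// Editorial note: the angles are applied in roll (z), then pitch (x), then
+// yaw (y) order, matching XMQuaternionRotationRollPitchYawFromVector.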
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixRotationNormal
+(
+ FXMVECTOR NormalAxis,
+ float Angle
+)
+{
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+ float fSinAngle;
+ float fCosAngle;
+ XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);
+
+ XMVECTOR A = XMVectorSet(fSinAngle, fCosAngle, 1.0f - fCosAngle, 0.0f);
+
+ XMVECTOR C2 = XMVectorSplatZ(A);
+ XMVECTOR C1 = XMVectorSplatY(A);
+ XMVECTOR C0 = XMVectorSplatX(A);
+
+ XMVECTOR N0 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_W>(NormalAxis);
+ XMVECTOR N1 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_W>(NormalAxis);
+
+ XMVECTOR V0 = XMVectorMultiply(C2, N0);
+ V0 = XMVectorMultiply(V0, N1);
+
+ XMVECTOR R0 = XMVectorMultiply(C2, NormalAxis);
+ R0 = XMVectorMultiplyAdd(R0, NormalAxis, C1);
+
+ XMVECTOR R1 = XMVectorMultiplyAdd(C0, NormalAxis, V0);
+ XMVECTOR R2 = XMVectorNegativeMultiplySubtract(C0, NormalAxis, V0);
+
+ V0 = XMVectorSelect(A, R0, g_XMSelect1110.v);
+ XMVECTOR V1 = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_1Y, XM_PERMUTE_1Z, XM_PERMUTE_0X>(R1, R2);
+ XMVECTOR V2 = XMVectorPermute<XM_PERMUTE_0Y, XM_PERMUTE_1X, XM_PERMUTE_0Y, XM_PERMUTE_1X>(R1, R2);
+
+ XMMATRIX M;
+ M.r[0] = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1Y, XM_PERMUTE_0W>(V0, V1);
+ M.r[1] = XMVectorPermute<XM_PERMUTE_1Z, XM_PERMUTE_0Y, XM_PERMUTE_1W, XM_PERMUTE_0W>(V0, V1);
+ M.r[2] = XMVectorPermute<XM_PERMUTE_1X, XM_PERMUTE_1Y, XM_PERMUTE_0Z, XM_PERMUTE_0W>(V0, V2);
+ M.r[3] = g_XMIdentityR3.v;
+ return M;
+
+#elif defined(_XM_SSE_INTRINSICS_)
+ float fSinAngle;
+ float fCosAngle;
+ XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);
+
+ XMVECTOR C2 = _mm_set_ps1(1.0f - fCosAngle);
+ XMVECTOR C1 = _mm_set_ps1(fCosAngle);
+ XMVECTOR C0 = _mm_set_ps1(fSinAngle);
+
+ XMVECTOR N0 = XM_PERMUTE_PS(NormalAxis,_MM_SHUFFLE(3,0,2,1));
+ XMVECTOR N1 = XM_PERMUTE_PS(NormalAxis,_MM_SHUFFLE(3,1,0,2));
+
+ XMVECTOR V0 = _mm_mul_ps(C2, N0);
+ V0 = _mm_mul_ps(V0, N1);
+
+ XMVECTOR R0 = _mm_mul_ps(C2, NormalAxis);
+ R0 = _mm_mul_ps(R0, NormalAxis);
+ R0 = _mm_add_ps(R0, C1);
+
+ XMVECTOR R1 = _mm_mul_ps(C0, NormalAxis);
+ R1 = _mm_add_ps(R1, V0);
+ XMVECTOR R2 = _mm_mul_ps(C0, NormalAxis);
+ R2 = _mm_sub_ps(V0,R2);
+
+ V0 = _mm_and_ps(R0,g_XMMask3);
+ XMVECTOR V1 = _mm_shuffle_ps(R1,R2,_MM_SHUFFLE(2,1,2,0));
+ V1 = XM_PERMUTE_PS(V1,_MM_SHUFFLE(0,3,2,1));
+ XMVECTOR V2 = _mm_shuffle_ps(R1,R2,_MM_SHUFFLE(0,0,1,1));
+ V2 = XM_PERMUTE_PS(V2,_MM_SHUFFLE(2,0,2,0));
+
+ R2 = _mm_shuffle_ps(V0,V1,_MM_SHUFFLE(1,0,3,0));
+ R2 = XM_PERMUTE_PS(R2,_MM_SHUFFLE(1,3,2,0));
+
+ XMMATRIX M;
+ M.r[0] = R2;
+
+ R2 = _mm_shuffle_ps(V0,V1,_MM_SHUFFLE(3,2,3,1));
+ R2 = XM_PERMUTE_PS(R2,_MM_SHUFFLE(1,3,0,2));
+ M.r[1] = R2;
+
+ V2 = _mm_shuffle_ps(V2,V0,_MM_SHUFFLE(3,2,1,0));
+ M.r[2] = V2;
+ M.r[3] = g_XMIdentityR3.v;
+ return M;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
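+
+// Editorial note: the code above evaluates the matrix form of Rodrigues'
+// rotation formula, R = cos(a)*I + sin(a)*K + (1 - cos(a))*(n n^T), with K
+// the cross-product matrix of the axis n (transposed here for the
+// row-vector convention). The axis is assumed unit length, hence "Normal";
+// XMMatrixRotationAxis below normalizes first.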
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixRotationAxis
+(
+ FXMVECTOR Axis,
+ float Angle
+)
+{
+ assert(!XMVector3Equal(Axis, XMVectorZero()));
+ assert(!XMVector3IsInfinite(Axis));
+
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+ XMVECTOR Normal = XMVector3Normalize(Axis);
+ return XMMatrixRotationNormal(Normal, Angle);
+
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixRotationQuaternion
+(
+ FXMVECTOR Quaternion
+)
+{
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+ static const XMVECTORF32 Constant1110 = {1.0f, 1.0f, 1.0f, 0.0f};
+
+ XMVECTOR Q0 = XMVectorAdd(Quaternion, Quaternion);
+ XMVECTOR Q1 = XMVectorMultiply(Quaternion, Q0);
+
+ XMVECTOR V0 = XMVectorPermute<XM_PERMUTE_0Y, XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_1W>(Q1, Constant1110.v);
+ XMVECTOR V1 = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_0Y, XM_PERMUTE_1W>(Q1, Constant1110.v);
+ XMVECTOR R0 = XMVectorSubtract(Constant1110, V0);
+ R0 = XMVectorSubtract(R0, V1);
+
+ V0 = XMVectorSwizzle<XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_W>(Quaternion);
+ V1 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_Z, XM_SWIZZLE_W>(Q0);
+ V0 = XMVectorMultiply(V0, V1);
+
+ V1 = XMVectorSplatW(Quaternion);
+ XMVECTOR V2 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_W>(Q0);
+ V1 = XMVectorMultiply(V1, V2);
+
+ XMVECTOR R1 = XMVectorAdd(V0, V1);
+ XMVECTOR R2 = XMVectorSubtract(V0, V1);
+
+ V0 = XMVectorPermute<XM_PERMUTE_0Y, XM_PERMUTE_1X, XM_PERMUTE_1Y, XM_PERMUTE_0Z>(R1, R2);
+ V1 = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_1Z, XM_PERMUTE_0X, XM_PERMUTE_1Z>(R1, R2);
+
+ XMMATRIX M;
+ M.r[0] = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1Y, XM_PERMUTE_0W>(R0, V0);
+ M.r[1] = XMVectorPermute<XM_PERMUTE_1Z, XM_PERMUTE_0Y, XM_PERMUTE_1W, XM_PERMUTE_0W>(R0, V0);
+ M.r[2] = XMVectorPermute<XM_PERMUTE_1X, XM_PERMUTE_1Y, XM_PERMUTE_0Z, XM_PERMUTE_0W>(R0, V1);
+ M.r[3] = g_XMIdentityR3.v;
+ return M;
+
+#elif defined(_XM_SSE_INTRINSICS_)
+ static const XMVECTORF32 Constant1110 = {1.0f, 1.0f, 1.0f, 0.0f};
+
+ XMVECTOR Q0 = _mm_add_ps(Quaternion,Quaternion);
+ XMVECTOR Q1 = _mm_mul_ps(Quaternion,Q0);
+
+ XMVECTOR V0 = XM_PERMUTE_PS(Q1,_MM_SHUFFLE(3,0,0,1));
+ V0 = _mm_and_ps(V0,g_XMMask3);
+ XMVECTOR V1 = XM_PERMUTE_PS(Q1,_MM_SHUFFLE(3,1,2,2));
+ V1 = _mm_and_ps(V1,g_XMMask3);
+ XMVECTOR R0 = _mm_sub_ps(Constant1110,V0);
+ R0 = _mm_sub_ps(R0, V1);
+
+ V0 = XM_PERMUTE_PS(Quaternion,_MM_SHUFFLE(3,1,0,0));
+ V1 = XM_PERMUTE_PS(Q0,_MM_SHUFFLE(3,2,1,2));
+ V0 = _mm_mul_ps(V0, V1);
+
+ V1 = XM_PERMUTE_PS(Quaternion,_MM_SHUFFLE(3,3,3,3));
+ XMVECTOR V2 = XM_PERMUTE_PS(Q0,_MM_SHUFFLE(3,0,2,1));
+ V1 = _mm_mul_ps(V1, V2);
+
+ XMVECTOR R1 = _mm_add_ps(V0, V1);
+ XMVECTOR R2 = _mm_sub_ps(V0, V1);
+
+ V0 = _mm_shuffle_ps(R1,R2,_MM_SHUFFLE(1,0,2,1));
+ V0 = XM_PERMUTE_PS(V0,_MM_SHUFFLE(1,3,2,0));
+ V1 = _mm_shuffle_ps(R1,R2,_MM_SHUFFLE(2,2,0,0));
+ V1 = XM_PERMUTE_PS(V1,_MM_SHUFFLE(2,0,2,0));
+
+ Q1 = _mm_shuffle_ps(R0,V0,_MM_SHUFFLE(1,0,3,0));
+ Q1 = XM_PERMUTE_PS(Q1,_MM_SHUFFLE(1,3,2,0));
+
+ XMMATRIX M;
+ M.r[0] = Q1;
+
+ Q1 = _mm_shuffle_ps(R0,V0,_MM_SHUFFLE(3,2,3,1));
+ Q1 = XM_PERMUTE_PS(Q1,_MM_SHUFFLE(1,3,0,2));
+ M.r[1] = Q1;
+
+ Q1 = _mm_shuffle_ps(V1,R0,_MM_SHUFFLE(3,2,1,0));
+ M.r[2] = Q1;
+ M.r[3] = g_XMIdentityR3;
+ return M;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
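+
+// Editorial note: this expands the standard unit-quaternion-to-matrix
+// conversion; Quaternion is assumed normalized. Q1 = q*2q supplies the
+// squared terms (2x^2, etc.), while R1/R2 hold the paired cross terms
+// (2*q_i*q_j +/- 2*w*q_k) that fill the off-diagonal entries.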
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixTransformation2D
+(
+ FXMVECTOR ScalingOrigin,
+ float ScalingOrientation,
+ FXMVECTOR Scaling,
+ FXMVECTOR RotationOrigin,
+ float Rotation,
+ GXMVECTOR Translation
+)
+{
+ // M = Inverse(MScalingOrigin) * Transpose(MScalingOrientation) * MScaling * MScalingOrientation *
+ // MScalingOrigin * Inverse(MRotationOrigin) * MRotation * MRotationOrigin * MTranslation;
+
+ XMVECTOR VScalingOrigin = XMVectorSelect(g_XMSelect1100.v, ScalingOrigin, g_XMSelect1100.v);
+ XMVECTOR NegScalingOrigin = XMVectorNegate(VScalingOrigin);
+
+ XMMATRIX MScalingOriginI = XMMatrixTranslationFromVector(NegScalingOrigin);
+ XMMATRIX MScalingOrientation = XMMatrixRotationZ(ScalingOrientation);
+ XMMATRIX MScalingOrientationT = XMMatrixTranspose(MScalingOrientation);
+ XMVECTOR VScaling = XMVectorSelect(g_XMOne.v, Scaling, g_XMSelect1100.v);
+ XMMATRIX MScaling = XMMatrixScalingFromVector(VScaling);
+ XMVECTOR VRotationOrigin = XMVectorSelect(g_XMSelect1100.v, RotationOrigin, g_XMSelect1100.v);
+ XMMATRIX MRotation = XMMatrixRotationZ(Rotation);
+ XMVECTOR VTranslation = XMVectorSelect(g_XMSelect1100.v, Translation, g_XMSelect1100.v);
+
+ XMMATRIX M = XMMatrixMultiply(MScalingOriginI, MScalingOrientationT);
+ M = XMMatrixMultiply(M, MScaling);
+ M = XMMatrixMultiply(M, MScalingOrientation);
+ M.r[3] = XMVectorAdd(M.r[3], VScalingOrigin);
+ M.r[3] = XMVectorSubtract(M.r[3], VRotationOrigin);
+ M = XMMatrixMultiply(M, MRotation);
+ M.r[3] = XMVectorAdd(M.r[3], VRotationOrigin);
+ M.r[3] = XMVectorAdd(M.r[3], VTranslation);
+
+ return M;
+}
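+
+// Example (editorial sketch, not part of the original file): a 2D sprite
+// transform that scales and spins a quad about its own center before
+// translating it. All values and the helper name are illustrative.
+inline XMMATRIX ExampleSpriteTransform2D(float centerX, float centerY, float angle)
+{
+ XMVECTOR center = XMVectorSet(centerX, centerY, 0.0f, 0.0f);
+ XMVECTOR scale = XMVectorSet(2.0f, 2.0f, 0.0f, 0.0f);
+ XMVECTOR shift = XMVectorSet(10.0f, 5.0f, 0.0f, 0.0f);
+ return XMMatrixTransformation2D(center, 0.0f, scale, center, angle, shift);
+}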
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixTransformation
+(
+ FXMVECTOR ScalingOrigin,
+ FXMVECTOR ScalingOrientationQuaternion,
+ FXMVECTOR Scaling,
+ GXMVECTOR RotationOrigin,
+ CXMVECTOR RotationQuaternion,
+ CXMVECTOR Translation
+)
+{
+ // M = Inverse(MScalingOrigin) * Transpose(MScalingOrientation) * MScaling * MScalingOrientation *
+ // MScalingOrigin * Inverse(MRotationOrigin) * MRotation * MRotationOrigin * MTranslation;
+
+ XMVECTOR VScalingOrigin = XMVectorSelect(g_XMSelect1110.v, ScalingOrigin, g_XMSelect1110.v);
+ XMVECTOR NegScalingOrigin = XMVectorNegate(ScalingOrigin);
+
+ XMMATRIX MScalingOriginI = XMMatrixTranslationFromVector(NegScalingOrigin);
+ XMMATRIX MScalingOrientation = XMMatrixRotationQuaternion(ScalingOrientationQuaternion);
+ XMMATRIX MScalingOrientationT = XMMatrixTranspose(MScalingOrientation);
+ XMMATRIX MScaling = XMMatrixScalingFromVector(Scaling);
+ XMVECTOR VRotationOrigin = XMVectorSelect(g_XMSelect1110.v, RotationOrigin, g_XMSelect1110.v);
+ XMMATRIX MRotation = XMMatrixRotationQuaternion(RotationQuaternion);
+ XMVECTOR VTranslation = XMVectorSelect(g_XMSelect1110.v, Translation, g_XMSelect1110.v);
+
+ XMMATRIX M;
+ M = XMMatrixMultiply(MScalingOriginI, MScalingOrientationT);
+ M = XMMatrixMultiply(M, MScaling);
+ M = XMMatrixMultiply(M, MScalingOrientation);
+ M.r[3] = XMVectorAdd(M.r[3], VScalingOrigin);
+ M.r[3] = XMVectorSubtract(M.r[3], VRotationOrigin);
+ M = XMMatrixMultiply(M, MRotation);
+ M.r[3] = XMVectorAdd(M.r[3], VRotationOrigin);
+ M.r[3] = XMVectorAdd(M.r[3], VTranslation);
+ return M;
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixAffineTransformation2D
+(
+ FXMVECTOR Scaling,
+ FXMVECTOR RotationOrigin,
+ float Rotation,
+ FXMVECTOR Translation
+)
+{
+ // M = MScaling * Inverse(MRotationOrigin) * MRotation * MRotationOrigin * MTranslation;
+
+ XMVECTOR VScaling = XMVectorSelect(g_XMOne.v, Scaling, g_XMSelect1100.v);
+ XMMATRIX MScaling = XMMatrixScalingFromVector(VScaling);
+ XMVECTOR VRotationOrigin = XMVectorSelect(g_XMSelect1100.v, RotationOrigin, g_XMSelect1100.v);
+ XMMATRIX MRotation = XMMatrixRotationZ(Rotation);
+ XMVECTOR VTranslation = XMVectorSelect(g_XMSelect1100.v, Translation, g_XMSelect1100.v);
+
+ XMMATRIX M;
+ M = MScaling;
+ M.r[3] = XMVectorSubtract(M.r[3], VRotationOrigin);
+ M = XMMatrixMultiply(M, MRotation);
+ M.r[3] = XMVectorAdd(M.r[3], VRotationOrigin);
+ M.r[3] = XMVectorAdd(M.r[3], VTranslation);
+ return M;
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixAffineTransformation
+(
+ FXMVECTOR Scaling,
+ FXMVECTOR RotationOrigin,
+ FXMVECTOR RotationQuaternion,
+ GXMVECTOR Translation
+)
+{
+ // M = MScaling * Inverse(MRotationOrigin) * MRotation * MRotationOrigin * MTranslation;
+
+ XMMATRIX MScaling = XMMatrixScalingFromVector(Scaling);
+ XMVECTOR VRotationOrigin = XMVectorSelect(g_XMSelect1110.v, RotationOrigin, g_XMSelect1110.v);
+ XMMATRIX MRotation = XMMatrixRotationQuaternion(RotationQuaternion);
+ XMVECTOR VTranslation = XMVectorSelect(g_XMSelect1110.v, Translation, g_XMSelect1110.v);
+
+ XMMATRIX M;
+ M = MScaling;
+ M.r[3] = XMVectorSubtract(M.r[3], VRotationOrigin);
+ M = XMMatrixMultiply(M, MRotation);
+ M.r[3] = XMVectorAdd(M.r[3], VRotationOrigin);
+ M.r[3] = XMVectorAdd(M.r[3], VTranslation);
+ return M;
+}
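+
+// Example (editorial sketch, not part of the original file): the common
+// scale-rotate-translate world matrix built from the affine helper above.
+inline XMMATRIX ExampleWorldMatrix(FXMVECTOR rotationQuaternion)
+{
+ return XMMatrixAffineTransformation(
+ XMVectorReplicate(2.0f), // uniform scale
+ XMVectorZero(), // rotate about the object origin
+ rotationQuaternion,
+ XMVectorSet(10.0f, 0.0f, 5.0f, 0.0f)); // translation
+}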
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixReflect
+(
+ FXMVECTOR ReflectionPlane
+)
+{
+ assert(!XMVector3Equal(ReflectionPlane, XMVectorZero()));
+ assert(!XMPlaneIsInfinite(ReflectionPlane));
+
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+ static const XMVECTORF32 NegativeTwo = {-2.0f, -2.0f, -2.0f, 0.0f};
+
+ XMVECTOR P = XMPlaneNormalize(ReflectionPlane);
+ XMVECTOR S = XMVectorMultiply(P, NegativeTwo);
+
+ XMVECTOR A = XMVectorSplatX(P);
+ XMVECTOR B = XMVectorSplatY(P);
+ XMVECTOR C = XMVectorSplatZ(P);
+ XMVECTOR D = XMVectorSplatW(P);
+
+ XMMATRIX M;
+ M.r[0] = XMVectorMultiplyAdd(A, S, g_XMIdentityR0.v);
+ M.r[1] = XMVectorMultiplyAdd(B, S, g_XMIdentityR1.v);
+ M.r[2] = XMVectorMultiplyAdd(C, S, g_XMIdentityR2.v);
+ M.r[3] = XMVectorMultiplyAdd(D, S, g_XMIdentityR3.v);
+ return M;
+
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
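+
+// Example (editorial sketch, not part of the original file): mirroring a
+// point across the ground plane y = 0.
+inline XMVECTOR ExampleMirrorAcrossGround(FXMVECTOR point)
+{
+ XMVECTOR groundPlane = XMVectorSet(0.0f, 1.0f, 0.0f, 0.0f); // normal (0,1,0), d = 0
+ XMMATRIX reflection = XMMatrixReflect(groundPlane);
+ return XMVector3TransformCoord(point, reflection);
+}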
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixShadow
+(
+ FXMVECTOR ShadowPlane,
+ FXMVECTOR LightPosition
+)
+{
+ static const XMVECTORU32 Select0001 = {XM_SELECT_0, XM_SELECT_0, XM_SELECT_0, XM_SELECT_1};
+
+ assert(!XMVector3Equal(ShadowPlane, XMVectorZero()));
+ assert(!XMPlaneIsInfinite(ShadowPlane));
+
+ XMVECTOR P = XMPlaneNormalize(ShadowPlane);
+ XMVECTOR Dot = XMPlaneDot(P, LightPosition);
+ P = XMVectorNegate(P);
+ XMVECTOR D = XMVectorSplatW(P);
+ XMVECTOR C = XMVectorSplatZ(P);
+ XMVECTOR B = XMVectorSplatY(P);
+ XMVECTOR A = XMVectorSplatX(P);
+ Dot = XMVectorSelect(Select0001.v, Dot, Select0001.v);
+
+ XMMATRIX M;
+ M.r[3] = XMVectorMultiplyAdd(D, LightPosition, Dot);
+ Dot = XMVectorRotateLeft(Dot, 1);
+ M.r[2] = XMVectorMultiplyAdd(C, LightPosition, Dot);
+ Dot = XMVectorRotateLeft(Dot, 1);
+ M.r[1] = XMVectorMultiplyAdd(B, LightPosition, Dot);
+ Dot = XMVectorRotateLeft(Dot, 1);
+ M.r[0] = XMVectorMultiplyAdd(A, LightPosition, Dot);
+ return M;
+}
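+
+// Example (editorial sketch, not part of the original file): a planar-shadow
+// matrix that flattens geometry onto the ground plane. A light position with
+// w = 1 acts as a point light; w = 0 as a directional light.
+inline XMMATRIX ExampleGroundShadow(FXMVECTOR lightPosition)
+{
+ XMVECTOR groundPlane = XMVectorSet(0.0f, 1.0f, 0.0f, 0.0f); // plane y = 0
+ return XMMatrixShadow(groundPlane, lightPosition);
+}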
+
+//------------------------------------------------------------------------------
+// View and projection initialization operations
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixLookAtLH
+(
+ FXMVECTOR EyePosition,
+ FXMVECTOR FocusPosition,
+ FXMVECTOR UpDirection
+)
+{
+ XMVECTOR EyeDirection = XMVectorSubtract(FocusPosition, EyePosition);
+ return XMMatrixLookToLH(EyePosition, EyeDirection, UpDirection);
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixLookAtRH
+(
+ FXMVECTOR EyePosition,
+ FXMVECTOR FocusPosition,
+ FXMVECTOR UpDirection
+)
+{
+ XMVECTOR NegEyeDirection = XMVectorSubtract(EyePosition, FocusPosition);
+ return XMMatrixLookToLH(EyePosition, NegEyeDirection, UpDirection);
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixLookToLH
+(
+ FXMVECTOR EyePosition,
+ FXMVECTOR EyeDirection,
+ FXMVECTOR UpDirection
+)
+{
+ assert(!XMVector3Equal(EyeDirection, XMVectorZero()));
+ assert(!XMVector3IsInfinite(EyeDirection));
+ assert(!XMVector3Equal(UpDirection, XMVectorZero()));
+ assert(!XMVector3IsInfinite(UpDirection));
+
+#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
+
+ XMVECTOR R2 = XMVector3Normalize(EyeDirection);
+
+ XMVECTOR R0 = XMVector3Cross(UpDirection, R2);
+ R0 = XMVector3Normalize(R0);
+
+ XMVECTOR R1 = XMVector3Cross(R2, R0);
+
+ XMVECTOR NegEyePosition = XMVectorNegate(EyePosition);
+
+ XMVECTOR D0 = XMVector3Dot(R0, NegEyePosition);
+ XMVECTOR D1 = XMVector3Dot(R1, NegEyePosition);
+ XMVECTOR D2 = XMVector3Dot(R2, NegEyePosition);
+
+ XMMATRIX M;
+ M.r[0] = XMVectorSelect(D0, R0, g_XMSelect1110.v);
+ M.r[1] = XMVectorSelect(D1, R1, g_XMSelect1110.v);
+ M.r[2] = XMVectorSelect(D2, R2, g_XMSelect1110.v);
+ M.r[3] = g_XMIdentityR3.v;
+
+ M = XMMatrixTranspose(M);
+
+ return M;
+
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
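+
+// Example (editorial sketch, not part of the original file): a first-person
+// view matrix from a camera position and forward direction.
+inline XMMATRIX ExampleFirstPersonView(FXMVECTOR position, FXMVECTOR forward)
+{
+ XMVECTOR up = XMVectorSet(0.0f, 1.0f, 0.0f, 0.0f);
+ return XMMatrixLookToLH(position, forward, up);
+}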
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixLookToRH
+(
+ FXMVECTOR EyePosition,
+ FXMVECTOR EyeDirection,
+ FXMVECTOR UpDirection
+)
+{
+ XMVECTOR NegEyeDirection = XMVectorNegate(EyeDirection);
+ return XMMatrixLookToLH(EyePosition, NegEyeDirection, UpDirection);
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixPerspectiveLH
+(
+ float ViewWidth,
+ float ViewHeight,
+ float NearZ,
+ float FarZ
+)
+{
+ assert(!XMScalarNearEqual(ViewWidth, 0.0f, 0.00001f));
+ assert(!XMScalarNearEqual(ViewHeight, 0.0f, 0.00001f));
+ assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
+
+#if defined(_XM_NO_INTRINSICS_)
+
+ float TwoNearZ = NearZ + NearZ;
+ float fRange = FarZ / (FarZ - NearZ);
+
+ XMMATRIX M;
+ M.m[0][0] = TwoNearZ / ViewWidth;
+ M.m[0][1] = 0.0f;
+ M.m[0][2] = 0.0f;
+ M.m[0][3] = 0.0f;
+
+ M.m[1][0] = 0.0f;
+ M.m[1][1] = TwoNearZ / ViewHeight;
+ M.m[1][2] = 0.0f;
+ M.m[1][3] = 0.0f;
+
+ M.m[2][0] = 0.0f;
+ M.m[2][1] = 0.0f;
+ M.m[2][2] = fRange;
+ M.m[2][3] = 1.0f;
+
+ M.m[3][0] = 0.0f;
+ M.m[3][1] = 0.0f;
+ M.m[3][2] = -fRange * NearZ;
+ M.m[3][3] = 0.0f;
+ return M;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ float TwoNearZ = NearZ + NearZ;
+ float fRange = FarZ / (FarZ - NearZ);
+ const XMVECTOR Zero = vdupq_n_f32(0);
+ XMMATRIX M;
+ M.r[0] = vsetq_lane_f32( TwoNearZ / ViewWidth, Zero, 0 );
+ M.r[1] = vsetq_lane_f32( TwoNearZ / ViewHeight, Zero, 1 );
+ M.r[2] = vsetq_lane_f32( fRange, g_XMIdentityR3.v, 2 );
+ M.r[3] = vsetq_lane_f32( -fRange * NearZ, Zero, 2 );
+ return M;
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMMATRIX M;
+ float TwoNearZ = NearZ + NearZ;
+ float fRange = FarZ / (FarZ - NearZ);
+ // Note: This is recorded on the stack
+ XMVECTOR rMem = {
+ TwoNearZ / ViewWidth,
+ TwoNearZ / ViewHeight,
+ fRange,
+ -fRange * NearZ
+ };
+ // Copy from memory to SSE register
+ XMVECTOR vValues = rMem;
+ XMVECTOR vTemp = _mm_setzero_ps();
+ // Copy x only
+ vTemp = _mm_move_ss(vTemp,vValues);
+ // TwoNearZ / ViewWidth,0,0,0
+ M.r[0] = vTemp;
+ // 0,TwoNearZ / ViewHeight,0,0
+ vTemp = vValues;
+ vTemp = _mm_and_ps(vTemp,g_XMMaskY);
+ M.r[1] = vTemp;
+ // x=fRange,y=-fRange * NearZ,0,1.0f
+ vValues = _mm_shuffle_ps(vValues,g_XMIdentityR3,_MM_SHUFFLE(3,2,3,2));
+ // 0,0,fRange,1.0f
+ vTemp = _mm_setzero_ps();
+ vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(3,0,0,0));
+ M.r[2] = vTemp;
+ // 0,0,-fRange * NearZ,0
+ vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(2,1,0,0));
+ M.r[3] = vTemp;
+
+ return M;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixPerspectiveRH
+(
+ float ViewWidth,
+ float ViewHeight,
+ float NearZ,
+ float FarZ
+)
+{
+ assert(!XMScalarNearEqual(ViewWidth, 0.0f, 0.00001f));
+ assert(!XMScalarNearEqual(ViewHeight, 0.0f, 0.00001f));
+ assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
+
+#if defined(_XM_NO_INTRINSICS_)
+
+ float TwoNearZ = NearZ + NearZ;
+ float fRange = FarZ / (NearZ - FarZ);
+
+ XMMATRIX M;
+ M.m[0][0] = TwoNearZ / ViewWidth;
+ M.m[0][1] = 0.0f;
+ M.m[0][2] = 0.0f;
+ M.m[0][3] = 0.0f;
+
+ M.m[1][0] = 0.0f;
+ M.m[1][1] = TwoNearZ / ViewHeight;
+ M.m[1][2] = 0.0f;
+ M.m[1][3] = 0.0f;
+
+ M.m[2][0] = 0.0f;
+ M.m[2][1] = 0.0f;
+ M.m[2][2] = fRange;
+ M.m[2][3] = -1.0f;
+
+ M.m[3][0] = 0.0f;
+ M.m[3][1] = 0.0f;
+ M.m[3][2] = fRange * NearZ;
+ M.m[3][3] = 0.0f;
+ return M;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ float TwoNearZ = NearZ + NearZ;
+ float fRange = FarZ / (NearZ - FarZ);
+ const XMVECTOR Zero = vdupq_n_f32(0);
+
+ XMMATRIX M;
+ M.r[0] = vsetq_lane_f32( TwoNearZ / ViewWidth, Zero, 0 );
+ M.r[1] = vsetq_lane_f32( TwoNearZ / ViewHeight, Zero, 1 );
+ M.r[2] = vsetq_lane_f32( fRange, g_XMNegIdentityR3.v, 2 );
+ M.r[3] = vsetq_lane_f32( fRange * NearZ, Zero, 2 );
+ return M;
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMMATRIX M;
+ float TwoNearZ = NearZ + NearZ;
+ float fRange = FarZ / (NearZ-FarZ);
+ // Note: This is recorded on the stack
+ XMVECTOR rMem = {
+ TwoNearZ / ViewWidth,
+ TwoNearZ / ViewHeight,
+ fRange,
+ fRange * NearZ
+ };
+ // Copy from memory to SSE register
+ XMVECTOR vValues = rMem;
+ XMVECTOR vTemp = _mm_setzero_ps();
+ // Copy x only
+ vTemp = _mm_move_ss(vTemp,vValues);
+ // TwoNearZ / ViewWidth,0,0,0
+ M.r[0] = vTemp;
+ // 0,TwoNearZ / ViewHeight,0,0
+ vTemp = vValues;
+ vTemp = _mm_and_ps(vTemp,g_XMMaskY);
+ M.r[1] = vTemp;
+ // x=fRange,y=fRange * NearZ,0,-1.0f
+ vValues = _mm_shuffle_ps(vValues,g_XMNegIdentityR3,_MM_SHUFFLE(3,2,3,2));
+ // 0,0,fRange,-1.0f
+ vTemp = _mm_setzero_ps();
+ vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(3,0,0,0));
+ M.r[2] = vTemp;
+ // 0,0,fRange * NearZ,0
+ vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(2,1,0,0));
+ M.r[3] = vTemp;
+ return M;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixPerspectiveFovLH
+(
+ float FovAngleY,
+ float AspectHByW,
+ float NearZ,
+ float FarZ
+)
+{
+ assert(!XMScalarNearEqual(FovAngleY, 0.0f, 0.00001f * 2.0f));
+ assert(!XMScalarNearEqual(AspectHByW, 0.0f, 0.00001f));
+ assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
+
+#if defined(_XM_NO_INTRINSICS_)
+
+ float SinFov;
+ float CosFov;
+ XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY);
+
+ float Height = CosFov / SinFov;
+ float Width = Height / AspectHByW;
+ float fRange = FarZ / (FarZ-NearZ);
+
+ XMMATRIX M;
+ M.m[0][0] = Width;
+ M.m[0][1] = 0.0f;
+ M.m[0][2] = 0.0f;
+ M.m[0][3] = 0.0f;
+
+ M.m[1][0] = 0.0f;
+ M.m[1][1] = Height;
+ M.m[1][2] = 0.0f;
+ M.m[1][3] = 0.0f;
+
+ M.m[2][0] = 0.0f;
+ M.m[2][1] = 0.0f;
+ M.m[2][2] = fRange;
+ M.m[2][3] = 1.0f;
+
+ M.m[3][0] = 0.0f;
+ M.m[3][1] = 0.0f;
+ M.m[3][2] = -fRange * NearZ;
+ M.m[3][3] = 0.0f;
+ return M;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ float SinFov;
+ float CosFov;
+ XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY);
+
+ float fRange = FarZ / (FarZ-NearZ);
+ float Height = CosFov / SinFov;
+ float Width = Height / AspectHByW;
+ const XMVECTOR Zero = vdupq_n_f32(0);
+
+ XMMATRIX M;
+ M.r[0] = vsetq_lane_f32( Width, Zero, 0 );
+ M.r[1] = vsetq_lane_f32( Height, Zero, 1 );
+ M.r[2] = vsetq_lane_f32( fRange, g_XMIdentityR3.v, 2 );
+ M.r[3] = vsetq_lane_f32( -fRange * NearZ, Zero, 2 );
+ return M;
+#elif defined(_XM_SSE_INTRINSICS_)
+ float SinFov;
+ float CosFov;
+ XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY);
+
+ float fRange = FarZ / (FarZ-NearZ);
+ // Note: This is recorded on the stack
+ float Height = CosFov / SinFov;
+ XMVECTOR rMem = {
+ Height / AspectHByW,
+ Height,
+ fRange,
+ -fRange * NearZ
+ };
+ // Copy from memory to SSE register
+ XMVECTOR vValues = rMem;
+ XMVECTOR vTemp = _mm_setzero_ps();
+ // Copy x only
+ vTemp = _mm_move_ss(vTemp,vValues);
+ // Height / AspectHByW,0,0,0
+ XMMATRIX M;
+ M.r[0] = vTemp;
+ // 0,Height,0,0
+ vTemp = vValues;
+ vTemp = _mm_and_ps(vTemp,g_XMMaskY);
+ M.r[1] = vTemp;
+ // x=fRange,y=-fRange * NearZ,0,1.0f
+ vTemp = _mm_setzero_ps();
+ vValues = _mm_shuffle_ps(vValues,g_XMIdentityR3,_MM_SHUFFLE(3,2,3,2));
+ // 0,0,fRange,1.0f
+ vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(3,0,0,0));
+ M.r[2] = vTemp;
+ // 0,0,-fRange * NearZ,0.0f
+ vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(2,1,0,0));
+ M.r[3] = vTemp;
+ return M;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
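+
+// Example (editorial sketch, not part of the original file): a typical
+// left-handed camera projection with a 60-degree vertical field of view.
+inline XMMATRIX ExampleCameraProjection()
+{
+ return XMMatrixPerspectiveFovLH(XMConvertToRadians(60.0f), 16.0f / 9.0f, 0.1f, 1000.0f);
+}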
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixPerspectiveFovRH
+(
+ float FovAngleY,
+ float AspectHByW,
+ float NearZ,
+ float FarZ
+)
+{
+ assert(!XMScalarNearEqual(FovAngleY, 0.0f, 0.00001f * 2.0f));
+ assert(!XMScalarNearEqual(AspectHByW, 0.0f, 0.00001f));
+ assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
+
+#if defined(_XM_NO_INTRINSICS_)
+
+ float SinFov;
+ float CosFov;
+ XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY);
+
+ float Height = CosFov / SinFov;
+ float Width = Height / AspectHByW;
+ float fRange = FarZ / (NearZ-FarZ);
+
+ XMMATRIX M;
+ M.m[0][0] = Width;
+ M.m[0][1] = 0.0f;
+ M.m[0][2] = 0.0f;
+ M.m[0][3] = 0.0f;
+
+ M.m[1][0] = 0.0f;
+ M.m[1][1] = Height;
+ M.m[1][2] = 0.0f;
+ M.m[1][3] = 0.0f;
+
+ M.m[2][0] = 0.0f;
+ M.m[2][1] = 0.0f;
+ M.m[2][2] = fRange;
+ M.m[2][3] = -1.0f;
+
+ M.m[3][0] = 0.0f;
+ M.m[3][1] = 0.0f;
+ M.m[3][2] = fRange * NearZ;
+ M.m[3][3] = 0.0f;
+ return M;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ float SinFov;
+ float CosFov;
+ XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY);
+ float fRange = FarZ / (NearZ-FarZ);
+ float Height = CosFov / SinFov;
+ float Width = Height / AspectHByW;
+ const XMVECTOR Zero = vdupq_n_f32(0);
+
+ XMMATRIX M;
+ M.r[0] = vsetq_lane_f32( Width, Zero, 0 );
+ M.r[1] = vsetq_lane_f32( Height, Zero, 1 );
+ M.r[2] = vsetq_lane_f32( fRange, g_XMNegIdentityR3.v, 2 );
+ M.r[3] = vsetq_lane_f32( fRange * NearZ, Zero, 2 );
+ return M;
+#elif defined(_XM_SSE_INTRINSICS_)
+ float SinFov;
+ float CosFov;
+ XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY);
+ float fRange = FarZ / (NearZ-FarZ);
+ // Note: This is recorded on the stack
+ float Height = CosFov / SinFov;
+ XMVECTOR rMem = {
+ Height / AspectHByW,
+ Height,
+ fRange,
+ fRange * NearZ
+ };
+ // Copy from memory to SSE register
+ XMVECTOR vValues = rMem;
+ XMVECTOR vTemp = _mm_setzero_ps();
+ // Copy x only
+ vTemp = _mm_move_ss(vTemp,vValues);
+ // Height / AspectHByW,0,0,0
+ XMMATRIX M;
+ M.r[0] = vTemp;
+ // 0,Height,0,0
+ vTemp = vValues;
+ vTemp = _mm_and_ps(vTemp,g_XMMaskY);
+ M.r[1] = vTemp;
+ // x=fRange,y=fRange * NearZ,0,-1.0f
+ vTemp = _mm_setzero_ps();
+ vValues = _mm_shuffle_ps(vValues,g_XMNegIdentityR3,_MM_SHUFFLE(3,2,3,2));
+ // 0,0,fRange,-1.0f
+ vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(3,0,0,0));
+ M.r[2] = vTemp;
+ // 0,0,fRange * NearZ,0.0f
+ vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(2,1,0,0));
+ M.r[3] = vTemp;
+ return M;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixPerspectiveOffCenterLH
+(
+ float ViewLeft,
+ float ViewRight,
+ float ViewBottom,
+ float ViewTop,
+ float NearZ,
+ float FarZ
+)
+{
+ assert(!XMScalarNearEqual(ViewRight, ViewLeft, 0.00001f));
+ assert(!XMScalarNearEqual(ViewTop, ViewBottom, 0.00001f));
+ assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
+
+#if defined(_XM_NO_INTRINSICS_)
+
+ float TwoNearZ = NearZ + NearZ;
+ float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
+ float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
+ float fRange = FarZ / (FarZ-NearZ);
+
+ XMMATRIX M;
+ M.m[0][0] = TwoNearZ * ReciprocalWidth;
+ M.m[0][1] = 0.0f;
+ M.m[0][2] = 0.0f;
+ M.m[0][3] = 0.0f;
+
+ M.m[1][0] = 0.0f;
+ M.m[1][1] = TwoNearZ * ReciprocalHeight;
+ M.m[1][2] = 0.0f;
+ M.m[1][3] = 0.0f;
+
+ M.m[2][0] = -(ViewLeft + ViewRight) * ReciprocalWidth;
+ M.m[2][1] = -(ViewTop + ViewBottom) * ReciprocalHeight;
+ M.m[2][2] = fRange;
+ M.m[2][3] = 1.0f;
+
+ M.m[3][0] = 0.0f;
+ M.m[3][1] = 0.0f;
+ M.m[3][2] = -fRange * NearZ;
+ M.m[3][3] = 0.0f;
+ return M;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ float TwoNearZ = NearZ + NearZ;
+ float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
+ float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
+ float fRange = FarZ / (FarZ-NearZ);
+ const XMVECTOR Zero = vdupq_n_f32(0);
+
+ XMMATRIX M;
+ M.r[0] = vsetq_lane_f32( TwoNearZ * ReciprocalWidth, Zero, 0 );
+ M.r[1] = vsetq_lane_f32( TwoNearZ * ReciprocalHeight, Zero, 1 );
+ M.r[2] = XMVectorSet(-(ViewLeft + ViewRight) * ReciprocalWidth,
+ -(ViewTop + ViewBottom) * ReciprocalHeight,
+ fRange,
+ 1.0f);
+ M.r[3] = vsetq_lane_f32( -fRange * NearZ, Zero, 2 );
+ return M;
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMMATRIX M;
+ float TwoNearZ = NearZ+NearZ;
+ float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
+ float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
+ float fRange = FarZ / (FarZ-NearZ);
+ // Note: This is recorded on the stack
+ XMVECTOR rMem = {
+ TwoNearZ*ReciprocalWidth,
+ TwoNearZ*ReciprocalHeight,
+ -fRange * NearZ,
+ 0
+ };
+ // Copy from memory to SSE register
+ XMVECTOR vValues = rMem;
+ XMVECTOR vTemp = _mm_setzero_ps();
+ // Copy x only
+ vTemp = _mm_move_ss(vTemp,vValues);
+ // TwoNearZ*ReciprocalWidth,0,0,0
+ M.r[0] = vTemp;
+ // 0,TwoNearZ*ReciprocalHeight,0,0
+ vTemp = vValues;
+ vTemp = _mm_and_ps(vTemp,g_XMMaskY);
+ M.r[1] = vTemp;
+ // -(ViewLeft + ViewRight) * ReciprocalWidth,-(ViewTop + ViewBottom) * ReciprocalHeight,fRange,1.0f
+ M.r[2] = XMVectorSet( -(ViewLeft + ViewRight) * ReciprocalWidth,
+ -(ViewTop + ViewBottom) * ReciprocalHeight,
+ fRange,
+ 1.0f );
+ // 0,0,-fRange * NearZ,0.0f
+ vValues = _mm_and_ps(vValues,g_XMMaskZ);
+ M.r[3] = vValues;
+ return M;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
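+
+// Example (editorial sketch, not part of the original file): an off-center
+// frustum covering only the left half of a symmetric near plane, as used for
+// tiled rendering of a larger image. Extents are illustrative.
+inline XMMATRIX ExampleSubFrustum()
+{
+ return XMMatrixPerspectiveOffCenterLH(-0.5f, 0.0f, -0.5f, 0.5f, 0.1f, 100.0f);
+}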
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixPerspectiveOffCenterRH
+(
+ float ViewLeft,
+ float ViewRight,
+ float ViewBottom,
+ float ViewTop,
+ float NearZ,
+ float FarZ
+)
+{
+ assert(!XMScalarNearEqual(ViewRight, ViewLeft, 0.00001f));
+ assert(!XMScalarNearEqual(ViewTop, ViewBottom, 0.00001f));
+ assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
+
+#if defined(_XM_NO_INTRINSICS_)
+
+ float TwoNearZ = NearZ + NearZ;
+ float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
+ float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
+ float fRange = FarZ / (NearZ-FarZ);
+
+ XMMATRIX M;
+ M.m[0][0] = TwoNearZ * ReciprocalWidth;
+ M.m[0][1] = 0.0f;
+ M.m[0][2] = 0.0f;
+ M.m[0][3] = 0.0f;
+
+ M.m[1][0] = 0.0f;
+ M.m[1][1] = TwoNearZ * ReciprocalHeight;
+ M.m[1][2] = 0.0f;
+ M.m[1][3] = 0.0f;
+
+ M.m[2][0] = (ViewLeft + ViewRight) * ReciprocalWidth;
+ M.m[2][1] = (ViewTop + ViewBottom) * ReciprocalHeight;
+ M.m[2][2] = fRange;
+ M.m[2][3] = -1.0f;
+
+ M.m[3][0] = 0.0f;
+ M.m[3][1] = 0.0f;
+ M.m[3][2] = fRange * NearZ;
+ M.m[3][3] = 0.0f;
+ return M;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ float TwoNearZ = NearZ + NearZ;
+ float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
+ float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
+ float fRange = FarZ / (NearZ-FarZ);
+ const XMVECTOR Zero = vdupq_n_f32(0);
+
+ XMMATRIX M;
+ M.r[0] = vsetq_lane_f32( TwoNearZ * ReciprocalWidth, Zero, 0 );
+ M.r[1] = vsetq_lane_f32( TwoNearZ * ReciprocalHeight, Zero, 1 );
+ M.r[2] = XMVectorSet((ViewLeft + ViewRight) * ReciprocalWidth,
+ (ViewTop + ViewBottom) * ReciprocalHeight,
+ fRange,
+ -1.0f);
+ M.r[3] = vsetq_lane_f32( fRange * NearZ, Zero, 2 );
+ return M;
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMMATRIX M;
+ float TwoNearZ = NearZ+NearZ;
+ float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
+ float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
+ float fRange = FarZ / (NearZ-FarZ);
+ // Note: This is recorded on the stack
+ XMVECTOR rMem = {
+ TwoNearZ*ReciprocalWidth,
+ TwoNearZ*ReciprocalHeight,
+ fRange * NearZ,
+ 0
+ };
+ // Copy from memory to SSE register
+ XMVECTOR vValues = rMem;
+ XMVECTOR vTemp = _mm_setzero_ps();
+ // Copy x only
+ vTemp = _mm_move_ss(vTemp,vValues);
+ // TwoNearZ*ReciprocalWidth,0,0,0
+ M.r[0] = vTemp;
+ // 0,TwoNearZ*ReciprocalHeight,0,0
+ vTemp = vValues;
+ vTemp = _mm_and_ps(vTemp,g_XMMaskY);
+ M.r[1] = vTemp;
+ // (ViewLeft + ViewRight) * ReciprocalWidth,(ViewTop + ViewBottom) * ReciprocalHeight,fRange,-1.0f
+ M.r[2] = XMVectorSet( (ViewLeft + ViewRight) * ReciprocalWidth,
+ (ViewTop + ViewBottom) * ReciprocalHeight,
+ fRange,
+ -1.0f );
+ // 0,0,fRange * NearZ,0.0f
+ vValues = _mm_and_ps(vValues,g_XMMaskZ);
+ M.r[3] = vValues;
+ return M;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixOrthographicLH
+(
+ float ViewWidth,
+ float ViewHeight,
+ float NearZ,
+ float FarZ
+)
+{
+ assert(!XMScalarNearEqual(ViewWidth, 0.0f, 0.00001f));
+ assert(!XMScalarNearEqual(ViewHeight, 0.0f, 0.00001f));
+ assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
+
+#if defined(_XM_NO_INTRINSICS_)
+
+ float fRange = 1.0f / (FarZ-NearZ);
+
+ XMMATRIX M;
+ M.m[0][0] = 2.0f / ViewWidth;
+ M.m[0][1] = 0.0f;
+ M.m[0][2] = 0.0f;
+ M.m[0][3] = 0.0f;
+
+ M.m[1][0] = 0.0f;
+ M.m[1][1] = 2.0f / ViewHeight;
+ M.m[1][2] = 0.0f;
+ M.m[1][3] = 0.0f;
+
+ M.m[2][0] = 0.0f;
+ M.m[2][1] = 0.0f;
+ M.m[2][2] = fRange;
+ M.m[2][3] = 0.0f;
+
+ M.m[3][0] = 0.0f;
+ M.m[3][1] = 0.0f;
+ M.m[3][2] = -fRange * NearZ;
+ M.m[3][3] = 1.0f;
+ return M;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ float fRange = 1.0f / (FarZ-NearZ);
+
+ const XMVECTOR Zero = vdupq_n_f32(0);
+ XMMATRIX M;
+ M.r[0] = vsetq_lane_f32( 2.0f / ViewWidth, Zero, 0 );
+ M.r[1] = vsetq_lane_f32( 2.0f / ViewHeight, Zero, 1 );
+ M.r[2] = vsetq_lane_f32( fRange, Zero, 2 );
+ M.r[3] = vsetq_lane_f32( -fRange * NearZ, g_XMIdentityR3.v, 2 );
+ return M;
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMMATRIX M;
+ float fRange = 1.0f / (FarZ-NearZ);
+ // Note: This is recorded on the stack
+ XMVECTOR rMem = {
+ 2.0f / ViewWidth,
+ 2.0f / ViewHeight,
+ fRange,
+ -fRange * NearZ
+ };
+ // Copy from memory to SSE register
+ XMVECTOR vValues = rMem;
+ XMVECTOR vTemp = _mm_setzero_ps();
+ // Copy x only
+ vTemp = _mm_move_ss(vTemp,vValues);
+ // 2.0f / ViewWidth,0,0,0
+ M.r[0] = vTemp;
+ // 0,2.0f / ViewHeight,0,0
+ vTemp = vValues;
+ vTemp = _mm_and_ps(vTemp,g_XMMaskY);
+ M.r[1] = vTemp;
+ // x=fRange,y=-fRange * NearZ,0,1.0f
+ vTemp = _mm_setzero_ps();
+ vValues = _mm_shuffle_ps(vValues,g_XMIdentityR3,_MM_SHUFFLE(3,2,3,2));
+ // 0,0,fRange,0.0f
+ vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(2,0,0,0));
+ M.r[2] = vTemp;
+ // 0,0,-fRange * NearZ,1.0f
+ vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(3,1,0,0));
+ M.r[3] = vTemp;
+ return M;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
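+
+// Example (editorial sketch, not part of the original file): a centered
+// orthographic volume sized 1280 x 720 with Z in [0.1, 100].
+inline XMMATRIX ExampleOrthoProjection()
+{
+ return XMMatrixOrthographicLH(1280.0f, 720.0f, 0.1f, 100.0f);
+}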
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixOrthographicRH
+(
+ float ViewWidth,
+ float ViewHeight,
+ float NearZ,
+ float FarZ
+)
+{
+ assert(!XMScalarNearEqual(ViewWidth, 0.0f, 0.00001f));
+ assert(!XMScalarNearEqual(ViewHeight, 0.0f, 0.00001f));
+ assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
+
+#if defined(_XM_NO_INTRINSICS_)
+
+ float fRange = 1.0f / (NearZ-FarZ);
+
+ XMMATRIX M;
+ M.m[0][0] = 2.0f / ViewWidth;
+ M.m[0][1] = 0.0f;
+ M.m[0][2] = 0.0f;
+ M.m[0][3] = 0.0f;
+
+ M.m[1][0] = 0.0f;
+ M.m[1][1] = 2.0f / ViewHeight;
+ M.m[1][2] = 0.0f;
+ M.m[1][3] = 0.0f;
+
+ M.m[2][0] = 0.0f;
+ M.m[2][1] = 0.0f;
+ M.m[2][2] = fRange;
+ M.m[2][3] = 0.0f;
+
+ M.m[3][0] = 0.0f;
+ M.m[3][1] = 0.0f;
+ M.m[3][2] = fRange * NearZ;
+ M.m[3][3] = 1.0f;
+ return M;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ float fRange = 1.0f / (NearZ-FarZ);
+
+ const XMVECTOR Zero = vdupq_n_f32(0);
+ XMMATRIX M;
+ M.r[0] = vsetq_lane_f32( 2.0f / ViewWidth, Zero, 0 );
+ M.r[1] = vsetq_lane_f32( 2.0f / ViewHeight, Zero, 1 );
+ M.r[2] = vsetq_lane_f32( fRange, Zero, 2 );
+ M.r[3] = vsetq_lane_f32( fRange * NearZ, g_XMIdentityR3.v, 2 );
+ return M;
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMMATRIX M;
+ float fRange = 1.0f / (NearZ-FarZ);
+ // Note: This is recorded on the stack
+ XMVECTOR rMem = {
+ 2.0f / ViewWidth,
+ 2.0f / ViewHeight,
+ fRange,
+ fRange * NearZ
+ };
+ // Copy from memory to SSE register
+ XMVECTOR vValues = rMem;
+ XMVECTOR vTemp = _mm_setzero_ps();
+ // Copy x only
+ vTemp = _mm_move_ss(vTemp,vValues);
+ // 2.0f / ViewWidth,0,0,0
+ M.r[0] = vTemp;
+ // 0,2.0f / ViewHeight,0,0
+ vTemp = vValues;
+ vTemp = _mm_and_ps(vTemp,g_XMMaskY);
+ M.r[1] = vTemp;
+ // x=fRange,y=fRange * NearZ,0,1.0f
+ vTemp = _mm_setzero_ps();
+ vValues = _mm_shuffle_ps(vValues,g_XMIdentityR3,_MM_SHUFFLE(3,2,3,2));
+ // 0,0,fRange,0.0f
+ vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(2,0,0,0));
+ M.r[2] = vTemp;
+ // 0,0,fRange * NearZ,1.0f
+ vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(3,1,0,0));
+ M.r[3] = vTemp;
+ return M;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixOrthographicOffCenterLH
+(
+ float ViewLeft,
+ float ViewRight,
+ float ViewBottom,
+ float ViewTop,
+ float NearZ,
+ float FarZ
+)
+{
+ assert(!XMScalarNearEqual(ViewRight, ViewLeft, 0.00001f));
+ assert(!XMScalarNearEqual(ViewTop, ViewBottom, 0.00001f));
+ assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
+
+#if defined(_XM_NO_INTRINSICS_)
+
+ float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
+ float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
+ float fRange = 1.0f / (FarZ-NearZ);
+
+ XMMATRIX M;
+ M.m[0][0] = ReciprocalWidth + ReciprocalWidth;
+ M.m[0][1] = 0.0f;
+ M.m[0][2] = 0.0f;
+ M.m[0][3] = 0.0f;
+
+ M.m[1][0] = 0.0f;
+ M.m[1][1] = ReciprocalHeight + ReciprocalHeight;
+ M.m[1][2] = 0.0f;
+ M.m[1][3] = 0.0f;
+
+ M.m[2][0] = 0.0f;
+ M.m[2][1] = 0.0f;
+ M.m[2][2] = fRange;
+ M.m[2][3] = 0.0f;
+
+ M.m[3][0] = -(ViewLeft + ViewRight) * ReciprocalWidth;
+ M.m[3][1] = -(ViewTop + ViewBottom) * ReciprocalHeight;
+ M.m[3][2] = -fRange * NearZ;
+ M.m[3][3] = 1.0f;
+ return M;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
+ float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
+ float fRange = 1.0f / (FarZ-NearZ);
+ const XMVECTOR Zero = vdupq_n_f32(0);
+ XMMATRIX M;
+ M.r[0] = vsetq_lane_f32( ReciprocalWidth + ReciprocalWidth, Zero, 0 );
+ M.r[1] = vsetq_lane_f32( ReciprocalHeight + ReciprocalHeight, Zero, 1 );
+ M.r[2] = vsetq_lane_f32( fRange, Zero, 2 );
+ M.r[3] = XMVectorSet(-(ViewLeft + ViewRight) * ReciprocalWidth,
+ -(ViewTop + ViewBottom) * ReciprocalHeight,
+ -fRange * NearZ,
+ 1.0f);
+ return M;
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMMATRIX M;
+ float fReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
+ float fReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
+ float fRange = 1.0f / (FarZ-NearZ);
+ // Note: This is recorded on the stack
+ XMVECTOR rMem = {
+ fReciprocalWidth,
+ fReciprocalHeight,
+ fRange,
+ 1.0f
+ };
+ XMVECTOR rMem2 = {
+ -(ViewLeft + ViewRight),
+ -(ViewTop + ViewBottom),
+ -NearZ,
+ 1.0f
+ };
+ // Copy from memory to SSE register
+ XMVECTOR vValues = rMem;
+ XMVECTOR vTemp = _mm_setzero_ps();
+ // Copy x only
+ vTemp = _mm_move_ss(vTemp,vValues);
+ // fReciprocalWidth*2,0,0,0
+ vTemp = _mm_add_ss(vTemp,vTemp);
+ M.r[0] = vTemp;
+ // 0,fReciprocalHeight*2,0,0
+ vTemp = vValues;
+ vTemp = _mm_and_ps(vTemp,g_XMMaskY);
+ vTemp = _mm_add_ps(vTemp,vTemp);
+ M.r[1] = vTemp;
+ // 0,0,fRange,0.0f
+ vTemp = vValues;
+ vTemp = _mm_and_ps(vTemp,g_XMMaskZ);
+ M.r[2] = vTemp;
+ // -(ViewLeft + ViewRight)*fReciprocalWidth,-(ViewTop + ViewBottom)*fReciprocalHeight,fRange*-NearZ,1.0f
+ vValues = _mm_mul_ps(vValues,rMem2);
+ M.r[3] = vValues;
+ return M;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
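+
+// Example (editorial sketch, not part of the original file): a pixel-space
+// projection for 2D/UI rendering with (0,0) at the top-left corner.
+inline XMMATRIX ExamplePixelSpaceProjection(float width, float height)
+{
+ // Passing Bottom=height and Top=0 flips Y so +Y runs downward.
+ return XMMatrixOrthographicOffCenterLH(0.0f, width, height, 0.0f, 0.0f, 1.0f);
+}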
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMatrixOrthographicOffCenterRH
+(
+ float ViewLeft,
+ float ViewRight,
+ float ViewBottom,
+ float ViewTop,
+ float NearZ,
+ float FarZ
+)
+{
+ assert(!XMScalarNearEqual(ViewRight, ViewLeft, 0.00001f));
+ assert(!XMScalarNearEqual(ViewTop, ViewBottom, 0.00001f));
+ assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
+
+#if defined(_XM_NO_INTRINSICS_)
+
+ float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
+ float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
+ float fRange = 1.0f / (NearZ-FarZ);
+
+ XMMATRIX M;
+ M.m[0][0] = ReciprocalWidth + ReciprocalWidth;
+ M.m[0][1] = 0.0f;
+ M.m[0][2] = 0.0f;
+ M.m[0][3] = 0.0f;
+
+ M.m[1][0] = 0.0f;
+ M.m[1][1] = ReciprocalHeight + ReciprocalHeight;
+ M.m[1][2] = 0.0f;
+ M.m[1][3] = 0.0f;
+
+ M.m[2][0] = 0.0f;
+ M.m[2][1] = 0.0f;
+ M.m[2][2] = fRange;
+ M.m[2][3] = 0.0f;
+
+ M.m[3][0] = -(ViewLeft + ViewRight) * ReciprocalWidth;
+ M.m[3][1] = -(ViewTop + ViewBottom) * ReciprocalHeight;
+ M.m[3][2] = fRange * NearZ;
+ M.m[3][3] = 1.0f;
+ return M;
+
+#elif defined(_XM_ARM_NEON_INTRINSICS_)
+ float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
+ float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
+ float fRange = 1.0f / (NearZ-FarZ);
+ const XMVECTOR Zero = vdupq_n_f32(0);
+ XMMATRIX M;
+ M.r[0] = vsetq_lane_f32( ReciprocalWidth + ReciprocalWidth, Zero, 0 );
+ M.r[1] = vsetq_lane_f32( ReciprocalHeight + ReciprocalHeight, Zero, 1 );
+ M.r[2] = vsetq_lane_f32( fRange, Zero, 2 );
+ M.r[3] = XMVectorSet(-(ViewLeft + ViewRight) * ReciprocalWidth,
+ -(ViewTop + ViewBottom) * ReciprocalHeight,
+ fRange * NearZ,
+ 1.0f);
+ return M;
+#elif defined(_XM_SSE_INTRINSICS_)
+ XMMATRIX M;
+ float fReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
+ float fReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
+ float fRange = 1.0f / (NearZ-FarZ);
+ // Note: This is recorded on the stack
+ XMVECTOR rMem = {
+ fReciprocalWidth,
+ fReciprocalHeight,
+ fRange,
+ 1.0f
+ };
+ XMVECTOR rMem2 = {
+ -(ViewLeft + ViewRight),
+ -(ViewTop + ViewBottom),
+ NearZ,
+ 1.0f
+ };
+ // Copy from memory to SSE register
+ XMVECTOR vValues = rMem;
+ XMVECTOR vTemp = _mm_setzero_ps();
+ // Copy x only
+ vTemp = _mm_move_ss(vTemp,vValues);
+ // fReciprocalWidth*2,0,0,0
+ vTemp = _mm_add_ss(vTemp,vTemp);
+ M.r[0] = vTemp;
+ // 0,fReciprocalHeight*2,0,0
+ vTemp = vValues;
+ vTemp = _mm_and_ps(vTemp,g_XMMaskY);
+ vTemp = _mm_add_ps(vTemp,vTemp);
+ M.r[1] = vTemp;
+ // 0,0,fRange,0.0f
+ vTemp = vValues;
+ vTemp = _mm_and_ps(vTemp,g_XMMaskZ);
+ M.r[2] = vTemp;
+ // -(ViewLeft + ViewRight)*fReciprocalWidth,-(ViewTop + ViewBottom)*fReciprocalHeight,fRange*NearZ,1.0f
+ vValues = _mm_mul_ps(vValues,rMem2);
+ M.r[3] = vValues;
+ return M;
+#else // _XM_VMX128_INTRINSICS_
+#endif // _XM_VMX128_INTRINSICS_
+}
+
+
+/****************************************************************************
+ *
+ * XMMATRIX operators and methods
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX::XMMATRIX
+(
+ float m00, float m01, float m02, float m03,
+ float m10, float m11, float m12, float m13,
+ float m20, float m21, float m22, float m23,
+ float m30, float m31, float m32, float m33
+)
+{
+ r[0] = XMVectorSet(m00, m01, m02, m03);
+ r[1] = XMVectorSet(m10, m11, m12, m13);
+ r[2] = XMVectorSet(m20, m21, m22, m23);
+ r[3] = XMVectorSet(m30, m31, m32, m33);
+}
+
+//------------------------------------------------------------------------------
+_Use_decl_annotations_
+inline XMMATRIX::XMMATRIX
+(
+ const float* pArray
+)
+{
+ assert( pArray != NULL );
+ r[0] = XMLoadFloat4((const XMFLOAT4*)pArray);
+ r[1] = XMLoadFloat4((const XMFLOAT4*)(pArray + 4));
+ r[2] = XMLoadFloat4((const XMFLOAT4*)(pArray + 8));
+ r[3] = XMLoadFloat4((const XMFLOAT4*)(pArray + 12));
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMATRIX::operator- () const
+{
+ XMMATRIX R;
+ R.r[0] = XMVectorNegate( r[0] );
+ R.r[1] = XMVectorNegate( r[1] );
+ R.r[2] = XMVectorNegate( r[2] );
+ R.r[3] = XMVectorNegate( r[3] );
+ return R;
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX& XMMATRIX::operator+= (CXMMATRIX M)
+{
+ r[0] = XMVectorAdd( r[0], M.r[0] );
+ r[1] = XMVectorAdd( r[1], M.r[1] );
+ r[2] = XMVectorAdd( r[2], M.r[2] );
+ r[3] = XMVectorAdd( r[3], M.r[3] );
+ return *this;
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX& XMMATRIX::operator-= (CXMMATRIX M)
+{
+ r[0] = XMVectorSubtract( r[0], M.r[0] );
+ r[1] = XMVectorSubtract( r[1], M.r[1] );
+ r[2] = XMVectorSubtract( r[2], M.r[2] );
+ r[3] = XMVectorSubtract( r[3], M.r[3] );
+ return *this;
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX& XMMATRIX::operator*=(CXMMATRIX M)
+{
+ *this = XMMatrixMultiply( *this, M );
+ return *this;
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX& XMMATRIX::operator*= (float S)
+{
+ r[0] = XMVectorScale( r[0], S );
+ r[1] = XMVectorScale( r[1], S );
+ r[2] = XMVectorScale( r[2], S );
+ r[3] = XMVectorScale( r[3], S );
+ return *this;
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX& XMMATRIX::operator/= (float S)
+{
+ assert( S != 0.0f );
+ float t = 1.0f / S;
+ r[0] = XMVectorScale( r[0], t );
+ r[1] = XMVectorScale( r[1], t );
+ r[2] = XMVectorScale( r[2], t );
+ r[3] = XMVectorScale( r[3], t );
+ return *this;
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMATRIX::operator+ (CXMMATRIX M) const
+{
+ XMMATRIX R;
+ R.r[0] = XMVectorAdd( r[0], M.r[0] );
+ R.r[1] = XMVectorAdd( r[1], M.r[1] );
+ R.r[2] = XMVectorAdd( r[2], M.r[2] );
+ R.r[3] = XMVectorAdd( r[3], M.r[3] );
+ return R;
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMATRIX::operator- (CXMMATRIX M) const
+{
+ XMMATRIX R;
+ R.r[0] = XMVectorSubtract( r[0], M.r[0] );
+ R.r[1] = XMVectorSubtract( r[1], M.r[1] );
+ R.r[2] = XMVectorSubtract( r[2], M.r[2] );
+ R.r[3] = XMVectorSubtract( r[3], M.r[3] );
+ return R;
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMATRIX::operator*(CXMMATRIX M) const
+{
+ return XMMatrixMultiply(*this, M);
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMATRIX::operator* (float S) const
+{
+ XMMATRIX R;
+ R.r[0] = XMVectorScale( r[0], S );
+ R.r[1] = XMVectorScale( r[1], S );
+ R.r[2] = XMVectorScale( r[2], S );
+ R.r[3] = XMVectorScale( r[3], S );
+ return R;
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX XMMATRIX::operator/ (float S) const
+{
+ assert( S != 0.0f );
+ XMMATRIX R;
+ float t = 1.0f / S;
+ R.r[0] = XMVectorScale( r[0], t );
+ R.r[1] = XMVectorScale( r[1], t );
+ R.r[2] = XMVectorScale( r[2], t );
+ R.r[3] = XMVectorScale( r[3], t );
+ return R;
+}
+
+//------------------------------------------------------------------------------
+
+inline XMMATRIX operator*
+(
+ float S,
+ CXMMATRIX M
+)
+{
+ XMMATRIX R;
+ R.r[0] = XMVectorScale( M.r[0], S );
+ R.r[1] = XMVectorScale( M.r[1], S );
+ R.r[2] = XMVectorScale( M.r[2], S );
+ R.r[3] = XMVectorScale( M.r[3], S );
+ return R;
+}
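+
+// Example (editorial sketch, not part of the original file): the operators
+// above compose naturally; a component-wise blend of two matrices:
+inline XMMATRIX ExampleLerpMatrix(CXMMATRIX A, CXMMATRIX B, float t)
+{
+ // A component-wise lerp suits translation rows; blend rotations via
+ // quaternion slerp instead.
+ return A * (1.0f - t) + B * t;
+}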
+
+/****************************************************************************
+ *
+ * XMFLOAT3X3 operators
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+
+inline XMFLOAT3X3::XMFLOAT3X3
+(
+ float m00, float m01, float m02,
+ float m10, float m11, float m12,
+ float m20, float m21, float m22
+)
+{
+ m[0][0] = m00;
+ m[0][1] = m01;
+ m[0][2] = m02;
+
+ m[1][0] = m10;
+ m[1][1] = m11;
+ m[1][2] = m12;
+
+ m[2][0] = m20;
+ m[2][1] = m21;
+ m[2][2] = m22;
+}
+
+//------------------------------------------------------------------------------
+_Use_decl_annotations_
+inline XMFLOAT3X3::XMFLOAT3X3
+(
+ const float* pArray
+)
+{
+ assert( pArray != NULL );
+ for (size_t Row = 0; Row < 3; Row++)
+ {
+ for (size_t Column = 0; Column < 3; Column++)
+ {
+ m[Row][Column] = pArray[Row * 3 + Column];
+ }
+ }
+}
+
+//------------------------------------------------------------------------------
+
+inline XMFLOAT3X3& XMFLOAT3X3::operator=
+(
+ const XMFLOAT3X3& Float3x3
+)
+{
+ _11 = Float3x3._11;
+ _12 = Float3x3._12;
+ _13 = Float3x3._13;
+ _21 = Float3x3._21;
+ _22 = Float3x3._22;
+ _23 = Float3x3._23;
+ _31 = Float3x3._31;
+ _32 = Float3x3._32;
+ _33 = Float3x3._33;
+
+ return *this;
+}
+
+/****************************************************************************
+ *
+ * XMFLOAT4X3 operators
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+
+inline XMFLOAT4X3::XMFLOAT4X3
+(
+ float m00, float m01, float m02,
+ float m10, float m11, float m12,
+ float m20, float m21, float m22,
+ float m30, float m31, float m32
+)
+{
+ m[0][0] = m00;
+ m[0][1] = m01;
+ m[0][2] = m02;
+
+ m[1][0] = m10;
+ m[1][1] = m11;
+ m[1][2] = m12;
+
+ m[2][0] = m20;
+ m[2][1] = m21;
+ m[2][2] = m22;
+
+ m[3][0] = m30;
+ m[3][1] = m31;
+ m[3][2] = m32;
+}
+
+//------------------------------------------------------------------------------
+_Use_decl_annotations_
+inline XMFLOAT4X3::XMFLOAT4X3
+(
+ const float* pArray
+)
+{
+ assert( pArray != NULL );
+
+ m[0][0] = pArray[0];
+ m[0][1] = pArray[1];
+ m[0][2] = pArray[2];
+
+ m[1][0] = pArray[3];
+ m[1][1] = pArray[4];
+ m[1][2] = pArray[5];
+
+ m[2][0] = pArray[6];
+ m[2][1] = pArray[7];
+ m[2][2] = pArray[8];
+
+ m[3][0] = pArray[9];
+ m[3][1] = pArray[10];
+ m[3][2] = pArray[11];
+}
+
+//------------------------------------------------------------------------------
+
+inline XMFLOAT4X3& XMFLOAT4X3::operator=
+(
+ const XMFLOAT4X3& Float4x3
+)
+{
+ XMVECTOR V1 = XMLoadFloat4((const XMFLOAT4*)&Float4x3._11);
+ XMVECTOR V2 = XMLoadFloat4((const XMFLOAT4*)&Float4x3._22);
+ XMVECTOR V3 = XMLoadFloat4((const XMFLOAT4*)&Float4x3._33);
+
+ XMStoreFloat4((XMFLOAT4*)&_11, V1);
+ XMStoreFloat4((XMFLOAT4*)&_22, V2);
+ XMStoreFloat4((XMFLOAT4*)&_33, V3);
+
+ return *this;
+}
+
+//------------------------------------------------------------------------------
+
+inline XMFLOAT4X3A& XMFLOAT4X3A::operator=
+(
+ const XMFLOAT4X3A& Float4x3
+)
+{
+ XMVECTOR V1 = XMLoadFloat4A((const XMFLOAT4A*)&Float4x3._11);
+ XMVECTOR V2 = XMLoadFloat4A((const XMFLOAT4A*)&Float4x3._22);
+ XMVECTOR V3 = XMLoadFloat4A((const XMFLOAT4A*)&Float4x3._33);
+
+ XMStoreFloat4A((XMFLOAT4A*)&_11, V1);
+ XMStoreFloat4A((XMFLOAT4A*)&_22, V2);
+ XMStoreFloat4A((XMFLOAT4A*)&_33, V3);
+
+ return *this;
+}
+
+/****************************************************************************
+ *
+ * XMFLOAT4X4 operators
+ *
+ ****************************************************************************/
+
+//------------------------------------------------------------------------------
+
+inline XMFLOAT4X4::XMFLOAT4X4
+(
+ float m00, float m01, float m02, float m03,
+ float m10, float m11, float m12, float m13,
+ float m20, float m21, float m22, float m23,
+ float m30, float m31, float m32, float m33
+)
+{
+ m[0][0] = m00;
+ m[0][1] = m01;
+ m[0][2] = m02;
+ m[0][3] = m03;
+
+ m[1][0] = m10;
+ m[1][1] = m11;
+ m[1][2] = m12;
+ m[1][3] = m13;
+
+ m[2][0] = m20;
+ m[2][1] = m21;
+ m[2][2] = m22;
+ m[2][3] = m23;
+
+ m[3][0] = m30;
+ m[3][1] = m31;
+ m[3][2] = m32;
+ m[3][3] = m33;
+}
+
+//------------------------------------------------------------------------------
+_Use_decl_annotations_
+inline XMFLOAT4X4::XMFLOAT4X4
+(
+ const float* pArray
+)
+{
+ assert( pArray != NULL );
+
+ m[0][0] = pArray[0];
+ m[0][1] = pArray[1];
+ m[0][2] = pArray[2];
+ m[0][3] = pArray[3];
+
+ m[1][0] = pArray[4];
+ m[1][1] = pArray[5];
+ m[1][2] = pArray[6];
+ m[1][3] = pArray[7];
+
+ m[2][0] = pArray[8];
+ m[2][1] = pArray[9];
+ m[2][2] = pArray[10];
+ m[2][3] = pArray[11];
+
+ m[3][0] = pArray[12];
+ m[3][1] = pArray[13];
+ m[3][2] = pArray[14];
+ m[3][3] = pArray[15];
+}
+
+//------------------------------------------------------------------------------
+
+inline XMFLOAT4X4& XMFLOAT4X4::operator=
+(
+ const XMFLOAT4X4& Float4x4
+)
+{
+ XMVECTOR V1 = XMLoadFloat4((const XMFLOAT4*)&Float4x4._11);
+ XMVECTOR V2 = XMLoadFloat4((const XMFLOAT4*)&Float4x4._21);
+ XMVECTOR V3 = XMLoadFloat4((const XMFLOAT4*)&Float4x4._31);
+ XMVECTOR V4 = XMLoadFloat4((const XMFLOAT4*)&Float4x4._41);
+
+ XMStoreFloat4((XMFLOAT4*)&_11, V1);
+ XMStoreFloat4((XMFLOAT4*)&_21, V2);
+ XMStoreFloat4((XMFLOAT4*)&_31, V3);
+ XMStoreFloat4((XMFLOAT4*)&_41, V4);
+
+ return *this;
+}
+
+//------------------------------------------------------------------------------
+
+inline XMFLOAT4X4A& XMFLOAT4X4A::operator=
+(
+ const XMFLOAT4X4A& Float4x4
+)
+{
+ XMVECTOR V1 = XMLoadFloat4A((const XMFLOAT4A*)&Float4x4._11);
+ XMVECTOR V2 = XMLoadFloat4A((const XMFLOAT4A*)&Float4x4._21);
+ XMVECTOR V3 = XMLoadFloat4A((const XMFLOAT4A*)&Float4x4._31);
+ XMVECTOR V4 = XMLoadFloat4A((const XMFLOAT4A*)&Float4x4._41);
+
+ XMStoreFloat4A((XMFLOAT4A*)&_11, V1);
+ XMStoreFloat4A((XMFLOAT4A*)&_21, V2);
+ XMStoreFloat4A((XMFLOAT4A*)&_31, V3);
+ XMStoreFloat4A((XMFLOAT4A*)&_41, V4);
+
+ return *this;
+}
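+
+// Example (editorial sketch, not part of the original file): round-tripping
+// between the XMFLOAT4X4 storage type and the XMMATRIX register type.
+inline void ExampleStoreWorldMatrix(XMFLOAT4X4* pOut, CXMMATRIX world)
+{
+ XMStoreFloat4x4(pOut, world); // spill to the memory layout
+ XMMATRIX reloaded = XMLoadFloat4x4(pOut); // reloaded now equals "world"
+ (void)reloaded;
+}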
+